{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999576540334533, "eval_steps": 500, "global_step": 11807, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.469193309337285e-05, "grad_norm": 4.0744372536826825, "learning_rate": 2.8169014084507045e-08, "loss": 0.9619, "step": 1 }, { "epoch": 0.0001693838661867457, "grad_norm": 4.592684542249665, "learning_rate": 5.633802816901409e-08, "loss": 0.9865, "step": 2 }, { "epoch": 0.0002540757992801186, "grad_norm": 4.404593924699454, "learning_rate": 8.450704225352113e-08, "loss": 0.9141, "step": 3 }, { "epoch": 0.0003387677323734914, "grad_norm": 6.703606346702504, "learning_rate": 1.1267605633802818e-07, "loss": 1.0538, "step": 4 }, { "epoch": 0.0004234596654668643, "grad_norm": 0.7812603873441497, "learning_rate": 1.4084507042253522e-07, "loss": 0.8522, "step": 5 }, { "epoch": 0.0005081515985602372, "grad_norm": 4.084037510869649, "learning_rate": 1.6901408450704225e-07, "loss": 0.9698, "step": 6 }, { "epoch": 0.00059284353165361, "grad_norm": 5.068919183983209, "learning_rate": 1.9718309859154932e-07, "loss": 0.9819, "step": 7 }, { "epoch": 0.0006775354647469828, "grad_norm": 4.345369561391515, "learning_rate": 2.2535211267605636e-07, "loss": 0.9366, "step": 8 }, { "epoch": 0.0007622273978403557, "grad_norm": 4.584932185195616, "learning_rate": 2.535211267605634e-07, "loss": 0.9443, "step": 9 }, { "epoch": 0.0008469193309337286, "grad_norm": 4.104741510056592, "learning_rate": 2.8169014084507043e-07, "loss": 0.9932, "step": 10 }, { "epoch": 0.0009316112640271014, "grad_norm": 4.05793699393999, "learning_rate": 3.0985915492957747e-07, "loss": 0.8975, "step": 11 }, { "epoch": 0.0010163031971204743, "grad_norm": 4.7743115639407945, "learning_rate": 3.380281690140845e-07, "loss": 0.9808, "step": 12 }, { "epoch": 0.001100995130213847, "grad_norm": 0.9211472504032457, "learning_rate": 3.661971830985916e-07, "loss": 0.8449, "step": 13 }, { "epoch": 0.00118568706330722, "grad_norm": 3.7901824368336987, "learning_rate": 3.9436619718309864e-07, "loss": 0.9249, "step": 14 }, { "epoch": 0.0012703789964005929, "grad_norm": 5.067095694087421, "learning_rate": 4.225352112676057e-07, "loss": 0.8846, "step": 15 }, { "epoch": 0.0013550709294939656, "grad_norm": 4.215750590532763, "learning_rate": 4.507042253521127e-07, "loss": 0.9598, "step": 16 }, { "epoch": 0.0014397628625873386, "grad_norm": 4.511132172026537, "learning_rate": 4.788732394366198e-07, "loss": 0.9376, "step": 17 }, { "epoch": 0.0015244547956807114, "grad_norm": 4.258088142545767, "learning_rate": 5.070422535211268e-07, "loss": 1.0293, "step": 18 }, { "epoch": 0.0016091467287740844, "grad_norm": 3.8591073764837813, "learning_rate": 5.352112676056338e-07, "loss": 0.9672, "step": 19 }, { "epoch": 0.0016938386618674571, "grad_norm": 0.9144084924620056, "learning_rate": 5.633802816901409e-07, "loss": 0.8831, "step": 20 }, { "epoch": 0.00177853059496083, "grad_norm": 3.5580686789404012, "learning_rate": 5.915492957746479e-07, "loss": 0.9891, "step": 21 }, { "epoch": 0.001863222528054203, "grad_norm": 3.8452508221108697, "learning_rate": 6.197183098591549e-07, "loss": 0.9158, "step": 22 }, { "epoch": 0.0019479144611475757, "grad_norm": 3.5759406630943276, "learning_rate": 6.47887323943662e-07, "loss": 0.9352, "step": 23 }, { "epoch": 0.0020326063942409487, "grad_norm": 5.841272571327968, "learning_rate": 6.76056338028169e-07, "loss": 0.956, "step": 24 }, { "epoch": 0.002117298327334321, "grad_norm": 4.156006348116952, "learning_rate": 7.042253521126762e-07, "loss": 0.9807, "step": 25 }, { "epoch": 0.002201990260427694, "grad_norm": 4.253873819476285, "learning_rate": 7.323943661971832e-07, "loss": 0.983, "step": 26 }, { "epoch": 0.002286682193521067, "grad_norm": 3.2844315073457615, "learning_rate": 7.605633802816901e-07, "loss": 0.948, "step": 27 }, { "epoch": 0.00237137412661444, "grad_norm": 2.9136716582385396, "learning_rate": 7.887323943661973e-07, "loss": 0.9686, "step": 28 }, { "epoch": 0.0024560660597078127, "grad_norm": 3.0687365553538557, "learning_rate": 8.169014084507043e-07, "loss": 0.938, "step": 29 }, { "epoch": 0.0025407579928011857, "grad_norm": 3.6918740271321937, "learning_rate": 8.450704225352114e-07, "loss": 0.8686, "step": 30 }, { "epoch": 0.0026254499258945587, "grad_norm": 3.3929304230719213, "learning_rate": 8.732394366197183e-07, "loss": 0.9608, "step": 31 }, { "epoch": 0.0027101418589879312, "grad_norm": 0.8761578002798169, "learning_rate": 9.014084507042254e-07, "loss": 0.8839, "step": 32 }, { "epoch": 0.0027948337920813042, "grad_norm": 2.896382794123025, "learning_rate": 9.295774647887325e-07, "loss": 0.8799, "step": 33 }, { "epoch": 0.0028795257251746772, "grad_norm": 3.359667658117667, "learning_rate": 9.577464788732395e-07, "loss": 0.9778, "step": 34 }, { "epoch": 0.0029642176582680498, "grad_norm": 0.8987624828442248, "learning_rate": 9.859154929577465e-07, "loss": 0.881, "step": 35 }, { "epoch": 0.0030489095913614228, "grad_norm": 3.501342554527627, "learning_rate": 1.0140845070422536e-06, "loss": 0.9479, "step": 36 }, { "epoch": 0.0031336015244547957, "grad_norm": 2.665014500074956, "learning_rate": 1.0422535211267606e-06, "loss": 1.004, "step": 37 }, { "epoch": 0.0032182934575481687, "grad_norm": 2.7104682026179217, "learning_rate": 1.0704225352112677e-06, "loss": 0.9314, "step": 38 }, { "epoch": 0.0033029853906415413, "grad_norm": 0.837393930868368, "learning_rate": 1.098591549295775e-06, "loss": 0.8152, "step": 39 }, { "epoch": 0.0033876773237349143, "grad_norm": 2.495082373852111, "learning_rate": 1.1267605633802817e-06, "loss": 0.855, "step": 40 }, { "epoch": 0.0034723692568282873, "grad_norm": 2.2057167290557533, "learning_rate": 1.1549295774647888e-06, "loss": 0.8439, "step": 41 }, { "epoch": 0.00355706118992166, "grad_norm": 2.370934814976097, "learning_rate": 1.1830985915492958e-06, "loss": 0.881, "step": 42 }, { "epoch": 0.003641753123015033, "grad_norm": 4.059550425075507, "learning_rate": 1.211267605633803e-06, "loss": 0.8539, "step": 43 }, { "epoch": 0.003726445056108406, "grad_norm": 2.1211805704562834, "learning_rate": 1.2394366197183099e-06, "loss": 0.8536, "step": 44 }, { "epoch": 0.0038111369892017783, "grad_norm": 2.041152731012668, "learning_rate": 1.267605633802817e-06, "loss": 0.8378, "step": 45 }, { "epoch": 0.0038958289222951513, "grad_norm": 2.035428166471623, "learning_rate": 1.295774647887324e-06, "loss": 0.8576, "step": 46 }, { "epoch": 0.003980520855388524, "grad_norm": 2.2156960015429834, "learning_rate": 1.323943661971831e-06, "loss": 0.8653, "step": 47 }, { "epoch": 0.004065212788481897, "grad_norm": 2.311969432373611, "learning_rate": 1.352112676056338e-06, "loss": 0.8655, "step": 48 }, { "epoch": 0.00414990472157527, "grad_norm": 2.0943827134021546, "learning_rate": 1.3802816901408453e-06, "loss": 0.8356, "step": 49 }, { "epoch": 0.004234596654668642, "grad_norm": 2.7594060066843036, "learning_rate": 1.4084507042253523e-06, "loss": 0.8504, "step": 50 }, { "epoch": 0.004319288587762015, "grad_norm": 2.118282739882444, "learning_rate": 1.4366197183098594e-06, "loss": 0.8765, "step": 51 }, { "epoch": 0.004403980520855388, "grad_norm": 2.4859843818066967, "learning_rate": 1.4647887323943664e-06, "loss": 0.8405, "step": 52 }, { "epoch": 0.004488672453948761, "grad_norm": 1.9190087565157639, "learning_rate": 1.4929577464788732e-06, "loss": 0.9167, "step": 53 }, { "epoch": 0.004573364387042134, "grad_norm": 2.6029180655138036, "learning_rate": 1.5211267605633803e-06, "loss": 0.9046, "step": 54 }, { "epoch": 0.004658056320135507, "grad_norm": 1.6823231526448001, "learning_rate": 1.5492957746478873e-06, "loss": 0.7619, "step": 55 }, { "epoch": 0.00474274825322888, "grad_norm": 1.0690152878802526, "learning_rate": 1.5774647887323946e-06, "loss": 0.8646, "step": 56 }, { "epoch": 0.0048274401863222524, "grad_norm": 1.8941970212074895, "learning_rate": 1.6056338028169016e-06, "loss": 0.8446, "step": 57 }, { "epoch": 0.0049121321194156254, "grad_norm": 1.7267246103144687, "learning_rate": 1.6338028169014086e-06, "loss": 0.8975, "step": 58 }, { "epoch": 0.004996824052508998, "grad_norm": 0.9527795508637809, "learning_rate": 1.6619718309859157e-06, "loss": 0.9036, "step": 59 }, { "epoch": 0.005081515985602371, "grad_norm": 1.6543170953651207, "learning_rate": 1.6901408450704227e-06, "loss": 0.8043, "step": 60 }, { "epoch": 0.005166207918695744, "grad_norm": 2.0567672806022554, "learning_rate": 1.7183098591549297e-06, "loss": 0.7639, "step": 61 }, { "epoch": 0.005250899851789117, "grad_norm": 1.8733660400962506, "learning_rate": 1.7464788732394366e-06, "loss": 0.8111, "step": 62 }, { "epoch": 0.00533559178488249, "grad_norm": 1.3955234560663614, "learning_rate": 1.774647887323944e-06, "loss": 0.8384, "step": 63 }, { "epoch": 0.0054202837179758625, "grad_norm": 1.605136523422616, "learning_rate": 1.8028169014084509e-06, "loss": 0.7285, "step": 64 }, { "epoch": 0.0055049756510692355, "grad_norm": 1.5261315154405297, "learning_rate": 1.8309859154929579e-06, "loss": 0.8019, "step": 65 }, { "epoch": 0.0055896675841626085, "grad_norm": 1.5775194483683035, "learning_rate": 1.859154929577465e-06, "loss": 0.8877, "step": 66 }, { "epoch": 0.0056743595172559814, "grad_norm": 0.9597843768528909, "learning_rate": 1.887323943661972e-06, "loss": 0.8529, "step": 67 }, { "epoch": 0.0057590514503493544, "grad_norm": 1.436052247886578, "learning_rate": 1.915492957746479e-06, "loss": 0.8161, "step": 68 }, { "epoch": 0.005843743383442727, "grad_norm": 0.9881470820279581, "learning_rate": 1.943661971830986e-06, "loss": 0.8482, "step": 69 }, { "epoch": 0.0059284353165360995, "grad_norm": 1.3102999541213354, "learning_rate": 1.971830985915493e-06, "loss": 0.8333, "step": 70 }, { "epoch": 0.0060131272496294725, "grad_norm": 1.5531012895560474, "learning_rate": 2.0000000000000003e-06, "loss": 0.8161, "step": 71 }, { "epoch": 0.0060978191827228455, "grad_norm": 1.5077213686542985, "learning_rate": 2.028169014084507e-06, "loss": 0.7712, "step": 72 }, { "epoch": 0.0061825111158162185, "grad_norm": 1.6668970461869783, "learning_rate": 2.0563380281690144e-06, "loss": 0.8054, "step": 73 }, { "epoch": 0.0062672030489095915, "grad_norm": 1.4759770324545887, "learning_rate": 2.0845070422535212e-06, "loss": 0.8455, "step": 74 }, { "epoch": 0.0063518949820029645, "grad_norm": 1.795526024605226, "learning_rate": 2.1126760563380285e-06, "loss": 0.834, "step": 75 }, { "epoch": 0.0064365869150963375, "grad_norm": 1.9970641586679905, "learning_rate": 2.1408450704225353e-06, "loss": 0.7772, "step": 76 }, { "epoch": 0.00652127884818971, "grad_norm": 1.77894107552745, "learning_rate": 2.169014084507042e-06, "loss": 0.8313, "step": 77 }, { "epoch": 0.006605970781283083, "grad_norm": 1.3888863926702513, "learning_rate": 2.19718309859155e-06, "loss": 0.7238, "step": 78 }, { "epoch": 0.0066906627143764556, "grad_norm": 1.6400751950622878, "learning_rate": 2.2253521126760566e-06, "loss": 0.812, "step": 79 }, { "epoch": 0.0067753546474698285, "grad_norm": 1.9620690323796488, "learning_rate": 2.2535211267605635e-06, "loss": 0.8247, "step": 80 }, { "epoch": 0.0068600465805632015, "grad_norm": 1.6047450011713382, "learning_rate": 2.2816901408450707e-06, "loss": 0.8163, "step": 81 }, { "epoch": 0.0069447385136565745, "grad_norm": 1.5029407505790262, "learning_rate": 2.3098591549295775e-06, "loss": 0.8105, "step": 82 }, { "epoch": 0.007029430446749947, "grad_norm": 1.3659109773429323, "learning_rate": 2.338028169014085e-06, "loss": 0.7527, "step": 83 }, { "epoch": 0.00711412237984332, "grad_norm": 1.3351451445547737, "learning_rate": 2.3661971830985916e-06, "loss": 0.7912, "step": 84 }, { "epoch": 0.007198814312936693, "grad_norm": 1.1823711960281034, "learning_rate": 2.3943661971830984e-06, "loss": 0.7441, "step": 85 }, { "epoch": 0.007283506246030066, "grad_norm": 2.318736850036389, "learning_rate": 2.422535211267606e-06, "loss": 0.8509, "step": 86 }, { "epoch": 0.007368198179123439, "grad_norm": 1.3088422559213697, "learning_rate": 2.450704225352113e-06, "loss": 0.8117, "step": 87 }, { "epoch": 0.007452890112216812, "grad_norm": 1.174759348306234, "learning_rate": 2.4788732394366198e-06, "loss": 0.7241, "step": 88 }, { "epoch": 0.0075375820453101846, "grad_norm": 1.2664862493426674, "learning_rate": 2.507042253521127e-06, "loss": 0.7422, "step": 89 }, { "epoch": 0.007622273978403557, "grad_norm": 1.3638948806515694, "learning_rate": 2.535211267605634e-06, "loss": 0.7865, "step": 90 }, { "epoch": 0.00770696591149693, "grad_norm": 1.5550120213114484, "learning_rate": 2.563380281690141e-06, "loss": 0.7657, "step": 91 }, { "epoch": 0.007791657844590303, "grad_norm": 1.7421021653747917, "learning_rate": 2.591549295774648e-06, "loss": 0.7772, "step": 92 }, { "epoch": 0.007876349777683676, "grad_norm": 1.4533896379300624, "learning_rate": 2.619718309859155e-06, "loss": 0.8021, "step": 93 }, { "epoch": 0.007961041710777049, "grad_norm": 2.0302231046956893, "learning_rate": 2.647887323943662e-06, "loss": 0.7837, "step": 94 }, { "epoch": 0.008045733643870422, "grad_norm": 1.594643247247018, "learning_rate": 2.676056338028169e-06, "loss": 0.7983, "step": 95 }, { "epoch": 0.008130425576963795, "grad_norm": 1.2297913918332926, "learning_rate": 2.704225352112676e-06, "loss": 0.7557, "step": 96 }, { "epoch": 0.008215117510057168, "grad_norm": 1.223312674210928, "learning_rate": 2.7323943661971837e-06, "loss": 0.8243, "step": 97 }, { "epoch": 0.00829980944315054, "grad_norm": 2.4361737066340274, "learning_rate": 2.7605633802816906e-06, "loss": 0.8114, "step": 98 }, { "epoch": 0.008384501376243914, "grad_norm": 1.4697292203300942, "learning_rate": 2.7887323943661974e-06, "loss": 0.7757, "step": 99 }, { "epoch": 0.008469193309337285, "grad_norm": 1.3270282253531869, "learning_rate": 2.8169014084507046e-06, "loss": 0.6591, "step": 100 }, { "epoch": 0.008553885242430658, "grad_norm": 1.4639023977898018, "learning_rate": 2.8450704225352115e-06, "loss": 0.7449, "step": 101 }, { "epoch": 0.00863857717552403, "grad_norm": 1.4579522535049019, "learning_rate": 2.8732394366197187e-06, "loss": 0.8224, "step": 102 }, { "epoch": 0.008723269108617404, "grad_norm": 1.299384546000664, "learning_rate": 2.9014084507042255e-06, "loss": 0.8379, "step": 103 }, { "epoch": 0.008807961041710777, "grad_norm": 1.6854664526367793, "learning_rate": 2.929577464788733e-06, "loss": 0.7454, "step": 104 }, { "epoch": 0.00889265297480415, "grad_norm": 1.5407314432563743, "learning_rate": 2.9577464788732396e-06, "loss": 0.703, "step": 105 }, { "epoch": 0.008977344907897523, "grad_norm": 1.428478960291425, "learning_rate": 2.9859154929577465e-06, "loss": 0.7363, "step": 106 }, { "epoch": 0.009062036840990896, "grad_norm": 1.3470892237277823, "learning_rate": 3.0140845070422537e-06, "loss": 0.7369, "step": 107 }, { "epoch": 0.009146728774084269, "grad_norm": 1.3788636162692036, "learning_rate": 3.0422535211267605e-06, "loss": 0.8204, "step": 108 }, { "epoch": 0.009231420707177642, "grad_norm": 1.4496189346046613, "learning_rate": 3.0704225352112678e-06, "loss": 0.7393, "step": 109 }, { "epoch": 0.009316112640271015, "grad_norm": 2.031387477522849, "learning_rate": 3.0985915492957746e-06, "loss": 0.7746, "step": 110 }, { "epoch": 0.009400804573364388, "grad_norm": 1.5204840678214686, "learning_rate": 3.1267605633802823e-06, "loss": 0.7113, "step": 111 }, { "epoch": 0.00948549650645776, "grad_norm": 1.4003190360486082, "learning_rate": 3.154929577464789e-06, "loss": 0.7537, "step": 112 }, { "epoch": 0.009570188439551132, "grad_norm": 1.4657567433486836, "learning_rate": 3.1830985915492964e-06, "loss": 0.8093, "step": 113 }, { "epoch": 0.009654880372644505, "grad_norm": 1.3563949639051265, "learning_rate": 3.211267605633803e-06, "loss": 0.7396, "step": 114 }, { "epoch": 0.009739572305737878, "grad_norm": 1.6214192265945446, "learning_rate": 3.2394366197183104e-06, "loss": 0.703, "step": 115 }, { "epoch": 0.009824264238831251, "grad_norm": 2.0282199457924732, "learning_rate": 3.2676056338028173e-06, "loss": 0.7733, "step": 116 }, { "epoch": 0.009908956171924624, "grad_norm": 1.0535215282367618, "learning_rate": 3.295774647887324e-06, "loss": 0.8718, "step": 117 }, { "epoch": 0.009993648105017997, "grad_norm": 1.4193653246142222, "learning_rate": 3.3239436619718313e-06, "loss": 0.7912, "step": 118 }, { "epoch": 0.01007834003811137, "grad_norm": 1.291515607163403, "learning_rate": 3.352112676056338e-06, "loss": 0.6861, "step": 119 }, { "epoch": 0.010163031971204743, "grad_norm": 1.2608645020651434, "learning_rate": 3.3802816901408454e-06, "loss": 0.7691, "step": 120 }, { "epoch": 0.010247723904298116, "grad_norm": 1.2908440199156657, "learning_rate": 3.4084507042253522e-06, "loss": 0.7614, "step": 121 }, { "epoch": 0.010332415837391489, "grad_norm": 1.5715004179736582, "learning_rate": 3.4366197183098595e-06, "loss": 0.7399, "step": 122 }, { "epoch": 0.010417107770484862, "grad_norm": 1.4005124572627605, "learning_rate": 3.4647887323943663e-06, "loss": 0.7282, "step": 123 }, { "epoch": 0.010501799703578235, "grad_norm": 1.2086569430193288, "learning_rate": 3.492957746478873e-06, "loss": 0.8072, "step": 124 }, { "epoch": 0.010586491636671608, "grad_norm": 1.5643749597457373, "learning_rate": 3.5211267605633804e-06, "loss": 0.7491, "step": 125 }, { "epoch": 0.01067118356976498, "grad_norm": 1.540807371091072, "learning_rate": 3.549295774647888e-06, "loss": 0.7583, "step": 126 }, { "epoch": 0.010755875502858352, "grad_norm": 1.4471336506141044, "learning_rate": 3.577464788732395e-06, "loss": 0.7675, "step": 127 }, { "epoch": 0.010840567435951725, "grad_norm": 1.5263338231160575, "learning_rate": 3.6056338028169017e-06, "loss": 0.7155, "step": 128 }, { "epoch": 0.010925259369045098, "grad_norm": 0.9057479346131309, "learning_rate": 3.633802816901409e-06, "loss": 0.8646, "step": 129 }, { "epoch": 0.011009951302138471, "grad_norm": 1.3043018372916735, "learning_rate": 3.6619718309859158e-06, "loss": 0.7156, "step": 130 }, { "epoch": 0.011094643235231844, "grad_norm": 1.812040370280273, "learning_rate": 3.690140845070423e-06, "loss": 0.7654, "step": 131 }, { "epoch": 0.011179335168325217, "grad_norm": 8.94246249694414, "learning_rate": 3.71830985915493e-06, "loss": 0.7281, "step": 132 }, { "epoch": 0.01126402710141859, "grad_norm": 1.2389563097265808, "learning_rate": 3.746478873239437e-06, "loss": 0.755, "step": 133 }, { "epoch": 0.011348719034511963, "grad_norm": 1.277170864893811, "learning_rate": 3.774647887323944e-06, "loss": 0.7132, "step": 134 }, { "epoch": 0.011433410967605336, "grad_norm": 1.3422130417775253, "learning_rate": 3.8028169014084508e-06, "loss": 0.7841, "step": 135 }, { "epoch": 0.011518102900698709, "grad_norm": 2.026494463430119, "learning_rate": 3.830985915492958e-06, "loss": 0.724, "step": 136 }, { "epoch": 0.011602794833792082, "grad_norm": 1.5434914605339847, "learning_rate": 3.859154929577465e-06, "loss": 0.7445, "step": 137 }, { "epoch": 0.011687486766885455, "grad_norm": 1.30138861719516, "learning_rate": 3.887323943661972e-06, "loss": 0.7888, "step": 138 }, { "epoch": 0.011772178699978828, "grad_norm": 1.4152907286720775, "learning_rate": 3.915492957746479e-06, "loss": 0.7982, "step": 139 }, { "epoch": 0.011856870633072199, "grad_norm": 1.3250494049362662, "learning_rate": 3.943661971830986e-06, "loss": 0.7778, "step": 140 }, { "epoch": 0.011941562566165572, "grad_norm": 1.356356041393835, "learning_rate": 3.971830985915493e-06, "loss": 0.6824, "step": 141 }, { "epoch": 0.012026254499258945, "grad_norm": 1.3456324584967674, "learning_rate": 4.000000000000001e-06, "loss": 0.7076, "step": 142 }, { "epoch": 0.012110946432352318, "grad_norm": 1.2890062004338154, "learning_rate": 4.028169014084508e-06, "loss": 0.7387, "step": 143 }, { "epoch": 0.012195638365445691, "grad_norm": 0.8133760373128721, "learning_rate": 4.056338028169014e-06, "loss": 0.8383, "step": 144 }, { "epoch": 0.012280330298539064, "grad_norm": 0.7639124930947621, "learning_rate": 4.0845070422535216e-06, "loss": 0.8572, "step": 145 }, { "epoch": 0.012365022231632437, "grad_norm": 1.3868162677085427, "learning_rate": 4.112676056338029e-06, "loss": 0.6994, "step": 146 }, { "epoch": 0.01244971416472581, "grad_norm": 1.0971871093202274, "learning_rate": 4.140845070422535e-06, "loss": 0.7224, "step": 147 }, { "epoch": 0.012534406097819183, "grad_norm": 0.7134865034697839, "learning_rate": 4.1690140845070425e-06, "loss": 0.8461, "step": 148 }, { "epoch": 0.012619098030912556, "grad_norm": 1.3448044272752926, "learning_rate": 4.19718309859155e-06, "loss": 0.7343, "step": 149 }, { "epoch": 0.012703789964005929, "grad_norm": 1.3365744799676194, "learning_rate": 4.225352112676057e-06, "loss": 0.696, "step": 150 }, { "epoch": 0.012788481897099302, "grad_norm": 4.661079202565365, "learning_rate": 4.253521126760563e-06, "loss": 0.7243, "step": 151 }, { "epoch": 0.012873173830192675, "grad_norm": 1.3025894215199227, "learning_rate": 4.281690140845071e-06, "loss": 0.7061, "step": 152 }, { "epoch": 0.012957865763286046, "grad_norm": 1.204490595686934, "learning_rate": 4.309859154929578e-06, "loss": 0.7564, "step": 153 }, { "epoch": 0.01304255769637942, "grad_norm": 1.7835525497570253, "learning_rate": 4.338028169014084e-06, "loss": 0.6876, "step": 154 }, { "epoch": 0.013127249629472792, "grad_norm": 1.433645712366279, "learning_rate": 4.3661971830985915e-06, "loss": 0.7209, "step": 155 }, { "epoch": 0.013211941562566165, "grad_norm": 1.282863570679683, "learning_rate": 4.3943661971831e-06, "loss": 0.7733, "step": 156 }, { "epoch": 0.013296633495659538, "grad_norm": 1.336982937561033, "learning_rate": 4.422535211267606e-06, "loss": 0.8016, "step": 157 }, { "epoch": 0.013381325428752911, "grad_norm": 1.4451616584470033, "learning_rate": 4.450704225352113e-06, "loss": 0.7028, "step": 158 }, { "epoch": 0.013466017361846284, "grad_norm": 1.2224323452582062, "learning_rate": 4.4788732394366205e-06, "loss": 0.7029, "step": 159 }, { "epoch": 0.013550709294939657, "grad_norm": 0.71067414484853, "learning_rate": 4.507042253521127e-06, "loss": 0.8587, "step": 160 }, { "epoch": 0.01363540122803303, "grad_norm": 1.1225641587527733, "learning_rate": 4.535211267605634e-06, "loss": 0.6903, "step": 161 }, { "epoch": 0.013720093161126403, "grad_norm": 1.6772127558597838, "learning_rate": 4.5633802816901414e-06, "loss": 0.7808, "step": 162 }, { "epoch": 0.013804785094219776, "grad_norm": 1.3615841249845853, "learning_rate": 4.591549295774648e-06, "loss": 0.7405, "step": 163 }, { "epoch": 0.013889477027313149, "grad_norm": 1.263770418616889, "learning_rate": 4.619718309859155e-06, "loss": 0.7343, "step": 164 }, { "epoch": 0.013974168960406522, "grad_norm": 1.1863319248670516, "learning_rate": 4.647887323943662e-06, "loss": 0.6995, "step": 165 }, { "epoch": 0.014058860893499893, "grad_norm": 1.2251835603211065, "learning_rate": 4.67605633802817e-06, "loss": 0.7139, "step": 166 }, { "epoch": 0.014143552826593266, "grad_norm": 1.4781384613767956, "learning_rate": 4.704225352112676e-06, "loss": 0.6916, "step": 167 }, { "epoch": 0.01422824475968664, "grad_norm": 1.223410426270711, "learning_rate": 4.732394366197183e-06, "loss": 0.7354, "step": 168 }, { "epoch": 0.014312936692780012, "grad_norm": 1.6749017075753367, "learning_rate": 4.7605633802816905e-06, "loss": 0.7475, "step": 169 }, { "epoch": 0.014397628625873385, "grad_norm": 1.4300587323465321, "learning_rate": 4.788732394366197e-06, "loss": 0.7571, "step": 170 }, { "epoch": 0.014482320558966758, "grad_norm": 0.6576254215331655, "learning_rate": 4.816901408450705e-06, "loss": 0.8394, "step": 171 }, { "epoch": 0.014567012492060131, "grad_norm": 1.592180276234626, "learning_rate": 4.845070422535212e-06, "loss": 0.7783, "step": 172 }, { "epoch": 0.014651704425153504, "grad_norm": 1.341661140983864, "learning_rate": 4.873239436619719e-06, "loss": 0.7695, "step": 173 }, { "epoch": 0.014736396358246877, "grad_norm": 1.5788355949400832, "learning_rate": 4.901408450704226e-06, "loss": 0.7215, "step": 174 }, { "epoch": 0.01482108829134025, "grad_norm": 1.1943911176132835, "learning_rate": 4.929577464788733e-06, "loss": 0.7444, "step": 175 }, { "epoch": 0.014905780224433623, "grad_norm": 3.72985798258023, "learning_rate": 4.9577464788732395e-06, "loss": 0.732, "step": 176 }, { "epoch": 0.014990472157526996, "grad_norm": 1.4620762921457129, "learning_rate": 4.985915492957747e-06, "loss": 0.703, "step": 177 }, { "epoch": 0.015075164090620369, "grad_norm": 1.7179503042969952, "learning_rate": 5.014084507042254e-06, "loss": 0.6919, "step": 178 }, { "epoch": 0.015159856023713742, "grad_norm": 1.6541272869675407, "learning_rate": 5.042253521126761e-06, "loss": 0.6859, "step": 179 }, { "epoch": 0.015244547956807113, "grad_norm": 1.3048405965673848, "learning_rate": 5.070422535211268e-06, "loss": 0.7502, "step": 180 }, { "epoch": 0.015329239889900486, "grad_norm": 1.847920780506289, "learning_rate": 5.098591549295775e-06, "loss": 0.7009, "step": 181 }, { "epoch": 0.01541393182299386, "grad_norm": 1.5103282186564724, "learning_rate": 5.126760563380282e-06, "loss": 0.7114, "step": 182 }, { "epoch": 0.015498623756087232, "grad_norm": 1.6033456927775247, "learning_rate": 5.154929577464789e-06, "loss": 0.7147, "step": 183 }, { "epoch": 0.015583315689180605, "grad_norm": 0.6782820035945544, "learning_rate": 5.183098591549296e-06, "loss": 0.8343, "step": 184 }, { "epoch": 0.01566800762227398, "grad_norm": 2.7934687524199395, "learning_rate": 5.211267605633803e-06, "loss": 0.6883, "step": 185 }, { "epoch": 0.01575269955536735, "grad_norm": 1.3071740687129279, "learning_rate": 5.23943661971831e-06, "loss": 0.6857, "step": 186 }, { "epoch": 0.015837391488460723, "grad_norm": 1.1724288858385419, "learning_rate": 5.267605633802817e-06, "loss": 0.6706, "step": 187 }, { "epoch": 0.015922083421554097, "grad_norm": 1.5717943002958106, "learning_rate": 5.295774647887324e-06, "loss": 0.7233, "step": 188 }, { "epoch": 0.01600677535464747, "grad_norm": 0.6712742487705105, "learning_rate": 5.323943661971831e-06, "loss": 0.8373, "step": 189 }, { "epoch": 0.016091467287740843, "grad_norm": 1.3252736226898019, "learning_rate": 5.352112676056338e-06, "loss": 0.7263, "step": 190 }, { "epoch": 0.016176159220834214, "grad_norm": 1.6462597110122419, "learning_rate": 5.380281690140845e-06, "loss": 0.7602, "step": 191 }, { "epoch": 0.01626085115392759, "grad_norm": 2.393719172848473, "learning_rate": 5.408450704225352e-06, "loss": 0.7999, "step": 192 }, { "epoch": 0.01634554308702096, "grad_norm": 0.6622548941117262, "learning_rate": 5.43661971830986e-06, "loss": 0.7931, "step": 193 }, { "epoch": 0.016430235020114335, "grad_norm": 2.1783578990857055, "learning_rate": 5.4647887323943675e-06, "loss": 0.7214, "step": 194 }, { "epoch": 0.016514926953207706, "grad_norm": 1.3387201540164724, "learning_rate": 5.492957746478874e-06, "loss": 0.7392, "step": 195 }, { "epoch": 0.01659961888630108, "grad_norm": 1.3559472832530683, "learning_rate": 5.521126760563381e-06, "loss": 0.6949, "step": 196 }, { "epoch": 0.016684310819394452, "grad_norm": 3.162721578205358, "learning_rate": 5.549295774647888e-06, "loss": 0.7427, "step": 197 }, { "epoch": 0.016769002752487827, "grad_norm": 1.7962333826269505, "learning_rate": 5.577464788732395e-06, "loss": 0.7387, "step": 198 }, { "epoch": 0.0168536946855812, "grad_norm": 1.3413509726284694, "learning_rate": 5.605633802816902e-06, "loss": 0.7606, "step": 199 }, { "epoch": 0.01693838661867457, "grad_norm": 1.22549958073241, "learning_rate": 5.633802816901409e-06, "loss": 0.7074, "step": 200 }, { "epoch": 0.017023078551767944, "grad_norm": 1.5627456610129467, "learning_rate": 5.6619718309859165e-06, "loss": 0.6491, "step": 201 }, { "epoch": 0.017107770484861316, "grad_norm": 1.347192289304895, "learning_rate": 5.690140845070423e-06, "loss": 0.682, "step": 202 }, { "epoch": 0.01719246241795469, "grad_norm": 1.3617591109508074, "learning_rate": 5.71830985915493e-06, "loss": 0.7398, "step": 203 }, { "epoch": 0.01727715435104806, "grad_norm": 1.4025274926590923, "learning_rate": 5.7464788732394374e-06, "loss": 0.7556, "step": 204 }, { "epoch": 0.017361846284141436, "grad_norm": 1.4431372099188822, "learning_rate": 5.774647887323944e-06, "loss": 0.715, "step": 205 }, { "epoch": 0.017446538217234808, "grad_norm": 1.2723711257924764, "learning_rate": 5.802816901408451e-06, "loss": 0.7622, "step": 206 }, { "epoch": 0.017531230150328182, "grad_norm": 1.4944037255172853, "learning_rate": 5.830985915492958e-06, "loss": 0.7398, "step": 207 }, { "epoch": 0.017615922083421554, "grad_norm": 1.349272323015877, "learning_rate": 5.859154929577466e-06, "loss": 0.7407, "step": 208 }, { "epoch": 0.017700614016514928, "grad_norm": 1.5172263035309401, "learning_rate": 5.887323943661972e-06, "loss": 0.7174, "step": 209 }, { "epoch": 0.0177853059496083, "grad_norm": 1.1993054372418979, "learning_rate": 5.915492957746479e-06, "loss": 0.658, "step": 210 }, { "epoch": 0.017869997882701674, "grad_norm": 1.7878978159595509, "learning_rate": 5.9436619718309865e-06, "loss": 0.661, "step": 211 }, { "epoch": 0.017954689815795045, "grad_norm": 1.9693964093750043, "learning_rate": 5.971830985915493e-06, "loss": 0.719, "step": 212 }, { "epoch": 0.018039381748888417, "grad_norm": 1.4774706903672903, "learning_rate": 6e-06, "loss": 0.7163, "step": 213 }, { "epoch": 0.01812407368198179, "grad_norm": 1.5102711999434077, "learning_rate": 6.028169014084507e-06, "loss": 0.7097, "step": 214 }, { "epoch": 0.018208765615075163, "grad_norm": 0.7093493792373116, "learning_rate": 6.056338028169015e-06, "loss": 0.8553, "step": 215 }, { "epoch": 0.018293457548168537, "grad_norm": 1.3580626206800739, "learning_rate": 6.084507042253521e-06, "loss": 0.7108, "step": 216 }, { "epoch": 0.01837814948126191, "grad_norm": 0.6493492901066413, "learning_rate": 6.112676056338028e-06, "loss": 0.852, "step": 217 }, { "epoch": 0.018462841414355283, "grad_norm": 1.4118309828198434, "learning_rate": 6.1408450704225356e-06, "loss": 0.7161, "step": 218 }, { "epoch": 0.018547533347448655, "grad_norm": 1.7197388511754932, "learning_rate": 6.169014084507042e-06, "loss": 0.7905, "step": 219 }, { "epoch": 0.01863222528054203, "grad_norm": 2.172846313464195, "learning_rate": 6.197183098591549e-06, "loss": 0.6841, "step": 220 }, { "epoch": 0.0187169172136354, "grad_norm": 1.3868097147543599, "learning_rate": 6.2253521126760565e-06, "loss": 0.6852, "step": 221 }, { "epoch": 0.018801609146728775, "grad_norm": 1.222345696717358, "learning_rate": 6.2535211267605646e-06, "loss": 0.6995, "step": 222 }, { "epoch": 0.018886301079822147, "grad_norm": 1.3755190259004726, "learning_rate": 6.281690140845072e-06, "loss": 0.7144, "step": 223 }, { "epoch": 0.01897099301291552, "grad_norm": 0.7064876080052365, "learning_rate": 6.309859154929578e-06, "loss": 0.9062, "step": 224 }, { "epoch": 0.019055684946008893, "grad_norm": 1.3977567582071186, "learning_rate": 6.3380281690140855e-06, "loss": 0.7026, "step": 225 }, { "epoch": 0.019140376879102264, "grad_norm": 6.816847010033593, "learning_rate": 6.366197183098593e-06, "loss": 0.6283, "step": 226 }, { "epoch": 0.01922506881219564, "grad_norm": 1.4152764690385213, "learning_rate": 6.394366197183099e-06, "loss": 0.6644, "step": 227 }, { "epoch": 0.01930976074528901, "grad_norm": 1.3512636325718135, "learning_rate": 6.422535211267606e-06, "loss": 0.6711, "step": 228 }, { "epoch": 0.019394452678382385, "grad_norm": 1.8997047741350805, "learning_rate": 6.450704225352114e-06, "loss": 0.7559, "step": 229 }, { "epoch": 0.019479144611475756, "grad_norm": 0.5999907478093204, "learning_rate": 6.478873239436621e-06, "loss": 0.8224, "step": 230 }, { "epoch": 0.01956383654456913, "grad_norm": 1.3088150500058764, "learning_rate": 6.507042253521127e-06, "loss": 0.6837, "step": 231 }, { "epoch": 0.019648528477662502, "grad_norm": 1.300415995595563, "learning_rate": 6.5352112676056345e-06, "loss": 0.6542, "step": 232 }, { "epoch": 0.019733220410755876, "grad_norm": 1.5884249553788838, "learning_rate": 6.563380281690142e-06, "loss": 0.7355, "step": 233 }, { "epoch": 0.019817912343849248, "grad_norm": 1.7790177165346772, "learning_rate": 6.591549295774648e-06, "loss": 0.7518, "step": 234 }, { "epoch": 0.019902604276942622, "grad_norm": 0.6174483472698002, "learning_rate": 6.619718309859155e-06, "loss": 0.8427, "step": 235 }, { "epoch": 0.019987296210035994, "grad_norm": 1.3127529708344239, "learning_rate": 6.647887323943663e-06, "loss": 0.68, "step": 236 }, { "epoch": 0.02007198814312937, "grad_norm": 1.8096123357554021, "learning_rate": 6.67605633802817e-06, "loss": 0.7038, "step": 237 }, { "epoch": 0.02015668007622274, "grad_norm": 1.2935722256025763, "learning_rate": 6.704225352112676e-06, "loss": 0.6587, "step": 238 }, { "epoch": 0.02024137200931611, "grad_norm": 1.734926163610349, "learning_rate": 6.7323943661971836e-06, "loss": 0.6912, "step": 239 }, { "epoch": 0.020326063942409486, "grad_norm": 1.1538268430130212, "learning_rate": 6.760563380281691e-06, "loss": 0.7088, "step": 240 }, { "epoch": 0.020410755875502857, "grad_norm": 1.5784030041174513, "learning_rate": 6.788732394366197e-06, "loss": 0.7222, "step": 241 }, { "epoch": 0.02049544780859623, "grad_norm": 1.7622747273536088, "learning_rate": 6.8169014084507045e-06, "loss": 0.7038, "step": 242 }, { "epoch": 0.020580139741689603, "grad_norm": 1.6818627365332646, "learning_rate": 6.845070422535212e-06, "loss": 0.7103, "step": 243 }, { "epoch": 0.020664831674782978, "grad_norm": 2.6809890968551255, "learning_rate": 6.873239436619719e-06, "loss": 0.7294, "step": 244 }, { "epoch": 0.02074952360787635, "grad_norm": 1.6332098007878306, "learning_rate": 6.901408450704225e-06, "loss": 0.6806, "step": 245 }, { "epoch": 0.020834215540969724, "grad_norm": 1.1660458548147454, "learning_rate": 6.929577464788733e-06, "loss": 0.7499, "step": 246 }, { "epoch": 0.020918907474063095, "grad_norm": 0.6723319908212276, "learning_rate": 6.95774647887324e-06, "loss": 0.8791, "step": 247 }, { "epoch": 0.02100359940715647, "grad_norm": 1.315818658500149, "learning_rate": 6.985915492957746e-06, "loss": 0.723, "step": 248 }, { "epoch": 0.02108829134024984, "grad_norm": 1.3754652857360536, "learning_rate": 7.0140845070422535e-06, "loss": 0.7731, "step": 249 }, { "epoch": 0.021172983273343216, "grad_norm": 1.526206744733635, "learning_rate": 7.042253521126761e-06, "loss": 0.739, "step": 250 }, { "epoch": 0.021257675206436587, "grad_norm": 1.6146085034751312, "learning_rate": 7.070422535211268e-06, "loss": 0.6804, "step": 251 }, { "epoch": 0.02134236713952996, "grad_norm": 1.482689365386518, "learning_rate": 7.098591549295776e-06, "loss": 0.6799, "step": 252 }, { "epoch": 0.021427059072623333, "grad_norm": 1.3993293509473033, "learning_rate": 7.1267605633802825e-06, "loss": 0.6787, "step": 253 }, { "epoch": 0.021511751005716704, "grad_norm": 1.2081669311233227, "learning_rate": 7.15492957746479e-06, "loss": 0.744, "step": 254 }, { "epoch": 0.02159644293881008, "grad_norm": 1.4913709974567186, "learning_rate": 7.183098591549297e-06, "loss": 0.6827, "step": 255 }, { "epoch": 0.02168113487190345, "grad_norm": 1.2203973179310232, "learning_rate": 7.211267605633803e-06, "loss": 0.7364, "step": 256 }, { "epoch": 0.021765826804996825, "grad_norm": 1.551849910698067, "learning_rate": 7.239436619718311e-06, "loss": 0.713, "step": 257 }, { "epoch": 0.021850518738090196, "grad_norm": 1.3625585796652404, "learning_rate": 7.267605633802818e-06, "loss": 0.7175, "step": 258 }, { "epoch": 0.02193521067118357, "grad_norm": 1.6330179955867523, "learning_rate": 7.295774647887325e-06, "loss": 0.7399, "step": 259 }, { "epoch": 0.022019902604276942, "grad_norm": 1.3058319598059198, "learning_rate": 7.3239436619718316e-06, "loss": 0.731, "step": 260 }, { "epoch": 0.022104594537370317, "grad_norm": 1.3175293425688746, "learning_rate": 7.352112676056339e-06, "loss": 0.7364, "step": 261 }, { "epoch": 0.022189286470463688, "grad_norm": 1.4074604494965348, "learning_rate": 7.380281690140846e-06, "loss": 0.7045, "step": 262 }, { "epoch": 0.022273978403557063, "grad_norm": 1.5726450054321564, "learning_rate": 7.4084507042253525e-06, "loss": 0.6687, "step": 263 }, { "epoch": 0.022358670336650434, "grad_norm": 1.545258345236008, "learning_rate": 7.43661971830986e-06, "loss": 0.6531, "step": 264 }, { "epoch": 0.02244336226974381, "grad_norm": 1.3704251861700345, "learning_rate": 7.464788732394367e-06, "loss": 0.6162, "step": 265 }, { "epoch": 0.02252805420283718, "grad_norm": 1.6305519342852983, "learning_rate": 7.492957746478874e-06, "loss": 0.7083, "step": 266 }, { "epoch": 0.02261274613593055, "grad_norm": 1.436136857137381, "learning_rate": 7.521126760563381e-06, "loss": 0.7229, "step": 267 }, { "epoch": 0.022697438069023926, "grad_norm": 1.6987443162711282, "learning_rate": 7.549295774647888e-06, "loss": 0.7772, "step": 268 }, { "epoch": 0.022782130002117297, "grad_norm": 2.140318407277499, "learning_rate": 7.577464788732395e-06, "loss": 0.7497, "step": 269 }, { "epoch": 0.022866821935210672, "grad_norm": 1.3666118059666148, "learning_rate": 7.6056338028169015e-06, "loss": 0.7062, "step": 270 }, { "epoch": 0.022951513868304043, "grad_norm": 1.2843227494835907, "learning_rate": 7.633802816901409e-06, "loss": 0.704, "step": 271 }, { "epoch": 0.023036205801397418, "grad_norm": 1.370943655537785, "learning_rate": 7.661971830985916e-06, "loss": 0.7318, "step": 272 }, { "epoch": 0.02312089773449079, "grad_norm": 1.3609747742986131, "learning_rate": 7.690140845070423e-06, "loss": 0.7287, "step": 273 }, { "epoch": 0.023205589667584164, "grad_norm": 1.328222190199093, "learning_rate": 7.71830985915493e-06, "loss": 0.7332, "step": 274 }, { "epoch": 0.023290281600677535, "grad_norm": 1.3291067393843339, "learning_rate": 7.746478873239436e-06, "loss": 0.7226, "step": 275 }, { "epoch": 0.02337497353377091, "grad_norm": 1.227388224117603, "learning_rate": 7.774647887323943e-06, "loss": 0.6323, "step": 276 }, { "epoch": 0.02345966546686428, "grad_norm": 1.2793057433441588, "learning_rate": 7.80281690140845e-06, "loss": 0.7015, "step": 277 }, { "epoch": 0.023544357399957656, "grad_norm": 1.4023110320850667, "learning_rate": 7.830985915492958e-06, "loss": 0.7198, "step": 278 }, { "epoch": 0.023629049333051027, "grad_norm": 1.5425286860549094, "learning_rate": 7.859154929577465e-06, "loss": 0.7348, "step": 279 }, { "epoch": 0.023713741266144398, "grad_norm": 0.6272630322945952, "learning_rate": 7.887323943661972e-06, "loss": 0.8217, "step": 280 }, { "epoch": 0.023798433199237773, "grad_norm": 1.2269432766642032, "learning_rate": 7.91549295774648e-06, "loss": 0.711, "step": 281 }, { "epoch": 0.023883125132331144, "grad_norm": 0.6468130852614125, "learning_rate": 7.943661971830987e-06, "loss": 0.8763, "step": 282 }, { "epoch": 0.02396781706542452, "grad_norm": 1.2270800608107597, "learning_rate": 7.971830985915494e-06, "loss": 0.69, "step": 283 }, { "epoch": 0.02405250899851789, "grad_norm": 1.4958861208407148, "learning_rate": 8.000000000000001e-06, "loss": 0.6875, "step": 284 }, { "epoch": 0.024137200931611265, "grad_norm": 1.322732389337291, "learning_rate": 8.028169014084509e-06, "loss": 0.7691, "step": 285 }, { "epoch": 0.024221892864704636, "grad_norm": 0.6502576118786113, "learning_rate": 8.056338028169016e-06, "loss": 0.8291, "step": 286 }, { "epoch": 0.02430658479779801, "grad_norm": 1.2528040472515667, "learning_rate": 8.084507042253521e-06, "loss": 0.6977, "step": 287 }, { "epoch": 0.024391276730891382, "grad_norm": 1.5905082480043435, "learning_rate": 8.112676056338029e-06, "loss": 0.6844, "step": 288 }, { "epoch": 0.024475968663984757, "grad_norm": 1.261061378179289, "learning_rate": 8.140845070422536e-06, "loss": 0.7558, "step": 289 }, { "epoch": 0.024560660597078128, "grad_norm": 1.5988397638265057, "learning_rate": 8.169014084507043e-06, "loss": 0.6948, "step": 290 }, { "epoch": 0.024645352530171503, "grad_norm": 1.2162309155062714, "learning_rate": 8.19718309859155e-06, "loss": 0.7256, "step": 291 }, { "epoch": 0.024730044463264874, "grad_norm": 1.4604784613608393, "learning_rate": 8.225352112676058e-06, "loss": 0.6928, "step": 292 }, { "epoch": 0.024814736396358245, "grad_norm": 0.651755455946981, "learning_rate": 8.253521126760565e-06, "loss": 0.8773, "step": 293 }, { "epoch": 0.02489942832945162, "grad_norm": 1.2790439257076796, "learning_rate": 8.28169014084507e-06, "loss": 0.6682, "step": 294 }, { "epoch": 0.02498412026254499, "grad_norm": 1.3869097657429894, "learning_rate": 8.309859154929578e-06, "loss": 0.7105, "step": 295 }, { "epoch": 0.025068812195638366, "grad_norm": 1.8540940624564892, "learning_rate": 8.338028169014085e-06, "loss": 0.7243, "step": 296 }, { "epoch": 0.025153504128731737, "grad_norm": 0.6819082624680487, "learning_rate": 8.366197183098592e-06, "loss": 0.8413, "step": 297 }, { "epoch": 0.025238196061825112, "grad_norm": 1.2549808315831927, "learning_rate": 8.3943661971831e-06, "loss": 0.6728, "step": 298 }, { "epoch": 0.025322887994918483, "grad_norm": 1.4477713389205504, "learning_rate": 8.422535211267607e-06, "loss": 0.7159, "step": 299 }, { "epoch": 0.025407579928011858, "grad_norm": 1.9286991055192055, "learning_rate": 8.450704225352114e-06, "loss": 0.6965, "step": 300 }, { "epoch": 0.02549227186110523, "grad_norm": 1.5293071967787684, "learning_rate": 8.47887323943662e-06, "loss": 0.6848, "step": 301 }, { "epoch": 0.025576963794198604, "grad_norm": 1.4300534356418593, "learning_rate": 8.507042253521127e-06, "loss": 0.7422, "step": 302 }, { "epoch": 0.025661655727291975, "grad_norm": 1.6612196277735862, "learning_rate": 8.535211267605634e-06, "loss": 0.6718, "step": 303 }, { "epoch": 0.02574634766038535, "grad_norm": 1.4315142743489142, "learning_rate": 8.563380281690141e-06, "loss": 0.6586, "step": 304 }, { "epoch": 0.02583103959347872, "grad_norm": 1.1953435167541764, "learning_rate": 8.591549295774648e-06, "loss": 0.6885, "step": 305 }, { "epoch": 0.025915731526572092, "grad_norm": 1.1018125490830117, "learning_rate": 8.619718309859156e-06, "loss": 0.6683, "step": 306 }, { "epoch": 0.026000423459665467, "grad_norm": 1.2635886833970114, "learning_rate": 8.647887323943663e-06, "loss": 0.6875, "step": 307 }, { "epoch": 0.02608511539275884, "grad_norm": 1.3906772774392762, "learning_rate": 8.676056338028169e-06, "loss": 0.7369, "step": 308 }, { "epoch": 0.026169807325852213, "grad_norm": 1.812596824789902, "learning_rate": 8.704225352112676e-06, "loss": 0.6788, "step": 309 }, { "epoch": 0.026254499258945584, "grad_norm": 1.4629848965958736, "learning_rate": 8.732394366197183e-06, "loss": 0.6994, "step": 310 }, { "epoch": 0.02633919119203896, "grad_norm": 1.5156913238381353, "learning_rate": 8.760563380281692e-06, "loss": 0.6706, "step": 311 }, { "epoch": 0.02642388312513233, "grad_norm": 0.6852616006482886, "learning_rate": 8.7887323943662e-06, "loss": 0.8881, "step": 312 }, { "epoch": 0.026508575058225705, "grad_norm": 1.9426858894378214, "learning_rate": 8.816901408450705e-06, "loss": 0.7199, "step": 313 }, { "epoch": 0.026593266991319076, "grad_norm": 1.585950381510644, "learning_rate": 8.845070422535212e-06, "loss": 0.6909, "step": 314 }, { "epoch": 0.02667795892441245, "grad_norm": 1.5103586497161199, "learning_rate": 8.87323943661972e-06, "loss": 0.6921, "step": 315 }, { "epoch": 0.026762650857505822, "grad_norm": 1.2354643771323144, "learning_rate": 8.901408450704227e-06, "loss": 0.6607, "step": 316 }, { "epoch": 0.026847342790599197, "grad_norm": 0.6359398440471887, "learning_rate": 8.929577464788734e-06, "loss": 0.8546, "step": 317 }, { "epoch": 0.026932034723692568, "grad_norm": 2.0765069413607797, "learning_rate": 8.957746478873241e-06, "loss": 0.7372, "step": 318 }, { "epoch": 0.02701672665678594, "grad_norm": 1.3645276324391755, "learning_rate": 8.985915492957748e-06, "loss": 0.674, "step": 319 }, { "epoch": 0.027101418589879314, "grad_norm": 1.3503513730799637, "learning_rate": 9.014084507042254e-06, "loss": 0.7358, "step": 320 }, { "epoch": 0.027186110522972685, "grad_norm": 1.3142271976754192, "learning_rate": 9.042253521126761e-06, "loss": 0.6118, "step": 321 }, { "epoch": 0.02727080245606606, "grad_norm": 1.181630438577719, "learning_rate": 9.070422535211268e-06, "loss": 0.7063, "step": 322 }, { "epoch": 0.02735549438915943, "grad_norm": 1.6922582185396629, "learning_rate": 9.098591549295776e-06, "loss": 0.6531, "step": 323 }, { "epoch": 0.027440186322252806, "grad_norm": 1.3023629122843694, "learning_rate": 9.126760563380283e-06, "loss": 0.7391, "step": 324 }, { "epoch": 0.027524878255346177, "grad_norm": 1.5344056519294451, "learning_rate": 9.15492957746479e-06, "loss": 0.7312, "step": 325 }, { "epoch": 0.027609570188439552, "grad_norm": 1.4305529343357213, "learning_rate": 9.183098591549296e-06, "loss": 0.7043, "step": 326 }, { "epoch": 0.027694262121532923, "grad_norm": 1.3039961724192848, "learning_rate": 9.211267605633803e-06, "loss": 0.7015, "step": 327 }, { "epoch": 0.027778954054626298, "grad_norm": 1.343296354159552, "learning_rate": 9.23943661971831e-06, "loss": 0.7367, "step": 328 }, { "epoch": 0.02786364598771967, "grad_norm": 1.4844024409993584, "learning_rate": 9.267605633802817e-06, "loss": 0.7844, "step": 329 }, { "epoch": 0.027948337920813044, "grad_norm": 1.534992274135166, "learning_rate": 9.295774647887325e-06, "loss": 0.7378, "step": 330 }, { "epoch": 0.028033029853906415, "grad_norm": 1.4441202641707938, "learning_rate": 9.323943661971832e-06, "loss": 0.694, "step": 331 }, { "epoch": 0.028117721786999787, "grad_norm": 1.1536791833585136, "learning_rate": 9.35211267605634e-06, "loss": 0.6569, "step": 332 }, { "epoch": 0.02820241372009316, "grad_norm": 1.4693527666253312, "learning_rate": 9.380281690140845e-06, "loss": 0.6896, "step": 333 }, { "epoch": 0.028287105653186533, "grad_norm": 1.5040827398151195, "learning_rate": 9.408450704225352e-06, "loss": 0.7428, "step": 334 }, { "epoch": 0.028371797586279907, "grad_norm": 1.47908881667025, "learning_rate": 9.43661971830986e-06, "loss": 0.7068, "step": 335 }, { "epoch": 0.02845648951937328, "grad_norm": 1.5064831455314798, "learning_rate": 9.464788732394366e-06, "loss": 0.7108, "step": 336 }, { "epoch": 0.028541181452466653, "grad_norm": 1.490372250384717, "learning_rate": 9.492957746478874e-06, "loss": 0.7149, "step": 337 }, { "epoch": 0.028625873385560024, "grad_norm": 1.7081063287968268, "learning_rate": 9.521126760563381e-06, "loss": 0.7076, "step": 338 }, { "epoch": 0.0287105653186534, "grad_norm": 1.483582863745492, "learning_rate": 9.549295774647888e-06, "loss": 0.7177, "step": 339 }, { "epoch": 0.02879525725174677, "grad_norm": 1.8763763619809048, "learning_rate": 9.577464788732394e-06, "loss": 0.6511, "step": 340 }, { "epoch": 0.028879949184840145, "grad_norm": 1.38466376502316, "learning_rate": 9.605633802816903e-06, "loss": 0.7733, "step": 341 }, { "epoch": 0.028964641117933516, "grad_norm": 2.960670862497645, "learning_rate": 9.63380281690141e-06, "loss": 0.7266, "step": 342 }, { "epoch": 0.02904933305102689, "grad_norm": 1.3885714971346552, "learning_rate": 9.661971830985917e-06, "loss": 0.7116, "step": 343 }, { "epoch": 0.029134024984120262, "grad_norm": 1.3367283005028534, "learning_rate": 9.690140845070424e-06, "loss": 0.7253, "step": 344 }, { "epoch": 0.029218716917213637, "grad_norm": 1.2831520133620735, "learning_rate": 9.71830985915493e-06, "loss": 0.7375, "step": 345 }, { "epoch": 0.02930340885030701, "grad_norm": 1.2103657881082226, "learning_rate": 9.746478873239437e-06, "loss": 0.7373, "step": 346 }, { "epoch": 0.02938810078340038, "grad_norm": 1.860004275888134, "learning_rate": 9.774647887323945e-06, "loss": 0.7763, "step": 347 }, { "epoch": 0.029472792716493754, "grad_norm": 1.5682504426059638, "learning_rate": 9.802816901408452e-06, "loss": 0.6531, "step": 348 }, { "epoch": 0.029557484649587126, "grad_norm": 1.4649789125848265, "learning_rate": 9.830985915492959e-06, "loss": 0.6934, "step": 349 }, { "epoch": 0.0296421765826805, "grad_norm": 0.6937232522381515, "learning_rate": 9.859154929577466e-06, "loss": 0.9199, "step": 350 }, { "epoch": 0.02972686851577387, "grad_norm": 2.0539374036245563, "learning_rate": 9.887323943661974e-06, "loss": 0.7497, "step": 351 }, { "epoch": 0.029811560448867246, "grad_norm": 1.2775905046840847, "learning_rate": 9.915492957746479e-06, "loss": 0.72, "step": 352 }, { "epoch": 0.029896252381960618, "grad_norm": 1.3724857878624506, "learning_rate": 9.943661971830986e-06, "loss": 0.7056, "step": 353 }, { "epoch": 0.029980944315053992, "grad_norm": 1.3436991581496602, "learning_rate": 9.971830985915494e-06, "loss": 0.7297, "step": 354 }, { "epoch": 0.030065636248147364, "grad_norm": 1.3915888817213593, "learning_rate": 1e-05, "loss": 0.7437, "step": 355 }, { "epoch": 0.030150328181240738, "grad_norm": 1.7668178642256598, "learning_rate": 9.999999811861762e-06, "loss": 0.6604, "step": 356 }, { "epoch": 0.03023502011433411, "grad_norm": 0.5986626283490132, "learning_rate": 9.999999247447063e-06, "loss": 0.8339, "step": 357 }, { "epoch": 0.030319712047427484, "grad_norm": 2.0009457863984044, "learning_rate": 9.999998306755942e-06, "loss": 0.6532, "step": 358 }, { "epoch": 0.030404403980520855, "grad_norm": 2.601389278951494, "learning_rate": 9.999996989788473e-06, "loss": 0.6781, "step": 359 }, { "epoch": 0.030489095913614227, "grad_norm": 1.4681707578073993, "learning_rate": 9.999995296544756e-06, "loss": 0.6696, "step": 360 }, { "epoch": 0.0305737878467076, "grad_norm": 1.3643449608697167, "learning_rate": 9.999993227024916e-06, "loss": 0.7193, "step": 361 }, { "epoch": 0.030658479779800973, "grad_norm": 2.7075348764008136, "learning_rate": 9.999990781229107e-06, "loss": 0.6198, "step": 362 }, { "epoch": 0.030743171712894347, "grad_norm": 1.6153480659983674, "learning_rate": 9.999987959157518e-06, "loss": 0.7002, "step": 363 }, { "epoch": 0.03082786364598772, "grad_norm": 1.3137005920748117, "learning_rate": 9.999984760810357e-06, "loss": 0.6212, "step": 364 }, { "epoch": 0.030912555579081093, "grad_norm": 1.297609428088413, "learning_rate": 9.999981186187868e-06, "loss": 0.6743, "step": 365 }, { "epoch": 0.030997247512174465, "grad_norm": 0.6523530170845508, "learning_rate": 9.99997723529032e-06, "loss": 0.8435, "step": 366 }, { "epoch": 0.03108193944526784, "grad_norm": 1.8048827104500211, "learning_rate": 9.999972908118006e-06, "loss": 0.7336, "step": 367 }, { "epoch": 0.03116663137836121, "grad_norm": 2.3797608588377157, "learning_rate": 9.999968204671256e-06, "loss": 0.6861, "step": 368 }, { "epoch": 0.03125132331145458, "grad_norm": 1.3048682083444387, "learning_rate": 9.999963124950422e-06, "loss": 0.7004, "step": 369 }, { "epoch": 0.03133601524454796, "grad_norm": 1.510070103035014, "learning_rate": 9.999957668955888e-06, "loss": 0.6935, "step": 370 }, { "epoch": 0.03142070717764133, "grad_norm": 1.4721454967773246, "learning_rate": 9.999951836688063e-06, "loss": 0.7069, "step": 371 }, { "epoch": 0.0315053991107347, "grad_norm": 1.786245483290481, "learning_rate": 9.999945628147384e-06, "loss": 0.7077, "step": 372 }, { "epoch": 0.031590091043828074, "grad_norm": 1.3568113911596913, "learning_rate": 9.999939043334323e-06, "loss": 0.7292, "step": 373 }, { "epoch": 0.031674782976921445, "grad_norm": 2.1766715079564185, "learning_rate": 9.999932082249372e-06, "loss": 0.6831, "step": 374 }, { "epoch": 0.03175947491001482, "grad_norm": 1.171214674321602, "learning_rate": 9.999924744893057e-06, "loss": 0.6079, "step": 375 }, { "epoch": 0.031844166843108195, "grad_norm": 1.6617791578462413, "learning_rate": 9.999917031265928e-06, "loss": 0.6899, "step": 376 }, { "epoch": 0.031928858776201566, "grad_norm": 4.15548661639251, "learning_rate": 9.999908941368566e-06, "loss": 0.7058, "step": 377 }, { "epoch": 0.03201355070929494, "grad_norm": 1.3314982286853303, "learning_rate": 9.999900475201581e-06, "loss": 0.6583, "step": 378 }, { "epoch": 0.032098242642388315, "grad_norm": 1.7622836417022287, "learning_rate": 9.99989163276561e-06, "loss": 0.7251, "step": 379 }, { "epoch": 0.032182934575481686, "grad_norm": 1.2125360952444673, "learning_rate": 9.999882414061319e-06, "loss": 0.6719, "step": 380 }, { "epoch": 0.03226762650857506, "grad_norm": 1.2928593520647473, "learning_rate": 9.999872819089399e-06, "loss": 0.6952, "step": 381 }, { "epoch": 0.03235231844166843, "grad_norm": 1.2236962230041326, "learning_rate": 9.999862847850575e-06, "loss": 0.64, "step": 382 }, { "epoch": 0.03243701037476181, "grad_norm": 1.3818181228100146, "learning_rate": 9.999852500345595e-06, "loss": 0.6748, "step": 383 }, { "epoch": 0.03252170230785518, "grad_norm": 1.2762986530762988, "learning_rate": 9.999841776575238e-06, "loss": 0.6778, "step": 384 }, { "epoch": 0.03260639424094855, "grad_norm": 1.2255128370503587, "learning_rate": 9.999830676540315e-06, "loss": 0.6804, "step": 385 }, { "epoch": 0.03269108617404192, "grad_norm": 1.596063782361363, "learning_rate": 9.999819200241656e-06, "loss": 0.7235, "step": 386 }, { "epoch": 0.03277577810713529, "grad_norm": 1.351489404635431, "learning_rate": 9.999807347680126e-06, "loss": 0.7293, "step": 387 }, { "epoch": 0.03286047004022867, "grad_norm": 0.6066796966536746, "learning_rate": 9.999795118856619e-06, "loss": 0.8949, "step": 388 }, { "epoch": 0.03294516197332204, "grad_norm": 2.1884962742736738, "learning_rate": 9.999782513772055e-06, "loss": 0.7101, "step": 389 }, { "epoch": 0.03302985390641541, "grad_norm": 1.8608755587888055, "learning_rate": 9.999769532427381e-06, "loss": 0.7271, "step": 390 }, { "epoch": 0.033114545839508784, "grad_norm": 1.288313852056057, "learning_rate": 9.999756174823574e-06, "loss": 0.6692, "step": 391 }, { "epoch": 0.03319923777260216, "grad_norm": 2.223623826165453, "learning_rate": 9.999742440961642e-06, "loss": 0.7066, "step": 392 }, { "epoch": 0.033283929705695534, "grad_norm": 1.3891256693705778, "learning_rate": 9.999728330842614e-06, "loss": 0.7211, "step": 393 }, { "epoch": 0.033368621638788905, "grad_norm": 1.745539167964889, "learning_rate": 9.999713844467556e-06, "loss": 0.7018, "step": 394 }, { "epoch": 0.033453313571882276, "grad_norm": 1.4881488086778545, "learning_rate": 9.999698981837556e-06, "loss": 0.7409, "step": 395 }, { "epoch": 0.033538005504975654, "grad_norm": 1.7234470354441747, "learning_rate": 9.999683742953732e-06, "loss": 0.7668, "step": 396 }, { "epoch": 0.033622697438069025, "grad_norm": 1.3362247200833128, "learning_rate": 9.999668127817232e-06, "loss": 0.6753, "step": 397 }, { "epoch": 0.0337073893711624, "grad_norm": 2.321450990676444, "learning_rate": 9.999652136429232e-06, "loss": 0.6232, "step": 398 }, { "epoch": 0.03379208130425577, "grad_norm": 1.386119280454937, "learning_rate": 9.999635768790934e-06, "loss": 0.6995, "step": 399 }, { "epoch": 0.03387677323734914, "grad_norm": 1.4952378467500913, "learning_rate": 9.999619024903569e-06, "loss": 0.7168, "step": 400 }, { "epoch": 0.03396146517044252, "grad_norm": 1.3178463541009389, "learning_rate": 9.9996019047684e-06, "loss": 0.6675, "step": 401 }, { "epoch": 0.03404615710353589, "grad_norm": 1.5166978902138497, "learning_rate": 9.999584408386711e-06, "loss": 0.6899, "step": 402 }, { "epoch": 0.03413084903662926, "grad_norm": 0.6651924833620746, "learning_rate": 9.999566535759822e-06, "loss": 0.8552, "step": 403 }, { "epoch": 0.03421554096972263, "grad_norm": 1.7377302264644683, "learning_rate": 9.999548286889078e-06, "loss": 0.714, "step": 404 }, { "epoch": 0.03430023290281601, "grad_norm": 1.3330381039131316, "learning_rate": 9.999529661775851e-06, "loss": 0.6484, "step": 405 }, { "epoch": 0.03438492483590938, "grad_norm": 0.623710531953404, "learning_rate": 9.999510660421545e-06, "loss": 0.8365, "step": 406 }, { "epoch": 0.03446961676900275, "grad_norm": 1.4812748333577799, "learning_rate": 9.999491282827588e-06, "loss": 0.6729, "step": 407 }, { "epoch": 0.03455430870209612, "grad_norm": 1.3899977662288325, "learning_rate": 9.999471528995435e-06, "loss": 0.6399, "step": 408 }, { "epoch": 0.0346390006351895, "grad_norm": 1.5785917607983806, "learning_rate": 9.999451398926578e-06, "loss": 0.7418, "step": 409 }, { "epoch": 0.03472369256828287, "grad_norm": 1.33779408276683, "learning_rate": 9.99943089262253e-06, "loss": 0.6746, "step": 410 }, { "epoch": 0.034808384501376244, "grad_norm": 2.299389253123296, "learning_rate": 9.999410010084834e-06, "loss": 0.7661, "step": 411 }, { "epoch": 0.034893076434469615, "grad_norm": 2.0212630352836642, "learning_rate": 9.999388751315063e-06, "loss": 0.7488, "step": 412 }, { "epoch": 0.034977768367562986, "grad_norm": 1.7454378958153132, "learning_rate": 9.999367116314813e-06, "loss": 0.7664, "step": 413 }, { "epoch": 0.035062460300656365, "grad_norm": 1.842138768859292, "learning_rate": 9.999345105085715e-06, "loss": 0.7014, "step": 414 }, { "epoch": 0.035147152233749736, "grad_norm": 3.469144399600082, "learning_rate": 9.999322717629428e-06, "loss": 0.7148, "step": 415 }, { "epoch": 0.03523184416684311, "grad_norm": 1.4455771520675367, "learning_rate": 9.99929995394763e-06, "loss": 0.6874, "step": 416 }, { "epoch": 0.03531653609993648, "grad_norm": 1.2271146792461658, "learning_rate": 9.99927681404204e-06, "loss": 0.6386, "step": 417 }, { "epoch": 0.035401228033029856, "grad_norm": 1.173718570244764, "learning_rate": 9.999253297914397e-06, "loss": 0.7432, "step": 418 }, { "epoch": 0.03548591996612323, "grad_norm": 1.6883640364711627, "learning_rate": 9.99922940556647e-06, "loss": 0.6951, "step": 419 }, { "epoch": 0.0355706118992166, "grad_norm": 1.2460389953560573, "learning_rate": 9.999205137000059e-06, "loss": 0.7175, "step": 420 }, { "epoch": 0.03565530383230997, "grad_norm": 1.2949479019876133, "learning_rate": 9.99918049221699e-06, "loss": 0.723, "step": 421 }, { "epoch": 0.03573999576540335, "grad_norm": 0.7331583433106269, "learning_rate": 9.999155471219115e-06, "loss": 0.9028, "step": 422 }, { "epoch": 0.03582468769849672, "grad_norm": 1.4888136959701548, "learning_rate": 9.99913007400832e-06, "loss": 0.6924, "step": 423 }, { "epoch": 0.03590937963159009, "grad_norm": 1.7652578320740362, "learning_rate": 9.999104300586517e-06, "loss": 0.6722, "step": 424 }, { "epoch": 0.03599407156468346, "grad_norm": 1.4814511095707923, "learning_rate": 9.999078150955642e-06, "loss": 0.7142, "step": 425 }, { "epoch": 0.03607876349777683, "grad_norm": 1.2931356047778952, "learning_rate": 9.999051625117665e-06, "loss": 0.7989, "step": 426 }, { "epoch": 0.03616345543087021, "grad_norm": 1.5623858405296753, "learning_rate": 9.999024723074582e-06, "loss": 0.6836, "step": 427 }, { "epoch": 0.03624814736396358, "grad_norm": 0.637865153034139, "learning_rate": 9.998997444828418e-06, "loss": 0.82, "step": 428 }, { "epoch": 0.036332839297056954, "grad_norm": 1.4801387024816983, "learning_rate": 9.998969790381226e-06, "loss": 0.6614, "step": 429 }, { "epoch": 0.036417531230150325, "grad_norm": 1.3623277486035792, "learning_rate": 9.998941759735086e-06, "loss": 0.6963, "step": 430 }, { "epoch": 0.036502223163243704, "grad_norm": 1.3886819370826717, "learning_rate": 9.998913352892107e-06, "loss": 0.7139, "step": 431 }, { "epoch": 0.036586915096337075, "grad_norm": 0.6804219614864264, "learning_rate": 9.998884569854429e-06, "loss": 0.8481, "step": 432 }, { "epoch": 0.036671607029430446, "grad_norm": 1.3850598615403824, "learning_rate": 9.998855410624216e-06, "loss": 0.685, "step": 433 }, { "epoch": 0.03675629896252382, "grad_norm": 1.390833158210276, "learning_rate": 9.998825875203662e-06, "loss": 0.7176, "step": 434 }, { "epoch": 0.036840990895617196, "grad_norm": 1.8045186733445082, "learning_rate": 9.998795963594992e-06, "loss": 0.648, "step": 435 }, { "epoch": 0.03692568282871057, "grad_norm": 1.7067991247969647, "learning_rate": 9.998765675800455e-06, "loss": 0.743, "step": 436 }, { "epoch": 0.03701037476180394, "grad_norm": 1.586263246853119, "learning_rate": 9.998735011822332e-06, "loss": 0.7315, "step": 437 }, { "epoch": 0.03709506669489731, "grad_norm": 0.6068848454186808, "learning_rate": 9.99870397166293e-06, "loss": 0.7997, "step": 438 }, { "epoch": 0.03717975862799068, "grad_norm": 1.4015907603588935, "learning_rate": 9.998672555324585e-06, "loss": 0.7253, "step": 439 }, { "epoch": 0.03726445056108406, "grad_norm": 1.4058427197445036, "learning_rate": 9.99864076280966e-06, "loss": 0.7485, "step": 440 }, { "epoch": 0.03734914249417743, "grad_norm": 1.2698310364212564, "learning_rate": 9.998608594120549e-06, "loss": 0.6008, "step": 441 }, { "epoch": 0.0374338344272708, "grad_norm": 1.421197013049192, "learning_rate": 9.998576049259672e-06, "loss": 0.7645, "step": 442 }, { "epoch": 0.03751852636036417, "grad_norm": 1.5139343657374993, "learning_rate": 9.99854312822948e-06, "loss": 0.6703, "step": 443 }, { "epoch": 0.03760321829345755, "grad_norm": 1.1598317456166805, "learning_rate": 9.998509831032448e-06, "loss": 0.7422, "step": 444 }, { "epoch": 0.03768791022655092, "grad_norm": 1.3423919863862113, "learning_rate": 9.998476157671084e-06, "loss": 0.7052, "step": 445 }, { "epoch": 0.03777260215964429, "grad_norm": 1.4239176695218059, "learning_rate": 9.998442108147919e-06, "loss": 0.6702, "step": 446 }, { "epoch": 0.037857294092737664, "grad_norm": 1.4003296637708618, "learning_rate": 9.998407682465518e-06, "loss": 0.72, "step": 447 }, { "epoch": 0.03794198602583104, "grad_norm": 0.5864469450828834, "learning_rate": 9.998372880626471e-06, "loss": 0.883, "step": 448 }, { "epoch": 0.038026677958924414, "grad_norm": 1.5468095335426344, "learning_rate": 9.998337702633398e-06, "loss": 0.7101, "step": 449 }, { "epoch": 0.038111369892017785, "grad_norm": 0.667892872615734, "learning_rate": 9.998302148488946e-06, "loss": 0.9131, "step": 450 }, { "epoch": 0.038196061825111156, "grad_norm": 1.196813413196778, "learning_rate": 9.998266218195786e-06, "loss": 0.6563, "step": 451 }, { "epoch": 0.03828075375820453, "grad_norm": 0.6269441061697923, "learning_rate": 9.99822991175663e-06, "loss": 0.8375, "step": 452 }, { "epoch": 0.038365445691297906, "grad_norm": 1.1616405670931782, "learning_rate": 9.998193229174206e-06, "loss": 0.6921, "step": 453 }, { "epoch": 0.03845013762439128, "grad_norm": 1.3534258385478386, "learning_rate": 9.998156170451274e-06, "loss": 0.7433, "step": 454 }, { "epoch": 0.03853482955748465, "grad_norm": 1.157008931211866, "learning_rate": 9.998118735590622e-06, "loss": 0.7009, "step": 455 }, { "epoch": 0.03861952149057802, "grad_norm": 1.761046264376433, "learning_rate": 9.998080924595072e-06, "loss": 0.7352, "step": 456 }, { "epoch": 0.0387042134236714, "grad_norm": 1.4004812946901997, "learning_rate": 9.998042737467463e-06, "loss": 0.7144, "step": 457 }, { "epoch": 0.03878890535676477, "grad_norm": 1.3910798111760754, "learning_rate": 9.998004174210677e-06, "loss": 0.6492, "step": 458 }, { "epoch": 0.03887359728985814, "grad_norm": 2.279598834285797, "learning_rate": 9.997965234827607e-06, "loss": 0.7355, "step": 459 }, { "epoch": 0.03895828922295151, "grad_norm": 1.6312682954717506, "learning_rate": 9.997925919321188e-06, "loss": 0.6754, "step": 460 }, { "epoch": 0.03904298115604489, "grad_norm": 1.7220787070954278, "learning_rate": 9.99788622769438e-06, "loss": 0.688, "step": 461 }, { "epoch": 0.03912767308913826, "grad_norm": 1.4775869426662092, "learning_rate": 9.997846159950166e-06, "loss": 0.6161, "step": 462 }, { "epoch": 0.03921236502223163, "grad_norm": 1.3168836316325774, "learning_rate": 9.997805716091567e-06, "loss": 0.6693, "step": 463 }, { "epoch": 0.039297056955325003, "grad_norm": 1.3095845048846186, "learning_rate": 9.99776489612162e-06, "loss": 0.7156, "step": 464 }, { "epoch": 0.039381748888418375, "grad_norm": 1.4447819286658306, "learning_rate": 9.997723700043402e-06, "loss": 0.6847, "step": 465 }, { "epoch": 0.03946644082151175, "grad_norm": 0.6843272242799969, "learning_rate": 9.99768212786001e-06, "loss": 0.8931, "step": 466 }, { "epoch": 0.039551132754605124, "grad_norm": 1.2340352600804494, "learning_rate": 9.997640179574575e-06, "loss": 0.7139, "step": 467 }, { "epoch": 0.039635824687698495, "grad_norm": 1.4915517298472243, "learning_rate": 9.997597855190251e-06, "loss": 0.7036, "step": 468 }, { "epoch": 0.03972051662079187, "grad_norm": 1.5075675120669851, "learning_rate": 9.997555154710228e-06, "loss": 0.702, "step": 469 }, { "epoch": 0.039805208553885245, "grad_norm": 1.3725595542728954, "learning_rate": 9.997512078137715e-06, "loss": 0.6769, "step": 470 }, { "epoch": 0.039889900486978616, "grad_norm": 2.248135727958788, "learning_rate": 9.997468625475953e-06, "loss": 0.6992, "step": 471 }, { "epoch": 0.03997459242007199, "grad_norm": 1.290990053937097, "learning_rate": 9.997424796728217e-06, "loss": 0.7042, "step": 472 }, { "epoch": 0.04005928435316536, "grad_norm": 1.0765379990616484, "learning_rate": 9.997380591897801e-06, "loss": 0.7009, "step": 473 }, { "epoch": 0.04014397628625874, "grad_norm": 1.3862976827787332, "learning_rate": 9.997336010988032e-06, "loss": 0.656, "step": 474 }, { "epoch": 0.04022866821935211, "grad_norm": 1.6545324803972739, "learning_rate": 9.997291054002267e-06, "loss": 0.7832, "step": 475 }, { "epoch": 0.04031336015244548, "grad_norm": 1.4400894392669428, "learning_rate": 9.997245720943888e-06, "loss": 0.69, "step": 476 }, { "epoch": 0.04039805208553885, "grad_norm": 1.439576214767381, "learning_rate": 9.997200011816308e-06, "loss": 0.7366, "step": 477 }, { "epoch": 0.04048274401863222, "grad_norm": 1.4399556158948636, "learning_rate": 9.997153926622964e-06, "loss": 0.6727, "step": 478 }, { "epoch": 0.0405674359517256, "grad_norm": 0.6790686708319209, "learning_rate": 9.997107465367328e-06, "loss": 0.8219, "step": 479 }, { "epoch": 0.04065212788481897, "grad_norm": 1.0945630906776624, "learning_rate": 9.997060628052892e-06, "loss": 0.6106, "step": 480 }, { "epoch": 0.04073681981791234, "grad_norm": 1.5022295771704832, "learning_rate": 9.997013414683185e-06, "loss": 0.6759, "step": 481 }, { "epoch": 0.040821511751005714, "grad_norm": 2.2054306979685947, "learning_rate": 9.996965825261758e-06, "loss": 0.7279, "step": 482 }, { "epoch": 0.04090620368409909, "grad_norm": 1.3341345735544194, "learning_rate": 9.99691785979219e-06, "loss": 0.7324, "step": 483 }, { "epoch": 0.04099089561719246, "grad_norm": 1.5671011668865134, "learning_rate": 9.996869518278096e-06, "loss": 0.657, "step": 484 }, { "epoch": 0.041075587550285834, "grad_norm": 1.4086686103621313, "learning_rate": 9.99682080072311e-06, "loss": 0.7308, "step": 485 }, { "epoch": 0.041160279483379206, "grad_norm": 1.4365846617440403, "learning_rate": 9.996771707130898e-06, "loss": 0.7458, "step": 486 }, { "epoch": 0.041244971416472584, "grad_norm": 1.1958393525605282, "learning_rate": 9.996722237505159e-06, "loss": 0.6754, "step": 487 }, { "epoch": 0.041329663349565955, "grad_norm": 1.3549911639236083, "learning_rate": 9.996672391849609e-06, "loss": 0.7582, "step": 488 }, { "epoch": 0.041414355282659326, "grad_norm": 1.5441723999723411, "learning_rate": 9.996622170168006e-06, "loss": 0.6839, "step": 489 }, { "epoch": 0.0414990472157527, "grad_norm": 1.3721607812706618, "learning_rate": 9.996571572464125e-06, "loss": 0.7024, "step": 490 }, { "epoch": 0.041583739148846076, "grad_norm": 1.5960339556687624, "learning_rate": 9.996520598741774e-06, "loss": 0.7029, "step": 491 }, { "epoch": 0.04166843108193945, "grad_norm": 1.1696693379899608, "learning_rate": 9.996469249004791e-06, "loss": 0.6412, "step": 492 }, { "epoch": 0.04175312301503282, "grad_norm": 1.2711605900413707, "learning_rate": 9.99641752325704e-06, "loss": 0.6521, "step": 493 }, { "epoch": 0.04183781494812619, "grad_norm": 1.603019983657884, "learning_rate": 9.996365421502413e-06, "loss": 0.7102, "step": 494 }, { "epoch": 0.04192250688121956, "grad_norm": 1.3608296411534835, "learning_rate": 9.99631294374483e-06, "loss": 0.7352, "step": 495 }, { "epoch": 0.04200719881431294, "grad_norm": 1.5401065330783257, "learning_rate": 9.996260089988243e-06, "loss": 0.6668, "step": 496 }, { "epoch": 0.04209189074740631, "grad_norm": 1.3524830378689174, "learning_rate": 9.996206860236626e-06, "loss": 0.7334, "step": 497 }, { "epoch": 0.04217658268049968, "grad_norm": 1.6729552976883233, "learning_rate": 9.996153254493987e-06, "loss": 0.7682, "step": 498 }, { "epoch": 0.04226127461359305, "grad_norm": 1.443094611494485, "learning_rate": 9.996099272764361e-06, "loss": 0.6983, "step": 499 }, { "epoch": 0.04234596654668643, "grad_norm": 0.7163767377951301, "learning_rate": 9.996044915051808e-06, "loss": 0.8982, "step": 500 }, { "epoch": 0.0424306584797798, "grad_norm": 0.6666994490288864, "learning_rate": 9.99599018136042e-06, "loss": 0.8537, "step": 501 }, { "epoch": 0.042515350412873174, "grad_norm": 1.2035514027477316, "learning_rate": 9.995935071694316e-06, "loss": 0.6531, "step": 502 }, { "epoch": 0.042600042345966545, "grad_norm": 1.4394966263013553, "learning_rate": 9.995879586057644e-06, "loss": 0.6789, "step": 503 }, { "epoch": 0.04268473427905992, "grad_norm": 1.463503925311527, "learning_rate": 9.995823724454576e-06, "loss": 0.6714, "step": 504 }, { "epoch": 0.042769426212153294, "grad_norm": 1.8367595896925997, "learning_rate": 9.99576748688932e-06, "loss": 0.7401, "step": 505 }, { "epoch": 0.042854118145246665, "grad_norm": 1.6918778603464903, "learning_rate": 9.99571087336611e-06, "loss": 0.7228, "step": 506 }, { "epoch": 0.04293881007834004, "grad_norm": 1.3700470744043949, "learning_rate": 9.995653883889199e-06, "loss": 0.6498, "step": 507 }, { "epoch": 0.04302350201143341, "grad_norm": 1.3620293754151613, "learning_rate": 9.99559651846288e-06, "loss": 0.6893, "step": 508 }, { "epoch": 0.043108193944526786, "grad_norm": 1.3979878027263808, "learning_rate": 9.995538777091472e-06, "loss": 0.6392, "step": 509 }, { "epoch": 0.04319288587762016, "grad_norm": 3.4169209586875344, "learning_rate": 9.995480659779317e-06, "loss": 0.7127, "step": 510 }, { "epoch": 0.04327757781071353, "grad_norm": 1.7214031457987913, "learning_rate": 9.995422166530793e-06, "loss": 0.7244, "step": 511 }, { "epoch": 0.0433622697438069, "grad_norm": 1.4509062561942838, "learning_rate": 9.995363297350296e-06, "loss": 0.6929, "step": 512 }, { "epoch": 0.04344696167690028, "grad_norm": 1.4697046196090038, "learning_rate": 9.99530405224226e-06, "loss": 0.7047, "step": 513 }, { "epoch": 0.04353165360999365, "grad_norm": 1.3824783533685157, "learning_rate": 9.995244431211142e-06, "loss": 0.645, "step": 514 }, { "epoch": 0.04361634554308702, "grad_norm": 1.3446219799304524, "learning_rate": 9.99518443426143e-06, "loss": 0.6583, "step": 515 }, { "epoch": 0.04370103747618039, "grad_norm": 1.1856012642243332, "learning_rate": 9.995124061397638e-06, "loss": 0.7025, "step": 516 }, { "epoch": 0.04378572940927377, "grad_norm": 1.4309121988984652, "learning_rate": 9.995063312624312e-06, "loss": 0.683, "step": 517 }, { "epoch": 0.04387042134236714, "grad_norm": 1.6157099972204074, "learning_rate": 9.995002187946018e-06, "loss": 0.6605, "step": 518 }, { "epoch": 0.04395511327546051, "grad_norm": 1.619172274622234, "learning_rate": 9.994940687367363e-06, "loss": 0.6776, "step": 519 }, { "epoch": 0.044039805208553884, "grad_norm": 1.4673237101699224, "learning_rate": 9.994878810892972e-06, "loss": 0.723, "step": 520 }, { "epoch": 0.044124497141647255, "grad_norm": 1.340818136462021, "learning_rate": 9.994816558527498e-06, "loss": 0.7234, "step": 521 }, { "epoch": 0.04420918907474063, "grad_norm": 2.075561447100926, "learning_rate": 9.994753930275631e-06, "loss": 0.6642, "step": 522 }, { "epoch": 0.044293881007834005, "grad_norm": 1.5233705278188676, "learning_rate": 9.994690926142083e-06, "loss": 0.6902, "step": 523 }, { "epoch": 0.044378572940927376, "grad_norm": 1.67207868736659, "learning_rate": 9.994627546131594e-06, "loss": 0.694, "step": 524 }, { "epoch": 0.04446326487402075, "grad_norm": 1.306068582573052, "learning_rate": 9.994563790248934e-06, "loss": 0.6604, "step": 525 }, { "epoch": 0.044547956807114125, "grad_norm": 0.7375523520710316, "learning_rate": 9.994499658498902e-06, "loss": 0.8117, "step": 526 }, { "epoch": 0.044632648740207496, "grad_norm": 1.6669559966648089, "learning_rate": 9.994435150886324e-06, "loss": 0.7168, "step": 527 }, { "epoch": 0.04471734067330087, "grad_norm": 1.5370616705589468, "learning_rate": 9.994370267416053e-06, "loss": 0.7572, "step": 528 }, { "epoch": 0.04480203260639424, "grad_norm": 1.3547860394792268, "learning_rate": 9.994305008092973e-06, "loss": 0.641, "step": 529 }, { "epoch": 0.04488672453948762, "grad_norm": 1.3448688547413374, "learning_rate": 9.994239372921996e-06, "loss": 0.6946, "step": 530 }, { "epoch": 0.04497141647258099, "grad_norm": 1.3284564615173338, "learning_rate": 9.99417336190806e-06, "loss": 0.7083, "step": 531 }, { "epoch": 0.04505610840567436, "grad_norm": 1.680281564259981, "learning_rate": 9.994106975056132e-06, "loss": 0.6939, "step": 532 }, { "epoch": 0.04514080033876773, "grad_norm": 1.5759934168907717, "learning_rate": 9.99404021237121e-06, "loss": 0.7399, "step": 533 }, { "epoch": 0.0452254922718611, "grad_norm": 1.3825634022973619, "learning_rate": 9.993973073858318e-06, "loss": 0.6835, "step": 534 }, { "epoch": 0.04531018420495448, "grad_norm": 0.6220334289038626, "learning_rate": 9.993905559522507e-06, "loss": 0.9015, "step": 535 }, { "epoch": 0.04539487613804785, "grad_norm": 4.302433427863126, "learning_rate": 9.993837669368858e-06, "loss": 0.6472, "step": 536 }, { "epoch": 0.04547956807114122, "grad_norm": 1.5786158286505374, "learning_rate": 9.993769403402483e-06, "loss": 0.7055, "step": 537 }, { "epoch": 0.045564260004234594, "grad_norm": 1.8086009338339348, "learning_rate": 9.993700761628515e-06, "loss": 0.741, "step": 538 }, { "epoch": 0.04564895193732797, "grad_norm": 1.584776314310096, "learning_rate": 9.993631744052122e-06, "loss": 0.6583, "step": 539 }, { "epoch": 0.045733643870421344, "grad_norm": 7.7121643959637804, "learning_rate": 9.993562350678499e-06, "loss": 0.7338, "step": 540 }, { "epoch": 0.045818335803514715, "grad_norm": 1.2790361213410686, "learning_rate": 9.993492581512865e-06, "loss": 0.6768, "step": 541 }, { "epoch": 0.045903027736608086, "grad_norm": 1.1580390952857404, "learning_rate": 9.993422436560474e-06, "loss": 0.6673, "step": 542 }, { "epoch": 0.045987719669701464, "grad_norm": 1.5731686466637584, "learning_rate": 9.993351915826604e-06, "loss": 0.7157, "step": 543 }, { "epoch": 0.046072411602794835, "grad_norm": 1.4575226406872153, "learning_rate": 9.993281019316559e-06, "loss": 0.7034, "step": 544 }, { "epoch": 0.04615710353588821, "grad_norm": 1.2188902182187893, "learning_rate": 9.993209747035677e-06, "loss": 0.7211, "step": 545 }, { "epoch": 0.04624179546898158, "grad_norm": 1.5747805921982623, "learning_rate": 9.993138098989322e-06, "loss": 0.7063, "step": 546 }, { "epoch": 0.04632648740207495, "grad_norm": 1.6383004593798485, "learning_rate": 9.993066075182886e-06, "loss": 0.7155, "step": 547 }, { "epoch": 0.04641117933516833, "grad_norm": 1.5944783549730757, "learning_rate": 9.992993675621786e-06, "loss": 0.6749, "step": 548 }, { "epoch": 0.0464958712682617, "grad_norm": 1.272407907633792, "learning_rate": 9.992920900311474e-06, "loss": 0.7061, "step": 549 }, { "epoch": 0.04658056320135507, "grad_norm": 1.32914663208799, "learning_rate": 9.992847749257426e-06, "loss": 0.7228, "step": 550 }, { "epoch": 0.04666525513444844, "grad_norm": 1.4256919354140054, "learning_rate": 9.992774222465147e-06, "loss": 0.6821, "step": 551 }, { "epoch": 0.04674994706754182, "grad_norm": 1.2572664744695508, "learning_rate": 9.992700319940169e-06, "loss": 0.7297, "step": 552 }, { "epoch": 0.04683463900063519, "grad_norm": 1.2683594712997257, "learning_rate": 9.992626041688054e-06, "loss": 0.6725, "step": 553 }, { "epoch": 0.04691933093372856, "grad_norm": 1.4077387246610957, "learning_rate": 9.992551387714392e-06, "loss": 0.6464, "step": 554 }, { "epoch": 0.04700402286682193, "grad_norm": 0.6845910776865534, "learning_rate": 9.992476358024802e-06, "loss": 0.8959, "step": 555 }, { "epoch": 0.04708871479991531, "grad_norm": 1.9547491930567038, "learning_rate": 9.99240095262493e-06, "loss": 0.7114, "step": 556 }, { "epoch": 0.04717340673300868, "grad_norm": 1.3066630045371466, "learning_rate": 9.992325171520447e-06, "loss": 0.7055, "step": 557 }, { "epoch": 0.047258098666102054, "grad_norm": 2.5077324271782184, "learning_rate": 9.992249014717063e-06, "loss": 0.6805, "step": 558 }, { "epoch": 0.047342790599195425, "grad_norm": 1.1191125511852618, "learning_rate": 9.992172482220504e-06, "loss": 0.6301, "step": 559 }, { "epoch": 0.047427482532288796, "grad_norm": 1.1403120565933413, "learning_rate": 9.99209557403653e-06, "loss": 0.7287, "step": 560 }, { "epoch": 0.047512174465382175, "grad_norm": 1.1824989667294037, "learning_rate": 9.992018290170932e-06, "loss": 0.674, "step": 561 }, { "epoch": 0.047596866398475546, "grad_norm": 1.291447790632429, "learning_rate": 9.991940630629522e-06, "loss": 0.6958, "step": 562 }, { "epoch": 0.04768155833156892, "grad_norm": 1.464261750892477, "learning_rate": 9.991862595418147e-06, "loss": 0.6928, "step": 563 }, { "epoch": 0.04776625026466229, "grad_norm": 1.5036679998778875, "learning_rate": 9.99178418454268e-06, "loss": 0.7304, "step": 564 }, { "epoch": 0.047850942197755666, "grad_norm": 1.3289914330897958, "learning_rate": 9.991705398009017e-06, "loss": 0.6997, "step": 565 }, { "epoch": 0.04793563413084904, "grad_norm": 1.377167678648793, "learning_rate": 9.99162623582309e-06, "loss": 0.7193, "step": 566 }, { "epoch": 0.04802032606394241, "grad_norm": 1.473290244834987, "learning_rate": 9.99154669799086e-06, "loss": 0.7259, "step": 567 }, { "epoch": 0.04810501799703578, "grad_norm": 1.5539450547265137, "learning_rate": 9.991466784518309e-06, "loss": 0.7143, "step": 568 }, { "epoch": 0.04818970993012916, "grad_norm": 1.1037295403497152, "learning_rate": 9.99138649541145e-06, "loss": 0.6516, "step": 569 }, { "epoch": 0.04827440186322253, "grad_norm": 1.4165606525773207, "learning_rate": 9.991305830676325e-06, "loss": 0.7069, "step": 570 }, { "epoch": 0.0483590937963159, "grad_norm": 1.3161951765075843, "learning_rate": 9.99122479031901e-06, "loss": 0.6791, "step": 571 }, { "epoch": 0.04844378572940927, "grad_norm": 1.343467170713533, "learning_rate": 9.991143374345597e-06, "loss": 0.6781, "step": 572 }, { "epoch": 0.04852847766250264, "grad_norm": 1.896548518146627, "learning_rate": 9.991061582762217e-06, "loss": 0.6488, "step": 573 }, { "epoch": 0.04861316959559602, "grad_norm": 1.5019524626151417, "learning_rate": 9.990979415575024e-06, "loss": 0.7462, "step": 574 }, { "epoch": 0.04869786152868939, "grad_norm": 1.4429758609840062, "learning_rate": 9.9908968727902e-06, "loss": 0.706, "step": 575 }, { "epoch": 0.048782553461782764, "grad_norm": 1.8166800229874747, "learning_rate": 9.99081395441396e-06, "loss": 0.6865, "step": 576 }, { "epoch": 0.048867245394876135, "grad_norm": 1.4970939018826683, "learning_rate": 9.99073066045254e-06, "loss": 0.6707, "step": 577 }, { "epoch": 0.048951937327969514, "grad_norm": 0.6375868141108181, "learning_rate": 9.990646990912213e-06, "loss": 0.8753, "step": 578 }, { "epoch": 0.049036629261062885, "grad_norm": 1.5814014394371594, "learning_rate": 9.990562945799272e-06, "loss": 0.6743, "step": 579 }, { "epoch": 0.049121321194156256, "grad_norm": 1.4253422192129783, "learning_rate": 9.990478525120044e-06, "loss": 0.6817, "step": 580 }, { "epoch": 0.04920601312724963, "grad_norm": 1.349599042665638, "learning_rate": 9.99039372888088e-06, "loss": 0.653, "step": 581 }, { "epoch": 0.049290705060343006, "grad_norm": 1.438338407453091, "learning_rate": 9.990308557088164e-06, "loss": 0.7465, "step": 582 }, { "epoch": 0.04937539699343638, "grad_norm": 1.2777364366853707, "learning_rate": 9.990223009748303e-06, "loss": 0.6749, "step": 583 }, { "epoch": 0.04946008892652975, "grad_norm": 2.4618589555213366, "learning_rate": 9.990137086867737e-06, "loss": 0.6611, "step": 584 }, { "epoch": 0.04954478085962312, "grad_norm": 1.3398516521319883, "learning_rate": 9.990050788452929e-06, "loss": 0.6224, "step": 585 }, { "epoch": 0.04962947279271649, "grad_norm": 1.2581641803376407, "learning_rate": 9.989964114510378e-06, "loss": 0.6752, "step": 586 }, { "epoch": 0.04971416472580987, "grad_norm": 1.289548958996374, "learning_rate": 9.989877065046604e-06, "loss": 0.6484, "step": 587 }, { "epoch": 0.04979885665890324, "grad_norm": 7.543737218932855, "learning_rate": 9.989789640068157e-06, "loss": 0.6736, "step": 588 }, { "epoch": 0.04988354859199661, "grad_norm": 1.6092750707775862, "learning_rate": 9.989701839581618e-06, "loss": 0.6874, "step": 589 }, { "epoch": 0.04996824052508998, "grad_norm": 1.2701615369685935, "learning_rate": 9.989613663593594e-06, "loss": 0.6973, "step": 590 }, { "epoch": 0.05005293245818336, "grad_norm": 7.13466573216288, "learning_rate": 9.989525112110721e-06, "loss": 0.6693, "step": 591 }, { "epoch": 0.05013762439127673, "grad_norm": 1.6654350920247873, "learning_rate": 9.98943618513966e-06, "loss": 0.6352, "step": 592 }, { "epoch": 0.0502223163243701, "grad_norm": 1.2863784331157364, "learning_rate": 9.989346882687109e-06, "loss": 0.6085, "step": 593 }, { "epoch": 0.050307008257463474, "grad_norm": 1.290895204695195, "learning_rate": 9.989257204759783e-06, "loss": 0.6603, "step": 594 }, { "epoch": 0.05039170019055685, "grad_norm": 1.2063712195094234, "learning_rate": 9.989167151364434e-06, "loss": 0.6528, "step": 595 }, { "epoch": 0.050476392123650224, "grad_norm": 1.5209189373499001, "learning_rate": 9.989076722507838e-06, "loss": 0.6261, "step": 596 }, { "epoch": 0.050561084056743595, "grad_norm": 1.2291248136823747, "learning_rate": 9.988985918196801e-06, "loss": 0.7185, "step": 597 }, { "epoch": 0.050645775989836966, "grad_norm": 1.3136054114953897, "learning_rate": 9.988894738438154e-06, "loss": 0.6718, "step": 598 }, { "epoch": 0.05073046792293034, "grad_norm": 1.4345678401020734, "learning_rate": 9.988803183238763e-06, "loss": 0.7753, "step": 599 }, { "epoch": 0.050815159856023716, "grad_norm": 1.5062665326753484, "learning_rate": 9.988711252605511e-06, "loss": 0.6607, "step": 600 }, { "epoch": 0.05089985178911709, "grad_norm": 1.5149748139966799, "learning_rate": 9.988618946545326e-06, "loss": 0.7269, "step": 601 }, { "epoch": 0.05098454372221046, "grad_norm": 1.2797119094944442, "learning_rate": 9.988526265065146e-06, "loss": 0.7409, "step": 602 }, { "epoch": 0.05106923565530383, "grad_norm": 1.4325263255957479, "learning_rate": 9.98843320817195e-06, "loss": 0.6531, "step": 603 }, { "epoch": 0.05115392758839721, "grad_norm": 1.9860847061186102, "learning_rate": 9.98833977587274e-06, "loss": 0.7096, "step": 604 }, { "epoch": 0.05123861952149058, "grad_norm": 1.5743247456984073, "learning_rate": 9.988245968174546e-06, "loss": 0.6933, "step": 605 }, { "epoch": 0.05132331145458395, "grad_norm": 0.6926732502366137, "learning_rate": 9.98815178508443e-06, "loss": 0.8687, "step": 606 }, { "epoch": 0.05140800338767732, "grad_norm": 0.7259684827126981, "learning_rate": 9.98805722660948e-06, "loss": 0.8872, "step": 607 }, { "epoch": 0.0514926953207707, "grad_norm": 1.377337558556989, "learning_rate": 9.987962292756809e-06, "loss": 0.7718, "step": 608 }, { "epoch": 0.05157738725386407, "grad_norm": 1.7476342806295007, "learning_rate": 9.987866983533562e-06, "loss": 0.7055, "step": 609 }, { "epoch": 0.05166207918695744, "grad_norm": 1.1717737185277042, "learning_rate": 9.987771298946916e-06, "loss": 0.6998, "step": 610 }, { "epoch": 0.051746771120050813, "grad_norm": 1.2330321865074227, "learning_rate": 9.987675239004066e-06, "loss": 0.6565, "step": 611 }, { "epoch": 0.051831463053144185, "grad_norm": 6.911987318641533, "learning_rate": 9.987578803712244e-06, "loss": 0.6861, "step": 612 }, { "epoch": 0.05191615498623756, "grad_norm": 1.3732498452002218, "learning_rate": 9.987481993078705e-06, "loss": 0.7142, "step": 613 }, { "epoch": 0.052000846919330934, "grad_norm": 1.9335327540546083, "learning_rate": 9.987384807110738e-06, "loss": 0.6854, "step": 614 }, { "epoch": 0.052085538852424305, "grad_norm": 1.817274267695067, "learning_rate": 9.987287245815654e-06, "loss": 0.721, "step": 615 }, { "epoch": 0.05217023078551768, "grad_norm": 1.7248139386064092, "learning_rate": 9.987189309200795e-06, "loss": 0.7182, "step": 616 }, { "epoch": 0.052254922718611055, "grad_norm": 1.5746962688414639, "learning_rate": 9.987090997273531e-06, "loss": 0.663, "step": 617 }, { "epoch": 0.052339614651704426, "grad_norm": 0.605983094417401, "learning_rate": 9.986992310041265e-06, "loss": 0.8311, "step": 618 }, { "epoch": 0.0524243065847978, "grad_norm": 1.4376074057034596, "learning_rate": 9.986893247511418e-06, "loss": 0.6812, "step": 619 }, { "epoch": 0.05250899851789117, "grad_norm": 1.549527895760206, "learning_rate": 9.986793809691449e-06, "loss": 0.7432, "step": 620 }, { "epoch": 0.05259369045098455, "grad_norm": 0.6737585487069707, "learning_rate": 9.986693996588836e-06, "loss": 0.8683, "step": 621 }, { "epoch": 0.05267838238407792, "grad_norm": 1.2224914323695981, "learning_rate": 9.986593808211097e-06, "loss": 0.7441, "step": 622 }, { "epoch": 0.05276307431717129, "grad_norm": 1.6140570027521255, "learning_rate": 9.986493244565769e-06, "loss": 0.6774, "step": 623 }, { "epoch": 0.05284776625026466, "grad_norm": 1.4202960671178897, "learning_rate": 9.986392305660417e-06, "loss": 0.668, "step": 624 }, { "epoch": 0.05293245818335803, "grad_norm": 1.2626829672136717, "learning_rate": 9.986290991502643e-06, "loss": 0.6655, "step": 625 }, { "epoch": 0.05301715011645141, "grad_norm": 3.1566525331361377, "learning_rate": 9.986189302100067e-06, "loss": 0.7179, "step": 626 }, { "epoch": 0.05310184204954478, "grad_norm": 0.626449255570964, "learning_rate": 9.986087237460342e-06, "loss": 0.8129, "step": 627 }, { "epoch": 0.05318653398263815, "grad_norm": 1.307847212045582, "learning_rate": 9.98598479759115e-06, "loss": 0.7128, "step": 628 }, { "epoch": 0.053271225915731524, "grad_norm": 2.6970750290505014, "learning_rate": 9.9858819825002e-06, "loss": 0.7489, "step": 629 }, { "epoch": 0.0533559178488249, "grad_norm": 1.3257004242934247, "learning_rate": 9.98577879219523e-06, "loss": 0.6216, "step": 630 }, { "epoch": 0.05344060978191827, "grad_norm": 1.966607402973117, "learning_rate": 9.985675226684004e-06, "loss": 0.7037, "step": 631 }, { "epoch": 0.053525301715011644, "grad_norm": 1.2344685324127054, "learning_rate": 9.985571285974319e-06, "loss": 0.6986, "step": 632 }, { "epoch": 0.053609993648105016, "grad_norm": 1.4822649129949999, "learning_rate": 9.985466970073991e-06, "loss": 0.7099, "step": 633 }, { "epoch": 0.053694685581198394, "grad_norm": 0.6454411730286261, "learning_rate": 9.985362278990878e-06, "loss": 0.8448, "step": 634 }, { "epoch": 0.053779377514291765, "grad_norm": 2.11994950116977, "learning_rate": 9.985257212732853e-06, "loss": 0.6832, "step": 635 }, { "epoch": 0.053864069447385136, "grad_norm": 1.3085329969669965, "learning_rate": 9.985151771307824e-06, "loss": 0.6719, "step": 636 }, { "epoch": 0.05394876138047851, "grad_norm": 1.1266991365016317, "learning_rate": 9.985045954723727e-06, "loss": 0.6268, "step": 637 }, { "epoch": 0.05403345331357188, "grad_norm": 0.628896337016944, "learning_rate": 9.984939762988525e-06, "loss": 0.8819, "step": 638 }, { "epoch": 0.05411814524666526, "grad_norm": 1.3183260022129404, "learning_rate": 9.98483319611021e-06, "loss": 0.6594, "step": 639 }, { "epoch": 0.05420283717975863, "grad_norm": 1.279613855465789, "learning_rate": 9.9847262540968e-06, "loss": 0.678, "step": 640 }, { "epoch": 0.054287529112852, "grad_norm": 1.3332184226949242, "learning_rate": 9.984618936956345e-06, "loss": 0.6662, "step": 641 }, { "epoch": 0.05437222104594537, "grad_norm": 1.4712688605805346, "learning_rate": 9.98451124469692e-06, "loss": 0.6321, "step": 642 }, { "epoch": 0.05445691297903875, "grad_norm": 2.0372411492177065, "learning_rate": 9.984403177326629e-06, "loss": 0.7044, "step": 643 }, { "epoch": 0.05454160491213212, "grad_norm": 1.2425541813275003, "learning_rate": 9.984294734853605e-06, "loss": 0.6393, "step": 644 }, { "epoch": 0.05462629684522549, "grad_norm": 1.8063325663575969, "learning_rate": 9.98418591728601e-06, "loss": 0.6725, "step": 645 }, { "epoch": 0.05471098877831886, "grad_norm": 1.7116891699607701, "learning_rate": 9.984076724632032e-06, "loss": 0.7071, "step": 646 }, { "epoch": 0.05479568071141224, "grad_norm": 1.5118538112297848, "learning_rate": 9.983967156899888e-06, "loss": 0.676, "step": 647 }, { "epoch": 0.05488037264450561, "grad_norm": 1.320121154326672, "learning_rate": 9.983857214097825e-06, "loss": 0.6415, "step": 648 }, { "epoch": 0.054965064577598984, "grad_norm": 0.6245696511019597, "learning_rate": 9.983746896234114e-06, "loss": 0.8314, "step": 649 }, { "epoch": 0.055049756510692355, "grad_norm": 1.491653407657895, "learning_rate": 9.983636203317061e-06, "loss": 0.6531, "step": 650 }, { "epoch": 0.055134448443785726, "grad_norm": 1.845098659827458, "learning_rate": 9.983525135354993e-06, "loss": 0.6801, "step": 651 }, { "epoch": 0.055219140376879104, "grad_norm": 1.253892885931456, "learning_rate": 9.98341369235627e-06, "loss": 0.7196, "step": 652 }, { "epoch": 0.055303832309972475, "grad_norm": 1.3063372588672018, "learning_rate": 9.98330187432928e-06, "loss": 0.6883, "step": 653 }, { "epoch": 0.05538852424306585, "grad_norm": 1.252870257814895, "learning_rate": 9.983189681282433e-06, "loss": 0.7055, "step": 654 }, { "epoch": 0.05547321617615922, "grad_norm": 1.4331926673675348, "learning_rate": 9.983077113224176e-06, "loss": 0.7313, "step": 655 }, { "epoch": 0.055557908109252596, "grad_norm": 1.6020218659147765, "learning_rate": 9.98296417016298e-06, "loss": 0.6983, "step": 656 }, { "epoch": 0.05564260004234597, "grad_norm": 1.5312908976769226, "learning_rate": 9.982850852107344e-06, "loss": 0.7193, "step": 657 }, { "epoch": 0.05572729197543934, "grad_norm": 1.407228499577236, "learning_rate": 9.982737159065796e-06, "loss": 0.7521, "step": 658 }, { "epoch": 0.05581198390853271, "grad_norm": 1.2381099209778976, "learning_rate": 9.982623091046892e-06, "loss": 0.7103, "step": 659 }, { "epoch": 0.05589667584162609, "grad_norm": 0.7160034227854922, "learning_rate": 9.982508648059216e-06, "loss": 0.9018, "step": 660 }, { "epoch": 0.05598136777471946, "grad_norm": 1.180491198674099, "learning_rate": 9.98239383011138e-06, "loss": 0.6364, "step": 661 }, { "epoch": 0.05606605970781283, "grad_norm": 0.6190247976401657, "learning_rate": 9.982278637212026e-06, "loss": 0.8936, "step": 662 }, { "epoch": 0.0561507516409062, "grad_norm": 0.6213385880692133, "learning_rate": 9.982163069369823e-06, "loss": 0.8962, "step": 663 }, { "epoch": 0.05623544357399957, "grad_norm": 1.7905363892358808, "learning_rate": 9.982047126593466e-06, "loss": 0.716, "step": 664 }, { "epoch": 0.05632013550709295, "grad_norm": 1.6980226884304452, "learning_rate": 9.981930808891683e-06, "loss": 0.6829, "step": 665 }, { "epoch": 0.05640482744018632, "grad_norm": 1.1733143418872838, "learning_rate": 9.981814116273224e-06, "loss": 0.6735, "step": 666 }, { "epoch": 0.056489519373279694, "grad_norm": 1.2620158223370153, "learning_rate": 9.981697048746875e-06, "loss": 0.6693, "step": 667 }, { "epoch": 0.056574211306373065, "grad_norm": 1.2306603881353968, "learning_rate": 9.98157960632144e-06, "loss": 0.6736, "step": 668 }, { "epoch": 0.05665890323946644, "grad_norm": 1.4395971942367591, "learning_rate": 9.981461789005763e-06, "loss": 0.6919, "step": 669 }, { "epoch": 0.056743595172559814, "grad_norm": 1.4339375838437678, "learning_rate": 9.98134359680871e-06, "loss": 0.7181, "step": 670 }, { "epoch": 0.056828287105653186, "grad_norm": 1.4613755133967898, "learning_rate": 9.981225029739173e-06, "loss": 0.67, "step": 671 }, { "epoch": 0.05691297903874656, "grad_norm": 1.7432972678144156, "learning_rate": 9.981106087806076e-06, "loss": 0.6987, "step": 672 }, { "epoch": 0.056997670971839935, "grad_norm": 1.5769370811743917, "learning_rate": 9.980986771018369e-06, "loss": 0.7315, "step": 673 }, { "epoch": 0.057082362904933306, "grad_norm": 2.5839738941771873, "learning_rate": 9.980867079385032e-06, "loss": 0.6996, "step": 674 }, { "epoch": 0.05716705483802668, "grad_norm": 1.193177552532049, "learning_rate": 9.980747012915072e-06, "loss": 0.6652, "step": 675 }, { "epoch": 0.05725174677112005, "grad_norm": 1.5816497046502045, "learning_rate": 9.980626571617525e-06, "loss": 0.6814, "step": 676 }, { "epoch": 0.05733643870421342, "grad_norm": 1.4040774480611073, "learning_rate": 9.980505755501455e-06, "loss": 0.7315, "step": 677 }, { "epoch": 0.0574211306373068, "grad_norm": 1.5978493399293352, "learning_rate": 9.980384564575953e-06, "loss": 0.722, "step": 678 }, { "epoch": 0.05750582257040017, "grad_norm": 3.1895708351666805, "learning_rate": 9.980262998850141e-06, "loss": 0.741, "step": 679 }, { "epoch": 0.05759051450349354, "grad_norm": 1.304979467374188, "learning_rate": 9.980141058333167e-06, "loss": 0.6302, "step": 680 }, { "epoch": 0.05767520643658691, "grad_norm": 1.5823971140644544, "learning_rate": 9.980018743034208e-06, "loss": 0.7343, "step": 681 }, { "epoch": 0.05775989836968029, "grad_norm": 1.6834652983263279, "learning_rate": 9.979896052962466e-06, "loss": 0.627, "step": 682 }, { "epoch": 0.05784459030277366, "grad_norm": 1.6083571838797377, "learning_rate": 9.979772988127176e-06, "loss": 0.7021, "step": 683 }, { "epoch": 0.05792928223586703, "grad_norm": 1.238624514969168, "learning_rate": 9.979649548537602e-06, "loss": 0.6396, "step": 684 }, { "epoch": 0.058013974168960404, "grad_norm": 1.3177654164294226, "learning_rate": 9.979525734203029e-06, "loss": 0.6933, "step": 685 }, { "epoch": 0.05809866610205378, "grad_norm": 1.212106253910793, "learning_rate": 9.979401545132777e-06, "loss": 0.7067, "step": 686 }, { "epoch": 0.058183358035147154, "grad_norm": 1.3534298664983024, "learning_rate": 9.979276981336193e-06, "loss": 0.6817, "step": 687 }, { "epoch": 0.058268049968240525, "grad_norm": 1.3288015544377003, "learning_rate": 9.979152042822648e-06, "loss": 0.6803, "step": 688 }, { "epoch": 0.058352741901333896, "grad_norm": 0.6549599322902144, "learning_rate": 9.979026729601546e-06, "loss": 0.8878, "step": 689 }, { "epoch": 0.058437433834427274, "grad_norm": 1.375968841683321, "learning_rate": 9.978901041682318e-06, "loss": 0.7092, "step": 690 }, { "epoch": 0.058522125767520645, "grad_norm": 2.2126426894407563, "learning_rate": 9.978774979074422e-06, "loss": 0.6147, "step": 691 }, { "epoch": 0.05860681770061402, "grad_norm": 1.2196083362425412, "learning_rate": 9.978648541787346e-06, "loss": 0.6659, "step": 692 }, { "epoch": 0.05869150963370739, "grad_norm": 1.5515736374908191, "learning_rate": 9.978521729830601e-06, "loss": 0.6908, "step": 693 }, { "epoch": 0.05877620156680076, "grad_norm": 0.6604700963080717, "learning_rate": 9.978394543213736e-06, "loss": 0.8753, "step": 694 }, { "epoch": 0.05886089349989414, "grad_norm": 1.190612126742207, "learning_rate": 9.978266981946318e-06, "loss": 0.7343, "step": 695 }, { "epoch": 0.05894558543298751, "grad_norm": 3.374790867386261, "learning_rate": 9.978139046037952e-06, "loss": 0.6839, "step": 696 }, { "epoch": 0.05903027736608088, "grad_norm": 1.3543665064520323, "learning_rate": 9.978010735498258e-06, "loss": 0.6759, "step": 697 }, { "epoch": 0.05911496929917425, "grad_norm": 0.622177297544192, "learning_rate": 9.977882050336899e-06, "loss": 0.9191, "step": 698 }, { "epoch": 0.05919966123226763, "grad_norm": 0.6145799122747624, "learning_rate": 9.977752990563554e-06, "loss": 0.8687, "step": 699 }, { "epoch": 0.059284353165361, "grad_norm": 1.5659766082702231, "learning_rate": 9.97762355618794e-06, "loss": 0.6919, "step": 700 }, { "epoch": 0.05936904509845437, "grad_norm": 1.3434142039041537, "learning_rate": 9.977493747219796e-06, "loss": 0.7147, "step": 701 }, { "epoch": 0.05945373703154774, "grad_norm": 1.5619174087082166, "learning_rate": 9.977363563668889e-06, "loss": 0.7061, "step": 702 }, { "epoch": 0.05953842896464112, "grad_norm": 1.6928299733222019, "learning_rate": 9.97723300554502e-06, "loss": 0.7043, "step": 703 }, { "epoch": 0.05962312089773449, "grad_norm": 1.417190323671534, "learning_rate": 9.977102072858008e-06, "loss": 0.6836, "step": 704 }, { "epoch": 0.059707812830827864, "grad_norm": 1.1932366211394085, "learning_rate": 9.976970765617713e-06, "loss": 0.6705, "step": 705 }, { "epoch": 0.059792504763921235, "grad_norm": 1.141749177625762, "learning_rate": 9.976839083834011e-06, "loss": 0.7264, "step": 706 }, { "epoch": 0.059877196697014606, "grad_norm": 1.4351069903996474, "learning_rate": 9.976707027516817e-06, "loss": 0.6912, "step": 707 }, { "epoch": 0.059961888630107985, "grad_norm": 1.5778545324348747, "learning_rate": 9.976574596676065e-06, "loss": 0.6254, "step": 708 }, { "epoch": 0.060046580563201356, "grad_norm": 1.323109685415193, "learning_rate": 9.976441791321722e-06, "loss": 0.6826, "step": 709 }, { "epoch": 0.06013127249629473, "grad_norm": 1.3819733050774858, "learning_rate": 9.976308611463782e-06, "loss": 0.7132, "step": 710 }, { "epoch": 0.0602159644293881, "grad_norm": 1.843854005111733, "learning_rate": 9.97617505711227e-06, "loss": 0.6749, "step": 711 }, { "epoch": 0.060300656362481476, "grad_norm": 1.1757749289574608, "learning_rate": 9.976041128277234e-06, "loss": 0.6941, "step": 712 }, { "epoch": 0.06038534829557485, "grad_norm": 1.2698258464499053, "learning_rate": 9.975906824968754e-06, "loss": 0.7192, "step": 713 }, { "epoch": 0.06047004022866822, "grad_norm": 1.6267142546766036, "learning_rate": 9.975772147196935e-06, "loss": 0.6561, "step": 714 }, { "epoch": 0.06055473216176159, "grad_norm": 1.7885271464566541, "learning_rate": 9.975637094971917e-06, "loss": 0.7067, "step": 715 }, { "epoch": 0.06063942409485497, "grad_norm": 1.2694709941945164, "learning_rate": 9.975501668303858e-06, "loss": 0.6571, "step": 716 }, { "epoch": 0.06072411602794834, "grad_norm": 1.3281681964490237, "learning_rate": 9.975365867202954e-06, "loss": 0.6746, "step": 717 }, { "epoch": 0.06080880796104171, "grad_norm": 1.5314603930401771, "learning_rate": 9.97522969167942e-06, "loss": 0.7425, "step": 718 }, { "epoch": 0.06089349989413508, "grad_norm": 2.1308179631574884, "learning_rate": 9.97509314174351e-06, "loss": 0.7186, "step": 719 }, { "epoch": 0.06097819182722845, "grad_norm": 1.4358166060851352, "learning_rate": 9.974956217405493e-06, "loss": 0.6147, "step": 720 }, { "epoch": 0.06106288376032183, "grad_norm": 1.8735388347089048, "learning_rate": 9.974818918675679e-06, "loss": 0.7038, "step": 721 }, { "epoch": 0.0611475756934152, "grad_norm": 1.3175271553662677, "learning_rate": 9.9746812455644e-06, "loss": 0.7267, "step": 722 }, { "epoch": 0.061232267626508574, "grad_norm": 1.3275370206263142, "learning_rate": 9.974543198082012e-06, "loss": 0.6551, "step": 723 }, { "epoch": 0.061316959559601945, "grad_norm": 1.246924113853814, "learning_rate": 9.97440477623891e-06, "loss": 0.656, "step": 724 }, { "epoch": 0.061401651492695324, "grad_norm": 1.4991819573156435, "learning_rate": 9.974265980045505e-06, "loss": 0.6632, "step": 725 }, { "epoch": 0.061486343425788695, "grad_norm": 0.6434921016713326, "learning_rate": 9.974126809512245e-06, "loss": 0.9004, "step": 726 }, { "epoch": 0.061571035358882066, "grad_norm": 1.4083581364207065, "learning_rate": 9.973987264649606e-06, "loss": 0.6877, "step": 727 }, { "epoch": 0.06165572729197544, "grad_norm": 1.813034098190022, "learning_rate": 9.973847345468084e-06, "loss": 0.6881, "step": 728 }, { "epoch": 0.061740419225068816, "grad_norm": 1.3911876542368258, "learning_rate": 9.973707051978212e-06, "loss": 0.709, "step": 729 }, { "epoch": 0.06182511115816219, "grad_norm": 2.0237775333026957, "learning_rate": 9.973566384190549e-06, "loss": 0.7205, "step": 730 }, { "epoch": 0.06190980309125556, "grad_norm": 1.1006830756019574, "learning_rate": 9.97342534211568e-06, "loss": 0.6114, "step": 731 }, { "epoch": 0.06199449502434893, "grad_norm": 1.1098813520354176, "learning_rate": 9.973283925764216e-06, "loss": 0.6564, "step": 732 }, { "epoch": 0.0620791869574423, "grad_norm": 0.6169429773815212, "learning_rate": 9.973142135146803e-06, "loss": 0.822, "step": 733 }, { "epoch": 0.06216387889053568, "grad_norm": 1.1516410351542221, "learning_rate": 9.972999970274108e-06, "loss": 0.6697, "step": 734 }, { "epoch": 0.06224857082362905, "grad_norm": 0.6506615555384825, "learning_rate": 9.972857431156834e-06, "loss": 0.8608, "step": 735 }, { "epoch": 0.06233326275672242, "grad_norm": 1.34304685383912, "learning_rate": 9.972714517805706e-06, "loss": 0.6857, "step": 736 }, { "epoch": 0.06241795468981579, "grad_norm": 1.7495899992613053, "learning_rate": 9.972571230231479e-06, "loss": 0.6689, "step": 737 }, { "epoch": 0.06250264662290916, "grad_norm": 1.5595469348475226, "learning_rate": 9.972427568444936e-06, "loss": 0.7152, "step": 738 }, { "epoch": 0.06258733855600254, "grad_norm": 0.6550816640713143, "learning_rate": 9.972283532456887e-06, "loss": 0.8192, "step": 739 }, { "epoch": 0.06267203048909592, "grad_norm": 1.3992706809754265, "learning_rate": 9.972139122278173e-06, "loss": 0.6668, "step": 740 }, { "epoch": 0.06275672242218928, "grad_norm": 1.3350013463195314, "learning_rate": 9.971994337919661e-06, "loss": 0.6286, "step": 741 }, { "epoch": 0.06284141435528266, "grad_norm": 1.1944343508571587, "learning_rate": 9.971849179392248e-06, "loss": 0.6024, "step": 742 }, { "epoch": 0.06292610628837603, "grad_norm": 1.522114933124767, "learning_rate": 9.971703646706858e-06, "loss": 0.7016, "step": 743 }, { "epoch": 0.0630107982214694, "grad_norm": 1.9223940292001152, "learning_rate": 9.97155773987444e-06, "loss": 0.6786, "step": 744 }, { "epoch": 0.06309549015456278, "grad_norm": 1.189905708494008, "learning_rate": 9.971411458905977e-06, "loss": 0.6711, "step": 745 }, { "epoch": 0.06318018208765615, "grad_norm": 0.6295786389196373, "learning_rate": 9.971264803812477e-06, "loss": 0.8487, "step": 746 }, { "epoch": 0.06326487402074953, "grad_norm": 1.4053780698899654, "learning_rate": 9.971117774604978e-06, "loss": 0.6841, "step": 747 }, { "epoch": 0.06334956595384289, "grad_norm": 1.7798255506606335, "learning_rate": 9.970970371294542e-06, "loss": 0.7061, "step": 748 }, { "epoch": 0.06343425788693627, "grad_norm": 2.0211106704160122, "learning_rate": 9.970822593892262e-06, "loss": 0.6584, "step": 749 }, { "epoch": 0.06351894982002965, "grad_norm": 1.2826718389358842, "learning_rate": 9.97067444240926e-06, "loss": 0.6843, "step": 750 }, { "epoch": 0.06360364175312301, "grad_norm": 0.6667695755025068, "learning_rate": 9.970525916856688e-06, "loss": 0.8332, "step": 751 }, { "epoch": 0.06368833368621639, "grad_norm": 1.2158708685574215, "learning_rate": 9.970377017245719e-06, "loss": 0.6874, "step": 752 }, { "epoch": 0.06377302561930977, "grad_norm": 1.6895558861136302, "learning_rate": 9.970227743587558e-06, "loss": 0.6519, "step": 753 }, { "epoch": 0.06385771755240313, "grad_norm": 0.592322684280491, "learning_rate": 9.970078095893443e-06, "loss": 0.7876, "step": 754 }, { "epoch": 0.06394240948549651, "grad_norm": 0.6817460971325536, "learning_rate": 9.969928074174634e-06, "loss": 0.8873, "step": 755 }, { "epoch": 0.06402710141858987, "grad_norm": 2.619261652657411, "learning_rate": 9.969777678442418e-06, "loss": 0.6602, "step": 756 }, { "epoch": 0.06411179335168325, "grad_norm": 1.1607192326863647, "learning_rate": 9.969626908708116e-06, "loss": 0.6698, "step": 757 }, { "epoch": 0.06419648528477663, "grad_norm": 3.090562672097392, "learning_rate": 9.969475764983075e-06, "loss": 0.7114, "step": 758 }, { "epoch": 0.06428117721787, "grad_norm": 1.3669694738802791, "learning_rate": 9.969324247278667e-06, "loss": 0.7275, "step": 759 }, { "epoch": 0.06436586915096337, "grad_norm": 1.4213045433606506, "learning_rate": 9.969172355606296e-06, "loss": 0.6998, "step": 760 }, { "epoch": 0.06445056108405674, "grad_norm": 1.151624936883074, "learning_rate": 9.969020089977393e-06, "loss": 0.6461, "step": 761 }, { "epoch": 0.06453525301715012, "grad_norm": 1.3025401218704653, "learning_rate": 9.968867450403414e-06, "loss": 0.7274, "step": 762 }, { "epoch": 0.0646199449502435, "grad_norm": 1.5368678029969092, "learning_rate": 9.96871443689585e-06, "loss": 0.6832, "step": 763 }, { "epoch": 0.06470463688333686, "grad_norm": 1.5409090762953372, "learning_rate": 9.968561049466214e-06, "loss": 0.6886, "step": 764 }, { "epoch": 0.06478932881643024, "grad_norm": 1.364307263373828, "learning_rate": 9.968407288126048e-06, "loss": 0.7397, "step": 765 }, { "epoch": 0.06487402074952361, "grad_norm": 1.2260047912298548, "learning_rate": 9.968253152886925e-06, "loss": 0.6933, "step": 766 }, { "epoch": 0.06495871268261698, "grad_norm": 4.9157223622411435, "learning_rate": 9.968098643760444e-06, "loss": 0.6658, "step": 767 }, { "epoch": 0.06504340461571036, "grad_norm": 1.4086686495396479, "learning_rate": 9.967943760758234e-06, "loss": 0.6422, "step": 768 }, { "epoch": 0.06512809654880372, "grad_norm": 1.6088299813137728, "learning_rate": 9.967788503891949e-06, "loss": 0.7002, "step": 769 }, { "epoch": 0.0652127884818971, "grad_norm": 2.122984527941395, "learning_rate": 9.967632873173272e-06, "loss": 0.6376, "step": 770 }, { "epoch": 0.06529748041499048, "grad_norm": 1.9546155937696654, "learning_rate": 9.967476868613917e-06, "loss": 0.6569, "step": 771 }, { "epoch": 0.06538217234808384, "grad_norm": 1.4458170736580782, "learning_rate": 9.967320490225626e-06, "loss": 0.6573, "step": 772 }, { "epoch": 0.06546686428117722, "grad_norm": 1.4268364976050403, "learning_rate": 9.967163738020162e-06, "loss": 0.6612, "step": 773 }, { "epoch": 0.06555155621427058, "grad_norm": 1.5223165982972733, "learning_rate": 9.967006612009325e-06, "loss": 0.7324, "step": 774 }, { "epoch": 0.06563624814736396, "grad_norm": 1.4354480362195559, "learning_rate": 9.966849112204938e-06, "loss": 0.683, "step": 775 }, { "epoch": 0.06572094008045734, "grad_norm": 1.6109524006959037, "learning_rate": 9.966691238618855e-06, "loss": 0.6456, "step": 776 }, { "epoch": 0.0658056320135507, "grad_norm": 1.2217408331170485, "learning_rate": 9.966532991262959e-06, "loss": 0.6732, "step": 777 }, { "epoch": 0.06589032394664408, "grad_norm": 1.5655854990178615, "learning_rate": 9.966374370149153e-06, "loss": 0.7014, "step": 778 }, { "epoch": 0.06597501587973746, "grad_norm": 1.2508547148658262, "learning_rate": 9.96621537528938e-06, "loss": 0.6669, "step": 779 }, { "epoch": 0.06605970781283083, "grad_norm": 0.6780339073584484, "learning_rate": 9.966056006695601e-06, "loss": 0.837, "step": 780 }, { "epoch": 0.0661443997459242, "grad_norm": 1.5681946493496366, "learning_rate": 9.965896264379811e-06, "loss": 0.7055, "step": 781 }, { "epoch": 0.06622909167901757, "grad_norm": 1.1924591573818122, "learning_rate": 9.965736148354033e-06, "loss": 0.6574, "step": 782 }, { "epoch": 0.06631378361211095, "grad_norm": 1.0548726075248942, "learning_rate": 9.965575658630314e-06, "loss": 0.6122, "step": 783 }, { "epoch": 0.06639847554520432, "grad_norm": 1.743132297831016, "learning_rate": 9.96541479522073e-06, "loss": 0.6579, "step": 784 }, { "epoch": 0.06648316747829769, "grad_norm": 1.6784170416826747, "learning_rate": 9.965253558137394e-06, "loss": 0.653, "step": 785 }, { "epoch": 0.06656785941139107, "grad_norm": 1.4361474295266585, "learning_rate": 9.965091947392434e-06, "loss": 0.6687, "step": 786 }, { "epoch": 0.06665255134448443, "grad_norm": 2.144939611648576, "learning_rate": 9.964929962998013e-06, "loss": 0.6784, "step": 787 }, { "epoch": 0.06673724327757781, "grad_norm": 0.5969283212744222, "learning_rate": 9.964767604966323e-06, "loss": 0.8168, "step": 788 }, { "epoch": 0.06682193521067119, "grad_norm": 1.887698446089028, "learning_rate": 9.964604873309578e-06, "loss": 0.6721, "step": 789 }, { "epoch": 0.06690662714376455, "grad_norm": 3.353234966812284, "learning_rate": 9.964441768040031e-06, "loss": 0.6505, "step": 790 }, { "epoch": 0.06699131907685793, "grad_norm": 1.5984908553416166, "learning_rate": 9.96427828916995e-06, "loss": 0.6323, "step": 791 }, { "epoch": 0.06707601100995131, "grad_norm": 1.3902283960869275, "learning_rate": 9.964114436711643e-06, "loss": 0.6761, "step": 792 }, { "epoch": 0.06716070294304467, "grad_norm": 1.243499086681606, "learning_rate": 9.963950210677438e-06, "loss": 0.6974, "step": 793 }, { "epoch": 0.06724539487613805, "grad_norm": 1.546982527595036, "learning_rate": 9.963785611079694e-06, "loss": 0.7105, "step": 794 }, { "epoch": 0.06733008680923142, "grad_norm": 1.4258103998791682, "learning_rate": 9.963620637930798e-06, "loss": 0.6043, "step": 795 }, { "epoch": 0.0674147787423248, "grad_norm": 1.2695920363761064, "learning_rate": 9.963455291243165e-06, "loss": 0.5877, "step": 796 }, { "epoch": 0.06749947067541817, "grad_norm": 1.4475513496271064, "learning_rate": 9.96328957102924e-06, "loss": 0.7114, "step": 797 }, { "epoch": 0.06758416260851154, "grad_norm": 1.2873886032018858, "learning_rate": 9.963123477301491e-06, "loss": 0.7263, "step": 798 }, { "epoch": 0.06766885454160491, "grad_norm": 2.154243849240788, "learning_rate": 9.962957010072421e-06, "loss": 0.7141, "step": 799 }, { "epoch": 0.06775354647469828, "grad_norm": 1.4686633267804619, "learning_rate": 9.962790169354554e-06, "loss": 0.7108, "step": 800 }, { "epoch": 0.06783823840779166, "grad_norm": 1.8437691152415066, "learning_rate": 9.96262295516045e-06, "loss": 0.7524, "step": 801 }, { "epoch": 0.06792293034088503, "grad_norm": 1.2731294697991467, "learning_rate": 9.962455367502688e-06, "loss": 0.6737, "step": 802 }, { "epoch": 0.0680076222739784, "grad_norm": 1.4393749685320212, "learning_rate": 9.962287406393883e-06, "loss": 0.6373, "step": 803 }, { "epoch": 0.06809231420707178, "grad_norm": 1.8121995671451017, "learning_rate": 9.962119071846674e-06, "loss": 0.6725, "step": 804 }, { "epoch": 0.06817700614016516, "grad_norm": 1.2690328264195803, "learning_rate": 9.96195036387373e-06, "loss": 0.7113, "step": 805 }, { "epoch": 0.06826169807325852, "grad_norm": 1.6079951221457636, "learning_rate": 9.961781282487746e-06, "loss": 0.6735, "step": 806 }, { "epoch": 0.0683463900063519, "grad_norm": 1.6964335935828478, "learning_rate": 9.961611827701448e-06, "loss": 0.6933, "step": 807 }, { "epoch": 0.06843108193944526, "grad_norm": 0.7078244358832599, "learning_rate": 9.961441999527583e-06, "loss": 0.9053, "step": 808 }, { "epoch": 0.06851577387253864, "grad_norm": 1.3073910850082817, "learning_rate": 9.96127179797894e-06, "loss": 0.6966, "step": 809 }, { "epoch": 0.06860046580563202, "grad_norm": 2.1125699822262076, "learning_rate": 9.96110122306832e-06, "loss": 0.6491, "step": 810 }, { "epoch": 0.06868515773872538, "grad_norm": 8.658749346913542, "learning_rate": 9.960930274808564e-06, "loss": 0.6161, "step": 811 }, { "epoch": 0.06876984967181876, "grad_norm": 1.2504246425606214, "learning_rate": 9.960758953212535e-06, "loss": 0.6261, "step": 812 }, { "epoch": 0.06885454160491213, "grad_norm": 2.1117318349611467, "learning_rate": 9.960587258293126e-06, "loss": 0.7121, "step": 813 }, { "epoch": 0.0689392335380055, "grad_norm": 1.5406911618349624, "learning_rate": 9.960415190063258e-06, "loss": 0.7163, "step": 814 }, { "epoch": 0.06902392547109888, "grad_norm": 0.6608417604518535, "learning_rate": 9.960242748535882e-06, "loss": 0.8903, "step": 815 }, { "epoch": 0.06910861740419225, "grad_norm": 1.4781702969873032, "learning_rate": 9.96006993372397e-06, "loss": 0.6953, "step": 816 }, { "epoch": 0.06919330933728562, "grad_norm": 1.3336778542442131, "learning_rate": 9.959896745640535e-06, "loss": 0.639, "step": 817 }, { "epoch": 0.069278001270379, "grad_norm": 1.406744830985588, "learning_rate": 9.959723184298604e-06, "loss": 0.6825, "step": 818 }, { "epoch": 0.06936269320347237, "grad_norm": 1.2872987340380302, "learning_rate": 9.95954924971124e-06, "loss": 0.6788, "step": 819 }, { "epoch": 0.06944738513656575, "grad_norm": 1.7908811127038573, "learning_rate": 9.959374941891534e-06, "loss": 0.7565, "step": 820 }, { "epoch": 0.06953207706965911, "grad_norm": 1.6300687494312995, "learning_rate": 9.959200260852601e-06, "loss": 0.6898, "step": 821 }, { "epoch": 0.06961676900275249, "grad_norm": 1.5323172982168822, "learning_rate": 9.95902520660759e-06, "loss": 0.7428, "step": 822 }, { "epoch": 0.06970146093584587, "grad_norm": 1.4006545456716313, "learning_rate": 9.95884977916967e-06, "loss": 0.6982, "step": 823 }, { "epoch": 0.06978615286893923, "grad_norm": 1.5803864188287453, "learning_rate": 9.958673978552049e-06, "loss": 0.6968, "step": 824 }, { "epoch": 0.06987084480203261, "grad_norm": 1.3325983703391282, "learning_rate": 9.958497804767953e-06, "loss": 0.7052, "step": 825 }, { "epoch": 0.06995553673512597, "grad_norm": 1.5420512416999756, "learning_rate": 9.958321257830639e-06, "loss": 0.704, "step": 826 }, { "epoch": 0.07004022866821935, "grad_norm": 0.623718013886099, "learning_rate": 9.958144337753396e-06, "loss": 0.8455, "step": 827 }, { "epoch": 0.07012492060131273, "grad_norm": 2.438093686523751, "learning_rate": 9.957967044549537e-06, "loss": 0.7132, "step": 828 }, { "epoch": 0.0702096125344061, "grad_norm": 1.7459546169483953, "learning_rate": 9.957789378232403e-06, "loss": 0.6936, "step": 829 }, { "epoch": 0.07029430446749947, "grad_norm": 1.2853771646304093, "learning_rate": 9.957611338815367e-06, "loss": 0.6163, "step": 830 }, { "epoch": 0.07037899640059285, "grad_norm": 2.09061516060591, "learning_rate": 9.957432926311826e-06, "loss": 0.6617, "step": 831 }, { "epoch": 0.07046368833368621, "grad_norm": 1.5267982704839507, "learning_rate": 9.957254140735206e-06, "loss": 0.6614, "step": 832 }, { "epoch": 0.07054838026677959, "grad_norm": 1.1972045160269733, "learning_rate": 9.957074982098961e-06, "loss": 0.6711, "step": 833 }, { "epoch": 0.07063307219987296, "grad_norm": 1.7431459177332402, "learning_rate": 9.956895450416576e-06, "loss": 0.6774, "step": 834 }, { "epoch": 0.07071776413296633, "grad_norm": 1.1466341219031702, "learning_rate": 9.95671554570156e-06, "loss": 0.6288, "step": 835 }, { "epoch": 0.07080245606605971, "grad_norm": 1.1234693022355362, "learning_rate": 9.956535267967453e-06, "loss": 0.6975, "step": 836 }, { "epoch": 0.07088714799915308, "grad_norm": 1.320722489627907, "learning_rate": 9.956354617227819e-06, "loss": 0.6479, "step": 837 }, { "epoch": 0.07097183993224646, "grad_norm": 1.7930557410270038, "learning_rate": 9.956173593496256e-06, "loss": 0.6214, "step": 838 }, { "epoch": 0.07105653186533982, "grad_norm": 1.7415427690063725, "learning_rate": 9.955992196786386e-06, "loss": 0.7078, "step": 839 }, { "epoch": 0.0711412237984332, "grad_norm": 1.9671607497965107, "learning_rate": 9.95581042711186e-06, "loss": 0.7377, "step": 840 }, { "epoch": 0.07122591573152658, "grad_norm": 1.4585125281066835, "learning_rate": 9.955628284486358e-06, "loss": 0.6776, "step": 841 }, { "epoch": 0.07131060766461994, "grad_norm": 0.6474170802340434, "learning_rate": 9.955445768923583e-06, "loss": 0.898, "step": 842 }, { "epoch": 0.07139529959771332, "grad_norm": 1.3035520965194445, "learning_rate": 9.955262880437278e-06, "loss": 0.6291, "step": 843 }, { "epoch": 0.0714799915308067, "grad_norm": 1.582194225309556, "learning_rate": 9.955079619041198e-06, "loss": 0.698, "step": 844 }, { "epoch": 0.07156468346390006, "grad_norm": 1.5273540311187441, "learning_rate": 9.95489598474914e-06, "loss": 0.7259, "step": 845 }, { "epoch": 0.07164937539699344, "grad_norm": 3.7129524151842666, "learning_rate": 9.954711977574922e-06, "loss": 0.6719, "step": 846 }, { "epoch": 0.0717340673300868, "grad_norm": 1.2405529739033727, "learning_rate": 9.954527597532391e-06, "loss": 0.6833, "step": 847 }, { "epoch": 0.07181875926318018, "grad_norm": 1.4870041422900424, "learning_rate": 9.954342844635423e-06, "loss": 0.6734, "step": 848 }, { "epoch": 0.07190345119627356, "grad_norm": 1.2452856477091174, "learning_rate": 9.95415771889792e-06, "loss": 0.6532, "step": 849 }, { "epoch": 0.07198814312936692, "grad_norm": 1.334977661370686, "learning_rate": 9.953972220333819e-06, "loss": 0.7015, "step": 850 }, { "epoch": 0.0720728350624603, "grad_norm": 0.7155781830764602, "learning_rate": 9.953786348957072e-06, "loss": 0.9058, "step": 851 }, { "epoch": 0.07215752699555367, "grad_norm": 0.6892051310284573, "learning_rate": 9.953600104781673e-06, "loss": 0.8253, "step": 852 }, { "epoch": 0.07224221892864705, "grad_norm": 1.3202754784717845, "learning_rate": 9.953413487821632e-06, "loss": 0.7149, "step": 853 }, { "epoch": 0.07232691086174042, "grad_norm": 1.6272708968409428, "learning_rate": 9.953226498091e-06, "loss": 0.698, "step": 854 }, { "epoch": 0.07241160279483379, "grad_norm": 1.400475414981363, "learning_rate": 9.953039135603845e-06, "loss": 0.633, "step": 855 }, { "epoch": 0.07249629472792717, "grad_norm": 1.227986941433723, "learning_rate": 9.952851400374267e-06, "loss": 0.6488, "step": 856 }, { "epoch": 0.07258098666102054, "grad_norm": 1.1846001901061503, "learning_rate": 9.952663292416393e-06, "loss": 0.6815, "step": 857 }, { "epoch": 0.07266567859411391, "grad_norm": 1.2641922010068354, "learning_rate": 9.952474811744383e-06, "loss": 0.6555, "step": 858 }, { "epoch": 0.07275037052720729, "grad_norm": 1.5130597505649477, "learning_rate": 9.952285958372418e-06, "loss": 0.6293, "step": 859 }, { "epoch": 0.07283506246030065, "grad_norm": 0.612151704261584, "learning_rate": 9.952096732314711e-06, "loss": 0.8294, "step": 860 }, { "epoch": 0.07291975439339403, "grad_norm": 1.384548332144581, "learning_rate": 9.951907133585503e-06, "loss": 0.6899, "step": 861 }, { "epoch": 0.07300444632648741, "grad_norm": 1.166596526531811, "learning_rate": 9.951717162199059e-06, "loss": 0.642, "step": 862 }, { "epoch": 0.07308913825958077, "grad_norm": 1.211575317215623, "learning_rate": 9.951526818169682e-06, "loss": 0.6878, "step": 863 }, { "epoch": 0.07317383019267415, "grad_norm": 0.6002735493982556, "learning_rate": 9.951336101511689e-06, "loss": 0.8281, "step": 864 }, { "epoch": 0.07325852212576751, "grad_norm": 1.3333253842380264, "learning_rate": 9.951145012239436e-06, "loss": 0.714, "step": 865 }, { "epoch": 0.07334321405886089, "grad_norm": 1.1977912758414946, "learning_rate": 9.950953550367304e-06, "loss": 0.7019, "step": 866 }, { "epoch": 0.07342790599195427, "grad_norm": 1.2985168697156586, "learning_rate": 9.950761715909702e-06, "loss": 0.6654, "step": 867 }, { "epoch": 0.07351259792504763, "grad_norm": 1.6038577427416487, "learning_rate": 9.950569508881065e-06, "loss": 0.6671, "step": 868 }, { "epoch": 0.07359728985814101, "grad_norm": 7.40073486301492, "learning_rate": 9.950376929295857e-06, "loss": 0.7046, "step": 869 }, { "epoch": 0.07368198179123439, "grad_norm": 1.602308595497251, "learning_rate": 9.950183977168572e-06, "loss": 0.7229, "step": 870 }, { "epoch": 0.07376667372432776, "grad_norm": 1.3269024833078547, "learning_rate": 9.94999065251373e-06, "loss": 0.6861, "step": 871 }, { "epoch": 0.07385136565742113, "grad_norm": 1.3824185003188736, "learning_rate": 9.94979695534588e-06, "loss": 0.686, "step": 872 }, { "epoch": 0.0739360575905145, "grad_norm": 1.6780238160481833, "learning_rate": 9.949602885679599e-06, "loss": 0.6866, "step": 873 }, { "epoch": 0.07402074952360788, "grad_norm": 1.6654072199170633, "learning_rate": 9.94940844352949e-06, "loss": 0.6745, "step": 874 }, { "epoch": 0.07410544145670125, "grad_norm": 1.6096810884894033, "learning_rate": 9.94921362891019e-06, "loss": 0.748, "step": 875 }, { "epoch": 0.07419013338979462, "grad_norm": 1.429564776922078, "learning_rate": 9.949018441836356e-06, "loss": 0.7123, "step": 876 }, { "epoch": 0.074274825322888, "grad_norm": 1.5402333189736557, "learning_rate": 9.948822882322676e-06, "loss": 0.5898, "step": 877 }, { "epoch": 0.07435951725598136, "grad_norm": 1.590754201571233, "learning_rate": 9.94862695038387e-06, "loss": 0.6985, "step": 878 }, { "epoch": 0.07444420918907474, "grad_norm": 0.6969800508797576, "learning_rate": 9.948430646034683e-06, "loss": 0.8697, "step": 879 }, { "epoch": 0.07452890112216812, "grad_norm": 1.3790916214932967, "learning_rate": 9.948233969289886e-06, "loss": 0.5908, "step": 880 }, { "epoch": 0.07461359305526148, "grad_norm": 1.430661240502087, "learning_rate": 9.948036920164282e-06, "loss": 0.6858, "step": 881 }, { "epoch": 0.07469828498835486, "grad_norm": 1.4271332218560413, "learning_rate": 9.947839498672696e-06, "loss": 0.6167, "step": 882 }, { "epoch": 0.07478297692144824, "grad_norm": 1.6626291232091392, "learning_rate": 9.94764170482999e-06, "loss": 0.7059, "step": 883 }, { "epoch": 0.0748676688545416, "grad_norm": 1.5011081787687086, "learning_rate": 9.947443538651045e-06, "loss": 0.6807, "step": 884 }, { "epoch": 0.07495236078763498, "grad_norm": 1.4735039686923963, "learning_rate": 9.947245000150775e-06, "loss": 0.6573, "step": 885 }, { "epoch": 0.07503705272072834, "grad_norm": 0.7251670004297435, "learning_rate": 9.947046089344123e-06, "loss": 0.8391, "step": 886 }, { "epoch": 0.07512174465382172, "grad_norm": 6.33264113722487, "learning_rate": 9.946846806246058e-06, "loss": 0.6464, "step": 887 }, { "epoch": 0.0752064365869151, "grad_norm": 1.4201638100516214, "learning_rate": 9.946647150871575e-06, "loss": 0.7393, "step": 888 }, { "epoch": 0.07529112852000847, "grad_norm": 1.4120595661091777, "learning_rate": 9.946447123235701e-06, "loss": 0.6722, "step": 889 }, { "epoch": 0.07537582045310184, "grad_norm": 2.8423571245786015, "learning_rate": 9.946246723353486e-06, "loss": 0.6463, "step": 890 }, { "epoch": 0.07546051238619521, "grad_norm": 1.5949412390215296, "learning_rate": 9.946045951240015e-06, "loss": 0.6405, "step": 891 }, { "epoch": 0.07554520431928859, "grad_norm": 2.364180874509446, "learning_rate": 9.945844806910397e-06, "loss": 0.6476, "step": 892 }, { "epoch": 0.07562989625238196, "grad_norm": 1.2128124516850285, "learning_rate": 9.945643290379765e-06, "loss": 0.6655, "step": 893 }, { "epoch": 0.07571458818547533, "grad_norm": 1.7790928972645565, "learning_rate": 9.94544140166329e-06, "loss": 0.709, "step": 894 }, { "epoch": 0.07579928011856871, "grad_norm": 0.6596553139581031, "learning_rate": 9.94523914077616e-06, "loss": 0.8499, "step": 895 }, { "epoch": 0.07588397205166209, "grad_norm": 1.379853982353845, "learning_rate": 9.9450365077336e-06, "loss": 0.6842, "step": 896 }, { "epoch": 0.07596866398475545, "grad_norm": 1.2804101665615844, "learning_rate": 9.944833502550855e-06, "loss": 0.6177, "step": 897 }, { "epoch": 0.07605335591784883, "grad_norm": 1.6059400760584008, "learning_rate": 9.944630125243206e-06, "loss": 0.6486, "step": 898 }, { "epoch": 0.07613804785094219, "grad_norm": 1.5307509699509492, "learning_rate": 9.944426375825958e-06, "loss": 0.6386, "step": 899 }, { "epoch": 0.07622273978403557, "grad_norm": 1.2908092977710073, "learning_rate": 9.944222254314443e-06, "loss": 0.677, "step": 900 }, { "epoch": 0.07630743171712895, "grad_norm": 1.3160113464857481, "learning_rate": 9.944017760724022e-06, "loss": 0.7063, "step": 901 }, { "epoch": 0.07639212365022231, "grad_norm": 1.5794769899958936, "learning_rate": 9.943812895070084e-06, "loss": 0.6834, "step": 902 }, { "epoch": 0.07647681558331569, "grad_norm": 0.6622496676464944, "learning_rate": 9.943607657368049e-06, "loss": 0.8143, "step": 903 }, { "epoch": 0.07656150751640906, "grad_norm": 1.205152313889236, "learning_rate": 9.943402047633358e-06, "loss": 0.6418, "step": 904 }, { "epoch": 0.07664619944950243, "grad_norm": 1.6247975881091625, "learning_rate": 9.943196065881486e-06, "loss": 0.6869, "step": 905 }, { "epoch": 0.07673089138259581, "grad_norm": 1.3099307632199664, "learning_rate": 9.942989712127937e-06, "loss": 0.6206, "step": 906 }, { "epoch": 0.07681558331568918, "grad_norm": 1.4090659841820963, "learning_rate": 9.942782986388236e-06, "loss": 0.6242, "step": 907 }, { "epoch": 0.07690027524878255, "grad_norm": 1.5924527469865923, "learning_rate": 9.942575888677944e-06, "loss": 0.6764, "step": 908 }, { "epoch": 0.07698496718187593, "grad_norm": 1.4936821254623585, "learning_rate": 9.942368419012643e-06, "loss": 0.6677, "step": 909 }, { "epoch": 0.0770696591149693, "grad_norm": 1.630589961698214, "learning_rate": 9.942160577407947e-06, "loss": 0.7062, "step": 910 }, { "epoch": 0.07715435104806267, "grad_norm": 1.3482154886097, "learning_rate": 9.941952363879497e-06, "loss": 0.7169, "step": 911 }, { "epoch": 0.07723904298115604, "grad_norm": 1.3696657949722602, "learning_rate": 9.941743778442963e-06, "loss": 0.6302, "step": 912 }, { "epoch": 0.07732373491424942, "grad_norm": 0.6757911714411411, "learning_rate": 9.941534821114043e-06, "loss": 0.8248, "step": 913 }, { "epoch": 0.0774084268473428, "grad_norm": 1.3607163191999583, "learning_rate": 9.941325491908461e-06, "loss": 0.6981, "step": 914 }, { "epoch": 0.07749311878043616, "grad_norm": 1.3918053973076636, "learning_rate": 9.941115790841969e-06, "loss": 0.6864, "step": 915 }, { "epoch": 0.07757781071352954, "grad_norm": 1.5718614131865563, "learning_rate": 9.940905717930349e-06, "loss": 0.6447, "step": 916 }, { "epoch": 0.0776625026466229, "grad_norm": 1.2844015267759417, "learning_rate": 9.94069527318941e-06, "loss": 0.6744, "step": 917 }, { "epoch": 0.07774719457971628, "grad_norm": 1.3847989184701914, "learning_rate": 9.940484456634991e-06, "loss": 0.7116, "step": 918 }, { "epoch": 0.07783188651280966, "grad_norm": 1.2936623389392201, "learning_rate": 9.940273268282956e-06, "loss": 0.6143, "step": 919 }, { "epoch": 0.07791657844590302, "grad_norm": 1.3641702124381634, "learning_rate": 9.940061708149197e-06, "loss": 0.6121, "step": 920 }, { "epoch": 0.0780012703789964, "grad_norm": 1.3041650642555418, "learning_rate": 9.939849776249634e-06, "loss": 0.7017, "step": 921 }, { "epoch": 0.07808596231208978, "grad_norm": 1.919542901977665, "learning_rate": 9.93963747260022e-06, "loss": 0.6807, "step": 922 }, { "epoch": 0.07817065424518314, "grad_norm": 1.2516605191988455, "learning_rate": 9.93942479721693e-06, "loss": 0.6728, "step": 923 }, { "epoch": 0.07825534617827652, "grad_norm": 2.1953668224423577, "learning_rate": 9.939211750115766e-06, "loss": 0.7087, "step": 924 }, { "epoch": 0.07834003811136989, "grad_norm": 2.514004959480685, "learning_rate": 9.938998331312765e-06, "loss": 0.7466, "step": 925 }, { "epoch": 0.07842473004446326, "grad_norm": 1.2755373263276286, "learning_rate": 9.938784540823984e-06, "loss": 0.6543, "step": 926 }, { "epoch": 0.07850942197755664, "grad_norm": 1.561536826229125, "learning_rate": 9.938570378665518e-06, "loss": 0.6239, "step": 927 }, { "epoch": 0.07859411391065001, "grad_norm": 1.5764265517192426, "learning_rate": 9.938355844853477e-06, "loss": 0.7102, "step": 928 }, { "epoch": 0.07867880584374339, "grad_norm": 2.121688508750434, "learning_rate": 9.938140939404011e-06, "loss": 0.6598, "step": 929 }, { "epoch": 0.07876349777683675, "grad_norm": 1.2335573965136346, "learning_rate": 9.937925662333288e-06, "loss": 0.6248, "step": 930 }, { "epoch": 0.07884818970993013, "grad_norm": 1.9097737489312174, "learning_rate": 9.937710013657514e-06, "loss": 0.685, "step": 931 }, { "epoch": 0.0789328816430235, "grad_norm": 1.6842904684569224, "learning_rate": 9.937493993392914e-06, "loss": 0.6745, "step": 932 }, { "epoch": 0.07901757357611687, "grad_norm": 1.7276215177291998, "learning_rate": 9.937277601555746e-06, "loss": 0.7266, "step": 933 }, { "epoch": 0.07910226550921025, "grad_norm": 0.5725979378260482, "learning_rate": 9.937060838162294e-06, "loss": 0.8123, "step": 934 }, { "epoch": 0.07918695744230363, "grad_norm": 2.1349771107678612, "learning_rate": 9.936843703228871e-06, "loss": 0.6397, "step": 935 }, { "epoch": 0.07927164937539699, "grad_norm": 1.3549830136457808, "learning_rate": 9.936626196771817e-06, "loss": 0.7187, "step": 936 }, { "epoch": 0.07935634130849037, "grad_norm": 1.743159958782161, "learning_rate": 9.936408318807503e-06, "loss": 0.6327, "step": 937 }, { "epoch": 0.07944103324158373, "grad_norm": 1.3863735765923906, "learning_rate": 9.936190069352321e-06, "loss": 0.6935, "step": 938 }, { "epoch": 0.07952572517467711, "grad_norm": 5.746199174100016, "learning_rate": 9.935971448422698e-06, "loss": 0.6338, "step": 939 }, { "epoch": 0.07961041710777049, "grad_norm": 2.4420864058242713, "learning_rate": 9.935752456035088e-06, "loss": 0.7426, "step": 940 }, { "epoch": 0.07969510904086385, "grad_norm": 1.6929575041563474, "learning_rate": 9.935533092205969e-06, "loss": 0.7042, "step": 941 }, { "epoch": 0.07977980097395723, "grad_norm": 1.719393936778142, "learning_rate": 9.93531335695185e-06, "loss": 0.748, "step": 942 }, { "epoch": 0.0798644929070506, "grad_norm": 1.5873168041781365, "learning_rate": 9.935093250289266e-06, "loss": 0.7074, "step": 943 }, { "epoch": 0.07994918484014397, "grad_norm": 1.4858464015000514, "learning_rate": 9.934872772234783e-06, "loss": 0.7088, "step": 944 }, { "epoch": 0.08003387677323735, "grad_norm": 0.7936514679836857, "learning_rate": 9.934651922804994e-06, "loss": 0.8592, "step": 945 }, { "epoch": 0.08011856870633072, "grad_norm": 1.5035807848688556, "learning_rate": 9.934430702016515e-06, "loss": 0.7068, "step": 946 }, { "epoch": 0.0802032606394241, "grad_norm": 1.7935573428508824, "learning_rate": 9.934209109886e-06, "loss": 0.7002, "step": 947 }, { "epoch": 0.08028795257251747, "grad_norm": 0.660524311267529, "learning_rate": 9.933987146430117e-06, "loss": 0.8596, "step": 948 }, { "epoch": 0.08037264450561084, "grad_norm": 1.2918317238364236, "learning_rate": 9.933764811665579e-06, "loss": 0.6926, "step": 949 }, { "epoch": 0.08045733643870422, "grad_norm": 1.8389071102846042, "learning_rate": 9.93354210560911e-06, "loss": 0.6918, "step": 950 }, { "epoch": 0.08054202837179758, "grad_norm": 1.5672121440062516, "learning_rate": 9.933319028277476e-06, "loss": 0.6812, "step": 951 }, { "epoch": 0.08062672030489096, "grad_norm": 1.8172167848739034, "learning_rate": 9.93309557968746e-06, "loss": 0.6961, "step": 952 }, { "epoch": 0.08071141223798434, "grad_norm": 1.5451412155992796, "learning_rate": 9.93287175985588e-06, "loss": 0.6636, "step": 953 }, { "epoch": 0.0807961041710777, "grad_norm": 1.2685056783433444, "learning_rate": 9.93264756879958e-06, "loss": 0.7118, "step": 954 }, { "epoch": 0.08088079610417108, "grad_norm": 2.0466597336364556, "learning_rate": 9.932423006535431e-06, "loss": 0.6167, "step": 955 }, { "epoch": 0.08096548803726444, "grad_norm": 1.2124783063498208, "learning_rate": 9.932198073080331e-06, "loss": 0.7017, "step": 956 }, { "epoch": 0.08105017997035782, "grad_norm": 1.7606938835994175, "learning_rate": 9.93197276845121e-06, "loss": 0.6333, "step": 957 }, { "epoch": 0.0811348719034512, "grad_norm": 1.4041991492352885, "learning_rate": 9.931747092665022e-06, "loss": 0.5863, "step": 958 }, { "epoch": 0.08121956383654456, "grad_norm": 1.3279862208318538, "learning_rate": 9.93152104573875e-06, "loss": 0.6828, "step": 959 }, { "epoch": 0.08130425576963794, "grad_norm": 1.476696405351909, "learning_rate": 9.931294627689405e-06, "loss": 0.6685, "step": 960 }, { "epoch": 0.08138894770273132, "grad_norm": 1.9820818717765905, "learning_rate": 9.931067838534029e-06, "loss": 0.6698, "step": 961 }, { "epoch": 0.08147363963582469, "grad_norm": 1.9278536174091359, "learning_rate": 9.930840678289686e-06, "loss": 0.7026, "step": 962 }, { "epoch": 0.08155833156891806, "grad_norm": 2.421874012647656, "learning_rate": 9.93061314697347e-06, "loss": 0.7196, "step": 963 }, { "epoch": 0.08164302350201143, "grad_norm": 1.2335641731251787, "learning_rate": 9.930385244602506e-06, "loss": 0.6931, "step": 964 }, { "epoch": 0.0817277154351048, "grad_norm": 1.2593947479235648, "learning_rate": 9.930156971193947e-06, "loss": 0.6601, "step": 965 }, { "epoch": 0.08181240736819818, "grad_norm": 1.5460087345443447, "learning_rate": 9.92992832676497e-06, "loss": 0.6064, "step": 966 }, { "epoch": 0.08189709930129155, "grad_norm": 1.3021625536857377, "learning_rate": 9.929699311332779e-06, "loss": 0.7255, "step": 967 }, { "epoch": 0.08198179123438493, "grad_norm": 1.500744518136564, "learning_rate": 9.929469924914612e-06, "loss": 0.6442, "step": 968 }, { "epoch": 0.0820664831674783, "grad_norm": 4.688546436904151, "learning_rate": 9.929240167527729e-06, "loss": 0.6678, "step": 969 }, { "epoch": 0.08215117510057167, "grad_norm": 1.4476023655309918, "learning_rate": 9.929010039189424e-06, "loss": 0.7068, "step": 970 }, { "epoch": 0.08223586703366505, "grad_norm": 1.3677860110445763, "learning_rate": 9.928779539917012e-06, "loss": 0.6578, "step": 971 }, { "epoch": 0.08232055896675841, "grad_norm": 1.3276304467547808, "learning_rate": 9.92854866972784e-06, "loss": 0.6896, "step": 972 }, { "epoch": 0.08240525089985179, "grad_norm": 1.4403930863583512, "learning_rate": 9.928317428639282e-06, "loss": 0.6948, "step": 973 }, { "epoch": 0.08248994283294517, "grad_norm": 1.3332215995261505, "learning_rate": 9.928085816668744e-06, "loss": 0.6587, "step": 974 }, { "epoch": 0.08257463476603853, "grad_norm": 0.6317871298669466, "learning_rate": 9.92785383383365e-06, "loss": 0.9141, "step": 975 }, { "epoch": 0.08265932669913191, "grad_norm": 1.427614190940092, "learning_rate": 9.927621480151462e-06, "loss": 0.6284, "step": 976 }, { "epoch": 0.08274401863222527, "grad_norm": 1.9936650672354068, "learning_rate": 9.927388755639664e-06, "loss": 0.6938, "step": 977 }, { "epoch": 0.08282871056531865, "grad_norm": 1.66650887992972, "learning_rate": 9.92715566031577e-06, "loss": 0.7437, "step": 978 }, { "epoch": 0.08291340249841203, "grad_norm": 0.5636580981344762, "learning_rate": 9.926922194197324e-06, "loss": 0.8601, "step": 979 }, { "epoch": 0.0829980944315054, "grad_norm": 2.1371129587447832, "learning_rate": 9.926688357301892e-06, "loss": 0.6726, "step": 980 }, { "epoch": 0.08308278636459877, "grad_norm": 1.467212407453633, "learning_rate": 9.926454149647074e-06, "loss": 0.7007, "step": 981 }, { "epoch": 0.08316747829769215, "grad_norm": 0.5796197289342045, "learning_rate": 9.926219571250492e-06, "loss": 0.7974, "step": 982 }, { "epoch": 0.08325217023078552, "grad_norm": 1.284481608934329, "learning_rate": 9.925984622129803e-06, "loss": 0.7014, "step": 983 }, { "epoch": 0.0833368621638789, "grad_norm": 1.5818556764825318, "learning_rate": 9.925749302302689e-06, "loss": 0.7297, "step": 984 }, { "epoch": 0.08342155409697226, "grad_norm": 1.7462090545159554, "learning_rate": 9.925513611786855e-06, "loss": 0.6827, "step": 985 }, { "epoch": 0.08350624603006564, "grad_norm": 1.3974669990105695, "learning_rate": 9.92527755060004e-06, "loss": 0.6695, "step": 986 }, { "epoch": 0.08359093796315901, "grad_norm": 1.202920555613023, "learning_rate": 9.925041118760009e-06, "loss": 0.6733, "step": 987 }, { "epoch": 0.08367562989625238, "grad_norm": 1.6397420414289172, "learning_rate": 9.924804316284553e-06, "loss": 0.6657, "step": 988 }, { "epoch": 0.08376032182934576, "grad_norm": 1.8875299917707706, "learning_rate": 9.924567143191497e-06, "loss": 0.6634, "step": 989 }, { "epoch": 0.08384501376243912, "grad_norm": 1.6490940736747526, "learning_rate": 9.924329599498685e-06, "loss": 0.685, "step": 990 }, { "epoch": 0.0839297056955325, "grad_norm": 1.1055285924769647, "learning_rate": 9.924091685223995e-06, "loss": 0.6669, "step": 991 }, { "epoch": 0.08401439762862588, "grad_norm": 1.1643377533839323, "learning_rate": 9.92385340038533e-06, "loss": 0.6141, "step": 992 }, { "epoch": 0.08409908956171924, "grad_norm": 1.390483829671482, "learning_rate": 9.923614745000627e-06, "loss": 0.675, "step": 993 }, { "epoch": 0.08418378149481262, "grad_norm": 2.460737325808213, "learning_rate": 9.92337571908784e-06, "loss": 0.7107, "step": 994 }, { "epoch": 0.084268473427906, "grad_norm": 1.4998527516243172, "learning_rate": 9.923136322664962e-06, "loss": 0.6855, "step": 995 }, { "epoch": 0.08435316536099936, "grad_norm": 1.342498803598866, "learning_rate": 9.922896555750004e-06, "loss": 0.7242, "step": 996 }, { "epoch": 0.08443785729409274, "grad_norm": 1.3474146007906889, "learning_rate": 9.922656418361011e-06, "loss": 0.6669, "step": 997 }, { "epoch": 0.0845225492271861, "grad_norm": 1.3514559231237742, "learning_rate": 9.922415910516059e-06, "loss": 0.7033, "step": 998 }, { "epoch": 0.08460724116027948, "grad_norm": 1.7846072224985956, "learning_rate": 9.922175032233244e-06, "loss": 0.7261, "step": 999 }, { "epoch": 0.08469193309337286, "grad_norm": 1.524732266372679, "learning_rate": 9.921933783530693e-06, "loss": 0.6467, "step": 1000 }, { "epoch": 0.08477662502646623, "grad_norm": 1.3675560122292396, "learning_rate": 9.921692164426563e-06, "loss": 0.6744, "step": 1001 }, { "epoch": 0.0848613169595596, "grad_norm": 1.2776709390422334, "learning_rate": 9.921450174939034e-06, "loss": 0.7026, "step": 1002 }, { "epoch": 0.08494600889265297, "grad_norm": 4.28975748348844, "learning_rate": 9.92120781508632e-06, "loss": 0.6337, "step": 1003 }, { "epoch": 0.08503070082574635, "grad_norm": 1.6847689167764412, "learning_rate": 9.920965084886658e-06, "loss": 0.6573, "step": 1004 }, { "epoch": 0.08511539275883973, "grad_norm": 0.622098496647665, "learning_rate": 9.920721984358317e-06, "loss": 0.8643, "step": 1005 }, { "epoch": 0.08520008469193309, "grad_norm": 1.5001305230993884, "learning_rate": 9.92047851351959e-06, "loss": 0.7494, "step": 1006 }, { "epoch": 0.08528477662502647, "grad_norm": 1.2642784299647971, "learning_rate": 9.920234672388797e-06, "loss": 0.6131, "step": 1007 }, { "epoch": 0.08536946855811985, "grad_norm": 0.5986196268447403, "learning_rate": 9.919990460984294e-06, "loss": 0.8348, "step": 1008 }, { "epoch": 0.08545416049121321, "grad_norm": 1.512781700212726, "learning_rate": 9.919745879324456e-06, "loss": 0.6347, "step": 1009 }, { "epoch": 0.08553885242430659, "grad_norm": 1.2386071401621155, "learning_rate": 9.919500927427689e-06, "loss": 0.6514, "step": 1010 }, { "epoch": 0.08562354435739995, "grad_norm": 1.1767530113175197, "learning_rate": 9.919255605312428e-06, "loss": 0.6073, "step": 1011 }, { "epoch": 0.08570823629049333, "grad_norm": 5.31938396910946, "learning_rate": 9.919009912997133e-06, "loss": 0.7459, "step": 1012 }, { "epoch": 0.08579292822358671, "grad_norm": 1.367855794837674, "learning_rate": 9.918763850500293e-06, "loss": 0.7041, "step": 1013 }, { "epoch": 0.08587762015668007, "grad_norm": 0.6576289061480483, "learning_rate": 9.91851741784043e-06, "loss": 0.8437, "step": 1014 }, { "epoch": 0.08596231208977345, "grad_norm": 1.3713696173901904, "learning_rate": 9.918270615036086e-06, "loss": 0.5903, "step": 1015 }, { "epoch": 0.08604700402286682, "grad_norm": 1.3872428153632252, "learning_rate": 9.918023442105833e-06, "loss": 0.6306, "step": 1016 }, { "epoch": 0.0861316959559602, "grad_norm": 1.4650901872444937, "learning_rate": 9.917775899068275e-06, "loss": 0.6589, "step": 1017 }, { "epoch": 0.08621638788905357, "grad_norm": 1.334177729772401, "learning_rate": 9.91752798594204e-06, "loss": 0.7246, "step": 1018 }, { "epoch": 0.08630107982214694, "grad_norm": 0.6926892457705754, "learning_rate": 9.917279702745784e-06, "loss": 0.8647, "step": 1019 }, { "epoch": 0.08638577175524031, "grad_norm": 1.3756591526622564, "learning_rate": 9.917031049498193e-06, "loss": 0.6772, "step": 1020 }, { "epoch": 0.08647046368833369, "grad_norm": 1.323807672360935, "learning_rate": 9.916782026217977e-06, "loss": 0.7146, "step": 1021 }, { "epoch": 0.08655515562142706, "grad_norm": 1.4659350339369566, "learning_rate": 9.916532632923879e-06, "loss": 0.6588, "step": 1022 }, { "epoch": 0.08663984755452044, "grad_norm": 1.4950180604146308, "learning_rate": 9.916282869634666e-06, "loss": 0.7249, "step": 1023 }, { "epoch": 0.0867245394876138, "grad_norm": 1.37807149450138, "learning_rate": 9.916032736369135e-06, "loss": 0.6678, "step": 1024 }, { "epoch": 0.08680923142070718, "grad_norm": 1.4956891727782522, "learning_rate": 9.915782233146107e-06, "loss": 0.6397, "step": 1025 }, { "epoch": 0.08689392335380056, "grad_norm": 1.2075173031236475, "learning_rate": 9.915531359984437e-06, "loss": 0.6825, "step": 1026 }, { "epoch": 0.08697861528689392, "grad_norm": 2.429975183446041, "learning_rate": 9.915280116903003e-06, "loss": 0.7178, "step": 1027 }, { "epoch": 0.0870633072199873, "grad_norm": 1.4439216786751203, "learning_rate": 9.915028503920711e-06, "loss": 0.6864, "step": 1028 }, { "epoch": 0.08714799915308066, "grad_norm": 1.492490053096575, "learning_rate": 9.9147765210565e-06, "loss": 0.6761, "step": 1029 }, { "epoch": 0.08723269108617404, "grad_norm": 1.160409928162907, "learning_rate": 9.914524168329332e-06, "loss": 0.6862, "step": 1030 }, { "epoch": 0.08731738301926742, "grad_norm": 0.6208775992305712, "learning_rate": 9.914271445758193e-06, "loss": 0.8854, "step": 1031 }, { "epoch": 0.08740207495236078, "grad_norm": 1.4040111731801908, "learning_rate": 9.914018353362108e-06, "loss": 0.6433, "step": 1032 }, { "epoch": 0.08748676688545416, "grad_norm": 1.3048476498757164, "learning_rate": 9.913764891160121e-06, "loss": 0.672, "step": 1033 }, { "epoch": 0.08757145881854754, "grad_norm": 0.6331274176438986, "learning_rate": 9.913511059171304e-06, "loss": 0.8378, "step": 1034 }, { "epoch": 0.0876561507516409, "grad_norm": 4.865590627592577, "learning_rate": 9.913256857414764e-06, "loss": 0.7201, "step": 1035 }, { "epoch": 0.08774084268473428, "grad_norm": 1.8343053061610828, "learning_rate": 9.913002285909626e-06, "loss": 0.6722, "step": 1036 }, { "epoch": 0.08782553461782765, "grad_norm": 1.5416912605738007, "learning_rate": 9.912747344675053e-06, "loss": 0.7032, "step": 1037 }, { "epoch": 0.08791022655092103, "grad_norm": 0.6107546390902511, "learning_rate": 9.912492033730226e-06, "loss": 0.8467, "step": 1038 }, { "epoch": 0.0879949184840144, "grad_norm": 1.4118884591446386, "learning_rate": 9.912236353094363e-06, "loss": 0.7478, "step": 1039 }, { "epoch": 0.08807961041710777, "grad_norm": 1.599706530322386, "learning_rate": 9.9119803027867e-06, "loss": 0.6297, "step": 1040 }, { "epoch": 0.08816430235020115, "grad_norm": 1.1458302387700143, "learning_rate": 9.911723882826511e-06, "loss": 0.64, "step": 1041 }, { "epoch": 0.08824899428329451, "grad_norm": 1.312191627688094, "learning_rate": 9.91146709323309e-06, "loss": 0.6454, "step": 1042 }, { "epoch": 0.08833368621638789, "grad_norm": 1.3907612828635763, "learning_rate": 9.911209934025764e-06, "loss": 0.6611, "step": 1043 }, { "epoch": 0.08841837814948127, "grad_norm": 1.2997549153811974, "learning_rate": 9.910952405223883e-06, "loss": 0.6716, "step": 1044 }, { "epoch": 0.08850307008257463, "grad_norm": 1.780176927861001, "learning_rate": 9.91069450684683e-06, "loss": 0.6788, "step": 1045 }, { "epoch": 0.08858776201566801, "grad_norm": 1.3835897512356419, "learning_rate": 9.910436238914012e-06, "loss": 0.6764, "step": 1046 }, { "epoch": 0.08867245394876139, "grad_norm": 1.1904637854769506, "learning_rate": 9.910177601444864e-06, "loss": 0.6502, "step": 1047 }, { "epoch": 0.08875714588185475, "grad_norm": 1.2545209663293282, "learning_rate": 9.909918594458851e-06, "loss": 0.6533, "step": 1048 }, { "epoch": 0.08884183781494813, "grad_norm": 1.5104174976866418, "learning_rate": 9.909659217975464e-06, "loss": 0.6381, "step": 1049 }, { "epoch": 0.0889265297480415, "grad_norm": 1.4099581847258604, "learning_rate": 9.909399472014225e-06, "loss": 0.5992, "step": 1050 }, { "epoch": 0.08901122168113487, "grad_norm": 1.5358709792840355, "learning_rate": 9.909139356594679e-06, "loss": 0.6407, "step": 1051 }, { "epoch": 0.08909591361422825, "grad_norm": 1.6468542416584802, "learning_rate": 9.9088788717364e-06, "loss": 0.6596, "step": 1052 }, { "epoch": 0.08918060554732161, "grad_norm": 1.2971739668879945, "learning_rate": 9.908618017458992e-06, "loss": 0.711, "step": 1053 }, { "epoch": 0.08926529748041499, "grad_norm": 1.3398351470063554, "learning_rate": 9.908356793782086e-06, "loss": 0.6694, "step": 1054 }, { "epoch": 0.08934998941350836, "grad_norm": 1.6654773034049386, "learning_rate": 9.90809520072534e-06, "loss": 0.6253, "step": 1055 }, { "epoch": 0.08943468134660174, "grad_norm": 2.01958674504562, "learning_rate": 9.907833238308443e-06, "loss": 0.7411, "step": 1056 }, { "epoch": 0.08951937327969511, "grad_norm": 1.2829578493455356, "learning_rate": 9.907570906551104e-06, "loss": 0.6709, "step": 1057 }, { "epoch": 0.08960406521278848, "grad_norm": 1.7243799182116633, "learning_rate": 9.907308205473067e-06, "loss": 0.6902, "step": 1058 }, { "epoch": 0.08968875714588186, "grad_norm": 4.001945884501772, "learning_rate": 9.907045135094105e-06, "loss": 0.7267, "step": 1059 }, { "epoch": 0.08977344907897523, "grad_norm": 1.1118278133477704, "learning_rate": 9.90678169543401e-06, "loss": 0.672, "step": 1060 }, { "epoch": 0.0898581410120686, "grad_norm": 1.2420116408813109, "learning_rate": 9.90651788651261e-06, "loss": 0.6847, "step": 1061 }, { "epoch": 0.08994283294516198, "grad_norm": 1.6044643029360497, "learning_rate": 9.906253708349759e-06, "loss": 0.6932, "step": 1062 }, { "epoch": 0.09002752487825534, "grad_norm": 2.3891047089857365, "learning_rate": 9.905989160965337e-06, "loss": 0.7375, "step": 1063 }, { "epoch": 0.09011221681134872, "grad_norm": 1.7017963962946328, "learning_rate": 9.90572424437925e-06, "loss": 0.6645, "step": 1064 }, { "epoch": 0.0901969087444421, "grad_norm": 1.5971472666708404, "learning_rate": 9.905458958611437e-06, "loss": 0.6744, "step": 1065 }, { "epoch": 0.09028160067753546, "grad_norm": 1.717942904036938, "learning_rate": 9.905193303681864e-06, "loss": 0.6299, "step": 1066 }, { "epoch": 0.09036629261062884, "grad_norm": 1.2245365576811127, "learning_rate": 9.904927279610519e-06, "loss": 0.6867, "step": 1067 }, { "epoch": 0.0904509845437222, "grad_norm": 1.3261059883399569, "learning_rate": 9.904660886417424e-06, "loss": 0.697, "step": 1068 }, { "epoch": 0.09053567647681558, "grad_norm": 0.6509628676594689, "learning_rate": 9.904394124122626e-06, "loss": 0.8854, "step": 1069 }, { "epoch": 0.09062036840990896, "grad_norm": 1.9398059110241144, "learning_rate": 9.904126992746199e-06, "loss": 0.639, "step": 1070 }, { "epoch": 0.09070506034300233, "grad_norm": 1.5669521875647416, "learning_rate": 9.903859492308247e-06, "loss": 0.6316, "step": 1071 }, { "epoch": 0.0907897522760957, "grad_norm": 1.486185387280673, "learning_rate": 9.903591622828903e-06, "loss": 0.6714, "step": 1072 }, { "epoch": 0.09087444420918908, "grad_norm": 1.3229672796533336, "learning_rate": 9.903323384328323e-06, "loss": 0.6653, "step": 1073 }, { "epoch": 0.09095913614228245, "grad_norm": 1.4162873194149632, "learning_rate": 9.903054776826694e-06, "loss": 0.6654, "step": 1074 }, { "epoch": 0.09104382807537582, "grad_norm": 5.353028322749269, "learning_rate": 9.902785800344229e-06, "loss": 0.695, "step": 1075 }, { "epoch": 0.09112852000846919, "grad_norm": 1.8880423714589178, "learning_rate": 9.902516454901171e-06, "loss": 0.6948, "step": 1076 }, { "epoch": 0.09121321194156257, "grad_norm": 1.3983289571342532, "learning_rate": 9.90224674051779e-06, "loss": 0.684, "step": 1077 }, { "epoch": 0.09129790387465594, "grad_norm": 0.6826855260060158, "learning_rate": 9.901976657214385e-06, "loss": 0.8564, "step": 1078 }, { "epoch": 0.09138259580774931, "grad_norm": 1.7668095773251593, "learning_rate": 9.901706205011277e-06, "loss": 0.6716, "step": 1079 }, { "epoch": 0.09146728774084269, "grad_norm": 1.370586255662208, "learning_rate": 9.901435383928822e-06, "loss": 0.6632, "step": 1080 }, { "epoch": 0.09155197967393605, "grad_norm": 2.215808867206384, "learning_rate": 9.9011641939874e-06, "loss": 0.694, "step": 1081 }, { "epoch": 0.09163667160702943, "grad_norm": 1.55549539813238, "learning_rate": 9.900892635207419e-06, "loss": 0.6631, "step": 1082 }, { "epoch": 0.09172136354012281, "grad_norm": 1.3942695954613773, "learning_rate": 9.900620707609318e-06, "loss": 0.7358, "step": 1083 }, { "epoch": 0.09180605547321617, "grad_norm": 1.266545317542065, "learning_rate": 9.900348411213558e-06, "loss": 0.6407, "step": 1084 }, { "epoch": 0.09189074740630955, "grad_norm": 1.7379539291446293, "learning_rate": 9.90007574604063e-06, "loss": 0.6717, "step": 1085 }, { "epoch": 0.09197543933940293, "grad_norm": 1.246752539401496, "learning_rate": 9.899802712111055e-06, "loss": 0.6377, "step": 1086 }, { "epoch": 0.09206013127249629, "grad_norm": 1.3956890332960832, "learning_rate": 9.899529309445381e-06, "loss": 0.6495, "step": 1087 }, { "epoch": 0.09214482320558967, "grad_norm": 1.871217731680719, "learning_rate": 9.899255538064184e-06, "loss": 0.6737, "step": 1088 }, { "epoch": 0.09222951513868304, "grad_norm": 1.7196214455570875, "learning_rate": 9.89898139798806e-06, "loss": 0.6853, "step": 1089 }, { "epoch": 0.09231420707177641, "grad_norm": 1.3746767113426874, "learning_rate": 9.89870688923765e-06, "loss": 0.6272, "step": 1090 }, { "epoch": 0.09239889900486979, "grad_norm": 1.8495483355951268, "learning_rate": 9.898432011833603e-06, "loss": 0.7302, "step": 1091 }, { "epoch": 0.09248359093796316, "grad_norm": 1.2670109490626145, "learning_rate": 9.898156765796612e-06, "loss": 0.6839, "step": 1092 }, { "epoch": 0.09256828287105653, "grad_norm": 1.6984204867802841, "learning_rate": 9.897881151147383e-06, "loss": 0.6941, "step": 1093 }, { "epoch": 0.0926529748041499, "grad_norm": 2.8373714922371134, "learning_rate": 9.897605167906665e-06, "loss": 0.6542, "step": 1094 }, { "epoch": 0.09273766673724328, "grad_norm": 1.3181264025037434, "learning_rate": 9.897328816095224e-06, "loss": 0.6917, "step": 1095 }, { "epoch": 0.09282235867033665, "grad_norm": 1.3278843735457966, "learning_rate": 9.897052095733857e-06, "loss": 0.6421, "step": 1096 }, { "epoch": 0.09290705060343002, "grad_norm": 1.3636597007825442, "learning_rate": 9.896775006843387e-06, "loss": 0.7189, "step": 1097 }, { "epoch": 0.0929917425365234, "grad_norm": 1.356884572516147, "learning_rate": 9.89649754944467e-06, "loss": 0.6072, "step": 1098 }, { "epoch": 0.09307643446961678, "grad_norm": 0.7726242328787365, "learning_rate": 9.896219723558582e-06, "loss": 0.8633, "step": 1099 }, { "epoch": 0.09316112640271014, "grad_norm": 1.456880995691964, "learning_rate": 9.895941529206035e-06, "loss": 0.736, "step": 1100 }, { "epoch": 0.09324581833580352, "grad_norm": 1.5659875177504432, "learning_rate": 9.895662966407962e-06, "loss": 0.7078, "step": 1101 }, { "epoch": 0.09333051026889688, "grad_norm": 1.4293231401853803, "learning_rate": 9.895384035185327e-06, "loss": 0.685, "step": 1102 }, { "epoch": 0.09341520220199026, "grad_norm": 1.4188272672568372, "learning_rate": 9.89510473555912e-06, "loss": 0.7, "step": 1103 }, { "epoch": 0.09349989413508364, "grad_norm": 1.9840511466128306, "learning_rate": 9.894825067550363e-06, "loss": 0.6645, "step": 1104 }, { "epoch": 0.093584586068177, "grad_norm": 2.3198142311648335, "learning_rate": 9.894545031180099e-06, "loss": 0.687, "step": 1105 }, { "epoch": 0.09366927800127038, "grad_norm": 3.507906469765746, "learning_rate": 9.894264626469406e-06, "loss": 0.6814, "step": 1106 }, { "epoch": 0.09375396993436375, "grad_norm": 1.2642372082840398, "learning_rate": 9.89398385343938e-06, "loss": 0.643, "step": 1107 }, { "epoch": 0.09383866186745712, "grad_norm": 1.112668445574515, "learning_rate": 9.893702712111155e-06, "loss": 0.7151, "step": 1108 }, { "epoch": 0.0939233538005505, "grad_norm": 1.5506088559404436, "learning_rate": 9.89342120250589e-06, "loss": 0.6672, "step": 1109 }, { "epoch": 0.09400804573364387, "grad_norm": 1.2608936357219498, "learning_rate": 9.893139324644764e-06, "loss": 0.6574, "step": 1110 }, { "epoch": 0.09409273766673724, "grad_norm": 1.532569541265127, "learning_rate": 9.892857078548996e-06, "loss": 0.6758, "step": 1111 }, { "epoch": 0.09417742959983062, "grad_norm": 1.4848801287767348, "learning_rate": 9.892574464239822e-06, "loss": 0.6909, "step": 1112 }, { "epoch": 0.09426212153292399, "grad_norm": 1.296183563720103, "learning_rate": 9.892291481738514e-06, "loss": 0.6655, "step": 1113 }, { "epoch": 0.09434681346601737, "grad_norm": 2.3096445123868827, "learning_rate": 9.892008131066364e-06, "loss": 0.6571, "step": 1114 }, { "epoch": 0.09443150539911073, "grad_norm": 1.273742487602041, "learning_rate": 9.891724412244699e-06, "loss": 0.6459, "step": 1115 }, { "epoch": 0.09451619733220411, "grad_norm": 1.2573114929434812, "learning_rate": 9.89144032529487e-06, "loss": 0.6645, "step": 1116 }, { "epoch": 0.09460088926529749, "grad_norm": 1.6364346476941585, "learning_rate": 9.891155870238253e-06, "loss": 0.7017, "step": 1117 }, { "epoch": 0.09468558119839085, "grad_norm": 1.5456738800353924, "learning_rate": 9.89087104709626e-06, "loss": 0.7276, "step": 1118 }, { "epoch": 0.09477027313148423, "grad_norm": 1.4686548369326122, "learning_rate": 9.89058585589032e-06, "loss": 0.6543, "step": 1119 }, { "epoch": 0.09485496506457759, "grad_norm": 1.218781042066649, "learning_rate": 9.890300296641898e-06, "loss": 0.6612, "step": 1120 }, { "epoch": 0.09493965699767097, "grad_norm": 1.2620686502363845, "learning_rate": 9.890014369372483e-06, "loss": 0.6491, "step": 1121 }, { "epoch": 0.09502434893076435, "grad_norm": 0.6821083506577404, "learning_rate": 9.889728074103593e-06, "loss": 0.8769, "step": 1122 }, { "epoch": 0.09510904086385771, "grad_norm": 1.9529578783127983, "learning_rate": 9.889441410856773e-06, "loss": 0.7083, "step": 1123 }, { "epoch": 0.09519373279695109, "grad_norm": 0.6573005391368377, "learning_rate": 9.889154379653597e-06, "loss": 0.8971, "step": 1124 }, { "epoch": 0.09527842473004447, "grad_norm": 1.3980524573027473, "learning_rate": 9.888866980515663e-06, "loss": 0.7127, "step": 1125 }, { "epoch": 0.09536311666313783, "grad_norm": 1.4773448745740927, "learning_rate": 9.888579213464601e-06, "loss": 0.7086, "step": 1126 }, { "epoch": 0.09544780859623121, "grad_norm": 1.3283384923868924, "learning_rate": 9.888291078522067e-06, "loss": 0.6216, "step": 1127 }, { "epoch": 0.09553250052932458, "grad_norm": 1.3451416384650845, "learning_rate": 9.888002575709746e-06, "loss": 0.71, "step": 1128 }, { "epoch": 0.09561719246241795, "grad_norm": 0.6115093225460213, "learning_rate": 9.887713705049348e-06, "loss": 0.8719, "step": 1129 }, { "epoch": 0.09570188439551133, "grad_norm": 1.4009898973462969, "learning_rate": 9.88742446656261e-06, "loss": 0.685, "step": 1130 }, { "epoch": 0.0957865763286047, "grad_norm": 1.6690695176539525, "learning_rate": 9.887134860271303e-06, "loss": 0.7072, "step": 1131 }, { "epoch": 0.09587126826169808, "grad_norm": 1.1992194061302257, "learning_rate": 9.886844886197218e-06, "loss": 0.6708, "step": 1132 }, { "epoch": 0.09595596019479144, "grad_norm": 1.5921953238637947, "learning_rate": 9.886554544362178e-06, "loss": 0.6773, "step": 1133 }, { "epoch": 0.09604065212788482, "grad_norm": 1.267270798954175, "learning_rate": 9.886263834788035e-06, "loss": 0.6372, "step": 1134 }, { "epoch": 0.0961253440609782, "grad_norm": 1.6214289223663894, "learning_rate": 9.885972757496662e-06, "loss": 0.6261, "step": 1135 }, { "epoch": 0.09621003599407156, "grad_norm": 6.745738974110231, "learning_rate": 9.885681312509967e-06, "loss": 0.7154, "step": 1136 }, { "epoch": 0.09629472792716494, "grad_norm": 1.2846204560541372, "learning_rate": 9.885389499849882e-06, "loss": 0.6545, "step": 1137 }, { "epoch": 0.09637941986025832, "grad_norm": 1.6789852966409073, "learning_rate": 9.88509731953837e-06, "loss": 0.6404, "step": 1138 }, { "epoch": 0.09646411179335168, "grad_norm": 3.3855650146831238, "learning_rate": 9.884804771597414e-06, "loss": 0.6867, "step": 1139 }, { "epoch": 0.09654880372644506, "grad_norm": 1.457988858823365, "learning_rate": 9.884511856049035e-06, "loss": 0.6453, "step": 1140 }, { "epoch": 0.09663349565953842, "grad_norm": 1.4274758261256346, "learning_rate": 9.884218572915273e-06, "loss": 0.6497, "step": 1141 }, { "epoch": 0.0967181875926318, "grad_norm": 1.7060458540312455, "learning_rate": 9.8839249222182e-06, "loss": 0.68, "step": 1142 }, { "epoch": 0.09680287952572518, "grad_norm": 1.6695360383650093, "learning_rate": 9.883630903979914e-06, "loss": 0.6573, "step": 1143 }, { "epoch": 0.09688757145881854, "grad_norm": 1.7857105976692569, "learning_rate": 9.883336518222546e-06, "loss": 0.6949, "step": 1144 }, { "epoch": 0.09697226339191192, "grad_norm": 2.000723453232369, "learning_rate": 9.883041764968244e-06, "loss": 0.6759, "step": 1145 }, { "epoch": 0.09705695532500529, "grad_norm": 1.393985486849695, "learning_rate": 9.882746644239192e-06, "loss": 0.6776, "step": 1146 }, { "epoch": 0.09714164725809867, "grad_norm": 1.5077829071905444, "learning_rate": 9.8824511560576e-06, "loss": 0.6498, "step": 1147 }, { "epoch": 0.09722633919119204, "grad_norm": 1.3267819492026294, "learning_rate": 9.882155300445705e-06, "loss": 0.6951, "step": 1148 }, { "epoch": 0.09731103112428541, "grad_norm": 1.861532033364375, "learning_rate": 9.881859077425771e-06, "loss": 0.6982, "step": 1149 }, { "epoch": 0.09739572305737879, "grad_norm": 1.6333357716571804, "learning_rate": 9.88156248702009e-06, "loss": 0.68, "step": 1150 }, { "epoch": 0.09748041499047216, "grad_norm": 1.5233115862028237, "learning_rate": 9.881265529250986e-06, "loss": 0.6727, "step": 1151 }, { "epoch": 0.09756510692356553, "grad_norm": 1.58727947461221, "learning_rate": 9.8809682041408e-06, "loss": 0.6949, "step": 1152 }, { "epoch": 0.0976497988566589, "grad_norm": 1.6348304356527321, "learning_rate": 9.880670511711912e-06, "loss": 0.6745, "step": 1153 }, { "epoch": 0.09773449078975227, "grad_norm": 1.6551216842961909, "learning_rate": 9.880372451986724e-06, "loss": 0.7378, "step": 1154 }, { "epoch": 0.09781918272284565, "grad_norm": 1.8054008680515612, "learning_rate": 9.880074024987666e-06, "loss": 0.6212, "step": 1155 }, { "epoch": 0.09790387465593903, "grad_norm": 1.4555361122439503, "learning_rate": 9.879775230737196e-06, "loss": 0.6781, "step": 1156 }, { "epoch": 0.09798856658903239, "grad_norm": 1.5661176551633307, "learning_rate": 9.8794760692578e-06, "loss": 0.6456, "step": 1157 }, { "epoch": 0.09807325852212577, "grad_norm": 1.3779166880711873, "learning_rate": 9.879176540571993e-06, "loss": 0.6575, "step": 1158 }, { "epoch": 0.09815795045521913, "grad_norm": 1.6418983220421823, "learning_rate": 9.878876644702313e-06, "loss": 0.7083, "step": 1159 }, { "epoch": 0.09824264238831251, "grad_norm": 1.3763500917212639, "learning_rate": 9.878576381671332e-06, "loss": 0.6814, "step": 1160 }, { "epoch": 0.09832733432140589, "grad_norm": 1.8103081229082116, "learning_rate": 9.878275751501644e-06, "loss": 0.7423, "step": 1161 }, { "epoch": 0.09841202625449925, "grad_norm": 2.258761108814839, "learning_rate": 9.877974754215876e-06, "loss": 0.6986, "step": 1162 }, { "epoch": 0.09849671818759263, "grad_norm": 1.5335974293376646, "learning_rate": 9.877673389836675e-06, "loss": 0.661, "step": 1163 }, { "epoch": 0.09858141012068601, "grad_norm": 1.607561340029899, "learning_rate": 9.877371658386725e-06, "loss": 0.7814, "step": 1164 }, { "epoch": 0.09866610205377938, "grad_norm": 1.2473952928310543, "learning_rate": 9.87706955988873e-06, "loss": 0.6761, "step": 1165 }, { "epoch": 0.09875079398687275, "grad_norm": 1.4339676990324197, "learning_rate": 9.876767094365425e-06, "loss": 0.6775, "step": 1166 }, { "epoch": 0.09883548591996612, "grad_norm": 2.64940512739057, "learning_rate": 9.876464261839572e-06, "loss": 0.699, "step": 1167 }, { "epoch": 0.0989201778530595, "grad_norm": 1.4742077522349393, "learning_rate": 9.876161062333961e-06, "loss": 0.6666, "step": 1168 }, { "epoch": 0.09900486978615287, "grad_norm": 1.5119190921261358, "learning_rate": 9.87585749587141e-06, "loss": 0.7178, "step": 1169 }, { "epoch": 0.09908956171924624, "grad_norm": 0.7430335932316164, "learning_rate": 9.875553562474765e-06, "loss": 0.9167, "step": 1170 }, { "epoch": 0.09917425365233962, "grad_norm": 1.7369044920400714, "learning_rate": 9.875249262166898e-06, "loss": 0.6099, "step": 1171 }, { "epoch": 0.09925894558543298, "grad_norm": 0.6291035404856322, "learning_rate": 9.874944594970706e-06, "loss": 0.9057, "step": 1172 }, { "epoch": 0.09934363751852636, "grad_norm": 0.7426926952567218, "learning_rate": 9.874639560909118e-06, "loss": 0.888, "step": 1173 }, { "epoch": 0.09942832945161974, "grad_norm": 1.3273364686050109, "learning_rate": 9.874334160005092e-06, "loss": 0.6418, "step": 1174 }, { "epoch": 0.0995130213847131, "grad_norm": 2.276510945144772, "learning_rate": 9.87402839228161e-06, "loss": 0.6643, "step": 1175 }, { "epoch": 0.09959771331780648, "grad_norm": 1.4893001820739962, "learning_rate": 9.873722257761684e-06, "loss": 0.659, "step": 1176 }, { "epoch": 0.09968240525089986, "grad_norm": 1.5045803122639607, "learning_rate": 9.873415756468348e-06, "loss": 0.7081, "step": 1177 }, { "epoch": 0.09976709718399322, "grad_norm": 1.4141660880801998, "learning_rate": 9.873108888424671e-06, "loss": 0.647, "step": 1178 }, { "epoch": 0.0998517891170866, "grad_norm": 1.3933019661557697, "learning_rate": 9.872801653653746e-06, "loss": 0.6746, "step": 1179 }, { "epoch": 0.09993648105017996, "grad_norm": 1.2922806809953036, "learning_rate": 9.872494052178694e-06, "loss": 0.6982, "step": 1180 }, { "epoch": 0.10002117298327334, "grad_norm": 0.6485156175046494, "learning_rate": 9.872186084022663e-06, "loss": 0.8907, "step": 1181 }, { "epoch": 0.10010586491636672, "grad_norm": 1.1878088449665813, "learning_rate": 9.871877749208829e-06, "loss": 0.6162, "step": 1182 }, { "epoch": 0.10019055684946009, "grad_norm": 1.5448468464202203, "learning_rate": 9.871569047760399e-06, "loss": 0.6944, "step": 1183 }, { "epoch": 0.10027524878255346, "grad_norm": 0.6733787322156175, "learning_rate": 9.8712599797006e-06, "loss": 0.8576, "step": 1184 }, { "epoch": 0.10035994071564683, "grad_norm": 1.440550964491178, "learning_rate": 9.870950545052694e-06, "loss": 0.6709, "step": 1185 }, { "epoch": 0.1004446326487402, "grad_norm": 1.2120609655135757, "learning_rate": 9.870640743839966e-06, "loss": 0.6099, "step": 1186 }, { "epoch": 0.10052932458183358, "grad_norm": 1.3547282452373552, "learning_rate": 9.87033057608573e-06, "loss": 0.6815, "step": 1187 }, { "epoch": 0.10061401651492695, "grad_norm": 1.5571599572455068, "learning_rate": 9.87002004181333e-06, "loss": 0.6879, "step": 1188 }, { "epoch": 0.10069870844802033, "grad_norm": 2.418482832859567, "learning_rate": 9.869709141046133e-06, "loss": 0.6467, "step": 1189 }, { "epoch": 0.1007834003811137, "grad_norm": 0.649830424314901, "learning_rate": 9.869397873807536e-06, "loss": 0.8804, "step": 1190 }, { "epoch": 0.10086809231420707, "grad_norm": 0.5944250727212254, "learning_rate": 9.869086240120966e-06, "loss": 0.8548, "step": 1191 }, { "epoch": 0.10095278424730045, "grad_norm": 1.559495925421599, "learning_rate": 9.868774240009872e-06, "loss": 0.728, "step": 1192 }, { "epoch": 0.10103747618039381, "grad_norm": 1.9127894661566958, "learning_rate": 9.868461873497737e-06, "loss": 0.7429, "step": 1193 }, { "epoch": 0.10112216811348719, "grad_norm": 1.4704255764976004, "learning_rate": 9.868149140608064e-06, "loss": 0.69, "step": 1194 }, { "epoch": 0.10120686004658057, "grad_norm": 1.3812675695311507, "learning_rate": 9.867836041364392e-06, "loss": 0.6155, "step": 1195 }, { "epoch": 0.10129155197967393, "grad_norm": 1.5641036996693958, "learning_rate": 9.86752257579028e-06, "loss": 0.6504, "step": 1196 }, { "epoch": 0.10137624391276731, "grad_norm": 0.6814433652323504, "learning_rate": 9.86720874390932e-06, "loss": 0.8648, "step": 1197 }, { "epoch": 0.10146093584586068, "grad_norm": 1.274381870167139, "learning_rate": 9.86689454574513e-06, "loss": 0.6645, "step": 1198 }, { "epoch": 0.10154562777895405, "grad_norm": 0.7571341865604021, "learning_rate": 9.866579981321351e-06, "loss": 0.879, "step": 1199 }, { "epoch": 0.10163031971204743, "grad_norm": 1.6865809845438786, "learning_rate": 9.86626505066166e-06, "loss": 0.6871, "step": 1200 }, { "epoch": 0.1017150116451408, "grad_norm": 1.3625466415747514, "learning_rate": 9.865949753789759e-06, "loss": 0.6888, "step": 1201 }, { "epoch": 0.10179970357823417, "grad_norm": 1.661065492249295, "learning_rate": 9.865634090729369e-06, "loss": 0.7271, "step": 1202 }, { "epoch": 0.10188439551132755, "grad_norm": 1.4198881506737606, "learning_rate": 9.86531806150425e-06, "loss": 0.6579, "step": 1203 }, { "epoch": 0.10196908744442092, "grad_norm": 1.4916060063039354, "learning_rate": 9.865001666138183e-06, "loss": 0.6823, "step": 1204 }, { "epoch": 0.1020537793775143, "grad_norm": 1.762800813279009, "learning_rate": 9.864684904654981e-06, "loss": 0.6546, "step": 1205 }, { "epoch": 0.10213847131060766, "grad_norm": 1.3674086077904979, "learning_rate": 9.864367777078478e-06, "loss": 0.5982, "step": 1206 }, { "epoch": 0.10222316324370104, "grad_norm": 1.4997685858441372, "learning_rate": 9.864050283432544e-06, "loss": 0.6467, "step": 1207 }, { "epoch": 0.10230785517679442, "grad_norm": 0.6888052782752568, "learning_rate": 9.863732423741069e-06, "loss": 0.843, "step": 1208 }, { "epoch": 0.10239254710988778, "grad_norm": 0.6429289757830462, "learning_rate": 9.863414198027974e-06, "loss": 0.8459, "step": 1209 }, { "epoch": 0.10247723904298116, "grad_norm": 2.89930637758765, "learning_rate": 9.863095606317207e-06, "loss": 0.6819, "step": 1210 }, { "epoch": 0.10256193097607452, "grad_norm": 1.4084002847528216, "learning_rate": 9.862776648632746e-06, "loss": 0.6612, "step": 1211 }, { "epoch": 0.1026466229091679, "grad_norm": 1.4924312049104003, "learning_rate": 9.862457324998591e-06, "loss": 0.6233, "step": 1212 }, { "epoch": 0.10273131484226128, "grad_norm": 1.3386660144922236, "learning_rate": 9.862137635438775e-06, "loss": 0.624, "step": 1213 }, { "epoch": 0.10281600677535464, "grad_norm": 2.3704043035836833, "learning_rate": 9.861817579977355e-06, "loss": 0.6796, "step": 1214 }, { "epoch": 0.10290069870844802, "grad_norm": 1.2906046729571277, "learning_rate": 9.86149715863842e-06, "loss": 0.6321, "step": 1215 }, { "epoch": 0.1029853906415414, "grad_norm": 1.7272655966294668, "learning_rate": 9.861176371446078e-06, "loss": 0.7413, "step": 1216 }, { "epoch": 0.10307008257463476, "grad_norm": 1.2398573878573838, "learning_rate": 9.860855218424475e-06, "loss": 0.5776, "step": 1217 }, { "epoch": 0.10315477450772814, "grad_norm": 1.2146794361930244, "learning_rate": 9.860533699597776e-06, "loss": 0.673, "step": 1218 }, { "epoch": 0.1032394664408215, "grad_norm": 1.4566114757695063, "learning_rate": 9.86021181499018e-06, "loss": 0.7076, "step": 1219 }, { "epoch": 0.10332415837391488, "grad_norm": 1.6515257911742445, "learning_rate": 9.859889564625907e-06, "loss": 0.6735, "step": 1220 }, { "epoch": 0.10340885030700826, "grad_norm": 1.5803022862943388, "learning_rate": 9.85956694852921e-06, "loss": 0.6681, "step": 1221 }, { "epoch": 0.10349354224010163, "grad_norm": 1.1781673449722747, "learning_rate": 9.859243966724367e-06, "loss": 0.6341, "step": 1222 }, { "epoch": 0.103578234173195, "grad_norm": 1.1541218996719873, "learning_rate": 9.858920619235689e-06, "loss": 0.6338, "step": 1223 }, { "epoch": 0.10366292610628837, "grad_norm": 1.4801024556504059, "learning_rate": 9.8585969060875e-06, "loss": 0.6899, "step": 1224 }, { "epoch": 0.10374761803938175, "grad_norm": 2.451087910131394, "learning_rate": 9.858272827304168e-06, "loss": 0.7486, "step": 1225 }, { "epoch": 0.10383230997247513, "grad_norm": 1.452465092193286, "learning_rate": 9.85794838291008e-06, "loss": 0.6876, "step": 1226 }, { "epoch": 0.10391700190556849, "grad_norm": 1.2563497661206104, "learning_rate": 9.857623572929653e-06, "loss": 0.6828, "step": 1227 }, { "epoch": 0.10400169383866187, "grad_norm": 1.4059919102871534, "learning_rate": 9.85729839738733e-06, "loss": 0.6655, "step": 1228 }, { "epoch": 0.10408638577175525, "grad_norm": 1.7764928085796514, "learning_rate": 9.856972856307581e-06, "loss": 0.5884, "step": 1229 }, { "epoch": 0.10417107770484861, "grad_norm": 1.2672693387791596, "learning_rate": 9.856646949714905e-06, "loss": 0.6515, "step": 1230 }, { "epoch": 0.10425576963794199, "grad_norm": 1.9439491730356502, "learning_rate": 9.85632067763383e-06, "loss": 0.6057, "step": 1231 }, { "epoch": 0.10434046157103535, "grad_norm": 1.3153224701164483, "learning_rate": 9.855994040088908e-06, "loss": 0.7174, "step": 1232 }, { "epoch": 0.10442515350412873, "grad_norm": 1.2864518712329864, "learning_rate": 9.855667037104721e-06, "loss": 0.6591, "step": 1233 }, { "epoch": 0.10450984543722211, "grad_norm": 1.6416044802218899, "learning_rate": 9.855339668705876e-06, "loss": 0.6989, "step": 1234 }, { "epoch": 0.10459453737031547, "grad_norm": 1.5025042155059656, "learning_rate": 9.855011934917013e-06, "loss": 0.6523, "step": 1235 }, { "epoch": 0.10467922930340885, "grad_norm": 1.5783434641734382, "learning_rate": 9.854683835762794e-06, "loss": 0.6433, "step": 1236 }, { "epoch": 0.10476392123650222, "grad_norm": 0.6255836894820332, "learning_rate": 9.854355371267907e-06, "loss": 0.8197, "step": 1237 }, { "epoch": 0.1048486131695956, "grad_norm": 1.74887102386249, "learning_rate": 9.854026541457074e-06, "loss": 0.6819, "step": 1238 }, { "epoch": 0.10493330510268897, "grad_norm": 1.4486566535682517, "learning_rate": 9.853697346355042e-06, "loss": 0.6444, "step": 1239 }, { "epoch": 0.10501799703578234, "grad_norm": 0.671746995213592, "learning_rate": 9.853367785986582e-06, "loss": 0.8133, "step": 1240 }, { "epoch": 0.10510268896887572, "grad_norm": 1.3697949368827649, "learning_rate": 9.853037860376496e-06, "loss": 0.7138, "step": 1241 }, { "epoch": 0.1051873809019691, "grad_norm": 1.3379789953540038, "learning_rate": 9.852707569549613e-06, "loss": 0.7023, "step": 1242 }, { "epoch": 0.10527207283506246, "grad_norm": 1.2931008105367903, "learning_rate": 9.85237691353079e-06, "loss": 0.6775, "step": 1243 }, { "epoch": 0.10535676476815584, "grad_norm": 1.5397035078782768, "learning_rate": 9.852045892344908e-06, "loss": 0.6781, "step": 1244 }, { "epoch": 0.1054414567012492, "grad_norm": 1.7771095024861145, "learning_rate": 9.851714506016882e-06, "loss": 0.6872, "step": 1245 }, { "epoch": 0.10552614863434258, "grad_norm": 1.4311139564204234, "learning_rate": 9.851382754571648e-06, "loss": 0.7251, "step": 1246 }, { "epoch": 0.10561084056743596, "grad_norm": 1.3296637422185897, "learning_rate": 9.85105063803417e-06, "loss": 0.692, "step": 1247 }, { "epoch": 0.10569553250052932, "grad_norm": 1.8239092138015718, "learning_rate": 9.850718156429446e-06, "loss": 0.6351, "step": 1248 }, { "epoch": 0.1057802244336227, "grad_norm": 1.4460429367461258, "learning_rate": 9.850385309782496e-06, "loss": 0.6432, "step": 1249 }, { "epoch": 0.10586491636671606, "grad_norm": 1.698003345843918, "learning_rate": 9.850052098118365e-06, "loss": 0.6557, "step": 1250 }, { "epoch": 0.10594960829980944, "grad_norm": 1.3927974680332162, "learning_rate": 9.849718521462133e-06, "loss": 0.5847, "step": 1251 }, { "epoch": 0.10603430023290282, "grad_norm": 1.4938099756981569, "learning_rate": 9.849384579838902e-06, "loss": 0.6235, "step": 1252 }, { "epoch": 0.10611899216599618, "grad_norm": 0.6698885466595856, "learning_rate": 9.849050273273801e-06, "loss": 0.8915, "step": 1253 }, { "epoch": 0.10620368409908956, "grad_norm": 1.3534872407856184, "learning_rate": 9.84871560179199e-06, "loss": 0.696, "step": 1254 }, { "epoch": 0.10628837603218294, "grad_norm": 1.7137459171989111, "learning_rate": 9.848380565418655e-06, "loss": 0.6863, "step": 1255 }, { "epoch": 0.1063730679652763, "grad_norm": 1.4799872955505726, "learning_rate": 9.848045164179011e-06, "loss": 0.6633, "step": 1256 }, { "epoch": 0.10645775989836968, "grad_norm": 0.659574691849809, "learning_rate": 9.847709398098296e-06, "loss": 0.8601, "step": 1257 }, { "epoch": 0.10654245183146305, "grad_norm": 1.5649668235098362, "learning_rate": 9.847373267201779e-06, "loss": 0.6615, "step": 1258 }, { "epoch": 0.10662714376455643, "grad_norm": 1.5470798615872448, "learning_rate": 9.847036771514753e-06, "loss": 0.7217, "step": 1259 }, { "epoch": 0.1067118356976498, "grad_norm": 1.5421347525161468, "learning_rate": 9.846699911062547e-06, "loss": 0.7118, "step": 1260 }, { "epoch": 0.10679652763074317, "grad_norm": 1.4708094058589058, "learning_rate": 9.846362685870506e-06, "loss": 0.627, "step": 1261 }, { "epoch": 0.10688121956383655, "grad_norm": 2.3874943933082755, "learning_rate": 9.846025095964012e-06, "loss": 0.6826, "step": 1262 }, { "epoch": 0.10696591149692991, "grad_norm": 2.139810720048793, "learning_rate": 9.845687141368468e-06, "loss": 0.6774, "step": 1263 }, { "epoch": 0.10705060343002329, "grad_norm": 2.4219536936194244, "learning_rate": 9.845348822109306e-06, "loss": 0.6799, "step": 1264 }, { "epoch": 0.10713529536311667, "grad_norm": 1.4073385071210276, "learning_rate": 9.84501013821199e-06, "loss": 0.684, "step": 1265 }, { "epoch": 0.10721998729621003, "grad_norm": 1.3469301762694024, "learning_rate": 9.844671089702005e-06, "loss": 0.6565, "step": 1266 }, { "epoch": 0.10730467922930341, "grad_norm": 1.4344017032052945, "learning_rate": 9.844331676604866e-06, "loss": 0.6458, "step": 1267 }, { "epoch": 0.10738937116239679, "grad_norm": 1.279819111469554, "learning_rate": 9.843991898946116e-06, "loss": 0.661, "step": 1268 }, { "epoch": 0.10747406309549015, "grad_norm": 1.415096924649495, "learning_rate": 9.843651756751327e-06, "loss": 0.6723, "step": 1269 }, { "epoch": 0.10755875502858353, "grad_norm": 1.6173649506945291, "learning_rate": 9.843311250046092e-06, "loss": 0.6211, "step": 1270 }, { "epoch": 0.1076434469616769, "grad_norm": 2.3666146551754395, "learning_rate": 9.842970378856043e-06, "loss": 0.6397, "step": 1271 }, { "epoch": 0.10772813889477027, "grad_norm": 1.5571506532658503, "learning_rate": 9.842629143206826e-06, "loss": 0.663, "step": 1272 }, { "epoch": 0.10781283082786365, "grad_norm": 1.9597214047496998, "learning_rate": 9.842287543124123e-06, "loss": 0.7298, "step": 1273 }, { "epoch": 0.10789752276095702, "grad_norm": 1.3590192299433368, "learning_rate": 9.84194557863364e-06, "loss": 0.6402, "step": 1274 }, { "epoch": 0.1079822146940504, "grad_norm": 0.6792463675263042, "learning_rate": 9.841603249761116e-06, "loss": 0.8674, "step": 1275 }, { "epoch": 0.10806690662714376, "grad_norm": 1.3993308846578711, "learning_rate": 9.841260556532307e-06, "loss": 0.6896, "step": 1276 }, { "epoch": 0.10815159856023714, "grad_norm": 0.6298365344583807, "learning_rate": 9.840917498973009e-06, "loss": 0.8325, "step": 1277 }, { "epoch": 0.10823629049333051, "grad_norm": 1.468563456542413, "learning_rate": 9.84057407710903e-06, "loss": 0.654, "step": 1278 }, { "epoch": 0.10832098242642388, "grad_norm": 3.456579121143994, "learning_rate": 9.840230290966224e-06, "loss": 0.6394, "step": 1279 }, { "epoch": 0.10840567435951726, "grad_norm": 1.415441900632099, "learning_rate": 9.839886140570458e-06, "loss": 0.6424, "step": 1280 }, { "epoch": 0.10849036629261063, "grad_norm": 1.3725178416073587, "learning_rate": 9.839541625947631e-06, "loss": 0.6036, "step": 1281 }, { "epoch": 0.108575058225704, "grad_norm": 2.0876737115483, "learning_rate": 9.83919674712367e-06, "loss": 0.6771, "step": 1282 }, { "epoch": 0.10865975015879738, "grad_norm": 1.501862194867761, "learning_rate": 9.838851504124528e-06, "loss": 0.6894, "step": 1283 }, { "epoch": 0.10874444209189074, "grad_norm": 1.6515494167875084, "learning_rate": 9.838505896976188e-06, "loss": 0.6325, "step": 1284 }, { "epoch": 0.10882913402498412, "grad_norm": 1.5639708129865366, "learning_rate": 9.838159925704657e-06, "loss": 0.6057, "step": 1285 }, { "epoch": 0.1089138259580775, "grad_norm": 1.3769001973990747, "learning_rate": 9.837813590335974e-06, "loss": 0.6601, "step": 1286 }, { "epoch": 0.10899851789117086, "grad_norm": 1.6711804290142884, "learning_rate": 9.837466890896202e-06, "loss": 0.6267, "step": 1287 }, { "epoch": 0.10908320982426424, "grad_norm": 1.2866095880115211, "learning_rate": 9.837119827411427e-06, "loss": 0.6471, "step": 1288 }, { "epoch": 0.1091679017573576, "grad_norm": 1.2907760250921436, "learning_rate": 9.836772399907775e-06, "loss": 0.6068, "step": 1289 }, { "epoch": 0.10925259369045098, "grad_norm": 1.4308177818423233, "learning_rate": 9.836424608411386e-06, "loss": 0.6681, "step": 1290 }, { "epoch": 0.10933728562354436, "grad_norm": 1.952143114744294, "learning_rate": 9.836076452948436e-06, "loss": 0.6616, "step": 1291 }, { "epoch": 0.10942197755663773, "grad_norm": 1.6114379191471029, "learning_rate": 9.835727933545123e-06, "loss": 0.6956, "step": 1292 }, { "epoch": 0.1095066694897311, "grad_norm": 0.703506036296397, "learning_rate": 9.835379050227678e-06, "loss": 0.8531, "step": 1293 }, { "epoch": 0.10959136142282448, "grad_norm": 1.7502779204935621, "learning_rate": 9.835029803022356e-06, "loss": 0.6985, "step": 1294 }, { "epoch": 0.10967605335591785, "grad_norm": 1.4004774916696547, "learning_rate": 9.834680191955436e-06, "loss": 0.6387, "step": 1295 }, { "epoch": 0.10976074528901122, "grad_norm": 1.806068495789604, "learning_rate": 9.834330217053233e-06, "loss": 0.6467, "step": 1296 }, { "epoch": 0.10984543722210459, "grad_norm": 1.4767871163535633, "learning_rate": 9.833979878342082e-06, "loss": 0.6299, "step": 1297 }, { "epoch": 0.10993012915519797, "grad_norm": 1.5909677899389636, "learning_rate": 9.833629175848347e-06, "loss": 0.705, "step": 1298 }, { "epoch": 0.11001482108829135, "grad_norm": 1.6061439454589628, "learning_rate": 9.83327810959842e-06, "loss": 0.7268, "step": 1299 }, { "epoch": 0.11009951302138471, "grad_norm": 1.5310200446254432, "learning_rate": 9.832926679618725e-06, "loss": 0.7194, "step": 1300 }, { "epoch": 0.11018420495447809, "grad_norm": 2.303589265819388, "learning_rate": 9.832574885935704e-06, "loss": 0.6556, "step": 1301 }, { "epoch": 0.11026889688757145, "grad_norm": 1.5342206979732367, "learning_rate": 9.832222728575832e-06, "loss": 0.641, "step": 1302 }, { "epoch": 0.11035358882066483, "grad_norm": 1.426504253412342, "learning_rate": 9.831870207565615e-06, "loss": 0.6876, "step": 1303 }, { "epoch": 0.11043828075375821, "grad_norm": 1.350755602605259, "learning_rate": 9.831517322931576e-06, "loss": 0.6663, "step": 1304 }, { "epoch": 0.11052297268685157, "grad_norm": 1.5687946736714915, "learning_rate": 9.831164074700278e-06, "loss": 0.6732, "step": 1305 }, { "epoch": 0.11060766461994495, "grad_norm": 0.6978804281822405, "learning_rate": 9.830810462898296e-06, "loss": 0.8321, "step": 1306 }, { "epoch": 0.11069235655303833, "grad_norm": 1.5448245997899488, "learning_rate": 9.83045648755225e-06, "loss": 0.6562, "step": 1307 }, { "epoch": 0.1107770484861317, "grad_norm": 1.2869528139408142, "learning_rate": 9.830102148688773e-06, "loss": 0.6925, "step": 1308 }, { "epoch": 0.11086174041922507, "grad_norm": 1.3417264089908854, "learning_rate": 9.829747446334534e-06, "loss": 0.6754, "step": 1309 }, { "epoch": 0.11094643235231844, "grad_norm": 1.4573335542609405, "learning_rate": 9.829392380516225e-06, "loss": 0.6338, "step": 1310 }, { "epoch": 0.11103112428541181, "grad_norm": 1.608132076735192, "learning_rate": 9.829036951260567e-06, "loss": 0.6599, "step": 1311 }, { "epoch": 0.11111581621850519, "grad_norm": 4.228557794654338, "learning_rate": 9.828681158594305e-06, "loss": 0.6322, "step": 1312 }, { "epoch": 0.11120050815159856, "grad_norm": 1.3407201487533171, "learning_rate": 9.82832500254422e-06, "loss": 0.6929, "step": 1313 }, { "epoch": 0.11128520008469193, "grad_norm": 1.444234451364573, "learning_rate": 9.827968483137107e-06, "loss": 0.72, "step": 1314 }, { "epoch": 0.1113698920177853, "grad_norm": 2.2399529922290706, "learning_rate": 9.827611600399803e-06, "loss": 0.684, "step": 1315 }, { "epoch": 0.11145458395087868, "grad_norm": 1.4825880728870544, "learning_rate": 9.827254354359163e-06, "loss": 0.672, "step": 1316 }, { "epoch": 0.11153927588397206, "grad_norm": 1.6552125515762435, "learning_rate": 9.826896745042072e-06, "loss": 0.7256, "step": 1317 }, { "epoch": 0.11162396781706542, "grad_norm": 1.3314055119235095, "learning_rate": 9.826538772475439e-06, "loss": 0.6619, "step": 1318 }, { "epoch": 0.1117086597501588, "grad_norm": 1.4657897089083158, "learning_rate": 9.826180436686207e-06, "loss": 0.6577, "step": 1319 }, { "epoch": 0.11179335168325218, "grad_norm": 1.4720470097802387, "learning_rate": 9.82582173770134e-06, "loss": 0.69, "step": 1320 }, { "epoch": 0.11187804361634554, "grad_norm": 1.3429625596517734, "learning_rate": 9.825462675547836e-06, "loss": 0.6408, "step": 1321 }, { "epoch": 0.11196273554943892, "grad_norm": 1.4968368517526243, "learning_rate": 9.825103250252711e-06, "loss": 0.6941, "step": 1322 }, { "epoch": 0.11204742748253228, "grad_norm": 1.2546897599835978, "learning_rate": 9.824743461843019e-06, "loss": 0.6713, "step": 1323 }, { "epoch": 0.11213211941562566, "grad_norm": 1.520823458310468, "learning_rate": 9.82438331034583e-06, "loss": 0.7241, "step": 1324 }, { "epoch": 0.11221681134871904, "grad_norm": 1.3119854991822744, "learning_rate": 9.824022795788253e-06, "loss": 0.6463, "step": 1325 }, { "epoch": 0.1123015032818124, "grad_norm": 1.7228801850387097, "learning_rate": 9.823661918197415e-06, "loss": 0.6744, "step": 1326 }, { "epoch": 0.11238619521490578, "grad_norm": 1.663856539171002, "learning_rate": 9.823300677600475e-06, "loss": 0.6762, "step": 1327 }, { "epoch": 0.11247088714799915, "grad_norm": 1.7343590898162609, "learning_rate": 9.822939074024619e-06, "loss": 0.7174, "step": 1328 }, { "epoch": 0.11255557908109252, "grad_norm": 1.380158861690978, "learning_rate": 9.822577107497058e-06, "loss": 0.5978, "step": 1329 }, { "epoch": 0.1126402710141859, "grad_norm": 1.6904635031993027, "learning_rate": 9.822214778045033e-06, "loss": 0.6038, "step": 1330 }, { "epoch": 0.11272496294727927, "grad_norm": 1.340638981623032, "learning_rate": 9.821852085695813e-06, "loss": 0.7288, "step": 1331 }, { "epoch": 0.11280965488037265, "grad_norm": 2.343795989915273, "learning_rate": 9.82148903047669e-06, "loss": 0.6926, "step": 1332 }, { "epoch": 0.11289434681346602, "grad_norm": 1.3563593784610286, "learning_rate": 9.821125612414985e-06, "loss": 0.7109, "step": 1333 }, { "epoch": 0.11297903874655939, "grad_norm": 1.4807920652938653, "learning_rate": 9.82076183153805e-06, "loss": 0.6564, "step": 1334 }, { "epoch": 0.11306373067965277, "grad_norm": 2.261234056467976, "learning_rate": 9.82039768787326e-06, "loss": 0.6585, "step": 1335 }, { "epoch": 0.11314842261274613, "grad_norm": 1.213950203987322, "learning_rate": 9.82003318144802e-06, "loss": 0.7149, "step": 1336 }, { "epoch": 0.11323311454583951, "grad_norm": 1.8958789128849058, "learning_rate": 9.819668312289756e-06, "loss": 0.6771, "step": 1337 }, { "epoch": 0.11331780647893289, "grad_norm": 1.2011219722400164, "learning_rate": 9.819303080425933e-06, "loss": 0.6799, "step": 1338 }, { "epoch": 0.11340249841202625, "grad_norm": 1.4745597396795829, "learning_rate": 9.818937485884034e-06, "loss": 0.6711, "step": 1339 }, { "epoch": 0.11348719034511963, "grad_norm": 1.4578258669718631, "learning_rate": 9.818571528691569e-06, "loss": 0.748, "step": 1340 }, { "epoch": 0.113571882278213, "grad_norm": 1.3499789260140302, "learning_rate": 9.818205208876084e-06, "loss": 0.688, "step": 1341 }, { "epoch": 0.11365657421130637, "grad_norm": 3.360374894253165, "learning_rate": 9.817838526465143e-06, "loss": 0.7215, "step": 1342 }, { "epoch": 0.11374126614439975, "grad_norm": 1.9072069468554027, "learning_rate": 9.81747148148634e-06, "loss": 0.6803, "step": 1343 }, { "epoch": 0.11382595807749311, "grad_norm": 1.2250117218979424, "learning_rate": 9.817104073967298e-06, "loss": 0.649, "step": 1344 }, { "epoch": 0.11391065001058649, "grad_norm": 1.3793075361553595, "learning_rate": 9.816736303935668e-06, "loss": 0.6499, "step": 1345 }, { "epoch": 0.11399534194367987, "grad_norm": 1.3455136847417208, "learning_rate": 9.816368171419123e-06, "loss": 0.6547, "step": 1346 }, { "epoch": 0.11408003387677323, "grad_norm": 3.5226078355991075, "learning_rate": 9.815999676445373e-06, "loss": 0.6446, "step": 1347 }, { "epoch": 0.11416472580986661, "grad_norm": 1.5539714513644283, "learning_rate": 9.815630819042144e-06, "loss": 0.6224, "step": 1348 }, { "epoch": 0.11424941774295998, "grad_norm": 0.647321763474481, "learning_rate": 9.815261599237193e-06, "loss": 0.8811, "step": 1349 }, { "epoch": 0.11433410967605336, "grad_norm": 1.8974555521950633, "learning_rate": 9.814892017058311e-06, "loss": 0.6703, "step": 1350 }, { "epoch": 0.11441880160914673, "grad_norm": 0.8214707688814914, "learning_rate": 9.814522072533309e-06, "loss": 0.8329, "step": 1351 }, { "epoch": 0.1145034935422401, "grad_norm": 1.8338263136429112, "learning_rate": 9.814151765690026e-06, "loss": 0.6612, "step": 1352 }, { "epoch": 0.11458818547533348, "grad_norm": 1.5864392984902422, "learning_rate": 9.813781096556332e-06, "loss": 0.6907, "step": 1353 }, { "epoch": 0.11467287740842684, "grad_norm": 1.4292204337427905, "learning_rate": 9.813410065160118e-06, "loss": 0.6815, "step": 1354 }, { "epoch": 0.11475756934152022, "grad_norm": 1.4570095209787046, "learning_rate": 9.813038671529311e-06, "loss": 0.69, "step": 1355 }, { "epoch": 0.1148422612746136, "grad_norm": 0.5864685071665123, "learning_rate": 9.812666915691854e-06, "loss": 0.8464, "step": 1356 }, { "epoch": 0.11492695320770696, "grad_norm": 1.2301519514148493, "learning_rate": 9.812294797675732e-06, "loss": 0.6769, "step": 1357 }, { "epoch": 0.11501164514080034, "grad_norm": 1.4198297115808372, "learning_rate": 9.811922317508942e-06, "loss": 0.6624, "step": 1358 }, { "epoch": 0.11509633707389372, "grad_norm": 1.698029446817171, "learning_rate": 9.811549475219515e-06, "loss": 0.66, "step": 1359 }, { "epoch": 0.11518102900698708, "grad_norm": 1.4423968261770115, "learning_rate": 9.811176270835515e-06, "loss": 0.6742, "step": 1360 }, { "epoch": 0.11526572094008046, "grad_norm": 1.469775023257495, "learning_rate": 9.810802704385023e-06, "loss": 0.7216, "step": 1361 }, { "epoch": 0.11535041287317382, "grad_norm": 1.5734700104342323, "learning_rate": 9.810428775896152e-06, "loss": 0.6888, "step": 1362 }, { "epoch": 0.1154351048062672, "grad_norm": 1.5944668779327738, "learning_rate": 9.810054485397045e-06, "loss": 0.658, "step": 1363 }, { "epoch": 0.11551979673936058, "grad_norm": 2.290155621998883, "learning_rate": 9.809679832915867e-06, "loss": 0.7015, "step": 1364 }, { "epoch": 0.11560448867245394, "grad_norm": 1.5531022199462334, "learning_rate": 9.809304818480812e-06, "loss": 0.697, "step": 1365 }, { "epoch": 0.11568918060554732, "grad_norm": 1.4952403256106686, "learning_rate": 9.808929442120105e-06, "loss": 0.7183, "step": 1366 }, { "epoch": 0.1157738725386407, "grad_norm": 1.4333991221926607, "learning_rate": 9.808553703861991e-06, "loss": 0.6517, "step": 1367 }, { "epoch": 0.11585856447173407, "grad_norm": 2.5551261151771176, "learning_rate": 9.80817760373475e-06, "loss": 0.6695, "step": 1368 }, { "epoch": 0.11594325640482744, "grad_norm": 1.4596110192618874, "learning_rate": 9.807801141766682e-06, "loss": 0.6341, "step": 1369 }, { "epoch": 0.11602794833792081, "grad_norm": 2.469809106515475, "learning_rate": 9.80742431798612e-06, "loss": 0.6941, "step": 1370 }, { "epoch": 0.11611264027101419, "grad_norm": 1.6850495213407661, "learning_rate": 9.807047132421424e-06, "loss": 0.678, "step": 1371 }, { "epoch": 0.11619733220410756, "grad_norm": 0.6496421992039509, "learning_rate": 9.806669585100974e-06, "loss": 0.8226, "step": 1372 }, { "epoch": 0.11628202413720093, "grad_norm": 1.4487308358398334, "learning_rate": 9.806291676053186e-06, "loss": 0.6634, "step": 1373 }, { "epoch": 0.11636671607029431, "grad_norm": 0.6385437285285143, "learning_rate": 9.805913405306498e-06, "loss": 0.8913, "step": 1374 }, { "epoch": 0.11645140800338767, "grad_norm": 1.3352280512620087, "learning_rate": 9.80553477288938e-06, "loss": 0.7037, "step": 1375 }, { "epoch": 0.11653609993648105, "grad_norm": 1.5059605232797861, "learning_rate": 9.805155778830323e-06, "loss": 0.6466, "step": 1376 }, { "epoch": 0.11662079186957443, "grad_norm": 1.4273548815697352, "learning_rate": 9.804776423157847e-06, "loss": 0.7007, "step": 1377 }, { "epoch": 0.11670548380266779, "grad_norm": 1.5442771673922793, "learning_rate": 9.804396705900503e-06, "loss": 0.6263, "step": 1378 }, { "epoch": 0.11679017573576117, "grad_norm": 1.5655585595172785, "learning_rate": 9.804016627086868e-06, "loss": 0.6505, "step": 1379 }, { "epoch": 0.11687486766885455, "grad_norm": 2.374412360376776, "learning_rate": 9.803636186745543e-06, "loss": 0.6697, "step": 1380 }, { "epoch": 0.11695955960194791, "grad_norm": 1.5834668630857982, "learning_rate": 9.80325538490516e-06, "loss": 0.6831, "step": 1381 }, { "epoch": 0.11704425153504129, "grad_norm": 0.6307872128301137, "learning_rate": 9.802874221594373e-06, "loss": 0.8345, "step": 1382 }, { "epoch": 0.11712894346813466, "grad_norm": 1.5978743223824396, "learning_rate": 9.802492696841867e-06, "loss": 0.6939, "step": 1383 }, { "epoch": 0.11721363540122803, "grad_norm": 1.3452178346504169, "learning_rate": 9.802110810676358e-06, "loss": 0.7223, "step": 1384 }, { "epoch": 0.11729832733432141, "grad_norm": 1.7603906153461795, "learning_rate": 9.80172856312658e-06, "loss": 0.692, "step": 1385 }, { "epoch": 0.11738301926741478, "grad_norm": 1.4803628913985794, "learning_rate": 9.801345954221301e-06, "loss": 0.7123, "step": 1386 }, { "epoch": 0.11746771120050815, "grad_norm": 1.663599052025513, "learning_rate": 9.800962983989317e-06, "loss": 0.5875, "step": 1387 }, { "epoch": 0.11755240313360152, "grad_norm": 0.620305978309715, "learning_rate": 9.800579652459445e-06, "loss": 0.8706, "step": 1388 }, { "epoch": 0.1176370950666949, "grad_norm": 1.7094495936000234, "learning_rate": 9.800195959660534e-06, "loss": 0.6484, "step": 1389 }, { "epoch": 0.11772178699978827, "grad_norm": 1.4949141590587534, "learning_rate": 9.79981190562146e-06, "loss": 0.7046, "step": 1390 }, { "epoch": 0.11780647893288164, "grad_norm": 1.2578450408690858, "learning_rate": 9.79942749037112e-06, "loss": 0.6668, "step": 1391 }, { "epoch": 0.11789117086597502, "grad_norm": 1.3327471277656069, "learning_rate": 9.79904271393845e-06, "loss": 0.6131, "step": 1392 }, { "epoch": 0.1179758627990684, "grad_norm": 1.39257905586248, "learning_rate": 9.798657576352404e-06, "loss": 0.7241, "step": 1393 }, { "epoch": 0.11806055473216176, "grad_norm": 1.5303343496148378, "learning_rate": 9.798272077641965e-06, "loss": 0.6061, "step": 1394 }, { "epoch": 0.11814524666525514, "grad_norm": 0.6465336187161252, "learning_rate": 9.797886217836144e-06, "loss": 0.899, "step": 1395 }, { "epoch": 0.1182299385983485, "grad_norm": 1.4218289363975136, "learning_rate": 9.797499996963979e-06, "loss": 0.6629, "step": 1396 }, { "epoch": 0.11831463053144188, "grad_norm": 1.423067501913577, "learning_rate": 9.797113415054535e-06, "loss": 0.6348, "step": 1397 }, { "epoch": 0.11839932246453526, "grad_norm": 1.4680153377400242, "learning_rate": 9.796726472136903e-06, "loss": 0.6355, "step": 1398 }, { "epoch": 0.11848401439762862, "grad_norm": 2.2664937340028057, "learning_rate": 9.796339168240208e-06, "loss": 0.6798, "step": 1399 }, { "epoch": 0.118568706330722, "grad_norm": 1.2655062370941488, "learning_rate": 9.79595150339359e-06, "loss": 0.6418, "step": 1400 }, { "epoch": 0.11865339826381537, "grad_norm": 1.3832798765038974, "learning_rate": 9.795563477626226e-06, "loss": 0.7001, "step": 1401 }, { "epoch": 0.11873809019690874, "grad_norm": 2.2731906820807835, "learning_rate": 9.795175090967316e-06, "loss": 0.6633, "step": 1402 }, { "epoch": 0.11882278213000212, "grad_norm": 1.7477697111915325, "learning_rate": 9.79478634344609e-06, "loss": 0.707, "step": 1403 }, { "epoch": 0.11890747406309549, "grad_norm": 1.4487212085652923, "learning_rate": 9.794397235091801e-06, "loss": 0.6445, "step": 1404 }, { "epoch": 0.11899216599618886, "grad_norm": 0.7090531063532335, "learning_rate": 9.794007765933733e-06, "loss": 0.8805, "step": 1405 }, { "epoch": 0.11907685792928224, "grad_norm": 1.5711831935225897, "learning_rate": 9.793617936001196e-06, "loss": 0.6303, "step": 1406 }, { "epoch": 0.11916154986237561, "grad_norm": 0.578427574328572, "learning_rate": 9.793227745323525e-06, "loss": 0.8591, "step": 1407 }, { "epoch": 0.11924624179546899, "grad_norm": 0.6103131939510694, "learning_rate": 9.792837193930086e-06, "loss": 0.8711, "step": 1408 }, { "epoch": 0.11933093372856235, "grad_norm": 1.9808884789154237, "learning_rate": 9.792446281850266e-06, "loss": 0.6969, "step": 1409 }, { "epoch": 0.11941562566165573, "grad_norm": 1.5340930365411447, "learning_rate": 9.792055009113488e-06, "loss": 0.6608, "step": 1410 }, { "epoch": 0.1195003175947491, "grad_norm": 1.7669542769507933, "learning_rate": 9.791663375749196e-06, "loss": 0.655, "step": 1411 }, { "epoch": 0.11958500952784247, "grad_norm": 1.4153084714542006, "learning_rate": 9.791271381786861e-06, "loss": 0.6262, "step": 1412 }, { "epoch": 0.11966970146093585, "grad_norm": 1.1280771379342232, "learning_rate": 9.790879027255984e-06, "loss": 0.7337, "step": 1413 }, { "epoch": 0.11975439339402921, "grad_norm": 0.6763180633867295, "learning_rate": 9.79048631218609e-06, "loss": 0.8937, "step": 1414 }, { "epoch": 0.11983908532712259, "grad_norm": 1.480031428226504, "learning_rate": 9.790093236606737e-06, "loss": 0.6444, "step": 1415 }, { "epoch": 0.11992377726021597, "grad_norm": 1.4510697298263615, "learning_rate": 9.7896998005475e-06, "loss": 0.5826, "step": 1416 }, { "epoch": 0.12000846919330933, "grad_norm": 1.3339573765444046, "learning_rate": 9.789306004037993e-06, "loss": 0.6127, "step": 1417 }, { "epoch": 0.12009316112640271, "grad_norm": 1.4921322626258016, "learning_rate": 9.788911847107847e-06, "loss": 0.6463, "step": 1418 }, { "epoch": 0.12017785305949609, "grad_norm": 1.5324799270798206, "learning_rate": 9.788517329786726e-06, "loss": 0.6576, "step": 1419 }, { "epoch": 0.12026254499258945, "grad_norm": 1.4686587414134338, "learning_rate": 9.78812245210432e-06, "loss": 0.6246, "step": 1420 }, { "epoch": 0.12034723692568283, "grad_norm": 1.256373166865833, "learning_rate": 9.787727214090346e-06, "loss": 0.6264, "step": 1421 }, { "epoch": 0.1204319288587762, "grad_norm": 1.2825404094016604, "learning_rate": 9.787331615774545e-06, "loss": 0.6614, "step": 1422 }, { "epoch": 0.12051662079186957, "grad_norm": 1.3043825632899435, "learning_rate": 9.78693565718669e-06, "loss": 0.6778, "step": 1423 }, { "epoch": 0.12060131272496295, "grad_norm": 1.2799898909420322, "learning_rate": 9.78653933835658e-06, "loss": 0.6127, "step": 1424 }, { "epoch": 0.12068600465805632, "grad_norm": 2.288072011314446, "learning_rate": 9.78614265931404e-06, "loss": 0.6596, "step": 1425 }, { "epoch": 0.1207706965911497, "grad_norm": 1.4584460230283676, "learning_rate": 9.785745620088917e-06, "loss": 0.6322, "step": 1426 }, { "epoch": 0.12085538852424306, "grad_norm": 1.269354713652867, "learning_rate": 9.785348220711098e-06, "loss": 0.7019, "step": 1427 }, { "epoch": 0.12094008045733644, "grad_norm": 1.6409568955112046, "learning_rate": 9.784950461210485e-06, "loss": 0.619, "step": 1428 }, { "epoch": 0.12102477239042982, "grad_norm": 1.57188980145221, "learning_rate": 9.784552341617012e-06, "loss": 0.6656, "step": 1429 }, { "epoch": 0.12110946432352318, "grad_norm": 1.3966559756543688, "learning_rate": 9.78415386196064e-06, "loss": 0.6512, "step": 1430 }, { "epoch": 0.12119415625661656, "grad_norm": 2.333385054358687, "learning_rate": 9.783755022271355e-06, "loss": 0.6826, "step": 1431 }, { "epoch": 0.12127884818970994, "grad_norm": 1.2843382662234242, "learning_rate": 9.783355822579176e-06, "loss": 0.7118, "step": 1432 }, { "epoch": 0.1213635401228033, "grad_norm": 1.2215420994631163, "learning_rate": 9.78295626291414e-06, "loss": 0.6829, "step": 1433 }, { "epoch": 0.12144823205589668, "grad_norm": 1.2252174112788448, "learning_rate": 9.78255634330632e-06, "loss": 0.6801, "step": 1434 }, { "epoch": 0.12153292398899004, "grad_norm": 1.4864360876235065, "learning_rate": 9.78215606378581e-06, "loss": 0.712, "step": 1435 }, { "epoch": 0.12161761592208342, "grad_norm": 1.5287142479465348, "learning_rate": 9.781755424382733e-06, "loss": 0.6449, "step": 1436 }, { "epoch": 0.1217023078551768, "grad_norm": 0.6622652451507822, "learning_rate": 9.78135442512724e-06, "loss": 0.8471, "step": 1437 }, { "epoch": 0.12178699978827016, "grad_norm": 1.3142323304698937, "learning_rate": 9.780953066049508e-06, "loss": 0.6763, "step": 1438 }, { "epoch": 0.12187169172136354, "grad_norm": 1.5870297561663302, "learning_rate": 9.78055134717974e-06, "loss": 0.6168, "step": 1439 }, { "epoch": 0.1219563836544569, "grad_norm": 2.2301469563414176, "learning_rate": 9.780149268548171e-06, "loss": 0.709, "step": 1440 }, { "epoch": 0.12204107558755029, "grad_norm": 7.862821266783343, "learning_rate": 9.779746830185057e-06, "loss": 0.6817, "step": 1441 }, { "epoch": 0.12212576752064366, "grad_norm": 1.4087824084673344, "learning_rate": 9.779344032120684e-06, "loss": 0.6459, "step": 1442 }, { "epoch": 0.12221045945373703, "grad_norm": 1.2653757342391723, "learning_rate": 9.778940874385366e-06, "loss": 0.6343, "step": 1443 }, { "epoch": 0.1222951513868304, "grad_norm": 1.4223155713309417, "learning_rate": 9.778537357009438e-06, "loss": 0.7211, "step": 1444 }, { "epoch": 0.12237984331992378, "grad_norm": 1.4094642190627673, "learning_rate": 9.778133480023274e-06, "loss": 0.6238, "step": 1445 }, { "epoch": 0.12246453525301715, "grad_norm": 1.5594824882001863, "learning_rate": 9.777729243457261e-06, "loss": 0.6549, "step": 1446 }, { "epoch": 0.12254922718611053, "grad_norm": 1.324273558660811, "learning_rate": 9.777324647341826e-06, "loss": 0.7033, "step": 1447 }, { "epoch": 0.12263391911920389, "grad_norm": 1.7595303956975634, "learning_rate": 9.776919691707411e-06, "loss": 0.6199, "step": 1448 }, { "epoch": 0.12271861105229727, "grad_norm": 1.4110738824259152, "learning_rate": 9.776514376584498e-06, "loss": 0.6378, "step": 1449 }, { "epoch": 0.12280330298539065, "grad_norm": 1.5367579864148018, "learning_rate": 9.776108702003583e-06, "loss": 0.6825, "step": 1450 }, { "epoch": 0.12288799491848401, "grad_norm": 1.38308927293606, "learning_rate": 9.775702667995198e-06, "loss": 0.6858, "step": 1451 }, { "epoch": 0.12297268685157739, "grad_norm": 1.1996862791870182, "learning_rate": 9.775296274589898e-06, "loss": 0.6362, "step": 1452 }, { "epoch": 0.12305737878467075, "grad_norm": 2.0826917222120778, "learning_rate": 9.774889521818267e-06, "loss": 0.6423, "step": 1453 }, { "epoch": 0.12314207071776413, "grad_norm": 1.4846467212934238, "learning_rate": 9.774482409710918e-06, "loss": 0.6504, "step": 1454 }, { "epoch": 0.12322676265085751, "grad_norm": 1.3603862351775267, "learning_rate": 9.774074938298483e-06, "loss": 0.6432, "step": 1455 }, { "epoch": 0.12331145458395087, "grad_norm": 0.6094249403177163, "learning_rate": 9.773667107611628e-06, "loss": 0.8316, "step": 1456 }, { "epoch": 0.12339614651704425, "grad_norm": 1.4710286889155717, "learning_rate": 9.773258917681048e-06, "loss": 0.678, "step": 1457 }, { "epoch": 0.12348083845013763, "grad_norm": 1.21022530134185, "learning_rate": 9.772850368537456e-06, "loss": 0.6911, "step": 1458 }, { "epoch": 0.123565530383231, "grad_norm": 0.6874047956464947, "learning_rate": 9.772441460211603e-06, "loss": 0.7901, "step": 1459 }, { "epoch": 0.12365022231632437, "grad_norm": 2.5609825895447136, "learning_rate": 9.772032192734258e-06, "loss": 0.7111, "step": 1460 }, { "epoch": 0.12373491424941774, "grad_norm": 1.9206378835896822, "learning_rate": 9.77162256613622e-06, "loss": 0.6463, "step": 1461 }, { "epoch": 0.12381960618251112, "grad_norm": 1.513322383868717, "learning_rate": 9.77121258044832e-06, "loss": 0.6825, "step": 1462 }, { "epoch": 0.1239042981156045, "grad_norm": 0.6498278059496656, "learning_rate": 9.770802235701405e-06, "loss": 0.8759, "step": 1463 }, { "epoch": 0.12398899004869786, "grad_norm": 1.2955928747981464, "learning_rate": 9.770391531926361e-06, "loss": 0.6453, "step": 1464 }, { "epoch": 0.12407368198179124, "grad_norm": 1.318189779053333, "learning_rate": 9.769980469154094e-06, "loss": 0.6852, "step": 1465 }, { "epoch": 0.1241583739148846, "grad_norm": 1.4866071542349801, "learning_rate": 9.769569047415536e-06, "loss": 0.7153, "step": 1466 }, { "epoch": 0.12424306584797798, "grad_norm": 1.324710362751092, "learning_rate": 9.769157266741656e-06, "loss": 0.7114, "step": 1467 }, { "epoch": 0.12432775778107136, "grad_norm": 1.4961920018938684, "learning_rate": 9.768745127163433e-06, "loss": 0.6832, "step": 1468 }, { "epoch": 0.12441244971416472, "grad_norm": 1.3822029673207468, "learning_rate": 9.76833262871189e-06, "loss": 0.686, "step": 1469 }, { "epoch": 0.1244971416472581, "grad_norm": 1.356426074857201, "learning_rate": 9.767919771418066e-06, "loss": 0.592, "step": 1470 }, { "epoch": 0.12458183358035148, "grad_norm": 1.2282754689408173, "learning_rate": 9.767506555313034e-06, "loss": 0.6549, "step": 1471 }, { "epoch": 0.12466652551344484, "grad_norm": 1.3996922750925886, "learning_rate": 9.767092980427885e-06, "loss": 0.684, "step": 1472 }, { "epoch": 0.12475121744653822, "grad_norm": 1.294863179569077, "learning_rate": 9.76667904679375e-06, "loss": 0.6699, "step": 1473 }, { "epoch": 0.12483590937963158, "grad_norm": 1.333142239920761, "learning_rate": 9.766264754441775e-06, "loss": 0.6666, "step": 1474 }, { "epoch": 0.12492060131272496, "grad_norm": 3.7721521153898476, "learning_rate": 9.765850103403137e-06, "loss": 0.6826, "step": 1475 }, { "epoch": 0.12500529324581833, "grad_norm": 1.1014856855545083, "learning_rate": 9.765435093709044e-06, "loss": 0.6444, "step": 1476 }, { "epoch": 0.1250899851789117, "grad_norm": 1.299274207671382, "learning_rate": 9.765019725390726e-06, "loss": 0.6669, "step": 1477 }, { "epoch": 0.12517467711200508, "grad_norm": 1.1513908812084008, "learning_rate": 9.76460399847944e-06, "loss": 0.6477, "step": 1478 }, { "epoch": 0.12525936904509846, "grad_norm": 1.266569263907469, "learning_rate": 9.764187913006473e-06, "loss": 0.6381, "step": 1479 }, { "epoch": 0.12534406097819184, "grad_norm": 1.5290647945940279, "learning_rate": 9.763771469003141e-06, "loss": 0.6857, "step": 1480 }, { "epoch": 0.1254287529112852, "grad_norm": 1.8710178594484812, "learning_rate": 9.763354666500778e-06, "loss": 0.6874, "step": 1481 }, { "epoch": 0.12551344484437857, "grad_norm": 0.6944020760147399, "learning_rate": 9.762937505530754e-06, "loss": 0.9005, "step": 1482 }, { "epoch": 0.12559813677747195, "grad_norm": 1.3550251693429638, "learning_rate": 9.762519986124462e-06, "loss": 0.699, "step": 1483 }, { "epoch": 0.12568282871056533, "grad_norm": 1.885920759669244, "learning_rate": 9.762102108313323e-06, "loss": 0.6564, "step": 1484 }, { "epoch": 0.1257675206436587, "grad_norm": 1.386401658019552, "learning_rate": 9.761683872128782e-06, "loss": 0.6479, "step": 1485 }, { "epoch": 0.12585221257675205, "grad_norm": 1.5376012142163475, "learning_rate": 9.761265277602316e-06, "loss": 0.6415, "step": 1486 }, { "epoch": 0.12593690450984543, "grad_norm": 1.8101247988204916, "learning_rate": 9.760846324765428e-06, "loss": 0.6602, "step": 1487 }, { "epoch": 0.1260215964429388, "grad_norm": 0.6728862172100408, "learning_rate": 9.76042701364964e-06, "loss": 0.9617, "step": 1488 }, { "epoch": 0.1261062883760322, "grad_norm": 1.2153284603242591, "learning_rate": 9.760007344286515e-06, "loss": 0.6706, "step": 1489 }, { "epoch": 0.12619098030912557, "grad_norm": 1.4970694317322475, "learning_rate": 9.75958731670763e-06, "loss": 0.6313, "step": 1490 }, { "epoch": 0.12627567224221892, "grad_norm": 1.5506511607986035, "learning_rate": 9.759166930944597e-06, "loss": 0.7241, "step": 1491 }, { "epoch": 0.1263603641753123, "grad_norm": 1.2793762893998128, "learning_rate": 9.758746187029052e-06, "loss": 0.6901, "step": 1492 }, { "epoch": 0.12644505610840567, "grad_norm": 1.3446319905835877, "learning_rate": 9.758325084992658e-06, "loss": 0.6615, "step": 1493 }, { "epoch": 0.12652974804149905, "grad_norm": 1.406084137107209, "learning_rate": 9.757903624867101e-06, "loss": 0.6718, "step": 1494 }, { "epoch": 0.12661443997459243, "grad_norm": 0.596285143416324, "learning_rate": 9.757481806684107e-06, "loss": 0.8402, "step": 1495 }, { "epoch": 0.12669913190768578, "grad_norm": 1.4016874270291022, "learning_rate": 9.757059630475411e-06, "loss": 0.6391, "step": 1496 }, { "epoch": 0.12678382384077916, "grad_norm": 1.3452993406810334, "learning_rate": 9.756637096272793e-06, "loss": 0.647, "step": 1497 }, { "epoch": 0.12686851577387254, "grad_norm": 1.6743384574028422, "learning_rate": 9.756214204108042e-06, "loss": 0.6485, "step": 1498 }, { "epoch": 0.12695320770696591, "grad_norm": 1.4814050074195995, "learning_rate": 9.755790954012987e-06, "loss": 0.6856, "step": 1499 }, { "epoch": 0.1270378996400593, "grad_norm": 1.415516163733151, "learning_rate": 9.755367346019479e-06, "loss": 0.7068, "step": 1500 }, { "epoch": 0.12712259157315264, "grad_norm": 1.5159870634343617, "learning_rate": 9.754943380159398e-06, "loss": 0.6818, "step": 1501 }, { "epoch": 0.12720728350624602, "grad_norm": 1.689877780094841, "learning_rate": 9.754519056464651e-06, "loss": 0.6933, "step": 1502 }, { "epoch": 0.1272919754393394, "grad_norm": 1.3074473245741747, "learning_rate": 9.754094374967166e-06, "loss": 0.5919, "step": 1503 }, { "epoch": 0.12737666737243278, "grad_norm": 1.5488901255647025, "learning_rate": 9.753669335698906e-06, "loss": 0.6813, "step": 1504 }, { "epoch": 0.12746135930552616, "grad_norm": 1.3888440247644032, "learning_rate": 9.753243938691857e-06, "loss": 0.6743, "step": 1505 }, { "epoch": 0.12754605123861953, "grad_norm": 1.2653526126911208, "learning_rate": 9.752818183978033e-06, "loss": 0.6602, "step": 1506 }, { "epoch": 0.12763074317171288, "grad_norm": 1.3511816451035785, "learning_rate": 9.752392071589471e-06, "loss": 0.658, "step": 1507 }, { "epoch": 0.12771543510480626, "grad_norm": 1.4242279047582758, "learning_rate": 9.751965601558242e-06, "loss": 0.6792, "step": 1508 }, { "epoch": 0.12780012703789964, "grad_norm": 1.3573295222924446, "learning_rate": 9.751538773916438e-06, "loss": 0.6812, "step": 1509 }, { "epoch": 0.12788481897099302, "grad_norm": 1.6803351902597685, "learning_rate": 9.751111588696181e-06, "loss": 0.7057, "step": 1510 }, { "epoch": 0.1279695109040864, "grad_norm": 1.5937699304158741, "learning_rate": 9.750684045929619e-06, "loss": 0.6495, "step": 1511 }, { "epoch": 0.12805420283717975, "grad_norm": 0.643812305844065, "learning_rate": 9.750256145648926e-06, "loss": 0.8854, "step": 1512 }, { "epoch": 0.12813889477027313, "grad_norm": 2.5007580972775107, "learning_rate": 9.749827887886305e-06, "loss": 0.7317, "step": 1513 }, { "epoch": 0.1282235867033665, "grad_norm": 1.6698503642341156, "learning_rate": 9.749399272673983e-06, "loss": 0.6414, "step": 1514 }, { "epoch": 0.12830827863645988, "grad_norm": 1.2730030489282975, "learning_rate": 9.748970300044216e-06, "loss": 0.65, "step": 1515 }, { "epoch": 0.12839297056955326, "grad_norm": 1.5044046125941668, "learning_rate": 9.748540970029288e-06, "loss": 0.6526, "step": 1516 }, { "epoch": 0.1284776625026466, "grad_norm": 1.385077148460441, "learning_rate": 9.748111282661507e-06, "loss": 0.6853, "step": 1517 }, { "epoch": 0.12856235443574, "grad_norm": 1.3292961226682705, "learning_rate": 9.74768123797321e-06, "loss": 0.641, "step": 1518 }, { "epoch": 0.12864704636883337, "grad_norm": 1.5457484589140267, "learning_rate": 9.747250835996759e-06, "loss": 0.6067, "step": 1519 }, { "epoch": 0.12873173830192675, "grad_norm": 2.2621161589340035, "learning_rate": 9.746820076764545e-06, "loss": 0.6211, "step": 1520 }, { "epoch": 0.12881643023502012, "grad_norm": 1.2543194054255349, "learning_rate": 9.746388960308983e-06, "loss": 0.6422, "step": 1521 }, { "epoch": 0.12890112216811347, "grad_norm": 1.542569933132992, "learning_rate": 9.745957486662521e-06, "loss": 0.7392, "step": 1522 }, { "epoch": 0.12898581410120685, "grad_norm": 1.868370590872068, "learning_rate": 9.745525655857626e-06, "loss": 0.6364, "step": 1523 }, { "epoch": 0.12907050603430023, "grad_norm": 1.1443442209436332, "learning_rate": 9.745093467926796e-06, "loss": 0.7056, "step": 1524 }, { "epoch": 0.1291551979673936, "grad_norm": 1.5863257194570506, "learning_rate": 9.744660922902558e-06, "loss": 0.7014, "step": 1525 }, { "epoch": 0.129239889900487, "grad_norm": 1.6949521944151258, "learning_rate": 9.744228020817458e-06, "loss": 0.6162, "step": 1526 }, { "epoch": 0.12932458183358034, "grad_norm": 1.5253411448249656, "learning_rate": 9.743794761704081e-06, "loss": 0.657, "step": 1527 }, { "epoch": 0.12940927376667372, "grad_norm": 1.7068211663584982, "learning_rate": 9.743361145595027e-06, "loss": 0.7127, "step": 1528 }, { "epoch": 0.1294939656997671, "grad_norm": 1.266869385572695, "learning_rate": 9.742927172522929e-06, "loss": 0.7056, "step": 1529 }, { "epoch": 0.12957865763286047, "grad_norm": 1.2583108246099104, "learning_rate": 9.742492842520447e-06, "loss": 0.6352, "step": 1530 }, { "epoch": 0.12966334956595385, "grad_norm": 1.3284417791186258, "learning_rate": 9.742058155620267e-06, "loss": 0.7029, "step": 1531 }, { "epoch": 0.12974804149904723, "grad_norm": 1.233945934938729, "learning_rate": 9.741623111855098e-06, "loss": 0.6667, "step": 1532 }, { "epoch": 0.12983273343214058, "grad_norm": 1.1095649229417617, "learning_rate": 9.741187711257684e-06, "loss": 0.6341, "step": 1533 }, { "epoch": 0.12991742536523396, "grad_norm": 1.557899141026484, "learning_rate": 9.740751953860788e-06, "loss": 0.6958, "step": 1534 }, { "epoch": 0.13000211729832734, "grad_norm": 1.4332624972096064, "learning_rate": 9.740315839697205e-06, "loss": 0.6713, "step": 1535 }, { "epoch": 0.1300868092314207, "grad_norm": 1.2105443689807982, "learning_rate": 9.739879368799752e-06, "loss": 0.6352, "step": 1536 }, { "epoch": 0.1301715011645141, "grad_norm": 1.3798250027614765, "learning_rate": 9.739442541201278e-06, "loss": 0.6354, "step": 1537 }, { "epoch": 0.13025619309760744, "grad_norm": 1.4515068673645717, "learning_rate": 9.739005356934657e-06, "loss": 0.7181, "step": 1538 }, { "epoch": 0.13034088503070082, "grad_norm": 1.7778791385290915, "learning_rate": 9.738567816032788e-06, "loss": 0.6619, "step": 1539 }, { "epoch": 0.1304255769637942, "grad_norm": 1.2706510873386796, "learning_rate": 9.7381299185286e-06, "loss": 0.6506, "step": 1540 }, { "epoch": 0.13051026889688758, "grad_norm": 1.380216047726685, "learning_rate": 9.737691664455045e-06, "loss": 0.6576, "step": 1541 }, { "epoch": 0.13059496082998096, "grad_norm": 1.4962190032175697, "learning_rate": 9.737253053845106e-06, "loss": 0.6954, "step": 1542 }, { "epoch": 0.1306796527630743, "grad_norm": 1.8798476206826444, "learning_rate": 9.73681408673179e-06, "loss": 0.6642, "step": 1543 }, { "epoch": 0.13076434469616768, "grad_norm": 1.0640056099815898, "learning_rate": 9.73637476314813e-06, "loss": 0.6854, "step": 1544 }, { "epoch": 0.13084903662926106, "grad_norm": 1.5325170779752852, "learning_rate": 9.735935083127188e-06, "loss": 0.655, "step": 1545 }, { "epoch": 0.13093372856235444, "grad_norm": 1.5573829358372304, "learning_rate": 9.735495046702056e-06, "loss": 0.6093, "step": 1546 }, { "epoch": 0.13101842049544782, "grad_norm": 1.8086327817222934, "learning_rate": 9.735054653905844e-06, "loss": 0.706, "step": 1547 }, { "epoch": 0.13110311242854117, "grad_norm": 1.3570038839079595, "learning_rate": 9.734613904771697e-06, "loss": 0.6417, "step": 1548 }, { "epoch": 0.13118780436163455, "grad_norm": 1.1035654391432503, "learning_rate": 9.734172799332782e-06, "loss": 0.6831, "step": 1549 }, { "epoch": 0.13127249629472793, "grad_norm": 1.502793140771003, "learning_rate": 9.733731337622296e-06, "loss": 0.6501, "step": 1550 }, { "epoch": 0.1313571882278213, "grad_norm": 1.3890727754652146, "learning_rate": 9.733289519673458e-06, "loss": 0.7067, "step": 1551 }, { "epoch": 0.13144188016091468, "grad_norm": 0.6487938730574176, "learning_rate": 9.732847345519523e-06, "loss": 0.8774, "step": 1552 }, { "epoch": 0.13152657209400806, "grad_norm": 1.6618113100177367, "learning_rate": 9.732404815193761e-06, "loss": 0.6434, "step": 1553 }, { "epoch": 0.1316112640271014, "grad_norm": 1.4319530303712087, "learning_rate": 9.73196192872948e-06, "loss": 0.6616, "step": 1554 }, { "epoch": 0.1316959559601948, "grad_norm": 1.2282883772648483, "learning_rate": 9.731518686160004e-06, "loss": 0.6311, "step": 1555 }, { "epoch": 0.13178064789328817, "grad_norm": 3.4427417752785074, "learning_rate": 9.731075087518696e-06, "loss": 0.6506, "step": 1556 }, { "epoch": 0.13186533982638154, "grad_norm": 1.4398040403644772, "learning_rate": 9.730631132838933e-06, "loss": 0.6955, "step": 1557 }, { "epoch": 0.13195003175947492, "grad_norm": 1.3674603000810341, "learning_rate": 9.730186822154126e-06, "loss": 0.6982, "step": 1558 }, { "epoch": 0.13203472369256827, "grad_norm": 1.515786641318194, "learning_rate": 9.729742155497715e-06, "loss": 0.6717, "step": 1559 }, { "epoch": 0.13211941562566165, "grad_norm": 1.1658869011151616, "learning_rate": 9.729297132903161e-06, "loss": 0.6486, "step": 1560 }, { "epoch": 0.13220410755875503, "grad_norm": 0.7012803712581955, "learning_rate": 9.728851754403957e-06, "loss": 0.8695, "step": 1561 }, { "epoch": 0.1322887994918484, "grad_norm": 0.7088146637124284, "learning_rate": 9.728406020033616e-06, "loss": 0.8296, "step": 1562 }, { "epoch": 0.13237349142494179, "grad_norm": 1.5391710516511954, "learning_rate": 9.727959929825683e-06, "loss": 0.6641, "step": 1563 }, { "epoch": 0.13245818335803514, "grad_norm": 1.3226024614411598, "learning_rate": 9.72751348381373e-06, "loss": 0.6017, "step": 1564 }, { "epoch": 0.13254287529112851, "grad_norm": 1.3190646142237645, "learning_rate": 9.727066682031357e-06, "loss": 0.6676, "step": 1565 }, { "epoch": 0.1326275672242219, "grad_norm": 1.132380845730517, "learning_rate": 9.726619524512182e-06, "loss": 0.6971, "step": 1566 }, { "epoch": 0.13271225915731527, "grad_norm": 1.3152051802019147, "learning_rate": 9.72617201128986e-06, "loss": 0.6441, "step": 1567 }, { "epoch": 0.13279695109040865, "grad_norm": 1.242305788184994, "learning_rate": 9.725724142398068e-06, "loss": 0.6054, "step": 1568 }, { "epoch": 0.132881643023502, "grad_norm": 1.2315148438987598, "learning_rate": 9.72527591787051e-06, "loss": 0.6549, "step": 1569 }, { "epoch": 0.13296633495659538, "grad_norm": 1.3662606874440388, "learning_rate": 9.72482733774092e-06, "loss": 0.6469, "step": 1570 }, { "epoch": 0.13305102688968876, "grad_norm": 1.265604082529079, "learning_rate": 9.72437840204305e-06, "loss": 0.6617, "step": 1571 }, { "epoch": 0.13313571882278213, "grad_norm": 0.6952289153465838, "learning_rate": 9.723929110810691e-06, "loss": 0.8867, "step": 1572 }, { "epoch": 0.1332204107558755, "grad_norm": 0.6207780116068422, "learning_rate": 9.72347946407765e-06, "loss": 0.8435, "step": 1573 }, { "epoch": 0.13330510268896886, "grad_norm": 1.2098948723363203, "learning_rate": 9.723029461877768e-06, "loss": 0.7013, "step": 1574 }, { "epoch": 0.13338979462206224, "grad_norm": 1.3461478065407138, "learning_rate": 9.72257910424491e-06, "loss": 0.6666, "step": 1575 }, { "epoch": 0.13347448655515562, "grad_norm": 1.349873035703175, "learning_rate": 9.722128391212968e-06, "loss": 0.6309, "step": 1576 }, { "epoch": 0.133559178488249, "grad_norm": 1.290203406944031, "learning_rate": 9.72167732281586e-06, "loss": 0.6593, "step": 1577 }, { "epoch": 0.13364387042134238, "grad_norm": 1.3614311413383753, "learning_rate": 9.721225899087528e-06, "loss": 0.687, "step": 1578 }, { "epoch": 0.13372856235443575, "grad_norm": 1.5200082878395216, "learning_rate": 9.720774120061949e-06, "loss": 0.6516, "step": 1579 }, { "epoch": 0.1338132542875291, "grad_norm": 1.873843779401844, "learning_rate": 9.72032198577312e-06, "loss": 0.6408, "step": 1580 }, { "epoch": 0.13389794622062248, "grad_norm": 1.4429272399959663, "learning_rate": 9.719869496255066e-06, "loss": 0.561, "step": 1581 }, { "epoch": 0.13398263815371586, "grad_norm": 1.6073102069612861, "learning_rate": 9.719416651541839e-06, "loss": 0.7281, "step": 1582 }, { "epoch": 0.13406733008680924, "grad_norm": 1.3315271372294593, "learning_rate": 9.718963451667517e-06, "loss": 0.6872, "step": 1583 }, { "epoch": 0.13415202201990262, "grad_norm": 1.4333950274708873, "learning_rate": 9.718509896666211e-06, "loss": 0.688, "step": 1584 }, { "epoch": 0.13423671395299597, "grad_norm": 1.2866633360898014, "learning_rate": 9.718055986572047e-06, "loss": 0.6466, "step": 1585 }, { "epoch": 0.13432140588608935, "grad_norm": 1.5557107784168691, "learning_rate": 9.717601721419187e-06, "loss": 0.6669, "step": 1586 }, { "epoch": 0.13440609781918272, "grad_norm": 1.549031823857582, "learning_rate": 9.717147101241817e-06, "loss": 0.6916, "step": 1587 }, { "epoch": 0.1344907897522761, "grad_norm": 1.2065154114784946, "learning_rate": 9.71669212607415e-06, "loss": 0.6635, "step": 1588 }, { "epoch": 0.13457548168536948, "grad_norm": 2.5159881570470928, "learning_rate": 9.716236795950422e-06, "loss": 0.6758, "step": 1589 }, { "epoch": 0.13466017361846283, "grad_norm": 2.0182317450293645, "learning_rate": 9.715781110904903e-06, "loss": 0.6724, "step": 1590 }, { "epoch": 0.1347448655515562, "grad_norm": 0.7130448993391081, "learning_rate": 9.715325070971884e-06, "loss": 0.8453, "step": 1591 }, { "epoch": 0.1348295574846496, "grad_norm": 2.0224925686943678, "learning_rate": 9.714868676185685e-06, "loss": 0.6658, "step": 1592 }, { "epoch": 0.13491424941774297, "grad_norm": 1.396378414053792, "learning_rate": 9.714411926580652e-06, "loss": 0.6539, "step": 1593 }, { "epoch": 0.13499894135083634, "grad_norm": 0.6619137005538175, "learning_rate": 9.713954822191156e-06, "loss": 0.8971, "step": 1594 }, { "epoch": 0.1350836332839297, "grad_norm": 1.6218661774395382, "learning_rate": 9.713497363051599e-06, "loss": 0.7014, "step": 1595 }, { "epoch": 0.13516832521702307, "grad_norm": 1.1726917291179104, "learning_rate": 9.713039549196405e-06, "loss": 0.6715, "step": 1596 }, { "epoch": 0.13525301715011645, "grad_norm": 1.427255243742251, "learning_rate": 9.71258138066003e-06, "loss": 0.653, "step": 1597 }, { "epoch": 0.13533770908320983, "grad_norm": 1.241891733854448, "learning_rate": 9.712122857476951e-06, "loss": 0.6548, "step": 1598 }, { "epoch": 0.1354224010163032, "grad_norm": 1.7123526316669333, "learning_rate": 9.711663979681676e-06, "loss": 0.6773, "step": 1599 }, { "epoch": 0.13550709294939656, "grad_norm": 0.6705004832832253, "learning_rate": 9.711204747308737e-06, "loss": 0.8922, "step": 1600 }, { "epoch": 0.13559178488248994, "grad_norm": 1.4655135625257485, "learning_rate": 9.710745160392693e-06, "loss": 0.6421, "step": 1601 }, { "epoch": 0.1356764768155833, "grad_norm": 1.2208796819320464, "learning_rate": 9.71028521896813e-06, "loss": 0.6571, "step": 1602 }, { "epoch": 0.1357611687486767, "grad_norm": 1.3292739633485342, "learning_rate": 9.709824923069664e-06, "loss": 0.6765, "step": 1603 }, { "epoch": 0.13584586068177007, "grad_norm": 1.393422233366989, "learning_rate": 9.709364272731933e-06, "loss": 0.6601, "step": 1604 }, { "epoch": 0.13593055261486345, "grad_norm": 1.17392765355778, "learning_rate": 9.708903267989603e-06, "loss": 0.6308, "step": 1605 }, { "epoch": 0.1360152445479568, "grad_norm": 1.440103357706964, "learning_rate": 9.708441908877364e-06, "loss": 0.6709, "step": 1606 }, { "epoch": 0.13609993648105018, "grad_norm": 0.6393348743826308, "learning_rate": 9.707980195429943e-06, "loss": 0.8381, "step": 1607 }, { "epoch": 0.13618462841414355, "grad_norm": 1.7753120666597402, "learning_rate": 9.707518127682081e-06, "loss": 0.6515, "step": 1608 }, { "epoch": 0.13626932034723693, "grad_norm": 1.2308972283106212, "learning_rate": 9.707055705668552e-06, "loss": 0.6571, "step": 1609 }, { "epoch": 0.1363540122803303, "grad_norm": 1.5199123700775343, "learning_rate": 9.706592929424156e-06, "loss": 0.6809, "step": 1610 }, { "epoch": 0.13643870421342366, "grad_norm": 1.61366571117322, "learning_rate": 9.70612979898372e-06, "loss": 0.6388, "step": 1611 }, { "epoch": 0.13652339614651704, "grad_norm": 0.6493313710728198, "learning_rate": 9.705666314382097e-06, "loss": 0.8718, "step": 1612 }, { "epoch": 0.13660808807961042, "grad_norm": 1.2262906961533844, "learning_rate": 9.705202475654166e-06, "loss": 0.666, "step": 1613 }, { "epoch": 0.1366927800127038, "grad_norm": 1.3852126866507717, "learning_rate": 9.704738282834834e-06, "loss": 0.6344, "step": 1614 }, { "epoch": 0.13677747194579717, "grad_norm": 1.3401155678846661, "learning_rate": 9.70427373595903e-06, "loss": 0.6654, "step": 1615 }, { "epoch": 0.13686216387889052, "grad_norm": 1.750717725170609, "learning_rate": 9.703808835061721e-06, "loss": 0.6354, "step": 1616 }, { "epoch": 0.1369468558119839, "grad_norm": 1.5826968722672259, "learning_rate": 9.703343580177889e-06, "loss": 0.6463, "step": 1617 }, { "epoch": 0.13703154774507728, "grad_norm": 0.6011604093359648, "learning_rate": 9.702877971342546e-06, "loss": 0.8123, "step": 1618 }, { "epoch": 0.13711623967817066, "grad_norm": 2.562891352368477, "learning_rate": 9.702412008590732e-06, "loss": 0.6882, "step": 1619 }, { "epoch": 0.13720093161126404, "grad_norm": 1.229224758616569, "learning_rate": 9.701945691957516e-06, "loss": 0.6326, "step": 1620 }, { "epoch": 0.1372856235443574, "grad_norm": 1.2500308827232762, "learning_rate": 9.701479021477987e-06, "loss": 0.6511, "step": 1621 }, { "epoch": 0.13737031547745077, "grad_norm": 1.2673641886027898, "learning_rate": 9.701011997187266e-06, "loss": 0.6981, "step": 1622 }, { "epoch": 0.13745500741054414, "grad_norm": 1.2049588188399187, "learning_rate": 9.7005446191205e-06, "loss": 0.6259, "step": 1623 }, { "epoch": 0.13753969934363752, "grad_norm": 1.2796021784798948, "learning_rate": 9.70007688731286e-06, "loss": 0.6343, "step": 1624 }, { "epoch": 0.1376243912767309, "grad_norm": 1.1320114984938117, "learning_rate": 9.699608801799548e-06, "loss": 0.6545, "step": 1625 }, { "epoch": 0.13770908320982425, "grad_norm": 0.692168585464734, "learning_rate": 9.699140362615787e-06, "loss": 0.8453, "step": 1626 }, { "epoch": 0.13779377514291763, "grad_norm": 1.4296796728433947, "learning_rate": 9.698671569796829e-06, "loss": 0.6766, "step": 1627 }, { "epoch": 0.137878467076011, "grad_norm": 1.8178423278333364, "learning_rate": 9.698202423377955e-06, "loss": 0.6594, "step": 1628 }, { "epoch": 0.13796315900910439, "grad_norm": 1.2712631699484116, "learning_rate": 9.697732923394473e-06, "loss": 0.6802, "step": 1629 }, { "epoch": 0.13804785094219776, "grad_norm": 1.2202884841517463, "learning_rate": 9.69726306988171e-06, "loss": 0.6698, "step": 1630 }, { "epoch": 0.13813254287529114, "grad_norm": 1.4719053961950974, "learning_rate": 9.696792862875031e-06, "loss": 0.6004, "step": 1631 }, { "epoch": 0.1382172348083845, "grad_norm": 0.6435471405942047, "learning_rate": 9.696322302409816e-06, "loss": 0.8881, "step": 1632 }, { "epoch": 0.13830192674147787, "grad_norm": 1.1588094492706387, "learning_rate": 9.69585138852148e-06, "loss": 0.6325, "step": 1633 }, { "epoch": 0.13838661867457125, "grad_norm": 2.0381247962841806, "learning_rate": 9.69538012124546e-06, "loss": 0.6609, "step": 1634 }, { "epoch": 0.13847131060766463, "grad_norm": 1.4752384245073829, "learning_rate": 9.694908500617225e-06, "loss": 0.6506, "step": 1635 }, { "epoch": 0.138556002540758, "grad_norm": 1.367708664190331, "learning_rate": 9.694436526672264e-06, "loss": 0.6796, "step": 1636 }, { "epoch": 0.13864069447385136, "grad_norm": 2.069241324622836, "learning_rate": 9.693964199446097e-06, "loss": 0.662, "step": 1637 }, { "epoch": 0.13872538640694473, "grad_norm": 1.214425513787261, "learning_rate": 9.693491518974268e-06, "loss": 0.6424, "step": 1638 }, { "epoch": 0.1388100783400381, "grad_norm": 0.6651333247665802, "learning_rate": 9.693018485292348e-06, "loss": 0.8581, "step": 1639 }, { "epoch": 0.1388947702731315, "grad_norm": 1.3718748935160943, "learning_rate": 9.692545098435936e-06, "loss": 0.6568, "step": 1640 }, { "epoch": 0.13897946220622487, "grad_norm": 0.5842715359164756, "learning_rate": 9.692071358440657e-06, "loss": 0.8673, "step": 1641 }, { "epoch": 0.13906415413931822, "grad_norm": 1.5552763889705528, "learning_rate": 9.691597265342163e-06, "loss": 0.6748, "step": 1642 }, { "epoch": 0.1391488460724116, "grad_norm": 0.6099581051782323, "learning_rate": 9.69112281917613e-06, "loss": 0.8535, "step": 1643 }, { "epoch": 0.13923353800550498, "grad_norm": 1.323240520284454, "learning_rate": 9.690648019978268e-06, "loss": 0.6995, "step": 1644 }, { "epoch": 0.13931822993859835, "grad_norm": 1.3176971179506822, "learning_rate": 9.690172867784302e-06, "loss": 0.6665, "step": 1645 }, { "epoch": 0.13940292187169173, "grad_norm": 1.3512894924187508, "learning_rate": 9.689697362629992e-06, "loss": 0.6839, "step": 1646 }, { "epoch": 0.13948761380478508, "grad_norm": 1.6116543076010594, "learning_rate": 9.689221504551122e-06, "loss": 0.6486, "step": 1647 }, { "epoch": 0.13957230573787846, "grad_norm": 1.3727690650918263, "learning_rate": 9.688745293583504e-06, "loss": 0.6738, "step": 1648 }, { "epoch": 0.13965699767097184, "grad_norm": 1.3598183854129549, "learning_rate": 9.688268729762975e-06, "loss": 0.7376, "step": 1649 }, { "epoch": 0.13974168960406522, "grad_norm": 1.3848843550683816, "learning_rate": 9.687791813125398e-06, "loss": 0.6688, "step": 1650 }, { "epoch": 0.1398263815371586, "grad_norm": 1.3246748858366768, "learning_rate": 9.687314543706662e-06, "loss": 0.6594, "step": 1651 }, { "epoch": 0.13991107347025195, "grad_norm": 1.3195781754948452, "learning_rate": 9.686836921542689e-06, "loss": 0.6881, "step": 1652 }, { "epoch": 0.13999576540334532, "grad_norm": 1.977862081971806, "learning_rate": 9.686358946669419e-06, "loss": 0.6404, "step": 1653 }, { "epoch": 0.1400804573364387, "grad_norm": 1.3861309312614518, "learning_rate": 9.685880619122822e-06, "loss": 0.6443, "step": 1654 }, { "epoch": 0.14016514926953208, "grad_norm": 1.686815947265014, "learning_rate": 9.685401938938897e-06, "loss": 0.6366, "step": 1655 }, { "epoch": 0.14024984120262546, "grad_norm": 0.6696044619816117, "learning_rate": 9.684922906153665e-06, "loss": 0.8721, "step": 1656 }, { "epoch": 0.14033453313571884, "grad_norm": 1.3551539772060943, "learning_rate": 9.684443520803176e-06, "loss": 0.6131, "step": 1657 }, { "epoch": 0.1404192250688122, "grad_norm": 1.8167261840596367, "learning_rate": 9.683963782923507e-06, "loss": 0.6685, "step": 1658 }, { "epoch": 0.14050391700190556, "grad_norm": 0.6356949174052668, "learning_rate": 9.68348369255076e-06, "loss": 0.8774, "step": 1659 }, { "epoch": 0.14058860893499894, "grad_norm": 1.69205461924918, "learning_rate": 9.683003249721066e-06, "loss": 0.7038, "step": 1660 }, { "epoch": 0.14067330086809232, "grad_norm": 1.3285322788176162, "learning_rate": 9.682522454470577e-06, "loss": 0.6738, "step": 1661 }, { "epoch": 0.1407579928011857, "grad_norm": 1.287010265098674, "learning_rate": 9.682041306835481e-06, "loss": 0.6811, "step": 1662 }, { "epoch": 0.14084268473427905, "grad_norm": 1.1503386024089757, "learning_rate": 9.681559806851985e-06, "loss": 0.6495, "step": 1663 }, { "epoch": 0.14092737666737243, "grad_norm": 1.3256816482826046, "learning_rate": 9.681077954556321e-06, "loss": 0.7059, "step": 1664 }, { "epoch": 0.1410120686004658, "grad_norm": 0.6228040010366678, "learning_rate": 9.680595749984755e-06, "loss": 0.8418, "step": 1665 }, { "epoch": 0.14109676053355918, "grad_norm": 1.2971417611925842, "learning_rate": 9.680113193173573e-06, "loss": 0.7008, "step": 1666 }, { "epoch": 0.14118145246665256, "grad_norm": 3.088405928043562, "learning_rate": 9.679630284159091e-06, "loss": 0.683, "step": 1667 }, { "epoch": 0.1412661443997459, "grad_norm": 1.3885958574539008, "learning_rate": 9.679147022977651e-06, "loss": 0.6696, "step": 1668 }, { "epoch": 0.1413508363328393, "grad_norm": 1.4026492471602452, "learning_rate": 9.678663409665618e-06, "loss": 0.6845, "step": 1669 }, { "epoch": 0.14143552826593267, "grad_norm": 1.6973216532000954, "learning_rate": 9.678179444259391e-06, "loss": 0.7075, "step": 1670 }, { "epoch": 0.14152022019902605, "grad_norm": 1.2812093800968154, "learning_rate": 9.677695126795388e-06, "loss": 0.6352, "step": 1671 }, { "epoch": 0.14160491213211943, "grad_norm": 8.20884710596853, "learning_rate": 9.677210457310058e-06, "loss": 0.673, "step": 1672 }, { "epoch": 0.14168960406521278, "grad_norm": 1.470809741565883, "learning_rate": 9.676725435839873e-06, "loss": 0.7164, "step": 1673 }, { "epoch": 0.14177429599830615, "grad_norm": 1.2779519116521556, "learning_rate": 9.676240062421334e-06, "loss": 0.7536, "step": 1674 }, { "epoch": 0.14185898793139953, "grad_norm": 1.4619216838716367, "learning_rate": 9.67575433709097e-06, "loss": 0.6971, "step": 1675 }, { "epoch": 0.1419436798644929, "grad_norm": 0.6162142011408482, "learning_rate": 9.675268259885332e-06, "loss": 0.8964, "step": 1676 }, { "epoch": 0.1420283717975863, "grad_norm": 1.2328651505225225, "learning_rate": 9.674781830841e-06, "loss": 0.7164, "step": 1677 }, { "epoch": 0.14211306373067964, "grad_norm": 1.6998681593333702, "learning_rate": 9.674295049994583e-06, "loss": 0.7095, "step": 1678 }, { "epoch": 0.14219775566377302, "grad_norm": 1.434544307891861, "learning_rate": 9.673807917382711e-06, "loss": 0.6191, "step": 1679 }, { "epoch": 0.1422824475968664, "grad_norm": 1.277730241144471, "learning_rate": 9.673320433042044e-06, "loss": 0.6734, "step": 1680 }, { "epoch": 0.14236713952995977, "grad_norm": 1.415870241078384, "learning_rate": 9.672832597009268e-06, "loss": 0.689, "step": 1681 }, { "epoch": 0.14245183146305315, "grad_norm": 1.9238754055122633, "learning_rate": 9.672344409321095e-06, "loss": 0.65, "step": 1682 }, { "epoch": 0.14253652339614653, "grad_norm": 1.38642321626185, "learning_rate": 9.671855870014264e-06, "loss": 0.6696, "step": 1683 }, { "epoch": 0.14262121532923988, "grad_norm": 1.6078181663024573, "learning_rate": 9.67136697912554e-06, "loss": 0.6009, "step": 1684 }, { "epoch": 0.14270590726233326, "grad_norm": 1.4292411338697957, "learning_rate": 9.670877736691716e-06, "loss": 0.6781, "step": 1685 }, { "epoch": 0.14279059919542664, "grad_norm": 1.1262155202288524, "learning_rate": 9.670388142749609e-06, "loss": 0.7015, "step": 1686 }, { "epoch": 0.14287529112852002, "grad_norm": 1.9060949291251614, "learning_rate": 9.66989819733606e-06, "loss": 0.6583, "step": 1687 }, { "epoch": 0.1429599830616134, "grad_norm": 1.2727543161202473, "learning_rate": 9.669407900487946e-06, "loss": 0.6918, "step": 1688 }, { "epoch": 0.14304467499470674, "grad_norm": 1.8387766482372085, "learning_rate": 9.668917252242163e-06, "loss": 0.6802, "step": 1689 }, { "epoch": 0.14312936692780012, "grad_norm": 1.3374542248440084, "learning_rate": 9.668426252635632e-06, "loss": 0.6836, "step": 1690 }, { "epoch": 0.1432140588608935, "grad_norm": 1.3980648946295269, "learning_rate": 9.667934901705305e-06, "loss": 0.6891, "step": 1691 }, { "epoch": 0.14329875079398688, "grad_norm": 1.585694495354694, "learning_rate": 9.667443199488159e-06, "loss": 0.678, "step": 1692 }, { "epoch": 0.14338344272708026, "grad_norm": 1.3345497717075032, "learning_rate": 9.666951146021197e-06, "loss": 0.698, "step": 1693 }, { "epoch": 0.1434681346601736, "grad_norm": 2.9427411063416877, "learning_rate": 9.66645874134145e-06, "loss": 0.6355, "step": 1694 }, { "epoch": 0.14355282659326699, "grad_norm": 1.2898813511747622, "learning_rate": 9.66596598548597e-06, "loss": 0.6643, "step": 1695 }, { "epoch": 0.14363751852636036, "grad_norm": 1.3479536491514743, "learning_rate": 9.665472878491843e-06, "loss": 0.6276, "step": 1696 }, { "epoch": 0.14372221045945374, "grad_norm": 1.6410000273333187, "learning_rate": 9.664979420396178e-06, "loss": 0.7033, "step": 1697 }, { "epoch": 0.14380690239254712, "grad_norm": 4.081655268220769, "learning_rate": 9.664485611236108e-06, "loss": 0.6397, "step": 1698 }, { "epoch": 0.14389159432564047, "grad_norm": 1.2658914002868111, "learning_rate": 9.663991451048798e-06, "loss": 0.6769, "step": 1699 }, { "epoch": 0.14397628625873385, "grad_norm": 1.3817026042333849, "learning_rate": 9.663496939871433e-06, "loss": 0.6229, "step": 1700 }, { "epoch": 0.14406097819182723, "grad_norm": 1.5840517609170235, "learning_rate": 9.663002077741229e-06, "loss": 0.6837, "step": 1701 }, { "epoch": 0.1441456701249206, "grad_norm": 1.263937176910101, "learning_rate": 9.662506864695426e-06, "loss": 0.6309, "step": 1702 }, { "epoch": 0.14423036205801398, "grad_norm": 1.6910448670791638, "learning_rate": 9.662011300771293e-06, "loss": 0.6555, "step": 1703 }, { "epoch": 0.14431505399110733, "grad_norm": 2.305615866191091, "learning_rate": 9.661515386006124e-06, "loss": 0.6864, "step": 1704 }, { "epoch": 0.1443997459242007, "grad_norm": 1.4975505024704914, "learning_rate": 9.661019120437238e-06, "loss": 0.646, "step": 1705 }, { "epoch": 0.1444844378572941, "grad_norm": 1.2155933014396743, "learning_rate": 9.66052250410198e-06, "loss": 0.6375, "step": 1706 }, { "epoch": 0.14456912979038747, "grad_norm": 1.534707823419654, "learning_rate": 9.660025537037727e-06, "loss": 0.7219, "step": 1707 }, { "epoch": 0.14465382172348085, "grad_norm": 1.3494224243966806, "learning_rate": 9.659528219281876e-06, "loss": 0.6739, "step": 1708 }, { "epoch": 0.14473851365657422, "grad_norm": 1.1146114897394048, "learning_rate": 9.659030550871852e-06, "loss": 0.5627, "step": 1709 }, { "epoch": 0.14482320558966758, "grad_norm": 1.4087363038385128, "learning_rate": 9.658532531845111e-06, "loss": 0.6448, "step": 1710 }, { "epoch": 0.14490789752276095, "grad_norm": 1.187656872543572, "learning_rate": 9.658034162239126e-06, "loss": 0.6367, "step": 1711 }, { "epoch": 0.14499258945585433, "grad_norm": 1.827802432445277, "learning_rate": 9.657535442091407e-06, "loss": 0.6779, "step": 1712 }, { "epoch": 0.1450772813889477, "grad_norm": 1.7242397532302745, "learning_rate": 9.657036371439481e-06, "loss": 0.6337, "step": 1713 }, { "epoch": 0.1451619733220411, "grad_norm": 1.2611951844297025, "learning_rate": 9.656536950320909e-06, "loss": 0.6607, "step": 1714 }, { "epoch": 0.14524666525513444, "grad_norm": 1.3777206656784435, "learning_rate": 9.656037178773275e-06, "loss": 0.6482, "step": 1715 }, { "epoch": 0.14533135718822782, "grad_norm": 1.7410207527402866, "learning_rate": 9.655537056834188e-06, "loss": 0.691, "step": 1716 }, { "epoch": 0.1454160491213212, "grad_norm": 1.345622889181735, "learning_rate": 9.655036584541284e-06, "loss": 0.6322, "step": 1717 }, { "epoch": 0.14550074105441457, "grad_norm": 4.142820106894121, "learning_rate": 9.654535761932228e-06, "loss": 0.6485, "step": 1718 }, { "epoch": 0.14558543298750795, "grad_norm": 1.2295158596367122, "learning_rate": 9.65403458904471e-06, "loss": 0.6543, "step": 1719 }, { "epoch": 0.1456701249206013, "grad_norm": 1.3585988988017954, "learning_rate": 9.653533065916443e-06, "loss": 0.6657, "step": 1720 }, { "epoch": 0.14575481685369468, "grad_norm": 1.1830801074275084, "learning_rate": 9.653031192585172e-06, "loss": 0.6698, "step": 1721 }, { "epoch": 0.14583950878678806, "grad_norm": 1.7670480072791526, "learning_rate": 9.652528969088665e-06, "loss": 0.6861, "step": 1722 }, { "epoch": 0.14592420071988144, "grad_norm": 1.8018098948229366, "learning_rate": 9.652026395464717e-06, "loss": 0.6526, "step": 1723 }, { "epoch": 0.14600889265297481, "grad_norm": 1.4362053500499241, "learning_rate": 9.651523471751148e-06, "loss": 0.6558, "step": 1724 }, { "epoch": 0.14609358458606816, "grad_norm": 1.417418169665696, "learning_rate": 9.651020197985807e-06, "loss": 0.6435, "step": 1725 }, { "epoch": 0.14617827651916154, "grad_norm": 1.3052537464014264, "learning_rate": 9.650516574206568e-06, "loss": 0.6855, "step": 1726 }, { "epoch": 0.14626296845225492, "grad_norm": 1.8356396379666005, "learning_rate": 9.650012600451333e-06, "loss": 0.7161, "step": 1727 }, { "epoch": 0.1463476603853483, "grad_norm": 1.1675231312975292, "learning_rate": 9.649508276758024e-06, "loss": 0.6706, "step": 1728 }, { "epoch": 0.14643235231844168, "grad_norm": 1.7108775925919213, "learning_rate": 9.649003603164598e-06, "loss": 0.6929, "step": 1729 }, { "epoch": 0.14651704425153503, "grad_norm": 1.1589825904148194, "learning_rate": 9.648498579709034e-06, "loss": 0.6683, "step": 1730 }, { "epoch": 0.1466017361846284, "grad_norm": 1.6171402043005305, "learning_rate": 9.647993206429336e-06, "loss": 0.7029, "step": 1731 }, { "epoch": 0.14668642811772178, "grad_norm": 1.2768022669062213, "learning_rate": 9.647487483363537e-06, "loss": 0.6192, "step": 1732 }, { "epoch": 0.14677112005081516, "grad_norm": 1.325891630754191, "learning_rate": 9.646981410549696e-06, "loss": 0.6911, "step": 1733 }, { "epoch": 0.14685581198390854, "grad_norm": 1.4065208580316937, "learning_rate": 9.646474988025895e-06, "loss": 0.6466, "step": 1734 }, { "epoch": 0.14694050391700192, "grad_norm": 1.6482948360768364, "learning_rate": 9.645968215830249e-06, "loss": 0.663, "step": 1735 }, { "epoch": 0.14702519585009527, "grad_norm": 1.4329611439345298, "learning_rate": 9.645461094000892e-06, "loss": 0.6308, "step": 1736 }, { "epoch": 0.14710988778318865, "grad_norm": 2.611845242537149, "learning_rate": 9.64495362257599e-06, "loss": 0.6934, "step": 1737 }, { "epoch": 0.14719457971628203, "grad_norm": 2.5188803098141928, "learning_rate": 9.644445801593731e-06, "loss": 0.6469, "step": 1738 }, { "epoch": 0.1472792716493754, "grad_norm": 1.8075284135038712, "learning_rate": 9.643937631092332e-06, "loss": 0.6555, "step": 1739 }, { "epoch": 0.14736396358246878, "grad_norm": 1.2851458962612312, "learning_rate": 9.643429111110034e-06, "loss": 0.6726, "step": 1740 }, { "epoch": 0.14744865551556213, "grad_norm": 3.9288731137629966, "learning_rate": 9.64292024168511e-06, "loss": 0.6773, "step": 1741 }, { "epoch": 0.1475333474486555, "grad_norm": 2.1283959699416255, "learning_rate": 9.64241102285585e-06, "loss": 0.6938, "step": 1742 }, { "epoch": 0.1476180393817489, "grad_norm": 2.1581097826404174, "learning_rate": 9.64190145466058e-06, "loss": 0.7143, "step": 1743 }, { "epoch": 0.14770273131484227, "grad_norm": 2.28638873763301, "learning_rate": 9.641391537137644e-06, "loss": 0.6671, "step": 1744 }, { "epoch": 0.14778742324793565, "grad_norm": 1.536751305077117, "learning_rate": 9.640881270325418e-06, "loss": 0.6867, "step": 1745 }, { "epoch": 0.147872115181029, "grad_norm": 1.6933242233974426, "learning_rate": 9.6403706542623e-06, "loss": 0.6445, "step": 1746 }, { "epoch": 0.14795680711412237, "grad_norm": 0.6349868966686646, "learning_rate": 9.639859688986722e-06, "loss": 0.8716, "step": 1747 }, { "epoch": 0.14804149904721575, "grad_norm": 1.4357927475539194, "learning_rate": 9.63934837453713e-06, "loss": 0.6285, "step": 1748 }, { "epoch": 0.14812619098030913, "grad_norm": 1.2259488760413948, "learning_rate": 9.638836710952007e-06, "loss": 0.6725, "step": 1749 }, { "epoch": 0.1482108829134025, "grad_norm": 1.439028175663995, "learning_rate": 9.638324698269858e-06, "loss": 0.7473, "step": 1750 }, { "epoch": 0.14829557484649586, "grad_norm": 1.2006754894785892, "learning_rate": 9.637812336529214e-06, "loss": 0.7015, "step": 1751 }, { "epoch": 0.14838026677958924, "grad_norm": 1.5681915960647512, "learning_rate": 9.637299625768635e-06, "loss": 0.6931, "step": 1752 }, { "epoch": 0.14846495871268262, "grad_norm": 1.3087550441009381, "learning_rate": 9.636786566026701e-06, "loss": 0.6375, "step": 1753 }, { "epoch": 0.148549650645776, "grad_norm": 1.8464207440576947, "learning_rate": 9.636273157342027e-06, "loss": 0.6298, "step": 1754 }, { "epoch": 0.14863434257886937, "grad_norm": 1.2373304025441803, "learning_rate": 9.635759399753247e-06, "loss": 0.6405, "step": 1755 }, { "epoch": 0.14871903451196272, "grad_norm": 1.4939362699943044, "learning_rate": 9.635245293299024e-06, "loss": 0.6521, "step": 1756 }, { "epoch": 0.1488037264450561, "grad_norm": 0.6338769457057373, "learning_rate": 9.634730838018048e-06, "loss": 0.8656, "step": 1757 }, { "epoch": 0.14888841837814948, "grad_norm": 1.5637696008152409, "learning_rate": 9.634216033949035e-06, "loss": 0.6651, "step": 1758 }, { "epoch": 0.14897311031124286, "grad_norm": 1.4875923533520963, "learning_rate": 9.633700881130725e-06, "loss": 0.6721, "step": 1759 }, { "epoch": 0.14905780224433623, "grad_norm": 1.5346259493750207, "learning_rate": 9.633185379601888e-06, "loss": 0.6941, "step": 1760 }, { "epoch": 0.1491424941774296, "grad_norm": 1.5447663176720972, "learning_rate": 9.632669529401317e-06, "loss": 0.7052, "step": 1761 }, { "epoch": 0.14922718611052296, "grad_norm": 1.6442996113464012, "learning_rate": 9.632153330567834e-06, "loss": 0.6271, "step": 1762 }, { "epoch": 0.14931187804361634, "grad_norm": 1.3841834312337016, "learning_rate": 9.631636783140284e-06, "loss": 0.6464, "step": 1763 }, { "epoch": 0.14939656997670972, "grad_norm": 0.6842159878483535, "learning_rate": 9.63111988715754e-06, "loss": 0.8378, "step": 1764 }, { "epoch": 0.1494812619098031, "grad_norm": 1.4656974637775382, "learning_rate": 9.630602642658501e-06, "loss": 0.69, "step": 1765 }, { "epoch": 0.14956595384289648, "grad_norm": 1.1147954844376504, "learning_rate": 9.630085049682093e-06, "loss": 0.6503, "step": 1766 }, { "epoch": 0.14965064577598983, "grad_norm": 1.5227509784473863, "learning_rate": 9.629567108267268e-06, "loss": 0.704, "step": 1767 }, { "epoch": 0.1497353377090832, "grad_norm": 2.2163779177559433, "learning_rate": 9.629048818453004e-06, "loss": 0.6547, "step": 1768 }, { "epoch": 0.14982002964217658, "grad_norm": 1.413372113230316, "learning_rate": 9.628530180278305e-06, "loss": 0.7245, "step": 1769 }, { "epoch": 0.14990472157526996, "grad_norm": 1.6680226295868301, "learning_rate": 9.628011193782197e-06, "loss": 0.6723, "step": 1770 }, { "epoch": 0.14998941350836334, "grad_norm": 1.2421886273039626, "learning_rate": 9.627491859003743e-06, "loss": 0.7011, "step": 1771 }, { "epoch": 0.1500741054414567, "grad_norm": 1.3901553206657171, "learning_rate": 9.626972175982023e-06, "loss": 0.6609, "step": 1772 }, { "epoch": 0.15015879737455007, "grad_norm": 1.4853459473184172, "learning_rate": 9.626452144756144e-06, "loss": 0.6894, "step": 1773 }, { "epoch": 0.15024348930764345, "grad_norm": 1.6457978457538174, "learning_rate": 9.625931765365244e-06, "loss": 0.6367, "step": 1774 }, { "epoch": 0.15032818124073682, "grad_norm": 0.6074301069613536, "learning_rate": 9.625411037848484e-06, "loss": 0.8387, "step": 1775 }, { "epoch": 0.1504128731738302, "grad_norm": 1.1590877121392542, "learning_rate": 9.62488996224505e-06, "loss": 0.6042, "step": 1776 }, { "epoch": 0.15049756510692355, "grad_norm": 1.7505479678332374, "learning_rate": 9.624368538594158e-06, "loss": 0.7219, "step": 1777 }, { "epoch": 0.15058225704001693, "grad_norm": 1.5822592239298185, "learning_rate": 9.623846766935044e-06, "loss": 0.6393, "step": 1778 }, { "epoch": 0.1506669489731103, "grad_norm": 1.478853243330217, "learning_rate": 9.623324647306978e-06, "loss": 0.6622, "step": 1779 }, { "epoch": 0.1507516409062037, "grad_norm": 1.591298579511761, "learning_rate": 9.622802179749249e-06, "loss": 0.6643, "step": 1780 }, { "epoch": 0.15083633283929707, "grad_norm": 1.1213406182901076, "learning_rate": 9.622279364301176e-06, "loss": 0.6162, "step": 1781 }, { "epoch": 0.15092102477239042, "grad_norm": 1.352928983530413, "learning_rate": 9.621756201002109e-06, "loss": 0.7098, "step": 1782 }, { "epoch": 0.1510057167054838, "grad_norm": 1.55303624931992, "learning_rate": 9.621232689891411e-06, "loss": 0.6398, "step": 1783 }, { "epoch": 0.15109040863857717, "grad_norm": 0.671129524483278, "learning_rate": 9.620708831008483e-06, "loss": 0.8218, "step": 1784 }, { "epoch": 0.15117510057167055, "grad_norm": 1.4547957882189693, "learning_rate": 9.620184624392747e-06, "loss": 0.666, "step": 1785 }, { "epoch": 0.15125979250476393, "grad_norm": 1.484303466938259, "learning_rate": 9.619660070083654e-06, "loss": 0.6813, "step": 1786 }, { "epoch": 0.1513444844378573, "grad_norm": 1.9083118887016877, "learning_rate": 9.61913516812068e-06, "loss": 0.6969, "step": 1787 }, { "epoch": 0.15142917637095066, "grad_norm": 1.4082704778314292, "learning_rate": 9.618609918543324e-06, "loss": 0.6549, "step": 1788 }, { "epoch": 0.15151386830404404, "grad_norm": 16.764471251754095, "learning_rate": 9.618084321391115e-06, "loss": 0.6333, "step": 1789 }, { "epoch": 0.15159856023713741, "grad_norm": 0.7427790946206494, "learning_rate": 9.617558376703606e-06, "loss": 0.9109, "step": 1790 }, { "epoch": 0.1516832521702308, "grad_norm": 1.348957447297676, "learning_rate": 9.617032084520378e-06, "loss": 0.692, "step": 1791 }, { "epoch": 0.15176794410332417, "grad_norm": 1.8714063387134834, "learning_rate": 9.616505444881039e-06, "loss": 0.6369, "step": 1792 }, { "epoch": 0.15185263603641752, "grad_norm": 1.4065731801897927, "learning_rate": 9.615978457825217e-06, "loss": 0.6682, "step": 1793 }, { "epoch": 0.1519373279695109, "grad_norm": 1.3009968693052985, "learning_rate": 9.615451123392576e-06, "loss": 0.6449, "step": 1794 }, { "epoch": 0.15202201990260428, "grad_norm": 1.4203448690332587, "learning_rate": 9.614923441622797e-06, "loss": 0.6426, "step": 1795 }, { "epoch": 0.15210671183569766, "grad_norm": 1.3198913075192495, "learning_rate": 9.61439541255559e-06, "loss": 0.6701, "step": 1796 }, { "epoch": 0.15219140376879103, "grad_norm": 1.1997966973317689, "learning_rate": 9.613867036230697e-06, "loss": 0.5953, "step": 1797 }, { "epoch": 0.15227609570188438, "grad_norm": 1.5483818063268429, "learning_rate": 9.613338312687876e-06, "loss": 0.7001, "step": 1798 }, { "epoch": 0.15236078763497776, "grad_norm": 1.7822025877461893, "learning_rate": 9.612809241966918e-06, "loss": 0.675, "step": 1799 }, { "epoch": 0.15244547956807114, "grad_norm": 1.2739110556249338, "learning_rate": 9.61227982410764e-06, "loss": 0.6384, "step": 1800 }, { "epoch": 0.15253017150116452, "grad_norm": 1.341620603924882, "learning_rate": 9.611750059149881e-06, "loss": 0.6722, "step": 1801 }, { "epoch": 0.1526148634342579, "grad_norm": 1.2122595339346809, "learning_rate": 9.61121994713351e-06, "loss": 0.6808, "step": 1802 }, { "epoch": 0.15269955536735125, "grad_norm": 1.2198079230495504, "learning_rate": 9.61068948809842e-06, "loss": 0.6933, "step": 1803 }, { "epoch": 0.15278424730044463, "grad_norm": 1.1961224283482979, "learning_rate": 9.610158682084531e-06, "loss": 0.6328, "step": 1804 }, { "epoch": 0.152868939233538, "grad_norm": 1.3324196486486724, "learning_rate": 9.609627529131791e-06, "loss": 0.6825, "step": 1805 }, { "epoch": 0.15295363116663138, "grad_norm": 1.230024354637499, "learning_rate": 9.609096029280169e-06, "loss": 0.6368, "step": 1806 }, { "epoch": 0.15303832309972476, "grad_norm": 1.5166324492318082, "learning_rate": 9.608564182569664e-06, "loss": 0.657, "step": 1807 }, { "epoch": 0.1531230150328181, "grad_norm": 1.6049434567628518, "learning_rate": 9.608031989040301e-06, "loss": 0.6561, "step": 1808 }, { "epoch": 0.1532077069659115, "grad_norm": 1.3831863091806094, "learning_rate": 9.60749944873213e-06, "loss": 0.7022, "step": 1809 }, { "epoch": 0.15329239889900487, "grad_norm": 1.2653298713293841, "learning_rate": 9.60696656168523e-06, "loss": 0.6787, "step": 1810 }, { "epoch": 0.15337709083209825, "grad_norm": 1.3810955948962447, "learning_rate": 9.606433327939699e-06, "loss": 0.7087, "step": 1811 }, { "epoch": 0.15346178276519162, "grad_norm": 1.695012612139016, "learning_rate": 9.605899747535668e-06, "loss": 0.6824, "step": 1812 }, { "epoch": 0.153546474698285, "grad_norm": 1.735226272116433, "learning_rate": 9.605365820513291e-06, "loss": 0.6272, "step": 1813 }, { "epoch": 0.15363116663137835, "grad_norm": 1.6402184702454963, "learning_rate": 9.604831546912751e-06, "loss": 0.6854, "step": 1814 }, { "epoch": 0.15371585856447173, "grad_norm": 1.5757182921555708, "learning_rate": 9.604296926774253e-06, "loss": 0.6989, "step": 1815 }, { "epoch": 0.1538005504975651, "grad_norm": 1.1462175772639716, "learning_rate": 9.603761960138029e-06, "loss": 0.6657, "step": 1816 }, { "epoch": 0.1538852424306585, "grad_norm": 1.2003438114441105, "learning_rate": 9.603226647044341e-06, "loss": 0.6618, "step": 1817 }, { "epoch": 0.15396993436375186, "grad_norm": 1.3046858503589922, "learning_rate": 9.602690987533472e-06, "loss": 0.6955, "step": 1818 }, { "epoch": 0.15405462629684522, "grad_norm": 2.2634787212282252, "learning_rate": 9.602154981645732e-06, "loss": 0.6343, "step": 1819 }, { "epoch": 0.1541393182299386, "grad_norm": 3.006560532590382, "learning_rate": 9.601618629421463e-06, "loss": 0.6878, "step": 1820 }, { "epoch": 0.15422401016303197, "grad_norm": 1.2252396012633802, "learning_rate": 9.601081930901023e-06, "loss": 0.7118, "step": 1821 }, { "epoch": 0.15430870209612535, "grad_norm": 1.4393510506810634, "learning_rate": 9.600544886124805e-06, "loss": 0.7069, "step": 1822 }, { "epoch": 0.15439339402921873, "grad_norm": 0.7513791831976859, "learning_rate": 9.600007495133223e-06, "loss": 0.8816, "step": 1823 }, { "epoch": 0.15447808596231208, "grad_norm": 1.375304397770196, "learning_rate": 9.59946975796672e-06, "loss": 0.7096, "step": 1824 }, { "epoch": 0.15456277789540546, "grad_norm": 1.0994606323872722, "learning_rate": 9.598931674665761e-06, "loss": 0.662, "step": 1825 }, { "epoch": 0.15464746982849883, "grad_norm": 1.2617808784582907, "learning_rate": 9.598393245270842e-06, "loss": 0.6633, "step": 1826 }, { "epoch": 0.1547321617615922, "grad_norm": 2.833922512757455, "learning_rate": 9.59785446982248e-06, "loss": 0.6745, "step": 1827 }, { "epoch": 0.1548168536946856, "grad_norm": 1.3612003006596274, "learning_rate": 9.597315348361225e-06, "loss": 0.6832, "step": 1828 }, { "epoch": 0.15490154562777894, "grad_norm": 1.4123670526507355, "learning_rate": 9.596775880927644e-06, "loss": 0.6293, "step": 1829 }, { "epoch": 0.15498623756087232, "grad_norm": 1.6116986023748117, "learning_rate": 9.596236067562336e-06, "loss": 0.6557, "step": 1830 }, { "epoch": 0.1550709294939657, "grad_norm": 1.5137178126649673, "learning_rate": 9.595695908305928e-06, "loss": 0.7386, "step": 1831 }, { "epoch": 0.15515562142705908, "grad_norm": 1.3898746066901584, "learning_rate": 9.595155403199066e-06, "loss": 0.6777, "step": 1832 }, { "epoch": 0.15524031336015245, "grad_norm": 1.3280704610958831, "learning_rate": 9.594614552282429e-06, "loss": 0.7014, "step": 1833 }, { "epoch": 0.1553250052932458, "grad_norm": 1.5337415584926273, "learning_rate": 9.594073355596716e-06, "loss": 0.6551, "step": 1834 }, { "epoch": 0.15540969722633918, "grad_norm": 0.6417351918257067, "learning_rate": 9.593531813182657e-06, "loss": 0.8698, "step": 1835 }, { "epoch": 0.15549438915943256, "grad_norm": 1.5946623465828647, "learning_rate": 9.592989925081004e-06, "loss": 0.6661, "step": 1836 }, { "epoch": 0.15557908109252594, "grad_norm": 1.3784454342724008, "learning_rate": 9.592447691332539e-06, "loss": 0.6695, "step": 1837 }, { "epoch": 0.15566377302561932, "grad_norm": 1.3882037968677858, "learning_rate": 9.591905111978066e-06, "loss": 0.6235, "step": 1838 }, { "epoch": 0.1557484649587127, "grad_norm": 1.2277309287150033, "learning_rate": 9.59136218705842e-06, "loss": 0.6467, "step": 1839 }, { "epoch": 0.15583315689180605, "grad_norm": 2.130552621149038, "learning_rate": 9.590818916614455e-06, "loss": 0.6173, "step": 1840 }, { "epoch": 0.15591784882489942, "grad_norm": 1.618774612100392, "learning_rate": 9.590275300687057e-06, "loss": 0.7435, "step": 1841 }, { "epoch": 0.1560025407579928, "grad_norm": 1.742100640390267, "learning_rate": 9.589731339317137e-06, "loss": 0.6822, "step": 1842 }, { "epoch": 0.15608723269108618, "grad_norm": 1.6378558621053738, "learning_rate": 9.58918703254563e-06, "loss": 0.6858, "step": 1843 }, { "epoch": 0.15617192462417956, "grad_norm": 1.3960032446212123, "learning_rate": 9.588642380413498e-06, "loss": 0.6461, "step": 1844 }, { "epoch": 0.1562566165572729, "grad_norm": 1.3167612550011016, "learning_rate": 9.58809738296173e-06, "loss": 0.6533, "step": 1845 }, { "epoch": 0.1563413084903663, "grad_norm": 0.6222179870939093, "learning_rate": 9.587552040231336e-06, "loss": 0.8601, "step": 1846 }, { "epoch": 0.15642600042345967, "grad_norm": 1.4747687739004307, "learning_rate": 9.58700635226336e-06, "loss": 0.6318, "step": 1847 }, { "epoch": 0.15651069235655304, "grad_norm": 14.170281272763946, "learning_rate": 9.586460319098868e-06, "loss": 0.639, "step": 1848 }, { "epoch": 0.15659538428964642, "grad_norm": 1.576634301577309, "learning_rate": 9.585913940778949e-06, "loss": 0.6286, "step": 1849 }, { "epoch": 0.15668007622273977, "grad_norm": 1.6753757406187486, "learning_rate": 9.585367217344722e-06, "loss": 0.6951, "step": 1850 }, { "epoch": 0.15676476815583315, "grad_norm": 1.596379460945326, "learning_rate": 9.584820148837331e-06, "loss": 0.6423, "step": 1851 }, { "epoch": 0.15684946008892653, "grad_norm": 1.7925576605705633, "learning_rate": 9.584272735297949e-06, "loss": 0.6233, "step": 1852 }, { "epoch": 0.1569341520220199, "grad_norm": 1.208502813384371, "learning_rate": 9.583724976767766e-06, "loss": 0.6355, "step": 1853 }, { "epoch": 0.15701884395511329, "grad_norm": 2.734063300704476, "learning_rate": 9.583176873288007e-06, "loss": 0.6684, "step": 1854 }, { "epoch": 0.15710353588820664, "grad_norm": 1.394729853534527, "learning_rate": 9.58262842489992e-06, "loss": 0.6589, "step": 1855 }, { "epoch": 0.15718822782130001, "grad_norm": 1.4388386894389642, "learning_rate": 9.582079631644776e-06, "loss": 0.6893, "step": 1856 }, { "epoch": 0.1572729197543934, "grad_norm": 1.5999612169152766, "learning_rate": 9.581530493563878e-06, "loss": 0.678, "step": 1857 }, { "epoch": 0.15735761168748677, "grad_norm": 1.3945193574243693, "learning_rate": 9.58098101069855e-06, "loss": 0.6716, "step": 1858 }, { "epoch": 0.15744230362058015, "grad_norm": 1.459089637493747, "learning_rate": 9.580431183090141e-06, "loss": 0.646, "step": 1859 }, { "epoch": 0.1575269955536735, "grad_norm": 1.4640065285652304, "learning_rate": 9.579881010780035e-06, "loss": 0.6782, "step": 1860 }, { "epoch": 0.15761168748676688, "grad_norm": 1.3599256854424726, "learning_rate": 9.579330493809629e-06, "loss": 0.7033, "step": 1861 }, { "epoch": 0.15769637941986026, "grad_norm": 1.2807068285358112, "learning_rate": 9.578779632220355e-06, "loss": 0.6659, "step": 1862 }, { "epoch": 0.15778107135295363, "grad_norm": 1.5105243502903618, "learning_rate": 9.578228426053668e-06, "loss": 0.6404, "step": 1863 }, { "epoch": 0.157865763286047, "grad_norm": 1.473534708076647, "learning_rate": 9.57767687535105e-06, "loss": 0.6786, "step": 1864 }, { "epoch": 0.1579504552191404, "grad_norm": 1.575458100456613, "learning_rate": 9.577124980154006e-06, "loss": 0.65, "step": 1865 }, { "epoch": 0.15803514715223374, "grad_norm": 1.6899874401707995, "learning_rate": 9.57657274050407e-06, "loss": 0.6399, "step": 1866 }, { "epoch": 0.15811983908532712, "grad_norm": 1.169662162467701, "learning_rate": 9.576020156442802e-06, "loss": 0.6147, "step": 1867 }, { "epoch": 0.1582045310184205, "grad_norm": 0.6379553157584834, "learning_rate": 9.575467228011786e-06, "loss": 0.8917, "step": 1868 }, { "epoch": 0.15828922295151387, "grad_norm": 1.3110598709775578, "learning_rate": 9.574913955252632e-06, "loss": 0.7131, "step": 1869 }, { "epoch": 0.15837391488460725, "grad_norm": 1.28559480529455, "learning_rate": 9.574360338206978e-06, "loss": 0.7096, "step": 1870 }, { "epoch": 0.1584586068177006, "grad_norm": 1.6551763349448445, "learning_rate": 9.573806376916486e-06, "loss": 0.6779, "step": 1871 }, { "epoch": 0.15854329875079398, "grad_norm": 1.3651514212648541, "learning_rate": 9.573252071422845e-06, "loss": 0.6882, "step": 1872 }, { "epoch": 0.15862799068388736, "grad_norm": 1.569384281212983, "learning_rate": 9.57269742176777e-06, "loss": 0.6959, "step": 1873 }, { "epoch": 0.15871268261698074, "grad_norm": 1.079112001353473, "learning_rate": 9.572142427992998e-06, "loss": 0.6235, "step": 1874 }, { "epoch": 0.15879737455007412, "grad_norm": 1.1931712613585344, "learning_rate": 9.5715870901403e-06, "loss": 0.6051, "step": 1875 }, { "epoch": 0.15888206648316747, "grad_norm": 1.2876062411775955, "learning_rate": 9.571031408251466e-06, "loss": 0.695, "step": 1876 }, { "epoch": 0.15896675841626084, "grad_norm": 1.2681518721041414, "learning_rate": 9.570475382368312e-06, "loss": 0.6546, "step": 1877 }, { "epoch": 0.15905145034935422, "grad_norm": 0.6733663831891964, "learning_rate": 9.569919012532684e-06, "loss": 0.9171, "step": 1878 }, { "epoch": 0.1591361422824476, "grad_norm": 0.5974940691642108, "learning_rate": 9.569362298786452e-06, "loss": 0.8819, "step": 1879 }, { "epoch": 0.15922083421554098, "grad_norm": 1.2926639864219942, "learning_rate": 9.568805241171511e-06, "loss": 0.6456, "step": 1880 }, { "epoch": 0.15930552614863433, "grad_norm": 1.3069236505021222, "learning_rate": 9.568247839729783e-06, "loss": 0.6005, "step": 1881 }, { "epoch": 0.1593902180817277, "grad_norm": 1.58516303544092, "learning_rate": 9.567690094503214e-06, "loss": 0.6955, "step": 1882 }, { "epoch": 0.1594749100148211, "grad_norm": 1.2171193246891105, "learning_rate": 9.56713200553378e-06, "loss": 0.6843, "step": 1883 }, { "epoch": 0.15955960194791446, "grad_norm": 1.5448822330980583, "learning_rate": 9.566573572863475e-06, "loss": 0.6462, "step": 1884 }, { "epoch": 0.15964429388100784, "grad_norm": 1.4025250597904577, "learning_rate": 9.566014796534332e-06, "loss": 0.7066, "step": 1885 }, { "epoch": 0.1597289858141012, "grad_norm": 2.463321914399917, "learning_rate": 9.565455676588394e-06, "loss": 0.7053, "step": 1886 }, { "epoch": 0.15981367774719457, "grad_norm": 1.4355320552625397, "learning_rate": 9.564896213067743e-06, "loss": 0.6464, "step": 1887 }, { "epoch": 0.15989836968028795, "grad_norm": 1.3464800593418633, "learning_rate": 9.564336406014479e-06, "loss": 0.6607, "step": 1888 }, { "epoch": 0.15998306161338133, "grad_norm": 1.2625737406350614, "learning_rate": 9.56377625547073e-06, "loss": 0.6056, "step": 1889 }, { "epoch": 0.1600677535464747, "grad_norm": 1.349181520968397, "learning_rate": 9.563215761478653e-06, "loss": 0.6711, "step": 1890 }, { "epoch": 0.16015244547956808, "grad_norm": 1.464884110341762, "learning_rate": 9.562654924080426e-06, "loss": 0.669, "step": 1891 }, { "epoch": 0.16023713741266143, "grad_norm": 1.8842006670621514, "learning_rate": 9.562093743318256e-06, "loss": 0.6523, "step": 1892 }, { "epoch": 0.1603218293457548, "grad_norm": 1.91621504515598, "learning_rate": 9.561532219234374e-06, "loss": 0.6374, "step": 1893 }, { "epoch": 0.1604065212788482, "grad_norm": 1.580191746450081, "learning_rate": 9.560970351871037e-06, "loss": 0.7135, "step": 1894 }, { "epoch": 0.16049121321194157, "grad_norm": 1.3225870261173442, "learning_rate": 9.560408141270531e-06, "loss": 0.6691, "step": 1895 }, { "epoch": 0.16057590514503495, "grad_norm": 1.306119218031899, "learning_rate": 9.559845587475165e-06, "loss": 0.6891, "step": 1896 }, { "epoch": 0.1606605970781283, "grad_norm": 1.3746154996094186, "learning_rate": 9.55928269052727e-06, "loss": 0.661, "step": 1897 }, { "epoch": 0.16074528901122168, "grad_norm": 1.33316709386423, "learning_rate": 9.558719450469212e-06, "loss": 0.6774, "step": 1898 }, { "epoch": 0.16082998094431505, "grad_norm": 2.1851734691929967, "learning_rate": 9.558155867343376e-06, "loss": 0.6623, "step": 1899 }, { "epoch": 0.16091467287740843, "grad_norm": 1.4281972599369737, "learning_rate": 9.557591941192174e-06, "loss": 0.7109, "step": 1900 }, { "epoch": 0.1609993648105018, "grad_norm": 1.3926319334955526, "learning_rate": 9.557027672058044e-06, "loss": 0.6425, "step": 1901 }, { "epoch": 0.16108405674359516, "grad_norm": 0.6858002618472424, "learning_rate": 9.556463059983453e-06, "loss": 0.8793, "step": 1902 }, { "epoch": 0.16116874867668854, "grad_norm": 1.7763595636309644, "learning_rate": 9.555898105010885e-06, "loss": 0.6779, "step": 1903 }, { "epoch": 0.16125344060978192, "grad_norm": 1.3254652060227954, "learning_rate": 9.555332807182865e-06, "loss": 0.6297, "step": 1904 }, { "epoch": 0.1613381325428753, "grad_norm": 7.764133650534454, "learning_rate": 9.554767166541926e-06, "loss": 0.6287, "step": 1905 }, { "epoch": 0.16142282447596867, "grad_norm": 1.2685042338720798, "learning_rate": 9.554201183130641e-06, "loss": 0.596, "step": 1906 }, { "epoch": 0.16150751640906202, "grad_norm": 1.3091896776536291, "learning_rate": 9.5536348569916e-06, "loss": 0.6927, "step": 1907 }, { "epoch": 0.1615922083421554, "grad_norm": 0.6176564866306394, "learning_rate": 9.553068188167425e-06, "loss": 0.857, "step": 1908 }, { "epoch": 0.16167690027524878, "grad_norm": 1.5527137749873532, "learning_rate": 9.552501176700758e-06, "loss": 0.6573, "step": 1909 }, { "epoch": 0.16176159220834216, "grad_norm": 1.1189022049561508, "learning_rate": 9.551933822634271e-06, "loss": 0.6517, "step": 1910 }, { "epoch": 0.16184628414143554, "grad_norm": 1.1899880850287368, "learning_rate": 9.55136612601066e-06, "loss": 0.6582, "step": 1911 }, { "epoch": 0.1619309760745289, "grad_norm": 1.3813455943048896, "learning_rate": 9.550798086872647e-06, "loss": 0.6908, "step": 1912 }, { "epoch": 0.16201566800762227, "grad_norm": 1.3336906386842775, "learning_rate": 9.550229705262983e-06, "loss": 0.7318, "step": 1913 }, { "epoch": 0.16210035994071564, "grad_norm": 1.1851871832358298, "learning_rate": 9.549660981224437e-06, "loss": 0.653, "step": 1914 }, { "epoch": 0.16218505187380902, "grad_norm": 1.2311788610089778, "learning_rate": 9.54909191479981e-06, "loss": 0.704, "step": 1915 }, { "epoch": 0.1622697438069024, "grad_norm": 1.2753952832473734, "learning_rate": 9.548522506031928e-06, "loss": 0.6394, "step": 1916 }, { "epoch": 0.16235443573999578, "grad_norm": 1.5120455613353634, "learning_rate": 9.547952754963643e-06, "loss": 0.6768, "step": 1917 }, { "epoch": 0.16243912767308913, "grad_norm": 1.9328811008916853, "learning_rate": 9.54738266163783e-06, "loss": 0.5838, "step": 1918 }, { "epoch": 0.1625238196061825, "grad_norm": 1.7902481421599665, "learning_rate": 9.546812226097393e-06, "loss": 0.6319, "step": 1919 }, { "epoch": 0.16260851153927589, "grad_norm": 1.5511988596942405, "learning_rate": 9.546241448385258e-06, "loss": 0.6738, "step": 1920 }, { "epoch": 0.16269320347236926, "grad_norm": 1.2518140767225783, "learning_rate": 9.545670328544382e-06, "loss": 0.6805, "step": 1921 }, { "epoch": 0.16277789540546264, "grad_norm": 1.5769014050550503, "learning_rate": 9.54509886661774e-06, "loss": 0.6602, "step": 1922 }, { "epoch": 0.162862587338556, "grad_norm": 1.1971370338170644, "learning_rate": 9.544527062648346e-06, "loss": 0.6491, "step": 1923 }, { "epoch": 0.16294727927164937, "grad_norm": 1.5927486621077234, "learning_rate": 9.543954916679223e-06, "loss": 0.682, "step": 1924 }, { "epoch": 0.16303197120474275, "grad_norm": 1.8168109800799053, "learning_rate": 9.543382428753431e-06, "loss": 0.642, "step": 1925 }, { "epoch": 0.16311666313783613, "grad_norm": 1.6705484736861222, "learning_rate": 9.542809598914053e-06, "loss": 0.6817, "step": 1926 }, { "epoch": 0.1632013550709295, "grad_norm": 1.3275057235943932, "learning_rate": 9.5422364272042e-06, "loss": 0.6562, "step": 1927 }, { "epoch": 0.16328604700402286, "grad_norm": 1.2023694277001538, "learning_rate": 9.541662913667e-06, "loss": 0.6237, "step": 1928 }, { "epoch": 0.16337073893711623, "grad_norm": 1.3402175688062934, "learning_rate": 9.541089058345619e-06, "loss": 0.7535, "step": 1929 }, { "epoch": 0.1634554308702096, "grad_norm": 1.2667536682576095, "learning_rate": 9.54051486128324e-06, "loss": 0.639, "step": 1930 }, { "epoch": 0.163540122803303, "grad_norm": 2.3412267572867718, "learning_rate": 9.539940322523073e-06, "loss": 0.6151, "step": 1931 }, { "epoch": 0.16362481473639637, "grad_norm": 1.4222195637480772, "learning_rate": 9.539365442108357e-06, "loss": 0.6968, "step": 1932 }, { "epoch": 0.16370950666948972, "grad_norm": 1.478982269708994, "learning_rate": 9.538790220082355e-06, "loss": 0.6039, "step": 1933 }, { "epoch": 0.1637941986025831, "grad_norm": 1.3806217154400076, "learning_rate": 9.538214656488354e-06, "loss": 0.6559, "step": 1934 }, { "epoch": 0.16387889053567647, "grad_norm": 0.8194902901461246, "learning_rate": 9.537638751369668e-06, "loss": 0.8646, "step": 1935 }, { "epoch": 0.16396358246876985, "grad_norm": 1.7369530555486037, "learning_rate": 9.53706250476964e-06, "loss": 0.6831, "step": 1936 }, { "epoch": 0.16404827440186323, "grad_norm": 1.2628319601519271, "learning_rate": 9.536485916731634e-06, "loss": 0.6401, "step": 1937 }, { "epoch": 0.1641329663349566, "grad_norm": 1.382203522570499, "learning_rate": 9.53590898729904e-06, "loss": 0.6374, "step": 1938 }, { "epoch": 0.16421765826804996, "grad_norm": 1.7492807617492636, "learning_rate": 9.535331716515277e-06, "loss": 0.6572, "step": 1939 }, { "epoch": 0.16430235020114334, "grad_norm": 3.0137721625426446, "learning_rate": 9.534754104423785e-06, "loss": 0.6354, "step": 1940 }, { "epoch": 0.16438704213423672, "grad_norm": 1.2676990729654383, "learning_rate": 9.534176151068035e-06, "loss": 0.6095, "step": 1941 }, { "epoch": 0.1644717340673301, "grad_norm": 1.6930595562064639, "learning_rate": 9.533597856491522e-06, "loss": 0.7173, "step": 1942 }, { "epoch": 0.16455642600042347, "grad_norm": 0.6745161872644371, "learning_rate": 9.53301922073776e-06, "loss": 0.8756, "step": 1943 }, { "epoch": 0.16464111793351682, "grad_norm": 0.6271525533088015, "learning_rate": 9.5324402438503e-06, "loss": 0.9288, "step": 1944 }, { "epoch": 0.1647258098666102, "grad_norm": 1.6409055157942674, "learning_rate": 9.53186092587271e-06, "loss": 0.6828, "step": 1945 }, { "epoch": 0.16481050179970358, "grad_norm": 1.6784451126171789, "learning_rate": 9.53128126684859e-06, "loss": 0.6153, "step": 1946 }, { "epoch": 0.16489519373279696, "grad_norm": 1.556216112778638, "learning_rate": 9.53070126682156e-06, "loss": 0.7317, "step": 1947 }, { "epoch": 0.16497988566589034, "grad_norm": 1.4438467713213172, "learning_rate": 9.530120925835267e-06, "loss": 0.6619, "step": 1948 }, { "epoch": 0.16506457759898369, "grad_norm": 1.6122006683172345, "learning_rate": 9.529540243933387e-06, "loss": 0.7002, "step": 1949 }, { "epoch": 0.16514926953207706, "grad_norm": 1.259397977420466, "learning_rate": 9.528959221159619e-06, "loss": 0.6434, "step": 1950 }, { "epoch": 0.16523396146517044, "grad_norm": 2.313075391282109, "learning_rate": 9.528377857557686e-06, "loss": 0.6919, "step": 1951 }, { "epoch": 0.16531865339826382, "grad_norm": 0.716612784442106, "learning_rate": 9.527796153171342e-06, "loss": 0.896, "step": 1952 }, { "epoch": 0.1654033453313572, "grad_norm": 1.3393992365099754, "learning_rate": 9.52721410804436e-06, "loss": 0.7006, "step": 1953 }, { "epoch": 0.16548803726445055, "grad_norm": 1.248754348376321, "learning_rate": 9.526631722220548e-06, "loss": 0.6548, "step": 1954 }, { "epoch": 0.16557272919754393, "grad_norm": 1.1939127486166197, "learning_rate": 9.526048995743724e-06, "loss": 0.6796, "step": 1955 }, { "epoch": 0.1656574211306373, "grad_norm": 0.7303306584347959, "learning_rate": 9.52546592865775e-06, "loss": 0.8386, "step": 1956 }, { "epoch": 0.16574211306373068, "grad_norm": 3.255182639272683, "learning_rate": 9.5248825210065e-06, "loss": 0.6966, "step": 1957 }, { "epoch": 0.16582680499682406, "grad_norm": 1.9362864456314144, "learning_rate": 9.52429877283388e-06, "loss": 0.6061, "step": 1958 }, { "epoch": 0.1659114969299174, "grad_norm": 1.575039685542176, "learning_rate": 9.52371468418382e-06, "loss": 0.6697, "step": 1959 }, { "epoch": 0.1659961888630108, "grad_norm": 1.508620118847034, "learning_rate": 9.523130255100275e-06, "loss": 0.658, "step": 1960 }, { "epoch": 0.16608088079610417, "grad_norm": 1.3147207440225273, "learning_rate": 9.522545485627228e-06, "loss": 0.6432, "step": 1961 }, { "epoch": 0.16616557272919755, "grad_norm": 2.0875487665324175, "learning_rate": 9.521960375808685e-06, "loss": 0.6938, "step": 1962 }, { "epoch": 0.16625026466229093, "grad_norm": 1.4456158480754473, "learning_rate": 9.52137492568868e-06, "loss": 0.6847, "step": 1963 }, { "epoch": 0.1663349565953843, "grad_norm": 1.7393110552617344, "learning_rate": 9.520789135311267e-06, "loss": 0.6529, "step": 1964 }, { "epoch": 0.16641964852847765, "grad_norm": 1.7285977561908696, "learning_rate": 9.520203004720536e-06, "loss": 0.6939, "step": 1965 }, { "epoch": 0.16650434046157103, "grad_norm": 1.3258945835604614, "learning_rate": 9.519616533960591e-06, "loss": 0.6372, "step": 1966 }, { "epoch": 0.1665890323946644, "grad_norm": 3.857114683405077, "learning_rate": 9.519029723075572e-06, "loss": 0.6496, "step": 1967 }, { "epoch": 0.1666737243277578, "grad_norm": 1.7477793161118442, "learning_rate": 9.518442572109636e-06, "loss": 0.6833, "step": 1968 }, { "epoch": 0.16675841626085117, "grad_norm": 1.467846342887264, "learning_rate": 9.517855081106968e-06, "loss": 0.7045, "step": 1969 }, { "epoch": 0.16684310819394452, "grad_norm": 1.2766230020328544, "learning_rate": 9.517267250111783e-06, "loss": 0.6269, "step": 1970 }, { "epoch": 0.1669278001270379, "grad_norm": 1.653030404046892, "learning_rate": 9.516679079168318e-06, "loss": 0.6826, "step": 1971 }, { "epoch": 0.16701249206013127, "grad_norm": 1.8692801314016987, "learning_rate": 9.516090568320837e-06, "loss": 0.6518, "step": 1972 }, { "epoch": 0.16709718399322465, "grad_norm": 1.424386767785003, "learning_rate": 9.515501717613625e-06, "loss": 0.65, "step": 1973 }, { "epoch": 0.16718187592631803, "grad_norm": 1.4282843597442823, "learning_rate": 9.514912527090999e-06, "loss": 0.6358, "step": 1974 }, { "epoch": 0.16726656785941138, "grad_norm": 1.6152926464532833, "learning_rate": 9.514322996797297e-06, "loss": 0.6366, "step": 1975 }, { "epoch": 0.16735125979250476, "grad_norm": 1.6083176513273572, "learning_rate": 9.513733126776884e-06, "loss": 0.6616, "step": 1976 }, { "epoch": 0.16743595172559814, "grad_norm": 2.0830050970358034, "learning_rate": 9.513142917074151e-06, "loss": 0.648, "step": 1977 }, { "epoch": 0.16752064365869151, "grad_norm": 1.606845142826889, "learning_rate": 9.512552367733518e-06, "loss": 0.6962, "step": 1978 }, { "epoch": 0.1676053355917849, "grad_norm": 1.442682513207123, "learning_rate": 9.511961478799424e-06, "loss": 0.7115, "step": 1979 }, { "epoch": 0.16769002752487824, "grad_norm": 0.7281693472615983, "learning_rate": 9.511370250316337e-06, "loss": 0.8716, "step": 1980 }, { "epoch": 0.16777471945797162, "grad_norm": 1.4903018425227212, "learning_rate": 9.510778682328748e-06, "loss": 0.6514, "step": 1981 }, { "epoch": 0.167859411391065, "grad_norm": 3.114768333414248, "learning_rate": 9.510186774881179e-06, "loss": 0.6591, "step": 1982 }, { "epoch": 0.16794410332415838, "grad_norm": 2.0312664644626888, "learning_rate": 9.50959452801817e-06, "loss": 0.6235, "step": 1983 }, { "epoch": 0.16802879525725176, "grad_norm": 1.628838623383532, "learning_rate": 9.509001941784297e-06, "loss": 0.6777, "step": 1984 }, { "epoch": 0.1681134871903451, "grad_norm": 1.6356087961315071, "learning_rate": 9.508409016224149e-06, "loss": 0.6616, "step": 1985 }, { "epoch": 0.16819817912343848, "grad_norm": 1.8140017103442923, "learning_rate": 9.50781575138235e-06, "loss": 0.6412, "step": 1986 }, { "epoch": 0.16828287105653186, "grad_norm": 1.4802439384363986, "learning_rate": 9.507222147303545e-06, "loss": 0.6268, "step": 1987 }, { "epoch": 0.16836756298962524, "grad_norm": 1.2465685904760615, "learning_rate": 9.506628204032408e-06, "loss": 0.6334, "step": 1988 }, { "epoch": 0.16845225492271862, "grad_norm": 1.2963066240048062, "learning_rate": 9.506033921613636e-06, "loss": 0.6826, "step": 1989 }, { "epoch": 0.168536946855812, "grad_norm": 1.3678570346409944, "learning_rate": 9.505439300091947e-06, "loss": 0.6797, "step": 1990 }, { "epoch": 0.16862163878890535, "grad_norm": 1.6163372569144996, "learning_rate": 9.504844339512096e-06, "loss": 0.7373, "step": 1991 }, { "epoch": 0.16870633072199873, "grad_norm": 1.389062245990073, "learning_rate": 9.504249039918854e-06, "loss": 0.6666, "step": 1992 }, { "epoch": 0.1687910226550921, "grad_norm": 0.6373648301834626, "learning_rate": 9.503653401357021e-06, "loss": 0.8306, "step": 1993 }, { "epoch": 0.16887571458818548, "grad_norm": 1.822294545860096, "learning_rate": 9.50305742387142e-06, "loss": 0.6644, "step": 1994 }, { "epoch": 0.16896040652127886, "grad_norm": 1.2656864361344555, "learning_rate": 9.502461107506903e-06, "loss": 0.6131, "step": 1995 }, { "epoch": 0.1690450984543722, "grad_norm": 0.5630375349419748, "learning_rate": 9.501864452308348e-06, "loss": 0.8517, "step": 1996 }, { "epoch": 0.1691297903874656, "grad_norm": 1.5835502201419591, "learning_rate": 9.501267458320654e-06, "loss": 0.7218, "step": 1997 }, { "epoch": 0.16921448232055897, "grad_norm": 1.7085639323733555, "learning_rate": 9.500670125588747e-06, "loss": 0.6367, "step": 1998 }, { "epoch": 0.16929917425365235, "grad_norm": 1.51388680636628, "learning_rate": 9.500072454157581e-06, "loss": 0.6965, "step": 1999 }, { "epoch": 0.16938386618674572, "grad_norm": 1.6704780966975503, "learning_rate": 9.499474444072134e-06, "loss": 0.668, "step": 2000 }, { "epoch": 0.16946855811983907, "grad_norm": 1.7223174412065148, "learning_rate": 9.49887609537741e-06, "loss": 0.6439, "step": 2001 }, { "epoch": 0.16955325005293245, "grad_norm": 2.8023163456466444, "learning_rate": 9.498277408118437e-06, "loss": 0.6315, "step": 2002 }, { "epoch": 0.16963794198602583, "grad_norm": 1.2633885451472378, "learning_rate": 9.49767838234027e-06, "loss": 0.6749, "step": 2003 }, { "epoch": 0.1697226339191192, "grad_norm": 1.4386091442921787, "learning_rate": 9.497079018087987e-06, "loss": 0.6678, "step": 2004 }, { "epoch": 0.1698073258522126, "grad_norm": 1.8052107213376154, "learning_rate": 9.496479315406694e-06, "loss": 0.578, "step": 2005 }, { "epoch": 0.16989201778530594, "grad_norm": 1.8856805206636702, "learning_rate": 9.495879274341525e-06, "loss": 0.6693, "step": 2006 }, { "epoch": 0.16997670971839932, "grad_norm": 1.3549780117208097, "learning_rate": 9.495278894937633e-06, "loss": 0.6529, "step": 2007 }, { "epoch": 0.1700614016514927, "grad_norm": 1.6993503353982038, "learning_rate": 9.4946781772402e-06, "loss": 0.7015, "step": 2008 }, { "epoch": 0.17014609358458607, "grad_norm": 1.4894043665442165, "learning_rate": 9.494077121294434e-06, "loss": 0.6651, "step": 2009 }, { "epoch": 0.17023078551767945, "grad_norm": 2.066376210959679, "learning_rate": 9.493475727145567e-06, "loss": 0.696, "step": 2010 }, { "epoch": 0.1703154774507728, "grad_norm": 1.4573036396076136, "learning_rate": 9.492873994838858e-06, "loss": 0.6539, "step": 2011 }, { "epoch": 0.17040016938386618, "grad_norm": 1.545994049114867, "learning_rate": 9.492271924419591e-06, "loss": 0.5654, "step": 2012 }, { "epoch": 0.17048486131695956, "grad_norm": 1.3591373591565692, "learning_rate": 9.491669515933072e-06, "loss": 0.6564, "step": 2013 }, { "epoch": 0.17056955325005294, "grad_norm": 1.6174253657597157, "learning_rate": 9.491066769424638e-06, "loss": 0.6616, "step": 2014 }, { "epoch": 0.1706542451831463, "grad_norm": 1.489803014990869, "learning_rate": 9.490463684939648e-06, "loss": 0.7599, "step": 2015 }, { "epoch": 0.1707389371162397, "grad_norm": 2.0902195359842093, "learning_rate": 9.489860262523489e-06, "loss": 0.624, "step": 2016 }, { "epoch": 0.17082362904933304, "grad_norm": 1.781273062538819, "learning_rate": 9.48925650222157e-06, "loss": 0.6242, "step": 2017 }, { "epoch": 0.17090832098242642, "grad_norm": 2.2748073761557253, "learning_rate": 9.488652404079326e-06, "loss": 0.6447, "step": 2018 }, { "epoch": 0.1709930129155198, "grad_norm": 1.4326910585591763, "learning_rate": 9.48804796814222e-06, "loss": 0.683, "step": 2019 }, { "epoch": 0.17107770484861318, "grad_norm": 1.5805725036902833, "learning_rate": 9.487443194455742e-06, "loss": 0.5887, "step": 2020 }, { "epoch": 0.17116239678170656, "grad_norm": 1.2228146694836577, "learning_rate": 9.486838083065397e-06, "loss": 0.6639, "step": 2021 }, { "epoch": 0.1712470887147999, "grad_norm": 0.6502056033171486, "learning_rate": 9.48623263401673e-06, "loss": 0.798, "step": 2022 }, { "epoch": 0.17133178064789328, "grad_norm": 1.4519004660863422, "learning_rate": 9.485626847355303e-06, "loss": 0.6491, "step": 2023 }, { "epoch": 0.17141647258098666, "grad_norm": 1.7993673790724465, "learning_rate": 9.4850207231267e-06, "loss": 0.6828, "step": 2024 }, { "epoch": 0.17150116451408004, "grad_norm": 1.5373958095885734, "learning_rate": 9.48441426137654e-06, "loss": 0.6541, "step": 2025 }, { "epoch": 0.17158585644717342, "grad_norm": 1.5219674546219726, "learning_rate": 9.483807462150461e-06, "loss": 0.6855, "step": 2026 }, { "epoch": 0.17167054838026677, "grad_norm": 1.336360703403624, "learning_rate": 9.483200325494125e-06, "loss": 0.652, "step": 2027 }, { "epoch": 0.17175524031336015, "grad_norm": 1.5182612308212848, "learning_rate": 9.482592851453229e-06, "loss": 0.6453, "step": 2028 }, { "epoch": 0.17183993224645353, "grad_norm": 1.4579295379177022, "learning_rate": 9.481985040073482e-06, "loss": 0.683, "step": 2029 }, { "epoch": 0.1719246241795469, "grad_norm": 1.3376218627522463, "learning_rate": 9.481376891400628e-06, "loss": 0.6258, "step": 2030 }, { "epoch": 0.17200931611264028, "grad_norm": 1.2040187223912693, "learning_rate": 9.480768405480433e-06, "loss": 0.7081, "step": 2031 }, { "epoch": 0.17209400804573363, "grad_norm": 1.2240935959188732, "learning_rate": 9.480159582358688e-06, "loss": 0.6495, "step": 2032 }, { "epoch": 0.172178699978827, "grad_norm": 1.3048116777133403, "learning_rate": 9.479550422081212e-06, "loss": 0.6644, "step": 2033 }, { "epoch": 0.1722633919119204, "grad_norm": 1.9317897776427333, "learning_rate": 9.478940924693845e-06, "loss": 0.6363, "step": 2034 }, { "epoch": 0.17234808384501377, "grad_norm": 2.024370255314876, "learning_rate": 9.478331090242457e-06, "loss": 0.6864, "step": 2035 }, { "epoch": 0.17243277577810714, "grad_norm": 0.6265281612054586, "learning_rate": 9.477720918772942e-06, "loss": 0.8047, "step": 2036 }, { "epoch": 0.1725174677112005, "grad_norm": 1.2380333077129846, "learning_rate": 9.477110410331213e-06, "loss": 0.6836, "step": 2037 }, { "epoch": 0.17260215964429387, "grad_norm": 1.341554767889776, "learning_rate": 9.476499564963223e-06, "loss": 0.6105, "step": 2038 }, { "epoch": 0.17268685157738725, "grad_norm": 1.582628659412613, "learning_rate": 9.475888382714935e-06, "loss": 0.6209, "step": 2039 }, { "epoch": 0.17277154351048063, "grad_norm": 0.6321663860432187, "learning_rate": 9.475276863632343e-06, "loss": 0.8452, "step": 2040 }, { "epoch": 0.172856235443574, "grad_norm": 2.107965348351563, "learning_rate": 9.474665007761472e-06, "loss": 0.6679, "step": 2041 }, { "epoch": 0.17294092737666739, "grad_norm": 1.3135057973056659, "learning_rate": 9.474052815148364e-06, "loss": 0.6624, "step": 2042 }, { "epoch": 0.17302561930976074, "grad_norm": 1.7578394639493573, "learning_rate": 9.47344028583909e-06, "loss": 0.6549, "step": 2043 }, { "epoch": 0.17311031124285411, "grad_norm": 1.288772225147011, "learning_rate": 9.472827419879747e-06, "loss": 0.6554, "step": 2044 }, { "epoch": 0.1731950031759475, "grad_norm": 1.748248668442484, "learning_rate": 9.472214217316456e-06, "loss": 0.5874, "step": 2045 }, { "epoch": 0.17327969510904087, "grad_norm": 1.1827961812099699, "learning_rate": 9.471600678195363e-06, "loss": 0.6635, "step": 2046 }, { "epoch": 0.17336438704213425, "grad_norm": 1.2709375383369097, "learning_rate": 9.47098680256264e-06, "loss": 0.6345, "step": 2047 }, { "epoch": 0.1734490789752276, "grad_norm": 1.3559095791448232, "learning_rate": 9.470372590464487e-06, "loss": 0.669, "step": 2048 }, { "epoch": 0.17353377090832098, "grad_norm": 1.4745904286833997, "learning_rate": 9.469758041947124e-06, "loss": 0.6669, "step": 2049 }, { "epoch": 0.17361846284141436, "grad_norm": 2.1190127792474174, "learning_rate": 9.4691431570568e-06, "loss": 0.6764, "step": 2050 }, { "epoch": 0.17370315477450773, "grad_norm": 2.3633001680813965, "learning_rate": 9.468527935839788e-06, "loss": 0.6524, "step": 2051 }, { "epoch": 0.1737878467076011, "grad_norm": 1.1543823810119214, "learning_rate": 9.467912378342386e-06, "loss": 0.6402, "step": 2052 }, { "epoch": 0.17387253864069446, "grad_norm": 1.1113533323197602, "learning_rate": 9.46729648461092e-06, "loss": 0.6375, "step": 2053 }, { "epoch": 0.17395723057378784, "grad_norm": 1.3056352669924514, "learning_rate": 9.466680254691739e-06, "loss": 0.6651, "step": 2054 }, { "epoch": 0.17404192250688122, "grad_norm": 2.4958357774733604, "learning_rate": 9.466063688631214e-06, "loss": 0.6422, "step": 2055 }, { "epoch": 0.1741266144399746, "grad_norm": 1.70691705561984, "learning_rate": 9.46544678647575e-06, "loss": 0.7071, "step": 2056 }, { "epoch": 0.17421130637306798, "grad_norm": 1.5477365126669835, "learning_rate": 9.464829548271768e-06, "loss": 0.7352, "step": 2057 }, { "epoch": 0.17429599830616133, "grad_norm": 1.5340455952245606, "learning_rate": 9.46421197406572e-06, "loss": 0.6689, "step": 2058 }, { "epoch": 0.1743806902392547, "grad_norm": 1.164670377008166, "learning_rate": 9.463594063904083e-06, "loss": 0.6161, "step": 2059 }, { "epoch": 0.17446538217234808, "grad_norm": 1.8329530605330604, "learning_rate": 9.462975817833356e-06, "loss": 0.6384, "step": 2060 }, { "epoch": 0.17455007410544146, "grad_norm": 1.679023449011165, "learning_rate": 9.462357235900067e-06, "loss": 0.6301, "step": 2061 }, { "epoch": 0.17463476603853484, "grad_norm": 2.165456283119355, "learning_rate": 9.461738318150764e-06, "loss": 0.6688, "step": 2062 }, { "epoch": 0.1747194579716282, "grad_norm": 1.1887356015363915, "learning_rate": 9.46111906463203e-06, "loss": 0.6169, "step": 2063 }, { "epoch": 0.17480414990472157, "grad_norm": 1.3058712874989016, "learning_rate": 9.460499475390463e-06, "loss": 0.688, "step": 2064 }, { "epoch": 0.17488884183781495, "grad_norm": 1.368032904701382, "learning_rate": 9.459879550472689e-06, "loss": 0.6771, "step": 2065 }, { "epoch": 0.17497353377090832, "grad_norm": 1.3596064464032485, "learning_rate": 9.459259289925365e-06, "loss": 0.6319, "step": 2066 }, { "epoch": 0.1750582257040017, "grad_norm": 1.5647484670185627, "learning_rate": 9.458638693795165e-06, "loss": 0.6635, "step": 2067 }, { "epoch": 0.17514291763709508, "grad_norm": 1.7072915162262248, "learning_rate": 9.458017762128794e-06, "loss": 0.6763, "step": 2068 }, { "epoch": 0.17522760957018843, "grad_norm": 1.7480178568372118, "learning_rate": 9.45739649497298e-06, "loss": 0.6234, "step": 2069 }, { "epoch": 0.1753123015032818, "grad_norm": 0.6682805408460809, "learning_rate": 9.456774892374476e-06, "loss": 0.8635, "step": 2070 }, { "epoch": 0.1753969934363752, "grad_norm": 1.2019608002234217, "learning_rate": 9.456152954380063e-06, "loss": 0.7086, "step": 2071 }, { "epoch": 0.17548168536946857, "grad_norm": 1.3518669408158714, "learning_rate": 9.455530681036545e-06, "loss": 0.6946, "step": 2072 }, { "epoch": 0.17556637730256194, "grad_norm": 1.3316220864781592, "learning_rate": 9.454908072390748e-06, "loss": 0.7228, "step": 2073 }, { "epoch": 0.1756510692356553, "grad_norm": 1.3899454589399023, "learning_rate": 9.454285128489529e-06, "loss": 0.6559, "step": 2074 }, { "epoch": 0.17573576116874867, "grad_norm": 2.2346641302112262, "learning_rate": 9.453661849379768e-06, "loss": 0.6768, "step": 2075 }, { "epoch": 0.17582045310184205, "grad_norm": 0.6664517302753588, "learning_rate": 9.453038235108368e-06, "loss": 0.8321, "step": 2076 }, { "epoch": 0.17590514503493543, "grad_norm": 1.144516705121897, "learning_rate": 9.452414285722263e-06, "loss": 0.7129, "step": 2077 }, { "epoch": 0.1759898369680288, "grad_norm": 1.4903937609510263, "learning_rate": 9.451790001268404e-06, "loss": 0.6748, "step": 2078 }, { "epoch": 0.17607452890112216, "grad_norm": 1.5670441552934076, "learning_rate": 9.451165381793777e-06, "loss": 0.6534, "step": 2079 }, { "epoch": 0.17615922083421554, "grad_norm": 1.3326661220500686, "learning_rate": 9.450540427345383e-06, "loss": 0.7155, "step": 2080 }, { "epoch": 0.1762439127673089, "grad_norm": 1.3135480872286618, "learning_rate": 9.449915137970256e-06, "loss": 0.6803, "step": 2081 }, { "epoch": 0.1763286047004023, "grad_norm": 1.758097780186601, "learning_rate": 9.44928951371545e-06, "loss": 0.6511, "step": 2082 }, { "epoch": 0.17641329663349567, "grad_norm": 1.983280455342128, "learning_rate": 9.44866355462805e-06, "loss": 0.7287, "step": 2083 }, { "epoch": 0.17649798856658902, "grad_norm": 1.137421052407408, "learning_rate": 9.448037260755159e-06, "loss": 0.7112, "step": 2084 }, { "epoch": 0.1765826804996824, "grad_norm": 2.1543394036560928, "learning_rate": 9.447410632143912e-06, "loss": 0.7037, "step": 2085 }, { "epoch": 0.17666737243277578, "grad_norm": 1.7471081593236533, "learning_rate": 9.446783668841463e-06, "loss": 0.6534, "step": 2086 }, { "epoch": 0.17675206436586915, "grad_norm": 2.0112260545248666, "learning_rate": 9.446156370894996e-06, "loss": 0.6778, "step": 2087 }, { "epoch": 0.17683675629896253, "grad_norm": 2.2069429207291438, "learning_rate": 9.445528738351721e-06, "loss": 0.6244, "step": 2088 }, { "epoch": 0.17692144823205588, "grad_norm": 1.3092651983712522, "learning_rate": 9.444900771258867e-06, "loss": 0.6675, "step": 2089 }, { "epoch": 0.17700614016514926, "grad_norm": 1.3437689784007225, "learning_rate": 9.444272469663693e-06, "loss": 0.7046, "step": 2090 }, { "epoch": 0.17709083209824264, "grad_norm": 1.7002865271819154, "learning_rate": 9.443643833613482e-06, "loss": 0.6302, "step": 2091 }, { "epoch": 0.17717552403133602, "grad_norm": 1.3587977270972884, "learning_rate": 9.443014863155544e-06, "loss": 0.6633, "step": 2092 }, { "epoch": 0.1772602159644294, "grad_norm": 0.6265154987047614, "learning_rate": 9.442385558337209e-06, "loss": 0.8514, "step": 2093 }, { "epoch": 0.17734490789752277, "grad_norm": 3.1485562948792345, "learning_rate": 9.441755919205836e-06, "loss": 0.6466, "step": 2094 }, { "epoch": 0.17742959983061612, "grad_norm": 1.258791520692106, "learning_rate": 9.441125945808812e-06, "loss": 0.6621, "step": 2095 }, { "epoch": 0.1775142917637095, "grad_norm": 1.3006758380916865, "learning_rate": 9.440495638193544e-06, "loss": 0.6232, "step": 2096 }, { "epoch": 0.17759898369680288, "grad_norm": 1.862297583785665, "learning_rate": 9.439864996407464e-06, "loss": 0.6365, "step": 2097 }, { "epoch": 0.17768367562989626, "grad_norm": 1.581187231353403, "learning_rate": 9.439234020498034e-06, "loss": 0.6395, "step": 2098 }, { "epoch": 0.17776836756298964, "grad_norm": 1.9240164603697831, "learning_rate": 9.438602710512736e-06, "loss": 0.66, "step": 2099 }, { "epoch": 0.177853059496083, "grad_norm": 1.2279804047965432, "learning_rate": 9.43797106649908e-06, "loss": 0.6069, "step": 2100 }, { "epoch": 0.17793775142917637, "grad_norm": 1.4943553437630877, "learning_rate": 9.437339088504603e-06, "loss": 0.6885, "step": 2101 }, { "epoch": 0.17802244336226974, "grad_norm": 0.6330115227145857, "learning_rate": 9.436706776576862e-06, "loss": 0.8502, "step": 2102 }, { "epoch": 0.17810713529536312, "grad_norm": 1.3242256333625335, "learning_rate": 9.436074130763444e-06, "loss": 0.6363, "step": 2103 }, { "epoch": 0.1781918272284565, "grad_norm": 1.3569908799236032, "learning_rate": 9.435441151111955e-06, "loss": 0.6447, "step": 2104 }, { "epoch": 0.17827651916154985, "grad_norm": 1.5684827866924458, "learning_rate": 9.434807837670034e-06, "loss": 0.6108, "step": 2105 }, { "epoch": 0.17836121109464323, "grad_norm": 1.3545394351102664, "learning_rate": 9.434174190485339e-06, "loss": 0.6744, "step": 2106 }, { "epoch": 0.1784459030277366, "grad_norm": 1.2573624116399862, "learning_rate": 9.433540209605557e-06, "loss": 0.6602, "step": 2107 }, { "epoch": 0.17853059496082999, "grad_norm": 1.4530844898124644, "learning_rate": 9.432905895078397e-06, "loss": 0.7294, "step": 2108 }, { "epoch": 0.17861528689392336, "grad_norm": 1.3637186982109957, "learning_rate": 9.432271246951594e-06, "loss": 0.6526, "step": 2109 }, { "epoch": 0.17869997882701671, "grad_norm": 1.7076535313378594, "learning_rate": 9.431636265272913e-06, "loss": 0.692, "step": 2110 }, { "epoch": 0.1787846707601101, "grad_norm": 1.2983945534295203, "learning_rate": 9.431000950090133e-06, "loss": 0.6206, "step": 2111 }, { "epoch": 0.17886936269320347, "grad_norm": 1.6857799531824356, "learning_rate": 9.43036530145107e-06, "loss": 0.6155, "step": 2112 }, { "epoch": 0.17895405462629685, "grad_norm": 1.5469656454068104, "learning_rate": 9.429729319403558e-06, "loss": 0.7002, "step": 2113 }, { "epoch": 0.17903874655939023, "grad_norm": 1.4600961287386278, "learning_rate": 9.429093003995458e-06, "loss": 0.6936, "step": 2114 }, { "epoch": 0.17912343849248358, "grad_norm": 1.8016707614478205, "learning_rate": 9.428456355274655e-06, "loss": 0.6813, "step": 2115 }, { "epoch": 0.17920813042557696, "grad_norm": 1.6710429353192644, "learning_rate": 9.427819373289061e-06, "loss": 0.6656, "step": 2116 }, { "epoch": 0.17929282235867033, "grad_norm": 0.6934603984472103, "learning_rate": 9.427182058086615e-06, "loss": 0.8619, "step": 2117 }, { "epoch": 0.1793775142917637, "grad_norm": 1.6518300223303957, "learning_rate": 9.426544409715277e-06, "loss": 0.6608, "step": 2118 }, { "epoch": 0.1794622062248571, "grad_norm": 1.333372560762834, "learning_rate": 9.425906428223031e-06, "loss": 0.6321, "step": 2119 }, { "epoch": 0.17954689815795047, "grad_norm": 1.5286853170272099, "learning_rate": 9.42526811365789e-06, "loss": 0.6005, "step": 2120 }, { "epoch": 0.17963159009104382, "grad_norm": 1.4202209730412543, "learning_rate": 9.42462946606789e-06, "loss": 0.6234, "step": 2121 }, { "epoch": 0.1797162820241372, "grad_norm": 2.1161984138483745, "learning_rate": 9.423990485501094e-06, "loss": 0.5988, "step": 2122 }, { "epoch": 0.17980097395723058, "grad_norm": 1.3552632930212123, "learning_rate": 9.423351172005588e-06, "loss": 0.675, "step": 2123 }, { "epoch": 0.17988566589032395, "grad_norm": 1.650632107750278, "learning_rate": 9.422711525629482e-06, "loss": 0.6497, "step": 2124 }, { "epoch": 0.17997035782341733, "grad_norm": 1.178441325548198, "learning_rate": 9.422071546420915e-06, "loss": 0.6055, "step": 2125 }, { "epoch": 0.18005504975651068, "grad_norm": 1.9336580817017583, "learning_rate": 9.42143123442805e-06, "loss": 0.645, "step": 2126 }, { "epoch": 0.18013974168960406, "grad_norm": 0.5758300239541654, "learning_rate": 9.42079058969907e-06, "loss": 0.8743, "step": 2127 }, { "epoch": 0.18022443362269744, "grad_norm": 1.5499444982117367, "learning_rate": 9.420149612282188e-06, "loss": 0.592, "step": 2128 }, { "epoch": 0.18030912555579082, "grad_norm": 0.6676265674323126, "learning_rate": 9.419508302225644e-06, "loss": 0.8378, "step": 2129 }, { "epoch": 0.1803938174888842, "grad_norm": 1.2689008261178827, "learning_rate": 9.418866659577696e-06, "loss": 0.5905, "step": 2130 }, { "epoch": 0.18047850942197755, "grad_norm": 1.422081586542479, "learning_rate": 9.418224684386634e-06, "loss": 0.6341, "step": 2131 }, { "epoch": 0.18056320135507092, "grad_norm": 1.6816548325049092, "learning_rate": 9.41758237670077e-06, "loss": 0.6864, "step": 2132 }, { "epoch": 0.1806478932881643, "grad_norm": 0.7068665411746213, "learning_rate": 9.41693973656844e-06, "loss": 0.8358, "step": 2133 }, { "epoch": 0.18073258522125768, "grad_norm": 1.9705954010704456, "learning_rate": 9.416296764038003e-06, "loss": 0.6731, "step": 2134 }, { "epoch": 0.18081727715435106, "grad_norm": 1.2139061703616822, "learning_rate": 9.415653459157852e-06, "loss": 0.6665, "step": 2135 }, { "epoch": 0.1809019690874444, "grad_norm": 0.6275340246610678, "learning_rate": 9.415009821976395e-06, "loss": 0.8326, "step": 2136 }, { "epoch": 0.1809866610205378, "grad_norm": 1.636476823281904, "learning_rate": 9.414365852542072e-06, "loss": 0.6201, "step": 2137 }, { "epoch": 0.18107135295363117, "grad_norm": 1.6390934556974301, "learning_rate": 9.413721550903341e-06, "loss": 0.6223, "step": 2138 }, { "epoch": 0.18115604488672454, "grad_norm": 1.1786608655965987, "learning_rate": 9.413076917108692e-06, "loss": 0.6892, "step": 2139 }, { "epoch": 0.18124073681981792, "grad_norm": 1.2163500780846856, "learning_rate": 9.412431951206637e-06, "loss": 0.6853, "step": 2140 }, { "epoch": 0.18132542875291127, "grad_norm": 0.7343171250969153, "learning_rate": 9.411786653245712e-06, "loss": 0.8673, "step": 2141 }, { "epoch": 0.18141012068600465, "grad_norm": 1.4969645561178382, "learning_rate": 9.411141023274481e-06, "loss": 0.6703, "step": 2142 }, { "epoch": 0.18149481261909803, "grad_norm": 1.3127813290733945, "learning_rate": 9.410495061341528e-06, "loss": 0.6772, "step": 2143 }, { "epoch": 0.1815795045521914, "grad_norm": 1.467166286592843, "learning_rate": 9.409848767495472e-06, "loss": 0.6934, "step": 2144 }, { "epoch": 0.18166419648528478, "grad_norm": 1.4261529593240092, "learning_rate": 9.40920214178494e-06, "loss": 0.676, "step": 2145 }, { "epoch": 0.18174888841837816, "grad_norm": 1.318533738888733, "learning_rate": 9.408555184258601e-06, "loss": 0.6366, "step": 2146 }, { "epoch": 0.1818335803514715, "grad_norm": 1.5616168500155978, "learning_rate": 9.407907894965138e-06, "loss": 0.6938, "step": 2147 }, { "epoch": 0.1819182722845649, "grad_norm": 1.4908167087779718, "learning_rate": 9.407260273953267e-06, "loss": 0.646, "step": 2148 }, { "epoch": 0.18200296421765827, "grad_norm": 1.7165651066854999, "learning_rate": 9.406612321271721e-06, "loss": 0.7026, "step": 2149 }, { "epoch": 0.18208765615075165, "grad_norm": 1.2925811602436361, "learning_rate": 9.405964036969266e-06, "loss": 0.6603, "step": 2150 }, { "epoch": 0.18217234808384503, "grad_norm": 1.3140134721556809, "learning_rate": 9.405315421094685e-06, "loss": 0.6414, "step": 2151 }, { "epoch": 0.18225704001693838, "grad_norm": 1.1521324265315909, "learning_rate": 9.404666473696793e-06, "loss": 0.6338, "step": 2152 }, { "epoch": 0.18234173195003175, "grad_norm": 1.431375138733226, "learning_rate": 9.404017194824424e-06, "loss": 0.659, "step": 2153 }, { "epoch": 0.18242642388312513, "grad_norm": 1.4943488175933866, "learning_rate": 9.40336758452644e-06, "loss": 0.6458, "step": 2154 }, { "epoch": 0.1825111158162185, "grad_norm": 1.3536414523833242, "learning_rate": 9.402717642851729e-06, "loss": 0.6954, "step": 2155 }, { "epoch": 0.1825958077493119, "grad_norm": 7.489685429734671, "learning_rate": 9.402067369849202e-06, "loss": 0.6918, "step": 2156 }, { "epoch": 0.18268049968240524, "grad_norm": 0.6621582407417227, "learning_rate": 9.401416765567795e-06, "loss": 0.9018, "step": 2157 }, { "epoch": 0.18276519161549862, "grad_norm": 1.6940082357592954, "learning_rate": 9.40076583005647e-06, "loss": 0.6611, "step": 2158 }, { "epoch": 0.182849883548592, "grad_norm": 1.2703539507844375, "learning_rate": 9.400114563364214e-06, "loss": 0.6744, "step": 2159 }, { "epoch": 0.18293457548168537, "grad_norm": 1.4269375120738519, "learning_rate": 9.399462965540034e-06, "loss": 0.678, "step": 2160 }, { "epoch": 0.18301926741477875, "grad_norm": 1.2765013269179641, "learning_rate": 9.398811036632973e-06, "loss": 0.6847, "step": 2161 }, { "epoch": 0.1831039593478721, "grad_norm": 1.3050880189390341, "learning_rate": 9.398158776692088e-06, "loss": 0.5944, "step": 2162 }, { "epoch": 0.18318865128096548, "grad_norm": 1.4662281946404518, "learning_rate": 9.397506185766466e-06, "loss": 0.6183, "step": 2163 }, { "epoch": 0.18327334321405886, "grad_norm": 2.434436740573946, "learning_rate": 9.396853263905217e-06, "loss": 0.6555, "step": 2164 }, { "epoch": 0.18335803514715224, "grad_norm": 1.3224663143850335, "learning_rate": 9.396200011157477e-06, "loss": 0.6267, "step": 2165 }, { "epoch": 0.18344272708024562, "grad_norm": 1.703588057557222, "learning_rate": 9.395546427572407e-06, "loss": 0.6535, "step": 2166 }, { "epoch": 0.18352741901333897, "grad_norm": 1.5493577833714962, "learning_rate": 9.394892513199194e-06, "loss": 0.6884, "step": 2167 }, { "epoch": 0.18361211094643234, "grad_norm": 0.638456699592268, "learning_rate": 9.394238268087045e-06, "loss": 0.8209, "step": 2168 }, { "epoch": 0.18369680287952572, "grad_norm": 1.3105718242252937, "learning_rate": 9.393583692285201e-06, "loss": 0.6868, "step": 2169 }, { "epoch": 0.1837814948126191, "grad_norm": 1.2833475103082825, "learning_rate": 9.392928785842917e-06, "loss": 0.6322, "step": 2170 }, { "epoch": 0.18386618674571248, "grad_norm": 1.4444520149416524, "learning_rate": 9.392273548809481e-06, "loss": 0.6364, "step": 2171 }, { "epoch": 0.18395087867880586, "grad_norm": 1.4263359915026064, "learning_rate": 9.391617981234203e-06, "loss": 0.6616, "step": 2172 }, { "epoch": 0.1840355706118992, "grad_norm": 1.2997460378540766, "learning_rate": 9.390962083166414e-06, "loss": 0.6598, "step": 2173 }, { "epoch": 0.18412026254499259, "grad_norm": 1.1733011110302798, "learning_rate": 9.39030585465548e-06, "loss": 0.6304, "step": 2174 }, { "epoch": 0.18420495447808596, "grad_norm": 0.648129523282726, "learning_rate": 9.389649295750783e-06, "loss": 0.8385, "step": 2175 }, { "epoch": 0.18428964641117934, "grad_norm": 1.6069211107846155, "learning_rate": 9.38899240650173e-06, "loss": 0.5864, "step": 2176 }, { "epoch": 0.18437433834427272, "grad_norm": 1.4480249020760234, "learning_rate": 9.38833518695776e-06, "loss": 0.7, "step": 2177 }, { "epoch": 0.18445903027736607, "grad_norm": 1.5455213516353805, "learning_rate": 9.387677637168327e-06, "loss": 0.6699, "step": 2178 }, { "epoch": 0.18454372221045945, "grad_norm": 1.2403202901249881, "learning_rate": 9.38701975718292e-06, "loss": 0.6257, "step": 2179 }, { "epoch": 0.18462841414355283, "grad_norm": 2.194746560619351, "learning_rate": 9.386361547051046e-06, "loss": 0.6341, "step": 2180 }, { "epoch": 0.1847131060766462, "grad_norm": 1.1746930433705223, "learning_rate": 9.385703006822238e-06, "loss": 0.626, "step": 2181 }, { "epoch": 0.18479779800973958, "grad_norm": 1.5072478645791285, "learning_rate": 9.385044136546054e-06, "loss": 0.7427, "step": 2182 }, { "epoch": 0.18488248994283293, "grad_norm": 1.9032464495162265, "learning_rate": 9.38438493627208e-06, "loss": 0.6844, "step": 2183 }, { "epoch": 0.1849671818759263, "grad_norm": 1.7438346559558715, "learning_rate": 9.383725406049924e-06, "loss": 0.6478, "step": 2184 }, { "epoch": 0.1850518738090197, "grad_norm": 1.3327350250769616, "learning_rate": 9.383065545929217e-06, "loss": 0.7093, "step": 2185 }, { "epoch": 0.18513656574211307, "grad_norm": 1.1350810147797836, "learning_rate": 9.382405355959618e-06, "loss": 0.6735, "step": 2186 }, { "epoch": 0.18522125767520645, "grad_norm": 1.2316712223925865, "learning_rate": 9.381744836190811e-06, "loss": 0.6911, "step": 2187 }, { "epoch": 0.1853059496082998, "grad_norm": 1.3634258785067135, "learning_rate": 9.381083986672503e-06, "loss": 0.677, "step": 2188 }, { "epoch": 0.18539064154139318, "grad_norm": 1.5172418654917048, "learning_rate": 9.380422807454426e-06, "loss": 0.7236, "step": 2189 }, { "epoch": 0.18547533347448655, "grad_norm": 1.365666974723168, "learning_rate": 9.379761298586337e-06, "loss": 0.6754, "step": 2190 }, { "epoch": 0.18556002540757993, "grad_norm": 1.4274783660388122, "learning_rate": 9.379099460118018e-06, "loss": 0.663, "step": 2191 }, { "epoch": 0.1856447173406733, "grad_norm": 0.6685048074656024, "learning_rate": 9.378437292099276e-06, "loss": 0.8426, "step": 2192 }, { "epoch": 0.18572940927376666, "grad_norm": 2.048654115543888, "learning_rate": 9.377774794579943e-06, "loss": 0.635, "step": 2193 }, { "epoch": 0.18581410120686004, "grad_norm": 1.277611955662626, "learning_rate": 9.377111967609875e-06, "loss": 0.6354, "step": 2194 }, { "epoch": 0.18589879313995342, "grad_norm": 1.3224113020184727, "learning_rate": 9.376448811238955e-06, "loss": 0.6322, "step": 2195 }, { "epoch": 0.1859834850730468, "grad_norm": 1.2148196058002874, "learning_rate": 9.375785325517086e-06, "loss": 0.6143, "step": 2196 }, { "epoch": 0.18606817700614017, "grad_norm": 1.4677419207298177, "learning_rate": 9.375121510494201e-06, "loss": 0.6855, "step": 2197 }, { "epoch": 0.18615286893923355, "grad_norm": 1.3047767803532362, "learning_rate": 9.374457366220255e-06, "loss": 0.7599, "step": 2198 }, { "epoch": 0.1862375608723269, "grad_norm": 0.6095632567636369, "learning_rate": 9.373792892745228e-06, "loss": 0.8536, "step": 2199 }, { "epoch": 0.18632225280542028, "grad_norm": 1.8690905384665115, "learning_rate": 9.373128090119127e-06, "loss": 0.6523, "step": 2200 }, { "epoch": 0.18640694473851366, "grad_norm": 1.2719921393375864, "learning_rate": 9.372462958391979e-06, "loss": 0.5839, "step": 2201 }, { "epoch": 0.18649163667160704, "grad_norm": 1.468197670635953, "learning_rate": 9.371797497613841e-06, "loss": 0.6648, "step": 2202 }, { "epoch": 0.18657632860470041, "grad_norm": 1.4522922342168565, "learning_rate": 9.37113170783479e-06, "loss": 0.665, "step": 2203 }, { "epoch": 0.18666102053779376, "grad_norm": 1.2637430948778836, "learning_rate": 9.370465589104932e-06, "loss": 0.7019, "step": 2204 }, { "epoch": 0.18674571247088714, "grad_norm": 3.055927504121357, "learning_rate": 9.369799141474396e-06, "loss": 0.6563, "step": 2205 }, { "epoch": 0.18683040440398052, "grad_norm": 0.6370142159238683, "learning_rate": 9.369132364993337e-06, "loss": 0.8478, "step": 2206 }, { "epoch": 0.1869150963370739, "grad_norm": 1.2036915680205773, "learning_rate": 9.368465259711929e-06, "loss": 0.6681, "step": 2207 }, { "epoch": 0.18699978827016728, "grad_norm": 1.6496741043449752, "learning_rate": 9.36779782568038e-06, "loss": 0.6481, "step": 2208 }, { "epoch": 0.18708448020326063, "grad_norm": 1.3631190156692465, "learning_rate": 9.367130062948916e-06, "loss": 0.7109, "step": 2209 }, { "epoch": 0.187169172136354, "grad_norm": 1.7265757673386462, "learning_rate": 9.36646197156779e-06, "loss": 0.621, "step": 2210 }, { "epoch": 0.18725386406944738, "grad_norm": 1.2446190953721523, "learning_rate": 9.36579355158728e-06, "loss": 0.6226, "step": 2211 }, { "epoch": 0.18733855600254076, "grad_norm": 1.325002012601711, "learning_rate": 9.365124803057684e-06, "loss": 0.6134, "step": 2212 }, { "epoch": 0.18742324793563414, "grad_norm": 1.5015846707437621, "learning_rate": 9.364455726029334e-06, "loss": 0.6317, "step": 2213 }, { "epoch": 0.1875079398687275, "grad_norm": 1.3571361284836005, "learning_rate": 9.363786320552578e-06, "loss": 0.6437, "step": 2214 }, { "epoch": 0.18759263180182087, "grad_norm": 1.4724434560935382, "learning_rate": 9.363116586677797e-06, "loss": 0.6488, "step": 2215 }, { "epoch": 0.18767732373491425, "grad_norm": 1.1940722118662723, "learning_rate": 9.362446524455389e-06, "loss": 0.7285, "step": 2216 }, { "epoch": 0.18776201566800763, "grad_norm": 1.2053278933008431, "learning_rate": 9.361776133935779e-06, "loss": 0.6385, "step": 2217 }, { "epoch": 0.187846707601101, "grad_norm": 1.3729708233762734, "learning_rate": 9.361105415169416e-06, "loss": 0.6862, "step": 2218 }, { "epoch": 0.18793139953419435, "grad_norm": 1.29434119088641, "learning_rate": 9.360434368206778e-06, "loss": 0.6652, "step": 2219 }, { "epoch": 0.18801609146728773, "grad_norm": 3.085761507191517, "learning_rate": 9.359762993098367e-06, "loss": 0.564, "step": 2220 }, { "epoch": 0.1881007834003811, "grad_norm": 1.3398586858871446, "learning_rate": 9.359091289894702e-06, "loss": 0.6581, "step": 2221 }, { "epoch": 0.1881854753334745, "grad_norm": 1.4246490167526384, "learning_rate": 9.358419258646336e-06, "loss": 0.6514, "step": 2222 }, { "epoch": 0.18827016726656787, "grad_norm": 1.6235053288437462, "learning_rate": 9.357746899403843e-06, "loss": 0.6678, "step": 2223 }, { "epoch": 0.18835485919966125, "grad_norm": 1.2713318850144248, "learning_rate": 9.35707421221782e-06, "loss": 0.6661, "step": 2224 }, { "epoch": 0.1884395511327546, "grad_norm": 1.2526507570159158, "learning_rate": 9.356401197138889e-06, "loss": 0.6743, "step": 2225 }, { "epoch": 0.18852424306584797, "grad_norm": 0.6478356588286968, "learning_rate": 9.3557278542177e-06, "loss": 0.8793, "step": 2226 }, { "epoch": 0.18860893499894135, "grad_norm": 1.9751527189569262, "learning_rate": 9.355054183504926e-06, "loss": 0.7018, "step": 2227 }, { "epoch": 0.18869362693203473, "grad_norm": 1.2841218775802037, "learning_rate": 9.354380185051264e-06, "loss": 0.6038, "step": 2228 }, { "epoch": 0.1887783188651281, "grad_norm": 2.218971692601514, "learning_rate": 9.353705858907436e-06, "loss": 0.6235, "step": 2229 }, { "epoch": 0.18886301079822146, "grad_norm": 1.8883035035788063, "learning_rate": 9.353031205124188e-06, "loss": 0.6643, "step": 2230 }, { "epoch": 0.18894770273131484, "grad_norm": 1.2065861101390085, "learning_rate": 9.35235622375229e-06, "loss": 0.6555, "step": 2231 }, { "epoch": 0.18903239466440822, "grad_norm": 1.3246104467507545, "learning_rate": 9.35168091484254e-06, "loss": 0.6213, "step": 2232 }, { "epoch": 0.1891170865975016, "grad_norm": 1.6283640515689382, "learning_rate": 9.351005278445757e-06, "loss": 0.6941, "step": 2233 }, { "epoch": 0.18920177853059497, "grad_norm": 1.5328486850968495, "learning_rate": 9.350329314612789e-06, "loss": 0.6631, "step": 2234 }, { "epoch": 0.18928647046368832, "grad_norm": 0.6256198933501753, "learning_rate": 9.349653023394502e-06, "loss": 0.8919, "step": 2235 }, { "epoch": 0.1893711623967817, "grad_norm": 1.9130867625784105, "learning_rate": 9.348976404841793e-06, "loss": 0.7277, "step": 2236 }, { "epoch": 0.18945585432987508, "grad_norm": 1.3651688677425917, "learning_rate": 9.348299459005583e-06, "loss": 0.6669, "step": 2237 }, { "epoch": 0.18954054626296846, "grad_norm": 1.502525300418709, "learning_rate": 9.34762218593681e-06, "loss": 0.6873, "step": 2238 }, { "epoch": 0.18962523819606183, "grad_norm": 1.2501014195508124, "learning_rate": 9.346944585686448e-06, "loss": 0.6555, "step": 2239 }, { "epoch": 0.18970993012915519, "grad_norm": 1.193482563565099, "learning_rate": 9.346266658305488e-06, "loss": 0.634, "step": 2240 }, { "epoch": 0.18979462206224856, "grad_norm": 1.2016620641723357, "learning_rate": 9.345588403844944e-06, "loss": 0.6213, "step": 2241 }, { "epoch": 0.18987931399534194, "grad_norm": 1.5681716161213148, "learning_rate": 9.344909822355866e-06, "loss": 0.6815, "step": 2242 }, { "epoch": 0.18996400592843532, "grad_norm": 1.2135298174030258, "learning_rate": 9.344230913889313e-06, "loss": 0.6643, "step": 2243 }, { "epoch": 0.1900486978615287, "grad_norm": 2.1371205772843553, "learning_rate": 9.343551678496383e-06, "loss": 0.6412, "step": 2244 }, { "epoch": 0.19013338979462205, "grad_norm": 1.253737876515686, "learning_rate": 9.342872116228187e-06, "loss": 0.6381, "step": 2245 }, { "epoch": 0.19021808172771543, "grad_norm": 1.5903893749353646, "learning_rate": 9.342192227135869e-06, "loss": 0.6601, "step": 2246 }, { "epoch": 0.1903027736608088, "grad_norm": 1.2107329063868726, "learning_rate": 9.341512011270593e-06, "loss": 0.6807, "step": 2247 }, { "epoch": 0.19038746559390218, "grad_norm": 1.446262084782698, "learning_rate": 9.340831468683547e-06, "loss": 0.6759, "step": 2248 }, { "epoch": 0.19047215752699556, "grad_norm": 1.4715902517357728, "learning_rate": 9.340150599425947e-06, "loss": 0.6832, "step": 2249 }, { "epoch": 0.19055684946008894, "grad_norm": 1.4984397818650126, "learning_rate": 9.339469403549033e-06, "loss": 0.6817, "step": 2250 }, { "epoch": 0.1906415413931823, "grad_norm": 1.4037192276207535, "learning_rate": 9.33878788110407e-06, "loss": 0.6461, "step": 2251 }, { "epoch": 0.19072623332627567, "grad_norm": 1.1338380147354115, "learning_rate": 9.338106032142342e-06, "loss": 0.6539, "step": 2252 }, { "epoch": 0.19081092525936905, "grad_norm": 1.5849277933443633, "learning_rate": 9.337423856715163e-06, "loss": 0.7435, "step": 2253 }, { "epoch": 0.19089561719246242, "grad_norm": 1.17374636310712, "learning_rate": 9.336741354873871e-06, "loss": 0.6916, "step": 2254 }, { "epoch": 0.1909803091255558, "grad_norm": 2.059556668389202, "learning_rate": 9.336058526669829e-06, "loss": 0.6664, "step": 2255 }, { "epoch": 0.19106500105864915, "grad_norm": 1.0724316526776685, "learning_rate": 9.335375372154422e-06, "loss": 0.6255, "step": 2256 }, { "epoch": 0.19114969299174253, "grad_norm": 1.4850919923540518, "learning_rate": 9.334691891379062e-06, "loss": 0.6673, "step": 2257 }, { "epoch": 0.1912343849248359, "grad_norm": 1.1088430755264107, "learning_rate": 9.334008084395182e-06, "loss": 0.6638, "step": 2258 }, { "epoch": 0.1913190768579293, "grad_norm": 1.4410387164901133, "learning_rate": 9.333323951254246e-06, "loss": 0.653, "step": 2259 }, { "epoch": 0.19140376879102267, "grad_norm": 1.935706985403355, "learning_rate": 9.332639492007735e-06, "loss": 0.5909, "step": 2260 }, { "epoch": 0.19148846072411602, "grad_norm": 1.2355858770818129, "learning_rate": 9.331954706707163e-06, "loss": 0.65, "step": 2261 }, { "epoch": 0.1915731526572094, "grad_norm": 1.1946050221099618, "learning_rate": 9.331269595404058e-06, "loss": 0.6659, "step": 2262 }, { "epoch": 0.19165784459030277, "grad_norm": 1.2685189946731856, "learning_rate": 9.33058415814998e-06, "loss": 0.6518, "step": 2263 }, { "epoch": 0.19174253652339615, "grad_norm": 1.5118318002523743, "learning_rate": 9.329898394996513e-06, "loss": 0.6323, "step": 2264 }, { "epoch": 0.19182722845648953, "grad_norm": 1.2488829766334824, "learning_rate": 9.329212305995265e-06, "loss": 0.6241, "step": 2265 }, { "epoch": 0.19191192038958288, "grad_norm": 1.4918930286605097, "learning_rate": 9.328525891197866e-06, "loss": 0.6041, "step": 2266 }, { "epoch": 0.19199661232267626, "grad_norm": 1.254940353249727, "learning_rate": 9.327839150655973e-06, "loss": 0.6179, "step": 2267 }, { "epoch": 0.19208130425576964, "grad_norm": 2.0782962195148493, "learning_rate": 9.327152084421266e-06, "loss": 0.6495, "step": 2268 }, { "epoch": 0.19216599618886301, "grad_norm": 2.1112961286975196, "learning_rate": 9.326464692545453e-06, "loss": 0.6861, "step": 2269 }, { "epoch": 0.1922506881219564, "grad_norm": 1.3292945958174933, "learning_rate": 9.325776975080264e-06, "loss": 0.6664, "step": 2270 }, { "epoch": 0.19233538005504974, "grad_norm": 1.7906258211095427, "learning_rate": 9.32508893207745e-06, "loss": 0.6368, "step": 2271 }, { "epoch": 0.19242007198814312, "grad_norm": 1.35775003619018, "learning_rate": 9.32440056358879e-06, "loss": 0.695, "step": 2272 }, { "epoch": 0.1925047639212365, "grad_norm": 1.3739919891449164, "learning_rate": 9.323711869666088e-06, "loss": 0.7059, "step": 2273 }, { "epoch": 0.19258945585432988, "grad_norm": 1.3508052196838325, "learning_rate": 9.323022850361174e-06, "loss": 0.6757, "step": 2274 }, { "epoch": 0.19267414778742326, "grad_norm": 1.3411906289906854, "learning_rate": 9.3223335057259e-06, "loss": 0.6723, "step": 2275 }, { "epoch": 0.19275883972051663, "grad_norm": 1.2342016532654192, "learning_rate": 9.321643835812139e-06, "loss": 0.6506, "step": 2276 }, { "epoch": 0.19284353165360998, "grad_norm": 2.6855697072722036, "learning_rate": 9.320953840671798e-06, "loss": 0.6353, "step": 2277 }, { "epoch": 0.19292822358670336, "grad_norm": 1.590298167659805, "learning_rate": 9.3202635203568e-06, "loss": 0.6481, "step": 2278 }, { "epoch": 0.19301291551979674, "grad_norm": 1.4087040017707813, "learning_rate": 9.319572874919094e-06, "loss": 0.6763, "step": 2279 }, { "epoch": 0.19309760745289012, "grad_norm": 0.6457233219343632, "learning_rate": 9.318881904410655e-06, "loss": 0.8711, "step": 2280 }, { "epoch": 0.1931822993859835, "grad_norm": 1.2364564351774974, "learning_rate": 9.318190608883486e-06, "loss": 0.6178, "step": 2281 }, { "epoch": 0.19326699131907685, "grad_norm": 1.3811084839271435, "learning_rate": 9.317498988389605e-06, "loss": 0.6553, "step": 2282 }, { "epoch": 0.19335168325217023, "grad_norm": 1.3253711889472986, "learning_rate": 9.316807042981063e-06, "loss": 0.6615, "step": 2283 }, { "epoch": 0.1934363751852636, "grad_norm": 0.6220440431865348, "learning_rate": 9.316114772709935e-06, "loss": 0.8292, "step": 2284 }, { "epoch": 0.19352106711835698, "grad_norm": 1.2449495611066816, "learning_rate": 9.315422177628312e-06, "loss": 0.6751, "step": 2285 }, { "epoch": 0.19360575905145036, "grad_norm": 1.4897599131087536, "learning_rate": 9.314729257788321e-06, "loss": 0.6612, "step": 2286 }, { "epoch": 0.1936904509845437, "grad_norm": 1.6854655599876331, "learning_rate": 9.314036013242108e-06, "loss": 0.7042, "step": 2287 }, { "epoch": 0.1937751429176371, "grad_norm": 1.4453687386469214, "learning_rate": 9.313342444041838e-06, "loss": 0.6514, "step": 2288 }, { "epoch": 0.19385983485073047, "grad_norm": 1.3170237323668887, "learning_rate": 9.312648550239708e-06, "loss": 0.665, "step": 2289 }, { "epoch": 0.19394452678382385, "grad_norm": 2.0837105771763316, "learning_rate": 9.31195433188794e-06, "loss": 0.6409, "step": 2290 }, { "epoch": 0.19402921871691722, "grad_norm": 0.6100589405264235, "learning_rate": 9.311259789038776e-06, "loss": 0.8422, "step": 2291 }, { "epoch": 0.19411391065001057, "grad_norm": 1.6342817786581185, "learning_rate": 9.310564921744482e-06, "loss": 0.7057, "step": 2292 }, { "epoch": 0.19419860258310395, "grad_norm": 1.9943021196319608, "learning_rate": 9.309869730057352e-06, "loss": 0.6733, "step": 2293 }, { "epoch": 0.19428329451619733, "grad_norm": 1.3616041002036985, "learning_rate": 9.309174214029706e-06, "loss": 0.7328, "step": 2294 }, { "epoch": 0.1943679864492907, "grad_norm": 1.2763274078939715, "learning_rate": 9.30847837371388e-06, "loss": 0.7002, "step": 2295 }, { "epoch": 0.1944526783823841, "grad_norm": 1.2192246277891683, "learning_rate": 9.307782209162242e-06, "loss": 0.6616, "step": 2296 }, { "epoch": 0.19453737031547744, "grad_norm": 1.7528111287896162, "learning_rate": 9.307085720427182e-06, "loss": 0.6228, "step": 2297 }, { "epoch": 0.19462206224857082, "grad_norm": 0.6942859800733526, "learning_rate": 9.306388907561115e-06, "loss": 0.8334, "step": 2298 }, { "epoch": 0.1947067541816642, "grad_norm": 1.4175005939727425, "learning_rate": 9.30569177061648e-06, "loss": 0.7144, "step": 2299 }, { "epoch": 0.19479144611475757, "grad_norm": 2.435055857701645, "learning_rate": 9.304994309645737e-06, "loss": 0.7006, "step": 2300 }, { "epoch": 0.19487613804785095, "grad_norm": 1.2837015052923146, "learning_rate": 9.304296524701377e-06, "loss": 0.605, "step": 2301 }, { "epoch": 0.19496082998094433, "grad_norm": 1.309270306254869, "learning_rate": 9.30359841583591e-06, "loss": 0.5936, "step": 2302 }, { "epoch": 0.19504552191403768, "grad_norm": 1.848256187462849, "learning_rate": 9.302899983101876e-06, "loss": 0.6774, "step": 2303 }, { "epoch": 0.19513021384713106, "grad_norm": 1.0671813537766492, "learning_rate": 9.302201226551833e-06, "loss": 0.6111, "step": 2304 }, { "epoch": 0.19521490578022443, "grad_norm": 1.1803237326585414, "learning_rate": 9.301502146238366e-06, "loss": 0.6373, "step": 2305 }, { "epoch": 0.1952995977133178, "grad_norm": 1.2128078816804422, "learning_rate": 9.300802742214085e-06, "loss": 0.6336, "step": 2306 }, { "epoch": 0.1953842896464112, "grad_norm": 1.3091281782656954, "learning_rate": 9.300103014531624e-06, "loss": 0.6768, "step": 2307 }, { "epoch": 0.19546898157950454, "grad_norm": 1.328095608021812, "learning_rate": 9.299402963243642e-06, "loss": 0.6793, "step": 2308 }, { "epoch": 0.19555367351259792, "grad_norm": 1.507421184645532, "learning_rate": 9.298702588402818e-06, "loss": 0.6694, "step": 2309 }, { "epoch": 0.1956383654456913, "grad_norm": 1.362101315040624, "learning_rate": 9.298001890061863e-06, "loss": 0.6415, "step": 2310 }, { "epoch": 0.19572305737878468, "grad_norm": 1.1991115805315984, "learning_rate": 9.297300868273506e-06, "loss": 0.6652, "step": 2311 }, { "epoch": 0.19580774931187805, "grad_norm": 1.521622390162399, "learning_rate": 9.296599523090506e-06, "loss": 0.701, "step": 2312 }, { "epoch": 0.1958924412449714, "grad_norm": 1.3426050283746471, "learning_rate": 9.295897854565637e-06, "loss": 0.6353, "step": 2313 }, { "epoch": 0.19597713317806478, "grad_norm": 1.5231725255271202, "learning_rate": 9.295195862751709e-06, "loss": 0.6801, "step": 2314 }, { "epoch": 0.19606182511115816, "grad_norm": 0.6000781232499428, "learning_rate": 9.294493547701546e-06, "loss": 0.856, "step": 2315 }, { "epoch": 0.19614651704425154, "grad_norm": 1.4447700652081932, "learning_rate": 9.293790909468005e-06, "loss": 0.6508, "step": 2316 }, { "epoch": 0.19623120897734492, "grad_norm": 1.3457637211433293, "learning_rate": 9.29308794810396e-06, "loss": 0.6698, "step": 2317 }, { "epoch": 0.19631590091043827, "grad_norm": 4.613495115729935, "learning_rate": 9.292384663662316e-06, "loss": 0.5946, "step": 2318 }, { "epoch": 0.19640059284353165, "grad_norm": 1.5325882441994216, "learning_rate": 9.291681056195995e-06, "loss": 0.68, "step": 2319 }, { "epoch": 0.19648528477662502, "grad_norm": 1.3055993643126105, "learning_rate": 9.29097712575795e-06, "loss": 0.6969, "step": 2320 }, { "epoch": 0.1965699767097184, "grad_norm": 1.5010021825351605, "learning_rate": 9.290272872401154e-06, "loss": 0.6499, "step": 2321 }, { "epoch": 0.19665466864281178, "grad_norm": 1.3396254689867146, "learning_rate": 9.289568296178606e-06, "loss": 0.6666, "step": 2322 }, { "epoch": 0.19673936057590516, "grad_norm": 1.3560979669049589, "learning_rate": 9.28886339714333e-06, "loss": 0.635, "step": 2323 }, { "epoch": 0.1968240525089985, "grad_norm": 1.6018585679839492, "learning_rate": 9.288158175348372e-06, "loss": 0.6297, "step": 2324 }, { "epoch": 0.1969087444420919, "grad_norm": 1.5229053018292096, "learning_rate": 9.287452630846804e-06, "loss": 0.6177, "step": 2325 }, { "epoch": 0.19699343637518527, "grad_norm": 1.382887274846489, "learning_rate": 9.286746763691723e-06, "loss": 0.6982, "step": 2326 }, { "epoch": 0.19707812830827864, "grad_norm": 1.717411111718407, "learning_rate": 9.286040573936249e-06, "loss": 0.6287, "step": 2327 }, { "epoch": 0.19716282024137202, "grad_norm": 1.3951529006361107, "learning_rate": 9.285334061633526e-06, "loss": 0.5944, "step": 2328 }, { "epoch": 0.19724751217446537, "grad_norm": 1.6891354025656498, "learning_rate": 9.284627226836722e-06, "loss": 0.7299, "step": 2329 }, { "epoch": 0.19733220410755875, "grad_norm": 1.154506697075849, "learning_rate": 9.283920069599033e-06, "loss": 0.6631, "step": 2330 }, { "epoch": 0.19741689604065213, "grad_norm": 1.2947403002398885, "learning_rate": 9.283212589973673e-06, "loss": 0.7218, "step": 2331 }, { "epoch": 0.1975015879737455, "grad_norm": 1.3664213822943925, "learning_rate": 9.282504788013884e-06, "loss": 0.6868, "step": 2332 }, { "epoch": 0.19758627990683889, "grad_norm": 1.606540969798607, "learning_rate": 9.281796663772935e-06, "loss": 0.6765, "step": 2333 }, { "epoch": 0.19767097183993224, "grad_norm": 1.5015884022542136, "learning_rate": 9.281088217304113e-06, "loss": 0.6621, "step": 2334 }, { "epoch": 0.19775566377302561, "grad_norm": 0.7473226716470734, "learning_rate": 9.280379448660732e-06, "loss": 0.8078, "step": 2335 }, { "epoch": 0.197840355706119, "grad_norm": 1.189944768967121, "learning_rate": 9.279670357896133e-06, "loss": 0.668, "step": 2336 }, { "epoch": 0.19792504763921237, "grad_norm": 1.3794739261339382, "learning_rate": 9.278960945063678e-06, "loss": 0.6866, "step": 2337 }, { "epoch": 0.19800973957230575, "grad_norm": 1.2055923466112128, "learning_rate": 9.278251210216754e-06, "loss": 0.6678, "step": 2338 }, { "epoch": 0.1980944315053991, "grad_norm": 1.3326475069736416, "learning_rate": 9.27754115340877e-06, "loss": 0.6334, "step": 2339 }, { "epoch": 0.19817912343849248, "grad_norm": 2.0662188027773807, "learning_rate": 9.276830774693163e-06, "loss": 0.5837, "step": 2340 }, { "epoch": 0.19826381537158586, "grad_norm": 1.6600594438656306, "learning_rate": 9.276120074123396e-06, "loss": 0.6728, "step": 2341 }, { "epoch": 0.19834850730467923, "grad_norm": 3.991182143993931, "learning_rate": 9.27540905175295e-06, "loss": 0.6686, "step": 2342 }, { "epoch": 0.1984331992377726, "grad_norm": 1.2529453792071632, "learning_rate": 9.274697707635332e-06, "loss": 0.6196, "step": 2343 }, { "epoch": 0.19851789117086596, "grad_norm": 3.5677027381187587, "learning_rate": 9.273986041824078e-06, "loss": 0.6708, "step": 2344 }, { "epoch": 0.19860258310395934, "grad_norm": 1.2499296493065322, "learning_rate": 9.273274054372741e-06, "loss": 0.6352, "step": 2345 }, { "epoch": 0.19868727503705272, "grad_norm": 1.7859869877636823, "learning_rate": 9.272561745334902e-06, "loss": 0.6787, "step": 2346 }, { "epoch": 0.1987719669701461, "grad_norm": 1.5541270823007869, "learning_rate": 9.27184911476417e-06, "loss": 0.6413, "step": 2347 }, { "epoch": 0.19885665890323947, "grad_norm": 1.3099221951157225, "learning_rate": 9.27113616271417e-06, "loss": 0.6366, "step": 2348 }, { "epoch": 0.19894135083633285, "grad_norm": 1.3243604098942283, "learning_rate": 9.270422889238558e-06, "loss": 0.6918, "step": 2349 }, { "epoch": 0.1990260427694262, "grad_norm": 1.3099924519628676, "learning_rate": 9.269709294391009e-06, "loss": 0.6781, "step": 2350 }, { "epoch": 0.19911073470251958, "grad_norm": 1.8232214209255582, "learning_rate": 9.268995378225229e-06, "loss": 0.6318, "step": 2351 }, { "epoch": 0.19919542663561296, "grad_norm": 1.5716282160198352, "learning_rate": 9.268281140794938e-06, "loss": 0.6948, "step": 2352 }, { "epoch": 0.19928011856870634, "grad_norm": 1.2537943136258127, "learning_rate": 9.267566582153892e-06, "loss": 0.6837, "step": 2353 }, { "epoch": 0.19936481050179972, "grad_norm": 1.8673224362718888, "learning_rate": 9.266851702355863e-06, "loss": 0.6338, "step": 2354 }, { "epoch": 0.19944950243489307, "grad_norm": 1.5080440080879618, "learning_rate": 9.26613650145465e-06, "loss": 0.6317, "step": 2355 }, { "epoch": 0.19953419436798644, "grad_norm": 1.7652938742722386, "learning_rate": 9.265420979504073e-06, "loss": 0.6849, "step": 2356 }, { "epoch": 0.19961888630107982, "grad_norm": 1.4264147800519877, "learning_rate": 9.26470513655798e-06, "loss": 0.6522, "step": 2357 }, { "epoch": 0.1997035782341732, "grad_norm": 1.5679068504814488, "learning_rate": 9.263988972670246e-06, "loss": 0.6441, "step": 2358 }, { "epoch": 0.19978827016726658, "grad_norm": 5.7271659187378745, "learning_rate": 9.26327248789476e-06, "loss": 0.6889, "step": 2359 }, { "epoch": 0.19987296210035993, "grad_norm": 1.5451056133868333, "learning_rate": 9.262555682285446e-06, "loss": 0.6771, "step": 2360 }, { "epoch": 0.1999576540334533, "grad_norm": 1.261959470277761, "learning_rate": 9.261838555896245e-06, "loss": 0.6512, "step": 2361 }, { "epoch": 0.2000423459665467, "grad_norm": 0.6556071582070753, "learning_rate": 9.261121108781125e-06, "loss": 0.8114, "step": 2362 }, { "epoch": 0.20012703789964006, "grad_norm": 1.2564387255859948, "learning_rate": 9.260403340994079e-06, "loss": 0.6784, "step": 2363 }, { "epoch": 0.20021172983273344, "grad_norm": 1.3457937032719456, "learning_rate": 9.25968525258912e-06, "loss": 0.6793, "step": 2364 }, { "epoch": 0.2002964217658268, "grad_norm": 1.4417127616251633, "learning_rate": 9.258966843620291e-06, "loss": 0.6661, "step": 2365 }, { "epoch": 0.20038111369892017, "grad_norm": 1.5200440268910482, "learning_rate": 9.258248114141654e-06, "loss": 0.7118, "step": 2366 }, { "epoch": 0.20046580563201355, "grad_norm": 2.2033016184052956, "learning_rate": 9.257529064207298e-06, "loss": 0.6788, "step": 2367 }, { "epoch": 0.20055049756510693, "grad_norm": 1.6337528306774374, "learning_rate": 9.256809693871336e-06, "loss": 0.6506, "step": 2368 }, { "epoch": 0.2006351894982003, "grad_norm": 0.6332827732221455, "learning_rate": 9.256090003187902e-06, "loss": 0.8475, "step": 2369 }, { "epoch": 0.20071988143129366, "grad_norm": 1.3948083616301281, "learning_rate": 9.255369992211158e-06, "loss": 0.6849, "step": 2370 }, { "epoch": 0.20080457336438703, "grad_norm": 1.3504374342003596, "learning_rate": 9.254649660995291e-06, "loss": 0.6763, "step": 2371 }, { "epoch": 0.2008892652974804, "grad_norm": 2.553641346886787, "learning_rate": 9.253929009594507e-06, "loss": 0.6583, "step": 2372 }, { "epoch": 0.2009739572305738, "grad_norm": 0.5985307610385103, "learning_rate": 9.253208038063037e-06, "loss": 0.8561, "step": 2373 }, { "epoch": 0.20105864916366717, "grad_norm": 1.4540748551156848, "learning_rate": 9.252486746455144e-06, "loss": 0.6463, "step": 2374 }, { "epoch": 0.20114334109676055, "grad_norm": 1.3707609879381075, "learning_rate": 9.251765134825103e-06, "loss": 0.665, "step": 2375 }, { "epoch": 0.2012280330298539, "grad_norm": 1.7927362306439742, "learning_rate": 9.251043203227222e-06, "loss": 0.6171, "step": 2376 }, { "epoch": 0.20131272496294728, "grad_norm": 1.1240670473879297, "learning_rate": 9.25032095171583e-06, "loss": 0.6501, "step": 2377 }, { "epoch": 0.20139741689604065, "grad_norm": 1.19018920356849, "learning_rate": 9.24959838034528e-06, "loss": 0.6375, "step": 2378 }, { "epoch": 0.20148210882913403, "grad_norm": 1.284159468091179, "learning_rate": 9.248875489169949e-06, "loss": 0.6965, "step": 2379 }, { "epoch": 0.2015668007622274, "grad_norm": 2.476505575264783, "learning_rate": 9.248152278244237e-06, "loss": 0.723, "step": 2380 }, { "epoch": 0.20165149269532076, "grad_norm": 1.0514093245667093, "learning_rate": 9.247428747622573e-06, "loss": 0.697, "step": 2381 }, { "epoch": 0.20173618462841414, "grad_norm": 1.3172670519131242, "learning_rate": 9.246704897359403e-06, "loss": 0.6178, "step": 2382 }, { "epoch": 0.20182087656150752, "grad_norm": 2.025127026487152, "learning_rate": 9.245980727509203e-06, "loss": 0.6788, "step": 2383 }, { "epoch": 0.2019055684946009, "grad_norm": 1.3596754314551571, "learning_rate": 9.24525623812647e-06, "loss": 0.621, "step": 2384 }, { "epoch": 0.20199026042769427, "grad_norm": 1.481785177601679, "learning_rate": 9.244531429265726e-06, "loss": 0.6885, "step": 2385 }, { "epoch": 0.20207495236078762, "grad_norm": 1.5150829560537096, "learning_rate": 9.243806300981515e-06, "loss": 0.7229, "step": 2386 }, { "epoch": 0.202159644293881, "grad_norm": 1.3464679876657761, "learning_rate": 9.243080853328406e-06, "loss": 0.6963, "step": 2387 }, { "epoch": 0.20224433622697438, "grad_norm": 0.6164377377568714, "learning_rate": 9.242355086360998e-06, "loss": 0.835, "step": 2388 }, { "epoch": 0.20232902816006776, "grad_norm": 1.5582290488313113, "learning_rate": 9.241629000133903e-06, "loss": 0.6384, "step": 2389 }, { "epoch": 0.20241372009316114, "grad_norm": 2.0556581942586094, "learning_rate": 9.240902594701767e-06, "loss": 0.6277, "step": 2390 }, { "epoch": 0.2024984120262545, "grad_norm": 1.1862546847283966, "learning_rate": 9.240175870119254e-06, "loss": 0.6062, "step": 2391 }, { "epoch": 0.20258310395934787, "grad_norm": 1.231035795612051, "learning_rate": 9.239448826441052e-06, "loss": 0.6559, "step": 2392 }, { "epoch": 0.20266779589244124, "grad_norm": 1.5123560296274368, "learning_rate": 9.238721463721878e-06, "loss": 0.6681, "step": 2393 }, { "epoch": 0.20275248782553462, "grad_norm": 2.2239579058046117, "learning_rate": 9.23799378201647e-06, "loss": 0.6759, "step": 2394 }, { "epoch": 0.202837179758628, "grad_norm": 1.2249913485472272, "learning_rate": 9.237265781379588e-06, "loss": 0.6457, "step": 2395 }, { "epoch": 0.20292187169172135, "grad_norm": 1.9402835692511122, "learning_rate": 9.236537461866017e-06, "loss": 0.6686, "step": 2396 }, { "epoch": 0.20300656362481473, "grad_norm": 1.4400054729660812, "learning_rate": 9.23580882353057e-06, "loss": 0.6705, "step": 2397 }, { "epoch": 0.2030912555579081, "grad_norm": 1.8219804319741622, "learning_rate": 9.235079866428079e-06, "loss": 0.6435, "step": 2398 }, { "epoch": 0.20317594749100149, "grad_norm": 0.6766979427702646, "learning_rate": 9.234350590613402e-06, "loss": 0.926, "step": 2399 }, { "epoch": 0.20326063942409486, "grad_norm": 1.3752150595361847, "learning_rate": 9.233620996141421e-06, "loss": 0.5884, "step": 2400 }, { "epoch": 0.20334533135718824, "grad_norm": 1.2605788243329827, "learning_rate": 9.232891083067044e-06, "loss": 0.6963, "step": 2401 }, { "epoch": 0.2034300232902816, "grad_norm": 1.1039581950525548, "learning_rate": 9.232160851445196e-06, "loss": 0.7227, "step": 2402 }, { "epoch": 0.20351471522337497, "grad_norm": 1.321073153862446, "learning_rate": 9.231430301330834e-06, "loss": 0.638, "step": 2403 }, { "epoch": 0.20359940715646835, "grad_norm": 1.879069802472482, "learning_rate": 9.230699432778937e-06, "loss": 0.6355, "step": 2404 }, { "epoch": 0.20368409908956173, "grad_norm": 1.6395922655767563, "learning_rate": 9.229968245844503e-06, "loss": 0.6641, "step": 2405 }, { "epoch": 0.2037687910226551, "grad_norm": 1.6834018978807828, "learning_rate": 9.22923674058256e-06, "loss": 0.6797, "step": 2406 }, { "epoch": 0.20385348295574846, "grad_norm": 1.3706698550861827, "learning_rate": 9.22850491704816e-06, "loss": 0.6979, "step": 2407 }, { "epoch": 0.20393817488884183, "grad_norm": 1.3487197168975529, "learning_rate": 9.227772775296372e-06, "loss": 0.6489, "step": 2408 }, { "epoch": 0.2040228668219352, "grad_norm": 2.2919671908721333, "learning_rate": 9.227040315382297e-06, "loss": 0.6368, "step": 2409 }, { "epoch": 0.2041075587550286, "grad_norm": 1.278241101494878, "learning_rate": 9.226307537361052e-06, "loss": 0.6278, "step": 2410 }, { "epoch": 0.20419225068812197, "grad_norm": 0.6034747664533758, "learning_rate": 9.225574441287788e-06, "loss": 0.8583, "step": 2411 }, { "epoch": 0.20427694262121532, "grad_norm": 1.2856470384061272, "learning_rate": 9.22484102721767e-06, "loss": 0.638, "step": 2412 }, { "epoch": 0.2043616345543087, "grad_norm": 1.4008458717208814, "learning_rate": 9.224107295205894e-06, "loss": 0.6596, "step": 2413 }, { "epoch": 0.20444632648740207, "grad_norm": 1.3290862456329664, "learning_rate": 9.223373245307677e-06, "loss": 0.6173, "step": 2414 }, { "epoch": 0.20453101842049545, "grad_norm": 1.4337839644728259, "learning_rate": 9.222638877578259e-06, "loss": 0.6695, "step": 2415 }, { "epoch": 0.20461571035358883, "grad_norm": 2.701205113260473, "learning_rate": 9.221904192072906e-06, "loss": 0.7286, "step": 2416 }, { "epoch": 0.20470040228668218, "grad_norm": 1.5001663416360373, "learning_rate": 9.221169188846906e-06, "loss": 0.707, "step": 2417 }, { "epoch": 0.20478509421977556, "grad_norm": 1.1812894037748678, "learning_rate": 9.220433867955572e-06, "loss": 0.6752, "step": 2418 }, { "epoch": 0.20486978615286894, "grad_norm": 1.335096144394677, "learning_rate": 9.219698229454241e-06, "loss": 0.712, "step": 2419 }, { "epoch": 0.20495447808596232, "grad_norm": 1.2914286119073288, "learning_rate": 9.218962273398275e-06, "loss": 0.6748, "step": 2420 }, { "epoch": 0.2050391700190557, "grad_norm": 1.6638729135405905, "learning_rate": 9.218225999843057e-06, "loss": 0.6374, "step": 2421 }, { "epoch": 0.20512386195214904, "grad_norm": 1.2885577567521778, "learning_rate": 9.217489408843995e-06, "loss": 0.6493, "step": 2422 }, { "epoch": 0.20520855388524242, "grad_norm": 1.2540729326933444, "learning_rate": 9.216752500456525e-06, "loss": 0.6688, "step": 2423 }, { "epoch": 0.2052932458183358, "grad_norm": 1.3276987209298132, "learning_rate": 9.216015274736098e-06, "loss": 0.6509, "step": 2424 }, { "epoch": 0.20537793775142918, "grad_norm": 0.6522012201252718, "learning_rate": 9.215277731738197e-06, "loss": 0.8604, "step": 2425 }, { "epoch": 0.20546262968452256, "grad_norm": 1.1362374371698951, "learning_rate": 9.214539871518326e-06, "loss": 0.6384, "step": 2426 }, { "epoch": 0.20554732161761594, "grad_norm": 1.8024176166493673, "learning_rate": 9.213801694132014e-06, "loss": 0.68, "step": 2427 }, { "epoch": 0.20563201355070929, "grad_norm": 1.309920233955019, "learning_rate": 9.21306319963481e-06, "loss": 0.6259, "step": 2428 }, { "epoch": 0.20571670548380266, "grad_norm": 1.38241095498805, "learning_rate": 9.212324388082289e-06, "loss": 0.6505, "step": 2429 }, { "epoch": 0.20580139741689604, "grad_norm": 1.3609925292598168, "learning_rate": 9.211585259530055e-06, "loss": 0.6949, "step": 2430 }, { "epoch": 0.20588608934998942, "grad_norm": 1.1494976509362786, "learning_rate": 9.21084581403373e-06, "loss": 0.6945, "step": 2431 }, { "epoch": 0.2059707812830828, "grad_norm": 1.2991476954430399, "learning_rate": 9.210106051648959e-06, "loss": 0.6439, "step": 2432 }, { "epoch": 0.20605547321617615, "grad_norm": 1.1631451900453205, "learning_rate": 9.209365972431413e-06, "loss": 0.6751, "step": 2433 }, { "epoch": 0.20614016514926953, "grad_norm": 1.5415784505815628, "learning_rate": 9.208625576436788e-06, "loss": 0.6259, "step": 2434 }, { "epoch": 0.2062248570823629, "grad_norm": 1.3120850122205843, "learning_rate": 9.207884863720804e-06, "loss": 0.7287, "step": 2435 }, { "epoch": 0.20630954901545628, "grad_norm": 1.3426815154516891, "learning_rate": 9.207143834339201e-06, "loss": 0.6013, "step": 2436 }, { "epoch": 0.20639424094854966, "grad_norm": 0.6231006858726186, "learning_rate": 9.206402488347746e-06, "loss": 0.888, "step": 2437 }, { "epoch": 0.206478932881643, "grad_norm": 1.7637432248958154, "learning_rate": 9.205660825802232e-06, "loss": 0.6832, "step": 2438 }, { "epoch": 0.2065636248147364, "grad_norm": 1.2855680857713072, "learning_rate": 9.204918846758468e-06, "loss": 0.6789, "step": 2439 }, { "epoch": 0.20664831674782977, "grad_norm": 1.5197912138906076, "learning_rate": 9.204176551272297e-06, "loss": 0.6252, "step": 2440 }, { "epoch": 0.20673300868092315, "grad_norm": 0.6961432474158152, "learning_rate": 9.203433939399577e-06, "loss": 0.8492, "step": 2441 }, { "epoch": 0.20681770061401653, "grad_norm": 0.6482302024620321, "learning_rate": 9.202691011196196e-06, "loss": 0.8183, "step": 2442 }, { "epoch": 0.20690239254710988, "grad_norm": 2.8470737610537027, "learning_rate": 9.201947766718062e-06, "loss": 0.5986, "step": 2443 }, { "epoch": 0.20698708448020325, "grad_norm": 1.2327598712109686, "learning_rate": 9.201204206021107e-06, "loss": 0.6769, "step": 2444 }, { "epoch": 0.20707177641329663, "grad_norm": 1.761416485725309, "learning_rate": 9.20046032916129e-06, "loss": 0.6555, "step": 2445 }, { "epoch": 0.20715646834639, "grad_norm": 1.210865234259899, "learning_rate": 9.19971613619459e-06, "loss": 0.7319, "step": 2446 }, { "epoch": 0.2072411602794834, "grad_norm": 2.1308611333499536, "learning_rate": 9.198971627177013e-06, "loss": 0.6691, "step": 2447 }, { "epoch": 0.20732585221257674, "grad_norm": 2.1426026784446304, "learning_rate": 9.198226802164586e-06, "loss": 0.6819, "step": 2448 }, { "epoch": 0.20741054414567012, "grad_norm": 1.5653552301670828, "learning_rate": 9.19748166121336e-06, "loss": 0.6652, "step": 2449 }, { "epoch": 0.2074952360787635, "grad_norm": 3.3224548107939347, "learning_rate": 9.196736204379416e-06, "loss": 0.7067, "step": 2450 }, { "epoch": 0.20757992801185687, "grad_norm": 1.3576798653114843, "learning_rate": 9.195990431718847e-06, "loss": 0.6396, "step": 2451 }, { "epoch": 0.20766461994495025, "grad_norm": 2.2582547969518125, "learning_rate": 9.19524434328778e-06, "loss": 0.6741, "step": 2452 }, { "epoch": 0.20774931187804363, "grad_norm": 1.8844397988211108, "learning_rate": 9.194497939142361e-06, "loss": 0.6349, "step": 2453 }, { "epoch": 0.20783400381113698, "grad_norm": 1.3917464670438238, "learning_rate": 9.193751219338763e-06, "loss": 0.6377, "step": 2454 }, { "epoch": 0.20791869574423036, "grad_norm": 0.6451772611844474, "learning_rate": 9.193004183933175e-06, "loss": 0.8249, "step": 2455 }, { "epoch": 0.20800338767732374, "grad_norm": 1.2702212533488844, "learning_rate": 9.192256832981822e-06, "loss": 0.6632, "step": 2456 }, { "epoch": 0.20808807961041711, "grad_norm": 0.6369274895425845, "learning_rate": 9.191509166540941e-06, "loss": 0.852, "step": 2457 }, { "epoch": 0.2081727715435105, "grad_norm": 1.2661381054964256, "learning_rate": 9.190761184666803e-06, "loss": 0.6577, "step": 2458 }, { "epoch": 0.20825746347660384, "grad_norm": 1.4603470999882113, "learning_rate": 9.190012887415693e-06, "loss": 0.6653, "step": 2459 }, { "epoch": 0.20834215540969722, "grad_norm": 1.1606589357250963, "learning_rate": 9.189264274843928e-06, "loss": 0.6295, "step": 2460 }, { "epoch": 0.2084268473427906, "grad_norm": 1.139444016516175, "learning_rate": 9.188515347007841e-06, "loss": 0.6601, "step": 2461 }, { "epoch": 0.20851153927588398, "grad_norm": 1.3135984429981966, "learning_rate": 9.187766103963796e-06, "loss": 0.6529, "step": 2462 }, { "epoch": 0.20859623120897736, "grad_norm": 2.170311862750112, "learning_rate": 9.187016545768176e-06, "loss": 0.6903, "step": 2463 }, { "epoch": 0.2086809231420707, "grad_norm": 1.6591971916543757, "learning_rate": 9.18626667247739e-06, "loss": 0.6369, "step": 2464 }, { "epoch": 0.20876561507516408, "grad_norm": 0.6275439656798771, "learning_rate": 9.185516484147867e-06, "loss": 0.8647, "step": 2465 }, { "epoch": 0.20885030700825746, "grad_norm": 0.619921269781578, "learning_rate": 9.184765980836069e-06, "loss": 0.8698, "step": 2466 }, { "epoch": 0.20893499894135084, "grad_norm": 1.2236952635399987, "learning_rate": 9.184015162598467e-06, "loss": 0.6527, "step": 2467 }, { "epoch": 0.20901969087444422, "grad_norm": 1.352800190003263, "learning_rate": 9.183264029491572e-06, "loss": 0.695, "step": 2468 }, { "epoch": 0.20910438280753757, "grad_norm": 1.5591238718798244, "learning_rate": 9.182512581571907e-06, "loss": 0.7025, "step": 2469 }, { "epoch": 0.20918907474063095, "grad_norm": 1.3297010431168794, "learning_rate": 9.181760818896024e-06, "loss": 0.7109, "step": 2470 }, { "epoch": 0.20927376667372433, "grad_norm": 1.4156038899682724, "learning_rate": 9.181008741520493e-06, "loss": 0.6257, "step": 2471 }, { "epoch": 0.2093584586068177, "grad_norm": 1.1324567416013926, "learning_rate": 9.180256349501917e-06, "loss": 0.6654, "step": 2472 }, { "epoch": 0.20944315053991108, "grad_norm": 1.9737213835013583, "learning_rate": 9.179503642896915e-06, "loss": 0.6525, "step": 2473 }, { "epoch": 0.20952784247300443, "grad_norm": 1.3726511728198139, "learning_rate": 9.17875062176213e-06, "loss": 0.7016, "step": 2474 }, { "epoch": 0.2096125344060978, "grad_norm": 1.8670729768361396, "learning_rate": 9.177997286154236e-06, "loss": 0.6814, "step": 2475 }, { "epoch": 0.2096972263391912, "grad_norm": 1.3404516489167857, "learning_rate": 9.17724363612992e-06, "loss": 0.6102, "step": 2476 }, { "epoch": 0.20978191827228457, "grad_norm": 1.4558180781467829, "learning_rate": 9.176489671745904e-06, "loss": 0.6544, "step": 2477 }, { "epoch": 0.20986661020537795, "grad_norm": 2.1432043606882485, "learning_rate": 9.175735393058922e-06, "loss": 0.6205, "step": 2478 }, { "epoch": 0.20995130213847132, "grad_norm": 1.3507842431638026, "learning_rate": 9.174980800125741e-06, "loss": 0.6508, "step": 2479 }, { "epoch": 0.21003599407156467, "grad_norm": 1.3022013831517827, "learning_rate": 9.174225893003148e-06, "loss": 0.6872, "step": 2480 }, { "epoch": 0.21012068600465805, "grad_norm": 1.4415473619047836, "learning_rate": 9.173470671747953e-06, "loss": 0.676, "step": 2481 }, { "epoch": 0.21020537793775143, "grad_norm": 1.5059693463423427, "learning_rate": 9.17271513641699e-06, "loss": 0.6244, "step": 2482 }, { "epoch": 0.2102900698708448, "grad_norm": 1.2472189538215557, "learning_rate": 9.171959287067115e-06, "loss": 0.6886, "step": 2483 }, { "epoch": 0.2103747618039382, "grad_norm": 0.7489578755343498, "learning_rate": 9.171203123755215e-06, "loss": 0.8562, "step": 2484 }, { "epoch": 0.21045945373703154, "grad_norm": 1.4748645876124082, "learning_rate": 9.17044664653819e-06, "loss": 0.6386, "step": 2485 }, { "epoch": 0.21054414567012492, "grad_norm": 1.2142062288899174, "learning_rate": 9.169689855472975e-06, "loss": 0.631, "step": 2486 }, { "epoch": 0.2106288376032183, "grad_norm": 2.4110023167992733, "learning_rate": 9.168932750616515e-06, "loss": 0.6474, "step": 2487 }, { "epoch": 0.21071352953631167, "grad_norm": 1.401056175961365, "learning_rate": 9.168175332025793e-06, "loss": 0.6302, "step": 2488 }, { "epoch": 0.21079822146940505, "grad_norm": 1.516459078886656, "learning_rate": 9.167417599757804e-06, "loss": 0.6463, "step": 2489 }, { "epoch": 0.2108829134024984, "grad_norm": 1.656947570210736, "learning_rate": 9.166659553869573e-06, "loss": 0.7023, "step": 2490 }, { "epoch": 0.21096760533559178, "grad_norm": 3.374841878563271, "learning_rate": 9.165901194418147e-06, "loss": 0.6444, "step": 2491 }, { "epoch": 0.21105229726868516, "grad_norm": 1.9077905996845264, "learning_rate": 9.165142521460599e-06, "loss": 0.6443, "step": 2492 }, { "epoch": 0.21113698920177854, "grad_norm": 1.7068641181261783, "learning_rate": 9.164383535054018e-06, "loss": 0.6393, "step": 2493 }, { "epoch": 0.2112216811348719, "grad_norm": 1.3525567577283437, "learning_rate": 9.163624235255526e-06, "loss": 0.662, "step": 2494 }, { "epoch": 0.21130637306796526, "grad_norm": 0.6873803762362906, "learning_rate": 9.162864622122262e-06, "loss": 0.8624, "step": 2495 }, { "epoch": 0.21139106500105864, "grad_norm": 1.231737133602269, "learning_rate": 9.162104695711391e-06, "loss": 0.6438, "step": 2496 }, { "epoch": 0.21147575693415202, "grad_norm": 1.425695230115336, "learning_rate": 9.161344456080105e-06, "loss": 0.7095, "step": 2497 }, { "epoch": 0.2115604488672454, "grad_norm": 1.5766092290824154, "learning_rate": 9.160583903285612e-06, "loss": 0.6501, "step": 2498 }, { "epoch": 0.21164514080033878, "grad_norm": 1.4776126830338998, "learning_rate": 9.159823037385147e-06, "loss": 0.6942, "step": 2499 }, { "epoch": 0.21172983273343213, "grad_norm": 1.442785523596445, "learning_rate": 9.159061858435974e-06, "loss": 0.6963, "step": 2500 }, { "epoch": 0.2118145246665255, "grad_norm": 1.3492751301676629, "learning_rate": 9.158300366495371e-06, "loss": 0.6578, "step": 2501 }, { "epoch": 0.21189921659961888, "grad_norm": 1.355983696715032, "learning_rate": 9.157538561620648e-06, "loss": 0.6052, "step": 2502 }, { "epoch": 0.21198390853271226, "grad_norm": 1.2383025870215665, "learning_rate": 9.156776443869132e-06, "loss": 0.5811, "step": 2503 }, { "epoch": 0.21206860046580564, "grad_norm": 1.6693214941598737, "learning_rate": 9.156014013298175e-06, "loss": 0.6296, "step": 2504 }, { "epoch": 0.21215329239889902, "grad_norm": 0.6287131090449241, "learning_rate": 9.15525126996516e-06, "loss": 0.8578, "step": 2505 }, { "epoch": 0.21223798433199237, "grad_norm": 1.2147201251080015, "learning_rate": 9.154488213927482e-06, "loss": 0.6935, "step": 2506 }, { "epoch": 0.21232267626508575, "grad_norm": 1.3882873353114669, "learning_rate": 9.153724845242566e-06, "loss": 0.6568, "step": 2507 }, { "epoch": 0.21240736819817913, "grad_norm": 1.8820528112650856, "learning_rate": 9.152961163967862e-06, "loss": 0.6148, "step": 2508 }, { "epoch": 0.2124920601312725, "grad_norm": 0.6555345192998623, "learning_rate": 9.152197170160839e-06, "loss": 0.8922, "step": 2509 }, { "epoch": 0.21257675206436588, "grad_norm": 1.2601636094433102, "learning_rate": 9.151432863878994e-06, "loss": 0.6355, "step": 2510 }, { "epoch": 0.21266144399745923, "grad_norm": 3.632919375496407, "learning_rate": 9.15066824517984e-06, "loss": 0.6869, "step": 2511 }, { "epoch": 0.2127461359305526, "grad_norm": 1.9454144080103355, "learning_rate": 9.149903314120922e-06, "loss": 0.6322, "step": 2512 }, { "epoch": 0.212830827863646, "grad_norm": 1.1320780152265457, "learning_rate": 9.149138070759807e-06, "loss": 0.7038, "step": 2513 }, { "epoch": 0.21291551979673937, "grad_norm": 1.1709464766527082, "learning_rate": 9.14837251515408e-06, "loss": 0.6382, "step": 2514 }, { "epoch": 0.21300021172983274, "grad_norm": 1.6930382211749062, "learning_rate": 9.147606647361356e-06, "loss": 0.6392, "step": 2515 }, { "epoch": 0.2130849036629261, "grad_norm": 1.224714207507723, "learning_rate": 9.146840467439267e-06, "loss": 0.6073, "step": 2516 }, { "epoch": 0.21316959559601947, "grad_norm": 1.7870971335890118, "learning_rate": 9.146073975445475e-06, "loss": 0.6688, "step": 2517 }, { "epoch": 0.21325428752911285, "grad_norm": 2.028488575789385, "learning_rate": 9.145307171437662e-06, "loss": 0.7366, "step": 2518 }, { "epoch": 0.21333897946220623, "grad_norm": 0.6553711658737478, "learning_rate": 9.144540055473534e-06, "loss": 0.8664, "step": 2519 }, { "epoch": 0.2134236713952996, "grad_norm": 1.1826327010143929, "learning_rate": 9.143772627610821e-06, "loss": 0.6692, "step": 2520 }, { "epoch": 0.21350836332839296, "grad_norm": 1.4548420912942763, "learning_rate": 9.143004887907274e-06, "loss": 0.7093, "step": 2521 }, { "epoch": 0.21359305526148634, "grad_norm": 1.3982773028316349, "learning_rate": 9.142236836420672e-06, "loss": 0.6777, "step": 2522 }, { "epoch": 0.21367774719457971, "grad_norm": 1.2496143889246476, "learning_rate": 9.141468473208815e-06, "loss": 0.6623, "step": 2523 }, { "epoch": 0.2137624391276731, "grad_norm": 1.2954058821596777, "learning_rate": 9.140699798329523e-06, "loss": 0.6002, "step": 2524 }, { "epoch": 0.21384713106076647, "grad_norm": 0.670586574366508, "learning_rate": 9.139930811840647e-06, "loss": 0.881, "step": 2525 }, { "epoch": 0.21393182299385982, "grad_norm": 1.8493247981183105, "learning_rate": 9.139161513800054e-06, "loss": 0.6931, "step": 2526 }, { "epoch": 0.2140165149269532, "grad_norm": 1.398884793442744, "learning_rate": 9.13839190426564e-06, "loss": 0.6537, "step": 2527 }, { "epoch": 0.21410120686004658, "grad_norm": 1.4126761919838342, "learning_rate": 9.13762198329532e-06, "loss": 0.637, "step": 2528 }, { "epoch": 0.21418589879313996, "grad_norm": 0.6255424861500638, "learning_rate": 9.136851750947037e-06, "loss": 0.8629, "step": 2529 }, { "epoch": 0.21427059072623333, "grad_norm": 1.2283986262368223, "learning_rate": 9.136081207278756e-06, "loss": 0.6432, "step": 2530 }, { "epoch": 0.2143552826593267, "grad_norm": 2.689811965412059, "learning_rate": 9.135310352348458e-06, "loss": 0.6975, "step": 2531 }, { "epoch": 0.21443997459242006, "grad_norm": 1.2017931935551978, "learning_rate": 9.134539186214161e-06, "loss": 0.7029, "step": 2532 }, { "epoch": 0.21452466652551344, "grad_norm": 1.265511070734727, "learning_rate": 9.133767708933899e-06, "loss": 0.6944, "step": 2533 }, { "epoch": 0.21460935845860682, "grad_norm": 1.7789437027730977, "learning_rate": 9.132995920565726e-06, "loss": 0.6474, "step": 2534 }, { "epoch": 0.2146940503917002, "grad_norm": 1.1883325130240439, "learning_rate": 9.132223821167724e-06, "loss": 0.6953, "step": 2535 }, { "epoch": 0.21477874232479358, "grad_norm": 1.43086053344257, "learning_rate": 9.131451410797998e-06, "loss": 0.6765, "step": 2536 }, { "epoch": 0.21486343425788693, "grad_norm": 1.42539014445503, "learning_rate": 9.130678689514677e-06, "loss": 0.6542, "step": 2537 }, { "epoch": 0.2149481261909803, "grad_norm": 1.2579267102543819, "learning_rate": 9.129905657375912e-06, "loss": 0.6508, "step": 2538 }, { "epoch": 0.21503281812407368, "grad_norm": 1.3347091579733263, "learning_rate": 9.129132314439877e-06, "loss": 0.6717, "step": 2539 }, { "epoch": 0.21511751005716706, "grad_norm": 1.4124172365859866, "learning_rate": 9.12835866076477e-06, "loss": 0.6322, "step": 2540 }, { "epoch": 0.21520220199026044, "grad_norm": 1.2074548388583246, "learning_rate": 9.127584696408814e-06, "loss": 0.6414, "step": 2541 }, { "epoch": 0.2152868939233538, "grad_norm": 1.1689646580680972, "learning_rate": 9.126810421430253e-06, "loss": 0.6429, "step": 2542 }, { "epoch": 0.21537158585644717, "grad_norm": 0.6165086749000768, "learning_rate": 9.126035835887355e-06, "loss": 0.8595, "step": 2543 }, { "epoch": 0.21545627778954055, "grad_norm": 0.6316884840366777, "learning_rate": 9.125260939838411e-06, "loss": 0.8229, "step": 2544 }, { "epoch": 0.21554096972263392, "grad_norm": 1.2420024431271328, "learning_rate": 9.124485733341737e-06, "loss": 0.6394, "step": 2545 }, { "epoch": 0.2156256616557273, "grad_norm": 1.3263629587339536, "learning_rate": 9.123710216455673e-06, "loss": 0.6099, "step": 2546 }, { "epoch": 0.21571035358882065, "grad_norm": 1.722355216355326, "learning_rate": 9.122934389238578e-06, "loss": 0.6276, "step": 2547 }, { "epoch": 0.21579504552191403, "grad_norm": 1.1410753701150484, "learning_rate": 9.122158251748838e-06, "loss": 0.667, "step": 2548 }, { "epoch": 0.2158797374550074, "grad_norm": 0.6957325897974856, "learning_rate": 9.121381804044861e-06, "loss": 0.8709, "step": 2549 }, { "epoch": 0.2159644293881008, "grad_norm": 1.1945531738775177, "learning_rate": 9.120605046185082e-06, "loss": 0.6442, "step": 2550 }, { "epoch": 0.21604912132119417, "grad_norm": 1.4823611102647982, "learning_rate": 9.119827978227953e-06, "loss": 0.6721, "step": 2551 }, { "epoch": 0.21613381325428752, "grad_norm": 1.8875927464356592, "learning_rate": 9.119050600231952e-06, "loss": 0.6054, "step": 2552 }, { "epoch": 0.2162185051873809, "grad_norm": 0.6101662047978892, "learning_rate": 9.118272912255584e-06, "loss": 0.8494, "step": 2553 }, { "epoch": 0.21630319712047427, "grad_norm": 2.0288532435607123, "learning_rate": 9.11749491435737e-06, "loss": 0.6808, "step": 2554 }, { "epoch": 0.21638788905356765, "grad_norm": 2.0341382021762477, "learning_rate": 9.11671660659586e-06, "loss": 0.6998, "step": 2555 }, { "epoch": 0.21647258098666103, "grad_norm": 1.8228261906410683, "learning_rate": 9.115937989029627e-06, "loss": 0.6946, "step": 2556 }, { "epoch": 0.2165572729197544, "grad_norm": 1.5083635845115306, "learning_rate": 9.115159061717265e-06, "loss": 0.6196, "step": 2557 }, { "epoch": 0.21664196485284776, "grad_norm": 1.4674436536221525, "learning_rate": 9.114379824717395e-06, "loss": 0.6544, "step": 2558 }, { "epoch": 0.21672665678594114, "grad_norm": 1.2837133456729914, "learning_rate": 9.113600278088655e-06, "loss": 0.6506, "step": 2559 }, { "epoch": 0.2168113487190345, "grad_norm": 1.270513035970483, "learning_rate": 9.112820421889711e-06, "loss": 0.6396, "step": 2560 }, { "epoch": 0.2168960406521279, "grad_norm": 1.4173403684182895, "learning_rate": 9.112040256179253e-06, "loss": 0.5861, "step": 2561 }, { "epoch": 0.21698073258522127, "grad_norm": 1.7861037713957448, "learning_rate": 9.11125978101599e-06, "loss": 0.638, "step": 2562 }, { "epoch": 0.21706542451831462, "grad_norm": 1.2788693930478638, "learning_rate": 9.11047899645866e-06, "loss": 0.6039, "step": 2563 }, { "epoch": 0.217150116451408, "grad_norm": 1.317516937767762, "learning_rate": 9.109697902566018e-06, "loss": 0.6085, "step": 2564 }, { "epoch": 0.21723480838450138, "grad_norm": 1.2639562570309446, "learning_rate": 9.108916499396847e-06, "loss": 0.7213, "step": 2565 }, { "epoch": 0.21731950031759475, "grad_norm": 2.4447838746674893, "learning_rate": 9.108134787009953e-06, "loss": 0.6326, "step": 2566 }, { "epoch": 0.21740419225068813, "grad_norm": 1.330118910297036, "learning_rate": 9.107352765464161e-06, "loss": 0.6288, "step": 2567 }, { "epoch": 0.21748888418378148, "grad_norm": 1.3258312046894056, "learning_rate": 9.106570434818326e-06, "loss": 0.6096, "step": 2568 }, { "epoch": 0.21757357611687486, "grad_norm": 1.125882565127572, "learning_rate": 9.10578779513132e-06, "loss": 0.685, "step": 2569 }, { "epoch": 0.21765826804996824, "grad_norm": 2.784065412796128, "learning_rate": 9.10500484646204e-06, "loss": 0.6733, "step": 2570 }, { "epoch": 0.21774295998306162, "grad_norm": 1.294731000572286, "learning_rate": 9.104221588869408e-06, "loss": 0.637, "step": 2571 }, { "epoch": 0.217827651916155, "grad_norm": 1.5908566252457568, "learning_rate": 9.10343802241237e-06, "loss": 0.6692, "step": 2572 }, { "epoch": 0.21791234384924835, "grad_norm": 2.574880408779271, "learning_rate": 9.10265414714989e-06, "loss": 0.6585, "step": 2573 }, { "epoch": 0.21799703578234172, "grad_norm": 1.3403262449130506, "learning_rate": 9.101869963140963e-06, "loss": 0.6658, "step": 2574 }, { "epoch": 0.2180817277154351, "grad_norm": 0.6473950091924505, "learning_rate": 9.1010854704446e-06, "loss": 0.8088, "step": 2575 }, { "epoch": 0.21816641964852848, "grad_norm": 1.4090130769269338, "learning_rate": 9.10030066911984e-06, "loss": 0.6625, "step": 2576 }, { "epoch": 0.21825111158162186, "grad_norm": 2.7345506922415947, "learning_rate": 9.099515559225742e-06, "loss": 0.6179, "step": 2577 }, { "epoch": 0.2183358035147152, "grad_norm": 1.3005882156669233, "learning_rate": 9.09873014082139e-06, "loss": 0.7051, "step": 2578 }, { "epoch": 0.2184204954478086, "grad_norm": 1.2796017728441784, "learning_rate": 9.097944413965891e-06, "loss": 0.6422, "step": 2579 }, { "epoch": 0.21850518738090197, "grad_norm": 1.2834053390704705, "learning_rate": 9.097158378718375e-06, "loss": 0.7013, "step": 2580 }, { "epoch": 0.21858987931399534, "grad_norm": 1.5921802358192467, "learning_rate": 9.096372035137996e-06, "loss": 0.637, "step": 2581 }, { "epoch": 0.21867457124708872, "grad_norm": 1.4177378873585527, "learning_rate": 9.095585383283929e-06, "loss": 0.6463, "step": 2582 }, { "epoch": 0.2187592631801821, "grad_norm": 1.313418328484851, "learning_rate": 9.094798423215376e-06, "loss": 0.647, "step": 2583 }, { "epoch": 0.21884395511327545, "grad_norm": 2.0781503193624724, "learning_rate": 9.094011154991557e-06, "loss": 0.6515, "step": 2584 }, { "epoch": 0.21892864704636883, "grad_norm": 2.5688298345580054, "learning_rate": 9.093223578671721e-06, "loss": 0.6646, "step": 2585 }, { "epoch": 0.2190133389794622, "grad_norm": 1.6233319339578927, "learning_rate": 9.092435694315138e-06, "loss": 0.6807, "step": 2586 }, { "epoch": 0.21909803091255559, "grad_norm": 4.579768717565835, "learning_rate": 9.091647501981096e-06, "loss": 0.6557, "step": 2587 }, { "epoch": 0.21918272284564896, "grad_norm": 1.2379226148302254, "learning_rate": 9.090859001728914e-06, "loss": 0.6806, "step": 2588 }, { "epoch": 0.21926741477874231, "grad_norm": 0.6718316474197977, "learning_rate": 9.09007019361793e-06, "loss": 0.8515, "step": 2589 }, { "epoch": 0.2193521067118357, "grad_norm": 0.6102413697788497, "learning_rate": 9.089281077707507e-06, "loss": 0.8151, "step": 2590 }, { "epoch": 0.21943679864492907, "grad_norm": 1.2679968277382894, "learning_rate": 9.088491654057026e-06, "loss": 0.6543, "step": 2591 }, { "epoch": 0.21952149057802245, "grad_norm": 2.0885403706561947, "learning_rate": 9.0877019227259e-06, "loss": 0.6609, "step": 2592 }, { "epoch": 0.21960618251111583, "grad_norm": 1.2065013448494215, "learning_rate": 9.08691188377356e-06, "loss": 0.6469, "step": 2593 }, { "epoch": 0.21969087444420918, "grad_norm": 1.5875917061381817, "learning_rate": 9.086121537259458e-06, "loss": 0.6315, "step": 2594 }, { "epoch": 0.21977556637730256, "grad_norm": 1.1711312531193963, "learning_rate": 9.085330883243073e-06, "loss": 0.7037, "step": 2595 }, { "epoch": 0.21986025831039593, "grad_norm": 1.3144586250115182, "learning_rate": 9.084539921783907e-06, "loss": 0.6349, "step": 2596 }, { "epoch": 0.2199449502434893, "grad_norm": 1.264297334819558, "learning_rate": 9.083748652941484e-06, "loss": 0.6739, "step": 2597 }, { "epoch": 0.2200296421765827, "grad_norm": 1.6993429287810788, "learning_rate": 9.082957076775351e-06, "loss": 0.6911, "step": 2598 }, { "epoch": 0.22011433410967604, "grad_norm": 2.3091395266945915, "learning_rate": 9.082165193345076e-06, "loss": 0.6311, "step": 2599 }, { "epoch": 0.22019902604276942, "grad_norm": 1.193512744967197, "learning_rate": 9.081373002710255e-06, "loss": 0.695, "step": 2600 }, { "epoch": 0.2202837179758628, "grad_norm": 1.8862906862935096, "learning_rate": 9.080580504930503e-06, "loss": 0.6539, "step": 2601 }, { "epoch": 0.22036840990895618, "grad_norm": 1.2088715710104723, "learning_rate": 9.079787700065462e-06, "loss": 0.6488, "step": 2602 }, { "epoch": 0.22045310184204955, "grad_norm": 2.075468430778445, "learning_rate": 9.07899458817479e-06, "loss": 0.655, "step": 2603 }, { "epoch": 0.2205377937751429, "grad_norm": 1.4939103248912464, "learning_rate": 9.078201169318178e-06, "loss": 0.6214, "step": 2604 }, { "epoch": 0.22062248570823628, "grad_norm": 1.2473034272494776, "learning_rate": 9.077407443555333e-06, "loss": 0.6711, "step": 2605 }, { "epoch": 0.22070717764132966, "grad_norm": 1.3513056159441619, "learning_rate": 9.076613410945986e-06, "loss": 0.6676, "step": 2606 }, { "epoch": 0.22079186957442304, "grad_norm": 0.6597954747962416, "learning_rate": 9.075819071549894e-06, "loss": 0.8602, "step": 2607 }, { "epoch": 0.22087656150751642, "grad_norm": 1.4454509772120676, "learning_rate": 9.075024425426832e-06, "loss": 0.6531, "step": 2608 }, { "epoch": 0.2209612534406098, "grad_norm": 1.1917477187822414, "learning_rate": 9.074229472636607e-06, "loss": 0.6363, "step": 2609 }, { "epoch": 0.22104594537370315, "grad_norm": 1.4251268969202446, "learning_rate": 9.073434213239038e-06, "loss": 0.6707, "step": 2610 }, { "epoch": 0.22113063730679652, "grad_norm": 2.0691517848768375, "learning_rate": 9.072638647293977e-06, "loss": 0.5918, "step": 2611 }, { "epoch": 0.2212153292398899, "grad_norm": 1.4842660044692988, "learning_rate": 9.07184277486129e-06, "loss": 0.6736, "step": 2612 }, { "epoch": 0.22130002117298328, "grad_norm": 4.402860176419431, "learning_rate": 9.071046596000873e-06, "loss": 0.5975, "step": 2613 }, { "epoch": 0.22138471310607666, "grad_norm": 1.3121304281247856, "learning_rate": 9.070250110772643e-06, "loss": 0.6586, "step": 2614 }, { "epoch": 0.22146940503917, "grad_norm": 1.7931138302926843, "learning_rate": 9.069453319236538e-06, "loss": 0.6225, "step": 2615 }, { "epoch": 0.2215540969722634, "grad_norm": 1.1930133469466506, "learning_rate": 9.068656221452524e-06, "loss": 0.6672, "step": 2616 }, { "epoch": 0.22163878890535677, "grad_norm": 1.4247637047749862, "learning_rate": 9.067858817480585e-06, "loss": 0.6593, "step": 2617 }, { "epoch": 0.22172348083845014, "grad_norm": 1.1851695732431604, "learning_rate": 9.067061107380727e-06, "loss": 0.6599, "step": 2618 }, { "epoch": 0.22180817277154352, "grad_norm": 2.412909231257517, "learning_rate": 9.066263091212986e-06, "loss": 0.6554, "step": 2619 }, { "epoch": 0.22189286470463687, "grad_norm": 1.793696053863399, "learning_rate": 9.065464769037415e-06, "loss": 0.6773, "step": 2620 }, { "epoch": 0.22197755663773025, "grad_norm": 0.7121856333203948, "learning_rate": 9.064666140914094e-06, "loss": 0.8421, "step": 2621 }, { "epoch": 0.22206224857082363, "grad_norm": 1.2154448153783828, "learning_rate": 9.063867206903121e-06, "loss": 0.6781, "step": 2622 }, { "epoch": 0.222146940503917, "grad_norm": 1.2857459683660932, "learning_rate": 9.06306796706462e-06, "loss": 0.6933, "step": 2623 }, { "epoch": 0.22223163243701038, "grad_norm": 0.6133889406212586, "learning_rate": 9.062268421458741e-06, "loss": 0.8415, "step": 2624 }, { "epoch": 0.22231632437010374, "grad_norm": 1.4254796905508458, "learning_rate": 9.061468570145655e-06, "loss": 0.7006, "step": 2625 }, { "epoch": 0.2224010163031971, "grad_norm": 1.5140477004778574, "learning_rate": 9.06066841318555e-06, "loss": 0.6631, "step": 2626 }, { "epoch": 0.2224857082362905, "grad_norm": 1.3260865001197892, "learning_rate": 9.059867950638645e-06, "loss": 0.6217, "step": 2627 }, { "epoch": 0.22257040016938387, "grad_norm": 0.6503366236460724, "learning_rate": 9.05906718256518e-06, "loss": 0.9116, "step": 2628 }, { "epoch": 0.22265509210247725, "grad_norm": 1.4287560031461535, "learning_rate": 9.058266109025413e-06, "loss": 0.6455, "step": 2629 }, { "epoch": 0.2227397840355706, "grad_norm": 1.5911218099920352, "learning_rate": 9.057464730079634e-06, "loss": 0.661, "step": 2630 }, { "epoch": 0.22282447596866398, "grad_norm": 1.4221873542742733, "learning_rate": 9.056663045788148e-06, "loss": 0.6725, "step": 2631 }, { "epoch": 0.22290916790175735, "grad_norm": 1.3990286725327465, "learning_rate": 9.055861056211286e-06, "loss": 0.6443, "step": 2632 }, { "epoch": 0.22299385983485073, "grad_norm": 1.3229863666998902, "learning_rate": 9.055058761409405e-06, "loss": 0.7277, "step": 2633 }, { "epoch": 0.2230785517679441, "grad_norm": 2.8906352301626796, "learning_rate": 9.054256161442878e-06, "loss": 0.6768, "step": 2634 }, { "epoch": 0.2231632437010375, "grad_norm": 1.2252359465316625, "learning_rate": 9.053453256372106e-06, "loss": 0.5975, "step": 2635 }, { "epoch": 0.22324793563413084, "grad_norm": 1.5533099051106203, "learning_rate": 9.052650046257513e-06, "loss": 0.6379, "step": 2636 }, { "epoch": 0.22333262756722422, "grad_norm": 1.3330537934372608, "learning_rate": 9.051846531159544e-06, "loss": 0.585, "step": 2637 }, { "epoch": 0.2234173195003176, "grad_norm": 1.3487035874683468, "learning_rate": 9.05104271113867e-06, "loss": 0.6196, "step": 2638 }, { "epoch": 0.22350201143341097, "grad_norm": 1.4791108274112998, "learning_rate": 9.050238586255378e-06, "loss": 0.6222, "step": 2639 }, { "epoch": 0.22358670336650435, "grad_norm": 1.2522865510480548, "learning_rate": 9.049434156570188e-06, "loss": 0.6336, "step": 2640 }, { "epoch": 0.2236713952995977, "grad_norm": 1.4802716773715914, "learning_rate": 9.04862942214363e-06, "loss": 0.6581, "step": 2641 }, { "epoch": 0.22375608723269108, "grad_norm": 1.5146035581355832, "learning_rate": 9.047824383036275e-06, "loss": 0.6191, "step": 2642 }, { "epoch": 0.22384077916578446, "grad_norm": 1.8275496480206332, "learning_rate": 9.047019039308697e-06, "loss": 0.7375, "step": 2643 }, { "epoch": 0.22392547109887784, "grad_norm": 0.7686093476165211, "learning_rate": 9.04621339102151e-06, "loss": 0.8844, "step": 2644 }, { "epoch": 0.22401016303197122, "grad_norm": 2.7035786278245184, "learning_rate": 9.045407438235337e-06, "loss": 0.6692, "step": 2645 }, { "epoch": 0.22409485496506457, "grad_norm": 1.642773038191705, "learning_rate": 9.044601181010833e-06, "loss": 0.6385, "step": 2646 }, { "epoch": 0.22417954689815794, "grad_norm": 1.786991351100926, "learning_rate": 9.043794619408674e-06, "loss": 0.6883, "step": 2647 }, { "epoch": 0.22426423883125132, "grad_norm": 4.524050267199264, "learning_rate": 9.042987753489557e-06, "loss": 0.6137, "step": 2648 }, { "epoch": 0.2243489307643447, "grad_norm": 1.2472767828814915, "learning_rate": 9.042180583314203e-06, "loss": 0.7473, "step": 2649 }, { "epoch": 0.22443362269743808, "grad_norm": 0.6633756186784848, "learning_rate": 9.041373108943354e-06, "loss": 0.8171, "step": 2650 }, { "epoch": 0.22451831463053143, "grad_norm": 1.6452489223746374, "learning_rate": 9.040565330437779e-06, "loss": 0.7165, "step": 2651 }, { "epoch": 0.2246030065636248, "grad_norm": 1.300436035875422, "learning_rate": 9.039757247858268e-06, "loss": 0.6833, "step": 2652 }, { "epoch": 0.22468769849671819, "grad_norm": 1.1499766060692205, "learning_rate": 9.038948861265632e-06, "loss": 0.6545, "step": 2653 }, { "epoch": 0.22477239042981156, "grad_norm": 1.3539575183071655, "learning_rate": 9.038140170720707e-06, "loss": 0.6918, "step": 2654 }, { "epoch": 0.22485708236290494, "grad_norm": 6.504293992921218, "learning_rate": 9.037331176284352e-06, "loss": 0.6762, "step": 2655 }, { "epoch": 0.2249417742959983, "grad_norm": 3.1114741980377283, "learning_rate": 9.036521878017446e-06, "loss": 0.6987, "step": 2656 }, { "epoch": 0.22502646622909167, "grad_norm": 1.1058275979793135, "learning_rate": 9.035712275980895e-06, "loss": 0.5835, "step": 2657 }, { "epoch": 0.22511115816218505, "grad_norm": 1.3642677205308955, "learning_rate": 9.034902370235626e-06, "loss": 0.6579, "step": 2658 }, { "epoch": 0.22519585009527843, "grad_norm": 1.3416301769699632, "learning_rate": 9.034092160842585e-06, "loss": 0.6379, "step": 2659 }, { "epoch": 0.2252805420283718, "grad_norm": 1.3703027445171982, "learning_rate": 9.03328164786275e-06, "loss": 0.6143, "step": 2660 }, { "epoch": 0.22536523396146518, "grad_norm": 1.2143853077939197, "learning_rate": 9.032470831357113e-06, "loss": 0.6662, "step": 2661 }, { "epoch": 0.22544992589455853, "grad_norm": 1.3935443216414827, "learning_rate": 9.031659711386694e-06, "loss": 0.6789, "step": 2662 }, { "epoch": 0.2255346178276519, "grad_norm": 1.4151746096963043, "learning_rate": 9.030848288012532e-06, "loss": 0.5784, "step": 2663 }, { "epoch": 0.2256193097607453, "grad_norm": 1.2845680846319605, "learning_rate": 9.030036561295691e-06, "loss": 0.6331, "step": 2664 }, { "epoch": 0.22570400169383867, "grad_norm": 1.1177934385383683, "learning_rate": 9.029224531297261e-06, "loss": 0.6312, "step": 2665 }, { "epoch": 0.22578869362693205, "grad_norm": 1.2127751849496735, "learning_rate": 9.028412198078347e-06, "loss": 0.649, "step": 2666 }, { "epoch": 0.2258733855600254, "grad_norm": 0.6944471989933927, "learning_rate": 9.027599561700084e-06, "loss": 0.824, "step": 2667 }, { "epoch": 0.22595807749311878, "grad_norm": 1.6278294781202807, "learning_rate": 9.026786622223628e-06, "loss": 0.6722, "step": 2668 }, { "epoch": 0.22604276942621215, "grad_norm": 1.664200541858523, "learning_rate": 9.025973379710156e-06, "loss": 0.683, "step": 2669 }, { "epoch": 0.22612746135930553, "grad_norm": 1.6791555476492863, "learning_rate": 9.025159834220867e-06, "loss": 0.6748, "step": 2670 }, { "epoch": 0.2262121532923989, "grad_norm": 1.7183809502497487, "learning_rate": 9.024345985816987e-06, "loss": 0.6069, "step": 2671 }, { "epoch": 0.22629684522549226, "grad_norm": 0.6203809411326537, "learning_rate": 9.023531834559763e-06, "loss": 0.8733, "step": 2672 }, { "epoch": 0.22638153715858564, "grad_norm": 1.234769557396742, "learning_rate": 9.022717380510462e-06, "loss": 0.6903, "step": 2673 }, { "epoch": 0.22646622909167902, "grad_norm": 1.3957430013950818, "learning_rate": 9.021902623730376e-06, "loss": 0.7049, "step": 2674 }, { "epoch": 0.2265509210247724, "grad_norm": 1.343033966701863, "learning_rate": 9.02108756428082e-06, "loss": 0.6584, "step": 2675 }, { "epoch": 0.22663561295786577, "grad_norm": 1.2913732439943102, "learning_rate": 9.020272202223133e-06, "loss": 0.7113, "step": 2676 }, { "epoch": 0.22672030489095912, "grad_norm": 2.299414226412894, "learning_rate": 9.019456537618676e-06, "loss": 0.7047, "step": 2677 }, { "epoch": 0.2268049968240525, "grad_norm": 1.2671161038832595, "learning_rate": 9.018640570528828e-06, "loss": 0.6674, "step": 2678 }, { "epoch": 0.22688968875714588, "grad_norm": 1.1751416739174034, "learning_rate": 9.017824301014999e-06, "loss": 0.6939, "step": 2679 }, { "epoch": 0.22697438069023926, "grad_norm": 2.0802577222717233, "learning_rate": 9.017007729138617e-06, "loss": 0.6636, "step": 2680 }, { "epoch": 0.22705907262333264, "grad_norm": 1.2276305894171045, "learning_rate": 9.01619085496113e-06, "loss": 0.6919, "step": 2681 }, { "epoch": 0.227143764556426, "grad_norm": 1.3281381747744103, "learning_rate": 9.015373678544014e-06, "loss": 0.668, "step": 2682 }, { "epoch": 0.22722845648951936, "grad_norm": 1.350608711475612, "learning_rate": 9.014556199948768e-06, "loss": 0.7569, "step": 2683 }, { "epoch": 0.22731314842261274, "grad_norm": 1.2959644231756506, "learning_rate": 9.013738419236908e-06, "loss": 0.6594, "step": 2684 }, { "epoch": 0.22739784035570612, "grad_norm": 1.2131435776641117, "learning_rate": 9.012920336469982e-06, "loss": 0.664, "step": 2685 }, { "epoch": 0.2274825322887995, "grad_norm": 1.4616265968397217, "learning_rate": 9.012101951709547e-06, "loss": 0.6306, "step": 2686 }, { "epoch": 0.22756722422189288, "grad_norm": 1.2858105188932338, "learning_rate": 9.011283265017197e-06, "loss": 0.6357, "step": 2687 }, { "epoch": 0.22765191615498623, "grad_norm": 1.321687770913941, "learning_rate": 9.01046427645454e-06, "loss": 0.6543, "step": 2688 }, { "epoch": 0.2277366080880796, "grad_norm": 1.135657703371099, "learning_rate": 9.00964498608321e-06, "loss": 0.6658, "step": 2689 }, { "epoch": 0.22782130002117298, "grad_norm": 1.3651268072632006, "learning_rate": 9.008825393964864e-06, "loss": 0.6967, "step": 2690 }, { "epoch": 0.22790599195426636, "grad_norm": 1.3921358334826575, "learning_rate": 9.008005500161178e-06, "loss": 0.6264, "step": 2691 }, { "epoch": 0.22799068388735974, "grad_norm": 1.6159499939420117, "learning_rate": 9.007185304733855e-06, "loss": 0.6914, "step": 2692 }, { "epoch": 0.2280753758204531, "grad_norm": 1.3161988481989604, "learning_rate": 9.006364807744619e-06, "loss": 0.6567, "step": 2693 }, { "epoch": 0.22816006775354647, "grad_norm": 1.573139107999805, "learning_rate": 9.005544009255216e-06, "loss": 0.6569, "step": 2694 }, { "epoch": 0.22824475968663985, "grad_norm": 1.7173441238142093, "learning_rate": 9.004722909327417e-06, "loss": 0.6562, "step": 2695 }, { "epoch": 0.22832945161973323, "grad_norm": 2.0258477857277453, "learning_rate": 9.003901508023012e-06, "loss": 0.6821, "step": 2696 }, { "epoch": 0.2284141435528266, "grad_norm": 1.3246614540698167, "learning_rate": 9.003079805403817e-06, "loss": 0.6699, "step": 2697 }, { "epoch": 0.22849883548591995, "grad_norm": 1.25536965496121, "learning_rate": 9.002257801531668e-06, "loss": 0.6568, "step": 2698 }, { "epoch": 0.22858352741901333, "grad_norm": 1.4937969525435517, "learning_rate": 9.001435496468429e-06, "loss": 0.6265, "step": 2699 }, { "epoch": 0.2286682193521067, "grad_norm": 1.3788335660348925, "learning_rate": 9.00061289027598e-06, "loss": 0.6319, "step": 2700 }, { "epoch": 0.2287529112852001, "grad_norm": 1.1113698947363164, "learning_rate": 8.999789983016224e-06, "loss": 0.6533, "step": 2701 }, { "epoch": 0.22883760321829347, "grad_norm": 1.5352772879864345, "learning_rate": 8.998966774751094e-06, "loss": 0.701, "step": 2702 }, { "epoch": 0.22892229515138682, "grad_norm": 2.5954871878181764, "learning_rate": 8.998143265542539e-06, "loss": 0.6929, "step": 2703 }, { "epoch": 0.2290069870844802, "grad_norm": 1.3817468999932496, "learning_rate": 8.997319455452533e-06, "loss": 0.6586, "step": 2704 }, { "epoch": 0.22909167901757357, "grad_norm": 1.2776063217562716, "learning_rate": 8.996495344543068e-06, "loss": 0.6256, "step": 2705 }, { "epoch": 0.22917637095066695, "grad_norm": 1.4207327527511964, "learning_rate": 8.995670932876168e-06, "loss": 0.6052, "step": 2706 }, { "epoch": 0.22926106288376033, "grad_norm": 1.3967338501107067, "learning_rate": 8.994846220513872e-06, "loss": 0.6335, "step": 2707 }, { "epoch": 0.22934575481685368, "grad_norm": 1.2312251134203498, "learning_rate": 8.994021207518244e-06, "loss": 0.6379, "step": 2708 }, { "epoch": 0.22943044674994706, "grad_norm": 1.2886641214677566, "learning_rate": 8.993195893951371e-06, "loss": 0.6144, "step": 2709 }, { "epoch": 0.22951513868304044, "grad_norm": 1.2564554091360234, "learning_rate": 8.992370279875363e-06, "loss": 0.6107, "step": 2710 }, { "epoch": 0.22959983061613382, "grad_norm": 1.1699799384462917, "learning_rate": 8.99154436535235e-06, "loss": 0.6416, "step": 2711 }, { "epoch": 0.2296845225492272, "grad_norm": 1.122016216135589, "learning_rate": 8.990718150444486e-06, "loss": 0.6073, "step": 2712 }, { "epoch": 0.22976921448232057, "grad_norm": 1.3200207677633646, "learning_rate": 8.98989163521395e-06, "loss": 0.6699, "step": 2713 }, { "epoch": 0.22985390641541392, "grad_norm": 0.6606863196236834, "learning_rate": 8.989064819722941e-06, "loss": 0.8877, "step": 2714 }, { "epoch": 0.2299385983485073, "grad_norm": 1.176082975394489, "learning_rate": 8.988237704033682e-06, "loss": 0.6912, "step": 2715 }, { "epoch": 0.23002329028160068, "grad_norm": 1.2775565563876614, "learning_rate": 8.987410288208416e-06, "loss": 0.652, "step": 2716 }, { "epoch": 0.23010798221469406, "grad_norm": 1.3771257064931863, "learning_rate": 8.986582572309412e-06, "loss": 0.6356, "step": 2717 }, { "epoch": 0.23019267414778743, "grad_norm": 1.204653686980259, "learning_rate": 8.98575455639896e-06, "loss": 0.6577, "step": 2718 }, { "epoch": 0.23027736608088079, "grad_norm": 1.2191652256083017, "learning_rate": 8.984926240539372e-06, "loss": 0.578, "step": 2719 }, { "epoch": 0.23036205801397416, "grad_norm": 1.415227027089811, "learning_rate": 8.984097624792982e-06, "loss": 0.6554, "step": 2720 }, { "epoch": 0.23044674994706754, "grad_norm": 1.2231874758806098, "learning_rate": 8.98326870922215e-06, "loss": 0.698, "step": 2721 }, { "epoch": 0.23053144188016092, "grad_norm": 1.2285534170564443, "learning_rate": 8.982439493889254e-06, "loss": 0.6186, "step": 2722 }, { "epoch": 0.2306161338132543, "grad_norm": 1.4289162987435844, "learning_rate": 8.9816099788567e-06, "loss": 0.6554, "step": 2723 }, { "epoch": 0.23070082574634765, "grad_norm": 1.3223119411358015, "learning_rate": 8.98078016418691e-06, "loss": 0.6518, "step": 2724 }, { "epoch": 0.23078551767944103, "grad_norm": 1.289994974613884, "learning_rate": 8.979950049942333e-06, "loss": 0.6479, "step": 2725 }, { "epoch": 0.2308702096125344, "grad_norm": 1.3692282507594802, "learning_rate": 8.979119636185442e-06, "loss": 0.6303, "step": 2726 }, { "epoch": 0.23095490154562778, "grad_norm": 1.5313736690274953, "learning_rate": 8.978288922978727e-06, "loss": 0.7132, "step": 2727 }, { "epoch": 0.23103959347872116, "grad_norm": 2.9320496917485337, "learning_rate": 8.977457910384704e-06, "loss": 0.63, "step": 2728 }, { "epoch": 0.2311242854118145, "grad_norm": 1.3317838590822233, "learning_rate": 8.976626598465912e-06, "loss": 0.6633, "step": 2729 }, { "epoch": 0.2312089773449079, "grad_norm": 1.61534356186557, "learning_rate": 8.975794987284912e-06, "loss": 0.6144, "step": 2730 }, { "epoch": 0.23129366927800127, "grad_norm": 1.4404608727974848, "learning_rate": 8.974963076904285e-06, "loss": 0.6361, "step": 2731 }, { "epoch": 0.23137836121109465, "grad_norm": 1.5869383743377021, "learning_rate": 8.974130867386637e-06, "loss": 0.6184, "step": 2732 }, { "epoch": 0.23146305314418802, "grad_norm": 1.3602614337880825, "learning_rate": 8.9732983587946e-06, "loss": 0.6331, "step": 2733 }, { "epoch": 0.2315477450772814, "grad_norm": 3.571131265320885, "learning_rate": 8.97246555119082e-06, "loss": 0.6369, "step": 2734 }, { "epoch": 0.23163243701037475, "grad_norm": 1.3760490456079741, "learning_rate": 8.971632444637972e-06, "loss": 0.6198, "step": 2735 }, { "epoch": 0.23171712894346813, "grad_norm": 1.3256974348422297, "learning_rate": 8.970799039198753e-06, "loss": 0.6444, "step": 2736 }, { "epoch": 0.2318018208765615, "grad_norm": 0.5956484516696069, "learning_rate": 8.96996533493588e-06, "loss": 0.8464, "step": 2737 }, { "epoch": 0.2318865128096549, "grad_norm": 1.2117512421357635, "learning_rate": 8.969131331912091e-06, "loss": 0.6494, "step": 2738 }, { "epoch": 0.23197120474274827, "grad_norm": 1.9956126857700105, "learning_rate": 8.968297030190155e-06, "loss": 0.7007, "step": 2739 }, { "epoch": 0.23205589667584162, "grad_norm": 0.632566322151214, "learning_rate": 8.967462429832852e-06, "loss": 0.8772, "step": 2740 }, { "epoch": 0.232140588608935, "grad_norm": 1.3631021299845387, "learning_rate": 8.966627530902994e-06, "loss": 0.685, "step": 2741 }, { "epoch": 0.23222528054202837, "grad_norm": 1.272320275783675, "learning_rate": 8.96579233346341e-06, "loss": 0.7453, "step": 2742 }, { "epoch": 0.23230997247512175, "grad_norm": 1.5844001149652815, "learning_rate": 8.964956837576952e-06, "loss": 0.6317, "step": 2743 }, { "epoch": 0.23239466440821513, "grad_norm": 1.3931273661777437, "learning_rate": 8.964121043306498e-06, "loss": 0.6301, "step": 2744 }, { "epoch": 0.23247935634130848, "grad_norm": 1.3142951682764976, "learning_rate": 8.963284950714944e-06, "loss": 0.6611, "step": 2745 }, { "epoch": 0.23256404827440186, "grad_norm": 1.3734969937135941, "learning_rate": 8.962448559865209e-06, "loss": 0.6774, "step": 2746 }, { "epoch": 0.23264874020749524, "grad_norm": 1.3336129037962818, "learning_rate": 8.961611870820239e-06, "loss": 0.6044, "step": 2747 }, { "epoch": 0.23273343214058861, "grad_norm": 1.357593094650834, "learning_rate": 8.960774883642998e-06, "loss": 0.6639, "step": 2748 }, { "epoch": 0.232818124073682, "grad_norm": 1.5646588093358194, "learning_rate": 8.959937598396475e-06, "loss": 0.6502, "step": 2749 }, { "epoch": 0.23290281600677534, "grad_norm": 1.48735536591142, "learning_rate": 8.959100015143675e-06, "loss": 0.6547, "step": 2750 }, { "epoch": 0.23298750793986872, "grad_norm": 2.416066747680968, "learning_rate": 8.95826213394764e-06, "loss": 0.6429, "step": 2751 }, { "epoch": 0.2330721998729621, "grad_norm": 1.17318347075754, "learning_rate": 8.957423954871415e-06, "loss": 0.6546, "step": 2752 }, { "epoch": 0.23315689180605548, "grad_norm": 1.3108358039591577, "learning_rate": 8.956585477978084e-06, "loss": 0.6611, "step": 2753 }, { "epoch": 0.23324158373914886, "grad_norm": 1.5198053366849444, "learning_rate": 8.955746703330744e-06, "loss": 0.6925, "step": 2754 }, { "epoch": 0.2333262756722422, "grad_norm": 1.5418763642002962, "learning_rate": 8.954907630992517e-06, "loss": 0.6001, "step": 2755 }, { "epoch": 0.23341096760533558, "grad_norm": 1.48421941845434, "learning_rate": 8.954068261026549e-06, "loss": 0.6657, "step": 2756 }, { "epoch": 0.23349565953842896, "grad_norm": 1.1844607919732657, "learning_rate": 8.953228593496009e-06, "loss": 0.6342, "step": 2757 }, { "epoch": 0.23358035147152234, "grad_norm": 1.2143188269787824, "learning_rate": 8.95238862846408e-06, "loss": 0.6508, "step": 2758 }, { "epoch": 0.23366504340461572, "grad_norm": 1.3600710808409338, "learning_rate": 8.95154836599398e-06, "loss": 0.7228, "step": 2759 }, { "epoch": 0.2337497353377091, "grad_norm": 1.2354504993389455, "learning_rate": 8.950707806148942e-06, "loss": 0.6671, "step": 2760 }, { "epoch": 0.23383442727080245, "grad_norm": 1.806645061605876, "learning_rate": 8.94986694899222e-06, "loss": 0.603, "step": 2761 }, { "epoch": 0.23391911920389583, "grad_norm": 1.6637825653512592, "learning_rate": 8.949025794587095e-06, "loss": 0.6507, "step": 2762 }, { "epoch": 0.2340038111369892, "grad_norm": 1.3026329874355798, "learning_rate": 8.948184342996869e-06, "loss": 0.6321, "step": 2763 }, { "epoch": 0.23408850307008258, "grad_norm": 0.6232413929113002, "learning_rate": 8.947342594284863e-06, "loss": 0.8322, "step": 2764 }, { "epoch": 0.23417319500317596, "grad_norm": 1.2697964180092038, "learning_rate": 8.946500548514425e-06, "loss": 0.6649, "step": 2765 }, { "epoch": 0.2342578869362693, "grad_norm": 1.33470185931903, "learning_rate": 8.945658205748922e-06, "loss": 0.6158, "step": 2766 }, { "epoch": 0.2343425788693627, "grad_norm": 1.2507878887007613, "learning_rate": 8.944815566051748e-06, "loss": 0.6214, "step": 2767 }, { "epoch": 0.23442727080245607, "grad_norm": 1.5453742573114317, "learning_rate": 8.943972629486312e-06, "loss": 0.6338, "step": 2768 }, { "epoch": 0.23451196273554945, "grad_norm": 1.1420506896375462, "learning_rate": 8.943129396116052e-06, "loss": 0.6264, "step": 2769 }, { "epoch": 0.23459665466864282, "grad_norm": 1.2116936643053406, "learning_rate": 8.942285866004425e-06, "loss": 0.7062, "step": 2770 }, { "epoch": 0.23468134660173617, "grad_norm": 1.1969179053230166, "learning_rate": 8.941442039214912e-06, "loss": 0.6732, "step": 2771 }, { "epoch": 0.23476603853482955, "grad_norm": 1.6845061215621968, "learning_rate": 8.940597915811013e-06, "loss": 0.6375, "step": 2772 }, { "epoch": 0.23485073046792293, "grad_norm": 1.1891851459430336, "learning_rate": 8.939753495856255e-06, "loss": 0.6515, "step": 2773 }, { "epoch": 0.2349354224010163, "grad_norm": 1.508996468521421, "learning_rate": 8.938908779414185e-06, "loss": 0.6762, "step": 2774 }, { "epoch": 0.2350201143341097, "grad_norm": 11.541051236676413, "learning_rate": 8.938063766548372e-06, "loss": 0.6909, "step": 2775 }, { "epoch": 0.23510480626720304, "grad_norm": 1.5558253441260157, "learning_rate": 8.937218457322406e-06, "loss": 0.6807, "step": 2776 }, { "epoch": 0.23518949820029642, "grad_norm": 2.916263799259886, "learning_rate": 8.936372851799904e-06, "loss": 0.6136, "step": 2777 }, { "epoch": 0.2352741901333898, "grad_norm": 1.7177185795959637, "learning_rate": 8.9355269500445e-06, "loss": 0.6472, "step": 2778 }, { "epoch": 0.23535888206648317, "grad_norm": 0.658569943517912, "learning_rate": 8.934680752119856e-06, "loss": 0.8421, "step": 2779 }, { "epoch": 0.23544357399957655, "grad_norm": 0.6377221989221367, "learning_rate": 8.933834258089647e-06, "loss": 0.8529, "step": 2780 }, { "epoch": 0.2355282659326699, "grad_norm": 1.3687493223714406, "learning_rate": 8.932987468017579e-06, "loss": 0.6738, "step": 2781 }, { "epoch": 0.23561295786576328, "grad_norm": 0.6756987232669122, "learning_rate": 8.93214038196738e-06, "loss": 0.854, "step": 2782 }, { "epoch": 0.23569764979885666, "grad_norm": 1.2546405433655023, "learning_rate": 8.931293000002795e-06, "loss": 0.6658, "step": 2783 }, { "epoch": 0.23578234173195003, "grad_norm": 1.2292693810448896, "learning_rate": 8.930445322187596e-06, "loss": 0.6343, "step": 2784 }, { "epoch": 0.2358670336650434, "grad_norm": 1.3132336102940096, "learning_rate": 8.929597348585571e-06, "loss": 0.6114, "step": 2785 }, { "epoch": 0.2359517255981368, "grad_norm": 1.3327742484146563, "learning_rate": 8.92874907926054e-06, "loss": 0.6514, "step": 2786 }, { "epoch": 0.23603641753123014, "grad_norm": 1.5248572441628567, "learning_rate": 8.927900514276335e-06, "loss": 0.6644, "step": 2787 }, { "epoch": 0.23612110946432352, "grad_norm": 1.2784839208610665, "learning_rate": 8.927051653696817e-06, "loss": 0.6909, "step": 2788 }, { "epoch": 0.2362058013974169, "grad_norm": 1.5592367998713836, "learning_rate": 8.92620249758587e-06, "loss": 0.6269, "step": 2789 }, { "epoch": 0.23629049333051028, "grad_norm": 1.9542627186188777, "learning_rate": 8.925353046007391e-06, "loss": 0.636, "step": 2790 }, { "epoch": 0.23637518526360365, "grad_norm": 1.3486568597924908, "learning_rate": 8.924503299025313e-06, "loss": 0.6347, "step": 2791 }, { "epoch": 0.236459877196697, "grad_norm": 1.302722420170226, "learning_rate": 8.923653256703579e-06, "loss": 0.6797, "step": 2792 }, { "epoch": 0.23654456912979038, "grad_norm": 2.0577006356910537, "learning_rate": 8.92280291910616e-06, "loss": 0.6703, "step": 2793 }, { "epoch": 0.23662926106288376, "grad_norm": 0.6703484951442883, "learning_rate": 8.92195228629705e-06, "loss": 0.8407, "step": 2794 }, { "epoch": 0.23671395299597714, "grad_norm": 2.670980631364609, "learning_rate": 8.921101358340263e-06, "loss": 0.6017, "step": 2795 }, { "epoch": 0.23679864492907052, "grad_norm": 1.4049800787022382, "learning_rate": 8.920250135299835e-06, "loss": 0.6847, "step": 2796 }, { "epoch": 0.23688333686216387, "grad_norm": 1.4975903053687818, "learning_rate": 8.919398617239826e-06, "loss": 0.6842, "step": 2797 }, { "epoch": 0.23696802879525725, "grad_norm": 5.378529293864709, "learning_rate": 8.918546804224317e-06, "loss": 0.5932, "step": 2798 }, { "epoch": 0.23705272072835062, "grad_norm": 1.261349800712663, "learning_rate": 8.917694696317412e-06, "loss": 0.6681, "step": 2799 }, { "epoch": 0.237137412661444, "grad_norm": 0.6549371764062069, "learning_rate": 8.916842293583235e-06, "loss": 0.8816, "step": 2800 }, { "epoch": 0.23722210459453738, "grad_norm": 1.4540549313504274, "learning_rate": 8.915989596085934e-06, "loss": 0.7061, "step": 2801 }, { "epoch": 0.23730679652763073, "grad_norm": 1.402808631262981, "learning_rate": 8.91513660388968e-06, "loss": 0.6926, "step": 2802 }, { "epoch": 0.2373914884607241, "grad_norm": 1.4776796809869583, "learning_rate": 8.914283317058665e-06, "loss": 0.6229, "step": 2803 }, { "epoch": 0.2374761803938175, "grad_norm": 0.6198030145861757, "learning_rate": 8.913429735657104e-06, "loss": 0.8336, "step": 2804 }, { "epoch": 0.23756087232691087, "grad_norm": 1.1488554072495754, "learning_rate": 8.912575859749233e-06, "loss": 0.6877, "step": 2805 }, { "epoch": 0.23764556426000424, "grad_norm": 1.2038309636130875, "learning_rate": 8.911721689399308e-06, "loss": 0.599, "step": 2806 }, { "epoch": 0.2377302561930976, "grad_norm": 1.2453817538651495, "learning_rate": 8.910867224671616e-06, "loss": 0.69, "step": 2807 }, { "epoch": 0.23781494812619097, "grad_norm": 1.5211096831364177, "learning_rate": 8.910012465630453e-06, "loss": 0.6134, "step": 2808 }, { "epoch": 0.23789964005928435, "grad_norm": 1.3249334006754612, "learning_rate": 8.90915741234015e-06, "loss": 0.6331, "step": 2809 }, { "epoch": 0.23798433199237773, "grad_norm": 1.3387659731125452, "learning_rate": 8.90830206486505e-06, "loss": 0.6578, "step": 2810 }, { "epoch": 0.2380690239254711, "grad_norm": 0.5710521833258609, "learning_rate": 8.907446423269526e-06, "loss": 0.8284, "step": 2811 }, { "epoch": 0.23815371585856449, "grad_norm": 1.2932858397353493, "learning_rate": 8.906590487617965e-06, "loss": 0.6136, "step": 2812 }, { "epoch": 0.23823840779165784, "grad_norm": 1.8934586127423398, "learning_rate": 8.905734257974787e-06, "loss": 0.665, "step": 2813 }, { "epoch": 0.23832309972475121, "grad_norm": 1.2729601401054236, "learning_rate": 8.904877734404422e-06, "loss": 0.7128, "step": 2814 }, { "epoch": 0.2384077916578446, "grad_norm": 1.3345237991303094, "learning_rate": 8.90402091697133e-06, "loss": 0.7053, "step": 2815 }, { "epoch": 0.23849248359093797, "grad_norm": 1.7502438522878188, "learning_rate": 8.903163805739991e-06, "loss": 0.6434, "step": 2816 }, { "epoch": 0.23857717552403135, "grad_norm": 1.8967481811967553, "learning_rate": 8.902306400774909e-06, "loss": 0.723, "step": 2817 }, { "epoch": 0.2386618674571247, "grad_norm": 1.5074822880136192, "learning_rate": 8.901448702140605e-06, "loss": 0.6063, "step": 2818 }, { "epoch": 0.23874655939021808, "grad_norm": 1.108967736723843, "learning_rate": 8.900590709901628e-06, "loss": 0.633, "step": 2819 }, { "epoch": 0.23883125132331146, "grad_norm": 1.314640290302839, "learning_rate": 8.899732424122546e-06, "loss": 0.6162, "step": 2820 }, { "epoch": 0.23891594325640483, "grad_norm": 1.577838732263888, "learning_rate": 8.898873844867948e-06, "loss": 0.6441, "step": 2821 }, { "epoch": 0.2390006351894982, "grad_norm": 1.9517576528245004, "learning_rate": 8.898014972202448e-06, "loss": 0.5923, "step": 2822 }, { "epoch": 0.23908532712259156, "grad_norm": 1.2483036958280127, "learning_rate": 8.89715580619068e-06, "loss": 0.5817, "step": 2823 }, { "epoch": 0.23917001905568494, "grad_norm": 1.3772127496169375, "learning_rate": 8.896296346897303e-06, "loss": 0.678, "step": 2824 }, { "epoch": 0.23925471098877832, "grad_norm": 1.2275989975139836, "learning_rate": 8.895436594386992e-06, "loss": 0.6329, "step": 2825 }, { "epoch": 0.2393394029218717, "grad_norm": 1.3047532155609713, "learning_rate": 8.894576548724449e-06, "loss": 0.6357, "step": 2826 }, { "epoch": 0.23942409485496507, "grad_norm": 1.5090725937449216, "learning_rate": 8.8937162099744e-06, "loss": 0.6567, "step": 2827 }, { "epoch": 0.23950878678805843, "grad_norm": 1.6665600069158428, "learning_rate": 8.892855578201588e-06, "loss": 0.6793, "step": 2828 }, { "epoch": 0.2395934787211518, "grad_norm": 1.3840398196978905, "learning_rate": 8.89199465347078e-06, "loss": 0.6312, "step": 2829 }, { "epoch": 0.23967817065424518, "grad_norm": 1.2558114941476999, "learning_rate": 8.891133435846763e-06, "loss": 0.6488, "step": 2830 }, { "epoch": 0.23976286258733856, "grad_norm": 1.5479869182223525, "learning_rate": 8.890271925394353e-06, "loss": 0.6797, "step": 2831 }, { "epoch": 0.23984755452043194, "grad_norm": 1.1734584808535002, "learning_rate": 8.88941012217838e-06, "loss": 0.6321, "step": 2832 }, { "epoch": 0.2399322464535253, "grad_norm": 1.3225831266474741, "learning_rate": 8.8885480262637e-06, "loss": 0.6435, "step": 2833 }, { "epoch": 0.24001693838661867, "grad_norm": 1.4798399554369674, "learning_rate": 8.887685637715189e-06, "loss": 0.743, "step": 2834 }, { "epoch": 0.24010163031971204, "grad_norm": 1.4475132472247243, "learning_rate": 8.88682295659775e-06, "loss": 0.673, "step": 2835 }, { "epoch": 0.24018632225280542, "grad_norm": 0.6486605962659632, "learning_rate": 8.8859599829763e-06, "loss": 0.8709, "step": 2836 }, { "epoch": 0.2402710141858988, "grad_norm": 0.6394890154416725, "learning_rate": 8.885096716915783e-06, "loss": 0.8786, "step": 2837 }, { "epoch": 0.24035570611899218, "grad_norm": 1.4374139936442225, "learning_rate": 8.884233158481166e-06, "loss": 0.6852, "step": 2838 }, { "epoch": 0.24044039805208553, "grad_norm": 1.563996133274901, "learning_rate": 8.883369307737438e-06, "loss": 0.7267, "step": 2839 }, { "epoch": 0.2405250899851789, "grad_norm": 1.6738589136065052, "learning_rate": 8.882505164749604e-06, "loss": 0.6451, "step": 2840 }, { "epoch": 0.2406097819182723, "grad_norm": 1.0742276794024537, "learning_rate": 8.881640729582699e-06, "loss": 0.6636, "step": 2841 }, { "epoch": 0.24069447385136566, "grad_norm": 1.3385001718370408, "learning_rate": 8.880776002301774e-06, "loss": 0.624, "step": 2842 }, { "epoch": 0.24077916578445904, "grad_norm": 1.5587224258879182, "learning_rate": 8.879910982971905e-06, "loss": 0.6702, "step": 2843 }, { "epoch": 0.2408638577175524, "grad_norm": 0.6022419860819452, "learning_rate": 8.879045671658191e-06, "loss": 0.8656, "step": 2844 }, { "epoch": 0.24094854965064577, "grad_norm": 1.20948720119982, "learning_rate": 8.878180068425748e-06, "loss": 0.6666, "step": 2845 }, { "epoch": 0.24103324158373915, "grad_norm": 1.3461707944352683, "learning_rate": 8.87731417333972e-06, "loss": 0.7408, "step": 2846 }, { "epoch": 0.24111793351683253, "grad_norm": 1.1974098513249263, "learning_rate": 8.876447986465269e-06, "loss": 0.6733, "step": 2847 }, { "epoch": 0.2412026254499259, "grad_norm": 1.819506584425668, "learning_rate": 8.875581507867579e-06, "loss": 0.6965, "step": 2848 }, { "epoch": 0.24128731738301926, "grad_norm": 1.4926560187283293, "learning_rate": 8.874714737611859e-06, "loss": 0.7199, "step": 2849 }, { "epoch": 0.24137200931611263, "grad_norm": 1.6830244474832528, "learning_rate": 8.873847675763338e-06, "loss": 0.6975, "step": 2850 }, { "epoch": 0.241456701249206, "grad_norm": 1.126052109214739, "learning_rate": 8.872980322387265e-06, "loss": 0.6271, "step": 2851 }, { "epoch": 0.2415413931822994, "grad_norm": 1.3776794022540633, "learning_rate": 8.872112677548916e-06, "loss": 0.6475, "step": 2852 }, { "epoch": 0.24162608511539277, "grad_norm": 1.7689727806262796, "learning_rate": 8.871244741313582e-06, "loss": 0.6079, "step": 2853 }, { "epoch": 0.24171077704848612, "grad_norm": 1.1819241941231797, "learning_rate": 8.870376513746584e-06, "loss": 0.6398, "step": 2854 }, { "epoch": 0.2417954689815795, "grad_norm": 1.242208015724714, "learning_rate": 8.869507994913258e-06, "loss": 0.631, "step": 2855 }, { "epoch": 0.24188016091467288, "grad_norm": 1.1035433516906596, "learning_rate": 8.868639184878966e-06, "loss": 0.6822, "step": 2856 }, { "epoch": 0.24196485284776625, "grad_norm": 1.4281558234215583, "learning_rate": 8.867770083709088e-06, "loss": 0.6707, "step": 2857 }, { "epoch": 0.24204954478085963, "grad_norm": 1.4206592828885103, "learning_rate": 8.866900691469034e-06, "loss": 0.6651, "step": 2858 }, { "epoch": 0.24213423671395298, "grad_norm": 1.586346351036505, "learning_rate": 8.866031008224223e-06, "loss": 0.652, "step": 2859 }, { "epoch": 0.24221892864704636, "grad_norm": 1.4567471730063701, "learning_rate": 8.865161034040109e-06, "loss": 0.6025, "step": 2860 }, { "epoch": 0.24230362058013974, "grad_norm": 1.3056799553387244, "learning_rate": 8.86429076898216e-06, "loss": 0.6427, "step": 2861 }, { "epoch": 0.24238831251323312, "grad_norm": 0.5977876028392222, "learning_rate": 8.863420213115868e-06, "loss": 0.8192, "step": 2862 }, { "epoch": 0.2424730044463265, "grad_norm": 1.3101468599403123, "learning_rate": 8.862549366506748e-06, "loss": 0.6577, "step": 2863 }, { "epoch": 0.24255769637941987, "grad_norm": 1.310156319173552, "learning_rate": 8.861678229220334e-06, "loss": 0.7062, "step": 2864 }, { "epoch": 0.24264238831251322, "grad_norm": 1.3497033484082697, "learning_rate": 8.860806801322184e-06, "loss": 0.6851, "step": 2865 }, { "epoch": 0.2427270802456066, "grad_norm": 1.263908754257066, "learning_rate": 8.85993508287788e-06, "loss": 0.6941, "step": 2866 }, { "epoch": 0.24281177217869998, "grad_norm": 1.4051814239282479, "learning_rate": 8.85906307395302e-06, "loss": 0.7092, "step": 2867 }, { "epoch": 0.24289646411179336, "grad_norm": 1.3112592761811726, "learning_rate": 8.858190774613231e-06, "loss": 0.6178, "step": 2868 }, { "epoch": 0.24298115604488674, "grad_norm": 2.086268846574329, "learning_rate": 8.857318184924155e-06, "loss": 0.6207, "step": 2869 }, { "epoch": 0.2430658479779801, "grad_norm": 1.3311186562215072, "learning_rate": 8.85644530495146e-06, "loss": 0.5983, "step": 2870 }, { "epoch": 0.24315053991107347, "grad_norm": 1.6178226933131403, "learning_rate": 8.855572134760835e-06, "loss": 0.6277, "step": 2871 }, { "epoch": 0.24323523184416684, "grad_norm": 1.1403898212318169, "learning_rate": 8.85469867441799e-06, "loss": 0.6866, "step": 2872 }, { "epoch": 0.24331992377726022, "grad_norm": 1.4703839242242245, "learning_rate": 8.853824923988659e-06, "loss": 0.6484, "step": 2873 }, { "epoch": 0.2434046157103536, "grad_norm": 1.5303914871156918, "learning_rate": 8.852950883538596e-06, "loss": 0.6711, "step": 2874 }, { "epoch": 0.24348930764344695, "grad_norm": 1.4983350839046203, "learning_rate": 8.852076553133576e-06, "loss": 0.6554, "step": 2875 }, { "epoch": 0.24357399957654033, "grad_norm": 1.2936191297340942, "learning_rate": 8.851201932839398e-06, "loss": 0.6949, "step": 2876 }, { "epoch": 0.2436586915096337, "grad_norm": 1.6329692180086344, "learning_rate": 8.85032702272188e-06, "loss": 0.6275, "step": 2877 }, { "epoch": 0.24374338344272709, "grad_norm": 1.1563300936984031, "learning_rate": 8.849451822846867e-06, "loss": 0.6448, "step": 2878 }, { "epoch": 0.24382807537582046, "grad_norm": 1.4045208557503661, "learning_rate": 8.84857633328022e-06, "loss": 0.6125, "step": 2879 }, { "epoch": 0.2439127673089138, "grad_norm": 0.717124586959047, "learning_rate": 8.847700554087824e-06, "loss": 0.8964, "step": 2880 }, { "epoch": 0.2439974592420072, "grad_norm": 1.7316899417981448, "learning_rate": 8.846824485335588e-06, "loss": 0.6421, "step": 2881 }, { "epoch": 0.24408215117510057, "grad_norm": 1.48956398725525, "learning_rate": 8.845948127089439e-06, "loss": 0.6479, "step": 2882 }, { "epoch": 0.24416684310819395, "grad_norm": 1.304790489944966, "learning_rate": 8.845071479415329e-06, "loss": 0.6432, "step": 2883 }, { "epoch": 0.24425153504128733, "grad_norm": 1.4919098207782329, "learning_rate": 8.844194542379228e-06, "loss": 0.6425, "step": 2884 }, { "epoch": 0.24433622697438068, "grad_norm": 1.4617510346615825, "learning_rate": 8.84331731604713e-06, "loss": 0.6644, "step": 2885 }, { "epoch": 0.24442091890747406, "grad_norm": 1.2271244983117386, "learning_rate": 8.842439800485057e-06, "loss": 0.6037, "step": 2886 }, { "epoch": 0.24450561084056743, "grad_norm": 0.6147104261054085, "learning_rate": 8.84156199575904e-06, "loss": 0.8465, "step": 2887 }, { "epoch": 0.2445903027736608, "grad_norm": 1.4061414484309651, "learning_rate": 8.840683901935141e-06, "loss": 0.6883, "step": 2888 }, { "epoch": 0.2446749947067542, "grad_norm": 1.1361284837123677, "learning_rate": 8.83980551907944e-06, "loss": 0.6057, "step": 2889 }, { "epoch": 0.24475968663984757, "grad_norm": 1.11692440736696, "learning_rate": 8.838926847258042e-06, "loss": 0.627, "step": 2890 }, { "epoch": 0.24484437857294092, "grad_norm": 1.5496554278829642, "learning_rate": 8.83804788653707e-06, "loss": 0.6156, "step": 2891 }, { "epoch": 0.2449290705060343, "grad_norm": 3.293510794222861, "learning_rate": 8.837168636982673e-06, "loss": 0.6907, "step": 2892 }, { "epoch": 0.24501376243912767, "grad_norm": 0.7037042251942275, "learning_rate": 8.836289098661016e-06, "loss": 0.9115, "step": 2893 }, { "epoch": 0.24509845437222105, "grad_norm": 2.146674584786311, "learning_rate": 8.83540927163829e-06, "loss": 0.6341, "step": 2894 }, { "epoch": 0.24518314630531443, "grad_norm": 1.285403480864566, "learning_rate": 8.834529155980706e-06, "loss": 0.6329, "step": 2895 }, { "epoch": 0.24526783823840778, "grad_norm": 1.5610526168921068, "learning_rate": 8.8336487517545e-06, "loss": 0.6293, "step": 2896 }, { "epoch": 0.24535253017150116, "grad_norm": 1.5405993628972718, "learning_rate": 8.832768059025925e-06, "loss": 0.6983, "step": 2897 }, { "epoch": 0.24543722210459454, "grad_norm": 1.1082820019293556, "learning_rate": 8.83188707786126e-06, "loss": 0.6269, "step": 2898 }, { "epoch": 0.24552191403768792, "grad_norm": 1.6757800777228073, "learning_rate": 8.831005808326799e-06, "loss": 0.6912, "step": 2899 }, { "epoch": 0.2456066059707813, "grad_norm": 0.605417487258563, "learning_rate": 8.830124250488863e-06, "loss": 0.8407, "step": 2900 }, { "epoch": 0.24569129790387464, "grad_norm": 1.282376020391886, "learning_rate": 8.829242404413799e-06, "loss": 0.6016, "step": 2901 }, { "epoch": 0.24577598983696802, "grad_norm": 1.3846279607729353, "learning_rate": 8.828360270167964e-06, "loss": 0.695, "step": 2902 }, { "epoch": 0.2458606817700614, "grad_norm": 2.138107357100828, "learning_rate": 8.827477847817749e-06, "loss": 0.6491, "step": 2903 }, { "epoch": 0.24594537370315478, "grad_norm": 1.3066089337715758, "learning_rate": 8.826595137429558e-06, "loss": 0.6279, "step": 2904 }, { "epoch": 0.24603006563624816, "grad_norm": 1.6373468021532636, "learning_rate": 8.825712139069822e-06, "loss": 0.7022, "step": 2905 }, { "epoch": 0.2461147575693415, "grad_norm": 0.6894961438590403, "learning_rate": 8.824828852804988e-06, "loss": 0.8718, "step": 2906 }, { "epoch": 0.2461994495024349, "grad_norm": 1.2413589859751055, "learning_rate": 8.823945278701528e-06, "loss": 0.7121, "step": 2907 }, { "epoch": 0.24628414143552826, "grad_norm": 1.5086028081413718, "learning_rate": 8.823061416825937e-06, "loss": 0.6769, "step": 2908 }, { "epoch": 0.24636883336862164, "grad_norm": 3.211054225184207, "learning_rate": 8.822177267244733e-06, "loss": 0.6424, "step": 2909 }, { "epoch": 0.24645352530171502, "grad_norm": 1.33047001676566, "learning_rate": 8.821292830024449e-06, "loss": 0.6661, "step": 2910 }, { "epoch": 0.24653821723480837, "grad_norm": 2.046287920471672, "learning_rate": 8.820408105231644e-06, "loss": 0.5655, "step": 2911 }, { "epoch": 0.24662290916790175, "grad_norm": 1.2683508293776142, "learning_rate": 8.8195230929329e-06, "loss": 0.6015, "step": 2912 }, { "epoch": 0.24670760110099513, "grad_norm": 1.2533323317826133, "learning_rate": 8.818637793194817e-06, "loss": 0.5842, "step": 2913 }, { "epoch": 0.2467922930340885, "grad_norm": 1.7067347972657734, "learning_rate": 8.81775220608402e-06, "loss": 0.6519, "step": 2914 }, { "epoch": 0.24687698496718188, "grad_norm": 5.033699991335159, "learning_rate": 8.816866331667154e-06, "loss": 0.7087, "step": 2915 }, { "epoch": 0.24696167690027526, "grad_norm": 2.3718256065173615, "learning_rate": 8.815980170010883e-06, "loss": 0.6842, "step": 2916 }, { "epoch": 0.2470463688333686, "grad_norm": 1.767420587056851, "learning_rate": 8.8150937211819e-06, "loss": 0.7009, "step": 2917 }, { "epoch": 0.247131060766462, "grad_norm": 0.6717328285516045, "learning_rate": 8.81420698524691e-06, "loss": 0.8841, "step": 2918 }, { "epoch": 0.24721575269955537, "grad_norm": 0.7318794222668071, "learning_rate": 8.81331996227265e-06, "loss": 0.8542, "step": 2919 }, { "epoch": 0.24730044463264875, "grad_norm": 1.3838553292006877, "learning_rate": 8.812432652325871e-06, "loss": 0.6791, "step": 2920 }, { "epoch": 0.24738513656574213, "grad_norm": 1.9630522168157218, "learning_rate": 8.811545055473345e-06, "loss": 0.7028, "step": 2921 }, { "epoch": 0.24746982849883548, "grad_norm": 1.6577688590921082, "learning_rate": 8.81065717178187e-06, "loss": 0.6621, "step": 2922 }, { "epoch": 0.24755452043192885, "grad_norm": 1.3037631839258015, "learning_rate": 8.809769001318266e-06, "loss": 0.6991, "step": 2923 }, { "epoch": 0.24763921236502223, "grad_norm": 1.2237434192367882, "learning_rate": 8.808880544149371e-06, "loss": 0.6269, "step": 2924 }, { "epoch": 0.2477239042981156, "grad_norm": 1.2017946077607966, "learning_rate": 8.807991800342046e-06, "loss": 0.6231, "step": 2925 }, { "epoch": 0.247808596231209, "grad_norm": 1.0654303409519124, "learning_rate": 8.807102769963172e-06, "loss": 0.6507, "step": 2926 }, { "epoch": 0.24789328816430234, "grad_norm": 1.3050023739988166, "learning_rate": 8.806213453079658e-06, "loss": 0.6163, "step": 2927 }, { "epoch": 0.24797798009739572, "grad_norm": 2.101565540215523, "learning_rate": 8.805323849758425e-06, "loss": 0.6058, "step": 2928 }, { "epoch": 0.2480626720304891, "grad_norm": 1.2754665392907154, "learning_rate": 8.804433960066421e-06, "loss": 0.7045, "step": 2929 }, { "epoch": 0.24814736396358247, "grad_norm": 1.2332704773873355, "learning_rate": 8.803543784070619e-06, "loss": 0.6351, "step": 2930 }, { "epoch": 0.24823205589667585, "grad_norm": 1.6185329607106114, "learning_rate": 8.802653321838004e-06, "loss": 0.692, "step": 2931 }, { "epoch": 0.2483167478297692, "grad_norm": 1.3964906910096153, "learning_rate": 8.80176257343559e-06, "loss": 0.6528, "step": 2932 }, { "epoch": 0.24840143976286258, "grad_norm": 1.876603103456322, "learning_rate": 8.800871538930413e-06, "loss": 0.6464, "step": 2933 }, { "epoch": 0.24848613169595596, "grad_norm": 1.2298894945536916, "learning_rate": 8.799980218389526e-06, "loss": 0.6275, "step": 2934 }, { "epoch": 0.24857082362904934, "grad_norm": 1.7682324148982804, "learning_rate": 8.799088611880005e-06, "loss": 0.6052, "step": 2935 }, { "epoch": 0.24865551556214271, "grad_norm": 1.292781625583126, "learning_rate": 8.79819671946895e-06, "loss": 0.6377, "step": 2936 }, { "epoch": 0.24874020749523607, "grad_norm": 1.692536180262213, "learning_rate": 8.797304541223478e-06, "loss": 0.717, "step": 2937 }, { "epoch": 0.24882489942832944, "grad_norm": 1.308844020160638, "learning_rate": 8.796412077210731e-06, "loss": 0.6631, "step": 2938 }, { "epoch": 0.24890959136142282, "grad_norm": 2.485765261859863, "learning_rate": 8.795519327497875e-06, "loss": 0.6478, "step": 2939 }, { "epoch": 0.2489942832945162, "grad_norm": 1.3163004931560252, "learning_rate": 8.794626292152089e-06, "loss": 0.6439, "step": 2940 }, { "epoch": 0.24907897522760958, "grad_norm": 1.2488674962125725, "learning_rate": 8.793732971240582e-06, "loss": 0.6121, "step": 2941 }, { "epoch": 0.24916366716070296, "grad_norm": 1.4526339698162436, "learning_rate": 8.79283936483058e-06, "loss": 0.6403, "step": 2942 }, { "epoch": 0.2492483590937963, "grad_norm": 1.8015658922345206, "learning_rate": 8.791945472989331e-06, "loss": 0.6777, "step": 2943 }, { "epoch": 0.24933305102688968, "grad_norm": 2.832222847849455, "learning_rate": 8.791051295784107e-06, "loss": 0.6789, "step": 2944 }, { "epoch": 0.24941774295998306, "grad_norm": 1.614349444828923, "learning_rate": 8.790156833282198e-06, "loss": 0.6368, "step": 2945 }, { "epoch": 0.24950243489307644, "grad_norm": 1.1216199151615296, "learning_rate": 8.789262085550917e-06, "loss": 0.7152, "step": 2946 }, { "epoch": 0.24958712682616982, "grad_norm": 1.4215205605548955, "learning_rate": 8.7883670526576e-06, "loss": 0.7125, "step": 2947 }, { "epoch": 0.24967181875926317, "grad_norm": 1.097967400602974, "learning_rate": 8.787471734669601e-06, "loss": 0.6155, "step": 2948 }, { "epoch": 0.24975651069235655, "grad_norm": 1.3588515340453453, "learning_rate": 8.786576131654298e-06, "loss": 0.6446, "step": 2949 }, { "epoch": 0.24984120262544993, "grad_norm": 1.2972230968433822, "learning_rate": 8.785680243679092e-06, "loss": 0.6291, "step": 2950 }, { "epoch": 0.2499258945585433, "grad_norm": 1.6807877670456137, "learning_rate": 8.784784070811401e-06, "loss": 0.6201, "step": 2951 }, { "epoch": 0.25001058649163665, "grad_norm": 2.7341977689188037, "learning_rate": 8.783887613118667e-06, "loss": 0.6738, "step": 2952 }, { "epoch": 0.25009527842473006, "grad_norm": 2.3064979712963902, "learning_rate": 8.782990870668353e-06, "loss": 0.7014, "step": 2953 }, { "epoch": 0.2501799703578234, "grad_norm": 1.3574847308556974, "learning_rate": 8.782093843527943e-06, "loss": 0.7099, "step": 2954 }, { "epoch": 0.2502646622909168, "grad_norm": 1.1275793564457117, "learning_rate": 8.781196531764945e-06, "loss": 0.6546, "step": 2955 }, { "epoch": 0.25034935422401017, "grad_norm": 1.429194089788605, "learning_rate": 8.780298935446887e-06, "loss": 0.6626, "step": 2956 }, { "epoch": 0.2504340461571035, "grad_norm": 1.1616387133469173, "learning_rate": 8.779401054641315e-06, "loss": 0.6136, "step": 2957 }, { "epoch": 0.2505187380901969, "grad_norm": 1.3633251811709939, "learning_rate": 8.778502889415802e-06, "loss": 0.6442, "step": 2958 }, { "epoch": 0.2506034300232903, "grad_norm": 1.6083078868643739, "learning_rate": 8.777604439837938e-06, "loss": 0.6477, "step": 2959 }, { "epoch": 0.2506881219563837, "grad_norm": 5.4962061377984925, "learning_rate": 8.776705705975336e-06, "loss": 0.6875, "step": 2960 }, { "epoch": 0.25077281388947703, "grad_norm": 4.250470336081772, "learning_rate": 8.775806687895632e-06, "loss": 0.651, "step": 2961 }, { "epoch": 0.2508575058225704, "grad_norm": 0.7054705293977249, "learning_rate": 8.77490738566648e-06, "loss": 0.9253, "step": 2962 }, { "epoch": 0.2509421977556638, "grad_norm": 1.3105022371785409, "learning_rate": 8.774007799355557e-06, "loss": 0.6722, "step": 2963 }, { "epoch": 0.25102688968875714, "grad_norm": 2.1158457430461746, "learning_rate": 8.773107929030565e-06, "loss": 0.6189, "step": 2964 }, { "epoch": 0.25111158162185054, "grad_norm": 1.1479286434973177, "learning_rate": 8.77220777475922e-06, "loss": 0.6734, "step": 2965 }, { "epoch": 0.2511962735549439, "grad_norm": 1.091623058184925, "learning_rate": 8.771307336609268e-06, "loss": 0.6015, "step": 2966 }, { "epoch": 0.25128096548803724, "grad_norm": 2.598584089468406, "learning_rate": 8.770406614648465e-06, "loss": 0.6534, "step": 2967 }, { "epoch": 0.25136565742113065, "grad_norm": 1.7634073934361587, "learning_rate": 8.769505608944601e-06, "loss": 0.6614, "step": 2968 }, { "epoch": 0.251450349354224, "grad_norm": 1.1891135218264892, "learning_rate": 8.76860431956548e-06, "loss": 0.6129, "step": 2969 }, { "epoch": 0.2515350412873174, "grad_norm": 1.2486285199445084, "learning_rate": 8.767702746578927e-06, "loss": 0.6328, "step": 2970 }, { "epoch": 0.25161973322041076, "grad_norm": 0.6955717383657848, "learning_rate": 8.766800890052794e-06, "loss": 0.8621, "step": 2971 }, { "epoch": 0.2517044251535041, "grad_norm": 1.7050349556188482, "learning_rate": 8.765898750054944e-06, "loss": 0.6413, "step": 2972 }, { "epoch": 0.2517891170865975, "grad_norm": 1.5284694019861012, "learning_rate": 8.764996326653275e-06, "loss": 0.632, "step": 2973 }, { "epoch": 0.25187380901969086, "grad_norm": 1.2103598919189127, "learning_rate": 8.764093619915695e-06, "loss": 0.6234, "step": 2974 }, { "epoch": 0.25195850095278427, "grad_norm": 1.4368199625377092, "learning_rate": 8.763190629910136e-06, "loss": 0.6944, "step": 2975 }, { "epoch": 0.2520431928858776, "grad_norm": 1.3621523950432668, "learning_rate": 8.762287356704558e-06, "loss": 0.7131, "step": 2976 }, { "epoch": 0.25212788481897097, "grad_norm": 1.8025955589376454, "learning_rate": 8.761383800366931e-06, "loss": 0.6455, "step": 2977 }, { "epoch": 0.2522125767520644, "grad_norm": 1.2464493418557723, "learning_rate": 8.76047996096526e-06, "loss": 0.612, "step": 2978 }, { "epoch": 0.2522972686851577, "grad_norm": 1.769000363028518, "learning_rate": 8.759575838567556e-06, "loss": 0.6059, "step": 2979 }, { "epoch": 0.25238196061825113, "grad_norm": 1.5313658726759878, "learning_rate": 8.758671433241864e-06, "loss": 0.5736, "step": 2980 }, { "epoch": 0.2524666525513445, "grad_norm": 2.0298766854848305, "learning_rate": 8.757766745056242e-06, "loss": 0.6826, "step": 2981 }, { "epoch": 0.25255134448443783, "grad_norm": 1.3414961362713125, "learning_rate": 8.756861774078773e-06, "loss": 0.637, "step": 2982 }, { "epoch": 0.25263603641753124, "grad_norm": 1.3255203933275619, "learning_rate": 8.755956520377564e-06, "loss": 0.6801, "step": 2983 }, { "epoch": 0.2527207283506246, "grad_norm": 1.2980053877697362, "learning_rate": 8.755050984020738e-06, "loss": 0.6674, "step": 2984 }, { "epoch": 0.252805420283718, "grad_norm": 1.5278522743362326, "learning_rate": 8.754145165076441e-06, "loss": 0.6157, "step": 2985 }, { "epoch": 0.25289011221681135, "grad_norm": 1.300519004928364, "learning_rate": 8.753239063612841e-06, "loss": 0.6821, "step": 2986 }, { "epoch": 0.2529748041499047, "grad_norm": 1.3384772976319406, "learning_rate": 8.752332679698128e-06, "loss": 0.6379, "step": 2987 }, { "epoch": 0.2530594960829981, "grad_norm": 1.0963870679693482, "learning_rate": 8.75142601340051e-06, "loss": 0.6512, "step": 2988 }, { "epoch": 0.25314418801609145, "grad_norm": 1.4952751946749598, "learning_rate": 8.750519064788221e-06, "loss": 0.6557, "step": 2989 }, { "epoch": 0.25322887994918486, "grad_norm": 1.2206809417318303, "learning_rate": 8.749611833929512e-06, "loss": 0.6163, "step": 2990 }, { "epoch": 0.2533135718822782, "grad_norm": 1.538055290189313, "learning_rate": 8.748704320892658e-06, "loss": 0.666, "step": 2991 }, { "epoch": 0.25339826381537156, "grad_norm": 1.9099039690645332, "learning_rate": 8.747796525745953e-06, "loss": 0.6788, "step": 2992 }, { "epoch": 0.25348295574846497, "grad_norm": 1.756424404981971, "learning_rate": 8.746888448557713e-06, "loss": 0.6553, "step": 2993 }, { "epoch": 0.2535676476815583, "grad_norm": 1.3812124377066093, "learning_rate": 8.745980089396278e-06, "loss": 0.7248, "step": 2994 }, { "epoch": 0.2536523396146517, "grad_norm": 0.7237219761988573, "learning_rate": 8.745071448330005e-06, "loss": 0.8249, "step": 2995 }, { "epoch": 0.2537370315477451, "grad_norm": 1.1858220453578077, "learning_rate": 8.744162525427273e-06, "loss": 0.5617, "step": 2996 }, { "epoch": 0.2538217234808384, "grad_norm": 0.7168537652032673, "learning_rate": 8.743253320756486e-06, "loss": 0.8485, "step": 2997 }, { "epoch": 0.25390641541393183, "grad_norm": 1.7474358920403665, "learning_rate": 8.742343834386066e-06, "loss": 0.6938, "step": 2998 }, { "epoch": 0.2539911073470252, "grad_norm": 1.8852862441554326, "learning_rate": 8.741434066384453e-06, "loss": 0.6785, "step": 2999 }, { "epoch": 0.2540757992801186, "grad_norm": 0.5925951709705926, "learning_rate": 8.740524016820117e-06, "loss": 0.8627, "step": 3000 }, { "epoch": 0.25416049121321194, "grad_norm": 1.3484057410889274, "learning_rate": 8.73961368576154e-06, "loss": 0.694, "step": 3001 }, { "epoch": 0.2542451831463053, "grad_norm": 1.3609307538046092, "learning_rate": 8.738703073277233e-06, "loss": 0.6236, "step": 3002 }, { "epoch": 0.2543298750793987, "grad_norm": 1.402096383408705, "learning_rate": 8.737792179435721e-06, "loss": 0.6704, "step": 3003 }, { "epoch": 0.25441456701249204, "grad_norm": 1.5619848277870814, "learning_rate": 8.736881004305557e-06, "loss": 0.7101, "step": 3004 }, { "epoch": 0.25449925894558545, "grad_norm": 1.9688554301952075, "learning_rate": 8.735969547955308e-06, "loss": 0.6159, "step": 3005 }, { "epoch": 0.2545839508786788, "grad_norm": 1.6091300893209683, "learning_rate": 8.735057810453568e-06, "loss": 0.6597, "step": 3006 }, { "epoch": 0.2546686428117722, "grad_norm": 1.6440416151646042, "learning_rate": 8.734145791868949e-06, "loss": 0.6697, "step": 3007 }, { "epoch": 0.25475333474486556, "grad_norm": 1.2566512055115304, "learning_rate": 8.733233492270085e-06, "loss": 0.6663, "step": 3008 }, { "epoch": 0.2548380266779589, "grad_norm": 1.6982486859105266, "learning_rate": 8.732320911725635e-06, "loss": 0.6403, "step": 3009 }, { "epoch": 0.2549227186110523, "grad_norm": 1.2277226836769772, "learning_rate": 8.731408050304271e-06, "loss": 0.6205, "step": 3010 }, { "epoch": 0.25500741054414566, "grad_norm": 1.1656053085436522, "learning_rate": 8.730494908074694e-06, "loss": 0.6493, "step": 3011 }, { "epoch": 0.25509210247723907, "grad_norm": 1.6069117104737844, "learning_rate": 8.72958148510562e-06, "loss": 0.6101, "step": 3012 }, { "epoch": 0.2551767944103324, "grad_norm": 1.2235479450416642, "learning_rate": 8.72866778146579e-06, "loss": 0.6693, "step": 3013 }, { "epoch": 0.25526148634342577, "grad_norm": 1.5242208888992241, "learning_rate": 8.727753797223965e-06, "loss": 0.6743, "step": 3014 }, { "epoch": 0.2553461782765192, "grad_norm": 1.340629175942756, "learning_rate": 8.72683953244893e-06, "loss": 0.6578, "step": 3015 }, { "epoch": 0.2554308702096125, "grad_norm": 0.6668675016917164, "learning_rate": 8.725924987209481e-06, "loss": 0.8619, "step": 3016 }, { "epoch": 0.25551556214270593, "grad_norm": 1.4023581287374423, "learning_rate": 8.725010161574451e-06, "loss": 0.6433, "step": 3017 }, { "epoch": 0.2556002540757993, "grad_norm": 1.1646789793799, "learning_rate": 8.72409505561268e-06, "loss": 0.6706, "step": 3018 }, { "epoch": 0.25568494600889263, "grad_norm": 1.480025941164501, "learning_rate": 8.723179669393036e-06, "loss": 0.6503, "step": 3019 }, { "epoch": 0.25576963794198604, "grad_norm": 1.372627867839497, "learning_rate": 8.722264002984406e-06, "loss": 0.6523, "step": 3020 }, { "epoch": 0.2558543298750794, "grad_norm": 1.3180857784273323, "learning_rate": 8.7213480564557e-06, "loss": 0.6767, "step": 3021 }, { "epoch": 0.2559390218081728, "grad_norm": 1.2690121741721478, "learning_rate": 8.720431829875848e-06, "loss": 0.678, "step": 3022 }, { "epoch": 0.25602371374126615, "grad_norm": 3.895222512629262, "learning_rate": 8.7195153233138e-06, "loss": 0.649, "step": 3023 }, { "epoch": 0.2561084056743595, "grad_norm": 1.807642705377296, "learning_rate": 8.71859853683853e-06, "loss": 0.6753, "step": 3024 }, { "epoch": 0.2561930976074529, "grad_norm": 1.1739591232139117, "learning_rate": 8.717681470519026e-06, "loss": 0.5612, "step": 3025 }, { "epoch": 0.25627778954054625, "grad_norm": 1.4654715724362013, "learning_rate": 8.716764124424306e-06, "loss": 0.6801, "step": 3026 }, { "epoch": 0.25636248147363966, "grad_norm": 1.679145099585889, "learning_rate": 8.715846498623405e-06, "loss": 0.6822, "step": 3027 }, { "epoch": 0.256447173406733, "grad_norm": 1.683408139392478, "learning_rate": 8.714928593185377e-06, "loss": 0.654, "step": 3028 }, { "epoch": 0.25653186533982636, "grad_norm": 1.5382305252380652, "learning_rate": 8.714010408179303e-06, "loss": 0.6355, "step": 3029 }, { "epoch": 0.25661655727291977, "grad_norm": 1.1595628792491437, "learning_rate": 8.713091943674279e-06, "loss": 0.5982, "step": 3030 }, { "epoch": 0.2567012492060131, "grad_norm": 0.6084632169811302, "learning_rate": 8.712173199739424e-06, "loss": 0.8429, "step": 3031 }, { "epoch": 0.2567859411391065, "grad_norm": 1.4744679319009273, "learning_rate": 8.711254176443878e-06, "loss": 0.6385, "step": 3032 }, { "epoch": 0.25687063307219987, "grad_norm": 1.3804204357837015, "learning_rate": 8.710334873856805e-06, "loss": 0.7492, "step": 3033 }, { "epoch": 0.2569553250052932, "grad_norm": 1.1580979114771095, "learning_rate": 8.709415292047385e-06, "loss": 0.655, "step": 3034 }, { "epoch": 0.25704001693838663, "grad_norm": 1.332283841749305, "learning_rate": 8.70849543108482e-06, "loss": 0.6701, "step": 3035 }, { "epoch": 0.25712470887148, "grad_norm": 1.1553819587817895, "learning_rate": 8.70757529103834e-06, "loss": 0.6521, "step": 3036 }, { "epoch": 0.2572094008045734, "grad_norm": 0.6500073060170789, "learning_rate": 8.706654871977183e-06, "loss": 0.8491, "step": 3037 }, { "epoch": 0.25729409273766674, "grad_norm": 1.7483228945802143, "learning_rate": 8.705734173970622e-06, "loss": 0.6672, "step": 3038 }, { "epoch": 0.2573787846707601, "grad_norm": 1.36450530883358, "learning_rate": 8.70481319708794e-06, "loss": 0.6703, "step": 3039 }, { "epoch": 0.2574634766038535, "grad_norm": 1.360108669040903, "learning_rate": 8.703891941398449e-06, "loss": 0.6854, "step": 3040 }, { "epoch": 0.25754816853694684, "grad_norm": 1.1946745680140665, "learning_rate": 8.702970406971473e-06, "loss": 0.6707, "step": 3041 }, { "epoch": 0.25763286047004025, "grad_norm": 1.1980423832860243, "learning_rate": 8.702048593876369e-06, "loss": 0.6698, "step": 3042 }, { "epoch": 0.2577175524031336, "grad_norm": 1.8036506863470654, "learning_rate": 8.701126502182504e-06, "loss": 0.5977, "step": 3043 }, { "epoch": 0.25780224433622695, "grad_norm": 1.2519493182357089, "learning_rate": 8.70020413195927e-06, "loss": 0.6611, "step": 3044 }, { "epoch": 0.25788693626932035, "grad_norm": 1.0684288199520617, "learning_rate": 8.699281483276082e-06, "loss": 0.619, "step": 3045 }, { "epoch": 0.2579716282024137, "grad_norm": 1.879838405114041, "learning_rate": 8.698358556202375e-06, "loss": 0.6451, "step": 3046 }, { "epoch": 0.2580563201355071, "grad_norm": 1.4147039965778303, "learning_rate": 8.697435350807604e-06, "loss": 0.6238, "step": 3047 }, { "epoch": 0.25814101206860046, "grad_norm": 1.3459492325606228, "learning_rate": 8.69651186716124e-06, "loss": 0.5947, "step": 3048 }, { "epoch": 0.2582257040016938, "grad_norm": 0.6525640799300789, "learning_rate": 8.695588105332788e-06, "loss": 0.8635, "step": 3049 }, { "epoch": 0.2583103959347872, "grad_norm": 1.2954205714233318, "learning_rate": 8.69466406539176e-06, "loss": 0.638, "step": 3050 }, { "epoch": 0.25839508786788057, "grad_norm": 1.3777954630196527, "learning_rate": 8.693739747407696e-06, "loss": 0.6546, "step": 3051 }, { "epoch": 0.258479779800974, "grad_norm": 1.2056045107113318, "learning_rate": 8.692815151450159e-06, "loss": 0.639, "step": 3052 }, { "epoch": 0.2585644717340673, "grad_norm": 1.482555622339101, "learning_rate": 8.691890277588726e-06, "loss": 0.679, "step": 3053 }, { "epoch": 0.2586491636671607, "grad_norm": 1.2853487139214925, "learning_rate": 8.690965125893e-06, "loss": 0.6999, "step": 3054 }, { "epoch": 0.2587338556002541, "grad_norm": 1.2734932985867728, "learning_rate": 8.690039696432607e-06, "loss": 0.6648, "step": 3055 }, { "epoch": 0.25881854753334743, "grad_norm": 1.3295167563338313, "learning_rate": 8.689113989277182e-06, "loss": 0.6902, "step": 3056 }, { "epoch": 0.25890323946644084, "grad_norm": 1.336567102858546, "learning_rate": 8.688188004496398e-06, "loss": 0.6207, "step": 3057 }, { "epoch": 0.2589879313995342, "grad_norm": 2.639829091144171, "learning_rate": 8.687261742159935e-06, "loss": 0.6657, "step": 3058 }, { "epoch": 0.2590726233326276, "grad_norm": 1.3420925636637973, "learning_rate": 8.686335202337502e-06, "loss": 0.6806, "step": 3059 }, { "epoch": 0.25915731526572094, "grad_norm": 1.0987910506812129, "learning_rate": 8.685408385098825e-06, "loss": 0.6325, "step": 3060 }, { "epoch": 0.2592420071988143, "grad_norm": 1.4785844670671264, "learning_rate": 8.68448129051365e-06, "loss": 0.7003, "step": 3061 }, { "epoch": 0.2593266991319077, "grad_norm": 1.5192177885207447, "learning_rate": 8.68355391865175e-06, "loss": 0.6552, "step": 3062 }, { "epoch": 0.25941139106500105, "grad_norm": 1.5368790165404032, "learning_rate": 8.682626269582913e-06, "loss": 0.6728, "step": 3063 }, { "epoch": 0.25949608299809446, "grad_norm": 1.3207168473987985, "learning_rate": 8.681698343376946e-06, "loss": 0.6714, "step": 3064 }, { "epoch": 0.2595807749311878, "grad_norm": 1.592693397967201, "learning_rate": 8.680770140103684e-06, "loss": 0.6777, "step": 3065 }, { "epoch": 0.25966546686428116, "grad_norm": 0.6275798138774736, "learning_rate": 8.679841659832979e-06, "loss": 0.8409, "step": 3066 }, { "epoch": 0.25975015879737456, "grad_norm": 1.258567863117902, "learning_rate": 8.678912902634703e-06, "loss": 0.7217, "step": 3067 }, { "epoch": 0.2598348507304679, "grad_norm": 1.2617074133140207, "learning_rate": 8.67798386857875e-06, "loss": 0.6679, "step": 3068 }, { "epoch": 0.2599195426635613, "grad_norm": 1.201636981675252, "learning_rate": 8.677054557735035e-06, "loss": 0.6188, "step": 3069 }, { "epoch": 0.26000423459665467, "grad_norm": 1.6497991954420106, "learning_rate": 8.676124970173495e-06, "loss": 0.6233, "step": 3070 }, { "epoch": 0.260088926529748, "grad_norm": 1.4228287319174275, "learning_rate": 8.675195105964083e-06, "loss": 0.6801, "step": 3071 }, { "epoch": 0.2601736184628414, "grad_norm": 1.7202547023167478, "learning_rate": 8.67426496517678e-06, "loss": 0.6774, "step": 3072 }, { "epoch": 0.2602583103959348, "grad_norm": 2.2634367220091653, "learning_rate": 8.673334547881581e-06, "loss": 0.6206, "step": 3073 }, { "epoch": 0.2603430023290282, "grad_norm": 0.6935311558399877, "learning_rate": 8.672403854148509e-06, "loss": 0.8156, "step": 3074 }, { "epoch": 0.26042769426212153, "grad_norm": 1.2698458935992702, "learning_rate": 8.671472884047596e-06, "loss": 0.6401, "step": 3075 }, { "epoch": 0.2605123861952149, "grad_norm": 1.611395945070009, "learning_rate": 8.67054163764891e-06, "loss": 0.6422, "step": 3076 }, { "epoch": 0.2605970781283083, "grad_norm": 1.3029438136838045, "learning_rate": 8.669610115022529e-06, "loss": 0.6493, "step": 3077 }, { "epoch": 0.26068177006140164, "grad_norm": 1.253861189924153, "learning_rate": 8.668678316238555e-06, "loss": 0.6528, "step": 3078 }, { "epoch": 0.26076646199449505, "grad_norm": 1.6703912379317811, "learning_rate": 8.66774624136711e-06, "loss": 0.6769, "step": 3079 }, { "epoch": 0.2608511539275884, "grad_norm": 1.6435935362362772, "learning_rate": 8.66681389047834e-06, "loss": 0.6717, "step": 3080 }, { "epoch": 0.26093584586068175, "grad_norm": 1.5712498181071264, "learning_rate": 8.665881263642409e-06, "loss": 0.6316, "step": 3081 }, { "epoch": 0.26102053779377515, "grad_norm": 1.2907785244907193, "learning_rate": 8.664948360929499e-06, "loss": 0.6639, "step": 3082 }, { "epoch": 0.2611052297268685, "grad_norm": 0.6568528392186175, "learning_rate": 8.664015182409819e-06, "loss": 0.8291, "step": 3083 }, { "epoch": 0.2611899216599619, "grad_norm": 1.3371516167683246, "learning_rate": 8.663081728153594e-06, "loss": 0.6854, "step": 3084 }, { "epoch": 0.26127461359305526, "grad_norm": 1.4992848697782397, "learning_rate": 8.662147998231073e-06, "loss": 0.5872, "step": 3085 }, { "epoch": 0.2613593055261486, "grad_norm": 1.1187357922980077, "learning_rate": 8.661213992712523e-06, "loss": 0.6671, "step": 3086 }, { "epoch": 0.261443997459242, "grad_norm": 3.0204927434668325, "learning_rate": 8.660279711668232e-06, "loss": 0.6158, "step": 3087 }, { "epoch": 0.26152868939233537, "grad_norm": 1.514819447176188, "learning_rate": 8.659345155168511e-06, "loss": 0.6379, "step": 3088 }, { "epoch": 0.2616133813254288, "grad_norm": 0.647674964655175, "learning_rate": 8.658410323283691e-06, "loss": 0.914, "step": 3089 }, { "epoch": 0.2616980732585221, "grad_norm": 1.7683246889523878, "learning_rate": 8.657475216084122e-06, "loss": 0.6125, "step": 3090 }, { "epoch": 0.2617827651916155, "grad_norm": 1.4700662293732543, "learning_rate": 8.656539833640175e-06, "loss": 0.6614, "step": 3091 }, { "epoch": 0.2618674571247089, "grad_norm": 1.3594332545310293, "learning_rate": 8.655604176022244e-06, "loss": 0.6771, "step": 3092 }, { "epoch": 0.26195214905780223, "grad_norm": 1.1564325566730174, "learning_rate": 8.654668243300739e-06, "loss": 0.5716, "step": 3093 }, { "epoch": 0.26203684099089564, "grad_norm": 1.2163387885295327, "learning_rate": 8.6537320355461e-06, "loss": 0.6648, "step": 3094 }, { "epoch": 0.262121532923989, "grad_norm": 1.3918018071875806, "learning_rate": 8.652795552828775e-06, "loss": 0.6183, "step": 3095 }, { "epoch": 0.26220622485708234, "grad_norm": 1.2893493061055006, "learning_rate": 8.651858795219242e-06, "loss": 0.6373, "step": 3096 }, { "epoch": 0.26229091679017574, "grad_norm": 2.0098131770476675, "learning_rate": 8.650921762787999e-06, "loss": 0.6591, "step": 3097 }, { "epoch": 0.2623756087232691, "grad_norm": 1.4170983208302272, "learning_rate": 8.64998445560556e-06, "loss": 0.6638, "step": 3098 }, { "epoch": 0.2624603006563625, "grad_norm": 1.3252338018161123, "learning_rate": 8.649046873742461e-06, "loss": 0.6426, "step": 3099 }, { "epoch": 0.26254499258945585, "grad_norm": 1.1126794632683006, "learning_rate": 8.648109017269264e-06, "loss": 0.6496, "step": 3100 }, { "epoch": 0.2626296845225492, "grad_norm": 1.4398832497392722, "learning_rate": 8.647170886256548e-06, "loss": 0.6285, "step": 3101 }, { "epoch": 0.2627143764556426, "grad_norm": 1.644079935641377, "learning_rate": 8.646232480774908e-06, "loss": 0.6559, "step": 3102 }, { "epoch": 0.26279906838873596, "grad_norm": 1.2255213398396934, "learning_rate": 8.645293800894965e-06, "loss": 0.6758, "step": 3103 }, { "epoch": 0.26288376032182936, "grad_norm": 1.7360170497131173, "learning_rate": 8.644354846687364e-06, "loss": 0.6331, "step": 3104 }, { "epoch": 0.2629684522549227, "grad_norm": 1.54968495821391, "learning_rate": 8.643415618222758e-06, "loss": 0.625, "step": 3105 }, { "epoch": 0.2630531441880161, "grad_norm": 1.520668260159422, "learning_rate": 8.642476115571838e-06, "loss": 0.631, "step": 3106 }, { "epoch": 0.26313783612110947, "grad_norm": 1.4776638861555298, "learning_rate": 8.641536338805302e-06, "loss": 0.6038, "step": 3107 }, { "epoch": 0.2632225280542028, "grad_norm": 0.6435275906829034, "learning_rate": 8.640596287993873e-06, "loss": 0.8731, "step": 3108 }, { "epoch": 0.2633072199872962, "grad_norm": 1.349862583564262, "learning_rate": 8.639655963208295e-06, "loss": 0.6854, "step": 3109 }, { "epoch": 0.2633919119203896, "grad_norm": 1.26333842388022, "learning_rate": 8.638715364519335e-06, "loss": 0.5675, "step": 3110 }, { "epoch": 0.263476603853483, "grad_norm": 2.0042107546194416, "learning_rate": 8.637774491997774e-06, "loss": 0.6863, "step": 3111 }, { "epoch": 0.26356129578657633, "grad_norm": 1.3526625123949454, "learning_rate": 8.636833345714419e-06, "loss": 0.6629, "step": 3112 }, { "epoch": 0.2636459877196697, "grad_norm": 1.9234110553231534, "learning_rate": 8.635891925740098e-06, "loss": 0.6762, "step": 3113 }, { "epoch": 0.2637306796527631, "grad_norm": 1.361259364409915, "learning_rate": 8.634950232145655e-06, "loss": 0.6445, "step": 3114 }, { "epoch": 0.26381537158585644, "grad_norm": 1.1792364714656158, "learning_rate": 8.634008265001961e-06, "loss": 0.632, "step": 3115 }, { "epoch": 0.26390006351894985, "grad_norm": 1.4484349344469623, "learning_rate": 8.633066024379901e-06, "loss": 0.6966, "step": 3116 }, { "epoch": 0.2639847554520432, "grad_norm": 1.8711869828973249, "learning_rate": 8.632123510350386e-06, "loss": 0.6893, "step": 3117 }, { "epoch": 0.26406944738513655, "grad_norm": 1.4839679448110494, "learning_rate": 8.631180722984342e-06, "loss": 0.6849, "step": 3118 }, { "epoch": 0.26415413931822995, "grad_norm": 4.455830866679437, "learning_rate": 8.630237662352723e-06, "loss": 0.6288, "step": 3119 }, { "epoch": 0.2642388312513233, "grad_norm": 1.3758708569682006, "learning_rate": 8.629294328526495e-06, "loss": 0.6538, "step": 3120 }, { "epoch": 0.2643235231844167, "grad_norm": 1.4640337672187298, "learning_rate": 8.628350721576651e-06, "loss": 0.6392, "step": 3121 }, { "epoch": 0.26440821511751006, "grad_norm": 1.2314549039005178, "learning_rate": 8.627406841574202e-06, "loss": 0.6669, "step": 3122 }, { "epoch": 0.2644929070506034, "grad_norm": 1.2550205037238773, "learning_rate": 8.62646268859018e-06, "loss": 0.6783, "step": 3123 }, { "epoch": 0.2645775989836968, "grad_norm": 1.3080789722424608, "learning_rate": 8.625518262695639e-06, "loss": 0.6386, "step": 3124 }, { "epoch": 0.26466229091679017, "grad_norm": 1.3109621554767739, "learning_rate": 8.62457356396165e-06, "loss": 0.6681, "step": 3125 }, { "epoch": 0.26474698284988357, "grad_norm": 1.3945226638629213, "learning_rate": 8.623628592459307e-06, "loss": 0.6708, "step": 3126 }, { "epoch": 0.2648316747829769, "grad_norm": 1.398457429989383, "learning_rate": 8.622683348259724e-06, "loss": 0.6915, "step": 3127 }, { "epoch": 0.2649163667160703, "grad_norm": 1.2010387473358761, "learning_rate": 8.621737831434036e-06, "loss": 0.5909, "step": 3128 }, { "epoch": 0.2650010586491637, "grad_norm": 0.6719321286232324, "learning_rate": 8.620792042053399e-06, "loss": 0.8498, "step": 3129 }, { "epoch": 0.26508575058225703, "grad_norm": 8.17064180294684, "learning_rate": 8.619845980188988e-06, "loss": 0.6358, "step": 3130 }, { "epoch": 0.26517044251535044, "grad_norm": 1.4036987591463221, "learning_rate": 8.618899645911998e-06, "loss": 0.6648, "step": 3131 }, { "epoch": 0.2652551344484438, "grad_norm": 1.6395867969603204, "learning_rate": 8.617953039293648e-06, "loss": 0.6147, "step": 3132 }, { "epoch": 0.26533982638153714, "grad_norm": 2.2269094265672535, "learning_rate": 8.617006160405172e-06, "loss": 0.6401, "step": 3133 }, { "epoch": 0.26542451831463054, "grad_norm": 1.468140310689706, "learning_rate": 8.616059009317832e-06, "loss": 0.6503, "step": 3134 }, { "epoch": 0.2655092102477239, "grad_norm": 1.6232396200811463, "learning_rate": 8.615111586102901e-06, "loss": 0.6674, "step": 3135 }, { "epoch": 0.2655939021808173, "grad_norm": 1.239412865892117, "learning_rate": 8.614163890831681e-06, "loss": 0.6607, "step": 3136 }, { "epoch": 0.26567859411391065, "grad_norm": 1.1831220846641775, "learning_rate": 8.613215923575491e-06, "loss": 0.6917, "step": 3137 }, { "epoch": 0.265763286047004, "grad_norm": 1.3068483409561467, "learning_rate": 8.61226768440567e-06, "loss": 0.5803, "step": 3138 }, { "epoch": 0.2658479779800974, "grad_norm": 1.2295731111199955, "learning_rate": 8.611319173393577e-06, "loss": 0.6389, "step": 3139 }, { "epoch": 0.26593266991319076, "grad_norm": 1.3306856718769546, "learning_rate": 8.610370390610593e-06, "loss": 0.607, "step": 3140 }, { "epoch": 0.26601736184628416, "grad_norm": 1.158667456903945, "learning_rate": 8.60942133612812e-06, "loss": 0.6457, "step": 3141 }, { "epoch": 0.2661020537793775, "grad_norm": 1.247198862357855, "learning_rate": 8.608472010017578e-06, "loss": 0.6702, "step": 3142 }, { "epoch": 0.26618674571247086, "grad_norm": 1.3839915079386864, "learning_rate": 8.607522412350411e-06, "loss": 0.5297, "step": 3143 }, { "epoch": 0.26627143764556427, "grad_norm": 1.1782608777260477, "learning_rate": 8.60657254319808e-06, "loss": 0.6199, "step": 3144 }, { "epoch": 0.2663561295786576, "grad_norm": 2.5485067428049137, "learning_rate": 8.605622402632066e-06, "loss": 0.6292, "step": 3145 }, { "epoch": 0.266440821511751, "grad_norm": 2.2293308553441813, "learning_rate": 8.604671990723874e-06, "loss": 0.697, "step": 3146 }, { "epoch": 0.2665255134448444, "grad_norm": 0.7047655297306146, "learning_rate": 8.603721307545027e-06, "loss": 0.8504, "step": 3147 }, { "epoch": 0.2666102053779377, "grad_norm": 1.285702051263853, "learning_rate": 8.602770353167068e-06, "loss": 0.6932, "step": 3148 }, { "epoch": 0.26669489731103113, "grad_norm": 1.4566769502925274, "learning_rate": 8.601819127661563e-06, "loss": 0.6624, "step": 3149 }, { "epoch": 0.2667795892441245, "grad_norm": 2.2872667196367846, "learning_rate": 8.600867631100096e-06, "loss": 0.6347, "step": 3150 }, { "epoch": 0.2668642811772179, "grad_norm": 1.4220133419650738, "learning_rate": 8.599915863554274e-06, "loss": 0.6726, "step": 3151 }, { "epoch": 0.26694897311031124, "grad_norm": 1.3108879794647519, "learning_rate": 8.598963825095718e-06, "loss": 0.6983, "step": 3152 }, { "epoch": 0.2670336650434046, "grad_norm": 2.3344681171698696, "learning_rate": 8.598011515796078e-06, "loss": 0.6749, "step": 3153 }, { "epoch": 0.267118356976498, "grad_norm": 1.1798088618711524, "learning_rate": 8.59705893572702e-06, "loss": 0.6612, "step": 3154 }, { "epoch": 0.26720304890959135, "grad_norm": 1.2592970627141817, "learning_rate": 8.596106084960229e-06, "loss": 0.6108, "step": 3155 }, { "epoch": 0.26728774084268475, "grad_norm": 0.6022510620231089, "learning_rate": 8.595152963567412e-06, "loss": 0.8541, "step": 3156 }, { "epoch": 0.2673724327757781, "grad_norm": 2.6463845050246713, "learning_rate": 8.594199571620298e-06, "loss": 0.6091, "step": 3157 }, { "epoch": 0.2674571247088715, "grad_norm": 1.4537252657849802, "learning_rate": 8.593245909190635e-06, "loss": 0.5959, "step": 3158 }, { "epoch": 0.26754181664196486, "grad_norm": 1.6422742011617013, "learning_rate": 8.59229197635019e-06, "loss": 0.6934, "step": 3159 }, { "epoch": 0.2676265085750582, "grad_norm": 1.6014535021017573, "learning_rate": 8.59133777317075e-06, "loss": 0.672, "step": 3160 }, { "epoch": 0.2677112005081516, "grad_norm": 1.3979795395517212, "learning_rate": 8.590383299724128e-06, "loss": 0.7182, "step": 3161 }, { "epoch": 0.26779589244124496, "grad_norm": 1.5381570419506663, "learning_rate": 8.589428556082149e-06, "loss": 0.6483, "step": 3162 }, { "epoch": 0.26788058437433837, "grad_norm": 1.3937095564263682, "learning_rate": 8.588473542316665e-06, "loss": 0.6281, "step": 3163 }, { "epoch": 0.2679652763074317, "grad_norm": 1.4700109103825738, "learning_rate": 8.587518258499544e-06, "loss": 0.7246, "step": 3164 }, { "epoch": 0.26804996824052507, "grad_norm": 2.461730525107756, "learning_rate": 8.586562704702677e-06, "loss": 0.645, "step": 3165 }, { "epoch": 0.2681346601736185, "grad_norm": 1.4663870120978115, "learning_rate": 8.585606880997975e-06, "loss": 0.6953, "step": 3166 }, { "epoch": 0.26821935210671183, "grad_norm": 1.831689728804366, "learning_rate": 8.584650787457369e-06, "loss": 0.649, "step": 3167 }, { "epoch": 0.26830404403980523, "grad_norm": 1.4443215514302268, "learning_rate": 8.583694424152811e-06, "loss": 0.6792, "step": 3168 }, { "epoch": 0.2683887359728986, "grad_norm": 1.389207900174657, "learning_rate": 8.582737791156269e-06, "loss": 0.6757, "step": 3169 }, { "epoch": 0.26847342790599193, "grad_norm": 1.4701117791734823, "learning_rate": 8.581780888539737e-06, "loss": 0.6274, "step": 3170 }, { "epoch": 0.26855811983908534, "grad_norm": 0.6743526736759807, "learning_rate": 8.580823716375227e-06, "loss": 0.865, "step": 3171 }, { "epoch": 0.2686428117721787, "grad_norm": 0.6562934370882346, "learning_rate": 8.579866274734771e-06, "loss": 0.8784, "step": 3172 }, { "epoch": 0.2687275037052721, "grad_norm": 2.7686133772852153, "learning_rate": 8.578908563690422e-06, "loss": 0.6425, "step": 3173 }, { "epoch": 0.26881219563836545, "grad_norm": 1.6104995811306795, "learning_rate": 8.577950583314252e-06, "loss": 0.6468, "step": 3174 }, { "epoch": 0.2688968875714588, "grad_norm": 2.476164526931968, "learning_rate": 8.576992333678354e-06, "loss": 0.6366, "step": 3175 }, { "epoch": 0.2689815795045522, "grad_norm": 1.3698391237317922, "learning_rate": 8.576033814854842e-06, "loss": 0.6683, "step": 3176 }, { "epoch": 0.26906627143764555, "grad_norm": 1.419013884207602, "learning_rate": 8.575075026915851e-06, "loss": 0.6832, "step": 3177 }, { "epoch": 0.26915096337073896, "grad_norm": 1.4720530893726058, "learning_rate": 8.574115969933532e-06, "loss": 0.6708, "step": 3178 }, { "epoch": 0.2692356553038323, "grad_norm": 1.494730455109518, "learning_rate": 8.57315664398006e-06, "loss": 0.7015, "step": 3179 }, { "epoch": 0.26932034723692566, "grad_norm": 1.2903504269432875, "learning_rate": 8.572197049127629e-06, "loss": 0.644, "step": 3180 }, { "epoch": 0.26940503917001907, "grad_norm": 1.22327556381753, "learning_rate": 8.571237185448456e-06, "loss": 0.6658, "step": 3181 }, { "epoch": 0.2694897311031124, "grad_norm": 1.551507037774751, "learning_rate": 8.570277053014774e-06, "loss": 0.6318, "step": 3182 }, { "epoch": 0.2695744230362058, "grad_norm": 1.8988487969955739, "learning_rate": 8.569316651898838e-06, "loss": 0.6388, "step": 3183 }, { "epoch": 0.2696591149692992, "grad_norm": 1.6168793859012673, "learning_rate": 8.568355982172925e-06, "loss": 0.663, "step": 3184 }, { "epoch": 0.2697438069023925, "grad_norm": 1.432470682386395, "learning_rate": 8.567395043909326e-06, "loss": 0.6848, "step": 3185 }, { "epoch": 0.26982849883548593, "grad_norm": 1.3148802771451176, "learning_rate": 8.566433837180362e-06, "loss": 0.6654, "step": 3186 }, { "epoch": 0.2699131907685793, "grad_norm": 1.4402550826585878, "learning_rate": 8.565472362058365e-06, "loss": 0.6722, "step": 3187 }, { "epoch": 0.2699978827016727, "grad_norm": 1.441157343449193, "learning_rate": 8.564510618615693e-06, "loss": 0.6791, "step": 3188 }, { "epoch": 0.27008257463476604, "grad_norm": 1.2986102954353165, "learning_rate": 8.563548606924723e-06, "loss": 0.6397, "step": 3189 }, { "epoch": 0.2701672665678594, "grad_norm": 0.6249894978760346, "learning_rate": 8.56258632705785e-06, "loss": 0.8367, "step": 3190 }, { "epoch": 0.2702519585009528, "grad_norm": 1.2862209087571297, "learning_rate": 8.561623779087492e-06, "loss": 0.6755, "step": 3191 }, { "epoch": 0.27033665043404614, "grad_norm": 1.4850948829958734, "learning_rate": 8.560660963086083e-06, "loss": 0.6683, "step": 3192 }, { "epoch": 0.27042134236713955, "grad_norm": 1.7709452390090563, "learning_rate": 8.559697879126083e-06, "loss": 0.6391, "step": 3193 }, { "epoch": 0.2705060343002329, "grad_norm": 1.4035281574808567, "learning_rate": 8.558734527279968e-06, "loss": 0.6627, "step": 3194 }, { "epoch": 0.27059072623332625, "grad_norm": 2.7539747216700916, "learning_rate": 8.557770907620236e-06, "loss": 0.6317, "step": 3195 }, { "epoch": 0.27067541816641966, "grad_norm": 1.6267097049890022, "learning_rate": 8.556807020219403e-06, "loss": 0.6411, "step": 3196 }, { "epoch": 0.270760110099513, "grad_norm": 1.3506318978897398, "learning_rate": 8.555842865150008e-06, "loss": 0.6544, "step": 3197 }, { "epoch": 0.2708448020326064, "grad_norm": 1.6673099598201195, "learning_rate": 8.554878442484607e-06, "loss": 0.6748, "step": 3198 }, { "epoch": 0.27092949396569976, "grad_norm": 1.3854840732386546, "learning_rate": 8.553913752295782e-06, "loss": 0.6645, "step": 3199 }, { "epoch": 0.2710141858987931, "grad_norm": 1.3253155034394408, "learning_rate": 8.552948794656127e-06, "loss": 0.6354, "step": 3200 }, { "epoch": 0.2710988778318865, "grad_norm": 1.2446707343167145, "learning_rate": 8.55198356963826e-06, "loss": 0.6507, "step": 3201 }, { "epoch": 0.27118356976497987, "grad_norm": 1.4914492049728367, "learning_rate": 8.55101807731482e-06, "loss": 0.6861, "step": 3202 }, { "epoch": 0.2712682616980733, "grad_norm": 1.2540886196744756, "learning_rate": 8.55005231775847e-06, "loss": 0.6556, "step": 3203 }, { "epoch": 0.2713529536311666, "grad_norm": 1.5081535281483724, "learning_rate": 8.549086291041882e-06, "loss": 0.6728, "step": 3204 }, { "epoch": 0.27143764556426, "grad_norm": 1.4332136038796104, "learning_rate": 8.548119997237758e-06, "loss": 0.6533, "step": 3205 }, { "epoch": 0.2715223374973534, "grad_norm": 1.9403622434548706, "learning_rate": 8.547153436418816e-06, "loss": 0.6831, "step": 3206 }, { "epoch": 0.27160702943044673, "grad_norm": 1.4241671300783303, "learning_rate": 8.546186608657796e-06, "loss": 0.6465, "step": 3207 }, { "epoch": 0.27169172136354014, "grad_norm": 0.6834800762579504, "learning_rate": 8.545219514027454e-06, "loss": 0.8506, "step": 3208 }, { "epoch": 0.2717764132966335, "grad_norm": 1.942533769017002, "learning_rate": 8.544252152600572e-06, "loss": 0.6988, "step": 3209 }, { "epoch": 0.2718611052297269, "grad_norm": 0.6718889777177579, "learning_rate": 8.543284524449946e-06, "loss": 0.8865, "step": 3210 }, { "epoch": 0.27194579716282025, "grad_norm": 1.4483887495583585, "learning_rate": 8.542316629648399e-06, "loss": 0.6621, "step": 3211 }, { "epoch": 0.2720304890959136, "grad_norm": 1.368970011931732, "learning_rate": 8.541348468268767e-06, "loss": 0.683, "step": 3212 }, { "epoch": 0.272115181029007, "grad_norm": 1.7469149043930734, "learning_rate": 8.540380040383911e-06, "loss": 0.6519, "step": 3213 }, { "epoch": 0.27219987296210035, "grad_norm": 1.3083382971063777, "learning_rate": 8.539411346066708e-06, "loss": 0.6088, "step": 3214 }, { "epoch": 0.27228456489519376, "grad_norm": 2.2169343539244366, "learning_rate": 8.538442385390061e-06, "loss": 0.6741, "step": 3215 }, { "epoch": 0.2723692568282871, "grad_norm": 1.313032035256652, "learning_rate": 8.537473158426888e-06, "loss": 0.6477, "step": 3216 }, { "epoch": 0.27245394876138046, "grad_norm": 1.464431281252321, "learning_rate": 8.536503665250126e-06, "loss": 0.5996, "step": 3217 }, { "epoch": 0.27253864069447387, "grad_norm": 1.1921460552966388, "learning_rate": 8.535533905932739e-06, "loss": 0.5954, "step": 3218 }, { "epoch": 0.2726233326275672, "grad_norm": 1.2820401217320314, "learning_rate": 8.534563880547702e-06, "loss": 0.6462, "step": 3219 }, { "epoch": 0.2727080245606606, "grad_norm": 1.45761771070098, "learning_rate": 8.533593589168017e-06, "loss": 0.6338, "step": 3220 }, { "epoch": 0.272792716493754, "grad_norm": 1.3450023821932477, "learning_rate": 8.532623031866704e-06, "loss": 0.6612, "step": 3221 }, { "epoch": 0.2728774084268473, "grad_norm": 1.140679068264456, "learning_rate": 8.531652208716801e-06, "loss": 0.6549, "step": 3222 }, { "epoch": 0.27296210035994073, "grad_norm": 1.7113879236851524, "learning_rate": 8.530681119791368e-06, "loss": 0.6677, "step": 3223 }, { "epoch": 0.2730467922930341, "grad_norm": 1.2119484628553165, "learning_rate": 8.529709765163486e-06, "loss": 0.5669, "step": 3224 }, { "epoch": 0.2731314842261275, "grad_norm": 0.6886502203115367, "learning_rate": 8.528738144906252e-06, "loss": 0.8459, "step": 3225 }, { "epoch": 0.27321617615922084, "grad_norm": 1.459331349070976, "learning_rate": 8.52776625909279e-06, "loss": 0.6592, "step": 3226 }, { "epoch": 0.2733008680923142, "grad_norm": 1.3488173261577536, "learning_rate": 8.526794107796233e-06, "loss": 0.6795, "step": 3227 }, { "epoch": 0.2733855600254076, "grad_norm": 1.8908299912425741, "learning_rate": 8.525821691089746e-06, "loss": 0.6537, "step": 3228 }, { "epoch": 0.27347025195850094, "grad_norm": 1.4074706700193498, "learning_rate": 8.524849009046506e-06, "loss": 0.6346, "step": 3229 }, { "epoch": 0.27355494389159435, "grad_norm": 1.3675678625647667, "learning_rate": 8.523876061739714e-06, "loss": 0.7032, "step": 3230 }, { "epoch": 0.2736396358246877, "grad_norm": 1.4446442836132942, "learning_rate": 8.522902849242587e-06, "loss": 0.6439, "step": 3231 }, { "epoch": 0.27372432775778105, "grad_norm": 1.3461640435376616, "learning_rate": 8.521929371628368e-06, "loss": 0.6213, "step": 3232 }, { "epoch": 0.27380901969087446, "grad_norm": 1.2555040925627763, "learning_rate": 8.520955628970312e-06, "loss": 0.7113, "step": 3233 }, { "epoch": 0.2738937116239678, "grad_norm": 2.233452944412685, "learning_rate": 8.519981621341702e-06, "loss": 0.6828, "step": 3234 }, { "epoch": 0.2739784035570612, "grad_norm": 1.3950811298821675, "learning_rate": 8.519007348815835e-06, "loss": 0.6679, "step": 3235 }, { "epoch": 0.27406309549015456, "grad_norm": 2.1739574766089333, "learning_rate": 8.518032811466033e-06, "loss": 0.6474, "step": 3236 }, { "epoch": 0.2741477874232479, "grad_norm": 1.2417604015511503, "learning_rate": 8.51705800936563e-06, "loss": 0.6569, "step": 3237 }, { "epoch": 0.2742324793563413, "grad_norm": 1.974114526300141, "learning_rate": 8.516082942587991e-06, "loss": 0.6298, "step": 3238 }, { "epoch": 0.27431717128943467, "grad_norm": 1.3478838270445244, "learning_rate": 8.51510761120649e-06, "loss": 0.6463, "step": 3239 }, { "epoch": 0.2744018632225281, "grad_norm": 1.174081250033357, "learning_rate": 8.51413201529453e-06, "loss": 0.5766, "step": 3240 }, { "epoch": 0.2744865551556214, "grad_norm": 1.3948861017179446, "learning_rate": 8.513156154925524e-06, "loss": 0.6543, "step": 3241 }, { "epoch": 0.2745712470887148, "grad_norm": 1.3232825322792605, "learning_rate": 8.512180030172916e-06, "loss": 0.677, "step": 3242 }, { "epoch": 0.2746559390218082, "grad_norm": 0.6350193962280218, "learning_rate": 8.511203641110165e-06, "loss": 0.8839, "step": 3243 }, { "epoch": 0.27474063095490153, "grad_norm": 2.275039524853165, "learning_rate": 8.510226987810746e-06, "loss": 0.668, "step": 3244 }, { "epoch": 0.27482532288799494, "grad_norm": 1.1677507670096954, "learning_rate": 8.509250070348158e-06, "loss": 0.6523, "step": 3245 }, { "epoch": 0.2749100148210883, "grad_norm": 1.7279790735367813, "learning_rate": 8.50827288879592e-06, "loss": 0.6577, "step": 3246 }, { "epoch": 0.27499470675418164, "grad_norm": 1.516574750967321, "learning_rate": 8.50729544322757e-06, "loss": 0.701, "step": 3247 }, { "epoch": 0.27507939868727505, "grad_norm": 1.152097904294396, "learning_rate": 8.506317733716666e-06, "loss": 0.6798, "step": 3248 }, { "epoch": 0.2751640906203684, "grad_norm": 1.876293337564045, "learning_rate": 8.505339760336785e-06, "loss": 0.6438, "step": 3249 }, { "epoch": 0.2752487825534618, "grad_norm": 1.2854241312301204, "learning_rate": 8.504361523161526e-06, "loss": 0.6215, "step": 3250 }, { "epoch": 0.27533347448655515, "grad_norm": 1.1397065514535403, "learning_rate": 8.503383022264506e-06, "loss": 0.6499, "step": 3251 }, { "epoch": 0.2754181664196485, "grad_norm": 1.3873721720546168, "learning_rate": 8.502404257719364e-06, "loss": 0.6327, "step": 3252 }, { "epoch": 0.2755028583527419, "grad_norm": 1.3175542708819714, "learning_rate": 8.501425229599754e-06, "loss": 0.5975, "step": 3253 }, { "epoch": 0.27558755028583526, "grad_norm": 1.2635125587732383, "learning_rate": 8.500445937979353e-06, "loss": 0.674, "step": 3254 }, { "epoch": 0.27567224221892866, "grad_norm": 1.3350915562694434, "learning_rate": 8.49946638293186e-06, "loss": 0.645, "step": 3255 }, { "epoch": 0.275756934152022, "grad_norm": 1.374674441099839, "learning_rate": 8.498486564530991e-06, "loss": 0.6724, "step": 3256 }, { "epoch": 0.27584162608511537, "grad_norm": 1.8028679411226707, "learning_rate": 8.497506482850485e-06, "loss": 0.6701, "step": 3257 }, { "epoch": 0.27592631801820877, "grad_norm": 1.1292470706841542, "learning_rate": 8.496526137964095e-06, "loss": 0.6181, "step": 3258 }, { "epoch": 0.2760110099513021, "grad_norm": 0.5661079293218996, "learning_rate": 8.495545529945598e-06, "loss": 0.8579, "step": 3259 }, { "epoch": 0.27609570188439553, "grad_norm": 1.6590962268123637, "learning_rate": 8.49456465886879e-06, "loss": 0.6696, "step": 3260 }, { "epoch": 0.2761803938174889, "grad_norm": 1.2973361887043895, "learning_rate": 8.493583524807485e-06, "loss": 0.7011, "step": 3261 }, { "epoch": 0.2762650857505823, "grad_norm": 1.3721576427856879, "learning_rate": 8.492602127835521e-06, "loss": 0.6499, "step": 3262 }, { "epoch": 0.27634977768367563, "grad_norm": 2.37538555975959, "learning_rate": 8.491620468026754e-06, "loss": 0.6417, "step": 3263 }, { "epoch": 0.276434469616769, "grad_norm": 1.5324060528847354, "learning_rate": 8.490638545455057e-06, "loss": 0.7038, "step": 3264 }, { "epoch": 0.2765191615498624, "grad_norm": 1.377573510536121, "learning_rate": 8.489656360194327e-06, "loss": 0.6689, "step": 3265 }, { "epoch": 0.27660385348295574, "grad_norm": 1.3761751290206272, "learning_rate": 8.488673912318476e-06, "loss": 0.6468, "step": 3266 }, { "epoch": 0.27668854541604915, "grad_norm": 1.3971620041345862, "learning_rate": 8.48769120190144e-06, "loss": 0.6308, "step": 3267 }, { "epoch": 0.2767732373491425, "grad_norm": 1.5336297573463056, "learning_rate": 8.486708229017173e-06, "loss": 0.6513, "step": 3268 }, { "epoch": 0.27685792928223585, "grad_norm": 1.6403750220659181, "learning_rate": 8.485724993739648e-06, "loss": 0.6876, "step": 3269 }, { "epoch": 0.27694262121532925, "grad_norm": 1.2844741567186782, "learning_rate": 8.48474149614286e-06, "loss": 0.7027, "step": 3270 }, { "epoch": 0.2770273131484226, "grad_norm": 1.0739354902114153, "learning_rate": 8.483757736300822e-06, "loss": 0.6241, "step": 3271 }, { "epoch": 0.277112005081516, "grad_norm": 1.263956468259747, "learning_rate": 8.482773714287567e-06, "loss": 0.6947, "step": 3272 }, { "epoch": 0.27719669701460936, "grad_norm": 1.2453368665405053, "learning_rate": 8.481789430177148e-06, "loss": 0.6308, "step": 3273 }, { "epoch": 0.2772813889477027, "grad_norm": 1.6656744703999127, "learning_rate": 8.480804884043636e-06, "loss": 0.6464, "step": 3274 }, { "epoch": 0.2773660808807961, "grad_norm": 1.6236068999152897, "learning_rate": 8.479820075961126e-06, "loss": 0.6678, "step": 3275 }, { "epoch": 0.27745077281388947, "grad_norm": 0.9987156401017391, "learning_rate": 8.478835006003729e-06, "loss": 0.6089, "step": 3276 }, { "epoch": 0.2775354647469829, "grad_norm": 1.3195113501837774, "learning_rate": 8.477849674245576e-06, "loss": 0.674, "step": 3277 }, { "epoch": 0.2776201566800762, "grad_norm": 1.2248409722046418, "learning_rate": 8.476864080760819e-06, "loss": 0.603, "step": 3278 }, { "epoch": 0.2777048486131696, "grad_norm": 1.4286418206827707, "learning_rate": 8.475878225623629e-06, "loss": 0.6523, "step": 3279 }, { "epoch": 0.277789540546263, "grad_norm": 1.6055396308118481, "learning_rate": 8.474892108908197e-06, "loss": 0.7292, "step": 3280 }, { "epoch": 0.27787423247935633, "grad_norm": 1.5029507182746962, "learning_rate": 8.473905730688734e-06, "loss": 0.6404, "step": 3281 }, { "epoch": 0.27795892441244974, "grad_norm": 1.14651265883838, "learning_rate": 8.472919091039469e-06, "loss": 0.667, "step": 3282 }, { "epoch": 0.2780436163455431, "grad_norm": 1.5316821287787594, "learning_rate": 8.471932190034652e-06, "loss": 0.6547, "step": 3283 }, { "epoch": 0.27812830827863644, "grad_norm": 1.2930880005477678, "learning_rate": 8.470945027748552e-06, "loss": 0.6874, "step": 3284 }, { "epoch": 0.27821300021172984, "grad_norm": 1.3682585509889738, "learning_rate": 8.46995760425546e-06, "loss": 0.6351, "step": 3285 }, { "epoch": 0.2782976921448232, "grad_norm": 1.3092166797660603, "learning_rate": 8.468969919629686e-06, "loss": 0.6576, "step": 3286 }, { "epoch": 0.2783823840779166, "grad_norm": 1.4312513772436344, "learning_rate": 8.467981973945555e-06, "loss": 0.679, "step": 3287 }, { "epoch": 0.27846707601100995, "grad_norm": 1.4182716116457659, "learning_rate": 8.466993767277416e-06, "loss": 0.5949, "step": 3288 }, { "epoch": 0.2785517679441033, "grad_norm": 1.5198931394581767, "learning_rate": 8.466005299699637e-06, "loss": 0.6538, "step": 3289 }, { "epoch": 0.2786364598771967, "grad_norm": 1.8832203523482305, "learning_rate": 8.46501657128661e-06, "loss": 0.6228, "step": 3290 }, { "epoch": 0.27872115181029006, "grad_norm": 1.2082478949439543, "learning_rate": 8.464027582112733e-06, "loss": 0.6984, "step": 3291 }, { "epoch": 0.27880584374338346, "grad_norm": 1.610793256626188, "learning_rate": 8.463038332252439e-06, "loss": 0.6883, "step": 3292 }, { "epoch": 0.2788905356764768, "grad_norm": 2.131734240024631, "learning_rate": 8.462048821780174e-06, "loss": 0.6198, "step": 3293 }, { "epoch": 0.27897522760957016, "grad_norm": 1.7271885308788069, "learning_rate": 8.4610590507704e-06, "loss": 0.7147, "step": 3294 }, { "epoch": 0.27905991954266357, "grad_norm": 1.1217765371525916, "learning_rate": 8.460069019297607e-06, "loss": 0.6218, "step": 3295 }, { "epoch": 0.2791446114757569, "grad_norm": 1.4349068780475043, "learning_rate": 8.459078727436298e-06, "loss": 0.6509, "step": 3296 }, { "epoch": 0.2792293034088503, "grad_norm": 1.3300235615450247, "learning_rate": 8.458088175260998e-06, "loss": 0.6922, "step": 3297 }, { "epoch": 0.2793139953419437, "grad_norm": 1.6164606719045556, "learning_rate": 8.457097362846252e-06, "loss": 0.6995, "step": 3298 }, { "epoch": 0.279398687275037, "grad_norm": 1.9427586052228654, "learning_rate": 8.456106290266624e-06, "loss": 0.6766, "step": 3299 }, { "epoch": 0.27948337920813043, "grad_norm": 1.3216778195402439, "learning_rate": 8.455114957596695e-06, "loss": 0.6484, "step": 3300 }, { "epoch": 0.2795680711412238, "grad_norm": 1.1234492163346563, "learning_rate": 8.454123364911071e-06, "loss": 0.6468, "step": 3301 }, { "epoch": 0.2796527630743172, "grad_norm": 1.151630534376983, "learning_rate": 8.453131512284373e-06, "loss": 0.6361, "step": 3302 }, { "epoch": 0.27973745500741054, "grad_norm": 1.265429611878502, "learning_rate": 8.452139399791243e-06, "loss": 0.6867, "step": 3303 }, { "epoch": 0.2798221469405039, "grad_norm": 1.4931018302408847, "learning_rate": 8.451147027506343e-06, "loss": 0.728, "step": 3304 }, { "epoch": 0.2799068388735973, "grad_norm": 2.1210452803012902, "learning_rate": 8.450154395504355e-06, "loss": 0.5926, "step": 3305 }, { "epoch": 0.27999153080669065, "grad_norm": 1.3792512860653279, "learning_rate": 8.44916150385998e-06, "loss": 0.6171, "step": 3306 }, { "epoch": 0.28007622273978405, "grad_norm": 1.1768820748715798, "learning_rate": 8.448168352647936e-06, "loss": 0.6304, "step": 3307 }, { "epoch": 0.2801609146728774, "grad_norm": 1.5678879820346396, "learning_rate": 8.447174941942967e-06, "loss": 0.6204, "step": 3308 }, { "epoch": 0.28024560660597075, "grad_norm": 1.1950704585708147, "learning_rate": 8.446181271819827e-06, "loss": 0.675, "step": 3309 }, { "epoch": 0.28033029853906416, "grad_norm": 1.5665219577766218, "learning_rate": 8.4451873423533e-06, "loss": 0.6322, "step": 3310 }, { "epoch": 0.2804149904721575, "grad_norm": 1.872365034016665, "learning_rate": 8.444193153618182e-06, "loss": 0.6664, "step": 3311 }, { "epoch": 0.2804996824052509, "grad_norm": 1.2115813096528834, "learning_rate": 8.44319870568929e-06, "loss": 0.6958, "step": 3312 }, { "epoch": 0.28058437433834427, "grad_norm": 1.3803427888174384, "learning_rate": 8.442203998641465e-06, "loss": 0.6752, "step": 3313 }, { "epoch": 0.2806690662714377, "grad_norm": 1.3448001011068207, "learning_rate": 8.441209032549561e-06, "loss": 0.6551, "step": 3314 }, { "epoch": 0.280753758204531, "grad_norm": 0.6249265118371669, "learning_rate": 8.440213807488453e-06, "loss": 0.8273, "step": 3315 }, { "epoch": 0.2808384501376244, "grad_norm": 1.0170478338549824, "learning_rate": 8.439218323533043e-06, "loss": 0.6143, "step": 3316 }, { "epoch": 0.2809231420707178, "grad_norm": 6.0464907941692845, "learning_rate": 8.43822258075824e-06, "loss": 0.6631, "step": 3317 }, { "epoch": 0.28100783400381113, "grad_norm": 1.3940320279978946, "learning_rate": 8.437226579238982e-06, "loss": 0.6715, "step": 3318 }, { "epoch": 0.28109252593690454, "grad_norm": 1.1620231765997198, "learning_rate": 8.436230319050222e-06, "loss": 0.6877, "step": 3319 }, { "epoch": 0.2811772178699979, "grad_norm": 1.282225615237486, "learning_rate": 8.435233800266937e-06, "loss": 0.668, "step": 3320 }, { "epoch": 0.28126190980309124, "grad_norm": 1.3765497095793982, "learning_rate": 8.434237022964118e-06, "loss": 0.6238, "step": 3321 }, { "epoch": 0.28134660173618464, "grad_norm": 1.3398492148159487, "learning_rate": 8.433239987216777e-06, "loss": 0.6321, "step": 3322 }, { "epoch": 0.281431293669278, "grad_norm": 1.1589660087675036, "learning_rate": 8.432242693099947e-06, "loss": 0.7125, "step": 3323 }, { "epoch": 0.2815159856023714, "grad_norm": 1.1532633175316063, "learning_rate": 8.431245140688679e-06, "loss": 0.6242, "step": 3324 }, { "epoch": 0.28160067753546475, "grad_norm": 1.3511685646271658, "learning_rate": 8.430247330058046e-06, "loss": 0.6482, "step": 3325 }, { "epoch": 0.2816853694685581, "grad_norm": 1.2134489708782232, "learning_rate": 8.429249261283136e-06, "loss": 0.6143, "step": 3326 }, { "epoch": 0.2817700614016515, "grad_norm": 1.237546652324259, "learning_rate": 8.428250934439063e-06, "loss": 0.667, "step": 3327 }, { "epoch": 0.28185475333474486, "grad_norm": 1.273295978687949, "learning_rate": 8.427252349600952e-06, "loss": 0.6798, "step": 3328 }, { "epoch": 0.28193944526783826, "grad_norm": 1.1589744953509107, "learning_rate": 8.426253506843954e-06, "loss": 0.6636, "step": 3329 }, { "epoch": 0.2820241372009316, "grad_norm": 0.5997381169556412, "learning_rate": 8.425254406243235e-06, "loss": 0.8719, "step": 3330 }, { "epoch": 0.28210882913402496, "grad_norm": 1.276007202994591, "learning_rate": 8.424255047873986e-06, "loss": 0.6338, "step": 3331 }, { "epoch": 0.28219352106711837, "grad_norm": 1.3974907702018065, "learning_rate": 8.423255431811413e-06, "loss": 0.5788, "step": 3332 }, { "epoch": 0.2822782130002117, "grad_norm": 1.2307362809159645, "learning_rate": 8.42225555813074e-06, "loss": 0.6879, "step": 3333 }, { "epoch": 0.2823629049333051, "grad_norm": 1.4065088091574385, "learning_rate": 8.421255426907216e-06, "loss": 0.6071, "step": 3334 }, { "epoch": 0.2824475968663985, "grad_norm": 1.1007004527309374, "learning_rate": 8.420255038216104e-06, "loss": 0.6729, "step": 3335 }, { "epoch": 0.2825322887994918, "grad_norm": 1.2619432724783226, "learning_rate": 8.41925439213269e-06, "loss": 0.6567, "step": 3336 }, { "epoch": 0.28261698073258523, "grad_norm": 1.9607306049625552, "learning_rate": 8.418253488732275e-06, "loss": 0.7372, "step": 3337 }, { "epoch": 0.2827016726656786, "grad_norm": 1.2300235379656064, "learning_rate": 8.417252328090186e-06, "loss": 0.5835, "step": 3338 }, { "epoch": 0.282786364598772, "grad_norm": 1.1166876224654116, "learning_rate": 8.416250910281764e-06, "loss": 0.647, "step": 3339 }, { "epoch": 0.28287105653186534, "grad_norm": 1.4088596004700868, "learning_rate": 8.415249235382373e-06, "loss": 0.703, "step": 3340 }, { "epoch": 0.2829557484649587, "grad_norm": 1.6943009088709344, "learning_rate": 8.41424730346739e-06, "loss": 0.692, "step": 3341 }, { "epoch": 0.2830404403980521, "grad_norm": 1.4871939159014362, "learning_rate": 8.413245114612219e-06, "loss": 0.6191, "step": 3342 }, { "epoch": 0.28312513233114545, "grad_norm": 1.2817043809040687, "learning_rate": 8.412242668892278e-06, "loss": 0.6681, "step": 3343 }, { "epoch": 0.28320982426423885, "grad_norm": 1.7488621362034118, "learning_rate": 8.411239966383008e-06, "loss": 0.6568, "step": 3344 }, { "epoch": 0.2832945161973322, "grad_norm": 1.2055361518297554, "learning_rate": 8.410237007159869e-06, "loss": 0.6506, "step": 3345 }, { "epoch": 0.28337920813042555, "grad_norm": 1.2789491689833714, "learning_rate": 8.409233791298334e-06, "loss": 0.6664, "step": 3346 }, { "epoch": 0.28346390006351896, "grad_norm": 1.2741618000053252, "learning_rate": 8.408230318873907e-06, "loss": 0.6904, "step": 3347 }, { "epoch": 0.2835485919966123, "grad_norm": 1.2844153187831897, "learning_rate": 8.4072265899621e-06, "loss": 0.647, "step": 3348 }, { "epoch": 0.2836332839297057, "grad_norm": 1.1597503694697624, "learning_rate": 8.406222604638448e-06, "loss": 0.6586, "step": 3349 }, { "epoch": 0.28371797586279907, "grad_norm": 0.621896478669669, "learning_rate": 8.405218362978508e-06, "loss": 0.8145, "step": 3350 }, { "epoch": 0.2838026677958924, "grad_norm": 1.3384546281504213, "learning_rate": 8.404213865057858e-06, "loss": 0.7253, "step": 3351 }, { "epoch": 0.2838873597289858, "grad_norm": 1.4561810301086775, "learning_rate": 8.403209110952086e-06, "loss": 0.6794, "step": 3352 }, { "epoch": 0.2839720516620792, "grad_norm": 1.6969250641686031, "learning_rate": 8.402204100736808e-06, "loss": 0.6141, "step": 3353 }, { "epoch": 0.2840567435951726, "grad_norm": 1.590882582081342, "learning_rate": 8.401198834487655e-06, "loss": 0.6872, "step": 3354 }, { "epoch": 0.28414143552826593, "grad_norm": 1.1894230558505874, "learning_rate": 8.400193312280282e-06, "loss": 0.7232, "step": 3355 }, { "epoch": 0.2842261274613593, "grad_norm": 0.658386371979345, "learning_rate": 8.399187534190356e-06, "loss": 0.8611, "step": 3356 }, { "epoch": 0.2843108193944527, "grad_norm": 1.5359872033404294, "learning_rate": 8.398181500293568e-06, "loss": 0.6883, "step": 3357 }, { "epoch": 0.28439551132754604, "grad_norm": 2.099655183355471, "learning_rate": 8.397175210665628e-06, "loss": 0.6958, "step": 3358 }, { "epoch": 0.28448020326063944, "grad_norm": 1.251985733453561, "learning_rate": 8.396168665382266e-06, "loss": 0.6343, "step": 3359 }, { "epoch": 0.2845648951937328, "grad_norm": 1.5881833384206945, "learning_rate": 8.395161864519228e-06, "loss": 0.7011, "step": 3360 }, { "epoch": 0.28464958712682614, "grad_norm": 1.1640196213439569, "learning_rate": 8.39415480815228e-06, "loss": 0.6399, "step": 3361 }, { "epoch": 0.28473427905991955, "grad_norm": 1.3656504438464991, "learning_rate": 8.39314749635721e-06, "loss": 0.6208, "step": 3362 }, { "epoch": 0.2848189709930129, "grad_norm": 1.2398336431971664, "learning_rate": 8.392139929209824e-06, "loss": 0.6114, "step": 3363 }, { "epoch": 0.2849036629261063, "grad_norm": 1.3445070504520786, "learning_rate": 8.391132106785946e-06, "loss": 0.6899, "step": 3364 }, { "epoch": 0.28498835485919966, "grad_norm": 1.4256718603648981, "learning_rate": 8.39012402916142e-06, "loss": 0.7256, "step": 3365 }, { "epoch": 0.28507304679229306, "grad_norm": 1.1412580244111599, "learning_rate": 8.389115696412109e-06, "loss": 0.6563, "step": 3366 }, { "epoch": 0.2851577387253864, "grad_norm": 1.2428786603225939, "learning_rate": 8.388107108613896e-06, "loss": 0.6448, "step": 3367 }, { "epoch": 0.28524243065847976, "grad_norm": 0.7685181014193981, "learning_rate": 8.38709826584268e-06, "loss": 0.8963, "step": 3368 }, { "epoch": 0.28532712259157317, "grad_norm": 1.4108632694751615, "learning_rate": 8.386089168174388e-06, "loss": 0.668, "step": 3369 }, { "epoch": 0.2854118145246665, "grad_norm": 1.238944023557775, "learning_rate": 8.385079815684954e-06, "loss": 0.6324, "step": 3370 }, { "epoch": 0.2854965064577599, "grad_norm": 1.461698843625218, "learning_rate": 8.384070208450339e-06, "loss": 0.6033, "step": 3371 }, { "epoch": 0.2855811983908533, "grad_norm": 1.6557579371376947, "learning_rate": 8.38306034654652e-06, "loss": 0.6674, "step": 3372 }, { "epoch": 0.2856658903239466, "grad_norm": 1.3296174825666371, "learning_rate": 8.382050230049497e-06, "loss": 0.6781, "step": 3373 }, { "epoch": 0.28575058225704003, "grad_norm": 1.4042553607512591, "learning_rate": 8.381039859035285e-06, "loss": 0.6504, "step": 3374 }, { "epoch": 0.2858352741901334, "grad_norm": 1.226221147270308, "learning_rate": 8.380029233579922e-06, "loss": 0.662, "step": 3375 }, { "epoch": 0.2859199661232268, "grad_norm": 1.2782301295574692, "learning_rate": 8.379018353759458e-06, "loss": 0.6755, "step": 3376 }, { "epoch": 0.28600465805632014, "grad_norm": 1.8298725704525078, "learning_rate": 8.378007219649973e-06, "loss": 0.6228, "step": 3377 }, { "epoch": 0.2860893499894135, "grad_norm": 0.630883259252388, "learning_rate": 8.376995831327555e-06, "loss": 0.8225, "step": 3378 }, { "epoch": 0.2861740419225069, "grad_norm": 1.4647489865367178, "learning_rate": 8.37598418886832e-06, "loss": 0.6382, "step": 3379 }, { "epoch": 0.28625873385560024, "grad_norm": 1.1145587124908845, "learning_rate": 8.374972292348398e-06, "loss": 0.7008, "step": 3380 }, { "epoch": 0.28634342578869365, "grad_norm": 1.3310867106722615, "learning_rate": 8.37396014184394e-06, "loss": 0.6495, "step": 3381 }, { "epoch": 0.286428117721787, "grad_norm": 2.5371810627481537, "learning_rate": 8.372947737431113e-06, "loss": 0.6491, "step": 3382 }, { "epoch": 0.28651280965488035, "grad_norm": 1.214760057991488, "learning_rate": 8.371935079186111e-06, "loss": 0.6751, "step": 3383 }, { "epoch": 0.28659750158797376, "grad_norm": 1.2611803656145661, "learning_rate": 8.370922167185139e-06, "loss": 0.6576, "step": 3384 }, { "epoch": 0.2866821935210671, "grad_norm": 0.6078408342380327, "learning_rate": 8.369909001504421e-06, "loss": 0.8624, "step": 3385 }, { "epoch": 0.2867668854541605, "grad_norm": 1.421084948659844, "learning_rate": 8.368895582220209e-06, "loss": 0.6647, "step": 3386 }, { "epoch": 0.28685157738725386, "grad_norm": 1.5486974660557915, "learning_rate": 8.367881909408765e-06, "loss": 0.6286, "step": 3387 }, { "epoch": 0.2869362693203472, "grad_norm": 1.3867734918610641, "learning_rate": 8.366867983146372e-06, "loss": 0.6345, "step": 3388 }, { "epoch": 0.2870209612534406, "grad_norm": 0.6398323408619662, "learning_rate": 8.365853803509335e-06, "loss": 0.8765, "step": 3389 }, { "epoch": 0.28710565318653397, "grad_norm": 0.590316385661532, "learning_rate": 8.364839370573979e-06, "loss": 0.8454, "step": 3390 }, { "epoch": 0.2871903451196274, "grad_norm": 1.1422383903656248, "learning_rate": 8.36382468441664e-06, "loss": 0.6526, "step": 3391 }, { "epoch": 0.2872750370527207, "grad_norm": 1.4845076938307369, "learning_rate": 8.362809745113683e-06, "loss": 0.6625, "step": 3392 }, { "epoch": 0.2873597289858141, "grad_norm": 1.9567526314653882, "learning_rate": 8.361794552741484e-06, "loss": 0.6565, "step": 3393 }, { "epoch": 0.2874444209189075, "grad_norm": 1.3715488351882703, "learning_rate": 8.360779107376445e-06, "loss": 0.6132, "step": 3394 }, { "epoch": 0.28752911285200083, "grad_norm": 2.484637128845758, "learning_rate": 8.35976340909498e-06, "loss": 0.6475, "step": 3395 }, { "epoch": 0.28761380478509424, "grad_norm": 1.444840683991107, "learning_rate": 8.35874745797353e-06, "loss": 0.6777, "step": 3396 }, { "epoch": 0.2876984967181876, "grad_norm": 1.3902276106949971, "learning_rate": 8.357731254088547e-06, "loss": 0.6385, "step": 3397 }, { "epoch": 0.28778318865128094, "grad_norm": 1.9038088964510713, "learning_rate": 8.356714797516508e-06, "loss": 0.6832, "step": 3398 }, { "epoch": 0.28786788058437435, "grad_norm": 2.168207855707896, "learning_rate": 8.355698088333903e-06, "loss": 0.6935, "step": 3399 }, { "epoch": 0.2879525725174677, "grad_norm": 11.62290901939995, "learning_rate": 8.354681126617251e-06, "loss": 0.6847, "step": 3400 }, { "epoch": 0.2880372644505611, "grad_norm": 1.619987877625573, "learning_rate": 8.35366391244308e-06, "loss": 0.6869, "step": 3401 }, { "epoch": 0.28812195638365445, "grad_norm": 1.3056240966675157, "learning_rate": 8.352646445887938e-06, "loss": 0.6018, "step": 3402 }, { "epoch": 0.2882066483167478, "grad_norm": 1.3106992171000265, "learning_rate": 8.3516287270284e-06, "loss": 0.6962, "step": 3403 }, { "epoch": 0.2882913402498412, "grad_norm": 1.856754828574297, "learning_rate": 8.350610755941054e-06, "loss": 0.6389, "step": 3404 }, { "epoch": 0.28837603218293456, "grad_norm": 1.9003207235541446, "learning_rate": 8.349592532702504e-06, "loss": 0.6446, "step": 3405 }, { "epoch": 0.28846072411602797, "grad_norm": 1.4739736672258164, "learning_rate": 8.34857405738938e-06, "loss": 0.6519, "step": 3406 }, { "epoch": 0.2885454160491213, "grad_norm": 1.3199108881693677, "learning_rate": 8.347555330078326e-06, "loss": 0.6301, "step": 3407 }, { "epoch": 0.28863010798221467, "grad_norm": 0.6801831253112945, "learning_rate": 8.346536350846008e-06, "loss": 0.8551, "step": 3408 }, { "epoch": 0.2887147999153081, "grad_norm": 1.6154925494447772, "learning_rate": 8.345517119769108e-06, "loss": 0.6605, "step": 3409 }, { "epoch": 0.2887994918484014, "grad_norm": 1.7911427709497412, "learning_rate": 8.344497636924332e-06, "loss": 0.6365, "step": 3410 }, { "epoch": 0.28888418378149483, "grad_norm": 1.093254571506795, "learning_rate": 8.343477902388395e-06, "loss": 0.7054, "step": 3411 }, { "epoch": 0.2889688757145882, "grad_norm": 1.259401594133571, "learning_rate": 8.342457916238044e-06, "loss": 0.6631, "step": 3412 }, { "epoch": 0.28905356764768153, "grad_norm": 1.4266715671892483, "learning_rate": 8.341437678550034e-06, "loss": 0.6858, "step": 3413 }, { "epoch": 0.28913825958077494, "grad_norm": 1.2194074905441357, "learning_rate": 8.340417189401147e-06, "loss": 0.6477, "step": 3414 }, { "epoch": 0.2892229515138683, "grad_norm": 1.1943818507625437, "learning_rate": 8.339396448868176e-06, "loss": 0.6201, "step": 3415 }, { "epoch": 0.2893076434469617, "grad_norm": 1.191561423299014, "learning_rate": 8.338375457027941e-06, "loss": 0.633, "step": 3416 }, { "epoch": 0.28939233538005504, "grad_norm": 1.6032425198684073, "learning_rate": 8.337354213957273e-06, "loss": 0.6696, "step": 3417 }, { "epoch": 0.28947702731314845, "grad_norm": 1.1496507998599954, "learning_rate": 8.33633271973303e-06, "loss": 0.6096, "step": 3418 }, { "epoch": 0.2895617192462418, "grad_norm": 1.43838740960017, "learning_rate": 8.335310974432083e-06, "loss": 0.7168, "step": 3419 }, { "epoch": 0.28964641117933515, "grad_norm": 1.661974719930109, "learning_rate": 8.334288978131324e-06, "loss": 0.6216, "step": 3420 }, { "epoch": 0.28973110311242856, "grad_norm": 1.2467226525633182, "learning_rate": 8.333266730907663e-06, "loss": 0.6725, "step": 3421 }, { "epoch": 0.2898157950455219, "grad_norm": 1.8124821609140278, "learning_rate": 8.33224423283803e-06, "loss": 0.6448, "step": 3422 }, { "epoch": 0.2899004869786153, "grad_norm": 1.1859575452363258, "learning_rate": 8.331221483999374e-06, "loss": 0.615, "step": 3423 }, { "epoch": 0.28998517891170866, "grad_norm": 1.544409618212695, "learning_rate": 8.330198484468661e-06, "loss": 0.6661, "step": 3424 }, { "epoch": 0.290069870844802, "grad_norm": 0.6126236874121938, "learning_rate": 8.32917523432288e-06, "loss": 0.8159, "step": 3425 }, { "epoch": 0.2901545627778954, "grad_norm": 2.5860454057032536, "learning_rate": 8.328151733639032e-06, "loss": 0.6596, "step": 3426 }, { "epoch": 0.29023925471098877, "grad_norm": 1.1953364642235644, "learning_rate": 8.327127982494142e-06, "loss": 0.6492, "step": 3427 }, { "epoch": 0.2903239466440822, "grad_norm": 1.486007764329726, "learning_rate": 8.326103980965253e-06, "loss": 0.6819, "step": 3428 }, { "epoch": 0.2904086385771755, "grad_norm": 0.5975540850911566, "learning_rate": 8.325079729129429e-06, "loss": 0.8495, "step": 3429 }, { "epoch": 0.2904933305102689, "grad_norm": 1.592679152426702, "learning_rate": 8.324055227063747e-06, "loss": 0.6475, "step": 3430 }, { "epoch": 0.2905780224433623, "grad_norm": 1.0581002808846034, "learning_rate": 8.323030474845307e-06, "loss": 0.6818, "step": 3431 }, { "epoch": 0.29066271437645563, "grad_norm": 1.3111385343216495, "learning_rate": 8.322005472551227e-06, "loss": 0.6147, "step": 3432 }, { "epoch": 0.29074740630954904, "grad_norm": 2.371797892189131, "learning_rate": 8.320980220258645e-06, "loss": 0.6891, "step": 3433 }, { "epoch": 0.2908320982426424, "grad_norm": 1.3313412635073048, "learning_rate": 8.319954718044718e-06, "loss": 0.6123, "step": 3434 }, { "epoch": 0.29091679017573574, "grad_norm": 1.2308169177350308, "learning_rate": 8.318928965986616e-06, "loss": 0.6958, "step": 3435 }, { "epoch": 0.29100148210882915, "grad_norm": 1.671572079887509, "learning_rate": 8.317902964161535e-06, "loss": 0.6995, "step": 3436 }, { "epoch": 0.2910861740419225, "grad_norm": 1.103943510316883, "learning_rate": 8.316876712646689e-06, "loss": 0.6252, "step": 3437 }, { "epoch": 0.2911708659750159, "grad_norm": 1.3302776821733342, "learning_rate": 8.315850211519303e-06, "loss": 0.639, "step": 3438 }, { "epoch": 0.29125555790810925, "grad_norm": 2.320429537485119, "learning_rate": 8.314823460856634e-06, "loss": 0.639, "step": 3439 }, { "epoch": 0.2913402498412026, "grad_norm": 1.2659232615086236, "learning_rate": 8.313796460735944e-06, "loss": 0.6299, "step": 3440 }, { "epoch": 0.291424941774296, "grad_norm": 1.3790295189103652, "learning_rate": 8.312769211234524e-06, "loss": 0.6288, "step": 3441 }, { "epoch": 0.29150963370738936, "grad_norm": 1.5496810107205912, "learning_rate": 8.311741712429679e-06, "loss": 0.6245, "step": 3442 }, { "epoch": 0.29159432564048277, "grad_norm": 1.4060239999404083, "learning_rate": 8.310713964398734e-06, "loss": 0.6456, "step": 3443 }, { "epoch": 0.2916790175735761, "grad_norm": 2.1932903626096385, "learning_rate": 8.309685967219032e-06, "loss": 0.685, "step": 3444 }, { "epoch": 0.29176370950666947, "grad_norm": 1.4184012962368926, "learning_rate": 8.308657720967934e-06, "loss": 0.6454, "step": 3445 }, { "epoch": 0.2918484014397629, "grad_norm": 1.1385288784464012, "learning_rate": 8.307629225722824e-06, "loss": 0.6431, "step": 3446 }, { "epoch": 0.2919330933728562, "grad_norm": 1.4081588491409194, "learning_rate": 8.3066004815611e-06, "loss": 0.6353, "step": 3447 }, { "epoch": 0.29201778530594963, "grad_norm": 1.2184892564961647, "learning_rate": 8.30557148856018e-06, "loss": 0.6431, "step": 3448 }, { "epoch": 0.292102477239043, "grad_norm": 1.3008117529905436, "learning_rate": 8.304542246797501e-06, "loss": 0.6244, "step": 3449 }, { "epoch": 0.29218716917213633, "grad_norm": 1.4105481946148146, "learning_rate": 8.30351275635052e-06, "loss": 0.6353, "step": 3450 }, { "epoch": 0.29227186110522974, "grad_norm": 1.211796048118576, "learning_rate": 8.302483017296712e-06, "loss": 0.661, "step": 3451 }, { "epoch": 0.2923565530383231, "grad_norm": 1.3176999099900604, "learning_rate": 8.301453029713568e-06, "loss": 0.6433, "step": 3452 }, { "epoch": 0.2924412449714165, "grad_norm": 1.5750792777663796, "learning_rate": 8.300422793678602e-06, "loss": 0.6288, "step": 3453 }, { "epoch": 0.29252593690450984, "grad_norm": 1.980316928159941, "learning_rate": 8.299392309269346e-06, "loss": 0.6527, "step": 3454 }, { "epoch": 0.2926106288376032, "grad_norm": 1.2333344420656478, "learning_rate": 8.298361576563345e-06, "loss": 0.6686, "step": 3455 }, { "epoch": 0.2926953207706966, "grad_norm": 1.1853100857472334, "learning_rate": 8.297330595638171e-06, "loss": 0.6581, "step": 3456 }, { "epoch": 0.29278001270378995, "grad_norm": 0.6520938648544271, "learning_rate": 8.29629936657141e-06, "loss": 0.9134, "step": 3457 }, { "epoch": 0.29286470463688336, "grad_norm": 1.358164018225983, "learning_rate": 8.295267889440666e-06, "loss": 0.6948, "step": 3458 }, { "epoch": 0.2929493965699767, "grad_norm": 1.1234498152243644, "learning_rate": 8.294236164323563e-06, "loss": 0.5845, "step": 3459 }, { "epoch": 0.29303408850307006, "grad_norm": 1.2181836746763735, "learning_rate": 8.293204191297747e-06, "loss": 0.6349, "step": 3460 }, { "epoch": 0.29311878043616346, "grad_norm": 1.2439450917501216, "learning_rate": 8.292171970440876e-06, "loss": 0.6716, "step": 3461 }, { "epoch": 0.2932034723692568, "grad_norm": 1.4324028674480214, "learning_rate": 8.291139501830632e-06, "loss": 0.6267, "step": 3462 }, { "epoch": 0.2932881643023502, "grad_norm": 0.6339419626657466, "learning_rate": 8.290106785544713e-06, "loss": 0.9109, "step": 3463 }, { "epoch": 0.29337285623544357, "grad_norm": 1.6803105342398823, "learning_rate": 8.289073821660835e-06, "loss": 0.6911, "step": 3464 }, { "epoch": 0.2934575481685369, "grad_norm": 1.328174180952633, "learning_rate": 8.288040610256737e-06, "loss": 0.6264, "step": 3465 }, { "epoch": 0.2935422401016303, "grad_norm": 1.355265495772845, "learning_rate": 8.287007151410171e-06, "loss": 0.6143, "step": 3466 }, { "epoch": 0.2936269320347237, "grad_norm": 1.4347579667997845, "learning_rate": 8.285973445198914e-06, "loss": 0.605, "step": 3467 }, { "epoch": 0.2937116239678171, "grad_norm": 1.5873335662324342, "learning_rate": 8.284939491700751e-06, "loss": 0.6625, "step": 3468 }, { "epoch": 0.29379631590091043, "grad_norm": 1.4756684721615052, "learning_rate": 8.283905290993501e-06, "loss": 0.5932, "step": 3469 }, { "epoch": 0.29388100783400384, "grad_norm": 0.7073648126784772, "learning_rate": 8.282870843154986e-06, "loss": 0.8393, "step": 3470 }, { "epoch": 0.2939656997670972, "grad_norm": 1.2931572589461142, "learning_rate": 8.281836148263057e-06, "loss": 0.6063, "step": 3471 }, { "epoch": 0.29405039170019054, "grad_norm": 1.1453536386937035, "learning_rate": 8.28080120639558e-06, "loss": 0.6116, "step": 3472 }, { "epoch": 0.29413508363328394, "grad_norm": 1.2167990891538734, "learning_rate": 8.279766017630439e-06, "loss": 0.6187, "step": 3473 }, { "epoch": 0.2942197755663773, "grad_norm": 1.295904067173421, "learning_rate": 8.278730582045538e-06, "loss": 0.6573, "step": 3474 }, { "epoch": 0.2943044674994707, "grad_norm": 1.3857120664810092, "learning_rate": 8.2776948997188e-06, "loss": 0.6957, "step": 3475 }, { "epoch": 0.29438915943256405, "grad_norm": 1.1747197154976186, "learning_rate": 8.276658970728164e-06, "loss": 0.6627, "step": 3476 }, { "epoch": 0.2944738513656574, "grad_norm": 1.1298894353705857, "learning_rate": 8.27562279515159e-06, "loss": 0.6238, "step": 3477 }, { "epoch": 0.2945585432987508, "grad_norm": 1.3216473777610813, "learning_rate": 8.274586373067054e-06, "loss": 0.6232, "step": 3478 }, { "epoch": 0.29464323523184416, "grad_norm": 1.2776450102344314, "learning_rate": 8.273549704552555e-06, "loss": 0.5853, "step": 3479 }, { "epoch": 0.29472792716493756, "grad_norm": 2.789920952762725, "learning_rate": 8.272512789686107e-06, "loss": 0.6774, "step": 3480 }, { "epoch": 0.2948126190980309, "grad_norm": 1.5146346791262086, "learning_rate": 8.271475628545742e-06, "loss": 0.6731, "step": 3481 }, { "epoch": 0.29489731103112427, "grad_norm": 1.585249775250217, "learning_rate": 8.270438221209512e-06, "loss": 0.639, "step": 3482 }, { "epoch": 0.29498200296421767, "grad_norm": 2.19791540977771, "learning_rate": 8.269400567755489e-06, "loss": 0.6569, "step": 3483 }, { "epoch": 0.295066694897311, "grad_norm": 1.712450925197308, "learning_rate": 8.26836266826176e-06, "loss": 0.6645, "step": 3484 }, { "epoch": 0.2951513868304044, "grad_norm": 1.8368047116249209, "learning_rate": 8.267324522806435e-06, "loss": 0.6848, "step": 3485 }, { "epoch": 0.2952360787634978, "grad_norm": 1.793699342103958, "learning_rate": 8.266286131467637e-06, "loss": 0.6667, "step": 3486 }, { "epoch": 0.29532077069659113, "grad_norm": 1.4358016532258373, "learning_rate": 8.265247494323512e-06, "loss": 0.5918, "step": 3487 }, { "epoch": 0.29540546262968453, "grad_norm": 1.6979431800030218, "learning_rate": 8.264208611452222e-06, "loss": 0.6537, "step": 3488 }, { "epoch": 0.2954901545627779, "grad_norm": 1.4314250833443394, "learning_rate": 8.263169482931951e-06, "loss": 0.7226, "step": 3489 }, { "epoch": 0.2955748464958713, "grad_norm": 1.3832724065286648, "learning_rate": 8.262130108840897e-06, "loss": 0.6741, "step": 3490 }, { "epoch": 0.29565953842896464, "grad_norm": 1.6055188600465375, "learning_rate": 8.261090489257278e-06, "loss": 0.6326, "step": 3491 }, { "epoch": 0.295744230362058, "grad_norm": 1.2913614707589613, "learning_rate": 8.26005062425933e-06, "loss": 0.6239, "step": 3492 }, { "epoch": 0.2958289222951514, "grad_norm": 1.4849233240127149, "learning_rate": 8.259010513925312e-06, "loss": 0.6057, "step": 3493 }, { "epoch": 0.29591361422824475, "grad_norm": 0.6975911224096069, "learning_rate": 8.257970158333495e-06, "loss": 0.8821, "step": 3494 }, { "epoch": 0.29599830616133815, "grad_norm": 1.579633528219218, "learning_rate": 8.25692955756217e-06, "loss": 0.663, "step": 3495 }, { "epoch": 0.2960829980944315, "grad_norm": 1.18669891718553, "learning_rate": 8.255888711689653e-06, "loss": 0.69, "step": 3496 }, { "epoch": 0.29616769002752485, "grad_norm": 1.5223309823677698, "learning_rate": 8.254847620794267e-06, "loss": 0.6454, "step": 3497 }, { "epoch": 0.29625238196061826, "grad_norm": 1.50167893693787, "learning_rate": 8.253806284954362e-06, "loss": 0.626, "step": 3498 }, { "epoch": 0.2963370738937116, "grad_norm": 1.283125085432907, "learning_rate": 8.252764704248305e-06, "loss": 0.7183, "step": 3499 }, { "epoch": 0.296421765826805, "grad_norm": 1.2932274668322898, "learning_rate": 8.25172287875448e-06, "loss": 0.6295, "step": 3500 }, { "epoch": 0.29650645775989837, "grad_norm": 0.6429260280994487, "learning_rate": 8.250680808551291e-06, "loss": 0.886, "step": 3501 }, { "epoch": 0.2965911496929917, "grad_norm": 1.453660058090444, "learning_rate": 8.249638493717155e-06, "loss": 0.688, "step": 3502 }, { "epoch": 0.2966758416260851, "grad_norm": 1.1935195728016381, "learning_rate": 8.248595934330516e-06, "loss": 0.6559, "step": 3503 }, { "epoch": 0.2967605335591785, "grad_norm": 1.4284863207721916, "learning_rate": 8.247553130469832e-06, "loss": 0.649, "step": 3504 }, { "epoch": 0.2968452254922719, "grad_norm": 1.2507651118612975, "learning_rate": 8.246510082213577e-06, "loss": 0.6546, "step": 3505 }, { "epoch": 0.29692991742536523, "grad_norm": 1.445453184953244, "learning_rate": 8.245466789640249e-06, "loss": 0.6513, "step": 3506 }, { "epoch": 0.2970146093584586, "grad_norm": 1.3653720773924103, "learning_rate": 8.244423252828357e-06, "loss": 0.632, "step": 3507 }, { "epoch": 0.297099301291552, "grad_norm": 2.016679964763299, "learning_rate": 8.243379471856436e-06, "loss": 0.6089, "step": 3508 }, { "epoch": 0.29718399322464534, "grad_norm": 2.7291075196207046, "learning_rate": 8.242335446803035e-06, "loss": 0.6648, "step": 3509 }, { "epoch": 0.29726868515773874, "grad_norm": 1.4120495274326268, "learning_rate": 8.241291177746724e-06, "loss": 0.6359, "step": 3510 }, { "epoch": 0.2973533770908321, "grad_norm": 1.1973315681634613, "learning_rate": 8.240246664766089e-06, "loss": 0.6101, "step": 3511 }, { "epoch": 0.29743806902392544, "grad_norm": 1.231218411709328, "learning_rate": 8.239201907939734e-06, "loss": 0.5951, "step": 3512 }, { "epoch": 0.29752276095701885, "grad_norm": 1.2660626337013934, "learning_rate": 8.23815690734628e-06, "loss": 0.6715, "step": 3513 }, { "epoch": 0.2976074528901122, "grad_norm": 1.3600342972766408, "learning_rate": 8.237111663064374e-06, "loss": 0.6769, "step": 3514 }, { "epoch": 0.2976921448232056, "grad_norm": 1.2150340745946853, "learning_rate": 8.236066175172676e-06, "loss": 0.6438, "step": 3515 }, { "epoch": 0.29777683675629896, "grad_norm": 0.6377749761487883, "learning_rate": 8.23502044374986e-06, "loss": 0.8876, "step": 3516 }, { "epoch": 0.29786152868939236, "grad_norm": 1.1354547295233537, "learning_rate": 8.233974468874625e-06, "loss": 0.6726, "step": 3517 }, { "epoch": 0.2979462206224857, "grad_norm": 1.3517636252535044, "learning_rate": 8.232928250625689e-06, "loss": 0.6428, "step": 3518 }, { "epoch": 0.29803091255557906, "grad_norm": 1.5315250011248736, "learning_rate": 8.231881789081782e-06, "loss": 0.7214, "step": 3519 }, { "epoch": 0.29811560448867247, "grad_norm": 2.5825452525574675, "learning_rate": 8.230835084321656e-06, "loss": 0.6895, "step": 3520 }, { "epoch": 0.2982002964217658, "grad_norm": 1.359658805251811, "learning_rate": 8.229788136424081e-06, "loss": 0.6215, "step": 3521 }, { "epoch": 0.2982849883548592, "grad_norm": 1.4280724275218766, "learning_rate": 8.228740945467848e-06, "loss": 0.6722, "step": 3522 }, { "epoch": 0.2983696802879526, "grad_norm": 1.967564932482362, "learning_rate": 8.227693511531762e-06, "loss": 0.6728, "step": 3523 }, { "epoch": 0.2984543722210459, "grad_norm": 1.5134099574328819, "learning_rate": 8.226645834694647e-06, "loss": 0.6647, "step": 3524 }, { "epoch": 0.29853906415413933, "grad_norm": 0.703087412409808, "learning_rate": 8.225597915035346e-06, "loss": 0.8661, "step": 3525 }, { "epoch": 0.2986237560872327, "grad_norm": 1.193761802216216, "learning_rate": 8.224549752632724e-06, "loss": 0.6281, "step": 3526 }, { "epoch": 0.2987084480203261, "grad_norm": 0.6161458799883677, "learning_rate": 8.223501347565656e-06, "loss": 0.8664, "step": 3527 }, { "epoch": 0.29879313995341944, "grad_norm": 2.046476590557862, "learning_rate": 8.222452699913043e-06, "loss": 0.6732, "step": 3528 }, { "epoch": 0.2988778318865128, "grad_norm": 1.1481536141692865, "learning_rate": 8.221403809753801e-06, "loss": 0.6531, "step": 3529 }, { "epoch": 0.2989625238196062, "grad_norm": 1.3280348928866936, "learning_rate": 8.220354677166864e-06, "loss": 0.6722, "step": 3530 }, { "epoch": 0.29904721575269955, "grad_norm": 1.3976501374399386, "learning_rate": 8.219305302231186e-06, "loss": 0.6569, "step": 3531 }, { "epoch": 0.29913190768579295, "grad_norm": 1.5176307951264976, "learning_rate": 8.218255685025735e-06, "loss": 0.6627, "step": 3532 }, { "epoch": 0.2992165996188863, "grad_norm": 1.2144945204622428, "learning_rate": 8.217205825629504e-06, "loss": 0.6217, "step": 3533 }, { "epoch": 0.29930129155197965, "grad_norm": 1.3390980308441063, "learning_rate": 8.216155724121497e-06, "loss": 0.6556, "step": 3534 }, { "epoch": 0.29938598348507306, "grad_norm": 1.194283078420777, "learning_rate": 8.215105380580744e-06, "loss": 0.6223, "step": 3535 }, { "epoch": 0.2994706754181664, "grad_norm": 1.4852035778684551, "learning_rate": 8.214054795086284e-06, "loss": 0.629, "step": 3536 }, { "epoch": 0.2995553673512598, "grad_norm": 0.5982203211743927, "learning_rate": 8.21300396771718e-06, "loss": 0.8289, "step": 3537 }, { "epoch": 0.29964005928435317, "grad_norm": 1.1837498721093316, "learning_rate": 8.211952898552517e-06, "loss": 0.6441, "step": 3538 }, { "epoch": 0.2997247512174465, "grad_norm": 1.2724932313716333, "learning_rate": 8.21090158767139e-06, "loss": 0.6919, "step": 3539 }, { "epoch": 0.2998094431505399, "grad_norm": 1.4848688443462914, "learning_rate": 8.209850035152915e-06, "loss": 0.628, "step": 3540 }, { "epoch": 0.2998941350836333, "grad_norm": 1.4936930560318333, "learning_rate": 8.208798241076229e-06, "loss": 0.6688, "step": 3541 }, { "epoch": 0.2999788270167267, "grad_norm": 1.1485319843764157, "learning_rate": 8.207746205520481e-06, "loss": 0.6405, "step": 3542 }, { "epoch": 0.30006351894982003, "grad_norm": 2.8023650832611455, "learning_rate": 8.206693928564849e-06, "loss": 0.604, "step": 3543 }, { "epoch": 0.3001482108829134, "grad_norm": 1.143568988930461, "learning_rate": 8.205641410288516e-06, "loss": 0.6747, "step": 3544 }, { "epoch": 0.3002329028160068, "grad_norm": 1.1499355664561446, "learning_rate": 8.204588650770694e-06, "loss": 0.6563, "step": 3545 }, { "epoch": 0.30031759474910014, "grad_norm": 0.6956531009157904, "learning_rate": 8.203535650090605e-06, "loss": 0.8136, "step": 3546 }, { "epoch": 0.30040228668219354, "grad_norm": 1.1766275042266172, "learning_rate": 8.202482408327496e-06, "loss": 0.6663, "step": 3547 }, { "epoch": 0.3004869786152869, "grad_norm": 1.3855916253020002, "learning_rate": 8.201428925560629e-06, "loss": 0.6622, "step": 3548 }, { "epoch": 0.30057167054838024, "grad_norm": 1.234242032541504, "learning_rate": 8.20037520186928e-06, "loss": 0.678, "step": 3549 }, { "epoch": 0.30065636248147365, "grad_norm": 1.0156181761908998, "learning_rate": 8.199321237332752e-06, "loss": 0.6646, "step": 3550 }, { "epoch": 0.300741054414567, "grad_norm": 1.2927548628696688, "learning_rate": 8.19826703203036e-06, "loss": 0.6602, "step": 3551 }, { "epoch": 0.3008257463476604, "grad_norm": 1.2576614718233021, "learning_rate": 8.197212586041438e-06, "loss": 0.6815, "step": 3552 }, { "epoch": 0.30091043828075376, "grad_norm": 1.4693754313364529, "learning_rate": 8.196157899445339e-06, "loss": 0.6755, "step": 3553 }, { "epoch": 0.3009951302138471, "grad_norm": 1.1202893224195667, "learning_rate": 8.195102972321432e-06, "loss": 0.5833, "step": 3554 }, { "epoch": 0.3010798221469405, "grad_norm": 1.9028971482867034, "learning_rate": 8.194047804749108e-06, "loss": 0.7012, "step": 3555 }, { "epoch": 0.30116451408003386, "grad_norm": 1.236182162075714, "learning_rate": 8.192992396807776e-06, "loss": 0.7074, "step": 3556 }, { "epoch": 0.30124920601312727, "grad_norm": 1.2212037029528096, "learning_rate": 8.191936748576857e-06, "loss": 0.6184, "step": 3557 }, { "epoch": 0.3013338979462206, "grad_norm": 0.6438349495154907, "learning_rate": 8.190880860135793e-06, "loss": 0.8952, "step": 3558 }, { "epoch": 0.30141858987931397, "grad_norm": 1.0819455181790847, "learning_rate": 8.189824731564052e-06, "loss": 0.5981, "step": 3559 }, { "epoch": 0.3015032818124074, "grad_norm": 1.2456628382371067, "learning_rate": 8.188768362941107e-06, "loss": 0.6676, "step": 3560 }, { "epoch": 0.3015879737455007, "grad_norm": 1.3649891710750572, "learning_rate": 8.187711754346456e-06, "loss": 0.6761, "step": 3561 }, { "epoch": 0.30167266567859413, "grad_norm": 1.1636937575661372, "learning_rate": 8.186654905859617e-06, "loss": 0.6491, "step": 3562 }, { "epoch": 0.3017573576116875, "grad_norm": 1.309256568270864, "learning_rate": 8.185597817560123e-06, "loss": 0.6643, "step": 3563 }, { "epoch": 0.30184204954478083, "grad_norm": 1.4166584552259414, "learning_rate": 8.184540489527524e-06, "loss": 0.6633, "step": 3564 }, { "epoch": 0.30192674147787424, "grad_norm": 1.472587665670216, "learning_rate": 8.18348292184139e-06, "loss": 0.6216, "step": 3565 }, { "epoch": 0.3020114334109676, "grad_norm": 1.5960424458516056, "learning_rate": 8.18242511458131e-06, "loss": 0.7112, "step": 3566 }, { "epoch": 0.302096125344061, "grad_norm": 1.30710783984053, "learning_rate": 8.181367067826886e-06, "loss": 0.6543, "step": 3567 }, { "epoch": 0.30218081727715435, "grad_norm": 1.4136193794642464, "learning_rate": 8.180308781657745e-06, "loss": 0.6999, "step": 3568 }, { "epoch": 0.30226550921024775, "grad_norm": 1.4501768474956638, "learning_rate": 8.179250256153529e-06, "loss": 0.6521, "step": 3569 }, { "epoch": 0.3023502011433411, "grad_norm": 1.2562401486677168, "learning_rate": 8.178191491393894e-06, "loss": 0.613, "step": 3570 }, { "epoch": 0.30243489307643445, "grad_norm": 1.2542778873152194, "learning_rate": 8.17713248745852e-06, "loss": 0.6636, "step": 3571 }, { "epoch": 0.30251958500952786, "grad_norm": 0.6397441475389262, "learning_rate": 8.176073244427106e-06, "loss": 0.8409, "step": 3572 }, { "epoch": 0.3026042769426212, "grad_norm": 1.3388095780181624, "learning_rate": 8.175013762379361e-06, "loss": 0.6856, "step": 3573 }, { "epoch": 0.3026889688757146, "grad_norm": 1.1466941011480636, "learning_rate": 8.173954041395016e-06, "loss": 0.6176, "step": 3574 }, { "epoch": 0.30277366080880797, "grad_norm": 1.411186165088756, "learning_rate": 8.172894081553822e-06, "loss": 0.6265, "step": 3575 }, { "epoch": 0.3028583527419013, "grad_norm": 1.105303579982559, "learning_rate": 8.171833882935549e-06, "loss": 0.6515, "step": 3576 }, { "epoch": 0.3029430446749947, "grad_norm": 1.2379819122687983, "learning_rate": 8.17077344561998e-06, "loss": 0.6154, "step": 3577 }, { "epoch": 0.30302773660808807, "grad_norm": 1.3824602209003916, "learning_rate": 8.16971276968692e-06, "loss": 0.6549, "step": 3578 }, { "epoch": 0.3031124285411815, "grad_norm": 1.433054811989319, "learning_rate": 8.168651855216188e-06, "loss": 0.5734, "step": 3579 }, { "epoch": 0.30319712047427483, "grad_norm": 1.3352442666393154, "learning_rate": 8.167590702287626e-06, "loss": 0.6515, "step": 3580 }, { "epoch": 0.3032818124073682, "grad_norm": 1.4031285043268413, "learning_rate": 8.166529310981092e-06, "loss": 0.6544, "step": 3581 }, { "epoch": 0.3033665043404616, "grad_norm": 1.3164063946396387, "learning_rate": 8.165467681376457e-06, "loss": 0.6404, "step": 3582 }, { "epoch": 0.30345119627355494, "grad_norm": 0.6029649972298042, "learning_rate": 8.164405813553619e-06, "loss": 0.8155, "step": 3583 }, { "epoch": 0.30353588820664834, "grad_norm": 1.4864334125331833, "learning_rate": 8.163343707592486e-06, "loss": 0.6494, "step": 3584 }, { "epoch": 0.3036205801397417, "grad_norm": 1.2062575107527418, "learning_rate": 8.16228136357299e-06, "loss": 0.6832, "step": 3585 }, { "epoch": 0.30370527207283504, "grad_norm": 1.3819426415748846, "learning_rate": 8.161218781575076e-06, "loss": 0.6306, "step": 3586 }, { "epoch": 0.30378996400592845, "grad_norm": 1.2561582343759305, "learning_rate": 8.160155961678708e-06, "loss": 0.6188, "step": 3587 }, { "epoch": 0.3038746559390218, "grad_norm": 1.7072391344043572, "learning_rate": 8.15909290396387e-06, "loss": 0.7117, "step": 3588 }, { "epoch": 0.3039593478721152, "grad_norm": 1.3915186254147318, "learning_rate": 8.158029608510563e-06, "loss": 0.6842, "step": 3589 }, { "epoch": 0.30404403980520855, "grad_norm": 1.3935924475594543, "learning_rate": 8.156966075398808e-06, "loss": 0.5661, "step": 3590 }, { "epoch": 0.3041287317383019, "grad_norm": 1.1708765117708744, "learning_rate": 8.155902304708634e-06, "loss": 0.6612, "step": 3591 }, { "epoch": 0.3042134236713953, "grad_norm": 1.4293942286259838, "learning_rate": 8.154838296520103e-06, "loss": 0.677, "step": 3592 }, { "epoch": 0.30429811560448866, "grad_norm": 1.303944822564843, "learning_rate": 8.153774050913286e-06, "loss": 0.6654, "step": 3593 }, { "epoch": 0.30438280753758207, "grad_norm": 0.6485704687389505, "learning_rate": 8.152709567968268e-06, "loss": 0.813, "step": 3594 }, { "epoch": 0.3044674994706754, "grad_norm": 1.276149646644372, "learning_rate": 8.151644847765164e-06, "loss": 0.618, "step": 3595 }, { "epoch": 0.30455219140376877, "grad_norm": 1.3432979567206405, "learning_rate": 8.150579890384096e-06, "loss": 0.6857, "step": 3596 }, { "epoch": 0.3046368833368622, "grad_norm": 1.64445068139433, "learning_rate": 8.149514695905206e-06, "loss": 0.6758, "step": 3597 }, { "epoch": 0.3047215752699555, "grad_norm": 1.5250002665139624, "learning_rate": 8.14844926440866e-06, "loss": 0.6278, "step": 3598 }, { "epoch": 0.30480626720304893, "grad_norm": 1.8324960251229538, "learning_rate": 8.147383595974634e-06, "loss": 0.644, "step": 3599 }, { "epoch": 0.3048909591361423, "grad_norm": 1.3471274387803835, "learning_rate": 8.146317690683325e-06, "loss": 0.6861, "step": 3600 }, { "epoch": 0.30497565106923563, "grad_norm": 1.725741713498878, "learning_rate": 8.145251548614952e-06, "loss": 0.6569, "step": 3601 }, { "epoch": 0.30506034300232904, "grad_norm": 1.7494691865302416, "learning_rate": 8.144185169849743e-06, "loss": 0.6725, "step": 3602 }, { "epoch": 0.3051450349354224, "grad_norm": 1.2213009847233773, "learning_rate": 8.14311855446795e-06, "loss": 0.6639, "step": 3603 }, { "epoch": 0.3052297268685158, "grad_norm": 1.6588714340556439, "learning_rate": 8.142051702549844e-06, "loss": 0.6566, "step": 3604 }, { "epoch": 0.30531441880160914, "grad_norm": 0.6958865071491754, "learning_rate": 8.14098461417571e-06, "loss": 0.895, "step": 3605 }, { "epoch": 0.3053991107347025, "grad_norm": 1.1363416479054211, "learning_rate": 8.13991728942585e-06, "loss": 0.6466, "step": 3606 }, { "epoch": 0.3054838026677959, "grad_norm": 1.1710683171555358, "learning_rate": 8.138849728380587e-06, "loss": 0.6376, "step": 3607 }, { "epoch": 0.30556849460088925, "grad_norm": 2.543533472817934, "learning_rate": 8.137781931120261e-06, "loss": 0.6623, "step": 3608 }, { "epoch": 0.30565318653398266, "grad_norm": 2.538989434234035, "learning_rate": 8.13671389772523e-06, "loss": 0.6291, "step": 3609 }, { "epoch": 0.305737878467076, "grad_norm": 1.4288528208446611, "learning_rate": 8.135645628275867e-06, "loss": 0.6416, "step": 3610 }, { "epoch": 0.30582257040016936, "grad_norm": 0.5893815867216792, "learning_rate": 8.13457712285257e-06, "loss": 0.8684, "step": 3611 }, { "epoch": 0.30590726233326276, "grad_norm": 1.9218909351509146, "learning_rate": 8.133508381535743e-06, "loss": 0.614, "step": 3612 }, { "epoch": 0.3059919542663561, "grad_norm": 3.650485451826175, "learning_rate": 8.132439404405818e-06, "loss": 0.6838, "step": 3613 }, { "epoch": 0.3060766461994495, "grad_norm": 1.4561721063943007, "learning_rate": 8.131370191543243e-06, "loss": 0.6615, "step": 3614 }, { "epoch": 0.30616133813254287, "grad_norm": 1.8588508056730413, "learning_rate": 8.130300743028476e-06, "loss": 0.6369, "step": 3615 }, { "epoch": 0.3062460300656362, "grad_norm": 1.2824668161768844, "learning_rate": 8.129231058942004e-06, "loss": 0.6346, "step": 3616 }, { "epoch": 0.3063307219987296, "grad_norm": 1.49678064031139, "learning_rate": 8.128161139364326e-06, "loss": 0.6628, "step": 3617 }, { "epoch": 0.306415413931823, "grad_norm": 1.3664907237029815, "learning_rate": 8.127090984375958e-06, "loss": 0.6122, "step": 3618 }, { "epoch": 0.3065001058649164, "grad_norm": 1.2380558789304763, "learning_rate": 8.126020594057433e-06, "loss": 0.6663, "step": 3619 }, { "epoch": 0.30658479779800973, "grad_norm": 2.490290951165026, "learning_rate": 8.124949968489306e-06, "loss": 0.6602, "step": 3620 }, { "epoch": 0.30666948973110314, "grad_norm": 2.331357651449627, "learning_rate": 8.123879107752147e-06, "loss": 0.6695, "step": 3621 }, { "epoch": 0.3067541816641965, "grad_norm": 2.652906555389204, "learning_rate": 8.122808011926542e-06, "loss": 0.6762, "step": 3622 }, { "epoch": 0.30683887359728984, "grad_norm": 1.1934000922038994, "learning_rate": 8.1217366810931e-06, "loss": 0.6444, "step": 3623 }, { "epoch": 0.30692356553038325, "grad_norm": 1.147437092636035, "learning_rate": 8.12066511533244e-06, "loss": 0.5908, "step": 3624 }, { "epoch": 0.3070082574634766, "grad_norm": 1.4342690626858354, "learning_rate": 8.119593314725207e-06, "loss": 0.6515, "step": 3625 }, { "epoch": 0.30709294939657, "grad_norm": 1.5294481980257117, "learning_rate": 8.118521279352057e-06, "loss": 0.6505, "step": 3626 }, { "epoch": 0.30717764132966335, "grad_norm": 1.2985738959891453, "learning_rate": 8.117449009293668e-06, "loss": 0.6441, "step": 3627 }, { "epoch": 0.3072623332627567, "grad_norm": 2.457336580299414, "learning_rate": 8.116376504630734e-06, "loss": 0.6264, "step": 3628 }, { "epoch": 0.3073470251958501, "grad_norm": 1.3995517336135586, "learning_rate": 8.115303765443966e-06, "loss": 0.6288, "step": 3629 }, { "epoch": 0.30743171712894346, "grad_norm": 1.3168706480909664, "learning_rate": 8.114230791814093e-06, "loss": 0.6487, "step": 3630 }, { "epoch": 0.30751640906203687, "grad_norm": 1.3695572399150262, "learning_rate": 8.113157583821861e-06, "loss": 0.6586, "step": 3631 }, { "epoch": 0.3076011009951302, "grad_norm": 1.3529236304356786, "learning_rate": 8.112084141548038e-06, "loss": 0.6464, "step": 3632 }, { "epoch": 0.30768579292822357, "grad_norm": 1.1563944620877769, "learning_rate": 8.1110104650734e-06, "loss": 0.6474, "step": 3633 }, { "epoch": 0.307770484861317, "grad_norm": 2.1392403829988167, "learning_rate": 8.109936554478757e-06, "loss": 0.657, "step": 3634 }, { "epoch": 0.3078551767944103, "grad_norm": 1.4854118060264165, "learning_rate": 8.108862409844917e-06, "loss": 0.6218, "step": 3635 }, { "epoch": 0.30793986872750373, "grad_norm": 1.370606375119333, "learning_rate": 8.107788031252718e-06, "loss": 0.6773, "step": 3636 }, { "epoch": 0.3080245606605971, "grad_norm": 1.2265526693288142, "learning_rate": 8.106713418783013e-06, "loss": 0.6127, "step": 3637 }, { "epoch": 0.30810925259369043, "grad_norm": 1.2459572761333695, "learning_rate": 8.105638572516674e-06, "loss": 0.6791, "step": 3638 }, { "epoch": 0.30819394452678384, "grad_norm": 3.0874288773227945, "learning_rate": 8.104563492534587e-06, "loss": 0.6957, "step": 3639 }, { "epoch": 0.3082786364598772, "grad_norm": 1.2419895019568203, "learning_rate": 8.103488178917658e-06, "loss": 0.711, "step": 3640 }, { "epoch": 0.3083633283929706, "grad_norm": 1.2459733646752733, "learning_rate": 8.102412631746808e-06, "loss": 0.6638, "step": 3641 }, { "epoch": 0.30844802032606394, "grad_norm": 1.3591586705459742, "learning_rate": 8.10133685110298e-06, "loss": 0.663, "step": 3642 }, { "epoch": 0.3085327122591573, "grad_norm": 1.4255998247447996, "learning_rate": 8.100260837067132e-06, "loss": 0.6407, "step": 3643 }, { "epoch": 0.3086174041922507, "grad_norm": 1.2878875970831978, "learning_rate": 8.09918458972024e-06, "loss": 0.6743, "step": 3644 }, { "epoch": 0.30870209612534405, "grad_norm": 1.7169578097750735, "learning_rate": 8.098108109143295e-06, "loss": 0.625, "step": 3645 }, { "epoch": 0.30878678805843746, "grad_norm": 1.3200854587146358, "learning_rate": 8.097031395417311e-06, "loss": 0.6659, "step": 3646 }, { "epoch": 0.3088714799915308, "grad_norm": 1.24071767138227, "learning_rate": 8.095954448623315e-06, "loss": 0.6504, "step": 3647 }, { "epoch": 0.30895617192462416, "grad_norm": 1.5103989146190047, "learning_rate": 8.094877268842353e-06, "loss": 0.6452, "step": 3648 }, { "epoch": 0.30904086385771756, "grad_norm": 1.3842680495202848, "learning_rate": 8.093799856155486e-06, "loss": 0.6245, "step": 3649 }, { "epoch": 0.3091255557908109, "grad_norm": 1.2674940132708175, "learning_rate": 8.0927222106438e-06, "loss": 0.6642, "step": 3650 }, { "epoch": 0.3092102477239043, "grad_norm": 1.3552633640433092, "learning_rate": 8.091644332388391e-06, "loss": 0.6433, "step": 3651 }, { "epoch": 0.30929493965699767, "grad_norm": 2.0865619857415707, "learning_rate": 8.090566221470375e-06, "loss": 0.6446, "step": 3652 }, { "epoch": 0.309379631590091, "grad_norm": 1.2893070215427545, "learning_rate": 8.089487877970884e-06, "loss": 0.6122, "step": 3653 }, { "epoch": 0.3094643235231844, "grad_norm": 0.6482312197682645, "learning_rate": 8.08840930197107e-06, "loss": 0.875, "step": 3654 }, { "epoch": 0.3095490154562778, "grad_norm": 1.294251840317957, "learning_rate": 8.087330493552104e-06, "loss": 0.591, "step": 3655 }, { "epoch": 0.3096337073893712, "grad_norm": 2.18031214956564, "learning_rate": 8.086251452795169e-06, "loss": 0.6515, "step": 3656 }, { "epoch": 0.30971839932246453, "grad_norm": 1.5516581523592303, "learning_rate": 8.08517217978147e-06, "loss": 0.6618, "step": 3657 }, { "epoch": 0.3098030912555579, "grad_norm": 1.255040757663505, "learning_rate": 8.084092674592227e-06, "loss": 0.6092, "step": 3658 }, { "epoch": 0.3098877831886513, "grad_norm": 1.2055895505971062, "learning_rate": 8.08301293730868e-06, "loss": 0.6323, "step": 3659 }, { "epoch": 0.30997247512174464, "grad_norm": 1.6347793327379088, "learning_rate": 8.081932968012085e-06, "loss": 0.659, "step": 3660 }, { "epoch": 0.31005716705483805, "grad_norm": 1.442694603960533, "learning_rate": 8.080852766783714e-06, "loss": 0.6031, "step": 3661 }, { "epoch": 0.3101418589879314, "grad_norm": 1.8408281232412977, "learning_rate": 8.079772333704859e-06, "loss": 0.6672, "step": 3662 }, { "epoch": 0.31022655092102475, "grad_norm": 1.616027744505146, "learning_rate": 8.078691668856826e-06, "loss": 0.6381, "step": 3663 }, { "epoch": 0.31031124285411815, "grad_norm": 1.6536283012625705, "learning_rate": 8.077610772320943e-06, "loss": 0.6218, "step": 3664 }, { "epoch": 0.3103959347872115, "grad_norm": 1.494028783698799, "learning_rate": 8.076529644178552e-06, "loss": 0.6638, "step": 3665 }, { "epoch": 0.3104806267203049, "grad_norm": 1.8045716497034991, "learning_rate": 8.075448284511017e-06, "loss": 0.7123, "step": 3666 }, { "epoch": 0.31056531865339826, "grad_norm": 1.2038035154893139, "learning_rate": 8.074366693399711e-06, "loss": 0.6412, "step": 3667 }, { "epoch": 0.3106500105864916, "grad_norm": 1.366576174914144, "learning_rate": 8.073284870926033e-06, "loss": 0.6555, "step": 3668 }, { "epoch": 0.310734702519585, "grad_norm": 1.6572400627916821, "learning_rate": 8.072202817171393e-06, "loss": 0.6087, "step": 3669 }, { "epoch": 0.31081939445267837, "grad_norm": 1.4950544616878876, "learning_rate": 8.071120532217224e-06, "loss": 0.6287, "step": 3670 }, { "epoch": 0.31090408638577177, "grad_norm": 0.6981083264783471, "learning_rate": 8.070038016144973e-06, "loss": 0.8511, "step": 3671 }, { "epoch": 0.3109887783188651, "grad_norm": 0.5705463812905064, "learning_rate": 8.068955269036104e-06, "loss": 0.8935, "step": 3672 }, { "epoch": 0.31107347025195853, "grad_norm": 1.4268037559108728, "learning_rate": 8.0678722909721e-06, "loss": 0.6495, "step": 3673 }, { "epoch": 0.3111581621850519, "grad_norm": 1.3077782362824306, "learning_rate": 8.06678908203446e-06, "loss": 0.6763, "step": 3674 }, { "epoch": 0.31124285411814523, "grad_norm": 1.2449637655210266, "learning_rate": 8.065705642304704e-06, "loss": 0.624, "step": 3675 }, { "epoch": 0.31132754605123864, "grad_norm": 1.3173748890460515, "learning_rate": 8.064621971864367e-06, "loss": 0.6213, "step": 3676 }, { "epoch": 0.311412237984332, "grad_norm": 1.3080015527848903, "learning_rate": 8.063538070794994e-06, "loss": 0.6662, "step": 3677 }, { "epoch": 0.3114969299174254, "grad_norm": 1.7265425499901574, "learning_rate": 8.062453939178161e-06, "loss": 0.6177, "step": 3678 }, { "epoch": 0.31158162185051874, "grad_norm": 1.3446342868962724, "learning_rate": 8.061369577095452e-06, "loss": 0.6594, "step": 3679 }, { "epoch": 0.3116663137836121, "grad_norm": 1.374697774527736, "learning_rate": 8.060284984628473e-06, "loss": 0.6911, "step": 3680 }, { "epoch": 0.3117510057167055, "grad_norm": 1.5701961890018044, "learning_rate": 8.059200161858842e-06, "loss": 0.6908, "step": 3681 }, { "epoch": 0.31183569764979885, "grad_norm": 1.4129043505443082, "learning_rate": 8.0581151088682e-06, "loss": 0.653, "step": 3682 }, { "epoch": 0.31192038958289225, "grad_norm": 1.5359597819430093, "learning_rate": 8.057029825738202e-06, "loss": 0.6621, "step": 3683 }, { "epoch": 0.3120050815159856, "grad_norm": 1.833402435702603, "learning_rate": 8.055944312550525e-06, "loss": 0.6453, "step": 3684 }, { "epoch": 0.31208977344907896, "grad_norm": 1.5593943110516615, "learning_rate": 8.054858569386855e-06, "loss": 0.6314, "step": 3685 }, { "epoch": 0.31217446538217236, "grad_norm": 1.3782431894397775, "learning_rate": 8.053772596328899e-06, "loss": 0.6245, "step": 3686 }, { "epoch": 0.3122591573152657, "grad_norm": 1.2454309199959268, "learning_rate": 8.052686393458388e-06, "loss": 0.6389, "step": 3687 }, { "epoch": 0.3123438492483591, "grad_norm": 1.1977062576456408, "learning_rate": 8.05159996085706e-06, "loss": 0.7202, "step": 3688 }, { "epoch": 0.31242854118145247, "grad_norm": 1.686451416260101, "learning_rate": 8.050513298606675e-06, "loss": 0.6204, "step": 3689 }, { "epoch": 0.3125132331145458, "grad_norm": 1.5219267764846671, "learning_rate": 8.049426406789012e-06, "loss": 0.6588, "step": 3690 }, { "epoch": 0.3125979250476392, "grad_norm": 1.2446893345336059, "learning_rate": 8.048339285485864e-06, "loss": 0.6348, "step": 3691 }, { "epoch": 0.3126826169807326, "grad_norm": 1.1832566316870676, "learning_rate": 8.047251934779043e-06, "loss": 0.6806, "step": 3692 }, { "epoch": 0.312767308913826, "grad_norm": 1.1545811456736357, "learning_rate": 8.046164354750377e-06, "loss": 0.6632, "step": 3693 }, { "epoch": 0.31285200084691933, "grad_norm": 1.251281249878352, "learning_rate": 8.045076545481713e-06, "loss": 0.6555, "step": 3694 }, { "epoch": 0.3129366927800127, "grad_norm": 1.798948710672099, "learning_rate": 8.043988507054919e-06, "loss": 0.6077, "step": 3695 }, { "epoch": 0.3130213847131061, "grad_norm": 1.2614393194825178, "learning_rate": 8.042900239551867e-06, "loss": 0.6378, "step": 3696 }, { "epoch": 0.31310607664619944, "grad_norm": 1.3165729555925043, "learning_rate": 8.041811743054459e-06, "loss": 0.7149, "step": 3697 }, { "epoch": 0.31319076857929284, "grad_norm": 1.5589419240364009, "learning_rate": 8.040723017644611e-06, "loss": 0.589, "step": 3698 }, { "epoch": 0.3132754605123862, "grad_norm": 1.1691126879871347, "learning_rate": 8.039634063404255e-06, "loss": 0.6753, "step": 3699 }, { "epoch": 0.31336015244547955, "grad_norm": 1.3464344591195148, "learning_rate": 8.03854488041534e-06, "loss": 0.6836, "step": 3700 }, { "epoch": 0.31344484437857295, "grad_norm": 1.1454544614927549, "learning_rate": 8.037455468759831e-06, "loss": 0.6462, "step": 3701 }, { "epoch": 0.3135295363116663, "grad_norm": 1.5431049486233146, "learning_rate": 8.036365828519717e-06, "loss": 0.5741, "step": 3702 }, { "epoch": 0.3136142282447597, "grad_norm": 0.6726085446914328, "learning_rate": 8.035275959776994e-06, "loss": 0.8664, "step": 3703 }, { "epoch": 0.31369892017785306, "grad_norm": 1.589516812000402, "learning_rate": 8.034185862613684e-06, "loss": 0.6661, "step": 3704 }, { "epoch": 0.3137836121109464, "grad_norm": 1.406353293694512, "learning_rate": 8.033095537111819e-06, "loss": 0.6464, "step": 3705 }, { "epoch": 0.3138683040440398, "grad_norm": 1.1989723678268178, "learning_rate": 8.032004983353457e-06, "loss": 0.7002, "step": 3706 }, { "epoch": 0.31395299597713316, "grad_norm": 1.1958660969387196, "learning_rate": 8.03091420142066e-06, "loss": 0.6312, "step": 3707 }, { "epoch": 0.31403768791022657, "grad_norm": 1.287073130772937, "learning_rate": 8.029823191395524e-06, "loss": 0.6536, "step": 3708 }, { "epoch": 0.3141223798433199, "grad_norm": 0.6407142387470087, "learning_rate": 8.028731953360147e-06, "loss": 0.876, "step": 3709 }, { "epoch": 0.31420707177641327, "grad_norm": 1.4072509040342933, "learning_rate": 8.027640487396655e-06, "loss": 0.6575, "step": 3710 }, { "epoch": 0.3142917637095067, "grad_norm": 3.386698646795799, "learning_rate": 8.02654879358718e-06, "loss": 0.6133, "step": 3711 }, { "epoch": 0.31437645564260003, "grad_norm": 1.2927096490798065, "learning_rate": 8.025456872013886e-06, "loss": 0.6066, "step": 3712 }, { "epoch": 0.31446114757569343, "grad_norm": 1.2973291179953659, "learning_rate": 8.02436472275894e-06, "loss": 0.6435, "step": 3713 }, { "epoch": 0.3145458395087868, "grad_norm": 1.4404838254319865, "learning_rate": 8.023272345904535e-06, "loss": 0.643, "step": 3714 }, { "epoch": 0.31463053144188013, "grad_norm": 1.5201313834311034, "learning_rate": 8.022179741532874e-06, "loss": 0.5919, "step": 3715 }, { "epoch": 0.31471522337497354, "grad_norm": 1.0758797627381613, "learning_rate": 8.021086909726188e-06, "loss": 0.6142, "step": 3716 }, { "epoch": 0.3147999153080669, "grad_norm": 1.2598570860584632, "learning_rate": 8.019993850566715e-06, "loss": 0.6131, "step": 3717 }, { "epoch": 0.3148846072411603, "grad_norm": 0.666081892629363, "learning_rate": 8.01890056413671e-06, "loss": 0.87, "step": 3718 }, { "epoch": 0.31496929917425365, "grad_norm": 1.6613725855527826, "learning_rate": 8.017807050518452e-06, "loss": 0.6442, "step": 3719 }, { "epoch": 0.315053991107347, "grad_norm": 1.9954775272346195, "learning_rate": 8.016713309794235e-06, "loss": 0.645, "step": 3720 }, { "epoch": 0.3151386830404404, "grad_norm": 1.2734627264504323, "learning_rate": 8.015619342046365e-06, "loss": 0.6273, "step": 3721 }, { "epoch": 0.31522337497353375, "grad_norm": 1.8316295521430916, "learning_rate": 8.014525147357174e-06, "loss": 0.6553, "step": 3722 }, { "epoch": 0.31530806690662716, "grad_norm": 4.405751948431077, "learning_rate": 8.013430725809001e-06, "loss": 0.6598, "step": 3723 }, { "epoch": 0.3153927588397205, "grad_norm": 1.9241009436971588, "learning_rate": 8.01233607748421e-06, "loss": 0.6301, "step": 3724 }, { "epoch": 0.3154774507728139, "grad_norm": 1.1063010917669795, "learning_rate": 8.011241202465177e-06, "loss": 0.6405, "step": 3725 }, { "epoch": 0.31556214270590727, "grad_norm": 1.3898499183590527, "learning_rate": 8.0101461008343e-06, "loss": 0.6411, "step": 3726 }, { "epoch": 0.3156468346390006, "grad_norm": 1.6277925217528926, "learning_rate": 8.009050772673987e-06, "loss": 0.6293, "step": 3727 }, { "epoch": 0.315731526572094, "grad_norm": 1.362724380785754, "learning_rate": 8.007955218066673e-06, "loss": 0.662, "step": 3728 }, { "epoch": 0.3158162185051874, "grad_norm": 1.7011810622553316, "learning_rate": 8.006859437094797e-06, "loss": 0.609, "step": 3729 }, { "epoch": 0.3159009104382808, "grad_norm": 1.670071459567531, "learning_rate": 8.005763429840829e-06, "loss": 0.6951, "step": 3730 }, { "epoch": 0.31598560237137413, "grad_norm": 1.45132826171074, "learning_rate": 8.004667196387246e-06, "loss": 0.5698, "step": 3731 }, { "epoch": 0.3160702943044675, "grad_norm": 1.5027573059001211, "learning_rate": 8.003570736816544e-06, "loss": 0.697, "step": 3732 }, { "epoch": 0.3161549862375609, "grad_norm": 1.3770131097022524, "learning_rate": 8.002474051211242e-06, "loss": 0.6642, "step": 3733 }, { "epoch": 0.31623967817065424, "grad_norm": 1.47937198258694, "learning_rate": 8.001377139653869e-06, "loss": 0.6498, "step": 3734 }, { "epoch": 0.31632437010374764, "grad_norm": 1.2762797543191562, "learning_rate": 8.000280002226972e-06, "loss": 0.6588, "step": 3735 }, { "epoch": 0.316409062036841, "grad_norm": 1.385215583715074, "learning_rate": 7.999182639013116e-06, "loss": 0.615, "step": 3736 }, { "epoch": 0.31649375396993434, "grad_norm": 1.428611228527957, "learning_rate": 7.998085050094888e-06, "loss": 0.6533, "step": 3737 }, { "epoch": 0.31657844590302775, "grad_norm": 1.2230388945516693, "learning_rate": 7.996987235554883e-06, "loss": 0.6521, "step": 3738 }, { "epoch": 0.3166631378361211, "grad_norm": 1.463146230602626, "learning_rate": 7.99588919547572e-06, "loss": 0.665, "step": 3739 }, { "epoch": 0.3167478297692145, "grad_norm": 1.7571307105556757, "learning_rate": 7.99479092994003e-06, "loss": 0.6868, "step": 3740 }, { "epoch": 0.31683252170230786, "grad_norm": 1.1616043062147534, "learning_rate": 7.993692439030464e-06, "loss": 0.6105, "step": 3741 }, { "epoch": 0.3169172136354012, "grad_norm": 1.3040558066644186, "learning_rate": 7.992593722829688e-06, "loss": 0.6473, "step": 3742 }, { "epoch": 0.3170019055684946, "grad_norm": 1.2694930534909559, "learning_rate": 7.991494781420393e-06, "loss": 0.6359, "step": 3743 }, { "epoch": 0.31708659750158796, "grad_norm": 1.7026055574360341, "learning_rate": 7.990395614885269e-06, "loss": 0.6191, "step": 3744 }, { "epoch": 0.31717128943468137, "grad_norm": 1.195358820579482, "learning_rate": 7.989296223307044e-06, "loss": 0.637, "step": 3745 }, { "epoch": 0.3172559813677747, "grad_norm": 1.7020889712508995, "learning_rate": 7.988196606768448e-06, "loss": 0.5919, "step": 3746 }, { "epoch": 0.31734067330086807, "grad_norm": 1.5339399250305534, "learning_rate": 7.987096765352233e-06, "loss": 0.6312, "step": 3747 }, { "epoch": 0.3174253652339615, "grad_norm": 1.5156655651849404, "learning_rate": 7.985996699141171e-06, "loss": 0.6212, "step": 3748 }, { "epoch": 0.3175100571670548, "grad_norm": 1.3946562142049954, "learning_rate": 7.984896408218045e-06, "loss": 0.6596, "step": 3749 }, { "epoch": 0.31759474910014823, "grad_norm": 1.8009789835773444, "learning_rate": 7.983795892665657e-06, "loss": 0.6166, "step": 3750 }, { "epoch": 0.3176794410332416, "grad_norm": 1.161965342075016, "learning_rate": 7.982695152566831e-06, "loss": 0.6701, "step": 3751 }, { "epoch": 0.31776413296633493, "grad_norm": 1.2303066369797075, "learning_rate": 7.981594188004397e-06, "loss": 0.6326, "step": 3752 }, { "epoch": 0.31784882489942834, "grad_norm": 1.3343794064813623, "learning_rate": 7.980492999061215e-06, "loss": 0.622, "step": 3753 }, { "epoch": 0.3179335168325217, "grad_norm": 0.5608686610440463, "learning_rate": 7.979391585820152e-06, "loss": 0.8862, "step": 3754 }, { "epoch": 0.3180182087656151, "grad_norm": 1.3181925777767205, "learning_rate": 7.978289948364094e-06, "loss": 0.6748, "step": 3755 }, { "epoch": 0.31810290069870845, "grad_norm": 1.178112289641382, "learning_rate": 7.977188086775948e-06, "loss": 0.6523, "step": 3756 }, { "epoch": 0.3181875926318018, "grad_norm": 1.2218780455767997, "learning_rate": 7.976086001138634e-06, "loss": 0.5983, "step": 3757 }, { "epoch": 0.3182722845648952, "grad_norm": 2.5394589110157386, "learning_rate": 7.974983691535089e-06, "loss": 0.6933, "step": 3758 }, { "epoch": 0.31835697649798855, "grad_norm": 1.4198386447023041, "learning_rate": 7.973881158048267e-06, "loss": 0.6337, "step": 3759 }, { "epoch": 0.31844166843108196, "grad_norm": 1.3899779321247678, "learning_rate": 7.972778400761141e-06, "loss": 0.6854, "step": 3760 }, { "epoch": 0.3185263603641753, "grad_norm": 1.6013362095898322, "learning_rate": 7.9716754197567e-06, "loss": 0.6306, "step": 3761 }, { "epoch": 0.31861105229726866, "grad_norm": 1.219906231119593, "learning_rate": 7.970572215117943e-06, "loss": 0.6117, "step": 3762 }, { "epoch": 0.31869574423036207, "grad_norm": 1.4635170053265636, "learning_rate": 7.969468786927902e-06, "loss": 0.6456, "step": 3763 }, { "epoch": 0.3187804361634554, "grad_norm": 1.394745563074508, "learning_rate": 7.968365135269609e-06, "loss": 0.6709, "step": 3764 }, { "epoch": 0.3188651280965488, "grad_norm": 1.2800552822070999, "learning_rate": 7.967261260226122e-06, "loss": 0.6811, "step": 3765 }, { "epoch": 0.3189498200296422, "grad_norm": 0.6096361592918818, "learning_rate": 7.966157161880513e-06, "loss": 0.9068, "step": 3766 }, { "epoch": 0.3190345119627355, "grad_norm": 1.3580309867638918, "learning_rate": 7.965052840315869e-06, "loss": 0.6942, "step": 3767 }, { "epoch": 0.31911920389582893, "grad_norm": 2.715449786697247, "learning_rate": 7.963948295615298e-06, "loss": 0.6295, "step": 3768 }, { "epoch": 0.3192038958289223, "grad_norm": 1.4150283213238026, "learning_rate": 7.962843527861926e-06, "loss": 0.6841, "step": 3769 }, { "epoch": 0.3192885877620157, "grad_norm": 1.2772667216556415, "learning_rate": 7.961738537138887e-06, "loss": 0.6418, "step": 3770 }, { "epoch": 0.31937327969510904, "grad_norm": 1.1225373053175858, "learning_rate": 7.960633323529342e-06, "loss": 0.6067, "step": 3771 }, { "epoch": 0.3194579716282024, "grad_norm": 1.1968772284853055, "learning_rate": 7.95952788711646e-06, "loss": 0.6799, "step": 3772 }, { "epoch": 0.3195426635612958, "grad_norm": 1.3551999048392311, "learning_rate": 7.958422227983433e-06, "loss": 0.5785, "step": 3773 }, { "epoch": 0.31962735549438914, "grad_norm": 1.3231302834440153, "learning_rate": 7.957316346213468e-06, "loss": 0.6538, "step": 3774 }, { "epoch": 0.31971204742748255, "grad_norm": 1.27904480446952, "learning_rate": 7.956210241889788e-06, "loss": 0.6437, "step": 3775 }, { "epoch": 0.3197967393605759, "grad_norm": 1.1479450209185211, "learning_rate": 7.955103915095635e-06, "loss": 0.6332, "step": 3776 }, { "epoch": 0.3198814312936693, "grad_norm": 0.6183019175802253, "learning_rate": 7.953997365914263e-06, "loss": 0.912, "step": 3777 }, { "epoch": 0.31996612322676266, "grad_norm": 1.1386662512674306, "learning_rate": 7.952890594428948e-06, "loss": 0.6421, "step": 3778 }, { "epoch": 0.320050815159856, "grad_norm": 1.2506347151382615, "learning_rate": 7.95178360072298e-06, "loss": 0.5906, "step": 3779 }, { "epoch": 0.3201355070929494, "grad_norm": 1.1603217495885418, "learning_rate": 7.950676384879663e-06, "loss": 0.6387, "step": 3780 }, { "epoch": 0.32022019902604276, "grad_norm": 1.565026962766312, "learning_rate": 7.949568946982325e-06, "loss": 0.6654, "step": 3781 }, { "epoch": 0.32030489095913617, "grad_norm": 1.4710448115303318, "learning_rate": 7.948461287114306e-06, "loss": 0.6849, "step": 3782 }, { "epoch": 0.3203895828922295, "grad_norm": 1.2644893913235309, "learning_rate": 7.947353405358961e-06, "loss": 0.6767, "step": 3783 }, { "epoch": 0.32047427482532287, "grad_norm": 1.4421890935866906, "learning_rate": 7.946245301799667e-06, "loss": 0.6302, "step": 3784 }, { "epoch": 0.3205589667584163, "grad_norm": 1.1841695555127447, "learning_rate": 7.94513697651981e-06, "loss": 0.6982, "step": 3785 }, { "epoch": 0.3206436586915096, "grad_norm": 1.295499348924163, "learning_rate": 7.944028429602802e-06, "loss": 0.6427, "step": 3786 }, { "epoch": 0.32072835062460303, "grad_norm": 1.154580203911005, "learning_rate": 7.942919661132065e-06, "loss": 0.6339, "step": 3787 }, { "epoch": 0.3208130425576964, "grad_norm": 1.4964016923901404, "learning_rate": 7.941810671191042e-06, "loss": 0.6464, "step": 3788 }, { "epoch": 0.32089773449078973, "grad_norm": 1.2183368742576361, "learning_rate": 7.940701459863185e-06, "loss": 0.6422, "step": 3789 }, { "epoch": 0.32098242642388314, "grad_norm": 1.086940665968587, "learning_rate": 7.939592027231973e-06, "loss": 0.6446, "step": 3790 }, { "epoch": 0.3210671183569765, "grad_norm": 0.6320633590082227, "learning_rate": 7.938482373380896e-06, "loss": 0.822, "step": 3791 }, { "epoch": 0.3211518102900699, "grad_norm": 1.2467012204306698, "learning_rate": 7.937372498393459e-06, "loss": 0.6367, "step": 3792 }, { "epoch": 0.32123650222316324, "grad_norm": 1.6273342375759434, "learning_rate": 7.936262402353188e-06, "loss": 0.6625, "step": 3793 }, { "epoch": 0.3213211941562566, "grad_norm": 1.1400327832824684, "learning_rate": 7.935152085343623e-06, "loss": 0.6353, "step": 3794 }, { "epoch": 0.32140588608935, "grad_norm": 1.4431083085880596, "learning_rate": 7.934041547448322e-06, "loss": 0.5889, "step": 3795 }, { "epoch": 0.32149057802244335, "grad_norm": 1.1304225855962535, "learning_rate": 7.932930788750855e-06, "loss": 0.6796, "step": 3796 }, { "epoch": 0.32157526995553676, "grad_norm": 1.6483064211864644, "learning_rate": 7.931819809334817e-06, "loss": 0.6731, "step": 3797 }, { "epoch": 0.3216599618886301, "grad_norm": 1.6325524855300206, "learning_rate": 7.930708609283815e-06, "loss": 0.6564, "step": 3798 }, { "epoch": 0.32174465382172346, "grad_norm": 0.6227974654282862, "learning_rate": 7.929597188681471e-06, "loss": 0.8156, "step": 3799 }, { "epoch": 0.32182934575481686, "grad_norm": 1.29981666324275, "learning_rate": 7.928485547611425e-06, "loss": 0.6716, "step": 3800 }, { "epoch": 0.3219140376879102, "grad_norm": 1.2411195064459168, "learning_rate": 7.927373686157334e-06, "loss": 0.6977, "step": 3801 }, { "epoch": 0.3219987296210036, "grad_norm": 1.9521471487615791, "learning_rate": 7.926261604402874e-06, "loss": 0.6938, "step": 3802 }, { "epoch": 0.32208342155409697, "grad_norm": 1.262008128004134, "learning_rate": 7.92514930243173e-06, "loss": 0.6504, "step": 3803 }, { "epoch": 0.3221681134871903, "grad_norm": 1.7362950895572626, "learning_rate": 7.924036780327614e-06, "loss": 0.7046, "step": 3804 }, { "epoch": 0.32225280542028373, "grad_norm": 1.4214039830762515, "learning_rate": 7.922924038174248e-06, "loss": 0.6555, "step": 3805 }, { "epoch": 0.3223374973533771, "grad_norm": 1.5095776356730402, "learning_rate": 7.921811076055366e-06, "loss": 0.629, "step": 3806 }, { "epoch": 0.3224221892864705, "grad_norm": 1.443078183754033, "learning_rate": 7.920697894054731e-06, "loss": 0.6943, "step": 3807 }, { "epoch": 0.32250688121956383, "grad_norm": 1.6236650215371504, "learning_rate": 7.919584492256114e-06, "loss": 0.6388, "step": 3808 }, { "epoch": 0.3225915731526572, "grad_norm": 1.492117281345717, "learning_rate": 7.918470870743304e-06, "loss": 0.6348, "step": 3809 }, { "epoch": 0.3226762650857506, "grad_norm": 1.2490704689259102, "learning_rate": 7.917357029600107e-06, "loss": 0.6463, "step": 3810 }, { "epoch": 0.32276095701884394, "grad_norm": 0.6534469238845353, "learning_rate": 7.916242968910347e-06, "loss": 0.9, "step": 3811 }, { "epoch": 0.32284564895193735, "grad_norm": 1.752384759581173, "learning_rate": 7.915128688757858e-06, "loss": 0.6469, "step": 3812 }, { "epoch": 0.3229303408850307, "grad_norm": 1.367909039383111, "learning_rate": 7.9140141892265e-06, "loss": 0.6772, "step": 3813 }, { "epoch": 0.32301503281812405, "grad_norm": 0.5541568515669675, "learning_rate": 7.912899470400144e-06, "loss": 0.8175, "step": 3814 }, { "epoch": 0.32309972475121745, "grad_norm": 1.1446992064515764, "learning_rate": 7.911784532362678e-06, "loss": 0.624, "step": 3815 }, { "epoch": 0.3231844166843108, "grad_norm": 1.3583124316810038, "learning_rate": 7.910669375198008e-06, "loss": 0.6074, "step": 3816 }, { "epoch": 0.3232691086174042, "grad_norm": 1.2263282857924518, "learning_rate": 7.909553998990056e-06, "loss": 0.597, "step": 3817 }, { "epoch": 0.32335380055049756, "grad_norm": 1.7719231688847605, "learning_rate": 7.908438403822757e-06, "loss": 0.6591, "step": 3818 }, { "epoch": 0.3234384924835909, "grad_norm": 1.1900188857118392, "learning_rate": 7.907322589780068e-06, "loss": 0.693, "step": 3819 }, { "epoch": 0.3235231844166843, "grad_norm": 0.6040087481683764, "learning_rate": 7.906206556945959e-06, "loss": 0.8207, "step": 3820 }, { "epoch": 0.32360787634977767, "grad_norm": 1.3115560796795778, "learning_rate": 7.905090305404417e-06, "loss": 0.6378, "step": 3821 }, { "epoch": 0.3236925682828711, "grad_norm": 1.3228159354253293, "learning_rate": 7.903973835239445e-06, "loss": 0.6738, "step": 3822 }, { "epoch": 0.3237772602159644, "grad_norm": 1.3027006844016717, "learning_rate": 7.902857146535068e-06, "loss": 0.652, "step": 3823 }, { "epoch": 0.3238619521490578, "grad_norm": 1.3950455831496664, "learning_rate": 7.901740239375318e-06, "loss": 0.6698, "step": 3824 }, { "epoch": 0.3239466440821512, "grad_norm": 1.2579946058940354, "learning_rate": 7.900623113844248e-06, "loss": 0.6817, "step": 3825 }, { "epoch": 0.32403133601524453, "grad_norm": 1.4366597224395798, "learning_rate": 7.899505770025931e-06, "loss": 0.6515, "step": 3826 }, { "epoch": 0.32411602794833794, "grad_norm": 0.6575494609106132, "learning_rate": 7.898388208004449e-06, "loss": 0.89, "step": 3827 }, { "epoch": 0.3242007198814313, "grad_norm": 1.375919699789472, "learning_rate": 7.897270427863909e-06, "loss": 0.704, "step": 3828 }, { "epoch": 0.3242854118145247, "grad_norm": 1.7023648673086167, "learning_rate": 7.896152429688426e-06, "loss": 0.6578, "step": 3829 }, { "epoch": 0.32437010374761804, "grad_norm": 1.2290482060236505, "learning_rate": 7.895034213562137e-06, "loss": 0.5966, "step": 3830 }, { "epoch": 0.3244547956807114, "grad_norm": 1.7387172477041064, "learning_rate": 7.893915779569194e-06, "loss": 0.664, "step": 3831 }, { "epoch": 0.3245394876138048, "grad_norm": 1.7649564004480571, "learning_rate": 7.892797127793765e-06, "loss": 0.6889, "step": 3832 }, { "epoch": 0.32462417954689815, "grad_norm": 1.4079853545013987, "learning_rate": 7.891678258320035e-06, "loss": 0.624, "step": 3833 }, { "epoch": 0.32470887147999156, "grad_norm": 1.2481280812089022, "learning_rate": 7.890559171232201e-06, "loss": 0.6199, "step": 3834 }, { "epoch": 0.3247935634130849, "grad_norm": 6.584131722432722, "learning_rate": 7.889439866614485e-06, "loss": 0.6196, "step": 3835 }, { "epoch": 0.32487825534617826, "grad_norm": 1.6910360618407678, "learning_rate": 7.888320344551117e-06, "loss": 0.6598, "step": 3836 }, { "epoch": 0.32496294727927166, "grad_norm": 0.6665018426597704, "learning_rate": 7.887200605126351e-06, "loss": 0.8791, "step": 3837 }, { "epoch": 0.325047639212365, "grad_norm": 1.2692718505458107, "learning_rate": 7.88608064842445e-06, "loss": 0.6338, "step": 3838 }, { "epoch": 0.3251323311454584, "grad_norm": 0.645994419501565, "learning_rate": 7.884960474529697e-06, "loss": 0.8355, "step": 3839 }, { "epoch": 0.32521702307855177, "grad_norm": 1.5338742478606742, "learning_rate": 7.883840083526393e-06, "loss": 0.6111, "step": 3840 }, { "epoch": 0.3253017150116451, "grad_norm": 1.7419576549578326, "learning_rate": 7.882719475498852e-06, "loss": 0.6761, "step": 3841 }, { "epoch": 0.3253864069447385, "grad_norm": 0.5845411000977715, "learning_rate": 7.881598650531406e-06, "loss": 0.9259, "step": 3842 }, { "epoch": 0.3254710988778319, "grad_norm": 0.6379018233611362, "learning_rate": 7.8804776087084e-06, "loss": 0.8648, "step": 3843 }, { "epoch": 0.3255557908109253, "grad_norm": 0.6414200937662415, "learning_rate": 7.879356350114205e-06, "loss": 0.8373, "step": 3844 }, { "epoch": 0.32564048274401863, "grad_norm": 1.2295919142583913, "learning_rate": 7.878234874833195e-06, "loss": 0.6482, "step": 3845 }, { "epoch": 0.325725174677112, "grad_norm": 1.3069766340814952, "learning_rate": 7.87711318294977e-06, "loss": 0.6178, "step": 3846 }, { "epoch": 0.3258098666102054, "grad_norm": 1.2172057053219143, "learning_rate": 7.875991274548343e-06, "loss": 0.6482, "step": 3847 }, { "epoch": 0.32589455854329874, "grad_norm": 1.4397506617848341, "learning_rate": 7.874869149713344e-06, "loss": 0.6008, "step": 3848 }, { "epoch": 0.32597925047639215, "grad_norm": 1.2462394790683462, "learning_rate": 7.87374680852922e-06, "loss": 0.594, "step": 3849 }, { "epoch": 0.3260639424094855, "grad_norm": 1.4708892124540263, "learning_rate": 7.872624251080429e-06, "loss": 0.6642, "step": 3850 }, { "epoch": 0.32614863434257885, "grad_norm": 1.3241865492431109, "learning_rate": 7.871501477451453e-06, "loss": 0.6092, "step": 3851 }, { "epoch": 0.32623332627567225, "grad_norm": 1.4959677705807082, "learning_rate": 7.870378487726784e-06, "loss": 0.6506, "step": 3852 }, { "epoch": 0.3263180182087656, "grad_norm": 1.4333915728875255, "learning_rate": 7.869255281990935e-06, "loss": 0.6448, "step": 3853 }, { "epoch": 0.326402710141859, "grad_norm": 1.502737621258036, "learning_rate": 7.868131860328434e-06, "loss": 0.6205, "step": 3854 }, { "epoch": 0.32648740207495236, "grad_norm": 1.1057570614170693, "learning_rate": 7.86700822282382e-06, "loss": 0.6601, "step": 3855 }, { "epoch": 0.3265720940080457, "grad_norm": 1.4363888744413322, "learning_rate": 7.865884369561659e-06, "loss": 0.6311, "step": 3856 }, { "epoch": 0.3266567859411391, "grad_norm": 1.4497391554035906, "learning_rate": 7.864760300626523e-06, "loss": 0.6443, "step": 3857 }, { "epoch": 0.32674147787423247, "grad_norm": 1.5832242835247614, "learning_rate": 7.863636016103005e-06, "loss": 0.6716, "step": 3858 }, { "epoch": 0.3268261698073259, "grad_norm": 1.2576092466056308, "learning_rate": 7.86251151607571e-06, "loss": 0.6219, "step": 3859 }, { "epoch": 0.3269108617404192, "grad_norm": 1.4391029683467513, "learning_rate": 7.86138680062927e-06, "loss": 0.6853, "step": 3860 }, { "epoch": 0.3269955536735126, "grad_norm": 1.4119826497152668, "learning_rate": 7.860261869848318e-06, "loss": 0.651, "step": 3861 }, { "epoch": 0.327080245606606, "grad_norm": 1.2665277778543917, "learning_rate": 7.859136723817518e-06, "loss": 0.7046, "step": 3862 }, { "epoch": 0.32716493753969933, "grad_norm": 1.3303009760734927, "learning_rate": 7.858011362621535e-06, "loss": 0.6981, "step": 3863 }, { "epoch": 0.32724962947279274, "grad_norm": 2.3166431293778027, "learning_rate": 7.856885786345068e-06, "loss": 0.6497, "step": 3864 }, { "epoch": 0.3273343214058861, "grad_norm": 0.6672700898453408, "learning_rate": 7.855759995072815e-06, "loss": 0.8492, "step": 3865 }, { "epoch": 0.32741901333897944, "grad_norm": 1.1417895347013614, "learning_rate": 7.8546339888895e-06, "loss": 0.5939, "step": 3866 }, { "epoch": 0.32750370527207284, "grad_norm": 1.3889742986662539, "learning_rate": 7.853507767879862e-06, "loss": 0.5946, "step": 3867 }, { "epoch": 0.3275883972051662, "grad_norm": 1.079473833621473, "learning_rate": 7.852381332128655e-06, "loss": 0.6336, "step": 3868 }, { "epoch": 0.3276730891382596, "grad_norm": 1.4911558898526311, "learning_rate": 7.851254681720649e-06, "loss": 0.7226, "step": 3869 }, { "epoch": 0.32775778107135295, "grad_norm": 1.284553065701864, "learning_rate": 7.850127816740628e-06, "loss": 0.6741, "step": 3870 }, { "epoch": 0.3278424730044463, "grad_norm": 3.0137388650378862, "learning_rate": 7.849000737273397e-06, "loss": 0.68, "step": 3871 }, { "epoch": 0.3279271649375397, "grad_norm": 0.668505935485743, "learning_rate": 7.847873443403777e-06, "loss": 0.8445, "step": 3872 }, { "epoch": 0.32801185687063306, "grad_norm": 0.605750023204533, "learning_rate": 7.846745935216597e-06, "loss": 0.8302, "step": 3873 }, { "epoch": 0.32809654880372646, "grad_norm": 1.1528399186581997, "learning_rate": 7.845618212796714e-06, "loss": 0.6608, "step": 3874 }, { "epoch": 0.3281812407368198, "grad_norm": 1.0524677769828523, "learning_rate": 7.844490276228991e-06, "loss": 0.6681, "step": 3875 }, { "epoch": 0.3282659326699132, "grad_norm": 1.6009756261963426, "learning_rate": 7.843362125598311e-06, "loss": 0.6273, "step": 3876 }, { "epoch": 0.32835062460300657, "grad_norm": 1.2098983918613415, "learning_rate": 7.842233760989576e-06, "loss": 0.6893, "step": 3877 }, { "epoch": 0.3284353165360999, "grad_norm": 1.5666233743801434, "learning_rate": 7.841105182487701e-06, "loss": 0.6925, "step": 3878 }, { "epoch": 0.3285200084691933, "grad_norm": 1.174089665770073, "learning_rate": 7.839976390177616e-06, "loss": 0.6693, "step": 3879 }, { "epoch": 0.3286047004022867, "grad_norm": 1.2558888107170747, "learning_rate": 7.838847384144269e-06, "loss": 0.6436, "step": 3880 }, { "epoch": 0.3286893923353801, "grad_norm": 1.5227997513831224, "learning_rate": 7.837718164472623e-06, "loss": 0.6735, "step": 3881 }, { "epoch": 0.32877408426847343, "grad_norm": 1.21055137802637, "learning_rate": 7.836588731247661e-06, "loss": 0.6399, "step": 3882 }, { "epoch": 0.3288587762015668, "grad_norm": 1.16783221908874, "learning_rate": 7.835459084554375e-06, "loss": 0.6266, "step": 3883 }, { "epoch": 0.3289434681346602, "grad_norm": 1.482714589375978, "learning_rate": 7.83432922447778e-06, "loss": 0.685, "step": 3884 }, { "epoch": 0.32902816006775354, "grad_norm": 1.217954451490598, "learning_rate": 7.833199151102899e-06, "loss": 0.6304, "step": 3885 }, { "epoch": 0.32911285200084694, "grad_norm": 1.223574836821039, "learning_rate": 7.83206886451478e-06, "loss": 0.613, "step": 3886 }, { "epoch": 0.3291975439339403, "grad_norm": 1.4511386084782745, "learning_rate": 7.830938364798487e-06, "loss": 0.7067, "step": 3887 }, { "epoch": 0.32928223586703365, "grad_norm": 1.2155969147009913, "learning_rate": 7.829807652039087e-06, "loss": 0.6796, "step": 3888 }, { "epoch": 0.32936692780012705, "grad_norm": 1.557366573511201, "learning_rate": 7.828676726321678e-06, "loss": 0.6301, "step": 3889 }, { "epoch": 0.3294516197332204, "grad_norm": 1.2224599220431762, "learning_rate": 7.827545587731367e-06, "loss": 0.6694, "step": 3890 }, { "epoch": 0.3295363116663138, "grad_norm": 0.6740041205249591, "learning_rate": 7.826414236353277e-06, "loss": 0.8906, "step": 3891 }, { "epoch": 0.32962100359940716, "grad_norm": 1.2602649999107258, "learning_rate": 7.825282672272549e-06, "loss": 0.6381, "step": 3892 }, { "epoch": 0.3297056955325005, "grad_norm": 2.3995801093604805, "learning_rate": 7.824150895574342e-06, "loss": 0.6479, "step": 3893 }, { "epoch": 0.3297903874655939, "grad_norm": 1.2090580153310075, "learning_rate": 7.823018906343823e-06, "loss": 0.6799, "step": 3894 }, { "epoch": 0.32987507939868727, "grad_norm": 1.2827253556394838, "learning_rate": 7.821886704666184e-06, "loss": 0.6166, "step": 3895 }, { "epoch": 0.32995977133178067, "grad_norm": 1.8221187814119832, "learning_rate": 7.820754290626627e-06, "loss": 0.7394, "step": 3896 }, { "epoch": 0.330044463264874, "grad_norm": 0.6402372707991109, "learning_rate": 7.819621664310373e-06, "loss": 0.7793, "step": 3897 }, { "epoch": 0.33012915519796737, "grad_norm": 1.3737980899521913, "learning_rate": 7.81848882580266e-06, "loss": 0.6667, "step": 3898 }, { "epoch": 0.3302138471310608, "grad_norm": 2.7301725280686786, "learning_rate": 7.817355775188735e-06, "loss": 0.651, "step": 3899 }, { "epoch": 0.33029853906415413, "grad_norm": 0.6554702220004089, "learning_rate": 7.816222512553872e-06, "loss": 0.8786, "step": 3900 }, { "epoch": 0.33038323099724753, "grad_norm": 1.5390028100666897, "learning_rate": 7.81508903798335e-06, "loss": 0.6292, "step": 3901 }, { "epoch": 0.3304679229303409, "grad_norm": 1.495462325436874, "learning_rate": 7.813955351562473e-06, "loss": 0.6691, "step": 3902 }, { "epoch": 0.33055261486343424, "grad_norm": 1.228763757448043, "learning_rate": 7.812821453376555e-06, "loss": 0.688, "step": 3903 }, { "epoch": 0.33063730679652764, "grad_norm": 1.4369084861667942, "learning_rate": 7.811687343510928e-06, "loss": 0.6644, "step": 3904 }, { "epoch": 0.330721998729621, "grad_norm": 1.37140812767402, "learning_rate": 7.81055302205094e-06, "loss": 0.5887, "step": 3905 }, { "epoch": 0.3308066906627144, "grad_norm": 1.34715738766879, "learning_rate": 7.809418489081954e-06, "loss": 0.5734, "step": 3906 }, { "epoch": 0.33089138259580775, "grad_norm": 1.1587781034882603, "learning_rate": 7.80828374468935e-06, "loss": 0.6524, "step": 3907 }, { "epoch": 0.3309760745289011, "grad_norm": 1.3611563147423063, "learning_rate": 7.807148788958525e-06, "loss": 0.6565, "step": 3908 }, { "epoch": 0.3310607664619945, "grad_norm": 1.2393748149103185, "learning_rate": 7.806013621974887e-06, "loss": 0.616, "step": 3909 }, { "epoch": 0.33114545839508785, "grad_norm": 1.3570403664712722, "learning_rate": 7.804878243823867e-06, "loss": 0.6409, "step": 3910 }, { "epoch": 0.33123015032818126, "grad_norm": 1.3372808683412343, "learning_rate": 7.803742654590907e-06, "loss": 0.6643, "step": 3911 }, { "epoch": 0.3313148422612746, "grad_norm": 1.3610543376747302, "learning_rate": 7.802606854361465e-06, "loss": 0.647, "step": 3912 }, { "epoch": 0.33139953419436796, "grad_norm": 0.6473940754816403, "learning_rate": 7.801470843221015e-06, "loss": 0.8581, "step": 3913 }, { "epoch": 0.33148422612746137, "grad_norm": 1.3332680989988555, "learning_rate": 7.800334621255052e-06, "loss": 0.5903, "step": 3914 }, { "epoch": 0.3315689180605547, "grad_norm": 1.771002701225607, "learning_rate": 7.79919818854908e-06, "loss": 0.6805, "step": 3915 }, { "epoch": 0.3316536099936481, "grad_norm": 1.5786778053054884, "learning_rate": 7.79806154518862e-06, "loss": 0.6806, "step": 3916 }, { "epoch": 0.3317383019267415, "grad_norm": 5.189690311455838, "learning_rate": 7.796924691259213e-06, "loss": 0.6539, "step": 3917 }, { "epoch": 0.3318229938598348, "grad_norm": 1.37656143045986, "learning_rate": 7.795787626846414e-06, "loss": 0.6614, "step": 3918 }, { "epoch": 0.33190768579292823, "grad_norm": 2.2601967696858063, "learning_rate": 7.79465035203579e-06, "loss": 0.6399, "step": 3919 }, { "epoch": 0.3319923777260216, "grad_norm": 1.547119185556574, "learning_rate": 7.79351286691293e-06, "loss": 0.6926, "step": 3920 }, { "epoch": 0.332077069659115, "grad_norm": 1.1245027604312543, "learning_rate": 7.792375171563434e-06, "loss": 0.6208, "step": 3921 }, { "epoch": 0.33216176159220834, "grad_norm": 1.8825879827628265, "learning_rate": 7.791237266072919e-06, "loss": 0.6904, "step": 3922 }, { "epoch": 0.3322464535253017, "grad_norm": 1.5365574373177424, "learning_rate": 7.79009915052702e-06, "loss": 0.6794, "step": 3923 }, { "epoch": 0.3323311454583951, "grad_norm": 1.3458462872009371, "learning_rate": 7.788960825011385e-06, "loss": 0.7027, "step": 3924 }, { "epoch": 0.33241583739148844, "grad_norm": 0.6869687113076226, "learning_rate": 7.78782228961168e-06, "loss": 0.8904, "step": 3925 }, { "epoch": 0.33250052932458185, "grad_norm": 1.3782918895006515, "learning_rate": 7.786683544413587e-06, "loss": 0.6663, "step": 3926 }, { "epoch": 0.3325852212576752, "grad_norm": 1.474765620762393, "learning_rate": 7.7855445895028e-06, "loss": 0.6099, "step": 3927 }, { "epoch": 0.3326699131907686, "grad_norm": 1.4679551275343303, "learning_rate": 7.784405424965034e-06, "loss": 0.6727, "step": 3928 }, { "epoch": 0.33275460512386196, "grad_norm": 1.3122661212697329, "learning_rate": 7.783266050886013e-06, "loss": 0.6697, "step": 3929 }, { "epoch": 0.3328392970569553, "grad_norm": 1.3614026936382366, "learning_rate": 7.782126467351485e-06, "loss": 0.685, "step": 3930 }, { "epoch": 0.3329239889900487, "grad_norm": 1.4048651602641096, "learning_rate": 7.780986674447208e-06, "loss": 0.6714, "step": 3931 }, { "epoch": 0.33300868092314206, "grad_norm": 1.391423720720364, "learning_rate": 7.779846672258958e-06, "loss": 0.5951, "step": 3932 }, { "epoch": 0.33309337285623547, "grad_norm": 1.947052648229138, "learning_rate": 7.778706460872524e-06, "loss": 0.6712, "step": 3933 }, { "epoch": 0.3331780647893288, "grad_norm": 1.3264098766450512, "learning_rate": 7.777566040373719e-06, "loss": 0.6628, "step": 3934 }, { "epoch": 0.33326275672242217, "grad_norm": 1.3425950489324645, "learning_rate": 7.776425410848358e-06, "loss": 0.6861, "step": 3935 }, { "epoch": 0.3333474486555156, "grad_norm": 2.147588400794552, "learning_rate": 7.775284572382285e-06, "loss": 0.603, "step": 3936 }, { "epoch": 0.3334321405886089, "grad_norm": 1.1800230235184193, "learning_rate": 7.77414352506135e-06, "loss": 0.607, "step": 3937 }, { "epoch": 0.33351683252170233, "grad_norm": 1.0483369829035991, "learning_rate": 7.773002268971427e-06, "loss": 0.6291, "step": 3938 }, { "epoch": 0.3336015244547957, "grad_norm": 1.2718076472142112, "learning_rate": 7.7718608041984e-06, "loss": 0.6467, "step": 3939 }, { "epoch": 0.33368621638788903, "grad_norm": 1.8345604351294882, "learning_rate": 7.770719130828168e-06, "loss": 0.6472, "step": 3940 }, { "epoch": 0.33377090832098244, "grad_norm": 1.4275551299111011, "learning_rate": 7.769577248946649e-06, "loss": 0.6208, "step": 3941 }, { "epoch": 0.3338556002540758, "grad_norm": 1.4262662724169088, "learning_rate": 7.768435158639778e-06, "loss": 0.6697, "step": 3942 }, { "epoch": 0.3339402921871692, "grad_norm": 2.3750862185695136, "learning_rate": 7.7672928599935e-06, "loss": 0.6458, "step": 3943 }, { "epoch": 0.33402498412026255, "grad_norm": 1.2505842323249614, "learning_rate": 7.766150353093784e-06, "loss": 0.6479, "step": 3944 }, { "epoch": 0.3341096760533559, "grad_norm": 1.6605880903617989, "learning_rate": 7.765007638026604e-06, "loss": 0.6911, "step": 3945 }, { "epoch": 0.3341943679864493, "grad_norm": 1.9155340333379227, "learning_rate": 7.763864714877957e-06, "loss": 0.6281, "step": 3946 }, { "epoch": 0.33427905991954265, "grad_norm": 1.3218434001855242, "learning_rate": 7.762721583733857e-06, "loss": 0.6585, "step": 3947 }, { "epoch": 0.33436375185263606, "grad_norm": 0.6238329995729966, "learning_rate": 7.761578244680327e-06, "loss": 0.867, "step": 3948 }, { "epoch": 0.3344484437857294, "grad_norm": 1.4581397866806536, "learning_rate": 7.760434697803414e-06, "loss": 0.6871, "step": 3949 }, { "epoch": 0.33453313571882276, "grad_norm": 1.2745760986615462, "learning_rate": 7.759290943189169e-06, "loss": 0.6344, "step": 3950 }, { "epoch": 0.33461782765191617, "grad_norm": 1.4576380747312858, "learning_rate": 7.758146980923671e-06, "loss": 0.6112, "step": 3951 }, { "epoch": 0.3347025195850095, "grad_norm": 1.2023886718541317, "learning_rate": 7.757002811093008e-06, "loss": 0.7362, "step": 3952 }, { "epoch": 0.3347872115181029, "grad_norm": 1.2604821888053839, "learning_rate": 7.755858433783284e-06, "loss": 0.6666, "step": 3953 }, { "epoch": 0.3348719034511963, "grad_norm": 1.6653872418535423, "learning_rate": 7.75471384908062e-06, "loss": 0.6387, "step": 3954 }, { "epoch": 0.3349565953842896, "grad_norm": 0.6179351981964851, "learning_rate": 7.75356905707115e-06, "loss": 0.8155, "step": 3955 }, { "epoch": 0.33504128731738303, "grad_norm": 1.7799660668652089, "learning_rate": 7.75242405784103e-06, "loss": 0.5996, "step": 3956 }, { "epoch": 0.3351259792504764, "grad_norm": 1.4310939819613824, "learning_rate": 7.751278851476424e-06, "loss": 0.7195, "step": 3957 }, { "epoch": 0.3352106711835698, "grad_norm": 1.2989633486427454, "learning_rate": 7.750133438063517e-06, "loss": 0.6056, "step": 3958 }, { "epoch": 0.33529536311666314, "grad_norm": 1.4135277150640204, "learning_rate": 7.748987817688505e-06, "loss": 0.7361, "step": 3959 }, { "epoch": 0.3353800550497565, "grad_norm": 1.3822947371329795, "learning_rate": 7.747841990437603e-06, "loss": 0.6414, "step": 3960 }, { "epoch": 0.3354647469828499, "grad_norm": 1.1833788327951946, "learning_rate": 7.746695956397042e-06, "loss": 0.6155, "step": 3961 }, { "epoch": 0.33554943891594324, "grad_norm": 0.640424205675623, "learning_rate": 7.745549715653063e-06, "loss": 0.8729, "step": 3962 }, { "epoch": 0.33563413084903665, "grad_norm": 0.6478383839447697, "learning_rate": 7.744403268291931e-06, "loss": 0.8561, "step": 3963 }, { "epoch": 0.33571882278213, "grad_norm": 1.2279802717267465, "learning_rate": 7.743256614399923e-06, "loss": 0.6943, "step": 3964 }, { "epoch": 0.33580351471522335, "grad_norm": 1.28476313008968, "learning_rate": 7.742109754063325e-06, "loss": 0.6755, "step": 3965 }, { "epoch": 0.33588820664831676, "grad_norm": 1.2412197141708452, "learning_rate": 7.74096268736845e-06, "loss": 0.6659, "step": 3966 }, { "epoch": 0.3359728985814101, "grad_norm": 1.7063716647469038, "learning_rate": 7.739815414401618e-06, "loss": 0.667, "step": 3967 }, { "epoch": 0.3360575905145035, "grad_norm": 1.319208349386416, "learning_rate": 7.738667935249168e-06, "loss": 0.7098, "step": 3968 }, { "epoch": 0.33614228244759686, "grad_norm": 1.7098937582693325, "learning_rate": 7.737520249997454e-06, "loss": 0.6007, "step": 3969 }, { "epoch": 0.3362269743806902, "grad_norm": 1.5388253370034466, "learning_rate": 7.736372358732845e-06, "loss": 0.576, "step": 3970 }, { "epoch": 0.3363116663137836, "grad_norm": 1.2336336446563094, "learning_rate": 7.735224261541727e-06, "loss": 0.6925, "step": 3971 }, { "epoch": 0.33639635824687697, "grad_norm": 1.2413479588052576, "learning_rate": 7.7340759585105e-06, "loss": 0.6488, "step": 3972 }, { "epoch": 0.3364810501799704, "grad_norm": 1.683718830523707, "learning_rate": 7.732927449725578e-06, "loss": 0.6787, "step": 3973 }, { "epoch": 0.3365657421130637, "grad_norm": 1.3751824607723169, "learning_rate": 7.731778735273395e-06, "loss": 0.6964, "step": 3974 }, { "epoch": 0.3366504340461571, "grad_norm": 0.6839109199711382, "learning_rate": 7.730629815240395e-06, "loss": 0.8653, "step": 3975 }, { "epoch": 0.3367351259792505, "grad_norm": 1.2052296559773947, "learning_rate": 7.729480689713045e-06, "loss": 0.6236, "step": 3976 }, { "epoch": 0.33681981791234383, "grad_norm": 1.61230097434026, "learning_rate": 7.728331358777818e-06, "loss": 0.6491, "step": 3977 }, { "epoch": 0.33690450984543724, "grad_norm": 2.4516234360979263, "learning_rate": 7.72718182252121e-06, "loss": 0.61, "step": 3978 }, { "epoch": 0.3369892017785306, "grad_norm": 1.2499546055704875, "learning_rate": 7.726032081029726e-06, "loss": 0.6843, "step": 3979 }, { "epoch": 0.337073893711624, "grad_norm": 0.624223365787327, "learning_rate": 7.724882134389895e-06, "loss": 0.8529, "step": 3980 }, { "epoch": 0.33715858564471735, "grad_norm": 1.978368135127325, "learning_rate": 7.723731982688255e-06, "loss": 0.691, "step": 3981 }, { "epoch": 0.3372432775778107, "grad_norm": 1.5123383537011468, "learning_rate": 7.722581626011361e-06, "loss": 0.6433, "step": 3982 }, { "epoch": 0.3373279695109041, "grad_norm": 1.4569387643510698, "learning_rate": 7.721431064445782e-06, "loss": 0.7312, "step": 3983 }, { "epoch": 0.33741266144399745, "grad_norm": 1.4504523520605623, "learning_rate": 7.720280298078104e-06, "loss": 0.6245, "step": 3984 }, { "epoch": 0.33749735337709086, "grad_norm": 1.3965580709788028, "learning_rate": 7.71912932699493e-06, "loss": 0.6301, "step": 3985 }, { "epoch": 0.3375820453101842, "grad_norm": 1.1737432879437841, "learning_rate": 7.717978151282874e-06, "loss": 0.7099, "step": 3986 }, { "epoch": 0.33766673724327756, "grad_norm": 1.6055787970775672, "learning_rate": 7.71682677102857e-06, "loss": 0.6534, "step": 3987 }, { "epoch": 0.33775142917637097, "grad_norm": 1.3880606935011506, "learning_rate": 7.715675186318667e-06, "loss": 0.6146, "step": 3988 }, { "epoch": 0.3378361211094643, "grad_norm": 1.5268514461078544, "learning_rate": 7.714523397239824e-06, "loss": 0.6821, "step": 3989 }, { "epoch": 0.3379208130425577, "grad_norm": 1.4502132764560203, "learning_rate": 7.713371403878723e-06, "loss": 0.6654, "step": 3990 }, { "epoch": 0.33800550497565107, "grad_norm": 1.2687271688802106, "learning_rate": 7.712219206322056e-06, "loss": 0.6917, "step": 3991 }, { "epoch": 0.3380901969087444, "grad_norm": 0.6245871225318608, "learning_rate": 7.711066804656531e-06, "loss": 0.847, "step": 3992 }, { "epoch": 0.33817488884183783, "grad_norm": 1.5188693831658784, "learning_rate": 7.709914198968873e-06, "loss": 0.6764, "step": 3993 }, { "epoch": 0.3382595807749312, "grad_norm": 1.7003414938393653, "learning_rate": 7.708761389345823e-06, "loss": 0.6975, "step": 3994 }, { "epoch": 0.3383442727080246, "grad_norm": 1.6018960117127488, "learning_rate": 7.707608375874132e-06, "loss": 0.6257, "step": 3995 }, { "epoch": 0.33842896464111794, "grad_norm": 1.3469178161399196, "learning_rate": 7.706455158640575e-06, "loss": 0.631, "step": 3996 }, { "epoch": 0.3385136565742113, "grad_norm": 1.1975554124466499, "learning_rate": 7.705301737731938e-06, "loss": 0.6466, "step": 3997 }, { "epoch": 0.3385983485073047, "grad_norm": 0.7336961949173897, "learning_rate": 7.704148113235018e-06, "loss": 0.8647, "step": 3998 }, { "epoch": 0.33868304044039804, "grad_norm": 1.3790476184699156, "learning_rate": 7.702994285236633e-06, "loss": 0.6215, "step": 3999 }, { "epoch": 0.33876773237349145, "grad_norm": 1.2329029430341278, "learning_rate": 7.701840253823617e-06, "loss": 0.6457, "step": 4000 }, { "epoch": 0.3388524243065848, "grad_norm": 1.663775914296086, "learning_rate": 7.700686019082813e-06, "loss": 0.6435, "step": 4001 }, { "epoch": 0.33893711623967815, "grad_norm": 1.204304581942184, "learning_rate": 7.699531581101085e-06, "loss": 0.6658, "step": 4002 }, { "epoch": 0.33902180817277155, "grad_norm": 2.3559346573328095, "learning_rate": 7.698376939965311e-06, "loss": 0.6995, "step": 4003 }, { "epoch": 0.3391065001058649, "grad_norm": 1.4693983072922405, "learning_rate": 7.697222095762384e-06, "loss": 0.6891, "step": 4004 }, { "epoch": 0.3391911920389583, "grad_norm": 1.272558748490848, "learning_rate": 7.696067048579212e-06, "loss": 0.6204, "step": 4005 }, { "epoch": 0.33927588397205166, "grad_norm": 1.2903084591928584, "learning_rate": 7.69491179850272e-06, "loss": 0.6808, "step": 4006 }, { "epoch": 0.339360575905145, "grad_norm": 1.4332136401930557, "learning_rate": 7.693756345619841e-06, "loss": 0.631, "step": 4007 }, { "epoch": 0.3394452678382384, "grad_norm": 1.1535945521822895, "learning_rate": 7.692600690017537e-06, "loss": 0.6533, "step": 4008 }, { "epoch": 0.33952995977133177, "grad_norm": 0.6534507823924811, "learning_rate": 7.69144483178277e-06, "loss": 0.8192, "step": 4009 }, { "epoch": 0.3396146517044252, "grad_norm": 1.4330754429967392, "learning_rate": 7.69028877100253e-06, "loss": 0.6632, "step": 4010 }, { "epoch": 0.3396993436375185, "grad_norm": 1.205440713490705, "learning_rate": 7.689132507763812e-06, "loss": 0.6798, "step": 4011 }, { "epoch": 0.3397840355706119, "grad_norm": 1.4323283872945545, "learning_rate": 7.687976042153636e-06, "loss": 0.6893, "step": 4012 }, { "epoch": 0.3398687275037053, "grad_norm": 1.3813369445964445, "learning_rate": 7.686819374259025e-06, "loss": 0.6925, "step": 4013 }, { "epoch": 0.33995341943679863, "grad_norm": 1.33923321002357, "learning_rate": 7.685662504167034e-06, "loss": 0.6691, "step": 4014 }, { "epoch": 0.34003811136989204, "grad_norm": 1.8369402115068827, "learning_rate": 7.684505431964714e-06, "loss": 0.6961, "step": 4015 }, { "epoch": 0.3401228033029854, "grad_norm": 0.7438395323140078, "learning_rate": 7.683348157739145e-06, "loss": 0.8434, "step": 4016 }, { "epoch": 0.34020749523607874, "grad_norm": 1.669178826285676, "learning_rate": 7.682190681577421e-06, "loss": 0.6503, "step": 4017 }, { "epoch": 0.34029218716917214, "grad_norm": 1.4564628321018889, "learning_rate": 7.681033003566644e-06, "loss": 0.6848, "step": 4018 }, { "epoch": 0.3403768791022655, "grad_norm": 1.3233905959857792, "learning_rate": 7.679875123793935e-06, "loss": 0.6678, "step": 4019 }, { "epoch": 0.3404615710353589, "grad_norm": 1.7532396930249574, "learning_rate": 7.678717042346434e-06, "loss": 0.6437, "step": 4020 }, { "epoch": 0.34054626296845225, "grad_norm": 1.3921026966504058, "learning_rate": 7.677558759311292e-06, "loss": 0.7208, "step": 4021 }, { "epoch": 0.3406309549015456, "grad_norm": 1.2928887625100125, "learning_rate": 7.676400274775675e-06, "loss": 0.6264, "step": 4022 }, { "epoch": 0.340715646834639, "grad_norm": 1.460976615353196, "learning_rate": 7.675241588826764e-06, "loss": 0.6116, "step": 4023 }, { "epoch": 0.34080033876773236, "grad_norm": 1.3879896563024312, "learning_rate": 7.674082701551758e-06, "loss": 0.6525, "step": 4024 }, { "epoch": 0.34088503070082576, "grad_norm": 1.5611935684369422, "learning_rate": 7.672923613037868e-06, "loss": 0.7073, "step": 4025 }, { "epoch": 0.3409697226339191, "grad_norm": 1.312517861082159, "learning_rate": 7.671764323372322e-06, "loss": 0.6696, "step": 4026 }, { "epoch": 0.34105441456701246, "grad_norm": 1.4392377641368934, "learning_rate": 7.670604832642366e-06, "loss": 0.6528, "step": 4027 }, { "epoch": 0.34113910650010587, "grad_norm": 1.3519755043323116, "learning_rate": 7.66944514093525e-06, "loss": 0.6915, "step": 4028 }, { "epoch": 0.3412237984331992, "grad_norm": 2.13251925496115, "learning_rate": 7.668285248338256e-06, "loss": 0.6401, "step": 4029 }, { "epoch": 0.3413084903662926, "grad_norm": 1.3416847877748825, "learning_rate": 7.667125154938667e-06, "loss": 0.6395, "step": 4030 }, { "epoch": 0.341393182299386, "grad_norm": 1.6804343717154675, "learning_rate": 7.665964860823784e-06, "loss": 0.613, "step": 4031 }, { "epoch": 0.3414778742324794, "grad_norm": 1.6096495235279247, "learning_rate": 7.66480436608093e-06, "loss": 0.6943, "step": 4032 }, { "epoch": 0.34156256616557273, "grad_norm": 1.2202613645746583, "learning_rate": 7.663643670797437e-06, "loss": 0.6242, "step": 4033 }, { "epoch": 0.3416472580986661, "grad_norm": 1.2278637686809446, "learning_rate": 7.662482775060655e-06, "loss": 0.6694, "step": 4034 }, { "epoch": 0.3417319500317595, "grad_norm": 1.5201961959382786, "learning_rate": 7.661321678957944e-06, "loss": 0.6869, "step": 4035 }, { "epoch": 0.34181664196485284, "grad_norm": 2.69604955627923, "learning_rate": 7.660160382576683e-06, "loss": 0.6332, "step": 4036 }, { "epoch": 0.34190133389794625, "grad_norm": 1.6117290666972708, "learning_rate": 7.65899888600427e-06, "loss": 0.6884, "step": 4037 }, { "epoch": 0.3419860258310396, "grad_norm": 1.45637345889213, "learning_rate": 7.657837189328107e-06, "loss": 0.6911, "step": 4038 }, { "epoch": 0.34207071776413295, "grad_norm": 1.3678190042442344, "learning_rate": 7.656675292635625e-06, "loss": 0.6284, "step": 4039 }, { "epoch": 0.34215540969722635, "grad_norm": 1.7224483447568757, "learning_rate": 7.655513196014256e-06, "loss": 0.6683, "step": 4040 }, { "epoch": 0.3422401016303197, "grad_norm": 1.60725949911121, "learning_rate": 7.654350899551459e-06, "loss": 0.643, "step": 4041 }, { "epoch": 0.3423247935634131, "grad_norm": 1.2317440457055302, "learning_rate": 7.653188403334704e-06, "loss": 0.6601, "step": 4042 }, { "epoch": 0.34240948549650646, "grad_norm": 1.646209621947444, "learning_rate": 7.65202570745147e-06, "loss": 0.6561, "step": 4043 }, { "epoch": 0.3424941774295998, "grad_norm": 1.7244138961676578, "learning_rate": 7.650862811989257e-06, "loss": 0.6174, "step": 4044 }, { "epoch": 0.3425788693626932, "grad_norm": 1.496792297188214, "learning_rate": 7.649699717035582e-06, "loss": 0.6547, "step": 4045 }, { "epoch": 0.34266356129578657, "grad_norm": 1.2633912982040465, "learning_rate": 7.648536422677972e-06, "loss": 0.6227, "step": 4046 }, { "epoch": 0.34274825322888, "grad_norm": 1.195050913741355, "learning_rate": 7.647372929003972e-06, "loss": 0.6216, "step": 4047 }, { "epoch": 0.3428329451619733, "grad_norm": 1.7344710974769253, "learning_rate": 7.646209236101141e-06, "loss": 0.6683, "step": 4048 }, { "epoch": 0.3429176370950667, "grad_norm": 1.257804301783612, "learning_rate": 7.645045344057052e-06, "loss": 0.6453, "step": 4049 }, { "epoch": 0.3430023290281601, "grad_norm": 1.5970449115231409, "learning_rate": 7.643881252959296e-06, "loss": 0.6445, "step": 4050 }, { "epoch": 0.34308702096125343, "grad_norm": 1.5032728249854148, "learning_rate": 7.642716962895473e-06, "loss": 0.6211, "step": 4051 }, { "epoch": 0.34317171289434684, "grad_norm": 0.6794128191362397, "learning_rate": 7.641552473953207e-06, "loss": 0.8599, "step": 4052 }, { "epoch": 0.3432564048274402, "grad_norm": 1.4086517851735014, "learning_rate": 7.640387786220129e-06, "loss": 0.6173, "step": 4053 }, { "epoch": 0.34334109676053354, "grad_norm": 1.6847786894489667, "learning_rate": 7.639222899783887e-06, "loss": 0.5329, "step": 4054 }, { "epoch": 0.34342578869362694, "grad_norm": 1.3032551821478933, "learning_rate": 7.638057814732148e-06, "loss": 0.6527, "step": 4055 }, { "epoch": 0.3435104806267203, "grad_norm": 1.3039175578618647, "learning_rate": 7.63689253115259e-06, "loss": 0.6539, "step": 4056 }, { "epoch": 0.3435951725598137, "grad_norm": 1.3054354124026761, "learning_rate": 7.635727049132904e-06, "loss": 0.6383, "step": 4057 }, { "epoch": 0.34367986449290705, "grad_norm": 1.4522575436266605, "learning_rate": 7.634561368760803e-06, "loss": 0.6383, "step": 4058 }, { "epoch": 0.3437645564260004, "grad_norm": 0.7261618502070744, "learning_rate": 7.633395490124007e-06, "loss": 0.8838, "step": 4059 }, { "epoch": 0.3438492483590938, "grad_norm": 1.2708327740409244, "learning_rate": 7.632229413310256e-06, "loss": 0.5999, "step": 4060 }, { "epoch": 0.34393394029218716, "grad_norm": 1.52548850472562, "learning_rate": 7.631063138407302e-06, "loss": 0.6606, "step": 4061 }, { "epoch": 0.34401863222528056, "grad_norm": 1.215782177340924, "learning_rate": 7.629896665502916e-06, "loss": 0.6488, "step": 4062 }, { "epoch": 0.3441033241583739, "grad_norm": 1.4277377967744138, "learning_rate": 7.62872999468488e-06, "loss": 0.6382, "step": 4063 }, { "epoch": 0.34418801609146726, "grad_norm": 3.935127261458833, "learning_rate": 7.627563126040993e-06, "loss": 0.7015, "step": 4064 }, { "epoch": 0.34427270802456067, "grad_norm": 1.4797361393097463, "learning_rate": 7.626396059659065e-06, "loss": 0.6335, "step": 4065 }, { "epoch": 0.344357399957654, "grad_norm": 1.4805675812870325, "learning_rate": 7.625228795626929e-06, "loss": 0.6153, "step": 4066 }, { "epoch": 0.3444420918907474, "grad_norm": 3.5157401601135163, "learning_rate": 7.624061334032422e-06, "loss": 0.6752, "step": 4067 }, { "epoch": 0.3445267838238408, "grad_norm": 2.0768958316179016, "learning_rate": 7.622893674963406e-06, "loss": 0.6468, "step": 4068 }, { "epoch": 0.3446114757569341, "grad_norm": 0.659358834122625, "learning_rate": 7.621725818507751e-06, "loss": 0.873, "step": 4069 }, { "epoch": 0.34469616769002753, "grad_norm": 0.6558171225661482, "learning_rate": 7.620557764753347e-06, "loss": 0.7782, "step": 4070 }, { "epoch": 0.3447808596231209, "grad_norm": 1.1837981993692566, "learning_rate": 7.619389513788094e-06, "loss": 0.6892, "step": 4071 }, { "epoch": 0.3448655515562143, "grad_norm": 1.7442241306726667, "learning_rate": 7.61822106569991e-06, "loss": 0.6405, "step": 4072 }, { "epoch": 0.34495024348930764, "grad_norm": 1.2240876994950727, "learning_rate": 7.617052420576727e-06, "loss": 0.6821, "step": 4073 }, { "epoch": 0.345034935422401, "grad_norm": 1.3532941676287373, "learning_rate": 7.615883578506491e-06, "loss": 0.8324, "step": 4074 }, { "epoch": 0.3451196273554944, "grad_norm": 1.2904546149456146, "learning_rate": 7.614714539577164e-06, "loss": 0.6564, "step": 4075 }, { "epoch": 0.34520431928858775, "grad_norm": 2.861260877646878, "learning_rate": 7.613545303876721e-06, "loss": 0.7012, "step": 4076 }, { "epoch": 0.34528901122168115, "grad_norm": 1.441173082868411, "learning_rate": 7.6123758714931565e-06, "loss": 0.6425, "step": 4077 }, { "epoch": 0.3453737031547745, "grad_norm": 2.4645987746854656, "learning_rate": 7.611206242514473e-06, "loss": 0.6245, "step": 4078 }, { "epoch": 0.34545839508786785, "grad_norm": 2.824298418480807, "learning_rate": 7.6100364170286935e-06, "loss": 0.6479, "step": 4079 }, { "epoch": 0.34554308702096126, "grad_norm": 1.4046889409170997, "learning_rate": 7.608866395123853e-06, "loss": 0.6558, "step": 4080 }, { "epoch": 0.3456277789540546, "grad_norm": 1.5322394681808553, "learning_rate": 7.607696176888002e-06, "loss": 0.6789, "step": 4081 }, { "epoch": 0.345712470887148, "grad_norm": 1.1469272149978156, "learning_rate": 7.6065257624092025e-06, "loss": 0.6574, "step": 4082 }, { "epoch": 0.34579716282024137, "grad_norm": 1.3048644319418863, "learning_rate": 7.60535515177554e-06, "loss": 0.6144, "step": 4083 }, { "epoch": 0.34588185475333477, "grad_norm": 2.137650685782729, "learning_rate": 7.6041843450751075e-06, "loss": 0.6782, "step": 4084 }, { "epoch": 0.3459665466864281, "grad_norm": 3.2755580239369246, "learning_rate": 7.603013342396011e-06, "loss": 0.6952, "step": 4085 }, { "epoch": 0.3460512386195215, "grad_norm": 1.236466320355724, "learning_rate": 7.601842143826375e-06, "loss": 0.6734, "step": 4086 }, { "epoch": 0.3461359305526149, "grad_norm": 1.4890894476606626, "learning_rate": 7.600670749454344e-06, "loss": 0.6885, "step": 4087 }, { "epoch": 0.34622062248570823, "grad_norm": 1.5437421120356531, "learning_rate": 7.599499159368067e-06, "loss": 0.5844, "step": 4088 }, { "epoch": 0.34630531441880164, "grad_norm": 1.3487042397401128, "learning_rate": 7.598327373655712e-06, "loss": 0.6573, "step": 4089 }, { "epoch": 0.346390006351895, "grad_norm": 3.8192852207620542, "learning_rate": 7.597155392405464e-06, "loss": 0.618, "step": 4090 }, { "epoch": 0.34647469828498834, "grad_norm": 1.2494883884322165, "learning_rate": 7.59598321570552e-06, "loss": 0.7101, "step": 4091 }, { "epoch": 0.34655939021808174, "grad_norm": 1.515055427032433, "learning_rate": 7.5948108436440925e-06, "loss": 0.6465, "step": 4092 }, { "epoch": 0.3466440821511751, "grad_norm": 2.302600429166865, "learning_rate": 7.593638276309409e-06, "loss": 0.6608, "step": 4093 }, { "epoch": 0.3467287740842685, "grad_norm": 1.3024158453691381, "learning_rate": 7.592465513789711e-06, "loss": 0.6464, "step": 4094 }, { "epoch": 0.34681346601736185, "grad_norm": 1.3097890225925422, "learning_rate": 7.5912925561732565e-06, "loss": 0.5696, "step": 4095 }, { "epoch": 0.3468981579504552, "grad_norm": 2.2285036151802253, "learning_rate": 7.590119403548315e-06, "loss": 0.6028, "step": 4096 }, { "epoch": 0.3469828498835486, "grad_norm": 1.6734579799295801, "learning_rate": 7.588946056003173e-06, "loss": 0.6365, "step": 4097 }, { "epoch": 0.34706754181664196, "grad_norm": 1.3584817646801208, "learning_rate": 7.58777251362613e-06, "loss": 0.5974, "step": 4098 }, { "epoch": 0.34715223374973536, "grad_norm": 1.2188056442804494, "learning_rate": 7.586598776505503e-06, "loss": 0.6546, "step": 4099 }, { "epoch": 0.3472369256828287, "grad_norm": 1.5500661917272855, "learning_rate": 7.585424844729623e-06, "loss": 0.6614, "step": 4100 }, { "epoch": 0.34732161761592206, "grad_norm": 1.6420956294667908, "learning_rate": 7.584250718386832e-06, "loss": 0.6495, "step": 4101 }, { "epoch": 0.34740630954901547, "grad_norm": 1.345598377233338, "learning_rate": 7.5830763975654895e-06, "loss": 0.6585, "step": 4102 }, { "epoch": 0.3474910014821088, "grad_norm": 2.3447084811899987, "learning_rate": 7.58190188235397e-06, "loss": 0.6955, "step": 4103 }, { "epoch": 0.3475756934152022, "grad_norm": 1.4806546199629602, "learning_rate": 7.580727172840663e-06, "loss": 0.6042, "step": 4104 }, { "epoch": 0.3476603853482956, "grad_norm": 1.4137033558983378, "learning_rate": 7.57955226911397e-06, "loss": 0.636, "step": 4105 }, { "epoch": 0.3477450772813889, "grad_norm": 1.698647001547794, "learning_rate": 7.57837717126231e-06, "loss": 0.6883, "step": 4106 }, { "epoch": 0.34782976921448233, "grad_norm": 0.6368536589687311, "learning_rate": 7.577201879374114e-06, "loss": 0.831, "step": 4107 }, { "epoch": 0.3479144611475757, "grad_norm": 1.905274606083683, "learning_rate": 7.57602639353783e-06, "loss": 0.658, "step": 4108 }, { "epoch": 0.3479991530806691, "grad_norm": 1.1120427201671053, "learning_rate": 7.574850713841919e-06, "loss": 0.6046, "step": 4109 }, { "epoch": 0.34808384501376244, "grad_norm": 1.6238146309681785, "learning_rate": 7.573674840374856e-06, "loss": 0.6382, "step": 4110 }, { "epoch": 0.3481685369468558, "grad_norm": 1.2672325098980208, "learning_rate": 7.572498773225137e-06, "loss": 0.6379, "step": 4111 }, { "epoch": 0.3482532288799492, "grad_norm": 1.2705808744176526, "learning_rate": 7.571322512481261e-06, "loss": 0.65, "step": 4112 }, { "epoch": 0.34833792081304255, "grad_norm": 1.8178870106660372, "learning_rate": 7.570146058231749e-06, "loss": 0.6754, "step": 4113 }, { "epoch": 0.34842261274613595, "grad_norm": 1.4487526350094244, "learning_rate": 7.568969410565137e-06, "loss": 0.6091, "step": 4114 }, { "epoch": 0.3485073046792293, "grad_norm": 0.6324133265703498, "learning_rate": 7.567792569569974e-06, "loss": 0.8317, "step": 4115 }, { "epoch": 0.34859199661232265, "grad_norm": 1.6812280604409908, "learning_rate": 7.566615535334823e-06, "loss": 0.6713, "step": 4116 }, { "epoch": 0.34867668854541606, "grad_norm": 1.369274336538475, "learning_rate": 7.565438307948262e-06, "loss": 0.6353, "step": 4117 }, { "epoch": 0.3487613804785094, "grad_norm": 3.433276558731656, "learning_rate": 7.5642608874988844e-06, "loss": 0.661, "step": 4118 }, { "epoch": 0.3488460724116028, "grad_norm": 1.6361695767943085, "learning_rate": 7.563083274075296e-06, "loss": 0.6294, "step": 4119 }, { "epoch": 0.34893076434469616, "grad_norm": 1.402265831873713, "learning_rate": 7.561905467766118e-06, "loss": 0.6626, "step": 4120 }, { "epoch": 0.3490154562777895, "grad_norm": 1.2774562901691828, "learning_rate": 7.560727468659988e-06, "loss": 0.6394, "step": 4121 }, { "epoch": 0.3491001482108829, "grad_norm": 1.2692553434794367, "learning_rate": 7.559549276845558e-06, "loss": 0.5889, "step": 4122 }, { "epoch": 0.34918484014397627, "grad_norm": 1.281879866721623, "learning_rate": 7.5583708924114905e-06, "loss": 0.6584, "step": 4123 }, { "epoch": 0.3492695320770697, "grad_norm": 1.4888366413956236, "learning_rate": 7.557192315446465e-06, "loss": 0.6264, "step": 4124 }, { "epoch": 0.34935422401016303, "grad_norm": 1.1904707974436428, "learning_rate": 7.556013546039178e-06, "loss": 0.6438, "step": 4125 }, { "epoch": 0.3494389159432564, "grad_norm": 1.527986112992837, "learning_rate": 7.554834584278337e-06, "loss": 0.6481, "step": 4126 }, { "epoch": 0.3495236078763498, "grad_norm": 1.4174010970249007, "learning_rate": 7.5536554302526645e-06, "loss": 0.6429, "step": 4127 }, { "epoch": 0.34960829980944313, "grad_norm": 2.47028617226367, "learning_rate": 7.552476084050899e-06, "loss": 0.5884, "step": 4128 }, { "epoch": 0.34969299174253654, "grad_norm": 1.5117923071717803, "learning_rate": 7.551296545761792e-06, "loss": 0.6418, "step": 4129 }, { "epoch": 0.3497776836756299, "grad_norm": 2.3784536342237694, "learning_rate": 7.55011681547411e-06, "loss": 0.6697, "step": 4130 }, { "epoch": 0.34986237560872324, "grad_norm": 1.5565031250481796, "learning_rate": 7.548936893276634e-06, "loss": 0.6013, "step": 4131 }, { "epoch": 0.34994706754181665, "grad_norm": 1.5819370094672178, "learning_rate": 7.5477567792581595e-06, "loss": 0.6065, "step": 4132 }, { "epoch": 0.35003175947491, "grad_norm": 1.5154644914104862, "learning_rate": 7.5465764735074985e-06, "loss": 0.5981, "step": 4133 }, { "epoch": 0.3501164514080034, "grad_norm": 1.2753077387563776, "learning_rate": 7.54539597611347e-06, "loss": 0.6167, "step": 4134 }, { "epoch": 0.35020114334109675, "grad_norm": 1.5507512804161965, "learning_rate": 7.544215287164918e-06, "loss": 0.6624, "step": 4135 }, { "epoch": 0.35028583527419016, "grad_norm": 1.1177734648277347, "learning_rate": 7.543034406750691e-06, "loss": 0.6189, "step": 4136 }, { "epoch": 0.3503705272072835, "grad_norm": 1.3765523700973208, "learning_rate": 7.541853334959661e-06, "loss": 0.6012, "step": 4137 }, { "epoch": 0.35045521914037686, "grad_norm": 1.4890012700796882, "learning_rate": 7.540672071880708e-06, "loss": 0.6697, "step": 4138 }, { "epoch": 0.35053991107347027, "grad_norm": 2.6979682760433836, "learning_rate": 7.539490617602726e-06, "loss": 0.6124, "step": 4139 }, { "epoch": 0.3506246030065636, "grad_norm": 1.5653975269313838, "learning_rate": 7.53830897221463e-06, "loss": 0.6719, "step": 4140 }, { "epoch": 0.350709294939657, "grad_norm": 2.073931059136627, "learning_rate": 7.537127135805341e-06, "loss": 0.563, "step": 4141 }, { "epoch": 0.3507939868727504, "grad_norm": 1.6224354115376067, "learning_rate": 7.535945108463802e-06, "loss": 0.6365, "step": 4142 }, { "epoch": 0.3508786788058437, "grad_norm": 2.927011926691719, "learning_rate": 7.534762890278964e-06, "loss": 0.6781, "step": 4143 }, { "epoch": 0.35096337073893713, "grad_norm": 2.1153926551630877, "learning_rate": 7.533580481339797e-06, "loss": 0.6704, "step": 4144 }, { "epoch": 0.3510480626720305, "grad_norm": 1.8422677756597567, "learning_rate": 7.5323978817352825e-06, "loss": 0.6083, "step": 4145 }, { "epoch": 0.3511327546051239, "grad_norm": 1.9657676802981867, "learning_rate": 7.531215091554418e-06, "loss": 0.6194, "step": 4146 }, { "epoch": 0.35121744653821724, "grad_norm": 1.2035167696806868, "learning_rate": 7.530032110886214e-06, "loss": 0.711, "step": 4147 }, { "epoch": 0.3513021384713106, "grad_norm": 0.6516010023145223, "learning_rate": 7.528848939819695e-06, "loss": 0.8644, "step": 4148 }, { "epoch": 0.351386830404404, "grad_norm": 1.3097386266727873, "learning_rate": 7.527665578443906e-06, "loss": 0.6583, "step": 4149 }, { "epoch": 0.35147152233749734, "grad_norm": 1.974414956541315, "learning_rate": 7.526482026847894e-06, "loss": 0.6895, "step": 4150 }, { "epoch": 0.35155621427059075, "grad_norm": 1.2306611853591023, "learning_rate": 7.525298285120734e-06, "loss": 0.6423, "step": 4151 }, { "epoch": 0.3516409062036841, "grad_norm": 1.903342800084909, "learning_rate": 7.524114353351504e-06, "loss": 0.6401, "step": 4152 }, { "epoch": 0.35172559813677745, "grad_norm": 1.2995868843587377, "learning_rate": 7.522930231629304e-06, "loss": 0.6403, "step": 4153 }, { "epoch": 0.35181029006987086, "grad_norm": 1.4931101385022945, "learning_rate": 7.5217459200432445e-06, "loss": 0.6453, "step": 4154 }, { "epoch": 0.3518949820029642, "grad_norm": 0.6191001139748255, "learning_rate": 7.52056141868245e-06, "loss": 0.8526, "step": 4155 }, { "epoch": 0.3519796739360576, "grad_norm": 2.265074050482751, "learning_rate": 7.519376727636063e-06, "loss": 0.6294, "step": 4156 }, { "epoch": 0.35206436586915096, "grad_norm": 1.6069152380452543, "learning_rate": 7.5181918469932365e-06, "loss": 0.6322, "step": 4157 }, { "epoch": 0.3521490578022443, "grad_norm": 1.660507538186205, "learning_rate": 7.5170067768431385e-06, "loss": 0.6849, "step": 4158 }, { "epoch": 0.3522337497353377, "grad_norm": 1.53574094682878, "learning_rate": 7.515821517274954e-06, "loss": 0.6633, "step": 4159 }, { "epoch": 0.35231844166843107, "grad_norm": 1.3365020368911158, "learning_rate": 7.514636068377877e-06, "loss": 0.6734, "step": 4160 }, { "epoch": 0.3524031336015245, "grad_norm": 1.4305717708138084, "learning_rate": 7.513450430241121e-06, "loss": 0.5883, "step": 4161 }, { "epoch": 0.3524878255346178, "grad_norm": 3.159719784459255, "learning_rate": 7.512264602953909e-06, "loss": 0.6366, "step": 4162 }, { "epoch": 0.3525725174677112, "grad_norm": 0.5758855091898293, "learning_rate": 7.5110785866054846e-06, "loss": 0.8874, "step": 4163 }, { "epoch": 0.3526572094008046, "grad_norm": 1.4590176177953382, "learning_rate": 7.509892381285098e-06, "loss": 0.6413, "step": 4164 }, { "epoch": 0.35274190133389793, "grad_norm": 1.2866038428738944, "learning_rate": 7.508705987082019e-06, "loss": 0.6697, "step": 4165 }, { "epoch": 0.35282659326699134, "grad_norm": 2.177664709807805, "learning_rate": 7.507519404085533e-06, "loss": 0.6629, "step": 4166 }, { "epoch": 0.3529112852000847, "grad_norm": 1.5851457950794898, "learning_rate": 7.506332632384932e-06, "loss": 0.6419, "step": 4167 }, { "epoch": 0.35299597713317804, "grad_norm": 1.2844097292492362, "learning_rate": 7.505145672069528e-06, "loss": 0.6208, "step": 4168 }, { "epoch": 0.35308066906627145, "grad_norm": 1.4363292456663743, "learning_rate": 7.503958523228647e-06, "loss": 0.5853, "step": 4169 }, { "epoch": 0.3531653609993648, "grad_norm": 1.2172188537927269, "learning_rate": 7.502771185951629e-06, "loss": 0.6791, "step": 4170 }, { "epoch": 0.3532500529324582, "grad_norm": 1.2341212045933818, "learning_rate": 7.501583660327827e-06, "loss": 0.6106, "step": 4171 }, { "epoch": 0.35333474486555155, "grad_norm": 0.5748269882454932, "learning_rate": 7.500395946446608e-06, "loss": 0.7811, "step": 4172 }, { "epoch": 0.3534194367986449, "grad_norm": 1.3545024369354126, "learning_rate": 7.4992080443973526e-06, "loss": 0.6204, "step": 4173 }, { "epoch": 0.3535041287317383, "grad_norm": 1.3196419626161506, "learning_rate": 7.498019954269458e-06, "loss": 0.5979, "step": 4174 }, { "epoch": 0.35358882066483166, "grad_norm": 1.3579301518750735, "learning_rate": 7.496831676152334e-06, "loss": 0.6801, "step": 4175 }, { "epoch": 0.35367351259792507, "grad_norm": 1.3982621357089182, "learning_rate": 7.495643210135406e-06, "loss": 0.6325, "step": 4176 }, { "epoch": 0.3537582045310184, "grad_norm": 1.4366843390933763, "learning_rate": 7.494454556308111e-06, "loss": 0.6938, "step": 4177 }, { "epoch": 0.35384289646411177, "grad_norm": 0.5757513326047167, "learning_rate": 7.493265714759903e-06, "loss": 0.8262, "step": 4178 }, { "epoch": 0.3539275883972052, "grad_norm": 1.7055678085156682, "learning_rate": 7.492076685580245e-06, "loss": 0.6616, "step": 4179 }, { "epoch": 0.3540122803302985, "grad_norm": 0.6136886087692307, "learning_rate": 7.490887468858622e-06, "loss": 0.853, "step": 4180 }, { "epoch": 0.35409697226339193, "grad_norm": 1.5584991284841632, "learning_rate": 7.489698064684527e-06, "loss": 0.6687, "step": 4181 }, { "epoch": 0.3541816641964853, "grad_norm": 1.559051469802893, "learning_rate": 7.4885084731474685e-06, "loss": 0.667, "step": 4182 }, { "epoch": 0.35426635612957863, "grad_norm": 0.6154202583356794, "learning_rate": 7.487318694336971e-06, "loss": 0.8725, "step": 4183 }, { "epoch": 0.35435104806267204, "grad_norm": 1.166392985982564, "learning_rate": 7.48612872834257e-06, "loss": 0.624, "step": 4184 }, { "epoch": 0.3544357399957654, "grad_norm": 1.2542842561916667, "learning_rate": 7.484938575253818e-06, "loss": 0.6517, "step": 4185 }, { "epoch": 0.3545204319288588, "grad_norm": 1.384998713269079, "learning_rate": 7.483748235160279e-06, "loss": 0.6624, "step": 4186 }, { "epoch": 0.35460512386195214, "grad_norm": 1.7036976488138422, "learning_rate": 7.482557708151535e-06, "loss": 0.6988, "step": 4187 }, { "epoch": 0.35468981579504555, "grad_norm": 1.5761735552388993, "learning_rate": 7.481366994317176e-06, "loss": 0.6201, "step": 4188 }, { "epoch": 0.3547745077281389, "grad_norm": 4.675499222954974, "learning_rate": 7.4801760937468116e-06, "loss": 0.6371, "step": 4189 }, { "epoch": 0.35485919966123225, "grad_norm": 1.6775681418222204, "learning_rate": 7.478985006530062e-06, "loss": 0.6736, "step": 4190 }, { "epoch": 0.35494389159432566, "grad_norm": 2.16682483719026, "learning_rate": 7.477793732756565e-06, "loss": 0.6174, "step": 4191 }, { "epoch": 0.355028583527419, "grad_norm": 1.2687351113619225, "learning_rate": 7.47660227251597e-06, "loss": 0.6708, "step": 4192 }, { "epoch": 0.3551132754605124, "grad_norm": 1.2402459758517297, "learning_rate": 7.475410625897937e-06, "loss": 0.6824, "step": 4193 }, { "epoch": 0.35519796739360576, "grad_norm": 1.7054547497556847, "learning_rate": 7.474218792992149e-06, "loss": 0.6101, "step": 4194 }, { "epoch": 0.3552826593266991, "grad_norm": 1.2812354540387623, "learning_rate": 7.473026773888294e-06, "loss": 0.6851, "step": 4195 }, { "epoch": 0.3553673512597925, "grad_norm": 1.2346558378178434, "learning_rate": 7.47183456867608e-06, "loss": 0.5832, "step": 4196 }, { "epoch": 0.35545204319288587, "grad_norm": 1.3664675784727727, "learning_rate": 7.470642177445224e-06, "loss": 0.6179, "step": 4197 }, { "epoch": 0.3555367351259793, "grad_norm": 1.4102009061690999, "learning_rate": 7.469449600285463e-06, "loss": 0.5813, "step": 4198 }, { "epoch": 0.3556214270590726, "grad_norm": 1.2539797001112938, "learning_rate": 7.468256837286544e-06, "loss": 0.5959, "step": 4199 }, { "epoch": 0.355706118992166, "grad_norm": 1.3437165381920508, "learning_rate": 7.467063888538226e-06, "loss": 0.6313, "step": 4200 }, { "epoch": 0.3557908109252594, "grad_norm": 1.223809310512514, "learning_rate": 7.465870754130287e-06, "loss": 0.6976, "step": 4201 }, { "epoch": 0.35587550285835273, "grad_norm": 1.2615569278614207, "learning_rate": 7.4646774341525176e-06, "loss": 0.6421, "step": 4202 }, { "epoch": 0.35596019479144614, "grad_norm": 0.6563150928048699, "learning_rate": 7.463483928694718e-06, "loss": 0.8558, "step": 4203 }, { "epoch": 0.3560448867245395, "grad_norm": 1.4133795912757658, "learning_rate": 7.46229023784671e-06, "loss": 0.6727, "step": 4204 }, { "epoch": 0.35612957865763284, "grad_norm": 1.3064498952771446, "learning_rate": 7.461096361698322e-06, "loss": 0.6618, "step": 4205 }, { "epoch": 0.35621427059072625, "grad_norm": 2.6955508175321152, "learning_rate": 7.4599023003394025e-06, "loss": 0.6443, "step": 4206 }, { "epoch": 0.3562989625238196, "grad_norm": 1.3515522614853377, "learning_rate": 7.458708053859807e-06, "loss": 0.6465, "step": 4207 }, { "epoch": 0.356383654456913, "grad_norm": 2.348507410961193, "learning_rate": 7.457513622349412e-06, "loss": 0.6342, "step": 4208 }, { "epoch": 0.35646834639000635, "grad_norm": 1.3355338548924378, "learning_rate": 7.4563190058981026e-06, "loss": 0.6479, "step": 4209 }, { "epoch": 0.3565530383230997, "grad_norm": 1.160524599354825, "learning_rate": 7.455124204595783e-06, "loss": 0.6739, "step": 4210 }, { "epoch": 0.3566377302561931, "grad_norm": 2.6125844752581253, "learning_rate": 7.453929218532365e-06, "loss": 0.6713, "step": 4211 }, { "epoch": 0.35672242218928646, "grad_norm": 0.6587363378270137, "learning_rate": 7.452734047797781e-06, "loss": 0.7995, "step": 4212 }, { "epoch": 0.35680711412237986, "grad_norm": 1.4981634207729888, "learning_rate": 7.45153869248197e-06, "loss": 0.6886, "step": 4213 }, { "epoch": 0.3568918060554732, "grad_norm": 1.215254034429111, "learning_rate": 7.450343152674891e-06, "loss": 0.6257, "step": 4214 }, { "epoch": 0.35697649798856657, "grad_norm": 1.2316526371781527, "learning_rate": 7.449147428466515e-06, "loss": 0.6284, "step": 4215 }, { "epoch": 0.35706118992165997, "grad_norm": 1.2657623729437013, "learning_rate": 7.4479515199468275e-06, "loss": 0.6406, "step": 4216 }, { "epoch": 0.3571458818547533, "grad_norm": 2.2255812958684476, "learning_rate": 7.446755427205824e-06, "loss": 0.6539, "step": 4217 }, { "epoch": 0.35723057378784673, "grad_norm": 1.278343815940123, "learning_rate": 7.445559150333519e-06, "loss": 0.6159, "step": 4218 }, { "epoch": 0.3573152657209401, "grad_norm": 1.1743713243075973, "learning_rate": 7.4443626894199385e-06, "loss": 0.6117, "step": 4219 }, { "epoch": 0.35739995765403343, "grad_norm": 1.1837502362298884, "learning_rate": 7.443166044555121e-06, "loss": 0.5803, "step": 4220 }, { "epoch": 0.35748464958712683, "grad_norm": 2.715672389131831, "learning_rate": 7.441969215829122e-06, "loss": 0.5895, "step": 4221 }, { "epoch": 0.3575693415202202, "grad_norm": 1.5436646999860584, "learning_rate": 7.440772203332008e-06, "loss": 0.6715, "step": 4222 }, { "epoch": 0.3576540334533136, "grad_norm": 1.1945360159339355, "learning_rate": 7.439575007153863e-06, "loss": 0.6256, "step": 4223 }, { "epoch": 0.35773872538640694, "grad_norm": 1.72542806747792, "learning_rate": 7.438377627384778e-06, "loss": 0.6152, "step": 4224 }, { "epoch": 0.3578234173195003, "grad_norm": 0.5951071565308176, "learning_rate": 7.437180064114868e-06, "loss": 0.8782, "step": 4225 }, { "epoch": 0.3579081092525937, "grad_norm": 0.6474670553192562, "learning_rate": 7.435982317434251e-06, "loss": 0.8889, "step": 4226 }, { "epoch": 0.35799280118568705, "grad_norm": 1.2533639960813345, "learning_rate": 7.434784387433065e-06, "loss": 0.6062, "step": 4227 }, { "epoch": 0.35807749311878045, "grad_norm": 1.561823872323078, "learning_rate": 7.433586274201461e-06, "loss": 0.7, "step": 4228 }, { "epoch": 0.3581621850518738, "grad_norm": 1.4361690411069568, "learning_rate": 7.4323879778296045e-06, "loss": 0.6735, "step": 4229 }, { "epoch": 0.35824687698496716, "grad_norm": 1.5804349102867443, "learning_rate": 7.431189498407672e-06, "loss": 0.6264, "step": 4230 }, { "epoch": 0.35833156891806056, "grad_norm": 0.6199231140927551, "learning_rate": 7.429990836025855e-06, "loss": 0.8227, "step": 4231 }, { "epoch": 0.3584162608511539, "grad_norm": 1.332048574939794, "learning_rate": 7.428791990774361e-06, "loss": 0.6603, "step": 4232 }, { "epoch": 0.3585009527842473, "grad_norm": 1.2731862254848114, "learning_rate": 7.42759296274341e-06, "loss": 0.7113, "step": 4233 }, { "epoch": 0.35858564471734067, "grad_norm": 1.3141721971976752, "learning_rate": 7.426393752023232e-06, "loss": 0.6583, "step": 4234 }, { "epoch": 0.358670336650434, "grad_norm": 1.5060253154700178, "learning_rate": 7.4251943587040755e-06, "loss": 0.6361, "step": 4235 }, { "epoch": 0.3587550285835274, "grad_norm": 1.4424747800091005, "learning_rate": 7.4239947828762025e-06, "loss": 0.6587, "step": 4236 }, { "epoch": 0.3588397205166208, "grad_norm": 1.4754574668370206, "learning_rate": 7.422795024629888e-06, "loss": 0.6594, "step": 4237 }, { "epoch": 0.3589244124497142, "grad_norm": 2.080877765565573, "learning_rate": 7.421595084055415e-06, "loss": 0.655, "step": 4238 }, { "epoch": 0.35900910438280753, "grad_norm": 1.2832607001395193, "learning_rate": 7.420394961243092e-06, "loss": 0.6667, "step": 4239 }, { "epoch": 0.35909379631590094, "grad_norm": 1.5087643175734782, "learning_rate": 7.419194656283229e-06, "loss": 0.6733, "step": 4240 }, { "epoch": 0.3591784882489943, "grad_norm": 1.2648734936172261, "learning_rate": 7.417994169266159e-06, "loss": 0.6026, "step": 4241 }, { "epoch": 0.35926318018208764, "grad_norm": 1.3286208338336187, "learning_rate": 7.416793500282224e-06, "loss": 0.6651, "step": 4242 }, { "epoch": 0.35934787211518104, "grad_norm": 1.3203290716680387, "learning_rate": 7.4155926494217814e-06, "loss": 0.6464, "step": 4243 }, { "epoch": 0.3594325640482744, "grad_norm": 1.664154465611067, "learning_rate": 7.414391616775201e-06, "loss": 0.66, "step": 4244 }, { "epoch": 0.3595172559813678, "grad_norm": 1.5371514798033055, "learning_rate": 7.413190402432865e-06, "loss": 0.5987, "step": 4245 }, { "epoch": 0.35960194791446115, "grad_norm": 1.7161572006186152, "learning_rate": 7.411989006485173e-06, "loss": 0.6454, "step": 4246 }, { "epoch": 0.3596866398475545, "grad_norm": 1.9770495234907572, "learning_rate": 7.4107874290225365e-06, "loss": 0.5978, "step": 4247 }, { "epoch": 0.3597713317806479, "grad_norm": 1.3700837524177798, "learning_rate": 7.409585670135382e-06, "loss": 0.643, "step": 4248 }, { "epoch": 0.35985602371374126, "grad_norm": 0.6329934869399191, "learning_rate": 7.408383729914144e-06, "loss": 0.8524, "step": 4249 }, { "epoch": 0.35994071564683466, "grad_norm": 1.5196986965280608, "learning_rate": 7.4071816084492775e-06, "loss": 0.6006, "step": 4250 }, { "epoch": 0.360025407579928, "grad_norm": 1.188285655064094, "learning_rate": 7.40597930583125e-06, "loss": 0.6487, "step": 4251 }, { "epoch": 0.36011009951302136, "grad_norm": 1.5811422288462416, "learning_rate": 7.404776822150538e-06, "loss": 0.673, "step": 4252 }, { "epoch": 0.36019479144611477, "grad_norm": 1.5153022308815247, "learning_rate": 7.403574157497637e-06, "loss": 0.659, "step": 4253 }, { "epoch": 0.3602794833792081, "grad_norm": 1.1816836109894149, "learning_rate": 7.402371311963054e-06, "loss": 0.6448, "step": 4254 }, { "epoch": 0.3603641753123015, "grad_norm": 1.3396974853490313, "learning_rate": 7.401168285637307e-06, "loss": 0.6801, "step": 4255 }, { "epoch": 0.3604488672453949, "grad_norm": 1.521823708399096, "learning_rate": 7.399965078610931e-06, "loss": 0.642, "step": 4256 }, { "epoch": 0.3605335591784882, "grad_norm": 1.2486557605169626, "learning_rate": 7.398761690974477e-06, "loss": 0.6821, "step": 4257 }, { "epoch": 0.36061825111158163, "grad_norm": 1.4192650556913817, "learning_rate": 7.397558122818502e-06, "loss": 0.621, "step": 4258 }, { "epoch": 0.360702943044675, "grad_norm": 4.374967564508889, "learning_rate": 7.396354374233581e-06, "loss": 0.6192, "step": 4259 }, { "epoch": 0.3607876349777684, "grad_norm": 1.4338189411246753, "learning_rate": 7.395150445310308e-06, "loss": 0.6609, "step": 4260 }, { "epoch": 0.36087232691086174, "grad_norm": 1.4707011763799944, "learning_rate": 7.3939463361392785e-06, "loss": 0.6139, "step": 4261 }, { "epoch": 0.3609570188439551, "grad_norm": 1.7259106757786185, "learning_rate": 7.39274204681111e-06, "loss": 0.708, "step": 4262 }, { "epoch": 0.3610417107770485, "grad_norm": 1.3642198114710642, "learning_rate": 7.391537577416433e-06, "loss": 0.6717, "step": 4263 }, { "epoch": 0.36112640271014185, "grad_norm": 1.536391265807171, "learning_rate": 7.390332928045892e-06, "loss": 0.6399, "step": 4264 }, { "epoch": 0.36121109464323525, "grad_norm": 1.3367512205137007, "learning_rate": 7.389128098790138e-06, "loss": 0.6552, "step": 4265 }, { "epoch": 0.3612957865763286, "grad_norm": 1.517879665087701, "learning_rate": 7.387923089739844e-06, "loss": 0.6443, "step": 4266 }, { "epoch": 0.36138047850942195, "grad_norm": 1.340698147171734, "learning_rate": 7.386717900985692e-06, "loss": 0.5544, "step": 4267 }, { "epoch": 0.36146517044251536, "grad_norm": 1.933267725604034, "learning_rate": 7.3855125326183815e-06, "loss": 0.6609, "step": 4268 }, { "epoch": 0.3615498623756087, "grad_norm": 1.371681732171186, "learning_rate": 7.38430698472862e-06, "loss": 0.6582, "step": 4269 }, { "epoch": 0.3616345543087021, "grad_norm": 1.2456066072192238, "learning_rate": 7.3831012574071335e-06, "loss": 0.6366, "step": 4270 }, { "epoch": 0.36171924624179547, "grad_norm": 1.379032525818193, "learning_rate": 7.381895350744657e-06, "loss": 0.6269, "step": 4271 }, { "epoch": 0.3618039381748888, "grad_norm": 1.297263279931788, "learning_rate": 7.380689264831944e-06, "loss": 0.6265, "step": 4272 }, { "epoch": 0.3618886301079822, "grad_norm": 1.1968270889569512, "learning_rate": 7.3794829997597565e-06, "loss": 0.6479, "step": 4273 }, { "epoch": 0.3619733220410756, "grad_norm": 1.4517745928778953, "learning_rate": 7.378276555618873e-06, "loss": 0.6473, "step": 4274 }, { "epoch": 0.362058013974169, "grad_norm": 1.287586127429343, "learning_rate": 7.377069932500085e-06, "loss": 0.6929, "step": 4275 }, { "epoch": 0.36214270590726233, "grad_norm": 1.4001991596671166, "learning_rate": 7.375863130494199e-06, "loss": 0.6288, "step": 4276 }, { "epoch": 0.3622273978403557, "grad_norm": 1.6612538668615524, "learning_rate": 7.37465614969203e-06, "loss": 0.7035, "step": 4277 }, { "epoch": 0.3623120897734491, "grad_norm": 1.439573018743108, "learning_rate": 7.373448990184412e-06, "loss": 0.6337, "step": 4278 }, { "epoch": 0.36239678170654244, "grad_norm": 1.3615968148276207, "learning_rate": 7.3722416520621885e-06, "loss": 0.6682, "step": 4279 }, { "epoch": 0.36248147363963584, "grad_norm": 1.415354355483876, "learning_rate": 7.371034135416219e-06, "loss": 0.6552, "step": 4280 }, { "epoch": 0.3625661655727292, "grad_norm": 1.4995370450756682, "learning_rate": 7.369826440337378e-06, "loss": 0.6733, "step": 4281 }, { "epoch": 0.36265085750582254, "grad_norm": 3.2250702526296715, "learning_rate": 7.368618566916548e-06, "loss": 0.6481, "step": 4282 }, { "epoch": 0.36273554943891595, "grad_norm": 1.4542840607151915, "learning_rate": 7.367410515244627e-06, "loss": 0.6889, "step": 4283 }, { "epoch": 0.3628202413720093, "grad_norm": 1.5517436149575157, "learning_rate": 7.366202285412528e-06, "loss": 0.6234, "step": 4284 }, { "epoch": 0.3629049333051027, "grad_norm": 1.6283292879477902, "learning_rate": 7.36499387751118e-06, "loss": 0.6115, "step": 4285 }, { "epoch": 0.36298962523819606, "grad_norm": 1.8640195063525922, "learning_rate": 7.363785291631518e-06, "loss": 0.6291, "step": 4286 }, { "epoch": 0.36307431717128946, "grad_norm": 1.1540212177775164, "learning_rate": 7.362576527864494e-06, "loss": 0.6324, "step": 4287 }, { "epoch": 0.3631590091043828, "grad_norm": 1.4627983549156591, "learning_rate": 7.361367586301078e-06, "loss": 0.6563, "step": 4288 }, { "epoch": 0.36324370103747616, "grad_norm": 0.6663080923295591, "learning_rate": 7.360158467032248e-06, "loss": 0.8497, "step": 4289 }, { "epoch": 0.36332839297056957, "grad_norm": 1.4445312024413999, "learning_rate": 7.358949170148994e-06, "loss": 0.6043, "step": 4290 }, { "epoch": 0.3634130849036629, "grad_norm": 1.8928112274168747, "learning_rate": 7.357739695742326e-06, "loss": 0.6541, "step": 4291 }, { "epoch": 0.3634977768367563, "grad_norm": 1.3071712881614137, "learning_rate": 7.356530043903259e-06, "loss": 0.6655, "step": 4292 }, { "epoch": 0.3635824687698497, "grad_norm": 1.9022054266099104, "learning_rate": 7.355320214722828e-06, "loss": 0.6723, "step": 4293 }, { "epoch": 0.363667160702943, "grad_norm": 1.2021580822893387, "learning_rate": 7.3541102082920775e-06, "loss": 0.7032, "step": 4294 }, { "epoch": 0.36375185263603643, "grad_norm": 1.4187240038089628, "learning_rate": 7.352900024702071e-06, "loss": 0.6462, "step": 4295 }, { "epoch": 0.3638365445691298, "grad_norm": 1.5029710268192094, "learning_rate": 7.351689664043877e-06, "loss": 0.6687, "step": 4296 }, { "epoch": 0.3639212365022232, "grad_norm": 3.72758562063949, "learning_rate": 7.3504791264085825e-06, "loss": 0.6784, "step": 4297 }, { "epoch": 0.36400592843531654, "grad_norm": 1.2457849420242042, "learning_rate": 7.349268411887289e-06, "loss": 0.6391, "step": 4298 }, { "epoch": 0.3640906203684099, "grad_norm": 1.5466791827353603, "learning_rate": 7.348057520571107e-06, "loss": 0.6448, "step": 4299 }, { "epoch": 0.3641753123015033, "grad_norm": 1.386288644391921, "learning_rate": 7.346846452551162e-06, "loss": 0.6248, "step": 4300 }, { "epoch": 0.36426000423459665, "grad_norm": 1.4389523157780482, "learning_rate": 7.3456352079185945e-06, "loss": 0.7442, "step": 4301 }, { "epoch": 0.36434469616769005, "grad_norm": 8.666956249368228, "learning_rate": 7.344423786764557e-06, "loss": 0.6245, "step": 4302 }, { "epoch": 0.3644293881007834, "grad_norm": 1.5557988074816382, "learning_rate": 7.343212189180217e-06, "loss": 0.6992, "step": 4303 }, { "epoch": 0.36451408003387675, "grad_norm": 1.8073197160419865, "learning_rate": 7.342000415256749e-06, "loss": 0.5957, "step": 4304 }, { "epoch": 0.36459877196697016, "grad_norm": 1.4825099814459788, "learning_rate": 7.34078846508535e-06, "loss": 0.6577, "step": 4305 }, { "epoch": 0.3646834639000635, "grad_norm": 1.5149096079456255, "learning_rate": 7.339576338757224e-06, "loss": 0.6083, "step": 4306 }, { "epoch": 0.3647681558331569, "grad_norm": 9.237476720810673, "learning_rate": 7.338364036363589e-06, "loss": 0.5693, "step": 4307 }, { "epoch": 0.36485284776625027, "grad_norm": 1.5031470256512063, "learning_rate": 7.337151557995679e-06, "loss": 0.6775, "step": 4308 }, { "epoch": 0.3649375396993436, "grad_norm": 1.4338087662900216, "learning_rate": 7.335938903744737e-06, "loss": 0.6434, "step": 4309 }, { "epoch": 0.365022231632437, "grad_norm": 0.5935449960611296, "learning_rate": 7.3347260737020254e-06, "loss": 0.8609, "step": 4310 }, { "epoch": 0.3651069235655304, "grad_norm": 0.7094678739251504, "learning_rate": 7.333513067958812e-06, "loss": 0.8325, "step": 4311 }, { "epoch": 0.3651916154986238, "grad_norm": 1.597351543877136, "learning_rate": 7.332299886606383e-06, "loss": 0.678, "step": 4312 }, { "epoch": 0.36527630743171713, "grad_norm": 1.5782200535257138, "learning_rate": 7.33108652973604e-06, "loss": 0.6586, "step": 4313 }, { "epoch": 0.3653609993648105, "grad_norm": 0.6077832540993091, "learning_rate": 7.32987299743909e-06, "loss": 0.8841, "step": 4314 }, { "epoch": 0.3654456912979039, "grad_norm": 1.6440933787116945, "learning_rate": 7.3286592898068606e-06, "loss": 0.6357, "step": 4315 }, { "epoch": 0.36553038323099724, "grad_norm": 1.6218411047841064, "learning_rate": 7.327445406930688e-06, "loss": 0.6628, "step": 4316 }, { "epoch": 0.36561507516409064, "grad_norm": 2.0123020355169148, "learning_rate": 7.326231348901924e-06, "loss": 0.6648, "step": 4317 }, { "epoch": 0.365699767097184, "grad_norm": 1.3347890621815564, "learning_rate": 7.325017115811934e-06, "loss": 0.6596, "step": 4318 }, { "epoch": 0.36578445903027734, "grad_norm": 1.4837721527536338, "learning_rate": 7.323802707752095e-06, "loss": 0.6861, "step": 4319 }, { "epoch": 0.36586915096337075, "grad_norm": 2.3629428756871684, "learning_rate": 7.322588124813795e-06, "loss": 0.6193, "step": 4320 }, { "epoch": 0.3659538428964641, "grad_norm": 1.8229155054609363, "learning_rate": 7.3213733670884425e-06, "loss": 0.7184, "step": 4321 }, { "epoch": 0.3660385348295575, "grad_norm": 1.8905437781639371, "learning_rate": 7.320158434667449e-06, "loss": 0.6408, "step": 4322 }, { "epoch": 0.36612322676265086, "grad_norm": 1.1744574948189361, "learning_rate": 7.31894332764225e-06, "loss": 0.6173, "step": 4323 }, { "epoch": 0.3662079186957442, "grad_norm": 1.536675139136023, "learning_rate": 7.3177280461042856e-06, "loss": 0.5888, "step": 4324 }, { "epoch": 0.3662926106288376, "grad_norm": 1.590117594834308, "learning_rate": 7.316512590145011e-06, "loss": 0.6037, "step": 4325 }, { "epoch": 0.36637730256193096, "grad_norm": 0.7022068831936085, "learning_rate": 7.315296959855899e-06, "loss": 0.874, "step": 4326 }, { "epoch": 0.36646199449502437, "grad_norm": 1.7829875189950124, "learning_rate": 7.314081155328431e-06, "loss": 0.7032, "step": 4327 }, { "epoch": 0.3665466864281177, "grad_norm": 1.3565320934753156, "learning_rate": 7.3128651766541015e-06, "loss": 0.6347, "step": 4328 }, { "epoch": 0.36663137836121107, "grad_norm": 0.6903681944138279, "learning_rate": 7.3116490239244205e-06, "loss": 0.9077, "step": 4329 }, { "epoch": 0.3667160702943045, "grad_norm": 1.8932066403653753, "learning_rate": 7.31043269723091e-06, "loss": 0.633, "step": 4330 }, { "epoch": 0.3668007622273978, "grad_norm": 0.681086805856303, "learning_rate": 7.309216196665106e-06, "loss": 0.8405, "step": 4331 }, { "epoch": 0.36688545416049123, "grad_norm": 1.6386663438892055, "learning_rate": 7.307999522318553e-06, "loss": 0.6763, "step": 4332 }, { "epoch": 0.3669701460935846, "grad_norm": 1.7461530224879451, "learning_rate": 7.3067826742828155e-06, "loss": 0.6635, "step": 4333 }, { "epoch": 0.36705483802667793, "grad_norm": 1.35724935828444, "learning_rate": 7.305565652649467e-06, "loss": 0.6954, "step": 4334 }, { "epoch": 0.36713952995977134, "grad_norm": 1.16731108194904, "learning_rate": 7.304348457510093e-06, "loss": 0.9052, "step": 4335 }, { "epoch": 0.3672242218928647, "grad_norm": 1.6307052670881603, "learning_rate": 7.3031310889562965e-06, "loss": 0.6502, "step": 4336 }, { "epoch": 0.3673089138259581, "grad_norm": 2.297196085676602, "learning_rate": 7.301913547079691e-06, "loss": 0.6381, "step": 4337 }, { "epoch": 0.36739360575905144, "grad_norm": 1.7445023318834094, "learning_rate": 7.300695831971901e-06, "loss": 0.668, "step": 4338 }, { "epoch": 0.36747829769214485, "grad_norm": 2.1737554140499022, "learning_rate": 7.299477943724567e-06, "loss": 0.5984, "step": 4339 }, { "epoch": 0.3675629896252382, "grad_norm": 1.2840820839386191, "learning_rate": 7.2982598824293415e-06, "loss": 0.7208, "step": 4340 }, { "epoch": 0.36764768155833155, "grad_norm": 1.5411184044518185, "learning_rate": 7.29704164817789e-06, "loss": 0.6335, "step": 4341 }, { "epoch": 0.36773237349142496, "grad_norm": 1.381241306677133, "learning_rate": 7.29582324106189e-06, "loss": 0.6999, "step": 4342 }, { "epoch": 0.3678170654245183, "grad_norm": 1.3643228340713585, "learning_rate": 7.294604661173035e-06, "loss": 0.6563, "step": 4343 }, { "epoch": 0.3679017573576117, "grad_norm": 1.4956986019169136, "learning_rate": 7.293385908603029e-06, "loss": 0.6471, "step": 4344 }, { "epoch": 0.36798644929070506, "grad_norm": 0.7135925622103231, "learning_rate": 7.292166983443589e-06, "loss": 0.8602, "step": 4345 }, { "epoch": 0.3680711412237984, "grad_norm": 1.2366482185150547, "learning_rate": 7.290947885786445e-06, "loss": 0.6675, "step": 4346 }, { "epoch": 0.3681558331568918, "grad_norm": 2.4148353318047455, "learning_rate": 7.289728615723344e-06, "loss": 0.6189, "step": 4347 }, { "epoch": 0.36824052508998517, "grad_norm": 1.2282647874828625, "learning_rate": 7.288509173346037e-06, "loss": 0.6471, "step": 4348 }, { "epoch": 0.3683252170230786, "grad_norm": 1.444995628972243, "learning_rate": 7.287289558746299e-06, "loss": 0.6279, "step": 4349 }, { "epoch": 0.3684099089561719, "grad_norm": 2.159837732816359, "learning_rate": 7.286069772015908e-06, "loss": 0.67, "step": 4350 }, { "epoch": 0.3684946008892653, "grad_norm": 3.8519774427785785, "learning_rate": 7.284849813246663e-06, "loss": 0.6361, "step": 4351 }, { "epoch": 0.3685792928223587, "grad_norm": 1.4541307095897045, "learning_rate": 7.28362968253037e-06, "loss": 0.6197, "step": 4352 }, { "epoch": 0.36866398475545203, "grad_norm": 0.6238114546998772, "learning_rate": 7.282409379958849e-06, "loss": 0.8713, "step": 4353 }, { "epoch": 0.36874867668854544, "grad_norm": 1.226887883181757, "learning_rate": 7.2811889056239394e-06, "loss": 0.5982, "step": 4354 }, { "epoch": 0.3688333686216388, "grad_norm": 1.2307858868397734, "learning_rate": 7.2799682596174835e-06, "loss": 0.6445, "step": 4355 }, { "epoch": 0.36891806055473214, "grad_norm": 1.3030465312819748, "learning_rate": 7.278747442031343e-06, "loss": 0.6456, "step": 4356 }, { "epoch": 0.36900275248782555, "grad_norm": 0.6317467905762294, "learning_rate": 7.277526452957392e-06, "loss": 0.8991, "step": 4357 }, { "epoch": 0.3690874444209189, "grad_norm": 1.4457104769098548, "learning_rate": 7.276305292487514e-06, "loss": 0.6365, "step": 4358 }, { "epoch": 0.3691721363540123, "grad_norm": 1.4513575280129978, "learning_rate": 7.27508396071361e-06, "loss": 0.6517, "step": 4359 }, { "epoch": 0.36925682828710565, "grad_norm": 1.167673677758966, "learning_rate": 7.273862457727591e-06, "loss": 0.6657, "step": 4360 }, { "epoch": 0.369341520220199, "grad_norm": 0.6261749087240556, "learning_rate": 7.272640783621381e-06, "loss": 0.8838, "step": 4361 }, { "epoch": 0.3694262121532924, "grad_norm": 1.8644481928978445, "learning_rate": 7.271418938486918e-06, "loss": 0.6251, "step": 4362 }, { "epoch": 0.36951090408638576, "grad_norm": 1.3214252975037257, "learning_rate": 7.270196922416151e-06, "loss": 0.6383, "step": 4363 }, { "epoch": 0.36959559601947917, "grad_norm": 1.497900827884859, "learning_rate": 7.268974735501047e-06, "loss": 0.628, "step": 4364 }, { "epoch": 0.3696802879525725, "grad_norm": 1.3067494303971252, "learning_rate": 7.267752377833577e-06, "loss": 0.625, "step": 4365 }, { "epoch": 0.36976497988566587, "grad_norm": 1.5063606896138013, "learning_rate": 7.266529849505732e-06, "loss": 0.6534, "step": 4366 }, { "epoch": 0.3698496718187593, "grad_norm": 1.9474653812083034, "learning_rate": 7.265307150609515e-06, "loss": 0.639, "step": 4367 }, { "epoch": 0.3699343637518526, "grad_norm": 1.7786693862280327, "learning_rate": 7.26408428123694e-06, "loss": 0.6619, "step": 4368 }, { "epoch": 0.37001905568494603, "grad_norm": 1.3083888974775668, "learning_rate": 7.262861241480035e-06, "loss": 0.6378, "step": 4369 }, { "epoch": 0.3701037476180394, "grad_norm": 1.5949559824341737, "learning_rate": 7.261638031430836e-06, "loss": 0.6491, "step": 4370 }, { "epoch": 0.37018843955113273, "grad_norm": 1.519540668673027, "learning_rate": 7.2604146511814e-06, "loss": 0.7127, "step": 4371 }, { "epoch": 0.37027313148422614, "grad_norm": 1.4337173709509816, "learning_rate": 7.259191100823794e-06, "loss": 0.6353, "step": 4372 }, { "epoch": 0.3703578234173195, "grad_norm": 1.9110461332665007, "learning_rate": 7.257967380450093e-06, "loss": 0.5983, "step": 4373 }, { "epoch": 0.3704425153504129, "grad_norm": 0.6702782779002766, "learning_rate": 7.25674349015239e-06, "loss": 0.847, "step": 4374 }, { "epoch": 0.37052720728350624, "grad_norm": 1.7891102623423598, "learning_rate": 7.25551943002279e-06, "loss": 0.633, "step": 4375 }, { "epoch": 0.3706118992165996, "grad_norm": 0.6097927081902131, "learning_rate": 7.254295200153409e-06, "loss": 0.8578, "step": 4376 }, { "epoch": 0.370696591149693, "grad_norm": 1.4703970211812387, "learning_rate": 7.253070800636378e-06, "loss": 0.6605, "step": 4377 }, { "epoch": 0.37078128308278635, "grad_norm": 1.4464114940219346, "learning_rate": 7.251846231563837e-06, "loss": 0.6537, "step": 4378 }, { "epoch": 0.37086597501587976, "grad_norm": 1.2532953716210369, "learning_rate": 7.250621493027945e-06, "loss": 0.6079, "step": 4379 }, { "epoch": 0.3709506669489731, "grad_norm": 2.224451398638857, "learning_rate": 7.249396585120868e-06, "loss": 0.6467, "step": 4380 }, { "epoch": 0.37103535888206646, "grad_norm": 1.767496302542473, "learning_rate": 7.248171507934786e-06, "loss": 0.6784, "step": 4381 }, { "epoch": 0.37112005081515986, "grad_norm": 1.7475546022327209, "learning_rate": 7.246946261561892e-06, "loss": 0.644, "step": 4382 }, { "epoch": 0.3712047427482532, "grad_norm": 1.687743876814287, "learning_rate": 7.245720846094396e-06, "loss": 0.6649, "step": 4383 }, { "epoch": 0.3712894346813466, "grad_norm": 1.393812166717249, "learning_rate": 7.2444952616245135e-06, "loss": 0.6445, "step": 4384 }, { "epoch": 0.37137412661443997, "grad_norm": 1.351179506193301, "learning_rate": 7.243269508244478e-06, "loss": 0.6502, "step": 4385 }, { "epoch": 0.3714588185475333, "grad_norm": 1.5100932028260412, "learning_rate": 7.242043586046532e-06, "loss": 0.631, "step": 4386 }, { "epoch": 0.3715435104806267, "grad_norm": 1.4380308916149294, "learning_rate": 7.240817495122936e-06, "loss": 0.6111, "step": 4387 }, { "epoch": 0.3716282024137201, "grad_norm": 1.2547338631273446, "learning_rate": 7.239591235565956e-06, "loss": 0.604, "step": 4388 }, { "epoch": 0.3717128943468135, "grad_norm": 1.6304763076550919, "learning_rate": 7.238364807467877e-06, "loss": 0.5889, "step": 4389 }, { "epoch": 0.37179758627990683, "grad_norm": 1.2865461709108474, "learning_rate": 7.237138210920993e-06, "loss": 0.658, "step": 4390 }, { "epoch": 0.37188227821300024, "grad_norm": 2.6300565101449034, "learning_rate": 7.235911446017613e-06, "loss": 0.6039, "step": 4391 }, { "epoch": 0.3719669701460936, "grad_norm": 0.6727338179501329, "learning_rate": 7.234684512850058e-06, "loss": 0.882, "step": 4392 }, { "epoch": 0.37205166207918694, "grad_norm": 1.8314596722449341, "learning_rate": 7.233457411510659e-06, "loss": 0.6077, "step": 4393 }, { "epoch": 0.37213635401228035, "grad_norm": 1.2652617478186556, "learning_rate": 7.232230142091763e-06, "loss": 0.6676, "step": 4394 }, { "epoch": 0.3722210459453737, "grad_norm": 1.305098768494676, "learning_rate": 7.231002704685728e-06, "loss": 0.653, "step": 4395 }, { "epoch": 0.3723057378784671, "grad_norm": 0.6125889149517377, "learning_rate": 7.229775099384927e-06, "loss": 0.8604, "step": 4396 }, { "epoch": 0.37239042981156045, "grad_norm": 1.7985234458141512, "learning_rate": 7.228547326281743e-06, "loss": 0.5991, "step": 4397 }, { "epoch": 0.3724751217446538, "grad_norm": 0.6080102163383178, "learning_rate": 7.227319385468571e-06, "loss": 0.8479, "step": 4398 }, { "epoch": 0.3725598136777472, "grad_norm": 1.3639348066170158, "learning_rate": 7.22609127703782e-06, "loss": 0.6435, "step": 4399 }, { "epoch": 0.37264450561084056, "grad_norm": 1.8074574913427315, "learning_rate": 7.224863001081914e-06, "loss": 0.6551, "step": 4400 }, { "epoch": 0.37272919754393397, "grad_norm": 1.390009931533464, "learning_rate": 7.223634557693287e-06, "loss": 0.6083, "step": 4401 }, { "epoch": 0.3728138894770273, "grad_norm": 1.3491087822390342, "learning_rate": 7.2224059469643824e-06, "loss": 0.635, "step": 4402 }, { "epoch": 0.37289858141012067, "grad_norm": 1.3678492111945404, "learning_rate": 7.221177168987663e-06, "loss": 0.6346, "step": 4403 }, { "epoch": 0.3729832733432141, "grad_norm": 2.346008371806874, "learning_rate": 7.219948223855602e-06, "loss": 0.6796, "step": 4404 }, { "epoch": 0.3730679652763074, "grad_norm": 1.4106193896310184, "learning_rate": 7.218719111660679e-06, "loss": 0.6449, "step": 4405 }, { "epoch": 0.37315265720940083, "grad_norm": 1.8042754928507891, "learning_rate": 7.217489832495396e-06, "loss": 0.6141, "step": 4406 }, { "epoch": 0.3732373491424942, "grad_norm": 2.933717525176469, "learning_rate": 7.21626038645226e-06, "loss": 0.6866, "step": 4407 }, { "epoch": 0.37332204107558753, "grad_norm": 1.3244457928371371, "learning_rate": 7.215030773623795e-06, "loss": 0.5818, "step": 4408 }, { "epoch": 0.37340673300868094, "grad_norm": 1.8319147670573834, "learning_rate": 7.213800994102534e-06, "loss": 0.6469, "step": 4409 }, { "epoch": 0.3734914249417743, "grad_norm": 1.2807144684152645, "learning_rate": 7.2125710479810275e-06, "loss": 0.6462, "step": 4410 }, { "epoch": 0.3735761168748677, "grad_norm": 1.2829302798010438, "learning_rate": 7.211340935351832e-06, "loss": 0.7007, "step": 4411 }, { "epoch": 0.37366080880796104, "grad_norm": 1.2755748520132555, "learning_rate": 7.210110656307521e-06, "loss": 0.6505, "step": 4412 }, { "epoch": 0.3737455007410544, "grad_norm": 1.1878938092273668, "learning_rate": 7.208880210940681e-06, "loss": 0.6367, "step": 4413 }, { "epoch": 0.3738301926741478, "grad_norm": 1.7556465228710536, "learning_rate": 7.207649599343909e-06, "loss": 0.6813, "step": 4414 }, { "epoch": 0.37391488460724115, "grad_norm": 1.4800274085020575, "learning_rate": 7.206418821609813e-06, "loss": 0.6359, "step": 4415 }, { "epoch": 0.37399957654033456, "grad_norm": 1.7216143815294644, "learning_rate": 7.205187877831018e-06, "loss": 0.6481, "step": 4416 }, { "epoch": 0.3740842684734279, "grad_norm": 1.7052307742009405, "learning_rate": 7.203956768100159e-06, "loss": 0.6911, "step": 4417 }, { "epoch": 0.37416896040652126, "grad_norm": 0.6746142572261388, "learning_rate": 7.202725492509882e-06, "loss": 0.824, "step": 4418 }, { "epoch": 0.37425365233961466, "grad_norm": 1.5423932023927684, "learning_rate": 7.201494051152846e-06, "loss": 0.6304, "step": 4419 }, { "epoch": 0.374338344272708, "grad_norm": 1.3537417410066794, "learning_rate": 7.200262444121728e-06, "loss": 0.6655, "step": 4420 }, { "epoch": 0.3744230362058014, "grad_norm": 1.3998369824619363, "learning_rate": 7.19903067150921e-06, "loss": 0.653, "step": 4421 }, { "epoch": 0.37450772813889477, "grad_norm": 1.7286555108726587, "learning_rate": 7.197798733407988e-06, "loss": 0.6481, "step": 4422 }, { "epoch": 0.3745924200719881, "grad_norm": 1.5093073717930987, "learning_rate": 7.196566629910773e-06, "loss": 0.6023, "step": 4423 }, { "epoch": 0.3746771120050815, "grad_norm": 1.5657274018577063, "learning_rate": 7.195334361110289e-06, "loss": 0.6249, "step": 4424 }, { "epoch": 0.3747618039381749, "grad_norm": 1.4467020921157916, "learning_rate": 7.19410192709927e-06, "loss": 0.6346, "step": 4425 }, { "epoch": 0.3748464958712683, "grad_norm": 1.6767701208920782, "learning_rate": 7.1928693279704605e-06, "loss": 0.6547, "step": 4426 }, { "epoch": 0.37493118780436163, "grad_norm": 1.603076224405371, "learning_rate": 7.191636563816625e-06, "loss": 0.687, "step": 4427 }, { "epoch": 0.375015879737455, "grad_norm": 1.959307374407026, "learning_rate": 7.19040363473053e-06, "loss": 0.6629, "step": 4428 }, { "epoch": 0.3751005716705484, "grad_norm": 1.8986915420979402, "learning_rate": 7.189170540804964e-06, "loss": 0.6137, "step": 4429 }, { "epoch": 0.37518526360364174, "grad_norm": 5.109011757946306, "learning_rate": 7.187937282132724e-06, "loss": 0.6104, "step": 4430 }, { "epoch": 0.37526995553673514, "grad_norm": 1.5811300205045526, "learning_rate": 7.186703858806617e-06, "loss": 0.6535, "step": 4431 }, { "epoch": 0.3753546474698285, "grad_norm": 1.780595660316022, "learning_rate": 7.1854702709194644e-06, "loss": 0.586, "step": 4432 }, { "epoch": 0.37543933940292185, "grad_norm": 1.7732452058358732, "learning_rate": 7.184236518564101e-06, "loss": 0.6338, "step": 4433 }, { "epoch": 0.37552403133601525, "grad_norm": 1.6280949015291255, "learning_rate": 7.1830026018333755e-06, "loss": 0.6319, "step": 4434 }, { "epoch": 0.3756087232691086, "grad_norm": 2.212815327046643, "learning_rate": 7.1817685208201445e-06, "loss": 0.6444, "step": 4435 }, { "epoch": 0.375693415202202, "grad_norm": 1.4973043304183131, "learning_rate": 7.180534275617278e-06, "loss": 0.6899, "step": 4436 }, { "epoch": 0.37577810713529536, "grad_norm": 2.030637630488732, "learning_rate": 7.17929986631766e-06, "loss": 0.6413, "step": 4437 }, { "epoch": 0.3758627990683887, "grad_norm": 1.5866172788104191, "learning_rate": 7.178065293014189e-06, "loss": 0.622, "step": 4438 }, { "epoch": 0.3759474910014821, "grad_norm": 1.5954192490434458, "learning_rate": 7.176830555799771e-06, "loss": 0.6086, "step": 4439 }, { "epoch": 0.37603218293457547, "grad_norm": 1.6152115214301137, "learning_rate": 7.1755956547673255e-06, "loss": 0.6402, "step": 4440 }, { "epoch": 0.37611687486766887, "grad_norm": 0.6258828198541877, "learning_rate": 7.174360590009788e-06, "loss": 0.856, "step": 4441 }, { "epoch": 0.3762015668007622, "grad_norm": 1.6529867488710384, "learning_rate": 7.173125361620103e-06, "loss": 0.6204, "step": 4442 }, { "epoch": 0.3762862587338556, "grad_norm": 1.6960880384964427, "learning_rate": 7.171889969691226e-06, "loss": 0.5837, "step": 4443 }, { "epoch": 0.376370950666949, "grad_norm": 1.3495073307937564, "learning_rate": 7.170654414316127e-06, "loss": 0.6812, "step": 4444 }, { "epoch": 0.37645564260004233, "grad_norm": 1.5158383074977815, "learning_rate": 7.169418695587791e-06, "loss": 0.5946, "step": 4445 }, { "epoch": 0.37654033453313573, "grad_norm": 2.6057313435835843, "learning_rate": 7.168182813599212e-06, "loss": 0.6341, "step": 4446 }, { "epoch": 0.3766250264662291, "grad_norm": 1.4983225687553179, "learning_rate": 7.1669467684433914e-06, "loss": 0.6428, "step": 4447 }, { "epoch": 0.3767097183993225, "grad_norm": 1.2223264455409375, "learning_rate": 7.165710560213353e-06, "loss": 0.666, "step": 4448 }, { "epoch": 0.37679441033241584, "grad_norm": 1.2353597297700032, "learning_rate": 7.164474189002129e-06, "loss": 0.6443, "step": 4449 }, { "epoch": 0.3768791022655092, "grad_norm": 2.2267919886036647, "learning_rate": 7.163237654902759e-06, "loss": 0.6785, "step": 4450 }, { "epoch": 0.3769637941986026, "grad_norm": 1.572884261318003, "learning_rate": 7.1620009580083014e-06, "loss": 0.6256, "step": 4451 }, { "epoch": 0.37704848613169595, "grad_norm": 1.3640557913958158, "learning_rate": 7.160764098411823e-06, "loss": 0.7104, "step": 4452 }, { "epoch": 0.37713317806478935, "grad_norm": 1.4279576448077727, "learning_rate": 7.159527076206405e-06, "loss": 0.5972, "step": 4453 }, { "epoch": 0.3772178699978827, "grad_norm": 0.6762803580242887, "learning_rate": 7.1582898914851385e-06, "loss": 0.8547, "step": 4454 }, { "epoch": 0.37730256193097605, "grad_norm": 1.3406299823644037, "learning_rate": 7.15705254434113e-06, "loss": 0.6614, "step": 4455 }, { "epoch": 0.37738725386406946, "grad_norm": 1.4192259572910457, "learning_rate": 7.155815034867494e-06, "loss": 0.6504, "step": 4456 }, { "epoch": 0.3774719457971628, "grad_norm": 1.3161411881221377, "learning_rate": 7.154577363157361e-06, "loss": 0.6558, "step": 4457 }, { "epoch": 0.3775566377302562, "grad_norm": 1.6789478497180055, "learning_rate": 7.153339529303873e-06, "loss": 0.645, "step": 4458 }, { "epoch": 0.37764132966334957, "grad_norm": 1.6710191603612028, "learning_rate": 7.152101533400184e-06, "loss": 0.6014, "step": 4459 }, { "epoch": 0.3777260215964429, "grad_norm": 2.5437396272208206, "learning_rate": 7.150863375539458e-06, "loss": 0.6656, "step": 4460 }, { "epoch": 0.3778107135295363, "grad_norm": 1.4766153001380231, "learning_rate": 7.149625055814873e-06, "loss": 0.6968, "step": 4461 }, { "epoch": 0.3778954054626297, "grad_norm": 1.468590828594827, "learning_rate": 7.14838657431962e-06, "loss": 0.6893, "step": 4462 }, { "epoch": 0.3779800973957231, "grad_norm": 0.5884574028289455, "learning_rate": 7.147147931146902e-06, "loss": 0.7731, "step": 4463 }, { "epoch": 0.37806478932881643, "grad_norm": 1.2269086916074685, "learning_rate": 7.1459091263899315e-06, "loss": 0.6565, "step": 4464 }, { "epoch": 0.3781494812619098, "grad_norm": 1.5242695867671734, "learning_rate": 7.144670160141935e-06, "loss": 0.5928, "step": 4465 }, { "epoch": 0.3782341731950032, "grad_norm": 1.2870389845403558, "learning_rate": 7.143431032496155e-06, "loss": 0.656, "step": 4466 }, { "epoch": 0.37831886512809654, "grad_norm": 2.784622377206752, "learning_rate": 7.1421917435458376e-06, "loss": 0.6063, "step": 4467 }, { "epoch": 0.37840355706118994, "grad_norm": 1.699112083951864, "learning_rate": 7.140952293384249e-06, "loss": 0.647, "step": 4468 }, { "epoch": 0.3784882489942833, "grad_norm": 1.2389046196265818, "learning_rate": 7.139712682104663e-06, "loss": 0.6188, "step": 4469 }, { "epoch": 0.37857294092737664, "grad_norm": 0.7097762012946806, "learning_rate": 7.138472909800369e-06, "loss": 0.919, "step": 4470 }, { "epoch": 0.37865763286047005, "grad_norm": 1.863977003349697, "learning_rate": 7.137232976564663e-06, "loss": 0.6714, "step": 4471 }, { "epoch": 0.3787423247935634, "grad_norm": 2.256558715874109, "learning_rate": 7.135992882490858e-06, "loss": 0.6552, "step": 4472 }, { "epoch": 0.3788270167266568, "grad_norm": 1.6239194210979198, "learning_rate": 7.134752627672279e-06, "loss": 0.5971, "step": 4473 }, { "epoch": 0.37891170865975016, "grad_norm": 1.290881064830266, "learning_rate": 7.1335122122022615e-06, "loss": 0.6227, "step": 4474 }, { "epoch": 0.3789964005928435, "grad_norm": 1.5992118076627786, "learning_rate": 7.13227163617415e-06, "loss": 0.6654, "step": 4475 }, { "epoch": 0.3790810925259369, "grad_norm": 1.4113895720348482, "learning_rate": 7.1310308996813105e-06, "loss": 0.632, "step": 4476 }, { "epoch": 0.37916578445903026, "grad_norm": 1.2196755026978885, "learning_rate": 7.129790002817109e-06, "loss": 0.6737, "step": 4477 }, { "epoch": 0.37925047639212367, "grad_norm": 1.2923078462073914, "learning_rate": 7.1285489456749315e-06, "loss": 0.5959, "step": 4478 }, { "epoch": 0.379335168325217, "grad_norm": 1.6429347050847445, "learning_rate": 7.127307728348176e-06, "loss": 0.6508, "step": 4479 }, { "epoch": 0.37941986025831037, "grad_norm": 1.4488499384716997, "learning_rate": 7.12606635093025e-06, "loss": 0.6453, "step": 4480 }, { "epoch": 0.3795045521914038, "grad_norm": 1.2795578053164791, "learning_rate": 7.124824813514572e-06, "loss": 0.6357, "step": 4481 }, { "epoch": 0.3795892441244971, "grad_norm": 1.4377962947300664, "learning_rate": 7.1235831161945745e-06, "loss": 0.6628, "step": 4482 }, { "epoch": 0.37967393605759053, "grad_norm": 1.6588392846728925, "learning_rate": 7.122341259063703e-06, "loss": 0.6457, "step": 4483 }, { "epoch": 0.3797586279906839, "grad_norm": 1.4659870323000133, "learning_rate": 7.121099242215415e-06, "loss": 0.6906, "step": 4484 }, { "epoch": 0.37984331992377723, "grad_norm": 1.6455511133693508, "learning_rate": 7.119857065743175e-06, "loss": 0.6464, "step": 4485 }, { "epoch": 0.37992801185687064, "grad_norm": 1.5330671029179708, "learning_rate": 7.118614729740467e-06, "loss": 0.6284, "step": 4486 }, { "epoch": 0.380012703789964, "grad_norm": 1.740296960168985, "learning_rate": 7.117372234300782e-06, "loss": 0.6373, "step": 4487 }, { "epoch": 0.3800973957230574, "grad_norm": 1.7496607295016335, "learning_rate": 7.116129579517623e-06, "loss": 0.6445, "step": 4488 }, { "epoch": 0.38018208765615075, "grad_norm": 1.8443799513425796, "learning_rate": 7.114886765484509e-06, "loss": 0.6445, "step": 4489 }, { "epoch": 0.3802667795892441, "grad_norm": 1.3212859670637316, "learning_rate": 7.113643792294968e-06, "loss": 0.6435, "step": 4490 }, { "epoch": 0.3803514715223375, "grad_norm": 1.6447672369822322, "learning_rate": 7.112400660042537e-06, "loss": 0.7081, "step": 4491 }, { "epoch": 0.38043616345543085, "grad_norm": 2.8654496656332045, "learning_rate": 7.1111573688207725e-06, "loss": 0.6359, "step": 4492 }, { "epoch": 0.38052085538852426, "grad_norm": 1.9525851174001125, "learning_rate": 7.109913918723236e-06, "loss": 0.677, "step": 4493 }, { "epoch": 0.3806055473216176, "grad_norm": 1.2367906404502744, "learning_rate": 7.108670309843505e-06, "loss": 0.672, "step": 4494 }, { "epoch": 0.380690239254711, "grad_norm": 1.321119289139262, "learning_rate": 7.107426542275166e-06, "loss": 0.6431, "step": 4495 }, { "epoch": 0.38077493118780437, "grad_norm": 1.6613789215617532, "learning_rate": 7.106182616111822e-06, "loss": 0.6561, "step": 4496 }, { "epoch": 0.3808596231208977, "grad_norm": 1.1576231500813428, "learning_rate": 7.104938531447083e-06, "loss": 0.6614, "step": 4497 }, { "epoch": 0.3809443150539911, "grad_norm": 1.46684574102626, "learning_rate": 7.103694288374573e-06, "loss": 0.6625, "step": 4498 }, { "epoch": 0.3810290069870845, "grad_norm": 1.273065896936809, "learning_rate": 7.1024498869879274e-06, "loss": 0.6749, "step": 4499 }, { "epoch": 0.3811136989201779, "grad_norm": 1.5284979308899687, "learning_rate": 7.101205327380797e-06, "loss": 0.6751, "step": 4500 }, { "epoch": 0.38119839085327123, "grad_norm": 1.9219321038237656, "learning_rate": 7.099960609646839e-06, "loss": 0.6268, "step": 4501 }, { "epoch": 0.3812830827863646, "grad_norm": 1.5257340515807523, "learning_rate": 7.098715733879723e-06, "loss": 0.5685, "step": 4502 }, { "epoch": 0.381367774719458, "grad_norm": 1.1913290486132282, "learning_rate": 7.0974707001731355e-06, "loss": 0.6378, "step": 4503 }, { "epoch": 0.38145246665255134, "grad_norm": 2.068498423304912, "learning_rate": 7.096225508620772e-06, "loss": 0.676, "step": 4504 }, { "epoch": 0.38153715858564474, "grad_norm": 1.3448054701010017, "learning_rate": 7.094980159316338e-06, "loss": 0.5946, "step": 4505 }, { "epoch": 0.3816218505187381, "grad_norm": 1.458147865749778, "learning_rate": 7.093734652353554e-06, "loss": 0.6885, "step": 4506 }, { "epoch": 0.38170654245183144, "grad_norm": 1.3939625312921478, "learning_rate": 7.092488987826151e-06, "loss": 0.6352, "step": 4507 }, { "epoch": 0.38179123438492485, "grad_norm": 1.2026563145006242, "learning_rate": 7.091243165827873e-06, "loss": 0.6015, "step": 4508 }, { "epoch": 0.3818759263180182, "grad_norm": 1.2459688795098807, "learning_rate": 7.089997186452471e-06, "loss": 0.6301, "step": 4509 }, { "epoch": 0.3819606182511116, "grad_norm": 1.2805578923845562, "learning_rate": 7.088751049793713e-06, "loss": 0.6572, "step": 4510 }, { "epoch": 0.38204531018420496, "grad_norm": 1.3320151754337772, "learning_rate": 7.08750475594538e-06, "loss": 0.6358, "step": 4511 }, { "epoch": 0.3821300021172983, "grad_norm": 1.3902026252225368, "learning_rate": 7.086258305001259e-06, "loss": 0.6284, "step": 4512 }, { "epoch": 0.3822146940503917, "grad_norm": 1.7394860658882163, "learning_rate": 7.085011697055153e-06, "loss": 0.6701, "step": 4513 }, { "epoch": 0.38229938598348506, "grad_norm": 1.263123281890651, "learning_rate": 7.083764932200877e-06, "loss": 0.6353, "step": 4514 }, { "epoch": 0.38238407791657847, "grad_norm": 1.3241436371601136, "learning_rate": 7.0825180105322554e-06, "loss": 0.7401, "step": 4515 }, { "epoch": 0.3824687698496718, "grad_norm": 1.309269829074956, "learning_rate": 7.081270932143126e-06, "loss": 0.5957, "step": 4516 }, { "epoch": 0.38255346178276517, "grad_norm": 1.6227466365975338, "learning_rate": 7.0800236971273386e-06, "loss": 0.6718, "step": 4517 }, { "epoch": 0.3826381537158586, "grad_norm": 2.7867595936092133, "learning_rate": 7.078776305578754e-06, "loss": 0.5986, "step": 4518 }, { "epoch": 0.3827228456489519, "grad_norm": 1.6460001146450738, "learning_rate": 7.077528757591245e-06, "loss": 0.6697, "step": 4519 }, { "epoch": 0.38280753758204533, "grad_norm": 1.3830011744921389, "learning_rate": 7.076281053258693e-06, "loss": 0.6729, "step": 4520 }, { "epoch": 0.3828922295151387, "grad_norm": 1.2961324903055536, "learning_rate": 7.075033192675001e-06, "loss": 0.6101, "step": 4521 }, { "epoch": 0.38297692144823203, "grad_norm": 1.1922774589670555, "learning_rate": 7.0737851759340716e-06, "loss": 0.6131, "step": 4522 }, { "epoch": 0.38306161338132544, "grad_norm": 1.504033862141012, "learning_rate": 7.072537003129826e-06, "loss": 0.6449, "step": 4523 }, { "epoch": 0.3831463053144188, "grad_norm": 1.4766838536966747, "learning_rate": 7.071288674356198e-06, "loss": 0.6049, "step": 4524 }, { "epoch": 0.3832309972475122, "grad_norm": 1.1966969579795264, "learning_rate": 7.070040189707128e-06, "loss": 0.618, "step": 4525 }, { "epoch": 0.38331568918060555, "grad_norm": 1.6979581167569042, "learning_rate": 7.068791549276572e-06, "loss": 0.6536, "step": 4526 }, { "epoch": 0.3834003811136989, "grad_norm": 1.409477958876827, "learning_rate": 7.0675427531584986e-06, "loss": 0.6849, "step": 4527 }, { "epoch": 0.3834850730467923, "grad_norm": 0.6373417385580477, "learning_rate": 7.066293801446884e-06, "loss": 0.8253, "step": 4528 }, { "epoch": 0.38356976497988565, "grad_norm": 1.3543809935544342, "learning_rate": 7.06504469423572e-06, "loss": 0.6498, "step": 4529 }, { "epoch": 0.38365445691297906, "grad_norm": 1.9707885919983577, "learning_rate": 7.0637954316190075e-06, "loss": 0.6255, "step": 4530 }, { "epoch": 0.3837391488460724, "grad_norm": 1.3705569391087793, "learning_rate": 7.062546013690758e-06, "loss": 0.6623, "step": 4531 }, { "epoch": 0.38382384077916576, "grad_norm": 1.3067199157490184, "learning_rate": 7.0612964405450024e-06, "loss": 0.6199, "step": 4532 }, { "epoch": 0.38390853271225917, "grad_norm": 0.6465590452234548, "learning_rate": 7.060046712275775e-06, "loss": 0.8579, "step": 4533 }, { "epoch": 0.3839932246453525, "grad_norm": 3.2708498686566623, "learning_rate": 7.058796828977121e-06, "loss": 0.6199, "step": 4534 }, { "epoch": 0.3840779165784459, "grad_norm": 1.45303531792865, "learning_rate": 7.057546790743106e-06, "loss": 0.6439, "step": 4535 }, { "epoch": 0.38416260851153927, "grad_norm": 3.995138968917052, "learning_rate": 7.0562965976678e-06, "loss": 0.6753, "step": 4536 }, { "epoch": 0.3842473004446326, "grad_norm": 1.1503776361770914, "learning_rate": 7.055046249845285e-06, "loss": 0.6047, "step": 4537 }, { "epoch": 0.38433199237772603, "grad_norm": 1.6652519611985055, "learning_rate": 7.053795747369658e-06, "loss": 0.5929, "step": 4538 }, { "epoch": 0.3844166843108194, "grad_norm": 1.366734734623287, "learning_rate": 7.052545090335026e-06, "loss": 0.5927, "step": 4539 }, { "epoch": 0.3845013762439128, "grad_norm": 1.4870128998652605, "learning_rate": 7.051294278835508e-06, "loss": 0.6152, "step": 4540 }, { "epoch": 0.38458606817700614, "grad_norm": 1.0599185625654752, "learning_rate": 7.050043312965232e-06, "loss": 0.5713, "step": 4541 }, { "epoch": 0.3846707601100995, "grad_norm": 1.9187200959909432, "learning_rate": 7.048792192818342e-06, "loss": 0.6273, "step": 4542 }, { "epoch": 0.3847554520431929, "grad_norm": 1.3000869880815733, "learning_rate": 7.047540918488991e-06, "loss": 0.6163, "step": 4543 }, { "epoch": 0.38484014397628624, "grad_norm": 1.4214743650157464, "learning_rate": 7.046289490071342e-06, "loss": 0.6274, "step": 4544 }, { "epoch": 0.38492483590937965, "grad_norm": 1.8905460557135547, "learning_rate": 7.045037907659575e-06, "loss": 0.6747, "step": 4545 }, { "epoch": 0.385009527842473, "grad_norm": 1.4559739342191667, "learning_rate": 7.043786171347876e-06, "loss": 0.6982, "step": 4546 }, { "epoch": 0.3850942197755664, "grad_norm": 1.7821800213200918, "learning_rate": 7.042534281230444e-06, "loss": 0.6067, "step": 4547 }, { "epoch": 0.38517891170865975, "grad_norm": 2.0408327883323687, "learning_rate": 7.041282237401493e-06, "loss": 0.6764, "step": 4548 }, { "epoch": 0.3852636036417531, "grad_norm": 1.525890561850854, "learning_rate": 7.0400300399552445e-06, "loss": 0.6018, "step": 4549 }, { "epoch": 0.3853482955748465, "grad_norm": 1.2024665780944417, "learning_rate": 7.0387776889859334e-06, "loss": 0.6303, "step": 4550 }, { "epoch": 0.38543298750793986, "grad_norm": 1.595732858599427, "learning_rate": 7.0375251845878034e-06, "loss": 0.6265, "step": 4551 }, { "epoch": 0.38551767944103327, "grad_norm": 1.7095005016754452, "learning_rate": 7.036272526855116e-06, "loss": 0.633, "step": 4552 }, { "epoch": 0.3856023713741266, "grad_norm": 1.6641413434903536, "learning_rate": 7.035019715882137e-06, "loss": 0.5955, "step": 4553 }, { "epoch": 0.38568706330721997, "grad_norm": 1.339590843465996, "learning_rate": 7.033766751763149e-06, "loss": 0.633, "step": 4554 }, { "epoch": 0.3857717552403134, "grad_norm": 1.3792626277186244, "learning_rate": 7.032513634592444e-06, "loss": 0.5938, "step": 4555 }, { "epoch": 0.3858564471734067, "grad_norm": 1.7554778484908256, "learning_rate": 7.031260364464325e-06, "loss": 0.676, "step": 4556 }, { "epoch": 0.38594113910650013, "grad_norm": 1.7327821378582282, "learning_rate": 7.030006941473109e-06, "loss": 0.6314, "step": 4557 }, { "epoch": 0.3860258310395935, "grad_norm": 0.636830139063727, "learning_rate": 7.028753365713119e-06, "loss": 0.8212, "step": 4558 }, { "epoch": 0.38611052297268683, "grad_norm": 1.3046117892025484, "learning_rate": 7.027499637278696e-06, "loss": 0.6023, "step": 4559 }, { "epoch": 0.38619521490578024, "grad_norm": 1.3524030436710577, "learning_rate": 7.02624575626419e-06, "loss": 0.6475, "step": 4560 }, { "epoch": 0.3862799068388736, "grad_norm": 1.250702618301646, "learning_rate": 7.024991722763961e-06, "loss": 0.6924, "step": 4561 }, { "epoch": 0.386364598771967, "grad_norm": 1.8946682912842003, "learning_rate": 7.023737536872381e-06, "loss": 0.5825, "step": 4562 }, { "epoch": 0.38644929070506034, "grad_norm": 1.2890706414438393, "learning_rate": 7.022483198683836e-06, "loss": 0.6503, "step": 4563 }, { "epoch": 0.3865339826381537, "grad_norm": 1.6387358696954633, "learning_rate": 7.021228708292722e-06, "loss": 0.6722, "step": 4564 }, { "epoch": 0.3866186745712471, "grad_norm": 1.7676589864750605, "learning_rate": 7.019974065793442e-06, "loss": 0.6544, "step": 4565 }, { "epoch": 0.38670336650434045, "grad_norm": 0.6297422772551192, "learning_rate": 7.01871927128042e-06, "loss": 0.8401, "step": 4566 }, { "epoch": 0.38678805843743386, "grad_norm": 1.6643816327265912, "learning_rate": 7.017464324848082e-06, "loss": 0.6572, "step": 4567 }, { "epoch": 0.3868727503705272, "grad_norm": 1.3326568465111261, "learning_rate": 7.016209226590871e-06, "loss": 0.6646, "step": 4568 }, { "epoch": 0.38695744230362056, "grad_norm": 2.2562298357836905, "learning_rate": 7.014953976603238e-06, "loss": 0.6232, "step": 4569 }, { "epoch": 0.38704213423671396, "grad_norm": 1.221341410184723, "learning_rate": 7.013698574979651e-06, "loss": 0.6408, "step": 4570 }, { "epoch": 0.3871268261698073, "grad_norm": 1.3921626261171134, "learning_rate": 7.0124430218145815e-06, "loss": 0.6439, "step": 4571 }, { "epoch": 0.3872115181029007, "grad_norm": 1.2849168540228066, "learning_rate": 7.011187317202517e-06, "loss": 0.6069, "step": 4572 }, { "epoch": 0.38729621003599407, "grad_norm": 1.526178309597128, "learning_rate": 7.00993146123796e-06, "loss": 0.6567, "step": 4573 }, { "epoch": 0.3873809019690874, "grad_norm": 1.5577634645928886, "learning_rate": 7.0086754540154164e-06, "loss": 0.6785, "step": 4574 }, { "epoch": 0.3874655939021808, "grad_norm": 1.1897463010609346, "learning_rate": 7.0074192956294076e-06, "loss": 0.6252, "step": 4575 }, { "epoch": 0.3875502858352742, "grad_norm": 1.7179812097928386, "learning_rate": 7.006162986174468e-06, "loss": 0.5853, "step": 4576 }, { "epoch": 0.3876349777683676, "grad_norm": 1.2171092843921176, "learning_rate": 7.0049065257451394e-06, "loss": 0.6583, "step": 4577 }, { "epoch": 0.38771966970146093, "grad_norm": 1.3769162207757828, "learning_rate": 7.0036499144359795e-06, "loss": 0.5834, "step": 4578 }, { "epoch": 0.3878043616345543, "grad_norm": 1.42122020386147, "learning_rate": 7.002393152341553e-06, "loss": 0.6063, "step": 4579 }, { "epoch": 0.3878890535676477, "grad_norm": 0.5926749713599186, "learning_rate": 7.0011362395564384e-06, "loss": 0.866, "step": 4580 }, { "epoch": 0.38797374550074104, "grad_norm": 1.6674758380342822, "learning_rate": 6.999879176175226e-06, "loss": 0.6574, "step": 4581 }, { "epoch": 0.38805843743383445, "grad_norm": 1.4447294360675258, "learning_rate": 6.998621962292515e-06, "loss": 0.6619, "step": 4582 }, { "epoch": 0.3881431293669278, "grad_norm": 1.4931118835351007, "learning_rate": 6.9973645980029195e-06, "loss": 0.6182, "step": 4583 }, { "epoch": 0.38822782130002115, "grad_norm": 1.6164238140926876, "learning_rate": 6.996107083401059e-06, "loss": 0.6652, "step": 4584 }, { "epoch": 0.38831251323311455, "grad_norm": 1.4951123616621025, "learning_rate": 6.994849418581573e-06, "loss": 0.6478, "step": 4585 }, { "epoch": 0.3883972051662079, "grad_norm": 1.2455446147998015, "learning_rate": 6.993591603639104e-06, "loss": 0.6213, "step": 4586 }, { "epoch": 0.3884818970993013, "grad_norm": 1.6938778670475383, "learning_rate": 6.992333638668311e-06, "loss": 0.6419, "step": 4587 }, { "epoch": 0.38856658903239466, "grad_norm": 1.3324778822904015, "learning_rate": 6.9910755237638595e-06, "loss": 0.6069, "step": 4588 }, { "epoch": 0.388651280965488, "grad_norm": 1.25174641432903, "learning_rate": 6.9898172590204326e-06, "loss": 0.6605, "step": 4589 }, { "epoch": 0.3887359728985814, "grad_norm": 0.6228965376500752, "learning_rate": 6.988558844532722e-06, "loss": 0.8738, "step": 4590 }, { "epoch": 0.38882066483167477, "grad_norm": 1.6434442099334698, "learning_rate": 6.987300280395428e-06, "loss": 0.5858, "step": 4591 }, { "epoch": 0.3889053567647682, "grad_norm": 1.6711580480579833, "learning_rate": 6.986041566703263e-06, "loss": 0.621, "step": 4592 }, { "epoch": 0.3889900486978615, "grad_norm": 1.3596649466811292, "learning_rate": 6.984782703550954e-06, "loss": 0.6953, "step": 4593 }, { "epoch": 0.3890747406309549, "grad_norm": 1.534944674601267, "learning_rate": 6.983523691033238e-06, "loss": 0.6686, "step": 4594 }, { "epoch": 0.3891594325640483, "grad_norm": 2.2661061317315756, "learning_rate": 6.982264529244861e-06, "loss": 0.6259, "step": 4595 }, { "epoch": 0.38924412449714163, "grad_norm": 1.8725389851024652, "learning_rate": 6.981005218280581e-06, "loss": 0.6512, "step": 4596 }, { "epoch": 0.38932881643023504, "grad_norm": 1.3614379206925131, "learning_rate": 6.9797457582351664e-06, "loss": 0.6111, "step": 4597 }, { "epoch": 0.3894135083633284, "grad_norm": 1.3892739026790042, "learning_rate": 6.978486149203403e-06, "loss": 0.6143, "step": 4598 }, { "epoch": 0.3894982002964218, "grad_norm": 1.3564600326378844, "learning_rate": 6.977226391280079e-06, "loss": 0.6349, "step": 4599 }, { "epoch": 0.38958289222951514, "grad_norm": 1.262016899654836, "learning_rate": 6.97596648456e-06, "loss": 0.6844, "step": 4600 }, { "epoch": 0.3896675841626085, "grad_norm": 1.903001219420126, "learning_rate": 6.974706429137978e-06, "loss": 0.6576, "step": 4601 }, { "epoch": 0.3897522760957019, "grad_norm": 1.2941644787964839, "learning_rate": 6.973446225108844e-06, "loss": 0.626, "step": 4602 }, { "epoch": 0.38983696802879525, "grad_norm": 2.6913156681772548, "learning_rate": 6.9721858725674286e-06, "loss": 0.6889, "step": 4603 }, { "epoch": 0.38992165996188866, "grad_norm": 1.598758969536919, "learning_rate": 6.970925371608584e-06, "loss": 0.6979, "step": 4604 }, { "epoch": 0.390006351894982, "grad_norm": 1.1892276645515594, "learning_rate": 6.969664722327168e-06, "loss": 0.6103, "step": 4605 }, { "epoch": 0.39009104382807536, "grad_norm": 1.2616420362478367, "learning_rate": 6.968403924818054e-06, "loss": 0.6991, "step": 4606 }, { "epoch": 0.39017573576116876, "grad_norm": 1.4413162507337958, "learning_rate": 6.967142979176119e-06, "loss": 0.6467, "step": 4607 }, { "epoch": 0.3902604276942621, "grad_norm": 1.9946265126661042, "learning_rate": 6.9658818854962596e-06, "loss": 0.5889, "step": 4608 }, { "epoch": 0.3903451196273555, "grad_norm": 1.2186277752694867, "learning_rate": 6.964620643873378e-06, "loss": 0.6682, "step": 4609 }, { "epoch": 0.39042981156044887, "grad_norm": 1.4227445346480143, "learning_rate": 6.96335925440239e-06, "loss": 0.6757, "step": 4610 }, { "epoch": 0.3905145034935422, "grad_norm": 1.6106460396038633, "learning_rate": 6.9620977171782215e-06, "loss": 0.6568, "step": 4611 }, { "epoch": 0.3905991954266356, "grad_norm": 1.7095344972913311, "learning_rate": 6.96083603229581e-06, "loss": 0.659, "step": 4612 }, { "epoch": 0.390683887359729, "grad_norm": 0.7065232793149496, "learning_rate": 6.959574199850105e-06, "loss": 0.8683, "step": 4613 }, { "epoch": 0.3907685792928224, "grad_norm": 2.3321326309228327, "learning_rate": 6.958312219936063e-06, "loss": 0.6452, "step": 4614 }, { "epoch": 0.39085327122591573, "grad_norm": 1.4743801841636819, "learning_rate": 6.9570500926486575e-06, "loss": 0.6426, "step": 4615 }, { "epoch": 0.3909379631590091, "grad_norm": 1.4325216239176115, "learning_rate": 6.955787818082871e-06, "loss": 0.6388, "step": 4616 }, { "epoch": 0.3910226550921025, "grad_norm": 1.5116496606890153, "learning_rate": 6.9545253963336915e-06, "loss": 0.6101, "step": 4617 }, { "epoch": 0.39110734702519584, "grad_norm": 1.3405404375403656, "learning_rate": 6.9532628274961275e-06, "loss": 0.6443, "step": 4618 }, { "epoch": 0.39119203895828925, "grad_norm": 1.5209943538004473, "learning_rate": 6.952000111665195e-06, "loss": 0.6863, "step": 4619 }, { "epoch": 0.3912767308913826, "grad_norm": 2.125632316190202, "learning_rate": 6.9507372489359145e-06, "loss": 0.6589, "step": 4620 }, { "epoch": 0.39136142282447595, "grad_norm": 2.197220659305835, "learning_rate": 6.949474239403329e-06, "loss": 0.6099, "step": 4621 }, { "epoch": 0.39144611475756935, "grad_norm": 2.2604602408821752, "learning_rate": 6.948211083162482e-06, "loss": 0.6587, "step": 4622 }, { "epoch": 0.3915308066906627, "grad_norm": 1.3907017539671893, "learning_rate": 6.946947780308437e-06, "loss": 0.6161, "step": 4623 }, { "epoch": 0.3916154986237561, "grad_norm": 1.5061542500624705, "learning_rate": 6.945684330936261e-06, "loss": 0.6671, "step": 4624 }, { "epoch": 0.39170019055684946, "grad_norm": 1.308713603213535, "learning_rate": 6.9444207351410355e-06, "loss": 0.6411, "step": 4625 }, { "epoch": 0.3917848824899428, "grad_norm": 1.3590316075505744, "learning_rate": 6.943156993017855e-06, "loss": 0.6521, "step": 4626 }, { "epoch": 0.3918695744230362, "grad_norm": 1.6958480826381745, "learning_rate": 6.941893104661819e-06, "loss": 0.6303, "step": 4627 }, { "epoch": 0.39195426635612957, "grad_norm": 0.811715594936158, "learning_rate": 6.940629070168045e-06, "loss": 0.8852, "step": 4628 }, { "epoch": 0.39203895828922297, "grad_norm": 1.2289209990879466, "learning_rate": 6.939364889631658e-06, "loss": 0.6158, "step": 4629 }, { "epoch": 0.3921236502223163, "grad_norm": 1.4422842405685614, "learning_rate": 6.938100563147794e-06, "loss": 0.6414, "step": 4630 }, { "epoch": 0.3922083421554097, "grad_norm": 8.129250683721484, "learning_rate": 6.936836090811599e-06, "loss": 0.6897, "step": 4631 }, { "epoch": 0.3922930340885031, "grad_norm": 1.7299955900988222, "learning_rate": 6.935571472718232e-06, "loss": 0.6718, "step": 4632 }, { "epoch": 0.39237772602159643, "grad_norm": 1.464220327271869, "learning_rate": 6.934306708962864e-06, "loss": 0.6483, "step": 4633 }, { "epoch": 0.39246241795468984, "grad_norm": 0.6230899329865937, "learning_rate": 6.93304179964067e-06, "loss": 0.8272, "step": 4634 }, { "epoch": 0.3925471098877832, "grad_norm": 1.2473702816645245, "learning_rate": 6.931776744846846e-06, "loss": 0.6282, "step": 4635 }, { "epoch": 0.39263180182087654, "grad_norm": 1.8522181559438444, "learning_rate": 6.930511544676595e-06, "loss": 0.7, "step": 4636 }, { "epoch": 0.39271649375396994, "grad_norm": 1.188778616918056, "learning_rate": 6.929246199225126e-06, "loss": 0.6561, "step": 4637 }, { "epoch": 0.3928011856870633, "grad_norm": 0.6288727302172314, "learning_rate": 6.927980708587664e-06, "loss": 0.8222, "step": 4638 }, { "epoch": 0.3928858776201567, "grad_norm": 1.5481919917349465, "learning_rate": 6.926715072859446e-06, "loss": 0.6372, "step": 4639 }, { "epoch": 0.39297056955325005, "grad_norm": 1.3893891461945083, "learning_rate": 6.925449292135716e-06, "loss": 0.5736, "step": 4640 }, { "epoch": 0.3930552614863434, "grad_norm": 2.317895065165178, "learning_rate": 6.92418336651173e-06, "loss": 0.6584, "step": 4641 }, { "epoch": 0.3931399534194368, "grad_norm": 2.585763816372866, "learning_rate": 6.922917296082757e-06, "loss": 0.6825, "step": 4642 }, { "epoch": 0.39322464535253016, "grad_norm": 1.6579800856849016, "learning_rate": 6.921651080944076e-06, "loss": 0.6301, "step": 4643 }, { "epoch": 0.39330933728562356, "grad_norm": 1.4844431418578188, "learning_rate": 6.920384721190976e-06, "loss": 0.638, "step": 4644 }, { "epoch": 0.3933940292187169, "grad_norm": 1.263650563004509, "learning_rate": 6.919118216918755e-06, "loss": 0.6497, "step": 4645 }, { "epoch": 0.3934787211518103, "grad_norm": 0.6179101019140817, "learning_rate": 6.917851568222726e-06, "loss": 0.8989, "step": 4646 }, { "epoch": 0.39356341308490367, "grad_norm": 1.326976581653624, "learning_rate": 6.916584775198213e-06, "loss": 0.6388, "step": 4647 }, { "epoch": 0.393648105017997, "grad_norm": 1.7049836010586035, "learning_rate": 6.915317837940545e-06, "loss": 0.6743, "step": 4648 }, { "epoch": 0.3937327969510904, "grad_norm": 1.491513128211699, "learning_rate": 6.914050756545068e-06, "loss": 0.6038, "step": 4649 }, { "epoch": 0.3938174888841838, "grad_norm": 1.4738527656243097, "learning_rate": 6.912783531107137e-06, "loss": 0.6913, "step": 4650 }, { "epoch": 0.3939021808172772, "grad_norm": 1.1542368428377872, "learning_rate": 6.911516161722116e-06, "loss": 0.5993, "step": 4651 }, { "epoch": 0.39398687275037053, "grad_norm": 1.7399361641686537, "learning_rate": 6.910248648485383e-06, "loss": 0.7128, "step": 4652 }, { "epoch": 0.3940715646834639, "grad_norm": 1.5317825983248337, "learning_rate": 6.908980991492322e-06, "loss": 0.6737, "step": 4653 }, { "epoch": 0.3941562566165573, "grad_norm": 1.5446956983869697, "learning_rate": 6.9077131908383345e-06, "loss": 0.5752, "step": 4654 }, { "epoch": 0.39424094854965064, "grad_norm": 1.4083126867182623, "learning_rate": 6.906445246618826e-06, "loss": 0.6311, "step": 4655 }, { "epoch": 0.39432564048274404, "grad_norm": 1.4251418353043737, "learning_rate": 6.905177158929218e-06, "loss": 0.609, "step": 4656 }, { "epoch": 0.3944103324158374, "grad_norm": 4.54383576071812, "learning_rate": 6.903908927864942e-06, "loss": 0.6292, "step": 4657 }, { "epoch": 0.39449502434893075, "grad_norm": 1.6440623177696083, "learning_rate": 6.902640553521436e-06, "loss": 0.6159, "step": 4658 }, { "epoch": 0.39457971628202415, "grad_norm": 1.3554099090079321, "learning_rate": 6.901372035994152e-06, "loss": 0.663, "step": 4659 }, { "epoch": 0.3946644082151175, "grad_norm": 1.2513630287936683, "learning_rate": 6.900103375378557e-06, "loss": 0.6499, "step": 4660 }, { "epoch": 0.3947491001482109, "grad_norm": 1.442605875007415, "learning_rate": 6.898834571770121e-06, "loss": 0.6384, "step": 4661 }, { "epoch": 0.39483379208130426, "grad_norm": 1.2496405854582529, "learning_rate": 6.897565625264328e-06, "loss": 0.645, "step": 4662 }, { "epoch": 0.3949184840143976, "grad_norm": 1.3735137628629028, "learning_rate": 6.896296535956672e-06, "loss": 0.6067, "step": 4663 }, { "epoch": 0.395003175947491, "grad_norm": 0.6258833575037421, "learning_rate": 6.895027303942663e-06, "loss": 0.8448, "step": 4664 }, { "epoch": 0.39508786788058436, "grad_norm": 1.8519043540509514, "learning_rate": 6.893757929317813e-06, "loss": 0.6602, "step": 4665 }, { "epoch": 0.39517255981367777, "grad_norm": 1.5885156262887932, "learning_rate": 6.892488412177651e-06, "loss": 0.6254, "step": 4666 }, { "epoch": 0.3952572517467711, "grad_norm": 1.871415550953504, "learning_rate": 6.891218752617715e-06, "loss": 0.6409, "step": 4667 }, { "epoch": 0.39534194367986447, "grad_norm": 1.617485976260182, "learning_rate": 6.889948950733555e-06, "loss": 0.6031, "step": 4668 }, { "epoch": 0.3954266356129579, "grad_norm": 1.499553382472937, "learning_rate": 6.888679006620726e-06, "loss": 0.6038, "step": 4669 }, { "epoch": 0.39551132754605123, "grad_norm": 1.6662610995851492, "learning_rate": 6.887408920374803e-06, "loss": 0.6879, "step": 4670 }, { "epoch": 0.39559601947914463, "grad_norm": 1.5398077647086534, "learning_rate": 6.886138692091363e-06, "loss": 0.623, "step": 4671 }, { "epoch": 0.395680711412238, "grad_norm": 1.2856233745736447, "learning_rate": 6.884868321866e-06, "loss": 0.5771, "step": 4672 }, { "epoch": 0.39576540334533133, "grad_norm": 0.6425087481968522, "learning_rate": 6.883597809794313e-06, "loss": 0.8208, "step": 4673 }, { "epoch": 0.39585009527842474, "grad_norm": 3.001353461057096, "learning_rate": 6.88232715597192e-06, "loss": 0.6342, "step": 4674 }, { "epoch": 0.3959347872115181, "grad_norm": 0.6310655463240978, "learning_rate": 6.881056360494438e-06, "loss": 0.8846, "step": 4675 }, { "epoch": 0.3960194791446115, "grad_norm": 1.4035319628159089, "learning_rate": 6.8797854234575044e-06, "loss": 0.6576, "step": 4676 }, { "epoch": 0.39610417107770485, "grad_norm": 1.4354218290394891, "learning_rate": 6.878514344956766e-06, "loss": 0.6837, "step": 4677 }, { "epoch": 0.3961888630107982, "grad_norm": 1.2442478811398139, "learning_rate": 6.877243125087874e-06, "loss": 0.5802, "step": 4678 }, { "epoch": 0.3962735549438916, "grad_norm": 1.3163187023246334, "learning_rate": 6.8759717639464975e-06, "loss": 0.6472, "step": 4679 }, { "epoch": 0.39635824687698495, "grad_norm": 1.4191539177095185, "learning_rate": 6.874700261628311e-06, "loss": 0.6041, "step": 4680 }, { "epoch": 0.39644293881007836, "grad_norm": 1.4694535712453924, "learning_rate": 6.873428618229003e-06, "loss": 0.5951, "step": 4681 }, { "epoch": 0.3965276307431717, "grad_norm": 0.6262222067984994, "learning_rate": 6.872156833844272e-06, "loss": 0.8753, "step": 4682 }, { "epoch": 0.39661232267626506, "grad_norm": 1.4814486667014206, "learning_rate": 6.870884908569824e-06, "loss": 0.6225, "step": 4683 }, { "epoch": 0.39669701460935847, "grad_norm": 1.5369771863077377, "learning_rate": 6.869612842501381e-06, "loss": 0.6552, "step": 4684 }, { "epoch": 0.3967817065424518, "grad_norm": 1.650606300225353, "learning_rate": 6.868340635734672e-06, "loss": 0.7486, "step": 4685 }, { "epoch": 0.3968663984755452, "grad_norm": 1.3154332451723145, "learning_rate": 6.867068288365436e-06, "loss": 0.6644, "step": 4686 }, { "epoch": 0.3969510904086386, "grad_norm": 1.812296507219229, "learning_rate": 6.865795800489425e-06, "loss": 0.7233, "step": 4687 }, { "epoch": 0.3970357823417319, "grad_norm": 1.436300958233079, "learning_rate": 6.864523172202401e-06, "loss": 0.6561, "step": 4688 }, { "epoch": 0.39712047427482533, "grad_norm": 1.7593052018768538, "learning_rate": 6.8632504036001345e-06, "loss": 0.6604, "step": 4689 }, { "epoch": 0.3972051662079187, "grad_norm": 1.4719148493484389, "learning_rate": 6.861977494778408e-06, "loss": 0.6396, "step": 4690 }, { "epoch": 0.3972898581410121, "grad_norm": 1.7032349022539872, "learning_rate": 6.8607044458330156e-06, "loss": 0.6395, "step": 4691 }, { "epoch": 0.39737455007410544, "grad_norm": 2.8069877347754546, "learning_rate": 6.859431256859762e-06, "loss": 0.6264, "step": 4692 }, { "epoch": 0.3974592420071988, "grad_norm": 1.513654872304652, "learning_rate": 6.858157927954459e-06, "loss": 0.6425, "step": 4693 }, { "epoch": 0.3975439339402922, "grad_norm": 1.5288573993852923, "learning_rate": 6.856884459212934e-06, "loss": 0.6542, "step": 4694 }, { "epoch": 0.39762862587338554, "grad_norm": 0.6542092168727379, "learning_rate": 6.8556108507310185e-06, "loss": 0.8713, "step": 4695 }, { "epoch": 0.39771331780647895, "grad_norm": 1.357837622752614, "learning_rate": 6.854337102604562e-06, "loss": 0.6551, "step": 4696 }, { "epoch": 0.3977980097395723, "grad_norm": 1.3638016023118904, "learning_rate": 6.853063214929418e-06, "loss": 0.6895, "step": 4697 }, { "epoch": 0.3978827016726657, "grad_norm": 1.2378892100862091, "learning_rate": 6.851789187801457e-06, "loss": 0.6089, "step": 4698 }, { "epoch": 0.39796739360575906, "grad_norm": 1.5142370279476844, "learning_rate": 6.8505150213165515e-06, "loss": 0.6432, "step": 4699 }, { "epoch": 0.3980520855388524, "grad_norm": 1.339556215013403, "learning_rate": 6.849240715570593e-06, "loss": 0.6909, "step": 4700 }, { "epoch": 0.3981367774719458, "grad_norm": 0.6189746533401549, "learning_rate": 6.847966270659479e-06, "loss": 0.8127, "step": 4701 }, { "epoch": 0.39822146940503916, "grad_norm": 1.6647678073098569, "learning_rate": 6.846691686679117e-06, "loss": 0.6169, "step": 4702 }, { "epoch": 0.39830616133813257, "grad_norm": 2.347610264597172, "learning_rate": 6.8454169637254265e-06, "loss": 0.6338, "step": 4703 }, { "epoch": 0.3983908532712259, "grad_norm": 2.2605132889910973, "learning_rate": 6.844142101894338e-06, "loss": 0.6244, "step": 4704 }, { "epoch": 0.39847554520431927, "grad_norm": 1.3533223498110745, "learning_rate": 6.84286710128179e-06, "loss": 0.6507, "step": 4705 }, { "epoch": 0.3985602371374127, "grad_norm": 3.6001414576528075, "learning_rate": 6.841591961983735e-06, "loss": 0.6207, "step": 4706 }, { "epoch": 0.398644929070506, "grad_norm": 0.5979767131277337, "learning_rate": 6.840316684096134e-06, "loss": 0.7713, "step": 4707 }, { "epoch": 0.39872962100359943, "grad_norm": 1.5685285900688353, "learning_rate": 6.839041267714957e-06, "loss": 0.5983, "step": 4708 }, { "epoch": 0.3988143129366928, "grad_norm": 1.434847783321562, "learning_rate": 6.837765712936187e-06, "loss": 0.6791, "step": 4709 }, { "epoch": 0.39889900486978613, "grad_norm": 1.2800840130991364, "learning_rate": 6.836490019855815e-06, "loss": 0.6174, "step": 4710 }, { "epoch": 0.39898369680287954, "grad_norm": 1.532383479234602, "learning_rate": 6.835214188569844e-06, "loss": 0.6916, "step": 4711 }, { "epoch": 0.3990683887359729, "grad_norm": 1.772166808651317, "learning_rate": 6.833938219174288e-06, "loss": 0.6526, "step": 4712 }, { "epoch": 0.3991530806690663, "grad_norm": 2.0783747589030517, "learning_rate": 6.832662111765169e-06, "loss": 0.6581, "step": 4713 }, { "epoch": 0.39923777260215965, "grad_norm": 1.433270708547288, "learning_rate": 6.831385866438522e-06, "loss": 0.6505, "step": 4714 }, { "epoch": 0.399322464535253, "grad_norm": 1.4659182531309554, "learning_rate": 6.830109483290392e-06, "loss": 0.6567, "step": 4715 }, { "epoch": 0.3994071564683464, "grad_norm": 1.156500117242911, "learning_rate": 6.828832962416831e-06, "loss": 0.5925, "step": 4716 }, { "epoch": 0.39949184840143975, "grad_norm": 2.2997866846216954, "learning_rate": 6.827556303913907e-06, "loss": 0.6239, "step": 4717 }, { "epoch": 0.39957654033453316, "grad_norm": 1.6412439187956909, "learning_rate": 6.826279507877693e-06, "loss": 0.7012, "step": 4718 }, { "epoch": 0.3996612322676265, "grad_norm": 1.5968981720878097, "learning_rate": 6.8250025744042745e-06, "loss": 0.6535, "step": 4719 }, { "epoch": 0.39974592420071986, "grad_norm": 1.4400231015118223, "learning_rate": 6.823725503589749e-06, "loss": 0.6176, "step": 4720 }, { "epoch": 0.39983061613381327, "grad_norm": 1.6472445577495247, "learning_rate": 6.822448295530222e-06, "loss": 0.62, "step": 4721 }, { "epoch": 0.3999153080669066, "grad_norm": 1.379677444668433, "learning_rate": 6.821170950321811e-06, "loss": 0.6868, "step": 4722 }, { "epoch": 0.4, "grad_norm": 4.094981787409781, "learning_rate": 6.8198934680606435e-06, "loss": 0.632, "step": 4723 }, { "epoch": 0.4000846919330934, "grad_norm": 1.3730388322866556, "learning_rate": 6.818615848842855e-06, "loss": 0.6641, "step": 4724 }, { "epoch": 0.4001693838661867, "grad_norm": 1.3489543841272902, "learning_rate": 6.817338092764592e-06, "loss": 0.6892, "step": 4725 }, { "epoch": 0.40025407579928013, "grad_norm": 1.9887703616868204, "learning_rate": 6.8160601999220165e-06, "loss": 0.5706, "step": 4726 }, { "epoch": 0.4003387677323735, "grad_norm": 1.242753762932488, "learning_rate": 6.814782170411294e-06, "loss": 0.6286, "step": 4727 }, { "epoch": 0.4004234596654669, "grad_norm": 3.607109670872308, "learning_rate": 6.813504004328603e-06, "loss": 0.6494, "step": 4728 }, { "epoch": 0.40050815159856024, "grad_norm": 2.627867481899588, "learning_rate": 6.812225701770132e-06, "loss": 0.6287, "step": 4729 }, { "epoch": 0.4005928435316536, "grad_norm": 1.5753237428161735, "learning_rate": 6.810947262832082e-06, "loss": 0.6601, "step": 4730 }, { "epoch": 0.400677535464747, "grad_norm": 1.8711219366912175, "learning_rate": 6.80966868761066e-06, "loss": 0.7181, "step": 4731 }, { "epoch": 0.40076222739784034, "grad_norm": 1.46943433889581, "learning_rate": 6.808389976202088e-06, "loss": 0.6772, "step": 4732 }, { "epoch": 0.40084691933093375, "grad_norm": 1.7538493432513638, "learning_rate": 6.807111128702594e-06, "loss": 0.6672, "step": 4733 }, { "epoch": 0.4009316112640271, "grad_norm": 1.6230737498196695, "learning_rate": 6.805832145208418e-06, "loss": 0.642, "step": 4734 }, { "epoch": 0.40101630319712045, "grad_norm": 1.5343568624338373, "learning_rate": 6.80455302581581e-06, "loss": 0.6524, "step": 4735 }, { "epoch": 0.40110099513021386, "grad_norm": 1.731040845873381, "learning_rate": 6.803273770621033e-06, "loss": 0.5985, "step": 4736 }, { "epoch": 0.4011856870633072, "grad_norm": 1.452342757801755, "learning_rate": 6.801994379720354e-06, "loss": 0.5914, "step": 4737 }, { "epoch": 0.4012703789964006, "grad_norm": 1.267562290534595, "learning_rate": 6.800714853210058e-06, "loss": 0.696, "step": 4738 }, { "epoch": 0.40135507092949396, "grad_norm": 1.4283827191254954, "learning_rate": 6.799435191186432e-06, "loss": 0.6504, "step": 4739 }, { "epoch": 0.4014397628625873, "grad_norm": 1.4457397954946183, "learning_rate": 6.798155393745782e-06, "loss": 0.642, "step": 4740 }, { "epoch": 0.4015244547956807, "grad_norm": 2.128929185689078, "learning_rate": 6.7968754609844145e-06, "loss": 0.6458, "step": 4741 }, { "epoch": 0.40160914672877407, "grad_norm": 1.723102212196819, "learning_rate": 6.795595392998654e-06, "loss": 0.5942, "step": 4742 }, { "epoch": 0.4016938386618675, "grad_norm": 1.2967950431948132, "learning_rate": 6.794315189884834e-06, "loss": 0.6279, "step": 4743 }, { "epoch": 0.4017785305949608, "grad_norm": 1.5365005339956552, "learning_rate": 6.793034851739293e-06, "loss": 0.6479, "step": 4744 }, { "epoch": 0.4018632225280542, "grad_norm": 3.326736961109882, "learning_rate": 6.791754378658384e-06, "loss": 0.6731, "step": 4745 }, { "epoch": 0.4019479144611476, "grad_norm": 1.3387458638078877, "learning_rate": 6.790473770738471e-06, "loss": 0.603, "step": 4746 }, { "epoch": 0.40203260639424093, "grad_norm": 1.4628466524398862, "learning_rate": 6.789193028075927e-06, "loss": 0.6434, "step": 4747 }, { "epoch": 0.40211729832733434, "grad_norm": 1.3001044790687353, "learning_rate": 6.787912150767133e-06, "loss": 0.5824, "step": 4748 }, { "epoch": 0.4022019902604277, "grad_norm": 1.4685808073453133, "learning_rate": 6.78663113890848e-06, "loss": 0.6402, "step": 4749 }, { "epoch": 0.4022866821935211, "grad_norm": 1.805001653613561, "learning_rate": 6.785349992596375e-06, "loss": 0.5881, "step": 4750 }, { "epoch": 0.40237137412661445, "grad_norm": 1.9373725629335838, "learning_rate": 6.78406871192723e-06, "loss": 0.6327, "step": 4751 }, { "epoch": 0.4024560660597078, "grad_norm": 1.366595638083193, "learning_rate": 6.782787296997465e-06, "loss": 0.6261, "step": 4752 }, { "epoch": 0.4025407579928012, "grad_norm": 5.817036448040422, "learning_rate": 6.7815057479035165e-06, "loss": 0.6323, "step": 4753 }, { "epoch": 0.40262544992589455, "grad_norm": 1.2982132517209506, "learning_rate": 6.780224064741828e-06, "loss": 0.6233, "step": 4754 }, { "epoch": 0.40271014185898796, "grad_norm": 1.3017329009519698, "learning_rate": 6.7789422476088516e-06, "loss": 0.645, "step": 4755 }, { "epoch": 0.4027948337920813, "grad_norm": 1.6947366718491634, "learning_rate": 6.777660296601051e-06, "loss": 0.6466, "step": 4756 }, { "epoch": 0.40287952572517466, "grad_norm": 1.3664244989387673, "learning_rate": 6.776378211814899e-06, "loss": 0.614, "step": 4757 }, { "epoch": 0.40296421765826806, "grad_norm": 1.7924560717907425, "learning_rate": 6.775095993346881e-06, "loss": 0.659, "step": 4758 }, { "epoch": 0.4030489095913614, "grad_norm": 1.6781867937186483, "learning_rate": 6.773813641293489e-06, "loss": 0.6542, "step": 4759 }, { "epoch": 0.4031336015244548, "grad_norm": 1.316501721538773, "learning_rate": 6.77253115575123e-06, "loss": 0.6423, "step": 4760 }, { "epoch": 0.40321829345754817, "grad_norm": 0.6320455990401437, "learning_rate": 6.771248536816612e-06, "loss": 0.8091, "step": 4761 }, { "epoch": 0.4033029853906415, "grad_norm": 1.4007733751444624, "learning_rate": 6.769965784586165e-06, "loss": 0.6635, "step": 4762 }, { "epoch": 0.40338767732373493, "grad_norm": 1.3572383381486963, "learning_rate": 6.76868289915642e-06, "loss": 0.6585, "step": 4763 }, { "epoch": 0.4034723692568283, "grad_norm": 1.4006917897249733, "learning_rate": 6.767399880623921e-06, "loss": 0.6333, "step": 4764 }, { "epoch": 0.4035570611899217, "grad_norm": 1.2638064111827845, "learning_rate": 6.766116729085223e-06, "loss": 0.6333, "step": 4765 }, { "epoch": 0.40364175312301503, "grad_norm": 1.5130170702992196, "learning_rate": 6.764833444636888e-06, "loss": 0.704, "step": 4766 }, { "epoch": 0.4037264450561084, "grad_norm": 1.6994918771629095, "learning_rate": 6.7635500273754906e-06, "loss": 0.7056, "step": 4767 }, { "epoch": 0.4038111369892018, "grad_norm": 1.4531675868746148, "learning_rate": 6.762266477397617e-06, "loss": 0.5594, "step": 4768 }, { "epoch": 0.40389582892229514, "grad_norm": 1.459073884716456, "learning_rate": 6.760982794799858e-06, "loss": 0.6769, "step": 4769 }, { "epoch": 0.40398052085538855, "grad_norm": 1.8475976198761135, "learning_rate": 6.759698979678817e-06, "loss": 0.6242, "step": 4770 }, { "epoch": 0.4040652127884819, "grad_norm": 1.2644763328999715, "learning_rate": 6.758415032131113e-06, "loss": 0.5679, "step": 4771 }, { "epoch": 0.40414990472157525, "grad_norm": 1.2118609461949517, "learning_rate": 6.757130952253366e-06, "loss": 0.6606, "step": 4772 }, { "epoch": 0.40423459665466865, "grad_norm": 1.7645161469673876, "learning_rate": 6.755846740142209e-06, "loss": 0.648, "step": 4773 }, { "epoch": 0.404319288587762, "grad_norm": 2.1998267866909864, "learning_rate": 6.754562395894288e-06, "loss": 0.6477, "step": 4774 }, { "epoch": 0.4044039805208554, "grad_norm": 1.5657199225539853, "learning_rate": 6.753277919606256e-06, "loss": 0.666, "step": 4775 }, { "epoch": 0.40448867245394876, "grad_norm": 1.4249134175432587, "learning_rate": 6.751993311374776e-06, "loss": 0.7006, "step": 4776 }, { "epoch": 0.4045733643870421, "grad_norm": 1.3981873686218176, "learning_rate": 6.750708571296523e-06, "loss": 0.5975, "step": 4777 }, { "epoch": 0.4046580563201355, "grad_norm": 1.6673786562178015, "learning_rate": 6.749423699468179e-06, "loss": 0.5866, "step": 4778 }, { "epoch": 0.40474274825322887, "grad_norm": 1.3538056535334786, "learning_rate": 6.748138695986437e-06, "loss": 0.6288, "step": 4779 }, { "epoch": 0.4048274401863223, "grad_norm": 1.3040351573799744, "learning_rate": 6.746853560948002e-06, "loss": 0.642, "step": 4780 }, { "epoch": 0.4049121321194156, "grad_norm": 1.32121817578569, "learning_rate": 6.745568294449587e-06, "loss": 0.6363, "step": 4781 }, { "epoch": 0.404996824052509, "grad_norm": 1.2643560934589595, "learning_rate": 6.7442828965879135e-06, "loss": 0.6498, "step": 4782 }, { "epoch": 0.4050815159856024, "grad_norm": 1.3853867611010275, "learning_rate": 6.742997367459717e-06, "loss": 0.5945, "step": 4783 }, { "epoch": 0.40516620791869573, "grad_norm": 1.217475447273985, "learning_rate": 6.741711707161738e-06, "loss": 0.6341, "step": 4784 }, { "epoch": 0.40525089985178914, "grad_norm": 0.5756967563677435, "learning_rate": 6.7404259157907315e-06, "loss": 0.841, "step": 4785 }, { "epoch": 0.4053355917848825, "grad_norm": 1.3515679554989806, "learning_rate": 6.7391399934434574e-06, "loss": 0.5985, "step": 4786 }, { "epoch": 0.40542028371797584, "grad_norm": 1.6794491194404997, "learning_rate": 6.73785394021669e-06, "loss": 0.6503, "step": 4787 }, { "epoch": 0.40550497565106924, "grad_norm": 1.3178636240621597, "learning_rate": 6.736567756207212e-06, "loss": 0.6427, "step": 4788 }, { "epoch": 0.4055896675841626, "grad_norm": 1.3670734886332472, "learning_rate": 6.735281441511814e-06, "loss": 0.6441, "step": 4789 }, { "epoch": 0.405674359517256, "grad_norm": 1.64912297155008, "learning_rate": 6.733994996227299e-06, "loss": 0.6198, "step": 4790 }, { "epoch": 0.40575905145034935, "grad_norm": 1.9020103055586453, "learning_rate": 6.732708420450478e-06, "loss": 0.6353, "step": 4791 }, { "epoch": 0.4058437433834427, "grad_norm": 1.5449898884073174, "learning_rate": 6.731421714278174e-06, "loss": 0.6791, "step": 4792 }, { "epoch": 0.4059284353165361, "grad_norm": 1.5268685883249866, "learning_rate": 6.7301348778072185e-06, "loss": 0.6579, "step": 4793 }, { "epoch": 0.40601312724962946, "grad_norm": 1.3046816302677133, "learning_rate": 6.728847911134451e-06, "loss": 0.6129, "step": 4794 }, { "epoch": 0.40609781918272286, "grad_norm": 2.9666839979766295, "learning_rate": 6.727560814356722e-06, "loss": 0.6304, "step": 4795 }, { "epoch": 0.4061825111158162, "grad_norm": 2.7417123427086496, "learning_rate": 6.726273587570896e-06, "loss": 0.651, "step": 4796 }, { "epoch": 0.40626720304890956, "grad_norm": 1.6112983331039437, "learning_rate": 6.72498623087384e-06, "loss": 0.6843, "step": 4797 }, { "epoch": 0.40635189498200297, "grad_norm": 1.3345925593570094, "learning_rate": 6.723698744362437e-06, "loss": 0.6493, "step": 4798 }, { "epoch": 0.4064365869150963, "grad_norm": 1.7375976216806122, "learning_rate": 6.722411128133576e-06, "loss": 0.6533, "step": 4799 }, { "epoch": 0.4065212788481897, "grad_norm": 1.3951301993544043, "learning_rate": 6.721123382284157e-06, "loss": 0.6761, "step": 4800 }, { "epoch": 0.4066059707812831, "grad_norm": 2.0737141617981942, "learning_rate": 6.719835506911088e-06, "loss": 0.6419, "step": 4801 }, { "epoch": 0.4066906627143765, "grad_norm": 1.9825260684223116, "learning_rate": 6.718547502111292e-06, "loss": 0.6741, "step": 4802 }, { "epoch": 0.40677535464746983, "grad_norm": 1.6164768477624494, "learning_rate": 6.7172593679816965e-06, "loss": 0.6182, "step": 4803 }, { "epoch": 0.4068600465805632, "grad_norm": 1.9618252168688493, "learning_rate": 6.71597110461924e-06, "loss": 0.6317, "step": 4804 }, { "epoch": 0.4069447385136566, "grad_norm": 1.391125533773902, "learning_rate": 6.71468271212087e-06, "loss": 0.6362, "step": 4805 }, { "epoch": 0.40702943044674994, "grad_norm": 1.6248185112423434, "learning_rate": 6.713394190583548e-06, "loss": 0.6169, "step": 4806 }, { "epoch": 0.40711412237984335, "grad_norm": 1.2350718264791034, "learning_rate": 6.712105540104239e-06, "loss": 0.6469, "step": 4807 }, { "epoch": 0.4071988143129367, "grad_norm": 1.7728276386787611, "learning_rate": 6.7108167607799225e-06, "loss": 0.6423, "step": 4808 }, { "epoch": 0.40728350624603005, "grad_norm": 1.4601815236862161, "learning_rate": 6.709527852707587e-06, "loss": 0.634, "step": 4809 }, { "epoch": 0.40736819817912345, "grad_norm": 2.3029950413246976, "learning_rate": 6.708238815984227e-06, "loss": 0.5877, "step": 4810 }, { "epoch": 0.4074528901122168, "grad_norm": 1.6102119625833984, "learning_rate": 6.70694965070685e-06, "loss": 0.6746, "step": 4811 }, { "epoch": 0.4075375820453102, "grad_norm": 1.335240930289785, "learning_rate": 6.705660356972473e-06, "loss": 0.605, "step": 4812 }, { "epoch": 0.40762227397840356, "grad_norm": 1.2552259346707202, "learning_rate": 6.704370934878124e-06, "loss": 0.6185, "step": 4813 }, { "epoch": 0.4077069659114969, "grad_norm": 1.2489531061342156, "learning_rate": 6.703081384520835e-06, "loss": 0.6592, "step": 4814 }, { "epoch": 0.4077916578445903, "grad_norm": 1.4101884044095427, "learning_rate": 6.701791705997653e-06, "loss": 0.5856, "step": 4815 }, { "epoch": 0.40787634977768367, "grad_norm": 1.215326184299268, "learning_rate": 6.700501899405636e-06, "loss": 0.6941, "step": 4816 }, { "epoch": 0.4079610417107771, "grad_norm": 2.644913038987943, "learning_rate": 6.6992119648418465e-06, "loss": 0.6271, "step": 4817 }, { "epoch": 0.4080457336438704, "grad_norm": 1.4528460134017105, "learning_rate": 6.697921902403357e-06, "loss": 0.646, "step": 4818 }, { "epoch": 0.4081304255769638, "grad_norm": 1.623244977053608, "learning_rate": 6.696631712187254e-06, "loss": 0.653, "step": 4819 }, { "epoch": 0.4082151175100572, "grad_norm": 1.462551877511606, "learning_rate": 6.695341394290632e-06, "loss": 0.6557, "step": 4820 }, { "epoch": 0.40829980944315053, "grad_norm": 1.1775068799759245, "learning_rate": 6.694050948810592e-06, "loss": 0.6777, "step": 4821 }, { "epoch": 0.40838450137624394, "grad_norm": 2.6002783955155477, "learning_rate": 6.6927603758442475e-06, "loss": 0.6325, "step": 4822 }, { "epoch": 0.4084691933093373, "grad_norm": 1.4921921823266084, "learning_rate": 6.69146967548872e-06, "loss": 0.6799, "step": 4823 }, { "epoch": 0.40855388524243064, "grad_norm": 3.0131664748533113, "learning_rate": 6.690178847841144e-06, "loss": 0.6562, "step": 4824 }, { "epoch": 0.40863857717552404, "grad_norm": 1.541147226828192, "learning_rate": 6.688887892998659e-06, "loss": 0.6623, "step": 4825 }, { "epoch": 0.4087232691086174, "grad_norm": 1.458470712007632, "learning_rate": 6.687596811058419e-06, "loss": 0.6107, "step": 4826 }, { "epoch": 0.4088079610417108, "grad_norm": 1.197670721009538, "learning_rate": 6.68630560211758e-06, "loss": 0.5783, "step": 4827 }, { "epoch": 0.40889265297480415, "grad_norm": 1.5051217506795231, "learning_rate": 6.6850142662733174e-06, "loss": 0.6577, "step": 4828 }, { "epoch": 0.4089773449078975, "grad_norm": 1.1878825706490845, "learning_rate": 6.683722803622806e-06, "loss": 0.6267, "step": 4829 }, { "epoch": 0.4090620368409909, "grad_norm": 1.459163686754209, "learning_rate": 6.682431214263241e-06, "loss": 0.6221, "step": 4830 }, { "epoch": 0.40914672877408426, "grad_norm": 1.2672871022730985, "learning_rate": 6.681139498291816e-06, "loss": 0.6435, "step": 4831 }, { "epoch": 0.40923142070717766, "grad_norm": 1.915674065402002, "learning_rate": 6.679847655805742e-06, "loss": 0.673, "step": 4832 }, { "epoch": 0.409316112640271, "grad_norm": 0.6239839947220266, "learning_rate": 6.678555686902237e-06, "loss": 0.8768, "step": 4833 }, { "epoch": 0.40940080457336436, "grad_norm": 1.3648087241592217, "learning_rate": 6.677263591678529e-06, "loss": 0.6169, "step": 4834 }, { "epoch": 0.40948549650645777, "grad_norm": 1.2923378552043288, "learning_rate": 6.675971370231853e-06, "loss": 0.6575, "step": 4835 }, { "epoch": 0.4095701884395511, "grad_norm": 0.680054018386555, "learning_rate": 6.674679022659456e-06, "loss": 0.8693, "step": 4836 }, { "epoch": 0.4096548803726445, "grad_norm": 1.380625287950272, "learning_rate": 6.673386549058597e-06, "loss": 0.584, "step": 4837 }, { "epoch": 0.4097395723057379, "grad_norm": 1.6717594065432795, "learning_rate": 6.672093949526539e-06, "loss": 0.6667, "step": 4838 }, { "epoch": 0.4098242642388312, "grad_norm": 1.6490882766322668, "learning_rate": 6.670801224160555e-06, "loss": 0.6565, "step": 4839 }, { "epoch": 0.40990895617192463, "grad_norm": 1.4684064239810688, "learning_rate": 6.669508373057932e-06, "loss": 0.643, "step": 4840 }, { "epoch": 0.409993648105018, "grad_norm": 0.597778663224727, "learning_rate": 6.668215396315965e-06, "loss": 0.8575, "step": 4841 }, { "epoch": 0.4100783400381114, "grad_norm": 1.313233486380946, "learning_rate": 6.6669222940319554e-06, "loss": 0.6498, "step": 4842 }, { "epoch": 0.41016303197120474, "grad_norm": 1.5840206205666671, "learning_rate": 6.665629066303216e-06, "loss": 0.6856, "step": 4843 }, { "epoch": 0.4102477239042981, "grad_norm": 1.358668738742669, "learning_rate": 6.664335713227069e-06, "loss": 0.6288, "step": 4844 }, { "epoch": 0.4103324158373915, "grad_norm": 1.8440918072843095, "learning_rate": 6.663042234900848e-06, "loss": 0.6713, "step": 4845 }, { "epoch": 0.41041710777048485, "grad_norm": 1.7457112677313804, "learning_rate": 6.66174863142189e-06, "loss": 0.6081, "step": 4846 }, { "epoch": 0.41050179970357825, "grad_norm": 1.468506871984732, "learning_rate": 6.66045490288755e-06, "loss": 0.6042, "step": 4847 }, { "epoch": 0.4105864916366716, "grad_norm": 1.0534728323908886, "learning_rate": 6.659161049395187e-06, "loss": 0.6156, "step": 4848 }, { "epoch": 0.41067118356976495, "grad_norm": 1.3518780930097631, "learning_rate": 6.657867071042168e-06, "loss": 0.6439, "step": 4849 }, { "epoch": 0.41075587550285836, "grad_norm": 1.351340065516228, "learning_rate": 6.656572967925872e-06, "loss": 0.6658, "step": 4850 }, { "epoch": 0.4108405674359517, "grad_norm": 1.959116018012699, "learning_rate": 6.655278740143689e-06, "loss": 0.637, "step": 4851 }, { "epoch": 0.4109252593690451, "grad_norm": 1.7473419504042775, "learning_rate": 6.653984387793016e-06, "loss": 0.6618, "step": 4852 }, { "epoch": 0.41100995130213847, "grad_norm": 0.6505663707884832, "learning_rate": 6.6526899109712595e-06, "loss": 0.8975, "step": 4853 }, { "epoch": 0.41109464323523187, "grad_norm": 1.4485387644550514, "learning_rate": 6.651395309775837e-06, "loss": 0.6187, "step": 4854 }, { "epoch": 0.4111793351683252, "grad_norm": 1.6004956445762657, "learning_rate": 6.650100584304171e-06, "loss": 0.6277, "step": 4855 }, { "epoch": 0.41126402710141857, "grad_norm": 1.677574698874609, "learning_rate": 6.648805734653699e-06, "loss": 0.6551, "step": 4856 }, { "epoch": 0.411348719034512, "grad_norm": 2.1337772954774836, "learning_rate": 6.6475107609218644e-06, "loss": 0.6739, "step": 4857 }, { "epoch": 0.41143341096760533, "grad_norm": 1.7623611859012074, "learning_rate": 6.646215663206122e-06, "loss": 0.6047, "step": 4858 }, { "epoch": 0.41151810290069873, "grad_norm": 1.477050558432058, "learning_rate": 6.644920441603933e-06, "loss": 0.5959, "step": 4859 }, { "epoch": 0.4116027948337921, "grad_norm": 1.2637361670039644, "learning_rate": 6.643625096212771e-06, "loss": 0.6553, "step": 4860 }, { "epoch": 0.41168748676688544, "grad_norm": 1.5531947191274869, "learning_rate": 6.642329627130115e-06, "loss": 0.6907, "step": 4861 }, { "epoch": 0.41177217869997884, "grad_norm": 1.4680891818879134, "learning_rate": 6.641034034453462e-06, "loss": 0.5789, "step": 4862 }, { "epoch": 0.4118568706330722, "grad_norm": 0.6042818877709277, "learning_rate": 6.639738318280304e-06, "loss": 0.9235, "step": 4863 }, { "epoch": 0.4119415625661656, "grad_norm": 1.2622217086585623, "learning_rate": 6.638442478708157e-06, "loss": 0.5887, "step": 4864 }, { "epoch": 0.41202625449925895, "grad_norm": 1.8825778131379949, "learning_rate": 6.637146515834538e-06, "loss": 0.601, "step": 4865 }, { "epoch": 0.4121109464323523, "grad_norm": 1.2731984827010778, "learning_rate": 6.635850429756974e-06, "loss": 0.6381, "step": 4866 }, { "epoch": 0.4121956383654457, "grad_norm": 2.1388327652908257, "learning_rate": 6.634554220573002e-06, "loss": 0.6303, "step": 4867 }, { "epoch": 0.41228033029853906, "grad_norm": 1.2772005234864923, "learning_rate": 6.63325788838017e-06, "loss": 0.6032, "step": 4868 }, { "epoch": 0.41236502223163246, "grad_norm": 1.3316151173872668, "learning_rate": 6.631961433276034e-06, "loss": 0.6381, "step": 4869 }, { "epoch": 0.4124497141647258, "grad_norm": 1.6250940843767563, "learning_rate": 6.6306648553581586e-06, "loss": 0.6478, "step": 4870 }, { "epoch": 0.41253440609781916, "grad_norm": 3.354043735274988, "learning_rate": 6.629368154724117e-06, "loss": 0.6129, "step": 4871 }, { "epoch": 0.41261909803091257, "grad_norm": 2.003752159841911, "learning_rate": 6.628071331471495e-06, "loss": 0.7164, "step": 4872 }, { "epoch": 0.4127037899640059, "grad_norm": 1.4452761680103705, "learning_rate": 6.6267743856978835e-06, "loss": 0.6275, "step": 4873 }, { "epoch": 0.4127884818970993, "grad_norm": 0.6415119828830392, "learning_rate": 6.6254773175008854e-06, "loss": 0.8248, "step": 4874 }, { "epoch": 0.4128731738301927, "grad_norm": 1.4538825559755633, "learning_rate": 6.624180126978112e-06, "loss": 0.6689, "step": 4875 }, { "epoch": 0.412957865763286, "grad_norm": 1.3324733649929557, "learning_rate": 6.622882814227185e-06, "loss": 0.6242, "step": 4876 }, { "epoch": 0.41304255769637943, "grad_norm": 1.6705531562376308, "learning_rate": 6.62158537934573e-06, "loss": 0.7071, "step": 4877 }, { "epoch": 0.4131272496294728, "grad_norm": 1.2472761103283398, "learning_rate": 6.62028782243139e-06, "loss": 0.5974, "step": 4878 }, { "epoch": 0.4132119415625662, "grad_norm": 1.3885226415555363, "learning_rate": 6.618990143581812e-06, "loss": 0.6073, "step": 4879 }, { "epoch": 0.41329663349565954, "grad_norm": 1.4291537589267729, "learning_rate": 6.617692342894651e-06, "loss": 0.6148, "step": 4880 }, { "epoch": 0.4133813254287529, "grad_norm": 1.2627864596306009, "learning_rate": 6.616394420467575e-06, "loss": 0.6194, "step": 4881 }, { "epoch": 0.4134660173618463, "grad_norm": 1.4589214188039599, "learning_rate": 6.615096376398262e-06, "loss": 0.6495, "step": 4882 }, { "epoch": 0.41355070929493964, "grad_norm": 1.49512375364187, "learning_rate": 6.613798210784393e-06, "loss": 0.6975, "step": 4883 }, { "epoch": 0.41363540122803305, "grad_norm": 2.4412516860255304, "learning_rate": 6.612499923723663e-06, "loss": 0.6263, "step": 4884 }, { "epoch": 0.4137200931611264, "grad_norm": 2.115398333710301, "learning_rate": 6.611201515313776e-06, "loss": 0.6189, "step": 4885 }, { "epoch": 0.41380478509421975, "grad_norm": 1.949647812561387, "learning_rate": 6.6099029856524425e-06, "loss": 0.6552, "step": 4886 }, { "epoch": 0.41388947702731316, "grad_norm": 1.4152782299378976, "learning_rate": 6.608604334837385e-06, "loss": 0.5739, "step": 4887 }, { "epoch": 0.4139741689604065, "grad_norm": 1.547873323603281, "learning_rate": 6.607305562966333e-06, "loss": 0.6585, "step": 4888 }, { "epoch": 0.4140588608934999, "grad_norm": 1.2674158545087462, "learning_rate": 6.606006670137025e-06, "loss": 0.6136, "step": 4889 }, { "epoch": 0.41414355282659326, "grad_norm": 1.2915386753346256, "learning_rate": 6.604707656447213e-06, "loss": 0.6516, "step": 4890 }, { "epoch": 0.4142282447596866, "grad_norm": 1.3172970228821415, "learning_rate": 6.6034085219946505e-06, "loss": 0.6829, "step": 4891 }, { "epoch": 0.41431293669278, "grad_norm": 1.3802966352514727, "learning_rate": 6.602109266877108e-06, "loss": 0.6573, "step": 4892 }, { "epoch": 0.41439762862587337, "grad_norm": 1.3809761794720832, "learning_rate": 6.6008098911923594e-06, "loss": 0.6596, "step": 4893 }, { "epoch": 0.4144823205589668, "grad_norm": 1.7732811126838641, "learning_rate": 6.59951039503819e-06, "loss": 0.7499, "step": 4894 }, { "epoch": 0.4145670124920601, "grad_norm": 1.2100879800009476, "learning_rate": 6.598210778512393e-06, "loss": 0.6367, "step": 4895 }, { "epoch": 0.4146517044251535, "grad_norm": 1.4580964179707718, "learning_rate": 6.596911041712772e-06, "loss": 0.651, "step": 4896 }, { "epoch": 0.4147363963582469, "grad_norm": 1.7255236633189464, "learning_rate": 6.595611184737139e-06, "loss": 0.6796, "step": 4897 }, { "epoch": 0.41482108829134023, "grad_norm": 1.495252467591577, "learning_rate": 6.594311207683315e-06, "loss": 0.6332, "step": 4898 }, { "epoch": 0.41490578022443364, "grad_norm": 1.357567933132156, "learning_rate": 6.59301111064913e-06, "loss": 0.6579, "step": 4899 }, { "epoch": 0.414990472157527, "grad_norm": 1.6534360193696542, "learning_rate": 6.591710893732425e-06, "loss": 0.6302, "step": 4900 }, { "epoch": 0.41507516409062034, "grad_norm": 1.1934097652368305, "learning_rate": 6.590410557031045e-06, "loss": 0.6668, "step": 4901 }, { "epoch": 0.41515985602371375, "grad_norm": 1.5974533180400488, "learning_rate": 6.58911010064285e-06, "loss": 0.7001, "step": 4902 }, { "epoch": 0.4152445479568071, "grad_norm": 1.4079740886091383, "learning_rate": 6.5878095246657045e-06, "loss": 0.6164, "step": 4903 }, { "epoch": 0.4153292398899005, "grad_norm": 1.8749192906091958, "learning_rate": 6.586508829197487e-06, "loss": 0.6108, "step": 4904 }, { "epoch": 0.41541393182299385, "grad_norm": 1.3077901310480737, "learning_rate": 6.585208014336075e-06, "loss": 0.6277, "step": 4905 }, { "epoch": 0.41549862375608726, "grad_norm": 1.2890292060466952, "learning_rate": 6.583907080179368e-06, "loss": 0.6593, "step": 4906 }, { "epoch": 0.4155833156891806, "grad_norm": 2.745438640311252, "learning_rate": 6.582606026825267e-06, "loss": 0.5887, "step": 4907 }, { "epoch": 0.41566800762227396, "grad_norm": 1.2903726067184673, "learning_rate": 6.5813048543716815e-06, "loss": 0.6243, "step": 4908 }, { "epoch": 0.41575269955536737, "grad_norm": 1.3339513260513496, "learning_rate": 6.580003562916532e-06, "loss": 0.6407, "step": 4909 }, { "epoch": 0.4158373914884607, "grad_norm": 1.398785720617002, "learning_rate": 6.578702152557746e-06, "loss": 0.6365, "step": 4910 }, { "epoch": 0.4159220834215541, "grad_norm": 2.2351062443916057, "learning_rate": 6.5774006233932665e-06, "loss": 0.6125, "step": 4911 }, { "epoch": 0.4160067753546475, "grad_norm": 1.525762145157513, "learning_rate": 6.576098975521034e-06, "loss": 0.6647, "step": 4912 }, { "epoch": 0.4160914672877408, "grad_norm": 1.5429329378547672, "learning_rate": 6.574797209039012e-06, "loss": 0.6355, "step": 4913 }, { "epoch": 0.41617615922083423, "grad_norm": 1.3442408761929718, "learning_rate": 6.573495324045158e-06, "loss": 0.6139, "step": 4914 }, { "epoch": 0.4162608511539276, "grad_norm": 1.3708969476599069, "learning_rate": 6.57219332063745e-06, "loss": 0.628, "step": 4915 }, { "epoch": 0.416345543087021, "grad_norm": 5.7569056265685035, "learning_rate": 6.5708911989138695e-06, "loss": 0.6712, "step": 4916 }, { "epoch": 0.41643023502011434, "grad_norm": 5.8966726665783185, "learning_rate": 6.569588958972408e-06, "loss": 0.6288, "step": 4917 }, { "epoch": 0.4165149269532077, "grad_norm": 1.4667142225077094, "learning_rate": 6.568286600911064e-06, "loss": 0.6181, "step": 4918 }, { "epoch": 0.4165996188863011, "grad_norm": 1.1379952844977483, "learning_rate": 6.566984124827851e-06, "loss": 0.6528, "step": 4919 }, { "epoch": 0.41668431081939444, "grad_norm": 1.183538633661469, "learning_rate": 6.5656815308207845e-06, "loss": 0.6782, "step": 4920 }, { "epoch": 0.41676900275248785, "grad_norm": 1.5028612328676472, "learning_rate": 6.564378818987893e-06, "loss": 0.7076, "step": 4921 }, { "epoch": 0.4168536946855812, "grad_norm": 1.9202781921510075, "learning_rate": 6.56307598942721e-06, "loss": 0.6471, "step": 4922 }, { "epoch": 0.41693838661867455, "grad_norm": 1.5294458230270995, "learning_rate": 6.561773042236782e-06, "loss": 0.6441, "step": 4923 }, { "epoch": 0.41702307855176796, "grad_norm": 1.1702097804963276, "learning_rate": 6.560469977514664e-06, "loss": 0.5907, "step": 4924 }, { "epoch": 0.4171077704848613, "grad_norm": 1.453123958620761, "learning_rate": 6.559166795358916e-06, "loss": 0.6824, "step": 4925 }, { "epoch": 0.4171924624179547, "grad_norm": 1.178193343300475, "learning_rate": 6.557863495867611e-06, "loss": 0.669, "step": 4926 }, { "epoch": 0.41727715435104806, "grad_norm": 1.5830580404012513, "learning_rate": 6.5565600791388285e-06, "loss": 0.6352, "step": 4927 }, { "epoch": 0.4173618462841414, "grad_norm": 1.7326531477809863, "learning_rate": 6.555256545270658e-06, "loss": 0.5929, "step": 4928 }, { "epoch": 0.4174465382172348, "grad_norm": 1.567850044825194, "learning_rate": 6.553952894361196e-06, "loss": 0.623, "step": 4929 }, { "epoch": 0.41753123015032817, "grad_norm": 1.2929861827216984, "learning_rate": 6.55264912650855e-06, "loss": 0.686, "step": 4930 }, { "epoch": 0.4176159220834216, "grad_norm": 1.3520028169139704, "learning_rate": 6.551345241810837e-06, "loss": 0.6345, "step": 4931 }, { "epoch": 0.4177006140165149, "grad_norm": 1.1980861236510474, "learning_rate": 6.55004124036618e-06, "loss": 0.6474, "step": 4932 }, { "epoch": 0.4177853059496083, "grad_norm": 1.9939184566264303, "learning_rate": 6.54873712227271e-06, "loss": 0.6563, "step": 4933 }, { "epoch": 0.4178699978827017, "grad_norm": 1.352151214209186, "learning_rate": 6.547432887628571e-06, "loss": 0.6348, "step": 4934 }, { "epoch": 0.41795468981579503, "grad_norm": 1.8121733952717054, "learning_rate": 6.546128536531915e-06, "loss": 0.6152, "step": 4935 }, { "epoch": 0.41803938174888844, "grad_norm": 1.7112385439628126, "learning_rate": 6.544824069080899e-06, "loss": 0.5984, "step": 4936 }, { "epoch": 0.4181240736819818, "grad_norm": 1.6075372673244002, "learning_rate": 6.543519485373689e-06, "loss": 0.6137, "step": 4937 }, { "epoch": 0.41820876561507514, "grad_norm": 1.2359969857914845, "learning_rate": 6.542214785508466e-06, "loss": 0.5986, "step": 4938 }, { "epoch": 0.41829345754816855, "grad_norm": 1.5866328154540805, "learning_rate": 6.540909969583415e-06, "loss": 0.5597, "step": 4939 }, { "epoch": 0.4183781494812619, "grad_norm": 1.618982559849974, "learning_rate": 6.539605037696728e-06, "loss": 0.6827, "step": 4940 }, { "epoch": 0.4184628414143553, "grad_norm": 1.7830995209734584, "learning_rate": 6.5382999899466106e-06, "loss": 0.6456, "step": 4941 }, { "epoch": 0.41854753334744865, "grad_norm": 1.2968930744552014, "learning_rate": 6.536994826431271e-06, "loss": 0.5918, "step": 4942 }, { "epoch": 0.418632225280542, "grad_norm": 4.443635493785009, "learning_rate": 6.535689547248933e-06, "loss": 0.5905, "step": 4943 }, { "epoch": 0.4187169172136354, "grad_norm": 1.2500020212186458, "learning_rate": 6.534384152497826e-06, "loss": 0.6872, "step": 4944 }, { "epoch": 0.41880160914672876, "grad_norm": 1.8725725742936306, "learning_rate": 6.533078642276186e-06, "loss": 0.6602, "step": 4945 }, { "epoch": 0.41888630107982217, "grad_norm": 0.6872299525059966, "learning_rate": 6.53177301668226e-06, "loss": 0.8225, "step": 4946 }, { "epoch": 0.4189709930129155, "grad_norm": 1.8147945744507872, "learning_rate": 6.5304672758143014e-06, "loss": 0.6266, "step": 4947 }, { "epoch": 0.41905568494600887, "grad_norm": 1.2972868862377975, "learning_rate": 6.529161419770579e-06, "loss": 0.6915, "step": 4948 }, { "epoch": 0.41914037687910227, "grad_norm": 1.2722075368119268, "learning_rate": 6.527855448649362e-06, "loss": 0.6437, "step": 4949 }, { "epoch": 0.4192250688121956, "grad_norm": 1.4438083244898563, "learning_rate": 6.526549362548931e-06, "loss": 0.6379, "step": 4950 }, { "epoch": 0.41930976074528903, "grad_norm": 1.5391554068577091, "learning_rate": 6.525243161567576e-06, "loss": 0.6821, "step": 4951 }, { "epoch": 0.4193944526783824, "grad_norm": 1.2835560871708902, "learning_rate": 6.523936845803598e-06, "loss": 0.6935, "step": 4952 }, { "epoch": 0.41947914461147573, "grad_norm": 1.2758700729673207, "learning_rate": 6.522630415355304e-06, "loss": 0.6277, "step": 4953 }, { "epoch": 0.41956383654456914, "grad_norm": 2.4487500177712698, "learning_rate": 6.521323870321006e-06, "loss": 0.6385, "step": 4954 }, { "epoch": 0.4196485284776625, "grad_norm": 1.4556307543750426, "learning_rate": 6.520017210799032e-06, "loss": 0.6249, "step": 4955 }, { "epoch": 0.4197332204107559, "grad_norm": 1.1572676173567598, "learning_rate": 6.518710436887714e-06, "loss": 0.6046, "step": 4956 }, { "epoch": 0.41981791234384924, "grad_norm": 1.9175459845872702, "learning_rate": 6.517403548685394e-06, "loss": 0.655, "step": 4957 }, { "epoch": 0.41990260427694265, "grad_norm": 1.3937173560293599, "learning_rate": 6.516096546290422e-06, "loss": 0.6358, "step": 4958 }, { "epoch": 0.419987296210036, "grad_norm": 2.1275571624072964, "learning_rate": 6.514789429801156e-06, "loss": 0.6625, "step": 4959 }, { "epoch": 0.42007198814312935, "grad_norm": 0.7039543919626264, "learning_rate": 6.513482199315966e-06, "loss": 0.8632, "step": 4960 }, { "epoch": 0.42015668007622275, "grad_norm": 1.3813440253342177, "learning_rate": 6.512174854933224e-06, "loss": 0.6287, "step": 4961 }, { "epoch": 0.4202413720093161, "grad_norm": 1.3304819942578663, "learning_rate": 6.51086739675132e-06, "loss": 0.6749, "step": 4962 }, { "epoch": 0.4203260639424095, "grad_norm": 1.2735799898728468, "learning_rate": 6.50955982486864e-06, "loss": 0.6393, "step": 4963 }, { "epoch": 0.42041075587550286, "grad_norm": 1.4413130223020714, "learning_rate": 6.508252139383592e-06, "loss": 0.6853, "step": 4964 }, { "epoch": 0.4204954478085962, "grad_norm": 1.391378053243867, "learning_rate": 6.506944340394583e-06, "loss": 0.6123, "step": 4965 }, { "epoch": 0.4205801397416896, "grad_norm": 1.4922801139962885, "learning_rate": 6.505636428000034e-06, "loss": 0.6635, "step": 4966 }, { "epoch": 0.42066483167478297, "grad_norm": 1.3741867445854439, "learning_rate": 6.50432840229837e-06, "loss": 0.6703, "step": 4967 }, { "epoch": 0.4207495236078764, "grad_norm": 1.770929563692289, "learning_rate": 6.503020263388027e-06, "loss": 0.6694, "step": 4968 }, { "epoch": 0.4208342155409697, "grad_norm": 1.1961823759080983, "learning_rate": 6.501712011367452e-06, "loss": 0.6613, "step": 4969 }, { "epoch": 0.4209189074740631, "grad_norm": 1.5851566084988373, "learning_rate": 6.500403646335096e-06, "loss": 0.6312, "step": 4970 }, { "epoch": 0.4210035994071565, "grad_norm": 1.242376713139198, "learning_rate": 6.49909516838942e-06, "loss": 0.6415, "step": 4971 }, { "epoch": 0.42108829134024983, "grad_norm": 1.3877148071558716, "learning_rate": 6.497786577628894e-06, "loss": 0.6625, "step": 4972 }, { "epoch": 0.42117298327334324, "grad_norm": 0.6469091734015217, "learning_rate": 6.496477874151999e-06, "loss": 0.8669, "step": 4973 }, { "epoch": 0.4212576752064366, "grad_norm": 1.257529187555121, "learning_rate": 6.495169058057218e-06, "loss": 0.6771, "step": 4974 }, { "epoch": 0.42134236713952994, "grad_norm": 1.3654190190462476, "learning_rate": 6.493860129443047e-06, "loss": 0.6104, "step": 4975 }, { "epoch": 0.42142705907262334, "grad_norm": 2.349492167984208, "learning_rate": 6.492551088407994e-06, "loss": 0.6315, "step": 4976 }, { "epoch": 0.4215117510057167, "grad_norm": 1.3265012541271872, "learning_rate": 6.491241935050566e-06, "loss": 0.6346, "step": 4977 }, { "epoch": 0.4215964429388101, "grad_norm": 1.6880007963212122, "learning_rate": 6.489932669469285e-06, "loss": 0.6431, "step": 4978 }, { "epoch": 0.42168113487190345, "grad_norm": 0.6558131507260971, "learning_rate": 6.488623291762684e-06, "loss": 0.809, "step": 4979 }, { "epoch": 0.4217658268049968, "grad_norm": 1.3583249182053496, "learning_rate": 6.487313802029296e-06, "loss": 0.6466, "step": 4980 }, { "epoch": 0.4218505187380902, "grad_norm": 1.3381598318402994, "learning_rate": 6.486004200367669e-06, "loss": 0.6705, "step": 4981 }, { "epoch": 0.42193521067118356, "grad_norm": 1.385118092846284, "learning_rate": 6.484694486876356e-06, "loss": 0.6616, "step": 4982 }, { "epoch": 0.42201990260427696, "grad_norm": 1.7052162806353173, "learning_rate": 6.483384661653923e-06, "loss": 0.6282, "step": 4983 }, { "epoch": 0.4221045945373703, "grad_norm": 0.6808976734738236, "learning_rate": 6.482074724798938e-06, "loss": 0.8844, "step": 4984 }, { "epoch": 0.42218928647046366, "grad_norm": 1.5813321900765003, "learning_rate": 6.480764676409982e-06, "loss": 0.662, "step": 4985 }, { "epoch": 0.42227397840355707, "grad_norm": 1.4157325677167627, "learning_rate": 6.479454516585644e-06, "loss": 0.6464, "step": 4986 }, { "epoch": 0.4223586703366504, "grad_norm": 1.3896481401678733, "learning_rate": 6.4781442454245195e-06, "loss": 0.6331, "step": 4987 }, { "epoch": 0.4224433622697438, "grad_norm": 1.249736713433979, "learning_rate": 6.476833863025211e-06, "loss": 0.6545, "step": 4988 }, { "epoch": 0.4225280542028372, "grad_norm": 1.4524488030028708, "learning_rate": 6.475523369486336e-06, "loss": 0.5905, "step": 4989 }, { "epoch": 0.42261274613593053, "grad_norm": 2.2513744934725386, "learning_rate": 6.474212764906516e-06, "loss": 0.6336, "step": 4990 }, { "epoch": 0.42269743806902393, "grad_norm": 1.5720732078220192, "learning_rate": 6.472902049384377e-06, "loss": 0.6542, "step": 4991 }, { "epoch": 0.4227821300021173, "grad_norm": 1.3570316112278622, "learning_rate": 6.4715912230185585e-06, "loss": 0.6283, "step": 4992 }, { "epoch": 0.4228668219352107, "grad_norm": 1.416789621521588, "learning_rate": 6.4702802859077085e-06, "loss": 0.6096, "step": 4993 }, { "epoch": 0.42295151386830404, "grad_norm": 1.766318899162491, "learning_rate": 6.468969238150483e-06, "loss": 0.6256, "step": 4994 }, { "epoch": 0.4230362058013974, "grad_norm": 1.268654056372071, "learning_rate": 6.467658079845542e-06, "loss": 0.5762, "step": 4995 }, { "epoch": 0.4231208977344908, "grad_norm": 1.356054829125625, "learning_rate": 6.466346811091559e-06, "loss": 0.6764, "step": 4996 }, { "epoch": 0.42320558966758415, "grad_norm": 1.2685573796689438, "learning_rate": 6.465035431987216e-06, "loss": 0.631, "step": 4997 }, { "epoch": 0.42329028160067755, "grad_norm": 1.2685600035956972, "learning_rate": 6.463723942631198e-06, "loss": 0.6068, "step": 4998 }, { "epoch": 0.4233749735337709, "grad_norm": 0.6964520898304587, "learning_rate": 6.462412343122202e-06, "loss": 0.8713, "step": 4999 }, { "epoch": 0.42345966546686425, "grad_norm": 1.3133147584898275, "learning_rate": 6.461100633558932e-06, "loss": 0.6377, "step": 5000 }, { "epoch": 0.42354435739995766, "grad_norm": 0.6576444140380604, "learning_rate": 6.459788814040105e-06, "loss": 0.8894, "step": 5001 }, { "epoch": 0.423629049333051, "grad_norm": 1.2666566667719827, "learning_rate": 6.458476884664439e-06, "loss": 0.6198, "step": 5002 }, { "epoch": 0.4237137412661444, "grad_norm": 1.572314841969136, "learning_rate": 6.457164845530664e-06, "loss": 0.6647, "step": 5003 }, { "epoch": 0.42379843319923777, "grad_norm": 1.2396135903151377, "learning_rate": 6.455852696737518e-06, "loss": 0.615, "step": 5004 }, { "epoch": 0.4238831251323311, "grad_norm": 1.8686387476077273, "learning_rate": 6.454540438383748e-06, "loss": 0.6366, "step": 5005 }, { "epoch": 0.4239678170654245, "grad_norm": 1.2713431843637508, "learning_rate": 6.453228070568107e-06, "loss": 0.6782, "step": 5006 }, { "epoch": 0.4240525089985179, "grad_norm": 1.2210977798746332, "learning_rate": 6.451915593389361e-06, "loss": 0.6615, "step": 5007 }, { "epoch": 0.4241372009316113, "grad_norm": 1.621626836260009, "learning_rate": 6.450603006946275e-06, "loss": 0.6499, "step": 5008 }, { "epoch": 0.42422189286470463, "grad_norm": 1.166035329765896, "learning_rate": 6.449290311337634e-06, "loss": 0.5916, "step": 5009 }, { "epoch": 0.42430658479779804, "grad_norm": 3.58903484472484, "learning_rate": 6.447977506662221e-06, "loss": 0.638, "step": 5010 }, { "epoch": 0.4243912767308914, "grad_norm": 1.467159924274421, "learning_rate": 6.446664593018834e-06, "loss": 0.5981, "step": 5011 }, { "epoch": 0.42447596866398474, "grad_norm": 1.3335962257555807, "learning_rate": 6.445351570506277e-06, "loss": 0.756, "step": 5012 }, { "epoch": 0.42456066059707814, "grad_norm": 1.3655418366266552, "learning_rate": 6.444038439223358e-06, "loss": 0.5781, "step": 5013 }, { "epoch": 0.4246453525301715, "grad_norm": 1.4065000325436934, "learning_rate": 6.442725199268902e-06, "loss": 0.6133, "step": 5014 }, { "epoch": 0.4247300444632649, "grad_norm": 2.419581150517727, "learning_rate": 6.441411850741735e-06, "loss": 0.6282, "step": 5015 }, { "epoch": 0.42481473639635825, "grad_norm": 1.6916368244020532, "learning_rate": 6.440098393740694e-06, "loss": 0.6324, "step": 5016 }, { "epoch": 0.4248994283294516, "grad_norm": 1.9584103670352053, "learning_rate": 6.438784828364621e-06, "loss": 0.6632, "step": 5017 }, { "epoch": 0.424984120262545, "grad_norm": 1.5377640636547771, "learning_rate": 6.437471154712373e-06, "loss": 0.6653, "step": 5018 }, { "epoch": 0.42506881219563836, "grad_norm": 1.2939300056330383, "learning_rate": 6.436157372882809e-06, "loss": 0.6339, "step": 5019 }, { "epoch": 0.42515350412873176, "grad_norm": 1.5989990386492205, "learning_rate": 6.4348434829747975e-06, "loss": 0.6058, "step": 5020 }, { "epoch": 0.4252381960618251, "grad_norm": 1.2704825993476399, "learning_rate": 6.433529485087214e-06, "loss": 0.6962, "step": 5021 }, { "epoch": 0.42532288799491846, "grad_norm": 0.6383884742209671, "learning_rate": 6.432215379318949e-06, "loss": 0.8716, "step": 5022 }, { "epoch": 0.42540757992801187, "grad_norm": 1.4281572481098475, "learning_rate": 6.43090116576889e-06, "loss": 0.6467, "step": 5023 }, { "epoch": 0.4254922718611052, "grad_norm": 0.6610597420597935, "learning_rate": 6.4295868445359435e-06, "loss": 0.8677, "step": 5024 }, { "epoch": 0.4255769637941986, "grad_norm": 0.6218042709713073, "learning_rate": 6.428272415719016e-06, "loss": 0.8928, "step": 5025 }, { "epoch": 0.425661655727292, "grad_norm": 1.2808178322317796, "learning_rate": 6.426957879417026e-06, "loss": 0.6861, "step": 5026 }, { "epoch": 0.4257463476603853, "grad_norm": 1.5671276943444745, "learning_rate": 6.4256432357289e-06, "loss": 0.6536, "step": 5027 }, { "epoch": 0.42583103959347873, "grad_norm": 3.2303193000721415, "learning_rate": 6.424328484753571e-06, "loss": 0.6308, "step": 5028 }, { "epoch": 0.4259157315265721, "grad_norm": 1.4467677889359785, "learning_rate": 6.4230136265899816e-06, "loss": 0.6216, "step": 5029 }, { "epoch": 0.4260004234596655, "grad_norm": 1.433445683512562, "learning_rate": 6.421698661337081e-06, "loss": 0.6355, "step": 5030 }, { "epoch": 0.42608511539275884, "grad_norm": 1.358642947389916, "learning_rate": 6.4203835890938284e-06, "loss": 0.6764, "step": 5031 }, { "epoch": 0.4261698073258522, "grad_norm": 1.3089910673554104, "learning_rate": 6.41906840995919e-06, "loss": 0.6402, "step": 5032 }, { "epoch": 0.4262544992589456, "grad_norm": 1.8325044173682878, "learning_rate": 6.417753124032138e-06, "loss": 0.6812, "step": 5033 }, { "epoch": 0.42633919119203895, "grad_norm": 1.4813939228178425, "learning_rate": 6.416437731411655e-06, "loss": 0.6509, "step": 5034 }, { "epoch": 0.42642388312513235, "grad_norm": 1.379220607809681, "learning_rate": 6.415122232196735e-06, "loss": 0.6079, "step": 5035 }, { "epoch": 0.4265085750582257, "grad_norm": 1.2324818827704576, "learning_rate": 6.413806626486374e-06, "loss": 0.6737, "step": 5036 }, { "epoch": 0.42659326699131905, "grad_norm": 0.6269669510826091, "learning_rate": 6.4124909143795765e-06, "loss": 0.8612, "step": 5037 }, { "epoch": 0.42667795892441246, "grad_norm": 1.6020678994817241, "learning_rate": 6.411175095975357e-06, "loss": 0.6588, "step": 5038 }, { "epoch": 0.4267626508575058, "grad_norm": 0.666717058592715, "learning_rate": 6.409859171372741e-06, "loss": 0.8468, "step": 5039 }, { "epoch": 0.4268473427905992, "grad_norm": 1.1219472832598347, "learning_rate": 6.408543140670757e-06, "loss": 0.6138, "step": 5040 }, { "epoch": 0.42693203472369257, "grad_norm": 4.02678121617003, "learning_rate": 6.40722700396844e-06, "loss": 0.6438, "step": 5041 }, { "epoch": 0.4270167266567859, "grad_norm": 1.3413582503619488, "learning_rate": 6.405910761364842e-06, "loss": 0.6586, "step": 5042 }, { "epoch": 0.4271014185898793, "grad_norm": 1.354992553462661, "learning_rate": 6.404594412959015e-06, "loss": 0.7144, "step": 5043 }, { "epoch": 0.4271861105229727, "grad_norm": 0.6947754724621013, "learning_rate": 6.40327795885002e-06, "loss": 0.8745, "step": 5044 }, { "epoch": 0.4272708024560661, "grad_norm": 0.6035473448083692, "learning_rate": 6.401961399136926e-06, "loss": 0.8695, "step": 5045 }, { "epoch": 0.42735549438915943, "grad_norm": 1.607479860247708, "learning_rate": 6.400644733918816e-06, "loss": 0.5741, "step": 5046 }, { "epoch": 0.4274401863222528, "grad_norm": 1.7526438254310646, "learning_rate": 6.399327963294772e-06, "loss": 0.628, "step": 5047 }, { "epoch": 0.4275248782553462, "grad_norm": 1.6152300482069128, "learning_rate": 6.3980110873638875e-06, "loss": 0.6815, "step": 5048 }, { "epoch": 0.42760957018843954, "grad_norm": 1.175326891642268, "learning_rate": 6.396694106225269e-06, "loss": 0.6193, "step": 5049 }, { "epoch": 0.42769426212153294, "grad_norm": 1.239030075369852, "learning_rate": 6.395377019978019e-06, "loss": 0.6907, "step": 5050 }, { "epoch": 0.4277789540546263, "grad_norm": 1.6940628735880334, "learning_rate": 6.394059828721262e-06, "loss": 0.7088, "step": 5051 }, { "epoch": 0.42786364598771964, "grad_norm": 1.576137319576229, "learning_rate": 6.392742532554122e-06, "loss": 0.6235, "step": 5052 }, { "epoch": 0.42794833792081305, "grad_norm": 0.6098422389918635, "learning_rate": 6.39142513157573e-06, "loss": 0.8838, "step": 5053 }, { "epoch": 0.4280330298539064, "grad_norm": 1.3762651306442637, "learning_rate": 6.390107625885228e-06, "loss": 0.6532, "step": 5054 }, { "epoch": 0.4281177217869998, "grad_norm": 1.2571914565607432, "learning_rate": 6.388790015581767e-06, "loss": 0.6237, "step": 5055 }, { "epoch": 0.42820241372009316, "grad_norm": 1.612776220678184, "learning_rate": 6.387472300764503e-06, "loss": 0.6402, "step": 5056 }, { "epoch": 0.42828710565318656, "grad_norm": 0.6197904077077057, "learning_rate": 6.386154481532603e-06, "loss": 0.8669, "step": 5057 }, { "epoch": 0.4283717975862799, "grad_norm": 2.400216706459036, "learning_rate": 6.384836557985236e-06, "loss": 0.6424, "step": 5058 }, { "epoch": 0.42845648951937326, "grad_norm": 2.0728511186966734, "learning_rate": 6.383518530221586e-06, "loss": 0.6434, "step": 5059 }, { "epoch": 0.42854118145246667, "grad_norm": 1.6811297180000517, "learning_rate": 6.382200398340841e-06, "loss": 0.647, "step": 5060 }, { "epoch": 0.42862587338556, "grad_norm": 0.6004958942391644, "learning_rate": 6.380882162442196e-06, "loss": 0.8549, "step": 5061 }, { "epoch": 0.4287105653186534, "grad_norm": 2.612579326807097, "learning_rate": 6.3795638226248555e-06, "loss": 0.6746, "step": 5062 }, { "epoch": 0.4287952572517468, "grad_norm": 1.222120229402693, "learning_rate": 6.378245378988035e-06, "loss": 0.5981, "step": 5063 }, { "epoch": 0.4288799491848401, "grad_norm": 1.2477599805171802, "learning_rate": 6.376926831630949e-06, "loss": 0.6069, "step": 5064 }, { "epoch": 0.42896464111793353, "grad_norm": 1.4103548158078971, "learning_rate": 6.3756081806528295e-06, "loss": 0.6396, "step": 5065 }, { "epoch": 0.4290493330510269, "grad_norm": 1.7351252383851097, "learning_rate": 6.374289426152909e-06, "loss": 0.6046, "step": 5066 }, { "epoch": 0.4291340249841203, "grad_norm": 1.268062409840633, "learning_rate": 6.3729705682304325e-06, "loss": 0.6591, "step": 5067 }, { "epoch": 0.42921871691721364, "grad_norm": 3.3188908015681915, "learning_rate": 6.37165160698465e-06, "loss": 0.6004, "step": 5068 }, { "epoch": 0.429303408850307, "grad_norm": 2.1571493589612483, "learning_rate": 6.370332542514821e-06, "loss": 0.5967, "step": 5069 }, { "epoch": 0.4293881007834004, "grad_norm": 1.4704469335786792, "learning_rate": 6.369013374920212e-06, "loss": 0.6986, "step": 5070 }, { "epoch": 0.42947279271649375, "grad_norm": 1.2840554166755114, "learning_rate": 6.367694104300098e-06, "loss": 0.6668, "step": 5071 }, { "epoch": 0.42955748464958715, "grad_norm": 1.526281030253436, "learning_rate": 6.3663747307537595e-06, "loss": 0.5902, "step": 5072 }, { "epoch": 0.4296421765826805, "grad_norm": 1.5401495803241918, "learning_rate": 6.365055254380488e-06, "loss": 0.6422, "step": 5073 }, { "epoch": 0.42972686851577385, "grad_norm": 1.5149895309395571, "learning_rate": 6.363735675279579e-06, "loss": 0.6045, "step": 5074 }, { "epoch": 0.42981156044886726, "grad_norm": 1.0832977517506328, "learning_rate": 6.36241599355034e-06, "loss": 0.6179, "step": 5075 }, { "epoch": 0.4298962523819606, "grad_norm": 1.2419802675294385, "learning_rate": 6.3610962092920825e-06, "loss": 0.6673, "step": 5076 }, { "epoch": 0.429980944315054, "grad_norm": 1.296992529821863, "learning_rate": 6.35977632260413e-06, "loss": 0.6338, "step": 5077 }, { "epoch": 0.43006563624814736, "grad_norm": 2.08675309606878, "learning_rate": 6.358456333585806e-06, "loss": 0.6054, "step": 5078 }, { "epoch": 0.4301503281812407, "grad_norm": 2.1949389557198553, "learning_rate": 6.3571362423364504e-06, "loss": 0.5849, "step": 5079 }, { "epoch": 0.4302350201143341, "grad_norm": 1.549790500412256, "learning_rate": 6.3558160489554065e-06, "loss": 0.6078, "step": 5080 }, { "epoch": 0.43031971204742747, "grad_norm": 1.6072157840204588, "learning_rate": 6.3544957535420274e-06, "loss": 0.6278, "step": 5081 }, { "epoch": 0.4304044039805209, "grad_norm": 1.39349893401903, "learning_rate": 6.3531753561956675e-06, "loss": 0.6145, "step": 5082 }, { "epoch": 0.43048909591361423, "grad_norm": 1.3207714392376237, "learning_rate": 6.3518548570156965e-06, "loss": 0.6253, "step": 5083 }, { "epoch": 0.4305737878467076, "grad_norm": 0.6506917540376359, "learning_rate": 6.350534256101492e-06, "loss": 0.9144, "step": 5084 }, { "epoch": 0.430658479779801, "grad_norm": 1.5233945068496264, "learning_rate": 6.349213553552431e-06, "loss": 0.711, "step": 5085 }, { "epoch": 0.43074317171289433, "grad_norm": 1.2800197111605645, "learning_rate": 6.347892749467907e-06, "loss": 0.7171, "step": 5086 }, { "epoch": 0.43082786364598774, "grad_norm": 1.393958747640379, "learning_rate": 6.346571843947315e-06, "loss": 0.6344, "step": 5087 }, { "epoch": 0.4309125555790811, "grad_norm": 1.245371751596998, "learning_rate": 6.345250837090062e-06, "loss": 0.6603, "step": 5088 }, { "epoch": 0.43099724751217444, "grad_norm": 1.4814675370272155, "learning_rate": 6.343929728995559e-06, "loss": 0.6503, "step": 5089 }, { "epoch": 0.43108193944526785, "grad_norm": 1.4231452305479384, "learning_rate": 6.342608519763229e-06, "loss": 0.6506, "step": 5090 }, { "epoch": 0.4311666313783612, "grad_norm": 0.6718379337902207, "learning_rate": 6.341287209492498e-06, "loss": 0.8706, "step": 5091 }, { "epoch": 0.4312513233114546, "grad_norm": 1.4967436213363114, "learning_rate": 6.339965798282802e-06, "loss": 0.6097, "step": 5092 }, { "epoch": 0.43133601524454795, "grad_norm": 1.2045897703284068, "learning_rate": 6.338644286233584e-06, "loss": 0.6501, "step": 5093 }, { "epoch": 0.4314207071776413, "grad_norm": 0.6120071099292704, "learning_rate": 6.337322673444295e-06, "loss": 0.8981, "step": 5094 }, { "epoch": 0.4315053991107347, "grad_norm": 1.7194348842328988, "learning_rate": 6.336000960014394e-06, "loss": 0.6661, "step": 5095 }, { "epoch": 0.43159009104382806, "grad_norm": 1.5986741359353138, "learning_rate": 6.334679146043345e-06, "loss": 0.6792, "step": 5096 }, { "epoch": 0.43167478297692147, "grad_norm": 1.1656094457320865, "learning_rate": 6.333357231630623e-06, "loss": 0.5807, "step": 5097 }, { "epoch": 0.4317594749100148, "grad_norm": 2.2982999413040064, "learning_rate": 6.332035216875711e-06, "loss": 0.6222, "step": 5098 }, { "epoch": 0.43184416684310817, "grad_norm": 0.6363632821230526, "learning_rate": 6.330713101878093e-06, "loss": 0.8646, "step": 5099 }, { "epoch": 0.4319288587762016, "grad_norm": 1.4861255550809707, "learning_rate": 6.329390886737268e-06, "loss": 0.6782, "step": 5100 }, { "epoch": 0.4320135507092949, "grad_norm": 1.3818190347514738, "learning_rate": 6.3280685715527416e-06, "loss": 0.6209, "step": 5101 }, { "epoch": 0.43209824264238833, "grad_norm": 1.1362477955370003, "learning_rate": 6.3267461564240205e-06, "loss": 0.6288, "step": 5102 }, { "epoch": 0.4321829345754817, "grad_norm": 1.1147935031652874, "learning_rate": 6.325423641450625e-06, "loss": 0.6358, "step": 5103 }, { "epoch": 0.43226762650857503, "grad_norm": 3.6185732625179345, "learning_rate": 6.324101026732083e-06, "loss": 0.6376, "step": 5104 }, { "epoch": 0.43235231844166844, "grad_norm": 1.331462511764263, "learning_rate": 6.322778312367927e-06, "loss": 0.6423, "step": 5105 }, { "epoch": 0.4324370103747618, "grad_norm": 1.365576535680459, "learning_rate": 6.321455498457701e-06, "loss": 0.6017, "step": 5106 }, { "epoch": 0.4325217023078552, "grad_norm": 1.837515947756171, "learning_rate": 6.3201325851009475e-06, "loss": 0.6298, "step": 5107 }, { "epoch": 0.43260639424094854, "grad_norm": 1.3734828570519975, "learning_rate": 6.318809572397229e-06, "loss": 0.6712, "step": 5108 }, { "epoch": 0.43269108617404195, "grad_norm": 1.2911968537733756, "learning_rate": 6.317486460446107e-06, "loss": 0.67, "step": 5109 }, { "epoch": 0.4327757781071353, "grad_norm": 1.8289999970768338, "learning_rate": 6.31616324934715e-06, "loss": 0.6917, "step": 5110 }, { "epoch": 0.43286047004022865, "grad_norm": 1.9849120495524692, "learning_rate": 6.314839939199941e-06, "loss": 0.6305, "step": 5111 }, { "epoch": 0.43294516197332206, "grad_norm": 1.6615722195082503, "learning_rate": 6.313516530104065e-06, "loss": 0.6771, "step": 5112 }, { "epoch": 0.4330298539064154, "grad_norm": 1.158764111852159, "learning_rate": 6.312193022159113e-06, "loss": 0.6618, "step": 5113 }, { "epoch": 0.4331145458395088, "grad_norm": 1.3467109905317478, "learning_rate": 6.310869415464688e-06, "loss": 0.6368, "step": 5114 }, { "epoch": 0.43319923777260216, "grad_norm": 1.1830401853367927, "learning_rate": 6.309545710120398e-06, "loss": 0.6067, "step": 5115 }, { "epoch": 0.4332839297056955, "grad_norm": 1.1964218550446712, "learning_rate": 6.30822190622586e-06, "loss": 0.6764, "step": 5116 }, { "epoch": 0.4333686216387889, "grad_norm": 1.7207380432420378, "learning_rate": 6.306898003880693e-06, "loss": 0.6258, "step": 5117 }, { "epoch": 0.43345331357188227, "grad_norm": 3.290346532291398, "learning_rate": 6.305574003184535e-06, "loss": 0.6237, "step": 5118 }, { "epoch": 0.4335380055049757, "grad_norm": 2.2912881312585847, "learning_rate": 6.304249904237019e-06, "loss": 0.5986, "step": 5119 }, { "epoch": 0.433622697438069, "grad_norm": 1.436986545997408, "learning_rate": 6.302925707137791e-06, "loss": 0.6337, "step": 5120 }, { "epoch": 0.4337073893711624, "grad_norm": 1.320481181545309, "learning_rate": 6.301601411986502e-06, "loss": 0.6097, "step": 5121 }, { "epoch": 0.4337920813042558, "grad_norm": 1.6366888191259221, "learning_rate": 6.300277018882817e-06, "loss": 0.6643, "step": 5122 }, { "epoch": 0.43387677323734913, "grad_norm": 0.6854167763459792, "learning_rate": 6.2989525279264e-06, "loss": 0.8734, "step": 5123 }, { "epoch": 0.43396146517044254, "grad_norm": 1.7750045214490897, "learning_rate": 6.297627939216927e-06, "loss": 0.6189, "step": 5124 }, { "epoch": 0.4340461571035359, "grad_norm": 1.2238129947663208, "learning_rate": 6.29630325285408e-06, "loss": 0.6032, "step": 5125 }, { "epoch": 0.43413084903662924, "grad_norm": 2.1994498677853462, "learning_rate": 6.29497846893755e-06, "loss": 0.6571, "step": 5126 }, { "epoch": 0.43421554096972265, "grad_norm": 1.2448072331208917, "learning_rate": 6.2936535875670325e-06, "loss": 0.6243, "step": 5127 }, { "epoch": 0.434300232902816, "grad_norm": 2.6198376728291994, "learning_rate": 6.292328608842231e-06, "loss": 0.6095, "step": 5128 }, { "epoch": 0.4343849248359094, "grad_norm": 1.527835994410838, "learning_rate": 6.291003532862861e-06, "loss": 0.6491, "step": 5129 }, { "epoch": 0.43446961676900275, "grad_norm": 1.473312803663155, "learning_rate": 6.289678359728638e-06, "loss": 0.6939, "step": 5130 }, { "epoch": 0.4345543087020961, "grad_norm": 1.258735672692102, "learning_rate": 6.288353089539288e-06, "loss": 0.6768, "step": 5131 }, { "epoch": 0.4346390006351895, "grad_norm": 1.2141993953601498, "learning_rate": 6.287027722394548e-06, "loss": 0.6596, "step": 5132 }, { "epoch": 0.43472369256828286, "grad_norm": 1.432582164797586, "learning_rate": 6.285702258394155e-06, "loss": 0.613, "step": 5133 }, { "epoch": 0.43480838450137627, "grad_norm": 0.6086955184947175, "learning_rate": 6.284376697637861e-06, "loss": 0.8878, "step": 5134 }, { "epoch": 0.4348930764344696, "grad_norm": 1.2394377397488594, "learning_rate": 6.283051040225416e-06, "loss": 0.6487, "step": 5135 }, { "epoch": 0.43497776836756297, "grad_norm": 1.4987852874647944, "learning_rate": 6.281725286256588e-06, "loss": 0.6773, "step": 5136 }, { "epoch": 0.4350624603006564, "grad_norm": 2.2736232833543673, "learning_rate": 6.280399435831146e-06, "loss": 0.6007, "step": 5137 }, { "epoch": 0.4351471522337497, "grad_norm": 1.4543790050704626, "learning_rate": 6.279073489048866e-06, "loss": 0.6712, "step": 5138 }, { "epoch": 0.43523184416684313, "grad_norm": 1.7238438971323153, "learning_rate": 6.277747446009532e-06, "loss": 0.6197, "step": 5139 }, { "epoch": 0.4353165360999365, "grad_norm": 0.6467480093914201, "learning_rate": 6.276421306812938e-06, "loss": 0.8893, "step": 5140 }, { "epoch": 0.43540122803302983, "grad_norm": 1.3783204000205116, "learning_rate": 6.275095071558881e-06, "loss": 0.6469, "step": 5141 }, { "epoch": 0.43548591996612324, "grad_norm": 1.3238874807622634, "learning_rate": 6.273768740347167e-06, "loss": 0.6347, "step": 5142 }, { "epoch": 0.4355706118992166, "grad_norm": 1.6697294068413007, "learning_rate": 6.2724423132776124e-06, "loss": 0.7081, "step": 5143 }, { "epoch": 0.43565530383231, "grad_norm": 1.6022319447564226, "learning_rate": 6.271115790450034e-06, "loss": 0.6508, "step": 5144 }, { "epoch": 0.43573999576540334, "grad_norm": 2.0810533617542446, "learning_rate": 6.269789171964263e-06, "loss": 0.6141, "step": 5145 }, { "epoch": 0.4358246876984967, "grad_norm": 1.2464907393897104, "learning_rate": 6.268462457920132e-06, "loss": 0.61, "step": 5146 }, { "epoch": 0.4359093796315901, "grad_norm": 1.1353027114324283, "learning_rate": 6.267135648417484e-06, "loss": 0.6573, "step": 5147 }, { "epoch": 0.43599407156468345, "grad_norm": 1.3068631552495635, "learning_rate": 6.265808743556169e-06, "loss": 0.595, "step": 5148 }, { "epoch": 0.43607876349777686, "grad_norm": 1.4992219323805067, "learning_rate": 6.264481743436043e-06, "loss": 0.6537, "step": 5149 }, { "epoch": 0.4361634554308702, "grad_norm": 1.4852584434102376, "learning_rate": 6.263154648156971e-06, "loss": 0.6547, "step": 5150 }, { "epoch": 0.43624814736396356, "grad_norm": 1.6041312466458555, "learning_rate": 6.261827457818822e-06, "loss": 0.6456, "step": 5151 }, { "epoch": 0.43633283929705696, "grad_norm": 2.1331571971063568, "learning_rate": 6.260500172521476e-06, "loss": 0.6356, "step": 5152 }, { "epoch": 0.4364175312301503, "grad_norm": 1.2150225670825978, "learning_rate": 6.259172792364816e-06, "loss": 0.6184, "step": 5153 }, { "epoch": 0.4365022231632437, "grad_norm": 1.6103878854690212, "learning_rate": 6.2578453174487365e-06, "loss": 0.5938, "step": 5154 }, { "epoch": 0.43658691509633707, "grad_norm": 1.4680333001362917, "learning_rate": 6.256517747873136e-06, "loss": 0.6452, "step": 5155 }, { "epoch": 0.4366716070294304, "grad_norm": 1.7646469462947318, "learning_rate": 6.255190083737922e-06, "loss": 0.8009, "step": 5156 }, { "epoch": 0.4367562989625238, "grad_norm": 1.471117519155474, "learning_rate": 6.253862325143007e-06, "loss": 0.5919, "step": 5157 }, { "epoch": 0.4368409908956172, "grad_norm": 1.1903304683534757, "learning_rate": 6.2525344721883144e-06, "loss": 0.6239, "step": 5158 }, { "epoch": 0.4369256828287106, "grad_norm": 0.5909390839121375, "learning_rate": 6.251206524973768e-06, "loss": 0.886, "step": 5159 }, { "epoch": 0.43701037476180393, "grad_norm": 1.3115356834456637, "learning_rate": 6.2498784835993055e-06, "loss": 0.6414, "step": 5160 }, { "epoch": 0.43709506669489734, "grad_norm": 1.2793646070664864, "learning_rate": 6.248550348164869e-06, "loss": 0.634, "step": 5161 }, { "epoch": 0.4371797586279907, "grad_norm": 0.6104272819995978, "learning_rate": 6.247222118770409e-06, "loss": 0.87, "step": 5162 }, { "epoch": 0.43726445056108404, "grad_norm": 1.7863354347775278, "learning_rate": 6.245893795515877e-06, "loss": 0.6892, "step": 5163 }, { "epoch": 0.43734914249417745, "grad_norm": 1.167957416216495, "learning_rate": 6.244565378501242e-06, "loss": 0.6488, "step": 5164 }, { "epoch": 0.4374338344272708, "grad_norm": 1.145573019472028, "learning_rate": 6.243236867826471e-06, "loss": 0.6959, "step": 5165 }, { "epoch": 0.4375185263603642, "grad_norm": 1.7756419809644395, "learning_rate": 6.241908263591542e-06, "loss": 0.6362, "step": 5166 }, { "epoch": 0.43760321829345755, "grad_norm": 0.5697883755928075, "learning_rate": 6.240579565896441e-06, "loss": 0.8488, "step": 5167 }, { "epoch": 0.4376879102265509, "grad_norm": 1.39656203070613, "learning_rate": 6.239250774841159e-06, "loss": 0.6429, "step": 5168 }, { "epoch": 0.4377726021596443, "grad_norm": 1.1629810966624052, "learning_rate": 6.237921890525694e-06, "loss": 0.6735, "step": 5169 }, { "epoch": 0.43785729409273766, "grad_norm": 1.3047692057840576, "learning_rate": 6.236592913050049e-06, "loss": 0.6423, "step": 5170 }, { "epoch": 0.43794198602583106, "grad_norm": 0.6656044407786331, "learning_rate": 6.235263842514242e-06, "loss": 0.7622, "step": 5171 }, { "epoch": 0.4380266779589244, "grad_norm": 1.475121787854268, "learning_rate": 6.233934679018289e-06, "loss": 0.6321, "step": 5172 }, { "epoch": 0.43811136989201777, "grad_norm": 1.3646409163524382, "learning_rate": 6.232605422662216e-06, "loss": 0.6669, "step": 5173 }, { "epoch": 0.43819606182511117, "grad_norm": 1.5339447890202396, "learning_rate": 6.23127607354606e-06, "loss": 0.6401, "step": 5174 }, { "epoch": 0.4382807537582045, "grad_norm": 1.318700519272947, "learning_rate": 6.229946631769859e-06, "loss": 0.639, "step": 5175 }, { "epoch": 0.43836544569129793, "grad_norm": 1.7235740416939906, "learning_rate": 6.22861709743366e-06, "loss": 0.6906, "step": 5176 }, { "epoch": 0.4384501376243913, "grad_norm": 1.3809413109088102, "learning_rate": 6.2272874706375195e-06, "loss": 0.5921, "step": 5177 }, { "epoch": 0.43853482955748463, "grad_norm": 1.2463620361509806, "learning_rate": 6.225957751481498e-06, "loss": 0.6136, "step": 5178 }, { "epoch": 0.43861952149057803, "grad_norm": 0.5937650187467892, "learning_rate": 6.224627940065665e-06, "loss": 0.8748, "step": 5179 }, { "epoch": 0.4387042134236714, "grad_norm": 1.8932415869688186, "learning_rate": 6.223298036490093e-06, "loss": 0.6425, "step": 5180 }, { "epoch": 0.4387889053567648, "grad_norm": 1.4012945867207218, "learning_rate": 6.221968040854866e-06, "loss": 0.6104, "step": 5181 }, { "epoch": 0.43887359728985814, "grad_norm": 0.6499629653173861, "learning_rate": 6.220637953260075e-06, "loss": 0.8592, "step": 5182 }, { "epoch": 0.4389582892229515, "grad_norm": 1.4507649501837911, "learning_rate": 6.219307773805813e-06, "loss": 0.6526, "step": 5183 }, { "epoch": 0.4390429811560449, "grad_norm": 1.2541713240204109, "learning_rate": 6.2179775025921865e-06, "loss": 0.6589, "step": 5184 }, { "epoch": 0.43912767308913825, "grad_norm": 1.3935112638790426, "learning_rate": 6.216647139719302e-06, "loss": 0.6006, "step": 5185 }, { "epoch": 0.43921236502223165, "grad_norm": 1.3137620311847351, "learning_rate": 6.215316685287277e-06, "loss": 0.6174, "step": 5186 }, { "epoch": 0.439297056955325, "grad_norm": 0.5788240516386353, "learning_rate": 6.213986139396236e-06, "loss": 0.8699, "step": 5187 }, { "epoch": 0.43938174888841836, "grad_norm": 1.2260399566859075, "learning_rate": 6.212655502146312e-06, "loss": 0.6364, "step": 5188 }, { "epoch": 0.43946644082151176, "grad_norm": 1.5840648684671461, "learning_rate": 6.211324773637639e-06, "loss": 0.6547, "step": 5189 }, { "epoch": 0.4395511327546051, "grad_norm": 1.4066641959910287, "learning_rate": 6.209993953970361e-06, "loss": 0.6045, "step": 5190 }, { "epoch": 0.4396358246876985, "grad_norm": 1.3255123280596595, "learning_rate": 6.208663043244632e-06, "loss": 0.6075, "step": 5191 }, { "epoch": 0.43972051662079187, "grad_norm": 1.4454262381470873, "learning_rate": 6.207332041560608e-06, "loss": 0.655, "step": 5192 }, { "epoch": 0.4398052085538852, "grad_norm": 1.4467634121432555, "learning_rate": 6.2060009490184555e-06, "loss": 0.6483, "step": 5193 }, { "epoch": 0.4398899004869786, "grad_norm": 1.606192731132078, "learning_rate": 6.204669765718345e-06, "loss": 0.6339, "step": 5194 }, { "epoch": 0.439974592420072, "grad_norm": 1.3712342666188209, "learning_rate": 6.203338491760456e-06, "loss": 0.6173, "step": 5195 }, { "epoch": 0.4400592843531654, "grad_norm": 1.2548252068138785, "learning_rate": 6.202007127244975e-06, "loss": 0.6507, "step": 5196 }, { "epoch": 0.44014397628625873, "grad_norm": 0.6294177735110503, "learning_rate": 6.20067567227209e-06, "loss": 0.857, "step": 5197 }, { "epoch": 0.4402286682193521, "grad_norm": 3.4796120146112015, "learning_rate": 6.199344126942004e-06, "loss": 0.6396, "step": 5198 }, { "epoch": 0.4403133601524455, "grad_norm": 1.5895614233439315, "learning_rate": 6.198012491354922e-06, "loss": 0.5838, "step": 5199 }, { "epoch": 0.44039805208553884, "grad_norm": 1.34637389710904, "learning_rate": 6.196680765611057e-06, "loss": 0.6631, "step": 5200 }, { "epoch": 0.44048274401863224, "grad_norm": 1.221209343535536, "learning_rate": 6.195348949810626e-06, "loss": 0.6115, "step": 5201 }, { "epoch": 0.4405674359517256, "grad_norm": 1.3384871143909598, "learning_rate": 6.194017044053857e-06, "loss": 0.668, "step": 5202 }, { "epoch": 0.44065212788481894, "grad_norm": 1.3692206867842494, "learning_rate": 6.192685048440984e-06, "loss": 0.6332, "step": 5203 }, { "epoch": 0.44073681981791235, "grad_norm": 1.9761162227107796, "learning_rate": 6.191352963072244e-06, "loss": 0.6824, "step": 5204 }, { "epoch": 0.4408215117510057, "grad_norm": 1.288962548296497, "learning_rate": 6.190020788047887e-06, "loss": 0.6484, "step": 5205 }, { "epoch": 0.4409062036840991, "grad_norm": 0.593025597680646, "learning_rate": 6.1886885234681624e-06, "loss": 0.8638, "step": 5206 }, { "epoch": 0.44099089561719246, "grad_norm": 1.5813155028160626, "learning_rate": 6.187356169433333e-06, "loss": 0.6906, "step": 5207 }, { "epoch": 0.4410755875502858, "grad_norm": 1.6270652982595883, "learning_rate": 6.186023726043664e-06, "loss": 0.6711, "step": 5208 }, { "epoch": 0.4411602794833792, "grad_norm": 1.1859345678949853, "learning_rate": 6.184691193399429e-06, "loss": 0.5815, "step": 5209 }, { "epoch": 0.44124497141647256, "grad_norm": 1.5365782137464004, "learning_rate": 6.1833585716009094e-06, "loss": 0.6147, "step": 5210 }, { "epoch": 0.44132966334956597, "grad_norm": 1.4570610225654412, "learning_rate": 6.18202586074839e-06, "loss": 0.6325, "step": 5211 }, { "epoch": 0.4414143552826593, "grad_norm": 1.189816330158364, "learning_rate": 6.180693060942166e-06, "loss": 0.6415, "step": 5212 }, { "epoch": 0.4414990472157527, "grad_norm": 1.3602866286215354, "learning_rate": 6.1793601722825384e-06, "loss": 0.6677, "step": 5213 }, { "epoch": 0.4415837391488461, "grad_norm": 0.7042830439559669, "learning_rate": 6.1780271948698115e-06, "loss": 0.8419, "step": 5214 }, { "epoch": 0.4416684310819394, "grad_norm": 1.563248447222578, "learning_rate": 6.176694128804299e-06, "loss": 0.635, "step": 5215 }, { "epoch": 0.44175312301503283, "grad_norm": 1.653444745341789, "learning_rate": 6.175360974186324e-06, "loss": 0.6124, "step": 5216 }, { "epoch": 0.4418378149481262, "grad_norm": 0.6134448578753108, "learning_rate": 6.174027731116213e-06, "loss": 0.8784, "step": 5217 }, { "epoch": 0.4419225068812196, "grad_norm": 1.5392797062651624, "learning_rate": 6.172694399694296e-06, "loss": 0.654, "step": 5218 }, { "epoch": 0.44200719881431294, "grad_norm": 1.8806360594826603, "learning_rate": 6.171360980020918e-06, "loss": 0.6526, "step": 5219 }, { "epoch": 0.4420918907474063, "grad_norm": 1.5306278568259775, "learning_rate": 6.170027472196422e-06, "loss": 0.6457, "step": 5220 }, { "epoch": 0.4421765826804997, "grad_norm": 1.9881689928341024, "learning_rate": 6.168693876321164e-06, "loss": 0.5867, "step": 5221 }, { "epoch": 0.44226127461359305, "grad_norm": 1.4757201884094637, "learning_rate": 6.167360192495502e-06, "loss": 0.5843, "step": 5222 }, { "epoch": 0.44234596654668645, "grad_norm": 0.7098387685810129, "learning_rate": 6.166026420819805e-06, "loss": 0.8864, "step": 5223 }, { "epoch": 0.4424306584797798, "grad_norm": 0.6637511118784488, "learning_rate": 6.1646925613944455e-06, "loss": 0.8654, "step": 5224 }, { "epoch": 0.44251535041287315, "grad_norm": 0.6171860854726438, "learning_rate": 6.163358614319803e-06, "loss": 0.8726, "step": 5225 }, { "epoch": 0.44260004234596656, "grad_norm": 1.2449645446494935, "learning_rate": 6.162024579696265e-06, "loss": 0.6145, "step": 5226 }, { "epoch": 0.4426847342790599, "grad_norm": 1.3420588988889948, "learning_rate": 6.160690457624223e-06, "loss": 0.6042, "step": 5227 }, { "epoch": 0.4427694262121533, "grad_norm": 0.666639373032948, "learning_rate": 6.15935624820408e-06, "loss": 0.9072, "step": 5228 }, { "epoch": 0.44285411814524667, "grad_norm": 3.1486490616376863, "learning_rate": 6.158021951536239e-06, "loss": 0.5711, "step": 5229 }, { "epoch": 0.44293881007834, "grad_norm": 1.4464069170390992, "learning_rate": 6.156687567721113e-06, "loss": 0.6967, "step": 5230 }, { "epoch": 0.4430235020114334, "grad_norm": 1.371241786360809, "learning_rate": 6.155353096859125e-06, "loss": 0.6666, "step": 5231 }, { "epoch": 0.4431081939445268, "grad_norm": 1.1367338892997336, "learning_rate": 6.154018539050697e-06, "loss": 0.6673, "step": 5232 }, { "epoch": 0.4431928858776202, "grad_norm": 1.5055012697117478, "learning_rate": 6.152683894396263e-06, "loss": 0.68, "step": 5233 }, { "epoch": 0.44327757781071353, "grad_norm": 0.6089115262642079, "learning_rate": 6.151349162996263e-06, "loss": 0.8557, "step": 5234 }, { "epoch": 0.4433622697438069, "grad_norm": 1.5380465981868099, "learning_rate": 6.150014344951142e-06, "loss": 0.5781, "step": 5235 }, { "epoch": 0.4434469616769003, "grad_norm": 1.5036758904883143, "learning_rate": 6.14867944036135e-06, "loss": 0.6182, "step": 5236 }, { "epoch": 0.44353165360999364, "grad_norm": 1.4255086668551933, "learning_rate": 6.14734444932735e-06, "loss": 0.6258, "step": 5237 }, { "epoch": 0.44361634554308704, "grad_norm": 1.8933616496821026, "learning_rate": 6.146009371949604e-06, "loss": 0.6958, "step": 5238 }, { "epoch": 0.4437010374761804, "grad_norm": 10.343531731590774, "learning_rate": 6.144674208328583e-06, "loss": 0.6318, "step": 5239 }, { "epoch": 0.44378572940927374, "grad_norm": 1.1802491652576588, "learning_rate": 6.143338958564767e-06, "loss": 0.6055, "step": 5240 }, { "epoch": 0.44387042134236715, "grad_norm": 1.5698821350282985, "learning_rate": 6.142003622758641e-06, "loss": 0.6098, "step": 5241 }, { "epoch": 0.4439551132754605, "grad_norm": 1.3696241491801646, "learning_rate": 6.140668201010695e-06, "loss": 0.6402, "step": 5242 }, { "epoch": 0.4440398052085539, "grad_norm": 1.6004027648228687, "learning_rate": 6.139332693421426e-06, "loss": 0.6115, "step": 5243 }, { "epoch": 0.44412449714164726, "grad_norm": 2.8426798613488176, "learning_rate": 6.137997100091339e-06, "loss": 0.6497, "step": 5244 }, { "epoch": 0.4442091890747406, "grad_norm": 1.3127589102724824, "learning_rate": 6.136661421120946e-06, "loss": 0.6777, "step": 5245 }, { "epoch": 0.444293881007834, "grad_norm": 1.431929429183714, "learning_rate": 6.13532565661076e-06, "loss": 0.5855, "step": 5246 }, { "epoch": 0.44437857294092736, "grad_norm": 1.4143160233866698, "learning_rate": 6.133989806661307e-06, "loss": 0.7097, "step": 5247 }, { "epoch": 0.44446326487402077, "grad_norm": 1.2903275829201302, "learning_rate": 6.132653871373116e-06, "loss": 0.6309, "step": 5248 }, { "epoch": 0.4445479568071141, "grad_norm": 1.3108648601584096, "learning_rate": 6.131317850846724e-06, "loss": 0.6441, "step": 5249 }, { "epoch": 0.44463264874020747, "grad_norm": 1.7517682314034546, "learning_rate": 6.129981745182674e-06, "loss": 0.6248, "step": 5250 }, { "epoch": 0.4447173406733009, "grad_norm": 1.3230286851830129, "learning_rate": 6.128645554481513e-06, "loss": 0.6608, "step": 5251 }, { "epoch": 0.4448020326063942, "grad_norm": 1.4482950425619328, "learning_rate": 6.127309278843798e-06, "loss": 0.6624, "step": 5252 }, { "epoch": 0.44488672453948763, "grad_norm": 1.1140022427857534, "learning_rate": 6.12597291837009e-06, "loss": 0.6546, "step": 5253 }, { "epoch": 0.444971416472581, "grad_norm": 1.6074933957190174, "learning_rate": 6.1246364731609585e-06, "loss": 0.6026, "step": 5254 }, { "epoch": 0.44505610840567433, "grad_norm": 1.4930753518390847, "learning_rate": 6.123299943316976e-06, "loss": 0.6342, "step": 5255 }, { "epoch": 0.44514080033876774, "grad_norm": 2.048652425499305, "learning_rate": 6.121963328938725e-06, "loss": 0.6418, "step": 5256 }, { "epoch": 0.4452254922718611, "grad_norm": 1.2944935440894572, "learning_rate": 6.120626630126792e-06, "loss": 0.6085, "step": 5257 }, { "epoch": 0.4453101842049545, "grad_norm": 1.2438654158058537, "learning_rate": 6.119289846981772e-06, "loss": 0.6593, "step": 5258 }, { "epoch": 0.44539487613804785, "grad_norm": 1.7387212896217838, "learning_rate": 6.1179529796042635e-06, "loss": 0.6511, "step": 5259 }, { "epoch": 0.4454795680711412, "grad_norm": 1.561101219208524, "learning_rate": 6.116616028094873e-06, "loss": 0.6004, "step": 5260 }, { "epoch": 0.4455642600042346, "grad_norm": 1.5533287943723704, "learning_rate": 6.115278992554214e-06, "loss": 0.6628, "step": 5261 }, { "epoch": 0.44564895193732795, "grad_norm": 2.317355778354888, "learning_rate": 6.113941873082907e-06, "loss": 0.6207, "step": 5262 }, { "epoch": 0.44573364387042136, "grad_norm": 1.6106913287242235, "learning_rate": 6.112604669781572e-06, "loss": 0.6277, "step": 5263 }, { "epoch": 0.4458183358035147, "grad_norm": 1.2489883884227304, "learning_rate": 6.111267382750846e-06, "loss": 0.6682, "step": 5264 }, { "epoch": 0.4459030277366081, "grad_norm": 0.6570462926337451, "learning_rate": 6.109930012091365e-06, "loss": 0.8352, "step": 5265 }, { "epoch": 0.44598771966970147, "grad_norm": 1.4488539088387602, "learning_rate": 6.108592557903774e-06, "loss": 0.6805, "step": 5266 }, { "epoch": 0.4460724116027948, "grad_norm": 1.6011394938941854, "learning_rate": 6.107255020288721e-06, "loss": 0.6503, "step": 5267 }, { "epoch": 0.4461571035358882, "grad_norm": 1.3841004746840717, "learning_rate": 6.1059173993468655e-06, "loss": 0.6551, "step": 5268 }, { "epoch": 0.4462417954689816, "grad_norm": 1.7135654703429755, "learning_rate": 6.104579695178869e-06, "loss": 0.7011, "step": 5269 }, { "epoch": 0.446326487402075, "grad_norm": 1.317234887702419, "learning_rate": 6.103241907885402e-06, "loss": 0.6382, "step": 5270 }, { "epoch": 0.44641117933516833, "grad_norm": 1.9833211197773883, "learning_rate": 6.10190403756714e-06, "loss": 0.688, "step": 5271 }, { "epoch": 0.4464958712682617, "grad_norm": 5.060481545741034, "learning_rate": 6.100566084324764e-06, "loss": 0.6566, "step": 5272 }, { "epoch": 0.4465805632013551, "grad_norm": 1.3535039746446993, "learning_rate": 6.099228048258962e-06, "loss": 0.6171, "step": 5273 }, { "epoch": 0.44666525513444844, "grad_norm": 1.574524223067388, "learning_rate": 6.0978899294704284e-06, "loss": 0.6694, "step": 5274 }, { "epoch": 0.44674994706754184, "grad_norm": 0.5999913795416225, "learning_rate": 6.096551728059865e-06, "loss": 0.8117, "step": 5275 }, { "epoch": 0.4468346390006352, "grad_norm": 2.1331381687860396, "learning_rate": 6.095213444127976e-06, "loss": 0.6764, "step": 5276 }, { "epoch": 0.44691933093372854, "grad_norm": 1.282405866837248, "learning_rate": 6.093875077775476e-06, "loss": 0.6014, "step": 5277 }, { "epoch": 0.44700402286682195, "grad_norm": 1.538077056159952, "learning_rate": 6.092536629103085e-06, "loss": 0.6858, "step": 5278 }, { "epoch": 0.4470887147999153, "grad_norm": 1.2415714823030777, "learning_rate": 6.091198098211529e-06, "loss": 0.5984, "step": 5279 }, { "epoch": 0.4471734067330087, "grad_norm": 1.1304333529425303, "learning_rate": 6.089859485201535e-06, "loss": 0.6889, "step": 5280 }, { "epoch": 0.44725809866610206, "grad_norm": 1.240843069212173, "learning_rate": 6.088520790173844e-06, "loss": 0.6772, "step": 5281 }, { "epoch": 0.4473427905991954, "grad_norm": 1.747989923807132, "learning_rate": 6.087182013229202e-06, "loss": 0.6402, "step": 5282 }, { "epoch": 0.4474274825322888, "grad_norm": 1.3250046585341029, "learning_rate": 6.085843154468355e-06, "loss": 0.6198, "step": 5283 }, { "epoch": 0.44751217446538216, "grad_norm": 1.4291329286105217, "learning_rate": 6.084504213992059e-06, "loss": 0.6264, "step": 5284 }, { "epoch": 0.44759686639847557, "grad_norm": 1.6587110639190703, "learning_rate": 6.0831651919010805e-06, "loss": 0.6132, "step": 5285 }, { "epoch": 0.4476815583315689, "grad_norm": 1.390929243783891, "learning_rate": 6.081826088296185e-06, "loss": 0.6518, "step": 5286 }, { "epoch": 0.44776625026466227, "grad_norm": 1.4642865355781374, "learning_rate": 6.080486903278149e-06, "loss": 0.7213, "step": 5287 }, { "epoch": 0.4478509421977557, "grad_norm": 1.6448675686202627, "learning_rate": 6.079147636947751e-06, "loss": 0.6584, "step": 5288 }, { "epoch": 0.447935634130849, "grad_norm": 1.4072197912192341, "learning_rate": 6.077808289405779e-06, "loss": 0.6369, "step": 5289 }, { "epoch": 0.44802032606394243, "grad_norm": 1.1796554579297518, "learning_rate": 6.076468860753026e-06, "loss": 0.6101, "step": 5290 }, { "epoch": 0.4481050179970358, "grad_norm": 1.833970938328551, "learning_rate": 6.075129351090291e-06, "loss": 0.6653, "step": 5291 }, { "epoch": 0.44818970993012913, "grad_norm": 1.3743556037475129, "learning_rate": 6.073789760518379e-06, "loss": 0.6754, "step": 5292 }, { "epoch": 0.44827440186322254, "grad_norm": 1.555331321682818, "learning_rate": 6.072450089138102e-06, "loss": 0.6465, "step": 5293 }, { "epoch": 0.4483590937963159, "grad_norm": 1.545742080622727, "learning_rate": 6.071110337050276e-06, "loss": 0.6407, "step": 5294 }, { "epoch": 0.4484437857294093, "grad_norm": 1.2676433684742383, "learning_rate": 6.069770504355726e-06, "loss": 0.6078, "step": 5295 }, { "epoch": 0.44852847766250264, "grad_norm": 1.9747659492139997, "learning_rate": 6.068430591155282e-06, "loss": 0.6586, "step": 5296 }, { "epoch": 0.448613169595596, "grad_norm": 1.2937072146402906, "learning_rate": 6.067090597549777e-06, "loss": 0.6954, "step": 5297 }, { "epoch": 0.4486978615286894, "grad_norm": 2.6263885628307695, "learning_rate": 6.0657505236400535e-06, "loss": 0.6488, "step": 5298 }, { "epoch": 0.44878255346178275, "grad_norm": 1.60160673577579, "learning_rate": 6.064410369526961e-06, "loss": 0.7147, "step": 5299 }, { "epoch": 0.44886724539487616, "grad_norm": 1.1405715819837987, "learning_rate": 6.063070135311352e-06, "loss": 0.6616, "step": 5300 }, { "epoch": 0.4489519373279695, "grad_norm": 1.2886685808648766, "learning_rate": 6.0617298210940854e-06, "loss": 0.6265, "step": 5301 }, { "epoch": 0.44903662926106286, "grad_norm": 0.6267894874211516, "learning_rate": 6.060389426976027e-06, "loss": 0.8626, "step": 5302 }, { "epoch": 0.44912132119415626, "grad_norm": 0.6310337352151321, "learning_rate": 6.059048953058051e-06, "loss": 0.8957, "step": 5303 }, { "epoch": 0.4492060131272496, "grad_norm": 1.5719480963060346, "learning_rate": 6.057708399441032e-06, "loss": 0.6182, "step": 5304 }, { "epoch": 0.449290705060343, "grad_norm": 1.9878125047717858, "learning_rate": 6.056367766225853e-06, "loss": 0.5997, "step": 5305 }, { "epoch": 0.44937539699343637, "grad_norm": 1.6630183068591735, "learning_rate": 6.055027053513409e-06, "loss": 0.6746, "step": 5306 }, { "epoch": 0.4494600889265297, "grad_norm": 1.604090784498351, "learning_rate": 6.053686261404593e-06, "loss": 0.6315, "step": 5307 }, { "epoch": 0.4495447808596231, "grad_norm": 2.016713118790568, "learning_rate": 6.052345390000305e-06, "loss": 0.6929, "step": 5308 }, { "epoch": 0.4496294727927165, "grad_norm": 3.9597653101796255, "learning_rate": 6.051004439401454e-06, "loss": 0.6403, "step": 5309 }, { "epoch": 0.4497141647258099, "grad_norm": 1.6363612381160717, "learning_rate": 6.0496634097089535e-06, "loss": 0.6458, "step": 5310 }, { "epoch": 0.44979885665890323, "grad_norm": 0.7938674408762455, "learning_rate": 6.048322301023723e-06, "loss": 0.8799, "step": 5311 }, { "epoch": 0.4498835485919966, "grad_norm": 1.6217234189157546, "learning_rate": 6.046981113446689e-06, "loss": 0.6487, "step": 5312 }, { "epoch": 0.44996824052509, "grad_norm": 1.5540466258684238, "learning_rate": 6.045639847078781e-06, "loss": 0.6024, "step": 5313 }, { "epoch": 0.45005293245818334, "grad_norm": 1.492540605906474, "learning_rate": 6.044298502020939e-06, "loss": 0.634, "step": 5314 }, { "epoch": 0.45013762439127675, "grad_norm": 1.4718770183968874, "learning_rate": 6.042957078374104e-06, "loss": 0.6506, "step": 5315 }, { "epoch": 0.4502223163243701, "grad_norm": 1.3375166693475227, "learning_rate": 6.041615576239227e-06, "loss": 0.6144, "step": 5316 }, { "epoch": 0.4503070082574635, "grad_norm": 15.81460765308137, "learning_rate": 6.040273995717261e-06, "loss": 0.6317, "step": 5317 }, { "epoch": 0.45039170019055685, "grad_norm": 1.4952470944086196, "learning_rate": 6.03893233690917e-06, "loss": 0.6633, "step": 5318 }, { "epoch": 0.4504763921236502, "grad_norm": 1.3632962071933292, "learning_rate": 6.037590599915917e-06, "loss": 0.5934, "step": 5319 }, { "epoch": 0.4505610840567436, "grad_norm": 2.0587758440043094, "learning_rate": 6.036248784838479e-06, "loss": 0.6979, "step": 5320 }, { "epoch": 0.45064577598983696, "grad_norm": 1.5199492957827394, "learning_rate": 6.034906891777832e-06, "loss": 0.6537, "step": 5321 }, { "epoch": 0.45073046792293037, "grad_norm": 1.37462680539809, "learning_rate": 6.033564920834961e-06, "loss": 0.6672, "step": 5322 }, { "epoch": 0.4508151598560237, "grad_norm": 1.4716858178269248, "learning_rate": 6.032222872110857e-06, "loss": 0.674, "step": 5323 }, { "epoch": 0.45089985178911707, "grad_norm": 1.4069763781237612, "learning_rate": 6.030880745706516e-06, "loss": 0.6539, "step": 5324 }, { "epoch": 0.4509845437222105, "grad_norm": 0.6331168153546065, "learning_rate": 6.0295385417229405e-06, "loss": 0.838, "step": 5325 }, { "epoch": 0.4510692356553038, "grad_norm": 1.7191118443150621, "learning_rate": 6.028196260261137e-06, "loss": 0.6377, "step": 5326 }, { "epoch": 0.45115392758839723, "grad_norm": 1.2646227734184303, "learning_rate": 6.026853901422122e-06, "loss": 0.6599, "step": 5327 }, { "epoch": 0.4512386195214906, "grad_norm": 2.325592153240206, "learning_rate": 6.025511465306913e-06, "loss": 0.624, "step": 5328 }, { "epoch": 0.45132331145458393, "grad_norm": 2.138461272680671, "learning_rate": 6.024168952016535e-06, "loss": 0.6522, "step": 5329 }, { "epoch": 0.45140800338767734, "grad_norm": 1.5666505811192781, "learning_rate": 6.02282636165202e-06, "loss": 0.6896, "step": 5330 }, { "epoch": 0.4514926953207707, "grad_norm": 1.289634451213219, "learning_rate": 6.021483694314406e-06, "loss": 0.615, "step": 5331 }, { "epoch": 0.4515773872538641, "grad_norm": 1.9993547186205631, "learning_rate": 6.0201409501047355e-06, "loss": 0.6352, "step": 5332 }, { "epoch": 0.45166207918695744, "grad_norm": 1.1789180976026647, "learning_rate": 6.018798129124055e-06, "loss": 0.6608, "step": 5333 }, { "epoch": 0.4517467711200508, "grad_norm": 1.9864679024318612, "learning_rate": 6.0174552314734214e-06, "loss": 0.6717, "step": 5334 }, { "epoch": 0.4518314630531442, "grad_norm": 1.5096132600276213, "learning_rate": 6.0161122572538945e-06, "loss": 0.7268, "step": 5335 }, { "epoch": 0.45191615498623755, "grad_norm": 1.8886334258527884, "learning_rate": 6.014769206566538e-06, "loss": 0.6113, "step": 5336 }, { "epoch": 0.45200084691933096, "grad_norm": 1.259265317011015, "learning_rate": 6.013426079512426e-06, "loss": 0.6492, "step": 5337 }, { "epoch": 0.4520855388524243, "grad_norm": 1.2640141056533862, "learning_rate": 6.012082876192635e-06, "loss": 0.6264, "step": 5338 }, { "epoch": 0.45217023078551766, "grad_norm": 1.4210323905101876, "learning_rate": 6.010739596708251e-06, "loss": 0.6672, "step": 5339 }, { "epoch": 0.45225492271861106, "grad_norm": 1.1961876079298306, "learning_rate": 6.009396241160357e-06, "loss": 0.6241, "step": 5340 }, { "epoch": 0.4523396146517044, "grad_norm": 1.790618118798762, "learning_rate": 6.008052809650052e-06, "loss": 0.6123, "step": 5341 }, { "epoch": 0.4524243065847978, "grad_norm": 1.2628781449125575, "learning_rate": 6.006709302278434e-06, "loss": 0.6306, "step": 5342 }, { "epoch": 0.45250899851789117, "grad_norm": 2.182074837945466, "learning_rate": 6.005365719146611e-06, "loss": 0.6093, "step": 5343 }, { "epoch": 0.4525936904509845, "grad_norm": 1.6139664539417888, "learning_rate": 6.004022060355695e-06, "loss": 0.6085, "step": 5344 }, { "epoch": 0.4526783823840779, "grad_norm": 1.361366120538763, "learning_rate": 6.002678326006802e-06, "loss": 0.6797, "step": 5345 }, { "epoch": 0.4527630743171713, "grad_norm": 1.287206523688155, "learning_rate": 6.001334516201055e-06, "loss": 0.7071, "step": 5346 }, { "epoch": 0.4528477662502647, "grad_norm": 1.130247638940298, "learning_rate": 5.9999906310395824e-06, "loss": 0.6377, "step": 5347 }, { "epoch": 0.45293245818335803, "grad_norm": 0.7066270174321215, "learning_rate": 5.998646670623521e-06, "loss": 0.8359, "step": 5348 }, { "epoch": 0.4530171501164514, "grad_norm": 0.6020433804878178, "learning_rate": 5.997302635054011e-06, "loss": 0.8739, "step": 5349 }, { "epoch": 0.4531018420495448, "grad_norm": 1.699641215420796, "learning_rate": 5.995958524432192e-06, "loss": 0.6462, "step": 5350 }, { "epoch": 0.45318653398263814, "grad_norm": 5.747928898239171, "learning_rate": 5.994614338859225e-06, "loss": 0.6112, "step": 5351 }, { "epoch": 0.45327122591573155, "grad_norm": 1.6288832278738055, "learning_rate": 5.993270078436261e-06, "loss": 0.6224, "step": 5352 }, { "epoch": 0.4533559178488249, "grad_norm": 1.407137208148775, "learning_rate": 5.991925743264463e-06, "loss": 0.6252, "step": 5353 }, { "epoch": 0.45344060978191825, "grad_norm": 1.4273732931112137, "learning_rate": 5.9905813334450004e-06, "loss": 0.6502, "step": 5354 }, { "epoch": 0.45352530171501165, "grad_norm": 1.151229585076739, "learning_rate": 5.989236849079047e-06, "loss": 0.5625, "step": 5355 }, { "epoch": 0.453609993648105, "grad_norm": 0.6782980493711994, "learning_rate": 5.987892290267784e-06, "loss": 0.8789, "step": 5356 }, { "epoch": 0.4536946855811984, "grad_norm": 1.8276775712739524, "learning_rate": 5.986547657112393e-06, "loss": 0.6582, "step": 5357 }, { "epoch": 0.45377937751429176, "grad_norm": 1.6558386622697534, "learning_rate": 5.9852029497140686e-06, "loss": 0.6245, "step": 5358 }, { "epoch": 0.4538640694473851, "grad_norm": 1.401441297919328, "learning_rate": 5.983858168174004e-06, "loss": 0.6619, "step": 5359 }, { "epoch": 0.4539487613804785, "grad_norm": 0.6161582085832242, "learning_rate": 5.982513312593403e-06, "loss": 0.8898, "step": 5360 }, { "epoch": 0.45403345331357187, "grad_norm": 1.4847806583678813, "learning_rate": 5.981168383073472e-06, "loss": 0.624, "step": 5361 }, { "epoch": 0.4541181452466653, "grad_norm": 1.2561885521298086, "learning_rate": 5.979823379715426e-06, "loss": 0.6051, "step": 5362 }, { "epoch": 0.4542028371797586, "grad_norm": 1.30323057778706, "learning_rate": 5.97847830262048e-06, "loss": 0.6264, "step": 5363 }, { "epoch": 0.454287529112852, "grad_norm": 1.3802135474904818, "learning_rate": 5.977133151889863e-06, "loss": 0.6747, "step": 5364 }, { "epoch": 0.4543722210459454, "grad_norm": 1.2170065675626711, "learning_rate": 5.975787927624801e-06, "loss": 0.6279, "step": 5365 }, { "epoch": 0.45445691297903873, "grad_norm": 1.25376391746709, "learning_rate": 5.9744426299265315e-06, "loss": 0.6499, "step": 5366 }, { "epoch": 0.45454160491213214, "grad_norm": 1.6943203268384965, "learning_rate": 5.973097258896292e-06, "loss": 0.6901, "step": 5367 }, { "epoch": 0.4546262968452255, "grad_norm": 1.5854177704345929, "learning_rate": 5.9717518146353315e-06, "loss": 0.6536, "step": 5368 }, { "epoch": 0.4547109887783189, "grad_norm": 1.271922394784306, "learning_rate": 5.9704062972449025e-06, "loss": 0.6698, "step": 5369 }, { "epoch": 0.45479568071141224, "grad_norm": 1.2763080734241483, "learning_rate": 5.969060706826261e-06, "loss": 0.6297, "step": 5370 }, { "epoch": 0.4548803726445056, "grad_norm": 4.4273909034036985, "learning_rate": 5.9677150434806676e-06, "loss": 0.619, "step": 5371 }, { "epoch": 0.454965064577599, "grad_norm": 1.3054376230748406, "learning_rate": 5.966369307309396e-06, "loss": 0.6172, "step": 5372 }, { "epoch": 0.45504975651069235, "grad_norm": 1.402701455835683, "learning_rate": 5.965023498413715e-06, "loss": 0.6178, "step": 5373 }, { "epoch": 0.45513444844378576, "grad_norm": 1.1406339480420307, "learning_rate": 5.963677616894906e-06, "loss": 0.6135, "step": 5374 }, { "epoch": 0.4552191403768791, "grad_norm": 1.6842353303094564, "learning_rate": 5.9623316628542526e-06, "loss": 0.6346, "step": 5375 }, { "epoch": 0.45530383230997246, "grad_norm": 0.6390759158459572, "learning_rate": 5.960985636393049e-06, "loss": 0.8486, "step": 5376 }, { "epoch": 0.45538852424306586, "grad_norm": 1.679097102901006, "learning_rate": 5.959639537612585e-06, "loss": 0.6572, "step": 5377 }, { "epoch": 0.4554732161761592, "grad_norm": 1.3204643466653538, "learning_rate": 5.958293366614164e-06, "loss": 0.637, "step": 5378 }, { "epoch": 0.4555579081092526, "grad_norm": 1.238451877334626, "learning_rate": 5.956947123499093e-06, "loss": 0.6225, "step": 5379 }, { "epoch": 0.45564260004234597, "grad_norm": 1.2508665666795955, "learning_rate": 5.955600808368684e-06, "loss": 0.6712, "step": 5380 }, { "epoch": 0.4557272919754393, "grad_norm": 1.440320016526879, "learning_rate": 5.954254421324254e-06, "loss": 0.5952, "step": 5381 }, { "epoch": 0.4558119839085327, "grad_norm": 1.1680676374697234, "learning_rate": 5.952907962467126e-06, "loss": 0.5844, "step": 5382 }, { "epoch": 0.4558966758416261, "grad_norm": 1.3175658025090782, "learning_rate": 5.951561431898628e-06, "loss": 0.6592, "step": 5383 }, { "epoch": 0.4559813677747195, "grad_norm": 0.6602650936349598, "learning_rate": 5.950214829720094e-06, "loss": 0.848, "step": 5384 }, { "epoch": 0.45606605970781283, "grad_norm": 0.8436939815193265, "learning_rate": 5.948868156032861e-06, "loss": 0.8743, "step": 5385 }, { "epoch": 0.4561507516409062, "grad_norm": 1.2818337379854572, "learning_rate": 5.947521410938276e-06, "loss": 0.5948, "step": 5386 }, { "epoch": 0.4562354435739996, "grad_norm": 0.6322631158128105, "learning_rate": 5.946174594537686e-06, "loss": 0.9148, "step": 5387 }, { "epoch": 0.45632013550709294, "grad_norm": 1.410978824696878, "learning_rate": 5.944827706932449e-06, "loss": 0.6083, "step": 5388 }, { "epoch": 0.45640482744018634, "grad_norm": 1.4937473322564458, "learning_rate": 5.943480748223923e-06, "loss": 0.6586, "step": 5389 }, { "epoch": 0.4564895193732797, "grad_norm": 1.3182289892034262, "learning_rate": 5.942133718513476e-06, "loss": 0.5984, "step": 5390 }, { "epoch": 0.45657421130637305, "grad_norm": 1.4227209191675676, "learning_rate": 5.940786617902476e-06, "loss": 0.5987, "step": 5391 }, { "epoch": 0.45665890323946645, "grad_norm": 1.1481930500214024, "learning_rate": 5.939439446492302e-06, "loss": 0.6513, "step": 5392 }, { "epoch": 0.4567435951725598, "grad_norm": 1.466440417816894, "learning_rate": 5.938092204384336e-06, "loss": 0.6776, "step": 5393 }, { "epoch": 0.4568282871056532, "grad_norm": 1.4587595833009892, "learning_rate": 5.936744891679964e-06, "loss": 0.5592, "step": 5394 }, { "epoch": 0.45691297903874656, "grad_norm": 1.5166281411363347, "learning_rate": 5.935397508480578e-06, "loss": 0.6143, "step": 5395 }, { "epoch": 0.4569976709718399, "grad_norm": 2.0074645178379833, "learning_rate": 5.934050054887575e-06, "loss": 0.6466, "step": 5396 }, { "epoch": 0.4570823629049333, "grad_norm": 0.6258275418559599, "learning_rate": 5.932702531002362e-06, "loss": 0.8119, "step": 5397 }, { "epoch": 0.45716705483802667, "grad_norm": 1.347483247012244, "learning_rate": 5.9313549369263445e-06, "loss": 0.7051, "step": 5398 }, { "epoch": 0.45725174677112007, "grad_norm": 1.1515717975248076, "learning_rate": 5.930007272760935e-06, "loss": 0.6418, "step": 5399 }, { "epoch": 0.4573364387042134, "grad_norm": 1.4758437146630794, "learning_rate": 5.928659538607553e-06, "loss": 0.6321, "step": 5400 }, { "epoch": 0.45742113063730677, "grad_norm": 2.5519668589113547, "learning_rate": 5.927311734567624e-06, "loss": 0.6217, "step": 5401 }, { "epoch": 0.4575058225704002, "grad_norm": 1.3106458681360675, "learning_rate": 5.925963860742576e-06, "loss": 0.6448, "step": 5402 }, { "epoch": 0.45759051450349353, "grad_norm": 1.314743125042859, "learning_rate": 5.924615917233847e-06, "loss": 0.6375, "step": 5403 }, { "epoch": 0.45767520643658693, "grad_norm": 1.3206150801488947, "learning_rate": 5.923267904142871e-06, "loss": 0.6475, "step": 5404 }, { "epoch": 0.4577598983696803, "grad_norm": 0.5963303421567732, "learning_rate": 5.921919821571098e-06, "loss": 0.826, "step": 5405 }, { "epoch": 0.45784459030277364, "grad_norm": 1.6083802295396012, "learning_rate": 5.920571669619976e-06, "loss": 0.6194, "step": 5406 }, { "epoch": 0.45792928223586704, "grad_norm": 1.4220089941693923, "learning_rate": 5.919223448390962e-06, "loss": 0.6736, "step": 5407 }, { "epoch": 0.4580139741689604, "grad_norm": 2.0164971516432755, "learning_rate": 5.9178751579855145e-06, "loss": 0.6184, "step": 5408 }, { "epoch": 0.4580986661020538, "grad_norm": 0.6498313895818639, "learning_rate": 5.916526798505101e-06, "loss": 0.8508, "step": 5409 }, { "epoch": 0.45818335803514715, "grad_norm": 1.7723751946043425, "learning_rate": 5.915178370051194e-06, "loss": 0.6615, "step": 5410 }, { "epoch": 0.4582680499682405, "grad_norm": 1.2248254426383125, "learning_rate": 5.9138298727252685e-06, "loss": 0.6603, "step": 5411 }, { "epoch": 0.4583527419013339, "grad_norm": 0.6540494449200985, "learning_rate": 5.9124813066288045e-06, "loss": 0.8326, "step": 5412 }, { "epoch": 0.45843743383442725, "grad_norm": 1.2372333746207944, "learning_rate": 5.911132671863291e-06, "loss": 0.6187, "step": 5413 }, { "epoch": 0.45852212576752066, "grad_norm": 1.561418198020682, "learning_rate": 5.909783968530221e-06, "loss": 0.665, "step": 5414 }, { "epoch": 0.458606817700614, "grad_norm": 1.4742332395324715, "learning_rate": 5.908435196731088e-06, "loss": 0.6689, "step": 5415 }, { "epoch": 0.45869150963370736, "grad_norm": 1.419983230317574, "learning_rate": 5.907086356567395e-06, "loss": 0.6472, "step": 5416 }, { "epoch": 0.45877620156680077, "grad_norm": 1.99929282374982, "learning_rate": 5.905737448140653e-06, "loss": 0.6214, "step": 5417 }, { "epoch": 0.4588608934998941, "grad_norm": 1.4676238018353311, "learning_rate": 5.904388471552371e-06, "loss": 0.6638, "step": 5418 }, { "epoch": 0.4589455854329875, "grad_norm": 1.8111793602547537, "learning_rate": 5.903039426904067e-06, "loss": 0.5786, "step": 5419 }, { "epoch": 0.4590302773660809, "grad_norm": 1.4658324585340865, "learning_rate": 5.9016903142972645e-06, "loss": 0.6518, "step": 5420 }, { "epoch": 0.4591149692991743, "grad_norm": 0.617400516788588, "learning_rate": 5.9003411338334915e-06, "loss": 0.8466, "step": 5421 }, { "epoch": 0.45919966123226763, "grad_norm": 1.131850834500374, "learning_rate": 5.898991885614282e-06, "loss": 0.6263, "step": 5422 }, { "epoch": 0.459284353165361, "grad_norm": 0.5935637070930626, "learning_rate": 5.897642569741171e-06, "loss": 0.8071, "step": 5423 }, { "epoch": 0.4593690450984544, "grad_norm": 1.0858955500156129, "learning_rate": 5.896293186315702e-06, "loss": 0.6193, "step": 5424 }, { "epoch": 0.45945373703154774, "grad_norm": 1.3175432701296392, "learning_rate": 5.894943735439428e-06, "loss": 0.6382, "step": 5425 }, { "epoch": 0.45953842896464114, "grad_norm": 1.5654912190416268, "learning_rate": 5.893594217213897e-06, "loss": 0.6052, "step": 5426 }, { "epoch": 0.4596231208977345, "grad_norm": 1.2388385606269499, "learning_rate": 5.892244631740669e-06, "loss": 0.6457, "step": 5427 }, { "epoch": 0.45970781283082784, "grad_norm": 0.6194542736976908, "learning_rate": 5.890894979121309e-06, "loss": 0.8881, "step": 5428 }, { "epoch": 0.45979250476392125, "grad_norm": 1.7018027190299052, "learning_rate": 5.8895452594573835e-06, "loss": 0.6193, "step": 5429 }, { "epoch": 0.4598771966970146, "grad_norm": 1.6465801649955576, "learning_rate": 5.888195472850466e-06, "loss": 0.5975, "step": 5430 }, { "epoch": 0.459961888630108, "grad_norm": 1.4887175739480436, "learning_rate": 5.886845619402139e-06, "loss": 0.6518, "step": 5431 }, { "epoch": 0.46004658056320136, "grad_norm": 0.7159857271160184, "learning_rate": 5.885495699213981e-06, "loss": 0.875, "step": 5432 }, { "epoch": 0.4601312724962947, "grad_norm": 1.440132430775892, "learning_rate": 5.884145712387582e-06, "loss": 0.6158, "step": 5433 }, { "epoch": 0.4602159644293881, "grad_norm": 1.5079375441051033, "learning_rate": 5.882795659024537e-06, "loss": 0.6786, "step": 5434 }, { "epoch": 0.46030065636248146, "grad_norm": 1.4487912668134826, "learning_rate": 5.881445539226444e-06, "loss": 0.6417, "step": 5435 }, { "epoch": 0.46038534829557487, "grad_norm": 1.2895740598020473, "learning_rate": 5.880095353094908e-06, "loss": 0.6224, "step": 5436 }, { "epoch": 0.4604700402286682, "grad_norm": 1.4821221509264606, "learning_rate": 5.878745100731533e-06, "loss": 0.6383, "step": 5437 }, { "epoch": 0.46055473216176157, "grad_norm": 1.1928390292346163, "learning_rate": 5.87739478223794e-06, "loss": 0.6426, "step": 5438 }, { "epoch": 0.460639424094855, "grad_norm": 1.2097333976651168, "learning_rate": 5.876044397715742e-06, "loss": 0.6914, "step": 5439 }, { "epoch": 0.4607241160279483, "grad_norm": 0.588389053803803, "learning_rate": 5.874693947266563e-06, "loss": 0.8831, "step": 5440 }, { "epoch": 0.46080880796104173, "grad_norm": 1.6882127549514685, "learning_rate": 5.8733434309920335e-06, "loss": 0.7022, "step": 5441 }, { "epoch": 0.4608934998941351, "grad_norm": 1.142561812283095, "learning_rate": 5.8719928489937875e-06, "loss": 0.6055, "step": 5442 }, { "epoch": 0.46097819182722843, "grad_norm": 1.3349452965249766, "learning_rate": 5.870642201373462e-06, "loss": 0.6548, "step": 5443 }, { "epoch": 0.46106288376032184, "grad_norm": 1.4129859740091246, "learning_rate": 5.869291488232699e-06, "loss": 0.5592, "step": 5444 }, { "epoch": 0.4611475756934152, "grad_norm": 0.6223836432795564, "learning_rate": 5.867940709673149e-06, "loss": 0.8331, "step": 5445 }, { "epoch": 0.4612322676265086, "grad_norm": 1.6678673214164854, "learning_rate": 5.866589865796466e-06, "loss": 0.6103, "step": 5446 }, { "epoch": 0.46131695955960195, "grad_norm": 1.5399182854211149, "learning_rate": 5.865238956704304e-06, "loss": 0.6673, "step": 5447 }, { "epoch": 0.4614016514926953, "grad_norm": 1.1043800563276456, "learning_rate": 5.863887982498332e-06, "loss": 0.5905, "step": 5448 }, { "epoch": 0.4614863434257887, "grad_norm": 0.6075609867803571, "learning_rate": 5.862536943280213e-06, "loss": 0.8128, "step": 5449 }, { "epoch": 0.46157103535888205, "grad_norm": 1.3090240516932419, "learning_rate": 5.861185839151622e-06, "loss": 0.6401, "step": 5450 }, { "epoch": 0.46165572729197546, "grad_norm": 1.3446578979492188, "learning_rate": 5.859834670214236e-06, "loss": 0.6082, "step": 5451 }, { "epoch": 0.4617404192250688, "grad_norm": 1.443739186361196, "learning_rate": 5.8584834365697385e-06, "loss": 0.6455, "step": 5452 }, { "epoch": 0.46182511115816216, "grad_norm": 1.133148537031123, "learning_rate": 5.8571321383198165e-06, "loss": 0.6437, "step": 5453 }, { "epoch": 0.46190980309125557, "grad_norm": 1.2302887264751898, "learning_rate": 5.855780775566162e-06, "loss": 0.6716, "step": 5454 }, { "epoch": 0.4619944950243489, "grad_norm": 2.8480670832344237, "learning_rate": 5.854429348410473e-06, "loss": 0.6044, "step": 5455 }, { "epoch": 0.4620791869574423, "grad_norm": 1.502891360033768, "learning_rate": 5.853077856954451e-06, "loss": 0.678, "step": 5456 }, { "epoch": 0.4621638788905357, "grad_norm": 1.2093142225385234, "learning_rate": 5.851726301299803e-06, "loss": 0.6437, "step": 5457 }, { "epoch": 0.462248570823629, "grad_norm": 1.5943727422754024, "learning_rate": 5.85037468154824e-06, "loss": 0.5695, "step": 5458 }, { "epoch": 0.46233326275672243, "grad_norm": 1.5194998735142837, "learning_rate": 5.849022997801481e-06, "loss": 0.6487, "step": 5459 }, { "epoch": 0.4624179546898158, "grad_norm": 0.6519731728207229, "learning_rate": 5.847671250161244e-06, "loss": 0.7952, "step": 5460 }, { "epoch": 0.4625026466229092, "grad_norm": 1.2518530570009565, "learning_rate": 5.846319438729258e-06, "loss": 0.6149, "step": 5461 }, { "epoch": 0.46258733855600254, "grad_norm": 1.5494962470139353, "learning_rate": 5.844967563607251e-06, "loss": 0.6397, "step": 5462 }, { "epoch": 0.4626720304890959, "grad_norm": 1.3477173236923623, "learning_rate": 5.843615624896962e-06, "loss": 0.6317, "step": 5463 }, { "epoch": 0.4627567224221893, "grad_norm": 1.9178215410560993, "learning_rate": 5.842263622700131e-06, "loss": 0.6643, "step": 5464 }, { "epoch": 0.46284141435528264, "grad_norm": 1.3246587763577005, "learning_rate": 5.8409115571185005e-06, "loss": 0.6568, "step": 5465 }, { "epoch": 0.46292610628837605, "grad_norm": 1.359422942415019, "learning_rate": 5.8395594282538225e-06, "loss": 0.652, "step": 5466 }, { "epoch": 0.4630107982214694, "grad_norm": 1.1959635965293558, "learning_rate": 5.838207236207853e-06, "loss": 0.5662, "step": 5467 }, { "epoch": 0.4630954901545628, "grad_norm": 0.6191288086307177, "learning_rate": 5.8368549810823494e-06, "loss": 0.872, "step": 5468 }, { "epoch": 0.46318018208765616, "grad_norm": 1.4833880032660691, "learning_rate": 5.835502662979078e-06, "loss": 0.6721, "step": 5469 }, { "epoch": 0.4632648740207495, "grad_norm": 1.2421239816492653, "learning_rate": 5.834150281999807e-06, "loss": 0.6808, "step": 5470 }, { "epoch": 0.4633495659538429, "grad_norm": 1.5591000803065258, "learning_rate": 5.83279783824631e-06, "loss": 0.6622, "step": 5471 }, { "epoch": 0.46343425788693626, "grad_norm": 1.2413273416011525, "learning_rate": 5.831445331820365e-06, "loss": 0.7028, "step": 5472 }, { "epoch": 0.46351894982002967, "grad_norm": 1.2044906357488079, "learning_rate": 5.830092762823758e-06, "loss": 0.7002, "step": 5473 }, { "epoch": 0.463603641753123, "grad_norm": 1.779410235064123, "learning_rate": 5.828740131358273e-06, "loss": 0.6535, "step": 5474 }, { "epoch": 0.46368833368621637, "grad_norm": 1.3634025353710277, "learning_rate": 5.827387437525705e-06, "loss": 0.6996, "step": 5475 }, { "epoch": 0.4637730256193098, "grad_norm": 1.0777021108798392, "learning_rate": 5.826034681427852e-06, "loss": 0.591, "step": 5476 }, { "epoch": 0.4638577175524031, "grad_norm": 2.4599360136347364, "learning_rate": 5.824681863166515e-06, "loss": 0.6349, "step": 5477 }, { "epoch": 0.46394240948549653, "grad_norm": 1.3670506608977484, "learning_rate": 5.8233289828435e-06, "loss": 0.5823, "step": 5478 }, { "epoch": 0.4640271014185899, "grad_norm": 1.1026787439473118, "learning_rate": 5.82197604056062e-06, "loss": 0.6165, "step": 5479 }, { "epoch": 0.46411179335168323, "grad_norm": 1.2306089640785773, "learning_rate": 5.820623036419691e-06, "loss": 0.5685, "step": 5480 }, { "epoch": 0.46419648528477664, "grad_norm": 1.2478985320560103, "learning_rate": 5.819269970522533e-06, "loss": 0.6606, "step": 5481 }, { "epoch": 0.46428117721787, "grad_norm": 1.101545968847418, "learning_rate": 5.81791684297097e-06, "loss": 0.6262, "step": 5482 }, { "epoch": 0.4643658691509634, "grad_norm": 1.2068995716120154, "learning_rate": 5.8165636538668346e-06, "loss": 0.622, "step": 5483 }, { "epoch": 0.46445056108405675, "grad_norm": 1.7673163027938734, "learning_rate": 5.815210403311961e-06, "loss": 0.6611, "step": 5484 }, { "epoch": 0.4645352530171501, "grad_norm": 0.5896488471233515, "learning_rate": 5.813857091408186e-06, "loss": 0.8581, "step": 5485 }, { "epoch": 0.4646199449502435, "grad_norm": 1.0255349943943874, "learning_rate": 5.8125037182573565e-06, "loss": 0.617, "step": 5486 }, { "epoch": 0.46470463688333685, "grad_norm": 1.3768376382502434, "learning_rate": 5.81115028396132e-06, "loss": 0.6308, "step": 5487 }, { "epoch": 0.46478932881643026, "grad_norm": 1.4764490400227268, "learning_rate": 5.8097967886219285e-06, "loss": 0.667, "step": 5488 }, { "epoch": 0.4648740207495236, "grad_norm": 1.3607543921841294, "learning_rate": 5.808443232341041e-06, "loss": 0.6284, "step": 5489 }, { "epoch": 0.46495871268261696, "grad_norm": 1.4634819039511868, "learning_rate": 5.807089615220519e-06, "loss": 0.6356, "step": 5490 }, { "epoch": 0.46504340461571037, "grad_norm": 1.5910313532934455, "learning_rate": 5.8057359373622315e-06, "loss": 0.6311, "step": 5491 }, { "epoch": 0.4651280965488037, "grad_norm": 1.7494245670943667, "learning_rate": 5.804382198868046e-06, "loss": 0.6402, "step": 5492 }, { "epoch": 0.4652127884818971, "grad_norm": 0.6819021499847525, "learning_rate": 5.803028399839842e-06, "loss": 0.868, "step": 5493 }, { "epoch": 0.46529748041499047, "grad_norm": 1.2565443268311607, "learning_rate": 5.801674540379497e-06, "loss": 0.6151, "step": 5494 }, { "epoch": 0.4653821723480838, "grad_norm": 1.9670130218976158, "learning_rate": 5.8003206205889e-06, "loss": 0.6973, "step": 5495 }, { "epoch": 0.46546686428117723, "grad_norm": 1.4382204018768288, "learning_rate": 5.798966640569936e-06, "loss": 0.6579, "step": 5496 }, { "epoch": 0.4655515562142706, "grad_norm": 1.3921634130462637, "learning_rate": 5.7976126004245034e-06, "loss": 0.672, "step": 5497 }, { "epoch": 0.465636248147364, "grad_norm": 1.4574642512367075, "learning_rate": 5.796258500254499e-06, "loss": 0.6436, "step": 5498 }, { "epoch": 0.46572094008045734, "grad_norm": 1.6075045358396522, "learning_rate": 5.794904340161825e-06, "loss": 0.6647, "step": 5499 }, { "epoch": 0.4658056320135507, "grad_norm": 1.599891996518474, "learning_rate": 5.79355012024839e-06, "loss": 0.6615, "step": 5500 }, { "epoch": 0.4658903239466441, "grad_norm": 2.2903040028658834, "learning_rate": 5.792195840616108e-06, "loss": 0.6242, "step": 5501 }, { "epoch": 0.46597501587973744, "grad_norm": 1.4207090002354685, "learning_rate": 5.790841501366894e-06, "loss": 0.6341, "step": 5502 }, { "epoch": 0.46605970781283085, "grad_norm": 2.313614218080502, "learning_rate": 5.789487102602667e-06, "loss": 0.6537, "step": 5503 }, { "epoch": 0.4661443997459242, "grad_norm": 1.2229697566182496, "learning_rate": 5.788132644425357e-06, "loss": 0.6385, "step": 5504 }, { "epoch": 0.46622909167901755, "grad_norm": 1.2581580007292854, "learning_rate": 5.786778126936892e-06, "loss": 0.6228, "step": 5505 }, { "epoch": 0.46631378361211095, "grad_norm": 1.2906345501919376, "learning_rate": 5.785423550239206e-06, "loss": 0.6746, "step": 5506 }, { "epoch": 0.4663984755452043, "grad_norm": 1.475785962253772, "learning_rate": 5.784068914434239e-06, "loss": 0.6034, "step": 5507 }, { "epoch": 0.4664831674782977, "grad_norm": 1.2955238702260674, "learning_rate": 5.782714219623935e-06, "loss": 0.6559, "step": 5508 }, { "epoch": 0.46656785941139106, "grad_norm": 1.361200476235084, "learning_rate": 5.781359465910241e-06, "loss": 0.607, "step": 5509 }, { "epoch": 0.4666525513444844, "grad_norm": 0.6173324219826769, "learning_rate": 5.780004653395111e-06, "loss": 0.8233, "step": 5510 }, { "epoch": 0.4667372432775778, "grad_norm": 1.6346713674888427, "learning_rate": 5.778649782180498e-06, "loss": 0.6264, "step": 5511 }, { "epoch": 0.46682193521067117, "grad_norm": 1.2827329242956378, "learning_rate": 5.777294852368367e-06, "loss": 0.6654, "step": 5512 }, { "epoch": 0.4669066271437646, "grad_norm": 1.6049187569862517, "learning_rate": 5.775939864060682e-06, "loss": 0.6327, "step": 5513 }, { "epoch": 0.4669913190768579, "grad_norm": 2.0645947287719553, "learning_rate": 5.774584817359415e-06, "loss": 0.5944, "step": 5514 }, { "epoch": 0.4670760110099513, "grad_norm": 0.6174904310321945, "learning_rate": 5.773229712366536e-06, "loss": 0.8427, "step": 5515 }, { "epoch": 0.4671607029430447, "grad_norm": 1.615866773862644, "learning_rate": 5.7718745491840286e-06, "loss": 0.6633, "step": 5516 }, { "epoch": 0.46724539487613803, "grad_norm": 1.3726558143510834, "learning_rate": 5.7705193279138736e-06, "loss": 0.6695, "step": 5517 }, { "epoch": 0.46733008680923144, "grad_norm": 1.2993153271613287, "learning_rate": 5.7691640486580605e-06, "loss": 0.6339, "step": 5518 }, { "epoch": 0.4674147787423248, "grad_norm": 1.2401585793413505, "learning_rate": 5.767808711518577e-06, "loss": 0.6597, "step": 5519 }, { "epoch": 0.4674994706754182, "grad_norm": 2.99096292066825, "learning_rate": 5.7664533165974244e-06, "loss": 0.6538, "step": 5520 }, { "epoch": 0.46758416260851154, "grad_norm": 1.2950291615176814, "learning_rate": 5.765097863996601e-06, "loss": 0.6371, "step": 5521 }, { "epoch": 0.4676688545416049, "grad_norm": 0.5769508869961112, "learning_rate": 5.763742353818111e-06, "loss": 0.9095, "step": 5522 }, { "epoch": 0.4677535464746983, "grad_norm": 1.2402981333061875, "learning_rate": 5.762386786163965e-06, "loss": 0.6576, "step": 5523 }, { "epoch": 0.46783823840779165, "grad_norm": 1.422088816538435, "learning_rate": 5.761031161136176e-06, "loss": 0.6417, "step": 5524 }, { "epoch": 0.46792293034088506, "grad_norm": 1.2675567987046092, "learning_rate": 5.759675478836764e-06, "loss": 0.6313, "step": 5525 }, { "epoch": 0.4680076222739784, "grad_norm": 1.4963466344909484, "learning_rate": 5.758319739367748e-06, "loss": 0.6174, "step": 5526 }, { "epoch": 0.46809231420707176, "grad_norm": 1.1901807162969325, "learning_rate": 5.756963942831156e-06, "loss": 0.6339, "step": 5527 }, { "epoch": 0.46817700614016516, "grad_norm": 0.6543123747707595, "learning_rate": 5.7556080893290185e-06, "loss": 0.8538, "step": 5528 }, { "epoch": 0.4682616980732585, "grad_norm": 1.4789113213012113, "learning_rate": 5.754252178963373e-06, "loss": 0.6814, "step": 5529 }, { "epoch": 0.4683463900063519, "grad_norm": 1.671839912737077, "learning_rate": 5.752896211836257e-06, "loss": 0.5983, "step": 5530 }, { "epoch": 0.46843108193944527, "grad_norm": 0.6060272531478703, "learning_rate": 5.751540188049714e-06, "loss": 0.8354, "step": 5531 }, { "epoch": 0.4685157738725386, "grad_norm": 1.6416930454040337, "learning_rate": 5.750184107705791e-06, "loss": 0.6389, "step": 5532 }, { "epoch": 0.468600465805632, "grad_norm": 1.5195303019659947, "learning_rate": 5.748827970906542e-06, "loss": 0.6095, "step": 5533 }, { "epoch": 0.4686851577387254, "grad_norm": 1.5186086709942872, "learning_rate": 5.747471777754022e-06, "loss": 0.64, "step": 5534 }, { "epoch": 0.4687698496718188, "grad_norm": 1.375723517219002, "learning_rate": 5.746115528350296e-06, "loss": 0.6533, "step": 5535 }, { "epoch": 0.46885454160491213, "grad_norm": 0.7507021134158858, "learning_rate": 5.744759222797422e-06, "loss": 0.8477, "step": 5536 }, { "epoch": 0.4689392335380055, "grad_norm": 1.243568152247186, "learning_rate": 5.743402861197475e-06, "loss": 0.6449, "step": 5537 }, { "epoch": 0.4690239254710989, "grad_norm": 1.3482158499586334, "learning_rate": 5.742046443652525e-06, "loss": 0.6822, "step": 5538 }, { "epoch": 0.46910861740419224, "grad_norm": 1.2310730821346967, "learning_rate": 5.740689970264651e-06, "loss": 0.6063, "step": 5539 }, { "epoch": 0.46919330933728565, "grad_norm": 1.2977707633605124, "learning_rate": 5.739333441135934e-06, "loss": 0.6274, "step": 5540 }, { "epoch": 0.469278001270379, "grad_norm": 1.2942938241717157, "learning_rate": 5.737976856368461e-06, "loss": 0.6663, "step": 5541 }, { "epoch": 0.46936269320347235, "grad_norm": 1.2720904283917214, "learning_rate": 5.736620216064324e-06, "loss": 0.6212, "step": 5542 }, { "epoch": 0.46944738513656575, "grad_norm": 1.6887477794488208, "learning_rate": 5.735263520325614e-06, "loss": 0.6912, "step": 5543 }, { "epoch": 0.4695320770696591, "grad_norm": 1.5521844044495696, "learning_rate": 5.733906769254429e-06, "loss": 0.6194, "step": 5544 }, { "epoch": 0.4696167690027525, "grad_norm": 1.298162528917445, "learning_rate": 5.7325499629528735e-06, "loss": 0.6583, "step": 5545 }, { "epoch": 0.46970146093584586, "grad_norm": 1.3257274815645261, "learning_rate": 5.731193101523056e-06, "loss": 0.6221, "step": 5546 }, { "epoch": 0.4697861528689392, "grad_norm": 1.5031494018908345, "learning_rate": 5.729836185067086e-06, "loss": 0.6336, "step": 5547 }, { "epoch": 0.4698708448020326, "grad_norm": 1.4603584534399403, "learning_rate": 5.728479213687075e-06, "loss": 0.6604, "step": 5548 }, { "epoch": 0.46995553673512597, "grad_norm": 1.5959592392888284, "learning_rate": 5.727122187485149e-06, "loss": 0.6498, "step": 5549 }, { "epoch": 0.4700402286682194, "grad_norm": 1.3523740613131603, "learning_rate": 5.725765106563428e-06, "loss": 0.6614, "step": 5550 }, { "epoch": 0.4701249206013127, "grad_norm": 1.2671238877373314, "learning_rate": 5.724407971024037e-06, "loss": 0.5948, "step": 5551 }, { "epoch": 0.4702096125344061, "grad_norm": 1.2900165087806086, "learning_rate": 5.7230507809691125e-06, "loss": 0.5811, "step": 5552 }, { "epoch": 0.4702943044674995, "grad_norm": 1.1055159028106805, "learning_rate": 5.721693536500787e-06, "loss": 0.6265, "step": 5553 }, { "epoch": 0.47037899640059283, "grad_norm": 3.1872926031575752, "learning_rate": 5.720336237721204e-06, "loss": 0.6417, "step": 5554 }, { "epoch": 0.47046368833368624, "grad_norm": 1.2599268702936872, "learning_rate": 5.718978884732501e-06, "loss": 0.6431, "step": 5555 }, { "epoch": 0.4705483802667796, "grad_norm": 1.3251192900415552, "learning_rate": 5.717621477636831e-06, "loss": 0.6225, "step": 5556 }, { "epoch": 0.47063307219987294, "grad_norm": 1.8248331573133638, "learning_rate": 5.716264016536346e-06, "loss": 0.6592, "step": 5557 }, { "epoch": 0.47071776413296634, "grad_norm": 1.2782896352115276, "learning_rate": 5.7149065015332e-06, "loss": 0.6351, "step": 5558 }, { "epoch": 0.4708024560660597, "grad_norm": 1.4242413957117368, "learning_rate": 5.713548932729554e-06, "loss": 0.6013, "step": 5559 }, { "epoch": 0.4708871479991531, "grad_norm": 1.5431576726853447, "learning_rate": 5.7121913102275725e-06, "loss": 0.6762, "step": 5560 }, { "epoch": 0.47097183993224645, "grad_norm": 1.3532590104191784, "learning_rate": 5.710833634129424e-06, "loss": 0.6587, "step": 5561 }, { "epoch": 0.4710565318653398, "grad_norm": 1.4776009023495114, "learning_rate": 5.709475904537281e-06, "loss": 0.6698, "step": 5562 }, { "epoch": 0.4711412237984332, "grad_norm": 1.1150152156586, "learning_rate": 5.708118121553319e-06, "loss": 0.6144, "step": 5563 }, { "epoch": 0.47122591573152656, "grad_norm": 1.4914103387530135, "learning_rate": 5.706760285279719e-06, "loss": 0.6745, "step": 5564 }, { "epoch": 0.47131060766461996, "grad_norm": 3.0137647465457746, "learning_rate": 5.705402395818663e-06, "loss": 0.6019, "step": 5565 }, { "epoch": 0.4713952995977133, "grad_norm": 2.7630909157864023, "learning_rate": 5.704044453272342e-06, "loss": 0.6075, "step": 5566 }, { "epoch": 0.47147999153080666, "grad_norm": 1.4819542806397537, "learning_rate": 5.7026864577429495e-06, "loss": 0.587, "step": 5567 }, { "epoch": 0.47156468346390007, "grad_norm": 1.3536891715473633, "learning_rate": 5.701328409332681e-06, "loss": 0.6245, "step": 5568 }, { "epoch": 0.4716493753969934, "grad_norm": 1.7366128479657792, "learning_rate": 5.699970308143732e-06, "loss": 0.5987, "step": 5569 }, { "epoch": 0.4717340673300868, "grad_norm": 1.3890070253578775, "learning_rate": 5.698612154278314e-06, "loss": 0.6549, "step": 5570 }, { "epoch": 0.4718187592631802, "grad_norm": 1.1218684775202015, "learning_rate": 5.697253947838632e-06, "loss": 0.6419, "step": 5571 }, { "epoch": 0.4719034511962736, "grad_norm": 1.9010001692644352, "learning_rate": 5.695895688926896e-06, "loss": 0.6205, "step": 5572 }, { "epoch": 0.47198814312936693, "grad_norm": 1.1896598683883355, "learning_rate": 5.694537377645325e-06, "loss": 0.609, "step": 5573 }, { "epoch": 0.4720728350624603, "grad_norm": 1.2599100936911243, "learning_rate": 5.693179014096141e-06, "loss": 0.6702, "step": 5574 }, { "epoch": 0.4721575269955537, "grad_norm": 2.096404828878796, "learning_rate": 5.6918205983815645e-06, "loss": 0.6355, "step": 5575 }, { "epoch": 0.47224221892864704, "grad_norm": 1.3866110889082575, "learning_rate": 5.690462130603823e-06, "loss": 0.6552, "step": 5576 }, { "epoch": 0.47232691086174045, "grad_norm": 1.2533782098941277, "learning_rate": 5.689103610865151e-06, "loss": 0.6589, "step": 5577 }, { "epoch": 0.4724116027948338, "grad_norm": 0.6520647765703412, "learning_rate": 5.687745039267785e-06, "loss": 0.9007, "step": 5578 }, { "epoch": 0.47249629472792715, "grad_norm": 1.2192788748164525, "learning_rate": 5.68638641591396e-06, "loss": 0.6261, "step": 5579 }, { "epoch": 0.47258098666102055, "grad_norm": 1.5638479621502392, "learning_rate": 5.6850277409059255e-06, "loss": 0.659, "step": 5580 }, { "epoch": 0.4726656785941139, "grad_norm": 1.3234177077659717, "learning_rate": 5.683669014345924e-06, "loss": 0.6303, "step": 5581 }, { "epoch": 0.4727503705272073, "grad_norm": 0.7617107941520745, "learning_rate": 5.68231023633621e-06, "loss": 0.8147, "step": 5582 }, { "epoch": 0.47283506246030066, "grad_norm": 1.2829593885125758, "learning_rate": 5.6809514069790375e-06, "loss": 0.6877, "step": 5583 }, { "epoch": 0.472919754393394, "grad_norm": 1.252358430512864, "learning_rate": 5.679592526376666e-06, "loss": 0.6303, "step": 5584 }, { "epoch": 0.4730044463264874, "grad_norm": 0.6194102826184203, "learning_rate": 5.678233594631357e-06, "loss": 0.8765, "step": 5585 }, { "epoch": 0.47308913825958077, "grad_norm": 2.0406503748497844, "learning_rate": 5.67687461184538e-06, "loss": 0.6329, "step": 5586 }, { "epoch": 0.47317383019267417, "grad_norm": 1.2477055627811353, "learning_rate": 5.675515578121003e-06, "loss": 0.6506, "step": 5587 }, { "epoch": 0.4732585221257675, "grad_norm": 1.414578825142549, "learning_rate": 5.674156493560504e-06, "loss": 0.6919, "step": 5588 }, { "epoch": 0.4733432140588609, "grad_norm": 0.6573689311071597, "learning_rate": 5.6727973582661565e-06, "loss": 0.8813, "step": 5589 }, { "epoch": 0.4734279059919543, "grad_norm": 1.5271233934679886, "learning_rate": 5.671438172340247e-06, "loss": 0.6558, "step": 5590 }, { "epoch": 0.47351259792504763, "grad_norm": 1.3239032559278305, "learning_rate": 5.6700789358850584e-06, "loss": 0.6431, "step": 5591 }, { "epoch": 0.47359728985814104, "grad_norm": 1.4379451332202657, "learning_rate": 5.6687196490028825e-06, "loss": 0.6391, "step": 5592 }, { "epoch": 0.4736819817912344, "grad_norm": 1.600321343030803, "learning_rate": 5.667360311796012e-06, "loss": 0.6697, "step": 5593 }, { "epoch": 0.47376667372432774, "grad_norm": 1.5845147001794984, "learning_rate": 5.666000924366742e-06, "loss": 0.6884, "step": 5594 }, { "epoch": 0.47385136565742114, "grad_norm": 1.6298960525252493, "learning_rate": 5.664641486817379e-06, "loss": 0.639, "step": 5595 }, { "epoch": 0.4739360575905145, "grad_norm": 1.1798374921464223, "learning_rate": 5.663281999250224e-06, "loss": 0.6344, "step": 5596 }, { "epoch": 0.4740207495236079, "grad_norm": 2.1709895727211377, "learning_rate": 5.661922461767584e-06, "loss": 0.6083, "step": 5597 }, { "epoch": 0.47410544145670125, "grad_norm": 1.4888446095540144, "learning_rate": 5.660562874471776e-06, "loss": 0.6093, "step": 5598 }, { "epoch": 0.4741901333897946, "grad_norm": 1.1343206277440512, "learning_rate": 5.659203237465113e-06, "loss": 0.5994, "step": 5599 }, { "epoch": 0.474274825322888, "grad_norm": 1.5263235978111425, "learning_rate": 5.6578435508499155e-06, "loss": 0.6217, "step": 5600 }, { "epoch": 0.47435951725598136, "grad_norm": 1.5981722386833033, "learning_rate": 5.656483814728508e-06, "loss": 0.668, "step": 5601 }, { "epoch": 0.47444420918907476, "grad_norm": 1.805828575502784, "learning_rate": 5.655124029203216e-06, "loss": 0.6412, "step": 5602 }, { "epoch": 0.4745289011221681, "grad_norm": 1.7142644923065904, "learning_rate": 5.653764194376374e-06, "loss": 0.6154, "step": 5603 }, { "epoch": 0.47461359305526146, "grad_norm": 1.3096327484938237, "learning_rate": 5.6524043103503125e-06, "loss": 0.6155, "step": 5604 }, { "epoch": 0.47469828498835487, "grad_norm": 1.2313013601984748, "learning_rate": 5.6510443772273726e-06, "loss": 0.6521, "step": 5605 }, { "epoch": 0.4747829769214482, "grad_norm": 1.3687085783809394, "learning_rate": 5.6496843951098955e-06, "loss": 0.6157, "step": 5606 }, { "epoch": 0.4748676688545416, "grad_norm": 1.4070531630633236, "learning_rate": 5.648324364100228e-06, "loss": 0.6422, "step": 5607 }, { "epoch": 0.474952360787635, "grad_norm": 1.4203326525456135, "learning_rate": 5.64696428430072e-06, "loss": 0.6601, "step": 5608 }, { "epoch": 0.4750370527207283, "grad_norm": 1.2163825110560098, "learning_rate": 5.645604155813723e-06, "loss": 0.6118, "step": 5609 }, { "epoch": 0.47512174465382173, "grad_norm": 1.3808867562550569, "learning_rate": 5.644243978741594e-06, "loss": 0.7011, "step": 5610 }, { "epoch": 0.4752064365869151, "grad_norm": 1.611483671075291, "learning_rate": 5.642883753186693e-06, "loss": 0.6159, "step": 5611 }, { "epoch": 0.4752911285200085, "grad_norm": 0.7115423682412702, "learning_rate": 5.641523479251389e-06, "loss": 0.8455, "step": 5612 }, { "epoch": 0.47537582045310184, "grad_norm": 2.183083702101059, "learning_rate": 5.6401631570380435e-06, "loss": 0.6621, "step": 5613 }, { "epoch": 0.4754605123861952, "grad_norm": 1.343699381377699, "learning_rate": 5.6388027866490295e-06, "loss": 0.634, "step": 5614 }, { "epoch": 0.4755452043192886, "grad_norm": 1.4138313610170827, "learning_rate": 5.637442368186725e-06, "loss": 0.669, "step": 5615 }, { "epoch": 0.47562989625238195, "grad_norm": 2.114181588468777, "learning_rate": 5.636081901753507e-06, "loss": 0.6069, "step": 5616 }, { "epoch": 0.47571458818547535, "grad_norm": 1.2531649809437164, "learning_rate": 5.6347213874517585e-06, "loss": 0.6373, "step": 5617 }, { "epoch": 0.4757992801185687, "grad_norm": 1.0881072094483413, "learning_rate": 5.6333608253838624e-06, "loss": 0.6285, "step": 5618 }, { "epoch": 0.47588397205166205, "grad_norm": 5.7063787608977075, "learning_rate": 5.632000215652211e-06, "loss": 0.622, "step": 5619 }, { "epoch": 0.47596866398475546, "grad_norm": 1.2776852228115188, "learning_rate": 5.630639558359199e-06, "loss": 0.6172, "step": 5620 }, { "epoch": 0.4760533559178488, "grad_norm": 1.6519562270867518, "learning_rate": 5.629278853607218e-06, "loss": 0.6314, "step": 5621 }, { "epoch": 0.4761380478509422, "grad_norm": 1.1783872609857862, "learning_rate": 5.6279181014986714e-06, "loss": 0.5752, "step": 5622 }, { "epoch": 0.47622273978403556, "grad_norm": 2.223627286086327, "learning_rate": 5.626557302135964e-06, "loss": 0.6393, "step": 5623 }, { "epoch": 0.47630743171712897, "grad_norm": 1.3348420862596, "learning_rate": 5.625196455621502e-06, "loss": 0.6139, "step": 5624 }, { "epoch": 0.4763921236502223, "grad_norm": 1.4405802302370765, "learning_rate": 5.623835562057694e-06, "loss": 0.6313, "step": 5625 }, { "epoch": 0.47647681558331567, "grad_norm": 2.122285285882246, "learning_rate": 5.622474621546958e-06, "loss": 0.6295, "step": 5626 }, { "epoch": 0.4765615075164091, "grad_norm": 1.5292074145848011, "learning_rate": 5.621113634191712e-06, "loss": 0.5963, "step": 5627 }, { "epoch": 0.47664619944950243, "grad_norm": 1.1940358851905477, "learning_rate": 5.619752600094374e-06, "loss": 0.595, "step": 5628 }, { "epoch": 0.47673089138259583, "grad_norm": 1.700062068023897, "learning_rate": 5.618391519357371e-06, "loss": 0.599, "step": 5629 }, { "epoch": 0.4768155833156892, "grad_norm": 1.327751855260235, "learning_rate": 5.617030392083133e-06, "loss": 0.6493, "step": 5630 }, { "epoch": 0.47690027524878253, "grad_norm": 1.6680406255895674, "learning_rate": 5.61566921837409e-06, "loss": 0.6668, "step": 5631 }, { "epoch": 0.47698496718187594, "grad_norm": 1.3577557822739585, "learning_rate": 5.6143079983326775e-06, "loss": 0.5585, "step": 5632 }, { "epoch": 0.4770696591149693, "grad_norm": 1.2295602966908938, "learning_rate": 5.612946732061336e-06, "loss": 0.6535, "step": 5633 }, { "epoch": 0.4771543510480627, "grad_norm": 1.52465477730005, "learning_rate": 5.611585419662509e-06, "loss": 0.6329, "step": 5634 }, { "epoch": 0.47723904298115605, "grad_norm": 0.6415764299859944, "learning_rate": 5.610224061238636e-06, "loss": 0.8246, "step": 5635 }, { "epoch": 0.4773237349142494, "grad_norm": 1.1948374487400095, "learning_rate": 5.6088626568921746e-06, "loss": 0.6493, "step": 5636 }, { "epoch": 0.4774084268473428, "grad_norm": 1.2968153663362318, "learning_rate": 5.607501206725574e-06, "loss": 0.6887, "step": 5637 }, { "epoch": 0.47749311878043615, "grad_norm": 1.3429196785814874, "learning_rate": 5.60613971084129e-06, "loss": 0.6496, "step": 5638 }, { "epoch": 0.47757781071352956, "grad_norm": 0.6320504079447158, "learning_rate": 5.604778169341782e-06, "loss": 0.8311, "step": 5639 }, { "epoch": 0.4776625026466229, "grad_norm": 1.739000302442373, "learning_rate": 5.603416582329518e-06, "loss": 0.6303, "step": 5640 }, { "epoch": 0.47774719457971626, "grad_norm": 1.559480436896962, "learning_rate": 5.602054949906958e-06, "loss": 0.6572, "step": 5641 }, { "epoch": 0.47783188651280967, "grad_norm": 1.18191536133601, "learning_rate": 5.600693272176575e-06, "loss": 0.6413, "step": 5642 }, { "epoch": 0.477916578445903, "grad_norm": 1.5180880011956672, "learning_rate": 5.599331549240843e-06, "loss": 0.6437, "step": 5643 }, { "epoch": 0.4780012703789964, "grad_norm": 1.533587818095869, "learning_rate": 5.597969781202238e-06, "loss": 0.6185, "step": 5644 }, { "epoch": 0.4780859623120898, "grad_norm": 1.529465562456088, "learning_rate": 5.596607968163241e-06, "loss": 0.6077, "step": 5645 }, { "epoch": 0.4781706542451831, "grad_norm": 1.291046349419822, "learning_rate": 5.595246110226336e-06, "loss": 0.6564, "step": 5646 }, { "epoch": 0.47825534617827653, "grad_norm": 1.718169838015895, "learning_rate": 5.593884207494007e-06, "loss": 0.6655, "step": 5647 }, { "epoch": 0.4783400381113699, "grad_norm": 1.5138139984248657, "learning_rate": 5.592522260068749e-06, "loss": 0.6466, "step": 5648 }, { "epoch": 0.4784247300444633, "grad_norm": 1.2071488946489648, "learning_rate": 5.591160268053051e-06, "loss": 0.6417, "step": 5649 }, { "epoch": 0.47850942197755664, "grad_norm": 3.9651182473110884, "learning_rate": 5.589798231549415e-06, "loss": 0.6322, "step": 5650 }, { "epoch": 0.47859411391065, "grad_norm": 1.4428421037564565, "learning_rate": 5.5884361506603365e-06, "loss": 0.6799, "step": 5651 }, { "epoch": 0.4786788058437434, "grad_norm": 0.6519731552550347, "learning_rate": 5.587074025488324e-06, "loss": 0.8083, "step": 5652 }, { "epoch": 0.47876349777683674, "grad_norm": 1.567220382410684, "learning_rate": 5.5857118561358806e-06, "loss": 0.6765, "step": 5653 }, { "epoch": 0.47884818970993015, "grad_norm": 1.2754883248002715, "learning_rate": 5.58434964270552e-06, "loss": 0.6642, "step": 5654 }, { "epoch": 0.4789328816430235, "grad_norm": 1.6712602244182682, "learning_rate": 5.582987385299753e-06, "loss": 0.593, "step": 5655 }, { "epoch": 0.47901757357611685, "grad_norm": 1.787781658785624, "learning_rate": 5.581625084021099e-06, "loss": 0.6396, "step": 5656 }, { "epoch": 0.47910226550921026, "grad_norm": 1.1776426485905687, "learning_rate": 5.580262738972078e-06, "loss": 0.644, "step": 5657 }, { "epoch": 0.4791869574423036, "grad_norm": 1.2828463121418041, "learning_rate": 5.578900350255213e-06, "loss": 0.6352, "step": 5658 }, { "epoch": 0.479271649375397, "grad_norm": 1.8772034544707241, "learning_rate": 5.5775379179730305e-06, "loss": 0.6422, "step": 5659 }, { "epoch": 0.47935634130849036, "grad_norm": 1.3194885681699877, "learning_rate": 5.576175442228061e-06, "loss": 0.6453, "step": 5660 }, { "epoch": 0.4794410332415837, "grad_norm": 1.6085707389386936, "learning_rate": 5.574812923122841e-06, "loss": 0.6295, "step": 5661 }, { "epoch": 0.4795257251746771, "grad_norm": 1.997361612410173, "learning_rate": 5.573450360759903e-06, "loss": 0.6648, "step": 5662 }, { "epoch": 0.47961041710777047, "grad_norm": 1.3033859869777216, "learning_rate": 5.572087755241787e-06, "loss": 0.6598, "step": 5663 }, { "epoch": 0.4796951090408639, "grad_norm": 2.333611963428307, "learning_rate": 5.570725106671041e-06, "loss": 0.5622, "step": 5664 }, { "epoch": 0.4797798009739572, "grad_norm": 1.3099522148341964, "learning_rate": 5.569362415150209e-06, "loss": 0.6293, "step": 5665 }, { "epoch": 0.4798644929070506, "grad_norm": 0.6430404569325905, "learning_rate": 5.567999680781838e-06, "loss": 0.8556, "step": 5666 }, { "epoch": 0.479949184840144, "grad_norm": 1.529902896333383, "learning_rate": 5.566636903668484e-06, "loss": 0.5865, "step": 5667 }, { "epoch": 0.48003387677323733, "grad_norm": 0.6247755890627005, "learning_rate": 5.565274083912704e-06, "loss": 0.8702, "step": 5668 }, { "epoch": 0.48011856870633074, "grad_norm": 1.3788444841530365, "learning_rate": 5.563911221617054e-06, "loss": 0.6626, "step": 5669 }, { "epoch": 0.4802032606394241, "grad_norm": 1.1695950936278379, "learning_rate": 5.5625483168841e-06, "loss": 0.648, "step": 5670 }, { "epoch": 0.48028795257251744, "grad_norm": 1.3331495720226125, "learning_rate": 5.561185369816405e-06, "loss": 0.7123, "step": 5671 }, { "epoch": 0.48037264450561085, "grad_norm": 1.5454901583206624, "learning_rate": 5.559822380516539e-06, "loss": 0.6399, "step": 5672 }, { "epoch": 0.4804573364387042, "grad_norm": 2.7964499730267405, "learning_rate": 5.558459349087075e-06, "loss": 0.6597, "step": 5673 }, { "epoch": 0.4805420283717976, "grad_norm": 1.354450946839589, "learning_rate": 5.557096275630589e-06, "loss": 0.6296, "step": 5674 }, { "epoch": 0.48062672030489095, "grad_norm": 1.262714387058089, "learning_rate": 5.555733160249659e-06, "loss": 0.5959, "step": 5675 }, { "epoch": 0.48071141223798436, "grad_norm": 1.417750997553949, "learning_rate": 5.554370003046864e-06, "loss": 0.6401, "step": 5676 }, { "epoch": 0.4807961041710777, "grad_norm": 1.374502782867776, "learning_rate": 5.55300680412479e-06, "loss": 0.6398, "step": 5677 }, { "epoch": 0.48088079610417106, "grad_norm": 1.3686061464999126, "learning_rate": 5.5516435635860274e-06, "loss": 0.6873, "step": 5678 }, { "epoch": 0.48096548803726447, "grad_norm": 1.3397554989795342, "learning_rate": 5.550280281533166e-06, "loss": 0.6157, "step": 5679 }, { "epoch": 0.4810501799703578, "grad_norm": 1.2340033605998422, "learning_rate": 5.548916958068796e-06, "loss": 0.6205, "step": 5680 }, { "epoch": 0.4811348719034512, "grad_norm": 1.660259126366518, "learning_rate": 5.547553593295522e-06, "loss": 0.6324, "step": 5681 }, { "epoch": 0.4812195638365446, "grad_norm": 1.2652664853472417, "learning_rate": 5.5461901873159395e-06, "loss": 0.646, "step": 5682 }, { "epoch": 0.4813042557696379, "grad_norm": 1.2011299783946958, "learning_rate": 5.544826740232653e-06, "loss": 0.6469, "step": 5683 }, { "epoch": 0.48138894770273133, "grad_norm": 1.1802838999184242, "learning_rate": 5.543463252148269e-06, "loss": 0.714, "step": 5684 }, { "epoch": 0.4814736396358247, "grad_norm": 1.7108265904390174, "learning_rate": 5.542099723165398e-06, "loss": 0.622, "step": 5685 }, { "epoch": 0.4815583315689181, "grad_norm": 1.3052888432645595, "learning_rate": 5.540736153386653e-06, "loss": 0.6732, "step": 5686 }, { "epoch": 0.48164302350201144, "grad_norm": 1.4034105257979823, "learning_rate": 5.539372542914649e-06, "loss": 0.6105, "step": 5687 }, { "epoch": 0.4817277154351048, "grad_norm": 1.4538242655499747, "learning_rate": 5.538008891852003e-06, "loss": 0.6491, "step": 5688 }, { "epoch": 0.4818124073681982, "grad_norm": 1.1782115837706542, "learning_rate": 5.5366452003013406e-06, "loss": 0.6153, "step": 5689 }, { "epoch": 0.48189709930129154, "grad_norm": 1.7261857145355601, "learning_rate": 5.535281468365286e-06, "loss": 0.6048, "step": 5690 }, { "epoch": 0.48198179123438495, "grad_norm": 1.527671244103433, "learning_rate": 5.533917696146465e-06, "loss": 0.6317, "step": 5691 }, { "epoch": 0.4820664831674783, "grad_norm": 1.206348907400206, "learning_rate": 5.53255388374751e-06, "loss": 0.6673, "step": 5692 }, { "epoch": 0.48215117510057165, "grad_norm": 1.1761031568583262, "learning_rate": 5.531190031271056e-06, "loss": 0.6573, "step": 5693 }, { "epoch": 0.48223586703366506, "grad_norm": 1.1981935999611733, "learning_rate": 5.5298261388197396e-06, "loss": 0.6524, "step": 5694 }, { "epoch": 0.4823205589667584, "grad_norm": 1.2621659994315777, "learning_rate": 5.5284622064962e-06, "loss": 0.6466, "step": 5695 }, { "epoch": 0.4824052508998518, "grad_norm": 1.1700017137982586, "learning_rate": 5.527098234403081e-06, "loss": 0.6719, "step": 5696 }, { "epoch": 0.48248994283294516, "grad_norm": 1.3359956592569553, "learning_rate": 5.52573422264303e-06, "loss": 0.6879, "step": 5697 }, { "epoch": 0.4825746347660385, "grad_norm": 1.5234615631268242, "learning_rate": 5.524370171318692e-06, "loss": 0.6468, "step": 5698 }, { "epoch": 0.4826593266991319, "grad_norm": 1.476239434333283, "learning_rate": 5.523006080532726e-06, "loss": 0.6179, "step": 5699 }, { "epoch": 0.48274401863222527, "grad_norm": 0.6111193868296598, "learning_rate": 5.521641950387779e-06, "loss": 0.862, "step": 5700 }, { "epoch": 0.4828287105653187, "grad_norm": 1.304090433895319, "learning_rate": 5.520277780986515e-06, "loss": 0.609, "step": 5701 }, { "epoch": 0.482913402498412, "grad_norm": 1.253371020644971, "learning_rate": 5.518913572431593e-06, "loss": 0.5923, "step": 5702 }, { "epoch": 0.4829980944315054, "grad_norm": 1.5531792047227977, "learning_rate": 5.5175493248256774e-06, "loss": 0.6347, "step": 5703 }, { "epoch": 0.4830827863645988, "grad_norm": 1.271602212259912, "learning_rate": 5.516185038271433e-06, "loss": 0.68, "step": 5704 }, { "epoch": 0.48316747829769213, "grad_norm": 1.2492750634011012, "learning_rate": 5.5148207128715315e-06, "loss": 0.6701, "step": 5705 }, { "epoch": 0.48325217023078554, "grad_norm": 1.339329074073419, "learning_rate": 5.513456348728646e-06, "loss": 0.6474, "step": 5706 }, { "epoch": 0.4833368621638789, "grad_norm": 0.6372924076311324, "learning_rate": 5.512091945945452e-06, "loss": 0.8602, "step": 5707 }, { "epoch": 0.48342155409697224, "grad_norm": 1.2895385699247373, "learning_rate": 5.510727504624627e-06, "loss": 0.6666, "step": 5708 }, { "epoch": 0.48350624603006565, "grad_norm": 1.276327266271513, "learning_rate": 5.5093630248688515e-06, "loss": 0.6465, "step": 5709 }, { "epoch": 0.483590937963159, "grad_norm": 1.4114315969889508, "learning_rate": 5.507998506780813e-06, "loss": 0.669, "step": 5710 }, { "epoch": 0.4836756298962524, "grad_norm": 0.6297652185855503, "learning_rate": 5.5066339504631945e-06, "loss": 0.835, "step": 5711 }, { "epoch": 0.48376032182934575, "grad_norm": 1.2656806210355038, "learning_rate": 5.505269356018691e-06, "loss": 0.6828, "step": 5712 }, { "epoch": 0.4838450137624391, "grad_norm": 2.116057194940238, "learning_rate": 5.503904723549991e-06, "loss": 0.6479, "step": 5713 }, { "epoch": 0.4839297056955325, "grad_norm": 1.3007530672079572, "learning_rate": 5.502540053159794e-06, "loss": 0.5854, "step": 5714 }, { "epoch": 0.48401439762862586, "grad_norm": 2.2323768620232207, "learning_rate": 5.501175344950796e-06, "loss": 0.645, "step": 5715 }, { "epoch": 0.48409908956171926, "grad_norm": 2.124786749882554, "learning_rate": 5.499810599025699e-06, "loss": 0.6505, "step": 5716 }, { "epoch": 0.4841837814948126, "grad_norm": 1.5149583722031326, "learning_rate": 5.498445815487208e-06, "loss": 0.6748, "step": 5717 }, { "epoch": 0.48426847342790597, "grad_norm": 0.6672986082555464, "learning_rate": 5.497080994438031e-06, "loss": 0.8362, "step": 5718 }, { "epoch": 0.48435316536099937, "grad_norm": 1.1984791732878746, "learning_rate": 5.4957161359808755e-06, "loss": 0.6414, "step": 5719 }, { "epoch": 0.4844378572940927, "grad_norm": 0.5599629275881208, "learning_rate": 5.494351240218457e-06, "loss": 0.8841, "step": 5720 }, { "epoch": 0.48452254922718613, "grad_norm": 1.3355549431799394, "learning_rate": 5.492986307253489e-06, "loss": 0.6214, "step": 5721 }, { "epoch": 0.4846072411602795, "grad_norm": 0.5943493773757019, "learning_rate": 5.491621337188691e-06, "loss": 0.8115, "step": 5722 }, { "epoch": 0.48469193309337283, "grad_norm": 1.4259342541191429, "learning_rate": 5.490256330126785e-06, "loss": 0.6428, "step": 5723 }, { "epoch": 0.48477662502646623, "grad_norm": 1.1225141451432452, "learning_rate": 5.488891286170494e-06, "loss": 0.5953, "step": 5724 }, { "epoch": 0.4848613169595596, "grad_norm": 1.207617692783979, "learning_rate": 5.4875262054225446e-06, "loss": 0.6238, "step": 5725 }, { "epoch": 0.484946008892653, "grad_norm": 1.3041225930377456, "learning_rate": 5.486161087985666e-06, "loss": 0.6553, "step": 5726 }, { "epoch": 0.48503070082574634, "grad_norm": 1.3353762722501943, "learning_rate": 5.484795933962592e-06, "loss": 0.6272, "step": 5727 }, { "epoch": 0.48511539275883975, "grad_norm": 1.4799928579591404, "learning_rate": 5.483430743456057e-06, "loss": 0.6572, "step": 5728 }, { "epoch": 0.4852000846919331, "grad_norm": 1.3264527351818478, "learning_rate": 5.482065516568797e-06, "loss": 0.687, "step": 5729 }, { "epoch": 0.48528477662502645, "grad_norm": 1.9009238597811535, "learning_rate": 5.480700253403557e-06, "loss": 0.6564, "step": 5730 }, { "epoch": 0.48536946855811985, "grad_norm": 1.7701864039092399, "learning_rate": 5.479334954063076e-06, "loss": 0.6255, "step": 5731 }, { "epoch": 0.4854541604912132, "grad_norm": 1.458240603325634, "learning_rate": 5.477969618650101e-06, "loss": 0.6586, "step": 5732 }, { "epoch": 0.4855388524243066, "grad_norm": 1.361183695562051, "learning_rate": 5.476604247267382e-06, "loss": 0.6133, "step": 5733 }, { "epoch": 0.48562354435739996, "grad_norm": 1.645877471529212, "learning_rate": 5.475238840017669e-06, "loss": 0.6175, "step": 5734 }, { "epoch": 0.4857082362904933, "grad_norm": 0.6774966394061529, "learning_rate": 5.473873397003719e-06, "loss": 0.8676, "step": 5735 }, { "epoch": 0.4857929282235867, "grad_norm": 1.3692680943869713, "learning_rate": 5.4725079183282835e-06, "loss": 0.6286, "step": 5736 }, { "epoch": 0.48587762015668007, "grad_norm": 2.3570183077800837, "learning_rate": 5.4711424040941275e-06, "loss": 0.6215, "step": 5737 }, { "epoch": 0.4859623120897735, "grad_norm": 1.2790383823970843, "learning_rate": 5.469776854404008e-06, "loss": 0.6614, "step": 5738 }, { "epoch": 0.4860470040228668, "grad_norm": 1.5590827465429078, "learning_rate": 5.468411269360695e-06, "loss": 0.6692, "step": 5739 }, { "epoch": 0.4861316959559602, "grad_norm": 1.552313246979478, "learning_rate": 5.467045649066953e-06, "loss": 0.6321, "step": 5740 }, { "epoch": 0.4862163878890536, "grad_norm": 1.307333345894534, "learning_rate": 5.465679993625553e-06, "loss": 0.5875, "step": 5741 }, { "epoch": 0.48630107982214693, "grad_norm": 1.2970635532842576, "learning_rate": 5.464314303139268e-06, "loss": 0.596, "step": 5742 }, { "epoch": 0.48638577175524034, "grad_norm": 1.3536125370541345, "learning_rate": 5.462948577710872e-06, "loss": 0.6823, "step": 5743 }, { "epoch": 0.4864704636883337, "grad_norm": 1.185608131771975, "learning_rate": 5.461582817443144e-06, "loss": 0.6548, "step": 5744 }, { "epoch": 0.48655515562142704, "grad_norm": 1.2660395726052938, "learning_rate": 5.460217022438866e-06, "loss": 0.602, "step": 5745 }, { "epoch": 0.48663984755452044, "grad_norm": 1.3756193159734849, "learning_rate": 5.458851192800818e-06, "loss": 0.6664, "step": 5746 }, { "epoch": 0.4867245394876138, "grad_norm": 1.3830035778267726, "learning_rate": 5.45748532863179e-06, "loss": 0.6059, "step": 5747 }, { "epoch": 0.4868092314207072, "grad_norm": 1.2073805920543645, "learning_rate": 5.456119430034569e-06, "loss": 0.594, "step": 5748 }, { "epoch": 0.48689392335380055, "grad_norm": 2.8119459397193225, "learning_rate": 5.454753497111943e-06, "loss": 0.6501, "step": 5749 }, { "epoch": 0.4869786152868939, "grad_norm": 1.2970566231687402, "learning_rate": 5.4533875299667095e-06, "loss": 0.6696, "step": 5750 }, { "epoch": 0.4870633072199873, "grad_norm": 1.3286950441492789, "learning_rate": 5.452021528701664e-06, "loss": 0.6229, "step": 5751 }, { "epoch": 0.48714799915308066, "grad_norm": 1.3586276678957743, "learning_rate": 5.450655493419605e-06, "loss": 0.6332, "step": 5752 }, { "epoch": 0.48723269108617406, "grad_norm": 1.5099572443221891, "learning_rate": 5.449289424223334e-06, "loss": 0.6282, "step": 5753 }, { "epoch": 0.4873173830192674, "grad_norm": 1.380682023837722, "learning_rate": 5.447923321215653e-06, "loss": 0.6111, "step": 5754 }, { "epoch": 0.48740207495236076, "grad_norm": 0.6613068524554779, "learning_rate": 5.446557184499373e-06, "loss": 0.9084, "step": 5755 }, { "epoch": 0.48748676688545417, "grad_norm": 1.5902912850102329, "learning_rate": 5.445191014177299e-06, "loss": 0.5835, "step": 5756 }, { "epoch": 0.4875714588185475, "grad_norm": 2.488000584777272, "learning_rate": 5.443824810352243e-06, "loss": 0.6517, "step": 5757 }, { "epoch": 0.4876561507516409, "grad_norm": 1.1845675281478079, "learning_rate": 5.44245857312702e-06, "loss": 0.5843, "step": 5758 }, { "epoch": 0.4877408426847343, "grad_norm": 1.2852259045132195, "learning_rate": 5.4410923026044475e-06, "loss": 0.6071, "step": 5759 }, { "epoch": 0.4878255346178276, "grad_norm": 1.3955462824561045, "learning_rate": 5.439725998887342e-06, "loss": 0.6592, "step": 5760 }, { "epoch": 0.48791022655092103, "grad_norm": 1.2619738023848075, "learning_rate": 5.438359662078528e-06, "loss": 0.6315, "step": 5761 }, { "epoch": 0.4879949184840144, "grad_norm": 1.4704353836062674, "learning_rate": 5.4369932922808274e-06, "loss": 0.6717, "step": 5762 }, { "epoch": 0.4880796104171078, "grad_norm": 1.3221654474887319, "learning_rate": 5.435626889597069e-06, "loss": 0.618, "step": 5763 }, { "epoch": 0.48816430235020114, "grad_norm": 1.9975290851715772, "learning_rate": 5.434260454130078e-06, "loss": 0.6245, "step": 5764 }, { "epoch": 0.4882489942832945, "grad_norm": 2.852314016944463, "learning_rate": 5.43289398598269e-06, "loss": 0.6294, "step": 5765 }, { "epoch": 0.4883336862163879, "grad_norm": 2.673457689668074, "learning_rate": 5.431527485257737e-06, "loss": 0.6014, "step": 5766 }, { "epoch": 0.48841837814948125, "grad_norm": 1.6360739356054919, "learning_rate": 5.430160952058055e-06, "loss": 0.6878, "step": 5767 }, { "epoch": 0.48850307008257465, "grad_norm": 1.2737241956824885, "learning_rate": 5.428794386486484e-06, "loss": 0.6448, "step": 5768 }, { "epoch": 0.488587762015668, "grad_norm": 1.1497035721740645, "learning_rate": 5.427427788645866e-06, "loss": 0.6699, "step": 5769 }, { "epoch": 0.48867245394876135, "grad_norm": 5.189519941216815, "learning_rate": 5.426061158639042e-06, "loss": 0.5965, "step": 5770 }, { "epoch": 0.48875714588185476, "grad_norm": 1.7016957531098615, "learning_rate": 5.42469449656886e-06, "loss": 0.6434, "step": 5771 }, { "epoch": 0.4888418378149481, "grad_norm": 1.338157838986529, "learning_rate": 5.423327802538168e-06, "loss": 0.6746, "step": 5772 }, { "epoch": 0.4889265297480415, "grad_norm": 1.3310782831648174, "learning_rate": 5.421961076649819e-06, "loss": 0.6374, "step": 5773 }, { "epoch": 0.48901122168113487, "grad_norm": 1.63129537412397, "learning_rate": 5.420594319006662e-06, "loss": 0.6343, "step": 5774 }, { "epoch": 0.4890959136142282, "grad_norm": 2.044513538845657, "learning_rate": 5.419227529711557e-06, "loss": 0.6152, "step": 5775 }, { "epoch": 0.4891806055473216, "grad_norm": 0.658417878100242, "learning_rate": 5.4178607088673605e-06, "loss": 0.877, "step": 5776 }, { "epoch": 0.489265297480415, "grad_norm": 1.2744395043687726, "learning_rate": 5.416493856576932e-06, "loss": 0.6293, "step": 5777 }, { "epoch": 0.4893499894135084, "grad_norm": 1.2503758983917381, "learning_rate": 5.4151269729431365e-06, "loss": 0.6657, "step": 5778 }, { "epoch": 0.48943468134660173, "grad_norm": 1.2354693518594502, "learning_rate": 5.413760058068836e-06, "loss": 0.6515, "step": 5779 }, { "epoch": 0.48951937327969514, "grad_norm": 1.440544137711514, "learning_rate": 5.4123931120569025e-06, "loss": 0.6786, "step": 5780 }, { "epoch": 0.4896040652127885, "grad_norm": 1.4675965114420972, "learning_rate": 5.411026135010203e-06, "loss": 0.6903, "step": 5781 }, { "epoch": 0.48968875714588184, "grad_norm": 1.6431310089488202, "learning_rate": 5.40965912703161e-06, "loss": 0.6548, "step": 5782 }, { "epoch": 0.48977344907897524, "grad_norm": 0.6604262085608099, "learning_rate": 5.408292088223998e-06, "loss": 0.8178, "step": 5783 }, { "epoch": 0.4898581410120686, "grad_norm": 1.2138755467948734, "learning_rate": 5.406925018690247e-06, "loss": 0.652, "step": 5784 }, { "epoch": 0.489942832945162, "grad_norm": 1.388049684654913, "learning_rate": 5.40555791853323e-06, "loss": 0.6452, "step": 5785 }, { "epoch": 0.49002752487825535, "grad_norm": 2.1861278442605654, "learning_rate": 5.404190787855834e-06, "loss": 0.6957, "step": 5786 }, { "epoch": 0.4901122168113487, "grad_norm": 0.66199650018159, "learning_rate": 5.40282362676094e-06, "loss": 0.829, "step": 5787 }, { "epoch": 0.4901969087444421, "grad_norm": 1.5693551054225616, "learning_rate": 5.401456435351435e-06, "loss": 0.6675, "step": 5788 }, { "epoch": 0.49028160067753546, "grad_norm": 1.7009549050170116, "learning_rate": 5.400089213730208e-06, "loss": 0.6318, "step": 5789 }, { "epoch": 0.49036629261062886, "grad_norm": 1.1848057194709576, "learning_rate": 5.398721962000151e-06, "loss": 0.5857, "step": 5790 }, { "epoch": 0.4904509845437222, "grad_norm": 1.2859757583453428, "learning_rate": 5.397354680264152e-06, "loss": 0.6077, "step": 5791 }, { "epoch": 0.49053567647681556, "grad_norm": 1.9543812688743047, "learning_rate": 5.39598736862511e-06, "loss": 0.632, "step": 5792 }, { "epoch": 0.49062036840990897, "grad_norm": 1.7814406566568963, "learning_rate": 5.394620027185923e-06, "loss": 0.6992, "step": 5793 }, { "epoch": 0.4907050603430023, "grad_norm": 1.3347621547865123, "learning_rate": 5.393252656049488e-06, "loss": 0.7246, "step": 5794 }, { "epoch": 0.4907897522760957, "grad_norm": 1.33005975946008, "learning_rate": 5.3918852553187076e-06, "loss": 0.6635, "step": 5795 }, { "epoch": 0.4908744442091891, "grad_norm": 0.6200578666012386, "learning_rate": 5.390517825096488e-06, "loss": 0.8827, "step": 5796 }, { "epoch": 0.4909591361422824, "grad_norm": 1.229929424612041, "learning_rate": 5.389150365485735e-06, "loss": 0.653, "step": 5797 }, { "epoch": 0.49104382807537583, "grad_norm": 1.5421608233313708, "learning_rate": 5.387782876589353e-06, "loss": 0.7116, "step": 5798 }, { "epoch": 0.4911285200084692, "grad_norm": 4.147840963514822, "learning_rate": 5.386415358510258e-06, "loss": 0.619, "step": 5799 }, { "epoch": 0.4912132119415626, "grad_norm": 1.592073074447856, "learning_rate": 5.385047811351363e-06, "loss": 0.5913, "step": 5800 }, { "epoch": 0.49129790387465594, "grad_norm": 1.6517128222582373, "learning_rate": 5.383680235215579e-06, "loss": 0.6403, "step": 5801 }, { "epoch": 0.4913825958077493, "grad_norm": 2.0433374569494664, "learning_rate": 5.382312630205826e-06, "loss": 0.5285, "step": 5802 }, { "epoch": 0.4914672877408427, "grad_norm": 4.19096218217525, "learning_rate": 5.380944996425024e-06, "loss": 0.665, "step": 5803 }, { "epoch": 0.49155197967393605, "grad_norm": 0.5936398745072812, "learning_rate": 5.379577333976093e-06, "loss": 0.7966, "step": 5804 }, { "epoch": 0.49163667160702945, "grad_norm": 1.6161382107194906, "learning_rate": 5.378209642961958e-06, "loss": 0.5796, "step": 5805 }, { "epoch": 0.4917213635401228, "grad_norm": 1.5080636928116171, "learning_rate": 5.376841923485544e-06, "loss": 0.6657, "step": 5806 }, { "epoch": 0.49180605547321615, "grad_norm": 1.2314154780700015, "learning_rate": 5.375474175649781e-06, "loss": 0.6806, "step": 5807 }, { "epoch": 0.49189074740630956, "grad_norm": 2.3236294113611384, "learning_rate": 5.374106399557597e-06, "loss": 0.6043, "step": 5808 }, { "epoch": 0.4919754393394029, "grad_norm": 0.6044583553083315, "learning_rate": 5.372738595311925e-06, "loss": 0.8582, "step": 5809 }, { "epoch": 0.4920601312724963, "grad_norm": 1.8388647972113303, "learning_rate": 5.371370763015702e-06, "loss": 0.6324, "step": 5810 }, { "epoch": 0.49214482320558967, "grad_norm": 1.8566448094499892, "learning_rate": 5.370002902771861e-06, "loss": 0.6625, "step": 5811 }, { "epoch": 0.492229515138683, "grad_norm": 1.7170385385853693, "learning_rate": 5.368635014683341e-06, "loss": 0.662, "step": 5812 }, { "epoch": 0.4923142070717764, "grad_norm": 1.844947837310186, "learning_rate": 5.367267098853088e-06, "loss": 0.6621, "step": 5813 }, { "epoch": 0.4923988990048698, "grad_norm": 1.6317078274604528, "learning_rate": 5.365899155384038e-06, "loss": 0.6507, "step": 5814 }, { "epoch": 0.4924835909379632, "grad_norm": 1.6283739271045408, "learning_rate": 5.364531184379139e-06, "loss": 0.6709, "step": 5815 }, { "epoch": 0.49256828287105653, "grad_norm": 1.2451999703988423, "learning_rate": 5.363163185941339e-06, "loss": 0.6628, "step": 5816 }, { "epoch": 0.4926529748041499, "grad_norm": 0.6370097864682891, "learning_rate": 5.361795160173586e-06, "loss": 0.8233, "step": 5817 }, { "epoch": 0.4927376667372433, "grad_norm": 1.511475039652888, "learning_rate": 5.360427107178833e-06, "loss": 0.6272, "step": 5818 }, { "epoch": 0.49282235867033664, "grad_norm": 2.4664466495929265, "learning_rate": 5.35905902706003e-06, "loss": 0.6154, "step": 5819 }, { "epoch": 0.49290705060343004, "grad_norm": 1.7231317138053293, "learning_rate": 5.357690919920133e-06, "loss": 0.6377, "step": 5820 }, { "epoch": 0.4929917425365234, "grad_norm": 1.938179877926602, "learning_rate": 5.356322785862102e-06, "loss": 0.639, "step": 5821 }, { "epoch": 0.49307643446961674, "grad_norm": 1.2058128719245629, "learning_rate": 5.354954624988894e-06, "loss": 0.6209, "step": 5822 }, { "epoch": 0.49316112640271015, "grad_norm": 1.5360043441984363, "learning_rate": 5.353586437403471e-06, "loss": 0.648, "step": 5823 }, { "epoch": 0.4932458183358035, "grad_norm": 1.247504177727546, "learning_rate": 5.352218223208795e-06, "loss": 0.611, "step": 5824 }, { "epoch": 0.4933305102688969, "grad_norm": 1.339971118762359, "learning_rate": 5.350849982507834e-06, "loss": 0.6463, "step": 5825 }, { "epoch": 0.49341520220199026, "grad_norm": 1.4659644057766301, "learning_rate": 5.349481715403553e-06, "loss": 0.5927, "step": 5826 }, { "epoch": 0.49349989413508366, "grad_norm": 1.177899107461634, "learning_rate": 5.348113421998924e-06, "loss": 0.6491, "step": 5827 }, { "epoch": 0.493584586068177, "grad_norm": 1.5243126232646305, "learning_rate": 5.346745102396915e-06, "loss": 0.6247, "step": 5828 }, { "epoch": 0.49366927800127036, "grad_norm": 1.4199002044622746, "learning_rate": 5.345376756700502e-06, "loss": 0.6553, "step": 5829 }, { "epoch": 0.49375396993436377, "grad_norm": 1.354624545985229, "learning_rate": 5.3440083850126574e-06, "loss": 0.6442, "step": 5830 }, { "epoch": 0.4938386618674571, "grad_norm": 1.3995644813314587, "learning_rate": 5.342639987436363e-06, "loss": 0.6432, "step": 5831 }, { "epoch": 0.4939233538005505, "grad_norm": 1.2866855804801858, "learning_rate": 5.341271564074593e-06, "loss": 0.7047, "step": 5832 }, { "epoch": 0.4940080457336439, "grad_norm": 1.2266321896477737, "learning_rate": 5.3399031150303304e-06, "loss": 0.6926, "step": 5833 }, { "epoch": 0.4940927376667372, "grad_norm": 1.4385483226032796, "learning_rate": 5.338534640406561e-06, "loss": 0.6481, "step": 5834 }, { "epoch": 0.49417742959983063, "grad_norm": 2.2591379445355626, "learning_rate": 5.337166140306266e-06, "loss": 0.6825, "step": 5835 }, { "epoch": 0.494262121532924, "grad_norm": 1.2544264327803656, "learning_rate": 5.335797614832433e-06, "loss": 0.5807, "step": 5836 }, { "epoch": 0.4943468134660174, "grad_norm": 1.9331628514382626, "learning_rate": 5.334429064088051e-06, "loss": 0.6533, "step": 5837 }, { "epoch": 0.49443150539911074, "grad_norm": 1.9996599752152917, "learning_rate": 5.333060488176111e-06, "loss": 0.6455, "step": 5838 }, { "epoch": 0.4945161973322041, "grad_norm": 1.4534381114348358, "learning_rate": 5.3316918871996084e-06, "loss": 0.6289, "step": 5839 }, { "epoch": 0.4946008892652975, "grad_norm": 1.2504846521515842, "learning_rate": 5.330323261261532e-06, "loss": 0.6685, "step": 5840 }, { "epoch": 0.49468558119839084, "grad_norm": 1.4001854866117716, "learning_rate": 5.328954610464882e-06, "loss": 0.6217, "step": 5841 }, { "epoch": 0.49477027313148425, "grad_norm": 0.6005464967201116, "learning_rate": 5.327585934912656e-06, "loss": 0.8736, "step": 5842 }, { "epoch": 0.4948549650645776, "grad_norm": 1.2113077827874554, "learning_rate": 5.326217234707852e-06, "loss": 0.638, "step": 5843 }, { "epoch": 0.49493965699767095, "grad_norm": 1.3690509655398697, "learning_rate": 5.324848509953476e-06, "loss": 0.6805, "step": 5844 }, { "epoch": 0.49502434893076436, "grad_norm": 1.2436095988385432, "learning_rate": 5.323479760752528e-06, "loss": 0.6323, "step": 5845 }, { "epoch": 0.4951090408638577, "grad_norm": 1.5380769273138286, "learning_rate": 5.322110987208016e-06, "loss": 0.6687, "step": 5846 }, { "epoch": 0.4951937327969511, "grad_norm": 2.5873812652250274, "learning_rate": 5.320742189422946e-06, "loss": 0.608, "step": 5847 }, { "epoch": 0.49527842473004446, "grad_norm": 1.6925519678967058, "learning_rate": 5.319373367500328e-06, "loss": 0.61, "step": 5848 }, { "epoch": 0.4953631166631378, "grad_norm": 1.2489400338132515, "learning_rate": 5.318004521543173e-06, "loss": 0.6463, "step": 5849 }, { "epoch": 0.4954478085962312, "grad_norm": 1.3716097779924468, "learning_rate": 5.316635651654494e-06, "loss": 0.6323, "step": 5850 }, { "epoch": 0.49553250052932457, "grad_norm": 1.349199338309005, "learning_rate": 5.315266757937305e-06, "loss": 0.6368, "step": 5851 }, { "epoch": 0.495617192462418, "grad_norm": 1.3346578562505458, "learning_rate": 5.313897840494624e-06, "loss": 0.6127, "step": 5852 }, { "epoch": 0.4957018843955113, "grad_norm": 1.9930946205029325, "learning_rate": 5.312528899429466e-06, "loss": 0.6661, "step": 5853 }, { "epoch": 0.4957865763286047, "grad_norm": 1.398071718009787, "learning_rate": 5.311159934844855e-06, "loss": 0.693, "step": 5854 }, { "epoch": 0.4958712682616981, "grad_norm": 1.2814362856926989, "learning_rate": 5.309790946843812e-06, "loss": 0.6189, "step": 5855 }, { "epoch": 0.49595596019479143, "grad_norm": 1.4417151962388186, "learning_rate": 5.3084219355293595e-06, "loss": 0.616, "step": 5856 }, { "epoch": 0.49604065212788484, "grad_norm": 1.3764877156583497, "learning_rate": 5.307052901004522e-06, "loss": 0.6166, "step": 5857 }, { "epoch": 0.4961253440609782, "grad_norm": 0.6527826400456657, "learning_rate": 5.305683843372329e-06, "loss": 0.8297, "step": 5858 }, { "epoch": 0.49621003599407154, "grad_norm": 1.138515758186843, "learning_rate": 5.304314762735808e-06, "loss": 0.6229, "step": 5859 }, { "epoch": 0.49629472792716495, "grad_norm": 1.2125793937233422, "learning_rate": 5.30294565919799e-06, "loss": 0.6424, "step": 5860 }, { "epoch": 0.4963794198602583, "grad_norm": 1.23513171274977, "learning_rate": 5.301576532861905e-06, "loss": 0.6041, "step": 5861 }, { "epoch": 0.4964641117933517, "grad_norm": 0.6179876112520806, "learning_rate": 5.300207383830591e-06, "loss": 0.8851, "step": 5862 }, { "epoch": 0.49654880372644505, "grad_norm": 1.7051126611509066, "learning_rate": 5.298838212207081e-06, "loss": 0.6141, "step": 5863 }, { "epoch": 0.4966334956595384, "grad_norm": 1.9282602332276422, "learning_rate": 5.297469018094413e-06, "loss": 0.5854, "step": 5864 }, { "epoch": 0.4967181875926318, "grad_norm": 1.306752411402326, "learning_rate": 5.2960998015956255e-06, "loss": 0.6595, "step": 5865 }, { "epoch": 0.49680287952572516, "grad_norm": 1.5201914233750113, "learning_rate": 5.2947305628137615e-06, "loss": 0.6399, "step": 5866 }, { "epoch": 0.49688757145881857, "grad_norm": 1.2231600666915499, "learning_rate": 5.293361301851863e-06, "loss": 0.6015, "step": 5867 }, { "epoch": 0.4969722633919119, "grad_norm": 1.4407779834060228, "learning_rate": 5.29199201881297e-06, "loss": 0.6431, "step": 5868 }, { "epoch": 0.49705695532500527, "grad_norm": 1.2821386016395153, "learning_rate": 5.290622713800132e-06, "loss": 0.6199, "step": 5869 }, { "epoch": 0.4971416472580987, "grad_norm": 1.43872001184042, "learning_rate": 5.289253386916398e-06, "loss": 0.6721, "step": 5870 }, { "epoch": 0.497226339191192, "grad_norm": 1.6138694351367149, "learning_rate": 5.2878840382648136e-06, "loss": 0.643, "step": 5871 }, { "epoch": 0.49731103112428543, "grad_norm": 1.3116484217635778, "learning_rate": 5.286514667948431e-06, "loss": 0.6178, "step": 5872 }, { "epoch": 0.4973957230573788, "grad_norm": 1.466373912609524, "learning_rate": 5.285145276070301e-06, "loss": 0.6846, "step": 5873 }, { "epoch": 0.49748041499047213, "grad_norm": 1.3095565535722098, "learning_rate": 5.283775862733482e-06, "loss": 0.5982, "step": 5874 }, { "epoch": 0.49756510692356554, "grad_norm": 1.4162113766308433, "learning_rate": 5.282406428041025e-06, "loss": 0.5818, "step": 5875 }, { "epoch": 0.4976497988566589, "grad_norm": 1.2428621925226582, "learning_rate": 5.28103697209599e-06, "loss": 0.5944, "step": 5876 }, { "epoch": 0.4977344907897523, "grad_norm": 1.2414353709792394, "learning_rate": 5.279667495001434e-06, "loss": 0.5985, "step": 5877 }, { "epoch": 0.49781918272284564, "grad_norm": 10.438278236841795, "learning_rate": 5.278297996860415e-06, "loss": 0.6313, "step": 5878 }, { "epoch": 0.49790387465593905, "grad_norm": 1.5953986584589102, "learning_rate": 5.2769284777760026e-06, "loss": 0.591, "step": 5879 }, { "epoch": 0.4979885665890324, "grad_norm": 1.422472167156528, "learning_rate": 5.275558937851254e-06, "loss": 0.6783, "step": 5880 }, { "epoch": 0.49807325852212575, "grad_norm": 1.4046115990694494, "learning_rate": 5.274189377189236e-06, "loss": 0.628, "step": 5881 }, { "epoch": 0.49815795045521916, "grad_norm": 1.4252355210378898, "learning_rate": 5.272819795893016e-06, "loss": 0.6407, "step": 5882 }, { "epoch": 0.4982426423883125, "grad_norm": 0.6499161761010754, "learning_rate": 5.271450194065662e-06, "loss": 0.8794, "step": 5883 }, { "epoch": 0.4983273343214059, "grad_norm": 1.6221070929932744, "learning_rate": 5.2700805718102434e-06, "loss": 0.5874, "step": 5884 }, { "epoch": 0.49841202625449926, "grad_norm": 1.291840459771235, "learning_rate": 5.268710929229831e-06, "loss": 0.6523, "step": 5885 }, { "epoch": 0.4984967181875926, "grad_norm": 2.0170401488942944, "learning_rate": 5.267341266427498e-06, "loss": 0.661, "step": 5886 }, { "epoch": 0.498581410120686, "grad_norm": 1.5762025176337402, "learning_rate": 5.26597158350632e-06, "loss": 0.633, "step": 5887 }, { "epoch": 0.49866610205377937, "grad_norm": 1.3326254460342732, "learning_rate": 5.264601880569372e-06, "loss": 0.6119, "step": 5888 }, { "epoch": 0.4987507939868728, "grad_norm": 1.5980453416905394, "learning_rate": 5.26323215771973e-06, "loss": 0.636, "step": 5889 }, { "epoch": 0.4988354859199661, "grad_norm": 1.4610447117237744, "learning_rate": 5.261862415060475e-06, "loss": 0.6393, "step": 5890 }, { "epoch": 0.4989201778530595, "grad_norm": 1.9799005031276031, "learning_rate": 5.260492652694687e-06, "loss": 0.6317, "step": 5891 }, { "epoch": 0.4990048697861529, "grad_norm": 1.5763136243326636, "learning_rate": 5.259122870725447e-06, "loss": 0.5939, "step": 5892 }, { "epoch": 0.49908956171924623, "grad_norm": 1.528198831549201, "learning_rate": 5.25775306925584e-06, "loss": 0.6231, "step": 5893 }, { "epoch": 0.49917425365233964, "grad_norm": 1.4741276434744657, "learning_rate": 5.256383248388948e-06, "loss": 0.7047, "step": 5894 }, { "epoch": 0.499258945585433, "grad_norm": 1.9606683611641833, "learning_rate": 5.25501340822786e-06, "loss": 0.5976, "step": 5895 }, { "epoch": 0.49934363751852634, "grad_norm": 1.4650704064948503, "learning_rate": 5.253643548875662e-06, "loss": 0.6023, "step": 5896 }, { "epoch": 0.49942832945161975, "grad_norm": 1.2318957981909848, "learning_rate": 5.2522736704354445e-06, "loss": 0.6656, "step": 5897 }, { "epoch": 0.4995130213847131, "grad_norm": 1.5041361076792976, "learning_rate": 5.250903773010297e-06, "loss": 0.686, "step": 5898 }, { "epoch": 0.4995977133178065, "grad_norm": 1.9147700070435887, "learning_rate": 5.249533856703311e-06, "loss": 0.6683, "step": 5899 }, { "epoch": 0.49968240525089985, "grad_norm": 1.1825268487929728, "learning_rate": 5.248163921617584e-06, "loss": 0.5777, "step": 5900 }, { "epoch": 0.4997670971839932, "grad_norm": 1.4902462272251624, "learning_rate": 5.2467939678562065e-06, "loss": 0.6598, "step": 5901 }, { "epoch": 0.4998517891170866, "grad_norm": 1.5944415104505147, "learning_rate": 5.245423995522275e-06, "loss": 0.6751, "step": 5902 }, { "epoch": 0.49993648105017996, "grad_norm": 1.4899821153124906, "learning_rate": 5.244054004718888e-06, "loss": 0.5922, "step": 5903 }, { "epoch": 0.5000211729832733, "grad_norm": 2.212022043072968, "learning_rate": 5.242683995549146e-06, "loss": 0.6825, "step": 5904 }, { "epoch": 0.5001058649163668, "grad_norm": 1.2173276991406694, "learning_rate": 5.2413139681161485e-06, "loss": 0.642, "step": 5905 }, { "epoch": 0.5001905568494601, "grad_norm": 1.3324396282874653, "learning_rate": 5.239943922522996e-06, "loss": 0.6363, "step": 5906 }, { "epoch": 0.5002752487825535, "grad_norm": 1.715470105998747, "learning_rate": 5.238573858872793e-06, "loss": 0.6184, "step": 5907 }, { "epoch": 0.5003599407156468, "grad_norm": 1.3550406276390248, "learning_rate": 5.237203777268644e-06, "loss": 0.6056, "step": 5908 }, { "epoch": 0.5004446326487402, "grad_norm": 1.5082620930749957, "learning_rate": 5.235833677813656e-06, "loss": 0.6278, "step": 5909 }, { "epoch": 0.5005293245818336, "grad_norm": 1.449529355623962, "learning_rate": 5.234463560610933e-06, "loss": 0.6127, "step": 5910 }, { "epoch": 0.500614016514927, "grad_norm": 0.5835773743408795, "learning_rate": 5.233093425763586e-06, "loss": 0.8756, "step": 5911 }, { "epoch": 0.5006987084480203, "grad_norm": 1.6349156081656846, "learning_rate": 5.231723273374725e-06, "loss": 0.6821, "step": 5912 }, { "epoch": 0.5007834003811137, "grad_norm": 1.882061189926013, "learning_rate": 5.23035310354746e-06, "loss": 0.6302, "step": 5913 }, { "epoch": 0.500868092314207, "grad_norm": 1.6838587882968734, "learning_rate": 5.228982916384905e-06, "loss": 0.658, "step": 5914 }, { "epoch": 0.5009527842473005, "grad_norm": 1.1605908361153177, "learning_rate": 5.227612711990172e-06, "loss": 0.6018, "step": 5915 }, { "epoch": 0.5010374761803938, "grad_norm": 1.2214988214340463, "learning_rate": 5.226242490466378e-06, "loss": 0.6627, "step": 5916 }, { "epoch": 0.5011221681134872, "grad_norm": 1.4230630404273816, "learning_rate": 5.224872251916637e-06, "loss": 0.6185, "step": 5917 }, { "epoch": 0.5012068600465805, "grad_norm": 1.479423886190443, "learning_rate": 5.223501996444071e-06, "loss": 0.6503, "step": 5918 }, { "epoch": 0.5012915519796739, "grad_norm": 1.4329135678053024, "learning_rate": 5.222131724151794e-06, "loss": 0.687, "step": 5919 }, { "epoch": 0.5013762439127674, "grad_norm": 3.1546213447959266, "learning_rate": 5.220761435142929e-06, "loss": 0.6427, "step": 5920 }, { "epoch": 0.5014609358458607, "grad_norm": 1.4522581180947984, "learning_rate": 5.219391129520598e-06, "loss": 0.6647, "step": 5921 }, { "epoch": 0.5015456277789541, "grad_norm": 1.4269184485208783, "learning_rate": 5.218020807387922e-06, "loss": 0.6226, "step": 5922 }, { "epoch": 0.5016303197120474, "grad_norm": 1.69364143004853, "learning_rate": 5.2166504688480255e-06, "loss": 0.6425, "step": 5923 }, { "epoch": 0.5017150116451408, "grad_norm": 1.5730158663333476, "learning_rate": 5.215280114004034e-06, "loss": 0.6601, "step": 5924 }, { "epoch": 0.5017997035782342, "grad_norm": 1.3127590368076008, "learning_rate": 5.213909742959074e-06, "loss": 0.6666, "step": 5925 }, { "epoch": 0.5018843955113276, "grad_norm": 1.355818403817716, "learning_rate": 5.212539355816275e-06, "loss": 0.685, "step": 5926 }, { "epoch": 0.5019690874444209, "grad_norm": 1.7465718980019127, "learning_rate": 5.211168952678762e-06, "loss": 0.6175, "step": 5927 }, { "epoch": 0.5020537793775143, "grad_norm": 1.401598047537786, "learning_rate": 5.209798533649667e-06, "loss": 0.5939, "step": 5928 }, { "epoch": 0.5021384713106076, "grad_norm": 2.0044796966735663, "learning_rate": 5.208428098832123e-06, "loss": 0.6473, "step": 5929 }, { "epoch": 0.5022231632437011, "grad_norm": 1.7847605238873956, "learning_rate": 5.207057648329259e-06, "loss": 0.6095, "step": 5930 }, { "epoch": 0.5023078551767944, "grad_norm": 1.8373143567252712, "learning_rate": 5.205687182244211e-06, "loss": 0.6983, "step": 5931 }, { "epoch": 0.5023925471098878, "grad_norm": 1.4350493414882928, "learning_rate": 5.204316700680114e-06, "loss": 0.6699, "step": 5932 }, { "epoch": 0.5024772390429811, "grad_norm": 0.6255822501965631, "learning_rate": 5.202946203740103e-06, "loss": 0.8799, "step": 5933 }, { "epoch": 0.5025619309760745, "grad_norm": 1.4351156710743025, "learning_rate": 5.2015756915273165e-06, "loss": 0.6265, "step": 5934 }, { "epoch": 0.502646622909168, "grad_norm": 1.439580895760456, "learning_rate": 5.20020516414489e-06, "loss": 0.6182, "step": 5935 }, { "epoch": 0.5027313148422613, "grad_norm": 0.6009465195678716, "learning_rate": 5.1988346216959665e-06, "loss": 0.8725, "step": 5936 }, { "epoch": 0.5028160067753547, "grad_norm": 1.30026975186771, "learning_rate": 5.197464064283684e-06, "loss": 0.6473, "step": 5937 }, { "epoch": 0.502900698708448, "grad_norm": 1.5644382994696204, "learning_rate": 5.196093492011185e-06, "loss": 0.6463, "step": 5938 }, { "epoch": 0.5029853906415414, "grad_norm": 1.9071674869157804, "learning_rate": 5.194722904981612e-06, "loss": 0.6233, "step": 5939 }, { "epoch": 0.5030700825746348, "grad_norm": 1.3796737354886885, "learning_rate": 5.19335230329811e-06, "loss": 0.6114, "step": 5940 }, { "epoch": 0.5031547745077282, "grad_norm": 1.4508105862042113, "learning_rate": 5.191981687063823e-06, "loss": 0.6723, "step": 5941 }, { "epoch": 0.5032394664408215, "grad_norm": 1.2915900028135507, "learning_rate": 5.190611056381898e-06, "loss": 0.6432, "step": 5942 }, { "epoch": 0.5033241583739149, "grad_norm": 1.5004660191622, "learning_rate": 5.189240411355481e-06, "loss": 0.6383, "step": 5943 }, { "epoch": 0.5034088503070082, "grad_norm": 1.84514940244264, "learning_rate": 5.187869752087721e-06, "loss": 0.6238, "step": 5944 }, { "epoch": 0.5034935422401017, "grad_norm": 1.724200215197199, "learning_rate": 5.186499078681768e-06, "loss": 0.6358, "step": 5945 }, { "epoch": 0.503578234173195, "grad_norm": 2.561965435035592, "learning_rate": 5.1851283912407715e-06, "loss": 0.6622, "step": 5946 }, { "epoch": 0.5036629261062884, "grad_norm": 1.620713599443808, "learning_rate": 5.183757689867883e-06, "loss": 0.6281, "step": 5947 }, { "epoch": 0.5037476180393817, "grad_norm": 1.1642862253302497, "learning_rate": 5.182386974666255e-06, "loss": 0.6086, "step": 5948 }, { "epoch": 0.5038323099724751, "grad_norm": 1.1863129967508539, "learning_rate": 5.181016245739043e-06, "loss": 0.5977, "step": 5949 }, { "epoch": 0.5039170019055685, "grad_norm": 1.8308797065771614, "learning_rate": 5.179645503189397e-06, "loss": 0.6429, "step": 5950 }, { "epoch": 0.5040016938386619, "grad_norm": 1.555281223494611, "learning_rate": 5.178274747120478e-06, "loss": 0.6419, "step": 5951 }, { "epoch": 0.5040863857717552, "grad_norm": 1.4617768254369308, "learning_rate": 5.176903977635439e-06, "loss": 0.6448, "step": 5952 }, { "epoch": 0.5041710777048486, "grad_norm": 0.6943526284966234, "learning_rate": 5.17553319483744e-06, "loss": 0.8736, "step": 5953 }, { "epoch": 0.5042557696379419, "grad_norm": 0.6098077918001814, "learning_rate": 5.174162398829639e-06, "loss": 0.8497, "step": 5954 }, { "epoch": 0.5043404615710354, "grad_norm": 1.8207464847728998, "learning_rate": 5.172791589715194e-06, "loss": 0.6241, "step": 5955 }, { "epoch": 0.5044251535041288, "grad_norm": 1.6110957155497494, "learning_rate": 5.1714207675972664e-06, "loss": 0.6893, "step": 5956 }, { "epoch": 0.5045098454372221, "grad_norm": 1.1740002664785885, "learning_rate": 5.17004993257902e-06, "loss": 0.6219, "step": 5957 }, { "epoch": 0.5045945373703155, "grad_norm": 0.6715339108722508, "learning_rate": 5.168679084763615e-06, "loss": 0.8491, "step": 5958 }, { "epoch": 0.5046792293034088, "grad_norm": 2.024773371163312, "learning_rate": 5.1673082242542164e-06, "loss": 0.65, "step": 5959 }, { "epoch": 0.5047639212365023, "grad_norm": 1.5454294709673597, "learning_rate": 5.165937351153986e-06, "loss": 0.6051, "step": 5960 }, { "epoch": 0.5048486131695956, "grad_norm": 1.9842722895996987, "learning_rate": 5.164566465566094e-06, "loss": 0.621, "step": 5961 }, { "epoch": 0.504933305102689, "grad_norm": 0.6591242652349932, "learning_rate": 5.1631955675937e-06, "loss": 0.8439, "step": 5962 }, { "epoch": 0.5050179970357823, "grad_norm": 1.4330670860636192, "learning_rate": 5.161824657339979e-06, "loss": 0.5994, "step": 5963 }, { "epoch": 0.5051026889688757, "grad_norm": 1.2812774422388216, "learning_rate": 5.160453734908094e-06, "loss": 0.6583, "step": 5964 }, { "epoch": 0.5051873809019691, "grad_norm": 1.9842543890785476, "learning_rate": 5.159082800401216e-06, "loss": 0.6757, "step": 5965 }, { "epoch": 0.5052720728350625, "grad_norm": 1.3707700475930977, "learning_rate": 5.157711853922516e-06, "loss": 0.6487, "step": 5966 }, { "epoch": 0.5053567647681558, "grad_norm": 1.3582806114686883, "learning_rate": 5.156340895575164e-06, "loss": 0.6193, "step": 5967 }, { "epoch": 0.5054414567012492, "grad_norm": 2.666006208401969, "learning_rate": 5.15496992546233e-06, "loss": 0.7031, "step": 5968 }, { "epoch": 0.5055261486343425, "grad_norm": 1.3915200649990116, "learning_rate": 5.153598943687188e-06, "loss": 0.6002, "step": 5969 }, { "epoch": 0.505610840567436, "grad_norm": 1.286844986684108, "learning_rate": 5.152227950352915e-06, "loss": 0.66, "step": 5970 }, { "epoch": 0.5056955325005293, "grad_norm": 1.1856639593762426, "learning_rate": 5.1508569455626815e-06, "loss": 0.6525, "step": 5971 }, { "epoch": 0.5057802244336227, "grad_norm": 1.4187223186370943, "learning_rate": 5.149485929419662e-06, "loss": 0.6231, "step": 5972 }, { "epoch": 0.505864916366716, "grad_norm": 1.1675593983491723, "learning_rate": 5.148114902027037e-06, "loss": 0.6662, "step": 5973 }, { "epoch": 0.5059496082998094, "grad_norm": 1.3337035766491907, "learning_rate": 5.1467438634879805e-06, "loss": 0.6614, "step": 5974 }, { "epoch": 0.5060343002329029, "grad_norm": 0.6182323604592681, "learning_rate": 5.145372813905672e-06, "loss": 0.8558, "step": 5975 }, { "epoch": 0.5061189921659962, "grad_norm": 1.4085825670114136, "learning_rate": 5.144001753383288e-06, "loss": 0.6424, "step": 5976 }, { "epoch": 0.5062036840990896, "grad_norm": 0.625215261216726, "learning_rate": 5.14263068202401e-06, "loss": 0.8649, "step": 5977 }, { "epoch": 0.5062883760321829, "grad_norm": 0.6313874267612934, "learning_rate": 5.14125959993102e-06, "loss": 0.8406, "step": 5978 }, { "epoch": 0.5063730679652763, "grad_norm": 1.4516105056740989, "learning_rate": 5.139888507207495e-06, "loss": 0.6998, "step": 5979 }, { "epoch": 0.5064577598983697, "grad_norm": 1.3595032849803026, "learning_rate": 5.13851740395662e-06, "loss": 0.6446, "step": 5980 }, { "epoch": 0.5065424518314631, "grad_norm": 1.5957110194633777, "learning_rate": 5.137146290281575e-06, "loss": 0.6429, "step": 5981 }, { "epoch": 0.5066271437645564, "grad_norm": 1.408437621980968, "learning_rate": 5.135775166285548e-06, "loss": 0.6715, "step": 5982 }, { "epoch": 0.5067118356976498, "grad_norm": 1.2660601395999205, "learning_rate": 5.134404032071719e-06, "loss": 0.5985, "step": 5983 }, { "epoch": 0.5067965276307431, "grad_norm": 1.3857859229938267, "learning_rate": 5.133032887743276e-06, "loss": 0.6501, "step": 5984 }, { "epoch": 0.5068812195638366, "grad_norm": 1.5863850524230199, "learning_rate": 5.131661733403402e-06, "loss": 0.6692, "step": 5985 }, { "epoch": 0.5069659114969299, "grad_norm": 1.27627914633963, "learning_rate": 5.130290569155286e-06, "loss": 0.6508, "step": 5986 }, { "epoch": 0.5070506034300233, "grad_norm": 1.4751745503785805, "learning_rate": 5.128919395102117e-06, "loss": 0.6308, "step": 5987 }, { "epoch": 0.5071352953631166, "grad_norm": 1.3923107975413793, "learning_rate": 5.1275482113470795e-06, "loss": 0.6257, "step": 5988 }, { "epoch": 0.50721998729621, "grad_norm": 0.6535690464182373, "learning_rate": 5.126177017993363e-06, "loss": 0.8169, "step": 5989 }, { "epoch": 0.5073046792293034, "grad_norm": 1.2771929743308368, "learning_rate": 5.1248058151441575e-06, "loss": 0.5693, "step": 5990 }, { "epoch": 0.5073893711623968, "grad_norm": 1.3809455518436016, "learning_rate": 5.123434602902655e-06, "loss": 0.5921, "step": 5991 }, { "epoch": 0.5074740630954901, "grad_norm": 2.8604192349898083, "learning_rate": 5.1220633813720445e-06, "loss": 0.6017, "step": 5992 }, { "epoch": 0.5075587550285835, "grad_norm": 1.313915731583995, "learning_rate": 5.120692150655516e-06, "loss": 0.6786, "step": 5993 }, { "epoch": 0.5076434469616768, "grad_norm": 1.4611862047203028, "learning_rate": 5.1193209108562665e-06, "loss": 0.646, "step": 5994 }, { "epoch": 0.5077281388947703, "grad_norm": 1.1619389702826726, "learning_rate": 5.117949662077487e-06, "loss": 0.6197, "step": 5995 }, { "epoch": 0.5078128308278637, "grad_norm": 1.9657272328541742, "learning_rate": 5.11657840442237e-06, "loss": 0.6191, "step": 5996 }, { "epoch": 0.507897522760957, "grad_norm": 1.7466040550982067, "learning_rate": 5.115207137994109e-06, "loss": 0.6439, "step": 5997 }, { "epoch": 0.5079822146940504, "grad_norm": 1.710911040440512, "learning_rate": 5.113835862895904e-06, "loss": 0.6551, "step": 5998 }, { "epoch": 0.5080669066271437, "grad_norm": 1.2941092697330512, "learning_rate": 5.112464579230947e-06, "loss": 0.6552, "step": 5999 }, { "epoch": 0.5081515985602372, "grad_norm": 1.5192437035999913, "learning_rate": 5.111093287102433e-06, "loss": 0.6123, "step": 6000 }, { "epoch": 0.5082362904933305, "grad_norm": 0.6244500997930428, "learning_rate": 5.109721986613561e-06, "loss": 0.8518, "step": 6001 }, { "epoch": 0.5083209824264239, "grad_norm": 1.6038362137057196, "learning_rate": 5.108350677867529e-06, "loss": 0.6005, "step": 6002 }, { "epoch": 0.5084056743595172, "grad_norm": 1.2028639528227174, "learning_rate": 5.106979360967535e-06, "loss": 0.6476, "step": 6003 }, { "epoch": 0.5084903662926106, "grad_norm": 1.510943417723578, "learning_rate": 5.1056080360167795e-06, "loss": 0.7008, "step": 6004 }, { "epoch": 0.508575058225704, "grad_norm": 1.2750347842983552, "learning_rate": 5.104236703118457e-06, "loss": 0.6173, "step": 6005 }, { "epoch": 0.5086597501587974, "grad_norm": 1.570771484877015, "learning_rate": 5.102865362375773e-06, "loss": 0.6272, "step": 6006 }, { "epoch": 0.5087444420918907, "grad_norm": 1.3991563230211697, "learning_rate": 5.1014940138919236e-06, "loss": 0.5907, "step": 6007 }, { "epoch": 0.5088291340249841, "grad_norm": 1.428188359325598, "learning_rate": 5.100122657770114e-06, "loss": 0.5961, "step": 6008 }, { "epoch": 0.5089138259580775, "grad_norm": 2.0045007268890798, "learning_rate": 5.098751294113543e-06, "loss": 0.6394, "step": 6009 }, { "epoch": 0.5089985178911709, "grad_norm": 1.4786305681731364, "learning_rate": 5.097379923025414e-06, "loss": 0.6587, "step": 6010 }, { "epoch": 0.5090832098242642, "grad_norm": 1.4655474760930989, "learning_rate": 5.096008544608932e-06, "loss": 0.6237, "step": 6011 }, { "epoch": 0.5091679017573576, "grad_norm": 1.2012213024673497, "learning_rate": 5.094637158967297e-06, "loss": 0.6282, "step": 6012 }, { "epoch": 0.509252593690451, "grad_norm": 1.4265436310756323, "learning_rate": 5.093265766203715e-06, "loss": 0.6746, "step": 6013 }, { "epoch": 0.5093372856235444, "grad_norm": 1.1492442442033024, "learning_rate": 5.091894366421391e-06, "loss": 0.6229, "step": 6014 }, { "epoch": 0.5094219775566378, "grad_norm": 1.275642785920935, "learning_rate": 5.0905229597235285e-06, "loss": 0.6102, "step": 6015 }, { "epoch": 0.5095066694897311, "grad_norm": 1.635676763590863, "learning_rate": 5.089151546213334e-06, "loss": 0.6645, "step": 6016 }, { "epoch": 0.5095913614228245, "grad_norm": 1.9938220356124134, "learning_rate": 5.087780125994014e-06, "loss": 0.6419, "step": 6017 }, { "epoch": 0.5096760533559178, "grad_norm": 1.906132695518543, "learning_rate": 5.0864086991687735e-06, "loss": 0.6489, "step": 6018 }, { "epoch": 0.5097607452890113, "grad_norm": 1.1789202155020584, "learning_rate": 5.085037265840822e-06, "loss": 0.7015, "step": 6019 }, { "epoch": 0.5098454372221046, "grad_norm": 1.3997309702392917, "learning_rate": 5.0836658261133665e-06, "loss": 0.6242, "step": 6020 }, { "epoch": 0.509930129155198, "grad_norm": 1.7170590112433473, "learning_rate": 5.082294380089613e-06, "loss": 0.5902, "step": 6021 }, { "epoch": 0.5100148210882913, "grad_norm": 1.2532305868054288, "learning_rate": 5.080922927872772e-06, "loss": 0.6385, "step": 6022 }, { "epoch": 0.5100995130213847, "grad_norm": 1.0959448674980743, "learning_rate": 5.079551469566053e-06, "loss": 0.561, "step": 6023 }, { "epoch": 0.5101842049544781, "grad_norm": 1.7854051135145914, "learning_rate": 5.0781800052726634e-06, "loss": 0.6834, "step": 6024 }, { "epoch": 0.5102688968875715, "grad_norm": 1.3774829205447328, "learning_rate": 5.076808535095816e-06, "loss": 0.5894, "step": 6025 }, { "epoch": 0.5103535888206648, "grad_norm": 1.4851942021835691, "learning_rate": 5.075437059138718e-06, "loss": 0.6625, "step": 6026 }, { "epoch": 0.5104382807537582, "grad_norm": 1.2384557017699922, "learning_rate": 5.074065577504584e-06, "loss": 0.6262, "step": 6027 }, { "epoch": 0.5105229726868515, "grad_norm": 1.295620058125332, "learning_rate": 5.072694090296622e-06, "loss": 0.6065, "step": 6028 }, { "epoch": 0.510607664619945, "grad_norm": 1.3053694313389679, "learning_rate": 5.071322597618045e-06, "loss": 0.6436, "step": 6029 }, { "epoch": 0.5106923565530384, "grad_norm": 1.4674280895670404, "learning_rate": 5.0699510995720635e-06, "loss": 0.6814, "step": 6030 }, { "epoch": 0.5107770484861317, "grad_norm": 1.623768776921338, "learning_rate": 5.068579596261891e-06, "loss": 0.6171, "step": 6031 }, { "epoch": 0.510861740419225, "grad_norm": 1.3311237828611804, "learning_rate": 5.067208087790742e-06, "loss": 0.5958, "step": 6032 }, { "epoch": 0.5109464323523184, "grad_norm": 1.5308543807104544, "learning_rate": 5.0658365742618295e-06, "loss": 0.6537, "step": 6033 }, { "epoch": 0.5110311242854119, "grad_norm": 1.4259497644995358, "learning_rate": 5.064465055778365e-06, "loss": 0.62, "step": 6034 }, { "epoch": 0.5111158162185052, "grad_norm": 1.0940155463044205, "learning_rate": 5.063093532443564e-06, "loss": 0.603, "step": 6035 }, { "epoch": 0.5112005081515986, "grad_norm": 1.2105942085954882, "learning_rate": 5.061722004360641e-06, "loss": 0.613, "step": 6036 }, { "epoch": 0.5112852000846919, "grad_norm": 1.4510105238675601, "learning_rate": 5.06035047163281e-06, "loss": 0.6436, "step": 6037 }, { "epoch": 0.5113698920177853, "grad_norm": 1.4423978090031278, "learning_rate": 5.058978934363284e-06, "loss": 0.5898, "step": 6038 }, { "epoch": 0.5114545839508787, "grad_norm": 1.1864382459441665, "learning_rate": 5.0576073926552835e-06, "loss": 0.6181, "step": 6039 }, { "epoch": 0.5115392758839721, "grad_norm": 1.574524478583792, "learning_rate": 5.056235846612022e-06, "loss": 0.6589, "step": 6040 }, { "epoch": 0.5116239678170654, "grad_norm": 1.64558909167972, "learning_rate": 5.054864296336715e-06, "loss": 0.6061, "step": 6041 }, { "epoch": 0.5117086597501588, "grad_norm": 1.2481458785565154, "learning_rate": 5.053492741932577e-06, "loss": 0.6074, "step": 6042 }, { "epoch": 0.5117933516832521, "grad_norm": 1.840289679155967, "learning_rate": 5.05212118350283e-06, "loss": 0.6201, "step": 6043 }, { "epoch": 0.5118780436163456, "grad_norm": 1.5244930421068106, "learning_rate": 5.050749621150686e-06, "loss": 0.6313, "step": 6044 }, { "epoch": 0.5119627355494389, "grad_norm": 1.1803363986974702, "learning_rate": 5.049378054979365e-06, "loss": 0.6039, "step": 6045 }, { "epoch": 0.5120474274825323, "grad_norm": 1.5194958051301033, "learning_rate": 5.048006485092083e-06, "loss": 0.6376, "step": 6046 }, { "epoch": 0.5121321194156256, "grad_norm": 0.6789871382156786, "learning_rate": 5.046634911592061e-06, "loss": 0.8648, "step": 6047 }, { "epoch": 0.512216811348719, "grad_norm": 1.285689719656164, "learning_rate": 5.045263334582513e-06, "loss": 0.6249, "step": 6048 }, { "epoch": 0.5123015032818125, "grad_norm": 1.380221634544185, "learning_rate": 5.04389175416666e-06, "loss": 0.6031, "step": 6049 }, { "epoch": 0.5123861952149058, "grad_norm": 2.110833184040699, "learning_rate": 5.04252017044772e-06, "loss": 0.5751, "step": 6050 }, { "epoch": 0.5124708871479992, "grad_norm": 4.027451784694036, "learning_rate": 5.041148583528912e-06, "loss": 0.667, "step": 6051 }, { "epoch": 0.5125555790810925, "grad_norm": 1.9452051969954542, "learning_rate": 5.039776993513455e-06, "loss": 0.6165, "step": 6052 }, { "epoch": 0.5126402710141859, "grad_norm": 1.1726532009722541, "learning_rate": 5.038405400504569e-06, "loss": 0.608, "step": 6053 }, { "epoch": 0.5127249629472793, "grad_norm": 1.646160228492008, "learning_rate": 5.037033804605473e-06, "loss": 0.6543, "step": 6054 }, { "epoch": 0.5128096548803727, "grad_norm": 0.6547069589874058, "learning_rate": 5.035662205919387e-06, "loss": 0.8758, "step": 6055 }, { "epoch": 0.512894346813466, "grad_norm": 1.293944700388269, "learning_rate": 5.034290604549531e-06, "loss": 0.6268, "step": 6056 }, { "epoch": 0.5129790387465594, "grad_norm": 1.2686113027626862, "learning_rate": 5.032919000599126e-06, "loss": 0.6124, "step": 6057 }, { "epoch": 0.5130637306796527, "grad_norm": 1.4853681976527122, "learning_rate": 5.031547394171392e-06, "loss": 0.6018, "step": 6058 }, { "epoch": 0.5131484226127462, "grad_norm": 1.3002986127525968, "learning_rate": 5.030175785369548e-06, "loss": 0.6397, "step": 6059 }, { "epoch": 0.5132331145458395, "grad_norm": 1.429906633097993, "learning_rate": 5.028804174296817e-06, "loss": 0.608, "step": 6060 }, { "epoch": 0.5133178064789329, "grad_norm": 0.6558548502525642, "learning_rate": 5.027432561056421e-06, "loss": 0.8802, "step": 6061 }, { "epoch": 0.5134024984120262, "grad_norm": 1.5124698095456808, "learning_rate": 5.026060945751578e-06, "loss": 0.6089, "step": 6062 }, { "epoch": 0.5134871903451196, "grad_norm": 1.1696828092497527, "learning_rate": 5.0246893284855114e-06, "loss": 0.6255, "step": 6063 }, { "epoch": 0.513571882278213, "grad_norm": 1.502688968574756, "learning_rate": 5.023317709361441e-06, "loss": 0.6536, "step": 6064 }, { "epoch": 0.5136565742113064, "grad_norm": 1.3074411078476162, "learning_rate": 5.021946088482592e-06, "loss": 0.6188, "step": 6065 }, { "epoch": 0.5137412661443997, "grad_norm": 1.6117970261283339, "learning_rate": 5.020574465952182e-06, "loss": 0.6933, "step": 6066 }, { "epoch": 0.5138259580774931, "grad_norm": 1.4944253586753111, "learning_rate": 5.019202841873434e-06, "loss": 0.6639, "step": 6067 }, { "epoch": 0.5139106500105864, "grad_norm": 1.3400721872337944, "learning_rate": 5.0178312163495716e-06, "loss": 0.6499, "step": 6068 }, { "epoch": 0.5139953419436799, "grad_norm": 1.4979593656760655, "learning_rate": 5.016459589483814e-06, "loss": 0.6833, "step": 6069 }, { "epoch": 0.5140800338767733, "grad_norm": 1.1691048580736751, "learning_rate": 5.015087961379386e-06, "loss": 0.5524, "step": 6070 }, { "epoch": 0.5141647258098666, "grad_norm": 1.279333258886772, "learning_rate": 5.01371633213951e-06, "loss": 0.6653, "step": 6071 }, { "epoch": 0.51424941774296, "grad_norm": 0.6405100444341656, "learning_rate": 5.012344701867406e-06, "loss": 0.8829, "step": 6072 }, { "epoch": 0.5143341096760533, "grad_norm": 1.4415258119486039, "learning_rate": 5.010973070666298e-06, "loss": 0.6108, "step": 6073 }, { "epoch": 0.5144188016091468, "grad_norm": 1.4161518593008215, "learning_rate": 5.00960143863941e-06, "loss": 0.6933, "step": 6074 }, { "epoch": 0.5145034935422401, "grad_norm": 0.6104091780410077, "learning_rate": 5.008229805889962e-06, "loss": 0.8347, "step": 6075 }, { "epoch": 0.5145881854753335, "grad_norm": 1.6788284184739442, "learning_rate": 5.006858172521177e-06, "loss": 0.5847, "step": 6076 }, { "epoch": 0.5146728774084268, "grad_norm": 1.656920905841571, "learning_rate": 5.005486538636278e-06, "loss": 0.7109, "step": 6077 }, { "epoch": 0.5147575693415202, "grad_norm": 1.3468641427389836, "learning_rate": 5.0041149043384895e-06, "loss": 0.6047, "step": 6078 }, { "epoch": 0.5148422612746136, "grad_norm": 4.947331542388774, "learning_rate": 5.002743269731031e-06, "loss": 0.659, "step": 6079 }, { "epoch": 0.514926953207707, "grad_norm": 1.7571232322606058, "learning_rate": 5.001371634917126e-06, "loss": 0.7259, "step": 6080 }, { "epoch": 0.5150116451408003, "grad_norm": 0.6159012524345503, "learning_rate": 5e-06, "loss": 0.8595, "step": 6081 }, { "epoch": 0.5150963370738937, "grad_norm": 1.3779234918662737, "learning_rate": 4.998628365082874e-06, "loss": 0.653, "step": 6082 }, { "epoch": 0.515181029006987, "grad_norm": 1.3811838283255096, "learning_rate": 4.997256730268971e-06, "loss": 0.6585, "step": 6083 }, { "epoch": 0.5152657209400805, "grad_norm": 2.3173063976334007, "learning_rate": 4.995885095661512e-06, "loss": 0.6167, "step": 6084 }, { "epoch": 0.5153504128731738, "grad_norm": 1.5213187640551116, "learning_rate": 4.994513461363724e-06, "loss": 0.6399, "step": 6085 }, { "epoch": 0.5154351048062672, "grad_norm": 1.6108707705850704, "learning_rate": 4.993141827478825e-06, "loss": 0.6529, "step": 6086 }, { "epoch": 0.5155197967393605, "grad_norm": 2.664551159628057, "learning_rate": 4.991770194110039e-06, "loss": 0.6544, "step": 6087 }, { "epoch": 0.5156044886724539, "grad_norm": 1.5200804309705143, "learning_rate": 4.990398561360592e-06, "loss": 0.6291, "step": 6088 }, { "epoch": 0.5156891806055474, "grad_norm": 1.545505953091011, "learning_rate": 4.989026929333703e-06, "loss": 0.6017, "step": 6089 }, { "epoch": 0.5157738725386407, "grad_norm": 1.3429993936335305, "learning_rate": 4.9876552981325945e-06, "loss": 0.6473, "step": 6090 }, { "epoch": 0.5158585644717341, "grad_norm": 1.0689348474919131, "learning_rate": 4.986283667860492e-06, "loss": 0.6196, "step": 6091 }, { "epoch": 0.5159432564048274, "grad_norm": 2.2459246448422894, "learning_rate": 4.9849120386206144e-06, "loss": 0.6873, "step": 6092 }, { "epoch": 0.5160279483379208, "grad_norm": 1.3207611220376239, "learning_rate": 4.9835404105161875e-06, "loss": 0.6178, "step": 6093 }, { "epoch": 0.5161126402710142, "grad_norm": 1.2109582640553427, "learning_rate": 4.982168783650431e-06, "loss": 0.6812, "step": 6094 }, { "epoch": 0.5161973322041076, "grad_norm": 1.4164417719196696, "learning_rate": 4.980797158126567e-06, "loss": 0.5917, "step": 6095 }, { "epoch": 0.5162820241372009, "grad_norm": 1.2168709076245003, "learning_rate": 4.979425534047821e-06, "loss": 0.6225, "step": 6096 }, { "epoch": 0.5163667160702943, "grad_norm": 0.5947589493042508, "learning_rate": 4.978053911517409e-06, "loss": 0.831, "step": 6097 }, { "epoch": 0.5164514080033876, "grad_norm": 1.5075857540239075, "learning_rate": 4.976682290638558e-06, "loss": 0.6319, "step": 6098 }, { "epoch": 0.5165360999364811, "grad_norm": 1.7249655994669217, "learning_rate": 4.97531067151449e-06, "loss": 0.6694, "step": 6099 }, { "epoch": 0.5166207918695744, "grad_norm": 1.221123689861632, "learning_rate": 4.973939054248423e-06, "loss": 0.6494, "step": 6100 }, { "epoch": 0.5167054838026678, "grad_norm": 1.5457230653832965, "learning_rate": 4.9725674389435814e-06, "loss": 0.627, "step": 6101 }, { "epoch": 0.5167901757357611, "grad_norm": 1.6138351674021703, "learning_rate": 4.971195825703183e-06, "loss": 0.6625, "step": 6102 }, { "epoch": 0.5168748676688545, "grad_norm": 1.2645389085432763, "learning_rate": 4.969824214630453e-06, "loss": 0.6404, "step": 6103 }, { "epoch": 0.516959559601948, "grad_norm": 0.6476352796504532, "learning_rate": 4.968452605828611e-06, "loss": 0.905, "step": 6104 }, { "epoch": 0.5170442515350413, "grad_norm": 1.2158679782079644, "learning_rate": 4.967080999400875e-06, "loss": 0.6117, "step": 6105 }, { "epoch": 0.5171289434681346, "grad_norm": 0.6342624444442245, "learning_rate": 4.965709395450469e-06, "loss": 0.8184, "step": 6106 }, { "epoch": 0.517213635401228, "grad_norm": 1.4993863590513876, "learning_rate": 4.964337794080614e-06, "loss": 0.6237, "step": 6107 }, { "epoch": 0.5172983273343214, "grad_norm": 1.4799265686706742, "learning_rate": 4.962966195394528e-06, "loss": 0.6032, "step": 6108 }, { "epoch": 0.5173830192674148, "grad_norm": 1.353320508414879, "learning_rate": 4.961594599495434e-06, "loss": 0.6186, "step": 6109 }, { "epoch": 0.5174677112005082, "grad_norm": 1.5715879337180334, "learning_rate": 4.960223006486547e-06, "loss": 0.6994, "step": 6110 }, { "epoch": 0.5175524031336015, "grad_norm": 1.3604969400024127, "learning_rate": 4.958851416471089e-06, "loss": 0.5905, "step": 6111 }, { "epoch": 0.5176370950666949, "grad_norm": 1.3369995373072734, "learning_rate": 4.9574798295522815e-06, "loss": 0.6178, "step": 6112 }, { "epoch": 0.5177217869997883, "grad_norm": 1.246169542743344, "learning_rate": 4.9561082458333415e-06, "loss": 0.5783, "step": 6113 }, { "epoch": 0.5178064789328817, "grad_norm": 1.2201678088516865, "learning_rate": 4.954736665417488e-06, "loss": 0.6736, "step": 6114 }, { "epoch": 0.517891170865975, "grad_norm": 2.472050890656259, "learning_rate": 4.953365088407941e-06, "loss": 0.6286, "step": 6115 }, { "epoch": 0.5179758627990684, "grad_norm": 1.2210076745285987, "learning_rate": 4.951993514907917e-06, "loss": 0.6788, "step": 6116 }, { "epoch": 0.5180605547321617, "grad_norm": 2.4289692897203063, "learning_rate": 4.950621945020636e-06, "loss": 0.6666, "step": 6117 }, { "epoch": 0.5181452466652552, "grad_norm": 1.5308842263421873, "learning_rate": 4.949250378849314e-06, "loss": 0.6347, "step": 6118 }, { "epoch": 0.5182299385983485, "grad_norm": 1.280754987929499, "learning_rate": 4.947878816497171e-06, "loss": 0.6092, "step": 6119 }, { "epoch": 0.5183146305314419, "grad_norm": 1.3876814681668506, "learning_rate": 4.946507258067424e-06, "loss": 0.5884, "step": 6120 }, { "epoch": 0.5183993224645352, "grad_norm": 1.231454685170645, "learning_rate": 4.945135703663287e-06, "loss": 0.6475, "step": 6121 }, { "epoch": 0.5184840143976286, "grad_norm": 1.4187291950743093, "learning_rate": 4.943764153387981e-06, "loss": 0.6811, "step": 6122 }, { "epoch": 0.518568706330722, "grad_norm": 1.3697451306469393, "learning_rate": 4.942392607344717e-06, "loss": 0.705, "step": 6123 }, { "epoch": 0.5186533982638154, "grad_norm": 1.6711413390706735, "learning_rate": 4.941021065636716e-06, "loss": 0.6441, "step": 6124 }, { "epoch": 0.5187380901969088, "grad_norm": 1.632795490538915, "learning_rate": 4.939649528367193e-06, "loss": 0.6419, "step": 6125 }, { "epoch": 0.5188227821300021, "grad_norm": 1.8018749348408596, "learning_rate": 4.938277995639361e-06, "loss": 0.6383, "step": 6126 }, { "epoch": 0.5189074740630955, "grad_norm": 1.3830690082073256, "learning_rate": 4.936906467556437e-06, "loss": 0.6145, "step": 6127 }, { "epoch": 0.5189921659961889, "grad_norm": 1.4800867667291824, "learning_rate": 4.9355349442216365e-06, "loss": 0.6145, "step": 6128 }, { "epoch": 0.5190768579292823, "grad_norm": 1.4991792081745714, "learning_rate": 4.934163425738171e-06, "loss": 0.5844, "step": 6129 }, { "epoch": 0.5191615498623756, "grad_norm": 1.507927679550119, "learning_rate": 4.93279191220926e-06, "loss": 0.705, "step": 6130 }, { "epoch": 0.519246241795469, "grad_norm": 1.2647818176181558, "learning_rate": 4.93142040373811e-06, "loss": 0.6717, "step": 6131 }, { "epoch": 0.5193309337285623, "grad_norm": 1.3643526024798893, "learning_rate": 4.930048900427937e-06, "loss": 0.6218, "step": 6132 }, { "epoch": 0.5194156256616558, "grad_norm": 1.4831157604850758, "learning_rate": 4.928677402381958e-06, "loss": 0.6727, "step": 6133 }, { "epoch": 0.5195003175947491, "grad_norm": 1.3722322372047615, "learning_rate": 4.927305909703381e-06, "loss": 0.6432, "step": 6134 }, { "epoch": 0.5195850095278425, "grad_norm": 1.1883261900160873, "learning_rate": 4.925934422495417e-06, "loss": 0.6913, "step": 6135 }, { "epoch": 0.5196697014609358, "grad_norm": 1.416883678306472, "learning_rate": 4.924562940861283e-06, "loss": 0.6085, "step": 6136 }, { "epoch": 0.5197543933940292, "grad_norm": 1.379544567640173, "learning_rate": 4.923191464904185e-06, "loss": 0.6648, "step": 6137 }, { "epoch": 0.5198390853271226, "grad_norm": 1.4077862341628329, "learning_rate": 4.921819994727337e-06, "loss": 0.6102, "step": 6138 }, { "epoch": 0.519923777260216, "grad_norm": 1.2181908014446055, "learning_rate": 4.9204485304339485e-06, "loss": 0.6258, "step": 6139 }, { "epoch": 0.5200084691933093, "grad_norm": 1.6388014346312123, "learning_rate": 4.9190770721272285e-06, "loss": 0.6679, "step": 6140 }, { "epoch": 0.5200931611264027, "grad_norm": 1.3370159335801728, "learning_rate": 4.91770561991039e-06, "loss": 0.6812, "step": 6141 }, { "epoch": 0.520177853059496, "grad_norm": 1.101588521195469, "learning_rate": 4.916334173886635e-06, "loss": 0.6094, "step": 6142 }, { "epoch": 0.5202625449925895, "grad_norm": 1.678559163491134, "learning_rate": 4.914962734159178e-06, "loss": 0.6661, "step": 6143 }, { "epoch": 0.5203472369256829, "grad_norm": 1.53111488882188, "learning_rate": 4.913591300831228e-06, "loss": 0.6309, "step": 6144 }, { "epoch": 0.5204319288587762, "grad_norm": 1.3838081361493237, "learning_rate": 4.912219874005988e-06, "loss": 0.5921, "step": 6145 }, { "epoch": 0.5205166207918696, "grad_norm": 1.6075420948879762, "learning_rate": 4.910848453786668e-06, "loss": 0.6177, "step": 6146 }, { "epoch": 0.5206013127249629, "grad_norm": 1.4037141904912498, "learning_rate": 4.909477040276473e-06, "loss": 0.6635, "step": 6147 }, { "epoch": 0.5206860046580564, "grad_norm": 1.4338684423890555, "learning_rate": 4.90810563357861e-06, "loss": 0.7421, "step": 6148 }, { "epoch": 0.5207706965911497, "grad_norm": 1.1469787735758639, "learning_rate": 4.906734233796286e-06, "loss": 0.6647, "step": 6149 }, { "epoch": 0.5208553885242431, "grad_norm": 1.1844577581416917, "learning_rate": 4.905362841032704e-06, "loss": 0.6318, "step": 6150 }, { "epoch": 0.5209400804573364, "grad_norm": 1.4337695367578496, "learning_rate": 4.903991455391071e-06, "loss": 0.609, "step": 6151 }, { "epoch": 0.5210247723904298, "grad_norm": 1.5852931852718342, "learning_rate": 4.9026200769745875e-06, "loss": 0.6591, "step": 6152 }, { "epoch": 0.5211094643235232, "grad_norm": 1.3484056101071331, "learning_rate": 4.901248705886458e-06, "loss": 0.611, "step": 6153 }, { "epoch": 0.5211941562566166, "grad_norm": 1.2898551130892233, "learning_rate": 4.899877342229889e-06, "loss": 0.6347, "step": 6154 }, { "epoch": 0.5212788481897099, "grad_norm": 1.0776524728419423, "learning_rate": 4.898505986108078e-06, "loss": 0.6347, "step": 6155 }, { "epoch": 0.5213635401228033, "grad_norm": 1.9256348248564819, "learning_rate": 4.897134637624229e-06, "loss": 0.6234, "step": 6156 }, { "epoch": 0.5214482320558966, "grad_norm": 1.2882769086511978, "learning_rate": 4.895763296881544e-06, "loss": 0.6066, "step": 6157 }, { "epoch": 0.5215329239889901, "grad_norm": 1.5272499755001308, "learning_rate": 4.894391963983223e-06, "loss": 0.6033, "step": 6158 }, { "epoch": 0.5216176159220834, "grad_norm": 1.1987740737985009, "learning_rate": 4.8930206390324655e-06, "loss": 0.7004, "step": 6159 }, { "epoch": 0.5217023078551768, "grad_norm": 1.367476097741548, "learning_rate": 4.891649322132472e-06, "loss": 0.6003, "step": 6160 }, { "epoch": 0.5217869997882701, "grad_norm": 3.633721611395407, "learning_rate": 4.8902780133864395e-06, "loss": 0.6372, "step": 6161 }, { "epoch": 0.5218716917213635, "grad_norm": 1.272811092667312, "learning_rate": 4.88890671289757e-06, "loss": 0.603, "step": 6162 }, { "epoch": 0.521956383654457, "grad_norm": 1.510184610358335, "learning_rate": 4.8875354207690555e-06, "loss": 0.6847, "step": 6163 }, { "epoch": 0.5220410755875503, "grad_norm": 1.546309945619477, "learning_rate": 4.886164137104097e-06, "loss": 0.5704, "step": 6164 }, { "epoch": 0.5221257675206437, "grad_norm": 1.4464601986657173, "learning_rate": 4.884792862005892e-06, "loss": 0.6104, "step": 6165 }, { "epoch": 0.522210459453737, "grad_norm": 1.567217738761336, "learning_rate": 4.883421595577632e-06, "loss": 0.6356, "step": 6166 }, { "epoch": 0.5222951513868304, "grad_norm": 1.3220727921308075, "learning_rate": 4.882050337922515e-06, "loss": 0.6453, "step": 6167 }, { "epoch": 0.5223798433199238, "grad_norm": 1.3226089825270926, "learning_rate": 4.880679089143734e-06, "loss": 0.6214, "step": 6168 }, { "epoch": 0.5224645352530172, "grad_norm": 1.4227027165267485, "learning_rate": 4.879307849344484e-06, "loss": 0.6389, "step": 6169 }, { "epoch": 0.5225492271861105, "grad_norm": 1.774536945505265, "learning_rate": 4.877936618627958e-06, "loss": 0.6688, "step": 6170 }, { "epoch": 0.5226339191192039, "grad_norm": 1.1149570316920845, "learning_rate": 4.876565397097347e-06, "loss": 0.6814, "step": 6171 }, { "epoch": 0.5227186110522972, "grad_norm": 0.6139804443607871, "learning_rate": 4.8751941848558425e-06, "loss": 0.8719, "step": 6172 }, { "epoch": 0.5228033029853907, "grad_norm": 1.1523599952071935, "learning_rate": 4.873822982006639e-06, "loss": 0.5841, "step": 6173 }, { "epoch": 0.522887994918484, "grad_norm": 0.6311546314436866, "learning_rate": 4.872451788652922e-06, "loss": 0.8763, "step": 6174 }, { "epoch": 0.5229726868515774, "grad_norm": 1.5794270591215054, "learning_rate": 4.871080604897886e-06, "loss": 0.5826, "step": 6175 }, { "epoch": 0.5230573787846707, "grad_norm": 1.3077561729705078, "learning_rate": 4.8697094308447145e-06, "loss": 0.6487, "step": 6176 }, { "epoch": 0.5231420707177641, "grad_norm": 1.269293587669108, "learning_rate": 4.868338266596599e-06, "loss": 0.61, "step": 6177 }, { "epoch": 0.5232267626508575, "grad_norm": 1.2643595463394361, "learning_rate": 4.866967112256727e-06, "loss": 0.6109, "step": 6178 }, { "epoch": 0.5233114545839509, "grad_norm": 1.5368527260455764, "learning_rate": 4.865595967928283e-06, "loss": 0.6311, "step": 6179 }, { "epoch": 0.5233961465170442, "grad_norm": 1.5538440797162805, "learning_rate": 4.864224833714453e-06, "loss": 0.6461, "step": 6180 }, { "epoch": 0.5234808384501376, "grad_norm": 1.9499788132789955, "learning_rate": 4.8628537097184256e-06, "loss": 0.6382, "step": 6181 }, { "epoch": 0.523565530383231, "grad_norm": 1.1995592720916586, "learning_rate": 4.861482596043382e-06, "loss": 0.6715, "step": 6182 }, { "epoch": 0.5236502223163244, "grad_norm": 1.2591465760236664, "learning_rate": 4.860111492792507e-06, "loss": 0.6389, "step": 6183 }, { "epoch": 0.5237349142494178, "grad_norm": 0.6627187698133908, "learning_rate": 4.858740400068982e-06, "loss": 0.8206, "step": 6184 }, { "epoch": 0.5238196061825111, "grad_norm": 1.7908848635120906, "learning_rate": 4.857369317975989e-06, "loss": 0.6757, "step": 6185 }, { "epoch": 0.5239042981156045, "grad_norm": 1.3346851808428537, "learning_rate": 4.855998246616714e-06, "loss": 0.665, "step": 6186 }, { "epoch": 0.5239889900486978, "grad_norm": 1.9936178536538804, "learning_rate": 4.85462718609433e-06, "loss": 0.6295, "step": 6187 }, { "epoch": 0.5240736819817913, "grad_norm": 1.498486150331495, "learning_rate": 4.853256136512022e-06, "loss": 0.6245, "step": 6188 }, { "epoch": 0.5241583739148846, "grad_norm": 1.8639933040329884, "learning_rate": 4.8518850979729644e-06, "loss": 0.6163, "step": 6189 }, { "epoch": 0.524243065847978, "grad_norm": 1.2104375931015425, "learning_rate": 4.850514070580339e-06, "loss": 0.6662, "step": 6190 }, { "epoch": 0.5243277577810713, "grad_norm": 1.2923384294041769, "learning_rate": 4.849143054437321e-06, "loss": 0.6321, "step": 6191 }, { "epoch": 0.5244124497141647, "grad_norm": 1.3765128148635006, "learning_rate": 4.847772049647087e-06, "loss": 0.6197, "step": 6192 }, { "epoch": 0.5244971416472581, "grad_norm": 1.1722201100143725, "learning_rate": 4.846401056312811e-06, "loss": 0.5936, "step": 6193 }, { "epoch": 0.5245818335803515, "grad_norm": 1.6749706345838864, "learning_rate": 4.845030074537672e-06, "loss": 0.6256, "step": 6194 }, { "epoch": 0.5246665255134448, "grad_norm": 1.3356844604841063, "learning_rate": 4.843659104424838e-06, "loss": 0.6493, "step": 6195 }, { "epoch": 0.5247512174465382, "grad_norm": 1.2936262562678205, "learning_rate": 4.842288146077487e-06, "loss": 0.6625, "step": 6196 }, { "epoch": 0.5248359093796315, "grad_norm": 1.4781421740715166, "learning_rate": 4.840917199598785e-06, "loss": 0.6911, "step": 6197 }, { "epoch": 0.524920601312725, "grad_norm": 1.597910305637929, "learning_rate": 4.839546265091906e-06, "loss": 0.6395, "step": 6198 }, { "epoch": 0.5250052932458183, "grad_norm": 1.3774566004698383, "learning_rate": 4.838175342660023e-06, "loss": 0.6768, "step": 6199 }, { "epoch": 0.5250899851789117, "grad_norm": 1.8580030910464311, "learning_rate": 4.8368044324063005e-06, "loss": 0.642, "step": 6200 }, { "epoch": 0.525174677112005, "grad_norm": 1.4518722784141844, "learning_rate": 4.835433534433909e-06, "loss": 0.6945, "step": 6201 }, { "epoch": 0.5252593690450984, "grad_norm": 1.8499066229276289, "learning_rate": 4.834062648846016e-06, "loss": 0.6268, "step": 6202 }, { "epoch": 0.5253440609781919, "grad_norm": 2.1968045538602947, "learning_rate": 4.832691775745786e-06, "loss": 0.5756, "step": 6203 }, { "epoch": 0.5254287529112852, "grad_norm": 0.632315971507597, "learning_rate": 4.831320915236387e-06, "loss": 0.8693, "step": 6204 }, { "epoch": 0.5255134448443786, "grad_norm": 1.109021960311747, "learning_rate": 4.829950067420982e-06, "loss": 0.6178, "step": 6205 }, { "epoch": 0.5255981367774719, "grad_norm": 1.40850851790672, "learning_rate": 4.8285792324027335e-06, "loss": 0.5877, "step": 6206 }, { "epoch": 0.5256828287105653, "grad_norm": 1.474379908444386, "learning_rate": 4.827208410284809e-06, "loss": 0.633, "step": 6207 }, { "epoch": 0.5257675206436587, "grad_norm": 1.2874073979270777, "learning_rate": 4.825837601170362e-06, "loss": 0.6142, "step": 6208 }, { "epoch": 0.5258522125767521, "grad_norm": 2.9400752333862727, "learning_rate": 4.82446680516256e-06, "loss": 0.7177, "step": 6209 }, { "epoch": 0.5259369045098454, "grad_norm": 1.4924061131404167, "learning_rate": 4.823096022364562e-06, "loss": 0.6257, "step": 6210 }, { "epoch": 0.5260215964429388, "grad_norm": 1.5036598887301722, "learning_rate": 4.821725252879523e-06, "loss": 0.6328, "step": 6211 }, { "epoch": 0.5261062883760322, "grad_norm": 1.3933504133000458, "learning_rate": 4.820354496810604e-06, "loss": 0.6419, "step": 6212 }, { "epoch": 0.5261909803091256, "grad_norm": 2.125873845972276, "learning_rate": 4.81898375426096e-06, "loss": 0.5905, "step": 6213 }, { "epoch": 0.5262756722422189, "grad_norm": 1.2761297209471014, "learning_rate": 4.817613025333746e-06, "loss": 0.6352, "step": 6214 }, { "epoch": 0.5263603641753123, "grad_norm": 1.4446531507402824, "learning_rate": 4.816242310132119e-06, "loss": 0.6606, "step": 6215 }, { "epoch": 0.5264450561084056, "grad_norm": 1.351908768297962, "learning_rate": 4.814871608759229e-06, "loss": 0.653, "step": 6216 }, { "epoch": 0.5265297480414991, "grad_norm": 1.5829095767595565, "learning_rate": 4.813500921318233e-06, "loss": 0.5897, "step": 6217 }, { "epoch": 0.5266144399745925, "grad_norm": 2.910445265266605, "learning_rate": 4.812130247912281e-06, "loss": 0.6523, "step": 6218 }, { "epoch": 0.5266991319076858, "grad_norm": 0.5906096738602813, "learning_rate": 4.810759588644519e-06, "loss": 0.8404, "step": 6219 }, { "epoch": 0.5267838238407792, "grad_norm": 1.5516267773810644, "learning_rate": 4.809388943618105e-06, "loss": 0.6272, "step": 6220 }, { "epoch": 0.5268685157738725, "grad_norm": 2.0009731775591053, "learning_rate": 4.808018312936179e-06, "loss": 0.6449, "step": 6221 }, { "epoch": 0.526953207706966, "grad_norm": 1.3691599669319765, "learning_rate": 4.806647696701891e-06, "loss": 0.6399, "step": 6222 }, { "epoch": 0.5270378996400593, "grad_norm": 1.3373501232404787, "learning_rate": 4.805277095018389e-06, "loss": 0.6611, "step": 6223 }, { "epoch": 0.5271225915731527, "grad_norm": 1.3411819611924705, "learning_rate": 4.803906507988817e-06, "loss": 0.6362, "step": 6224 }, { "epoch": 0.527207283506246, "grad_norm": 1.3671945470602775, "learning_rate": 4.802535935716318e-06, "loss": 0.5662, "step": 6225 }, { "epoch": 0.5272919754393394, "grad_norm": 0.6103155326460813, "learning_rate": 4.801165378304035e-06, "loss": 0.8156, "step": 6226 }, { "epoch": 0.5273766673724328, "grad_norm": 1.4199062414283088, "learning_rate": 4.7997948358551104e-06, "loss": 0.6751, "step": 6227 }, { "epoch": 0.5274613593055262, "grad_norm": 5.430048136829069, "learning_rate": 4.798424308472687e-06, "loss": 0.6724, "step": 6228 }, { "epoch": 0.5275460512386195, "grad_norm": 1.1091552107866942, "learning_rate": 4.7970537962598975e-06, "loss": 0.6113, "step": 6229 }, { "epoch": 0.5276307431717129, "grad_norm": 1.3143254430922469, "learning_rate": 4.795683299319886e-06, "loss": 0.6268, "step": 6230 }, { "epoch": 0.5277154351048062, "grad_norm": 1.215604503443742, "learning_rate": 4.794312817755791e-06, "loss": 0.6834, "step": 6231 }, { "epoch": 0.5278001270378997, "grad_norm": 1.8249200771188108, "learning_rate": 4.792942351670742e-06, "loss": 0.6202, "step": 6232 }, { "epoch": 0.527884818970993, "grad_norm": 1.2899725700228688, "learning_rate": 4.79157190116788e-06, "loss": 0.6885, "step": 6233 }, { "epoch": 0.5279695109040864, "grad_norm": 0.6151749378067777, "learning_rate": 4.790201466350334e-06, "loss": 0.8465, "step": 6234 }, { "epoch": 0.5280542028371797, "grad_norm": 1.7246778477460365, "learning_rate": 4.788831047321239e-06, "loss": 0.6353, "step": 6235 }, { "epoch": 0.5281388947702731, "grad_norm": 1.2601785447427856, "learning_rate": 4.787460644183728e-06, "loss": 0.7041, "step": 6236 }, { "epoch": 0.5282235867033666, "grad_norm": 2.699001804394986, "learning_rate": 4.7860902570409264e-06, "loss": 0.6368, "step": 6237 }, { "epoch": 0.5283082786364599, "grad_norm": 1.6775708970730643, "learning_rate": 4.784719885995967e-06, "loss": 0.6127, "step": 6238 }, { "epoch": 0.5283929705695533, "grad_norm": 0.5867324543193457, "learning_rate": 4.783349531151975e-06, "loss": 0.8994, "step": 6239 }, { "epoch": 0.5284776625026466, "grad_norm": 1.206276171982893, "learning_rate": 4.781979192612079e-06, "loss": 0.6977, "step": 6240 }, { "epoch": 0.52856235443574, "grad_norm": 1.2719078961111938, "learning_rate": 4.780608870479405e-06, "loss": 0.6427, "step": 6241 }, { "epoch": 0.5286470463688334, "grad_norm": 4.288346705710214, "learning_rate": 4.7792385648570724e-06, "loss": 0.6316, "step": 6242 }, { "epoch": 0.5287317383019268, "grad_norm": 1.6678134059776786, "learning_rate": 4.777868275848208e-06, "loss": 0.6604, "step": 6243 }, { "epoch": 0.5288164302350201, "grad_norm": 1.458168174798533, "learning_rate": 4.776498003555932e-06, "loss": 0.618, "step": 6244 }, { "epoch": 0.5289011221681135, "grad_norm": 1.4274174187161823, "learning_rate": 4.775127748083364e-06, "loss": 0.6315, "step": 6245 }, { "epoch": 0.5289858141012068, "grad_norm": 1.8372316177248655, "learning_rate": 4.773757509533624e-06, "loss": 0.6703, "step": 6246 }, { "epoch": 0.5290705060343003, "grad_norm": 1.5717836291249936, "learning_rate": 4.77238728800983e-06, "loss": 0.6619, "step": 6247 }, { "epoch": 0.5291551979673936, "grad_norm": 1.191999419259902, "learning_rate": 4.771017083615097e-06, "loss": 0.6002, "step": 6248 }, { "epoch": 0.529239889900487, "grad_norm": 1.1753513975427028, "learning_rate": 4.769646896452542e-06, "loss": 0.6825, "step": 6249 }, { "epoch": 0.5293245818335803, "grad_norm": 1.3950095549677515, "learning_rate": 4.7682767266252766e-06, "loss": 0.6541, "step": 6250 }, { "epoch": 0.5294092737666737, "grad_norm": 0.5972024052056794, "learning_rate": 4.766906574236415e-06, "loss": 0.8268, "step": 6251 }, { "epoch": 0.5294939656997671, "grad_norm": 1.0232664232324922, "learning_rate": 4.7655364393890694e-06, "loss": 0.6151, "step": 6252 }, { "epoch": 0.5295786576328605, "grad_norm": 0.6125823864618819, "learning_rate": 4.764166322186347e-06, "loss": 0.8188, "step": 6253 }, { "epoch": 0.5296633495659538, "grad_norm": 1.2451838532299895, "learning_rate": 4.7627962227313575e-06, "loss": 0.6314, "step": 6254 }, { "epoch": 0.5297480414990472, "grad_norm": 0.6257467218263473, "learning_rate": 4.761426141127208e-06, "loss": 0.8978, "step": 6255 }, { "epoch": 0.5298327334321405, "grad_norm": 1.6878129038388772, "learning_rate": 4.760056077477005e-06, "loss": 0.6885, "step": 6256 }, { "epoch": 0.529917425365234, "grad_norm": 1.731639468156889, "learning_rate": 4.758686031883853e-06, "loss": 0.5688, "step": 6257 }, { "epoch": 0.5300021172983274, "grad_norm": 1.0054279969458801, "learning_rate": 4.757316004450855e-06, "loss": 0.8422, "step": 6258 }, { "epoch": 0.5300868092314207, "grad_norm": 1.8571797223106812, "learning_rate": 4.755945995281112e-06, "loss": 0.6878, "step": 6259 }, { "epoch": 0.5301715011645141, "grad_norm": 1.2414042748335703, "learning_rate": 4.7545760044777265e-06, "loss": 0.6117, "step": 6260 }, { "epoch": 0.5302561930976074, "grad_norm": 2.028115948784892, "learning_rate": 4.753206032143795e-06, "loss": 0.5995, "step": 6261 }, { "epoch": 0.5303408850307009, "grad_norm": 1.4705857348098452, "learning_rate": 4.751836078382418e-06, "loss": 0.6454, "step": 6262 }, { "epoch": 0.5304255769637942, "grad_norm": 1.3955226567878594, "learning_rate": 4.75046614329669e-06, "loss": 0.6644, "step": 6263 }, { "epoch": 0.5305102688968876, "grad_norm": 1.5470177035881065, "learning_rate": 4.7490962269897026e-06, "loss": 0.6281, "step": 6264 }, { "epoch": 0.5305949608299809, "grad_norm": 1.796878512750955, "learning_rate": 4.747726329564557e-06, "loss": 0.6345, "step": 6265 }, { "epoch": 0.5306796527630743, "grad_norm": 2.150943891650941, "learning_rate": 4.746356451124339e-06, "loss": 0.6212, "step": 6266 }, { "epoch": 0.5307643446961677, "grad_norm": 1.4565012462198634, "learning_rate": 4.744986591772141e-06, "loss": 0.6138, "step": 6267 }, { "epoch": 0.5308490366292611, "grad_norm": 0.6167157773280423, "learning_rate": 4.743616751611053e-06, "loss": 0.8232, "step": 6268 }, { "epoch": 0.5309337285623544, "grad_norm": 1.3355757511009203, "learning_rate": 4.7422469307441615e-06, "loss": 0.6199, "step": 6269 }, { "epoch": 0.5310184204954478, "grad_norm": 1.4577163739336372, "learning_rate": 4.740877129274554e-06, "loss": 0.6322, "step": 6270 }, { "epoch": 0.5311031124285411, "grad_norm": 1.7542740568116324, "learning_rate": 4.7395073473053145e-06, "loss": 0.6505, "step": 6271 }, { "epoch": 0.5311878043616346, "grad_norm": 1.417213229437591, "learning_rate": 4.738137584939526e-06, "loss": 0.6515, "step": 6272 }, { "epoch": 0.531272496294728, "grad_norm": 2.0036519743155936, "learning_rate": 4.736767842280272e-06, "loss": 0.6365, "step": 6273 }, { "epoch": 0.5313571882278213, "grad_norm": 0.6555225579687097, "learning_rate": 4.73539811943063e-06, "loss": 0.8328, "step": 6274 }, { "epoch": 0.5314418801609146, "grad_norm": 1.327189229267876, "learning_rate": 4.734028416493681e-06, "loss": 0.6298, "step": 6275 }, { "epoch": 0.531526572094008, "grad_norm": 1.4371252416155886, "learning_rate": 4.732658733572504e-06, "loss": 0.6387, "step": 6276 }, { "epoch": 0.5316112640271015, "grad_norm": 1.1782301393016166, "learning_rate": 4.731289070770171e-06, "loss": 0.6632, "step": 6277 }, { "epoch": 0.5316959559601948, "grad_norm": 1.3594858866433077, "learning_rate": 4.729919428189759e-06, "loss": 0.6401, "step": 6278 }, { "epoch": 0.5317806478932882, "grad_norm": 1.4773257361847147, "learning_rate": 4.72854980593434e-06, "loss": 0.6477, "step": 6279 }, { "epoch": 0.5318653398263815, "grad_norm": 1.3672300094250078, "learning_rate": 4.7271802041069845e-06, "loss": 0.6169, "step": 6280 }, { "epoch": 0.5319500317594749, "grad_norm": 1.3934075717433838, "learning_rate": 4.725810622810765e-06, "loss": 0.6385, "step": 6281 }, { "epoch": 0.5320347236925683, "grad_norm": 1.7144774531333522, "learning_rate": 4.724441062148747e-06, "loss": 0.6076, "step": 6282 }, { "epoch": 0.5321194156256617, "grad_norm": 1.515874472884198, "learning_rate": 4.723071522223998e-06, "loss": 0.6195, "step": 6283 }, { "epoch": 0.532204107558755, "grad_norm": 1.4304578076023242, "learning_rate": 4.721702003139586e-06, "loss": 0.6608, "step": 6284 }, { "epoch": 0.5322887994918484, "grad_norm": 1.286270206234245, "learning_rate": 4.720332504998568e-06, "loss": 0.6368, "step": 6285 }, { "epoch": 0.5323734914249417, "grad_norm": 1.1720826598358465, "learning_rate": 4.7189630279040136e-06, "loss": 0.6, "step": 6286 }, { "epoch": 0.5324581833580352, "grad_norm": 1.766465465739111, "learning_rate": 4.717593571958977e-06, "loss": 0.6914, "step": 6287 }, { "epoch": 0.5325428752911285, "grad_norm": 1.3518470945059202, "learning_rate": 4.716224137266519e-06, "loss": 0.6619, "step": 6288 }, { "epoch": 0.5326275672242219, "grad_norm": 1.8974378653619126, "learning_rate": 4.7148547239297e-06, "loss": 0.6192, "step": 6289 }, { "epoch": 0.5327122591573152, "grad_norm": 1.7484779055341348, "learning_rate": 4.71348533205157e-06, "loss": 0.6228, "step": 6290 }, { "epoch": 0.5327969510904086, "grad_norm": 1.8908344207276395, "learning_rate": 4.712115961735189e-06, "loss": 0.6552, "step": 6291 }, { "epoch": 0.532881643023502, "grad_norm": 1.4218721077287484, "learning_rate": 4.710746613083604e-06, "loss": 0.6431, "step": 6292 }, { "epoch": 0.5329663349565954, "grad_norm": 0.660064905050983, "learning_rate": 4.709377286199868e-06, "loss": 0.8476, "step": 6293 }, { "epoch": 0.5330510268896888, "grad_norm": 1.4388271778510238, "learning_rate": 4.708007981187033e-06, "loss": 0.675, "step": 6294 }, { "epoch": 0.5331357188227821, "grad_norm": 3.089722640685378, "learning_rate": 4.70663869814814e-06, "loss": 0.6404, "step": 6295 }, { "epoch": 0.5332204107558755, "grad_norm": 0.6706943719248605, "learning_rate": 4.7052694371862385e-06, "loss": 0.8961, "step": 6296 }, { "epoch": 0.5333051026889689, "grad_norm": 1.2666820983491263, "learning_rate": 4.703900198404376e-06, "loss": 0.6503, "step": 6297 }, { "epoch": 0.5333897946220623, "grad_norm": 1.4419755130831329, "learning_rate": 4.702530981905588e-06, "loss": 0.6219, "step": 6298 }, { "epoch": 0.5334744865551556, "grad_norm": 1.2074041762428949, "learning_rate": 4.7011617877929215e-06, "loss": 0.6642, "step": 6299 }, { "epoch": 0.533559178488249, "grad_norm": 1.449299786640123, "learning_rate": 4.699792616169411e-06, "loss": 0.6231, "step": 6300 }, { "epoch": 0.5336438704213423, "grad_norm": 2.2179867285047177, "learning_rate": 4.6984234671380955e-06, "loss": 0.5611, "step": 6301 }, { "epoch": 0.5337285623544358, "grad_norm": 1.1276844198704845, "learning_rate": 4.6970543408020126e-06, "loss": 0.6483, "step": 6302 }, { "epoch": 0.5338132542875291, "grad_norm": 1.2407823305525048, "learning_rate": 4.695685237264193e-06, "loss": 0.6409, "step": 6303 }, { "epoch": 0.5338979462206225, "grad_norm": 1.2348704093345189, "learning_rate": 4.694316156627672e-06, "loss": 0.6316, "step": 6304 }, { "epoch": 0.5339826381537158, "grad_norm": 1.3722843448619981, "learning_rate": 4.692947098995479e-06, "loss": 0.6277, "step": 6305 }, { "epoch": 0.5340673300868092, "grad_norm": 1.3502487577162252, "learning_rate": 4.691578064470641e-06, "loss": 0.6028, "step": 6306 }, { "epoch": 0.5341520220199026, "grad_norm": 1.4024499184460402, "learning_rate": 4.69020905315619e-06, "loss": 0.6075, "step": 6307 }, { "epoch": 0.534236713952996, "grad_norm": 1.7292779666793907, "learning_rate": 4.688840065155146e-06, "loss": 0.5962, "step": 6308 }, { "epoch": 0.5343214058860893, "grad_norm": 2.2455762957546654, "learning_rate": 4.6874711005705345e-06, "loss": 0.5901, "step": 6309 }, { "epoch": 0.5344060978191827, "grad_norm": 1.342375486326384, "learning_rate": 4.6861021595053795e-06, "loss": 0.6438, "step": 6310 }, { "epoch": 0.534490789752276, "grad_norm": 1.4922019073053876, "learning_rate": 4.684733242062697e-06, "loss": 0.6269, "step": 6311 }, { "epoch": 0.5345754816853695, "grad_norm": 1.5642005307538633, "learning_rate": 4.683364348345507e-06, "loss": 0.5628, "step": 6312 }, { "epoch": 0.5346601736184629, "grad_norm": 1.5341275937163381, "learning_rate": 4.681995478456829e-06, "loss": 0.6701, "step": 6313 }, { "epoch": 0.5347448655515562, "grad_norm": 1.664552468769588, "learning_rate": 4.680626632499673e-06, "loss": 0.719, "step": 6314 }, { "epoch": 0.5348295574846496, "grad_norm": 1.4675071223566782, "learning_rate": 4.679257810577056e-06, "loss": 0.6486, "step": 6315 }, { "epoch": 0.534914249417743, "grad_norm": 1.43291983849665, "learning_rate": 4.677889012791985e-06, "loss": 0.6126, "step": 6316 }, { "epoch": 0.5349989413508364, "grad_norm": 2.8109248984495516, "learning_rate": 4.676520239247472e-06, "loss": 0.6169, "step": 6317 }, { "epoch": 0.5350836332839297, "grad_norm": 1.4029355870541829, "learning_rate": 4.675151490046526e-06, "loss": 0.6092, "step": 6318 }, { "epoch": 0.5351683252170231, "grad_norm": 1.2613284692220836, "learning_rate": 4.673782765292149e-06, "loss": 0.658, "step": 6319 }, { "epoch": 0.5352530171501164, "grad_norm": 1.341210481122512, "learning_rate": 4.672414065087344e-06, "loss": 0.6332, "step": 6320 }, { "epoch": 0.5353377090832099, "grad_norm": 0.605425935683624, "learning_rate": 4.67104538953512e-06, "loss": 0.8834, "step": 6321 }, { "epoch": 0.5354224010163032, "grad_norm": 0.6841436534763874, "learning_rate": 4.669676738738469e-06, "loss": 0.8113, "step": 6322 }, { "epoch": 0.5355070929493966, "grad_norm": 1.6002822026232766, "learning_rate": 4.668308112800394e-06, "loss": 0.6594, "step": 6323 }, { "epoch": 0.5355917848824899, "grad_norm": 2.8223621729527553, "learning_rate": 4.6669395118238895e-06, "loss": 0.6378, "step": 6324 }, { "epoch": 0.5356764768155833, "grad_norm": 1.4772621831611847, "learning_rate": 4.6655709359119495e-06, "loss": 0.6527, "step": 6325 }, { "epoch": 0.5357611687486767, "grad_norm": 1.3720053114270114, "learning_rate": 4.664202385167569e-06, "loss": 0.6924, "step": 6326 }, { "epoch": 0.5358458606817701, "grad_norm": 1.304234382215791, "learning_rate": 4.662833859693736e-06, "loss": 0.6508, "step": 6327 }, { "epoch": 0.5359305526148634, "grad_norm": 1.3524542862548743, "learning_rate": 4.661465359593442e-06, "loss": 0.5818, "step": 6328 }, { "epoch": 0.5360152445479568, "grad_norm": 1.5887510093487378, "learning_rate": 4.66009688496967e-06, "loss": 0.6531, "step": 6329 }, { "epoch": 0.5360999364810501, "grad_norm": 1.3623637105754218, "learning_rate": 4.658728435925408e-06, "loss": 0.6437, "step": 6330 }, { "epoch": 0.5361846284141436, "grad_norm": 1.3119464499961409, "learning_rate": 4.65736001256364e-06, "loss": 0.6698, "step": 6331 }, { "epoch": 0.536269320347237, "grad_norm": 1.3495474994101795, "learning_rate": 4.655991614987343e-06, "loss": 0.6349, "step": 6332 }, { "epoch": 0.5363540122803303, "grad_norm": 1.4055737713227603, "learning_rate": 4.6546232432995e-06, "loss": 0.6354, "step": 6333 }, { "epoch": 0.5364387042134237, "grad_norm": 1.0938161463299043, "learning_rate": 4.653254897603087e-06, "loss": 0.7076, "step": 6334 }, { "epoch": 0.536523396146517, "grad_norm": 0.6021424381558003, "learning_rate": 4.651886578001077e-06, "loss": 0.8219, "step": 6335 }, { "epoch": 0.5366080880796105, "grad_norm": 1.5616310342998605, "learning_rate": 4.650518284596448e-06, "loss": 0.6641, "step": 6336 }, { "epoch": 0.5366927800127038, "grad_norm": 1.5274422254127502, "learning_rate": 4.649150017492167e-06, "loss": 0.6708, "step": 6337 }, { "epoch": 0.5367774719457972, "grad_norm": 1.3243371262697938, "learning_rate": 4.647781776791206e-06, "loss": 0.5765, "step": 6338 }, { "epoch": 0.5368621638788905, "grad_norm": 1.8857568830589666, "learning_rate": 4.646413562596531e-06, "loss": 0.6542, "step": 6339 }, { "epoch": 0.5369468558119839, "grad_norm": 1.1545778800239983, "learning_rate": 4.645045375011107e-06, "loss": 0.6012, "step": 6340 }, { "epoch": 0.5370315477450773, "grad_norm": 1.760031454731947, "learning_rate": 4.6436772141378985e-06, "loss": 0.6157, "step": 6341 }, { "epoch": 0.5371162396781707, "grad_norm": 1.4807394358192754, "learning_rate": 4.642309080079868e-06, "loss": 0.6562, "step": 6342 }, { "epoch": 0.537200931611264, "grad_norm": 1.2408444820573625, "learning_rate": 4.640940972939972e-06, "loss": 0.6388, "step": 6343 }, { "epoch": 0.5372856235443574, "grad_norm": 1.3954188672099253, "learning_rate": 4.63957289282117e-06, "loss": 0.6333, "step": 6344 }, { "epoch": 0.5373703154774507, "grad_norm": 1.5673232297522197, "learning_rate": 4.638204839826415e-06, "loss": 0.6065, "step": 6345 }, { "epoch": 0.5374550074105442, "grad_norm": 0.729547976471223, "learning_rate": 4.636836814058661e-06, "loss": 0.8741, "step": 6346 }, { "epoch": 0.5375396993436375, "grad_norm": 2.500855155778226, "learning_rate": 4.635468815620862e-06, "loss": 0.6111, "step": 6347 }, { "epoch": 0.5376243912767309, "grad_norm": 1.476822377507469, "learning_rate": 4.634100844615963e-06, "loss": 0.6458, "step": 6348 }, { "epoch": 0.5377090832098242, "grad_norm": 1.3161895273918585, "learning_rate": 4.632732901146913e-06, "loss": 0.6582, "step": 6349 }, { "epoch": 0.5377937751429176, "grad_norm": 1.3793214621327625, "learning_rate": 4.63136498531666e-06, "loss": 0.6438, "step": 6350 }, { "epoch": 0.5378784670760111, "grad_norm": 1.5040532829959579, "learning_rate": 4.62999709722814e-06, "loss": 0.5938, "step": 6351 }, { "epoch": 0.5379631590091044, "grad_norm": 1.124686024808875, "learning_rate": 4.628629236984301e-06, "loss": 0.6174, "step": 6352 }, { "epoch": 0.5380478509421978, "grad_norm": 1.330335843529886, "learning_rate": 4.627261404688076e-06, "loss": 0.6998, "step": 6353 }, { "epoch": 0.5381325428752911, "grad_norm": 1.1437282029836473, "learning_rate": 4.625893600442404e-06, "loss": 0.5839, "step": 6354 }, { "epoch": 0.5382172348083845, "grad_norm": 1.347894428684481, "learning_rate": 4.624525824350221e-06, "loss": 0.6391, "step": 6355 }, { "epoch": 0.5383019267414779, "grad_norm": 1.3841556246312396, "learning_rate": 4.6231580765144565e-06, "loss": 0.6744, "step": 6356 }, { "epoch": 0.5383866186745713, "grad_norm": 1.290219483881874, "learning_rate": 4.621790357038044e-06, "loss": 0.6221, "step": 6357 }, { "epoch": 0.5384713106076646, "grad_norm": 0.6571428175540182, "learning_rate": 4.620422666023908e-06, "loss": 0.8314, "step": 6358 }, { "epoch": 0.538556002540758, "grad_norm": 1.2047057029755537, "learning_rate": 4.619055003574977e-06, "loss": 0.66, "step": 6359 }, { "epoch": 0.5386406944738513, "grad_norm": 3.075863748352758, "learning_rate": 4.617687369794176e-06, "loss": 0.6126, "step": 6360 }, { "epoch": 0.5387253864069448, "grad_norm": 1.2412505747824196, "learning_rate": 4.616319764784421e-06, "loss": 0.6337, "step": 6361 }, { "epoch": 0.5388100783400381, "grad_norm": 1.1605831612043076, "learning_rate": 4.614952188648638e-06, "loss": 0.5579, "step": 6362 }, { "epoch": 0.5388947702731315, "grad_norm": 1.8324727820264344, "learning_rate": 4.6135846414897424e-06, "loss": 0.6924, "step": 6363 }, { "epoch": 0.5389794622062248, "grad_norm": 1.269982224169779, "learning_rate": 4.6122171234106475e-06, "loss": 0.6452, "step": 6364 }, { "epoch": 0.5390641541393182, "grad_norm": 1.2721301219117316, "learning_rate": 4.610849634514269e-06, "loss": 0.6528, "step": 6365 }, { "epoch": 0.5391488460724116, "grad_norm": 1.4405507035155003, "learning_rate": 4.6094821749035135e-06, "loss": 0.5978, "step": 6366 }, { "epoch": 0.539233538005505, "grad_norm": 0.6533827297030376, "learning_rate": 4.608114744681293e-06, "loss": 0.867, "step": 6367 }, { "epoch": 0.5393182299385983, "grad_norm": 1.2303018859726695, "learning_rate": 4.606747343950514e-06, "loss": 0.6905, "step": 6368 }, { "epoch": 0.5394029218716917, "grad_norm": 1.8511576357366055, "learning_rate": 4.605379972814079e-06, "loss": 0.669, "step": 6369 }, { "epoch": 0.539487613804785, "grad_norm": 2.654978513281302, "learning_rate": 4.60401263137489e-06, "loss": 0.6736, "step": 6370 }, { "epoch": 0.5395723057378785, "grad_norm": 1.9906708811869576, "learning_rate": 4.602645319735849e-06, "loss": 0.6476, "step": 6371 }, { "epoch": 0.5396569976709719, "grad_norm": 1.369639306753537, "learning_rate": 4.6012780379998506e-06, "loss": 0.6111, "step": 6372 }, { "epoch": 0.5397416896040652, "grad_norm": 1.3213608397852836, "learning_rate": 4.599910786269793e-06, "loss": 0.6289, "step": 6373 }, { "epoch": 0.5398263815371586, "grad_norm": 1.6502563242046755, "learning_rate": 4.598543564648566e-06, "loss": 0.6291, "step": 6374 }, { "epoch": 0.5399110734702519, "grad_norm": 1.453799616416575, "learning_rate": 4.597176373239061e-06, "loss": 0.6392, "step": 6375 }, { "epoch": 0.5399957654033454, "grad_norm": 1.3265080327015126, "learning_rate": 4.5958092121441685e-06, "loss": 0.6133, "step": 6376 }, { "epoch": 0.5400804573364387, "grad_norm": 1.2389754229429597, "learning_rate": 4.594442081466771e-06, "loss": 0.6168, "step": 6377 }, { "epoch": 0.5401651492695321, "grad_norm": 1.2560540525736648, "learning_rate": 4.593074981309756e-06, "loss": 0.6375, "step": 6378 }, { "epoch": 0.5402498412026254, "grad_norm": 1.8510400052537246, "learning_rate": 4.5917079117760025e-06, "loss": 0.6351, "step": 6379 }, { "epoch": 0.5403345331357188, "grad_norm": 1.6688222071477918, "learning_rate": 4.590340872968391e-06, "loss": 0.6532, "step": 6380 }, { "epoch": 0.5404192250688122, "grad_norm": 1.3522317168173819, "learning_rate": 4.588973864989798e-06, "loss": 0.6572, "step": 6381 }, { "epoch": 0.5405039170019056, "grad_norm": 1.2563211764221798, "learning_rate": 4.587606887943098e-06, "loss": 0.6621, "step": 6382 }, { "epoch": 0.5405886089349989, "grad_norm": 0.653583498513133, "learning_rate": 4.586239941931163e-06, "loss": 0.8561, "step": 6383 }, { "epoch": 0.5406733008680923, "grad_norm": 0.564965566865076, "learning_rate": 4.584873027056867e-06, "loss": 0.8489, "step": 6384 }, { "epoch": 0.5407579928011856, "grad_norm": 1.2223740289016303, "learning_rate": 4.58350614342307e-06, "loss": 0.6271, "step": 6385 }, { "epoch": 0.5408426847342791, "grad_norm": 1.3705761110511483, "learning_rate": 4.58213929113264e-06, "loss": 0.6094, "step": 6386 }, { "epoch": 0.5409273766673725, "grad_norm": 1.6091560466257893, "learning_rate": 4.580772470288445e-06, "loss": 0.6233, "step": 6387 }, { "epoch": 0.5410120686004658, "grad_norm": 1.2198829776490683, "learning_rate": 4.579405680993339e-06, "loss": 0.6349, "step": 6388 }, { "epoch": 0.5410967605335592, "grad_norm": 1.3699764424677405, "learning_rate": 4.578038923350184e-06, "loss": 0.6417, "step": 6389 }, { "epoch": 0.5411814524666525, "grad_norm": 1.1536100272985317, "learning_rate": 4.5766721974618324e-06, "loss": 0.6243, "step": 6390 }, { "epoch": 0.541266144399746, "grad_norm": 1.1156978124444368, "learning_rate": 4.575305503431141e-06, "loss": 0.6489, "step": 6391 }, { "epoch": 0.5413508363328393, "grad_norm": 1.4659632667137452, "learning_rate": 4.57393884136096e-06, "loss": 0.6259, "step": 6392 }, { "epoch": 0.5414355282659327, "grad_norm": 1.1957405335587445, "learning_rate": 4.572572211354135e-06, "loss": 0.6285, "step": 6393 }, { "epoch": 0.541520220199026, "grad_norm": 1.0842923189697447, "learning_rate": 4.571205613513518e-06, "loss": 0.5939, "step": 6394 }, { "epoch": 0.5416049121321194, "grad_norm": 1.264733711312509, "learning_rate": 4.569839047941947e-06, "loss": 0.6314, "step": 6395 }, { "epoch": 0.5416896040652128, "grad_norm": 1.3236417271344094, "learning_rate": 4.568472514742264e-06, "loss": 0.6292, "step": 6396 }, { "epoch": 0.5417742959983062, "grad_norm": 1.2959027583916034, "learning_rate": 4.567106014017312e-06, "loss": 0.6131, "step": 6397 }, { "epoch": 0.5418589879313995, "grad_norm": 1.5164660750232892, "learning_rate": 4.565739545869923e-06, "loss": 0.6355, "step": 6398 }, { "epoch": 0.5419436798644929, "grad_norm": 1.2670689753775963, "learning_rate": 4.5643731104029335e-06, "loss": 0.6311, "step": 6399 }, { "epoch": 0.5420283717975862, "grad_norm": 1.5042739834259633, "learning_rate": 4.563006707719174e-06, "loss": 0.6765, "step": 6400 }, { "epoch": 0.5421130637306797, "grad_norm": 1.9138910356097258, "learning_rate": 4.561640337921473e-06, "loss": 0.6518, "step": 6401 }, { "epoch": 0.542197755663773, "grad_norm": 1.3369917472835777, "learning_rate": 4.560274001112659e-06, "loss": 0.6109, "step": 6402 }, { "epoch": 0.5422824475968664, "grad_norm": 1.664291461874944, "learning_rate": 4.558907697395553e-06, "loss": 0.6401, "step": 6403 }, { "epoch": 0.5423671395299597, "grad_norm": 1.3949479908861608, "learning_rate": 4.55754142687298e-06, "loss": 0.6245, "step": 6404 }, { "epoch": 0.5424518314630531, "grad_norm": 1.4706574377457147, "learning_rate": 4.556175189647759e-06, "loss": 0.6151, "step": 6405 }, { "epoch": 0.5425365233961466, "grad_norm": 2.20177986899381, "learning_rate": 4.554808985822703e-06, "loss": 0.6418, "step": 6406 }, { "epoch": 0.5426212153292399, "grad_norm": 1.5032661061629888, "learning_rate": 4.553442815500628e-06, "loss": 0.5961, "step": 6407 }, { "epoch": 0.5427059072623333, "grad_norm": 1.2429418701111188, "learning_rate": 4.552076678784348e-06, "loss": 0.6551, "step": 6408 }, { "epoch": 0.5427905991954266, "grad_norm": 1.2516580387821585, "learning_rate": 4.550710575776668e-06, "loss": 0.6298, "step": 6409 }, { "epoch": 0.54287529112852, "grad_norm": 1.2882707515908003, "learning_rate": 4.549344506580396e-06, "loss": 0.7018, "step": 6410 }, { "epoch": 0.5429599830616134, "grad_norm": 0.6081195385852494, "learning_rate": 4.547978471298337e-06, "loss": 0.871, "step": 6411 }, { "epoch": 0.5430446749947068, "grad_norm": 1.5529306898120128, "learning_rate": 4.546612470033291e-06, "loss": 0.681, "step": 6412 }, { "epoch": 0.5431293669278001, "grad_norm": 1.3252845668074453, "learning_rate": 4.545246502888059e-06, "loss": 0.5965, "step": 6413 }, { "epoch": 0.5432140588608935, "grad_norm": 1.5477605667775527, "learning_rate": 4.543880569965433e-06, "loss": 0.6478, "step": 6414 }, { "epoch": 0.5432987507939868, "grad_norm": 1.883366169678614, "learning_rate": 4.542514671368211e-06, "loss": 0.6799, "step": 6415 }, { "epoch": 0.5433834427270803, "grad_norm": 1.3490442801221454, "learning_rate": 4.541148807199183e-06, "loss": 0.6324, "step": 6416 }, { "epoch": 0.5434681346601736, "grad_norm": 1.341286025526393, "learning_rate": 4.539782977561135e-06, "loss": 0.6656, "step": 6417 }, { "epoch": 0.543552826593267, "grad_norm": 1.213727322591811, "learning_rate": 4.5384171825568576e-06, "loss": 0.6346, "step": 6418 }, { "epoch": 0.5436375185263603, "grad_norm": 1.2557029931905772, "learning_rate": 4.53705142228913e-06, "loss": 0.6287, "step": 6419 }, { "epoch": 0.5437222104594538, "grad_norm": 1.4302802457162955, "learning_rate": 4.535685696860734e-06, "loss": 0.6931, "step": 6420 }, { "epoch": 0.5438069023925471, "grad_norm": 1.2620609390166342, "learning_rate": 4.534320006374449e-06, "loss": 0.6118, "step": 6421 }, { "epoch": 0.5438915943256405, "grad_norm": 0.655381071017754, "learning_rate": 4.5329543509330486e-06, "loss": 0.8543, "step": 6422 }, { "epoch": 0.5439762862587338, "grad_norm": 1.1948540718700418, "learning_rate": 4.531588730639305e-06, "loss": 0.6425, "step": 6423 }, { "epoch": 0.5440609781918272, "grad_norm": 1.3691179248271257, "learning_rate": 4.5302231455959925e-06, "loss": 0.6129, "step": 6424 }, { "epoch": 0.5441456701249207, "grad_norm": 1.8449567021521982, "learning_rate": 4.528857595905874e-06, "loss": 0.6312, "step": 6425 }, { "epoch": 0.544230362058014, "grad_norm": 0.5836447384146333, "learning_rate": 4.527492081671719e-06, "loss": 0.8202, "step": 6426 }, { "epoch": 0.5443150539911074, "grad_norm": 1.356970549418779, "learning_rate": 4.526126602996283e-06, "loss": 0.6302, "step": 6427 }, { "epoch": 0.5443997459242007, "grad_norm": 1.423383402137447, "learning_rate": 4.52476115998233e-06, "loss": 0.6526, "step": 6428 }, { "epoch": 0.5444844378572941, "grad_norm": 1.6902387888259722, "learning_rate": 4.52339575273262e-06, "loss": 0.6505, "step": 6429 }, { "epoch": 0.5445691297903875, "grad_norm": 1.3938997850586303, "learning_rate": 4.5220303813499e-06, "loss": 0.6354, "step": 6430 }, { "epoch": 0.5446538217234809, "grad_norm": 1.2802941158823975, "learning_rate": 4.520665045936926e-06, "loss": 0.6441, "step": 6431 }, { "epoch": 0.5447385136565742, "grad_norm": 1.557850620213247, "learning_rate": 4.519299746596445e-06, "loss": 0.6608, "step": 6432 }, { "epoch": 0.5448232055896676, "grad_norm": 1.557087022611182, "learning_rate": 4.517934483431203e-06, "loss": 0.6707, "step": 6433 }, { "epoch": 0.5449078975227609, "grad_norm": 1.2655442798190626, "learning_rate": 4.516569256543945e-06, "loss": 0.6296, "step": 6434 }, { "epoch": 0.5449925894558544, "grad_norm": 0.6161239112772944, "learning_rate": 4.515204066037409e-06, "loss": 0.8884, "step": 6435 }, { "epoch": 0.5450772813889477, "grad_norm": 1.2719444040553642, "learning_rate": 4.513838912014335e-06, "loss": 0.6018, "step": 6436 }, { "epoch": 0.5451619733220411, "grad_norm": 1.3284848154362803, "learning_rate": 4.512473794577456e-06, "loss": 0.6419, "step": 6437 }, { "epoch": 0.5452466652551344, "grad_norm": 1.3392206985458488, "learning_rate": 4.511108713829507e-06, "loss": 0.6139, "step": 6438 }, { "epoch": 0.5453313571882278, "grad_norm": 1.370286518069911, "learning_rate": 4.509743669873217e-06, "loss": 0.6814, "step": 6439 }, { "epoch": 0.5454160491213212, "grad_norm": 0.6696344586030467, "learning_rate": 4.50837866281131e-06, "loss": 0.8376, "step": 6440 }, { "epoch": 0.5455007410544146, "grad_norm": 1.3761576033682608, "learning_rate": 4.5070136927465125e-06, "loss": 0.6349, "step": 6441 }, { "epoch": 0.545585432987508, "grad_norm": 1.3913589027029258, "learning_rate": 4.5056487597815455e-06, "loss": 0.6665, "step": 6442 }, { "epoch": 0.5456701249206013, "grad_norm": 2.1586252762835247, "learning_rate": 4.504283864019126e-06, "loss": 0.6388, "step": 6443 }, { "epoch": 0.5457548168536946, "grad_norm": 1.1979555322483428, "learning_rate": 4.502919005561971e-06, "loss": 0.6027, "step": 6444 }, { "epoch": 0.5458395087867881, "grad_norm": 1.5827631937598048, "learning_rate": 4.501554184512794e-06, "loss": 0.656, "step": 6445 }, { "epoch": 0.5459242007198815, "grad_norm": 2.021064293647978, "learning_rate": 4.5001894009743016e-06, "loss": 0.5807, "step": 6446 }, { "epoch": 0.5460088926529748, "grad_norm": 2.1558758264497664, "learning_rate": 4.498824655049206e-06, "loss": 0.6556, "step": 6447 }, { "epoch": 0.5460935845860682, "grad_norm": 0.6753158926068196, "learning_rate": 4.4974599468402075e-06, "loss": 0.8039, "step": 6448 }, { "epoch": 0.5461782765191615, "grad_norm": 1.6894274633560689, "learning_rate": 4.496095276450009e-06, "loss": 0.6182, "step": 6449 }, { "epoch": 0.546262968452255, "grad_norm": 1.3234276942615868, "learning_rate": 4.494730643981311e-06, "loss": 0.6299, "step": 6450 }, { "epoch": 0.5463476603853483, "grad_norm": 1.489587756605343, "learning_rate": 4.493366049536806e-06, "loss": 0.6245, "step": 6451 }, { "epoch": 0.5464323523184417, "grad_norm": 1.9690507439221474, "learning_rate": 4.492001493219188e-06, "loss": 0.635, "step": 6452 }, { "epoch": 0.546517044251535, "grad_norm": 1.2859862631840664, "learning_rate": 4.49063697513115e-06, "loss": 0.6104, "step": 6453 }, { "epoch": 0.5466017361846284, "grad_norm": 1.4321614305989832, "learning_rate": 4.489272495375376e-06, "loss": 0.6956, "step": 6454 }, { "epoch": 0.5466864281177218, "grad_norm": 1.2352556034287752, "learning_rate": 4.48790805405455e-06, "loss": 0.6382, "step": 6455 }, { "epoch": 0.5467711200508152, "grad_norm": 0.6019376012110395, "learning_rate": 4.486543651271355e-06, "loss": 0.8507, "step": 6456 }, { "epoch": 0.5468558119839085, "grad_norm": 1.4541808557715108, "learning_rate": 4.485179287128468e-06, "loss": 0.6361, "step": 6457 }, { "epoch": 0.5469405039170019, "grad_norm": 1.6007795340609174, "learning_rate": 4.483814961728568e-06, "loss": 0.548, "step": 6458 }, { "epoch": 0.5470251958500952, "grad_norm": 1.242934380852114, "learning_rate": 4.482450675174324e-06, "loss": 0.635, "step": 6459 }, { "epoch": 0.5471098877831887, "grad_norm": 1.2660582078774865, "learning_rate": 4.4810864275684076e-06, "loss": 0.6567, "step": 6460 }, { "epoch": 0.547194579716282, "grad_norm": 1.3993278016414266, "learning_rate": 4.479722219013486e-06, "loss": 0.6618, "step": 6461 }, { "epoch": 0.5472792716493754, "grad_norm": 1.9742992628011173, "learning_rate": 4.478358049612221e-06, "loss": 0.6803, "step": 6462 }, { "epoch": 0.5473639635824687, "grad_norm": 2.0442002949412346, "learning_rate": 4.476993919467278e-06, "loss": 0.6596, "step": 6463 }, { "epoch": 0.5474486555155621, "grad_norm": 1.323727254096771, "learning_rate": 4.475629828681309e-06, "loss": 0.6668, "step": 6464 }, { "epoch": 0.5475333474486556, "grad_norm": 1.4098602058715712, "learning_rate": 4.474265777356972e-06, "loss": 0.6387, "step": 6465 }, { "epoch": 0.5476180393817489, "grad_norm": 1.2536908785470577, "learning_rate": 4.4729017655969206e-06, "loss": 0.5935, "step": 6466 }, { "epoch": 0.5477027313148423, "grad_norm": 1.5436761916424597, "learning_rate": 4.471537793503801e-06, "loss": 0.6563, "step": 6467 }, { "epoch": 0.5477874232479356, "grad_norm": 1.4264333339206718, "learning_rate": 4.470173861180263e-06, "loss": 0.5699, "step": 6468 }, { "epoch": 0.547872115181029, "grad_norm": 1.458337047533946, "learning_rate": 4.468809968728946e-06, "loss": 0.6611, "step": 6469 }, { "epoch": 0.5479568071141224, "grad_norm": 1.3214606172313101, "learning_rate": 4.467446116252491e-06, "loss": 0.6389, "step": 6470 }, { "epoch": 0.5480414990472158, "grad_norm": 1.346149963888391, "learning_rate": 4.4660823038535375e-06, "loss": 0.6565, "step": 6471 }, { "epoch": 0.5481261909803091, "grad_norm": 1.619056669299232, "learning_rate": 4.464718531634715e-06, "loss": 0.6311, "step": 6472 }, { "epoch": 0.5482108829134025, "grad_norm": 1.4956402393759989, "learning_rate": 4.463354799698659e-06, "loss": 0.6307, "step": 6473 }, { "epoch": 0.5482955748464958, "grad_norm": 1.2210329396715889, "learning_rate": 4.461991108147998e-06, "loss": 0.6248, "step": 6474 }, { "epoch": 0.5483802667795893, "grad_norm": 1.927110195098972, "learning_rate": 4.460627457085353e-06, "loss": 0.6378, "step": 6475 }, { "epoch": 0.5484649587126826, "grad_norm": 1.5134648882853268, "learning_rate": 4.4592638466133494e-06, "loss": 0.6366, "step": 6476 }, { "epoch": 0.548549650645776, "grad_norm": 1.2578127136835588, "learning_rate": 4.4579002768346034e-06, "loss": 0.5564, "step": 6477 }, { "epoch": 0.5486343425788693, "grad_norm": 1.3211726739523866, "learning_rate": 4.4565367478517315e-06, "loss": 0.6023, "step": 6478 }, { "epoch": 0.5487190345119627, "grad_norm": 1.5230174464330841, "learning_rate": 4.455173259767348e-06, "loss": 0.6306, "step": 6479 }, { "epoch": 0.5488037264450562, "grad_norm": 1.3379995186623437, "learning_rate": 4.453809812684061e-06, "loss": 0.6672, "step": 6480 }, { "epoch": 0.5488884183781495, "grad_norm": 1.8809632039669457, "learning_rate": 4.452446406704478e-06, "loss": 0.6301, "step": 6481 }, { "epoch": 0.5489731103112429, "grad_norm": 1.5162574283556471, "learning_rate": 4.4510830419312046e-06, "loss": 0.5979, "step": 6482 }, { "epoch": 0.5490578022443362, "grad_norm": 3.0536211830250943, "learning_rate": 4.449719718466836e-06, "loss": 0.6233, "step": 6483 }, { "epoch": 0.5491424941774296, "grad_norm": 1.291915632802732, "learning_rate": 4.448356436413975e-06, "loss": 0.651, "step": 6484 }, { "epoch": 0.549227186110523, "grad_norm": 0.6499124753187022, "learning_rate": 4.446993195875211e-06, "loss": 0.9291, "step": 6485 }, { "epoch": 0.5493118780436164, "grad_norm": 1.670863084364917, "learning_rate": 4.445629996953138e-06, "loss": 0.6545, "step": 6486 }, { "epoch": 0.5493965699767097, "grad_norm": 1.1277491102821529, "learning_rate": 4.444266839750344e-06, "loss": 0.6483, "step": 6487 }, { "epoch": 0.5494812619098031, "grad_norm": 1.4048829837061072, "learning_rate": 4.442903724369412e-06, "loss": 0.5783, "step": 6488 }, { "epoch": 0.5495659538428964, "grad_norm": 1.2615581972288368, "learning_rate": 4.441540650912924e-06, "loss": 0.6487, "step": 6489 }, { "epoch": 0.5496506457759899, "grad_norm": 1.2450574321292374, "learning_rate": 4.4401776194834615e-06, "loss": 0.6347, "step": 6490 }, { "epoch": 0.5497353377090832, "grad_norm": 1.2523673240079705, "learning_rate": 4.438814630183596e-06, "loss": 0.6076, "step": 6491 }, { "epoch": 0.5498200296421766, "grad_norm": 0.6036137462094446, "learning_rate": 4.437451683115903e-06, "loss": 0.8713, "step": 6492 }, { "epoch": 0.5499047215752699, "grad_norm": 2.042852632692278, "learning_rate": 4.4360887783829465e-06, "loss": 0.5949, "step": 6493 }, { "epoch": 0.5499894135083633, "grad_norm": 1.268996079209949, "learning_rate": 4.4347259160872966e-06, "loss": 0.7069, "step": 6494 }, { "epoch": 0.5500741054414567, "grad_norm": 1.308554386806365, "learning_rate": 4.433363096331517e-06, "loss": 0.6343, "step": 6495 }, { "epoch": 0.5501587973745501, "grad_norm": 1.3100280178765955, "learning_rate": 4.432000319218164e-06, "loss": 0.6237, "step": 6496 }, { "epoch": 0.5502434893076434, "grad_norm": 1.510608875695218, "learning_rate": 4.430637584849794e-06, "loss": 0.6196, "step": 6497 }, { "epoch": 0.5503281812407368, "grad_norm": 1.2113186294863687, "learning_rate": 4.42927489332896e-06, "loss": 0.6593, "step": 6498 }, { "epoch": 0.5504128731738301, "grad_norm": 8.005737078538298, "learning_rate": 4.427912244758213e-06, "loss": 0.6297, "step": 6499 }, { "epoch": 0.5504975651069236, "grad_norm": 1.401817158005431, "learning_rate": 4.426549639240099e-06, "loss": 0.5907, "step": 6500 }, { "epoch": 0.550582257040017, "grad_norm": 1.8064655067293245, "learning_rate": 4.425187076877161e-06, "loss": 0.6229, "step": 6501 }, { "epoch": 0.5506669489731103, "grad_norm": 1.8564583260593908, "learning_rate": 4.423824557771938e-06, "loss": 0.6753, "step": 6502 }, { "epoch": 0.5507516409062037, "grad_norm": 1.3915488320271452, "learning_rate": 4.422462082026971e-06, "loss": 0.6078, "step": 6503 }, { "epoch": 0.550836332839297, "grad_norm": 1.4991366504759385, "learning_rate": 4.421099649744788e-06, "loss": 0.5946, "step": 6504 }, { "epoch": 0.5509210247723905, "grad_norm": 1.3656061209371004, "learning_rate": 4.419737261027925e-06, "loss": 0.6256, "step": 6505 }, { "epoch": 0.5510057167054838, "grad_norm": 1.2791181934299938, "learning_rate": 4.418374915978903e-06, "loss": 0.6052, "step": 6506 }, { "epoch": 0.5510904086385772, "grad_norm": 1.3543855227973376, "learning_rate": 4.4170126147002485e-06, "loss": 0.634, "step": 6507 }, { "epoch": 0.5511751005716705, "grad_norm": 1.5528327315493156, "learning_rate": 4.415650357294482e-06, "loss": 0.611, "step": 6508 }, { "epoch": 0.5512597925047639, "grad_norm": 5.1713885137916, "learning_rate": 4.41428814386412e-06, "loss": 0.6333, "step": 6509 }, { "epoch": 0.5513444844378573, "grad_norm": 1.4732455892271048, "learning_rate": 4.4129259745116775e-06, "loss": 0.6745, "step": 6510 }, { "epoch": 0.5514291763709507, "grad_norm": 0.6305667863986542, "learning_rate": 4.411563849339664e-06, "loss": 0.8784, "step": 6511 }, { "epoch": 0.551513868304044, "grad_norm": 1.2397946514636435, "learning_rate": 4.410201768450586e-06, "loss": 0.6273, "step": 6512 }, { "epoch": 0.5515985602371374, "grad_norm": 1.5215473546259362, "learning_rate": 4.40883973194695e-06, "loss": 0.6209, "step": 6513 }, { "epoch": 0.5516832521702307, "grad_norm": 1.2145645517233092, "learning_rate": 4.407477739931253e-06, "loss": 0.616, "step": 6514 }, { "epoch": 0.5517679441033242, "grad_norm": 1.672034146044464, "learning_rate": 4.4061157925059935e-06, "loss": 0.657, "step": 6515 }, { "epoch": 0.5518526360364175, "grad_norm": 1.7310335349204253, "learning_rate": 4.404753889773667e-06, "loss": 0.6145, "step": 6516 }, { "epoch": 0.5519373279695109, "grad_norm": 1.3101130226678674, "learning_rate": 4.403392031836761e-06, "loss": 0.6313, "step": 6517 }, { "epoch": 0.5520220199026042, "grad_norm": 1.140009001274339, "learning_rate": 4.402030218797762e-06, "loss": 0.6137, "step": 6518 }, { "epoch": 0.5521067118356976, "grad_norm": 1.3089980268072883, "learning_rate": 4.400668450759159e-06, "loss": 0.6216, "step": 6519 }, { "epoch": 0.5521914037687911, "grad_norm": 1.8079772670726364, "learning_rate": 4.399306727823426e-06, "loss": 0.7218, "step": 6520 }, { "epoch": 0.5522760957018844, "grad_norm": 1.360350209906057, "learning_rate": 4.3979450500930445e-06, "loss": 0.6514, "step": 6521 }, { "epoch": 0.5523607876349778, "grad_norm": 1.3708080428009035, "learning_rate": 4.396583417670485e-06, "loss": 0.6106, "step": 6522 }, { "epoch": 0.5524454795680711, "grad_norm": 1.5817174652652113, "learning_rate": 4.395221830658217e-06, "loss": 0.6864, "step": 6523 }, { "epoch": 0.5525301715011646, "grad_norm": 1.5063397741285416, "learning_rate": 4.393860289158711e-06, "loss": 0.68, "step": 6524 }, { "epoch": 0.5526148634342579, "grad_norm": 2.062050436624442, "learning_rate": 4.392498793274427e-06, "loss": 0.5915, "step": 6525 }, { "epoch": 0.5526995553673513, "grad_norm": 1.2367679969656313, "learning_rate": 4.391137343107825e-06, "loss": 0.6719, "step": 6526 }, { "epoch": 0.5527842473004446, "grad_norm": 1.290184305552578, "learning_rate": 4.3897759387613645e-06, "loss": 0.6482, "step": 6527 }, { "epoch": 0.552868939233538, "grad_norm": 1.3654453856070183, "learning_rate": 4.388414580337494e-06, "loss": 0.6565, "step": 6528 }, { "epoch": 0.5529536311666314, "grad_norm": 1.398004828886795, "learning_rate": 4.387053267938666e-06, "loss": 0.6288, "step": 6529 }, { "epoch": 0.5530383230997248, "grad_norm": 2.1726914168461553, "learning_rate": 4.385692001667324e-06, "loss": 0.6523, "step": 6530 }, { "epoch": 0.5531230150328181, "grad_norm": 2.739266040058878, "learning_rate": 4.384330781625911e-06, "loss": 0.5749, "step": 6531 }, { "epoch": 0.5532077069659115, "grad_norm": 0.6341926596959649, "learning_rate": 4.382969607916869e-06, "loss": 0.8447, "step": 6532 }, { "epoch": 0.5532923988990048, "grad_norm": 1.3093011730448911, "learning_rate": 4.3816084806426295e-06, "loss": 0.6357, "step": 6533 }, { "epoch": 0.5533770908320983, "grad_norm": 0.6360043628276304, "learning_rate": 4.380247399905629e-06, "loss": 0.8705, "step": 6534 }, { "epoch": 0.5534617827651916, "grad_norm": 0.6423287540404627, "learning_rate": 4.378886365808291e-06, "loss": 0.8861, "step": 6535 }, { "epoch": 0.553546474698285, "grad_norm": 1.3146978677756667, "learning_rate": 4.377525378453043e-06, "loss": 0.6573, "step": 6536 }, { "epoch": 0.5536311666313783, "grad_norm": 1.5031555091545705, "learning_rate": 4.376164437942308e-06, "loss": 0.672, "step": 6537 }, { "epoch": 0.5537158585644717, "grad_norm": 1.2710559312773577, "learning_rate": 4.374803544378499e-06, "loss": 0.6111, "step": 6538 }, { "epoch": 0.5538005504975652, "grad_norm": 1.4856765173415885, "learning_rate": 4.373442697864037e-06, "loss": 0.677, "step": 6539 }, { "epoch": 0.5538852424306585, "grad_norm": 1.2977320758508861, "learning_rate": 4.37208189850133e-06, "loss": 0.616, "step": 6540 }, { "epoch": 0.5539699343637519, "grad_norm": 1.4316308960587698, "learning_rate": 4.370721146392783e-06, "loss": 0.6869, "step": 6541 }, { "epoch": 0.5540546262968452, "grad_norm": 1.5934193620398276, "learning_rate": 4.369360441640804e-06, "loss": 0.6415, "step": 6542 }, { "epoch": 0.5541393182299386, "grad_norm": 1.3186102396375194, "learning_rate": 4.3679997843477905e-06, "loss": 0.6097, "step": 6543 }, { "epoch": 0.554224010163032, "grad_norm": 1.363422622079964, "learning_rate": 4.366639174616138e-06, "loss": 0.6238, "step": 6544 }, { "epoch": 0.5543087020961254, "grad_norm": 1.4872868328175737, "learning_rate": 4.365278612548244e-06, "loss": 0.6072, "step": 6545 }, { "epoch": 0.5543933940292187, "grad_norm": 1.2213105889717082, "learning_rate": 4.363918098246493e-06, "loss": 0.5964, "step": 6546 }, { "epoch": 0.5544780859623121, "grad_norm": 1.2815451353470289, "learning_rate": 4.362557631813275e-06, "loss": 0.6072, "step": 6547 }, { "epoch": 0.5545627778954054, "grad_norm": 0.6163604719206571, "learning_rate": 4.361197213350971e-06, "loss": 0.7844, "step": 6548 }, { "epoch": 0.5546474698284989, "grad_norm": 1.304615487136164, "learning_rate": 4.359836842961957e-06, "loss": 0.6512, "step": 6549 }, { "epoch": 0.5547321617615922, "grad_norm": 1.4011594572633577, "learning_rate": 4.358476520748615e-06, "loss": 0.6748, "step": 6550 }, { "epoch": 0.5548168536946856, "grad_norm": 1.3516710992350003, "learning_rate": 4.3571162468133075e-06, "loss": 0.6317, "step": 6551 }, { "epoch": 0.5549015456277789, "grad_norm": 1.3682253547988048, "learning_rate": 4.355756021258408e-06, "loss": 0.6293, "step": 6552 }, { "epoch": 0.5549862375608723, "grad_norm": 1.6897866882856016, "learning_rate": 4.35439584418628e-06, "loss": 0.6577, "step": 6553 }, { "epoch": 0.5550709294939657, "grad_norm": 1.5733743902642712, "learning_rate": 4.353035715699282e-06, "loss": 0.5958, "step": 6554 }, { "epoch": 0.5551556214270591, "grad_norm": 1.6062905773640592, "learning_rate": 4.351675635899773e-06, "loss": 0.6205, "step": 6555 }, { "epoch": 0.5552403133601524, "grad_norm": 1.4600124968350061, "learning_rate": 4.350315604890105e-06, "loss": 0.643, "step": 6556 }, { "epoch": 0.5553250052932458, "grad_norm": 1.4448754794265097, "learning_rate": 4.348955622772628e-06, "loss": 0.6104, "step": 6557 }, { "epoch": 0.5554096972263391, "grad_norm": 1.6494137683057042, "learning_rate": 4.34759568964969e-06, "loss": 0.6489, "step": 6558 }, { "epoch": 0.5554943891594326, "grad_norm": 1.2494302330614089, "learning_rate": 4.346235805623627e-06, "loss": 0.6606, "step": 6559 }, { "epoch": 0.555579081092526, "grad_norm": 1.1492474472443173, "learning_rate": 4.344875970796784e-06, "loss": 0.6324, "step": 6560 }, { "epoch": 0.5556637730256193, "grad_norm": 1.2901118555966584, "learning_rate": 4.343516185271494e-06, "loss": 0.697, "step": 6561 }, { "epoch": 0.5557484649587127, "grad_norm": 1.2593241928111178, "learning_rate": 4.342156449150086e-06, "loss": 0.6318, "step": 6562 }, { "epoch": 0.555833156891806, "grad_norm": 0.6587651952740483, "learning_rate": 4.340796762534888e-06, "loss": 0.8571, "step": 6563 }, { "epoch": 0.5559178488248995, "grad_norm": 1.2603481281912405, "learning_rate": 4.339437125528226e-06, "loss": 0.627, "step": 6564 }, { "epoch": 0.5560025407579928, "grad_norm": 1.310874988957827, "learning_rate": 4.338077538232417e-06, "loss": 0.6069, "step": 6565 }, { "epoch": 0.5560872326910862, "grad_norm": 1.4657343876822433, "learning_rate": 4.336718000749779e-06, "loss": 0.6294, "step": 6566 }, { "epoch": 0.5561719246241795, "grad_norm": 1.1470257127292323, "learning_rate": 4.335358513182623e-06, "loss": 0.5494, "step": 6567 }, { "epoch": 0.5562566165572729, "grad_norm": 1.4520846515533417, "learning_rate": 4.3339990756332576e-06, "loss": 0.6483, "step": 6568 }, { "epoch": 0.5563413084903663, "grad_norm": 1.3503997041428975, "learning_rate": 4.33263968820399e-06, "loss": 0.6231, "step": 6569 }, { "epoch": 0.5564260004234597, "grad_norm": 1.2836341660485058, "learning_rate": 4.331280350997118e-06, "loss": 0.6314, "step": 6570 }, { "epoch": 0.556510692356553, "grad_norm": 1.2276191589746832, "learning_rate": 4.329921064114943e-06, "loss": 0.5559, "step": 6571 }, { "epoch": 0.5565953842896464, "grad_norm": 1.4417360760187645, "learning_rate": 4.328561827659755e-06, "loss": 0.6999, "step": 6572 }, { "epoch": 0.5566800762227397, "grad_norm": 1.835601326798844, "learning_rate": 4.327202641733844e-06, "loss": 0.6935, "step": 6573 }, { "epoch": 0.5567647681558332, "grad_norm": 1.162682828568042, "learning_rate": 4.3258435064394985e-06, "loss": 0.5788, "step": 6574 }, { "epoch": 0.5568494600889266, "grad_norm": 1.2465438113043614, "learning_rate": 4.324484421878997e-06, "loss": 0.5759, "step": 6575 }, { "epoch": 0.5569341520220199, "grad_norm": 1.356402962422298, "learning_rate": 4.323125388154621e-06, "loss": 0.6467, "step": 6576 }, { "epoch": 0.5570188439551133, "grad_norm": 0.6789383223688915, "learning_rate": 4.321766405368644e-06, "loss": 0.8941, "step": 6577 }, { "epoch": 0.5571035358882066, "grad_norm": 1.6128278738844692, "learning_rate": 4.320407473623336e-06, "loss": 0.6176, "step": 6578 }, { "epoch": 0.5571882278213001, "grad_norm": 1.2889203847080553, "learning_rate": 4.319048593020965e-06, "loss": 0.6234, "step": 6579 }, { "epoch": 0.5572729197543934, "grad_norm": 1.430262519723854, "learning_rate": 4.317689763663791e-06, "loss": 0.6737, "step": 6580 }, { "epoch": 0.5573576116874868, "grad_norm": 1.9672429744235655, "learning_rate": 4.316330985654077e-06, "loss": 0.5996, "step": 6581 }, { "epoch": 0.5574423036205801, "grad_norm": 1.4989110274873063, "learning_rate": 4.314972259094078e-06, "loss": 0.6081, "step": 6582 }, { "epoch": 0.5575269955536735, "grad_norm": 1.303801900926329, "learning_rate": 4.313613584086041e-06, "loss": 0.6588, "step": 6583 }, { "epoch": 0.5576116874867669, "grad_norm": 1.186149605277246, "learning_rate": 4.312254960732216e-06, "loss": 0.6185, "step": 6584 }, { "epoch": 0.5576963794198603, "grad_norm": 1.508442376435969, "learning_rate": 4.31089638913485e-06, "loss": 0.5986, "step": 6585 }, { "epoch": 0.5577810713529536, "grad_norm": 0.6671924457446828, "learning_rate": 4.3095378693961785e-06, "loss": 0.8396, "step": 6586 }, { "epoch": 0.557865763286047, "grad_norm": 1.505258639200665, "learning_rate": 4.308179401618439e-06, "loss": 0.6874, "step": 6587 }, { "epoch": 0.5579504552191403, "grad_norm": 1.3574627087239397, "learning_rate": 4.3068209859038614e-06, "loss": 0.651, "step": 6588 }, { "epoch": 0.5580351471522338, "grad_norm": 1.3654139113152557, "learning_rate": 4.3054626223546746e-06, "loss": 0.6407, "step": 6589 }, { "epoch": 0.5581198390853271, "grad_norm": 1.7838988367494786, "learning_rate": 4.304104311073105e-06, "loss": 0.6413, "step": 6590 }, { "epoch": 0.5582045310184205, "grad_norm": 1.2543533991364624, "learning_rate": 4.30274605216137e-06, "loss": 0.657, "step": 6591 }, { "epoch": 0.5582892229515138, "grad_norm": 1.4375543850208141, "learning_rate": 4.301387845721687e-06, "loss": 0.6733, "step": 6592 }, { "epoch": 0.5583739148846072, "grad_norm": 1.4762769417274106, "learning_rate": 4.3000296918562696e-06, "loss": 0.6282, "step": 6593 }, { "epoch": 0.5584586068177007, "grad_norm": 1.392232969791487, "learning_rate": 4.298671590667322e-06, "loss": 0.6431, "step": 6594 }, { "epoch": 0.558543298750794, "grad_norm": 1.5217142935297807, "learning_rate": 4.297313542257053e-06, "loss": 0.6301, "step": 6595 }, { "epoch": 0.5586279906838874, "grad_norm": 2.60573710767915, "learning_rate": 4.295955546727658e-06, "loss": 0.6482, "step": 6596 }, { "epoch": 0.5587126826169807, "grad_norm": 1.5165568869469441, "learning_rate": 4.2945976041813385e-06, "loss": 0.6196, "step": 6597 }, { "epoch": 0.558797374550074, "grad_norm": 1.245072254691979, "learning_rate": 4.293239714720284e-06, "loss": 0.6617, "step": 6598 }, { "epoch": 0.5588820664831675, "grad_norm": 0.6077483850144948, "learning_rate": 4.291881878446683e-06, "loss": 0.816, "step": 6599 }, { "epoch": 0.5589667584162609, "grad_norm": 1.3838806629475329, "learning_rate": 4.290524095462721e-06, "loss": 0.6823, "step": 6600 }, { "epoch": 0.5590514503493542, "grad_norm": 1.6030974153261812, "learning_rate": 4.289166365870577e-06, "loss": 0.6106, "step": 6601 }, { "epoch": 0.5591361422824476, "grad_norm": 2.3424913021280607, "learning_rate": 4.287808689772428e-06, "loss": 0.5907, "step": 6602 }, { "epoch": 0.5592208342155409, "grad_norm": 1.3010178734777749, "learning_rate": 4.286451067270448e-06, "loss": 0.6332, "step": 6603 }, { "epoch": 0.5593055261486344, "grad_norm": 1.2030697793594114, "learning_rate": 4.2850934984668005e-06, "loss": 0.6518, "step": 6604 }, { "epoch": 0.5593902180817277, "grad_norm": 1.6194283908477787, "learning_rate": 4.283735983463655e-06, "loss": 0.6261, "step": 6605 }, { "epoch": 0.5594749100148211, "grad_norm": 1.1638740612833354, "learning_rate": 4.28237852236317e-06, "loss": 0.632, "step": 6606 }, { "epoch": 0.5595596019479144, "grad_norm": 1.416618108594001, "learning_rate": 4.2810211152675004e-06, "loss": 0.6144, "step": 6607 }, { "epoch": 0.5596442938810078, "grad_norm": 1.3236997201353196, "learning_rate": 4.2796637622787995e-06, "loss": 0.7043, "step": 6608 }, { "epoch": 0.5597289858141012, "grad_norm": 0.6158901657424146, "learning_rate": 4.278306463499214e-06, "loss": 0.8401, "step": 6609 }, { "epoch": 0.5598136777471946, "grad_norm": 1.2724749285155839, "learning_rate": 4.276949219030888e-06, "loss": 0.6462, "step": 6610 }, { "epoch": 0.5598983696802879, "grad_norm": 0.6369500931804494, "learning_rate": 4.275592028975964e-06, "loss": 0.9272, "step": 6611 }, { "epoch": 0.5599830616133813, "grad_norm": 1.247046728785622, "learning_rate": 4.274234893436574e-06, "loss": 0.6525, "step": 6612 }, { "epoch": 0.5600677535464746, "grad_norm": 1.3891503811848067, "learning_rate": 4.272877812514852e-06, "loss": 0.6273, "step": 6613 }, { "epoch": 0.5601524454795681, "grad_norm": 1.613382022503804, "learning_rate": 4.271520786312926e-06, "loss": 0.6149, "step": 6614 }, { "epoch": 0.5602371374126615, "grad_norm": 1.360628134440682, "learning_rate": 4.270163814932916e-06, "loss": 0.6497, "step": 6615 }, { "epoch": 0.5603218293457548, "grad_norm": 1.3495077881979494, "learning_rate": 4.268806898476946e-06, "loss": 0.6744, "step": 6616 }, { "epoch": 0.5604065212788482, "grad_norm": 0.6241533852749139, "learning_rate": 4.267450037047128e-06, "loss": 0.9285, "step": 6617 }, { "epoch": 0.5604912132119415, "grad_norm": 2.280113771902823, "learning_rate": 4.266093230745573e-06, "loss": 0.6362, "step": 6618 }, { "epoch": 0.560575905145035, "grad_norm": 1.4752967267898562, "learning_rate": 4.26473647967439e-06, "loss": 0.6376, "step": 6619 }, { "epoch": 0.5606605970781283, "grad_norm": 1.3303559319238352, "learning_rate": 4.263379783935678e-06, "loss": 0.6349, "step": 6620 }, { "epoch": 0.5607452890112217, "grad_norm": 1.6285035676348547, "learning_rate": 4.262023143631538e-06, "loss": 0.6021, "step": 6621 }, { "epoch": 0.560829980944315, "grad_norm": 0.6001717034689354, "learning_rate": 4.2606665588640665e-06, "loss": 0.9106, "step": 6622 }, { "epoch": 0.5609146728774085, "grad_norm": 2.2837585078278995, "learning_rate": 4.25931002973535e-06, "loss": 0.6433, "step": 6623 }, { "epoch": 0.5609993648105018, "grad_norm": 1.7873010737757042, "learning_rate": 4.257953556347478e-06, "loss": 0.6219, "step": 6624 }, { "epoch": 0.5610840567435952, "grad_norm": 1.6165122204867477, "learning_rate": 4.256597138802527e-06, "loss": 0.581, "step": 6625 }, { "epoch": 0.5611687486766885, "grad_norm": 0.6397816845922558, "learning_rate": 4.2552407772025785e-06, "loss": 0.8185, "step": 6626 }, { "epoch": 0.5612534406097819, "grad_norm": 1.1843837933833943, "learning_rate": 4.2538844716497075e-06, "loss": 0.6251, "step": 6627 }, { "epoch": 0.5613381325428753, "grad_norm": 1.1206752320361038, "learning_rate": 4.252528222245979e-06, "loss": 0.6509, "step": 6628 }, { "epoch": 0.5614228244759687, "grad_norm": 1.3881637295056646, "learning_rate": 4.251172029093458e-06, "loss": 0.6115, "step": 6629 }, { "epoch": 0.561507516409062, "grad_norm": 1.2816474964824331, "learning_rate": 4.249815892294211e-06, "loss": 0.6199, "step": 6630 }, { "epoch": 0.5615922083421554, "grad_norm": 1.3720691168218202, "learning_rate": 4.248459811950288e-06, "loss": 0.6444, "step": 6631 }, { "epoch": 0.5616769002752487, "grad_norm": 1.3190524737378793, "learning_rate": 4.247103788163745e-06, "loss": 0.5955, "step": 6632 }, { "epoch": 0.5617615922083422, "grad_norm": 1.6787207802602344, "learning_rate": 4.245747821036628e-06, "loss": 0.6198, "step": 6633 }, { "epoch": 0.5618462841414356, "grad_norm": 1.3029563136940527, "learning_rate": 4.244391910670981e-06, "loss": 0.6122, "step": 6634 }, { "epoch": 0.5619309760745289, "grad_norm": 1.2111742501251075, "learning_rate": 4.243036057168845e-06, "loss": 0.5801, "step": 6635 }, { "epoch": 0.5620156680076223, "grad_norm": 1.524450040910039, "learning_rate": 4.241680260632253e-06, "loss": 0.6891, "step": 6636 }, { "epoch": 0.5621003599407156, "grad_norm": 1.2112574067521122, "learning_rate": 4.240324521163239e-06, "loss": 0.6476, "step": 6637 }, { "epoch": 0.5621850518738091, "grad_norm": 2.206296745605532, "learning_rate": 4.238968838863825e-06, "loss": 0.6649, "step": 6638 }, { "epoch": 0.5622697438069024, "grad_norm": 1.3946841199174314, "learning_rate": 4.237613213836036e-06, "loss": 0.6388, "step": 6639 }, { "epoch": 0.5623544357399958, "grad_norm": 1.315870690762652, "learning_rate": 4.236257646181891e-06, "loss": 0.644, "step": 6640 }, { "epoch": 0.5624391276730891, "grad_norm": 3.1008591218480137, "learning_rate": 4.234902136003401e-06, "loss": 0.6316, "step": 6641 }, { "epoch": 0.5625238196061825, "grad_norm": 1.3618284821316844, "learning_rate": 4.233546683402576e-06, "loss": 0.6443, "step": 6642 }, { "epoch": 0.5626085115392759, "grad_norm": 1.3105488225758795, "learning_rate": 4.232191288481424e-06, "loss": 0.5992, "step": 6643 }, { "epoch": 0.5626932034723693, "grad_norm": 1.2829482778502617, "learning_rate": 4.230835951341942e-06, "loss": 0.5748, "step": 6644 }, { "epoch": 0.5627778954054626, "grad_norm": 1.3466229912026153, "learning_rate": 4.229480672086128e-06, "loss": 0.639, "step": 6645 }, { "epoch": 0.562862587338556, "grad_norm": 1.4229735541560466, "learning_rate": 4.228125450815972e-06, "loss": 0.6558, "step": 6646 }, { "epoch": 0.5629472792716493, "grad_norm": 1.3817359780536655, "learning_rate": 4.226770287633464e-06, "loss": 0.6237, "step": 6647 }, { "epoch": 0.5630319712047428, "grad_norm": 1.5626632937584322, "learning_rate": 4.225415182640589e-06, "loss": 0.6233, "step": 6648 }, { "epoch": 0.5631166631378361, "grad_norm": 30.855349701831983, "learning_rate": 4.2240601359393196e-06, "loss": 0.5972, "step": 6649 }, { "epoch": 0.5632013550709295, "grad_norm": 1.9316482124258547, "learning_rate": 4.222705147631634e-06, "loss": 0.6355, "step": 6650 }, { "epoch": 0.5632860470040228, "grad_norm": 1.4199743669262317, "learning_rate": 4.2213502178195045e-06, "loss": 0.6403, "step": 6651 }, { "epoch": 0.5633707389371162, "grad_norm": 0.6704390724312681, "learning_rate": 4.219995346604892e-06, "loss": 0.8699, "step": 6652 }, { "epoch": 0.5634554308702097, "grad_norm": 1.3869488982355316, "learning_rate": 4.2186405340897605e-06, "loss": 0.6239, "step": 6653 }, { "epoch": 0.563540122803303, "grad_norm": 1.670481854629882, "learning_rate": 4.2172857803760665e-06, "loss": 0.6392, "step": 6654 }, { "epoch": 0.5636248147363964, "grad_norm": 1.629446450721396, "learning_rate": 4.215931085565762e-06, "loss": 0.6419, "step": 6655 }, { "epoch": 0.5637095066694897, "grad_norm": 0.6652215790315047, "learning_rate": 4.2145764497607955e-06, "loss": 0.813, "step": 6656 }, { "epoch": 0.5637941986025831, "grad_norm": 1.731864359575769, "learning_rate": 4.213221873063109e-06, "loss": 0.6569, "step": 6657 }, { "epoch": 0.5638788905356765, "grad_norm": 1.2178561592814245, "learning_rate": 4.211867355574644e-06, "loss": 0.5765, "step": 6658 }, { "epoch": 0.5639635824687699, "grad_norm": 1.7026675761171923, "learning_rate": 4.210512897397335e-06, "loss": 0.6327, "step": 6659 }, { "epoch": 0.5640482744018632, "grad_norm": 3.1884879398494386, "learning_rate": 4.2091584986331075e-06, "loss": 0.6116, "step": 6660 }, { "epoch": 0.5641329663349566, "grad_norm": 1.394624115822816, "learning_rate": 4.207804159383895e-06, "loss": 0.5984, "step": 6661 }, { "epoch": 0.5642176582680499, "grad_norm": 1.4952306115471705, "learning_rate": 4.206449879751612e-06, "loss": 0.647, "step": 6662 }, { "epoch": 0.5643023502011434, "grad_norm": 1.6106943478376932, "learning_rate": 4.205095659838177e-06, "loss": 0.6061, "step": 6663 }, { "epoch": 0.5643870421342367, "grad_norm": 1.3594675535921557, "learning_rate": 4.203741499745503e-06, "loss": 0.6342, "step": 6664 }, { "epoch": 0.5644717340673301, "grad_norm": 1.1699823369577462, "learning_rate": 4.202387399575498e-06, "loss": 0.6294, "step": 6665 }, { "epoch": 0.5645564260004234, "grad_norm": 1.22460693479577, "learning_rate": 4.2010333594300645e-06, "loss": 0.6541, "step": 6666 }, { "epoch": 0.5646411179335168, "grad_norm": 1.3288369122161983, "learning_rate": 4.199679379411102e-06, "loss": 0.6567, "step": 6667 }, { "epoch": 0.5647258098666103, "grad_norm": 1.340662988216841, "learning_rate": 4.1983254596205035e-06, "loss": 0.6341, "step": 6668 }, { "epoch": 0.5648105017997036, "grad_norm": 1.314311123279032, "learning_rate": 4.196971600160161e-06, "loss": 0.6243, "step": 6669 }, { "epoch": 0.564895193732797, "grad_norm": 1.3459534968042588, "learning_rate": 4.195617801131955e-06, "loss": 0.6093, "step": 6670 }, { "epoch": 0.5649798856658903, "grad_norm": 2.2637370012884763, "learning_rate": 4.194264062637769e-06, "loss": 0.6582, "step": 6671 }, { "epoch": 0.5650645775989837, "grad_norm": 1.8209722937932038, "learning_rate": 4.192910384779483e-06, "loss": 0.6071, "step": 6672 }, { "epoch": 0.5651492695320771, "grad_norm": 1.2033444363079555, "learning_rate": 4.1915567676589605e-06, "loss": 0.6595, "step": 6673 }, { "epoch": 0.5652339614651705, "grad_norm": 1.6124739565999833, "learning_rate": 4.190203211378073e-06, "loss": 0.5894, "step": 6674 }, { "epoch": 0.5653186533982638, "grad_norm": 0.7480500261998909, "learning_rate": 4.188849716038682e-06, "loss": 0.886, "step": 6675 }, { "epoch": 0.5654033453313572, "grad_norm": 1.3241842082732322, "learning_rate": 4.187496281742644e-06, "loss": 0.6022, "step": 6676 }, { "epoch": 0.5654880372644505, "grad_norm": 1.3325912982239638, "learning_rate": 4.186142908591815e-06, "loss": 0.5927, "step": 6677 }, { "epoch": 0.565572729197544, "grad_norm": 1.4955841247980841, "learning_rate": 4.184789596688041e-06, "loss": 0.6378, "step": 6678 }, { "epoch": 0.5656574211306373, "grad_norm": 1.3098466238064435, "learning_rate": 4.183436346133166e-06, "loss": 0.6084, "step": 6679 }, { "epoch": 0.5657421130637307, "grad_norm": 1.2970022663909948, "learning_rate": 4.182083157029032e-06, "loss": 0.6365, "step": 6680 }, { "epoch": 0.565826804996824, "grad_norm": 1.323556486941163, "learning_rate": 4.180730029477468e-06, "loss": 0.6339, "step": 6681 }, { "epoch": 0.5659114969299174, "grad_norm": 1.5799544170993478, "learning_rate": 4.179376963580311e-06, "loss": 0.6622, "step": 6682 }, { "epoch": 0.5659961888630108, "grad_norm": 1.2751925093426943, "learning_rate": 4.178023959439381e-06, "loss": 0.6538, "step": 6683 }, { "epoch": 0.5660808807961042, "grad_norm": 2.1080944087389772, "learning_rate": 4.176671017156501e-06, "loss": 0.5919, "step": 6684 }, { "epoch": 0.5661655727291975, "grad_norm": 2.375694874455851, "learning_rate": 4.175318136833487e-06, "loss": 0.6665, "step": 6685 }, { "epoch": 0.5662502646622909, "grad_norm": 1.3355280552251216, "learning_rate": 4.1739653185721495e-06, "loss": 0.6498, "step": 6686 }, { "epoch": 0.5663349565953842, "grad_norm": 1.5977922667230142, "learning_rate": 4.172612562474295e-06, "loss": 0.6608, "step": 6687 }, { "epoch": 0.5664196485284777, "grad_norm": 1.3750992316018484, "learning_rate": 4.171259868641729e-06, "loss": 0.6111, "step": 6688 }, { "epoch": 0.566504340461571, "grad_norm": 1.1876553660393914, "learning_rate": 4.169907237176244e-06, "loss": 0.6465, "step": 6689 }, { "epoch": 0.5665890323946644, "grad_norm": 1.4202753295487458, "learning_rate": 4.1685546681796376e-06, "loss": 0.6665, "step": 6690 }, { "epoch": 0.5666737243277578, "grad_norm": 1.2679763877680061, "learning_rate": 4.167202161753692e-06, "loss": 0.6726, "step": 6691 }, { "epoch": 0.5667584162608511, "grad_norm": 2.3697140697287447, "learning_rate": 4.165849718000194e-06, "loss": 0.6499, "step": 6692 }, { "epoch": 0.5668431081939446, "grad_norm": 1.192573610734945, "learning_rate": 4.164497337020924e-06, "loss": 0.6075, "step": 6693 }, { "epoch": 0.5669278001270379, "grad_norm": 2.218388291873486, "learning_rate": 4.163145018917652e-06, "loss": 0.6132, "step": 6694 }, { "epoch": 0.5670124920601313, "grad_norm": 1.2891480436499505, "learning_rate": 4.1617927637921476e-06, "loss": 0.6142, "step": 6695 }, { "epoch": 0.5670971839932246, "grad_norm": 1.3232698188031151, "learning_rate": 4.160440571746179e-06, "loss": 0.6493, "step": 6696 }, { "epoch": 0.567181875926318, "grad_norm": 0.6236531994501648, "learning_rate": 4.159088442881501e-06, "loss": 0.8625, "step": 6697 }, { "epoch": 0.5672665678594114, "grad_norm": 1.3933145765379478, "learning_rate": 4.157736377299871e-06, "loss": 0.6435, "step": 6698 }, { "epoch": 0.5673512597925048, "grad_norm": 1.395970678991464, "learning_rate": 4.1563843751030385e-06, "loss": 0.6378, "step": 6699 }, { "epoch": 0.5674359517255981, "grad_norm": 0.596334948404511, "learning_rate": 4.155032436392749e-06, "loss": 0.8301, "step": 6700 }, { "epoch": 0.5675206436586915, "grad_norm": 1.3763090035848347, "learning_rate": 4.153680561270744e-06, "loss": 0.6666, "step": 6701 }, { "epoch": 0.5676053355917848, "grad_norm": 0.5963150600306905, "learning_rate": 4.152328749838757e-06, "loss": 0.885, "step": 6702 }, { "epoch": 0.5676900275248783, "grad_norm": 1.7719739540753772, "learning_rate": 4.150977002198522e-06, "loss": 0.6744, "step": 6703 }, { "epoch": 0.5677747194579716, "grad_norm": 1.617370560779324, "learning_rate": 4.1496253184517606e-06, "loss": 0.6716, "step": 6704 }, { "epoch": 0.567859411391065, "grad_norm": 1.8902147844484063, "learning_rate": 4.148273698700198e-06, "loss": 0.6171, "step": 6705 }, { "epoch": 0.5679441033241583, "grad_norm": 1.2362575598791767, "learning_rate": 4.1469221430455505e-06, "loss": 0.6225, "step": 6706 }, { "epoch": 0.5680287952572517, "grad_norm": 1.3705469782783788, "learning_rate": 4.145570651589528e-06, "loss": 0.6315, "step": 6707 }, { "epoch": 0.5681134871903452, "grad_norm": 1.5496647064446143, "learning_rate": 4.144219224433839e-06, "loss": 0.6172, "step": 6708 }, { "epoch": 0.5681981791234385, "grad_norm": 1.4322973500745442, "learning_rate": 4.142867861680185e-06, "loss": 0.6539, "step": 6709 }, { "epoch": 0.5682828710565319, "grad_norm": 1.2404801525755713, "learning_rate": 4.141516563430262e-06, "loss": 0.6179, "step": 6710 }, { "epoch": 0.5683675629896252, "grad_norm": 1.5929061124251165, "learning_rate": 4.140165329785766e-06, "loss": 0.6431, "step": 6711 }, { "epoch": 0.5684522549227186, "grad_norm": 1.3415511219734781, "learning_rate": 4.1388141608483795e-06, "loss": 0.6091, "step": 6712 }, { "epoch": 0.568536946855812, "grad_norm": 1.303461839230543, "learning_rate": 4.137463056719788e-06, "loss": 0.6137, "step": 6713 }, { "epoch": 0.5686216387889054, "grad_norm": 1.3395604628741151, "learning_rate": 4.136112017501671e-06, "loss": 0.6445, "step": 6714 }, { "epoch": 0.5687063307219987, "grad_norm": 0.6554606778163342, "learning_rate": 4.134761043295697e-06, "loss": 0.9353, "step": 6715 }, { "epoch": 0.5687910226550921, "grad_norm": 1.2376565414594547, "learning_rate": 4.133410134203535e-06, "loss": 0.5882, "step": 6716 }, { "epoch": 0.5688757145881854, "grad_norm": 1.1906175117943785, "learning_rate": 4.132059290326852e-06, "loss": 0.616, "step": 6717 }, { "epoch": 0.5689604065212789, "grad_norm": 1.5588186799996295, "learning_rate": 4.130708511767302e-06, "loss": 0.5986, "step": 6718 }, { "epoch": 0.5690450984543722, "grad_norm": 1.540025054409313, "learning_rate": 4.12935779862654e-06, "loss": 0.6375, "step": 6719 }, { "epoch": 0.5691297903874656, "grad_norm": 1.873401464949276, "learning_rate": 4.128007151006213e-06, "loss": 0.642, "step": 6720 }, { "epoch": 0.5692144823205589, "grad_norm": 1.685860750460352, "learning_rate": 4.1266565690079665e-06, "loss": 0.5914, "step": 6721 }, { "epoch": 0.5692991742536523, "grad_norm": 1.131351176024851, "learning_rate": 4.125306052733438e-06, "loss": 0.5967, "step": 6722 }, { "epoch": 0.5693838661867457, "grad_norm": 0.5556002583432627, "learning_rate": 4.1239556022842595e-06, "loss": 0.7899, "step": 6723 }, { "epoch": 0.5694685581198391, "grad_norm": 1.5369827818713109, "learning_rate": 4.122605217762061e-06, "loss": 0.6061, "step": 6724 }, { "epoch": 0.5695532500529324, "grad_norm": 1.507647877576751, "learning_rate": 4.121254899268468e-06, "loss": 0.6707, "step": 6725 }, { "epoch": 0.5696379419860258, "grad_norm": 1.529386300752773, "learning_rate": 4.119904646905093e-06, "loss": 0.6636, "step": 6726 }, { "epoch": 0.5697226339191193, "grad_norm": 1.4392623234921986, "learning_rate": 4.118554460773558e-06, "loss": 0.6627, "step": 6727 }, { "epoch": 0.5698073258522126, "grad_norm": 1.2036868177272222, "learning_rate": 4.1172043409754645e-06, "loss": 0.6266, "step": 6728 }, { "epoch": 0.569892017785306, "grad_norm": 1.5100080614226832, "learning_rate": 4.115854287612419e-06, "loss": 0.6678, "step": 6729 }, { "epoch": 0.5699767097183993, "grad_norm": 1.1740548783479057, "learning_rate": 4.114504300786021e-06, "loss": 0.5962, "step": 6730 }, { "epoch": 0.5700614016514927, "grad_norm": 1.3168352299509756, "learning_rate": 4.113154380597863e-06, "loss": 0.62, "step": 6731 }, { "epoch": 0.5701460935845861, "grad_norm": 1.5435134898827643, "learning_rate": 4.111804527149534e-06, "loss": 0.6267, "step": 6732 }, { "epoch": 0.5702307855176795, "grad_norm": 1.5150200002729597, "learning_rate": 4.110454740542617e-06, "loss": 0.6389, "step": 6733 }, { "epoch": 0.5703154774507728, "grad_norm": 1.2264773168082603, "learning_rate": 4.109105020878692e-06, "loss": 0.597, "step": 6734 }, { "epoch": 0.5704001693838662, "grad_norm": 1.1659468584501258, "learning_rate": 4.107755368259333e-06, "loss": 0.6062, "step": 6735 }, { "epoch": 0.5704848613169595, "grad_norm": 5.172766760171417, "learning_rate": 4.106405782786104e-06, "loss": 0.6279, "step": 6736 }, { "epoch": 0.570569553250053, "grad_norm": 2.4312684251873886, "learning_rate": 4.105056264560573e-06, "loss": 0.6738, "step": 6737 }, { "epoch": 0.5706542451831463, "grad_norm": 1.3910133086177943, "learning_rate": 4.103706813684299e-06, "loss": 0.6558, "step": 6738 }, { "epoch": 0.5707389371162397, "grad_norm": 1.1664842105971067, "learning_rate": 4.102357430258831e-06, "loss": 0.6087, "step": 6739 }, { "epoch": 0.570823629049333, "grad_norm": 1.2981732428198918, "learning_rate": 4.101008114385721e-06, "loss": 0.6417, "step": 6740 }, { "epoch": 0.5709083209824264, "grad_norm": 1.2137848817781771, "learning_rate": 4.099658866166509e-06, "loss": 0.6285, "step": 6741 }, { "epoch": 0.5709930129155198, "grad_norm": 1.3554989924818013, "learning_rate": 4.098309685702736e-06, "loss": 0.6631, "step": 6742 }, { "epoch": 0.5710777048486132, "grad_norm": 1.5072533901338225, "learning_rate": 4.096960573095934e-06, "loss": 0.632, "step": 6743 }, { "epoch": 0.5711623967817065, "grad_norm": 1.6884303879015792, "learning_rate": 4.09561152844763e-06, "loss": 0.6018, "step": 6744 }, { "epoch": 0.5712470887147999, "grad_norm": 2.4238515664244407, "learning_rate": 4.094262551859347e-06, "loss": 0.6242, "step": 6745 }, { "epoch": 0.5713317806478933, "grad_norm": 1.256375044523189, "learning_rate": 4.092913643432606e-06, "loss": 0.6681, "step": 6746 }, { "epoch": 0.5714164725809867, "grad_norm": 1.3741149179097818, "learning_rate": 4.091564803268913e-06, "loss": 0.6444, "step": 6747 }, { "epoch": 0.5715011645140801, "grad_norm": 2.0610458793179878, "learning_rate": 4.090216031469783e-06, "loss": 0.6043, "step": 6748 }, { "epoch": 0.5715858564471734, "grad_norm": 1.3225567729219134, "learning_rate": 4.08886732813671e-06, "loss": 0.6408, "step": 6749 }, { "epoch": 0.5716705483802668, "grad_norm": 1.6040161917953, "learning_rate": 4.087518693371197e-06, "loss": 0.6586, "step": 6750 }, { "epoch": 0.5717552403133601, "grad_norm": 1.3434442418527297, "learning_rate": 4.086170127274735e-06, "loss": 0.6061, "step": 6751 }, { "epoch": 0.5718399322464536, "grad_norm": 1.3454072245542588, "learning_rate": 4.084821629948807e-06, "loss": 0.6123, "step": 6752 }, { "epoch": 0.5719246241795469, "grad_norm": 0.6273887850166314, "learning_rate": 4.0834732014949e-06, "loss": 0.8796, "step": 6753 }, { "epoch": 0.5720093161126403, "grad_norm": 1.6334876677864507, "learning_rate": 4.082124842014488e-06, "loss": 0.7275, "step": 6754 }, { "epoch": 0.5720940080457336, "grad_norm": 1.437016239632664, "learning_rate": 4.0807765516090405e-06, "loss": 0.6579, "step": 6755 }, { "epoch": 0.572178699978827, "grad_norm": 1.5173510705376392, "learning_rate": 4.079428330380027e-06, "loss": 0.6019, "step": 6756 }, { "epoch": 0.5722633919119204, "grad_norm": 1.4130848026067178, "learning_rate": 4.0780801784289035e-06, "loss": 0.6291, "step": 6757 }, { "epoch": 0.5723480838450138, "grad_norm": 1.581699016974608, "learning_rate": 4.076732095857129e-06, "loss": 0.5559, "step": 6758 }, { "epoch": 0.5724327757781071, "grad_norm": 1.27661990614383, "learning_rate": 4.075384082766156e-06, "loss": 0.6711, "step": 6759 }, { "epoch": 0.5725174677112005, "grad_norm": 1.433978936318736, "learning_rate": 4.0740361392574245e-06, "loss": 0.6762, "step": 6760 }, { "epoch": 0.5726021596442938, "grad_norm": 1.2558128413861382, "learning_rate": 4.072688265432376e-06, "loss": 0.6063, "step": 6761 }, { "epoch": 0.5726868515773873, "grad_norm": 1.2960142371102414, "learning_rate": 4.071340461392449e-06, "loss": 0.6466, "step": 6762 }, { "epoch": 0.5727715435104807, "grad_norm": 1.714594136980063, "learning_rate": 4.069992727239067e-06, "loss": 0.6945, "step": 6763 }, { "epoch": 0.572856235443574, "grad_norm": 1.4011349551292964, "learning_rate": 4.068645063073658e-06, "loss": 0.5969, "step": 6764 }, { "epoch": 0.5729409273766674, "grad_norm": 2.76642445155429, "learning_rate": 4.067297468997639e-06, "loss": 0.6594, "step": 6765 }, { "epoch": 0.5730256193097607, "grad_norm": 1.3576079241305636, "learning_rate": 4.065949945112424e-06, "loss": 0.6231, "step": 6766 }, { "epoch": 0.5731103112428542, "grad_norm": 1.2866357679113862, "learning_rate": 4.064602491519423e-06, "loss": 0.6751, "step": 6767 }, { "epoch": 0.5731950031759475, "grad_norm": 2.0478545512820596, "learning_rate": 4.063255108320037e-06, "loss": 0.618, "step": 6768 }, { "epoch": 0.5732796951090409, "grad_norm": 1.1971356675454254, "learning_rate": 4.061907795615664e-06, "loss": 0.6145, "step": 6769 }, { "epoch": 0.5733643870421342, "grad_norm": 1.5157233517217206, "learning_rate": 4.060560553507699e-06, "loss": 0.607, "step": 6770 }, { "epoch": 0.5734490789752276, "grad_norm": 2.1581324603487797, "learning_rate": 4.0592133820975245e-06, "loss": 0.6436, "step": 6771 }, { "epoch": 0.573533770908321, "grad_norm": 1.938590721918977, "learning_rate": 4.057866281486527e-06, "loss": 0.6563, "step": 6772 }, { "epoch": 0.5736184628414144, "grad_norm": 1.495680475549174, "learning_rate": 4.056519251776078e-06, "loss": 0.6549, "step": 6773 }, { "epoch": 0.5737031547745077, "grad_norm": 1.331478007528509, "learning_rate": 4.055172293067552e-06, "loss": 0.7088, "step": 6774 }, { "epoch": 0.5737878467076011, "grad_norm": 1.7114260324695296, "learning_rate": 4.053825405462315e-06, "loss": 0.6608, "step": 6775 }, { "epoch": 0.5738725386406944, "grad_norm": 0.6258513706611011, "learning_rate": 4.052478589061726e-06, "loss": 0.885, "step": 6776 }, { "epoch": 0.5739572305737879, "grad_norm": 1.6009342656898353, "learning_rate": 4.051131843967141e-06, "loss": 0.6484, "step": 6777 }, { "epoch": 0.5740419225068812, "grad_norm": 1.360326879839704, "learning_rate": 4.049785170279908e-06, "loss": 0.6356, "step": 6778 }, { "epoch": 0.5741266144399746, "grad_norm": 1.335247712028792, "learning_rate": 4.048438568101373e-06, "loss": 0.6248, "step": 6779 }, { "epoch": 0.5742113063730679, "grad_norm": 13.830275612674459, "learning_rate": 4.047092037532876e-06, "loss": 0.5806, "step": 6780 }, { "epoch": 0.5742959983061613, "grad_norm": 1.5260361877253272, "learning_rate": 4.045745578675747e-06, "loss": 0.6463, "step": 6781 }, { "epoch": 0.5743806902392548, "grad_norm": 0.5548731721144792, "learning_rate": 4.044399191631316e-06, "loss": 0.8071, "step": 6782 }, { "epoch": 0.5744653821723481, "grad_norm": 2.072639837625332, "learning_rate": 4.043052876500908e-06, "loss": 0.6094, "step": 6783 }, { "epoch": 0.5745500741054415, "grad_norm": 1.4922281384483238, "learning_rate": 4.0417066333858375e-06, "loss": 0.6374, "step": 6784 }, { "epoch": 0.5746347660385348, "grad_norm": 1.3671982639497837, "learning_rate": 4.040360462387418e-06, "loss": 0.6097, "step": 6785 }, { "epoch": 0.5747194579716282, "grad_norm": 1.3276489462993957, "learning_rate": 4.039014363606954e-06, "loss": 0.6414, "step": 6786 }, { "epoch": 0.5748041499047216, "grad_norm": 1.1190084282501243, "learning_rate": 4.037668337145747e-06, "loss": 0.6048, "step": 6787 }, { "epoch": 0.574888841837815, "grad_norm": 2.3810289379420317, "learning_rate": 4.036322383105095e-06, "loss": 0.6437, "step": 6788 }, { "epoch": 0.5749735337709083, "grad_norm": 1.8676955248881493, "learning_rate": 4.034976501586286e-06, "loss": 0.6377, "step": 6789 }, { "epoch": 0.5750582257040017, "grad_norm": 1.711873729277975, "learning_rate": 4.033630692690605e-06, "loss": 0.577, "step": 6790 }, { "epoch": 0.575142917637095, "grad_norm": 1.3766469417822873, "learning_rate": 4.032284956519333e-06, "loss": 0.6321, "step": 6791 }, { "epoch": 0.5752276095701885, "grad_norm": 1.3876394273170745, "learning_rate": 4.03093929317374e-06, "loss": 0.615, "step": 6792 }, { "epoch": 0.5753123015032818, "grad_norm": 1.241407441905707, "learning_rate": 4.0295937027551e-06, "loss": 0.6252, "step": 6793 }, { "epoch": 0.5753969934363752, "grad_norm": 1.5629677672253641, "learning_rate": 4.028248185364669e-06, "loss": 0.6642, "step": 6794 }, { "epoch": 0.5754816853694685, "grad_norm": 1.797180260031386, "learning_rate": 4.026902741103709e-06, "loss": 0.6481, "step": 6795 }, { "epoch": 0.5755663773025619, "grad_norm": 1.6126123901002103, "learning_rate": 4.025557370073471e-06, "loss": 0.6684, "step": 6796 }, { "epoch": 0.5756510692356553, "grad_norm": 1.3487145201376265, "learning_rate": 4.0242120723752e-06, "loss": 0.6263, "step": 6797 }, { "epoch": 0.5757357611687487, "grad_norm": 1.2142981580110062, "learning_rate": 4.022866848110137e-06, "loss": 0.6243, "step": 6798 }, { "epoch": 0.575820453101842, "grad_norm": 0.6090308864773355, "learning_rate": 4.0215216973795205e-06, "loss": 0.8552, "step": 6799 }, { "epoch": 0.5759051450349354, "grad_norm": 0.6110790632928391, "learning_rate": 4.020176620284575e-06, "loss": 0.8903, "step": 6800 }, { "epoch": 0.5759898369680287, "grad_norm": 1.1426323898368644, "learning_rate": 4.018831616926529e-06, "loss": 0.6597, "step": 6801 }, { "epoch": 0.5760745289011222, "grad_norm": 3.7909087802032744, "learning_rate": 4.017486687406598e-06, "loss": 0.6811, "step": 6802 }, { "epoch": 0.5761592208342156, "grad_norm": 1.0277518607680904, "learning_rate": 4.016141831825996e-06, "loss": 0.63, "step": 6803 }, { "epoch": 0.5762439127673089, "grad_norm": 2.2883127520009174, "learning_rate": 4.014797050285933e-06, "loss": 0.6546, "step": 6804 }, { "epoch": 0.5763286047004023, "grad_norm": 1.3126458695335337, "learning_rate": 4.013452342887607e-06, "loss": 0.6206, "step": 6805 }, { "epoch": 0.5764132966334956, "grad_norm": 1.9877100963429934, "learning_rate": 4.0121077097322185e-06, "loss": 0.6565, "step": 6806 }, { "epoch": 0.5764979885665891, "grad_norm": 1.5773069145376482, "learning_rate": 4.010763150920954e-06, "loss": 0.668, "step": 6807 }, { "epoch": 0.5765826804996824, "grad_norm": 1.4109382741813252, "learning_rate": 4.009418666555e-06, "loss": 0.6272, "step": 6808 }, { "epoch": 0.5766673724327758, "grad_norm": 1.43473431822834, "learning_rate": 4.0080742567355384e-06, "loss": 0.6208, "step": 6809 }, { "epoch": 0.5767520643658691, "grad_norm": 0.720118955557181, "learning_rate": 4.006729921563741e-06, "loss": 0.8969, "step": 6810 }, { "epoch": 0.5768367562989625, "grad_norm": 0.6089510185138514, "learning_rate": 4.005385661140775e-06, "loss": 0.8575, "step": 6811 }, { "epoch": 0.5769214482320559, "grad_norm": 2.298044911620972, "learning_rate": 4.0040414755678084e-06, "loss": 0.5834, "step": 6812 }, { "epoch": 0.5770061401651493, "grad_norm": 1.6507050284757019, "learning_rate": 4.002697364945991e-06, "loss": 0.5963, "step": 6813 }, { "epoch": 0.5770908320982426, "grad_norm": 1.7362659131550098, "learning_rate": 4.001353329376481e-06, "loss": 0.6804, "step": 6814 }, { "epoch": 0.577175524031336, "grad_norm": 1.465128264299379, "learning_rate": 4.000009368960418e-06, "loss": 0.6602, "step": 6815 }, { "epoch": 0.5772602159644293, "grad_norm": 2.261972702699111, "learning_rate": 3.998665483798947e-06, "loss": 0.6369, "step": 6816 }, { "epoch": 0.5773449078975228, "grad_norm": 1.2801532677768388, "learning_rate": 3.9973216739932e-06, "loss": 0.6454, "step": 6817 }, { "epoch": 0.5774295998306161, "grad_norm": 1.1597042629121364, "learning_rate": 3.995977939644307e-06, "loss": 0.6376, "step": 6818 }, { "epoch": 0.5775142917637095, "grad_norm": 1.8391381364894333, "learning_rate": 3.994634280853389e-06, "loss": 0.6108, "step": 6819 }, { "epoch": 0.5775989836968028, "grad_norm": 1.1484344955241022, "learning_rate": 3.993290697721567e-06, "loss": 0.5739, "step": 6820 }, { "epoch": 0.5776836756298962, "grad_norm": 1.2802379450904042, "learning_rate": 3.991947190349949e-06, "loss": 0.5979, "step": 6821 }, { "epoch": 0.5777683675629897, "grad_norm": 1.3468803013054145, "learning_rate": 3.990603758839647e-06, "loss": 0.6428, "step": 6822 }, { "epoch": 0.577853059496083, "grad_norm": 1.2719634386271368, "learning_rate": 3.989260403291752e-06, "loss": 0.64, "step": 6823 }, { "epoch": 0.5779377514291764, "grad_norm": 1.4269850814385263, "learning_rate": 3.987917123807365e-06, "loss": 0.6165, "step": 6824 }, { "epoch": 0.5780224433622697, "grad_norm": 1.3072843823767353, "learning_rate": 3.986573920487576e-06, "loss": 0.6251, "step": 6825 }, { "epoch": 0.5781071352953631, "grad_norm": 1.4078481463380572, "learning_rate": 3.985230793433463e-06, "loss": 0.6277, "step": 6826 }, { "epoch": 0.5781918272284565, "grad_norm": 1.1928319682645299, "learning_rate": 3.983887742746106e-06, "loss": 0.6584, "step": 6827 }, { "epoch": 0.5782765191615499, "grad_norm": 1.5014606815082732, "learning_rate": 3.98254476852658e-06, "loss": 0.6544, "step": 6828 }, { "epoch": 0.5783612110946432, "grad_norm": 1.3120993416282947, "learning_rate": 3.981201870875947e-06, "loss": 0.6734, "step": 6829 }, { "epoch": 0.5784459030277366, "grad_norm": 1.664530043018731, "learning_rate": 3.979859049895267e-06, "loss": 0.6104, "step": 6830 }, { "epoch": 0.57853059496083, "grad_norm": 1.5089595775482483, "learning_rate": 3.9785163056855955e-06, "loss": 0.646, "step": 6831 }, { "epoch": 0.5786152868939234, "grad_norm": 1.2601120728789545, "learning_rate": 3.977173638347981e-06, "loss": 0.6708, "step": 6832 }, { "epoch": 0.5786999788270167, "grad_norm": 1.4307149160553452, "learning_rate": 3.975831047983467e-06, "loss": 0.6262, "step": 6833 }, { "epoch": 0.5787846707601101, "grad_norm": 1.384173097161606, "learning_rate": 3.974488534693088e-06, "loss": 0.6407, "step": 6834 }, { "epoch": 0.5788693626932034, "grad_norm": 1.341870371782584, "learning_rate": 3.973146098577878e-06, "loss": 0.6444, "step": 6835 }, { "epoch": 0.5789540546262969, "grad_norm": 1.2500411826910616, "learning_rate": 3.971803739738864e-06, "loss": 0.6883, "step": 6836 }, { "epoch": 0.5790387465593902, "grad_norm": 1.2759886429202558, "learning_rate": 3.970461458277061e-06, "loss": 0.5899, "step": 6837 }, { "epoch": 0.5791234384924836, "grad_norm": 1.9380617800234539, "learning_rate": 3.9691192542934855e-06, "loss": 0.6101, "step": 6838 }, { "epoch": 0.579208130425577, "grad_norm": 1.3482866531714113, "learning_rate": 3.967777127889144e-06, "loss": 0.6222, "step": 6839 }, { "epoch": 0.5792928223586703, "grad_norm": 1.3890272871042801, "learning_rate": 3.96643507916504e-06, "loss": 0.676, "step": 6840 }, { "epoch": 0.5793775142917638, "grad_norm": 1.8786414020650708, "learning_rate": 3.96509310822217e-06, "loss": 0.5966, "step": 6841 }, { "epoch": 0.5794622062248571, "grad_norm": 1.212812670849384, "learning_rate": 3.963751215161522e-06, "loss": 0.6522, "step": 6842 }, { "epoch": 0.5795468981579505, "grad_norm": 3.442720092270041, "learning_rate": 3.962409400084084e-06, "loss": 0.6183, "step": 6843 }, { "epoch": 0.5796315900910438, "grad_norm": 0.594346449941692, "learning_rate": 3.961067663090832e-06, "loss": 0.8619, "step": 6844 }, { "epoch": 0.5797162820241372, "grad_norm": 0.5916634411440729, "learning_rate": 3.959726004282739e-06, "loss": 0.8462, "step": 6845 }, { "epoch": 0.5798009739572306, "grad_norm": 2.8968570661614983, "learning_rate": 3.958384423760775e-06, "loss": 0.611, "step": 6846 }, { "epoch": 0.579885665890324, "grad_norm": 1.2521456686013306, "learning_rate": 3.957042921625898e-06, "loss": 0.6284, "step": 6847 }, { "epoch": 0.5799703578234173, "grad_norm": 1.4265969713051354, "learning_rate": 3.955701497979061e-06, "loss": 0.6284, "step": 6848 }, { "epoch": 0.5800550497565107, "grad_norm": 1.3156784042583907, "learning_rate": 3.95436015292122e-06, "loss": 0.6659, "step": 6849 }, { "epoch": 0.580139741689604, "grad_norm": 1.3407999498597694, "learning_rate": 3.953018886553313e-06, "loss": 0.6259, "step": 6850 }, { "epoch": 0.5802244336226975, "grad_norm": 1.4863477613209208, "learning_rate": 3.951677698976278e-06, "loss": 0.6126, "step": 6851 }, { "epoch": 0.5803091255557908, "grad_norm": 1.6544882967503447, "learning_rate": 3.950336590291048e-06, "loss": 0.5855, "step": 6852 }, { "epoch": 0.5803938174888842, "grad_norm": 1.5346430611123367, "learning_rate": 3.948995560598547e-06, "loss": 0.6232, "step": 6853 }, { "epoch": 0.5804785094219775, "grad_norm": 1.6883851698863717, "learning_rate": 3.9476546099996966e-06, "loss": 0.6368, "step": 6854 }, { "epoch": 0.5805632013550709, "grad_norm": 1.287022623378088, "learning_rate": 3.946313738595408e-06, "loss": 0.6049, "step": 6855 }, { "epoch": 0.5806478932881644, "grad_norm": 1.55729901144177, "learning_rate": 3.94497294648659e-06, "loss": 0.6233, "step": 6856 }, { "epoch": 0.5807325852212577, "grad_norm": 1.2818450821249616, "learning_rate": 3.943632233774148e-06, "loss": 0.6467, "step": 6857 }, { "epoch": 0.580817277154351, "grad_norm": 1.8984504513951221, "learning_rate": 3.942291600558969e-06, "loss": 0.6176, "step": 6858 }, { "epoch": 0.5809019690874444, "grad_norm": 2.0819391441509434, "learning_rate": 3.940951046941952e-06, "loss": 0.6275, "step": 6859 }, { "epoch": 0.5809866610205378, "grad_norm": 1.3941027425763042, "learning_rate": 3.939610573023974e-06, "loss": 0.647, "step": 6860 }, { "epoch": 0.5810713529536312, "grad_norm": 1.8030244671454845, "learning_rate": 3.938270178905916e-06, "loss": 0.6672, "step": 6861 }, { "epoch": 0.5811560448867246, "grad_norm": 1.3541397175852103, "learning_rate": 3.93692986468865e-06, "loss": 0.6415, "step": 6862 }, { "epoch": 0.5812407368198179, "grad_norm": 1.950275011086437, "learning_rate": 3.93558963047304e-06, "loss": 0.5931, "step": 6863 }, { "epoch": 0.5813254287529113, "grad_norm": 2.2704950269600603, "learning_rate": 3.9342494763599465e-06, "loss": 0.6265, "step": 6864 }, { "epoch": 0.5814101206860046, "grad_norm": 1.1233675484649197, "learning_rate": 3.932909402450224e-06, "loss": 0.6042, "step": 6865 }, { "epoch": 0.5814948126190981, "grad_norm": 2.0279037261141664, "learning_rate": 3.9315694088447195e-06, "loss": 0.6764, "step": 6866 }, { "epoch": 0.5815795045521914, "grad_norm": 1.3230588964283618, "learning_rate": 3.930229495644276e-06, "loss": 0.6472, "step": 6867 }, { "epoch": 0.5816641964852848, "grad_norm": 1.2240579656857915, "learning_rate": 3.9288896629497244e-06, "loss": 0.6913, "step": 6868 }, { "epoch": 0.5817488884183781, "grad_norm": 1.2180794127723016, "learning_rate": 3.9275499108618985e-06, "loss": 0.5935, "step": 6869 }, { "epoch": 0.5818335803514715, "grad_norm": 1.3537019093686555, "learning_rate": 3.926210239481623e-06, "loss": 0.6104, "step": 6870 }, { "epoch": 0.5819182722845649, "grad_norm": 1.4202274881295271, "learning_rate": 3.924870648909711e-06, "loss": 0.6623, "step": 6871 }, { "epoch": 0.5820029642176583, "grad_norm": 1.472367068077017, "learning_rate": 3.9235311392469755e-06, "loss": 0.6174, "step": 6872 }, { "epoch": 0.5820876561507516, "grad_norm": 1.2924619835950588, "learning_rate": 3.922191710594223e-06, "loss": 0.7218, "step": 6873 }, { "epoch": 0.582172348083845, "grad_norm": 1.753176311626589, "learning_rate": 3.92085236305225e-06, "loss": 0.6222, "step": 6874 }, { "epoch": 0.5822570400169383, "grad_norm": 1.137135179214931, "learning_rate": 3.919513096721853e-06, "loss": 0.6648, "step": 6875 }, { "epoch": 0.5823417319500318, "grad_norm": 2.962467451989436, "learning_rate": 3.918173911703816e-06, "loss": 0.5913, "step": 6876 }, { "epoch": 0.5824264238831252, "grad_norm": 1.724497294387748, "learning_rate": 3.9168348080989195e-06, "loss": 0.6417, "step": 6877 }, { "epoch": 0.5825111158162185, "grad_norm": 1.4093756715967856, "learning_rate": 3.915495786007942e-06, "loss": 0.6515, "step": 6878 }, { "epoch": 0.5825958077493119, "grad_norm": 1.864571821177393, "learning_rate": 3.914156845531647e-06, "loss": 0.6684, "step": 6879 }, { "epoch": 0.5826804996824052, "grad_norm": 1.377696346628149, "learning_rate": 3.912817986770801e-06, "loss": 0.6614, "step": 6880 }, { "epoch": 0.5827651916154987, "grad_norm": 1.3274781252561336, "learning_rate": 3.911479209826157e-06, "loss": 0.626, "step": 6881 }, { "epoch": 0.582849883548592, "grad_norm": 1.2874868435537945, "learning_rate": 3.910140514798466e-06, "loss": 0.6693, "step": 6882 }, { "epoch": 0.5829345754816854, "grad_norm": 1.230705070756902, "learning_rate": 3.908801901788474e-06, "loss": 0.6302, "step": 6883 }, { "epoch": 0.5830192674147787, "grad_norm": 1.1619172475753812, "learning_rate": 3.907463370896916e-06, "loss": 0.6686, "step": 6884 }, { "epoch": 0.5831039593478721, "grad_norm": 1.8505459176555934, "learning_rate": 3.906124922224524e-06, "loss": 0.5875, "step": 6885 }, { "epoch": 0.5831886512809655, "grad_norm": 1.1560582240290431, "learning_rate": 3.904786555872025e-06, "loss": 0.6586, "step": 6886 }, { "epoch": 0.5832733432140589, "grad_norm": 1.5170668817692925, "learning_rate": 3.903448271940137e-06, "loss": 0.6446, "step": 6887 }, { "epoch": 0.5833580351471522, "grad_norm": 1.3477458903420887, "learning_rate": 3.902110070529574e-06, "loss": 0.6458, "step": 6888 }, { "epoch": 0.5834427270802456, "grad_norm": 1.9850261402612146, "learning_rate": 3.900771951741039e-06, "loss": 0.668, "step": 6889 }, { "epoch": 0.5835274190133389, "grad_norm": 1.3466941134973034, "learning_rate": 3.899433915675237e-06, "loss": 0.6097, "step": 6890 }, { "epoch": 0.5836121109464324, "grad_norm": 1.3927238826415795, "learning_rate": 3.898095962432862e-06, "loss": 0.663, "step": 6891 }, { "epoch": 0.5836968028795257, "grad_norm": 1.7419144388652104, "learning_rate": 3.896758092114599e-06, "loss": 0.6498, "step": 6892 }, { "epoch": 0.5837814948126191, "grad_norm": 1.3146567893084413, "learning_rate": 3.895420304821131e-06, "loss": 0.6755, "step": 6893 }, { "epoch": 0.5838661867457124, "grad_norm": 1.4094794741159447, "learning_rate": 3.894082600653136e-06, "loss": 0.6787, "step": 6894 }, { "epoch": 0.5839508786788058, "grad_norm": 1.5078667042868386, "learning_rate": 3.89274497971128e-06, "loss": 0.6237, "step": 6895 }, { "epoch": 0.5840355706118993, "grad_norm": 1.2135041350229663, "learning_rate": 3.891407442096228e-06, "loss": 0.671, "step": 6896 }, { "epoch": 0.5841202625449926, "grad_norm": 1.395671299632062, "learning_rate": 3.890069987908636e-06, "loss": 0.5972, "step": 6897 }, { "epoch": 0.584204954478086, "grad_norm": 1.7671863196253998, "learning_rate": 3.888732617249154e-06, "loss": 0.6288, "step": 6898 }, { "epoch": 0.5842896464111793, "grad_norm": 2.300884013079401, "learning_rate": 3.887395330218429e-06, "loss": 0.6463, "step": 6899 }, { "epoch": 0.5843743383442727, "grad_norm": 1.2772964369344422, "learning_rate": 3.886058126917095e-06, "loss": 0.6337, "step": 6900 }, { "epoch": 0.5844590302773661, "grad_norm": 0.6070623161944625, "learning_rate": 3.884721007445786e-06, "loss": 0.8251, "step": 6901 }, { "epoch": 0.5845437222104595, "grad_norm": 1.3114861425143662, "learning_rate": 3.883383971905129e-06, "loss": 0.6009, "step": 6902 }, { "epoch": 0.5846284141435528, "grad_norm": 1.1974160006519443, "learning_rate": 3.882047020395738e-06, "loss": 0.6214, "step": 6903 }, { "epoch": 0.5847131060766462, "grad_norm": 1.2899938712073211, "learning_rate": 3.88071015301823e-06, "loss": 0.6572, "step": 6904 }, { "epoch": 0.5847977980097395, "grad_norm": 3.363089513893959, "learning_rate": 3.87937336987321e-06, "loss": 0.6236, "step": 6905 }, { "epoch": 0.584882489942833, "grad_norm": 0.5995972298117663, "learning_rate": 3.878036671061276e-06, "loss": 0.8596, "step": 6906 }, { "epoch": 0.5849671818759263, "grad_norm": 1.1689874473258923, "learning_rate": 3.876700056683026e-06, "loss": 0.6095, "step": 6907 }, { "epoch": 0.5850518738090197, "grad_norm": 2.0185629132378136, "learning_rate": 3.875363526839043e-06, "loss": 0.6247, "step": 6908 }, { "epoch": 0.585136565742113, "grad_norm": 2.6604340743847654, "learning_rate": 3.874027081629912e-06, "loss": 0.6536, "step": 6909 }, { "epoch": 0.5852212576752064, "grad_norm": 1.9600688181621762, "learning_rate": 3.872690721156203e-06, "loss": 0.6361, "step": 6910 }, { "epoch": 0.5853059496082998, "grad_norm": 2.061129276976692, "learning_rate": 3.871354445518487e-06, "loss": 0.6307, "step": 6911 }, { "epoch": 0.5853906415413932, "grad_norm": 1.3914878388572491, "learning_rate": 3.870018254817328e-06, "loss": 0.6154, "step": 6912 }, { "epoch": 0.5854753334744865, "grad_norm": 7.226636613101513, "learning_rate": 3.868682149153277e-06, "loss": 0.6746, "step": 6913 }, { "epoch": 0.5855600254075799, "grad_norm": 1.7902335482314937, "learning_rate": 3.867346128626883e-06, "loss": 0.6424, "step": 6914 }, { "epoch": 0.5856447173406732, "grad_norm": 1.2336788464343165, "learning_rate": 3.8660101933386945e-06, "loss": 0.6195, "step": 6915 }, { "epoch": 0.5857294092737667, "grad_norm": 1.9089600933567556, "learning_rate": 3.8646743433892415e-06, "loss": 0.6189, "step": 6916 }, { "epoch": 0.5858141012068601, "grad_norm": 1.4086113917302614, "learning_rate": 3.863338578879057e-06, "loss": 0.6282, "step": 6917 }, { "epoch": 0.5858987931399534, "grad_norm": 2.3679775760257633, "learning_rate": 3.8620028999086614e-06, "loss": 0.6566, "step": 6918 }, { "epoch": 0.5859834850730468, "grad_norm": 1.433822863523151, "learning_rate": 3.860667306578574e-06, "loss": 0.58, "step": 6919 }, { "epoch": 0.5860681770061401, "grad_norm": 1.2645455567972708, "learning_rate": 3.8593317989893065e-06, "loss": 0.645, "step": 6920 }, { "epoch": 0.5861528689392336, "grad_norm": 1.3783167164179475, "learning_rate": 3.8579963772413595e-06, "loss": 0.6479, "step": 6921 }, { "epoch": 0.5862375608723269, "grad_norm": 1.3514580811450931, "learning_rate": 3.856661041435233e-06, "loss": 0.6528, "step": 6922 }, { "epoch": 0.5863222528054203, "grad_norm": 1.1446518173426061, "learning_rate": 3.8553257916714195e-06, "loss": 0.6342, "step": 6923 }, { "epoch": 0.5864069447385136, "grad_norm": 1.300699566373422, "learning_rate": 3.853990628050398e-06, "loss": 0.6571, "step": 6924 }, { "epoch": 0.586491636671607, "grad_norm": 1.4092051794231943, "learning_rate": 3.852655550672653e-06, "loss": 0.667, "step": 6925 }, { "epoch": 0.5865763286047004, "grad_norm": 1.85415714216407, "learning_rate": 3.851320559638651e-06, "loss": 0.6855, "step": 6926 }, { "epoch": 0.5866610205377938, "grad_norm": 1.2191793486692801, "learning_rate": 3.84998565504886e-06, "loss": 0.644, "step": 6927 }, { "epoch": 0.5867457124708871, "grad_norm": 1.8040633610508883, "learning_rate": 3.848650837003739e-06, "loss": 0.6277, "step": 6928 }, { "epoch": 0.5868304044039805, "grad_norm": 1.7261351000524519, "learning_rate": 3.847316105603739e-06, "loss": 0.5982, "step": 6929 }, { "epoch": 0.5869150963370738, "grad_norm": 1.4645733020887526, "learning_rate": 3.845981460949304e-06, "loss": 0.6122, "step": 6930 }, { "epoch": 0.5869997882701673, "grad_norm": 0.6156834629752248, "learning_rate": 3.844646903140878e-06, "loss": 0.8459, "step": 6931 }, { "epoch": 0.5870844802032607, "grad_norm": 1.3021783759334766, "learning_rate": 3.843312432278888e-06, "loss": 0.6088, "step": 6932 }, { "epoch": 0.587169172136354, "grad_norm": 1.3908229556276275, "learning_rate": 3.841978048463764e-06, "loss": 0.6455, "step": 6933 }, { "epoch": 0.5872538640694474, "grad_norm": 1.4538296329781613, "learning_rate": 3.840643751795921e-06, "loss": 0.614, "step": 6934 }, { "epoch": 0.5873385560025408, "grad_norm": 1.724122260340457, "learning_rate": 3.839309542375777e-06, "loss": 0.6174, "step": 6935 }, { "epoch": 0.5874232479356342, "grad_norm": 1.7624120873244353, "learning_rate": 3.837975420303737e-06, "loss": 0.6576, "step": 6936 }, { "epoch": 0.5875079398687275, "grad_norm": 0.6445847508503342, "learning_rate": 3.836641385680199e-06, "loss": 0.8362, "step": 6937 }, { "epoch": 0.5875926318018209, "grad_norm": 1.3977477687923108, "learning_rate": 3.835307438605555e-06, "loss": 0.6246, "step": 6938 }, { "epoch": 0.5876773237349142, "grad_norm": 1.3259154344972712, "learning_rate": 3.833973579180197e-06, "loss": 0.6078, "step": 6939 }, { "epoch": 0.5877620156680077, "grad_norm": 1.3459917946665585, "learning_rate": 3.832639807504499e-06, "loss": 0.6965, "step": 6940 }, { "epoch": 0.587846707601101, "grad_norm": 1.4094190899452779, "learning_rate": 3.831306123678839e-06, "loss": 0.6504, "step": 6941 }, { "epoch": 0.5879313995341944, "grad_norm": 1.2992272143967152, "learning_rate": 3.829972527803579e-06, "loss": 0.5729, "step": 6942 }, { "epoch": 0.5880160914672877, "grad_norm": 1.4012000677879153, "learning_rate": 3.828639019979083e-06, "loss": 0.6849, "step": 6943 }, { "epoch": 0.5881007834003811, "grad_norm": 1.2428467471050149, "learning_rate": 3.827305600305707e-06, "loss": 0.623, "step": 6944 }, { "epoch": 0.5881854753334745, "grad_norm": 2.68724504487104, "learning_rate": 3.825972268883788e-06, "loss": 0.6886, "step": 6945 }, { "epoch": 0.5882701672665679, "grad_norm": 1.6876032777622898, "learning_rate": 3.824639025813678e-06, "loss": 0.6559, "step": 6946 }, { "epoch": 0.5883548591996612, "grad_norm": 1.3821899798401356, "learning_rate": 3.823305871195702e-06, "loss": 0.6531, "step": 6947 }, { "epoch": 0.5884395511327546, "grad_norm": 1.9324525520068805, "learning_rate": 3.821972805130191e-06, "loss": 0.6922, "step": 6948 }, { "epoch": 0.5885242430658479, "grad_norm": 1.3105134700904746, "learning_rate": 3.820639827717464e-06, "loss": 0.599, "step": 6949 }, { "epoch": 0.5886089349989414, "grad_norm": 1.4277385646306304, "learning_rate": 3.819306939057835e-06, "loss": 0.5981, "step": 6950 }, { "epoch": 0.5886936269320348, "grad_norm": 1.5030675873635033, "learning_rate": 3.81797413925161e-06, "loss": 0.6237, "step": 6951 }, { "epoch": 0.5887783188651281, "grad_norm": 1.452873820801597, "learning_rate": 3.816641428399093e-06, "loss": 0.6279, "step": 6952 }, { "epoch": 0.5888630107982215, "grad_norm": 1.252619699669093, "learning_rate": 3.8153088066005714e-06, "loss": 0.6464, "step": 6953 }, { "epoch": 0.5889477027313148, "grad_norm": 1.2805986198085015, "learning_rate": 3.813976273956339e-06, "loss": 0.5974, "step": 6954 }, { "epoch": 0.5890323946644083, "grad_norm": 2.0403070972903756, "learning_rate": 3.8126438305666685e-06, "loss": 0.6742, "step": 6955 }, { "epoch": 0.5891170865975016, "grad_norm": 1.3145806500864736, "learning_rate": 3.8113114765318384e-06, "loss": 0.6457, "step": 6956 }, { "epoch": 0.589201778530595, "grad_norm": 1.2689585145633562, "learning_rate": 3.8099792119521163e-06, "loss": 0.6463, "step": 6957 }, { "epoch": 0.5892864704636883, "grad_norm": 1.4202974430521067, "learning_rate": 3.808647036927757e-06, "loss": 0.6266, "step": 6958 }, { "epoch": 0.5893711623967817, "grad_norm": 1.6860741100418872, "learning_rate": 3.807314951559017e-06, "loss": 0.6355, "step": 6959 }, { "epoch": 0.5894558543298751, "grad_norm": 0.6335457121011734, "learning_rate": 3.805982955946145e-06, "loss": 0.8238, "step": 6960 }, { "epoch": 0.5895405462629685, "grad_norm": 0.5679461888946084, "learning_rate": 3.8046510501893752e-06, "loss": 0.7896, "step": 6961 }, { "epoch": 0.5896252381960618, "grad_norm": 1.409717982312223, "learning_rate": 3.803319234388946e-06, "loss": 0.627, "step": 6962 }, { "epoch": 0.5897099301291552, "grad_norm": 1.4348337522023078, "learning_rate": 3.8019875086450793e-06, "loss": 0.6807, "step": 6963 }, { "epoch": 0.5897946220622485, "grad_norm": 0.6371750278333165, "learning_rate": 3.800655873057996e-06, "loss": 0.8497, "step": 6964 }, { "epoch": 0.589879313995342, "grad_norm": 1.7593936348382617, "learning_rate": 3.7993243277279105e-06, "loss": 0.6706, "step": 6965 }, { "epoch": 0.5899640059284353, "grad_norm": 1.4733190518470483, "learning_rate": 3.7979928727550264e-06, "loss": 0.6343, "step": 6966 }, { "epoch": 0.5900486978615287, "grad_norm": 1.4585995435354238, "learning_rate": 3.7966615082395436e-06, "loss": 0.6192, "step": 6967 }, { "epoch": 0.590133389794622, "grad_norm": 1.6241468324955108, "learning_rate": 3.7953302342816563e-06, "loss": 0.6554, "step": 6968 }, { "epoch": 0.5902180817277154, "grad_norm": 1.1853689570667234, "learning_rate": 3.7939990509815453e-06, "loss": 0.6052, "step": 6969 }, { "epoch": 0.5903027736608089, "grad_norm": 1.326932350368146, "learning_rate": 3.792667958439394e-06, "loss": 0.5998, "step": 6970 }, { "epoch": 0.5903874655939022, "grad_norm": 1.3472927489514397, "learning_rate": 3.79133695675537e-06, "loss": 0.6294, "step": 6971 }, { "epoch": 0.5904721575269956, "grad_norm": 2.0423616841288768, "learning_rate": 3.79000604602964e-06, "loss": 0.668, "step": 6972 }, { "epoch": 0.5905568494600889, "grad_norm": 0.6091342844943225, "learning_rate": 3.788675226362364e-06, "loss": 0.8383, "step": 6973 }, { "epoch": 0.5906415413931823, "grad_norm": 1.5251542175929145, "learning_rate": 3.78734449785369e-06, "loss": 0.6281, "step": 6974 }, { "epoch": 0.5907262333262757, "grad_norm": 1.366972225086569, "learning_rate": 3.786013860603764e-06, "loss": 0.6232, "step": 6975 }, { "epoch": 0.5908109252593691, "grad_norm": 1.3306114572752905, "learning_rate": 3.784683314712724e-06, "loss": 0.6285, "step": 6976 }, { "epoch": 0.5908956171924624, "grad_norm": 1.5167529071845671, "learning_rate": 3.7833528602806997e-06, "loss": 0.661, "step": 6977 }, { "epoch": 0.5909803091255558, "grad_norm": 1.4850222174277774, "learning_rate": 3.782022497407817e-06, "loss": 0.6648, "step": 6978 }, { "epoch": 0.5910650010586491, "grad_norm": 1.8110426315473203, "learning_rate": 3.7806922261941883e-06, "loss": 0.6116, "step": 6979 }, { "epoch": 0.5911496929917426, "grad_norm": 1.6610327738840358, "learning_rate": 3.779362046739925e-06, "loss": 0.6132, "step": 6980 }, { "epoch": 0.5912343849248359, "grad_norm": 1.3835412935492437, "learning_rate": 3.778031959145135e-06, "loss": 0.5545, "step": 6981 }, { "epoch": 0.5913190768579293, "grad_norm": 1.6008438725365852, "learning_rate": 3.7767019635099086e-06, "loss": 0.5725, "step": 6982 }, { "epoch": 0.5914037687910226, "grad_norm": 1.4739514056110061, "learning_rate": 3.7753720599343373e-06, "loss": 0.5676, "step": 6983 }, { "epoch": 0.591488460724116, "grad_norm": 1.6305122039471414, "learning_rate": 3.774042248518503e-06, "loss": 0.6079, "step": 6984 }, { "epoch": 0.5915731526572094, "grad_norm": 1.2073501899794357, "learning_rate": 3.7727125293624813e-06, "loss": 0.6663, "step": 6985 }, { "epoch": 0.5916578445903028, "grad_norm": 1.2284732088053625, "learning_rate": 3.771382902566341e-06, "loss": 0.6348, "step": 6986 }, { "epoch": 0.5917425365233961, "grad_norm": 1.3293718019310758, "learning_rate": 3.7700533682301426e-06, "loss": 0.6419, "step": 6987 }, { "epoch": 0.5918272284564895, "grad_norm": 1.689167837454024, "learning_rate": 3.768723926453941e-06, "loss": 0.6214, "step": 6988 }, { "epoch": 0.5919119203895828, "grad_norm": 1.3369960065137096, "learning_rate": 3.7673945773377856e-06, "loss": 0.5889, "step": 6989 }, { "epoch": 0.5919966123226763, "grad_norm": 1.4126168629899465, "learning_rate": 3.7660653209817123e-06, "loss": 0.6214, "step": 6990 }, { "epoch": 0.5920813042557697, "grad_norm": 1.6421289501877616, "learning_rate": 3.764736157485761e-06, "loss": 0.5885, "step": 6991 }, { "epoch": 0.592165996188863, "grad_norm": 1.7174727057170336, "learning_rate": 3.7634070869499524e-06, "loss": 0.6814, "step": 6992 }, { "epoch": 0.5922506881219564, "grad_norm": 1.5159568744048812, "learning_rate": 3.7620781094743084e-06, "loss": 0.5868, "step": 6993 }, { "epoch": 0.5923353800550497, "grad_norm": 2.026471450885235, "learning_rate": 3.7607492251588432e-06, "loss": 0.5738, "step": 6994 }, { "epoch": 0.5924200719881432, "grad_norm": 1.2389150161059386, "learning_rate": 3.75942043410356e-06, "loss": 0.5983, "step": 6995 }, { "epoch": 0.5925047639212365, "grad_norm": 1.3943077698763076, "learning_rate": 3.758091736408458e-06, "loss": 0.6378, "step": 6996 }, { "epoch": 0.5925894558543299, "grad_norm": 0.638402478259173, "learning_rate": 3.7567631321735302e-06, "loss": 0.8944, "step": 6997 }, { "epoch": 0.5926741477874232, "grad_norm": 2.567868959800259, "learning_rate": 3.755434621498759e-06, "loss": 0.6216, "step": 6998 }, { "epoch": 0.5927588397205166, "grad_norm": 2.2672487499086946, "learning_rate": 3.7541062044841253e-06, "loss": 0.6474, "step": 6999 }, { "epoch": 0.59284353165361, "grad_norm": 0.6220050163837114, "learning_rate": 3.7527778812295934e-06, "loss": 0.8487, "step": 7000 }, { "epoch": 0.5929282235867034, "grad_norm": 1.2570994061542327, "learning_rate": 3.7514496518351313e-06, "loss": 0.6503, "step": 7001 }, { "epoch": 0.5930129155197967, "grad_norm": 1.2657062143566962, "learning_rate": 3.7501215164006966e-06, "loss": 0.6746, "step": 7002 }, { "epoch": 0.5930976074528901, "grad_norm": 1.4013793191785495, "learning_rate": 3.748793475026234e-06, "loss": 0.679, "step": 7003 }, { "epoch": 0.5931822993859834, "grad_norm": 2.3432156773207695, "learning_rate": 3.7474655278116876e-06, "loss": 0.6505, "step": 7004 }, { "epoch": 0.5932669913190769, "grad_norm": 1.4650084400829362, "learning_rate": 3.7461376748569946e-06, "loss": 0.6425, "step": 7005 }, { "epoch": 0.5933516832521702, "grad_norm": 1.42988315063562, "learning_rate": 3.744809916262079e-06, "loss": 0.6417, "step": 7006 }, { "epoch": 0.5934363751852636, "grad_norm": 1.3814723950646621, "learning_rate": 3.7434822521268654e-06, "loss": 0.5944, "step": 7007 }, { "epoch": 0.593521067118357, "grad_norm": 1.7132734923288615, "learning_rate": 3.7421546825512644e-06, "loss": 0.6322, "step": 7008 }, { "epoch": 0.5936057590514503, "grad_norm": 1.2330860830599455, "learning_rate": 3.740827207635185e-06, "loss": 0.5953, "step": 7009 }, { "epoch": 0.5936904509845438, "grad_norm": 1.7229509987058342, "learning_rate": 3.7394998274785273e-06, "loss": 0.6926, "step": 7010 }, { "epoch": 0.5937751429176371, "grad_norm": 1.8786028415472449, "learning_rate": 3.738172542181179e-06, "loss": 0.6677, "step": 7011 }, { "epoch": 0.5938598348507305, "grad_norm": 1.2938581554843744, "learning_rate": 3.7368453518430297e-06, "loss": 0.6291, "step": 7012 }, { "epoch": 0.5939445267838238, "grad_norm": 1.3524709714953975, "learning_rate": 3.735518256563959e-06, "loss": 0.6056, "step": 7013 }, { "epoch": 0.5940292187169172, "grad_norm": 2.719012501654212, "learning_rate": 3.7341912564438328e-06, "loss": 0.6267, "step": 7014 }, { "epoch": 0.5941139106500106, "grad_norm": 1.4377322856181447, "learning_rate": 3.732864351582518e-06, "loss": 0.6765, "step": 7015 }, { "epoch": 0.594198602583104, "grad_norm": 0.6290973612578586, "learning_rate": 3.73153754207987e-06, "loss": 0.7953, "step": 7016 }, { "epoch": 0.5942832945161973, "grad_norm": 3.8115695017924844, "learning_rate": 3.7302108280357386e-06, "loss": 0.6298, "step": 7017 }, { "epoch": 0.5943679864492907, "grad_norm": 1.110827903362437, "learning_rate": 3.7288842095499677e-06, "loss": 0.5897, "step": 7018 }, { "epoch": 0.594452678382384, "grad_norm": 1.309092414244568, "learning_rate": 3.7275576867223896e-06, "loss": 0.6176, "step": 7019 }, { "epoch": 0.5945373703154775, "grad_norm": 1.552725486923929, "learning_rate": 3.726231259652835e-06, "loss": 0.6978, "step": 7020 }, { "epoch": 0.5946220622485708, "grad_norm": 1.9057945969336434, "learning_rate": 3.7249049284411206e-06, "loss": 0.6536, "step": 7021 }, { "epoch": 0.5947067541816642, "grad_norm": 1.309428223019665, "learning_rate": 3.7235786931870633e-06, "loss": 0.6434, "step": 7022 }, { "epoch": 0.5947914461147575, "grad_norm": 1.362683589944204, "learning_rate": 3.7222525539904696e-06, "loss": 0.6442, "step": 7023 }, { "epoch": 0.5948761380478509, "grad_norm": 0.65342433902482, "learning_rate": 3.7209265109511363e-06, "loss": 0.8783, "step": 7024 }, { "epoch": 0.5949608299809444, "grad_norm": 1.5649011454028483, "learning_rate": 3.7196005641688538e-06, "loss": 0.6741, "step": 7025 }, { "epoch": 0.5950455219140377, "grad_norm": 1.3646533611638514, "learning_rate": 3.7182747137434126e-06, "loss": 0.6385, "step": 7026 }, { "epoch": 0.595130213847131, "grad_norm": 6.958560351031939, "learning_rate": 3.7169489597745843e-06, "loss": 0.5968, "step": 7027 }, { "epoch": 0.5952149057802244, "grad_norm": 2.010408670640459, "learning_rate": 3.7156233023621422e-06, "loss": 0.6041, "step": 7028 }, { "epoch": 0.5952995977133178, "grad_norm": 1.4642758090399517, "learning_rate": 3.714297741605846e-06, "loss": 0.6265, "step": 7029 }, { "epoch": 0.5953842896464112, "grad_norm": 1.8179960412600324, "learning_rate": 3.712972277605453e-06, "loss": 0.5902, "step": 7030 }, { "epoch": 0.5954689815795046, "grad_norm": 0.6888483258893445, "learning_rate": 3.711646910460713e-06, "loss": 0.845, "step": 7031 }, { "epoch": 0.5955536735125979, "grad_norm": 0.671360468990682, "learning_rate": 3.710321640271363e-06, "loss": 0.8637, "step": 7032 }, { "epoch": 0.5956383654456913, "grad_norm": 1.4925119759977779, "learning_rate": 3.7089964671371393e-06, "loss": 0.6212, "step": 7033 }, { "epoch": 0.5957230573787847, "grad_norm": 1.4387225262896406, "learning_rate": 3.70767139115777e-06, "loss": 0.6585, "step": 7034 }, { "epoch": 0.5958077493118781, "grad_norm": 1.4557315440324798, "learning_rate": 3.7063464124329696e-06, "loss": 0.5997, "step": 7035 }, { "epoch": 0.5958924412449714, "grad_norm": 2.587688565699345, "learning_rate": 3.705021531062452e-06, "loss": 0.6641, "step": 7036 }, { "epoch": 0.5959771331780648, "grad_norm": 2.110868847297737, "learning_rate": 3.7036967471459213e-06, "loss": 0.6403, "step": 7037 }, { "epoch": 0.5960618251111581, "grad_norm": 1.6104659755152906, "learning_rate": 3.702372060783074e-06, "loss": 0.5957, "step": 7038 }, { "epoch": 0.5961465170442516, "grad_norm": 1.2464066390730129, "learning_rate": 3.701047472073602e-06, "loss": 0.6209, "step": 7039 }, { "epoch": 0.5962312089773449, "grad_norm": 2.4195582759720486, "learning_rate": 3.6997229811171846e-06, "loss": 0.6279, "step": 7040 }, { "epoch": 0.5963159009104383, "grad_norm": 1.604302582565256, "learning_rate": 3.6983985880134987e-06, "loss": 0.7029, "step": 7041 }, { "epoch": 0.5964005928435316, "grad_norm": 1.3666591182318932, "learning_rate": 3.697074292862212e-06, "loss": 0.6463, "step": 7042 }, { "epoch": 0.596485284776625, "grad_norm": 1.751679080598232, "learning_rate": 3.695750095762983e-06, "loss": 0.6715, "step": 7043 }, { "epoch": 0.5965699767097185, "grad_norm": 1.3809070907919907, "learning_rate": 3.694425996815468e-06, "loss": 0.6159, "step": 7044 }, { "epoch": 0.5966546686428118, "grad_norm": 0.5974736105908006, "learning_rate": 3.693101996119307e-06, "loss": 0.8585, "step": 7045 }, { "epoch": 0.5967393605759052, "grad_norm": 1.9945610076451246, "learning_rate": 3.691778093774141e-06, "loss": 0.673, "step": 7046 }, { "epoch": 0.5968240525089985, "grad_norm": 1.429963388482559, "learning_rate": 3.6904542898796036e-06, "loss": 0.589, "step": 7047 }, { "epoch": 0.5969087444420919, "grad_norm": 4.217513877337406, "learning_rate": 3.6891305845353138e-06, "loss": 0.6136, "step": 7048 }, { "epoch": 0.5969934363751853, "grad_norm": 1.3979691268666792, "learning_rate": 3.6878069778408897e-06, "loss": 0.6368, "step": 7049 }, { "epoch": 0.5970781283082787, "grad_norm": 0.649430924524727, "learning_rate": 3.6864834698959375e-06, "loss": 0.8499, "step": 7050 }, { "epoch": 0.597162820241372, "grad_norm": 1.6399148342702436, "learning_rate": 3.6851600608000593e-06, "loss": 0.6234, "step": 7051 }, { "epoch": 0.5972475121744654, "grad_norm": 1.1146550779452327, "learning_rate": 3.6838367506528505e-06, "loss": 0.5966, "step": 7052 }, { "epoch": 0.5973322041075587, "grad_norm": 1.4081234211763267, "learning_rate": 3.6825135395538947e-06, "loss": 0.6189, "step": 7053 }, { "epoch": 0.5974168960406522, "grad_norm": 1.663160017012373, "learning_rate": 3.681190427602771e-06, "loss": 0.6571, "step": 7054 }, { "epoch": 0.5975015879737455, "grad_norm": 1.3681225889723294, "learning_rate": 3.6798674148990538e-06, "loss": 0.6003, "step": 7055 }, { "epoch": 0.5975862799068389, "grad_norm": 1.380884615602826, "learning_rate": 3.6785445015423005e-06, "loss": 0.621, "step": 7056 }, { "epoch": 0.5976709718399322, "grad_norm": 1.8297864971001627, "learning_rate": 3.6772216876320743e-06, "loss": 0.6175, "step": 7057 }, { "epoch": 0.5977556637730256, "grad_norm": 1.230541753359495, "learning_rate": 3.6758989732679184e-06, "loss": 0.5798, "step": 7058 }, { "epoch": 0.597840355706119, "grad_norm": 2.1250732520275424, "learning_rate": 3.6745763585493753e-06, "loss": 0.6322, "step": 7059 }, { "epoch": 0.5979250476392124, "grad_norm": 1.3617348085035155, "learning_rate": 3.6732538435759824e-06, "loss": 0.6553, "step": 7060 }, { "epoch": 0.5980097395723057, "grad_norm": 1.35074344557063, "learning_rate": 3.6719314284472605e-06, "loss": 0.6477, "step": 7061 }, { "epoch": 0.5980944315053991, "grad_norm": 1.4336063401266403, "learning_rate": 3.670609113262732e-06, "loss": 0.6743, "step": 7062 }, { "epoch": 0.5981791234384924, "grad_norm": 1.3201389522388163, "learning_rate": 3.6692868981219082e-06, "loss": 0.6212, "step": 7063 }, { "epoch": 0.5982638153715859, "grad_norm": 0.6465316674174487, "learning_rate": 3.6679647831242903e-06, "loss": 0.8046, "step": 7064 }, { "epoch": 0.5983485073046793, "grad_norm": 0.6447391323099991, "learning_rate": 3.6666427683693783e-06, "loss": 0.8541, "step": 7065 }, { "epoch": 0.5984331992377726, "grad_norm": 1.676348852885655, "learning_rate": 3.6653208539566556e-06, "loss": 0.5839, "step": 7066 }, { "epoch": 0.598517891170866, "grad_norm": 1.4935943910112461, "learning_rate": 3.6639990399856067e-06, "loss": 0.6937, "step": 7067 }, { "epoch": 0.5986025831039593, "grad_norm": 1.6862057037778573, "learning_rate": 3.6626773265557068e-06, "loss": 0.6161, "step": 7068 }, { "epoch": 0.5986872750370528, "grad_norm": 0.5756635577853256, "learning_rate": 3.6613557137664174e-06, "loss": 0.8417, "step": 7069 }, { "epoch": 0.5987719669701461, "grad_norm": 1.3528899327724677, "learning_rate": 3.6600342017171993e-06, "loss": 0.606, "step": 7070 }, { "epoch": 0.5988566589032395, "grad_norm": 1.4272477953055982, "learning_rate": 3.658712790507504e-06, "loss": 0.6315, "step": 7071 }, { "epoch": 0.5989413508363328, "grad_norm": 1.4375186515268157, "learning_rate": 3.6573914802367715e-06, "loss": 0.624, "step": 7072 }, { "epoch": 0.5990260427694262, "grad_norm": 1.395952829619415, "learning_rate": 3.6560702710044417e-06, "loss": 0.6085, "step": 7073 }, { "epoch": 0.5991107347025196, "grad_norm": 1.4381104172890022, "learning_rate": 3.6547491629099393e-06, "loss": 0.6235, "step": 7074 }, { "epoch": 0.599195426635613, "grad_norm": 3.132782557364666, "learning_rate": 3.653428156052685e-06, "loss": 0.615, "step": 7075 }, { "epoch": 0.5992801185687063, "grad_norm": 1.3550961514977815, "learning_rate": 3.6521072505320955e-06, "loss": 0.5945, "step": 7076 }, { "epoch": 0.5993648105017997, "grad_norm": 0.7024343781862633, "learning_rate": 3.6507864464475697e-06, "loss": 0.7843, "step": 7077 }, { "epoch": 0.599449502434893, "grad_norm": 1.339806266419263, "learning_rate": 3.6494657438985087e-06, "loss": 0.5649, "step": 7078 }, { "epoch": 0.5995341943679865, "grad_norm": 1.387543702126778, "learning_rate": 3.648145142984304e-06, "loss": 0.6097, "step": 7079 }, { "epoch": 0.5996188863010798, "grad_norm": 1.555894547045187, "learning_rate": 3.646824643804334e-06, "loss": 0.6554, "step": 7080 }, { "epoch": 0.5997035782341732, "grad_norm": 1.599842300880366, "learning_rate": 3.645504246457976e-06, "loss": 0.6787, "step": 7081 }, { "epoch": 0.5997882701672665, "grad_norm": 1.3376049616815837, "learning_rate": 3.6441839510445943e-06, "loss": 0.657, "step": 7082 }, { "epoch": 0.5998729621003599, "grad_norm": 1.37120434883513, "learning_rate": 3.64286375766355e-06, "loss": 0.6144, "step": 7083 }, { "epoch": 0.5999576540334534, "grad_norm": 1.3885210145491313, "learning_rate": 3.6415436664141957e-06, "loss": 0.6762, "step": 7084 }, { "epoch": 0.6000423459665467, "grad_norm": 0.626404946023339, "learning_rate": 3.640223677395872e-06, "loss": 0.8676, "step": 7085 }, { "epoch": 0.6001270378996401, "grad_norm": 0.6838225126558979, "learning_rate": 3.6389037907079196e-06, "loss": 0.8271, "step": 7086 }, { "epoch": 0.6002117298327334, "grad_norm": 1.3263680825752144, "learning_rate": 3.6375840064496613e-06, "loss": 0.6172, "step": 7087 }, { "epoch": 0.6002964217658268, "grad_norm": 1.4698578729686378, "learning_rate": 3.6362643247204214e-06, "loss": 0.6509, "step": 7088 }, { "epoch": 0.6003811136989202, "grad_norm": 14.815711803085879, "learning_rate": 3.6349447456195154e-06, "loss": 0.5913, "step": 7089 }, { "epoch": 0.6004658056320136, "grad_norm": 1.5888453039362582, "learning_rate": 3.6336252692462425e-06, "loss": 0.6323, "step": 7090 }, { "epoch": 0.6005504975651069, "grad_norm": 8.213176267556948, "learning_rate": 3.6323058956999023e-06, "loss": 0.6595, "step": 7091 }, { "epoch": 0.6006351894982003, "grad_norm": 0.6484079948163699, "learning_rate": 3.6309866250797898e-06, "loss": 0.878, "step": 7092 }, { "epoch": 0.6007198814312936, "grad_norm": 2.115546245902233, "learning_rate": 3.62966745748518e-06, "loss": 0.6122, "step": 7093 }, { "epoch": 0.6008045733643871, "grad_norm": 1.4742700403745725, "learning_rate": 3.6283483930153518e-06, "loss": 0.6705, "step": 7094 }, { "epoch": 0.6008892652974804, "grad_norm": 1.8396449207227705, "learning_rate": 3.627029431769569e-06, "loss": 0.6131, "step": 7095 }, { "epoch": 0.6009739572305738, "grad_norm": 1.69262098175729, "learning_rate": 3.6257105738470922e-06, "loss": 0.6752, "step": 7096 }, { "epoch": 0.6010586491636671, "grad_norm": 1.1304076311414546, "learning_rate": 3.6243918193471726e-06, "loss": 0.5917, "step": 7097 }, { "epoch": 0.6011433410967605, "grad_norm": 1.3656387178429148, "learning_rate": 3.623073168369051e-06, "loss": 0.6134, "step": 7098 }, { "epoch": 0.601228033029854, "grad_norm": 2.0185251320343136, "learning_rate": 3.621754621011966e-06, "loss": 0.5982, "step": 7099 }, { "epoch": 0.6013127249629473, "grad_norm": 2.0459918294168644, "learning_rate": 3.620436177375145e-06, "loss": 0.6784, "step": 7100 }, { "epoch": 0.6013974168960406, "grad_norm": 1.615590580437111, "learning_rate": 3.619117837557805e-06, "loss": 0.6762, "step": 7101 }, { "epoch": 0.601482108829134, "grad_norm": 1.63252980172944, "learning_rate": 3.617799601659161e-06, "loss": 0.6283, "step": 7102 }, { "epoch": 0.6015668007622273, "grad_norm": 1.2200326365639764, "learning_rate": 3.6164814697784157e-06, "loss": 0.6101, "step": 7103 }, { "epoch": 0.6016514926953208, "grad_norm": 1.520945532298592, "learning_rate": 3.6151634420147653e-06, "loss": 0.6004, "step": 7104 }, { "epoch": 0.6017361846284142, "grad_norm": 1.3833188216363235, "learning_rate": 3.6138455184673993e-06, "loss": 0.5991, "step": 7105 }, { "epoch": 0.6018208765615075, "grad_norm": 1.3444125495833565, "learning_rate": 3.6125276992354975e-06, "loss": 0.6205, "step": 7106 }, { "epoch": 0.6019055684946009, "grad_norm": 0.650091449097239, "learning_rate": 3.6112099844182336e-06, "loss": 0.8872, "step": 7107 }, { "epoch": 0.6019902604276942, "grad_norm": 0.6846599112513192, "learning_rate": 3.6098923741147734e-06, "loss": 0.8508, "step": 7108 }, { "epoch": 0.6020749523607877, "grad_norm": 1.689911254826801, "learning_rate": 3.6085748684242716e-06, "loss": 0.638, "step": 7109 }, { "epoch": 0.602159644293881, "grad_norm": 1.8720633755224485, "learning_rate": 3.6072574674458817e-06, "loss": 0.6486, "step": 7110 }, { "epoch": 0.6022443362269744, "grad_norm": 1.453528360540588, "learning_rate": 3.6059401712787397e-06, "loss": 0.6181, "step": 7111 }, { "epoch": 0.6023290281600677, "grad_norm": 1.6963779875925584, "learning_rate": 3.60462298002198e-06, "loss": 0.6748, "step": 7112 }, { "epoch": 0.6024137200931611, "grad_norm": 1.3691017237544056, "learning_rate": 3.6033058937747344e-06, "loss": 0.611, "step": 7113 }, { "epoch": 0.6024984120262545, "grad_norm": 1.3659338338835962, "learning_rate": 3.601988912636113e-06, "loss": 0.6292, "step": 7114 }, { "epoch": 0.6025831039593479, "grad_norm": 1.6003524825881825, "learning_rate": 3.6006720367052294e-06, "loss": 0.6589, "step": 7115 }, { "epoch": 0.6026677958924412, "grad_norm": 1.630783905800951, "learning_rate": 3.599355266081186e-06, "loss": 0.6136, "step": 7116 }, { "epoch": 0.6027524878255346, "grad_norm": 2.0077956375258648, "learning_rate": 3.5980386008630736e-06, "loss": 0.6327, "step": 7117 }, { "epoch": 0.6028371797586279, "grad_norm": 1.2593718521964796, "learning_rate": 3.596722041149982e-06, "loss": 0.6264, "step": 7118 }, { "epoch": 0.6029218716917214, "grad_norm": 1.4235520516487343, "learning_rate": 3.5954055870409866e-06, "loss": 0.6287, "step": 7119 }, { "epoch": 0.6030065636248148, "grad_norm": 1.4292266203736566, "learning_rate": 3.594089238635158e-06, "loss": 0.6131, "step": 7120 }, { "epoch": 0.6030912555579081, "grad_norm": 1.2142629315955846, "learning_rate": 3.5927729960315605e-06, "loss": 0.6791, "step": 7121 }, { "epoch": 0.6031759474910015, "grad_norm": 1.3648177305537985, "learning_rate": 3.5914568593292444e-06, "loss": 0.7041, "step": 7122 }, { "epoch": 0.6032606394240948, "grad_norm": 1.7024828888445855, "learning_rate": 3.590140828627261e-06, "loss": 0.5786, "step": 7123 }, { "epoch": 0.6033453313571883, "grad_norm": 1.4140819932395228, "learning_rate": 3.5888249040246435e-06, "loss": 0.5868, "step": 7124 }, { "epoch": 0.6034300232902816, "grad_norm": 1.7097914052910028, "learning_rate": 3.587509085620425e-06, "loss": 0.5949, "step": 7125 }, { "epoch": 0.603514715223375, "grad_norm": 1.2143119409230945, "learning_rate": 3.5861933735136286e-06, "loss": 0.6656, "step": 7126 }, { "epoch": 0.6035994071564683, "grad_norm": 1.3861661422565785, "learning_rate": 3.584877767803265e-06, "loss": 0.6301, "step": 7127 }, { "epoch": 0.6036840990895617, "grad_norm": 0.5843105049031062, "learning_rate": 3.5835622685883442e-06, "loss": 0.8334, "step": 7128 }, { "epoch": 0.6037687910226551, "grad_norm": 1.114860558097525, "learning_rate": 3.5822468759678636e-06, "loss": 0.6043, "step": 7129 }, { "epoch": 0.6038534829557485, "grad_norm": 5.378087756147023, "learning_rate": 3.5809315900408117e-06, "loss": 0.5922, "step": 7130 }, { "epoch": 0.6039381748888418, "grad_norm": 0.6697598554355847, "learning_rate": 3.579616410906174e-06, "loss": 0.8819, "step": 7131 }, { "epoch": 0.6040228668219352, "grad_norm": 1.5771218724964235, "learning_rate": 3.5783013386629203e-06, "loss": 0.5862, "step": 7132 }, { "epoch": 0.6041075587550285, "grad_norm": 1.3269517336365724, "learning_rate": 3.5769863734100197e-06, "loss": 0.6653, "step": 7133 }, { "epoch": 0.604192250688122, "grad_norm": 1.426998279424447, "learning_rate": 3.5756715152464316e-06, "loss": 0.6455, "step": 7134 }, { "epoch": 0.6042769426212153, "grad_norm": 1.2723728850129454, "learning_rate": 3.574356764271102e-06, "loss": 0.686, "step": 7135 }, { "epoch": 0.6043616345543087, "grad_norm": 1.3011359265851659, "learning_rate": 3.5730421205829745e-06, "loss": 0.6747, "step": 7136 }, { "epoch": 0.604446326487402, "grad_norm": 1.4144966256798412, "learning_rate": 3.5717275842809855e-06, "loss": 0.6373, "step": 7137 }, { "epoch": 0.6045310184204955, "grad_norm": 1.4855073278877335, "learning_rate": 3.5704131554640577e-06, "loss": 0.6318, "step": 7138 }, { "epoch": 0.6046157103535889, "grad_norm": 1.6167389682362037, "learning_rate": 3.5690988342311105e-06, "loss": 0.6708, "step": 7139 }, { "epoch": 0.6047004022866822, "grad_norm": 1.2177682931769094, "learning_rate": 3.567784620681053e-06, "loss": 0.6563, "step": 7140 }, { "epoch": 0.6047850942197756, "grad_norm": 1.8114222122124817, "learning_rate": 3.566470514912786e-06, "loss": 0.5655, "step": 7141 }, { "epoch": 0.6048697861528689, "grad_norm": 1.273605757213066, "learning_rate": 3.5651565170252055e-06, "loss": 0.6113, "step": 7142 }, { "epoch": 0.6049544780859624, "grad_norm": 1.685762906897615, "learning_rate": 3.5638426271171923e-06, "loss": 0.6587, "step": 7143 }, { "epoch": 0.6050391700190557, "grad_norm": 1.4078145672185063, "learning_rate": 3.562528845287627e-06, "loss": 0.6489, "step": 7144 }, { "epoch": 0.6051238619521491, "grad_norm": 1.6589621777928505, "learning_rate": 3.5612151716353806e-06, "loss": 0.662, "step": 7145 }, { "epoch": 0.6052085538852424, "grad_norm": 1.4009378726641728, "learning_rate": 3.5599016062593082e-06, "loss": 0.6099, "step": 7146 }, { "epoch": 0.6052932458183358, "grad_norm": 2.7099425717539103, "learning_rate": 3.558588149258267e-06, "loss": 0.6148, "step": 7147 }, { "epoch": 0.6053779377514292, "grad_norm": 1.3021518623701598, "learning_rate": 3.5572748007310994e-06, "loss": 0.6049, "step": 7148 }, { "epoch": 0.6054626296845226, "grad_norm": 1.5017172184031802, "learning_rate": 3.555961560776642e-06, "loss": 0.6262, "step": 7149 }, { "epoch": 0.6055473216176159, "grad_norm": 1.2886327994166549, "learning_rate": 3.5546484294937254e-06, "loss": 0.6677, "step": 7150 }, { "epoch": 0.6056320135507093, "grad_norm": 1.5267028942083753, "learning_rate": 3.5533354069811664e-06, "loss": 0.6091, "step": 7151 }, { "epoch": 0.6057167054838026, "grad_norm": 1.6701692485356334, "learning_rate": 3.5520224933377813e-06, "loss": 0.6663, "step": 7152 }, { "epoch": 0.6058013974168961, "grad_norm": 1.4515937604251024, "learning_rate": 3.550709688662367e-06, "loss": 0.6104, "step": 7153 }, { "epoch": 0.6058860893499894, "grad_norm": 1.716035734982714, "learning_rate": 3.5493969930537255e-06, "loss": 0.6665, "step": 7154 }, { "epoch": 0.6059707812830828, "grad_norm": 1.6125703042484112, "learning_rate": 3.5480844066106425e-06, "loss": 0.6022, "step": 7155 }, { "epoch": 0.6060554732161761, "grad_norm": 1.4478158347394139, "learning_rate": 3.546771929431894e-06, "loss": 0.6575, "step": 7156 }, { "epoch": 0.6061401651492695, "grad_norm": 1.9729178208535436, "learning_rate": 3.5454595616162524e-06, "loss": 0.6568, "step": 7157 }, { "epoch": 0.606224857082363, "grad_norm": 0.6066270000374218, "learning_rate": 3.5441473032624835e-06, "loss": 0.8506, "step": 7158 }, { "epoch": 0.6063095490154563, "grad_norm": 1.4904569574649285, "learning_rate": 3.542835154469337e-06, "loss": 0.6199, "step": 7159 }, { "epoch": 0.6063942409485497, "grad_norm": 1.7412200126804915, "learning_rate": 3.5415231153355635e-06, "loss": 0.6876, "step": 7160 }, { "epoch": 0.606478932881643, "grad_norm": 1.3779448885965437, "learning_rate": 3.5402111859598965e-06, "loss": 0.6451, "step": 7161 }, { "epoch": 0.6065636248147364, "grad_norm": 1.0747768258669848, "learning_rate": 3.5388993664410676e-06, "loss": 0.6296, "step": 7162 }, { "epoch": 0.6066483167478298, "grad_norm": 0.5829303262296618, "learning_rate": 3.5375876568778e-06, "loss": 0.8189, "step": 7163 }, { "epoch": 0.6067330086809232, "grad_norm": 1.9077116924245032, "learning_rate": 3.536276057368803e-06, "loss": 0.599, "step": 7164 }, { "epoch": 0.6068177006140165, "grad_norm": 1.735445175589435, "learning_rate": 3.534964568012784e-06, "loss": 0.6064, "step": 7165 }, { "epoch": 0.6069023925471099, "grad_norm": 1.4699037372513184, "learning_rate": 3.5336531889084413e-06, "loss": 0.6555, "step": 7166 }, { "epoch": 0.6069870844802032, "grad_norm": 1.3822296269636722, "learning_rate": 3.5323419201544582e-06, "loss": 0.6617, "step": 7167 }, { "epoch": 0.6070717764132967, "grad_norm": 1.425076148147822, "learning_rate": 3.5310307618495192e-06, "loss": 0.6282, "step": 7168 }, { "epoch": 0.60715646834639, "grad_norm": 1.3834909303989218, "learning_rate": 3.5297197140922923e-06, "loss": 0.6403, "step": 7169 }, { "epoch": 0.6072411602794834, "grad_norm": 1.3445911330400564, "learning_rate": 3.5284087769814423e-06, "loss": 0.6089, "step": 7170 }, { "epoch": 0.6073258522125767, "grad_norm": 1.317166361040454, "learning_rate": 3.5270979506156257e-06, "loss": 0.643, "step": 7171 }, { "epoch": 0.6074105441456701, "grad_norm": 1.4662587493671417, "learning_rate": 3.5257872350934863e-06, "loss": 0.6728, "step": 7172 }, { "epoch": 0.6074952360787635, "grad_norm": 1.35080693231094, "learning_rate": 3.524476630513664e-06, "loss": 0.6138, "step": 7173 }, { "epoch": 0.6075799280118569, "grad_norm": 1.2742500644520607, "learning_rate": 3.523166136974789e-06, "loss": 0.6334, "step": 7174 }, { "epoch": 0.6076646199449502, "grad_norm": 2.1937497117159768, "learning_rate": 3.521855754575482e-06, "loss": 0.6054, "step": 7175 }, { "epoch": 0.6077493118780436, "grad_norm": 1.5269047053288733, "learning_rate": 3.5205454834143587e-06, "loss": 0.6386, "step": 7176 }, { "epoch": 0.607834003811137, "grad_norm": 1.379846541447463, "learning_rate": 3.51923532359002e-06, "loss": 0.6296, "step": 7177 }, { "epoch": 0.6079186957442304, "grad_norm": 1.4702624823715977, "learning_rate": 3.517925275201063e-06, "loss": 0.7187, "step": 7178 }, { "epoch": 0.6080033876773238, "grad_norm": 1.327382695463799, "learning_rate": 3.5166153383460793e-06, "loss": 0.6137, "step": 7179 }, { "epoch": 0.6080880796104171, "grad_norm": 1.4866575248101115, "learning_rate": 3.515305513123645e-06, "loss": 0.6832, "step": 7180 }, { "epoch": 0.6081727715435105, "grad_norm": 1.4640604646758937, "learning_rate": 3.513995799632333e-06, "loss": 0.6205, "step": 7181 }, { "epoch": 0.6082574634766038, "grad_norm": 1.3321054261827692, "learning_rate": 3.512686197970706e-06, "loss": 0.6786, "step": 7182 }, { "epoch": 0.6083421554096973, "grad_norm": 1.8328472197354404, "learning_rate": 3.511376708237317e-06, "loss": 0.5975, "step": 7183 }, { "epoch": 0.6084268473427906, "grad_norm": 1.4575432921484404, "learning_rate": 3.510067330530715e-06, "loss": 0.6573, "step": 7184 }, { "epoch": 0.608511539275884, "grad_norm": 1.4179956592816438, "learning_rate": 3.5087580649494355e-06, "loss": 0.6055, "step": 7185 }, { "epoch": 0.6085962312089773, "grad_norm": 1.3661650039304043, "learning_rate": 3.5074489115920076e-06, "loss": 0.6139, "step": 7186 }, { "epoch": 0.6086809231420707, "grad_norm": 1.6578784751524271, "learning_rate": 3.5061398705569544e-06, "loss": 0.5871, "step": 7187 }, { "epoch": 0.6087656150751641, "grad_norm": 1.2756793681131637, "learning_rate": 3.504830941942783e-06, "loss": 0.6326, "step": 7188 }, { "epoch": 0.6088503070082575, "grad_norm": 1.4158495629396415, "learning_rate": 3.5035221258480046e-06, "loss": 0.5939, "step": 7189 }, { "epoch": 0.6089349989413508, "grad_norm": 1.4452458130970731, "learning_rate": 3.5022134223711075e-06, "loss": 0.6096, "step": 7190 }, { "epoch": 0.6090196908744442, "grad_norm": 1.4035206572109895, "learning_rate": 3.5009048316105817e-06, "loss": 0.6261, "step": 7191 }, { "epoch": 0.6091043828075375, "grad_norm": 1.5990743492616255, "learning_rate": 3.499596353664906e-06, "loss": 0.687, "step": 7192 }, { "epoch": 0.609189074740631, "grad_norm": 1.2806599709716207, "learning_rate": 3.4982879886325495e-06, "loss": 0.5968, "step": 7193 }, { "epoch": 0.6092737666737243, "grad_norm": 1.4911455171227523, "learning_rate": 3.4969797366119736e-06, "loss": 0.6565, "step": 7194 }, { "epoch": 0.6093584586068177, "grad_norm": 1.4056093330070916, "learning_rate": 3.495671597701632e-06, "loss": 0.6549, "step": 7195 }, { "epoch": 0.609443150539911, "grad_norm": 0.5893349784082763, "learning_rate": 3.494363571999968e-06, "loss": 0.8685, "step": 7196 }, { "epoch": 0.6095278424730044, "grad_norm": 1.2172136027696947, "learning_rate": 3.49305565960542e-06, "loss": 0.6622, "step": 7197 }, { "epoch": 0.6096125344060979, "grad_norm": 1.479806597200567, "learning_rate": 3.4917478606164095e-06, "loss": 0.6554, "step": 7198 }, { "epoch": 0.6096972263391912, "grad_norm": 1.5617151109329803, "learning_rate": 3.4904401751313606e-06, "loss": 0.5816, "step": 7199 }, { "epoch": 0.6097819182722846, "grad_norm": 1.2777730008581745, "learning_rate": 3.4891326032486838e-06, "loss": 0.6615, "step": 7200 }, { "epoch": 0.6098666102053779, "grad_norm": 1.3585865473664724, "learning_rate": 3.487825145066777e-06, "loss": 0.629, "step": 7201 }, { "epoch": 0.6099513021384713, "grad_norm": 0.606522837830528, "learning_rate": 3.4865178006840356e-06, "loss": 0.8419, "step": 7202 }, { "epoch": 0.6100359940715647, "grad_norm": 1.4337721290316014, "learning_rate": 3.485210570198845e-06, "loss": 0.6277, "step": 7203 }, { "epoch": 0.6101206860046581, "grad_norm": 1.786670046814743, "learning_rate": 3.483903453709579e-06, "loss": 0.6366, "step": 7204 }, { "epoch": 0.6102053779377514, "grad_norm": 1.2140454387299258, "learning_rate": 3.482596451314607e-06, "loss": 0.6404, "step": 7205 }, { "epoch": 0.6102900698708448, "grad_norm": 1.9030823069570517, "learning_rate": 3.481289563112287e-06, "loss": 0.6438, "step": 7206 }, { "epoch": 0.6103747618039381, "grad_norm": 1.2341357815120335, "learning_rate": 3.4799827892009686e-06, "loss": 0.6425, "step": 7207 }, { "epoch": 0.6104594537370316, "grad_norm": 2.037423923168239, "learning_rate": 3.478676129678996e-06, "loss": 0.6642, "step": 7208 }, { "epoch": 0.6105441456701249, "grad_norm": 1.497767409574184, "learning_rate": 3.4773695846446977e-06, "loss": 0.6391, "step": 7209 }, { "epoch": 0.6106288376032183, "grad_norm": 1.5693045373699794, "learning_rate": 3.476063154196402e-06, "loss": 0.6167, "step": 7210 }, { "epoch": 0.6107135295363116, "grad_norm": 1.3039837139308632, "learning_rate": 3.4747568384324252e-06, "loss": 0.6428, "step": 7211 }, { "epoch": 0.610798221469405, "grad_norm": 1.2813429073364102, "learning_rate": 3.473450637451071e-06, "loss": 0.6409, "step": 7212 }, { "epoch": 0.6108829134024985, "grad_norm": 2.1083843517697063, "learning_rate": 3.4721445513506413e-06, "loss": 0.6471, "step": 7213 }, { "epoch": 0.6109676053355918, "grad_norm": 1.3463524099167319, "learning_rate": 3.470838580229423e-06, "loss": 0.684, "step": 7214 }, { "epoch": 0.6110522972686852, "grad_norm": 1.3064604189285205, "learning_rate": 3.469532724185699e-06, "loss": 0.5986, "step": 7215 }, { "epoch": 0.6111369892017785, "grad_norm": 0.6590192046998661, "learning_rate": 3.4682269833177422e-06, "loss": 0.8704, "step": 7216 }, { "epoch": 0.6112216811348719, "grad_norm": 1.4470232843899042, "learning_rate": 3.466921357723816e-06, "loss": 0.6245, "step": 7217 }, { "epoch": 0.6113063730679653, "grad_norm": 1.519605763986508, "learning_rate": 3.4656158475021752e-06, "loss": 0.6855, "step": 7218 }, { "epoch": 0.6113910650010587, "grad_norm": 0.7090665878520555, "learning_rate": 3.4643104527510673e-06, "loss": 0.853, "step": 7219 }, { "epoch": 0.611475756934152, "grad_norm": 1.6039145352367092, "learning_rate": 3.4630051735687294e-06, "loss": 0.6711, "step": 7220 }, { "epoch": 0.6115604488672454, "grad_norm": 1.5130634494669597, "learning_rate": 3.461700010053393e-06, "loss": 0.6663, "step": 7221 }, { "epoch": 0.6116451408003387, "grad_norm": 1.5280004833673821, "learning_rate": 3.460394962303274e-06, "loss": 0.6292, "step": 7222 }, { "epoch": 0.6117298327334322, "grad_norm": 1.3464082584518766, "learning_rate": 3.4590900304165853e-06, "loss": 0.6373, "step": 7223 }, { "epoch": 0.6118145246665255, "grad_norm": 1.6973619364029089, "learning_rate": 3.4577852144915354e-06, "loss": 0.5971, "step": 7224 }, { "epoch": 0.6118992165996189, "grad_norm": 1.202650824319425, "learning_rate": 3.456480514626312e-06, "loss": 0.6252, "step": 7225 }, { "epoch": 0.6119839085327122, "grad_norm": 1.3071116228149078, "learning_rate": 3.4551759309191046e-06, "loss": 0.6556, "step": 7226 }, { "epoch": 0.6120686004658056, "grad_norm": 1.4515906894543058, "learning_rate": 3.453871463468087e-06, "loss": 0.6473, "step": 7227 }, { "epoch": 0.612153292398899, "grad_norm": 1.3369572980643238, "learning_rate": 3.452567112371429e-06, "loss": 0.5835, "step": 7228 }, { "epoch": 0.6122379843319924, "grad_norm": 1.602134026229549, "learning_rate": 3.451262877727291e-06, "loss": 0.6192, "step": 7229 }, { "epoch": 0.6123226762650857, "grad_norm": 1.5161016564184604, "learning_rate": 3.449958759633821e-06, "loss": 0.646, "step": 7230 }, { "epoch": 0.6124073681981791, "grad_norm": 1.9672162655812635, "learning_rate": 3.448654758189163e-06, "loss": 0.6201, "step": 7231 }, { "epoch": 0.6124920601312724, "grad_norm": 1.4245178051578127, "learning_rate": 3.447350873491451e-06, "loss": 0.6348, "step": 7232 }, { "epoch": 0.6125767520643659, "grad_norm": 2.430242786147571, "learning_rate": 3.4460471056388058e-06, "loss": 0.6788, "step": 7233 }, { "epoch": 0.6126614439974593, "grad_norm": 1.3877616171403986, "learning_rate": 3.4447434547293446e-06, "loss": 0.5952, "step": 7234 }, { "epoch": 0.6127461359305526, "grad_norm": 1.5178833316177673, "learning_rate": 3.4434399208611736e-06, "loss": 0.6359, "step": 7235 }, { "epoch": 0.612830827863646, "grad_norm": 1.4373083287292867, "learning_rate": 3.44213650413239e-06, "loss": 0.6599, "step": 7236 }, { "epoch": 0.6129155197967393, "grad_norm": 1.2158731557285647, "learning_rate": 3.4408332046410853e-06, "loss": 0.6275, "step": 7237 }, { "epoch": 0.6130002117298328, "grad_norm": 1.288523109633251, "learning_rate": 3.4395300224853373e-06, "loss": 0.6252, "step": 7238 }, { "epoch": 0.6130849036629261, "grad_norm": 1.3165646694035011, "learning_rate": 3.4382269577632176e-06, "loss": 0.6857, "step": 7239 }, { "epoch": 0.6131695955960195, "grad_norm": 1.5858629764546233, "learning_rate": 3.436924010572791e-06, "loss": 0.6395, "step": 7240 }, { "epoch": 0.6132542875291128, "grad_norm": 1.6580936780633133, "learning_rate": 3.4356211810121086e-06, "loss": 0.6105, "step": 7241 }, { "epoch": 0.6133389794622063, "grad_norm": 1.1766516234624405, "learning_rate": 3.4343184691792176e-06, "loss": 0.6284, "step": 7242 }, { "epoch": 0.6134236713952996, "grad_norm": 0.6326948615987424, "learning_rate": 3.433015875172151e-06, "loss": 0.8446, "step": 7243 }, { "epoch": 0.613508363328393, "grad_norm": 1.4338404758485437, "learning_rate": 3.4317133990889356e-06, "loss": 0.6612, "step": 7244 }, { "epoch": 0.6135930552614863, "grad_norm": 1.2596461909267282, "learning_rate": 3.430411041027595e-06, "loss": 0.625, "step": 7245 }, { "epoch": 0.6136777471945797, "grad_norm": 0.6324014358223836, "learning_rate": 3.429108801086132e-06, "loss": 0.7974, "step": 7246 }, { "epoch": 0.6137624391276731, "grad_norm": 1.6435773121058002, "learning_rate": 3.4278066793625507e-06, "loss": 0.6483, "step": 7247 }, { "epoch": 0.6138471310607665, "grad_norm": 2.3470244278143713, "learning_rate": 3.4265046759548436e-06, "loss": 0.6349, "step": 7248 }, { "epoch": 0.6139318229938598, "grad_norm": 1.1440799116618523, "learning_rate": 3.42520279096099e-06, "loss": 0.5703, "step": 7249 }, { "epoch": 0.6140165149269532, "grad_norm": 1.732174183539111, "learning_rate": 3.423901024478966e-06, "loss": 0.5876, "step": 7250 }, { "epoch": 0.6141012068600465, "grad_norm": 1.1833699106449356, "learning_rate": 3.422599376606735e-06, "loss": 0.6628, "step": 7251 }, { "epoch": 0.61418589879314, "grad_norm": 2.193632379102089, "learning_rate": 3.421297847442254e-06, "loss": 0.6437, "step": 7252 }, { "epoch": 0.6142705907262334, "grad_norm": 1.2041607319862204, "learning_rate": 3.4199964370834717e-06, "loss": 0.6164, "step": 7253 }, { "epoch": 0.6143552826593267, "grad_norm": 1.9315909200607788, "learning_rate": 3.41869514562832e-06, "loss": 0.6004, "step": 7254 }, { "epoch": 0.6144399745924201, "grad_norm": 1.3260034352655903, "learning_rate": 3.417393973174736e-06, "loss": 0.6712, "step": 7255 }, { "epoch": 0.6145246665255134, "grad_norm": 1.4249748336034658, "learning_rate": 3.416092919820633e-06, "loss": 0.6447, "step": 7256 }, { "epoch": 0.6146093584586069, "grad_norm": 1.4613307425017341, "learning_rate": 3.4147919856639255e-06, "loss": 0.6513, "step": 7257 }, { "epoch": 0.6146940503917002, "grad_norm": 1.4397742698102332, "learning_rate": 3.4134911708025167e-06, "loss": 0.6384, "step": 7258 }, { "epoch": 0.6147787423247936, "grad_norm": 0.6308748616490222, "learning_rate": 3.412190475334296e-06, "loss": 0.8423, "step": 7259 }, { "epoch": 0.6148634342578869, "grad_norm": 1.3164487385162973, "learning_rate": 3.4108898993571503e-06, "loss": 0.6218, "step": 7260 }, { "epoch": 0.6149481261909803, "grad_norm": 1.4685805765340005, "learning_rate": 3.4095894429689557e-06, "loss": 0.6545, "step": 7261 }, { "epoch": 0.6150328181240737, "grad_norm": 1.285115932894574, "learning_rate": 3.4082891062675766e-06, "loss": 0.6442, "step": 7262 }, { "epoch": 0.6151175100571671, "grad_norm": 1.5622615520785956, "learning_rate": 3.4069888893508724e-06, "loss": 0.6294, "step": 7263 }, { "epoch": 0.6152022019902604, "grad_norm": 1.4233735392523916, "learning_rate": 3.405688792316686e-06, "loss": 0.643, "step": 7264 }, { "epoch": 0.6152868939233538, "grad_norm": 1.3460717460397216, "learning_rate": 3.4043888152628624e-06, "loss": 0.6213, "step": 7265 }, { "epoch": 0.6153715858564471, "grad_norm": 1.2992663052674, "learning_rate": 3.4030889582872306e-06, "loss": 0.6791, "step": 7266 }, { "epoch": 0.6154562777895406, "grad_norm": 1.249786344153323, "learning_rate": 3.4017892214876093e-06, "loss": 0.6339, "step": 7267 }, { "epoch": 0.615540969722634, "grad_norm": 1.5626869251945559, "learning_rate": 3.4004896049618117e-06, "loss": 0.6545, "step": 7268 }, { "epoch": 0.6156256616557273, "grad_norm": 1.264267249191066, "learning_rate": 3.3991901088076422e-06, "loss": 0.6016, "step": 7269 }, { "epoch": 0.6157103535888206, "grad_norm": 1.2714334738576885, "learning_rate": 3.397890733122893e-06, "loss": 0.6674, "step": 7270 }, { "epoch": 0.615795045521914, "grad_norm": 2.89050436454833, "learning_rate": 3.39659147800535e-06, "loss": 0.6438, "step": 7271 }, { "epoch": 0.6158797374550075, "grad_norm": 1.2991324882424136, "learning_rate": 3.3952923435527883e-06, "loss": 0.6199, "step": 7272 }, { "epoch": 0.6159644293881008, "grad_norm": 1.2456138817793438, "learning_rate": 3.3939933298629752e-06, "loss": 0.6714, "step": 7273 }, { "epoch": 0.6160491213211942, "grad_norm": 2.4303357567492805, "learning_rate": 3.39269443703367e-06, "loss": 0.6147, "step": 7274 }, { "epoch": 0.6161338132542875, "grad_norm": 1.4463031570784852, "learning_rate": 3.391395665162617e-06, "loss": 0.6852, "step": 7275 }, { "epoch": 0.6162185051873809, "grad_norm": 2.2860318958407593, "learning_rate": 3.3900970143475583e-06, "loss": 0.6331, "step": 7276 }, { "epoch": 0.6163031971204743, "grad_norm": 2.017342848730452, "learning_rate": 3.3887984846862264e-06, "loss": 0.602, "step": 7277 }, { "epoch": 0.6163878890535677, "grad_norm": 2.3414610051409523, "learning_rate": 3.387500076276338e-06, "loss": 0.6247, "step": 7278 }, { "epoch": 0.616472580986661, "grad_norm": 1.4829342474342975, "learning_rate": 3.386201789215609e-06, "loss": 0.6476, "step": 7279 }, { "epoch": 0.6165572729197544, "grad_norm": 1.4192922923994158, "learning_rate": 3.3849036236017395e-06, "loss": 0.6737, "step": 7280 }, { "epoch": 0.6166419648528477, "grad_norm": 1.463483300384885, "learning_rate": 3.383605579532425e-06, "loss": 0.6359, "step": 7281 }, { "epoch": 0.6167266567859412, "grad_norm": 1.3097395042669846, "learning_rate": 3.3823076571053505e-06, "loss": 0.6497, "step": 7282 }, { "epoch": 0.6168113487190345, "grad_norm": 1.13571125634901, "learning_rate": 3.3810098564181904e-06, "loss": 0.6332, "step": 7283 }, { "epoch": 0.6168960406521279, "grad_norm": 0.6003188430365286, "learning_rate": 3.3797121775686107e-06, "loss": 0.8509, "step": 7284 }, { "epoch": 0.6169807325852212, "grad_norm": 1.6983163464151376, "learning_rate": 3.3784146206542713e-06, "loss": 0.6443, "step": 7285 }, { "epoch": 0.6170654245183146, "grad_norm": 1.5284138235044924, "learning_rate": 3.377117185772817e-06, "loss": 0.6239, "step": 7286 }, { "epoch": 0.617150116451408, "grad_norm": 1.3814182922319298, "learning_rate": 3.37581987302189e-06, "loss": 0.6289, "step": 7287 }, { "epoch": 0.6172348083845014, "grad_norm": 1.4125710762120358, "learning_rate": 3.3745226824991162e-06, "loss": 0.5855, "step": 7288 }, { "epoch": 0.6173195003175947, "grad_norm": 1.3662433942560646, "learning_rate": 3.373225614302117e-06, "loss": 0.6324, "step": 7289 }, { "epoch": 0.6174041922506881, "grad_norm": 2.3717365752293236, "learning_rate": 3.3719286685285067e-06, "loss": 0.6553, "step": 7290 }, { "epoch": 0.6174888841837815, "grad_norm": 1.4651876590863353, "learning_rate": 3.3706318452758835e-06, "loss": 0.607, "step": 7291 }, { "epoch": 0.6175735761168749, "grad_norm": 1.7101247949189922, "learning_rate": 3.369335144641843e-06, "loss": 0.6631, "step": 7292 }, { "epoch": 0.6176582680499683, "grad_norm": 1.484395837034654, "learning_rate": 3.368038566723967e-06, "loss": 0.6762, "step": 7293 }, { "epoch": 0.6177429599830616, "grad_norm": 1.8231560124513535, "learning_rate": 3.36674211161983e-06, "loss": 0.6485, "step": 7294 }, { "epoch": 0.617827651916155, "grad_norm": 1.3675997637283737, "learning_rate": 3.365445779426999e-06, "loss": 0.6567, "step": 7295 }, { "epoch": 0.6179123438492483, "grad_norm": 1.4282048547584953, "learning_rate": 3.364149570243027e-06, "loss": 0.6142, "step": 7296 }, { "epoch": 0.6179970357823418, "grad_norm": 1.3194159177478009, "learning_rate": 3.3628534841654627e-06, "loss": 0.6326, "step": 7297 }, { "epoch": 0.6180817277154351, "grad_norm": 1.4671833436901074, "learning_rate": 3.3615575212918445e-06, "loss": 0.6384, "step": 7298 }, { "epoch": 0.6181664196485285, "grad_norm": 1.794196068405151, "learning_rate": 3.3602616817196964e-06, "loss": 0.7049, "step": 7299 }, { "epoch": 0.6182511115816218, "grad_norm": 1.4031576838943334, "learning_rate": 3.3589659655465413e-06, "loss": 0.6435, "step": 7300 }, { "epoch": 0.6183358035147152, "grad_norm": 2.822983473607409, "learning_rate": 3.3576703728698856e-06, "loss": 0.6586, "step": 7301 }, { "epoch": 0.6184204954478086, "grad_norm": 1.7146926269740213, "learning_rate": 3.3563749037872306e-06, "loss": 0.6987, "step": 7302 }, { "epoch": 0.618505187380902, "grad_norm": 0.5972789431034804, "learning_rate": 3.3550795583960693e-06, "loss": 0.8449, "step": 7303 }, { "epoch": 0.6185898793139953, "grad_norm": 1.5390329955882218, "learning_rate": 3.3537843367938794e-06, "loss": 0.628, "step": 7304 }, { "epoch": 0.6186745712470887, "grad_norm": 18.98601238175398, "learning_rate": 3.352489239078136e-06, "loss": 0.6944, "step": 7305 }, { "epoch": 0.618759263180182, "grad_norm": 1.9305438308551788, "learning_rate": 3.3511942653463027e-06, "loss": 0.6186, "step": 7306 }, { "epoch": 0.6188439551132755, "grad_norm": 0.6227691146488068, "learning_rate": 3.34989941569583e-06, "loss": 0.8657, "step": 7307 }, { "epoch": 0.6189286470463689, "grad_norm": 0.619257366307695, "learning_rate": 3.3486046902241663e-06, "loss": 0.8342, "step": 7308 }, { "epoch": 0.6190133389794622, "grad_norm": 1.19035010941224, "learning_rate": 3.3473100890287426e-06, "loss": 0.6271, "step": 7309 }, { "epoch": 0.6190980309125556, "grad_norm": 1.3017636907936028, "learning_rate": 3.346015612206984e-06, "loss": 0.6878, "step": 7310 }, { "epoch": 0.6191827228456489, "grad_norm": 1.6438937027734575, "learning_rate": 3.3447212598563127e-06, "loss": 0.6405, "step": 7311 }, { "epoch": 0.6192674147787424, "grad_norm": 1.2172992228350064, "learning_rate": 3.343427032074129e-06, "loss": 0.5838, "step": 7312 }, { "epoch": 0.6193521067118357, "grad_norm": 2.6422573542785113, "learning_rate": 3.3421329289578343e-06, "loss": 0.6327, "step": 7313 }, { "epoch": 0.6194367986449291, "grad_norm": 1.39940705236478, "learning_rate": 3.3408389506048157e-06, "loss": 0.6055, "step": 7314 }, { "epoch": 0.6195214905780224, "grad_norm": 1.2739938203511019, "learning_rate": 3.3395450971124512e-06, "loss": 0.5956, "step": 7315 }, { "epoch": 0.6196061825111158, "grad_norm": 1.3108215168730093, "learning_rate": 3.3382513685781115e-06, "loss": 0.5967, "step": 7316 }, { "epoch": 0.6196908744442092, "grad_norm": 1.6918986810957395, "learning_rate": 3.336957765099154e-06, "loss": 0.6003, "step": 7317 }, { "epoch": 0.6197755663773026, "grad_norm": 1.3377378683480625, "learning_rate": 3.3356642867729315e-06, "loss": 0.6322, "step": 7318 }, { "epoch": 0.6198602583103959, "grad_norm": 1.3777848006605766, "learning_rate": 3.3343709336967868e-06, "loss": 0.6479, "step": 7319 }, { "epoch": 0.6199449502434893, "grad_norm": 1.5693725688789275, "learning_rate": 3.3330777059680454e-06, "loss": 0.6757, "step": 7320 }, { "epoch": 0.6200296421765826, "grad_norm": 1.415552763561187, "learning_rate": 3.331784603684035e-06, "loss": 0.5836, "step": 7321 }, { "epoch": 0.6201143341096761, "grad_norm": 1.5037036274192557, "learning_rate": 3.330491626942069e-06, "loss": 0.6437, "step": 7322 }, { "epoch": 0.6201990260427694, "grad_norm": 0.6109581817761612, "learning_rate": 3.3291987758394462e-06, "loss": 0.8473, "step": 7323 }, { "epoch": 0.6202837179758628, "grad_norm": 1.540027878467031, "learning_rate": 3.327906050473464e-06, "loss": 0.6509, "step": 7324 }, { "epoch": 0.6203684099089561, "grad_norm": 1.7534649754201113, "learning_rate": 3.3266134509414046e-06, "loss": 0.6273, "step": 7325 }, { "epoch": 0.6204531018420495, "grad_norm": 1.3244654819742248, "learning_rate": 3.3253209773405436e-06, "loss": 0.6537, "step": 7326 }, { "epoch": 0.620537793775143, "grad_norm": 1.5692456995006614, "learning_rate": 3.3240286297681486e-06, "loss": 0.6382, "step": 7327 }, { "epoch": 0.6206224857082363, "grad_norm": 1.4682110225881975, "learning_rate": 3.3227364083214718e-06, "loss": 0.6077, "step": 7328 }, { "epoch": 0.6207071776413297, "grad_norm": 1.411708314700925, "learning_rate": 3.3214443130977648e-06, "loss": 0.6413, "step": 7329 }, { "epoch": 0.620791869574423, "grad_norm": 1.6572892865700004, "learning_rate": 3.3201523441942585e-06, "loss": 0.6559, "step": 7330 }, { "epoch": 0.6208765615075164, "grad_norm": 1.4586522885620423, "learning_rate": 3.318860501708184e-06, "loss": 0.667, "step": 7331 }, { "epoch": 0.6209612534406098, "grad_norm": 0.5975582479236341, "learning_rate": 3.3175687857367615e-06, "loss": 0.8263, "step": 7332 }, { "epoch": 0.6210459453737032, "grad_norm": 1.3557932659323997, "learning_rate": 3.3162771963771946e-06, "loss": 0.6119, "step": 7333 }, { "epoch": 0.6211306373067965, "grad_norm": 1.3097754913556414, "learning_rate": 3.3149857337266842e-06, "loss": 0.5932, "step": 7334 }, { "epoch": 0.6212153292398899, "grad_norm": 1.4848240867145999, "learning_rate": 3.313694397882421e-06, "loss": 0.5919, "step": 7335 }, { "epoch": 0.6213000211729832, "grad_norm": 1.4970126442891403, "learning_rate": 3.312403188941583e-06, "loss": 0.655, "step": 7336 }, { "epoch": 0.6213847131060767, "grad_norm": 2.2516414384646195, "learning_rate": 3.311112107001342e-06, "loss": 0.6332, "step": 7337 }, { "epoch": 0.62146940503917, "grad_norm": 1.7974733916129253, "learning_rate": 3.309821152158857e-06, "loss": 0.6338, "step": 7338 }, { "epoch": 0.6215540969722634, "grad_norm": 1.3502403389587565, "learning_rate": 3.3085303245112797e-06, "loss": 0.6071, "step": 7339 }, { "epoch": 0.6216387889053567, "grad_norm": 2.026552286229094, "learning_rate": 3.3072396241557554e-06, "loss": 0.6849, "step": 7340 }, { "epoch": 0.6217234808384501, "grad_norm": 3.6852388082753644, "learning_rate": 3.3059490511894094e-06, "loss": 0.6659, "step": 7341 }, { "epoch": 0.6218081727715435, "grad_norm": 1.4633329131454138, "learning_rate": 3.304658605709369e-06, "loss": 0.6479, "step": 7342 }, { "epoch": 0.6218928647046369, "grad_norm": 2.5454856128224645, "learning_rate": 3.303368287812747e-06, "loss": 0.6087, "step": 7343 }, { "epoch": 0.6219775566377302, "grad_norm": 1.8455184728258482, "learning_rate": 3.302078097596644e-06, "loss": 0.5982, "step": 7344 }, { "epoch": 0.6220622485708236, "grad_norm": 1.533650934778942, "learning_rate": 3.300788035158156e-06, "loss": 0.6231, "step": 7345 }, { "epoch": 0.6221469405039171, "grad_norm": 1.911336372936179, "learning_rate": 3.299498100594365e-06, "loss": 0.5787, "step": 7346 }, { "epoch": 0.6222316324370104, "grad_norm": 1.5336215388683274, "learning_rate": 3.298208294002347e-06, "loss": 0.5863, "step": 7347 }, { "epoch": 0.6223163243701038, "grad_norm": 1.7830111680328384, "learning_rate": 3.2969186154791666e-06, "loss": 0.6062, "step": 7348 }, { "epoch": 0.6224010163031971, "grad_norm": 1.3795397211723919, "learning_rate": 3.295629065121878e-06, "loss": 0.6522, "step": 7349 }, { "epoch": 0.6224857082362905, "grad_norm": 1.336947516260465, "learning_rate": 3.2943396430275276e-06, "loss": 0.6416, "step": 7350 }, { "epoch": 0.6225704001693839, "grad_norm": 1.4033111065541946, "learning_rate": 3.2930503492931514e-06, "loss": 0.6441, "step": 7351 }, { "epoch": 0.6226550921024773, "grad_norm": 1.5044910992923528, "learning_rate": 3.291761184015774e-06, "loss": 0.6076, "step": 7352 }, { "epoch": 0.6227397840355706, "grad_norm": 1.3159480223791227, "learning_rate": 3.290472147292416e-06, "loss": 0.6267, "step": 7353 }, { "epoch": 0.622824475968664, "grad_norm": 2.1994489199472493, "learning_rate": 3.2891832392200783e-06, "loss": 0.6711, "step": 7354 }, { "epoch": 0.6229091679017573, "grad_norm": 1.305799039748113, "learning_rate": 3.287894459895761e-06, "loss": 0.6155, "step": 7355 }, { "epoch": 0.6229938598348508, "grad_norm": 1.2441391081509277, "learning_rate": 3.2866058094164537e-06, "loss": 0.6462, "step": 7356 }, { "epoch": 0.6230785517679441, "grad_norm": 2.1940689091788865, "learning_rate": 3.2853172878791307e-06, "loss": 0.6157, "step": 7357 }, { "epoch": 0.6231632437010375, "grad_norm": 1.217523015273846, "learning_rate": 3.2840288953807618e-06, "loss": 0.5782, "step": 7358 }, { "epoch": 0.6232479356341308, "grad_norm": 1.9534977650909375, "learning_rate": 3.282740632018305e-06, "loss": 0.5894, "step": 7359 }, { "epoch": 0.6233326275672242, "grad_norm": 1.7325520820812297, "learning_rate": 3.2814524978887084e-06, "loss": 0.6518, "step": 7360 }, { "epoch": 0.6234173195003176, "grad_norm": 1.4686286981121035, "learning_rate": 3.280164493088912e-06, "loss": 0.6276, "step": 7361 }, { "epoch": 0.623502011433411, "grad_norm": 1.7985603761244942, "learning_rate": 3.2788766177158443e-06, "loss": 0.634, "step": 7362 }, { "epoch": 0.6235867033665043, "grad_norm": 2.1053572529346583, "learning_rate": 3.277588871866425e-06, "loss": 0.615, "step": 7363 }, { "epoch": 0.6236713952995977, "grad_norm": 1.8604809675287344, "learning_rate": 3.276301255637565e-06, "loss": 0.6194, "step": 7364 }, { "epoch": 0.623756087232691, "grad_norm": 2.637514981789937, "learning_rate": 3.2750137691261607e-06, "loss": 0.6451, "step": 7365 }, { "epoch": 0.6238407791657845, "grad_norm": 1.0955806234419991, "learning_rate": 3.2737264124291067e-06, "loss": 0.6185, "step": 7366 }, { "epoch": 0.6239254710988779, "grad_norm": 1.952528776013819, "learning_rate": 3.272439185643279e-06, "loss": 0.6299, "step": 7367 }, { "epoch": 0.6240101630319712, "grad_norm": 1.3538401987684858, "learning_rate": 3.271152088865551e-06, "loss": 0.6312, "step": 7368 }, { "epoch": 0.6240948549650646, "grad_norm": 1.4631650341577545, "learning_rate": 3.269865122192784e-06, "loss": 0.6048, "step": 7369 }, { "epoch": 0.6241795468981579, "grad_norm": 2.2788196383668486, "learning_rate": 3.2685782857218273e-06, "loss": 0.6679, "step": 7370 }, { "epoch": 0.6242642388312514, "grad_norm": 1.3313275506573137, "learning_rate": 3.2672915795495225e-06, "loss": 0.6595, "step": 7371 }, { "epoch": 0.6243489307643447, "grad_norm": 1.3587934130636448, "learning_rate": 3.2660050037727026e-06, "loss": 0.6223, "step": 7372 }, { "epoch": 0.6244336226974381, "grad_norm": 1.3784386956607169, "learning_rate": 3.264718558488187e-06, "loss": 0.6471, "step": 7373 }, { "epoch": 0.6245183146305314, "grad_norm": 1.553055744621739, "learning_rate": 3.263432243792791e-06, "loss": 0.6818, "step": 7374 }, { "epoch": 0.6246030065636248, "grad_norm": 3.869470027651988, "learning_rate": 3.2621460597833123e-06, "loss": 0.6436, "step": 7375 }, { "epoch": 0.6246876984967182, "grad_norm": 1.7196826049681069, "learning_rate": 3.2608600065565434e-06, "loss": 0.6819, "step": 7376 }, { "epoch": 0.6247723904298116, "grad_norm": 1.271091621332767, "learning_rate": 3.259574084209271e-06, "loss": 0.7215, "step": 7377 }, { "epoch": 0.6248570823629049, "grad_norm": 1.6167069019702769, "learning_rate": 3.2582882928382633e-06, "loss": 0.6127, "step": 7378 }, { "epoch": 0.6249417742959983, "grad_norm": 1.2076798635956532, "learning_rate": 3.257002632540284e-06, "loss": 0.6574, "step": 7379 }, { "epoch": 0.6250264662290916, "grad_norm": 1.5273093490005083, "learning_rate": 3.2557171034120878e-06, "loss": 0.6568, "step": 7380 }, { "epoch": 0.6251111581621851, "grad_norm": 1.4051694378781576, "learning_rate": 3.254431705550414e-06, "loss": 0.6564, "step": 7381 }, { "epoch": 0.6251958500952784, "grad_norm": 1.0991866198550246, "learning_rate": 3.2531464390519996e-06, "loss": 0.6291, "step": 7382 }, { "epoch": 0.6252805420283718, "grad_norm": 1.3757295351243557, "learning_rate": 3.2518613040135644e-06, "loss": 0.6166, "step": 7383 }, { "epoch": 0.6253652339614652, "grad_norm": 1.6787022219060843, "learning_rate": 3.2505763005318226e-06, "loss": 0.5799, "step": 7384 }, { "epoch": 0.6254499258945585, "grad_norm": 1.3651774939403367, "learning_rate": 3.2492914287034805e-06, "loss": 0.6686, "step": 7385 }, { "epoch": 0.625534617827652, "grad_norm": 2.0844994417127403, "learning_rate": 3.248006688625225e-06, "loss": 0.7004, "step": 7386 }, { "epoch": 0.6256193097607453, "grad_norm": 1.5734754065416832, "learning_rate": 3.2467220803937448e-06, "loss": 0.6849, "step": 7387 }, { "epoch": 0.6257040016938387, "grad_norm": 1.3689728715011897, "learning_rate": 3.245437604105714e-06, "loss": 0.6361, "step": 7388 }, { "epoch": 0.625788693626932, "grad_norm": 1.21363079652123, "learning_rate": 3.2441532598577926e-06, "loss": 0.6064, "step": 7389 }, { "epoch": 0.6258733855600254, "grad_norm": 1.6172428258278686, "learning_rate": 3.242869047746636e-06, "loss": 0.5753, "step": 7390 }, { "epoch": 0.6259580774931188, "grad_norm": 1.3143436278969618, "learning_rate": 3.2415849678688883e-06, "loss": 0.6198, "step": 7391 }, { "epoch": 0.6260427694262122, "grad_norm": 1.578235290027564, "learning_rate": 3.2403010203211826e-06, "loss": 0.6261, "step": 7392 }, { "epoch": 0.6261274613593055, "grad_norm": 1.3191012958718618, "learning_rate": 3.2390172052001444e-06, "loss": 0.6474, "step": 7393 }, { "epoch": 0.6262121532923989, "grad_norm": 1.7915819622416227, "learning_rate": 3.2377335226023846e-06, "loss": 0.6349, "step": 7394 }, { "epoch": 0.6262968452254922, "grad_norm": 1.357891507492852, "learning_rate": 3.236449972624512e-06, "loss": 0.6444, "step": 7395 }, { "epoch": 0.6263815371585857, "grad_norm": 1.5558917813507784, "learning_rate": 3.2351665553631136e-06, "loss": 0.6117, "step": 7396 }, { "epoch": 0.626466229091679, "grad_norm": 1.544558549460691, "learning_rate": 3.2338832709147784e-06, "loss": 0.6535, "step": 7397 }, { "epoch": 0.6265509210247724, "grad_norm": 1.53042401284915, "learning_rate": 3.232600119376081e-06, "loss": 0.6277, "step": 7398 }, { "epoch": 0.6266356129578657, "grad_norm": 1.212924684467056, "learning_rate": 3.2313171008435814e-06, "loss": 0.5844, "step": 7399 }, { "epoch": 0.6267203048909591, "grad_norm": 1.2748291874220878, "learning_rate": 3.2300342154138354e-06, "loss": 0.6027, "step": 7400 }, { "epoch": 0.6268049968240526, "grad_norm": 1.5076579295511492, "learning_rate": 3.2287514631833883e-06, "loss": 0.6018, "step": 7401 }, { "epoch": 0.6268896887571459, "grad_norm": 0.6267379393848095, "learning_rate": 3.2274688442487724e-06, "loss": 0.801, "step": 7402 }, { "epoch": 0.6269743806902393, "grad_norm": 1.1896390227493527, "learning_rate": 3.2261863587065123e-06, "loss": 0.6002, "step": 7403 }, { "epoch": 0.6270590726233326, "grad_norm": 0.6497203966988682, "learning_rate": 3.22490400665312e-06, "loss": 0.8486, "step": 7404 }, { "epoch": 0.627143764556426, "grad_norm": 1.3954085410620531, "learning_rate": 3.223621788185102e-06, "loss": 0.6881, "step": 7405 }, { "epoch": 0.6272284564895194, "grad_norm": 0.6270259634938349, "learning_rate": 3.222339703398952e-06, "loss": 0.8085, "step": 7406 }, { "epoch": 0.6273131484226128, "grad_norm": 0.6365691597824659, "learning_rate": 3.2210577523911492e-06, "loss": 0.864, "step": 7407 }, { "epoch": 0.6273978403557061, "grad_norm": 3.0566966424743427, "learning_rate": 3.2197759352581724e-06, "loss": 0.6202, "step": 7408 }, { "epoch": 0.6274825322887995, "grad_norm": 1.5623753756084544, "learning_rate": 3.2184942520964848e-06, "loss": 0.6094, "step": 7409 }, { "epoch": 0.6275672242218928, "grad_norm": 1.9071191387928643, "learning_rate": 3.217212703002536e-06, "loss": 0.6501, "step": 7410 }, { "epoch": 0.6276519161549863, "grad_norm": 1.2645219029188872, "learning_rate": 3.215931288072773e-06, "loss": 0.6076, "step": 7411 }, { "epoch": 0.6277366080880796, "grad_norm": 1.561306816455798, "learning_rate": 3.2146500074036264e-06, "loss": 0.5588, "step": 7412 }, { "epoch": 0.627821300021173, "grad_norm": 1.6820620146801057, "learning_rate": 3.2133688610915202e-06, "loss": 0.6294, "step": 7413 }, { "epoch": 0.6279059919542663, "grad_norm": 1.2090032554121966, "learning_rate": 3.21208784923287e-06, "loss": 0.6257, "step": 7414 }, { "epoch": 0.6279906838873597, "grad_norm": 1.6354844134679742, "learning_rate": 3.210806971924074e-06, "loss": 0.6369, "step": 7415 }, { "epoch": 0.6280753758204531, "grad_norm": 1.2908242456675676, "learning_rate": 3.209526229261529e-06, "loss": 0.603, "step": 7416 }, { "epoch": 0.6281600677535465, "grad_norm": 2.4095698844603923, "learning_rate": 3.2082456213416167e-06, "loss": 0.5818, "step": 7417 }, { "epoch": 0.6282447596866398, "grad_norm": 1.3002955327432197, "learning_rate": 3.2069651482607084e-06, "loss": 0.6361, "step": 7418 }, { "epoch": 0.6283294516197332, "grad_norm": 7.800316822825367, "learning_rate": 3.2056848101151696e-06, "loss": 0.6098, "step": 7419 }, { "epoch": 0.6284141435528265, "grad_norm": 1.6235289474418777, "learning_rate": 3.2044046070013473e-06, "loss": 0.6987, "step": 7420 }, { "epoch": 0.62849883548592, "grad_norm": 0.6041100483028982, "learning_rate": 3.203124539015586e-06, "loss": 0.8532, "step": 7421 }, { "epoch": 0.6285835274190134, "grad_norm": 0.6562475283761043, "learning_rate": 3.2018446062542206e-06, "loss": 0.8146, "step": 7422 }, { "epoch": 0.6286682193521067, "grad_norm": 1.751849245356649, "learning_rate": 3.200564808813569e-06, "loss": 0.5994, "step": 7423 }, { "epoch": 0.6287529112852001, "grad_norm": 1.4816683439243736, "learning_rate": 3.1992851467899435e-06, "loss": 0.595, "step": 7424 }, { "epoch": 0.6288376032182934, "grad_norm": 1.3114677070770568, "learning_rate": 3.198005620279647e-06, "loss": 0.6347, "step": 7425 }, { "epoch": 0.6289222951513869, "grad_norm": 1.282454554567148, "learning_rate": 3.196726229378968e-06, "loss": 0.6525, "step": 7426 }, { "epoch": 0.6290069870844802, "grad_norm": 1.708669491863116, "learning_rate": 3.195446974184191e-06, "loss": 0.5888, "step": 7427 }, { "epoch": 0.6290916790175736, "grad_norm": 1.45412712140955, "learning_rate": 3.194167854791583e-06, "loss": 0.6469, "step": 7428 }, { "epoch": 0.6291763709506669, "grad_norm": 2.033563458576138, "learning_rate": 3.192888871297407e-06, "loss": 0.6521, "step": 7429 }, { "epoch": 0.6292610628837603, "grad_norm": 1.3601675502433004, "learning_rate": 3.191610023797914e-06, "loss": 0.6223, "step": 7430 }, { "epoch": 0.6293457548168537, "grad_norm": 1.1904633473476065, "learning_rate": 3.190331312389341e-06, "loss": 0.6472, "step": 7431 }, { "epoch": 0.6294304467499471, "grad_norm": 1.4039462824655804, "learning_rate": 3.18905273716792e-06, "loss": 0.6563, "step": 7432 }, { "epoch": 0.6295151386830404, "grad_norm": 0.5691518683975081, "learning_rate": 3.1877742982298694e-06, "loss": 0.8069, "step": 7433 }, { "epoch": 0.6295998306161338, "grad_norm": 1.601201467937532, "learning_rate": 3.186495995671399e-06, "loss": 0.6224, "step": 7434 }, { "epoch": 0.6296845225492271, "grad_norm": 1.2206704866277913, "learning_rate": 3.185217829588708e-06, "loss": 0.6677, "step": 7435 }, { "epoch": 0.6297692144823206, "grad_norm": 1.4599369790669396, "learning_rate": 3.183939800077985e-06, "loss": 0.655, "step": 7436 }, { "epoch": 0.6298539064154139, "grad_norm": 1.6068141959494247, "learning_rate": 3.1826619072354083e-06, "loss": 0.5876, "step": 7437 }, { "epoch": 0.6299385983485073, "grad_norm": 1.5832836953637794, "learning_rate": 3.1813841511571474e-06, "loss": 0.6191, "step": 7438 }, { "epoch": 0.6300232902816006, "grad_norm": 1.2923844376186722, "learning_rate": 3.1801065319393578e-06, "loss": 0.6233, "step": 7439 }, { "epoch": 0.630107982214694, "grad_norm": 1.493568578876022, "learning_rate": 3.1788290496781903e-06, "loss": 0.6404, "step": 7440 }, { "epoch": 0.6301926741477875, "grad_norm": 2.380089849601673, "learning_rate": 3.177551704469779e-06, "loss": 0.6625, "step": 7441 }, { "epoch": 0.6302773660808808, "grad_norm": 1.3120692769686524, "learning_rate": 3.176274496410251e-06, "loss": 0.6236, "step": 7442 }, { "epoch": 0.6303620580139742, "grad_norm": 1.3346465871675872, "learning_rate": 3.174997425595727e-06, "loss": 0.6603, "step": 7443 }, { "epoch": 0.6304467499470675, "grad_norm": 1.2657309143270261, "learning_rate": 3.17372049212231e-06, "loss": 0.6106, "step": 7444 }, { "epoch": 0.630531441880161, "grad_norm": 1.3731972169155144, "learning_rate": 3.172443696086095e-06, "loss": 0.6414, "step": 7445 }, { "epoch": 0.6306161338132543, "grad_norm": 1.3027423853883506, "learning_rate": 3.1711670375831703e-06, "loss": 0.6081, "step": 7446 }, { "epoch": 0.6307008257463477, "grad_norm": 1.3392552681109444, "learning_rate": 3.1698905167096093e-06, "loss": 0.6681, "step": 7447 }, { "epoch": 0.630785517679441, "grad_norm": 1.6631337211800117, "learning_rate": 3.1686141335614795e-06, "loss": 0.6233, "step": 7448 }, { "epoch": 0.6308702096125344, "grad_norm": 1.2960089196846898, "learning_rate": 3.167337888234832e-06, "loss": 0.5948, "step": 7449 }, { "epoch": 0.6309549015456278, "grad_norm": 1.3958713213611298, "learning_rate": 3.1660617808257135e-06, "loss": 0.7073, "step": 7450 }, { "epoch": 0.6310395934787212, "grad_norm": 1.398089311240511, "learning_rate": 3.164785811430159e-06, "loss": 0.6767, "step": 7451 }, { "epoch": 0.6311242854118145, "grad_norm": 1.5219356668493447, "learning_rate": 3.163509980144186e-06, "loss": 0.6403, "step": 7452 }, { "epoch": 0.6312089773449079, "grad_norm": 1.345589626292611, "learning_rate": 3.1622342870638133e-06, "loss": 0.5846, "step": 7453 }, { "epoch": 0.6312936692780012, "grad_norm": 1.2279462811394324, "learning_rate": 3.1609587322850445e-06, "loss": 0.646, "step": 7454 }, { "epoch": 0.6313783612110947, "grad_norm": 1.433823335285638, "learning_rate": 3.1596833159038677e-06, "loss": 0.6285, "step": 7455 }, { "epoch": 0.631463053144188, "grad_norm": 1.2712865728278497, "learning_rate": 3.1584080380162663e-06, "loss": 0.6639, "step": 7456 }, { "epoch": 0.6315477450772814, "grad_norm": 1.4619803059079504, "learning_rate": 3.15713289871821e-06, "loss": 0.6558, "step": 7457 }, { "epoch": 0.6316324370103747, "grad_norm": 1.3788392325465255, "learning_rate": 3.1558578981056632e-06, "loss": 0.601, "step": 7458 }, { "epoch": 0.6317171289434681, "grad_norm": 2.9330843711278702, "learning_rate": 3.1545830362745756e-06, "loss": 0.6453, "step": 7459 }, { "epoch": 0.6318018208765616, "grad_norm": 1.3979764351046664, "learning_rate": 3.153308313320884e-06, "loss": 0.5953, "step": 7460 }, { "epoch": 0.6318865128096549, "grad_norm": 1.1344828026675964, "learning_rate": 3.152033729340524e-06, "loss": 0.6245, "step": 7461 }, { "epoch": 0.6319712047427483, "grad_norm": 0.6698170841411722, "learning_rate": 3.1507592844294077e-06, "loss": 0.8482, "step": 7462 }, { "epoch": 0.6320558966758416, "grad_norm": 1.150170020124496, "learning_rate": 3.1494849786834485e-06, "loss": 0.6098, "step": 7463 }, { "epoch": 0.632140588608935, "grad_norm": 1.269797514081147, "learning_rate": 3.1482108121985454e-06, "loss": 0.6028, "step": 7464 }, { "epoch": 0.6322252805420284, "grad_norm": 2.3275637033028413, "learning_rate": 3.146936785070583e-06, "loss": 0.6047, "step": 7465 }, { "epoch": 0.6323099724751218, "grad_norm": 1.3001570040734285, "learning_rate": 3.1456628973954397e-06, "loss": 0.6217, "step": 7466 }, { "epoch": 0.6323946644082151, "grad_norm": 1.3611780593257636, "learning_rate": 3.144389149268983e-06, "loss": 0.596, "step": 7467 }, { "epoch": 0.6324793563413085, "grad_norm": 0.6161871913572606, "learning_rate": 3.143115540787068e-06, "loss": 0.9003, "step": 7468 }, { "epoch": 0.6325640482744018, "grad_norm": 2.838789073710497, "learning_rate": 3.1418420720455427e-06, "loss": 0.6206, "step": 7469 }, { "epoch": 0.6326487402074953, "grad_norm": 1.1682937891895622, "learning_rate": 3.1405687431402397e-06, "loss": 0.6218, "step": 7470 }, { "epoch": 0.6327334321405886, "grad_norm": 1.5128981090154427, "learning_rate": 3.1392955541669844e-06, "loss": 0.6313, "step": 7471 }, { "epoch": 0.632818124073682, "grad_norm": 1.2767513082225388, "learning_rate": 3.138022505221594e-06, "loss": 0.6487, "step": 7472 }, { "epoch": 0.6329028160067753, "grad_norm": 1.2827025120183781, "learning_rate": 3.1367495963998668e-06, "loss": 0.616, "step": 7473 }, { "epoch": 0.6329875079398687, "grad_norm": 1.3385099653421857, "learning_rate": 3.1354768277976e-06, "loss": 0.6257, "step": 7474 }, { "epoch": 0.6330721998729621, "grad_norm": 1.2483502399038096, "learning_rate": 3.1342041995105767e-06, "loss": 0.6324, "step": 7475 }, { "epoch": 0.6331568918060555, "grad_norm": 1.56095676273565, "learning_rate": 3.132931711634565e-06, "loss": 0.6594, "step": 7476 }, { "epoch": 0.6332415837391489, "grad_norm": 1.4637864470492625, "learning_rate": 3.1316593642653305e-06, "loss": 0.6113, "step": 7477 }, { "epoch": 0.6333262756722422, "grad_norm": 1.4558715251728207, "learning_rate": 3.13038715749862e-06, "loss": 0.6192, "step": 7478 }, { "epoch": 0.6334109676053356, "grad_norm": 1.4221940785062825, "learning_rate": 3.1291150914301767e-06, "loss": 0.6237, "step": 7479 }, { "epoch": 0.633495659538429, "grad_norm": 1.3710849938682348, "learning_rate": 3.12784316615573e-06, "loss": 0.6679, "step": 7480 }, { "epoch": 0.6335803514715224, "grad_norm": 1.2250074551256687, "learning_rate": 3.126571381770998e-06, "loss": 0.6133, "step": 7481 }, { "epoch": 0.6336650434046157, "grad_norm": 5.505173577676954, "learning_rate": 3.12529973837169e-06, "loss": 0.5907, "step": 7482 }, { "epoch": 0.6337497353377091, "grad_norm": 2.294184185173929, "learning_rate": 3.1240282360535045e-06, "loss": 0.6393, "step": 7483 }, { "epoch": 0.6338344272708024, "grad_norm": 1.7177106797870783, "learning_rate": 3.1227568749121266e-06, "loss": 0.6332, "step": 7484 }, { "epoch": 0.6339191192038959, "grad_norm": 1.3297760119369852, "learning_rate": 3.121485655043237e-06, "loss": 0.6739, "step": 7485 }, { "epoch": 0.6340038111369892, "grad_norm": 1.1976242321626187, "learning_rate": 3.1202145765424964e-06, "loss": 0.6552, "step": 7486 }, { "epoch": 0.6340885030700826, "grad_norm": 1.5291512543669938, "learning_rate": 3.118943639505563e-06, "loss": 0.6356, "step": 7487 }, { "epoch": 0.6341731950031759, "grad_norm": 1.3553875147765426, "learning_rate": 3.1176728440280834e-06, "loss": 0.5897, "step": 7488 }, { "epoch": 0.6342578869362693, "grad_norm": 1.4632899061122384, "learning_rate": 3.116402190205687e-06, "loss": 0.6545, "step": 7489 }, { "epoch": 0.6343425788693627, "grad_norm": 1.487900645725464, "learning_rate": 3.1151316781340014e-06, "loss": 0.6241, "step": 7490 }, { "epoch": 0.6344272708024561, "grad_norm": 1.4676496247854949, "learning_rate": 3.1138613079086384e-06, "loss": 0.6734, "step": 7491 }, { "epoch": 0.6345119627355494, "grad_norm": 1.4612114186278182, "learning_rate": 3.112591079625198e-06, "loss": 0.574, "step": 7492 }, { "epoch": 0.6345966546686428, "grad_norm": 1.692657955432335, "learning_rate": 3.1113209933792747e-06, "loss": 0.5993, "step": 7493 }, { "epoch": 0.6346813466017361, "grad_norm": 1.250094806847198, "learning_rate": 3.1100510492664464e-06, "loss": 0.6314, "step": 7494 }, { "epoch": 0.6347660385348296, "grad_norm": 1.3413586565296132, "learning_rate": 3.1087812473822846e-06, "loss": 0.6502, "step": 7495 }, { "epoch": 0.634850730467923, "grad_norm": 2.8876176255184984, "learning_rate": 3.1075115878223505e-06, "loss": 0.663, "step": 7496 }, { "epoch": 0.6349354224010163, "grad_norm": 1.3012235027171946, "learning_rate": 3.1062420706821884e-06, "loss": 0.5989, "step": 7497 }, { "epoch": 0.6350201143341097, "grad_norm": 0.5950098784738804, "learning_rate": 3.1049726960573394e-06, "loss": 0.8236, "step": 7498 }, { "epoch": 0.635104806267203, "grad_norm": 1.984828110206002, "learning_rate": 3.103703464043329e-06, "loss": 0.6163, "step": 7499 }, { "epoch": 0.6351894982002965, "grad_norm": 1.4095704514865117, "learning_rate": 3.102434374735674e-06, "loss": 0.6296, "step": 7500 }, { "epoch": 0.6352741901333898, "grad_norm": 1.168007276466362, "learning_rate": 3.1011654282298814e-06, "loss": 0.5662, "step": 7501 }, { "epoch": 0.6353588820664832, "grad_norm": 2.2586113289166265, "learning_rate": 3.099896624621444e-06, "loss": 0.5892, "step": 7502 }, { "epoch": 0.6354435739995765, "grad_norm": 1.6688771547664203, "learning_rate": 3.0986279640058476e-06, "loss": 0.6563, "step": 7503 }, { "epoch": 0.6355282659326699, "grad_norm": 1.4404387677086312, "learning_rate": 3.0973594464785654e-06, "loss": 0.5941, "step": 7504 }, { "epoch": 0.6356129578657633, "grad_norm": 1.4666405817171306, "learning_rate": 3.0960910721350595e-06, "loss": 0.6066, "step": 7505 }, { "epoch": 0.6356976497988567, "grad_norm": 1.4672621972339563, "learning_rate": 3.0948228410707837e-06, "loss": 0.6274, "step": 7506 }, { "epoch": 0.63578234173195, "grad_norm": 1.5122816917496753, "learning_rate": 3.0935547533811763e-06, "loss": 0.5934, "step": 7507 }, { "epoch": 0.6358670336650434, "grad_norm": 1.4104149187567387, "learning_rate": 3.0922868091616663e-06, "loss": 0.624, "step": 7508 }, { "epoch": 0.6359517255981367, "grad_norm": 1.6272781423335807, "learning_rate": 3.09101900850768e-06, "loss": 0.6546, "step": 7509 }, { "epoch": 0.6360364175312302, "grad_norm": 2.2133887863454107, "learning_rate": 3.0897513515146194e-06, "loss": 0.6548, "step": 7510 }, { "epoch": 0.6361211094643235, "grad_norm": 1.669770359245332, "learning_rate": 3.0884838382778846e-06, "loss": 0.6637, "step": 7511 }, { "epoch": 0.6362058013974169, "grad_norm": 1.3986292764441524, "learning_rate": 3.0872164688928645e-06, "loss": 0.6338, "step": 7512 }, { "epoch": 0.6362904933305102, "grad_norm": 0.6440537662510201, "learning_rate": 3.0859492434549325e-06, "loss": 0.8862, "step": 7513 }, { "epoch": 0.6363751852636036, "grad_norm": 1.7829863264719117, "learning_rate": 3.0846821620594564e-06, "loss": 0.6009, "step": 7514 }, { "epoch": 0.636459877196697, "grad_norm": 1.6650567391861197, "learning_rate": 3.0834152248017884e-06, "loss": 0.6259, "step": 7515 }, { "epoch": 0.6365445691297904, "grad_norm": 1.219202657704505, "learning_rate": 3.0821484317772736e-06, "loss": 0.6093, "step": 7516 }, { "epoch": 0.6366292610628838, "grad_norm": 1.5932350603907017, "learning_rate": 3.080881783081247e-06, "loss": 0.7006, "step": 7517 }, { "epoch": 0.6367139529959771, "grad_norm": 1.7726142593018435, "learning_rate": 3.0796152788090256e-06, "loss": 0.6229, "step": 7518 }, { "epoch": 0.6367986449290705, "grad_norm": 2.5642117691725215, "learning_rate": 3.078348919055924e-06, "loss": 0.6079, "step": 7519 }, { "epoch": 0.6368833368621639, "grad_norm": 1.8997039876731703, "learning_rate": 3.077082703917244e-06, "loss": 0.664, "step": 7520 }, { "epoch": 0.6369680287952573, "grad_norm": 1.4806934834908265, "learning_rate": 3.0758166334882712e-06, "loss": 0.6505, "step": 7521 }, { "epoch": 0.6370527207283506, "grad_norm": 1.1495887045780397, "learning_rate": 3.0745507078642868e-06, "loss": 0.6687, "step": 7522 }, { "epoch": 0.637137412661444, "grad_norm": 1.4941660738628977, "learning_rate": 3.0732849271405553e-06, "loss": 0.6399, "step": 7523 }, { "epoch": 0.6372221045945373, "grad_norm": 2.1712893711050776, "learning_rate": 3.0720192914123367e-06, "loss": 0.6681, "step": 7524 }, { "epoch": 0.6373067965276308, "grad_norm": 0.6422582376972654, "learning_rate": 3.070753800774876e-06, "loss": 0.8424, "step": 7525 }, { "epoch": 0.6373914884607241, "grad_norm": 1.3853499620971366, "learning_rate": 3.0694884553234062e-06, "loss": 0.6066, "step": 7526 }, { "epoch": 0.6374761803938175, "grad_norm": 1.4120193086438362, "learning_rate": 3.068223255153153e-06, "loss": 0.6858, "step": 7527 }, { "epoch": 0.6375608723269108, "grad_norm": 1.8373670427382183, "learning_rate": 3.066958200359331e-06, "loss": 0.6265, "step": 7528 }, { "epoch": 0.6376455642600042, "grad_norm": 1.1707513496743647, "learning_rate": 3.065693291037138e-06, "loss": 0.5855, "step": 7529 }, { "epoch": 0.6377302561930976, "grad_norm": 1.4509061528700096, "learning_rate": 3.0644285272817707e-06, "loss": 0.5815, "step": 7530 }, { "epoch": 0.637814948126191, "grad_norm": 1.199177165574366, "learning_rate": 3.0631639091884034e-06, "loss": 0.6566, "step": 7531 }, { "epoch": 0.6378996400592843, "grad_norm": 0.6036360016583056, "learning_rate": 3.0618994368522082e-06, "loss": 0.8612, "step": 7532 }, { "epoch": 0.6379843319923777, "grad_norm": 0.6581694052404267, "learning_rate": 3.060635110368344e-06, "loss": 0.878, "step": 7533 }, { "epoch": 0.638069023925471, "grad_norm": 1.6181542181757373, "learning_rate": 3.0593709298319555e-06, "loss": 0.6582, "step": 7534 }, { "epoch": 0.6381537158585645, "grad_norm": 1.458288646342449, "learning_rate": 3.0581068953381827e-06, "loss": 0.6905, "step": 7535 }, { "epoch": 0.6382384077916579, "grad_norm": 1.0720317899902652, "learning_rate": 3.0568430069821475e-06, "loss": 0.5738, "step": 7536 }, { "epoch": 0.6383230997247512, "grad_norm": 1.2111896929862986, "learning_rate": 3.0555792648589657e-06, "loss": 0.6104, "step": 7537 }, { "epoch": 0.6384077916578446, "grad_norm": 1.9637028871001079, "learning_rate": 3.0543156690637423e-06, "loss": 0.6296, "step": 7538 }, { "epoch": 0.6384924835909379, "grad_norm": 1.4055187515035972, "learning_rate": 3.053052219691564e-06, "loss": 0.6298, "step": 7539 }, { "epoch": 0.6385771755240314, "grad_norm": 3.4908812237711784, "learning_rate": 3.051788916837517e-06, "loss": 0.6287, "step": 7540 }, { "epoch": 0.6386618674571247, "grad_norm": 0.5955386508808134, "learning_rate": 3.050525760596673e-06, "loss": 0.799, "step": 7541 }, { "epoch": 0.6387465593902181, "grad_norm": 1.3395464183430221, "learning_rate": 3.049262751064086e-06, "loss": 0.6685, "step": 7542 }, { "epoch": 0.6388312513233114, "grad_norm": 1.4079892424041154, "learning_rate": 3.047999888334807e-06, "loss": 0.6688, "step": 7543 }, { "epoch": 0.6389159432564048, "grad_norm": 1.204808043930899, "learning_rate": 3.046737172503873e-06, "loss": 0.5836, "step": 7544 }, { "epoch": 0.6390006351894982, "grad_norm": 1.3517448200985385, "learning_rate": 3.045474603666309e-06, "loss": 0.6543, "step": 7545 }, { "epoch": 0.6390853271225916, "grad_norm": 1.4541552404956097, "learning_rate": 3.044212181917132e-06, "loss": 0.6434, "step": 7546 }, { "epoch": 0.6391700190556849, "grad_norm": 4.736357225798462, "learning_rate": 3.0429499073513433e-06, "loss": 0.6528, "step": 7547 }, { "epoch": 0.6392547109887783, "grad_norm": 1.2209968133434639, "learning_rate": 3.0416877800639376e-06, "loss": 0.6503, "step": 7548 }, { "epoch": 0.6393394029218717, "grad_norm": 1.417695159196272, "learning_rate": 3.0404258001498974e-06, "loss": 0.6079, "step": 7549 }, { "epoch": 0.6394240948549651, "grad_norm": 3.6351246825104133, "learning_rate": 3.0391639677041905e-06, "loss": 0.5878, "step": 7550 }, { "epoch": 0.6395087867880584, "grad_norm": 1.4492417897081287, "learning_rate": 3.037902282821781e-06, "loss": 0.6974, "step": 7551 }, { "epoch": 0.6395934787211518, "grad_norm": 1.28075954453637, "learning_rate": 3.036640745597612e-06, "loss": 0.6109, "step": 7552 }, { "epoch": 0.6396781706542451, "grad_norm": 1.1849940643104298, "learning_rate": 3.035379356126622e-06, "loss": 0.5897, "step": 7553 }, { "epoch": 0.6397628625873386, "grad_norm": 1.360469773415636, "learning_rate": 3.0341181145037425e-06, "loss": 0.5992, "step": 7554 }, { "epoch": 0.639847554520432, "grad_norm": 2.258104841924058, "learning_rate": 3.0328570208238824e-06, "loss": 0.6853, "step": 7555 }, { "epoch": 0.6399322464535253, "grad_norm": 1.900757869106087, "learning_rate": 3.0315960751819475e-06, "loss": 0.6507, "step": 7556 }, { "epoch": 0.6400169383866187, "grad_norm": 1.2969849775648163, "learning_rate": 3.030335277672832e-06, "loss": 0.7139, "step": 7557 }, { "epoch": 0.640101630319712, "grad_norm": 1.5801401343208736, "learning_rate": 3.0290746283914167e-06, "loss": 0.618, "step": 7558 }, { "epoch": 0.6401863222528055, "grad_norm": 1.7967564401947678, "learning_rate": 3.0278141274325727e-06, "loss": 0.642, "step": 7559 }, { "epoch": 0.6402710141858988, "grad_norm": 1.502195049881483, "learning_rate": 3.0265537748911576e-06, "loss": 0.6257, "step": 7560 }, { "epoch": 0.6403557061189922, "grad_norm": 1.3148250952520482, "learning_rate": 3.0252935708620214e-06, "loss": 0.661, "step": 7561 }, { "epoch": 0.6404403980520855, "grad_norm": 0.5858849169188737, "learning_rate": 3.0240335154400026e-06, "loss": 0.8479, "step": 7562 }, { "epoch": 0.6405250899851789, "grad_norm": 2.5709056721933115, "learning_rate": 3.022773608719922e-06, "loss": 0.6207, "step": 7563 }, { "epoch": 0.6406097819182723, "grad_norm": 1.477309108569321, "learning_rate": 3.021513850796597e-06, "loss": 0.6187, "step": 7564 }, { "epoch": 0.6406944738513657, "grad_norm": 1.3021383250819356, "learning_rate": 3.020254241764834e-06, "loss": 0.6702, "step": 7565 }, { "epoch": 0.640779165784459, "grad_norm": 1.3723975408100078, "learning_rate": 3.018994781719421e-06, "loss": 0.5833, "step": 7566 }, { "epoch": 0.6408638577175524, "grad_norm": 1.305205356324897, "learning_rate": 3.017735470755141e-06, "loss": 0.61, "step": 7567 }, { "epoch": 0.6409485496506457, "grad_norm": 1.2597190573290484, "learning_rate": 3.0164763089667626e-06, "loss": 0.5595, "step": 7568 }, { "epoch": 0.6410332415837392, "grad_norm": 1.172919475733561, "learning_rate": 3.0152172964490456e-06, "loss": 0.6232, "step": 7569 }, { "epoch": 0.6411179335168325, "grad_norm": 1.4473084299534187, "learning_rate": 3.0139584332967374e-06, "loss": 0.6754, "step": 7570 }, { "epoch": 0.6412026254499259, "grad_norm": 0.6298010601405596, "learning_rate": 3.012699719604573e-06, "loss": 0.8595, "step": 7571 }, { "epoch": 0.6412873173830193, "grad_norm": 1.6610710751761375, "learning_rate": 3.01144115546728e-06, "loss": 0.6267, "step": 7572 }, { "epoch": 0.6413720093161126, "grad_norm": 1.194695114100368, "learning_rate": 3.0101827409795683e-06, "loss": 0.6749, "step": 7573 }, { "epoch": 0.6414567012492061, "grad_norm": 1.9726928152517251, "learning_rate": 3.0089244762361405e-06, "loss": 0.5945, "step": 7574 }, { "epoch": 0.6415413931822994, "grad_norm": 2.1632853165782273, "learning_rate": 3.007666361331692e-06, "loss": 0.6154, "step": 7575 }, { "epoch": 0.6416260851153928, "grad_norm": 1.725930141161224, "learning_rate": 3.006408396360898e-06, "loss": 0.6659, "step": 7576 }, { "epoch": 0.6417107770484861, "grad_norm": 1.6394029248891204, "learning_rate": 3.0051505814184286e-06, "loss": 0.5981, "step": 7577 }, { "epoch": 0.6417954689815795, "grad_norm": 1.524476363506892, "learning_rate": 3.0038929165989415e-06, "loss": 0.6211, "step": 7578 }, { "epoch": 0.6418801609146729, "grad_norm": 1.4926705771724562, "learning_rate": 3.0026354019970825e-06, "loss": 0.6028, "step": 7579 }, { "epoch": 0.6419648528477663, "grad_norm": 1.600502704004478, "learning_rate": 3.0013780377074864e-06, "loss": 0.6407, "step": 7580 }, { "epoch": 0.6420495447808596, "grad_norm": 1.5396711404125232, "learning_rate": 3.000120823824775e-06, "loss": 0.6587, "step": 7581 }, { "epoch": 0.642134236713953, "grad_norm": 0.6651279192994788, "learning_rate": 2.9988637604435624e-06, "loss": 0.812, "step": 7582 }, { "epoch": 0.6422189286470463, "grad_norm": 1.459668699979216, "learning_rate": 2.99760684765845e-06, "loss": 0.6766, "step": 7583 }, { "epoch": 0.6423036205801398, "grad_norm": 1.2215704525785671, "learning_rate": 2.9963500855640214e-06, "loss": 0.651, "step": 7584 }, { "epoch": 0.6423883125132331, "grad_norm": 1.235241451828967, "learning_rate": 2.995093474254861e-06, "loss": 0.623, "step": 7585 }, { "epoch": 0.6424730044463265, "grad_norm": 0.6434100606237007, "learning_rate": 2.9938370138255348e-06, "loss": 0.8528, "step": 7586 }, { "epoch": 0.6425576963794198, "grad_norm": 1.2562697763369621, "learning_rate": 2.992580704370594e-06, "loss": 0.6159, "step": 7587 }, { "epoch": 0.6426423883125132, "grad_norm": 0.6374364390048604, "learning_rate": 2.9913245459845865e-06, "loss": 0.8466, "step": 7588 }, { "epoch": 0.6427270802456067, "grad_norm": 1.3457502003240727, "learning_rate": 2.990068538762042e-06, "loss": 0.6254, "step": 7589 }, { "epoch": 0.6428117721787, "grad_norm": 1.741394676360168, "learning_rate": 2.988812682797483e-06, "loss": 0.6523, "step": 7590 }, { "epoch": 0.6428964641117934, "grad_norm": 1.5214565945297396, "learning_rate": 2.9875569781854206e-06, "loss": 0.6402, "step": 7591 }, { "epoch": 0.6429811560448867, "grad_norm": 1.4054726459822973, "learning_rate": 2.986301425020351e-06, "loss": 0.6185, "step": 7592 }, { "epoch": 0.64306584797798, "grad_norm": 2.5343747847466545, "learning_rate": 2.9850460233967617e-06, "loss": 0.5861, "step": 7593 }, { "epoch": 0.6431505399110735, "grad_norm": 1.4127075042097417, "learning_rate": 2.9837907734091305e-06, "loss": 0.6491, "step": 7594 }, { "epoch": 0.6432352318441669, "grad_norm": 1.840660057946839, "learning_rate": 2.9825356751519185e-06, "loss": 0.6432, "step": 7595 }, { "epoch": 0.6433199237772602, "grad_norm": 0.638528111828989, "learning_rate": 2.981280728719582e-06, "loss": 0.844, "step": 7596 }, { "epoch": 0.6434046157103536, "grad_norm": 2.2738934859775086, "learning_rate": 2.9800259342065584e-06, "loss": 0.6197, "step": 7597 }, { "epoch": 0.6434893076434469, "grad_norm": 1.4679797982364418, "learning_rate": 2.9787712917072796e-06, "loss": 0.6181, "step": 7598 }, { "epoch": 0.6435739995765404, "grad_norm": 1.329324076211038, "learning_rate": 2.977516801316165e-06, "loss": 0.6468, "step": 7599 }, { "epoch": 0.6436586915096337, "grad_norm": 1.2347969430573233, "learning_rate": 2.976262463127619e-06, "loss": 0.6239, "step": 7600 }, { "epoch": 0.6437433834427271, "grad_norm": 1.1967733797731337, "learning_rate": 2.975008277236041e-06, "loss": 0.6338, "step": 7601 }, { "epoch": 0.6438280753758204, "grad_norm": 1.2566023654972274, "learning_rate": 2.9737542437358115e-06, "loss": 0.6099, "step": 7602 }, { "epoch": 0.6439127673089138, "grad_norm": 1.4277165819904507, "learning_rate": 2.9725003627213046e-06, "loss": 0.5796, "step": 7603 }, { "epoch": 0.6439974592420072, "grad_norm": 2.5508730621319047, "learning_rate": 2.9712466342868833e-06, "loss": 0.6693, "step": 7604 }, { "epoch": 0.6440821511751006, "grad_norm": 2.101765611011774, "learning_rate": 2.9699930585268934e-06, "loss": 0.6198, "step": 7605 }, { "epoch": 0.6441668431081939, "grad_norm": 1.1515202800104223, "learning_rate": 2.968739635535675e-06, "loss": 0.645, "step": 7606 }, { "epoch": 0.6442515350412873, "grad_norm": 0.6240544052039386, "learning_rate": 2.9674863654075575e-06, "loss": 0.806, "step": 7607 }, { "epoch": 0.6443362269743806, "grad_norm": 1.4894549151731573, "learning_rate": 2.9662332482368516e-06, "loss": 0.661, "step": 7608 }, { "epoch": 0.6444209189074741, "grad_norm": 1.5657603224153787, "learning_rate": 2.9649802841178643e-06, "loss": 0.6648, "step": 7609 }, { "epoch": 0.6445056108405675, "grad_norm": 1.8959306078525384, "learning_rate": 2.9637274731448864e-06, "loss": 0.5862, "step": 7610 }, { "epoch": 0.6445903027736608, "grad_norm": 0.6519480391290293, "learning_rate": 2.9624748154121974e-06, "loss": 0.8644, "step": 7611 }, { "epoch": 0.6446749947067542, "grad_norm": 1.2974929588570665, "learning_rate": 2.961222311014069e-06, "loss": 0.6312, "step": 7612 }, { "epoch": 0.6447596866398475, "grad_norm": 1.3941205729230655, "learning_rate": 2.9599699600447567e-06, "loss": 0.6825, "step": 7613 }, { "epoch": 0.644844378572941, "grad_norm": 1.2918937556000032, "learning_rate": 2.9587177625985075e-06, "loss": 0.6334, "step": 7614 }, { "epoch": 0.6449290705060343, "grad_norm": 1.630433505540239, "learning_rate": 2.9574657187695565e-06, "loss": 0.6311, "step": 7615 }, { "epoch": 0.6450137624391277, "grad_norm": 5.616371935155126, "learning_rate": 2.956213828652125e-06, "loss": 0.6219, "step": 7616 }, { "epoch": 0.645098454372221, "grad_norm": 1.2420708676753398, "learning_rate": 2.954962092340428e-06, "loss": 0.6263, "step": 7617 }, { "epoch": 0.6451831463053144, "grad_norm": 1.4563999835365358, "learning_rate": 2.9537105099286595e-06, "loss": 0.6047, "step": 7618 }, { "epoch": 0.6452678382384078, "grad_norm": 1.2368612603673426, "learning_rate": 2.95245908151101e-06, "loss": 0.6422, "step": 7619 }, { "epoch": 0.6453525301715012, "grad_norm": 1.397015031598253, "learning_rate": 2.9512078071816596e-06, "loss": 0.6428, "step": 7620 }, { "epoch": 0.6454372221045945, "grad_norm": 1.4574926617176813, "learning_rate": 2.9499566870347697e-06, "loss": 0.6698, "step": 7621 }, { "epoch": 0.6455219140376879, "grad_norm": 0.6333841039811686, "learning_rate": 2.948705721164493e-06, "loss": 0.8398, "step": 7622 }, { "epoch": 0.6456066059707812, "grad_norm": 2.0406009257407827, "learning_rate": 2.947454909664975e-06, "loss": 0.6332, "step": 7623 }, { "epoch": 0.6456912979038747, "grad_norm": 1.877610090815288, "learning_rate": 2.9462042526303425e-06, "loss": 0.6721, "step": 7624 }, { "epoch": 0.645775989836968, "grad_norm": 1.469263515226202, "learning_rate": 2.9449537501547164e-06, "loss": 0.597, "step": 7625 }, { "epoch": 0.6458606817700614, "grad_norm": 1.7473278441353404, "learning_rate": 2.943703402332202e-06, "loss": 0.6831, "step": 7626 }, { "epoch": 0.6459453737031547, "grad_norm": 1.206917345574581, "learning_rate": 2.9424532092568947e-06, "loss": 0.6237, "step": 7627 }, { "epoch": 0.6460300656362481, "grad_norm": 1.2389053963813967, "learning_rate": 2.9412031710228805e-06, "loss": 0.6496, "step": 7628 }, { "epoch": 0.6461147575693416, "grad_norm": 3.2201543742610865, "learning_rate": 2.9399532877242274e-06, "loss": 0.6303, "step": 7629 }, { "epoch": 0.6461994495024349, "grad_norm": 1.6170105366620253, "learning_rate": 2.938703559454997e-06, "loss": 0.632, "step": 7630 }, { "epoch": 0.6462841414355283, "grad_norm": 6.476629767346785, "learning_rate": 2.937453986309242e-06, "loss": 0.6131, "step": 7631 }, { "epoch": 0.6463688333686216, "grad_norm": 1.5466048425331838, "learning_rate": 2.9362045683809946e-06, "loss": 0.6082, "step": 7632 }, { "epoch": 0.646453525301715, "grad_norm": 1.1716030466283436, "learning_rate": 2.9349553057642823e-06, "loss": 0.5978, "step": 7633 }, { "epoch": 0.6465382172348084, "grad_norm": 1.3422338350951564, "learning_rate": 2.9337061985531174e-06, "loss": 0.6368, "step": 7634 }, { "epoch": 0.6466229091679018, "grad_norm": 2.360946484440195, "learning_rate": 2.9324572468415022e-06, "loss": 0.6317, "step": 7635 }, { "epoch": 0.6467076011009951, "grad_norm": 0.5882369189661646, "learning_rate": 2.9312084507234283e-06, "loss": 0.8595, "step": 7636 }, { "epoch": 0.6467922930340885, "grad_norm": 1.202824205676919, "learning_rate": 2.9299598102928727e-06, "loss": 0.611, "step": 7637 }, { "epoch": 0.6468769849671818, "grad_norm": 1.4957932465569774, "learning_rate": 2.928711325643805e-06, "loss": 0.596, "step": 7638 }, { "epoch": 0.6469616769002753, "grad_norm": 0.6066617685464177, "learning_rate": 2.927462996870175e-06, "loss": 0.8378, "step": 7639 }, { "epoch": 0.6470463688333686, "grad_norm": 1.2826304526406953, "learning_rate": 2.9262148240659293e-06, "loss": 0.6313, "step": 7640 }, { "epoch": 0.647131060766462, "grad_norm": 1.3597363729516714, "learning_rate": 2.9249668073250014e-06, "loss": 0.6501, "step": 7641 }, { "epoch": 0.6472157526995553, "grad_norm": 1.4603672346441319, "learning_rate": 2.9237189467413075e-06, "loss": 0.625, "step": 7642 }, { "epoch": 0.6473004446326487, "grad_norm": 0.6317682517985719, "learning_rate": 2.9224712424087574e-06, "loss": 0.8444, "step": 7643 }, { "epoch": 0.6473851365657421, "grad_norm": 1.4568932060545428, "learning_rate": 2.921223694421248e-06, "loss": 0.6732, "step": 7644 }, { "epoch": 0.6474698284988355, "grad_norm": 1.4648566638655454, "learning_rate": 2.9199763028726623e-06, "loss": 0.6395, "step": 7645 }, { "epoch": 0.6475545204319288, "grad_norm": 0.6226694405270936, "learning_rate": 2.9187290678568757e-06, "loss": 0.8082, "step": 7646 }, { "epoch": 0.6476392123650222, "grad_norm": 0.676584888170008, "learning_rate": 2.9174819894677462e-06, "loss": 0.8207, "step": 7647 }, { "epoch": 0.6477239042981155, "grad_norm": 2.3819157767424173, "learning_rate": 2.9162350677991247e-06, "loss": 0.6323, "step": 7648 }, { "epoch": 0.647808596231209, "grad_norm": 1.7026646296183257, "learning_rate": 2.9149883029448476e-06, "loss": 0.6368, "step": 7649 }, { "epoch": 0.6478932881643024, "grad_norm": 1.472969395157314, "learning_rate": 2.9137416949987416e-06, "loss": 0.653, "step": 7650 }, { "epoch": 0.6479779800973957, "grad_norm": 1.2413127908589943, "learning_rate": 2.9124952440546207e-06, "loss": 0.5875, "step": 7651 }, { "epoch": 0.6480626720304891, "grad_norm": 1.2613280571674113, "learning_rate": 2.9112489502062886e-06, "loss": 0.6259, "step": 7652 }, { "epoch": 0.6481473639635825, "grad_norm": 2.1173859266024704, "learning_rate": 2.910002813547531e-06, "loss": 0.6936, "step": 7653 }, { "epoch": 0.6482320558966759, "grad_norm": 4.161996877991854, "learning_rate": 2.9087568341721306e-06, "loss": 0.6298, "step": 7654 }, { "epoch": 0.6483167478297692, "grad_norm": 1.4964582604416417, "learning_rate": 2.9075110121738487e-06, "loss": 0.6276, "step": 7655 }, { "epoch": 0.6484014397628626, "grad_norm": 1.4796789034672058, "learning_rate": 2.906265347646445e-06, "loss": 0.5766, "step": 7656 }, { "epoch": 0.6484861316959559, "grad_norm": 3.1541045850738394, "learning_rate": 2.905019840683663e-06, "loss": 0.611, "step": 7657 }, { "epoch": 0.6485708236290494, "grad_norm": 0.6403662333916079, "learning_rate": 2.903774491379229e-06, "loss": 0.8435, "step": 7658 }, { "epoch": 0.6486555155621427, "grad_norm": 1.3797461890854, "learning_rate": 2.902529299826865e-06, "loss": 0.6942, "step": 7659 }, { "epoch": 0.6487402074952361, "grad_norm": 0.6283832355800822, "learning_rate": 2.9012842661202795e-06, "loss": 0.8665, "step": 7660 }, { "epoch": 0.6488248994283294, "grad_norm": 1.550106564751958, "learning_rate": 2.900039390353164e-06, "loss": 0.6615, "step": 7661 }, { "epoch": 0.6489095913614228, "grad_norm": 1.5676474402613854, "learning_rate": 2.898794672619205e-06, "loss": 0.6178, "step": 7662 }, { "epoch": 0.6489942832945162, "grad_norm": 2.1263514156485934, "learning_rate": 2.8975501130120725e-06, "loss": 0.6033, "step": 7663 }, { "epoch": 0.6490789752276096, "grad_norm": 1.3576815242555527, "learning_rate": 2.8963057116254273e-06, "loss": 0.5901, "step": 7664 }, { "epoch": 0.649163667160703, "grad_norm": 1.1519370306875683, "learning_rate": 2.895061468552919e-06, "loss": 0.6756, "step": 7665 }, { "epoch": 0.6492483590937963, "grad_norm": 2.8741492688947683, "learning_rate": 2.893817383888179e-06, "loss": 0.633, "step": 7666 }, { "epoch": 0.6493330510268897, "grad_norm": 1.570302259236246, "learning_rate": 2.8925734577248343e-06, "loss": 0.6164, "step": 7667 }, { "epoch": 0.6494177429599831, "grad_norm": 0.6523483926767047, "learning_rate": 2.891329690156498e-06, "loss": 0.8037, "step": 7668 }, { "epoch": 0.6495024348930765, "grad_norm": 6.35669243841001, "learning_rate": 2.890086081276766e-06, "loss": 0.6213, "step": 7669 }, { "epoch": 0.6495871268261698, "grad_norm": 1.2893476281215794, "learning_rate": 2.8888426311792296e-06, "loss": 0.6919, "step": 7670 }, { "epoch": 0.6496718187592632, "grad_norm": 2.7238488179819837, "learning_rate": 2.8875993399574635e-06, "loss": 0.5872, "step": 7671 }, { "epoch": 0.6497565106923565, "grad_norm": 1.2860325490535645, "learning_rate": 2.8863562077050335e-06, "loss": 0.6468, "step": 7672 }, { "epoch": 0.64984120262545, "grad_norm": 0.7501966938468456, "learning_rate": 2.8851132345154925e-06, "loss": 0.8306, "step": 7673 }, { "epoch": 0.6499258945585433, "grad_norm": 1.5682823404790234, "learning_rate": 2.8838704204823775e-06, "loss": 0.6225, "step": 7674 }, { "epoch": 0.6500105864916367, "grad_norm": 1.1787341245898602, "learning_rate": 2.882627765699222e-06, "loss": 0.6503, "step": 7675 }, { "epoch": 0.65009527842473, "grad_norm": 1.4653396773132874, "learning_rate": 2.8813852702595336e-06, "loss": 0.6037, "step": 7676 }, { "epoch": 0.6501799703578234, "grad_norm": 0.6152513288463837, "learning_rate": 2.880142934256825e-06, "loss": 0.8339, "step": 7677 }, { "epoch": 0.6502646622909168, "grad_norm": 1.3460347356748834, "learning_rate": 2.8789007577845873e-06, "loss": 0.6167, "step": 7678 }, { "epoch": 0.6503493542240102, "grad_norm": 1.4327072291573744, "learning_rate": 2.8776587409362978e-06, "loss": 0.6118, "step": 7679 }, { "epoch": 0.6504340461571035, "grad_norm": 1.3887384009819608, "learning_rate": 2.8764168838054263e-06, "loss": 0.6272, "step": 7680 }, { "epoch": 0.6505187380901969, "grad_norm": 1.3541359832711681, "learning_rate": 2.8751751864854316e-06, "loss": 0.6252, "step": 7681 }, { "epoch": 0.6506034300232902, "grad_norm": 1.3486781586147396, "learning_rate": 2.873933649069753e-06, "loss": 0.5818, "step": 7682 }, { "epoch": 0.6506881219563837, "grad_norm": 1.423951869157101, "learning_rate": 2.8726922716518254e-06, "loss": 0.6798, "step": 7683 }, { "epoch": 0.650772813889477, "grad_norm": 1.3393109748820022, "learning_rate": 2.871451054325069e-06, "loss": 0.6114, "step": 7684 }, { "epoch": 0.6508575058225704, "grad_norm": 1.2488960354296605, "learning_rate": 2.8702099971828924e-06, "loss": 0.6294, "step": 7685 }, { "epoch": 0.6509421977556638, "grad_norm": 1.4282664088685844, "learning_rate": 2.8689691003186925e-06, "loss": 0.6591, "step": 7686 }, { "epoch": 0.6510268896887571, "grad_norm": 1.3150493733960793, "learning_rate": 2.86772836382585e-06, "loss": 0.6433, "step": 7687 }, { "epoch": 0.6511115816218506, "grad_norm": 1.8408082806702468, "learning_rate": 2.8664877877977406e-06, "loss": 0.6382, "step": 7688 }, { "epoch": 0.6511962735549439, "grad_norm": 1.5943988508836449, "learning_rate": 2.865247372327723e-06, "loss": 0.6291, "step": 7689 }, { "epoch": 0.6512809654880373, "grad_norm": 1.2722156235002358, "learning_rate": 2.8640071175091434e-06, "loss": 0.6251, "step": 7690 }, { "epoch": 0.6513656574211306, "grad_norm": 1.3107403961440591, "learning_rate": 2.8627670234353388e-06, "loss": 0.6225, "step": 7691 }, { "epoch": 0.651450349354224, "grad_norm": 1.4001025315006812, "learning_rate": 2.861527090199633e-06, "loss": 0.5957, "step": 7692 }, { "epoch": 0.6515350412873174, "grad_norm": 3.774399746809703, "learning_rate": 2.860287317895337e-06, "loss": 0.6185, "step": 7693 }, { "epoch": 0.6516197332204108, "grad_norm": 1.910422995478527, "learning_rate": 2.859047706615753e-06, "loss": 0.6966, "step": 7694 }, { "epoch": 0.6517044251535041, "grad_norm": 0.5645104985992755, "learning_rate": 2.8578082564541637e-06, "loss": 0.7993, "step": 7695 }, { "epoch": 0.6517891170865975, "grad_norm": 1.5461238724216118, "learning_rate": 2.8565689675038466e-06, "loss": 0.6744, "step": 7696 }, { "epoch": 0.6518738090196908, "grad_norm": 1.299538774133733, "learning_rate": 2.8553298398580655e-06, "loss": 0.5578, "step": 7697 }, { "epoch": 0.6519585009527843, "grad_norm": 1.0711666634890604, "learning_rate": 2.8540908736100693e-06, "loss": 0.6539, "step": 7698 }, { "epoch": 0.6520431928858776, "grad_norm": 2.4880717231996243, "learning_rate": 2.8528520688531003e-06, "loss": 0.6561, "step": 7699 }, { "epoch": 0.652127884818971, "grad_norm": 1.7590142074233683, "learning_rate": 2.851613425680381e-06, "loss": 0.6456, "step": 7700 }, { "epoch": 0.6522125767520643, "grad_norm": 4.942365445048342, "learning_rate": 2.850374944185128e-06, "loss": 0.6102, "step": 7701 }, { "epoch": 0.6522972686851577, "grad_norm": 1.2248339353248667, "learning_rate": 2.8491366244605444e-06, "loss": 0.6255, "step": 7702 }, { "epoch": 0.6523819606182512, "grad_norm": 1.4602834521981256, "learning_rate": 2.8478984665998175e-06, "loss": 0.5881, "step": 7703 }, { "epoch": 0.6524666525513445, "grad_norm": 1.152458327186912, "learning_rate": 2.8466604706961274e-06, "loss": 0.6448, "step": 7704 }, { "epoch": 0.6525513444844379, "grad_norm": 1.4607480165787587, "learning_rate": 2.8454226368426397e-06, "loss": 0.6006, "step": 7705 }, { "epoch": 0.6526360364175312, "grad_norm": 1.5645658867757681, "learning_rate": 2.8441849651325067e-06, "loss": 0.6203, "step": 7706 }, { "epoch": 0.6527207283506246, "grad_norm": 1.1270967035914994, "learning_rate": 2.8429474556588733e-06, "loss": 0.6463, "step": 7707 }, { "epoch": 0.652805420283718, "grad_norm": 2.3683992000232155, "learning_rate": 2.8417101085148635e-06, "loss": 0.6039, "step": 7708 }, { "epoch": 0.6528901122168114, "grad_norm": 1.5321819384140518, "learning_rate": 2.840472923793597e-06, "loss": 0.658, "step": 7709 }, { "epoch": 0.6529748041499047, "grad_norm": 1.3317384859137236, "learning_rate": 2.83923590158818e-06, "loss": 0.5767, "step": 7710 }, { "epoch": 0.6530594960829981, "grad_norm": 1.1649831274503741, "learning_rate": 2.8379990419916994e-06, "loss": 0.6178, "step": 7711 }, { "epoch": 0.6531441880160914, "grad_norm": 0.683833251139906, "learning_rate": 2.8367623450972425e-06, "loss": 0.9207, "step": 7712 }, { "epoch": 0.6532288799491849, "grad_norm": 1.4453008050783722, "learning_rate": 2.835525810997872e-06, "loss": 0.6095, "step": 7713 }, { "epoch": 0.6533135718822782, "grad_norm": 1.255695482997849, "learning_rate": 2.834289439786647e-06, "loss": 0.6252, "step": 7714 }, { "epoch": 0.6533982638153716, "grad_norm": 1.4330371735118108, "learning_rate": 2.8330532315566106e-06, "loss": 0.6664, "step": 7715 }, { "epoch": 0.6534829557484649, "grad_norm": 0.6380194993536329, "learning_rate": 2.8318171864007914e-06, "loss": 0.8142, "step": 7716 }, { "epoch": 0.6535676476815583, "grad_norm": 1.2941254111227476, "learning_rate": 2.83058130441221e-06, "loss": 0.6167, "step": 7717 }, { "epoch": 0.6536523396146517, "grad_norm": 0.669947318130113, "learning_rate": 2.829345585683873e-06, "loss": 0.8332, "step": 7718 }, { "epoch": 0.6537370315477451, "grad_norm": 1.213696320390278, "learning_rate": 2.828110030308775e-06, "loss": 0.5708, "step": 7719 }, { "epoch": 0.6538217234808384, "grad_norm": 1.4969697632222885, "learning_rate": 2.8268746383798995e-06, "loss": 0.5839, "step": 7720 }, { "epoch": 0.6539064154139318, "grad_norm": 1.4147966265411, "learning_rate": 2.825639409990213e-06, "loss": 0.6856, "step": 7721 }, { "epoch": 0.6539911073470251, "grad_norm": 1.1448269972580996, "learning_rate": 2.824404345232675e-06, "loss": 0.6105, "step": 7722 }, { "epoch": 0.6540757992801186, "grad_norm": 1.4650009463076545, "learning_rate": 2.823169444200232e-06, "loss": 0.6308, "step": 7723 }, { "epoch": 0.654160491213212, "grad_norm": 1.7638634330508438, "learning_rate": 2.821934706985813e-06, "loss": 0.627, "step": 7724 }, { "epoch": 0.6542451831463053, "grad_norm": 0.6132834988610909, "learning_rate": 2.820700133682341e-06, "loss": 0.8299, "step": 7725 }, { "epoch": 0.6543298750793987, "grad_norm": 1.3015686563673172, "learning_rate": 2.8194657243827234e-06, "loss": 0.6616, "step": 7726 }, { "epoch": 0.654414567012492, "grad_norm": 1.4173879141849703, "learning_rate": 2.818231479179857e-06, "loss": 0.6046, "step": 7727 }, { "epoch": 0.6544992589455855, "grad_norm": 0.7223448997857368, "learning_rate": 2.8169973981666266e-06, "loss": 0.8582, "step": 7728 }, { "epoch": 0.6545839508786788, "grad_norm": 1.184495489502455, "learning_rate": 2.8157634814359e-06, "loss": 0.6129, "step": 7729 }, { "epoch": 0.6546686428117722, "grad_norm": 1.2781825116900651, "learning_rate": 2.814529729080537e-06, "loss": 0.6229, "step": 7730 }, { "epoch": 0.6547533347448655, "grad_norm": 1.3453496602182515, "learning_rate": 2.8132961411933845e-06, "loss": 0.6307, "step": 7731 }, { "epoch": 0.6548380266779589, "grad_norm": 1.5008526976042555, "learning_rate": 2.8120627178672765e-06, "loss": 0.66, "step": 7732 }, { "epoch": 0.6549227186110523, "grad_norm": 1.3757827766497719, "learning_rate": 2.8108294591950345e-06, "loss": 0.6286, "step": 7733 }, { "epoch": 0.6550074105441457, "grad_norm": 1.289523810159584, "learning_rate": 2.8095963652694704e-06, "loss": 0.6664, "step": 7734 }, { "epoch": 0.655092102477239, "grad_norm": 1.209593346006497, "learning_rate": 2.8083634361833767e-06, "loss": 0.6383, "step": 7735 }, { "epoch": 0.6551767944103324, "grad_norm": 1.3472238094377198, "learning_rate": 2.807130672029541e-06, "loss": 0.6826, "step": 7736 }, { "epoch": 0.6552614863434257, "grad_norm": 1.4433589532506994, "learning_rate": 2.805898072900732e-06, "loss": 0.7076, "step": 7737 }, { "epoch": 0.6553461782765192, "grad_norm": 1.4475775037487189, "learning_rate": 2.804665638889712e-06, "loss": 0.6942, "step": 7738 }, { "epoch": 0.6554308702096125, "grad_norm": 1.099861405028902, "learning_rate": 2.8034333700892276e-06, "loss": 0.5932, "step": 7739 }, { "epoch": 0.6555155621427059, "grad_norm": 0.6294581224710882, "learning_rate": 2.8022012665920127e-06, "loss": 0.8526, "step": 7740 }, { "epoch": 0.6556002540757992, "grad_norm": 0.5552833192187997, "learning_rate": 2.800969328490793e-06, "loss": 0.8506, "step": 7741 }, { "epoch": 0.6556849460088926, "grad_norm": 1.8904860160482015, "learning_rate": 2.7997375558782737e-06, "loss": 0.6286, "step": 7742 }, { "epoch": 0.6557696379419861, "grad_norm": 1.4209821721797424, "learning_rate": 2.798505948847154e-06, "loss": 0.6479, "step": 7743 }, { "epoch": 0.6558543298750794, "grad_norm": 1.4534428544621463, "learning_rate": 2.797274507490121e-06, "loss": 0.6223, "step": 7744 }, { "epoch": 0.6559390218081728, "grad_norm": 0.6122605887773293, "learning_rate": 2.7960432318998436e-06, "loss": 0.8799, "step": 7745 }, { "epoch": 0.6560237137412661, "grad_norm": 1.163152008954773, "learning_rate": 2.794812122168982e-06, "loss": 0.6484, "step": 7746 }, { "epoch": 0.6561084056743595, "grad_norm": 1.3741268505054196, "learning_rate": 2.7935811783901878e-06, "loss": 0.6667, "step": 7747 }, { "epoch": 0.6561930976074529, "grad_norm": 1.6136126808263513, "learning_rate": 2.7923504006560925e-06, "loss": 0.6455, "step": 7748 }, { "epoch": 0.6562777895405463, "grad_norm": 0.6226330053513461, "learning_rate": 2.791119789059321e-06, "loss": 0.7992, "step": 7749 }, { "epoch": 0.6563624814736396, "grad_norm": 0.6326461822082966, "learning_rate": 2.7898893436924814e-06, "loss": 0.8417, "step": 7750 }, { "epoch": 0.656447173406733, "grad_norm": 1.5779908673710037, "learning_rate": 2.78865906464817e-06, "loss": 0.6518, "step": 7751 }, { "epoch": 0.6565318653398264, "grad_norm": 1.8782345993496254, "learning_rate": 2.7874289520189746e-06, "loss": 0.6463, "step": 7752 }, { "epoch": 0.6566165572729198, "grad_norm": 1.5461651216175407, "learning_rate": 2.7861990058974663e-06, "loss": 0.6446, "step": 7753 }, { "epoch": 0.6567012492060131, "grad_norm": 1.4137744338056706, "learning_rate": 2.784969226376206e-06, "loss": 0.5389, "step": 7754 }, { "epoch": 0.6567859411391065, "grad_norm": 1.4749541040460938, "learning_rate": 2.7837396135477416e-06, "loss": 0.6305, "step": 7755 }, { "epoch": 0.6568706330721998, "grad_norm": 1.3652456106904494, "learning_rate": 2.7825101675046057e-06, "loss": 0.6632, "step": 7756 }, { "epoch": 0.6569553250052933, "grad_norm": 1.3438899605310675, "learning_rate": 2.781280888339324e-06, "loss": 0.6341, "step": 7757 }, { "epoch": 0.6570400169383867, "grad_norm": 1.2649241590982034, "learning_rate": 2.780051776144401e-06, "loss": 0.6114, "step": 7758 }, { "epoch": 0.65712470887148, "grad_norm": 1.4201733872361835, "learning_rate": 2.7788228310123378e-06, "loss": 0.6879, "step": 7759 }, { "epoch": 0.6572094008045734, "grad_norm": 1.2245240643719493, "learning_rate": 2.7775940530356184e-06, "loss": 0.5946, "step": 7760 }, { "epoch": 0.6572940927376667, "grad_norm": 1.805360136019904, "learning_rate": 2.7763654423067144e-06, "loss": 0.6392, "step": 7761 }, { "epoch": 0.6573787846707602, "grad_norm": 2.0804819608133323, "learning_rate": 2.7751369989180855e-06, "loss": 0.6263, "step": 7762 }, { "epoch": 0.6574634766038535, "grad_norm": 1.5703876965017793, "learning_rate": 2.7739087229621806e-06, "loss": 0.6485, "step": 7763 }, { "epoch": 0.6575481685369469, "grad_norm": 2.4250475585691365, "learning_rate": 2.772680614531431e-06, "loss": 0.6339, "step": 7764 }, { "epoch": 0.6576328604700402, "grad_norm": 1.6584901176434086, "learning_rate": 2.77145267371826e-06, "loss": 0.5741, "step": 7765 }, { "epoch": 0.6577175524031336, "grad_norm": 1.354008122831412, "learning_rate": 2.770224900615075e-06, "loss": 0.6581, "step": 7766 }, { "epoch": 0.657802244336227, "grad_norm": 1.3379068224097854, "learning_rate": 2.768997295314271e-06, "loss": 0.6328, "step": 7767 }, { "epoch": 0.6578869362693204, "grad_norm": 1.3546200962298343, "learning_rate": 2.7677698579082385e-06, "loss": 0.6407, "step": 7768 }, { "epoch": 0.6579716282024137, "grad_norm": 1.296943183710742, "learning_rate": 2.766542588489342e-06, "loss": 0.6475, "step": 7769 }, { "epoch": 0.6580563201355071, "grad_norm": 1.6335171911821726, "learning_rate": 2.7653154871499434e-06, "loss": 0.571, "step": 7770 }, { "epoch": 0.6581410120686004, "grad_norm": 1.4458364455039712, "learning_rate": 2.764088553982388e-06, "loss": 0.5958, "step": 7771 }, { "epoch": 0.6582257040016939, "grad_norm": 1.7733662154533842, "learning_rate": 2.762861789079008e-06, "loss": 0.62, "step": 7772 }, { "epoch": 0.6583103959347872, "grad_norm": 0.6443838115010341, "learning_rate": 2.761635192532124e-06, "loss": 0.8572, "step": 7773 }, { "epoch": 0.6583950878678806, "grad_norm": 1.5837397336573138, "learning_rate": 2.7604087644340446e-06, "loss": 0.6695, "step": 7774 }, { "epoch": 0.6584797798009739, "grad_norm": 1.4907393460330471, "learning_rate": 2.7591825048770648e-06, "loss": 0.6714, "step": 7775 }, { "epoch": 0.6585644717340673, "grad_norm": 1.4827433460858472, "learning_rate": 2.7579564139534693e-06, "loss": 0.6128, "step": 7776 }, { "epoch": 0.6586491636671608, "grad_norm": 1.344227487228767, "learning_rate": 2.7567304917555237e-06, "loss": 0.5978, "step": 7777 }, { "epoch": 0.6587338556002541, "grad_norm": 1.2510562466727129, "learning_rate": 2.7555047383754894e-06, "loss": 0.6, "step": 7778 }, { "epoch": 0.6588185475333475, "grad_norm": 1.8183676606069474, "learning_rate": 2.7542791539056067e-06, "loss": 0.5946, "step": 7779 }, { "epoch": 0.6589032394664408, "grad_norm": 1.4833317137006652, "learning_rate": 2.753053738438109e-06, "loss": 0.6385, "step": 7780 }, { "epoch": 0.6589879313995342, "grad_norm": 1.4855421533959847, "learning_rate": 2.751828492065216e-06, "loss": 0.6438, "step": 7781 }, { "epoch": 0.6590726233326276, "grad_norm": 1.7667027175164505, "learning_rate": 2.7506034148791332e-06, "loss": 0.5747, "step": 7782 }, { "epoch": 0.659157315265721, "grad_norm": 1.328257621612882, "learning_rate": 2.7493785069720546e-06, "loss": 0.6007, "step": 7783 }, { "epoch": 0.6592420071988143, "grad_norm": 1.3142356171722733, "learning_rate": 2.7481537684361637e-06, "loss": 0.6157, "step": 7784 }, { "epoch": 0.6593266991319077, "grad_norm": 1.8101235585236457, "learning_rate": 2.7469291993636233e-06, "loss": 0.5927, "step": 7785 }, { "epoch": 0.659411391065001, "grad_norm": 1.4306134085996889, "learning_rate": 2.7457047998465937e-06, "loss": 0.6159, "step": 7786 }, { "epoch": 0.6594960829980945, "grad_norm": 1.402645242265908, "learning_rate": 2.7444805699772103e-06, "loss": 0.5875, "step": 7787 }, { "epoch": 0.6595807749311878, "grad_norm": 1.772566782624115, "learning_rate": 2.7432565098476095e-06, "loss": 0.7048, "step": 7788 }, { "epoch": 0.6596654668642812, "grad_norm": 1.7882436298690911, "learning_rate": 2.7420326195499086e-06, "loss": 0.6326, "step": 7789 }, { "epoch": 0.6597501587973745, "grad_norm": 2.705225676050381, "learning_rate": 2.7408088991762073e-06, "loss": 0.568, "step": 7790 }, { "epoch": 0.6598348507304679, "grad_norm": 1.1337224003817747, "learning_rate": 2.7395853488185995e-06, "loss": 0.6507, "step": 7791 }, { "epoch": 0.6599195426635613, "grad_norm": 1.3165783991383644, "learning_rate": 2.7383619685691663e-06, "loss": 0.612, "step": 7792 }, { "epoch": 0.6600042345966547, "grad_norm": 1.4157572607224904, "learning_rate": 2.7371387585199683e-06, "loss": 0.5975, "step": 7793 }, { "epoch": 0.660088926529748, "grad_norm": 1.2546415452412525, "learning_rate": 2.7359157187630615e-06, "loss": 0.6868, "step": 7794 }, { "epoch": 0.6601736184628414, "grad_norm": 1.5953148704031923, "learning_rate": 2.734692849390485e-06, "loss": 0.6727, "step": 7795 }, { "epoch": 0.6602583103959347, "grad_norm": 1.4273866546054357, "learning_rate": 2.7334701504942675e-06, "loss": 0.6033, "step": 7796 }, { "epoch": 0.6603430023290282, "grad_norm": 1.3577874811723385, "learning_rate": 2.732247622166425e-06, "loss": 0.6491, "step": 7797 }, { "epoch": 0.6604276942621216, "grad_norm": 1.342198397145671, "learning_rate": 2.7310252644989553e-06, "loss": 0.5775, "step": 7798 }, { "epoch": 0.6605123861952149, "grad_norm": 1.5597133945374777, "learning_rate": 2.729803077583849e-06, "loss": 0.6192, "step": 7799 }, { "epoch": 0.6605970781283083, "grad_norm": 1.4025794653768118, "learning_rate": 2.728581061513085e-06, "loss": 0.6207, "step": 7800 }, { "epoch": 0.6606817700614016, "grad_norm": 1.8912430523299666, "learning_rate": 2.727359216378621e-06, "loss": 0.6763, "step": 7801 }, { "epoch": 0.6607664619944951, "grad_norm": 1.7608963878814146, "learning_rate": 2.7261375422724105e-06, "loss": 0.6198, "step": 7802 }, { "epoch": 0.6608511539275884, "grad_norm": 1.369723970361834, "learning_rate": 2.7249160392863905e-06, "loss": 0.634, "step": 7803 }, { "epoch": 0.6609358458606818, "grad_norm": 0.5956051637591407, "learning_rate": 2.7236947075124865e-06, "loss": 0.8506, "step": 7804 }, { "epoch": 0.6610205377937751, "grad_norm": 0.5629737163073638, "learning_rate": 2.72247354704261e-06, "loss": 0.8422, "step": 7805 }, { "epoch": 0.6611052297268685, "grad_norm": 1.9648389793637526, "learning_rate": 2.7212525579686583e-06, "loss": 0.6605, "step": 7806 }, { "epoch": 0.6611899216599619, "grad_norm": 0.6488140754800401, "learning_rate": 2.7200317403825194e-06, "loss": 0.8999, "step": 7807 }, { "epoch": 0.6612746135930553, "grad_norm": 1.3697533758160032, "learning_rate": 2.7188110943760614e-06, "loss": 0.5908, "step": 7808 }, { "epoch": 0.6613593055261486, "grad_norm": 1.4096326077993648, "learning_rate": 2.71759062004115e-06, "loss": 0.6495, "step": 7809 }, { "epoch": 0.661443997459242, "grad_norm": 0.647598738888254, "learning_rate": 2.716370317469632e-06, "loss": 0.8438, "step": 7810 }, { "epoch": 0.6615286893923353, "grad_norm": 1.334904421213558, "learning_rate": 2.715150186753339e-06, "loss": 0.5781, "step": 7811 }, { "epoch": 0.6616133813254288, "grad_norm": 1.1385034474343045, "learning_rate": 2.713930227984093e-06, "loss": 0.5642, "step": 7812 }, { "epoch": 0.6616980732585221, "grad_norm": 1.4546382522853305, "learning_rate": 2.712710441253704e-06, "loss": 0.6702, "step": 7813 }, { "epoch": 0.6617827651916155, "grad_norm": 1.3008273879201266, "learning_rate": 2.7114908266539642e-06, "loss": 0.6159, "step": 7814 }, { "epoch": 0.6618674571247088, "grad_norm": 1.9866484112676037, "learning_rate": 2.710271384276658e-06, "loss": 0.6649, "step": 7815 }, { "epoch": 0.6619521490578022, "grad_norm": 1.9750148939603756, "learning_rate": 2.709052114213555e-06, "loss": 0.5953, "step": 7816 }, { "epoch": 0.6620368409908957, "grad_norm": 2.084805468967209, "learning_rate": 2.7078330165564113e-06, "loss": 0.6558, "step": 7817 }, { "epoch": 0.662121532923989, "grad_norm": 1.5103970603408554, "learning_rate": 2.706614091396973e-06, "loss": 0.6354, "step": 7818 }, { "epoch": 0.6622062248570824, "grad_norm": 1.3982940045700554, "learning_rate": 2.705395338826966e-06, "loss": 0.6371, "step": 7819 }, { "epoch": 0.6622909167901757, "grad_norm": 1.2655073217435986, "learning_rate": 2.7041767589381106e-06, "loss": 0.6099, "step": 7820 }, { "epoch": 0.6623756087232691, "grad_norm": 1.2595756187230058, "learning_rate": 2.7029583518221137e-06, "loss": 0.6312, "step": 7821 }, { "epoch": 0.6624603006563625, "grad_norm": 1.4606960810771528, "learning_rate": 2.7017401175706614e-06, "loss": 0.5935, "step": 7822 }, { "epoch": 0.6625449925894559, "grad_norm": 1.8697036233588848, "learning_rate": 2.7005220562754354e-06, "loss": 0.6129, "step": 7823 }, { "epoch": 0.6626296845225492, "grad_norm": 1.6864422610436836, "learning_rate": 2.6993041680281008e-06, "loss": 0.6649, "step": 7824 }, { "epoch": 0.6627143764556426, "grad_norm": 1.640594681014574, "learning_rate": 2.69808645292031e-06, "loss": 0.6381, "step": 7825 }, { "epoch": 0.6627990683887359, "grad_norm": 1.8925561341606711, "learning_rate": 2.696868911043705e-06, "loss": 0.6788, "step": 7826 }, { "epoch": 0.6628837603218294, "grad_norm": 1.6387221084199157, "learning_rate": 2.6956515424899082e-06, "loss": 0.6368, "step": 7827 }, { "epoch": 0.6629684522549227, "grad_norm": 1.6920462898806559, "learning_rate": 2.694434347350535e-06, "loss": 0.6061, "step": 7828 }, { "epoch": 0.6630531441880161, "grad_norm": 1.5029002632231871, "learning_rate": 2.6932173257171857e-06, "loss": 0.6233, "step": 7829 }, { "epoch": 0.6631378361211094, "grad_norm": 1.41965626134976, "learning_rate": 2.692000477681448e-06, "loss": 0.6037, "step": 7830 }, { "epoch": 0.6632225280542028, "grad_norm": 1.4764121556640029, "learning_rate": 2.6907838033348973e-06, "loss": 0.6207, "step": 7831 }, { "epoch": 0.6633072199872962, "grad_norm": 0.5887306424362048, "learning_rate": 2.689567302769091e-06, "loss": 0.8553, "step": 7832 }, { "epoch": 0.6633919119203896, "grad_norm": 1.4196871078842694, "learning_rate": 2.68835097607558e-06, "loss": 0.6446, "step": 7833 }, { "epoch": 0.663476603853483, "grad_norm": 1.9591085838958302, "learning_rate": 2.6871348233459006e-06, "loss": 0.6341, "step": 7834 }, { "epoch": 0.6635612957865763, "grad_norm": 1.7411345352720684, "learning_rate": 2.685918844671571e-06, "loss": 0.6314, "step": 7835 }, { "epoch": 0.6636459877196696, "grad_norm": 1.4186206957624778, "learning_rate": 2.6847030401441022e-06, "loss": 0.6447, "step": 7836 }, { "epoch": 0.6637306796527631, "grad_norm": 0.6281625915644313, "learning_rate": 2.6834874098549897e-06, "loss": 0.8569, "step": 7837 }, { "epoch": 0.6638153715858565, "grad_norm": 0.6394342915887806, "learning_rate": 2.682271953895716e-06, "loss": 0.8118, "step": 7838 }, { "epoch": 0.6639000635189498, "grad_norm": 1.5102101428566672, "learning_rate": 2.6810566723577524e-06, "loss": 0.6062, "step": 7839 }, { "epoch": 0.6639847554520432, "grad_norm": 1.5352246918205965, "learning_rate": 2.6798415653325515e-06, "loss": 0.5815, "step": 7840 }, { "epoch": 0.6640694473851365, "grad_norm": 1.4370536491349788, "learning_rate": 2.6786266329115596e-06, "loss": 0.5973, "step": 7841 }, { "epoch": 0.66415413931823, "grad_norm": 1.273104529131336, "learning_rate": 2.677411875186207e-06, "loss": 0.5976, "step": 7842 }, { "epoch": 0.6642388312513233, "grad_norm": 1.5930915201316926, "learning_rate": 2.6761972922479056e-06, "loss": 0.6356, "step": 7843 }, { "epoch": 0.6643235231844167, "grad_norm": 1.499264910749258, "learning_rate": 2.6749828841880675e-06, "loss": 0.6158, "step": 7844 }, { "epoch": 0.66440821511751, "grad_norm": 2.0400959197346116, "learning_rate": 2.6737686510980763e-06, "loss": 0.6397, "step": 7845 }, { "epoch": 0.6644929070506034, "grad_norm": 0.619330803626439, "learning_rate": 2.6725545930693127e-06, "loss": 0.8534, "step": 7846 }, { "epoch": 0.6645775989836968, "grad_norm": 1.3476100932137078, "learning_rate": 2.671340710193142e-06, "loss": 0.6118, "step": 7847 }, { "epoch": 0.6646622909167902, "grad_norm": 2.553042152431685, "learning_rate": 2.6701270025609115e-06, "loss": 0.6269, "step": 7848 }, { "epoch": 0.6647469828498835, "grad_norm": 1.318461384623733, "learning_rate": 2.6689134702639616e-06, "loss": 0.6413, "step": 7849 }, { "epoch": 0.6648316747829769, "grad_norm": 1.2829881607153546, "learning_rate": 2.6677001133936164e-06, "loss": 0.6085, "step": 7850 }, { "epoch": 0.6649163667160702, "grad_norm": 1.2567177506658256, "learning_rate": 2.6664869320411885e-06, "loss": 0.6167, "step": 7851 }, { "epoch": 0.6650010586491637, "grad_norm": 1.440636901054142, "learning_rate": 2.665273926297977e-06, "loss": 0.6738, "step": 7852 }, { "epoch": 0.665085750582257, "grad_norm": 1.766848494748378, "learning_rate": 2.664061096255264e-06, "loss": 0.6209, "step": 7853 }, { "epoch": 0.6651704425153504, "grad_norm": 2.0157506961760445, "learning_rate": 2.6628484420043223e-06, "loss": 0.6299, "step": 7854 }, { "epoch": 0.6652551344484438, "grad_norm": 0.6202197062658467, "learning_rate": 2.661635963636413e-06, "loss": 0.8273, "step": 7855 }, { "epoch": 0.6653398263815372, "grad_norm": 3.14128796965656, "learning_rate": 2.660423661242778e-06, "loss": 0.6491, "step": 7856 }, { "epoch": 0.6654245183146306, "grad_norm": 1.6549778452913522, "learning_rate": 2.659211534914651e-06, "loss": 0.5968, "step": 7857 }, { "epoch": 0.6655092102477239, "grad_norm": 1.4288371489467682, "learning_rate": 2.6579995847432515e-06, "loss": 0.6413, "step": 7858 }, { "epoch": 0.6655939021808173, "grad_norm": 1.3252348879986096, "learning_rate": 2.656787810819784e-06, "loss": 0.6417, "step": 7859 }, { "epoch": 0.6656785941139106, "grad_norm": 3.4060562767982328, "learning_rate": 2.6555762132354447e-06, "loss": 0.6381, "step": 7860 }, { "epoch": 0.6657632860470041, "grad_norm": 1.575520914348646, "learning_rate": 2.6543647920814068e-06, "loss": 0.6552, "step": 7861 }, { "epoch": 0.6658479779800974, "grad_norm": 2.6498654286570655, "learning_rate": 2.6531535474488394e-06, "loss": 0.6212, "step": 7862 }, { "epoch": 0.6659326699131908, "grad_norm": 1.193628548124633, "learning_rate": 2.6519424794288943e-06, "loss": 0.6203, "step": 7863 }, { "epoch": 0.6660173618462841, "grad_norm": 1.4402254388958506, "learning_rate": 2.6507315881127114e-06, "loss": 0.5926, "step": 7864 }, { "epoch": 0.6661020537793775, "grad_norm": 1.2655132985189321, "learning_rate": 2.649520873591417e-06, "loss": 0.6453, "step": 7865 }, { "epoch": 0.6661867457124709, "grad_norm": 1.1782981130299837, "learning_rate": 2.6483103359561245e-06, "loss": 0.6653, "step": 7866 }, { "epoch": 0.6662714376455643, "grad_norm": 1.4704812617910736, "learning_rate": 2.6470999752979303e-06, "loss": 0.6418, "step": 7867 }, { "epoch": 0.6663561295786576, "grad_norm": 1.3612846921309862, "learning_rate": 2.645889791707924e-06, "loss": 0.6417, "step": 7868 }, { "epoch": 0.666440821511751, "grad_norm": 1.5925291339422274, "learning_rate": 2.6446797852771743e-06, "loss": 0.5726, "step": 7869 }, { "epoch": 0.6665255134448443, "grad_norm": 1.4433894656708364, "learning_rate": 2.6434699560967435e-06, "loss": 0.6471, "step": 7870 }, { "epoch": 0.6666102053779378, "grad_norm": 1.536365702434797, "learning_rate": 2.642260304257677e-06, "loss": 0.6032, "step": 7871 }, { "epoch": 0.6666948973110312, "grad_norm": 1.364410205934607, "learning_rate": 2.641050829851006e-06, "loss": 0.6392, "step": 7872 }, { "epoch": 0.6667795892441245, "grad_norm": 1.191722750426785, "learning_rate": 2.6398415329677525e-06, "loss": 0.6498, "step": 7873 }, { "epoch": 0.6668642811772179, "grad_norm": 1.327502232389375, "learning_rate": 2.6386324136989226e-06, "loss": 0.6357, "step": 7874 }, { "epoch": 0.6669489731103112, "grad_norm": 1.3055518884939132, "learning_rate": 2.637423472135506e-06, "loss": 0.6315, "step": 7875 }, { "epoch": 0.6670336650434047, "grad_norm": 2.4327711579769313, "learning_rate": 2.6362147083684854e-06, "loss": 0.6826, "step": 7876 }, { "epoch": 0.667118356976498, "grad_norm": 1.2257175351395568, "learning_rate": 2.6350061224888233e-06, "loss": 0.632, "step": 7877 }, { "epoch": 0.6672030489095914, "grad_norm": 1.2081409084771977, "learning_rate": 2.6337977145874716e-06, "loss": 0.5949, "step": 7878 }, { "epoch": 0.6672877408426847, "grad_norm": 1.3677651014156365, "learning_rate": 2.6325894847553746e-06, "loss": 0.6113, "step": 7879 }, { "epoch": 0.6673724327757781, "grad_norm": 1.8038285921827149, "learning_rate": 2.631381433083454e-06, "loss": 0.6441, "step": 7880 }, { "epoch": 0.6674571247088715, "grad_norm": 1.503666746694376, "learning_rate": 2.6301735596626243e-06, "loss": 0.6633, "step": 7881 }, { "epoch": 0.6675418166419649, "grad_norm": 1.3866770652901539, "learning_rate": 2.628965864583781e-06, "loss": 0.622, "step": 7882 }, { "epoch": 0.6676265085750582, "grad_norm": 1.2067631964793046, "learning_rate": 2.6277583479378123e-06, "loss": 0.6148, "step": 7883 }, { "epoch": 0.6677112005081516, "grad_norm": 1.6337205029653985, "learning_rate": 2.626551009815589e-06, "loss": 0.6161, "step": 7884 }, { "epoch": 0.6677958924412449, "grad_norm": 1.217687974154866, "learning_rate": 2.6253438503079707e-06, "loss": 0.6622, "step": 7885 }, { "epoch": 0.6678805843743384, "grad_norm": 1.4525533224707903, "learning_rate": 2.6241368695058017e-06, "loss": 0.5953, "step": 7886 }, { "epoch": 0.6679652763074317, "grad_norm": 1.4254099189443872, "learning_rate": 2.6229300674999157e-06, "loss": 0.6157, "step": 7887 }, { "epoch": 0.6680499682405251, "grad_norm": 1.3745267652197066, "learning_rate": 2.6217234443811277e-06, "loss": 0.6168, "step": 7888 }, { "epoch": 0.6681346601736184, "grad_norm": 0.567799697638391, "learning_rate": 2.6205170002402465e-06, "loss": 0.825, "step": 7889 }, { "epoch": 0.6682193521067118, "grad_norm": 1.4589612518949981, "learning_rate": 2.6193107351680587e-06, "loss": 0.5908, "step": 7890 }, { "epoch": 0.6683040440398053, "grad_norm": 1.4185681572437705, "learning_rate": 2.6181046492553442e-06, "loss": 0.6034, "step": 7891 }, { "epoch": 0.6683887359728986, "grad_norm": 1.4431472819372086, "learning_rate": 2.6168987425928678e-06, "loss": 0.6226, "step": 7892 }, { "epoch": 0.668473427905992, "grad_norm": 1.3202432166770355, "learning_rate": 2.61569301527138e-06, "loss": 0.6182, "step": 7893 }, { "epoch": 0.6685581198390853, "grad_norm": 0.64099668322441, "learning_rate": 2.6144874673816185e-06, "loss": 0.8312, "step": 7894 }, { "epoch": 0.6686428117721787, "grad_norm": 1.5768722178530488, "learning_rate": 2.613282099014308e-06, "loss": 0.6323, "step": 7895 }, { "epoch": 0.6687275037052721, "grad_norm": 1.1930407086113288, "learning_rate": 2.612076910260157e-06, "loss": 0.627, "step": 7896 }, { "epoch": 0.6688121956383655, "grad_norm": 1.7146995049274565, "learning_rate": 2.610871901209865e-06, "loss": 0.636, "step": 7897 }, { "epoch": 0.6688968875714588, "grad_norm": 1.3680549364978676, "learning_rate": 2.6096670719541113e-06, "loss": 0.6159, "step": 7898 }, { "epoch": 0.6689815795045522, "grad_norm": 1.2283891373729956, "learning_rate": 2.608462422583566e-06, "loss": 0.5959, "step": 7899 }, { "epoch": 0.6690662714376455, "grad_norm": 1.5468287376090788, "learning_rate": 2.6072579531888907e-06, "loss": 0.5333, "step": 7900 }, { "epoch": 0.669150963370739, "grad_norm": 0.662935407164117, "learning_rate": 2.6060536638607228e-06, "loss": 0.8897, "step": 7901 }, { "epoch": 0.6692356553038323, "grad_norm": 1.69591443448455, "learning_rate": 2.6048495546896936e-06, "loss": 0.6627, "step": 7902 }, { "epoch": 0.6693203472369257, "grad_norm": 1.4530828533971096, "learning_rate": 2.60364562576642e-06, "loss": 0.6513, "step": 7903 }, { "epoch": 0.669405039170019, "grad_norm": 1.6971488942014838, "learning_rate": 2.6024418771815e-06, "loss": 0.619, "step": 7904 }, { "epoch": 0.6694897311031124, "grad_norm": 1.290074601178406, "learning_rate": 2.601238309025525e-06, "loss": 0.5957, "step": 7905 }, { "epoch": 0.6695744230362058, "grad_norm": 1.237383797587926, "learning_rate": 2.600034921389069e-06, "loss": 0.5887, "step": 7906 }, { "epoch": 0.6696591149692992, "grad_norm": 1.741119942121671, "learning_rate": 2.598831714362694e-06, "loss": 0.5813, "step": 7907 }, { "epoch": 0.6697438069023925, "grad_norm": 1.2321374739329625, "learning_rate": 2.597628688036949e-06, "loss": 0.6278, "step": 7908 }, { "epoch": 0.6698284988354859, "grad_norm": 1.6345940691217027, "learning_rate": 2.596425842502364e-06, "loss": 0.6559, "step": 7909 }, { "epoch": 0.6699131907685792, "grad_norm": 1.660862309555621, "learning_rate": 2.595223177849464e-06, "loss": 0.6135, "step": 7910 }, { "epoch": 0.6699978827016727, "grad_norm": 1.321851642996494, "learning_rate": 2.594020694168753e-06, "loss": 0.6651, "step": 7911 }, { "epoch": 0.6700825746347661, "grad_norm": 1.4924752516621174, "learning_rate": 2.5928183915507233e-06, "loss": 0.6163, "step": 7912 }, { "epoch": 0.6701672665678594, "grad_norm": 0.5928673843315422, "learning_rate": 2.591616270085857e-06, "loss": 0.8174, "step": 7913 }, { "epoch": 0.6702519585009528, "grad_norm": 1.3470791976798706, "learning_rate": 2.59041432986462e-06, "loss": 0.6278, "step": 7914 }, { "epoch": 0.6703366504340461, "grad_norm": 1.6867926599509082, "learning_rate": 2.589212570977463e-06, "loss": 0.6806, "step": 7915 }, { "epoch": 0.6704213423671396, "grad_norm": 2.527862743703709, "learning_rate": 2.588010993514828e-06, "loss": 0.636, "step": 7916 }, { "epoch": 0.6705060343002329, "grad_norm": 1.561524467208107, "learning_rate": 2.586809597567136e-06, "loss": 0.6558, "step": 7917 }, { "epoch": 0.6705907262333263, "grad_norm": 1.4280573807305705, "learning_rate": 2.5856083832248024e-06, "loss": 0.6296, "step": 7918 }, { "epoch": 0.6706754181664196, "grad_norm": 1.553198841041909, "learning_rate": 2.5844073505782185e-06, "loss": 0.5597, "step": 7919 }, { "epoch": 0.670760110099513, "grad_norm": 1.318571745685194, "learning_rate": 2.5832064997177754e-06, "loss": 0.6634, "step": 7920 }, { "epoch": 0.6708448020326064, "grad_norm": 0.6112752281476611, "learning_rate": 2.582005830733841e-06, "loss": 0.8301, "step": 7921 }, { "epoch": 0.6709294939656998, "grad_norm": 1.4563852178437608, "learning_rate": 2.580805343716771e-06, "loss": 0.5685, "step": 7922 }, { "epoch": 0.6710141858987931, "grad_norm": 0.6441659127211958, "learning_rate": 2.579605038756909e-06, "loss": 0.8809, "step": 7923 }, { "epoch": 0.6710988778318865, "grad_norm": 0.6066718146429247, "learning_rate": 2.578404915944587e-06, "loss": 0.8548, "step": 7924 }, { "epoch": 0.6711835697649798, "grad_norm": 5.042611464967391, "learning_rate": 2.577204975370115e-06, "loss": 0.5975, "step": 7925 }, { "epoch": 0.6712682616980733, "grad_norm": 1.8428143292361132, "learning_rate": 2.5760052171237983e-06, "loss": 0.6359, "step": 7926 }, { "epoch": 0.6713529536311666, "grad_norm": 1.2791740874208504, "learning_rate": 2.5748056412959244e-06, "loss": 0.5898, "step": 7927 }, { "epoch": 0.67143764556426, "grad_norm": 0.6593175122542292, "learning_rate": 2.573606247976769e-06, "loss": 0.8893, "step": 7928 }, { "epoch": 0.6715223374973533, "grad_norm": 1.3431847082405675, "learning_rate": 2.5724070372565923e-06, "loss": 0.6218, "step": 7929 }, { "epoch": 0.6716070294304467, "grad_norm": 1.7455644858949635, "learning_rate": 2.5712080092256396e-06, "loss": 0.631, "step": 7930 }, { "epoch": 0.6716917213635402, "grad_norm": 1.7172968406738343, "learning_rate": 2.5700091639741453e-06, "loss": 0.5867, "step": 7931 }, { "epoch": 0.6717764132966335, "grad_norm": 0.6008642031401338, "learning_rate": 2.5688105015923307e-06, "loss": 0.8326, "step": 7932 }, { "epoch": 0.6718611052297269, "grad_norm": 0.6543994658555503, "learning_rate": 2.567612022170398e-06, "loss": 0.8759, "step": 7933 }, { "epoch": 0.6719457971628202, "grad_norm": 1.7985762457385315, "learning_rate": 2.56641372579854e-06, "loss": 0.6802, "step": 7934 }, { "epoch": 0.6720304890959136, "grad_norm": 1.5541373335525843, "learning_rate": 2.565215612566936e-06, "loss": 0.6414, "step": 7935 }, { "epoch": 0.672115181029007, "grad_norm": 1.3174914799775221, "learning_rate": 2.56401768256575e-06, "loss": 0.6186, "step": 7936 }, { "epoch": 0.6721998729621004, "grad_norm": 2.4265558435248646, "learning_rate": 2.562819935885135e-06, "loss": 0.6365, "step": 7937 }, { "epoch": 0.6722845648951937, "grad_norm": 1.3973603939262103, "learning_rate": 2.5616223726152225e-06, "loss": 0.6272, "step": 7938 }, { "epoch": 0.6723692568282871, "grad_norm": 1.3343624108773542, "learning_rate": 2.560424992846138e-06, "loss": 0.604, "step": 7939 }, { "epoch": 0.6724539487613804, "grad_norm": 1.1488883186830294, "learning_rate": 2.559227796667992e-06, "loss": 0.6188, "step": 7940 }, { "epoch": 0.6725386406944739, "grad_norm": 1.3127196703069501, "learning_rate": 2.5580307841708785e-06, "loss": 0.6492, "step": 7941 }, { "epoch": 0.6726233326275672, "grad_norm": 2.077453560528941, "learning_rate": 2.5568339554448806e-06, "loss": 0.6776, "step": 7942 }, { "epoch": 0.6727080245606606, "grad_norm": 1.4014139276549404, "learning_rate": 2.5556373105800636e-06, "loss": 0.6029, "step": 7943 }, { "epoch": 0.6727927164937539, "grad_norm": 1.3968485631440974, "learning_rate": 2.554440849666482e-06, "loss": 0.6037, "step": 7944 }, { "epoch": 0.6728774084268473, "grad_norm": 0.5990487707826492, "learning_rate": 2.553244572794178e-06, "loss": 0.8064, "step": 7945 }, { "epoch": 0.6729621003599408, "grad_norm": 1.8364107003492283, "learning_rate": 2.5520484800531746e-06, "loss": 0.6366, "step": 7946 }, { "epoch": 0.6730467922930341, "grad_norm": 1.2769851996408825, "learning_rate": 2.550852571533486e-06, "loss": 0.6585, "step": 7947 }, { "epoch": 0.6731314842261275, "grad_norm": 0.5671584663783454, "learning_rate": 2.5496568473251092e-06, "loss": 0.7711, "step": 7948 }, { "epoch": 0.6732161761592208, "grad_norm": 1.401374124726211, "learning_rate": 2.5484613075180307e-06, "loss": 0.6207, "step": 7949 }, { "epoch": 0.6733008680923142, "grad_norm": 1.5308850872131856, "learning_rate": 2.547265952202222e-06, "loss": 0.6564, "step": 7950 }, { "epoch": 0.6733855600254076, "grad_norm": 1.754998183493853, "learning_rate": 2.5460707814676366e-06, "loss": 0.6577, "step": 7951 }, { "epoch": 0.673470251958501, "grad_norm": 8.96215102693495, "learning_rate": 2.544875795404218e-06, "loss": 0.654, "step": 7952 }, { "epoch": 0.6735549438915943, "grad_norm": 1.44536773876219, "learning_rate": 2.543680994101899e-06, "loss": 0.6485, "step": 7953 }, { "epoch": 0.6736396358246877, "grad_norm": 1.55324511140683, "learning_rate": 2.54248637765059e-06, "loss": 0.6371, "step": 7954 }, { "epoch": 0.673724327757781, "grad_norm": 1.6182374778755697, "learning_rate": 2.541291946140195e-06, "loss": 0.631, "step": 7955 }, { "epoch": 0.6738090196908745, "grad_norm": 1.4166812888909082, "learning_rate": 2.5400976996605996e-06, "loss": 0.5859, "step": 7956 }, { "epoch": 0.6738937116239678, "grad_norm": 1.35811985198223, "learning_rate": 2.5389036383016786e-06, "loss": 0.6632, "step": 7957 }, { "epoch": 0.6739784035570612, "grad_norm": 1.4773016633114138, "learning_rate": 2.537709762153292e-06, "loss": 0.5589, "step": 7958 }, { "epoch": 0.6740630954901545, "grad_norm": 1.5627589231401884, "learning_rate": 2.5365160713052827e-06, "loss": 0.6427, "step": 7959 }, { "epoch": 0.674147787423248, "grad_norm": 1.7399105533758679, "learning_rate": 2.5353225658474845e-06, "loss": 0.6306, "step": 7960 }, { "epoch": 0.6742324793563413, "grad_norm": 1.7648442338661656, "learning_rate": 2.5341292458697136e-06, "loss": 0.6255, "step": 7961 }, { "epoch": 0.6743171712894347, "grad_norm": 1.2942484533565581, "learning_rate": 2.5329361114617746e-06, "loss": 0.5667, "step": 7962 }, { "epoch": 0.674401863222528, "grad_norm": 0.5977746394557047, "learning_rate": 2.5317431627134587e-06, "loss": 0.8627, "step": 7963 }, { "epoch": 0.6744865551556214, "grad_norm": 3.226450831654323, "learning_rate": 2.530550399714538e-06, "loss": 0.6408, "step": 7964 }, { "epoch": 0.6745712470887149, "grad_norm": 1.161628915023109, "learning_rate": 2.5293578225547765e-06, "loss": 0.6844, "step": 7965 }, { "epoch": 0.6746559390218082, "grad_norm": 1.3611411337550907, "learning_rate": 2.528165431323922e-06, "loss": 0.6612, "step": 7966 }, { "epoch": 0.6747406309549016, "grad_norm": 1.9059867142890865, "learning_rate": 2.5269732261117073e-06, "loss": 0.6444, "step": 7967 }, { "epoch": 0.6748253228879949, "grad_norm": 1.5350143400146339, "learning_rate": 2.5257812070078526e-06, "loss": 0.6506, "step": 7968 }, { "epoch": 0.6749100148210883, "grad_norm": 1.3497575250170701, "learning_rate": 2.5245893741020634e-06, "loss": 0.5823, "step": 7969 }, { "epoch": 0.6749947067541817, "grad_norm": 2.006114155000491, "learning_rate": 2.5233977274840316e-06, "loss": 0.6788, "step": 7970 }, { "epoch": 0.6750793986872751, "grad_norm": 0.6365740106831138, "learning_rate": 2.5222062672434366e-06, "loss": 0.8121, "step": 7971 }, { "epoch": 0.6751640906203684, "grad_norm": 1.5482074959328174, "learning_rate": 2.521014993469939e-06, "loss": 0.6964, "step": 7972 }, { "epoch": 0.6752487825534618, "grad_norm": 1.3375038417938878, "learning_rate": 2.5198239062531905e-06, "loss": 0.5965, "step": 7973 }, { "epoch": 0.6753334744865551, "grad_norm": 0.5825636502812901, "learning_rate": 2.5186330056828277e-06, "loss": 0.8743, "step": 7974 }, { "epoch": 0.6754181664196486, "grad_norm": 1.701570922067059, "learning_rate": 2.5174422918484666e-06, "loss": 0.593, "step": 7975 }, { "epoch": 0.6755028583527419, "grad_norm": 1.2449234294489369, "learning_rate": 2.5162517648397212e-06, "loss": 0.604, "step": 7976 }, { "epoch": 0.6755875502858353, "grad_norm": 1.1640035745793533, "learning_rate": 2.5150614247461836e-06, "loss": 0.6294, "step": 7977 }, { "epoch": 0.6756722422189286, "grad_norm": 1.322037491548491, "learning_rate": 2.513871271657431e-06, "loss": 0.5967, "step": 7978 }, { "epoch": 0.675756934152022, "grad_norm": 4.400479725779782, "learning_rate": 2.5126813056630315e-06, "loss": 0.6095, "step": 7979 }, { "epoch": 0.6758416260851154, "grad_norm": 1.7233821488186527, "learning_rate": 2.511491526852533e-06, "loss": 0.6796, "step": 7980 }, { "epoch": 0.6759263180182088, "grad_norm": 1.7500181556652206, "learning_rate": 2.5103019353154743e-06, "loss": 0.6667, "step": 7981 }, { "epoch": 0.6760110099513021, "grad_norm": 0.6802347860267496, "learning_rate": 2.5091125311413788e-06, "loss": 0.8576, "step": 7982 }, { "epoch": 0.6760957018843955, "grad_norm": 1.3949497790430403, "learning_rate": 2.5079233144197546e-06, "loss": 0.6471, "step": 7983 }, { "epoch": 0.6761803938174888, "grad_norm": 1.8050030163482718, "learning_rate": 2.5067342852400998e-06, "loss": 0.6186, "step": 7984 }, { "epoch": 0.6762650857505823, "grad_norm": 1.399571605044433, "learning_rate": 2.50554544369189e-06, "loss": 0.6342, "step": 7985 }, { "epoch": 0.6763497776836757, "grad_norm": 1.3605949167295603, "learning_rate": 2.504356789864595e-06, "loss": 0.6695, "step": 7986 }, { "epoch": 0.676434469616769, "grad_norm": 0.6353624994257601, "learning_rate": 2.503168323847668e-06, "loss": 0.8644, "step": 7987 }, { "epoch": 0.6765191615498624, "grad_norm": 1.5398485545271863, "learning_rate": 2.501980045730544e-06, "loss": 0.6353, "step": 7988 }, { "epoch": 0.6766038534829557, "grad_norm": 1.3701883441454863, "learning_rate": 2.5007919556026495e-06, "loss": 0.6062, "step": 7989 }, { "epoch": 0.6766885454160492, "grad_norm": 1.3837717448657165, "learning_rate": 2.4996040535533937e-06, "loss": 0.6252, "step": 7990 }, { "epoch": 0.6767732373491425, "grad_norm": 1.3019605045766403, "learning_rate": 2.4984163396721738e-06, "loss": 0.5746, "step": 7991 }, { "epoch": 0.6768579292822359, "grad_norm": 1.3927539708449284, "learning_rate": 2.4972288140483725e-06, "loss": 0.6326, "step": 7992 }, { "epoch": 0.6769426212153292, "grad_norm": 1.6938302076177232, "learning_rate": 2.4960414767713535e-06, "loss": 0.6748, "step": 7993 }, { "epoch": 0.6770273131484226, "grad_norm": 1.7706100023965592, "learning_rate": 2.4948543279304734e-06, "loss": 0.6702, "step": 7994 }, { "epoch": 0.677112005081516, "grad_norm": 0.6723832045973008, "learning_rate": 2.49366736761507e-06, "loss": 0.8685, "step": 7995 }, { "epoch": 0.6771966970146094, "grad_norm": 1.332757615968188, "learning_rate": 2.492480595914468e-06, "loss": 0.6125, "step": 7996 }, { "epoch": 0.6772813889477027, "grad_norm": 1.2344928850373462, "learning_rate": 2.49129401291798e-06, "loss": 0.5962, "step": 7997 }, { "epoch": 0.6773660808807961, "grad_norm": 1.232982980181429, "learning_rate": 2.490107618714904e-06, "loss": 0.6332, "step": 7998 }, { "epoch": 0.6774507728138894, "grad_norm": 0.6132602609875097, "learning_rate": 2.488921413394517e-06, "loss": 0.8884, "step": 7999 }, { "epoch": 0.6775354647469829, "grad_norm": 1.3757563110822804, "learning_rate": 2.4877353970460937e-06, "loss": 0.6618, "step": 8000 }, { "epoch": 0.6776201566800762, "grad_norm": 1.3434674892441274, "learning_rate": 2.486549569758882e-06, "loss": 0.6336, "step": 8001 }, { "epoch": 0.6777048486131696, "grad_norm": 1.48523391298203, "learning_rate": 2.485363931622125e-06, "loss": 0.6916, "step": 8002 }, { "epoch": 0.677789540546263, "grad_norm": 0.6285946393033194, "learning_rate": 2.4841784827250474e-06, "loss": 0.8917, "step": 8003 }, { "epoch": 0.6778742324793563, "grad_norm": 1.4174179030499532, "learning_rate": 2.4829932231568615e-06, "loss": 0.6372, "step": 8004 }, { "epoch": 0.6779589244124498, "grad_norm": 1.716094235854958, "learning_rate": 2.4818081530067635e-06, "loss": 0.6381, "step": 8005 }, { "epoch": 0.6780436163455431, "grad_norm": 2.2557103964054135, "learning_rate": 2.4806232723639385e-06, "loss": 0.6499, "step": 8006 }, { "epoch": 0.6781283082786365, "grad_norm": 1.4876720104963042, "learning_rate": 2.47943858131755e-06, "loss": 0.6164, "step": 8007 }, { "epoch": 0.6782130002117298, "grad_norm": 1.1207861634074126, "learning_rate": 2.4782540799567585e-06, "loss": 0.6331, "step": 8008 }, { "epoch": 0.6782976921448232, "grad_norm": 1.7812616204088085, "learning_rate": 2.4770697683706985e-06, "loss": 0.6424, "step": 8009 }, { "epoch": 0.6783823840779166, "grad_norm": 0.6128670867041708, "learning_rate": 2.475885646648496e-06, "loss": 0.8514, "step": 8010 }, { "epoch": 0.67846707601101, "grad_norm": 1.5171896213731342, "learning_rate": 2.474701714879268e-06, "loss": 0.6157, "step": 8011 }, { "epoch": 0.6785517679441033, "grad_norm": 1.5261598797171883, "learning_rate": 2.4735179731521064e-06, "loss": 0.7077, "step": 8012 }, { "epoch": 0.6786364598771967, "grad_norm": 1.625946945702539, "learning_rate": 2.4723344215560973e-06, "loss": 0.7219, "step": 8013 }, { "epoch": 0.67872115181029, "grad_norm": 1.4135181179406893, "learning_rate": 2.471151060180306e-06, "loss": 0.5637, "step": 8014 }, { "epoch": 0.6788058437433835, "grad_norm": 1.261445617588339, "learning_rate": 2.469967889113788e-06, "loss": 0.6146, "step": 8015 }, { "epoch": 0.6788905356764768, "grad_norm": 1.8666393654542999, "learning_rate": 2.468784908445584e-06, "loss": 0.5958, "step": 8016 }, { "epoch": 0.6789752276095702, "grad_norm": 1.3549702034327284, "learning_rate": 2.4676021182647187e-06, "loss": 0.5935, "step": 8017 }, { "epoch": 0.6790599195426635, "grad_norm": 2.2533492514058464, "learning_rate": 2.4664195186602034e-06, "loss": 0.5606, "step": 8018 }, { "epoch": 0.6791446114757569, "grad_norm": 1.4873613196057782, "learning_rate": 2.4652371097210376e-06, "loss": 0.6589, "step": 8019 }, { "epoch": 0.6792293034088503, "grad_norm": 1.3884205761709532, "learning_rate": 2.4640548915361996e-06, "loss": 0.6577, "step": 8020 }, { "epoch": 0.6793139953419437, "grad_norm": 1.557325739433735, "learning_rate": 2.462872864194661e-06, "loss": 0.6538, "step": 8021 }, { "epoch": 0.679398687275037, "grad_norm": 1.282145393186666, "learning_rate": 2.461691027785372e-06, "loss": 0.6604, "step": 8022 }, { "epoch": 0.6794833792081304, "grad_norm": 1.3827671790119342, "learning_rate": 2.4605093823972753e-06, "loss": 0.5464, "step": 8023 }, { "epoch": 0.6795680711412238, "grad_norm": 1.8680056903470235, "learning_rate": 2.459327928119294e-06, "loss": 0.6424, "step": 8024 }, { "epoch": 0.6796527630743172, "grad_norm": 1.674754020296214, "learning_rate": 2.4581466650403395e-06, "loss": 0.6744, "step": 8025 }, { "epoch": 0.6797374550074106, "grad_norm": 0.6000136879336394, "learning_rate": 2.4569655932493084e-06, "loss": 0.8278, "step": 8026 }, { "epoch": 0.6798221469405039, "grad_norm": 1.7137843157578292, "learning_rate": 2.455784712835084e-06, "loss": 0.612, "step": 8027 }, { "epoch": 0.6799068388735973, "grad_norm": 2.1806358189444506, "learning_rate": 2.454604023886531e-06, "loss": 0.6282, "step": 8028 }, { "epoch": 0.6799915308066906, "grad_norm": 0.6465516261261379, "learning_rate": 2.4534235264925053e-06, "loss": 0.8059, "step": 8029 }, { "epoch": 0.6800762227397841, "grad_norm": 0.6625173931854714, "learning_rate": 2.452243220741842e-06, "loss": 0.8976, "step": 8030 }, { "epoch": 0.6801609146728774, "grad_norm": 1.2514135574487997, "learning_rate": 2.451063106723366e-06, "loss": 0.6667, "step": 8031 }, { "epoch": 0.6802456066059708, "grad_norm": 9.305828726368695, "learning_rate": 2.4498831845258914e-06, "loss": 0.6039, "step": 8032 }, { "epoch": 0.6803302985390641, "grad_norm": 1.226466229696265, "learning_rate": 2.4487034542382094e-06, "loss": 0.6039, "step": 8033 }, { "epoch": 0.6804149904721575, "grad_norm": 1.6156032549894053, "learning_rate": 2.4475239159491016e-06, "loss": 0.6118, "step": 8034 }, { "epoch": 0.6804996824052509, "grad_norm": 1.6219210271656177, "learning_rate": 2.4463445697473376e-06, "loss": 0.6253, "step": 8035 }, { "epoch": 0.6805843743383443, "grad_norm": 1.3854865996007615, "learning_rate": 2.4451654157216648e-06, "loss": 0.6107, "step": 8036 }, { "epoch": 0.6806690662714376, "grad_norm": 1.1937262596910505, "learning_rate": 2.443986453960823e-06, "loss": 0.6287, "step": 8037 }, { "epoch": 0.680753758204531, "grad_norm": 1.1954831314451302, "learning_rate": 2.4428076845535352e-06, "loss": 0.5938, "step": 8038 }, { "epoch": 0.6808384501376243, "grad_norm": 1.6493687356194762, "learning_rate": 2.4416291075885107e-06, "loss": 0.5986, "step": 8039 }, { "epoch": 0.6809231420707178, "grad_norm": 1.3200242542270422, "learning_rate": 2.4404507231544444e-06, "loss": 0.6582, "step": 8040 }, { "epoch": 0.6810078340038112, "grad_norm": 1.509634079953173, "learning_rate": 2.4392725313400127e-06, "loss": 0.5995, "step": 8041 }, { "epoch": 0.6810925259369045, "grad_norm": 0.6049295737436915, "learning_rate": 2.438094532233883e-06, "loss": 0.8422, "step": 8042 }, { "epoch": 0.6811772178699979, "grad_norm": 1.2351230606374475, "learning_rate": 2.4369167259247075e-06, "loss": 0.5905, "step": 8043 }, { "epoch": 0.6812619098030912, "grad_norm": 1.4237740104712082, "learning_rate": 2.435739112501118e-06, "loss": 0.6437, "step": 8044 }, { "epoch": 0.6813466017361847, "grad_norm": 1.2278477128268366, "learning_rate": 2.4345616920517396e-06, "loss": 0.6374, "step": 8045 }, { "epoch": 0.681431293669278, "grad_norm": 1.229372528909443, "learning_rate": 2.433384464665178e-06, "loss": 0.6268, "step": 8046 }, { "epoch": 0.6815159856023714, "grad_norm": 1.205328695200845, "learning_rate": 2.432207430430027e-06, "loss": 0.6283, "step": 8047 }, { "epoch": 0.6816006775354647, "grad_norm": 1.225089338273614, "learning_rate": 2.431030589434865e-06, "loss": 0.6176, "step": 8048 }, { "epoch": 0.6816853694685581, "grad_norm": 1.648535427390204, "learning_rate": 2.4298539417682533e-06, "loss": 0.6996, "step": 8049 }, { "epoch": 0.6817700614016515, "grad_norm": 1.4846407298271047, "learning_rate": 2.4286774875187436e-06, "loss": 0.6644, "step": 8050 }, { "epoch": 0.6818547533347449, "grad_norm": 1.4208197549065422, "learning_rate": 2.427501226774865e-06, "loss": 0.6768, "step": 8051 }, { "epoch": 0.6819394452678382, "grad_norm": 1.2959613652827002, "learning_rate": 2.4263251596251424e-06, "loss": 0.6128, "step": 8052 }, { "epoch": 0.6820241372009316, "grad_norm": 1.4589514442340668, "learning_rate": 2.4251492861580827e-06, "loss": 0.637, "step": 8053 }, { "epoch": 0.6821088291340249, "grad_norm": 2.6626045822449687, "learning_rate": 2.423973606462171e-06, "loss": 0.5999, "step": 8054 }, { "epoch": 0.6821935210671184, "grad_norm": 1.3151043968631535, "learning_rate": 2.4227981206258865e-06, "loss": 0.576, "step": 8055 }, { "epoch": 0.6822782130002117, "grad_norm": 5.023950329130387, "learning_rate": 2.4216228287376925e-06, "loss": 0.6046, "step": 8056 }, { "epoch": 0.6823629049333051, "grad_norm": 1.4455581802380257, "learning_rate": 2.4204477308860315e-06, "loss": 0.5991, "step": 8057 }, { "epoch": 0.6824475968663984, "grad_norm": 1.218215259268612, "learning_rate": 2.419272827159338e-06, "loss": 0.6307, "step": 8058 }, { "epoch": 0.6825322887994918, "grad_norm": 1.3388601153641342, "learning_rate": 2.4180981176460304e-06, "loss": 0.657, "step": 8059 }, { "epoch": 0.6826169807325853, "grad_norm": 1.461252363887747, "learning_rate": 2.416923602434511e-06, "loss": 0.6447, "step": 8060 }, { "epoch": 0.6827016726656786, "grad_norm": 1.3479108790368042, "learning_rate": 2.41574928161317e-06, "loss": 0.6279, "step": 8061 }, { "epoch": 0.682786364598772, "grad_norm": 5.164166610833298, "learning_rate": 2.4145751552703783e-06, "loss": 0.6128, "step": 8062 }, { "epoch": 0.6828710565318653, "grad_norm": 1.3618971301101814, "learning_rate": 2.413401223494497e-06, "loss": 0.6135, "step": 8063 }, { "epoch": 0.6829557484649588, "grad_norm": 1.3983609690331482, "learning_rate": 2.4122274863738722e-06, "loss": 0.6214, "step": 8064 }, { "epoch": 0.6830404403980521, "grad_norm": 1.9753656321579052, "learning_rate": 2.4110539439968294e-06, "loss": 0.5955, "step": 8065 }, { "epoch": 0.6831251323311455, "grad_norm": 1.3743244086021793, "learning_rate": 2.409880596451687e-06, "loss": 0.6332, "step": 8066 }, { "epoch": 0.6832098242642388, "grad_norm": 2.046812899606182, "learning_rate": 2.4087074438267447e-06, "loss": 0.6619, "step": 8067 }, { "epoch": 0.6832945161973322, "grad_norm": 1.9059464079108506, "learning_rate": 2.407534486210289e-06, "loss": 0.5767, "step": 8068 }, { "epoch": 0.6833792081304256, "grad_norm": 1.8524836039232195, "learning_rate": 2.406361723690593e-06, "loss": 0.6653, "step": 8069 }, { "epoch": 0.683463900063519, "grad_norm": 11.095214529871459, "learning_rate": 2.4051891563559088e-06, "loss": 0.6564, "step": 8070 }, { "epoch": 0.6835485919966123, "grad_norm": 1.5077208106955189, "learning_rate": 2.4040167842944813e-06, "loss": 0.6148, "step": 8071 }, { "epoch": 0.6836332839297057, "grad_norm": 2.5377361449010953, "learning_rate": 2.4028446075945365e-06, "loss": 0.6571, "step": 8072 }, { "epoch": 0.683717975862799, "grad_norm": 1.2833856886891455, "learning_rate": 2.4016726263442886e-06, "loss": 0.6818, "step": 8073 }, { "epoch": 0.6838026677958925, "grad_norm": 0.6133756783766895, "learning_rate": 2.400500840631936e-06, "loss": 0.8805, "step": 8074 }, { "epoch": 0.6838873597289858, "grad_norm": 3.0319809165009852, "learning_rate": 2.3993292505456574e-06, "loss": 0.6758, "step": 8075 }, { "epoch": 0.6839720516620792, "grad_norm": 1.4737437481162545, "learning_rate": 2.3981578561736246e-06, "loss": 0.6893, "step": 8076 }, { "epoch": 0.6840567435951725, "grad_norm": 1.4020764487834314, "learning_rate": 2.3969866576039924e-06, "loss": 0.6483, "step": 8077 }, { "epoch": 0.6841414355282659, "grad_norm": 1.79736370428748, "learning_rate": 2.395815654924896e-06, "loss": 0.632, "step": 8078 }, { "epoch": 0.6842261274613594, "grad_norm": 1.5522101523096448, "learning_rate": 2.394644848224461e-06, "loss": 0.6616, "step": 8079 }, { "epoch": 0.6843108193944527, "grad_norm": 1.2976567274760438, "learning_rate": 2.393474237590797e-06, "loss": 0.6309, "step": 8080 }, { "epoch": 0.6843955113275461, "grad_norm": 1.2525782538150065, "learning_rate": 2.3923038231119993e-06, "loss": 0.6586, "step": 8081 }, { "epoch": 0.6844802032606394, "grad_norm": 1.4984933039748487, "learning_rate": 2.391133604876149e-06, "loss": 0.6227, "step": 8082 }, { "epoch": 0.6845648951937328, "grad_norm": 1.9964723075450233, "learning_rate": 2.389963582971308e-06, "loss": 0.617, "step": 8083 }, { "epoch": 0.6846495871268262, "grad_norm": 1.2571964399559303, "learning_rate": 2.388793757485528e-06, "loss": 0.6333, "step": 8084 }, { "epoch": 0.6847342790599196, "grad_norm": 1.3569330297792703, "learning_rate": 2.3876241285068464e-06, "loss": 0.6664, "step": 8085 }, { "epoch": 0.6848189709930129, "grad_norm": 1.4488102945271994, "learning_rate": 2.3864546961232805e-06, "loss": 0.6498, "step": 8086 }, { "epoch": 0.6849036629261063, "grad_norm": 3.412002965500617, "learning_rate": 2.385285460422838e-06, "loss": 0.6106, "step": 8087 }, { "epoch": 0.6849883548591996, "grad_norm": 1.5480018779837603, "learning_rate": 2.384116421493511e-06, "loss": 0.6375, "step": 8088 }, { "epoch": 0.6850730467922931, "grad_norm": 1.367603420786617, "learning_rate": 2.3829475794232742e-06, "loss": 0.6254, "step": 8089 }, { "epoch": 0.6851577387253864, "grad_norm": 1.2995898789213083, "learning_rate": 2.3817789343000917e-06, "loss": 0.7038, "step": 8090 }, { "epoch": 0.6852424306584798, "grad_norm": 0.6186729515010041, "learning_rate": 2.380610486211907e-06, "loss": 0.8253, "step": 8091 }, { "epoch": 0.6853271225915731, "grad_norm": 2.2514190713836824, "learning_rate": 2.379442235246654e-06, "loss": 0.6222, "step": 8092 }, { "epoch": 0.6854118145246665, "grad_norm": 1.5221251300341208, "learning_rate": 2.378274181492249e-06, "loss": 0.6875, "step": 8093 }, { "epoch": 0.68549650645776, "grad_norm": 0.7715094698449085, "learning_rate": 2.3771063250365944e-06, "loss": 0.849, "step": 8094 }, { "epoch": 0.6855811983908533, "grad_norm": 0.7236714331878196, "learning_rate": 2.3759386659675792e-06, "loss": 0.8327, "step": 8095 }, { "epoch": 0.6856658903239466, "grad_norm": 1.373563062016825, "learning_rate": 2.374771204373073e-06, "loss": 0.6658, "step": 8096 }, { "epoch": 0.68575058225704, "grad_norm": 1.4836543497994785, "learning_rate": 2.373603940340935e-06, "loss": 0.6408, "step": 8097 }, { "epoch": 0.6858352741901333, "grad_norm": 1.2600982228808548, "learning_rate": 2.3724368739590096e-06, "loss": 0.6851, "step": 8098 }, { "epoch": 0.6859199661232268, "grad_norm": 1.190771150111851, "learning_rate": 2.3712700053151217e-06, "loss": 0.6212, "step": 8099 }, { "epoch": 0.6860046580563202, "grad_norm": 1.4732197254750379, "learning_rate": 2.3701033344970847e-06, "loss": 0.6356, "step": 8100 }, { "epoch": 0.6860893499894135, "grad_norm": 1.3178476186965684, "learning_rate": 2.3689368615926987e-06, "loss": 0.6278, "step": 8101 }, { "epoch": 0.6861740419225069, "grad_norm": 1.4172205946857672, "learning_rate": 2.3677705866897455e-06, "loss": 0.6408, "step": 8102 }, { "epoch": 0.6862587338556002, "grad_norm": 1.3321869764996024, "learning_rate": 2.366604509875996e-06, "loss": 0.6117, "step": 8103 }, { "epoch": 0.6863434257886937, "grad_norm": 1.4082272515868541, "learning_rate": 2.3654386312392e-06, "loss": 0.6445, "step": 8104 }, { "epoch": 0.686428117721787, "grad_norm": 1.2033929599483355, "learning_rate": 2.364272950867097e-06, "loss": 0.609, "step": 8105 }, { "epoch": 0.6865128096548804, "grad_norm": 1.8081804705213198, "learning_rate": 2.3631074688474135e-06, "loss": 0.6343, "step": 8106 }, { "epoch": 0.6865975015879737, "grad_norm": 0.6008391826715624, "learning_rate": 2.361942185267852e-06, "loss": 0.8794, "step": 8107 }, { "epoch": 0.6866821935210671, "grad_norm": 1.5509667320810467, "learning_rate": 2.3607771002161127e-06, "loss": 0.6621, "step": 8108 }, { "epoch": 0.6867668854541605, "grad_norm": 1.4699515076509961, "learning_rate": 2.3596122137798734e-06, "loss": 0.6463, "step": 8109 }, { "epoch": 0.6868515773872539, "grad_norm": 1.1966233971444966, "learning_rate": 2.3584475260467947e-06, "loss": 0.6197, "step": 8110 }, { "epoch": 0.6869362693203472, "grad_norm": 1.5842866078379307, "learning_rate": 2.357283037104529e-06, "loss": 0.5996, "step": 8111 }, { "epoch": 0.6870209612534406, "grad_norm": 1.39292426302117, "learning_rate": 2.3561187470407073e-06, "loss": 0.6363, "step": 8112 }, { "epoch": 0.6871056531865339, "grad_norm": 1.22387087142671, "learning_rate": 2.354954655942949e-06, "loss": 0.627, "step": 8113 }, { "epoch": 0.6871903451196274, "grad_norm": 1.0007583272393372, "learning_rate": 2.35379076389886e-06, "loss": 0.5555, "step": 8114 }, { "epoch": 0.6872750370527207, "grad_norm": 1.2377076401786684, "learning_rate": 2.352627070996028e-06, "loss": 0.6233, "step": 8115 }, { "epoch": 0.6873597289858141, "grad_norm": 1.3837157658008086, "learning_rate": 2.3514635773220273e-06, "loss": 0.6137, "step": 8116 }, { "epoch": 0.6874444209189075, "grad_norm": 1.3462582300892456, "learning_rate": 2.3503002829644196e-06, "loss": 0.6086, "step": 8117 }, { "epoch": 0.6875291128520008, "grad_norm": 1.5935412261386566, "learning_rate": 2.3491371880107437e-06, "loss": 0.6247, "step": 8118 }, { "epoch": 0.6876138047850943, "grad_norm": 1.4684657471011044, "learning_rate": 2.347974292548533e-06, "loss": 0.6186, "step": 8119 }, { "epoch": 0.6876984967181876, "grad_norm": 1.296548849667589, "learning_rate": 2.346811596665299e-06, "loss": 0.6277, "step": 8120 }, { "epoch": 0.687783188651281, "grad_norm": 1.5366832586123618, "learning_rate": 2.3456491004485415e-06, "loss": 0.5942, "step": 8121 }, { "epoch": 0.6878678805843743, "grad_norm": 1.4133955782198633, "learning_rate": 2.344486803985744e-06, "loss": 0.6204, "step": 8122 }, { "epoch": 0.6879525725174677, "grad_norm": 2.124637548487301, "learning_rate": 2.3433247073643767e-06, "loss": 0.6999, "step": 8123 }, { "epoch": 0.6880372644505611, "grad_norm": 1.440754444681726, "learning_rate": 2.3421628106718947e-06, "loss": 0.6534, "step": 8124 }, { "epoch": 0.6881219563836545, "grad_norm": 1.36003622036938, "learning_rate": 2.3410011139957328e-06, "loss": 0.5782, "step": 8125 }, { "epoch": 0.6882066483167478, "grad_norm": 1.44362948031585, "learning_rate": 2.339839617423318e-06, "loss": 0.6713, "step": 8126 }, { "epoch": 0.6882913402498412, "grad_norm": 2.236236664926659, "learning_rate": 2.338678321042057e-06, "loss": 0.6294, "step": 8127 }, { "epoch": 0.6883760321829345, "grad_norm": 2.1743579517561833, "learning_rate": 2.337517224939346e-06, "loss": 0.6524, "step": 8128 }, { "epoch": 0.688460724116028, "grad_norm": 1.2092707437495767, "learning_rate": 2.3363563292025616e-06, "loss": 0.6298, "step": 8129 }, { "epoch": 0.6885454160491213, "grad_norm": 1.5835115481912954, "learning_rate": 2.33519563391907e-06, "loss": 0.6273, "step": 8130 }, { "epoch": 0.6886301079822147, "grad_norm": 2.0570436134502046, "learning_rate": 2.334035139176216e-06, "loss": 0.6459, "step": 8131 }, { "epoch": 0.688714799915308, "grad_norm": 1.4556036215967836, "learning_rate": 2.3328748450613365e-06, "loss": 0.6471, "step": 8132 }, { "epoch": 0.6887994918484014, "grad_norm": 0.6206882510439385, "learning_rate": 2.331714751661746e-06, "loss": 0.8322, "step": 8133 }, { "epoch": 0.6888841837814949, "grad_norm": 1.9470616922479553, "learning_rate": 2.33055485906475e-06, "loss": 0.5644, "step": 8134 }, { "epoch": 0.6889688757145882, "grad_norm": 1.2950536874637415, "learning_rate": 2.3293951673576364e-06, "loss": 0.6736, "step": 8135 }, { "epoch": 0.6890535676476816, "grad_norm": 1.125384660340822, "learning_rate": 2.3282356766276775e-06, "loss": 0.6243, "step": 8136 }, { "epoch": 0.6891382595807749, "grad_norm": 1.2956972683034604, "learning_rate": 2.3270763869621323e-06, "loss": 0.5993, "step": 8137 }, { "epoch": 0.6892229515138683, "grad_norm": 1.308118291812165, "learning_rate": 2.325917298448244e-06, "loss": 0.6348, "step": 8138 }, { "epoch": 0.6893076434469617, "grad_norm": 1.8327712218591963, "learning_rate": 2.324758411173237e-06, "loss": 0.6134, "step": 8139 }, { "epoch": 0.6893923353800551, "grad_norm": 1.419403094779504, "learning_rate": 2.323599725224328e-06, "loss": 0.5915, "step": 8140 }, { "epoch": 0.6894770273131484, "grad_norm": 0.6477436363970248, "learning_rate": 2.32244124068871e-06, "loss": 0.794, "step": 8141 }, { "epoch": 0.6895617192462418, "grad_norm": 2.081326042134413, "learning_rate": 2.3212829576535643e-06, "loss": 0.6224, "step": 8142 }, { "epoch": 0.6896464111793351, "grad_norm": 6.078677300763693, "learning_rate": 2.3201248762060654e-06, "loss": 0.6273, "step": 8143 }, { "epoch": 0.6897311031124286, "grad_norm": 1.582893378412607, "learning_rate": 2.318966996433357e-06, "loss": 0.6238, "step": 8144 }, { "epoch": 0.6898157950455219, "grad_norm": 3.0696924532681926, "learning_rate": 2.31780931842258e-06, "loss": 0.5951, "step": 8145 }, { "epoch": 0.6899004869786153, "grad_norm": 1.2130271017601661, "learning_rate": 2.316651842260856e-06, "loss": 0.6051, "step": 8146 }, { "epoch": 0.6899851789117086, "grad_norm": 1.3213529188920348, "learning_rate": 2.315494568035288e-06, "loss": 0.6701, "step": 8147 }, { "epoch": 0.690069870844802, "grad_norm": 1.5606496034784227, "learning_rate": 2.314337495832969e-06, "loss": 0.6266, "step": 8148 }, { "epoch": 0.6901545627778954, "grad_norm": 1.1775213911013507, "learning_rate": 2.313180625740975e-06, "loss": 0.6303, "step": 8149 }, { "epoch": 0.6902392547109888, "grad_norm": 1.797689075898614, "learning_rate": 2.312023957846366e-06, "loss": 0.6598, "step": 8150 }, { "epoch": 0.6903239466440821, "grad_norm": 1.2199170518844327, "learning_rate": 2.3108674922361896e-06, "loss": 0.5772, "step": 8151 }, { "epoch": 0.6904086385771755, "grad_norm": 1.5334554986356173, "learning_rate": 2.309711228997472e-06, "loss": 0.5792, "step": 8152 }, { "epoch": 0.6904933305102688, "grad_norm": 1.1360448077112177, "learning_rate": 2.308555168217232e-06, "loss": 0.5873, "step": 8153 }, { "epoch": 0.6905780224433623, "grad_norm": 1.2806505440418106, "learning_rate": 2.307399309982466e-06, "loss": 0.624, "step": 8154 }, { "epoch": 0.6906627143764557, "grad_norm": 1.246711144689592, "learning_rate": 2.3062436543801596e-06, "loss": 0.6419, "step": 8155 }, { "epoch": 0.690747406309549, "grad_norm": 1.9124315076939182, "learning_rate": 2.3050882014972825e-06, "loss": 0.6305, "step": 8156 }, { "epoch": 0.6908320982426424, "grad_norm": 2.6982110443140397, "learning_rate": 2.303932951420788e-06, "loss": 0.7037, "step": 8157 }, { "epoch": 0.6909167901757357, "grad_norm": 1.2382802335006546, "learning_rate": 2.3027779042376157e-06, "loss": 0.6494, "step": 8158 }, { "epoch": 0.6910014821088292, "grad_norm": 1.1426461600844242, "learning_rate": 2.3016230600346906e-06, "loss": 0.6045, "step": 8159 }, { "epoch": 0.6910861740419225, "grad_norm": 1.4233735829529095, "learning_rate": 2.300468418898917e-06, "loss": 0.5773, "step": 8160 }, { "epoch": 0.6911708659750159, "grad_norm": 1.2636205001757153, "learning_rate": 2.299313980917191e-06, "loss": 0.6803, "step": 8161 }, { "epoch": 0.6912555579081092, "grad_norm": 1.2977086646851683, "learning_rate": 2.2981597461763867e-06, "loss": 0.6261, "step": 8162 }, { "epoch": 0.6913402498412027, "grad_norm": 1.9414442211678855, "learning_rate": 2.2970057147633672e-06, "loss": 0.6645, "step": 8163 }, { "epoch": 0.691424941774296, "grad_norm": 0.6084997301873273, "learning_rate": 2.295851886764984e-06, "loss": 0.8388, "step": 8164 }, { "epoch": 0.6915096337073894, "grad_norm": 2.7395694461424287, "learning_rate": 2.2946982622680636e-06, "loss": 0.5571, "step": 8165 }, { "epoch": 0.6915943256404827, "grad_norm": 1.4834598798511922, "learning_rate": 2.2935448413594245e-06, "loss": 0.6341, "step": 8166 }, { "epoch": 0.6916790175735761, "grad_norm": 1.1767261342515045, "learning_rate": 2.292391624125869e-06, "loss": 0.5979, "step": 8167 }, { "epoch": 0.6917637095066695, "grad_norm": 1.2505294198748522, "learning_rate": 2.2912386106541795e-06, "loss": 0.6669, "step": 8168 }, { "epoch": 0.6918484014397629, "grad_norm": 1.522680647975773, "learning_rate": 2.2900858010311284e-06, "loss": 0.6293, "step": 8169 }, { "epoch": 0.6919330933728562, "grad_norm": 1.2222103048768203, "learning_rate": 2.28893319534347e-06, "loss": 0.6127, "step": 8170 }, { "epoch": 0.6920177853059496, "grad_norm": 1.2204882299038247, "learning_rate": 2.287780793677945e-06, "loss": 0.6459, "step": 8171 }, { "epoch": 0.692102477239043, "grad_norm": 1.4748190901713965, "learning_rate": 2.286628596121279e-06, "loss": 0.6707, "step": 8172 }, { "epoch": 0.6921871691721364, "grad_norm": 1.0667587325878842, "learning_rate": 2.2854766027601765e-06, "loss": 0.612, "step": 8173 }, { "epoch": 0.6922718611052298, "grad_norm": 1.1320794397946805, "learning_rate": 2.284324813681334e-06, "loss": 0.5853, "step": 8174 }, { "epoch": 0.6923565530383231, "grad_norm": 1.4631967013175733, "learning_rate": 2.2831732289714315e-06, "loss": 0.6348, "step": 8175 }, { "epoch": 0.6924412449714165, "grad_norm": 1.1834677500165254, "learning_rate": 2.282021848717128e-06, "loss": 0.5884, "step": 8176 }, { "epoch": 0.6925259369045098, "grad_norm": 0.5996436829471186, "learning_rate": 2.2808706730050727e-06, "loss": 0.8491, "step": 8177 }, { "epoch": 0.6926106288376033, "grad_norm": 1.5654522176286647, "learning_rate": 2.2797197019218977e-06, "loss": 0.6156, "step": 8178 }, { "epoch": 0.6926953207706966, "grad_norm": 1.5794865886631648, "learning_rate": 2.2785689355542197e-06, "loss": 0.6044, "step": 8179 }, { "epoch": 0.69278001270379, "grad_norm": 1.7555805073095023, "learning_rate": 2.2774183739886417e-06, "loss": 0.6466, "step": 8180 }, { "epoch": 0.6928647046368833, "grad_norm": 1.299397916651316, "learning_rate": 2.276268017311746e-06, "loss": 0.627, "step": 8181 }, { "epoch": 0.6929493965699767, "grad_norm": 1.384434176972148, "learning_rate": 2.2751178656101046e-06, "loss": 0.6748, "step": 8182 }, { "epoch": 0.6930340885030701, "grad_norm": 1.1847199779922144, "learning_rate": 2.2739679189702733e-06, "loss": 0.6507, "step": 8183 }, { "epoch": 0.6931187804361635, "grad_norm": 3.3528650291823157, "learning_rate": 2.272818177478791e-06, "loss": 0.6116, "step": 8184 }, { "epoch": 0.6932034723692568, "grad_norm": 1.7845247246822584, "learning_rate": 2.271668641222184e-06, "loss": 0.5798, "step": 8185 }, { "epoch": 0.6932881643023502, "grad_norm": 0.696667792096815, "learning_rate": 2.2705193102869566e-06, "loss": 0.9058, "step": 8186 }, { "epoch": 0.6933728562354435, "grad_norm": 1.2938778720737028, "learning_rate": 2.2693701847596044e-06, "loss": 0.6508, "step": 8187 }, { "epoch": 0.693457548168537, "grad_norm": 1.4956453093854423, "learning_rate": 2.268221264726607e-06, "loss": 0.6123, "step": 8188 }, { "epoch": 0.6935422401016303, "grad_norm": 1.2240955101087407, "learning_rate": 2.2670725502744235e-06, "loss": 0.6501, "step": 8189 }, { "epoch": 0.6936269320347237, "grad_norm": 0.5541487749930297, "learning_rate": 2.2659240414895017e-06, "loss": 0.7672, "step": 8190 }, { "epoch": 0.693711623967817, "grad_norm": 2.1966350257275042, "learning_rate": 2.2647757384582734e-06, "loss": 0.6266, "step": 8191 }, { "epoch": 0.6937963159009104, "grad_norm": 1.1848500358205536, "learning_rate": 2.2636276412671546e-06, "loss": 0.6656, "step": 8192 }, { "epoch": 0.6938810078340039, "grad_norm": 1.4846584259078865, "learning_rate": 2.2624797500025474e-06, "loss": 0.6538, "step": 8193 }, { "epoch": 0.6939656997670972, "grad_norm": 1.6775547460843045, "learning_rate": 2.2613320647508334e-06, "loss": 0.6452, "step": 8194 }, { "epoch": 0.6940503917001906, "grad_norm": 2.0370827218308083, "learning_rate": 2.260184585598383e-06, "loss": 0.6255, "step": 8195 }, { "epoch": 0.6941350836332839, "grad_norm": 1.6569562712482813, "learning_rate": 2.2590373126315526e-06, "loss": 0.6623, "step": 8196 }, { "epoch": 0.6942197755663773, "grad_norm": 0.5924268709905918, "learning_rate": 2.2578902459366762e-06, "loss": 0.8416, "step": 8197 }, { "epoch": 0.6943044674994707, "grad_norm": 1.3342126547892843, "learning_rate": 2.2567433856000797e-06, "loss": 0.5991, "step": 8198 }, { "epoch": 0.6943891594325641, "grad_norm": 1.198456499344515, "learning_rate": 2.255596731708069e-06, "loss": 0.5968, "step": 8199 }, { "epoch": 0.6944738513656574, "grad_norm": 1.4068359319611454, "learning_rate": 2.2544502843469373e-06, "loss": 0.6666, "step": 8200 }, { "epoch": 0.6945585432987508, "grad_norm": 1.3947823280020557, "learning_rate": 2.2533040436029613e-06, "loss": 0.6218, "step": 8201 }, { "epoch": 0.6946432352318441, "grad_norm": 1.0482162451571901, "learning_rate": 2.2521580095623984e-06, "loss": 0.6058, "step": 8202 }, { "epoch": 0.6947279271649376, "grad_norm": 2.4374796501048666, "learning_rate": 2.251012182311497e-06, "loss": 0.5777, "step": 8203 }, { "epoch": 0.6948126190980309, "grad_norm": 2.710043841685486, "learning_rate": 2.249866561936484e-06, "loss": 0.6446, "step": 8204 }, { "epoch": 0.6948973110311243, "grad_norm": 1.6571034979685546, "learning_rate": 2.2487211485235754e-06, "loss": 0.6205, "step": 8205 }, { "epoch": 0.6949820029642176, "grad_norm": 1.4841237205127615, "learning_rate": 2.2475759421589716e-06, "loss": 0.6423, "step": 8206 }, { "epoch": 0.695066694897311, "grad_norm": 1.3666402584971813, "learning_rate": 2.24643094292885e-06, "loss": 0.6338, "step": 8207 }, { "epoch": 0.6951513868304044, "grad_norm": 1.3817299270572796, "learning_rate": 2.2452861509193813e-06, "loss": 0.612, "step": 8208 }, { "epoch": 0.6952360787634978, "grad_norm": 1.6973673171358494, "learning_rate": 2.244141566216719e-06, "loss": 0.6373, "step": 8209 }, { "epoch": 0.6953207706965912, "grad_norm": 1.2918908142629146, "learning_rate": 2.242997188906994e-06, "loss": 0.6671, "step": 8210 }, { "epoch": 0.6954054626296845, "grad_norm": 2.5846223214260995, "learning_rate": 2.24185301907633e-06, "loss": 0.6017, "step": 8211 }, { "epoch": 0.6954901545627779, "grad_norm": 1.905044982357439, "learning_rate": 2.2407090568108314e-06, "loss": 0.5929, "step": 8212 }, { "epoch": 0.6955748464958713, "grad_norm": 1.8244307890818905, "learning_rate": 2.2395653021965873e-06, "loss": 0.6592, "step": 8213 }, { "epoch": 0.6956595384289647, "grad_norm": 1.7284445450585635, "learning_rate": 2.2384217553196735e-06, "loss": 0.6108, "step": 8214 }, { "epoch": 0.695744230362058, "grad_norm": 1.2946646632228493, "learning_rate": 2.2372784162661443e-06, "loss": 0.6427, "step": 8215 }, { "epoch": 0.6958289222951514, "grad_norm": 1.5641682300636417, "learning_rate": 2.236135285122043e-06, "loss": 0.6409, "step": 8216 }, { "epoch": 0.6959136142282447, "grad_norm": 1.2984343826502491, "learning_rate": 2.2349923619733987e-06, "loss": 0.6391, "step": 8217 }, { "epoch": 0.6959983061613382, "grad_norm": 2.041961312513113, "learning_rate": 2.233849646906219e-06, "loss": 0.604, "step": 8218 }, { "epoch": 0.6960829980944315, "grad_norm": 1.6852212270241902, "learning_rate": 2.2327071400064987e-06, "loss": 0.6251, "step": 8219 }, { "epoch": 0.6961676900275249, "grad_norm": 1.608371974656658, "learning_rate": 2.231564841360224e-06, "loss": 0.6308, "step": 8220 }, { "epoch": 0.6962523819606182, "grad_norm": 1.7967019940894673, "learning_rate": 2.2304227510533515e-06, "loss": 0.6616, "step": 8221 }, { "epoch": 0.6963370738937116, "grad_norm": 1.3697682318311526, "learning_rate": 2.2292808691718354e-06, "loss": 0.6499, "step": 8222 }, { "epoch": 0.696421765826805, "grad_norm": 1.298250477811117, "learning_rate": 2.2281391958016035e-06, "loss": 0.6051, "step": 8223 }, { "epoch": 0.6965064577598984, "grad_norm": 2.0109781438558625, "learning_rate": 2.2269977310285746e-06, "loss": 0.6642, "step": 8224 }, { "epoch": 0.6965911496929917, "grad_norm": 1.5563859246400629, "learning_rate": 2.22585647493865e-06, "loss": 0.6663, "step": 8225 }, { "epoch": 0.6966758416260851, "grad_norm": 3.190447700353199, "learning_rate": 2.2247154276177164e-06, "loss": 0.6583, "step": 8226 }, { "epoch": 0.6967605335591784, "grad_norm": 1.6857119979240751, "learning_rate": 2.2235745891516437e-06, "loss": 0.6309, "step": 8227 }, { "epoch": 0.6968452254922719, "grad_norm": 1.5451191407345362, "learning_rate": 2.222433959626283e-06, "loss": 0.6629, "step": 8228 }, { "epoch": 0.6969299174253653, "grad_norm": 1.4237848667829405, "learning_rate": 2.2212935391274753e-06, "loss": 0.6842, "step": 8229 }, { "epoch": 0.6970146093584586, "grad_norm": 1.4337863295692204, "learning_rate": 2.2201533277410447e-06, "loss": 0.6535, "step": 8230 }, { "epoch": 0.697099301291552, "grad_norm": 1.2787439933314477, "learning_rate": 2.219013325552794e-06, "loss": 0.6388, "step": 8231 }, { "epoch": 0.6971839932246453, "grad_norm": 1.4249187078334755, "learning_rate": 2.217873532648517e-06, "loss": 0.5864, "step": 8232 }, { "epoch": 0.6972686851577388, "grad_norm": 4.084360156512933, "learning_rate": 2.216733949113988e-06, "loss": 0.5829, "step": 8233 }, { "epoch": 0.6973533770908321, "grad_norm": 1.8462327787875727, "learning_rate": 2.215594575034968e-06, "loss": 0.6063, "step": 8234 }, { "epoch": 0.6974380690239255, "grad_norm": 1.2394703137198855, "learning_rate": 2.2144554104972015e-06, "loss": 0.6123, "step": 8235 }, { "epoch": 0.6975227609570188, "grad_norm": 1.2859204905557888, "learning_rate": 2.2133164555864146e-06, "loss": 0.5994, "step": 8236 }, { "epoch": 0.6976074528901122, "grad_norm": 1.3474390590070482, "learning_rate": 2.21217771038832e-06, "loss": 0.6797, "step": 8237 }, { "epoch": 0.6976921448232056, "grad_norm": 1.7183833285311327, "learning_rate": 2.2110391749886167e-06, "loss": 0.6325, "step": 8238 }, { "epoch": 0.697776836756299, "grad_norm": 0.6275855453362135, "learning_rate": 2.2099008494729805e-06, "loss": 0.8626, "step": 8239 }, { "epoch": 0.6978615286893923, "grad_norm": 1.5585510626490304, "learning_rate": 2.208762733927081e-06, "loss": 0.6617, "step": 8240 }, { "epoch": 0.6979462206224857, "grad_norm": 1.760055571632511, "learning_rate": 2.207624828436568e-06, "loss": 0.6276, "step": 8241 }, { "epoch": 0.698030912555579, "grad_norm": 1.4918398807527304, "learning_rate": 2.206487133087072e-06, "loss": 0.6273, "step": 8242 }, { "epoch": 0.6981156044886725, "grad_norm": 1.2321278501943211, "learning_rate": 2.2053496479642124e-06, "loss": 0.5524, "step": 8243 }, { "epoch": 0.6982002964217658, "grad_norm": 1.5170924155146273, "learning_rate": 2.2042123731535886e-06, "loss": 0.5823, "step": 8244 }, { "epoch": 0.6982849883548592, "grad_norm": 2.301643942083927, "learning_rate": 2.2030753087407887e-06, "loss": 0.6523, "step": 8245 }, { "epoch": 0.6983696802879525, "grad_norm": 0.5974552292095541, "learning_rate": 2.2019384548113813e-06, "loss": 0.8646, "step": 8246 }, { "epoch": 0.6984543722210459, "grad_norm": 1.4296142107501042, "learning_rate": 2.2008018114509223e-06, "loss": 0.5934, "step": 8247 }, { "epoch": 0.6985390641541394, "grad_norm": 1.2101868857777098, "learning_rate": 2.199665378744949e-06, "loss": 0.6325, "step": 8248 }, { "epoch": 0.6986237560872327, "grad_norm": 2.337153676785111, "learning_rate": 2.1985291567789862e-06, "loss": 0.6745, "step": 8249 }, { "epoch": 0.6987084480203261, "grad_norm": 1.8624459324698077, "learning_rate": 2.1973931456385374e-06, "loss": 0.6475, "step": 8250 }, { "epoch": 0.6987931399534194, "grad_norm": 1.4243596407843029, "learning_rate": 2.196257345409097e-06, "loss": 0.6426, "step": 8251 }, { "epoch": 0.6988778318865128, "grad_norm": 1.2884688407244953, "learning_rate": 2.195121756176135e-06, "loss": 0.6706, "step": 8252 }, { "epoch": 0.6989625238196062, "grad_norm": 1.4501712055171374, "learning_rate": 2.193986378025114e-06, "loss": 0.6573, "step": 8253 }, { "epoch": 0.6990472157526996, "grad_norm": 0.6262306907737581, "learning_rate": 2.1928512110414766e-06, "loss": 0.8602, "step": 8254 }, { "epoch": 0.6991319076857929, "grad_norm": 1.3321764240369256, "learning_rate": 2.19171625531065e-06, "loss": 0.6064, "step": 8255 }, { "epoch": 0.6992165996188863, "grad_norm": 1.4979879610565139, "learning_rate": 2.1905815109180485e-06, "loss": 0.5908, "step": 8256 }, { "epoch": 0.6993012915519796, "grad_norm": 1.2973264162461038, "learning_rate": 2.1894469779490617e-06, "loss": 0.6487, "step": 8257 }, { "epoch": 0.6993859834850731, "grad_norm": 1.3271841956549624, "learning_rate": 2.1883126564890735e-06, "loss": 0.6181, "step": 8258 }, { "epoch": 0.6994706754181664, "grad_norm": 1.416182961770918, "learning_rate": 2.1871785466234458e-06, "loss": 0.707, "step": 8259 }, { "epoch": 0.6995553673512598, "grad_norm": 1.280547594456388, "learning_rate": 2.186044648437527e-06, "loss": 0.6283, "step": 8260 }, { "epoch": 0.6996400592843531, "grad_norm": 1.3342547067831334, "learning_rate": 2.184910962016649e-06, "loss": 0.5901, "step": 8261 }, { "epoch": 0.6997247512174465, "grad_norm": 2.1960110419989314, "learning_rate": 2.1837774874461296e-06, "loss": 0.6039, "step": 8262 }, { "epoch": 0.69980944315054, "grad_norm": 1.3095845377923814, "learning_rate": 2.1826442248112657e-06, "loss": 0.6463, "step": 8263 }, { "epoch": 0.6998941350836333, "grad_norm": 1.472455554412374, "learning_rate": 2.1815111741973437e-06, "loss": 0.5968, "step": 8264 }, { "epoch": 0.6999788270167266, "grad_norm": 1.7494210103644734, "learning_rate": 2.180378335689629e-06, "loss": 0.6247, "step": 8265 }, { "epoch": 0.70006351894982, "grad_norm": 0.632103005905455, "learning_rate": 2.179245709373375e-06, "loss": 0.8255, "step": 8266 }, { "epoch": 0.7001482108829135, "grad_norm": 1.45512492882957, "learning_rate": 2.1781132953338174e-06, "loss": 0.5759, "step": 8267 }, { "epoch": 0.7002329028160068, "grad_norm": 1.27659494392953, "learning_rate": 2.1769810936561774e-06, "loss": 0.6087, "step": 8268 }, { "epoch": 0.7003175947491002, "grad_norm": 0.5785262746496964, "learning_rate": 2.1758491044256593e-06, "loss": 0.8444, "step": 8269 }, { "epoch": 0.7004022866821935, "grad_norm": 3.094877155472544, "learning_rate": 2.1747173277274513e-06, "loss": 0.6339, "step": 8270 }, { "epoch": 0.7004869786152869, "grad_norm": 1.5920413133922644, "learning_rate": 2.173585763646724e-06, "loss": 0.6134, "step": 8271 }, { "epoch": 0.7005716705483803, "grad_norm": 1.583700667983102, "learning_rate": 2.172454412268636e-06, "loss": 0.6925, "step": 8272 }, { "epoch": 0.7006563624814737, "grad_norm": 0.5896581508912268, "learning_rate": 2.1713232736783242e-06, "loss": 0.8688, "step": 8273 }, { "epoch": 0.700741054414567, "grad_norm": 1.7795467659102353, "learning_rate": 2.1701923479609134e-06, "loss": 0.6292, "step": 8274 }, { "epoch": 0.7008257463476604, "grad_norm": 1.6227838467753781, "learning_rate": 2.169061635201516e-06, "loss": 0.6904, "step": 8275 }, { "epoch": 0.7009104382807537, "grad_norm": 0.6285401662128888, "learning_rate": 2.167931135485219e-06, "loss": 0.8311, "step": 8276 }, { "epoch": 0.7009951302138472, "grad_norm": 1.6755630129295622, "learning_rate": 2.166800848897101e-06, "loss": 0.6275, "step": 8277 }, { "epoch": 0.7010798221469405, "grad_norm": 1.584657375384607, "learning_rate": 2.165670775522223e-06, "loss": 0.6288, "step": 8278 }, { "epoch": 0.7011645140800339, "grad_norm": 1.2484946994547181, "learning_rate": 2.1645409154456266e-06, "loss": 0.5977, "step": 8279 }, { "epoch": 0.7012492060131272, "grad_norm": 0.6404843797536678, "learning_rate": 2.1634112687523407e-06, "loss": 0.8591, "step": 8280 }, { "epoch": 0.7013338979462206, "grad_norm": 1.3268756081035886, "learning_rate": 2.1622818355273766e-06, "loss": 0.5654, "step": 8281 }, { "epoch": 0.701418589879314, "grad_norm": 1.5564519039927254, "learning_rate": 2.161152615855731e-06, "loss": 0.6185, "step": 8282 }, { "epoch": 0.7015032818124074, "grad_norm": 2.479775511472882, "learning_rate": 2.160023609822386e-06, "loss": 0.6144, "step": 8283 }, { "epoch": 0.7015879737455007, "grad_norm": 1.3154808226672026, "learning_rate": 2.1588948175123003e-06, "loss": 0.6291, "step": 8284 }, { "epoch": 0.7016726656785941, "grad_norm": 1.5955737994254675, "learning_rate": 2.1577662390104235e-06, "loss": 0.6533, "step": 8285 }, { "epoch": 0.7017573576116874, "grad_norm": 1.5219768745874398, "learning_rate": 2.1566378744016903e-06, "loss": 0.6435, "step": 8286 }, { "epoch": 0.7018420495447809, "grad_norm": 1.3963491410446485, "learning_rate": 2.155509723771011e-06, "loss": 0.6156, "step": 8287 }, { "epoch": 0.7019267414778743, "grad_norm": 1.1557622026315857, "learning_rate": 2.1543817872032872e-06, "loss": 0.6376, "step": 8288 }, { "epoch": 0.7020114334109676, "grad_norm": 4.231641366146939, "learning_rate": 2.1532540647834026e-06, "loss": 0.7113, "step": 8289 }, { "epoch": 0.702096125344061, "grad_norm": 2.209807623066575, "learning_rate": 2.1521265565962234e-06, "loss": 0.6085, "step": 8290 }, { "epoch": 0.7021808172771543, "grad_norm": 0.614198553480226, "learning_rate": 2.1509992627266034e-06, "loss": 0.8598, "step": 8291 }, { "epoch": 0.7022655092102478, "grad_norm": 1.2393715086280854, "learning_rate": 2.149872183259373e-06, "loss": 0.5411, "step": 8292 }, { "epoch": 0.7023502011433411, "grad_norm": 4.974231039913869, "learning_rate": 2.148745318279355e-06, "loss": 0.6835, "step": 8293 }, { "epoch": 0.7024348930764345, "grad_norm": 1.8069267726476794, "learning_rate": 2.1476186678713475e-06, "loss": 0.6767, "step": 8294 }, { "epoch": 0.7025195850095278, "grad_norm": 1.5288600085687671, "learning_rate": 2.1464922321201375e-06, "loss": 0.5511, "step": 8295 }, { "epoch": 0.7026042769426212, "grad_norm": 1.6095963347526745, "learning_rate": 2.1453660111105013e-06, "loss": 0.6328, "step": 8296 }, { "epoch": 0.7026889688757146, "grad_norm": 2.1868475704506136, "learning_rate": 2.144240004927187e-06, "loss": 0.6283, "step": 8297 }, { "epoch": 0.702773660808808, "grad_norm": 1.178026701023201, "learning_rate": 2.1431142136549336e-06, "loss": 0.6899, "step": 8298 }, { "epoch": 0.7028583527419013, "grad_norm": 2.0074084339315976, "learning_rate": 2.141988637378466e-06, "loss": 0.6138, "step": 8299 }, { "epoch": 0.7029430446749947, "grad_norm": 1.2147611395616107, "learning_rate": 2.140863276182485e-06, "loss": 0.6672, "step": 8300 }, { "epoch": 0.703027736608088, "grad_norm": 1.4064290474220305, "learning_rate": 2.1397381301516825e-06, "loss": 0.5598, "step": 8301 }, { "epoch": 0.7031124285411815, "grad_norm": 1.437846096731492, "learning_rate": 2.1386131993707314e-06, "loss": 0.6801, "step": 8302 }, { "epoch": 0.7031971204742749, "grad_norm": 1.1057015426530992, "learning_rate": 2.1374884839242892e-06, "loss": 0.5899, "step": 8303 }, { "epoch": 0.7032818124073682, "grad_norm": 1.5681111645825765, "learning_rate": 2.136363983896998e-06, "loss": 0.6615, "step": 8304 }, { "epoch": 0.7033665043404616, "grad_norm": 1.4031040925047271, "learning_rate": 2.1352396993734784e-06, "loss": 0.6132, "step": 8305 }, { "epoch": 0.7034511962735549, "grad_norm": 1.2422807814040446, "learning_rate": 2.1341156304383414e-06, "loss": 0.6175, "step": 8306 }, { "epoch": 0.7035358882066484, "grad_norm": 1.2912162580767819, "learning_rate": 2.1329917771761806e-06, "loss": 0.7034, "step": 8307 }, { "epoch": 0.7036205801397417, "grad_norm": 4.318171923250263, "learning_rate": 2.1318681396715684e-06, "loss": 0.6668, "step": 8308 }, { "epoch": 0.7037052720728351, "grad_norm": 1.4169752581444508, "learning_rate": 2.1307447180090662e-06, "loss": 0.6965, "step": 8309 }, { "epoch": 0.7037899640059284, "grad_norm": 1.196243305322593, "learning_rate": 2.1296215122732173e-06, "loss": 0.6279, "step": 8310 }, { "epoch": 0.7038746559390218, "grad_norm": 1.5985793592750857, "learning_rate": 2.1284985225485487e-06, "loss": 0.6409, "step": 8311 }, { "epoch": 0.7039593478721152, "grad_norm": 1.2333695856770022, "learning_rate": 2.1273757489195736e-06, "loss": 0.6689, "step": 8312 }, { "epoch": 0.7040440398052086, "grad_norm": 1.700415510031639, "learning_rate": 2.126253191470783e-06, "loss": 0.6287, "step": 8313 }, { "epoch": 0.7041287317383019, "grad_norm": 1.2414471830573672, "learning_rate": 2.125130850286657e-06, "loss": 0.6063, "step": 8314 }, { "epoch": 0.7042134236713953, "grad_norm": 1.2339961961388006, "learning_rate": 2.124008725451657e-06, "loss": 0.6364, "step": 8315 }, { "epoch": 0.7042981156044886, "grad_norm": 0.6643184361064275, "learning_rate": 2.1228868170502303e-06, "loss": 0.8468, "step": 8316 }, { "epoch": 0.7043828075375821, "grad_norm": 1.5897638905937124, "learning_rate": 2.121765125166807e-06, "loss": 0.6281, "step": 8317 }, { "epoch": 0.7044674994706754, "grad_norm": 1.4908499817424496, "learning_rate": 2.1206436498857973e-06, "loss": 0.6747, "step": 8318 }, { "epoch": 0.7045521914037688, "grad_norm": 1.1024075171796646, "learning_rate": 2.1195223912916e-06, "loss": 0.6167, "step": 8319 }, { "epoch": 0.7046368833368621, "grad_norm": 1.6640603527272948, "learning_rate": 2.1184013494685973e-06, "loss": 0.6658, "step": 8320 }, { "epoch": 0.7047215752699555, "grad_norm": 1.6789992815940216, "learning_rate": 2.11728052450115e-06, "loss": 0.6263, "step": 8321 }, { "epoch": 0.704806267203049, "grad_norm": 1.3770276727781037, "learning_rate": 2.116159916473608e-06, "loss": 0.621, "step": 8322 }, { "epoch": 0.7048909591361423, "grad_norm": 1.231936034502268, "learning_rate": 2.1150395254703034e-06, "loss": 0.6225, "step": 8323 }, { "epoch": 0.7049756510692357, "grad_norm": 0.6997009183052788, "learning_rate": 2.1139193515755506e-06, "loss": 0.8101, "step": 8324 }, { "epoch": 0.705060343002329, "grad_norm": 1.2037729122266745, "learning_rate": 2.112799394873651e-06, "loss": 0.6082, "step": 8325 }, { "epoch": 0.7051450349354224, "grad_norm": 1.4649534441883925, "learning_rate": 2.1116796554488835e-06, "loss": 0.6404, "step": 8326 }, { "epoch": 0.7052297268685158, "grad_norm": 1.2742599129338927, "learning_rate": 2.1105601333855163e-06, "loss": 0.5845, "step": 8327 }, { "epoch": 0.7053144188016092, "grad_norm": 1.4683756088534716, "learning_rate": 2.1094408287678014e-06, "loss": 0.5879, "step": 8328 }, { "epoch": 0.7053991107347025, "grad_norm": 1.3589764362206396, "learning_rate": 2.1083217416799686e-06, "loss": 0.6381, "step": 8329 }, { "epoch": 0.7054838026677959, "grad_norm": 1.9853801112374336, "learning_rate": 2.1072028722062366e-06, "loss": 0.6268, "step": 8330 }, { "epoch": 0.7055684946008892, "grad_norm": 2.4584178004321666, "learning_rate": 2.1060842204308064e-06, "loss": 0.6149, "step": 8331 }, { "epoch": 0.7056531865339827, "grad_norm": 1.2196899016505711, "learning_rate": 2.104965786437863e-06, "loss": 0.6006, "step": 8332 }, { "epoch": 0.705737878467076, "grad_norm": 1.1281414519097668, "learning_rate": 2.1038475703115756e-06, "loss": 0.645, "step": 8333 }, { "epoch": 0.7058225704001694, "grad_norm": 1.3057329627876613, "learning_rate": 2.102729572136093e-06, "loss": 0.7034, "step": 8334 }, { "epoch": 0.7059072623332627, "grad_norm": 1.4209216288942645, "learning_rate": 2.1016117919955513e-06, "loss": 0.6224, "step": 8335 }, { "epoch": 0.7059919542663561, "grad_norm": 1.5208469735863175, "learning_rate": 2.1004942299740703e-06, "loss": 0.5977, "step": 8336 }, { "epoch": 0.7060766461994495, "grad_norm": 1.5395271007502778, "learning_rate": 2.0993768861557524e-06, "loss": 0.6229, "step": 8337 }, { "epoch": 0.7061613381325429, "grad_norm": 1.1200794225920325, "learning_rate": 2.098259760624685e-06, "loss": 0.6003, "step": 8338 }, { "epoch": 0.7062460300656362, "grad_norm": 1.4179778940598728, "learning_rate": 2.097142853464934e-06, "loss": 0.705, "step": 8339 }, { "epoch": 0.7063307219987296, "grad_norm": 1.483292249613841, "learning_rate": 2.096026164760555e-06, "loss": 0.6229, "step": 8340 }, { "epoch": 0.7064154139318229, "grad_norm": 1.5327178787608204, "learning_rate": 2.094909694595586e-06, "loss": 0.6063, "step": 8341 }, { "epoch": 0.7065001058649164, "grad_norm": 1.5391084661244159, "learning_rate": 2.0937934430540435e-06, "loss": 0.6175, "step": 8342 }, { "epoch": 0.7065847977980098, "grad_norm": 1.2048275765627374, "learning_rate": 2.0926774102199337e-06, "loss": 0.5979, "step": 8343 }, { "epoch": 0.7066694897311031, "grad_norm": 1.24344939962701, "learning_rate": 2.091561596177244e-06, "loss": 0.587, "step": 8344 }, { "epoch": 0.7067541816641965, "grad_norm": 2.4888185289957034, "learning_rate": 2.090446001009945e-06, "loss": 0.6385, "step": 8345 }, { "epoch": 0.7068388735972898, "grad_norm": 1.3804071397312034, "learning_rate": 2.089330624801993e-06, "loss": 0.6508, "step": 8346 }, { "epoch": 0.7069235655303833, "grad_norm": 1.456932852072904, "learning_rate": 2.0882154676373225e-06, "loss": 0.6166, "step": 8347 }, { "epoch": 0.7070082574634766, "grad_norm": 1.7394732986398993, "learning_rate": 2.0871005295998565e-06, "loss": 0.6578, "step": 8348 }, { "epoch": 0.70709294939657, "grad_norm": 1.2639160791211577, "learning_rate": 2.085985810773502e-06, "loss": 0.5905, "step": 8349 }, { "epoch": 0.7071776413296633, "grad_norm": 0.6417873348048259, "learning_rate": 2.0848713112421442e-06, "loss": 0.8638, "step": 8350 }, { "epoch": 0.7072623332627567, "grad_norm": 1.3711459317431987, "learning_rate": 2.083757031089654e-06, "loss": 0.6436, "step": 8351 }, { "epoch": 0.7073470251958501, "grad_norm": 1.1916900597438558, "learning_rate": 2.082642970399894e-06, "loss": 0.6114, "step": 8352 }, { "epoch": 0.7074317171289435, "grad_norm": 0.6084051717351803, "learning_rate": 2.0815291292566963e-06, "loss": 0.8475, "step": 8353 }, { "epoch": 0.7075164090620368, "grad_norm": 3.0278419161033345, "learning_rate": 2.0804155077438877e-06, "loss": 0.6447, "step": 8354 }, { "epoch": 0.7076011009951302, "grad_norm": 1.6917063256012854, "learning_rate": 2.07930210594527e-06, "loss": 0.5682, "step": 8355 }, { "epoch": 0.7076857929282235, "grad_norm": 1.4888105814233674, "learning_rate": 2.0781889239446353e-06, "loss": 0.6248, "step": 8356 }, { "epoch": 0.707770484861317, "grad_norm": 1.4953206289990666, "learning_rate": 2.0770759618257554e-06, "loss": 0.6448, "step": 8357 }, { "epoch": 0.7078551767944103, "grad_norm": 1.3771981244821225, "learning_rate": 2.075963219672387e-06, "loss": 0.6175, "step": 8358 }, { "epoch": 0.7079398687275037, "grad_norm": 1.374204057504438, "learning_rate": 2.074850697568271e-06, "loss": 0.6553, "step": 8359 }, { "epoch": 0.708024560660597, "grad_norm": 1.7095816133924713, "learning_rate": 2.073738395597128e-06, "loss": 0.6322, "step": 8360 }, { "epoch": 0.7081092525936904, "grad_norm": 1.9466621644428623, "learning_rate": 2.072626313842666e-06, "loss": 0.6542, "step": 8361 }, { "epoch": 0.7081939445267839, "grad_norm": 1.285614828438686, "learning_rate": 2.071514452388577e-06, "loss": 0.652, "step": 8362 }, { "epoch": 0.7082786364598772, "grad_norm": 1.3880763076065854, "learning_rate": 2.070402811318531e-06, "loss": 0.6619, "step": 8363 }, { "epoch": 0.7083633283929706, "grad_norm": 1.9268140003910146, "learning_rate": 2.069291390716186e-06, "loss": 0.6638, "step": 8364 }, { "epoch": 0.7084480203260639, "grad_norm": 1.6122612190851038, "learning_rate": 2.068180190665183e-06, "loss": 0.624, "step": 8365 }, { "epoch": 0.7085327122591573, "grad_norm": 1.5622635865732029, "learning_rate": 2.0670692112491453e-06, "loss": 0.649, "step": 8366 }, { "epoch": 0.7086174041922507, "grad_norm": 1.417241399757583, "learning_rate": 2.0659584525516817e-06, "loss": 0.646, "step": 8367 }, { "epoch": 0.7087020961253441, "grad_norm": 1.271744336669283, "learning_rate": 2.0648479146563795e-06, "loss": 0.6028, "step": 8368 }, { "epoch": 0.7087867880584374, "grad_norm": 1.8600855661417, "learning_rate": 2.063737597646814e-06, "loss": 0.6321, "step": 8369 }, { "epoch": 0.7088714799915308, "grad_norm": 0.6017428570952654, "learning_rate": 2.062627501606544e-06, "loss": 0.8077, "step": 8370 }, { "epoch": 0.7089561719246242, "grad_norm": 1.275648541381821, "learning_rate": 2.061517626619105e-06, "loss": 0.6621, "step": 8371 }, { "epoch": 0.7090408638577176, "grad_norm": 1.6955130687479818, "learning_rate": 2.0604079727680267e-06, "loss": 0.6098, "step": 8372 }, { "epoch": 0.7091255557908109, "grad_norm": 1.508823748460106, "learning_rate": 2.059298540136816e-06, "loss": 0.6079, "step": 8373 }, { "epoch": 0.7092102477239043, "grad_norm": 2.1190785740700737, "learning_rate": 2.05818932880896e-06, "loss": 0.6515, "step": 8374 }, { "epoch": 0.7092949396569976, "grad_norm": 1.1759868771900042, "learning_rate": 2.0570803388679367e-06, "loss": 0.6678, "step": 8375 }, { "epoch": 0.7093796315900911, "grad_norm": 1.4077763867124067, "learning_rate": 2.0559715703971995e-06, "loss": 0.6627, "step": 8376 }, { "epoch": 0.7094643235231844, "grad_norm": 0.6045480136271425, "learning_rate": 2.054863023480191e-06, "loss": 0.8681, "step": 8377 }, { "epoch": 0.7095490154562778, "grad_norm": 1.5405980003566253, "learning_rate": 2.0537546982003355e-06, "loss": 0.6434, "step": 8378 }, { "epoch": 0.7096337073893711, "grad_norm": 1.5660317663768466, "learning_rate": 2.0526465946410395e-06, "loss": 0.5969, "step": 8379 }, { "epoch": 0.7097183993224645, "grad_norm": 0.6205631868299261, "learning_rate": 2.0515387128856945e-06, "loss": 0.8444, "step": 8380 }, { "epoch": 0.709803091255558, "grad_norm": 1.391258134240753, "learning_rate": 2.0504310530176757e-06, "loss": 0.6585, "step": 8381 }, { "epoch": 0.7098877831886513, "grad_norm": 1.3089841887368059, "learning_rate": 2.0493236151203378e-06, "loss": 0.5746, "step": 8382 }, { "epoch": 0.7099724751217447, "grad_norm": 1.2929911244401644, "learning_rate": 2.048216399277024e-06, "loss": 0.648, "step": 8383 }, { "epoch": 0.710057167054838, "grad_norm": 1.3000128391986403, "learning_rate": 2.0471094055710543e-06, "loss": 0.6234, "step": 8384 }, { "epoch": 0.7101418589879314, "grad_norm": 1.3552476195039167, "learning_rate": 2.046002634085738e-06, "loss": 0.6186, "step": 8385 }, { "epoch": 0.7102265509210248, "grad_norm": 1.3791405450874583, "learning_rate": 2.0448960849043664e-06, "loss": 0.6248, "step": 8386 }, { "epoch": 0.7103112428541182, "grad_norm": 1.7620886017349175, "learning_rate": 2.0437897581102123e-06, "loss": 0.7131, "step": 8387 }, { "epoch": 0.7103959347872115, "grad_norm": 1.5245401078486662, "learning_rate": 2.0426836537865326e-06, "loss": 0.6513, "step": 8388 }, { "epoch": 0.7104806267203049, "grad_norm": 1.3494372608536562, "learning_rate": 2.041577772016569e-06, "loss": 0.5719, "step": 8389 }, { "epoch": 0.7105653186533982, "grad_norm": 0.6007861856328287, "learning_rate": 2.0404721128835424e-06, "loss": 0.89, "step": 8390 }, { "epoch": 0.7106500105864917, "grad_norm": 1.6300166923947113, "learning_rate": 2.039366676470661e-06, "loss": 0.6555, "step": 8391 }, { "epoch": 0.710734702519585, "grad_norm": 2.5617274990019006, "learning_rate": 2.0382614628611142e-06, "loss": 0.67, "step": 8392 }, { "epoch": 0.7108193944526784, "grad_norm": 1.4449744820160675, "learning_rate": 2.037156472138075e-06, "loss": 0.6487, "step": 8393 }, { "epoch": 0.7109040863857717, "grad_norm": 1.292272433091959, "learning_rate": 2.036051704384703e-06, "loss": 0.6519, "step": 8394 }, { "epoch": 0.7109887783188651, "grad_norm": 1.3372637050771854, "learning_rate": 2.0349471596841323e-06, "loss": 0.6306, "step": 8395 }, { "epoch": 0.7110734702519586, "grad_norm": 1.3323461843602697, "learning_rate": 2.0338428381194906e-06, "loss": 0.6704, "step": 8396 }, { "epoch": 0.7111581621850519, "grad_norm": 1.2649599077806672, "learning_rate": 2.0327387397738807e-06, "loss": 0.6517, "step": 8397 }, { "epoch": 0.7112428541181453, "grad_norm": 1.1912673985866986, "learning_rate": 2.0316348647303923e-06, "loss": 0.559, "step": 8398 }, { "epoch": 0.7113275460512386, "grad_norm": 0.6360117512058655, "learning_rate": 2.030531213072099e-06, "loss": 0.8553, "step": 8399 }, { "epoch": 0.711412237984332, "grad_norm": 0.6076715992302811, "learning_rate": 2.029427784882056e-06, "loss": 0.8207, "step": 8400 }, { "epoch": 0.7114969299174254, "grad_norm": 1.7386439046670707, "learning_rate": 2.028324580243302e-06, "loss": 0.6246, "step": 8401 }, { "epoch": 0.7115816218505188, "grad_norm": 2.4880418324405804, "learning_rate": 2.027221599238861e-06, "loss": 0.6322, "step": 8402 }, { "epoch": 0.7116663137836121, "grad_norm": 1.460619193274665, "learning_rate": 2.0261188419517343e-06, "loss": 0.634, "step": 8403 }, { "epoch": 0.7117510057167055, "grad_norm": 1.3562140112019343, "learning_rate": 2.025016308464914e-06, "loss": 0.5987, "step": 8404 }, { "epoch": 0.7118356976497988, "grad_norm": 1.4890346901626792, "learning_rate": 2.023913998861368e-06, "loss": 0.6541, "step": 8405 }, { "epoch": 0.7119203895828923, "grad_norm": 1.3483997625561879, "learning_rate": 2.022811913224051e-06, "loss": 0.6621, "step": 8406 }, { "epoch": 0.7120050815159856, "grad_norm": 1.1746954558760638, "learning_rate": 2.0217100516359064e-06, "loss": 0.5992, "step": 8407 }, { "epoch": 0.712089773449079, "grad_norm": 1.7233131003775817, "learning_rate": 2.020608414179849e-06, "loss": 0.6248, "step": 8408 }, { "epoch": 0.7121744653821723, "grad_norm": 1.3335755974121761, "learning_rate": 2.0195070009387847e-06, "loss": 0.6371, "step": 8409 }, { "epoch": 0.7122591573152657, "grad_norm": 0.6552421526202373, "learning_rate": 2.018405811995603e-06, "loss": 0.8644, "step": 8410 }, { "epoch": 0.7123438492483591, "grad_norm": 1.5990229496249513, "learning_rate": 2.0173048474331706e-06, "loss": 0.6357, "step": 8411 }, { "epoch": 0.7124285411814525, "grad_norm": 0.5834646181446572, "learning_rate": 2.016204107334343e-06, "loss": 0.8075, "step": 8412 }, { "epoch": 0.7125132331145458, "grad_norm": 1.455402493285328, "learning_rate": 2.0151035917819554e-06, "loss": 0.6296, "step": 8413 }, { "epoch": 0.7125979250476392, "grad_norm": 1.2445208333094955, "learning_rate": 2.014003300858829e-06, "loss": 0.6001, "step": 8414 }, { "epoch": 0.7126826169807325, "grad_norm": 1.3077303346946663, "learning_rate": 2.012903234647767e-06, "loss": 0.6637, "step": 8415 }, { "epoch": 0.712767308913826, "grad_norm": 0.6119535634856583, "learning_rate": 2.0118033932315533e-06, "loss": 0.8194, "step": 8416 }, { "epoch": 0.7128520008469194, "grad_norm": 1.1866311303934445, "learning_rate": 2.0107037766929566e-06, "loss": 0.6518, "step": 8417 }, { "epoch": 0.7129366927800127, "grad_norm": 1.4708377797141032, "learning_rate": 2.009604385114732e-06, "loss": 0.6199, "step": 8418 }, { "epoch": 0.713021384713106, "grad_norm": 0.6683937384612826, "learning_rate": 2.00850521857961e-06, "loss": 0.8734, "step": 8419 }, { "epoch": 0.7131060766461994, "grad_norm": 1.444341011331437, "learning_rate": 2.007406277170312e-06, "loss": 0.5793, "step": 8420 }, { "epoch": 0.7131907685792929, "grad_norm": 0.599682734086167, "learning_rate": 2.006307560969537e-06, "loss": 0.8193, "step": 8421 }, { "epoch": 0.7132754605123862, "grad_norm": 2.6233113421557483, "learning_rate": 2.0052090700599707e-06, "loss": 0.5837, "step": 8422 }, { "epoch": 0.7133601524454796, "grad_norm": 1.4201889420074127, "learning_rate": 2.0041108045242823e-06, "loss": 0.6162, "step": 8423 }, { "epoch": 0.7134448443785729, "grad_norm": 2.4571058561178574, "learning_rate": 2.003012764445118e-06, "loss": 0.624, "step": 8424 }, { "epoch": 0.7135295363116663, "grad_norm": 1.7748146667676252, "learning_rate": 2.001914949905113e-06, "loss": 0.5903, "step": 8425 }, { "epoch": 0.7136142282447597, "grad_norm": 2.408922928571273, "learning_rate": 2.0008173609868847e-06, "loss": 0.6279, "step": 8426 }, { "epoch": 0.7136989201778531, "grad_norm": 2.291208825633703, "learning_rate": 1.9997199977730286e-06, "loss": 0.6098, "step": 8427 }, { "epoch": 0.7137836121109464, "grad_norm": 1.3206159754844555, "learning_rate": 1.9986228603461334e-06, "loss": 0.6413, "step": 8428 }, { "epoch": 0.7138683040440398, "grad_norm": 1.7886358655529218, "learning_rate": 1.997525948788759e-06, "loss": 0.6297, "step": 8429 }, { "epoch": 0.7139529959771331, "grad_norm": 1.4477429867132825, "learning_rate": 1.9964292631834555e-06, "loss": 0.5937, "step": 8430 }, { "epoch": 0.7140376879102266, "grad_norm": 1.3346115320116365, "learning_rate": 1.9953328036127566e-06, "loss": 0.6414, "step": 8431 }, { "epoch": 0.7141223798433199, "grad_norm": 1.237822235101664, "learning_rate": 1.9942365701591734e-06, "loss": 0.6382, "step": 8432 }, { "epoch": 0.7142070717764133, "grad_norm": 1.7300569252172013, "learning_rate": 1.993140562905204e-06, "loss": 0.5959, "step": 8433 }, { "epoch": 0.7142917637095066, "grad_norm": 1.5604372046331663, "learning_rate": 1.9920447819333294e-06, "loss": 0.5934, "step": 8434 }, { "epoch": 0.7143764556426, "grad_norm": 1.194291093532501, "learning_rate": 1.9909492273260126e-06, "loss": 0.6657, "step": 8435 }, { "epoch": 0.7144611475756935, "grad_norm": 1.9362117403963324, "learning_rate": 1.989853899165703e-06, "loss": 0.6246, "step": 8436 }, { "epoch": 0.7145458395087868, "grad_norm": 1.4610230944937783, "learning_rate": 1.9887587975348245e-06, "loss": 0.5556, "step": 8437 }, { "epoch": 0.7146305314418802, "grad_norm": 0.5837825805306839, "learning_rate": 1.9876639225157912e-06, "loss": 0.8308, "step": 8438 }, { "epoch": 0.7147152233749735, "grad_norm": 1.2537070241873396, "learning_rate": 1.9865692741910016e-06, "loss": 0.7172, "step": 8439 }, { "epoch": 0.7147999153080669, "grad_norm": 1.3688040545950655, "learning_rate": 1.9854748526428287e-06, "loss": 0.6937, "step": 8440 }, { "epoch": 0.7148846072411603, "grad_norm": 1.217079220526532, "learning_rate": 1.9843806579536355e-06, "loss": 0.6216, "step": 8441 }, { "epoch": 0.7149692991742537, "grad_norm": 1.4943278537226397, "learning_rate": 1.9832866902057667e-06, "loss": 0.6616, "step": 8442 }, { "epoch": 0.715053991107347, "grad_norm": 1.6714228749447817, "learning_rate": 1.9821929494815484e-06, "loss": 0.6205, "step": 8443 }, { "epoch": 0.7151386830404404, "grad_norm": 1.373786330464597, "learning_rate": 1.9810994358632927e-06, "loss": 0.6776, "step": 8444 }, { "epoch": 0.7152233749735337, "grad_norm": 0.6331129980663773, "learning_rate": 1.9800061494332885e-06, "loss": 0.8366, "step": 8445 }, { "epoch": 0.7153080669066272, "grad_norm": 1.3041908965169722, "learning_rate": 1.9789130902738128e-06, "loss": 0.6346, "step": 8446 }, { "epoch": 0.7153927588397205, "grad_norm": 1.4482041653768358, "learning_rate": 1.977820258467125e-06, "loss": 0.6217, "step": 8447 }, { "epoch": 0.7154774507728139, "grad_norm": 1.2649774699958232, "learning_rate": 1.976727654095466e-06, "loss": 0.6276, "step": 8448 }, { "epoch": 0.7155621427059072, "grad_norm": 0.6707480315930495, "learning_rate": 1.9756352772410615e-06, "loss": 0.8986, "step": 8449 }, { "epoch": 0.7156468346390006, "grad_norm": 1.4002022833231769, "learning_rate": 1.9745431279861155e-06, "loss": 0.6871, "step": 8450 }, { "epoch": 0.715731526572094, "grad_norm": 0.5896953108963926, "learning_rate": 1.9734512064128198e-06, "loss": 0.8743, "step": 8451 }, { "epoch": 0.7158162185051874, "grad_norm": 1.5987427101842737, "learning_rate": 1.9723595126033484e-06, "loss": 0.5952, "step": 8452 }, { "epoch": 0.7159009104382807, "grad_norm": 0.6146648112531349, "learning_rate": 1.971268046639854e-06, "loss": 0.8729, "step": 8453 }, { "epoch": 0.7159856023713741, "grad_norm": 2.101835220670921, "learning_rate": 1.9701768086044774e-06, "loss": 0.642, "step": 8454 }, { "epoch": 0.7160702943044674, "grad_norm": 1.4843934147162712, "learning_rate": 1.96908579857934e-06, "loss": 0.6108, "step": 8455 }, { "epoch": 0.7161549862375609, "grad_norm": 1.427238057387892, "learning_rate": 1.967995016646545e-06, "loss": 0.6015, "step": 8456 }, { "epoch": 0.7162396781706543, "grad_norm": 1.2829934256882394, "learning_rate": 1.9669044628881823e-06, "loss": 0.6933, "step": 8457 }, { "epoch": 0.7163243701037476, "grad_norm": 2.8814310253413216, "learning_rate": 1.9658141373863184e-06, "loss": 0.6206, "step": 8458 }, { "epoch": 0.716409062036841, "grad_norm": 1.1891314701258553, "learning_rate": 1.964724040223007e-06, "loss": 0.6577, "step": 8459 }, { "epoch": 0.7164937539699343, "grad_norm": 1.6404630551502897, "learning_rate": 1.963634171480286e-06, "loss": 0.6419, "step": 8460 }, { "epoch": 0.7165784459030278, "grad_norm": 1.6234286548300154, "learning_rate": 1.9625445312401695e-06, "loss": 0.6283, "step": 8461 }, { "epoch": 0.7166631378361211, "grad_norm": 1.231691811694485, "learning_rate": 1.961455119584662e-06, "loss": 0.6254, "step": 8462 }, { "epoch": 0.7167478297692145, "grad_norm": 1.3806301364360045, "learning_rate": 1.9603659365957462e-06, "loss": 0.6636, "step": 8463 }, { "epoch": 0.7168325217023078, "grad_norm": 2.138573294589432, "learning_rate": 1.9592769823553894e-06, "loss": 0.6027, "step": 8464 }, { "epoch": 0.7169172136354012, "grad_norm": 1.3159843679429093, "learning_rate": 1.9581882569455428e-06, "loss": 0.6533, "step": 8465 }, { "epoch": 0.7170019055684946, "grad_norm": 1.4544525698914912, "learning_rate": 1.957099760448135e-06, "loss": 0.6438, "step": 8466 }, { "epoch": 0.717086597501588, "grad_norm": 1.263147622744797, "learning_rate": 1.9560114929450835e-06, "loss": 0.6646, "step": 8467 }, { "epoch": 0.7171712894346813, "grad_norm": 1.126053780351254, "learning_rate": 1.954923454518286e-06, "loss": 0.6075, "step": 8468 }, { "epoch": 0.7172559813677747, "grad_norm": 1.563836935028119, "learning_rate": 1.9538356452496226e-06, "loss": 0.6064, "step": 8469 }, { "epoch": 0.717340673300868, "grad_norm": 1.2210296223251176, "learning_rate": 1.952748065220959e-06, "loss": 0.5883, "step": 8470 }, { "epoch": 0.7174253652339615, "grad_norm": 1.2926156027990505, "learning_rate": 1.951660714514138e-06, "loss": 0.6, "step": 8471 }, { "epoch": 0.7175100571670548, "grad_norm": 1.3453452345444805, "learning_rate": 1.9505735932109894e-06, "loss": 0.6866, "step": 8472 }, { "epoch": 0.7175947491001482, "grad_norm": 2.201977972987417, "learning_rate": 1.949486701393327e-06, "loss": 0.7053, "step": 8473 }, { "epoch": 0.7176794410332415, "grad_norm": 1.430127210469338, "learning_rate": 1.9484000391429424e-06, "loss": 0.6322, "step": 8474 }, { "epoch": 0.717764132966335, "grad_norm": 1.226231035330776, "learning_rate": 1.9473136065416136e-06, "loss": 0.6295, "step": 8475 }, { "epoch": 0.7178488248994284, "grad_norm": 0.6777421669527558, "learning_rate": 1.946227403671101e-06, "loss": 0.8293, "step": 8476 }, { "epoch": 0.7179335168325217, "grad_norm": 1.522954543930302, "learning_rate": 1.9451414306131468e-06, "loss": 0.6878, "step": 8477 }, { "epoch": 0.7180182087656151, "grad_norm": 1.7056442187994736, "learning_rate": 1.9440556874494772e-06, "loss": 0.6433, "step": 8478 }, { "epoch": 0.7181029006987084, "grad_norm": 1.2328215136565552, "learning_rate": 1.942970174261798e-06, "loss": 0.6297, "step": 8479 }, { "epoch": 0.7181875926318019, "grad_norm": 1.5154137076409437, "learning_rate": 1.9418848911318004e-06, "loss": 0.6215, "step": 8480 }, { "epoch": 0.7182722845648952, "grad_norm": 1.5270910279353431, "learning_rate": 1.9407998381411603e-06, "loss": 0.5869, "step": 8481 }, { "epoch": 0.7183569764979886, "grad_norm": 1.4292298083938335, "learning_rate": 1.93971501537153e-06, "loss": 0.6407, "step": 8482 }, { "epoch": 0.7184416684310819, "grad_norm": 1.9949174381639625, "learning_rate": 1.9386304229045477e-06, "loss": 0.6178, "step": 8483 }, { "epoch": 0.7185263603641753, "grad_norm": 1.5771980569957127, "learning_rate": 1.9375460608218404e-06, "loss": 0.6763, "step": 8484 }, { "epoch": 0.7186110522972687, "grad_norm": 1.7473639879593084, "learning_rate": 1.936461929205007e-06, "loss": 0.6547, "step": 8485 }, { "epoch": 0.7186957442303621, "grad_norm": 2.525715316373634, "learning_rate": 1.935378028135637e-06, "loss": 0.7178, "step": 8486 }, { "epoch": 0.7187804361634554, "grad_norm": 1.2347281719121292, "learning_rate": 1.9342943576952968e-06, "loss": 0.6306, "step": 8487 }, { "epoch": 0.7188651280965488, "grad_norm": 1.2699262576929997, "learning_rate": 1.93321091796554e-06, "loss": 0.637, "step": 8488 }, { "epoch": 0.7189498200296421, "grad_norm": 1.4254003308807683, "learning_rate": 1.9321277090279006e-06, "loss": 0.6295, "step": 8489 }, { "epoch": 0.7190345119627356, "grad_norm": 1.7960176439757658, "learning_rate": 1.9310447309638965e-06, "loss": 0.6549, "step": 8490 }, { "epoch": 0.719119203895829, "grad_norm": 1.1232381744694275, "learning_rate": 1.9299619838550272e-06, "loss": 0.6339, "step": 8491 }, { "epoch": 0.7192038958289223, "grad_norm": 1.732469570398505, "learning_rate": 1.928879467782777e-06, "loss": 0.6229, "step": 8492 }, { "epoch": 0.7192885877620157, "grad_norm": 1.2281214106564613, "learning_rate": 1.927797182828608e-06, "loss": 0.6465, "step": 8493 }, { "epoch": 0.719373279695109, "grad_norm": 1.4062581879003642, "learning_rate": 1.92671512907397e-06, "loss": 0.6352, "step": 8494 }, { "epoch": 0.7194579716282025, "grad_norm": 1.2725127440318331, "learning_rate": 1.9256333066002907e-06, "loss": 0.6345, "step": 8495 }, { "epoch": 0.7195426635612958, "grad_norm": 2.0991470712213487, "learning_rate": 1.9245517154889854e-06, "loss": 0.7024, "step": 8496 }, { "epoch": 0.7196273554943892, "grad_norm": 1.6841007722095676, "learning_rate": 1.923470355821448e-06, "loss": 0.6828, "step": 8497 }, { "epoch": 0.7197120474274825, "grad_norm": 1.4625790863002575, "learning_rate": 1.9223892276790574e-06, "loss": 0.6388, "step": 8498 }, { "epoch": 0.7197967393605759, "grad_norm": 0.6787501805031374, "learning_rate": 1.921308331143176e-06, "loss": 0.8564, "step": 8499 }, { "epoch": 0.7198814312936693, "grad_norm": 2.120723037635902, "learning_rate": 1.9202276662951436e-06, "loss": 0.6226, "step": 8500 }, { "epoch": 0.7199661232267627, "grad_norm": 1.7247641724490106, "learning_rate": 1.9191472332162874e-06, "loss": 0.6177, "step": 8501 }, { "epoch": 0.720050815159856, "grad_norm": 1.54669530695867, "learning_rate": 1.9180670319879172e-06, "loss": 0.6383, "step": 8502 }, { "epoch": 0.7201355070929494, "grad_norm": 1.1575698567049604, "learning_rate": 1.9169870626913194e-06, "loss": 0.6, "step": 8503 }, { "epoch": 0.7202201990260427, "grad_norm": 1.5125139521395596, "learning_rate": 1.915907325407772e-06, "loss": 0.5904, "step": 8504 }, { "epoch": 0.7203048909591362, "grad_norm": 1.8982757591910429, "learning_rate": 1.914827820218531e-06, "loss": 0.5614, "step": 8505 }, { "epoch": 0.7203895828922295, "grad_norm": 1.3240353515279395, "learning_rate": 1.9137485472048316e-06, "loss": 0.6703, "step": 8506 }, { "epoch": 0.7204742748253229, "grad_norm": 1.266711717812671, "learning_rate": 1.912669506447899e-06, "loss": 0.6556, "step": 8507 }, { "epoch": 0.7205589667584162, "grad_norm": 1.4543017559524107, "learning_rate": 1.9115906980289317e-06, "loss": 0.5885, "step": 8508 }, { "epoch": 0.7206436586915096, "grad_norm": 1.251772808801154, "learning_rate": 1.9105121220291183e-06, "loss": 0.6139, "step": 8509 }, { "epoch": 0.720728350624603, "grad_norm": 1.4028405218727593, "learning_rate": 1.9094337785296275e-06, "loss": 0.6069, "step": 8510 }, { "epoch": 0.7208130425576964, "grad_norm": 1.4334086616304234, "learning_rate": 1.90835566761161e-06, "loss": 0.648, "step": 8511 }, { "epoch": 0.7208977344907898, "grad_norm": 1.616249993633362, "learning_rate": 1.9072777893562e-06, "loss": 0.5825, "step": 8512 }, { "epoch": 0.7209824264238831, "grad_norm": 1.589074991416859, "learning_rate": 1.9062001438445143e-06, "loss": 0.6185, "step": 8513 }, { "epoch": 0.7210671183569765, "grad_norm": 1.3124582568652365, "learning_rate": 1.9051227311576487e-06, "loss": 0.6351, "step": 8514 }, { "epoch": 0.7211518102900699, "grad_norm": 1.9443040160168545, "learning_rate": 1.9040455513766875e-06, "loss": 0.6142, "step": 8515 }, { "epoch": 0.7212365022231633, "grad_norm": 1.1985354680085256, "learning_rate": 1.9029686045826906e-06, "loss": 0.6255, "step": 8516 }, { "epoch": 0.7213211941562566, "grad_norm": 1.2692770790510712, "learning_rate": 1.9018918908567058e-06, "loss": 0.6332, "step": 8517 }, { "epoch": 0.72140588608935, "grad_norm": 1.42272290680256, "learning_rate": 1.9008154102797615e-06, "loss": 0.6198, "step": 8518 }, { "epoch": 0.7214905780224433, "grad_norm": 1.3698602222133118, "learning_rate": 1.8997391629328687e-06, "loss": 0.6544, "step": 8519 }, { "epoch": 0.7215752699555368, "grad_norm": 1.2140814847805017, "learning_rate": 1.8986631488970202e-06, "loss": 0.6203, "step": 8520 }, { "epoch": 0.7216599618886301, "grad_norm": 2.046957590301776, "learning_rate": 1.8975873682531942e-06, "loss": 0.6257, "step": 8521 }, { "epoch": 0.7217446538217235, "grad_norm": 1.5325981448961286, "learning_rate": 1.8965118210823447e-06, "loss": 0.6919, "step": 8522 }, { "epoch": 0.7218293457548168, "grad_norm": 2.320788400048114, "learning_rate": 1.8954365074654146e-06, "loss": 0.6924, "step": 8523 }, { "epoch": 0.7219140376879102, "grad_norm": 1.448975454650968, "learning_rate": 1.8943614274833267e-06, "loss": 0.6853, "step": 8524 }, { "epoch": 0.7219987296210036, "grad_norm": 1.5008850238968983, "learning_rate": 1.8932865812169864e-06, "loss": 0.6214, "step": 8525 }, { "epoch": 0.722083421554097, "grad_norm": 1.6115920529227465, "learning_rate": 1.8922119687472839e-06, "loss": 0.66, "step": 8526 }, { "epoch": 0.7221681134871903, "grad_norm": 1.2871492099189574, "learning_rate": 1.891137590155085e-06, "loss": 0.6398, "step": 8527 }, { "epoch": 0.7222528054202837, "grad_norm": 0.6004160433338803, "learning_rate": 1.8900634455212452e-06, "loss": 0.8488, "step": 8528 }, { "epoch": 0.722337497353377, "grad_norm": 1.2019525454306488, "learning_rate": 1.8889895349266002e-06, "loss": 0.5891, "step": 8529 }, { "epoch": 0.7224221892864705, "grad_norm": 2.5446575478978746, "learning_rate": 1.8879158584519646e-06, "loss": 0.5988, "step": 8530 }, { "epoch": 0.7225068812195639, "grad_norm": 1.4570214501564627, "learning_rate": 1.8868424161781401e-06, "loss": 0.6507, "step": 8531 }, { "epoch": 0.7225915731526572, "grad_norm": 1.3738399813094146, "learning_rate": 1.8857692081859086e-06, "loss": 0.5898, "step": 8532 }, { "epoch": 0.7226762650857506, "grad_norm": 2.210882852248142, "learning_rate": 1.8846962345560348e-06, "loss": 0.6364, "step": 8533 }, { "epoch": 0.7227609570188439, "grad_norm": 1.3367045064919885, "learning_rate": 1.8836234953692679e-06, "loss": 0.6496, "step": 8534 }, { "epoch": 0.7228456489519374, "grad_norm": 1.4407264959112147, "learning_rate": 1.8825509907063328e-06, "loss": 0.6163, "step": 8535 }, { "epoch": 0.7229303408850307, "grad_norm": 46.62069142879687, "learning_rate": 1.881478720647945e-06, "loss": 0.6393, "step": 8536 }, { "epoch": 0.7230150328181241, "grad_norm": 1.5269727045601809, "learning_rate": 1.8804066852747955e-06, "loss": 0.6398, "step": 8537 }, { "epoch": 0.7230997247512174, "grad_norm": 1.3660374763435177, "learning_rate": 1.8793348846675597e-06, "loss": 0.599, "step": 8538 }, { "epoch": 0.7231844166843108, "grad_norm": 1.3943914525387866, "learning_rate": 1.878263318906902e-06, "loss": 0.6341, "step": 8539 }, { "epoch": 0.7232691086174042, "grad_norm": 1.4277635816346095, "learning_rate": 1.877191988073459e-06, "loss": 0.6647, "step": 8540 }, { "epoch": 0.7233538005504976, "grad_norm": 1.3615130458667828, "learning_rate": 1.876120892247854e-06, "loss": 0.6759, "step": 8541 }, { "epoch": 0.7234384924835909, "grad_norm": 1.3545439386148472, "learning_rate": 1.8750500315106956e-06, "loss": 0.6041, "step": 8542 }, { "epoch": 0.7235231844166843, "grad_norm": 1.3454918302703056, "learning_rate": 1.8739794059425686e-06, "loss": 0.6259, "step": 8543 }, { "epoch": 0.7236078763497776, "grad_norm": 1.2233067057425322, "learning_rate": 1.8729090156240438e-06, "loss": 0.6244, "step": 8544 }, { "epoch": 0.7236925682828711, "grad_norm": 1.4337267857083933, "learning_rate": 1.871838860635674e-06, "loss": 0.6023, "step": 8545 }, { "epoch": 0.7237772602159644, "grad_norm": 1.524382126313725, "learning_rate": 1.870768941057995e-06, "loss": 0.5961, "step": 8546 }, { "epoch": 0.7238619521490578, "grad_norm": 1.2532994217538396, "learning_rate": 1.8696992569715245e-06, "loss": 0.6462, "step": 8547 }, { "epoch": 0.7239466440821511, "grad_norm": 1.2652768123033558, "learning_rate": 1.8686298084567595e-06, "loss": 0.6411, "step": 8548 }, { "epoch": 0.7240313360152445, "grad_norm": 2.5505391661337957, "learning_rate": 1.8675605955941822e-06, "loss": 0.6337, "step": 8549 }, { "epoch": 0.724116027948338, "grad_norm": 1.7786931304227735, "learning_rate": 1.8664916184642589e-06, "loss": 0.63, "step": 8550 }, { "epoch": 0.7242007198814313, "grad_norm": 1.283780045498891, "learning_rate": 1.8654228771474325e-06, "loss": 0.63, "step": 8551 }, { "epoch": 0.7242854118145247, "grad_norm": 1.4341010694549874, "learning_rate": 1.864354371724133e-06, "loss": 0.6065, "step": 8552 }, { "epoch": 0.724370103747618, "grad_norm": 1.2662100238343856, "learning_rate": 1.8632861022747711e-06, "loss": 0.6464, "step": 8553 }, { "epoch": 0.7244547956807114, "grad_norm": 1.7313018114302996, "learning_rate": 1.8622180688797393e-06, "loss": 0.5962, "step": 8554 }, { "epoch": 0.7245394876138048, "grad_norm": 1.4754703831684617, "learning_rate": 1.8611502716194153e-06, "loss": 0.6833, "step": 8555 }, { "epoch": 0.7246241795468982, "grad_norm": 2.3271444889296786, "learning_rate": 1.8600827105741525e-06, "loss": 0.643, "step": 8556 }, { "epoch": 0.7247088714799915, "grad_norm": 1.5993983634826308, "learning_rate": 1.8590153858242926e-06, "loss": 0.6368, "step": 8557 }, { "epoch": 0.7247935634130849, "grad_norm": 1.906859885164969, "learning_rate": 1.8579482974501584e-06, "loss": 0.6345, "step": 8558 }, { "epoch": 0.7248782553461782, "grad_norm": 2.1829948403561703, "learning_rate": 1.8568814455320499e-06, "loss": 0.6514, "step": 8559 }, { "epoch": 0.7249629472792717, "grad_norm": 1.1560793264054017, "learning_rate": 1.8558148301502593e-06, "loss": 0.5896, "step": 8560 }, { "epoch": 0.725047639212365, "grad_norm": 1.2779175040427806, "learning_rate": 1.8547484513850505e-06, "loss": 0.568, "step": 8561 }, { "epoch": 0.7251323311454584, "grad_norm": 2.2780376215054217, "learning_rate": 1.8536823093166756e-06, "loss": 0.6551, "step": 8562 }, { "epoch": 0.7252170230785517, "grad_norm": 0.6405150714600043, "learning_rate": 1.8526164040253691e-06, "loss": 0.8897, "step": 8563 }, { "epoch": 0.7253017150116451, "grad_norm": 1.330927921428757, "learning_rate": 1.8515507355913426e-06, "loss": 0.5812, "step": 8564 }, { "epoch": 0.7253864069447385, "grad_norm": 2.7658939358145913, "learning_rate": 1.850485304094795e-06, "loss": 0.6027, "step": 8565 }, { "epoch": 0.7254710988778319, "grad_norm": 1.6727830116654874, "learning_rate": 1.8494201096159058e-06, "loss": 0.6482, "step": 8566 }, { "epoch": 0.7255557908109252, "grad_norm": 1.3290056508630987, "learning_rate": 1.8483551522348364e-06, "loss": 0.6959, "step": 8567 }, { "epoch": 0.7256404827440186, "grad_norm": 1.184573230335407, "learning_rate": 1.8472904320317325e-06, "loss": 0.57, "step": 8568 }, { "epoch": 0.725725174677112, "grad_norm": 1.5612392678164289, "learning_rate": 1.8462259490867163e-06, "loss": 0.6522, "step": 8569 }, { "epoch": 0.7258098666102054, "grad_norm": 1.22515263636349, "learning_rate": 1.8451617034798973e-06, "loss": 0.6173, "step": 8570 }, { "epoch": 0.7258945585432988, "grad_norm": 1.475966932513994, "learning_rate": 1.8440976952913675e-06, "loss": 0.6363, "step": 8571 }, { "epoch": 0.7259792504763921, "grad_norm": 0.6942415743153889, "learning_rate": 1.8430339246011958e-06, "loss": 0.8182, "step": 8572 }, { "epoch": 0.7260639424094855, "grad_norm": 1.584655140731116, "learning_rate": 1.8419703914894376e-06, "loss": 0.6206, "step": 8573 }, { "epoch": 0.7261486343425789, "grad_norm": 1.7992505533238707, "learning_rate": 1.8409070960361308e-06, "loss": 0.596, "step": 8574 }, { "epoch": 0.7262333262756723, "grad_norm": 1.321596763976454, "learning_rate": 1.839844038321293e-06, "loss": 0.5835, "step": 8575 }, { "epoch": 0.7263180182087656, "grad_norm": 1.4608603768900232, "learning_rate": 1.8387812184249265e-06, "loss": 0.5965, "step": 8576 }, { "epoch": 0.726402710141859, "grad_norm": 1.3197054119628147, "learning_rate": 1.8377186364270116e-06, "loss": 0.6748, "step": 8577 }, { "epoch": 0.7264874020749523, "grad_norm": 1.272284482696986, "learning_rate": 1.8366562924075143e-06, "loss": 0.6393, "step": 8578 }, { "epoch": 0.7265720940080458, "grad_norm": 1.4659168340160602, "learning_rate": 1.835594186446381e-06, "loss": 0.6376, "step": 8579 }, { "epoch": 0.7266567859411391, "grad_norm": 1.2339850255484879, "learning_rate": 1.8345323186235426e-06, "loss": 0.6253, "step": 8580 }, { "epoch": 0.7267414778742325, "grad_norm": 0.6187021267617465, "learning_rate": 1.8334706890189102e-06, "loss": 0.8323, "step": 8581 }, { "epoch": 0.7268261698073258, "grad_norm": 2.1250445661186843, "learning_rate": 1.8324092977123742e-06, "loss": 0.6458, "step": 8582 }, { "epoch": 0.7269108617404192, "grad_norm": 1.9806074467388741, "learning_rate": 1.8313481447838116e-06, "loss": 0.6416, "step": 8583 }, { "epoch": 0.7269955536735127, "grad_norm": 1.3077123181216972, "learning_rate": 1.830287230313082e-06, "loss": 0.5813, "step": 8584 }, { "epoch": 0.727080245606606, "grad_norm": 1.4970322328467298, "learning_rate": 1.8292265543800213e-06, "loss": 0.6308, "step": 8585 }, { "epoch": 0.7271649375396994, "grad_norm": 1.5016505035068488, "learning_rate": 1.8281661170644522e-06, "loss": 0.6141, "step": 8586 }, { "epoch": 0.7272496294727927, "grad_norm": 1.3375518617170694, "learning_rate": 1.8271059184461781e-06, "loss": 0.6169, "step": 8587 }, { "epoch": 0.727334321405886, "grad_norm": 3.459178099058568, "learning_rate": 1.826045958604985e-06, "loss": 0.6763, "step": 8588 }, { "epoch": 0.7274190133389795, "grad_norm": 1.4722575554477677, "learning_rate": 1.8249862376206423e-06, "loss": 0.7091, "step": 8589 }, { "epoch": 0.7275037052720729, "grad_norm": 1.5052978806555446, "learning_rate": 1.8239267555728962e-06, "loss": 0.6365, "step": 8590 }, { "epoch": 0.7275883972051662, "grad_norm": 1.4369823251165268, "learning_rate": 1.8228675125414796e-06, "loss": 0.6419, "step": 8591 }, { "epoch": 0.7276730891382596, "grad_norm": 1.5169166759376034, "learning_rate": 1.8218085086061082e-06, "loss": 0.5582, "step": 8592 }, { "epoch": 0.7277577810713529, "grad_norm": 1.8183357046955, "learning_rate": 1.8207497438464738e-06, "loss": 0.6496, "step": 8593 }, { "epoch": 0.7278424730044464, "grad_norm": 1.430651070703424, "learning_rate": 1.819691218342255e-06, "loss": 0.6495, "step": 8594 }, { "epoch": 0.7279271649375397, "grad_norm": 1.20330018295594, "learning_rate": 1.8186329321731156e-06, "loss": 0.6441, "step": 8595 }, { "epoch": 0.7280118568706331, "grad_norm": 1.4670330275294141, "learning_rate": 1.8175748854186924e-06, "loss": 0.6144, "step": 8596 }, { "epoch": 0.7280965488037264, "grad_norm": 1.5735872829192548, "learning_rate": 1.8165170781586122e-06, "loss": 0.6889, "step": 8597 }, { "epoch": 0.7281812407368198, "grad_norm": 1.271397789810399, "learning_rate": 1.815459510472478e-06, "loss": 0.6202, "step": 8598 }, { "epoch": 0.7282659326699132, "grad_norm": 0.6211838369064072, "learning_rate": 1.8144021824398788e-06, "loss": 0.8572, "step": 8599 }, { "epoch": 0.7283506246030066, "grad_norm": 1.185575184174039, "learning_rate": 1.8133450941403836e-06, "loss": 0.636, "step": 8600 }, { "epoch": 0.7284353165360999, "grad_norm": 1.5643188198130589, "learning_rate": 1.812288245653544e-06, "loss": 0.641, "step": 8601 }, { "epoch": 0.7285200084691933, "grad_norm": 1.2534530366417433, "learning_rate": 1.8112316370588957e-06, "loss": 0.6176, "step": 8602 }, { "epoch": 0.7286047004022866, "grad_norm": 2.2521334771424533, "learning_rate": 1.8101752684359502e-06, "loss": 0.6542, "step": 8603 }, { "epoch": 0.7286893923353801, "grad_norm": 1.4211726122358934, "learning_rate": 1.8091191398642066e-06, "loss": 0.5717, "step": 8604 }, { "epoch": 0.7287740842684735, "grad_norm": 0.5988968771100865, "learning_rate": 1.808063251423146e-06, "loss": 0.8821, "step": 8605 }, { "epoch": 0.7288587762015668, "grad_norm": 2.1218524882460676, "learning_rate": 1.8070076031922263e-06, "loss": 0.625, "step": 8606 }, { "epoch": 0.7289434681346602, "grad_norm": 1.3686145531658045, "learning_rate": 1.8059521952508919e-06, "loss": 0.6322, "step": 8607 }, { "epoch": 0.7290281600677535, "grad_norm": 1.663390312973934, "learning_rate": 1.8048970276785682e-06, "loss": 0.6678, "step": 8608 }, { "epoch": 0.729112852000847, "grad_norm": 1.2878133171105375, "learning_rate": 1.8038421005546624e-06, "loss": 0.5719, "step": 8609 }, { "epoch": 0.7291975439339403, "grad_norm": 1.4355366160460452, "learning_rate": 1.8027874139585644e-06, "loss": 0.6452, "step": 8610 }, { "epoch": 0.7292822358670337, "grad_norm": 2.018662740756828, "learning_rate": 1.8017329679696415e-06, "loss": 0.6624, "step": 8611 }, { "epoch": 0.729366927800127, "grad_norm": 2.0343519188858195, "learning_rate": 1.800678762667249e-06, "loss": 0.5889, "step": 8612 }, { "epoch": 0.7294516197332204, "grad_norm": 1.5799579135150257, "learning_rate": 1.7996247981307218e-06, "loss": 0.6298, "step": 8613 }, { "epoch": 0.7295363116663138, "grad_norm": 1.3458722688320457, "learning_rate": 1.7985710744393741e-06, "loss": 0.6044, "step": 8614 }, { "epoch": 0.7296210035994072, "grad_norm": 1.3615134428956015, "learning_rate": 1.7975175916725034e-06, "loss": 0.5953, "step": 8615 }, { "epoch": 0.7297056955325005, "grad_norm": 0.6450992154707398, "learning_rate": 1.796464349909396e-06, "loss": 0.8409, "step": 8616 }, { "epoch": 0.7297903874655939, "grad_norm": 0.610545404718864, "learning_rate": 1.7954113492293075e-06, "loss": 0.8647, "step": 8617 }, { "epoch": 0.7298750793986872, "grad_norm": 1.3802757108415882, "learning_rate": 1.7943585897114856e-06, "loss": 0.6378, "step": 8618 }, { "epoch": 0.7299597713317807, "grad_norm": 1.265328010686523, "learning_rate": 1.793306071435153e-06, "loss": 0.6338, "step": 8619 }, { "epoch": 0.730044463264874, "grad_norm": 1.415369146583258, "learning_rate": 1.7922537944795194e-06, "loss": 0.6187, "step": 8620 }, { "epoch": 0.7301291551979674, "grad_norm": 1.236453233598013, "learning_rate": 1.791201758923773e-06, "loss": 0.6166, "step": 8621 }, { "epoch": 0.7302138471310607, "grad_norm": 0.6221338136919379, "learning_rate": 1.7901499648470855e-06, "loss": 0.8891, "step": 8622 }, { "epoch": 0.7302985390641541, "grad_norm": 1.421671671613609, "learning_rate": 1.7890984123286104e-06, "loss": 0.6902, "step": 8623 }, { "epoch": 0.7303832309972476, "grad_norm": 1.0593144674032575, "learning_rate": 1.7880471014474836e-06, "loss": 0.5933, "step": 8624 }, { "epoch": 0.7304679229303409, "grad_norm": 1.5314871514632333, "learning_rate": 1.7869960322828194e-06, "loss": 0.7067, "step": 8625 }, { "epoch": 0.7305526148634343, "grad_norm": 1.342967419180882, "learning_rate": 1.7859452049137188e-06, "loss": 0.6093, "step": 8626 }, { "epoch": 0.7306373067965276, "grad_norm": 1.332843660496602, "learning_rate": 1.784894619419259e-06, "loss": 0.6266, "step": 8627 }, { "epoch": 0.730721998729621, "grad_norm": 0.6045152274578302, "learning_rate": 1.783844275878504e-06, "loss": 0.8408, "step": 8628 }, { "epoch": 0.7308066906627144, "grad_norm": 1.6485247763595545, "learning_rate": 1.7827941743704974e-06, "loss": 0.636, "step": 8629 }, { "epoch": 0.7308913825958078, "grad_norm": 1.5132354407765656, "learning_rate": 1.7817443149742652e-06, "loss": 0.6371, "step": 8630 }, { "epoch": 0.7309760745289011, "grad_norm": 1.2884831747238952, "learning_rate": 1.780694697768815e-06, "loss": 0.6483, "step": 8631 }, { "epoch": 0.7310607664619945, "grad_norm": 1.3266046365042325, "learning_rate": 1.7796453228331373e-06, "loss": 0.5844, "step": 8632 }, { "epoch": 0.7311454583950878, "grad_norm": 1.1519391194796962, "learning_rate": 1.7785961902462e-06, "loss": 0.6164, "step": 8633 }, { "epoch": 0.7312301503281813, "grad_norm": 1.32094156477665, "learning_rate": 1.7775473000869591e-06, "loss": 0.6026, "step": 8634 }, { "epoch": 0.7313148422612746, "grad_norm": 1.2131518726911763, "learning_rate": 1.7764986524343441e-06, "loss": 0.6279, "step": 8635 }, { "epoch": 0.731399534194368, "grad_norm": 1.4984089928870228, "learning_rate": 1.775450247367277e-06, "loss": 0.5795, "step": 8636 }, { "epoch": 0.7314842261274613, "grad_norm": 1.5537009332018656, "learning_rate": 1.7744020849646547e-06, "loss": 0.6581, "step": 8637 }, { "epoch": 0.7315689180605547, "grad_norm": 0.5736101977871528, "learning_rate": 1.7733541653053542e-06, "loss": 0.8247, "step": 8638 }, { "epoch": 0.7316536099936481, "grad_norm": 1.4903184527524789, "learning_rate": 1.7723064884682406e-06, "loss": 0.6133, "step": 8639 }, { "epoch": 0.7317383019267415, "grad_norm": 1.1542493240243963, "learning_rate": 1.7712590545321533e-06, "loss": 0.7, "step": 8640 }, { "epoch": 0.7318229938598348, "grad_norm": 1.6961001059037162, "learning_rate": 1.7702118635759197e-06, "loss": 0.5532, "step": 8641 }, { "epoch": 0.7319076857929282, "grad_norm": 0.5875961216780254, "learning_rate": 1.7691649156783453e-06, "loss": 0.8367, "step": 8642 }, { "epoch": 0.7319923777260215, "grad_norm": 1.575758255627649, "learning_rate": 1.7681182109182193e-06, "loss": 0.6078, "step": 8643 }, { "epoch": 0.732077069659115, "grad_norm": 0.628366882088911, "learning_rate": 1.7670717493743118e-06, "loss": 0.8867, "step": 8644 }, { "epoch": 0.7321617615922084, "grad_norm": 0.5766396489098448, "learning_rate": 1.7660255311253754e-06, "loss": 0.8506, "step": 8645 }, { "epoch": 0.7322464535253017, "grad_norm": 1.4188779761955197, "learning_rate": 1.764979556250141e-06, "loss": 0.6177, "step": 8646 }, { "epoch": 0.7323311454583951, "grad_norm": 0.6316008520044011, "learning_rate": 1.7639338248273274e-06, "loss": 0.8594, "step": 8647 }, { "epoch": 0.7324158373914884, "grad_norm": 1.2975244087392481, "learning_rate": 1.762888336935627e-06, "loss": 0.5923, "step": 8648 }, { "epoch": 0.7325005293245819, "grad_norm": 1.3576305430773317, "learning_rate": 1.761843092653721e-06, "loss": 0.6533, "step": 8649 }, { "epoch": 0.7325852212576752, "grad_norm": 1.6691398341448316, "learning_rate": 1.7607980920602685e-06, "loss": 0.6301, "step": 8650 }, { "epoch": 0.7326699131907686, "grad_norm": 1.191831380738994, "learning_rate": 1.7597533352339125e-06, "loss": 0.6249, "step": 8651 }, { "epoch": 0.7327546051238619, "grad_norm": 3.6240768989886702, "learning_rate": 1.7587088222532762e-06, "loss": 0.6515, "step": 8652 }, { "epoch": 0.7328392970569553, "grad_norm": 1.1215702720184137, "learning_rate": 1.7576645531969654e-06, "loss": 0.5652, "step": 8653 }, { "epoch": 0.7329239889900487, "grad_norm": 1.3205147743266388, "learning_rate": 1.756620528143565e-06, "loss": 0.6395, "step": 8654 }, { "epoch": 0.7330086809231421, "grad_norm": 1.4014837346766809, "learning_rate": 1.755576747171644e-06, "loss": 0.6295, "step": 8655 }, { "epoch": 0.7330933728562354, "grad_norm": 1.350203850876304, "learning_rate": 1.754533210359753e-06, "loss": 0.6013, "step": 8656 }, { "epoch": 0.7331780647893288, "grad_norm": 1.5307047270929783, "learning_rate": 1.7534899177864228e-06, "loss": 0.6718, "step": 8657 }, { "epoch": 0.7332627567224221, "grad_norm": 0.5804339718820996, "learning_rate": 1.75244686953017e-06, "loss": 0.8662, "step": 8658 }, { "epoch": 0.7333474486555156, "grad_norm": 1.419864573093764, "learning_rate": 1.7514040656694848e-06, "loss": 0.622, "step": 8659 }, { "epoch": 0.733432140588609, "grad_norm": 1.274988102184692, "learning_rate": 1.7503615062828456e-06, "loss": 0.6311, "step": 8660 }, { "epoch": 0.7335168325217023, "grad_norm": 1.2416476572416775, "learning_rate": 1.7493191914487123e-06, "loss": 0.629, "step": 8661 }, { "epoch": 0.7336015244547957, "grad_norm": 1.684461892671396, "learning_rate": 1.7482771212455218e-06, "loss": 0.5906, "step": 8662 }, { "epoch": 0.733686216387889, "grad_norm": 1.928556459708759, "learning_rate": 1.7472352957516964e-06, "loss": 0.6325, "step": 8663 }, { "epoch": 0.7337709083209825, "grad_norm": 1.371472768188999, "learning_rate": 1.7461937150456386e-06, "loss": 0.6895, "step": 8664 }, { "epoch": 0.7338556002540758, "grad_norm": 1.550672120114854, "learning_rate": 1.7451523792057345e-06, "loss": 0.5835, "step": 8665 }, { "epoch": 0.7339402921871692, "grad_norm": 1.490500443631743, "learning_rate": 1.74411128831035e-06, "loss": 0.6482, "step": 8666 }, { "epoch": 0.7340249841202625, "grad_norm": 1.3348477957317462, "learning_rate": 1.743070442437831e-06, "loss": 0.6044, "step": 8667 }, { "epoch": 0.7341096760533559, "grad_norm": 1.8447762710193596, "learning_rate": 1.7420298416665067e-06, "loss": 0.6637, "step": 8668 }, { "epoch": 0.7341943679864493, "grad_norm": 1.6663614275419416, "learning_rate": 1.7409894860746906e-06, "loss": 0.6359, "step": 8669 }, { "epoch": 0.7342790599195427, "grad_norm": 4.088133062711579, "learning_rate": 1.7399493757406695e-06, "loss": 0.608, "step": 8670 }, { "epoch": 0.734363751852636, "grad_norm": 1.5031736006989445, "learning_rate": 1.738909510742724e-06, "loss": 0.6354, "step": 8671 }, { "epoch": 0.7344484437857294, "grad_norm": 1.3367404798388218, "learning_rate": 1.7378698911591042e-06, "loss": 0.6497, "step": 8672 }, { "epoch": 0.7345331357188227, "grad_norm": 1.1456571849774217, "learning_rate": 1.7368305170680495e-06, "loss": 0.6462, "step": 8673 }, { "epoch": 0.7346178276519162, "grad_norm": 1.50860663787496, "learning_rate": 1.7357913885477784e-06, "loss": 0.6209, "step": 8674 }, { "epoch": 0.7347025195850095, "grad_norm": 1.307945948189013, "learning_rate": 1.734752505676489e-06, "loss": 0.6107, "step": 8675 }, { "epoch": 0.7347872115181029, "grad_norm": 1.3814086250466038, "learning_rate": 1.733713868532364e-06, "loss": 0.6583, "step": 8676 }, { "epoch": 0.7348719034511962, "grad_norm": 1.3046555765707493, "learning_rate": 1.7326754771935661e-06, "loss": 0.6543, "step": 8677 }, { "epoch": 0.7349565953842897, "grad_norm": 1.3731873057517545, "learning_rate": 1.7316373317382401e-06, "loss": 0.6301, "step": 8678 }, { "epoch": 0.735041287317383, "grad_norm": 1.2450527644788039, "learning_rate": 1.730599432244513e-06, "loss": 0.6032, "step": 8679 }, { "epoch": 0.7351259792504764, "grad_norm": 1.2696229886072117, "learning_rate": 1.729561778790489e-06, "loss": 0.5987, "step": 8680 }, { "epoch": 0.7352106711835698, "grad_norm": 1.6087953455904789, "learning_rate": 1.7285243714542594e-06, "loss": 0.5937, "step": 8681 }, { "epoch": 0.7352953631166631, "grad_norm": 1.547577711264875, "learning_rate": 1.7274872103138958e-06, "loss": 0.6649, "step": 8682 }, { "epoch": 0.7353800550497566, "grad_norm": 0.605916343926396, "learning_rate": 1.7264502954474465e-06, "loss": 0.8827, "step": 8683 }, { "epoch": 0.7354647469828499, "grad_norm": 0.6374373529441912, "learning_rate": 1.725413626932947e-06, "loss": 0.8523, "step": 8684 }, { "epoch": 0.7355494389159433, "grad_norm": 1.8961422116769653, "learning_rate": 1.7243772048484113e-06, "loss": 0.675, "step": 8685 }, { "epoch": 0.7356341308490366, "grad_norm": 0.6718502464411056, "learning_rate": 1.7233410292718367e-06, "loss": 0.8605, "step": 8686 }, { "epoch": 0.73571882278213, "grad_norm": 1.9347748617880878, "learning_rate": 1.722305100281202e-06, "loss": 0.6495, "step": 8687 }, { "epoch": 0.7358035147152234, "grad_norm": 2.7696780734836177, "learning_rate": 1.721269417954463e-06, "loss": 0.6263, "step": 8688 }, { "epoch": 0.7358882066483168, "grad_norm": 1.465053690563571, "learning_rate": 1.7202339823695618e-06, "loss": 0.5981, "step": 8689 }, { "epoch": 0.7359728985814101, "grad_norm": 1.271156168744992, "learning_rate": 1.7191987936044223e-06, "loss": 0.5761, "step": 8690 }, { "epoch": 0.7360575905145035, "grad_norm": 1.580401864992663, "learning_rate": 1.7181638517369432e-06, "loss": 0.6101, "step": 8691 }, { "epoch": 0.7361422824475968, "grad_norm": 1.258739776719037, "learning_rate": 1.7171291568450155e-06, "loss": 0.63, "step": 8692 }, { "epoch": 0.7362269743806903, "grad_norm": 0.6360507756137014, "learning_rate": 1.7160947090065011e-06, "loss": 0.8317, "step": 8693 }, { "epoch": 0.7363116663137836, "grad_norm": 1.7161605562458575, "learning_rate": 1.7150605082992483e-06, "loss": 0.642, "step": 8694 }, { "epoch": 0.736396358246877, "grad_norm": 1.4077540574314504, "learning_rate": 1.7140265548010886e-06, "loss": 0.6305, "step": 8695 }, { "epoch": 0.7364810501799703, "grad_norm": 1.3834312461357077, "learning_rate": 1.7129928485898295e-06, "loss": 0.6288, "step": 8696 }, { "epoch": 0.7365657421130637, "grad_norm": 1.217923130849785, "learning_rate": 1.711959389743264e-06, "loss": 0.6583, "step": 8697 }, { "epoch": 0.7366504340461572, "grad_norm": 1.530994582635582, "learning_rate": 1.710926178339165e-06, "loss": 0.5859, "step": 8698 }, { "epoch": 0.7367351259792505, "grad_norm": 1.5197486536180862, "learning_rate": 1.7098932144552881e-06, "loss": 0.6366, "step": 8699 }, { "epoch": 0.7368198179123439, "grad_norm": 1.3036968927059986, "learning_rate": 1.70886049816937e-06, "loss": 0.6175, "step": 8700 }, { "epoch": 0.7369045098454372, "grad_norm": 1.3076569458032872, "learning_rate": 1.7078280295591255e-06, "loss": 0.5994, "step": 8701 }, { "epoch": 0.7369892017785306, "grad_norm": 1.4350157616318546, "learning_rate": 1.706795808702254e-06, "loss": 0.6787, "step": 8702 }, { "epoch": 0.737073893711624, "grad_norm": 1.3000644930379321, "learning_rate": 1.7057638356764384e-06, "loss": 0.5864, "step": 8703 }, { "epoch": 0.7371585856447174, "grad_norm": 1.2771756728517518, "learning_rate": 1.7047321105593363e-06, "loss": 0.6186, "step": 8704 }, { "epoch": 0.7372432775778107, "grad_norm": 1.3655052606002165, "learning_rate": 1.703700633428592e-06, "loss": 0.5808, "step": 8705 }, { "epoch": 0.7373279695109041, "grad_norm": 2.320548600574714, "learning_rate": 1.7026694043618302e-06, "loss": 0.6036, "step": 8706 }, { "epoch": 0.7374126614439974, "grad_norm": 0.6401312344854101, "learning_rate": 1.7016384234366557e-06, "loss": 0.8117, "step": 8707 }, { "epoch": 0.7374973533770909, "grad_norm": 1.3042785680767153, "learning_rate": 1.7006076907306568e-06, "loss": 0.5928, "step": 8708 }, { "epoch": 0.7375820453101842, "grad_norm": 1.4074346419615047, "learning_rate": 1.699577206321399e-06, "loss": 0.6238, "step": 8709 }, { "epoch": 0.7376667372432776, "grad_norm": 1.4795483604376596, "learning_rate": 1.6985469702864327e-06, "loss": 0.6139, "step": 8710 }, { "epoch": 0.7377514291763709, "grad_norm": 1.2790612750981967, "learning_rate": 1.697516982703289e-06, "loss": 0.6051, "step": 8711 }, { "epoch": 0.7378361211094643, "grad_norm": 1.3806567501408242, "learning_rate": 1.69648724364948e-06, "loss": 0.6992, "step": 8712 }, { "epoch": 0.7379208130425577, "grad_norm": 2.2944912897182137, "learning_rate": 1.6954577532025002e-06, "loss": 0.6789, "step": 8713 }, { "epoch": 0.7380055049756511, "grad_norm": 0.6782842635419457, "learning_rate": 1.6944285114398219e-06, "loss": 0.8017, "step": 8714 }, { "epoch": 0.7380901969087444, "grad_norm": 1.7704877071194445, "learning_rate": 1.6933995184389012e-06, "loss": 0.6271, "step": 8715 }, { "epoch": 0.7381748888418378, "grad_norm": 1.6113447758869122, "learning_rate": 1.6923707742771777e-06, "loss": 0.5837, "step": 8716 }, { "epoch": 0.7382595807749311, "grad_norm": 1.2838749234965292, "learning_rate": 1.6913422790320665e-06, "loss": 0.5954, "step": 8717 }, { "epoch": 0.7383442727080246, "grad_norm": 1.3686738237408664, "learning_rate": 1.6903140327809697e-06, "loss": 0.5856, "step": 8718 }, { "epoch": 0.738428964641118, "grad_norm": 0.6346664537391771, "learning_rate": 1.6892860356012669e-06, "loss": 0.8583, "step": 8719 }, { "epoch": 0.7385136565742113, "grad_norm": 0.6210307306473547, "learning_rate": 1.6882582875703212e-06, "loss": 0.8522, "step": 8720 }, { "epoch": 0.7385983485073047, "grad_norm": 1.4476192153541283, "learning_rate": 1.687230788765477e-06, "loss": 0.5603, "step": 8721 }, { "epoch": 0.738683040440398, "grad_norm": 1.5255723643330183, "learning_rate": 1.6862035392640569e-06, "loss": 0.6194, "step": 8722 }, { "epoch": 0.7387677323734915, "grad_norm": 1.3308260809947527, "learning_rate": 1.6851765391433678e-06, "loss": 0.617, "step": 8723 }, { "epoch": 0.7388524243065848, "grad_norm": 1.5380643362560658, "learning_rate": 1.6841497884806985e-06, "loss": 0.6668, "step": 8724 }, { "epoch": 0.7389371162396782, "grad_norm": 1.6720162962312726, "learning_rate": 1.6831232873533139e-06, "loss": 0.5955, "step": 8725 }, { "epoch": 0.7390218081727715, "grad_norm": 1.2962555014244634, "learning_rate": 1.6820970358384643e-06, "loss": 0.6357, "step": 8726 }, { "epoch": 0.7391065001058649, "grad_norm": 1.556549532891841, "learning_rate": 1.681071034013385e-06, "loss": 0.6587, "step": 8727 }, { "epoch": 0.7391911920389583, "grad_norm": 1.3330112181969775, "learning_rate": 1.6800452819552838e-06, "loss": 0.6894, "step": 8728 }, { "epoch": 0.7392758839720517, "grad_norm": 1.3728779144437768, "learning_rate": 1.679019779741356e-06, "loss": 0.6367, "step": 8729 }, { "epoch": 0.739360575905145, "grad_norm": 1.3472318847682307, "learning_rate": 1.6779945274487742e-06, "loss": 0.6675, "step": 8730 }, { "epoch": 0.7394452678382384, "grad_norm": 1.2758104335826537, "learning_rate": 1.6769695251546948e-06, "loss": 0.6794, "step": 8731 }, { "epoch": 0.7395299597713317, "grad_norm": 1.4840384321169597, "learning_rate": 1.6759447729362549e-06, "loss": 0.6524, "step": 8732 }, { "epoch": 0.7396146517044252, "grad_norm": 1.3079367470331973, "learning_rate": 1.6749202708705725e-06, "loss": 0.6904, "step": 8733 }, { "epoch": 0.7396993436375185, "grad_norm": 1.654094195404924, "learning_rate": 1.673896019034747e-06, "loss": 0.5735, "step": 8734 }, { "epoch": 0.7397840355706119, "grad_norm": 0.6604305062372766, "learning_rate": 1.6728720175058599e-06, "loss": 0.8917, "step": 8735 }, { "epoch": 0.7398687275037052, "grad_norm": 0.690542061637598, "learning_rate": 1.6718482663609703e-06, "loss": 0.9261, "step": 8736 }, { "epoch": 0.7399534194367986, "grad_norm": 1.605255086789876, "learning_rate": 1.6708247656771231e-06, "loss": 0.6156, "step": 8737 }, { "epoch": 0.7400381113698921, "grad_norm": 1.183031528525813, "learning_rate": 1.6698015155313401e-06, "loss": 0.5885, "step": 8738 }, { "epoch": 0.7401228033029854, "grad_norm": 1.2425382118237982, "learning_rate": 1.668778516000627e-06, "loss": 0.6226, "step": 8739 }, { "epoch": 0.7402074952360788, "grad_norm": 0.6272205754570641, "learning_rate": 1.6677557671619704e-06, "loss": 0.8483, "step": 8740 }, { "epoch": 0.7402921871691721, "grad_norm": 1.2968336266811609, "learning_rate": 1.6667332690923371e-06, "loss": 0.6025, "step": 8741 }, { "epoch": 0.7403768791022655, "grad_norm": 1.2866811559542797, "learning_rate": 1.6657110218686779e-06, "loss": 0.61, "step": 8742 }, { "epoch": 0.7404615710353589, "grad_norm": 1.3342586265974385, "learning_rate": 1.6646890255679182e-06, "loss": 0.604, "step": 8743 }, { "epoch": 0.7405462629684523, "grad_norm": 1.727235463116502, "learning_rate": 1.6636672802669708e-06, "loss": 0.6299, "step": 8744 }, { "epoch": 0.7406309549015456, "grad_norm": 2.1982923063322835, "learning_rate": 1.6626457860427286e-06, "loss": 0.6181, "step": 8745 }, { "epoch": 0.740715646834639, "grad_norm": 1.4877433747707494, "learning_rate": 1.6616245429720618e-06, "loss": 0.642, "step": 8746 }, { "epoch": 0.7408003387677323, "grad_norm": 1.3791558013317933, "learning_rate": 1.6606035511318236e-06, "loss": 0.6374, "step": 8747 }, { "epoch": 0.7408850307008258, "grad_norm": 2.314291204450171, "learning_rate": 1.659582810598855e-06, "loss": 0.6721, "step": 8748 }, { "epoch": 0.7409697226339191, "grad_norm": 1.2020270118097143, "learning_rate": 1.6585623214499662e-06, "loss": 0.6514, "step": 8749 }, { "epoch": 0.7410544145670125, "grad_norm": 1.2935595718747512, "learning_rate": 1.6575420837619583e-06, "loss": 0.6248, "step": 8750 }, { "epoch": 0.7411391065001058, "grad_norm": 1.3940792066764833, "learning_rate": 1.6565220976116058e-06, "loss": 0.6503, "step": 8751 }, { "epoch": 0.7412237984331992, "grad_norm": 1.4798833685265878, "learning_rate": 1.6555023630756706e-06, "loss": 0.618, "step": 8752 }, { "epoch": 0.7413084903662926, "grad_norm": 1.79592955330208, "learning_rate": 1.654482880230892e-06, "loss": 0.6396, "step": 8753 }, { "epoch": 0.741393182299386, "grad_norm": 1.4538350433265599, "learning_rate": 1.6534636491539924e-06, "loss": 0.6423, "step": 8754 }, { "epoch": 0.7414778742324794, "grad_norm": 1.2101504266343197, "learning_rate": 1.6524446699216735e-06, "loss": 0.6388, "step": 8755 }, { "epoch": 0.7415625661655727, "grad_norm": 1.278422819647806, "learning_rate": 1.651425942610621e-06, "loss": 0.5749, "step": 8756 }, { "epoch": 0.741647258098666, "grad_norm": 1.233831118098294, "learning_rate": 1.6504074672974968e-06, "loss": 0.607, "step": 8757 }, { "epoch": 0.7417319500317595, "grad_norm": 2.0125926655725586, "learning_rate": 1.6493892440589488e-06, "loss": 0.6639, "step": 8758 }, { "epoch": 0.7418166419648529, "grad_norm": 1.5060156055176341, "learning_rate": 1.6483712729716012e-06, "loss": 0.6094, "step": 8759 }, { "epoch": 0.7419013338979462, "grad_norm": 1.3099301160086099, "learning_rate": 1.6473535541120628e-06, "loss": 0.6162, "step": 8760 }, { "epoch": 0.7419860258310396, "grad_norm": 1.3867484431840325, "learning_rate": 1.6463360875569222e-06, "loss": 0.6142, "step": 8761 }, { "epoch": 0.7420707177641329, "grad_norm": 1.204433260071656, "learning_rate": 1.6453188733827502e-06, "loss": 0.6108, "step": 8762 }, { "epoch": 0.7421554096972264, "grad_norm": 1.271212274160527, "learning_rate": 1.6443019116660963e-06, "loss": 0.6498, "step": 8763 }, { "epoch": 0.7422401016303197, "grad_norm": 2.2477334753747567, "learning_rate": 1.643285202483495e-06, "loss": 0.6438, "step": 8764 }, { "epoch": 0.7423247935634131, "grad_norm": 0.6213604476460565, "learning_rate": 1.6422687459114544e-06, "loss": 0.8704, "step": 8765 }, { "epoch": 0.7424094854965064, "grad_norm": 1.4287674009887228, "learning_rate": 1.641252542026473e-06, "loss": 0.6406, "step": 8766 }, { "epoch": 0.7424941774295998, "grad_norm": 1.3204704951582669, "learning_rate": 1.64023659090502e-06, "loss": 0.5777, "step": 8767 }, { "epoch": 0.7425788693626932, "grad_norm": 0.7212390511845046, "learning_rate": 1.6392208926235553e-06, "loss": 0.9038, "step": 8768 }, { "epoch": 0.7426635612957866, "grad_norm": 0.6484163736243302, "learning_rate": 1.6382054472585168e-06, "loss": 0.8682, "step": 8769 }, { "epoch": 0.7427482532288799, "grad_norm": 1.236382224688983, "learning_rate": 1.6371902548863183e-06, "loss": 0.6044, "step": 8770 }, { "epoch": 0.7428329451619733, "grad_norm": 1.3557185315601363, "learning_rate": 1.6361753155833599e-06, "loss": 0.6489, "step": 8771 }, { "epoch": 0.7429176370950666, "grad_norm": 1.9756655544204538, "learning_rate": 1.635160629426023e-06, "loss": 0.6116, "step": 8772 }, { "epoch": 0.7430023290281601, "grad_norm": 1.7660162475330614, "learning_rate": 1.6341461964906646e-06, "loss": 0.7006, "step": 8773 }, { "epoch": 0.7430870209612535, "grad_norm": 1.294017702739678, "learning_rate": 1.633132016853628e-06, "loss": 0.6447, "step": 8774 }, { "epoch": 0.7431717128943468, "grad_norm": 1.2975653991854417, "learning_rate": 1.632118090591236e-06, "loss": 0.5979, "step": 8775 }, { "epoch": 0.7432564048274402, "grad_norm": 1.6048876342517338, "learning_rate": 1.6311044177797908e-06, "loss": 0.6336, "step": 8776 }, { "epoch": 0.7433410967605335, "grad_norm": 2.1624622831021565, "learning_rate": 1.6300909984955793e-06, "loss": 0.6389, "step": 8777 }, { "epoch": 0.743425788693627, "grad_norm": 1.4234167599680514, "learning_rate": 1.6290778328148631e-06, "loss": 0.6612, "step": 8778 }, { "epoch": 0.7435104806267203, "grad_norm": 1.9148137455456333, "learning_rate": 1.6280649208138917e-06, "loss": 0.6711, "step": 8779 }, { "epoch": 0.7435951725598137, "grad_norm": 1.3593591599840686, "learning_rate": 1.6270522625688878e-06, "loss": 0.5674, "step": 8780 }, { "epoch": 0.743679864492907, "grad_norm": 1.4133833215682572, "learning_rate": 1.6260398581560621e-06, "loss": 0.5907, "step": 8781 }, { "epoch": 0.7437645564260005, "grad_norm": 1.3866266256055801, "learning_rate": 1.6250277076516035e-06, "loss": 0.6699, "step": 8782 }, { "epoch": 0.7438492483590938, "grad_norm": 1.4205961240141374, "learning_rate": 1.6240158111316807e-06, "loss": 0.5864, "step": 8783 }, { "epoch": 0.7439339402921872, "grad_norm": 0.6644998661375059, "learning_rate": 1.623004168672445e-06, "loss": 0.8678, "step": 8784 }, { "epoch": 0.7440186322252805, "grad_norm": 1.3914320686502182, "learning_rate": 1.6219927803500295e-06, "loss": 0.6201, "step": 8785 }, { "epoch": 0.7441033241583739, "grad_norm": 1.8269243217075504, "learning_rate": 1.6209816462405425e-06, "loss": 0.5835, "step": 8786 }, { "epoch": 0.7441880160914673, "grad_norm": 1.8169791169459781, "learning_rate": 1.6199707664200798e-06, "loss": 0.6667, "step": 8787 }, { "epoch": 0.7442727080245607, "grad_norm": 2.0758920418493245, "learning_rate": 1.618960140964715e-06, "loss": 0.5621, "step": 8788 }, { "epoch": 0.744357399957654, "grad_norm": 1.478884744956178, "learning_rate": 1.6179497699505031e-06, "loss": 0.6811, "step": 8789 }, { "epoch": 0.7444420918907474, "grad_norm": 1.3965999564297753, "learning_rate": 1.6169396534534814e-06, "loss": 0.5676, "step": 8790 }, { "epoch": 0.7445267838238407, "grad_norm": 1.2834426403551897, "learning_rate": 1.615929791549663e-06, "loss": 0.6232, "step": 8791 }, { "epoch": 0.7446114757569342, "grad_norm": 4.277876307493739, "learning_rate": 1.6149201843150475e-06, "loss": 0.6026, "step": 8792 }, { "epoch": 0.7446961676900276, "grad_norm": 0.6406004696067777, "learning_rate": 1.6139108318256148e-06, "loss": 0.788, "step": 8793 }, { "epoch": 0.7447808596231209, "grad_norm": 1.3172028039779002, "learning_rate": 1.61290173415732e-06, "loss": 0.6499, "step": 8794 }, { "epoch": 0.7448655515562143, "grad_norm": 1.4746967612425699, "learning_rate": 1.611892891386106e-06, "loss": 0.5861, "step": 8795 }, { "epoch": 0.7449502434893076, "grad_norm": 1.094057645176451, "learning_rate": 1.6108843035878924e-06, "loss": 0.573, "step": 8796 }, { "epoch": 0.7450349354224011, "grad_norm": 1.2832364969242886, "learning_rate": 1.6098759708385807e-06, "loss": 0.6612, "step": 8797 }, { "epoch": 0.7451196273554944, "grad_norm": 1.3753622837650488, "learning_rate": 1.608867893214056e-06, "loss": 0.6422, "step": 8798 }, { "epoch": 0.7452043192885878, "grad_norm": 1.624571468844201, "learning_rate": 1.6078600707901776e-06, "loss": 0.6154, "step": 8799 }, { "epoch": 0.7452890112216811, "grad_norm": 1.425861109102952, "learning_rate": 1.6068525036427913e-06, "loss": 0.6723, "step": 8800 }, { "epoch": 0.7453737031547745, "grad_norm": 1.5079930046804237, "learning_rate": 1.6058451918477225e-06, "loss": 0.6577, "step": 8801 }, { "epoch": 0.7454583950878679, "grad_norm": 3.6345461101705716, "learning_rate": 1.6048381354807736e-06, "loss": 0.6041, "step": 8802 }, { "epoch": 0.7455430870209613, "grad_norm": 1.3094379581260676, "learning_rate": 1.6038313346177358e-06, "loss": 0.5585, "step": 8803 }, { "epoch": 0.7456277789540546, "grad_norm": 1.358815687818164, "learning_rate": 1.6028247893343724e-06, "loss": 0.6481, "step": 8804 }, { "epoch": 0.745712470887148, "grad_norm": 1.4222389589329842, "learning_rate": 1.6018184997064324e-06, "loss": 0.6278, "step": 8805 }, { "epoch": 0.7457971628202413, "grad_norm": 3.5260309065802624, "learning_rate": 1.6008124658096458e-06, "loss": 0.6142, "step": 8806 }, { "epoch": 0.7458818547533348, "grad_norm": 1.3512908433401718, "learning_rate": 1.5998066877197194e-06, "loss": 0.6989, "step": 8807 }, { "epoch": 0.7459665466864281, "grad_norm": 1.3039285892437997, "learning_rate": 1.5988011655123448e-06, "loss": 0.5902, "step": 8808 }, { "epoch": 0.7460512386195215, "grad_norm": 1.4852620519810842, "learning_rate": 1.5977958992631926e-06, "loss": 0.6333, "step": 8809 }, { "epoch": 0.7461359305526148, "grad_norm": 0.6179816008552682, "learning_rate": 1.5967908890479139e-06, "loss": 0.8545, "step": 8810 }, { "epoch": 0.7462206224857082, "grad_norm": 1.2274466685871739, "learning_rate": 1.5957861349421439e-06, "loss": 0.6503, "step": 8811 }, { "epoch": 0.7463053144188017, "grad_norm": 0.5983478231209881, "learning_rate": 1.594781637021492e-06, "loss": 0.8419, "step": 8812 }, { "epoch": 0.746390006351895, "grad_norm": 1.521788509763887, "learning_rate": 1.5937773953615526e-06, "loss": 0.5998, "step": 8813 }, { "epoch": 0.7464746982849884, "grad_norm": 1.7773966258447833, "learning_rate": 1.5927734100379034e-06, "loss": 0.6602, "step": 8814 }, { "epoch": 0.7465593902180817, "grad_norm": 1.7632164399816512, "learning_rate": 1.5917696811260952e-06, "loss": 0.6325, "step": 8815 }, { "epoch": 0.7466440821511751, "grad_norm": 1.4133965967795312, "learning_rate": 1.5907662087016657e-06, "loss": 0.6573, "step": 8816 }, { "epoch": 0.7467287740842685, "grad_norm": 1.488617497354135, "learning_rate": 1.589762992840132e-06, "loss": 0.6739, "step": 8817 }, { "epoch": 0.7468134660173619, "grad_norm": 1.8245779266227888, "learning_rate": 1.5887600336169912e-06, "loss": 0.6015, "step": 8818 }, { "epoch": 0.7468981579504552, "grad_norm": 1.6148229879488765, "learning_rate": 1.5877573311077233e-06, "loss": 0.6214, "step": 8819 }, { "epoch": 0.7469828498835486, "grad_norm": 1.4104359951344787, "learning_rate": 1.5867548853877828e-06, "loss": 0.6184, "step": 8820 }, { "epoch": 0.7470675418166419, "grad_norm": 1.4103044420060753, "learning_rate": 1.5857526965326108e-06, "loss": 0.6266, "step": 8821 }, { "epoch": 0.7471522337497354, "grad_norm": 1.4188021519453942, "learning_rate": 1.5847507646176301e-06, "loss": 0.6481, "step": 8822 }, { "epoch": 0.7472369256828287, "grad_norm": 0.6188480139078639, "learning_rate": 1.583749089718235e-06, "loss": 0.8394, "step": 8823 }, { "epoch": 0.7473216176159221, "grad_norm": 1.4153770004231139, "learning_rate": 1.5827476719098145e-06, "loss": 0.6186, "step": 8824 }, { "epoch": 0.7474063095490154, "grad_norm": 1.3606640773035912, "learning_rate": 1.5817465112677254e-06, "loss": 0.6091, "step": 8825 }, { "epoch": 0.7474910014821088, "grad_norm": 1.4944673630033498, "learning_rate": 1.5807456078673117e-06, "loss": 0.6241, "step": 8826 }, { "epoch": 0.7475756934152022, "grad_norm": 1.2975482300735803, "learning_rate": 1.5797449617838983e-06, "loss": 0.6427, "step": 8827 }, { "epoch": 0.7476603853482956, "grad_norm": 1.350302218821636, "learning_rate": 1.578744573092786e-06, "loss": 0.6579, "step": 8828 }, { "epoch": 0.747745077281389, "grad_norm": 1.52457783095303, "learning_rate": 1.5777444418692611e-06, "loss": 0.6424, "step": 8829 }, { "epoch": 0.7478297692144823, "grad_norm": 1.1340353323176837, "learning_rate": 1.5767445681885885e-06, "loss": 0.6172, "step": 8830 }, { "epoch": 0.7479144611475756, "grad_norm": 1.546167845431458, "learning_rate": 1.5757449521260144e-06, "loss": 0.635, "step": 8831 }, { "epoch": 0.7479991530806691, "grad_norm": 1.2913587238000717, "learning_rate": 1.5747455937567662e-06, "loss": 0.5594, "step": 8832 }, { "epoch": 0.7480838450137625, "grad_norm": 0.625775014490597, "learning_rate": 1.573746493156048e-06, "loss": 0.823, "step": 8833 }, { "epoch": 0.7481685369468558, "grad_norm": 1.377700678687395, "learning_rate": 1.5727476503990496e-06, "loss": 0.6563, "step": 8834 }, { "epoch": 0.7482532288799492, "grad_norm": 0.5663608475279029, "learning_rate": 1.5717490655609397e-06, "loss": 0.848, "step": 8835 }, { "epoch": 0.7483379208130425, "grad_norm": 1.3094444482771854, "learning_rate": 1.5707507387168646e-06, "loss": 0.6385, "step": 8836 }, { "epoch": 0.748422612746136, "grad_norm": 2.0589145696855544, "learning_rate": 1.5697526699419552e-06, "loss": 0.6714, "step": 8837 }, { "epoch": 0.7485073046792293, "grad_norm": 2.433432646082066, "learning_rate": 1.5687548593113216e-06, "loss": 0.6084, "step": 8838 }, { "epoch": 0.7485919966123227, "grad_norm": 1.4442442240415183, "learning_rate": 1.567757306900054e-06, "loss": 0.5778, "step": 8839 }, { "epoch": 0.748676688545416, "grad_norm": 1.2448082127329603, "learning_rate": 1.5667600127832255e-06, "loss": 0.5979, "step": 8840 }, { "epoch": 0.7487613804785094, "grad_norm": 1.3501905840170112, "learning_rate": 1.5657629770358839e-06, "loss": 0.6156, "step": 8841 }, { "epoch": 0.7488460724116028, "grad_norm": 1.4352936531442022, "learning_rate": 1.5647661997330637e-06, "loss": 0.6265, "step": 8842 }, { "epoch": 0.7489307643446962, "grad_norm": 1.423394574780365, "learning_rate": 1.563769680949777e-06, "loss": 0.6195, "step": 8843 }, { "epoch": 0.7490154562777895, "grad_norm": 2.040754918644617, "learning_rate": 1.5627734207610178e-06, "loss": 0.6231, "step": 8844 }, { "epoch": 0.7491001482108829, "grad_norm": 2.095268303860851, "learning_rate": 1.5617774192417618e-06, "loss": 0.5827, "step": 8845 }, { "epoch": 0.7491848401439762, "grad_norm": 1.2859109182816257, "learning_rate": 1.5607816764669586e-06, "loss": 0.6025, "step": 8846 }, { "epoch": 0.7492695320770697, "grad_norm": 1.4194709749658456, "learning_rate": 1.5597861925115465e-06, "loss": 0.5582, "step": 8847 }, { "epoch": 0.749354224010163, "grad_norm": 1.2916136546559374, "learning_rate": 1.5587909674504415e-06, "loss": 0.6751, "step": 8848 }, { "epoch": 0.7494389159432564, "grad_norm": 2.7459668982117167, "learning_rate": 1.557796001358537e-06, "loss": 0.6298, "step": 8849 }, { "epoch": 0.7495236078763498, "grad_norm": 1.1989987221120795, "learning_rate": 1.5568012943107102e-06, "loss": 0.5846, "step": 8850 }, { "epoch": 0.7496082998094431, "grad_norm": 1.2397941818518108, "learning_rate": 1.5558068463818194e-06, "loss": 0.5985, "step": 8851 }, { "epoch": 0.7496929917425366, "grad_norm": 1.3992620879842073, "learning_rate": 1.5548126576467003e-06, "loss": 0.6506, "step": 8852 }, { "epoch": 0.7497776836756299, "grad_norm": 2.465892567493628, "learning_rate": 1.5538187281801743e-06, "loss": 0.6625, "step": 8853 }, { "epoch": 0.7498623756087233, "grad_norm": 1.5411556084177251, "learning_rate": 1.5528250580570354e-06, "loss": 0.6658, "step": 8854 }, { "epoch": 0.7499470675418166, "grad_norm": 1.5181016401759602, "learning_rate": 1.5518316473520644e-06, "loss": 0.6265, "step": 8855 }, { "epoch": 0.75003175947491, "grad_norm": 1.2888185663427694, "learning_rate": 1.5508384961400225e-06, "loss": 0.6153, "step": 8856 }, { "epoch": 0.7501164514080034, "grad_norm": 1.567929784093178, "learning_rate": 1.5498456044956466e-06, "loss": 0.6407, "step": 8857 }, { "epoch": 0.7502011433410968, "grad_norm": 1.7184061746971064, "learning_rate": 1.5488529724936563e-06, "loss": 0.5839, "step": 8858 }, { "epoch": 0.7502858352741901, "grad_norm": 1.4920813912900417, "learning_rate": 1.547860600208758e-06, "loss": 0.5982, "step": 8859 }, { "epoch": 0.7503705272072835, "grad_norm": 1.3763402490370664, "learning_rate": 1.5468684877156282e-06, "loss": 0.596, "step": 8860 }, { "epoch": 0.7504552191403768, "grad_norm": 1.3097521275757464, "learning_rate": 1.5458766350889314e-06, "loss": 0.6394, "step": 8861 }, { "epoch": 0.7505399110734703, "grad_norm": 1.431723645703302, "learning_rate": 1.5448850424033062e-06, "loss": 0.6062, "step": 8862 }, { "epoch": 0.7506246030065636, "grad_norm": 1.3614141639374977, "learning_rate": 1.5438937097333777e-06, "loss": 0.6798, "step": 8863 }, { "epoch": 0.750709294939657, "grad_norm": 1.1144465302906255, "learning_rate": 1.5429026371537486e-06, "loss": 0.6061, "step": 8864 }, { "epoch": 0.7507939868727503, "grad_norm": 1.107084178237928, "learning_rate": 1.5419118247390019e-06, "loss": 0.6279, "step": 8865 }, { "epoch": 0.7508786788058437, "grad_norm": 2.414606268562079, "learning_rate": 1.540921272563702e-06, "loss": 0.6361, "step": 8866 }, { "epoch": 0.7509633707389372, "grad_norm": 0.6499127458863766, "learning_rate": 1.5399309807023942e-06, "loss": 0.8513, "step": 8867 }, { "epoch": 0.7510480626720305, "grad_norm": 1.168019797540374, "learning_rate": 1.538940949229601e-06, "loss": 0.6087, "step": 8868 }, { "epoch": 0.7511327546051239, "grad_norm": 1.5078429542012162, "learning_rate": 1.5379511782198297e-06, "loss": 0.5556, "step": 8869 }, { "epoch": 0.7512174465382172, "grad_norm": 2.8883478573334362, "learning_rate": 1.5369616677475636e-06, "loss": 0.6466, "step": 8870 }, { "epoch": 0.7513021384713106, "grad_norm": 1.4547831099809345, "learning_rate": 1.5359724178872693e-06, "loss": 0.6716, "step": 8871 }, { "epoch": 0.751386830404404, "grad_norm": 1.3078268758193659, "learning_rate": 1.5349834287133935e-06, "loss": 0.5511, "step": 8872 }, { "epoch": 0.7514715223374974, "grad_norm": 1.929398249199495, "learning_rate": 1.533994700300363e-06, "loss": 0.6329, "step": 8873 }, { "epoch": 0.7515562142705907, "grad_norm": 1.2631197982001252, "learning_rate": 1.5330062327225843e-06, "loss": 0.6511, "step": 8874 }, { "epoch": 0.7516409062036841, "grad_norm": 3.0320169377953086, "learning_rate": 1.5320180260544471e-06, "loss": 0.626, "step": 8875 }, { "epoch": 0.7517255981367774, "grad_norm": 1.5217644058856774, "learning_rate": 1.5310300803703155e-06, "loss": 0.647, "step": 8876 }, { "epoch": 0.7518102900698709, "grad_norm": 3.0901187422768484, "learning_rate": 1.530042395744541e-06, "loss": 0.6137, "step": 8877 }, { "epoch": 0.7518949820029642, "grad_norm": 1.3262610747566788, "learning_rate": 1.5290549722514492e-06, "loss": 0.6243, "step": 8878 }, { "epoch": 0.7519796739360576, "grad_norm": 1.4639386575297928, "learning_rate": 1.5280678099653485e-06, "loss": 0.6389, "step": 8879 }, { "epoch": 0.7520643658691509, "grad_norm": 0.5965446272339873, "learning_rate": 1.5270809089605332e-06, "loss": 0.8546, "step": 8880 }, { "epoch": 0.7521490578022443, "grad_norm": 1.2804230759680775, "learning_rate": 1.5260942693112674e-06, "loss": 0.5804, "step": 8881 }, { "epoch": 0.7522337497353377, "grad_norm": 1.624818318835569, "learning_rate": 1.525107891091805e-06, "loss": 0.6621, "step": 8882 }, { "epoch": 0.7523184416684311, "grad_norm": 1.5574554981923636, "learning_rate": 1.5241217743763725e-06, "loss": 0.6169, "step": 8883 }, { "epoch": 0.7524031336015244, "grad_norm": 1.3079556286418776, "learning_rate": 1.523135919239182e-06, "loss": 0.6108, "step": 8884 }, { "epoch": 0.7524878255346178, "grad_norm": 1.3966292700216505, "learning_rate": 1.522150325754425e-06, "loss": 0.6428, "step": 8885 }, { "epoch": 0.7525725174677113, "grad_norm": 1.2868025437674468, "learning_rate": 1.5211649939962714e-06, "loss": 0.6457, "step": 8886 }, { "epoch": 0.7526572094008046, "grad_norm": 1.3308174566001363, "learning_rate": 1.5201799240388736e-06, "loss": 0.6599, "step": 8887 }, { "epoch": 0.752741901333898, "grad_norm": 2.0088818933613926, "learning_rate": 1.519195115956365e-06, "loss": 0.6223, "step": 8888 }, { "epoch": 0.7528265932669913, "grad_norm": 1.8484488276416084, "learning_rate": 1.5182105698228534e-06, "loss": 0.6175, "step": 8889 }, { "epoch": 0.7529112852000847, "grad_norm": 1.8796443981547197, "learning_rate": 1.5172262857124354e-06, "loss": 0.5952, "step": 8890 }, { "epoch": 0.7529959771331781, "grad_norm": 2.0301056853239907, "learning_rate": 1.5162422636991796e-06, "loss": 0.6683, "step": 8891 }, { "epoch": 0.7530806690662715, "grad_norm": 1.4851494665216864, "learning_rate": 1.5152585038571415e-06, "loss": 0.7154, "step": 8892 }, { "epoch": 0.7531653609993648, "grad_norm": 1.4493083502369615, "learning_rate": 1.5142750062603527e-06, "loss": 0.6358, "step": 8893 }, { "epoch": 0.7532500529324582, "grad_norm": 1.6060111498650242, "learning_rate": 1.5132917709828282e-06, "loss": 0.5843, "step": 8894 }, { "epoch": 0.7533347448655515, "grad_norm": 1.414147518154757, "learning_rate": 1.5123087980985606e-06, "loss": 0.6647, "step": 8895 }, { "epoch": 0.753419436798645, "grad_norm": 1.2470342618983674, "learning_rate": 1.5113260876815256e-06, "loss": 0.6334, "step": 8896 }, { "epoch": 0.7535041287317383, "grad_norm": 1.4970681504449284, "learning_rate": 1.5103436398056748e-06, "loss": 0.6104, "step": 8897 }, { "epoch": 0.7535888206648317, "grad_norm": 0.6476036464542032, "learning_rate": 1.5093614545449448e-06, "loss": 0.8416, "step": 8898 }, { "epoch": 0.753673512597925, "grad_norm": 1.5507924288802746, "learning_rate": 1.5083795319732464e-06, "loss": 0.6106, "step": 8899 }, { "epoch": 0.7537582045310184, "grad_norm": 1.29409592032014, "learning_rate": 1.5073978721644782e-06, "loss": 0.6197, "step": 8900 }, { "epoch": 0.7538428964641118, "grad_norm": 1.4952518348603232, "learning_rate": 1.5064164751925164e-06, "loss": 0.6192, "step": 8901 }, { "epoch": 0.7539275883972052, "grad_norm": 1.557262548127959, "learning_rate": 1.5054353411312128e-06, "loss": 0.6031, "step": 8902 }, { "epoch": 0.7540122803302985, "grad_norm": 0.6217806132199962, "learning_rate": 1.5044544700544038e-06, "loss": 0.8291, "step": 8903 }, { "epoch": 0.7540969722633919, "grad_norm": 1.4512983731001274, "learning_rate": 1.503473862035908e-06, "loss": 0.6023, "step": 8904 }, { "epoch": 0.7541816641964852, "grad_norm": 1.3110885723172419, "learning_rate": 1.5024935171495169e-06, "loss": 0.6076, "step": 8905 }, { "epoch": 0.7542663561295787, "grad_norm": 2.004831995680915, "learning_rate": 1.5015134354690092e-06, "loss": 0.6236, "step": 8906 }, { "epoch": 0.7543510480626721, "grad_norm": 1.5166712635409378, "learning_rate": 1.5005336170681406e-06, "loss": 0.709, "step": 8907 }, { "epoch": 0.7544357399957654, "grad_norm": 1.1592888401926695, "learning_rate": 1.4995540620206477e-06, "loss": 0.5896, "step": 8908 }, { "epoch": 0.7545204319288588, "grad_norm": 1.6143613918900437, "learning_rate": 1.498574770400249e-06, "loss": 0.6267, "step": 8909 }, { "epoch": 0.7546051238619521, "grad_norm": 1.250069949693037, "learning_rate": 1.4975957422806386e-06, "loss": 0.6194, "step": 8910 }, { "epoch": 0.7546898157950456, "grad_norm": 1.3903003161219463, "learning_rate": 1.4966169777354961e-06, "loss": 0.5845, "step": 8911 }, { "epoch": 0.7547745077281389, "grad_norm": 1.7500733332893177, "learning_rate": 1.4956384768384757e-06, "loss": 0.6226, "step": 8912 }, { "epoch": 0.7548591996612323, "grad_norm": 1.6778597504811155, "learning_rate": 1.4946602396632166e-06, "loss": 0.6371, "step": 8913 }, { "epoch": 0.7549438915943256, "grad_norm": 1.3648446737396862, "learning_rate": 1.4936822662833355e-06, "loss": 0.6567, "step": 8914 }, { "epoch": 0.755028583527419, "grad_norm": 1.4748956295434004, "learning_rate": 1.4927045567724313e-06, "loss": 0.6422, "step": 8915 }, { "epoch": 0.7551132754605124, "grad_norm": 1.2358276976896507, "learning_rate": 1.491727111204081e-06, "loss": 0.6067, "step": 8916 }, { "epoch": 0.7551979673936058, "grad_norm": 1.3470002805208758, "learning_rate": 1.490749929651844e-06, "loss": 0.5978, "step": 8917 }, { "epoch": 0.7552826593266991, "grad_norm": 1.3163812200118599, "learning_rate": 1.4897730121892562e-06, "loss": 0.5794, "step": 8918 }, { "epoch": 0.7553673512597925, "grad_norm": 0.6248157646446226, "learning_rate": 1.4887963588898362e-06, "loss": 0.8304, "step": 8919 }, { "epoch": 0.7554520431928858, "grad_norm": 1.400922407289128, "learning_rate": 1.4878199698270835e-06, "loss": 0.6431, "step": 8920 }, { "epoch": 0.7555367351259793, "grad_norm": 1.3490579816764885, "learning_rate": 1.4868438450744754e-06, "loss": 0.6425, "step": 8921 }, { "epoch": 0.7556214270590726, "grad_norm": 1.346366290149033, "learning_rate": 1.485867984705473e-06, "loss": 0.6121, "step": 8922 }, { "epoch": 0.755706118992166, "grad_norm": 1.23659041469427, "learning_rate": 1.4848923887935114e-06, "loss": 0.5984, "step": 8923 }, { "epoch": 0.7557908109252593, "grad_norm": 1.290262737361107, "learning_rate": 1.4839170574120104e-06, "loss": 0.6729, "step": 8924 }, { "epoch": 0.7558755028583527, "grad_norm": 1.544671725203337, "learning_rate": 1.4829419906343712e-06, "loss": 0.6187, "step": 8925 }, { "epoch": 0.7559601947914462, "grad_norm": 1.7682576159567733, "learning_rate": 1.4819671885339693e-06, "loss": 0.6278, "step": 8926 }, { "epoch": 0.7560448867245395, "grad_norm": 3.757298279582723, "learning_rate": 1.4809926511841655e-06, "loss": 0.6229, "step": 8927 }, { "epoch": 0.7561295786576329, "grad_norm": 1.658537655685083, "learning_rate": 1.480018378658299e-06, "loss": 0.6, "step": 8928 }, { "epoch": 0.7562142705907262, "grad_norm": 1.6259306072153852, "learning_rate": 1.4790443710296881e-06, "loss": 0.6392, "step": 8929 }, { "epoch": 0.7562989625238196, "grad_norm": 1.5093924310187556, "learning_rate": 1.4780706283716345e-06, "loss": 0.6566, "step": 8930 }, { "epoch": 0.756383654456913, "grad_norm": 1.8402090510679245, "learning_rate": 1.477097150757414e-06, "loss": 0.6514, "step": 8931 }, { "epoch": 0.7564683463900064, "grad_norm": 1.35972897042996, "learning_rate": 1.4761239382602877e-06, "loss": 0.623, "step": 8932 }, { "epoch": 0.7565530383230997, "grad_norm": 1.481044100837771, "learning_rate": 1.475150990953496e-06, "loss": 0.6508, "step": 8933 }, { "epoch": 0.7566377302561931, "grad_norm": 1.262955617592665, "learning_rate": 1.4741783089102545e-06, "loss": 0.6152, "step": 8934 }, { "epoch": 0.7567224221892864, "grad_norm": 0.5965170092477476, "learning_rate": 1.4732058922037685e-06, "loss": 0.802, "step": 8935 }, { "epoch": 0.7568071141223799, "grad_norm": 1.4485669497379003, "learning_rate": 1.4722337409072124e-06, "loss": 0.6156, "step": 8936 }, { "epoch": 0.7568918060554732, "grad_norm": 1.7474138388242229, "learning_rate": 1.471261855093748e-06, "loss": 0.6634, "step": 8937 }, { "epoch": 0.7569764979885666, "grad_norm": 2.172762610749124, "learning_rate": 1.470290234836516e-06, "loss": 0.6288, "step": 8938 }, { "epoch": 0.7570611899216599, "grad_norm": 1.5115915511234204, "learning_rate": 1.4693188802086328e-06, "loss": 0.6139, "step": 8939 }, { "epoch": 0.7571458818547533, "grad_norm": 1.3376130134243303, "learning_rate": 1.4683477912832e-06, "loss": 0.5727, "step": 8940 }, { "epoch": 0.7572305737878468, "grad_norm": 1.7252152499117417, "learning_rate": 1.4673769681332967e-06, "loss": 0.5705, "step": 8941 }, { "epoch": 0.7573152657209401, "grad_norm": 1.2482547852208459, "learning_rate": 1.466406410831983e-06, "loss": 0.5881, "step": 8942 }, { "epoch": 0.7573999576540335, "grad_norm": 0.5860975316922417, "learning_rate": 1.4654361194522993e-06, "loss": 0.8434, "step": 8943 }, { "epoch": 0.7574846495871268, "grad_norm": 1.5920570477952376, "learning_rate": 1.4644660940672628e-06, "loss": 0.6109, "step": 8944 }, { "epoch": 0.7575693415202202, "grad_norm": 1.5647440143310516, "learning_rate": 1.4634963347498743e-06, "loss": 0.6453, "step": 8945 }, { "epoch": 0.7576540334533136, "grad_norm": 1.1947696067130726, "learning_rate": 1.4625268415731148e-06, "loss": 0.5759, "step": 8946 }, { "epoch": 0.757738725386407, "grad_norm": 1.374571291507458, "learning_rate": 1.4615576146099403e-06, "loss": 0.6248, "step": 8947 }, { "epoch": 0.7578234173195003, "grad_norm": 1.4127720498706229, "learning_rate": 1.4605886539332925e-06, "loss": 0.6485, "step": 8948 }, { "epoch": 0.7579081092525937, "grad_norm": 1.6847410747588427, "learning_rate": 1.4596199596160903e-06, "loss": 0.5711, "step": 8949 }, { "epoch": 0.757992801185687, "grad_norm": 1.6340802666594696, "learning_rate": 1.458651531731234e-06, "loss": 0.6774, "step": 8950 }, { "epoch": 0.7580774931187805, "grad_norm": 1.1866370638319907, "learning_rate": 1.4576833703516037e-06, "loss": 0.6386, "step": 8951 }, { "epoch": 0.7581621850518738, "grad_norm": 1.3995800641410383, "learning_rate": 1.456715475550055e-06, "loss": 0.6669, "step": 8952 }, { "epoch": 0.7582468769849672, "grad_norm": 1.342830721251, "learning_rate": 1.4557478473994297e-06, "loss": 0.6098, "step": 8953 }, { "epoch": 0.7583315689180605, "grad_norm": 1.5676564221279403, "learning_rate": 1.4547804859725485e-06, "loss": 0.6029, "step": 8954 }, { "epoch": 0.7584162608511539, "grad_norm": 1.4144835913214577, "learning_rate": 1.4538133913422053e-06, "loss": 0.6282, "step": 8955 }, { "epoch": 0.7585009527842473, "grad_norm": 1.8361224311892386, "learning_rate": 1.452846563581185e-06, "loss": 0.5923, "step": 8956 }, { "epoch": 0.7585856447173407, "grad_norm": 2.049204580184596, "learning_rate": 1.4518800027622425e-06, "loss": 0.5971, "step": 8957 }, { "epoch": 0.758670336650434, "grad_norm": 1.431915380225017, "learning_rate": 1.4509137089581183e-06, "loss": 0.6226, "step": 8958 }, { "epoch": 0.7587550285835274, "grad_norm": 1.2592230717714186, "learning_rate": 1.4499476822415321e-06, "loss": 0.5659, "step": 8959 }, { "epoch": 0.7588397205166207, "grad_norm": 1.1616603059041182, "learning_rate": 1.4489819226851799e-06, "loss": 0.5868, "step": 8960 }, { "epoch": 0.7589244124497142, "grad_norm": 1.263113398586699, "learning_rate": 1.4480164303617411e-06, "loss": 0.6522, "step": 8961 }, { "epoch": 0.7590091043828076, "grad_norm": 1.3168858086548536, "learning_rate": 1.447051205343875e-06, "loss": 0.6037, "step": 8962 }, { "epoch": 0.7590937963159009, "grad_norm": 1.6438733604279865, "learning_rate": 1.4460862477042192e-06, "loss": 0.6436, "step": 8963 }, { "epoch": 0.7591784882489943, "grad_norm": 1.8777188206774438, "learning_rate": 1.4451215575153938e-06, "loss": 0.594, "step": 8964 }, { "epoch": 0.7592631801820876, "grad_norm": 1.1639968543273655, "learning_rate": 1.4441571348499938e-06, "loss": 0.6145, "step": 8965 }, { "epoch": 0.7593478721151811, "grad_norm": 1.2152657568189134, "learning_rate": 1.4431929797805983e-06, "loss": 0.6336, "step": 8966 }, { "epoch": 0.7594325640482744, "grad_norm": 1.243374005920523, "learning_rate": 1.4422290923797666e-06, "loss": 0.6513, "step": 8967 }, { "epoch": 0.7595172559813678, "grad_norm": 1.340602284246224, "learning_rate": 1.4412654727200337e-06, "loss": 0.6468, "step": 8968 }, { "epoch": 0.7596019479144611, "grad_norm": 1.3541725027953624, "learning_rate": 1.4403021208739183e-06, "loss": 0.6014, "step": 8969 }, { "epoch": 0.7596866398475545, "grad_norm": 1.374959677014722, "learning_rate": 1.4393390369139181e-06, "loss": 0.6373, "step": 8970 }, { "epoch": 0.7597713317806479, "grad_norm": 1.2527059241328677, "learning_rate": 1.4383762209125096e-06, "loss": 0.6623, "step": 8971 }, { "epoch": 0.7598560237137413, "grad_norm": 1.2453153172333453, "learning_rate": 1.4374136729421518e-06, "loss": 0.6154, "step": 8972 }, { "epoch": 0.7599407156468346, "grad_norm": 1.3690664944512019, "learning_rate": 1.4364513930752783e-06, "loss": 0.639, "step": 8973 }, { "epoch": 0.760025407579928, "grad_norm": 0.64820329805341, "learning_rate": 1.4354893813843073e-06, "loss": 0.8023, "step": 8974 }, { "epoch": 0.7601100995130213, "grad_norm": 1.3260035122665297, "learning_rate": 1.4345276379416357e-06, "loss": 0.6273, "step": 8975 }, { "epoch": 0.7601947914461148, "grad_norm": 1.3191981167081144, "learning_rate": 1.433566162819639e-06, "loss": 0.5663, "step": 8976 }, { "epoch": 0.7602794833792081, "grad_norm": 1.4732855875221502, "learning_rate": 1.4326049560906734e-06, "loss": 0.6627, "step": 8977 }, { "epoch": 0.7603641753123015, "grad_norm": 1.4243547792320037, "learning_rate": 1.4316440178270774e-06, "loss": 0.6109, "step": 8978 }, { "epoch": 0.7604488672453948, "grad_norm": 1.256175360928051, "learning_rate": 1.430683348101163e-06, "loss": 0.6207, "step": 8979 }, { "epoch": 0.7605335591784882, "grad_norm": 1.3511849440577315, "learning_rate": 1.4297229469852275e-06, "loss": 0.621, "step": 8980 }, { "epoch": 0.7606182511115817, "grad_norm": 2.2198993410561347, "learning_rate": 1.4287628145515453e-06, "loss": 0.6302, "step": 8981 }, { "epoch": 0.760702943044675, "grad_norm": 0.6780836672886945, "learning_rate": 1.4278029508723711e-06, "loss": 0.8018, "step": 8982 }, { "epoch": 0.7607876349777684, "grad_norm": 1.28438711884898, "learning_rate": 1.4268433560199413e-06, "loss": 0.669, "step": 8983 }, { "epoch": 0.7608723269108617, "grad_norm": 1.3710991893959543, "learning_rate": 1.4258840300664695e-06, "loss": 0.611, "step": 8984 }, { "epoch": 0.7609570188439552, "grad_norm": 1.8582287568321447, "learning_rate": 1.4249249730841514e-06, "loss": 0.6181, "step": 8985 }, { "epoch": 0.7610417107770485, "grad_norm": 1.3235168473633525, "learning_rate": 1.4239661851451587e-06, "loss": 0.5811, "step": 8986 }, { "epoch": 0.7611264027101419, "grad_norm": 1.539185406067812, "learning_rate": 1.4230076663216464e-06, "loss": 0.597, "step": 8987 }, { "epoch": 0.7612110946432352, "grad_norm": 2.8358298201149874, "learning_rate": 1.42204941668575e-06, "loss": 0.6003, "step": 8988 }, { "epoch": 0.7612957865763286, "grad_norm": 2.074376564958755, "learning_rate": 1.4210914363095796e-06, "loss": 0.5744, "step": 8989 }, { "epoch": 0.761380478509422, "grad_norm": 1.2137464997231322, "learning_rate": 1.4201337252652287e-06, "loss": 0.6065, "step": 8990 }, { "epoch": 0.7614651704425154, "grad_norm": 1.4763570454170796, "learning_rate": 1.4191762836247736e-06, "loss": 0.6222, "step": 8991 }, { "epoch": 0.7615498623756087, "grad_norm": 1.3072937991573914, "learning_rate": 1.4182191114602633e-06, "loss": 0.5854, "step": 8992 }, { "epoch": 0.7616345543087021, "grad_norm": 0.6033561466754165, "learning_rate": 1.4172622088437332e-06, "loss": 0.8174, "step": 8993 }, { "epoch": 0.7617192462417954, "grad_norm": 1.222155656131742, "learning_rate": 1.416305575847191e-06, "loss": 0.6618, "step": 8994 }, { "epoch": 0.7618039381748889, "grad_norm": 0.6301408760480999, "learning_rate": 1.4153492125426316e-06, "loss": 0.8805, "step": 8995 }, { "epoch": 0.7618886301079822, "grad_norm": 1.6161834820258612, "learning_rate": 1.4143931190020256e-06, "loss": 0.6029, "step": 8996 }, { "epoch": 0.7619733220410756, "grad_norm": 1.7250752672892726, "learning_rate": 1.4134372952973236e-06, "loss": 0.5998, "step": 8997 }, { "epoch": 0.762058013974169, "grad_norm": 1.2690063934517803, "learning_rate": 1.4124817415004567e-06, "loss": 0.6129, "step": 8998 }, { "epoch": 0.7621427059072623, "grad_norm": 1.2489210854845836, "learning_rate": 1.4115264576833376e-06, "loss": 0.6071, "step": 8999 }, { "epoch": 0.7622273978403558, "grad_norm": 1.2299853975757393, "learning_rate": 1.4105714439178525e-06, "loss": 0.6178, "step": 9000 }, { "epoch": 0.7623120897734491, "grad_norm": 1.555641941297658, "learning_rate": 1.4096167002758749e-06, "loss": 0.6136, "step": 9001 }, { "epoch": 0.7623967817065425, "grad_norm": 1.6031948071336588, "learning_rate": 1.4086622268292511e-06, "loss": 0.6277, "step": 9002 }, { "epoch": 0.7624814736396358, "grad_norm": 0.6435116525929518, "learning_rate": 1.407708023649812e-06, "loss": 0.8298, "step": 9003 }, { "epoch": 0.7625661655727292, "grad_norm": 1.746855022229996, "learning_rate": 1.4067540908093657e-06, "loss": 0.6394, "step": 9004 }, { "epoch": 0.7626508575058226, "grad_norm": 1.888835860221954, "learning_rate": 1.4058004283797016e-06, "loss": 0.5866, "step": 9005 }, { "epoch": 0.762735549438916, "grad_norm": 1.2446399361821123, "learning_rate": 1.4048470364325872e-06, "loss": 0.6761, "step": 9006 }, { "epoch": 0.7628202413720093, "grad_norm": 1.1698385054557583, "learning_rate": 1.4038939150397723e-06, "loss": 0.5952, "step": 9007 }, { "epoch": 0.7629049333051027, "grad_norm": 1.6485219993091562, "learning_rate": 1.4029410642729808e-06, "loss": 0.6256, "step": 9008 }, { "epoch": 0.762989625238196, "grad_norm": 1.4971808658824426, "learning_rate": 1.4019884842039232e-06, "loss": 0.5787, "step": 9009 }, { "epoch": 0.7630743171712895, "grad_norm": 1.3227502514285088, "learning_rate": 1.4010361749042817e-06, "loss": 0.6162, "step": 9010 }, { "epoch": 0.7631590091043828, "grad_norm": 1.2824754088707333, "learning_rate": 1.4000841364457267e-06, "loss": 0.6084, "step": 9011 }, { "epoch": 0.7632437010374762, "grad_norm": 1.2315706737714247, "learning_rate": 1.3991323688999043e-06, "loss": 0.6358, "step": 9012 }, { "epoch": 0.7633283929705695, "grad_norm": 3.0371241082502216, "learning_rate": 1.3981808723384372e-06, "loss": 0.634, "step": 9013 }, { "epoch": 0.7634130849036629, "grad_norm": 1.6605514174040346, "learning_rate": 1.3972296468329338e-06, "loss": 0.6015, "step": 9014 }, { "epoch": 0.7634977768367563, "grad_norm": 1.5450014328414223, "learning_rate": 1.3962786924549754e-06, "loss": 0.6746, "step": 9015 }, { "epoch": 0.7635824687698497, "grad_norm": 0.6529863653869642, "learning_rate": 1.395328009276128e-06, "loss": 0.8697, "step": 9016 }, { "epoch": 0.763667160702943, "grad_norm": 1.5625565343599923, "learning_rate": 1.3943775973679351e-06, "loss": 0.666, "step": 9017 }, { "epoch": 0.7637518526360364, "grad_norm": 2.908565163626884, "learning_rate": 1.3934274568019212e-06, "loss": 0.6258, "step": 9018 }, { "epoch": 0.7638365445691297, "grad_norm": 1.28602458555907, "learning_rate": 1.3924775876495888e-06, "loss": 0.6569, "step": 9019 }, { "epoch": 0.7639212365022232, "grad_norm": 0.6281706879160532, "learning_rate": 1.391527989982422e-06, "loss": 0.8503, "step": 9020 }, { "epoch": 0.7640059284353166, "grad_norm": 1.6212023033804066, "learning_rate": 1.3905786638718805e-06, "loss": 0.6502, "step": 9021 }, { "epoch": 0.7640906203684099, "grad_norm": 1.2089190446243658, "learning_rate": 1.3896296093894085e-06, "loss": 0.6515, "step": 9022 }, { "epoch": 0.7641753123015033, "grad_norm": 1.404975323029791, "learning_rate": 1.388680826606425e-06, "loss": 0.6498, "step": 9023 }, { "epoch": 0.7642600042345966, "grad_norm": 1.4654226028676867, "learning_rate": 1.3877323155943324e-06, "loss": 0.6016, "step": 9024 }, { "epoch": 0.7643446961676901, "grad_norm": 1.17975887830686, "learning_rate": 1.3867840764245099e-06, "loss": 0.6285, "step": 9025 }, { "epoch": 0.7644293881007834, "grad_norm": 1.2269582833031585, "learning_rate": 1.3858361091683192e-06, "loss": 0.611, "step": 9026 }, { "epoch": 0.7645140800338768, "grad_norm": 0.6513199272554422, "learning_rate": 1.3848884138970992e-06, "loss": 0.8357, "step": 9027 }, { "epoch": 0.7645987719669701, "grad_norm": 1.3940931060855748, "learning_rate": 1.3839409906821705e-06, "loss": 0.6508, "step": 9028 }, { "epoch": 0.7646834639000635, "grad_norm": 1.1573648711890923, "learning_rate": 1.3829938395948288e-06, "loss": 0.632, "step": 9029 }, { "epoch": 0.7647681558331569, "grad_norm": 1.435227042616506, "learning_rate": 1.3820469607063547e-06, "loss": 0.654, "step": 9030 }, { "epoch": 0.7648528477662503, "grad_norm": 1.3943468056242574, "learning_rate": 1.3811003540880025e-06, "loss": 0.6371, "step": 9031 }, { "epoch": 0.7649375396993436, "grad_norm": 0.6171705834017531, "learning_rate": 1.3801540198110126e-06, "loss": 0.8284, "step": 9032 }, { "epoch": 0.765022231632437, "grad_norm": 1.229230867867671, "learning_rate": 1.3792079579466023e-06, "loss": 0.6511, "step": 9033 }, { "epoch": 0.7651069235655303, "grad_norm": 2.0189458885041094, "learning_rate": 1.3782621685659641e-06, "loss": 0.5843, "step": 9034 }, { "epoch": 0.7651916154986238, "grad_norm": 1.2695686653416836, "learning_rate": 1.3773166517402764e-06, "loss": 0.5933, "step": 9035 }, { "epoch": 0.7652763074317172, "grad_norm": 1.5474057601016518, "learning_rate": 1.3763714075406952e-06, "loss": 0.6603, "step": 9036 }, { "epoch": 0.7653609993648105, "grad_norm": 1.2783726818713488, "learning_rate": 1.3754264360383517e-06, "loss": 0.6729, "step": 9037 }, { "epoch": 0.7654456912979039, "grad_norm": 1.2701203066522349, "learning_rate": 1.3744817373043623e-06, "loss": 0.6418, "step": 9038 }, { "epoch": 0.7655303832309972, "grad_norm": 1.195574975820093, "learning_rate": 1.37353731140982e-06, "loss": 0.581, "step": 9039 }, { "epoch": 0.7656150751640907, "grad_norm": 1.2267395863901835, "learning_rate": 1.3725931584257983e-06, "loss": 0.5708, "step": 9040 }, { "epoch": 0.765699767097184, "grad_norm": 1.5053442491630058, "learning_rate": 1.371649278423351e-06, "loss": 0.6402, "step": 9041 }, { "epoch": 0.7657844590302774, "grad_norm": 2.1283239899673436, "learning_rate": 1.3707056714735067e-06, "loss": 0.5345, "step": 9042 }, { "epoch": 0.7658691509633707, "grad_norm": 1.903620858212651, "learning_rate": 1.3697623376472785e-06, "loss": 0.6111, "step": 9043 }, { "epoch": 0.7659538428964641, "grad_norm": 1.8357703642062226, "learning_rate": 1.3688192770156594e-06, "loss": 0.6489, "step": 9044 }, { "epoch": 0.7660385348295575, "grad_norm": 1.9064701889101985, "learning_rate": 1.3678764896496154e-06, "loss": 0.6236, "step": 9045 }, { "epoch": 0.7661232267626509, "grad_norm": 1.3588084338256334, "learning_rate": 1.3669339756200994e-06, "loss": 0.5372, "step": 9046 }, { "epoch": 0.7662079186957442, "grad_norm": 1.524206748068709, "learning_rate": 1.3659917349980394e-06, "loss": 0.6281, "step": 9047 }, { "epoch": 0.7662926106288376, "grad_norm": 1.66342941453613, "learning_rate": 1.3650497678543446e-06, "loss": 0.6252, "step": 9048 }, { "epoch": 0.7663773025619309, "grad_norm": 0.6295784590831791, "learning_rate": 1.3641080742599039e-06, "loss": 0.8096, "step": 9049 }, { "epoch": 0.7664619944950244, "grad_norm": 1.4183339298727002, "learning_rate": 1.3631666542855821e-06, "loss": 0.6207, "step": 9050 }, { "epoch": 0.7665466864281177, "grad_norm": 1.3017066047981831, "learning_rate": 1.362225508002228e-06, "loss": 0.664, "step": 9051 }, { "epoch": 0.7666313783612111, "grad_norm": 1.186814076684555, "learning_rate": 1.3612846354806663e-06, "loss": 0.6187, "step": 9052 }, { "epoch": 0.7667160702943044, "grad_norm": 0.6388015083374278, "learning_rate": 1.3603440367917047e-06, "loss": 0.8422, "step": 9053 }, { "epoch": 0.7668007622273978, "grad_norm": 1.3998035047410897, "learning_rate": 1.3594037120061283e-06, "loss": 0.6054, "step": 9054 }, { "epoch": 0.7668854541604913, "grad_norm": 1.7634229689594694, "learning_rate": 1.3584636611946988e-06, "loss": 0.6225, "step": 9055 }, { "epoch": 0.7669701460935846, "grad_norm": 1.512640786768801, "learning_rate": 1.3575238844281624e-06, "loss": 0.6442, "step": 9056 }, { "epoch": 0.767054838026678, "grad_norm": 0.6061278068885229, "learning_rate": 1.3565843817772424e-06, "loss": 0.8694, "step": 9057 }, { "epoch": 0.7671395299597713, "grad_norm": 0.6509721502440643, "learning_rate": 1.3556451533126392e-06, "loss": 0.871, "step": 9058 }, { "epoch": 0.7672242218928647, "grad_norm": 1.3674496625734458, "learning_rate": 1.3547061991050353e-06, "loss": 0.6601, "step": 9059 }, { "epoch": 0.7673089138259581, "grad_norm": 1.2986304130307382, "learning_rate": 1.3537675192250932e-06, "loss": 0.6284, "step": 9060 }, { "epoch": 0.7673936057590515, "grad_norm": 1.316442065736002, "learning_rate": 1.352829113743453e-06, "loss": 0.5995, "step": 9061 }, { "epoch": 0.7674782976921448, "grad_norm": 1.2405338366049605, "learning_rate": 1.3518909827307364e-06, "loss": 0.5705, "step": 9062 }, { "epoch": 0.7675629896252382, "grad_norm": 1.3191207593563394, "learning_rate": 1.3509531262575392e-06, "loss": 0.647, "step": 9063 }, { "epoch": 0.7676476815583315, "grad_norm": 1.6641453628395702, "learning_rate": 1.350015544394442e-06, "loss": 0.6351, "step": 9064 }, { "epoch": 0.767732373491425, "grad_norm": 0.6872780330038484, "learning_rate": 1.3490782372120042e-06, "loss": 0.8203, "step": 9065 }, { "epoch": 0.7678170654245183, "grad_norm": 1.36977352692653, "learning_rate": 1.3481412047807586e-06, "loss": 0.5788, "step": 9066 }, { "epoch": 0.7679017573576117, "grad_norm": 1.3856676317243066, "learning_rate": 1.3472044471712276e-06, "loss": 0.6574, "step": 9067 }, { "epoch": 0.767986449290705, "grad_norm": 1.1396789277728292, "learning_rate": 1.346267964453903e-06, "loss": 0.6288, "step": 9068 }, { "epoch": 0.7680711412237984, "grad_norm": 2.3368251047975237, "learning_rate": 1.3453317566992618e-06, "loss": 0.6274, "step": 9069 }, { "epoch": 0.7681558331568918, "grad_norm": 1.4510411874759623, "learning_rate": 1.3443958239777593e-06, "loss": 0.637, "step": 9070 }, { "epoch": 0.7682405250899852, "grad_norm": 0.6430544728096573, "learning_rate": 1.3434601663598273e-06, "loss": 0.839, "step": 9071 }, { "epoch": 0.7683252170230785, "grad_norm": 1.1995459529056924, "learning_rate": 1.34252478391588e-06, "loss": 0.5662, "step": 9072 }, { "epoch": 0.7684099089561719, "grad_norm": 1.1098351473896368, "learning_rate": 1.34158967671631e-06, "loss": 0.6401, "step": 9073 }, { "epoch": 0.7684946008892652, "grad_norm": 1.4008203315835837, "learning_rate": 1.3406548448314889e-06, "loss": 0.6453, "step": 9074 }, { "epoch": 0.7685792928223587, "grad_norm": 1.3348425124685073, "learning_rate": 1.3397202883317694e-06, "loss": 0.5588, "step": 9075 }, { "epoch": 0.7686639847554521, "grad_norm": 1.1898611068144511, "learning_rate": 1.3387860072874787e-06, "loss": 0.6304, "step": 9076 }, { "epoch": 0.7687486766885454, "grad_norm": 1.4150072258623856, "learning_rate": 1.3378520017689284e-06, "loss": 0.6131, "step": 9077 }, { "epoch": 0.7688333686216388, "grad_norm": 1.5148124024536689, "learning_rate": 1.336918271846408e-06, "loss": 0.6318, "step": 9078 }, { "epoch": 0.7689180605547321, "grad_norm": 1.5531441030898419, "learning_rate": 1.335984817590183e-06, "loss": 0.6154, "step": 9079 }, { "epoch": 0.7690027524878256, "grad_norm": 1.2269032411595067, "learning_rate": 1.3350516390705025e-06, "loss": 0.6738, "step": 9080 }, { "epoch": 0.7690874444209189, "grad_norm": 1.5601763060526208, "learning_rate": 1.3341187363575937e-06, "loss": 0.6136, "step": 9081 }, { "epoch": 0.7691721363540123, "grad_norm": 1.728760405013275, "learning_rate": 1.3331861095216608e-06, "loss": 0.6157, "step": 9082 }, { "epoch": 0.7692568282871056, "grad_norm": 1.1259441210997472, "learning_rate": 1.3322537586328915e-06, "loss": 0.6422, "step": 9083 }, { "epoch": 0.769341520220199, "grad_norm": 1.5502351871839923, "learning_rate": 1.3313216837614473e-06, "loss": 0.571, "step": 9084 }, { "epoch": 0.7694262121532924, "grad_norm": 1.3582702162924236, "learning_rate": 1.3303898849774726e-06, "loss": 0.5992, "step": 9085 }, { "epoch": 0.7695109040863858, "grad_norm": 2.9438727801184594, "learning_rate": 1.3294583623510927e-06, "loss": 0.5936, "step": 9086 }, { "epoch": 0.7695955960194791, "grad_norm": 0.6263183688851633, "learning_rate": 1.3285271159524037e-06, "loss": 0.8957, "step": 9087 }, { "epoch": 0.7696802879525725, "grad_norm": 1.3866513919935932, "learning_rate": 1.3275961458514942e-06, "loss": 0.6443, "step": 9088 }, { "epoch": 0.769764979885666, "grad_norm": 0.6320392224250665, "learning_rate": 1.3266654521184196e-06, "loss": 0.8841, "step": 9089 }, { "epoch": 0.7698496718187593, "grad_norm": 1.3722880599058573, "learning_rate": 1.3257350348232206e-06, "loss": 0.6226, "step": 9090 }, { "epoch": 0.7699343637518526, "grad_norm": 1.4522649607222184, "learning_rate": 1.3248048940359182e-06, "loss": 0.6372, "step": 9091 }, { "epoch": 0.770019055684946, "grad_norm": 1.1739292824675276, "learning_rate": 1.3238750298265069e-06, "loss": 0.6004, "step": 9092 }, { "epoch": 0.7701037476180393, "grad_norm": 1.284381880430507, "learning_rate": 1.3229454422649652e-06, "loss": 0.7055, "step": 9093 }, { "epoch": 0.7701884395511328, "grad_norm": 2.029128927901268, "learning_rate": 1.3220161314212504e-06, "loss": 0.6516, "step": 9094 }, { "epoch": 0.7702731314842262, "grad_norm": 0.6711740341456617, "learning_rate": 1.3210870973652972e-06, "loss": 0.8238, "step": 9095 }, { "epoch": 0.7703578234173195, "grad_norm": 1.4207291891358254, "learning_rate": 1.3201583401670226e-06, "loss": 0.6405, "step": 9096 }, { "epoch": 0.7704425153504129, "grad_norm": 1.6123790867758456, "learning_rate": 1.3192298598963171e-06, "loss": 0.6559, "step": 9097 }, { "epoch": 0.7705272072835062, "grad_norm": 1.442879227895287, "learning_rate": 1.318301656623055e-06, "loss": 0.6025, "step": 9098 }, { "epoch": 0.7706118992165997, "grad_norm": 1.3213343030716038, "learning_rate": 1.3173737304170903e-06, "loss": 0.614, "step": 9099 }, { "epoch": 0.770696591149693, "grad_norm": 1.1962276582991278, "learning_rate": 1.316446081348251e-06, "loss": 0.6163, "step": 9100 }, { "epoch": 0.7707812830827864, "grad_norm": 1.4193937120796973, "learning_rate": 1.3155187094863497e-06, "loss": 0.5899, "step": 9101 }, { "epoch": 0.7708659750158797, "grad_norm": 1.205519331983335, "learning_rate": 1.314591614901176e-06, "loss": 0.6338, "step": 9102 }, { "epoch": 0.7709506669489731, "grad_norm": 1.9778183341629163, "learning_rate": 1.3136647976624983e-06, "loss": 0.6258, "step": 9103 }, { "epoch": 0.7710353588820665, "grad_norm": 1.712656406008829, "learning_rate": 1.312738257840066e-06, "loss": 0.6417, "step": 9104 }, { "epoch": 0.7711200508151599, "grad_norm": 1.6057293851718848, "learning_rate": 1.3118119955036035e-06, "loss": 0.6269, "step": 9105 }, { "epoch": 0.7712047427482532, "grad_norm": 1.6410607371676509, "learning_rate": 1.3108860107228182e-06, "loss": 0.6284, "step": 9106 }, { "epoch": 0.7712894346813466, "grad_norm": 1.5944196687085719, "learning_rate": 1.3099603035673952e-06, "loss": 0.6679, "step": 9107 }, { "epoch": 0.7713741266144399, "grad_norm": 1.6750904711512824, "learning_rate": 1.309034874106999e-06, "loss": 0.6778, "step": 9108 }, { "epoch": 0.7714588185475334, "grad_norm": 1.4070425078916653, "learning_rate": 1.3081097224112732e-06, "loss": 0.643, "step": 9109 }, { "epoch": 0.7715435104806267, "grad_norm": 1.1037893526524278, "learning_rate": 1.3071848485498417e-06, "loss": 0.6379, "step": 9110 }, { "epoch": 0.7716282024137201, "grad_norm": 1.3515809570508597, "learning_rate": 1.3062602525923036e-06, "loss": 0.625, "step": 9111 }, { "epoch": 0.7717128943468134, "grad_norm": 1.4011200795233203, "learning_rate": 1.3053359346082422e-06, "loss": 0.6543, "step": 9112 }, { "epoch": 0.7717975862799068, "grad_norm": 1.2587297809728546, "learning_rate": 1.3044118946672141e-06, "loss": 0.654, "step": 9113 }, { "epoch": 0.7718822782130003, "grad_norm": 1.421946112959359, "learning_rate": 1.30348813283876e-06, "loss": 0.594, "step": 9114 }, { "epoch": 0.7719669701460936, "grad_norm": 0.6346539586680605, "learning_rate": 1.3025646491923983e-06, "loss": 0.846, "step": 9115 }, { "epoch": 0.772051662079187, "grad_norm": 1.4181133492013338, "learning_rate": 1.301641443797625e-06, "loss": 0.6418, "step": 9116 }, { "epoch": 0.7721363540122803, "grad_norm": 1.3027534448392832, "learning_rate": 1.3007185167239183e-06, "loss": 0.5632, "step": 9117 }, { "epoch": 0.7722210459453737, "grad_norm": 2.4785414248054916, "learning_rate": 1.2997958680407307e-06, "loss": 0.6439, "step": 9118 }, { "epoch": 0.7723057378784671, "grad_norm": 3.429041193987179, "learning_rate": 1.2988734978174978e-06, "loss": 0.6352, "step": 9119 }, { "epoch": 0.7723904298115605, "grad_norm": 1.6651828002030344, "learning_rate": 1.2979514061236332e-06, "loss": 0.6343, "step": 9120 }, { "epoch": 0.7724751217446538, "grad_norm": 2.1950178754535847, "learning_rate": 1.2970295930285276e-06, "loss": 0.6412, "step": 9121 }, { "epoch": 0.7725598136777472, "grad_norm": 1.3778697124954489, "learning_rate": 1.2961080586015518e-06, "loss": 0.6316, "step": 9122 }, { "epoch": 0.7726445056108405, "grad_norm": 1.4346045088676713, "learning_rate": 1.2951868029120606e-06, "loss": 0.6327, "step": 9123 }, { "epoch": 0.772729197543934, "grad_norm": 1.3988502995894638, "learning_rate": 1.2942658260293789e-06, "loss": 0.6491, "step": 9124 }, { "epoch": 0.7728138894770273, "grad_norm": 1.3167207054896815, "learning_rate": 1.2933451280228182e-06, "loss": 0.6533, "step": 9125 }, { "epoch": 0.7728985814101207, "grad_norm": 2.157972671032138, "learning_rate": 1.2924247089616625e-06, "loss": 0.5953, "step": 9126 }, { "epoch": 0.772983273343214, "grad_norm": 1.2802978751483944, "learning_rate": 1.29150456891518e-06, "loss": 0.5815, "step": 9127 }, { "epoch": 0.7730679652763074, "grad_norm": 1.4011931222744156, "learning_rate": 1.2905847079526163e-06, "loss": 0.6321, "step": 9128 }, { "epoch": 0.7731526572094009, "grad_norm": 1.440845675394567, "learning_rate": 1.2896651261431958e-06, "loss": 0.6099, "step": 9129 }, { "epoch": 0.7732373491424942, "grad_norm": 1.4098638470293376, "learning_rate": 1.2887458235561211e-06, "loss": 0.6056, "step": 9130 }, { "epoch": 0.7733220410755876, "grad_norm": 1.5625313671742889, "learning_rate": 1.2878268002605776e-06, "loss": 0.5976, "step": 9131 }, { "epoch": 0.7734067330086809, "grad_norm": 1.4316909142906207, "learning_rate": 1.286908056325722e-06, "loss": 0.6423, "step": 9132 }, { "epoch": 0.7734914249417743, "grad_norm": 1.8626623402392724, "learning_rate": 1.2859895918206988e-06, "loss": 0.7019, "step": 9133 }, { "epoch": 0.7735761168748677, "grad_norm": 1.7309802125264968, "learning_rate": 1.2850714068146236e-06, "loss": 0.6228, "step": 9134 }, { "epoch": 0.7736608088079611, "grad_norm": 1.2274522810740074, "learning_rate": 1.2841535013765965e-06, "loss": 0.593, "step": 9135 }, { "epoch": 0.7737455007410544, "grad_norm": 1.405249407018226, "learning_rate": 1.2832358755756951e-06, "loss": 0.6234, "step": 9136 }, { "epoch": 0.7738301926741478, "grad_norm": 1.4690202534136438, "learning_rate": 1.2823185294809753e-06, "loss": 0.6453, "step": 9137 }, { "epoch": 0.7739148846072411, "grad_norm": 1.7228113934733764, "learning_rate": 1.2814014631614718e-06, "loss": 0.6549, "step": 9138 }, { "epoch": 0.7739995765403346, "grad_norm": 1.4620094092336156, "learning_rate": 1.280484676686201e-06, "loss": 0.6022, "step": 9139 }, { "epoch": 0.7740842684734279, "grad_norm": 1.6634776146386943, "learning_rate": 1.2795681701241525e-06, "loss": 0.6543, "step": 9140 }, { "epoch": 0.7741689604065213, "grad_norm": 1.4053394897922253, "learning_rate": 1.2786519435443012e-06, "loss": 0.6095, "step": 9141 }, { "epoch": 0.7742536523396146, "grad_norm": 1.6860368678803115, "learning_rate": 1.2777359970155933e-06, "loss": 0.638, "step": 9142 }, { "epoch": 0.774338344272708, "grad_norm": 1.5281945519323166, "learning_rate": 1.2768203306069644e-06, "loss": 0.6504, "step": 9143 }, { "epoch": 0.7744230362058014, "grad_norm": 1.4168587679955649, "learning_rate": 1.2759049443873211e-06, "loss": 0.6295, "step": 9144 }, { "epoch": 0.7745077281388948, "grad_norm": 1.4561749616025124, "learning_rate": 1.2749898384255498e-06, "loss": 0.6381, "step": 9145 }, { "epoch": 0.7745924200719881, "grad_norm": 2.272674314947328, "learning_rate": 1.2740750127905183e-06, "loss": 0.5514, "step": 9146 }, { "epoch": 0.7746771120050815, "grad_norm": 2.355806219642751, "learning_rate": 1.2731604675510729e-06, "loss": 0.6664, "step": 9147 }, { "epoch": 0.7747618039381748, "grad_norm": 1.3729606960399798, "learning_rate": 1.2722462027760357e-06, "loss": 0.6392, "step": 9148 }, { "epoch": 0.7748464958712683, "grad_norm": 2.750893800898765, "learning_rate": 1.2713322185342108e-06, "loss": 0.6492, "step": 9149 }, { "epoch": 0.7749311878043617, "grad_norm": 1.2524760582031362, "learning_rate": 1.270418514894381e-06, "loss": 0.6343, "step": 9150 }, { "epoch": 0.775015879737455, "grad_norm": 1.6489719428038931, "learning_rate": 1.269505091925307e-06, "loss": 0.6353, "step": 9151 }, { "epoch": 0.7751005716705484, "grad_norm": 1.2229546064816017, "learning_rate": 1.26859194969573e-06, "loss": 0.5759, "step": 9152 }, { "epoch": 0.7751852636036417, "grad_norm": 1.4065495113292679, "learning_rate": 1.2676790882743662e-06, "loss": 0.639, "step": 9153 }, { "epoch": 0.7752699555367352, "grad_norm": 1.5323482697668476, "learning_rate": 1.2667665077299156e-06, "loss": 0.5843, "step": 9154 }, { "epoch": 0.7753546474698285, "grad_norm": 1.2635892715132582, "learning_rate": 1.2658542081310527e-06, "loss": 0.6363, "step": 9155 }, { "epoch": 0.7754393394029219, "grad_norm": 1.3587286681000366, "learning_rate": 1.2649421895464343e-06, "loss": 0.6388, "step": 9156 }, { "epoch": 0.7755240313360152, "grad_norm": 1.8765892713000676, "learning_rate": 1.2640304520446933e-06, "loss": 0.5967, "step": 9157 }, { "epoch": 0.7756087232691086, "grad_norm": 1.4578537223684245, "learning_rate": 1.263118995694444e-06, "loss": 0.6623, "step": 9158 }, { "epoch": 0.775693415202202, "grad_norm": 1.576809060297259, "learning_rate": 1.2622078205642785e-06, "loss": 0.5968, "step": 9159 }, { "epoch": 0.7757781071352954, "grad_norm": 1.4366934900316004, "learning_rate": 1.2612969267227677e-06, "loss": 0.5802, "step": 9160 }, { "epoch": 0.7758627990683887, "grad_norm": 1.3655759382007746, "learning_rate": 1.2603863142384598e-06, "loss": 0.6396, "step": 9161 }, { "epoch": 0.7759474910014821, "grad_norm": 1.7065109562105378, "learning_rate": 1.2594759831798848e-06, "loss": 0.6531, "step": 9162 }, { "epoch": 0.7760321829345754, "grad_norm": 1.3529886786367318, "learning_rate": 1.2585659336155466e-06, "loss": 0.6183, "step": 9163 }, { "epoch": 0.7761168748676689, "grad_norm": 1.786892354779334, "learning_rate": 1.2576561656139352e-06, "loss": 0.6015, "step": 9164 }, { "epoch": 0.7762015668007622, "grad_norm": 1.0874861476154318, "learning_rate": 1.2567466792435152e-06, "loss": 0.6315, "step": 9165 }, { "epoch": 0.7762862587338556, "grad_norm": 1.2900095663745494, "learning_rate": 1.2558374745727276e-06, "loss": 0.6227, "step": 9166 }, { "epoch": 0.776370950666949, "grad_norm": 1.409733011380665, "learning_rate": 1.254928551669996e-06, "loss": 0.6048, "step": 9167 }, { "epoch": 0.7764556426000423, "grad_norm": 1.4671776476880976, "learning_rate": 1.2540199106037242e-06, "loss": 0.6709, "step": 9168 }, { "epoch": 0.7765403345331358, "grad_norm": 0.6179833408592306, "learning_rate": 1.2531115514422882e-06, "loss": 0.8446, "step": 9169 }, { "epoch": 0.7766250264662291, "grad_norm": 1.1666445547339281, "learning_rate": 1.2522034742540484e-06, "loss": 0.6173, "step": 9170 }, { "epoch": 0.7767097183993225, "grad_norm": 0.5696792275255919, "learning_rate": 1.2512956791073433e-06, "loss": 0.7851, "step": 9171 }, { "epoch": 0.7767944103324158, "grad_norm": 1.2243292645234332, "learning_rate": 1.2503881660704882e-06, "loss": 0.5871, "step": 9172 }, { "epoch": 0.7768791022655092, "grad_norm": 1.2752879014598892, "learning_rate": 1.2494809352117803e-06, "loss": 0.6688, "step": 9173 }, { "epoch": 0.7769637941986026, "grad_norm": 1.1691126314654245, "learning_rate": 1.2485739865994906e-06, "loss": 0.7169, "step": 9174 }, { "epoch": 0.777048486131696, "grad_norm": 1.4861224932257646, "learning_rate": 1.247667320301873e-06, "loss": 0.6637, "step": 9175 }, { "epoch": 0.7771331780647893, "grad_norm": 1.2583551876595975, "learning_rate": 1.2467609363871608e-06, "loss": 0.6321, "step": 9176 }, { "epoch": 0.7772178699978827, "grad_norm": 1.5880970971548265, "learning_rate": 1.2458548349235605e-06, "loss": 0.6637, "step": 9177 }, { "epoch": 0.777302561930976, "grad_norm": 1.4522622742519296, "learning_rate": 1.2449490159792639e-06, "loss": 0.6521, "step": 9178 }, { "epoch": 0.7773872538640695, "grad_norm": 1.7002394745903182, "learning_rate": 1.244043479622437e-06, "loss": 0.6536, "step": 9179 }, { "epoch": 0.7774719457971628, "grad_norm": 1.824006007381199, "learning_rate": 1.2431382259212272e-06, "loss": 0.6095, "step": 9180 }, { "epoch": 0.7775566377302562, "grad_norm": 1.4948517752539934, "learning_rate": 1.242233254943761e-06, "loss": 0.5908, "step": 9181 }, { "epoch": 0.7776413296633495, "grad_norm": 1.5124088230238737, "learning_rate": 1.2413285667581393e-06, "loss": 0.6259, "step": 9182 }, { "epoch": 0.7777260215964429, "grad_norm": 2.031225635466689, "learning_rate": 1.2404241614324458e-06, "loss": 0.6593, "step": 9183 }, { "epoch": 0.7778107135295363, "grad_norm": 1.4474088672532957, "learning_rate": 1.2395200390347418e-06, "loss": 0.6629, "step": 9184 }, { "epoch": 0.7778954054626297, "grad_norm": 1.7749454250175694, "learning_rate": 1.238616199633068e-06, "loss": 0.5883, "step": 9185 }, { "epoch": 0.777980097395723, "grad_norm": 1.4943288954604756, "learning_rate": 1.2377126432954439e-06, "loss": 0.6428, "step": 9186 }, { "epoch": 0.7780647893288164, "grad_norm": 1.2183070965573914, "learning_rate": 1.2368093700898648e-06, "loss": 0.6164, "step": 9187 }, { "epoch": 0.7781494812619097, "grad_norm": 1.7696905585115028, "learning_rate": 1.2359063800843068e-06, "loss": 0.6027, "step": 9188 }, { "epoch": 0.7782341731950032, "grad_norm": 1.5493952986905561, "learning_rate": 1.2350036733467273e-06, "loss": 0.6359, "step": 9189 }, { "epoch": 0.7783188651280966, "grad_norm": 0.6511738192473342, "learning_rate": 1.2341012499450566e-06, "loss": 0.8019, "step": 9190 }, { "epoch": 0.7784035570611899, "grad_norm": 1.7374458542099103, "learning_rate": 1.2331991099472085e-06, "loss": 0.61, "step": 9191 }, { "epoch": 0.7784882489942833, "grad_norm": 1.3824030897199286, "learning_rate": 1.2322972534210731e-06, "loss": 0.6241, "step": 9192 }, { "epoch": 0.7785729409273767, "grad_norm": 1.7699332337347726, "learning_rate": 1.23139568043452e-06, "loss": 0.6396, "step": 9193 }, { "epoch": 0.7786576328604701, "grad_norm": 1.3673395950044898, "learning_rate": 1.2304943910553996e-06, "loss": 0.5658, "step": 9194 }, { "epoch": 0.7787423247935634, "grad_norm": 1.6372945262774186, "learning_rate": 1.229593385351535e-06, "loss": 0.63, "step": 9195 }, { "epoch": 0.7788270167266568, "grad_norm": 1.3111704895416225, "learning_rate": 1.2286926633907341e-06, "loss": 0.6091, "step": 9196 }, { "epoch": 0.7789117086597501, "grad_norm": 1.5926200645689423, "learning_rate": 1.2277922252407814e-06, "loss": 0.6757, "step": 9197 }, { "epoch": 0.7789964005928436, "grad_norm": 1.7871604106058634, "learning_rate": 1.2268920709694354e-06, "loss": 0.6332, "step": 9198 }, { "epoch": 0.7790810925259369, "grad_norm": 1.2549781305934118, "learning_rate": 1.2259922006444435e-06, "loss": 0.6343, "step": 9199 }, { "epoch": 0.7791657844590303, "grad_norm": 1.1904593615641514, "learning_rate": 1.2250926143335218e-06, "loss": 0.6207, "step": 9200 }, { "epoch": 0.7792504763921236, "grad_norm": 1.2631825947727633, "learning_rate": 1.2241933121043692e-06, "loss": 0.5928, "step": 9201 }, { "epoch": 0.779335168325217, "grad_norm": 1.7979547639726985, "learning_rate": 1.2232942940246657e-06, "loss": 0.6474, "step": 9202 }, { "epoch": 0.7794198602583104, "grad_norm": 1.4772449928868299, "learning_rate": 1.2223955601620636e-06, "loss": 0.5882, "step": 9203 }, { "epoch": 0.7795045521914038, "grad_norm": 1.452732377744385, "learning_rate": 1.2214971105841988e-06, "loss": 0.6227, "step": 9204 }, { "epoch": 0.7795892441244971, "grad_norm": 1.2904141761056154, "learning_rate": 1.2205989453586853e-06, "loss": 0.5931, "step": 9205 }, { "epoch": 0.7796739360575905, "grad_norm": 1.3961199814792207, "learning_rate": 1.2197010645531131e-06, "loss": 0.6558, "step": 9206 }, { "epoch": 0.7797586279906839, "grad_norm": 0.6481538163127931, "learning_rate": 1.2188034682350552e-06, "loss": 0.8586, "step": 9207 }, { "epoch": 0.7798433199237773, "grad_norm": 1.7995766873619798, "learning_rate": 1.2179061564720573e-06, "loss": 0.6432, "step": 9208 }, { "epoch": 0.7799280118568707, "grad_norm": 1.3516983583527042, "learning_rate": 1.2170091293316483e-06, "loss": 0.6591, "step": 9209 }, { "epoch": 0.780012703789964, "grad_norm": 1.6644582927111826, "learning_rate": 1.2161123868813358e-06, "loss": 0.6302, "step": 9210 }, { "epoch": 0.7800973957230574, "grad_norm": 4.223699574216784, "learning_rate": 1.2152159291886013e-06, "loss": 0.6291, "step": 9211 }, { "epoch": 0.7801820876561507, "grad_norm": 1.351510193974065, "learning_rate": 1.214319756320909e-06, "loss": 0.6072, "step": 9212 }, { "epoch": 0.7802667795892442, "grad_norm": 1.1712118203110526, "learning_rate": 1.2134238683457018e-06, "loss": 0.6357, "step": 9213 }, { "epoch": 0.7803514715223375, "grad_norm": 1.2998782244415754, "learning_rate": 1.2125282653303994e-06, "loss": 0.6286, "step": 9214 }, { "epoch": 0.7804361634554309, "grad_norm": 1.4097193162459454, "learning_rate": 1.2116329473424016e-06, "loss": 0.6301, "step": 9215 }, { "epoch": 0.7805208553885242, "grad_norm": 2.1354826985761144, "learning_rate": 1.2107379144490837e-06, "loss": 0.645, "step": 9216 }, { "epoch": 0.7806055473216176, "grad_norm": 1.3143139754681494, "learning_rate": 1.209843166717803e-06, "loss": 0.6101, "step": 9217 }, { "epoch": 0.780690239254711, "grad_norm": 2.9212034095711203, "learning_rate": 1.2089487042158948e-06, "loss": 0.6429, "step": 9218 }, { "epoch": 0.7807749311878044, "grad_norm": 1.3569220330753342, "learning_rate": 1.2080545270106686e-06, "loss": 0.6504, "step": 9219 }, { "epoch": 0.7808596231208977, "grad_norm": 1.1002938056254512, "learning_rate": 1.2071606351694215e-06, "loss": 0.6, "step": 9220 }, { "epoch": 0.7809443150539911, "grad_norm": 1.7909022837910336, "learning_rate": 1.206267028759419e-06, "loss": 0.5828, "step": 9221 }, { "epoch": 0.7810290069870844, "grad_norm": 1.4587246461707764, "learning_rate": 1.2053737078479117e-06, "loss": 0.5678, "step": 9222 }, { "epoch": 0.7811136989201779, "grad_norm": 1.3236037198026855, "learning_rate": 1.2044806725021273e-06, "loss": 0.633, "step": 9223 }, { "epoch": 0.7811983908532713, "grad_norm": 1.3494796215756963, "learning_rate": 1.2035879227892693e-06, "loss": 0.6203, "step": 9224 }, { "epoch": 0.7812830827863646, "grad_norm": 1.3443964425537054, "learning_rate": 1.2026954587765234e-06, "loss": 0.6047, "step": 9225 }, { "epoch": 0.781367774719458, "grad_norm": 1.4399059808476564, "learning_rate": 1.2018032805310515e-06, "loss": 0.7104, "step": 9226 }, { "epoch": 0.7814524666525513, "grad_norm": 1.4793077510397759, "learning_rate": 1.2009113881199952e-06, "loss": 0.6144, "step": 9227 }, { "epoch": 0.7815371585856448, "grad_norm": 0.627453873005818, "learning_rate": 1.200019781610476e-06, "loss": 0.8446, "step": 9228 }, { "epoch": 0.7816218505187381, "grad_norm": 1.4235500304821587, "learning_rate": 1.199128461069588e-06, "loss": 0.6625, "step": 9229 }, { "epoch": 0.7817065424518315, "grad_norm": 1.3319445749910883, "learning_rate": 1.1982374265644103e-06, "loss": 0.5977, "step": 9230 }, { "epoch": 0.7817912343849248, "grad_norm": 1.6102248387122726, "learning_rate": 1.1973466781619985e-06, "loss": 0.6017, "step": 9231 }, { "epoch": 0.7818759263180182, "grad_norm": 1.5758654132963208, "learning_rate": 1.196456215929384e-06, "loss": 0.6465, "step": 9232 }, { "epoch": 0.7819606182511116, "grad_norm": 1.3916744185761218, "learning_rate": 1.19556603993358e-06, "loss": 0.6211, "step": 9233 }, { "epoch": 0.782045310184205, "grad_norm": 1.4545711221387314, "learning_rate": 1.194676150241577e-06, "loss": 0.6179, "step": 9234 }, { "epoch": 0.7821300021172983, "grad_norm": 0.6531195300406625, "learning_rate": 1.1937865469203437e-06, "loss": 0.8668, "step": 9235 }, { "epoch": 0.7822146940503917, "grad_norm": 1.4026126302250392, "learning_rate": 1.1928972300368292e-06, "loss": 0.6215, "step": 9236 }, { "epoch": 0.782299385983485, "grad_norm": 1.6677791646070668, "learning_rate": 1.1920081996579563e-06, "loss": 0.6049, "step": 9237 }, { "epoch": 0.7823840779165785, "grad_norm": 1.7366006886587173, "learning_rate": 1.19111945585063e-06, "loss": 0.6879, "step": 9238 }, { "epoch": 0.7824687698496718, "grad_norm": 0.6195161834608224, "learning_rate": 1.1902309986817345e-06, "loss": 0.8332, "step": 9239 }, { "epoch": 0.7825534617827652, "grad_norm": 2.0369061528384047, "learning_rate": 1.1893428282181295e-06, "loss": 0.6605, "step": 9240 }, { "epoch": 0.7826381537158585, "grad_norm": 1.3144431600431639, "learning_rate": 1.1884549445266552e-06, "loss": 0.6302, "step": 9241 }, { "epoch": 0.7827228456489519, "grad_norm": 1.3595928398665966, "learning_rate": 1.1875673476741312e-06, "loss": 0.6294, "step": 9242 }, { "epoch": 0.7828075375820454, "grad_norm": 1.415421355990131, "learning_rate": 1.1866800377273503e-06, "loss": 0.6376, "step": 9243 }, { "epoch": 0.7828922295151387, "grad_norm": 1.623528924209422, "learning_rate": 1.1857930147530904e-06, "loss": 0.6329, "step": 9244 }, { "epoch": 0.782976921448232, "grad_norm": 1.4721761709341799, "learning_rate": 1.184906278818102e-06, "loss": 0.6633, "step": 9245 }, { "epoch": 0.7830616133813254, "grad_norm": 2.222286412157013, "learning_rate": 1.1840198299891181e-06, "loss": 0.643, "step": 9246 }, { "epoch": 0.7831463053144188, "grad_norm": 1.283129647651928, "learning_rate": 1.183133668332848e-06, "loss": 0.6594, "step": 9247 }, { "epoch": 0.7832309972475122, "grad_norm": 1.663963252049335, "learning_rate": 1.1822477939159816e-06, "loss": 0.6432, "step": 9248 }, { "epoch": 0.7833156891806056, "grad_norm": 1.5268159160955515, "learning_rate": 1.181362206805184e-06, "loss": 0.6301, "step": 9249 }, { "epoch": 0.7834003811136989, "grad_norm": 1.8460570775432041, "learning_rate": 1.1804769070671023e-06, "loss": 0.6004, "step": 9250 }, { "epoch": 0.7834850730467923, "grad_norm": 2.1601701253077605, "learning_rate": 1.1795918947683577e-06, "loss": 0.6398, "step": 9251 }, { "epoch": 0.7835697649798856, "grad_norm": 1.2352263889899087, "learning_rate": 1.1787071699755542e-06, "loss": 0.6181, "step": 9252 }, { "epoch": 0.7836544569129791, "grad_norm": 1.3262820284170267, "learning_rate": 1.1778227327552693e-06, "loss": 0.6756, "step": 9253 }, { "epoch": 0.7837391488460724, "grad_norm": 1.6012254563708896, "learning_rate": 1.176938583174062e-06, "loss": 0.6422, "step": 9254 }, { "epoch": 0.7838238407791658, "grad_norm": 3.2660691306221024, "learning_rate": 1.1760547212984735e-06, "loss": 0.6142, "step": 9255 }, { "epoch": 0.7839085327122591, "grad_norm": 1.3854523928930524, "learning_rate": 1.175171147195014e-06, "loss": 0.6203, "step": 9256 }, { "epoch": 0.7839932246453525, "grad_norm": 1.3775545230495494, "learning_rate": 1.1742878609301806e-06, "loss": 0.5969, "step": 9257 }, { "epoch": 0.7840779165784459, "grad_norm": 1.4301283218514305, "learning_rate": 1.1734048625704425e-06, "loss": 0.6281, "step": 9258 }, { "epoch": 0.7841626085115393, "grad_norm": 2.5425744920136073, "learning_rate": 1.1725221521822517e-06, "loss": 0.6596, "step": 9259 }, { "epoch": 0.7842473004446326, "grad_norm": 1.502595509841295, "learning_rate": 1.171639729832036e-06, "loss": 0.5914, "step": 9260 }, { "epoch": 0.784331992377726, "grad_norm": 1.5532101427890388, "learning_rate": 1.1707575955862022e-06, "loss": 0.6036, "step": 9261 }, { "epoch": 0.7844166843108193, "grad_norm": 1.394051324826415, "learning_rate": 1.1698757495111368e-06, "loss": 0.6725, "step": 9262 }, { "epoch": 0.7845013762439128, "grad_norm": 0.6658924287214821, "learning_rate": 1.168994191673204e-06, "loss": 0.8474, "step": 9263 }, { "epoch": 0.7845860681770062, "grad_norm": 1.3569729536178723, "learning_rate": 1.1681129221387433e-06, "loss": 0.6664, "step": 9264 }, { "epoch": 0.7846707601100995, "grad_norm": 2.704765190443506, "learning_rate": 1.1672319409740767e-06, "loss": 0.644, "step": 9265 }, { "epoch": 0.7847554520431929, "grad_norm": 1.2307959772154446, "learning_rate": 1.1663512482455014e-06, "loss": 0.6228, "step": 9266 }, { "epoch": 0.7848401439762862, "grad_norm": 0.6091240492972317, "learning_rate": 1.1654708440192942e-06, "loss": 0.8504, "step": 9267 }, { "epoch": 0.7849248359093797, "grad_norm": 1.633365210363941, "learning_rate": 1.1645907283617109e-06, "loss": 0.66, "step": 9268 }, { "epoch": 0.785009527842473, "grad_norm": 1.7733568297116282, "learning_rate": 1.1637109013389847e-06, "loss": 0.6557, "step": 9269 }, { "epoch": 0.7850942197755664, "grad_norm": 11.211825281040483, "learning_rate": 1.1628313630173276e-06, "loss": 0.683, "step": 9270 }, { "epoch": 0.7851789117086597, "grad_norm": 1.3460653228584054, "learning_rate": 1.1619521134629303e-06, "loss": 0.6319, "step": 9271 }, { "epoch": 0.7852636036417531, "grad_norm": 1.3424388333785156, "learning_rate": 1.161073152741959e-06, "loss": 0.5865, "step": 9272 }, { "epoch": 0.7853482955748465, "grad_norm": 0.668046418854685, "learning_rate": 1.1601944809205618e-06, "loss": 0.8599, "step": 9273 }, { "epoch": 0.7854329875079399, "grad_norm": 1.5323536931401975, "learning_rate": 1.1593160980648605e-06, "loss": 0.6625, "step": 9274 }, { "epoch": 0.7855176794410332, "grad_norm": 1.3035745496032376, "learning_rate": 1.158438004240961e-06, "loss": 0.5782, "step": 9275 }, { "epoch": 0.7856023713741266, "grad_norm": 1.8044354001615688, "learning_rate": 1.1575601995149455e-06, "loss": 0.6244, "step": 9276 }, { "epoch": 0.7856870633072199, "grad_norm": 1.4039554784939003, "learning_rate": 1.15668268395287e-06, "loss": 0.6709, "step": 9277 }, { "epoch": 0.7857717552403134, "grad_norm": 0.6211748262256118, "learning_rate": 1.155805457620774e-06, "loss": 0.8148, "step": 9278 }, { "epoch": 0.7858564471734067, "grad_norm": 1.1843623909172514, "learning_rate": 1.1549285205846745e-06, "loss": 0.6305, "step": 9279 }, { "epoch": 0.7859411391065001, "grad_norm": 1.2803376211596937, "learning_rate": 1.1540518729105632e-06, "loss": 0.6105, "step": 9280 }, { "epoch": 0.7860258310395934, "grad_norm": 1.269763116346548, "learning_rate": 1.1531755146644136e-06, "loss": 0.6569, "step": 9281 }, { "epoch": 0.7861105229726868, "grad_norm": 1.341248349871961, "learning_rate": 1.1522994459121767e-06, "loss": 0.5942, "step": 9282 }, { "epoch": 0.7861952149057803, "grad_norm": 2.101213968038109, "learning_rate": 1.1514236667197808e-06, "loss": 0.6165, "step": 9283 }, { "epoch": 0.7862799068388736, "grad_norm": 1.4677228127920696, "learning_rate": 1.1505481771531347e-06, "loss": 0.6556, "step": 9284 }, { "epoch": 0.786364598771967, "grad_norm": 1.3530125601094458, "learning_rate": 1.1496729772781206e-06, "loss": 0.657, "step": 9285 }, { "epoch": 0.7864492907050603, "grad_norm": 1.2083575299267253, "learning_rate": 1.1487980671606036e-06, "loss": 0.5893, "step": 9286 }, { "epoch": 0.7865339826381537, "grad_norm": 1.3139451048292288, "learning_rate": 1.1479234468664264e-06, "loss": 0.624, "step": 9287 }, { "epoch": 0.7866186745712471, "grad_norm": 1.371589577900841, "learning_rate": 1.1470491164614062e-06, "loss": 0.5649, "step": 9288 }, { "epoch": 0.7867033665043405, "grad_norm": 1.9968664107153764, "learning_rate": 1.1461750760113421e-06, "loss": 0.6015, "step": 9289 }, { "epoch": 0.7867880584374338, "grad_norm": 1.9997133407781054, "learning_rate": 1.1453013255820106e-06, "loss": 0.6374, "step": 9290 }, { "epoch": 0.7868727503705272, "grad_norm": 0.6344590251965343, "learning_rate": 1.144427865239166e-06, "loss": 0.8523, "step": 9291 }, { "epoch": 0.7869574423036206, "grad_norm": 1.308938714313398, "learning_rate": 1.143554695048542e-06, "loss": 0.6185, "step": 9292 }, { "epoch": 0.787042134236714, "grad_norm": 0.6225757145397435, "learning_rate": 1.1426818150758468e-06, "loss": 0.8064, "step": 9293 }, { "epoch": 0.7871268261698073, "grad_norm": 1.184807652606083, "learning_rate": 1.1418092253867719e-06, "loss": 0.5651, "step": 9294 }, { "epoch": 0.7872115181029007, "grad_norm": 1.4000200158211098, "learning_rate": 1.14093692604698e-06, "loss": 0.658, "step": 9295 }, { "epoch": 0.787296210035994, "grad_norm": 1.6261450349010975, "learning_rate": 1.1400649171221206e-06, "loss": 0.6432, "step": 9296 }, { "epoch": 0.7873809019690875, "grad_norm": 0.6087532952902057, "learning_rate": 1.1391931986778164e-06, "loss": 0.8726, "step": 9297 }, { "epoch": 0.7874655939021808, "grad_norm": 1.1994257328499547, "learning_rate": 1.1383217707796673e-06, "loss": 0.6143, "step": 9298 }, { "epoch": 0.7875502858352742, "grad_norm": 1.1776501027182487, "learning_rate": 1.1374506334932534e-06, "loss": 0.5625, "step": 9299 }, { "epoch": 0.7876349777683676, "grad_norm": 2.061433432422777, "learning_rate": 1.1365797868841338e-06, "loss": 0.603, "step": 9300 }, { "epoch": 0.7877196697014609, "grad_norm": 1.3081438921007935, "learning_rate": 1.1357092310178414e-06, "loss": 0.6222, "step": 9301 }, { "epoch": 0.7878043616345544, "grad_norm": 1.7629112155182478, "learning_rate": 1.1348389659598917e-06, "loss": 0.6377, "step": 9302 }, { "epoch": 0.7878890535676477, "grad_norm": 1.285427529929513, "learning_rate": 1.1339689917757773e-06, "loss": 0.6191, "step": 9303 }, { "epoch": 0.7879737455007411, "grad_norm": 1.8020118385536588, "learning_rate": 1.133099308530967e-06, "loss": 0.626, "step": 9304 }, { "epoch": 0.7880584374338344, "grad_norm": 1.2736098384312942, "learning_rate": 1.1322299162909122e-06, "loss": 0.6252, "step": 9305 }, { "epoch": 0.7881431293669278, "grad_norm": 1.277130294778737, "learning_rate": 1.1313608151210354e-06, "loss": 0.6414, "step": 9306 }, { "epoch": 0.7882278213000212, "grad_norm": 0.6153965733248519, "learning_rate": 1.1304920050867429e-06, "loss": 0.8887, "step": 9307 }, { "epoch": 0.7883125132331146, "grad_norm": 1.584975557473214, "learning_rate": 1.1296234862534179e-06, "loss": 0.6754, "step": 9308 }, { "epoch": 0.7883972051662079, "grad_norm": 0.655929124177409, "learning_rate": 1.1287552586864192e-06, "loss": 0.8714, "step": 9309 }, { "epoch": 0.7884818970993013, "grad_norm": 1.3214156975347953, "learning_rate": 1.1278873224510861e-06, "loss": 0.5874, "step": 9310 }, { "epoch": 0.7885665890323946, "grad_norm": 1.5264568791297755, "learning_rate": 1.1270196776127363e-06, "loss": 0.6557, "step": 9311 }, { "epoch": 0.7886512809654881, "grad_norm": 1.3005525141528806, "learning_rate": 1.1261523242366635e-06, "loss": 0.6499, "step": 9312 }, { "epoch": 0.7887359728985814, "grad_norm": 0.5947945119016502, "learning_rate": 1.1252852623881433e-06, "loss": 0.8151, "step": 9313 }, { "epoch": 0.7888206648316748, "grad_norm": 1.4035923515435391, "learning_rate": 1.124418492132423e-06, "loss": 0.6743, "step": 9314 }, { "epoch": 0.7889053567647681, "grad_norm": 1.4940903831128935, "learning_rate": 1.1235520135347334e-06, "loss": 0.6407, "step": 9315 }, { "epoch": 0.7889900486978615, "grad_norm": 2.296039171298043, "learning_rate": 1.1226858266602813e-06, "loss": 0.641, "step": 9316 }, { "epoch": 0.789074740630955, "grad_norm": 1.4602631843122966, "learning_rate": 1.1218199315742523e-06, "loss": 0.6319, "step": 9317 }, { "epoch": 0.7891594325640483, "grad_norm": 1.408314811609862, "learning_rate": 1.1209543283418111e-06, "loss": 0.6174, "step": 9318 }, { "epoch": 0.7892441244971417, "grad_norm": 1.6338589684795177, "learning_rate": 1.1200890170280954e-06, "loss": 0.6709, "step": 9319 }, { "epoch": 0.789328816430235, "grad_norm": 0.6282443823714456, "learning_rate": 1.1192239976982265e-06, "loss": 0.8123, "step": 9320 }, { "epoch": 0.7894135083633284, "grad_norm": 1.2275953029832345, "learning_rate": 1.1183592704173029e-06, "loss": 0.5691, "step": 9321 }, { "epoch": 0.7894982002964218, "grad_norm": 1.3192586165280449, "learning_rate": 1.1174948352503968e-06, "loss": 0.5758, "step": 9322 }, { "epoch": 0.7895828922295152, "grad_norm": 4.0806170244246, "learning_rate": 1.1166306922625637e-06, "loss": 0.6615, "step": 9323 }, { "epoch": 0.7896675841626085, "grad_norm": 1.39974196641106, "learning_rate": 1.1157668415188338e-06, "loss": 0.6388, "step": 9324 }, { "epoch": 0.7897522760957019, "grad_norm": 1.589912746050654, "learning_rate": 1.1149032830842172e-06, "loss": 0.6533, "step": 9325 }, { "epoch": 0.7898369680287952, "grad_norm": 1.1806481196227425, "learning_rate": 1.1140400170237026e-06, "loss": 0.6068, "step": 9326 }, { "epoch": 0.7899216599618887, "grad_norm": 2.178732043252874, "learning_rate": 1.1131770434022526e-06, "loss": 0.6294, "step": 9327 }, { "epoch": 0.790006351894982, "grad_norm": 1.5805608212140247, "learning_rate": 1.1123143622848116e-06, "loss": 0.6514, "step": 9328 }, { "epoch": 0.7900910438280754, "grad_norm": 1.4122114841843578, "learning_rate": 1.1114519737363027e-06, "loss": 0.6766, "step": 9329 }, { "epoch": 0.7901757357611687, "grad_norm": 1.4431428232015857, "learning_rate": 1.1105898778216207e-06, "loss": 0.6095, "step": 9330 }, { "epoch": 0.7902604276942621, "grad_norm": 2.1696778364077938, "learning_rate": 1.1097280746056482e-06, "loss": 0.6413, "step": 9331 }, { "epoch": 0.7903451196273555, "grad_norm": 2.1080462990931195, "learning_rate": 1.108866564153237e-06, "loss": 0.6202, "step": 9332 }, { "epoch": 0.7904298115604489, "grad_norm": 1.5987986975742028, "learning_rate": 1.1080053465292217e-06, "loss": 0.6657, "step": 9333 }, { "epoch": 0.7905145034935422, "grad_norm": 1.2332199700338407, "learning_rate": 1.107144421798414e-06, "loss": 0.5976, "step": 9334 }, { "epoch": 0.7905991954266356, "grad_norm": 1.2905618594519317, "learning_rate": 1.1062837900256013e-06, "loss": 0.6166, "step": 9335 }, { "epoch": 0.7906838873597289, "grad_norm": 1.306638353562127, "learning_rate": 1.1054234512755513e-06, "loss": 0.6871, "step": 9336 }, { "epoch": 0.7907685792928224, "grad_norm": 1.3945149727238368, "learning_rate": 1.1045634056130095e-06, "loss": 0.648, "step": 9337 }, { "epoch": 0.7908532712259158, "grad_norm": 1.725403774289994, "learning_rate": 1.103703653102699e-06, "loss": 0.6167, "step": 9338 }, { "epoch": 0.7909379631590091, "grad_norm": 1.336886835145377, "learning_rate": 1.1028441938093215e-06, "loss": 0.6344, "step": 9339 }, { "epoch": 0.7910226550921025, "grad_norm": 0.6517645906024323, "learning_rate": 1.101985027797553e-06, "loss": 0.8887, "step": 9340 }, { "epoch": 0.7911073470251958, "grad_norm": 0.6571911930554346, "learning_rate": 1.101126155132053e-06, "loss": 0.8415, "step": 9341 }, { "epoch": 0.7911920389582893, "grad_norm": 1.4484975873715134, "learning_rate": 1.1002675758774561e-06, "loss": 0.5996, "step": 9342 }, { "epoch": 0.7912767308913826, "grad_norm": 1.201658123387185, "learning_rate": 1.0994092900983732e-06, "loss": 0.5928, "step": 9343 }, { "epoch": 0.791361422824476, "grad_norm": 1.6390359744170724, "learning_rate": 1.0985512978593954e-06, "loss": 0.6095, "step": 9344 }, { "epoch": 0.7914461147575693, "grad_norm": 0.6261499666116332, "learning_rate": 1.097693599225092e-06, "loss": 0.8685, "step": 9345 }, { "epoch": 0.7915308066906627, "grad_norm": 1.6831438228730529, "learning_rate": 1.0968361942600087e-06, "loss": 0.5686, "step": 9346 }, { "epoch": 0.7916154986237561, "grad_norm": 1.6580398900494706, "learning_rate": 1.0959790830286714e-06, "loss": 0.6144, "step": 9347 }, { "epoch": 0.7917001905568495, "grad_norm": 1.4664924955794552, "learning_rate": 1.09512226559558e-06, "loss": 0.6699, "step": 9348 }, { "epoch": 0.7917848824899428, "grad_norm": 1.3729433166937828, "learning_rate": 1.094265742025215e-06, "loss": 0.6692, "step": 9349 }, { "epoch": 0.7918695744230362, "grad_norm": 1.6443236662893834, "learning_rate": 1.093409512382036e-06, "loss": 0.6978, "step": 9350 }, { "epoch": 0.7919542663561295, "grad_norm": 1.0859694340053303, "learning_rate": 1.0925535767304752e-06, "loss": 0.6578, "step": 9351 }, { "epoch": 0.792038958289223, "grad_norm": 1.312935749638186, "learning_rate": 1.0916979351349494e-06, "loss": 0.654, "step": 9352 }, { "epoch": 0.7921236502223163, "grad_norm": 1.6323596923996224, "learning_rate": 1.0908425876598512e-06, "loss": 0.6314, "step": 9353 }, { "epoch": 0.7922083421554097, "grad_norm": 3.12509079830361, "learning_rate": 1.0899875343695472e-06, "loss": 0.6701, "step": 9354 }, { "epoch": 0.792293034088503, "grad_norm": 2.1096742947113083, "learning_rate": 1.0891327753283865e-06, "loss": 0.7057, "step": 9355 }, { "epoch": 0.7923777260215964, "grad_norm": 1.697329898977264, "learning_rate": 1.0882783106006922e-06, "loss": 0.6538, "step": 9356 }, { "epoch": 0.7924624179546899, "grad_norm": 0.6986042994214481, "learning_rate": 1.087424140250769e-06, "loss": 0.819, "step": 9357 }, { "epoch": 0.7925471098877832, "grad_norm": 1.2777550357236815, "learning_rate": 1.0865702643428972e-06, "loss": 0.6586, "step": 9358 }, { "epoch": 0.7926318018208766, "grad_norm": 1.3874032327343013, "learning_rate": 1.0857166829413352e-06, "loss": 0.6382, "step": 9359 }, { "epoch": 0.7927164937539699, "grad_norm": 0.5692558219937428, "learning_rate": 1.0848633961103216e-06, "loss": 0.8161, "step": 9360 }, { "epoch": 0.7928011856870633, "grad_norm": 1.5276469759229663, "learning_rate": 1.084010403914068e-06, "loss": 0.6275, "step": 9361 }, { "epoch": 0.7928858776201567, "grad_norm": 2.1439814586875348, "learning_rate": 1.083157706416767e-06, "loss": 0.5838, "step": 9362 }, { "epoch": 0.7929705695532501, "grad_norm": 1.4151903001191368, "learning_rate": 1.0823053036825909e-06, "loss": 0.6426, "step": 9363 }, { "epoch": 0.7930552614863434, "grad_norm": 1.6089154680383126, "learning_rate": 1.0814531957756847e-06, "loss": 0.6457, "step": 9364 }, { "epoch": 0.7931399534194368, "grad_norm": 1.3696350227778664, "learning_rate": 1.0806013827601752e-06, "loss": 0.6809, "step": 9365 }, { "epoch": 0.7932246453525301, "grad_norm": 1.4330119933523213, "learning_rate": 1.0797498647001657e-06, "loss": 0.604, "step": 9366 }, { "epoch": 0.7933093372856236, "grad_norm": 1.6032945996102768, "learning_rate": 1.0788986416597374e-06, "loss": 0.6203, "step": 9367 }, { "epoch": 0.7933940292187169, "grad_norm": 1.4479263751990386, "learning_rate": 1.078047713702951e-06, "loss": 0.6394, "step": 9368 }, { "epoch": 0.7934787211518103, "grad_norm": 2.576523847608841, "learning_rate": 1.0771970808938409e-06, "loss": 0.6434, "step": 9369 }, { "epoch": 0.7935634130849036, "grad_norm": 1.7341802829336173, "learning_rate": 1.0763467432964226e-06, "loss": 0.6803, "step": 9370 }, { "epoch": 0.793648105017997, "grad_norm": 1.4869271758781972, "learning_rate": 1.075496700974688e-06, "loss": 0.6564, "step": 9371 }, { "epoch": 0.7937327969510904, "grad_norm": 1.3807435650949584, "learning_rate": 1.0746469539926085e-06, "loss": 0.6056, "step": 9372 }, { "epoch": 0.7938174888841838, "grad_norm": 1.2752239935746603, "learning_rate": 1.0737975024141312e-06, "loss": 0.5996, "step": 9373 }, { "epoch": 0.7939021808172771, "grad_norm": 1.4325003692334006, "learning_rate": 1.0729483463031831e-06, "loss": 0.6457, "step": 9374 }, { "epoch": 0.7939868727503705, "grad_norm": 1.5907081496186541, "learning_rate": 1.072099485723666e-06, "loss": 0.636, "step": 9375 }, { "epoch": 0.7940715646834638, "grad_norm": 1.5379597806755139, "learning_rate": 1.0712509207394628e-06, "loss": 0.6514, "step": 9376 }, { "epoch": 0.7941562566165573, "grad_norm": 1.410100645009156, "learning_rate": 1.07040265141443e-06, "loss": 0.6804, "step": 9377 }, { "epoch": 0.7942409485496507, "grad_norm": 1.751401660527861, "learning_rate": 1.0695546778124062e-06, "loss": 0.6552, "step": 9378 }, { "epoch": 0.794325640482744, "grad_norm": 1.4796010172558516, "learning_rate": 1.0687069999972054e-06, "loss": 0.6144, "step": 9379 }, { "epoch": 0.7944103324158374, "grad_norm": 1.2575205886659018, "learning_rate": 1.0678596180326201e-06, "loss": 0.5804, "step": 9380 }, { "epoch": 0.7944950243489307, "grad_norm": 1.2556587417842469, "learning_rate": 1.0670125319824203e-06, "loss": 0.6223, "step": 9381 }, { "epoch": 0.7945797162820242, "grad_norm": 1.5620013069865706, "learning_rate": 1.066165741910355e-06, "loss": 0.6387, "step": 9382 }, { "epoch": 0.7946644082151175, "grad_norm": 2.6561496556245228, "learning_rate": 1.0653192478801467e-06, "loss": 0.6007, "step": 9383 }, { "epoch": 0.7947491001482109, "grad_norm": 0.6274668706929138, "learning_rate": 1.0644730499555018e-06, "loss": 0.7956, "step": 9384 }, { "epoch": 0.7948337920813042, "grad_norm": 1.9089509140934866, "learning_rate": 1.0636271482000976e-06, "loss": 0.6201, "step": 9385 }, { "epoch": 0.7949184840143976, "grad_norm": 0.7540840075792221, "learning_rate": 1.0627815426775933e-06, "loss": 0.8435, "step": 9386 }, { "epoch": 0.795003175947491, "grad_norm": 1.1565751856193685, "learning_rate": 1.0619362334516297e-06, "loss": 0.5806, "step": 9387 }, { "epoch": 0.7950878678805844, "grad_norm": 0.6272964024137493, "learning_rate": 1.0610912205858158e-06, "loss": 0.824, "step": 9388 }, { "epoch": 0.7951725598136777, "grad_norm": 1.2433857134969561, "learning_rate": 1.0602465041437455e-06, "loss": 0.6086, "step": 9389 }, { "epoch": 0.7952572517467711, "grad_norm": 2.153410334280636, "learning_rate": 1.0594020841889884e-06, "loss": 0.5696, "step": 9390 }, { "epoch": 0.7953419436798644, "grad_norm": 1.4032689762051997, "learning_rate": 1.0585579607850904e-06, "loss": 0.6468, "step": 9391 }, { "epoch": 0.7954266356129579, "grad_norm": 1.548432382684312, "learning_rate": 1.057714133995576e-06, "loss": 0.6267, "step": 9392 }, { "epoch": 0.7955113275460513, "grad_norm": 1.2666958869859, "learning_rate": 1.0568706038839487e-06, "loss": 0.6318, "step": 9393 }, { "epoch": 0.7955960194791446, "grad_norm": 1.5339881082114128, "learning_rate": 1.0560273705136887e-06, "loss": 0.6627, "step": 9394 }, { "epoch": 0.795680711412238, "grad_norm": 1.5091394172121433, "learning_rate": 1.0551844339482543e-06, "loss": 0.6681, "step": 9395 }, { "epoch": 0.7957654033453314, "grad_norm": 1.3964779004200316, "learning_rate": 1.0543417942510786e-06, "loss": 0.6268, "step": 9396 }, { "epoch": 0.7958500952784248, "grad_norm": 1.7693309824143217, "learning_rate": 1.053499451485578e-06, "loss": 0.6517, "step": 9397 }, { "epoch": 0.7959347872115181, "grad_norm": 1.5755196192448504, "learning_rate": 1.0526574057151396e-06, "loss": 0.5626, "step": 9398 }, { "epoch": 0.7960194791446115, "grad_norm": 1.1283324354351307, "learning_rate": 1.0518156570031336e-06, "loss": 0.6157, "step": 9399 }, { "epoch": 0.7961041710777048, "grad_norm": 1.2684656569007149, "learning_rate": 1.0509742054129062e-06, "loss": 0.6267, "step": 9400 }, { "epoch": 0.7961888630107983, "grad_norm": 2.2375326041501262, "learning_rate": 1.0501330510077811e-06, "loss": 0.612, "step": 9401 }, { "epoch": 0.7962735549438916, "grad_norm": 0.5489828925097701, "learning_rate": 1.0492921938510591e-06, "loss": 0.8305, "step": 9402 }, { "epoch": 0.796358246876985, "grad_norm": 1.200315054961843, "learning_rate": 1.0484516340060208e-06, "loss": 0.609, "step": 9403 }, { "epoch": 0.7964429388100783, "grad_norm": 1.290119076005527, "learning_rate": 1.0476113715359205e-06, "loss": 0.5708, "step": 9404 }, { "epoch": 0.7965276307431717, "grad_norm": 1.248522448641734, "learning_rate": 1.0467714065039947e-06, "loss": 0.6513, "step": 9405 }, { "epoch": 0.7966123226762651, "grad_norm": 1.2326675999222871, "learning_rate": 1.0459317389734509e-06, "loss": 0.5744, "step": 9406 }, { "epoch": 0.7966970146093585, "grad_norm": 1.9309408780296426, "learning_rate": 1.0450923690074832e-06, "loss": 0.6428, "step": 9407 }, { "epoch": 0.7967817065424518, "grad_norm": 0.6064119113232335, "learning_rate": 1.0442532966692582e-06, "loss": 0.8401, "step": 9408 }, { "epoch": 0.7968663984755452, "grad_norm": 1.4816106514449898, "learning_rate": 1.0434145220219178e-06, "loss": 0.6229, "step": 9409 }, { "epoch": 0.7969510904086385, "grad_norm": 2.328917121722492, "learning_rate": 1.0425760451285855e-06, "loss": 0.6733, "step": 9410 }, { "epoch": 0.797035782341732, "grad_norm": 1.4004032430469389, "learning_rate": 1.041737866052363e-06, "loss": 0.5992, "step": 9411 }, { "epoch": 0.7971204742748254, "grad_norm": 1.3333584162876428, "learning_rate": 1.0408999848563251e-06, "loss": 0.6354, "step": 9412 }, { "epoch": 0.7972051662079187, "grad_norm": 1.3392462417887518, "learning_rate": 1.0400624016035272e-06, "loss": 0.5792, "step": 9413 }, { "epoch": 0.797289858141012, "grad_norm": 1.448831088930133, "learning_rate": 1.0392251163570028e-06, "loss": 0.6296, "step": 9414 }, { "epoch": 0.7973745500741054, "grad_norm": 1.41100095858665, "learning_rate": 1.0383881291797615e-06, "loss": 0.648, "step": 9415 }, { "epoch": 0.7974592420071989, "grad_norm": 1.2139337190018082, "learning_rate": 1.0375514401347924e-06, "loss": 0.6109, "step": 9416 }, { "epoch": 0.7975439339402922, "grad_norm": 1.62886346788725, "learning_rate": 1.0367150492850586e-06, "loss": 0.6542, "step": 9417 }, { "epoch": 0.7976286258733856, "grad_norm": 1.417221778376838, "learning_rate": 1.0358789566935036e-06, "loss": 0.6483, "step": 9418 }, { "epoch": 0.7977133178064789, "grad_norm": 2.1098970450178487, "learning_rate": 1.0350431624230495e-06, "loss": 0.6462, "step": 9419 }, { "epoch": 0.7977980097395723, "grad_norm": 0.5998758990197248, "learning_rate": 1.0342076665365918e-06, "loss": 0.8845, "step": 9420 }, { "epoch": 0.7978827016726657, "grad_norm": 1.6738488604589716, "learning_rate": 1.033372469097007e-06, "loss": 0.6024, "step": 9421 }, { "epoch": 0.7979673936057591, "grad_norm": 1.4543713658349828, "learning_rate": 1.0325375701671482e-06, "loss": 0.6147, "step": 9422 }, { "epoch": 0.7980520855388524, "grad_norm": 1.4052443087571764, "learning_rate": 1.0317029698098457e-06, "loss": 0.6405, "step": 9423 }, { "epoch": 0.7981367774719458, "grad_norm": 0.5995223720564308, "learning_rate": 1.0308686680879093e-06, "loss": 0.8471, "step": 9424 }, { "epoch": 0.7982214694050391, "grad_norm": 1.7787813379102804, "learning_rate": 1.0300346650641218e-06, "loss": 0.5952, "step": 9425 }, { "epoch": 0.7983061613381326, "grad_norm": 1.408954953899036, "learning_rate": 1.0292009608012476e-06, "loss": 0.6374, "step": 9426 }, { "epoch": 0.7983908532712259, "grad_norm": 1.2876108369235821, "learning_rate": 1.0283675553620281e-06, "loss": 0.6117, "step": 9427 }, { "epoch": 0.7984755452043193, "grad_norm": 1.4493922824246699, "learning_rate": 1.0275344488091805e-06, "loss": 0.5897, "step": 9428 }, { "epoch": 0.7985602371374126, "grad_norm": 0.6134253667314168, "learning_rate": 1.026701641205402e-06, "loss": 0.8215, "step": 9429 }, { "epoch": 0.798644929070506, "grad_norm": 1.146419651343858, "learning_rate": 1.0258691326133635e-06, "loss": 0.625, "step": 9430 }, { "epoch": 0.7987296210035995, "grad_norm": 1.330876250489551, "learning_rate": 1.0250369230957163e-06, "loss": 0.5079, "step": 9431 }, { "epoch": 0.7988143129366928, "grad_norm": 1.8048396689598531, "learning_rate": 1.0242050127150909e-06, "loss": 0.6381, "step": 9432 }, { "epoch": 0.7988990048697862, "grad_norm": 1.5142753661652608, "learning_rate": 1.0233734015340896e-06, "loss": 0.5565, "step": 9433 }, { "epoch": 0.7989836968028795, "grad_norm": 1.399565018282166, "learning_rate": 1.022542089615297e-06, "loss": 0.6661, "step": 9434 }, { "epoch": 0.7990683887359729, "grad_norm": 1.443496321491622, "learning_rate": 1.021711077021274e-06, "loss": 0.5405, "step": 9435 }, { "epoch": 0.7991530806690663, "grad_norm": 1.2911446043159496, "learning_rate": 1.0208803638145586e-06, "loss": 0.5626, "step": 9436 }, { "epoch": 0.7992377726021597, "grad_norm": 1.4416098540616544, "learning_rate": 1.0200499500576672e-06, "loss": 0.6477, "step": 9437 }, { "epoch": 0.799322464535253, "grad_norm": 1.2026845403444868, "learning_rate": 1.019219835813091e-06, "loss": 0.6406, "step": 9438 }, { "epoch": 0.7994071564683464, "grad_norm": 1.6432695023304011, "learning_rate": 1.0183900211433012e-06, "loss": 0.5657, "step": 9439 }, { "epoch": 0.7994918484014397, "grad_norm": 1.2972904219253418, "learning_rate": 1.017560506110747e-06, "loss": 0.6227, "step": 9440 }, { "epoch": 0.7995765403345332, "grad_norm": 0.6450533635563733, "learning_rate": 1.0167312907778515e-06, "loss": 0.7952, "step": 9441 }, { "epoch": 0.7996612322676265, "grad_norm": 1.251761203302184, "learning_rate": 1.015902375207019e-06, "loss": 0.6071, "step": 9442 }, { "epoch": 0.7997459242007199, "grad_norm": 1.1451786751975626, "learning_rate": 1.0150737594606297e-06, "loss": 0.5976, "step": 9443 }, { "epoch": 0.7998306161338132, "grad_norm": 0.6563383530488084, "learning_rate": 1.0142454436010408e-06, "loss": 0.802, "step": 9444 }, { "epoch": 0.7999153080669066, "grad_norm": 1.3416927846706506, "learning_rate": 1.0134174276905895e-06, "loss": 0.6031, "step": 9445 }, { "epoch": 0.8, "grad_norm": 1.642704011564435, "learning_rate": 1.012589711791585e-06, "loss": 0.6535, "step": 9446 }, { "epoch": 0.8000846919330934, "grad_norm": 1.3352622125756257, "learning_rate": 1.0117622959663192e-06, "loss": 0.6429, "step": 9447 }, { "epoch": 0.8001693838661867, "grad_norm": 1.2976796089307878, "learning_rate": 1.0109351802770595e-06, "loss": 0.6312, "step": 9448 }, { "epoch": 0.8002540757992801, "grad_norm": 1.4777919333382599, "learning_rate": 1.0101083647860505e-06, "loss": 0.6083, "step": 9449 }, { "epoch": 0.8003387677323734, "grad_norm": 1.686522398711001, "learning_rate": 1.0092818495555157e-06, "loss": 0.6337, "step": 9450 }, { "epoch": 0.8004234596654669, "grad_norm": 1.2316643210043117, "learning_rate": 1.0084556346476526e-06, "loss": 0.5852, "step": 9451 }, { "epoch": 0.8005081515985603, "grad_norm": 1.3924318817870815, "learning_rate": 1.0076297201246387e-06, "loss": 0.6901, "step": 9452 }, { "epoch": 0.8005928435316536, "grad_norm": 1.4184354032730233, "learning_rate": 1.0068041060486306e-06, "loss": 0.6555, "step": 9453 }, { "epoch": 0.800677535464747, "grad_norm": 1.9827117781941153, "learning_rate": 1.0059787924817571e-06, "loss": 0.6567, "step": 9454 }, { "epoch": 0.8007622273978403, "grad_norm": 1.3147147327248305, "learning_rate": 1.0051537794861288e-06, "loss": 0.6279, "step": 9455 }, { "epoch": 0.8008469193309338, "grad_norm": 0.6341047096907052, "learning_rate": 1.0043290671238326e-06, "loss": 0.8918, "step": 9456 }, { "epoch": 0.8009316112640271, "grad_norm": 1.4302066477947406, "learning_rate": 1.0035046554569316e-06, "loss": 0.6407, "step": 9457 }, { "epoch": 0.8010163031971205, "grad_norm": 2.0054027923365836, "learning_rate": 1.0026805445474697e-06, "loss": 0.6536, "step": 9458 }, { "epoch": 0.8011009951302138, "grad_norm": 1.4203120752647949, "learning_rate": 1.001856734457462e-06, "loss": 0.5775, "step": 9459 }, { "epoch": 0.8011856870633072, "grad_norm": 0.6255658590033433, "learning_rate": 1.0010332252489063e-06, "loss": 0.8868, "step": 9460 }, { "epoch": 0.8012703789964006, "grad_norm": 1.7038464653547938, "learning_rate": 1.000210016983777e-06, "loss": 0.6369, "step": 9461 }, { "epoch": 0.801355070929494, "grad_norm": 1.367401299326666, "learning_rate": 9.993871097240216e-07, "loss": 0.645, "step": 9462 }, { "epoch": 0.8014397628625873, "grad_norm": 1.366886124639685, "learning_rate": 9.985645035315728e-07, "loss": 0.6128, "step": 9463 }, { "epoch": 0.8015244547956807, "grad_norm": 1.2413956766313787, "learning_rate": 9.97742198468332e-07, "loss": 0.6111, "step": 9464 }, { "epoch": 0.801609146728774, "grad_norm": 0.6887327991697536, "learning_rate": 9.969201945961843e-07, "loss": 0.8975, "step": 9465 }, { "epoch": 0.8016938386618675, "grad_norm": 1.735224449295971, "learning_rate": 9.960984919769907e-07, "loss": 0.5531, "step": 9466 }, { "epoch": 0.8017785305949608, "grad_norm": 1.4163185923073565, "learning_rate": 9.952770906725856e-07, "loss": 0.6458, "step": 9467 }, { "epoch": 0.8018632225280542, "grad_norm": 1.4338193099465621, "learning_rate": 9.944559907447855e-07, "loss": 0.5537, "step": 9468 }, { "epoch": 0.8019479144611475, "grad_norm": 1.5741924159799354, "learning_rate": 9.936351922553822e-07, "loss": 0.6372, "step": 9469 }, { "epoch": 0.8020326063942409, "grad_norm": 1.5825337299745283, "learning_rate": 9.928146952661455e-07, "loss": 0.6045, "step": 9470 }, { "epoch": 0.8021172983273344, "grad_norm": 0.6741522124518674, "learning_rate": 9.919944998388238e-07, "loss": 0.879, "step": 9471 }, { "epoch": 0.8022019902604277, "grad_norm": 1.6240130965271498, "learning_rate": 9.911746060351374e-07, "loss": 0.609, "step": 9472 }, { "epoch": 0.8022866821935211, "grad_norm": 1.5947551691559696, "learning_rate": 9.9035501391679e-07, "loss": 0.6605, "step": 9473 }, { "epoch": 0.8023713741266144, "grad_norm": 1.737773962062135, "learning_rate": 9.895357235454612e-07, "loss": 0.6119, "step": 9474 }, { "epoch": 0.8024560660597078, "grad_norm": 1.8840037838515635, "learning_rate": 9.887167349828042e-07, "loss": 0.6223, "step": 9475 }, { "epoch": 0.8025407579928012, "grad_norm": 1.2295673025508105, "learning_rate": 9.878980482904538e-07, "loss": 0.6436, "step": 9476 }, { "epoch": 0.8026254499258946, "grad_norm": 1.49770507721965, "learning_rate": 9.870796635300206e-07, "loss": 0.6358, "step": 9477 }, { "epoch": 0.8027101418589879, "grad_norm": 1.5657845248522801, "learning_rate": 9.862615807630915e-07, "loss": 0.5811, "step": 9478 }, { "epoch": 0.8027948337920813, "grad_norm": 1.4733255062196908, "learning_rate": 9.85443800051234e-07, "loss": 0.6195, "step": 9479 }, { "epoch": 0.8028795257251746, "grad_norm": 1.3792990279736677, "learning_rate": 9.84626321455987e-07, "loss": 0.6066, "step": 9480 }, { "epoch": 0.8029642176582681, "grad_norm": 1.6801859387099325, "learning_rate": 9.83809145038872e-07, "loss": 0.6148, "step": 9481 }, { "epoch": 0.8030489095913614, "grad_norm": 1.5146732831763101, "learning_rate": 9.82992270861387e-07, "loss": 0.6701, "step": 9482 }, { "epoch": 0.8031336015244548, "grad_norm": 1.5098266644114873, "learning_rate": 9.821756989850017e-07, "loss": 0.6742, "step": 9483 }, { "epoch": 0.8032182934575481, "grad_norm": 1.652319309171554, "learning_rate": 9.81359429471172e-07, "loss": 0.5764, "step": 9484 }, { "epoch": 0.8033029853906415, "grad_norm": 1.2284429001940225, "learning_rate": 9.805434623813258e-07, "loss": 0.6104, "step": 9485 }, { "epoch": 0.803387677323735, "grad_norm": 1.5654522375771558, "learning_rate": 9.797277977768671e-07, "loss": 0.6293, "step": 9486 }, { "epoch": 0.8034723692568283, "grad_norm": 1.6381930810720067, "learning_rate": 9.789124357191815e-07, "loss": 0.5789, "step": 9487 }, { "epoch": 0.8035570611899217, "grad_norm": 1.9517638516415563, "learning_rate": 9.78097376269626e-07, "loss": 0.5879, "step": 9488 }, { "epoch": 0.803641753123015, "grad_norm": 1.4739111393788227, "learning_rate": 9.772826194895403e-07, "loss": 0.6704, "step": 9489 }, { "epoch": 0.8037264450561084, "grad_norm": 1.45402758893249, "learning_rate": 9.764681654402385e-07, "loss": 0.644, "step": 9490 }, { "epoch": 0.8038111369892018, "grad_norm": 1.262756573940111, "learning_rate": 9.756540141830134e-07, "loss": 0.6302, "step": 9491 }, { "epoch": 0.8038958289222952, "grad_norm": 1.268030815575008, "learning_rate": 9.74840165779133e-07, "loss": 0.6534, "step": 9492 }, { "epoch": 0.8039805208553885, "grad_norm": 1.7712764102334213, "learning_rate": 9.740266202898457e-07, "loss": 0.6222, "step": 9493 }, { "epoch": 0.8040652127884819, "grad_norm": 1.2050081622648434, "learning_rate": 9.73213377776373e-07, "loss": 0.5851, "step": 9494 }, { "epoch": 0.8041499047215752, "grad_norm": 1.4481376121077265, "learning_rate": 9.724004382999175e-07, "loss": 0.635, "step": 9495 }, { "epoch": 0.8042345966546687, "grad_norm": 1.501720087111398, "learning_rate": 9.715878019216545e-07, "loss": 0.5967, "step": 9496 }, { "epoch": 0.804319288587762, "grad_norm": 4.66987211971565, "learning_rate": 9.707754687027416e-07, "loss": 0.6515, "step": 9497 }, { "epoch": 0.8044039805208554, "grad_norm": 1.30652382161573, "learning_rate": 9.6996343870431e-07, "loss": 0.6222, "step": 9498 }, { "epoch": 0.8044886724539487, "grad_norm": 1.2134417606042427, "learning_rate": 9.691517119874693e-07, "loss": 0.5858, "step": 9499 }, { "epoch": 0.8045733643870422, "grad_norm": 1.661867446535953, "learning_rate": 9.683402886133085e-07, "loss": 0.6412, "step": 9500 }, { "epoch": 0.8046580563201355, "grad_norm": 1.2990055673458147, "learning_rate": 9.675291686428885e-07, "loss": 0.6378, "step": 9501 }, { "epoch": 0.8047427482532289, "grad_norm": 1.6629765416427267, "learning_rate": 9.667183521372508e-07, "loss": 0.6248, "step": 9502 }, { "epoch": 0.8048274401863222, "grad_norm": 1.4643427803140376, "learning_rate": 9.65907839157415e-07, "loss": 0.6103, "step": 9503 }, { "epoch": 0.8049121321194156, "grad_norm": 1.3632788513058824, "learning_rate": 9.650976297643755e-07, "loss": 0.5725, "step": 9504 }, { "epoch": 0.804996824052509, "grad_norm": 0.6584841726618692, "learning_rate": 9.64287724019105e-07, "loss": 0.8523, "step": 9505 }, { "epoch": 0.8050815159856024, "grad_norm": 1.6369624187126022, "learning_rate": 9.634781219825552e-07, "loss": 0.609, "step": 9506 }, { "epoch": 0.8051662079186958, "grad_norm": 3.4227349639868767, "learning_rate": 9.626688237156495e-07, "loss": 0.6306, "step": 9507 }, { "epoch": 0.8052508998517891, "grad_norm": 1.268387957401788, "learning_rate": 9.618598292792946e-07, "loss": 0.6172, "step": 9508 }, { "epoch": 0.8053355917848825, "grad_norm": 1.8188638666969899, "learning_rate": 9.610511387343695e-07, "loss": 0.5813, "step": 9509 }, { "epoch": 0.8054202837179759, "grad_norm": 0.6569054749268427, "learning_rate": 9.602427521417334e-07, "loss": 0.8908, "step": 9510 }, { "epoch": 0.8055049756510693, "grad_norm": 1.3391774503314746, "learning_rate": 9.594346695622219e-07, "loss": 0.6482, "step": 9511 }, { "epoch": 0.8055896675841626, "grad_norm": 1.5683146064805094, "learning_rate": 9.58626891056647e-07, "loss": 0.6433, "step": 9512 }, { "epoch": 0.805674359517256, "grad_norm": 1.6728800172909806, "learning_rate": 9.57819416685799e-07, "loss": 0.5777, "step": 9513 }, { "epoch": 0.8057590514503493, "grad_norm": 0.6364880593856502, "learning_rate": 9.570122465104454e-07, "loss": 0.8445, "step": 9514 }, { "epoch": 0.8058437433834428, "grad_norm": 1.397730049801433, "learning_rate": 9.562053805913273e-07, "loss": 0.6368, "step": 9515 }, { "epoch": 0.8059284353165361, "grad_norm": 1.5418977139807233, "learning_rate": 9.553988189891688e-07, "loss": 0.6678, "step": 9516 }, { "epoch": 0.8060131272496295, "grad_norm": 1.3960866409326655, "learning_rate": 9.54592561764665e-07, "loss": 0.6341, "step": 9517 }, { "epoch": 0.8060978191827228, "grad_norm": 1.2742988160702793, "learning_rate": 9.537866089784908e-07, "loss": 0.644, "step": 9518 }, { "epoch": 0.8061825111158162, "grad_norm": 1.2083882872162752, "learning_rate": 9.529809606913032e-07, "loss": 0.6027, "step": 9519 }, { "epoch": 0.8062672030489096, "grad_norm": 0.5945712593382556, "learning_rate": 9.521756169637264e-07, "loss": 0.8881, "step": 9520 }, { "epoch": 0.806351894982003, "grad_norm": 1.256352120561624, "learning_rate": 9.513705778563693e-07, "loss": 0.669, "step": 9521 }, { "epoch": 0.8064365869150963, "grad_norm": 1.460236645419157, "learning_rate": 9.505658434298154e-07, "loss": 0.6364, "step": 9522 }, { "epoch": 0.8065212788481897, "grad_norm": 2.140072737470278, "learning_rate": 9.497614137446237e-07, "loss": 0.6134, "step": 9523 }, { "epoch": 0.806605970781283, "grad_norm": 0.5985767531652484, "learning_rate": 9.489572888613325e-07, "loss": 0.8635, "step": 9524 }, { "epoch": 0.8066906627143765, "grad_norm": 1.3738723190876974, "learning_rate": 9.481534688404564e-07, "loss": 0.6246, "step": 9525 }, { "epoch": 0.8067753546474699, "grad_norm": 1.1457492873730628, "learning_rate": 9.473499537424874e-07, "loss": 0.5923, "step": 9526 }, { "epoch": 0.8068600465805632, "grad_norm": 1.3862381365230763, "learning_rate": 9.465467436278953e-07, "loss": 0.6249, "step": 9527 }, { "epoch": 0.8069447385136566, "grad_norm": 1.2959516067842878, "learning_rate": 9.457438385571238e-07, "loss": 0.6717, "step": 9528 }, { "epoch": 0.8070294304467499, "grad_norm": 1.6029556447487694, "learning_rate": 9.44941238590597e-07, "loss": 0.5422, "step": 9529 }, { "epoch": 0.8071141223798434, "grad_norm": 1.397969411327484, "learning_rate": 9.441389437887155e-07, "loss": 0.6289, "step": 9530 }, { "epoch": 0.8071988143129367, "grad_norm": 0.6030248698022612, "learning_rate": 9.433369542118537e-07, "loss": 0.8045, "step": 9531 }, { "epoch": 0.8072835062460301, "grad_norm": 1.3458747104480269, "learning_rate": 9.425352699203677e-07, "loss": 0.6015, "step": 9532 }, { "epoch": 0.8073681981791234, "grad_norm": 1.4608614578685546, "learning_rate": 9.417338909745877e-07, "loss": 0.6248, "step": 9533 }, { "epoch": 0.8074528901122168, "grad_norm": 0.6117296424810531, "learning_rate": 9.40932817434822e-07, "loss": 0.8948, "step": 9534 }, { "epoch": 0.8075375820453102, "grad_norm": 1.3011112704667283, "learning_rate": 9.401320493613563e-07, "loss": 0.6385, "step": 9535 }, { "epoch": 0.8076222739784036, "grad_norm": 2.24651602089225, "learning_rate": 9.393315868144515e-07, "loss": 0.6109, "step": 9536 }, { "epoch": 0.8077069659114969, "grad_norm": 1.1736174766732361, "learning_rate": 9.38531429854348e-07, "loss": 0.6464, "step": 9537 }, { "epoch": 0.8077916578445903, "grad_norm": 1.888330862853207, "learning_rate": 9.377315785412583e-07, "loss": 0.6251, "step": 9538 }, { "epoch": 0.8078763497776836, "grad_norm": 1.3827461145857785, "learning_rate": 9.369320329353792e-07, "loss": 0.6593, "step": 9539 }, { "epoch": 0.8079610417107771, "grad_norm": 1.5560313495939158, "learning_rate": 9.361327930968811e-07, "loss": 0.6632, "step": 9540 }, { "epoch": 0.8080457336438704, "grad_norm": 1.5350174859307117, "learning_rate": 9.35333859085908e-07, "loss": 0.6055, "step": 9541 }, { "epoch": 0.8081304255769638, "grad_norm": 2.036678587495229, "learning_rate": 9.345352309625855e-07, "loss": 0.6064, "step": 9542 }, { "epoch": 0.8082151175100571, "grad_norm": 1.3990935540360736, "learning_rate": 9.337369087870157e-07, "loss": 0.64, "step": 9543 }, { "epoch": 0.8082998094431505, "grad_norm": 1.9454277680257106, "learning_rate": 9.329388926192745e-07, "loss": 0.5724, "step": 9544 }, { "epoch": 0.808384501376244, "grad_norm": 1.263009347955088, "learning_rate": 9.321411825194177e-07, "loss": 0.6605, "step": 9545 }, { "epoch": 0.8084691933093373, "grad_norm": 1.3758553859607354, "learning_rate": 9.313437785474766e-07, "loss": 0.5995, "step": 9546 }, { "epoch": 0.8085538852424307, "grad_norm": 1.2661791269818266, "learning_rate": 9.305466807634617e-07, "loss": 0.6265, "step": 9547 }, { "epoch": 0.808638577175524, "grad_norm": 0.6538127394646976, "learning_rate": 9.297498892273582e-07, "loss": 0.86, "step": 9548 }, { "epoch": 0.8087232691086174, "grad_norm": 1.3141400400951517, "learning_rate": 9.289534039991277e-07, "loss": 0.6518, "step": 9549 }, { "epoch": 0.8088079610417108, "grad_norm": 1.2672310119564505, "learning_rate": 9.281572251387106e-07, "loss": 0.5625, "step": 9550 }, { "epoch": 0.8088926529748042, "grad_norm": 1.3477191882817348, "learning_rate": 9.273613527060255e-07, "loss": 0.6239, "step": 9551 }, { "epoch": 0.8089773449078975, "grad_norm": 1.439797996669705, "learning_rate": 9.265657867609624e-07, "loss": 0.5534, "step": 9552 }, { "epoch": 0.8090620368409909, "grad_norm": 1.4770236306069455, "learning_rate": 9.257705273633938e-07, "loss": 0.6799, "step": 9553 }, { "epoch": 0.8091467287740842, "grad_norm": 1.2371443511194977, "learning_rate": 9.249755745731676e-07, "loss": 0.6469, "step": 9554 }, { "epoch": 0.8092314207071777, "grad_norm": 1.5214016946935578, "learning_rate": 9.241809284501069e-07, "loss": 0.6209, "step": 9555 }, { "epoch": 0.809316112640271, "grad_norm": 3.203420361683441, "learning_rate": 9.233865890540156e-07, "loss": 0.6738, "step": 9556 }, { "epoch": 0.8094008045733644, "grad_norm": 1.303317688075506, "learning_rate": 9.225925564446686e-07, "loss": 0.6579, "step": 9557 }, { "epoch": 0.8094854965064577, "grad_norm": 1.1771832423550417, "learning_rate": 9.217988306818232e-07, "loss": 0.5985, "step": 9558 }, { "epoch": 0.8095701884395511, "grad_norm": 0.614520394245952, "learning_rate": 9.210054118252104e-07, "loss": 0.8475, "step": 9559 }, { "epoch": 0.8096548803726445, "grad_norm": 0.6765786948256433, "learning_rate": 9.202122999345397e-07, "loss": 0.8245, "step": 9560 }, { "epoch": 0.8097395723057379, "grad_norm": 1.3176962122167337, "learning_rate": 9.194194950694984e-07, "loss": 0.6722, "step": 9561 }, { "epoch": 0.8098242642388312, "grad_norm": 1.906733855191685, "learning_rate": 9.18626997289746e-07, "loss": 0.6539, "step": 9562 }, { "epoch": 0.8099089561719246, "grad_norm": 1.9354823743392733, "learning_rate": 9.178348066549248e-07, "loss": 0.6248, "step": 9563 }, { "epoch": 0.809993648105018, "grad_norm": 0.5539517899628855, "learning_rate": 9.170429232246508e-07, "loss": 0.7792, "step": 9564 }, { "epoch": 0.8100783400381114, "grad_norm": 1.444096748450578, "learning_rate": 9.162513470585166e-07, "loss": 0.6431, "step": 9565 }, { "epoch": 0.8101630319712048, "grad_norm": 1.507709376360525, "learning_rate": 9.154600782160927e-07, "loss": 0.6028, "step": 9566 }, { "epoch": 0.8102477239042981, "grad_norm": 1.2516081802354213, "learning_rate": 9.146691167569266e-07, "loss": 0.5818, "step": 9567 }, { "epoch": 0.8103324158373915, "grad_norm": 1.1095473945748477, "learning_rate": 9.138784627405422e-07, "loss": 0.5944, "step": 9568 }, { "epoch": 0.8104171077704848, "grad_norm": 1.3290897037012006, "learning_rate": 9.130881162264422e-07, "loss": 0.6393, "step": 9569 }, { "epoch": 0.8105017997035783, "grad_norm": 1.3555254673351222, "learning_rate": 9.122980772741008e-07, "loss": 0.6021, "step": 9570 }, { "epoch": 0.8105864916366716, "grad_norm": 1.231395221210144, "learning_rate": 9.115083459429752e-07, "loss": 0.6616, "step": 9571 }, { "epoch": 0.810671183569765, "grad_norm": 1.2374591692849068, "learning_rate": 9.107189222924967e-07, "loss": 0.6147, "step": 9572 }, { "epoch": 0.8107558755028583, "grad_norm": 1.4099554059525015, "learning_rate": 9.099298063820722e-07, "loss": 0.6344, "step": 9573 }, { "epoch": 0.8108405674359517, "grad_norm": 2.5702239762676977, "learning_rate": 9.091409982710875e-07, "loss": 0.6149, "step": 9574 }, { "epoch": 0.8109252593690451, "grad_norm": 1.502017161326681, "learning_rate": 9.083524980189052e-07, "loss": 0.5981, "step": 9575 }, { "epoch": 0.8110099513021385, "grad_norm": 0.6452571941165869, "learning_rate": 9.075643056848637e-07, "loss": 0.8556, "step": 9576 }, { "epoch": 0.8110946432352318, "grad_norm": 1.2537174568406606, "learning_rate": 9.067764213282792e-07, "loss": 0.6065, "step": 9577 }, { "epoch": 0.8111793351683252, "grad_norm": 2.0472543869192483, "learning_rate": 9.059888450084431e-07, "loss": 0.6615, "step": 9578 }, { "epoch": 0.8112640271014185, "grad_norm": 0.5769441003564167, "learning_rate": 9.052015767846251e-07, "loss": 0.8603, "step": 9579 }, { "epoch": 0.811348719034512, "grad_norm": 1.5847390957698024, "learning_rate": 9.044146167160716e-07, "loss": 0.6312, "step": 9580 }, { "epoch": 0.8114334109676054, "grad_norm": 1.3403699251489476, "learning_rate": 9.03627964862005e-07, "loss": 0.6563, "step": 9581 }, { "epoch": 0.8115181029006987, "grad_norm": 2.6622515935357103, "learning_rate": 9.028416212816266e-07, "loss": 0.6819, "step": 9582 }, { "epoch": 0.811602794833792, "grad_norm": 1.3706044206145553, "learning_rate": 9.020555860341107e-07, "loss": 0.6091, "step": 9583 }, { "epoch": 0.8116874867668854, "grad_norm": 1.5866331025609073, "learning_rate": 9.012698591786112e-07, "loss": 0.6233, "step": 9584 }, { "epoch": 0.8117721786999789, "grad_norm": 1.4391560610091152, "learning_rate": 9.004844407742602e-07, "loss": 0.6008, "step": 9585 }, { "epoch": 0.8118568706330722, "grad_norm": 1.487327044161486, "learning_rate": 8.996993308801616e-07, "loss": 0.5925, "step": 9586 }, { "epoch": 0.8119415625661656, "grad_norm": 1.4966188370301912, "learning_rate": 8.989145295554008e-07, "loss": 0.6149, "step": 9587 }, { "epoch": 0.8120262544992589, "grad_norm": 1.2838940320334296, "learning_rate": 8.981300368590373e-07, "loss": 0.6278, "step": 9588 }, { "epoch": 0.8121109464323523, "grad_norm": 1.5305107448874187, "learning_rate": 8.973458528501094e-07, "loss": 0.6129, "step": 9589 }, { "epoch": 0.8121956383654457, "grad_norm": 1.314440925608489, "learning_rate": 8.965619775876322e-07, "loss": 0.65, "step": 9590 }, { "epoch": 0.8122803302985391, "grad_norm": 1.517432779137348, "learning_rate": 8.957784111305928e-07, "loss": 0.6123, "step": 9591 }, { "epoch": 0.8123650222316324, "grad_norm": 1.8138706750319953, "learning_rate": 8.949951535379614e-07, "loss": 0.6684, "step": 9592 }, { "epoch": 0.8124497141647258, "grad_norm": 1.7443094259710887, "learning_rate": 8.94212204868683e-07, "loss": 0.6427, "step": 9593 }, { "epoch": 0.8125344060978191, "grad_norm": 1.3031487253217533, "learning_rate": 8.934295651816749e-07, "loss": 0.6353, "step": 9594 }, { "epoch": 0.8126190980309126, "grad_norm": 2.0125983065347186, "learning_rate": 8.926472345358383e-07, "loss": 0.6584, "step": 9595 }, { "epoch": 0.8127037899640059, "grad_norm": 2.2836887676278264, "learning_rate": 8.918652129900484e-07, "loss": 0.5946, "step": 9596 }, { "epoch": 0.8127884818970993, "grad_norm": 1.6826907172818972, "learning_rate": 8.910835006031532e-07, "loss": 0.6279, "step": 9597 }, { "epoch": 0.8128731738301926, "grad_norm": 1.1716568790534563, "learning_rate": 8.903020974339837e-07, "loss": 0.6437, "step": 9598 }, { "epoch": 0.812957865763286, "grad_norm": 1.2918064550760127, "learning_rate": 8.895210035413421e-07, "loss": 0.5658, "step": 9599 }, { "epoch": 0.8130425576963795, "grad_norm": 1.3279886893498072, "learning_rate": 8.88740218984011e-07, "loss": 0.6123, "step": 9600 }, { "epoch": 0.8131272496294728, "grad_norm": 1.46501667528883, "learning_rate": 8.879597438207482e-07, "loss": 0.5332, "step": 9601 }, { "epoch": 0.8132119415625662, "grad_norm": 1.635871520102623, "learning_rate": 8.871795781102893e-07, "loss": 0.6478, "step": 9602 }, { "epoch": 0.8132966334956595, "grad_norm": 0.5642555246688659, "learning_rate": 8.863997219113468e-07, "loss": 0.8064, "step": 9603 }, { "epoch": 0.813381325428753, "grad_norm": 1.3918473838369603, "learning_rate": 8.856201752826066e-07, "loss": 0.6431, "step": 9604 }, { "epoch": 0.8134660173618463, "grad_norm": 1.334726952466973, "learning_rate": 8.84840938282735e-07, "loss": 0.6062, "step": 9605 }, { "epoch": 0.8135507092949397, "grad_norm": 1.3273830870955934, "learning_rate": 8.840620109703746e-07, "loss": 0.6701, "step": 9606 }, { "epoch": 0.813635401228033, "grad_norm": 1.2687089544933476, "learning_rate": 8.832833934041418e-07, "loss": 0.6579, "step": 9607 }, { "epoch": 0.8137200931611264, "grad_norm": 1.433554435005622, "learning_rate": 8.825050856426321e-07, "loss": 0.6385, "step": 9608 }, { "epoch": 0.8138047850942198, "grad_norm": 1.3022643534948533, "learning_rate": 8.817270877444184e-07, "loss": 0.7052, "step": 9609 }, { "epoch": 0.8138894770273132, "grad_norm": 1.722005542392077, "learning_rate": 8.809493997680484e-07, "loss": 0.6633, "step": 9610 }, { "epoch": 0.8139741689604065, "grad_norm": 1.3395646224907112, "learning_rate": 8.801720217720488e-07, "loss": 0.5893, "step": 9611 }, { "epoch": 0.8140588608934999, "grad_norm": 1.6036089870094505, "learning_rate": 8.793949538149188e-07, "loss": 0.5968, "step": 9612 }, { "epoch": 0.8141435528265932, "grad_norm": 1.1667337765764374, "learning_rate": 8.786181959551382e-07, "loss": 0.6394, "step": 9613 }, { "epoch": 0.8142282447596867, "grad_norm": 0.6937039180364147, "learning_rate": 8.778417482511636e-07, "loss": 0.8684, "step": 9614 }, { "epoch": 0.81431293669278, "grad_norm": 1.6008815039946933, "learning_rate": 8.770656107614223e-07, "loss": 0.6369, "step": 9615 }, { "epoch": 0.8143976286258734, "grad_norm": 1.7221092797695514, "learning_rate": 8.762897835443274e-07, "loss": 0.6131, "step": 9616 }, { "epoch": 0.8144823205589667, "grad_norm": 1.3849706679459322, "learning_rate": 8.755142666582633e-07, "loss": 0.6158, "step": 9617 }, { "epoch": 0.8145670124920601, "grad_norm": 1.2880169892640825, "learning_rate": 8.747390601615902e-07, "loss": 0.6193, "step": 9618 }, { "epoch": 0.8146517044251536, "grad_norm": 1.3397607343299218, "learning_rate": 8.739641641126478e-07, "loss": 0.6806, "step": 9619 }, { "epoch": 0.8147363963582469, "grad_norm": 1.1669443904730474, "learning_rate": 8.731895785697491e-07, "loss": 0.5884, "step": 9620 }, { "epoch": 0.8148210882913403, "grad_norm": 1.185356342267754, "learning_rate": 8.724153035911875e-07, "loss": 0.6345, "step": 9621 }, { "epoch": 0.8149057802244336, "grad_norm": 1.1814533613632539, "learning_rate": 8.716413392352308e-07, "loss": 0.5928, "step": 9622 }, { "epoch": 0.814990472157527, "grad_norm": 4.265409206640541, "learning_rate": 8.708676855601239e-07, "loss": 0.6643, "step": 9623 }, { "epoch": 0.8150751640906204, "grad_norm": 1.180299830343813, "learning_rate": 8.700943426240887e-07, "loss": 0.5896, "step": 9624 }, { "epoch": 0.8151598560237138, "grad_norm": 1.5140906247445993, "learning_rate": 8.693213104853244e-07, "loss": 0.5789, "step": 9625 }, { "epoch": 0.8152445479568071, "grad_norm": 0.6974696981568667, "learning_rate": 8.68548589202003e-07, "loss": 0.8877, "step": 9626 }, { "epoch": 0.8153292398899005, "grad_norm": 1.690104802228433, "learning_rate": 8.677761788322787e-07, "loss": 0.6521, "step": 9627 }, { "epoch": 0.8154139318229938, "grad_norm": 1.5257438577970903, "learning_rate": 8.670040794342765e-07, "loss": 0.6371, "step": 9628 }, { "epoch": 0.8154986237560873, "grad_norm": 1.2708711556040992, "learning_rate": 8.662322910661026e-07, "loss": 0.5865, "step": 9629 }, { "epoch": 0.8155833156891806, "grad_norm": 1.2768585822569358, "learning_rate": 8.654608137858384e-07, "loss": 0.6246, "step": 9630 }, { "epoch": 0.815668007622274, "grad_norm": 2.118714416781982, "learning_rate": 8.646896476515415e-07, "loss": 0.6156, "step": 9631 }, { "epoch": 0.8157526995553673, "grad_norm": 1.6652871020690108, "learning_rate": 8.639187927212456e-07, "loss": 0.6431, "step": 9632 }, { "epoch": 0.8158373914884607, "grad_norm": 1.5886710481601074, "learning_rate": 8.631482490529642e-07, "loss": 0.6141, "step": 9633 }, { "epoch": 0.8159220834215541, "grad_norm": 1.4571290706972522, "learning_rate": 8.623780167046808e-07, "loss": 0.6121, "step": 9634 }, { "epoch": 0.8160067753546475, "grad_norm": 1.6496285508973672, "learning_rate": 8.616080957343614e-07, "loss": 0.6308, "step": 9635 }, { "epoch": 0.8160914672877408, "grad_norm": 1.3512178958992567, "learning_rate": 8.608384861999469e-07, "loss": 0.6496, "step": 9636 }, { "epoch": 0.8161761592208342, "grad_norm": 1.4146461155947545, "learning_rate": 8.600691881593543e-07, "loss": 0.6196, "step": 9637 }, { "epoch": 0.8162608511539275, "grad_norm": 1.48106559227739, "learning_rate": 8.593002016704782e-07, "loss": 0.5919, "step": 9638 }, { "epoch": 0.816345543087021, "grad_norm": 1.3278313259886043, "learning_rate": 8.58531526791187e-07, "loss": 0.5808, "step": 9639 }, { "epoch": 0.8164302350201144, "grad_norm": 1.427053332500072, "learning_rate": 8.577631635793293e-07, "loss": 0.5975, "step": 9640 }, { "epoch": 0.8165149269532077, "grad_norm": 1.5245224914011868, "learning_rate": 8.569951120927272e-07, "loss": 0.6113, "step": 9641 }, { "epoch": 0.8165996188863011, "grad_norm": 1.376331634381253, "learning_rate": 8.562273723891807e-07, "loss": 0.6553, "step": 9642 }, { "epoch": 0.8166843108193944, "grad_norm": 1.2519911570093947, "learning_rate": 8.554599445264666e-07, "loss": 0.6851, "step": 9643 }, { "epoch": 0.8167690027524879, "grad_norm": 1.5468931640372623, "learning_rate": 8.546928285623385e-07, "loss": 0.6872, "step": 9644 }, { "epoch": 0.8168536946855812, "grad_norm": 1.5609141767902177, "learning_rate": 8.539260245545255e-07, "loss": 0.6071, "step": 9645 }, { "epoch": 0.8169383866186746, "grad_norm": 1.248198287048881, "learning_rate": 8.531595325607344e-07, "loss": 0.6385, "step": 9646 }, { "epoch": 0.8170230785517679, "grad_norm": 1.5859971548464935, "learning_rate": 8.523933526386463e-07, "loss": 0.6063, "step": 9647 }, { "epoch": 0.8171077704848613, "grad_norm": 1.3857985284012164, "learning_rate": 8.516274848459216e-07, "loss": 0.6452, "step": 9648 }, { "epoch": 0.8171924624179547, "grad_norm": 1.3420490578690125, "learning_rate": 8.508619292401949e-07, "loss": 0.6115, "step": 9649 }, { "epoch": 0.8172771543510481, "grad_norm": 1.5809676530823336, "learning_rate": 8.500966858790771e-07, "loss": 0.6396, "step": 9650 }, { "epoch": 0.8173618462841414, "grad_norm": 1.8029147214449353, "learning_rate": 8.493317548201607e-07, "loss": 0.6576, "step": 9651 }, { "epoch": 0.8174465382172348, "grad_norm": 1.6511867043427082, "learning_rate": 8.485671361210079e-07, "loss": 0.5647, "step": 9652 }, { "epoch": 0.8175312301503281, "grad_norm": 1.2956146237439936, "learning_rate": 8.478028298391605e-07, "loss": 0.6314, "step": 9653 }, { "epoch": 0.8176159220834216, "grad_norm": 1.7720365759418726, "learning_rate": 8.470388360321385e-07, "loss": 0.6269, "step": 9654 }, { "epoch": 0.817700614016515, "grad_norm": 1.2070777589313846, "learning_rate": 8.462751547574344e-07, "loss": 0.6185, "step": 9655 }, { "epoch": 0.8177853059496083, "grad_norm": 1.5709481520182371, "learning_rate": 8.455117860725192e-07, "loss": 0.5967, "step": 9656 }, { "epoch": 0.8178699978827016, "grad_norm": 1.491004315164903, "learning_rate": 8.447487300348411e-07, "loss": 0.5601, "step": 9657 }, { "epoch": 0.817954689815795, "grad_norm": 1.1294514095858128, "learning_rate": 8.439859867018247e-07, "loss": 0.5995, "step": 9658 }, { "epoch": 0.8180393817488885, "grad_norm": 1.7269148346111949, "learning_rate": 8.432235561308711e-07, "loss": 0.6488, "step": 9659 }, { "epoch": 0.8181240736819818, "grad_norm": 1.6359873798073024, "learning_rate": 8.424614383793545e-07, "loss": 0.652, "step": 9660 }, { "epoch": 0.8182087656150752, "grad_norm": 1.3261932826056817, "learning_rate": 8.416996335046296e-07, "loss": 0.5837, "step": 9661 }, { "epoch": 0.8182934575481685, "grad_norm": 1.1918500272069146, "learning_rate": 8.409381415640283e-07, "loss": 0.6019, "step": 9662 }, { "epoch": 0.8183781494812619, "grad_norm": 1.788242988076979, "learning_rate": 8.401769626148537e-07, "loss": 0.6161, "step": 9663 }, { "epoch": 0.8184628414143553, "grad_norm": 2.2428977020396177, "learning_rate": 8.394160967143899e-07, "loss": 0.6646, "step": 9664 }, { "epoch": 0.8185475333474487, "grad_norm": 1.4044558329640937, "learning_rate": 8.386555439198968e-07, "loss": 0.6024, "step": 9665 }, { "epoch": 0.818632225280542, "grad_norm": 1.386428696791109, "learning_rate": 8.378953042886084e-07, "loss": 0.6736, "step": 9666 }, { "epoch": 0.8187169172136354, "grad_norm": 2.4323663422794994, "learning_rate": 8.371353778777397e-07, "loss": 0.6463, "step": 9667 }, { "epoch": 0.8188016091467287, "grad_norm": 1.5870128783170268, "learning_rate": 8.363757647444759e-07, "loss": 0.6209, "step": 9668 }, { "epoch": 0.8188863010798222, "grad_norm": 1.4214717764968017, "learning_rate": 8.356164649459842e-07, "loss": 0.5988, "step": 9669 }, { "epoch": 0.8189709930129155, "grad_norm": 0.6630023043305464, "learning_rate": 8.348574785394026e-07, "loss": 0.8665, "step": 9670 }, { "epoch": 0.8190556849460089, "grad_norm": 0.5934144633261802, "learning_rate": 8.340988055818522e-07, "loss": 0.8567, "step": 9671 }, { "epoch": 0.8191403768791022, "grad_norm": 1.6999971943968175, "learning_rate": 8.333404461304278e-07, "loss": 0.6246, "step": 9672 }, { "epoch": 0.8192250688121956, "grad_norm": 1.2663198308957444, "learning_rate": 8.325824002421968e-07, "loss": 0.5723, "step": 9673 }, { "epoch": 0.819309760745289, "grad_norm": 1.744279421766449, "learning_rate": 8.318246679742081e-07, "loss": 0.6239, "step": 9674 }, { "epoch": 0.8193944526783824, "grad_norm": 1.3777983645373666, "learning_rate": 8.310672493834853e-07, "loss": 0.5859, "step": 9675 }, { "epoch": 0.8194791446114758, "grad_norm": 1.3129810771422206, "learning_rate": 8.303101445270267e-07, "loss": 0.5689, "step": 9676 }, { "epoch": 0.8195638365445691, "grad_norm": 1.457636622147728, "learning_rate": 8.295533534618094e-07, "loss": 0.6478, "step": 9677 }, { "epoch": 0.8196485284776625, "grad_norm": 1.6856699611982378, "learning_rate": 8.287968762447856e-07, "loss": 0.5869, "step": 9678 }, { "epoch": 0.8197332204107559, "grad_norm": 3.6692299501944987, "learning_rate": 8.280407129328843e-07, "loss": 0.6164, "step": 9679 }, { "epoch": 0.8198179123438493, "grad_norm": 1.426744574715131, "learning_rate": 8.272848635830127e-07, "loss": 0.6075, "step": 9680 }, { "epoch": 0.8199026042769426, "grad_norm": 1.506436689307994, "learning_rate": 8.265293282520492e-07, "loss": 0.59, "step": 9681 }, { "epoch": 0.819987296210036, "grad_norm": 1.2567801701779575, "learning_rate": 8.257741069968528e-07, "loss": 0.5755, "step": 9682 }, { "epoch": 0.8200719881431293, "grad_norm": 1.2527413703587023, "learning_rate": 8.250191998742602e-07, "loss": 0.5918, "step": 9683 }, { "epoch": 0.8201566800762228, "grad_norm": 1.3600032166708125, "learning_rate": 8.242646069410793e-07, "loss": 0.6011, "step": 9684 }, { "epoch": 0.8202413720093161, "grad_norm": 1.6597837276958418, "learning_rate": 8.235103282540979e-07, "loss": 0.6168, "step": 9685 }, { "epoch": 0.8203260639424095, "grad_norm": 1.644815434319766, "learning_rate": 8.227563638700797e-07, "loss": 0.607, "step": 9686 }, { "epoch": 0.8204107558755028, "grad_norm": 1.2986907054455443, "learning_rate": 8.220027138457654e-07, "loss": 0.5975, "step": 9687 }, { "epoch": 0.8204954478085962, "grad_norm": 1.8183560731413653, "learning_rate": 8.212493782378711e-07, "loss": 0.6299, "step": 9688 }, { "epoch": 0.8205801397416896, "grad_norm": 1.59714603421766, "learning_rate": 8.204963571030871e-07, "loss": 0.5759, "step": 9689 }, { "epoch": 0.820664831674783, "grad_norm": 1.5974223048750054, "learning_rate": 8.197436504980844e-07, "loss": 0.6235, "step": 9690 }, { "epoch": 0.8207495236078763, "grad_norm": 1.710083147857243, "learning_rate": 8.189912584795073e-07, "loss": 0.612, "step": 9691 }, { "epoch": 0.8208342155409697, "grad_norm": 1.111835824106215, "learning_rate": 8.182391811039775e-07, "loss": 0.6276, "step": 9692 }, { "epoch": 0.820918907474063, "grad_norm": 1.4393139053063861, "learning_rate": 8.174874184280939e-07, "loss": 0.6197, "step": 9693 }, { "epoch": 0.8210035994071565, "grad_norm": 1.0995571171063225, "learning_rate": 8.167359705084282e-07, "loss": 0.5645, "step": 9694 }, { "epoch": 0.8210882913402499, "grad_norm": 0.6110735783703499, "learning_rate": 8.159848374015327e-07, "loss": 0.8625, "step": 9695 }, { "epoch": 0.8211729832733432, "grad_norm": 1.1860374828655813, "learning_rate": 8.152340191639341e-07, "loss": 0.5892, "step": 9696 }, { "epoch": 0.8212576752064366, "grad_norm": 1.5452632659384427, "learning_rate": 8.144835158521341e-07, "loss": 0.6505, "step": 9697 }, { "epoch": 0.8213423671395299, "grad_norm": 1.3461802305651243, "learning_rate": 8.137333275226128e-07, "loss": 0.5866, "step": 9698 }, { "epoch": 0.8214270590726234, "grad_norm": 0.6465520939557564, "learning_rate": 8.12983454231826e-07, "loss": 0.847, "step": 9699 }, { "epoch": 0.8215117510057167, "grad_norm": 1.2538933891758564, "learning_rate": 8.122338960362059e-07, "loss": 0.5672, "step": 9700 }, { "epoch": 0.8215964429388101, "grad_norm": 1.4940031331053238, "learning_rate": 8.11484652992161e-07, "loss": 0.6141, "step": 9701 }, { "epoch": 0.8216811348719034, "grad_norm": 1.5344859739078192, "learning_rate": 8.107357251560743e-07, "loss": 0.6919, "step": 9702 }, { "epoch": 0.8217658268049969, "grad_norm": 1.1513168686042083, "learning_rate": 8.099871125843073e-07, "loss": 0.6227, "step": 9703 }, { "epoch": 0.8218505187380902, "grad_norm": 1.3438617622319466, "learning_rate": 8.092388153331987e-07, "loss": 0.6318, "step": 9704 }, { "epoch": 0.8219352106711836, "grad_norm": 1.481993916826834, "learning_rate": 8.084908334590591e-07, "loss": 0.6079, "step": 9705 }, { "epoch": 0.8220199026042769, "grad_norm": 1.1647333499800963, "learning_rate": 8.077431670181796e-07, "loss": 0.6124, "step": 9706 }, { "epoch": 0.8221045945373703, "grad_norm": 1.3408223921274518, "learning_rate": 8.069958160668256e-07, "loss": 0.6644, "step": 9707 }, { "epoch": 0.8221892864704637, "grad_norm": 1.6253597794336412, "learning_rate": 8.062487806612391e-07, "loss": 0.6355, "step": 9708 }, { "epoch": 0.8222739784035571, "grad_norm": 0.5886155534282714, "learning_rate": 8.055020608576408e-07, "loss": 0.8068, "step": 9709 }, { "epoch": 0.8223586703366504, "grad_norm": 1.3313772423880896, "learning_rate": 8.047556567122217e-07, "loss": 0.5835, "step": 9710 }, { "epoch": 0.8224433622697438, "grad_norm": 1.3937440272273873, "learning_rate": 8.040095682811539e-07, "loss": 0.6257, "step": 9711 }, { "epoch": 0.8225280542028371, "grad_norm": 1.3190987599709516, "learning_rate": 8.032637956205852e-07, "loss": 0.5902, "step": 9712 }, { "epoch": 0.8226127461359306, "grad_norm": 2.8716997156124897, "learning_rate": 8.025183387866393e-07, "loss": 0.6579, "step": 9713 }, { "epoch": 0.822697438069024, "grad_norm": 1.3210525951218166, "learning_rate": 8.017731978354154e-07, "loss": 0.6243, "step": 9714 }, { "epoch": 0.8227821300021173, "grad_norm": 2.1790824649275913, "learning_rate": 8.010283728229884e-07, "loss": 0.6086, "step": 9715 }, { "epoch": 0.8228668219352107, "grad_norm": 1.2181174050281498, "learning_rate": 8.002838638054106e-07, "loss": 0.6282, "step": 9716 }, { "epoch": 0.822951513868304, "grad_norm": 1.590785389329023, "learning_rate": 7.995396708387121e-07, "loss": 0.6, "step": 9717 }, { "epoch": 0.8230362058013975, "grad_norm": 1.2917345139474656, "learning_rate": 7.987957939788942e-07, "loss": 0.562, "step": 9718 }, { "epoch": 0.8231208977344908, "grad_norm": 1.2333285184735094, "learning_rate": 7.980522332819402e-07, "loss": 0.6841, "step": 9719 }, { "epoch": 0.8232055896675842, "grad_norm": 2.5527815853640656, "learning_rate": 7.973089888038049e-07, "loss": 0.6275, "step": 9720 }, { "epoch": 0.8232902816006775, "grad_norm": 1.6903429088553692, "learning_rate": 7.965660606004233e-07, "loss": 0.6721, "step": 9721 }, { "epoch": 0.8233749735337709, "grad_norm": 1.4743609859266689, "learning_rate": 7.958234487277044e-07, "loss": 0.6593, "step": 9722 }, { "epoch": 0.8234596654668643, "grad_norm": 1.4654694491841462, "learning_rate": 7.950811532415326e-07, "loss": 0.6397, "step": 9723 }, { "epoch": 0.8235443573999577, "grad_norm": 1.3557788646099542, "learning_rate": 7.943391741977697e-07, "loss": 0.6197, "step": 9724 }, { "epoch": 0.823629049333051, "grad_norm": 1.3218452385719877, "learning_rate": 7.935975116522554e-07, "loss": 0.6785, "step": 9725 }, { "epoch": 0.8237137412661444, "grad_norm": 1.3262699880830306, "learning_rate": 7.928561656607997e-07, "loss": 0.6331, "step": 9726 }, { "epoch": 0.8237984331992377, "grad_norm": 1.4437789154915481, "learning_rate": 7.921151362791967e-07, "loss": 0.6078, "step": 9727 }, { "epoch": 0.8238831251323312, "grad_norm": 1.4382147581308922, "learning_rate": 7.913744235632126e-07, "loss": 0.659, "step": 9728 }, { "epoch": 0.8239678170654245, "grad_norm": 1.433696232316204, "learning_rate": 7.906340275685881e-07, "loss": 0.6366, "step": 9729 }, { "epoch": 0.8240525089985179, "grad_norm": 1.5530103577564687, "learning_rate": 7.898939483510437e-07, "loss": 0.6755, "step": 9730 }, { "epoch": 0.8241372009316112, "grad_norm": 1.7880798612950168, "learning_rate": 7.891541859662716e-07, "loss": 0.6637, "step": 9731 }, { "epoch": 0.8242218928647046, "grad_norm": 3.6113582351266595, "learning_rate": 7.884147404699449e-07, "loss": 0.6066, "step": 9732 }, { "epoch": 0.8243065847977981, "grad_norm": 1.2135575280310293, "learning_rate": 7.876756119177104e-07, "loss": 0.5638, "step": 9733 }, { "epoch": 0.8243912767308914, "grad_norm": 1.4575563419797866, "learning_rate": 7.869368003651912e-07, "loss": 0.6468, "step": 9734 }, { "epoch": 0.8244759686639848, "grad_norm": 1.4135515567948993, "learning_rate": 7.861983058679873e-07, "loss": 0.6289, "step": 9735 }, { "epoch": 0.8245606605970781, "grad_norm": 1.6450236494410362, "learning_rate": 7.854601284816748e-07, "loss": 0.6553, "step": 9736 }, { "epoch": 0.8246453525301715, "grad_norm": 1.835834375353528, "learning_rate": 7.847222682618039e-07, "loss": 0.6419, "step": 9737 }, { "epoch": 0.8247300444632649, "grad_norm": 1.3827223104431354, "learning_rate": 7.839847252639038e-07, "loss": 0.6282, "step": 9738 }, { "epoch": 0.8248147363963583, "grad_norm": 1.2654410854050537, "learning_rate": 7.832474995434774e-07, "loss": 0.5842, "step": 9739 }, { "epoch": 0.8248994283294516, "grad_norm": 1.347850286467683, "learning_rate": 7.825105911560055e-07, "loss": 0.6039, "step": 9740 }, { "epoch": 0.824984120262545, "grad_norm": 1.8302727377724397, "learning_rate": 7.817740001569441e-07, "loss": 0.6182, "step": 9741 }, { "epoch": 0.8250688121956383, "grad_norm": 1.398906207800703, "learning_rate": 7.810377266017255e-07, "loss": 0.5742, "step": 9742 }, { "epoch": 0.8251535041287318, "grad_norm": 1.4712928571967663, "learning_rate": 7.8030177054576e-07, "loss": 0.6028, "step": 9743 }, { "epoch": 0.8252381960618251, "grad_norm": 1.4769146358880427, "learning_rate": 7.795661320444292e-07, "loss": 0.6721, "step": 9744 }, { "epoch": 0.8253228879949185, "grad_norm": 2.092637346746358, "learning_rate": 7.788308111530951e-07, "loss": 0.6491, "step": 9745 }, { "epoch": 0.8254075799280118, "grad_norm": 1.3528280196647235, "learning_rate": 7.780958079270961e-07, "loss": 0.6556, "step": 9746 }, { "epoch": 0.8254922718611052, "grad_norm": 1.338838073602304, "learning_rate": 7.773611224217415e-07, "loss": 0.5838, "step": 9747 }, { "epoch": 0.8255769637941986, "grad_norm": 1.8078679546843923, "learning_rate": 7.766267546923229e-07, "loss": 0.6333, "step": 9748 }, { "epoch": 0.825661655727292, "grad_norm": 1.4860874945184472, "learning_rate": 7.758927047941062e-07, "loss": 0.5752, "step": 9749 }, { "epoch": 0.8257463476603853, "grad_norm": 1.3761330804397969, "learning_rate": 7.751589727823299e-07, "loss": 0.5431, "step": 9750 }, { "epoch": 0.8258310395934787, "grad_norm": 1.263971484318589, "learning_rate": 7.74425558712214e-07, "loss": 0.6229, "step": 9751 }, { "epoch": 0.825915731526572, "grad_norm": 0.687915559475561, "learning_rate": 7.736924626389491e-07, "loss": 0.8752, "step": 9752 }, { "epoch": 0.8260004234596655, "grad_norm": 1.245149761666757, "learning_rate": 7.729596846177057e-07, "loss": 0.6622, "step": 9753 }, { "epoch": 0.8260851153927589, "grad_norm": 1.6254414951067966, "learning_rate": 7.722272247036289e-07, "loss": 0.675, "step": 9754 }, { "epoch": 0.8261698073258522, "grad_norm": 1.4688944935246229, "learning_rate": 7.714950829518409e-07, "loss": 0.6333, "step": 9755 }, { "epoch": 0.8262544992589456, "grad_norm": 6.374997252812382, "learning_rate": 7.707632594174391e-07, "loss": 0.6687, "step": 9756 }, { "epoch": 0.8263391911920389, "grad_norm": 1.3439246042316122, "learning_rate": 7.700317541554975e-07, "loss": 0.6485, "step": 9757 }, { "epoch": 0.8264238831251324, "grad_norm": 1.3345276251089546, "learning_rate": 7.693005672210646e-07, "loss": 0.6551, "step": 9758 }, { "epoch": 0.8265085750582257, "grad_norm": 1.2229893268383496, "learning_rate": 7.685696986691671e-07, "loss": 0.6386, "step": 9759 }, { "epoch": 0.8265932669913191, "grad_norm": 1.3201246963078128, "learning_rate": 7.67839148554806e-07, "loss": 0.6411, "step": 9760 }, { "epoch": 0.8266779589244124, "grad_norm": 1.4166361892897863, "learning_rate": 7.671089169329582e-07, "loss": 0.6262, "step": 9761 }, { "epoch": 0.8267626508575058, "grad_norm": 1.3998333871699218, "learning_rate": 7.663790038585794e-07, "loss": 0.5964, "step": 9762 }, { "epoch": 0.8268473427905992, "grad_norm": 1.3556542067305362, "learning_rate": 7.656494093865984e-07, "loss": 0.6316, "step": 9763 }, { "epoch": 0.8269320347236926, "grad_norm": 1.548062967717167, "learning_rate": 7.649201335719214e-07, "loss": 0.6356, "step": 9764 }, { "epoch": 0.8270167266567859, "grad_norm": 1.1995503091824598, "learning_rate": 7.641911764694315e-07, "loss": 0.6306, "step": 9765 }, { "epoch": 0.8271014185898793, "grad_norm": 0.7026514702580363, "learning_rate": 7.634625381339838e-07, "loss": 0.875, "step": 9766 }, { "epoch": 0.8271861105229726, "grad_norm": 1.5773538985998725, "learning_rate": 7.627342186204134e-07, "loss": 0.6977, "step": 9767 }, { "epoch": 0.8272708024560661, "grad_norm": 1.8976232071900991, "learning_rate": 7.620062179835308e-07, "loss": 0.6193, "step": 9768 }, { "epoch": 0.8273554943891595, "grad_norm": 1.207297083609878, "learning_rate": 7.612785362781217e-07, "loss": 0.6074, "step": 9769 }, { "epoch": 0.8274401863222528, "grad_norm": 1.8666461047483234, "learning_rate": 7.605511735589488e-07, "loss": 0.6104, "step": 9770 }, { "epoch": 0.8275248782553462, "grad_norm": 1.321399799721201, "learning_rate": 7.598241298807479e-07, "loss": 0.6677, "step": 9771 }, { "epoch": 0.8276095701884395, "grad_norm": 1.4127750685997744, "learning_rate": 7.590974052982353e-07, "loss": 0.6722, "step": 9772 }, { "epoch": 0.827694262121533, "grad_norm": 1.3517451537146106, "learning_rate": 7.583709998660982e-07, "loss": 0.634, "step": 9773 }, { "epoch": 0.8277789540546263, "grad_norm": 1.8928587569773276, "learning_rate": 7.576449136390035e-07, "loss": 0.6499, "step": 9774 }, { "epoch": 0.8278636459877197, "grad_norm": 1.6408609365984808, "learning_rate": 7.569191466715941e-07, "loss": 0.5916, "step": 9775 }, { "epoch": 0.827948337920813, "grad_norm": 1.6261189248838361, "learning_rate": 7.561936990184865e-07, "loss": 0.6779, "step": 9776 }, { "epoch": 0.8280330298539064, "grad_norm": 1.5084915350641583, "learning_rate": 7.554685707342757e-07, "loss": 0.7061, "step": 9777 }, { "epoch": 0.8281177217869998, "grad_norm": 2.0152550008658654, "learning_rate": 7.547437618735315e-07, "loss": 0.6152, "step": 9778 }, { "epoch": 0.8282024137200932, "grad_norm": 1.329973614264055, "learning_rate": 7.54019272490798e-07, "loss": 0.607, "step": 9779 }, { "epoch": 0.8282871056531865, "grad_norm": 2.654795379745168, "learning_rate": 7.532951026405988e-07, "loss": 0.5891, "step": 9780 }, { "epoch": 0.8283717975862799, "grad_norm": 1.3833516537677824, "learning_rate": 7.525712523774292e-07, "loss": 0.6644, "step": 9781 }, { "epoch": 0.8284564895193732, "grad_norm": 1.3293577656108873, "learning_rate": 7.51847721755763e-07, "loss": 0.6277, "step": 9782 }, { "epoch": 0.8285411814524667, "grad_norm": 1.7304012441946401, "learning_rate": 7.511245108300535e-07, "loss": 0.5606, "step": 9783 }, { "epoch": 0.82862587338556, "grad_norm": 2.1323884300089593, "learning_rate": 7.504016196547215e-07, "loss": 0.6531, "step": 9784 }, { "epoch": 0.8287105653186534, "grad_norm": 1.8261618202584178, "learning_rate": 7.496790482841709e-07, "loss": 0.6221, "step": 9785 }, { "epoch": 0.8287952572517467, "grad_norm": 3.098587441658874, "learning_rate": 7.489567967727795e-07, "loss": 0.5928, "step": 9786 }, { "epoch": 0.8288799491848401, "grad_norm": 1.2202216234911802, "learning_rate": 7.482348651748983e-07, "loss": 0.606, "step": 9787 }, { "epoch": 0.8289646411179336, "grad_norm": 1.7311770704123512, "learning_rate": 7.475132535448576e-07, "loss": 0.6316, "step": 9788 }, { "epoch": 0.8290493330510269, "grad_norm": 0.6123902045920251, "learning_rate": 7.467919619369624e-07, "loss": 0.8788, "step": 9789 }, { "epoch": 0.8291340249841203, "grad_norm": 1.4826496298073524, "learning_rate": 7.460709904054941e-07, "loss": 0.6504, "step": 9790 }, { "epoch": 0.8292187169172136, "grad_norm": 0.5824439295057845, "learning_rate": 7.453503390047106e-07, "loss": 0.8122, "step": 9791 }, { "epoch": 0.829303408850307, "grad_norm": 1.42148085374683, "learning_rate": 7.446300077888418e-07, "loss": 0.6292, "step": 9792 }, { "epoch": 0.8293881007834004, "grad_norm": 2.1214301724951565, "learning_rate": 7.439099968120989e-07, "loss": 0.6216, "step": 9793 }, { "epoch": 0.8294727927164938, "grad_norm": 1.3192061091792167, "learning_rate": 7.431903061286666e-07, "loss": 0.6174, "step": 9794 }, { "epoch": 0.8295574846495871, "grad_norm": 0.614988237509679, "learning_rate": 7.424709357927034e-07, "loss": 0.854, "step": 9795 }, { "epoch": 0.8296421765826805, "grad_norm": 0.6710701436340539, "learning_rate": 7.417518858583472e-07, "loss": 0.8659, "step": 9796 }, { "epoch": 0.8297268685157738, "grad_norm": 0.6461775321106608, "learning_rate": 7.410331563797102e-07, "loss": 0.844, "step": 9797 }, { "epoch": 0.8298115604488673, "grad_norm": 1.4451593653903716, "learning_rate": 7.403147474108802e-07, "loss": 0.6481, "step": 9798 }, { "epoch": 0.8298962523819606, "grad_norm": 1.4258091855404909, "learning_rate": 7.395966590059228e-07, "loss": 0.6547, "step": 9799 }, { "epoch": 0.829980944315054, "grad_norm": 1.2100873470565312, "learning_rate": 7.388788912188754e-07, "loss": 0.6274, "step": 9800 }, { "epoch": 0.8300656362481473, "grad_norm": 0.6449220792054445, "learning_rate": 7.381614441037555e-07, "loss": 0.8278, "step": 9801 }, { "epoch": 0.8301503281812407, "grad_norm": 1.9954205434071142, "learning_rate": 7.374443177145546e-07, "loss": 0.696, "step": 9802 }, { "epoch": 0.8302350201143341, "grad_norm": 1.5109515518575645, "learning_rate": 7.367275121052397e-07, "loss": 0.6204, "step": 9803 }, { "epoch": 0.8303197120474275, "grad_norm": 0.5630760603009118, "learning_rate": 7.360110273297555e-07, "loss": 0.8767, "step": 9804 }, { "epoch": 0.8304044039805208, "grad_norm": 0.642869317955592, "learning_rate": 7.352948634420199e-07, "loss": 0.8225, "step": 9805 }, { "epoch": 0.8304890959136142, "grad_norm": 1.2273984816017098, "learning_rate": 7.34579020495928e-07, "loss": 0.6191, "step": 9806 }, { "epoch": 0.8305737878467077, "grad_norm": 2.7931076581115124, "learning_rate": 7.338634985453524e-07, "loss": 0.6647, "step": 9807 }, { "epoch": 0.830658479779801, "grad_norm": 1.454195185318188, "learning_rate": 7.331482976441378e-07, "loss": 0.6226, "step": 9808 }, { "epoch": 0.8307431717128944, "grad_norm": 1.6834074022221688, "learning_rate": 7.324334178461078e-07, "loss": 0.6454, "step": 9809 }, { "epoch": 0.8308278636459877, "grad_norm": 0.6663514029168909, "learning_rate": 7.317188592050611e-07, "loss": 0.8884, "step": 9810 }, { "epoch": 0.8309125555790811, "grad_norm": 2.8123881023365733, "learning_rate": 7.310046217747718e-07, "loss": 0.5884, "step": 9811 }, { "epoch": 0.8309972475121745, "grad_norm": 1.6380559814665183, "learning_rate": 7.302907056089914e-07, "loss": 0.5822, "step": 9812 }, { "epoch": 0.8310819394452679, "grad_norm": 1.7902999481519026, "learning_rate": 7.29577110761443e-07, "loss": 0.6889, "step": 9813 }, { "epoch": 0.8311666313783612, "grad_norm": 2.170428508512935, "learning_rate": 7.288638372858303e-07, "loss": 0.6635, "step": 9814 }, { "epoch": 0.8312513233114546, "grad_norm": 1.5311167301842188, "learning_rate": 7.281508852358321e-07, "loss": 0.5641, "step": 9815 }, { "epoch": 0.8313360152445479, "grad_norm": 1.7405405594344296, "learning_rate": 7.274382546650987e-07, "loss": 0.6662, "step": 9816 }, { "epoch": 0.8314207071776414, "grad_norm": 0.6456561074896477, "learning_rate": 7.267259456272608e-07, "loss": 0.8768, "step": 9817 }, { "epoch": 0.8315053991107347, "grad_norm": 1.2814921169213702, "learning_rate": 7.26013958175924e-07, "loss": 0.6094, "step": 9818 }, { "epoch": 0.8315900910438281, "grad_norm": 0.6427743531483097, "learning_rate": 7.25302292364668e-07, "loss": 0.8357, "step": 9819 }, { "epoch": 0.8316747829769214, "grad_norm": 1.7495950900537922, "learning_rate": 7.24590948247052e-07, "loss": 0.5782, "step": 9820 }, { "epoch": 0.8317594749100148, "grad_norm": 1.3661406944909027, "learning_rate": 7.23879925876605e-07, "loss": 0.6445, "step": 9821 }, { "epoch": 0.8318441668431082, "grad_norm": 1.34773781374547, "learning_rate": 7.231692253068367e-07, "loss": 0.5979, "step": 9822 }, { "epoch": 0.8319288587762016, "grad_norm": 3.170995810722092, "learning_rate": 7.224588465912308e-07, "loss": 0.6811, "step": 9823 }, { "epoch": 0.832013550709295, "grad_norm": 1.3994212237790538, "learning_rate": 7.217487897832476e-07, "loss": 0.6202, "step": 9824 }, { "epoch": 0.8320982426423883, "grad_norm": 1.5340154649548767, "learning_rate": 7.210390549363238e-07, "loss": 0.6147, "step": 9825 }, { "epoch": 0.8321829345754816, "grad_norm": 1.438196989426595, "learning_rate": 7.20329642103868e-07, "loss": 0.6625, "step": 9826 }, { "epoch": 0.8322676265085751, "grad_norm": 1.31510917146786, "learning_rate": 7.196205513392684e-07, "loss": 0.6418, "step": 9827 }, { "epoch": 0.8323523184416685, "grad_norm": 1.4473816681528713, "learning_rate": 7.189117826958891e-07, "loss": 0.6407, "step": 9828 }, { "epoch": 0.8324370103747618, "grad_norm": 3.327243715057077, "learning_rate": 7.182033362270669e-07, "loss": 0.6347, "step": 9829 }, { "epoch": 0.8325217023078552, "grad_norm": 3.108819291556286, "learning_rate": 7.174952119861162e-07, "loss": 0.6974, "step": 9830 }, { "epoch": 0.8326063942409485, "grad_norm": 1.260479012381066, "learning_rate": 7.167874100263284e-07, "loss": 0.6199, "step": 9831 }, { "epoch": 0.832691086174042, "grad_norm": 1.464632044091994, "learning_rate": 7.16079930400968e-07, "loss": 0.6321, "step": 9832 }, { "epoch": 0.8327757781071353, "grad_norm": 1.3116953767422181, "learning_rate": 7.153727731632787e-07, "loss": 0.6074, "step": 9833 }, { "epoch": 0.8328604700402287, "grad_norm": 1.1247392075938554, "learning_rate": 7.146659383664756e-07, "loss": 0.5968, "step": 9834 }, { "epoch": 0.832945161973322, "grad_norm": 1.6302156340597813, "learning_rate": 7.139594260637522e-07, "loss": 0.6332, "step": 9835 }, { "epoch": 0.8330298539064154, "grad_norm": 1.4070198079599219, "learning_rate": 7.132532363082784e-07, "loss": 0.6555, "step": 9836 }, { "epoch": 0.8331145458395088, "grad_norm": 1.4020214704537926, "learning_rate": 7.125473691531976e-07, "loss": 0.6326, "step": 9837 }, { "epoch": 0.8331992377726022, "grad_norm": 0.6951962334287626, "learning_rate": 7.11841824651629e-07, "loss": 0.8623, "step": 9838 }, { "epoch": 0.8332839297056955, "grad_norm": 2.445609114153839, "learning_rate": 7.111366028566718e-07, "loss": 0.6307, "step": 9839 }, { "epoch": 0.8333686216387889, "grad_norm": 2.1576371953929, "learning_rate": 7.104317038213954e-07, "loss": 0.6221, "step": 9840 }, { "epoch": 0.8334533135718822, "grad_norm": 1.1288867942588494, "learning_rate": 7.097271275988482e-07, "loss": 0.5697, "step": 9841 }, { "epoch": 0.8335380055049757, "grad_norm": 1.9240492567785659, "learning_rate": 7.090228742420518e-07, "loss": 0.6503, "step": 9842 }, { "epoch": 0.833622697438069, "grad_norm": 1.4808280563392027, "learning_rate": 7.083189438040062e-07, "loss": 0.6409, "step": 9843 }, { "epoch": 0.8337073893711624, "grad_norm": 1.2250104444235905, "learning_rate": 7.076153363376853e-07, "loss": 0.6015, "step": 9844 }, { "epoch": 0.8337920813042557, "grad_norm": 0.6017964930844328, "learning_rate": 7.069120518960399e-07, "loss": 0.8501, "step": 9845 }, { "epoch": 0.8338767732373491, "grad_norm": 1.3733424188000545, "learning_rate": 7.062090905319963e-07, "loss": 0.6691, "step": 9846 }, { "epoch": 0.8339614651704426, "grad_norm": 1.1337847872576663, "learning_rate": 7.055064522984545e-07, "loss": 0.6301, "step": 9847 }, { "epoch": 0.8340461571035359, "grad_norm": 1.1998474701131647, "learning_rate": 7.048041372482922e-07, "loss": 0.6338, "step": 9848 }, { "epoch": 0.8341308490366293, "grad_norm": 1.893302282086908, "learning_rate": 7.041021454343638e-07, "loss": 0.5927, "step": 9849 }, { "epoch": 0.8342155409697226, "grad_norm": 1.752412701827976, "learning_rate": 7.034004769094965e-07, "loss": 0.5839, "step": 9850 }, { "epoch": 0.834300232902816, "grad_norm": 1.214934784646518, "learning_rate": 7.026991317264942e-07, "loss": 0.6109, "step": 9851 }, { "epoch": 0.8343849248359094, "grad_norm": 1.1295420427098553, "learning_rate": 7.019981099381378e-07, "loss": 0.6408, "step": 9852 }, { "epoch": 0.8344696167690028, "grad_norm": 1.5127409241417022, "learning_rate": 7.012974115971821e-07, "loss": 0.6434, "step": 9853 }, { "epoch": 0.8345543087020961, "grad_norm": 1.192921583268597, "learning_rate": 7.005970367563608e-07, "loss": 0.6862, "step": 9854 }, { "epoch": 0.8346390006351895, "grad_norm": 1.3487707305473546, "learning_rate": 6.998969854683774e-07, "loss": 0.6581, "step": 9855 }, { "epoch": 0.8347236925682828, "grad_norm": 1.2873752868258475, "learning_rate": 6.991972577859157e-07, "loss": 0.6519, "step": 9856 }, { "epoch": 0.8348083845013763, "grad_norm": 1.2365733493346411, "learning_rate": 6.984978537616355e-07, "loss": 0.6301, "step": 9857 }, { "epoch": 0.8348930764344696, "grad_norm": 1.5356319162873289, "learning_rate": 6.977987734481673e-07, "loss": 0.6831, "step": 9858 }, { "epoch": 0.834977768367563, "grad_norm": 1.4461815554503066, "learning_rate": 6.971000168981235e-07, "loss": 0.5967, "step": 9859 }, { "epoch": 0.8350624603006563, "grad_norm": 1.5107923311028482, "learning_rate": 6.964015841640898e-07, "loss": 0.6932, "step": 9860 }, { "epoch": 0.8351471522337497, "grad_norm": 1.2595934793154262, "learning_rate": 6.957034752986242e-07, "loss": 0.6735, "step": 9861 }, { "epoch": 0.8352318441668432, "grad_norm": 1.4388708796126277, "learning_rate": 6.950056903542657e-07, "loss": 0.7516, "step": 9862 }, { "epoch": 0.8353165360999365, "grad_norm": 1.859728753732375, "learning_rate": 6.943082293835235e-07, "loss": 0.681, "step": 9863 }, { "epoch": 0.8354012280330299, "grad_norm": 1.5491160793840038, "learning_rate": 6.936110924388873e-07, "loss": 0.6221, "step": 9864 }, { "epoch": 0.8354859199661232, "grad_norm": 1.4666701523627266, "learning_rate": 6.929142795728195e-07, "loss": 0.6836, "step": 9865 }, { "epoch": 0.8355706118992166, "grad_norm": 1.6942937682736505, "learning_rate": 6.922177908377592e-07, "loss": 0.6364, "step": 9866 }, { "epoch": 0.83565530383231, "grad_norm": 0.5745410497701641, "learning_rate": 6.915216262861207e-07, "loss": 0.8384, "step": 9867 }, { "epoch": 0.8357399957654034, "grad_norm": 1.2672571606787721, "learning_rate": 6.90825785970296e-07, "loss": 0.6188, "step": 9868 }, { "epoch": 0.8358246876984967, "grad_norm": 0.6225567977659648, "learning_rate": 6.901302699426477e-07, "loss": 0.8542, "step": 9869 }, { "epoch": 0.8359093796315901, "grad_norm": 1.3966690648579254, "learning_rate": 6.894350782555192e-07, "loss": 0.6102, "step": 9870 }, { "epoch": 0.8359940715646834, "grad_norm": 1.562746511125346, "learning_rate": 6.887402109612263e-07, "loss": 0.6554, "step": 9871 }, { "epoch": 0.8360787634977769, "grad_norm": 1.4033767234034809, "learning_rate": 6.880456681120612e-07, "loss": 0.6524, "step": 9872 }, { "epoch": 0.8361634554308702, "grad_norm": 1.1207478354515759, "learning_rate": 6.873514497602924e-07, "loss": 0.6731, "step": 9873 }, { "epoch": 0.8362481473639636, "grad_norm": 2.0879273780804524, "learning_rate": 6.866575559581635e-07, "loss": 0.6201, "step": 9874 }, { "epoch": 0.8363328392970569, "grad_norm": 1.3258670039232126, "learning_rate": 6.859639867578937e-07, "loss": 0.6453, "step": 9875 }, { "epoch": 0.8364175312301503, "grad_norm": 2.0032577866014174, "learning_rate": 6.852707422116794e-07, "loss": 0.6695, "step": 9876 }, { "epoch": 0.8365022231632437, "grad_norm": 1.8600309247304458, "learning_rate": 6.845778223716876e-07, "loss": 0.6085, "step": 9877 }, { "epoch": 0.8365869150963371, "grad_norm": 1.664233819953429, "learning_rate": 6.838852272900676e-07, "loss": 0.6165, "step": 9878 }, { "epoch": 0.8366716070294304, "grad_norm": 1.2737119394694691, "learning_rate": 6.831929570189366e-07, "loss": 0.6413, "step": 9879 }, { "epoch": 0.8367562989625238, "grad_norm": 4.7579878992115905, "learning_rate": 6.825010116103953e-07, "loss": 0.6137, "step": 9880 }, { "epoch": 0.8368409908956171, "grad_norm": 1.2273387721173397, "learning_rate": 6.818093911165163e-07, "loss": 0.5905, "step": 9881 }, { "epoch": 0.8369256828287106, "grad_norm": 2.0388869550091155, "learning_rate": 6.811180955893454e-07, "loss": 0.6475, "step": 9882 }, { "epoch": 0.837010374761804, "grad_norm": 1.5225173388749162, "learning_rate": 6.804271250809086e-07, "loss": 0.6114, "step": 9883 }, { "epoch": 0.8370950666948973, "grad_norm": 1.2527466111641996, "learning_rate": 6.797364796432021e-07, "loss": 0.6405, "step": 9884 }, { "epoch": 0.8371797586279907, "grad_norm": 1.5030461138427853, "learning_rate": 6.790461593282033e-07, "loss": 0.6376, "step": 9885 }, { "epoch": 0.837264450561084, "grad_norm": 1.7022191478545652, "learning_rate": 6.783561641878611e-07, "loss": 0.6472, "step": 9886 }, { "epoch": 0.8373491424941775, "grad_norm": 1.3791484079804217, "learning_rate": 6.776664942741018e-07, "loss": 0.6672, "step": 9887 }, { "epoch": 0.8374338344272708, "grad_norm": 1.5250181758628292, "learning_rate": 6.769771496388267e-07, "loss": 0.6681, "step": 9888 }, { "epoch": 0.8375185263603642, "grad_norm": 1.3033081899941594, "learning_rate": 6.762881303339136e-07, "loss": 0.6016, "step": 9889 }, { "epoch": 0.8376032182934575, "grad_norm": 0.6248809855853641, "learning_rate": 6.755994364112123e-07, "loss": 0.8368, "step": 9890 }, { "epoch": 0.8376879102265509, "grad_norm": 1.6399404387362098, "learning_rate": 6.74911067922554e-07, "loss": 0.611, "step": 9891 }, { "epoch": 0.8377726021596443, "grad_norm": 1.2056976503323669, "learning_rate": 6.74223024919739e-07, "loss": 0.6326, "step": 9892 }, { "epoch": 0.8378572940927377, "grad_norm": 1.8291646285686847, "learning_rate": 6.73535307454547e-07, "loss": 0.6519, "step": 9893 }, { "epoch": 0.837941986025831, "grad_norm": 1.2003643493717242, "learning_rate": 6.728479155787331e-07, "loss": 0.5591, "step": 9894 }, { "epoch": 0.8380266779589244, "grad_norm": 0.6593258870439268, "learning_rate": 6.721608493440274e-07, "loss": 0.848, "step": 9895 }, { "epoch": 0.8381113698920177, "grad_norm": 1.5421005905338074, "learning_rate": 6.714741088021343e-07, "loss": 0.6369, "step": 9896 }, { "epoch": 0.8381960618251112, "grad_norm": 2.4005427318040087, "learning_rate": 6.707876940047364e-07, "loss": 0.6139, "step": 9897 }, { "epoch": 0.8382807537582045, "grad_norm": 1.2509805619606182, "learning_rate": 6.701016050034875e-07, "loss": 0.6367, "step": 9898 }, { "epoch": 0.8383654456912979, "grad_norm": 1.339707222227281, "learning_rate": 6.694158418500207e-07, "loss": 0.6286, "step": 9899 }, { "epoch": 0.8384501376243912, "grad_norm": 1.3237479816539512, "learning_rate": 6.687304045959436e-07, "loss": 0.5852, "step": 9900 }, { "epoch": 0.8385348295574846, "grad_norm": 1.4781065470739883, "learning_rate": 6.680452932928383e-07, "loss": 0.6353, "step": 9901 }, { "epoch": 0.8386195214905781, "grad_norm": 3.5160270174834705, "learning_rate": 6.673605079922652e-07, "loss": 0.575, "step": 9902 }, { "epoch": 0.8387042134236714, "grad_norm": 1.196371213430213, "learning_rate": 6.666760487457546e-07, "loss": 0.612, "step": 9903 }, { "epoch": 0.8387889053567648, "grad_norm": 2.333407769141895, "learning_rate": 6.659919156048178e-07, "loss": 0.59, "step": 9904 }, { "epoch": 0.8388735972898581, "grad_norm": 6.849328175003465, "learning_rate": 6.653081086209395e-07, "loss": 0.6207, "step": 9905 }, { "epoch": 0.8389582892229515, "grad_norm": 1.4081894718843744, "learning_rate": 6.646246278455787e-07, "loss": 0.6065, "step": 9906 }, { "epoch": 0.8390429811560449, "grad_norm": 1.286945188915627, "learning_rate": 6.639414733301719e-07, "loss": 0.6372, "step": 9907 }, { "epoch": 0.8391276730891383, "grad_norm": 1.3220747000644557, "learning_rate": 6.632586451261291e-07, "loss": 0.6187, "step": 9908 }, { "epoch": 0.8392123650222316, "grad_norm": 1.3351510921292185, "learning_rate": 6.625761432848377e-07, "loss": 0.5983, "step": 9909 }, { "epoch": 0.839297056955325, "grad_norm": 1.4109909596751062, "learning_rate": 6.618939678576602e-07, "loss": 0.615, "step": 9910 }, { "epoch": 0.8393817488884184, "grad_norm": 1.5054963975856337, "learning_rate": 6.61212118895932e-07, "loss": 0.6377, "step": 9911 }, { "epoch": 0.8394664408215118, "grad_norm": 1.33288476043821, "learning_rate": 6.605305964509678e-07, "loss": 0.6213, "step": 9912 }, { "epoch": 0.8395511327546051, "grad_norm": 1.3982629540171985, "learning_rate": 6.598494005740536e-07, "loss": 0.6181, "step": 9913 }, { "epoch": 0.8396358246876985, "grad_norm": 1.2636829885745546, "learning_rate": 6.591685313164537e-07, "loss": 0.6437, "step": 9914 }, { "epoch": 0.8397205166207918, "grad_norm": 1.3088768986097918, "learning_rate": 6.584879887294094e-07, "loss": 0.6457, "step": 9915 }, { "epoch": 0.8398052085538853, "grad_norm": 3.0337276733633756, "learning_rate": 6.578077728641319e-07, "loss": 0.5984, "step": 9916 }, { "epoch": 0.8398899004869786, "grad_norm": 1.372909956283063, "learning_rate": 6.571278837718131e-07, "loss": 0.6094, "step": 9917 }, { "epoch": 0.839974592420072, "grad_norm": 3.5027650206804632, "learning_rate": 6.564483215036183e-07, "loss": 0.6236, "step": 9918 }, { "epoch": 0.8400592843531653, "grad_norm": 1.4432833869781552, "learning_rate": 6.557690861106864e-07, "loss": 0.6439, "step": 9919 }, { "epoch": 0.8401439762862587, "grad_norm": 1.4830921311507415, "learning_rate": 6.550901776441348e-07, "loss": 0.6555, "step": 9920 }, { "epoch": 0.8402286682193522, "grad_norm": 2.0242784849564317, "learning_rate": 6.544115961550545e-07, "loss": 0.5732, "step": 9921 }, { "epoch": 0.8403133601524455, "grad_norm": 1.6089882621744735, "learning_rate": 6.537333416945129e-07, "loss": 0.5667, "step": 9922 }, { "epoch": 0.8403980520855389, "grad_norm": 1.8947480246416464, "learning_rate": 6.530554143135525e-07, "loss": 0.6027, "step": 9923 }, { "epoch": 0.8404827440186322, "grad_norm": 0.6267926472957085, "learning_rate": 6.523778140631898e-07, "loss": 0.8319, "step": 9924 }, { "epoch": 0.8405674359517256, "grad_norm": 1.3628353284891115, "learning_rate": 6.517005409944183e-07, "loss": 0.5622, "step": 9925 }, { "epoch": 0.840652127884819, "grad_norm": 1.4928579454626851, "learning_rate": 6.510235951582073e-07, "loss": 0.6721, "step": 9926 }, { "epoch": 0.8407368198179124, "grad_norm": 0.5917296917602348, "learning_rate": 6.503469766054987e-07, "loss": 0.8546, "step": 9927 }, { "epoch": 0.8408215117510057, "grad_norm": 1.3803707539750194, "learning_rate": 6.496706853872126e-07, "loss": 0.6059, "step": 9928 }, { "epoch": 0.8409062036840991, "grad_norm": 1.588232004666437, "learning_rate": 6.489947215542431e-07, "loss": 0.6805, "step": 9929 }, { "epoch": 0.8409908956171924, "grad_norm": 1.3821087446979292, "learning_rate": 6.483190851574611e-07, "loss": 0.6229, "step": 9930 }, { "epoch": 0.8410755875502859, "grad_norm": 2.0201572077551333, "learning_rate": 6.476437762477116e-07, "loss": 0.6085, "step": 9931 }, { "epoch": 0.8411602794833792, "grad_norm": 1.7175933694275478, "learning_rate": 6.469687948758141e-07, "loss": 0.6392, "step": 9932 }, { "epoch": 0.8412449714164726, "grad_norm": 1.5936448831032664, "learning_rate": 6.462941410925655e-07, "loss": 0.6196, "step": 9933 }, { "epoch": 0.8413296633495659, "grad_norm": 1.4050120119291876, "learning_rate": 6.456198149487364e-07, "loss": 0.6664, "step": 9934 }, { "epoch": 0.8414143552826593, "grad_norm": 1.4546006427800127, "learning_rate": 6.449458164950734e-07, "loss": 0.637, "step": 9935 }, { "epoch": 0.8414990472157527, "grad_norm": 1.1186691632490888, "learning_rate": 6.442721457823003e-07, "loss": 0.6283, "step": 9936 }, { "epoch": 0.8415837391488461, "grad_norm": 1.2459597424089188, "learning_rate": 6.435988028611118e-07, "loss": 0.6345, "step": 9937 }, { "epoch": 0.8416684310819394, "grad_norm": 3.22467849128504, "learning_rate": 6.429257877821815e-07, "loss": 0.6154, "step": 9938 }, { "epoch": 0.8417531230150328, "grad_norm": 1.2049320927359402, "learning_rate": 6.42253100596159e-07, "loss": 0.6348, "step": 9939 }, { "epoch": 0.8418378149481262, "grad_norm": 1.2735459796668398, "learning_rate": 6.415807413536646e-07, "loss": 0.6372, "step": 9940 }, { "epoch": 0.8419225068812196, "grad_norm": 1.5089606151976402, "learning_rate": 6.40908710105298e-07, "loss": 0.6366, "step": 9941 }, { "epoch": 0.842007198814313, "grad_norm": 2.0661725838945997, "learning_rate": 6.40237006901634e-07, "loss": 0.6178, "step": 9942 }, { "epoch": 0.8420918907474063, "grad_norm": 1.2280031706291714, "learning_rate": 6.395656317932209e-07, "loss": 0.6273, "step": 9943 }, { "epoch": 0.8421765826804997, "grad_norm": 1.6914458125934675, "learning_rate": 6.388945848305849e-07, "loss": 0.6532, "step": 9944 }, { "epoch": 0.842261274613593, "grad_norm": 1.1378895994804468, "learning_rate": 6.382238660642237e-07, "loss": 0.6408, "step": 9945 }, { "epoch": 0.8423459665466865, "grad_norm": 1.2986894866307124, "learning_rate": 6.375534755446128e-07, "loss": 0.6119, "step": 9946 }, { "epoch": 0.8424306584797798, "grad_norm": 2.4906139948460164, "learning_rate": 6.368834133222046e-07, "loss": 0.6087, "step": 9947 }, { "epoch": 0.8425153504128732, "grad_norm": 1.4313414857476903, "learning_rate": 6.362136794474217e-07, "loss": 0.6438, "step": 9948 }, { "epoch": 0.8426000423459665, "grad_norm": 3.0197432901678978, "learning_rate": 6.355442739706674e-07, "loss": 0.6333, "step": 9949 }, { "epoch": 0.8426847342790599, "grad_norm": 1.2935343405403121, "learning_rate": 6.348751969423167e-07, "loss": 0.5868, "step": 9950 }, { "epoch": 0.8427694262121533, "grad_norm": 2.389253997071988, "learning_rate": 6.342064484127225e-07, "loss": 0.5917, "step": 9951 }, { "epoch": 0.8428541181452467, "grad_norm": 1.8158427992541797, "learning_rate": 6.335380284322118e-07, "loss": 0.6604, "step": 9952 }, { "epoch": 0.84293881007834, "grad_norm": 1.409051601164207, "learning_rate": 6.328699370510849e-07, "loss": 0.6674, "step": 9953 }, { "epoch": 0.8430235020114334, "grad_norm": 1.2457376738434593, "learning_rate": 6.322021743196205e-07, "loss": 0.629, "step": 9954 }, { "epoch": 0.8431081939445267, "grad_norm": 1.3980735701339855, "learning_rate": 6.315347402880706e-07, "loss": 0.6489, "step": 9955 }, { "epoch": 0.8431928858776202, "grad_norm": 1.4579998400775054, "learning_rate": 6.308676350066639e-07, "loss": 0.6084, "step": 9956 }, { "epoch": 0.8432775778107136, "grad_norm": 1.4067475362733612, "learning_rate": 6.302008585256048e-07, "loss": 0.5843, "step": 9957 }, { "epoch": 0.8433622697438069, "grad_norm": 1.4474468467445993, "learning_rate": 6.295344108950685e-07, "loss": 0.6139, "step": 9958 }, { "epoch": 0.8434469616769003, "grad_norm": 1.335159927873121, "learning_rate": 6.288682921652106e-07, "loss": 0.5853, "step": 9959 }, { "epoch": 0.8435316536099936, "grad_norm": 1.3904753567062655, "learning_rate": 6.282025023861615e-07, "loss": 0.6947, "step": 9960 }, { "epoch": 0.8436163455430871, "grad_norm": 1.2574188106333395, "learning_rate": 6.275370416080223e-07, "loss": 0.5753, "step": 9961 }, { "epoch": 0.8437010374761804, "grad_norm": 1.3913407555938004, "learning_rate": 6.268719098808745e-07, "loss": 0.5984, "step": 9962 }, { "epoch": 0.8437857294092738, "grad_norm": 1.4146383920770376, "learning_rate": 6.26207107254772e-07, "loss": 0.6623, "step": 9963 }, { "epoch": 0.8438704213423671, "grad_norm": 2.0873392399722133, "learning_rate": 6.255426337797449e-07, "loss": 0.6271, "step": 9964 }, { "epoch": 0.8439551132754605, "grad_norm": 0.6591900920598681, "learning_rate": 6.248784895057996e-07, "loss": 0.8634, "step": 9965 }, { "epoch": 0.8440398052085539, "grad_norm": 1.4185350050213819, "learning_rate": 6.242146744829147e-07, "loss": 0.5952, "step": 9966 }, { "epoch": 0.8441244971416473, "grad_norm": 1.2642361478563577, "learning_rate": 6.235511887610457e-07, "loss": 0.6073, "step": 9967 }, { "epoch": 0.8442091890747406, "grad_norm": 1.8420543368120452, "learning_rate": 6.228880323901254e-07, "loss": 0.6148, "step": 9968 }, { "epoch": 0.844293881007834, "grad_norm": 1.353051144394839, "learning_rate": 6.222252054200578e-07, "loss": 0.6101, "step": 9969 }, { "epoch": 0.8443785729409273, "grad_norm": 1.7889078057282466, "learning_rate": 6.215627079007235e-07, "loss": 0.6214, "step": 9970 }, { "epoch": 0.8444632648740208, "grad_norm": 1.672083745086402, "learning_rate": 6.209005398819828e-07, "loss": 0.6089, "step": 9971 }, { "epoch": 0.8445479568071141, "grad_norm": 1.432237720535855, "learning_rate": 6.20238701413664e-07, "loss": 0.6151, "step": 9972 }, { "epoch": 0.8446326487402075, "grad_norm": 1.2688632783885558, "learning_rate": 6.195771925455756e-07, "loss": 0.6701, "step": 9973 }, { "epoch": 0.8447173406733008, "grad_norm": 6.283099322148788, "learning_rate": 6.18916013327498e-07, "loss": 0.7157, "step": 9974 }, { "epoch": 0.8448020326063942, "grad_norm": 1.4703067081619892, "learning_rate": 6.182551638091888e-07, "loss": 0.6495, "step": 9975 }, { "epoch": 0.8448867245394877, "grad_norm": 1.2652430010881102, "learning_rate": 6.175946440403818e-07, "loss": 0.6031, "step": 9976 }, { "epoch": 0.844971416472581, "grad_norm": 1.7004847081944188, "learning_rate": 6.169344540707834e-07, "loss": 0.5603, "step": 9977 }, { "epoch": 0.8450561084056744, "grad_norm": 1.2803689046460531, "learning_rate": 6.162745939500764e-07, "loss": 0.6515, "step": 9978 }, { "epoch": 0.8451408003387677, "grad_norm": 1.5754632885955167, "learning_rate": 6.156150637279207e-07, "loss": 0.6543, "step": 9979 }, { "epoch": 0.8452254922718611, "grad_norm": 1.4776277711080992, "learning_rate": 6.149558634539466e-07, "loss": 0.6024, "step": 9980 }, { "epoch": 0.8453101842049545, "grad_norm": 1.5053703256464197, "learning_rate": 6.142969931777648e-07, "loss": 0.5951, "step": 9981 }, { "epoch": 0.8453948761380479, "grad_norm": 1.4748149325483175, "learning_rate": 6.136384529489564e-07, "loss": 0.5705, "step": 9982 }, { "epoch": 0.8454795680711412, "grad_norm": 0.5574305410877264, "learning_rate": 6.129802428170817e-07, "loss": 0.8601, "step": 9983 }, { "epoch": 0.8455642600042346, "grad_norm": 1.3458852528776168, "learning_rate": 6.123223628316738e-07, "loss": 0.6381, "step": 9984 }, { "epoch": 0.8456489519373279, "grad_norm": 0.5440430584517197, "learning_rate": 6.11664813042242e-07, "loss": 0.7987, "step": 9985 }, { "epoch": 0.8457336438704214, "grad_norm": 1.6454216057199385, "learning_rate": 6.110075934982712e-07, "loss": 0.6328, "step": 9986 }, { "epoch": 0.8458183358035147, "grad_norm": 1.4419214025063114, "learning_rate": 6.10350704249219e-07, "loss": 0.6369, "step": 9987 }, { "epoch": 0.8459030277366081, "grad_norm": 1.2676009791973906, "learning_rate": 6.09694145344521e-07, "loss": 0.6324, "step": 9988 }, { "epoch": 0.8459877196697014, "grad_norm": 0.5616382730432486, "learning_rate": 6.09037916833587e-07, "loss": 0.8258, "step": 9989 }, { "epoch": 0.8460724116027948, "grad_norm": 0.6339416728632371, "learning_rate": 6.083820187657985e-07, "loss": 0.8353, "step": 9990 }, { "epoch": 0.8461571035358882, "grad_norm": 1.3111784172880603, "learning_rate": 6.077264511905196e-07, "loss": 0.6, "step": 9991 }, { "epoch": 0.8462417954689816, "grad_norm": 1.2013001591639274, "learning_rate": 6.070712141570839e-07, "loss": 0.6289, "step": 9992 }, { "epoch": 0.846326487402075, "grad_norm": 1.5846473142008057, "learning_rate": 6.064163077148e-07, "loss": 0.6819, "step": 9993 }, { "epoch": 0.8464111793351683, "grad_norm": 0.5429806578402656, "learning_rate": 6.057617319129555e-07, "loss": 0.7833, "step": 9994 }, { "epoch": 0.8464958712682616, "grad_norm": 1.7333604749364102, "learning_rate": 6.051074868008078e-07, "loss": 0.5649, "step": 9995 }, { "epoch": 0.8465805632013551, "grad_norm": 1.30832122996004, "learning_rate": 6.04453572427594e-07, "loss": 0.6511, "step": 9996 }, { "epoch": 0.8466652551344485, "grad_norm": 2.7433614664713826, "learning_rate": 6.037999888425244e-07, "loss": 0.6249, "step": 9997 }, { "epoch": 0.8467499470675418, "grad_norm": 1.4009262389655242, "learning_rate": 6.031467360947846e-07, "loss": 0.6724, "step": 9998 }, { "epoch": 0.8468346390006352, "grad_norm": 0.6765204971854362, "learning_rate": 6.024938142335357e-07, "loss": 0.8801, "step": 9999 }, { "epoch": 0.8469193309337285, "grad_norm": 1.4404397208866533, "learning_rate": 6.018412233079135e-07, "loss": 0.6357, "step": 10000 }, { "epoch": 0.847004022866822, "grad_norm": 0.6220467753431108, "learning_rate": 6.011889633670281e-07, "loss": 0.8529, "step": 10001 }, { "epoch": 0.8470887147999153, "grad_norm": 1.2588792935193591, "learning_rate": 6.00537034459967e-07, "loss": 0.6359, "step": 10002 }, { "epoch": 0.8471734067330087, "grad_norm": 1.2116229940240453, "learning_rate": 5.998854366357893e-07, "loss": 0.7016, "step": 10003 }, { "epoch": 0.847258098666102, "grad_norm": 0.6236412519981132, "learning_rate": 5.992341699435317e-07, "loss": 0.8179, "step": 10004 }, { "epoch": 0.8473427905991954, "grad_norm": 0.7000956691385697, "learning_rate": 5.985832344322062e-07, "loss": 0.8142, "step": 10005 }, { "epoch": 0.8474274825322888, "grad_norm": 1.6815309509843317, "learning_rate": 5.979326301507993e-07, "loss": 0.5638, "step": 10006 }, { "epoch": 0.8475121744653822, "grad_norm": 2.157216965294381, "learning_rate": 5.972823571482717e-07, "loss": 0.6049, "step": 10007 }, { "epoch": 0.8475968663984755, "grad_norm": 1.3770344170371824, "learning_rate": 5.966324154735614e-07, "loss": 0.5979, "step": 10008 }, { "epoch": 0.8476815583315689, "grad_norm": 1.7077979763468099, "learning_rate": 5.959828051755778e-07, "loss": 0.5707, "step": 10009 }, { "epoch": 0.8477662502646622, "grad_norm": 1.287074059628384, "learning_rate": 5.953335263032095e-07, "loss": 0.6194, "step": 10010 }, { "epoch": 0.8478509421977557, "grad_norm": 0.6624084618419588, "learning_rate": 5.946845789053146e-07, "loss": 0.8992, "step": 10011 }, { "epoch": 0.847935634130849, "grad_norm": 1.4387958252275883, "learning_rate": 5.940359630307341e-07, "loss": 0.6233, "step": 10012 }, { "epoch": 0.8480203260639424, "grad_norm": 0.7186700665971759, "learning_rate": 5.933876787282788e-07, "loss": 0.8611, "step": 10013 }, { "epoch": 0.8481050179970357, "grad_norm": 0.6251386480869892, "learning_rate": 5.927397260467338e-07, "loss": 0.8087, "step": 10014 }, { "epoch": 0.8481897099301292, "grad_norm": 1.8181674656423847, "learning_rate": 5.920921050348627e-07, "loss": 0.6301, "step": 10015 }, { "epoch": 0.8482744018632226, "grad_norm": 1.1519866938624894, "learning_rate": 5.91444815741401e-07, "loss": 0.6115, "step": 10016 }, { "epoch": 0.8483590937963159, "grad_norm": 2.526544425203956, "learning_rate": 5.907978582150614e-07, "loss": 0.6599, "step": 10017 }, { "epoch": 0.8484437857294093, "grad_norm": 1.4164391568259673, "learning_rate": 5.901512325045305e-07, "loss": 0.614, "step": 10018 }, { "epoch": 0.8485284776625026, "grad_norm": 1.5664945133803627, "learning_rate": 5.895049386584712e-07, "loss": 0.5904, "step": 10019 }, { "epoch": 0.8486131695955961, "grad_norm": 1.4252907815141236, "learning_rate": 5.888589767255193e-07, "loss": 0.5674, "step": 10020 }, { "epoch": 0.8486978615286894, "grad_norm": 1.6351956073342184, "learning_rate": 5.882133467542888e-07, "loss": 0.5461, "step": 10021 }, { "epoch": 0.8487825534617828, "grad_norm": 1.6749713949411305, "learning_rate": 5.875680487933643e-07, "loss": 0.5562, "step": 10022 }, { "epoch": 0.8488672453948761, "grad_norm": 1.3038931324400327, "learning_rate": 5.869230828913102e-07, "loss": 0.5983, "step": 10023 }, { "epoch": 0.8489519373279695, "grad_norm": 1.6790620893853838, "learning_rate": 5.862784490966611e-07, "loss": 0.6531, "step": 10024 }, { "epoch": 0.8490366292610629, "grad_norm": 1.2447520264151346, "learning_rate": 5.856341474579308e-07, "loss": 0.6134, "step": 10025 }, { "epoch": 0.8491213211941563, "grad_norm": 2.5186333765527222, "learning_rate": 5.849901780236061e-07, "loss": 0.6759, "step": 10026 }, { "epoch": 0.8492060131272496, "grad_norm": 2.0286490695113164, "learning_rate": 5.84346540842149e-07, "loss": 0.6651, "step": 10027 }, { "epoch": 0.849290705060343, "grad_norm": 1.631448693377129, "learning_rate": 5.83703235961997e-07, "loss": 0.5925, "step": 10028 }, { "epoch": 0.8493753969934363, "grad_norm": 1.8524160192037724, "learning_rate": 5.830602634315624e-07, "loss": 0.6017, "step": 10029 }, { "epoch": 0.8494600889265298, "grad_norm": 1.627287924751111, "learning_rate": 5.824176232992312e-07, "loss": 0.6228, "step": 10030 }, { "epoch": 0.8495447808596231, "grad_norm": 1.3976526988357536, "learning_rate": 5.81775315613366e-07, "loss": 0.6519, "step": 10031 }, { "epoch": 0.8496294727927165, "grad_norm": 0.6335689938157961, "learning_rate": 5.811333404223035e-07, "loss": 0.859, "step": 10032 }, { "epoch": 0.8497141647258099, "grad_norm": 1.7614473764673153, "learning_rate": 5.804916977743563e-07, "loss": 0.6426, "step": 10033 }, { "epoch": 0.8497988566589032, "grad_norm": 2.0174766207818022, "learning_rate": 5.798503877178124e-07, "loss": 0.618, "step": 10034 }, { "epoch": 0.8498835485919967, "grad_norm": 1.76816001824089, "learning_rate": 5.792094103009316e-07, "loss": 0.6427, "step": 10035 }, { "epoch": 0.84996824052509, "grad_norm": 0.6469220669891258, "learning_rate": 5.785687655719518e-07, "loss": 0.8691, "step": 10036 }, { "epoch": 0.8500529324581834, "grad_norm": 1.265600114655183, "learning_rate": 5.779284535790863e-07, "loss": 0.6328, "step": 10037 }, { "epoch": 0.8501376243912767, "grad_norm": 4.222484816047302, "learning_rate": 5.772884743705193e-07, "loss": 0.5961, "step": 10038 }, { "epoch": 0.8502223163243701, "grad_norm": 1.2682211072387668, "learning_rate": 5.766488279944143e-07, "loss": 0.6398, "step": 10039 }, { "epoch": 0.8503070082574635, "grad_norm": 1.6293143302755173, "learning_rate": 5.760095144989076e-07, "loss": 0.6679, "step": 10040 }, { "epoch": 0.8503917001905569, "grad_norm": 1.5678848349549859, "learning_rate": 5.753705339321108e-07, "loss": 0.6223, "step": 10041 }, { "epoch": 0.8504763921236502, "grad_norm": 0.630996220472521, "learning_rate": 5.74731886342112e-07, "loss": 0.8454, "step": 10042 }, { "epoch": 0.8505610840567436, "grad_norm": 1.3578827461580485, "learning_rate": 5.740935717769707e-07, "loss": 0.625, "step": 10043 }, { "epoch": 0.8506457759898369, "grad_norm": 1.5985258639258715, "learning_rate": 5.734555902847245e-07, "loss": 0.6381, "step": 10044 }, { "epoch": 0.8507304679229304, "grad_norm": 1.3456854850887343, "learning_rate": 5.728179419133856e-07, "loss": 0.616, "step": 10045 }, { "epoch": 0.8508151598560237, "grad_norm": 0.6445086711957446, "learning_rate": 5.721806267109375e-07, "loss": 0.831, "step": 10046 }, { "epoch": 0.8508998517891171, "grad_norm": 1.5331877649579597, "learning_rate": 5.715436447253464e-07, "loss": 0.5815, "step": 10047 }, { "epoch": 0.8509845437222104, "grad_norm": 0.7116213462349688, "learning_rate": 5.709069960045438e-07, "loss": 0.8547, "step": 10048 }, { "epoch": 0.8510692356553038, "grad_norm": 1.3629928281750767, "learning_rate": 5.702706805964437e-07, "loss": 0.6037, "step": 10049 }, { "epoch": 0.8511539275883973, "grad_norm": 0.6248342697219315, "learning_rate": 5.696346985489321e-07, "loss": 0.8548, "step": 10050 }, { "epoch": 0.8512386195214906, "grad_norm": 1.3936245369735825, "learning_rate": 5.689990499098685e-07, "loss": 0.6473, "step": 10051 }, { "epoch": 0.851323311454584, "grad_norm": 1.3089013001590966, "learning_rate": 5.683637347270893e-07, "loss": 0.6212, "step": 10052 }, { "epoch": 0.8514080033876773, "grad_norm": 1.4089989301708634, "learning_rate": 5.67728753048406e-07, "loss": 0.6199, "step": 10053 }, { "epoch": 0.8514926953207707, "grad_norm": 1.2449001619412983, "learning_rate": 5.670941049216039e-07, "loss": 0.6616, "step": 10054 }, { "epoch": 0.8515773872538641, "grad_norm": 1.3040153201667914, "learning_rate": 5.664597903944446e-07, "loss": 0.634, "step": 10055 }, { "epoch": 0.8516620791869575, "grad_norm": 1.546901171735036, "learning_rate": 5.658258095146618e-07, "loss": 0.623, "step": 10056 }, { "epoch": 0.8517467711200508, "grad_norm": 1.2869335000448119, "learning_rate": 5.651921623299672e-07, "loss": 0.648, "step": 10057 }, { "epoch": 0.8518314630531442, "grad_norm": 0.597432451628686, "learning_rate": 5.645588488880466e-07, "loss": 0.8384, "step": 10058 }, { "epoch": 0.8519161549862375, "grad_norm": 1.3036742485613089, "learning_rate": 5.639258692365585e-07, "loss": 0.6125, "step": 10059 }, { "epoch": 0.852000846919331, "grad_norm": 0.609196459193232, "learning_rate": 5.632932234231386e-07, "loss": 0.8372, "step": 10060 }, { "epoch": 0.8520855388524243, "grad_norm": 2.7309973015690097, "learning_rate": 5.626609114953974e-07, "loss": 0.6577, "step": 10061 }, { "epoch": 0.8521702307855177, "grad_norm": 1.2572917220918298, "learning_rate": 5.620289335009194e-07, "loss": 0.6966, "step": 10062 }, { "epoch": 0.852254922718611, "grad_norm": 1.5376339961374492, "learning_rate": 5.613972894872655e-07, "loss": 0.5771, "step": 10063 }, { "epoch": 0.8523396146517044, "grad_norm": 1.2327197028054564, "learning_rate": 5.607659795019676e-07, "loss": 0.5782, "step": 10064 }, { "epoch": 0.8524243065847978, "grad_norm": 1.703897206806303, "learning_rate": 5.601350035925368e-07, "loss": 0.6887, "step": 10065 }, { "epoch": 0.8525089985178912, "grad_norm": 2.5350415520454748, "learning_rate": 5.595043618064577e-07, "loss": 0.6018, "step": 10066 }, { "epoch": 0.8525936904509845, "grad_norm": 3.8693386365862277, "learning_rate": 5.58874054191188e-07, "loss": 0.6599, "step": 10067 }, { "epoch": 0.8526783823840779, "grad_norm": 1.3846005708845364, "learning_rate": 5.582440807941641e-07, "loss": 0.6336, "step": 10068 }, { "epoch": 0.8527630743171712, "grad_norm": 1.368721698142448, "learning_rate": 5.576144416627926e-07, "loss": 0.6003, "step": 10069 }, { "epoch": 0.8528477662502647, "grad_norm": 1.4334479498886763, "learning_rate": 5.569851368444573e-07, "loss": 0.6552, "step": 10070 }, { "epoch": 0.852932458183358, "grad_norm": 0.6109265943778004, "learning_rate": 5.563561663865191e-07, "loss": 0.8784, "step": 10071 }, { "epoch": 0.8530171501164514, "grad_norm": 1.4951887358304303, "learning_rate": 5.55727530336308e-07, "loss": 0.6694, "step": 10072 }, { "epoch": 0.8531018420495448, "grad_norm": 1.2541947245292229, "learning_rate": 5.550992287411339e-07, "loss": 0.5795, "step": 10073 }, { "epoch": 0.8531865339826381, "grad_norm": 0.630567918776741, "learning_rate": 5.544712616482795e-07, "loss": 0.8079, "step": 10074 }, { "epoch": 0.8532712259157316, "grad_norm": 1.6958005583117948, "learning_rate": 5.538436291050032e-07, "loss": 0.6022, "step": 10075 }, { "epoch": 0.8533559178488249, "grad_norm": 1.4875698234072559, "learning_rate": 5.532163311585381e-07, "loss": 0.6067, "step": 10076 }, { "epoch": 0.8534406097819183, "grad_norm": 1.5731963402184246, "learning_rate": 5.5258936785609e-07, "loss": 0.6021, "step": 10077 }, { "epoch": 0.8535253017150116, "grad_norm": 1.335289302804076, "learning_rate": 5.519627392448423e-07, "loss": 0.5933, "step": 10078 }, { "epoch": 0.853609993648105, "grad_norm": 1.4201063055833674, "learning_rate": 5.513364453719528e-07, "loss": 0.6058, "step": 10079 }, { "epoch": 0.8536946855811984, "grad_norm": 1.7106356523183819, "learning_rate": 5.507104862845514e-07, "loss": 0.6234, "step": 10080 }, { "epoch": 0.8537793775142918, "grad_norm": 1.3169792101911264, "learning_rate": 5.50084862029745e-07, "loss": 0.6734, "step": 10081 }, { "epoch": 0.8538640694473851, "grad_norm": 1.2684051394985447, "learning_rate": 5.494595726546187e-07, "loss": 0.6671, "step": 10082 }, { "epoch": 0.8539487613804785, "grad_norm": 1.241242570872822, "learning_rate": 5.488346182062248e-07, "loss": 0.6643, "step": 10083 }, { "epoch": 0.8540334533135718, "grad_norm": 2.288212209355793, "learning_rate": 5.48209998731597e-07, "loss": 0.6376, "step": 10084 }, { "epoch": 0.8541181452466653, "grad_norm": 1.2977961789495058, "learning_rate": 5.475857142777392e-07, "loss": 0.5873, "step": 10085 }, { "epoch": 0.8542028371797586, "grad_norm": 1.1556789662335514, "learning_rate": 5.469617648916331e-07, "loss": 0.6115, "step": 10086 }, { "epoch": 0.854287529112852, "grad_norm": 1.390407756985033, "learning_rate": 5.463381506202337e-07, "loss": 0.6193, "step": 10087 }, { "epoch": 0.8543722210459453, "grad_norm": 0.6401230709026287, "learning_rate": 5.457148715104721e-07, "loss": 0.8736, "step": 10088 }, { "epoch": 0.8544569129790387, "grad_norm": 1.203801345798057, "learning_rate": 5.450919276092542e-07, "loss": 0.6554, "step": 10089 }, { "epoch": 0.8545416049121322, "grad_norm": 1.2210618992537263, "learning_rate": 5.44469318963457e-07, "loss": 0.6406, "step": 10090 }, { "epoch": 0.8546262968452255, "grad_norm": 1.6274719307621988, "learning_rate": 5.438470456199374e-07, "loss": 0.6272, "step": 10091 }, { "epoch": 0.8547109887783189, "grad_norm": 1.4376822995722274, "learning_rate": 5.432251076255246e-07, "loss": 0.675, "step": 10092 }, { "epoch": 0.8547956807114122, "grad_norm": 1.1361826452490733, "learning_rate": 5.426035050270212e-07, "loss": 0.6097, "step": 10093 }, { "epoch": 0.8548803726445056, "grad_norm": 0.7082559589526161, "learning_rate": 5.419822378712075e-07, "loss": 0.9105, "step": 10094 }, { "epoch": 0.854965064577599, "grad_norm": 1.4403309862014528, "learning_rate": 5.413613062048362e-07, "loss": 0.5948, "step": 10095 }, { "epoch": 0.8550497565106924, "grad_norm": 1.6100797196301466, "learning_rate": 5.407407100746365e-07, "loss": 0.6353, "step": 10096 }, { "epoch": 0.8551344484437857, "grad_norm": 1.7504599258064533, "learning_rate": 5.401204495273121e-07, "loss": 0.6449, "step": 10097 }, { "epoch": 0.8552191403768791, "grad_norm": 9.217314483275231, "learning_rate": 5.395005246095392e-07, "loss": 0.6339, "step": 10098 }, { "epoch": 0.8553038323099724, "grad_norm": 1.1183194952746554, "learning_rate": 5.388809353679714e-07, "loss": 0.5943, "step": 10099 }, { "epoch": 0.8553885242430659, "grad_norm": 1.5391458236574191, "learning_rate": 5.382616818492364e-07, "loss": 0.6697, "step": 10100 }, { "epoch": 0.8554732161761592, "grad_norm": 2.241325012962085, "learning_rate": 5.376427640999354e-07, "loss": 0.6293, "step": 10101 }, { "epoch": 0.8555579081092526, "grad_norm": 1.2356522415655897, "learning_rate": 5.37024182166645e-07, "loss": 0.6482, "step": 10102 }, { "epoch": 0.8556426000423459, "grad_norm": 1.2801765797039604, "learning_rate": 5.36405936095919e-07, "loss": 0.587, "step": 10103 }, { "epoch": 0.8557272919754393, "grad_norm": 2.0635271586350123, "learning_rate": 5.357880259342812e-07, "loss": 0.5846, "step": 10104 }, { "epoch": 0.8558119839085327, "grad_norm": 1.2429922487074763, "learning_rate": 5.351704517282341e-07, "loss": 0.6314, "step": 10105 }, { "epoch": 0.8558966758416261, "grad_norm": 1.5009922412318382, "learning_rate": 5.345532135242526e-07, "loss": 0.6598, "step": 10106 }, { "epoch": 0.8559813677747194, "grad_norm": 1.2958152789635533, "learning_rate": 5.339363113687873e-07, "loss": 0.6164, "step": 10107 }, { "epoch": 0.8560660597078128, "grad_norm": 1.2617278179468048, "learning_rate": 5.333197453082633e-07, "loss": 0.585, "step": 10108 }, { "epoch": 0.8561507516409061, "grad_norm": 1.2032493391085257, "learning_rate": 5.327035153890808e-07, "loss": 0.6567, "step": 10109 }, { "epoch": 0.8562354435739996, "grad_norm": 1.2355963008201176, "learning_rate": 5.320876216576143e-07, "loss": 0.6363, "step": 10110 }, { "epoch": 0.856320135507093, "grad_norm": 1.6987416003349562, "learning_rate": 5.31472064160214e-07, "loss": 0.6188, "step": 10111 }, { "epoch": 0.8564048274401863, "grad_norm": 1.1063897138777044, "learning_rate": 5.308568429432015e-07, "loss": 0.6195, "step": 10112 }, { "epoch": 0.8564895193732797, "grad_norm": 1.3313775335952864, "learning_rate": 5.302419580528783e-07, "loss": 0.6678, "step": 10113 }, { "epoch": 0.8565742113063731, "grad_norm": 1.3724189740701733, "learning_rate": 5.296274095355148e-07, "loss": 0.642, "step": 10114 }, { "epoch": 0.8566589032394665, "grad_norm": 1.941676876835863, "learning_rate": 5.290131974373608e-07, "loss": 0.6222, "step": 10115 }, { "epoch": 0.8567435951725598, "grad_norm": 1.279826171246468, "learning_rate": 5.283993218046385e-07, "loss": 0.6284, "step": 10116 }, { "epoch": 0.8568282871056532, "grad_norm": 1.4856776638919065, "learning_rate": 5.277857826835453e-07, "loss": 0.6193, "step": 10117 }, { "epoch": 0.8569129790387465, "grad_norm": 1.5325610758581043, "learning_rate": 5.271725801202548e-07, "loss": 0.551, "step": 10118 }, { "epoch": 0.85699767097184, "grad_norm": 1.2610928904955598, "learning_rate": 5.265597141609113e-07, "loss": 0.6493, "step": 10119 }, { "epoch": 0.8570823629049333, "grad_norm": 1.2955831997181413, "learning_rate": 5.259471848516373e-07, "loss": 0.5878, "step": 10120 }, { "epoch": 0.8571670548380267, "grad_norm": 1.2772428375774647, "learning_rate": 5.253349922385298e-07, "loss": 0.5984, "step": 10121 }, { "epoch": 0.85725174677112, "grad_norm": 1.3429136113240043, "learning_rate": 5.24723136367657e-07, "loss": 0.622, "step": 10122 }, { "epoch": 0.8573364387042134, "grad_norm": 1.4500183686636794, "learning_rate": 5.241116172850663e-07, "loss": 0.5963, "step": 10123 }, { "epoch": 0.8574211306373068, "grad_norm": 1.269080606776337, "learning_rate": 5.235004350367784e-07, "loss": 0.6303, "step": 10124 }, { "epoch": 0.8575058225704002, "grad_norm": 1.2344802236369388, "learning_rate": 5.228895896687863e-07, "loss": 0.634, "step": 10125 }, { "epoch": 0.8575905145034936, "grad_norm": 1.440292965908379, "learning_rate": 5.222790812270611e-07, "loss": 0.5864, "step": 10126 }, { "epoch": 0.8576752064365869, "grad_norm": 1.266254383901384, "learning_rate": 5.216689097575444e-07, "loss": 0.6176, "step": 10127 }, { "epoch": 0.8577598983696803, "grad_norm": 1.2386484634018098, "learning_rate": 5.210590753061562e-07, "loss": 0.6129, "step": 10128 }, { "epoch": 0.8578445903027737, "grad_norm": 0.5893677973381211, "learning_rate": 5.204495779187895e-07, "loss": 0.8401, "step": 10129 }, { "epoch": 0.8579292822358671, "grad_norm": 1.5715065727811777, "learning_rate": 5.19840417641313e-07, "loss": 0.5944, "step": 10130 }, { "epoch": 0.8580139741689604, "grad_norm": 1.422468055394518, "learning_rate": 5.192315945195681e-07, "loss": 0.6026, "step": 10131 }, { "epoch": 0.8580986661020538, "grad_norm": 1.3247337611479377, "learning_rate": 5.186231085993737e-07, "loss": 0.5959, "step": 10132 }, { "epoch": 0.8581833580351471, "grad_norm": 1.4621728976462829, "learning_rate": 5.180149599265194e-07, "loss": 0.6113, "step": 10133 }, { "epoch": 0.8582680499682406, "grad_norm": 2.5016510254430124, "learning_rate": 5.174071485467735e-07, "loss": 0.6412, "step": 10134 }, { "epoch": 0.8583527419013339, "grad_norm": 1.5309907837358783, "learning_rate": 5.167996745058751e-07, "loss": 0.6513, "step": 10135 }, { "epoch": 0.8584374338344273, "grad_norm": 1.7769413167332662, "learning_rate": 5.161925378495409e-07, "loss": 0.5811, "step": 10136 }, { "epoch": 0.8585221257675206, "grad_norm": 0.5572898847050037, "learning_rate": 5.155857386234608e-07, "loss": 0.825, "step": 10137 }, { "epoch": 0.858606817700614, "grad_norm": 2.9204019042762352, "learning_rate": 5.149792768733003e-07, "loss": 0.6161, "step": 10138 }, { "epoch": 0.8586915096337074, "grad_norm": 1.3325357832123732, "learning_rate": 5.143731526446988e-07, "loss": 0.5906, "step": 10139 }, { "epoch": 0.8587762015668008, "grad_norm": 2.1937396184697624, "learning_rate": 5.137673659832709e-07, "loss": 0.6468, "step": 10140 }, { "epoch": 0.8588608934998941, "grad_norm": 1.2145072435717115, "learning_rate": 5.131619169346031e-07, "loss": 0.6146, "step": 10141 }, { "epoch": 0.8589455854329875, "grad_norm": 1.8463791157821847, "learning_rate": 5.125568055442614e-07, "loss": 0.6103, "step": 10142 }, { "epoch": 0.8590302773660808, "grad_norm": 1.2007352551098884, "learning_rate": 5.119520318577803e-07, "loss": 0.6698, "step": 10143 }, { "epoch": 0.8591149692991743, "grad_norm": 1.3669420191001913, "learning_rate": 5.113475959206749e-07, "loss": 0.6154, "step": 10144 }, { "epoch": 0.8591996612322677, "grad_norm": 1.3838547302330577, "learning_rate": 5.10743497778432e-07, "loss": 0.591, "step": 10145 }, { "epoch": 0.859284353165361, "grad_norm": 1.90483926648329, "learning_rate": 5.101397374765121e-07, "loss": 0.6353, "step": 10146 }, { "epoch": 0.8593690450984544, "grad_norm": 1.6087083634141293, "learning_rate": 5.095363150603521e-07, "loss": 0.6878, "step": 10147 }, { "epoch": 0.8594537370315477, "grad_norm": 0.6497712744959158, "learning_rate": 5.089332305753631e-07, "loss": 0.8898, "step": 10148 }, { "epoch": 0.8595384289646412, "grad_norm": 1.5018730048660565, "learning_rate": 5.08330484066929e-07, "loss": 0.6131, "step": 10149 }, { "epoch": 0.8596231208977345, "grad_norm": 1.6394743130404072, "learning_rate": 5.077280755804109e-07, "loss": 0.609, "step": 10150 }, { "epoch": 0.8597078128308279, "grad_norm": 2.2627607897695152, "learning_rate": 5.071260051611421e-07, "loss": 0.6809, "step": 10151 }, { "epoch": 0.8597925047639212, "grad_norm": 1.8586134422372838, "learning_rate": 5.065242728544328e-07, "loss": 0.629, "step": 10152 }, { "epoch": 0.8598771966970146, "grad_norm": 1.380350752318572, "learning_rate": 5.05922878705567e-07, "loss": 0.612, "step": 10153 }, { "epoch": 0.859961888630108, "grad_norm": 1.1481891157458448, "learning_rate": 5.053218227598005e-07, "loss": 0.6284, "step": 10154 }, { "epoch": 0.8600465805632014, "grad_norm": 0.6786645046331046, "learning_rate": 5.047211050623685e-07, "loss": 0.8403, "step": 10155 }, { "epoch": 0.8601312724962947, "grad_norm": 2.3606614485645414, "learning_rate": 5.041207256584763e-07, "loss": 0.6917, "step": 10156 }, { "epoch": 0.8602159644293881, "grad_norm": 1.2542608109858975, "learning_rate": 5.03520684593306e-07, "loss": 0.5827, "step": 10157 }, { "epoch": 0.8603006563624814, "grad_norm": 1.595210529541003, "learning_rate": 5.02920981912014e-07, "loss": 0.6241, "step": 10158 }, { "epoch": 0.8603853482955749, "grad_norm": 0.574529246634692, "learning_rate": 5.023216176597317e-07, "loss": 0.8531, "step": 10159 }, { "epoch": 0.8604700402286682, "grad_norm": 11.542710103436725, "learning_rate": 5.017225918815638e-07, "loss": 0.6287, "step": 10160 }, { "epoch": 0.8605547321617616, "grad_norm": 1.385704263058509, "learning_rate": 5.011239046225913e-07, "loss": 0.5989, "step": 10161 }, { "epoch": 0.8606394240948549, "grad_norm": 1.3966523536172528, "learning_rate": 5.005255559278665e-07, "loss": 0.6031, "step": 10162 }, { "epoch": 0.8607241160279483, "grad_norm": 1.3458575287003494, "learning_rate": 4.999275458424196e-07, "loss": 0.5943, "step": 10163 }, { "epoch": 0.8608088079610418, "grad_norm": 1.4113614036608837, "learning_rate": 4.993298744112541e-07, "loss": 0.5898, "step": 10164 }, { "epoch": 0.8608934998941351, "grad_norm": 1.5229961648776733, "learning_rate": 4.987325416793476e-07, "loss": 0.6134, "step": 10165 }, { "epoch": 0.8609781918272285, "grad_norm": 1.5046463746313241, "learning_rate": 4.981355476916533e-07, "loss": 0.6978, "step": 10166 }, { "epoch": 0.8610628837603218, "grad_norm": 1.4097209024718853, "learning_rate": 4.975388924930974e-07, "loss": 0.584, "step": 10167 }, { "epoch": 0.8611475756934152, "grad_norm": 1.3419969194342267, "learning_rate": 4.969425761285807e-07, "loss": 0.5947, "step": 10168 }, { "epoch": 0.8612322676265086, "grad_norm": 1.4378251256921784, "learning_rate": 4.963465986429811e-07, "loss": 0.6011, "step": 10169 }, { "epoch": 0.861316959559602, "grad_norm": 1.1727651733290323, "learning_rate": 4.957509600811472e-07, "loss": 0.6283, "step": 10170 }, { "epoch": 0.8614016514926953, "grad_norm": 1.6837504164020776, "learning_rate": 4.951556604879049e-07, "loss": 0.6169, "step": 10171 }, { "epoch": 0.8614863434257887, "grad_norm": 1.3486127836260606, "learning_rate": 4.945606999080526e-07, "loss": 0.6694, "step": 10172 }, { "epoch": 0.861571035358882, "grad_norm": 1.5283454310524673, "learning_rate": 4.939660783863659e-07, "loss": 0.6093, "step": 10173 }, { "epoch": 0.8616557272919755, "grad_norm": 1.5909979672727663, "learning_rate": 4.933717959675927e-07, "loss": 0.6383, "step": 10174 }, { "epoch": 0.8617404192250688, "grad_norm": 1.490694517232942, "learning_rate": 4.927778526964549e-07, "loss": 0.5848, "step": 10175 }, { "epoch": 0.8618251111581622, "grad_norm": 1.9441230873588113, "learning_rate": 4.921842486176509e-07, "loss": 0.6056, "step": 10176 }, { "epoch": 0.8619098030912555, "grad_norm": 1.8408974247181678, "learning_rate": 4.915909837758525e-07, "loss": 0.5823, "step": 10177 }, { "epoch": 0.8619944950243489, "grad_norm": 1.6171716624491315, "learning_rate": 4.909980582157042e-07, "loss": 0.6592, "step": 10178 }, { "epoch": 0.8620791869574423, "grad_norm": 1.288292651704062, "learning_rate": 4.904054719818302e-07, "loss": 0.6311, "step": 10179 }, { "epoch": 0.8621638788905357, "grad_norm": 1.3660561432277452, "learning_rate": 4.898132251188231e-07, "loss": 0.6172, "step": 10180 }, { "epoch": 0.862248570823629, "grad_norm": 1.2999375471767451, "learning_rate": 4.892213176712534e-07, "loss": 0.6541, "step": 10181 }, { "epoch": 0.8623332627567224, "grad_norm": 1.5154557546543101, "learning_rate": 4.886297496836656e-07, "loss": 0.5946, "step": 10182 }, { "epoch": 0.8624179546898157, "grad_norm": 2.0423758729333676, "learning_rate": 4.880385212005778e-07, "loss": 0.6672, "step": 10183 }, { "epoch": 0.8625026466229092, "grad_norm": 2.0062233003347125, "learning_rate": 4.874476322664829e-07, "loss": 0.6145, "step": 10184 }, { "epoch": 0.8625873385560026, "grad_norm": 1.8059943977565516, "learning_rate": 4.868570829258484e-07, "loss": 0.6712, "step": 10185 }, { "epoch": 0.8626720304890959, "grad_norm": 2.134231074048625, "learning_rate": 4.862668732231174e-07, "loss": 0.6299, "step": 10186 }, { "epoch": 0.8627567224221893, "grad_norm": 0.608098835825296, "learning_rate": 4.856770032027059e-07, "loss": 0.8502, "step": 10187 }, { "epoch": 0.8628414143552826, "grad_norm": 1.583373156805671, "learning_rate": 4.850874729090033e-07, "loss": 0.6286, "step": 10188 }, { "epoch": 0.8629261062883761, "grad_norm": 1.8596341533718164, "learning_rate": 4.844982823863764e-07, "loss": 0.5396, "step": 10189 }, { "epoch": 0.8630107982214694, "grad_norm": 1.331166261193669, "learning_rate": 4.839094316791654e-07, "loss": 0.6462, "step": 10190 }, { "epoch": 0.8630954901545628, "grad_norm": 1.387589866686969, "learning_rate": 4.833209208316825e-07, "loss": 0.6042, "step": 10191 }, { "epoch": 0.8631801820876561, "grad_norm": 1.7048552528705072, "learning_rate": 4.827327498882172e-07, "loss": 0.6037, "step": 10192 }, { "epoch": 0.8632648740207495, "grad_norm": 1.6605177507442024, "learning_rate": 4.821449188930321e-07, "loss": 0.6508, "step": 10193 }, { "epoch": 0.8633495659538429, "grad_norm": 1.4818007494426513, "learning_rate": 4.815574278903657e-07, "loss": 0.6336, "step": 10194 }, { "epoch": 0.8634342578869363, "grad_norm": 0.5835481218835477, "learning_rate": 4.809702769244295e-07, "loss": 0.8148, "step": 10195 }, { "epoch": 0.8635189498200296, "grad_norm": 1.4798170959772097, "learning_rate": 4.803834660394091e-07, "loss": 0.693, "step": 10196 }, { "epoch": 0.863603641753123, "grad_norm": 1.4783946516566648, "learning_rate": 4.797969952794651e-07, "loss": 0.6155, "step": 10197 }, { "epoch": 0.8636883336862163, "grad_norm": 1.2997413568169662, "learning_rate": 4.792108646887328e-07, "loss": 0.6175, "step": 10198 }, { "epoch": 0.8637730256193098, "grad_norm": 1.6554794713804721, "learning_rate": 4.786250743113213e-07, "loss": 0.5899, "step": 10199 }, { "epoch": 0.8638577175524031, "grad_norm": 1.363855482424623, "learning_rate": 4.78039624191316e-07, "loss": 0.5795, "step": 10200 }, { "epoch": 0.8639424094854965, "grad_norm": 1.1945359239715245, "learning_rate": 4.774545143727732e-07, "loss": 0.6706, "step": 10201 }, { "epoch": 0.8640271014185898, "grad_norm": 1.7345855908216434, "learning_rate": 4.768697448997256e-07, "loss": 0.6066, "step": 10202 }, { "epoch": 0.8641117933516832, "grad_norm": 1.439877342272114, "learning_rate": 4.762853158161823e-07, "loss": 0.6556, "step": 10203 }, { "epoch": 0.8641964852847767, "grad_norm": 1.270828068679327, "learning_rate": 4.757012271661221e-07, "loss": 0.6473, "step": 10204 }, { "epoch": 0.86428117721787, "grad_norm": 1.4780383419278105, "learning_rate": 4.7511747899350145e-07, "loss": 0.6558, "step": 10205 }, { "epoch": 0.8643658691509634, "grad_norm": 0.631006162167722, "learning_rate": 4.745340713422514e-07, "loss": 0.8342, "step": 10206 }, { "epoch": 0.8644505610840567, "grad_norm": 1.3082873317609385, "learning_rate": 4.7395100425627614e-07, "loss": 0.6543, "step": 10207 }, { "epoch": 0.8645352530171501, "grad_norm": 1.4438929979977422, "learning_rate": 4.733682777794546e-07, "loss": 0.5947, "step": 10208 }, { "epoch": 0.8646199449502435, "grad_norm": 2.0314698548355827, "learning_rate": 4.727858919556394e-07, "loss": 0.5951, "step": 10209 }, { "epoch": 0.8647046368833369, "grad_norm": 1.3631024186203509, "learning_rate": 4.722038468286583e-07, "loss": 0.5956, "step": 10210 }, { "epoch": 0.8647893288164302, "grad_norm": 1.4541369905398698, "learning_rate": 4.7162214244231454e-07, "loss": 0.5863, "step": 10211 }, { "epoch": 0.8648740207495236, "grad_norm": 1.3195473657255543, "learning_rate": 4.7104077884038255e-07, "loss": 0.5776, "step": 10212 }, { "epoch": 0.8649587126826169, "grad_norm": 0.6232453631222206, "learning_rate": 4.7045975606661354e-07, "loss": 0.8359, "step": 10213 }, { "epoch": 0.8650434046157104, "grad_norm": 1.4982830565530063, "learning_rate": 4.698790741647341e-07, "loss": 0.6376, "step": 10214 }, { "epoch": 0.8651280965488037, "grad_norm": 1.897852893110354, "learning_rate": 4.6929873317844153e-07, "loss": 0.6084, "step": 10215 }, { "epoch": 0.8652127884818971, "grad_norm": 1.1975531727927418, "learning_rate": 4.6871873315141205e-07, "loss": 0.6134, "step": 10216 }, { "epoch": 0.8652974804149904, "grad_norm": 1.7572380972438268, "learning_rate": 4.6813907412729067e-07, "loss": 0.5979, "step": 10217 }, { "epoch": 0.8653821723480839, "grad_norm": 5.579975755816602, "learning_rate": 4.675597561497014e-07, "loss": 0.6412, "step": 10218 }, { "epoch": 0.8654668642811773, "grad_norm": 1.310199287835393, "learning_rate": 4.6698077926224107e-07, "loss": 0.6028, "step": 10219 }, { "epoch": 0.8655515562142706, "grad_norm": 1.4431904439679777, "learning_rate": 4.664021435084803e-07, "loss": 0.641, "step": 10220 }, { "epoch": 0.865636248147364, "grad_norm": 1.2387760306472304, "learning_rate": 4.6582384893196553e-07, "loss": 0.5972, "step": 10221 }, { "epoch": 0.8657209400804573, "grad_norm": 1.1240114697324455, "learning_rate": 4.652458955762151e-07, "loss": 0.6075, "step": 10222 }, { "epoch": 0.8658056320135508, "grad_norm": 1.238764974605142, "learning_rate": 4.646682834847238e-07, "loss": 0.6335, "step": 10223 }, { "epoch": 0.8658903239466441, "grad_norm": 1.4134410380087734, "learning_rate": 4.6409101270096067e-07, "loss": 0.6274, "step": 10224 }, { "epoch": 0.8659750158797375, "grad_norm": 2.0900730715251807, "learning_rate": 4.6351408326836655e-07, "loss": 0.6195, "step": 10225 }, { "epoch": 0.8660597078128308, "grad_norm": 1.4829084398579941, "learning_rate": 4.6293749523036004e-07, "loss": 0.5813, "step": 10226 }, { "epoch": 0.8661443997459242, "grad_norm": 1.5176923722845852, "learning_rate": 4.6236124863033137e-07, "loss": 0.6733, "step": 10227 }, { "epoch": 0.8662290916790176, "grad_norm": 1.5066752181863599, "learning_rate": 4.617853435116471e-07, "loss": 0.5982, "step": 10228 }, { "epoch": 0.866313783612111, "grad_norm": 1.7722376044108221, "learning_rate": 4.6120977991764736e-07, "loss": 0.6556, "step": 10229 }, { "epoch": 0.8663984755452043, "grad_norm": 0.5926639136052977, "learning_rate": 4.6063455789164435e-07, "loss": 0.8447, "step": 10230 }, { "epoch": 0.8664831674782977, "grad_norm": 0.6004627853361707, "learning_rate": 4.6005967747692836e-07, "loss": 0.8256, "step": 10231 }, { "epoch": 0.866567859411391, "grad_norm": 1.8175806945644437, "learning_rate": 4.5948513871676313e-07, "loss": 0.6262, "step": 10232 }, { "epoch": 0.8666525513444845, "grad_norm": 1.4715574833230265, "learning_rate": 4.58910941654383e-07, "loss": 0.5914, "step": 10233 }, { "epoch": 0.8667372432775778, "grad_norm": 1.2091523394583334, "learning_rate": 4.58337086333e-07, "loss": 0.6311, "step": 10234 }, { "epoch": 0.8668219352106712, "grad_norm": 1.2862516630941927, "learning_rate": 4.577635727958019e-07, "loss": 0.5928, "step": 10235 }, { "epoch": 0.8669066271437645, "grad_norm": 1.5891648344759026, "learning_rate": 4.571904010859468e-07, "loss": 0.577, "step": 10236 }, { "epoch": 0.8669913190768579, "grad_norm": 1.658878607668389, "learning_rate": 4.566175712465698e-07, "loss": 0.6765, "step": 10237 }, { "epoch": 0.8670760110099514, "grad_norm": 0.5998447091499689, "learning_rate": 4.56045083320778e-07, "loss": 0.852, "step": 10238 }, { "epoch": 0.8671607029430447, "grad_norm": 1.471252434561792, "learning_rate": 4.554729373516553e-07, "loss": 0.6671, "step": 10239 }, { "epoch": 0.867245394876138, "grad_norm": 1.3742087652473092, "learning_rate": 4.5490113338225894e-07, "loss": 0.6695, "step": 10240 }, { "epoch": 0.8673300868092314, "grad_norm": 1.9228123346757144, "learning_rate": 4.543296714556189e-07, "loss": 0.6145, "step": 10241 }, { "epoch": 0.8674147787423248, "grad_norm": 1.458641986107078, "learning_rate": 4.5375855161474194e-07, "loss": 0.5646, "step": 10242 }, { "epoch": 0.8674994706754182, "grad_norm": 1.3658643055012096, "learning_rate": 4.531877739026086e-07, "loss": 0.6026, "step": 10243 }, { "epoch": 0.8675841626085116, "grad_norm": 1.5616576789934642, "learning_rate": 4.526173383621707e-07, "loss": 0.6946, "step": 10244 }, { "epoch": 0.8676688545416049, "grad_norm": 1.702598820551525, "learning_rate": 4.5204724503635835e-07, "loss": 0.5994, "step": 10245 }, { "epoch": 0.8677535464746983, "grad_norm": 0.583153911399063, "learning_rate": 4.514774939680727e-07, "loss": 0.8337, "step": 10246 }, { "epoch": 0.8678382384077916, "grad_norm": 1.4751662832117496, "learning_rate": 4.5090808520019067e-07, "loss": 0.5995, "step": 10247 }, { "epoch": 0.8679229303408851, "grad_norm": 0.7058697733874473, "learning_rate": 4.5033901877556463e-07, "loss": 0.8184, "step": 10248 }, { "epoch": 0.8680076222739784, "grad_norm": 2.1392878676406775, "learning_rate": 4.497702947370186e-07, "loss": 0.573, "step": 10249 }, { "epoch": 0.8680923142070718, "grad_norm": 1.3317094378107748, "learning_rate": 4.492019131273523e-07, "loss": 0.6737, "step": 10250 }, { "epoch": 0.8681770061401651, "grad_norm": 0.6530813344969754, "learning_rate": 4.4863387398934087e-07, "loss": 0.8519, "step": 10251 }, { "epoch": 0.8682616980732585, "grad_norm": 1.2290870068908897, "learning_rate": 4.480661773657297e-07, "loss": 0.6305, "step": 10252 }, { "epoch": 0.8683463900063519, "grad_norm": 0.6696570226683857, "learning_rate": 4.474988232992439e-07, "loss": 0.894, "step": 10253 }, { "epoch": 0.8684310819394453, "grad_norm": 1.458676529075866, "learning_rate": 4.4693181183257605e-07, "loss": 0.5989, "step": 10254 }, { "epoch": 0.8685157738725386, "grad_norm": 1.2268150606981436, "learning_rate": 4.463651430083998e-07, "loss": 0.5759, "step": 10255 }, { "epoch": 0.868600465805632, "grad_norm": 1.7820593688015829, "learning_rate": 4.4579881686936055e-07, "loss": 0.6354, "step": 10256 }, { "epoch": 0.8686851577387253, "grad_norm": 1.4913023310495797, "learning_rate": 4.4523283345807465e-07, "loss": 0.688, "step": 10257 }, { "epoch": 0.8687698496718188, "grad_norm": 1.4942625144875492, "learning_rate": 4.4466719281713756e-07, "loss": 0.6374, "step": 10258 }, { "epoch": 0.8688545416049122, "grad_norm": 2.148395600767162, "learning_rate": 4.4410189498911515e-07, "loss": 0.6411, "step": 10259 }, { "epoch": 0.8689392335380055, "grad_norm": 1.7688058716687713, "learning_rate": 4.4353694001654957e-07, "loss": 0.6233, "step": 10260 }, { "epoch": 0.8690239254710989, "grad_norm": 1.2946222992837466, "learning_rate": 4.429723279419573e-07, "loss": 0.6055, "step": 10261 }, { "epoch": 0.8691086174041922, "grad_norm": 1.4867681020471708, "learning_rate": 4.4240805880782723e-07, "loss": 0.6122, "step": 10262 }, { "epoch": 0.8691933093372857, "grad_norm": 0.6970450181566529, "learning_rate": 4.4184413265662476e-07, "loss": 0.8476, "step": 10263 }, { "epoch": 0.869278001270379, "grad_norm": 1.3085178794279644, "learning_rate": 4.4128054953078867e-07, "loss": 0.6152, "step": 10264 }, { "epoch": 0.8693626932034724, "grad_norm": 1.593734559856946, "learning_rate": 4.4071730947273017e-07, "loss": 0.67, "step": 10265 }, { "epoch": 0.8694473851365657, "grad_norm": 0.621184992292341, "learning_rate": 4.401544125248375e-07, "loss": 0.8574, "step": 10266 }, { "epoch": 0.8695320770696591, "grad_norm": 1.471203529983607, "learning_rate": 4.3959185872947007e-07, "loss": 0.614, "step": 10267 }, { "epoch": 0.8696167690027525, "grad_norm": 1.8964965568941807, "learning_rate": 4.3902964812896355e-07, "loss": 0.6629, "step": 10268 }, { "epoch": 0.8697014609358459, "grad_norm": 0.6041247817826612, "learning_rate": 4.3846778076562734e-07, "loss": 0.8246, "step": 10269 }, { "epoch": 0.8697861528689392, "grad_norm": 1.6638113587794376, "learning_rate": 4.379062566817449e-07, "loss": 0.6553, "step": 10270 }, { "epoch": 0.8698708448020326, "grad_norm": 1.3459575720751737, "learning_rate": 4.3734507591957466e-07, "loss": 0.6809, "step": 10271 }, { "epoch": 0.8699555367351259, "grad_norm": 0.6993651010691556, "learning_rate": 4.3678423852134835e-07, "loss": 0.8261, "step": 10272 }, { "epoch": 0.8700402286682194, "grad_norm": 1.4105875413737694, "learning_rate": 4.362237445292705e-07, "loss": 0.6298, "step": 10273 }, { "epoch": 0.8701249206013127, "grad_norm": 1.3249516309525966, "learning_rate": 4.356635939855236e-07, "loss": 0.6633, "step": 10274 }, { "epoch": 0.8702096125344061, "grad_norm": 1.3270601877621226, "learning_rate": 4.3510378693225827e-07, "loss": 0.6215, "step": 10275 }, { "epoch": 0.8702943044674994, "grad_norm": 0.6609495802041785, "learning_rate": 4.345443234116065e-07, "loss": 0.83, "step": 10276 }, { "epoch": 0.8703789964005928, "grad_norm": 0.6568280780834167, "learning_rate": 4.339852034656705e-07, "loss": 0.8305, "step": 10277 }, { "epoch": 0.8704636883336863, "grad_norm": 1.5686240706414352, "learning_rate": 4.334264271365252e-07, "loss": 0.6408, "step": 10278 }, { "epoch": 0.8705483802667796, "grad_norm": 1.259710072823785, "learning_rate": 4.328679944662223e-07, "loss": 0.5845, "step": 10279 }, { "epoch": 0.870633072199873, "grad_norm": 1.7591006098885624, "learning_rate": 4.3230990549678775e-07, "loss": 0.6798, "step": 10280 }, { "epoch": 0.8707177641329663, "grad_norm": 1.6686103579254095, "learning_rate": 4.3175216027021906e-07, "loss": 0.6105, "step": 10281 }, { "epoch": 0.8708024560660597, "grad_norm": 1.552194197036328, "learning_rate": 4.311947588284904e-07, "loss": 0.6548, "step": 10282 }, { "epoch": 0.8708871479991531, "grad_norm": 1.5443721443363487, "learning_rate": 4.3063770121354873e-07, "loss": 0.6067, "step": 10283 }, { "epoch": 0.8709718399322465, "grad_norm": 1.2217654374929205, "learning_rate": 4.3008098746731674e-07, "loss": 0.6558, "step": 10284 }, { "epoch": 0.8710565318653398, "grad_norm": 0.6270430193149638, "learning_rate": 4.295246176316897e-07, "loss": 0.8465, "step": 10285 }, { "epoch": 0.8711412237984332, "grad_norm": 1.1911878674166043, "learning_rate": 4.289685917485359e-07, "loss": 0.6267, "step": 10286 }, { "epoch": 0.8712259157315265, "grad_norm": 1.6235770063859287, "learning_rate": 4.284129098597006e-07, "loss": 0.632, "step": 10287 }, { "epoch": 0.87131060766462, "grad_norm": 1.122720221899627, "learning_rate": 4.2785757200700274e-07, "loss": 0.5663, "step": 10288 }, { "epoch": 0.8713952995977133, "grad_norm": 1.3589691322764454, "learning_rate": 4.2730257823223156e-07, "loss": 0.6216, "step": 10289 }, { "epoch": 0.8714799915308067, "grad_norm": 1.3060281791618076, "learning_rate": 4.2674792857715584e-07, "loss": 0.6097, "step": 10290 }, { "epoch": 0.8715646834639, "grad_norm": 1.3354625444732438, "learning_rate": 4.2619362308351453e-07, "loss": 0.6545, "step": 10291 }, { "epoch": 0.8716493753969934, "grad_norm": 1.2160890343059363, "learning_rate": 4.256396617930225e-07, "loss": 0.5816, "step": 10292 }, { "epoch": 0.8717340673300868, "grad_norm": 1.1481512978766362, "learning_rate": 4.2508604474736925e-07, "loss": 0.5975, "step": 10293 }, { "epoch": 0.8718187592631802, "grad_norm": 1.5494785851137847, "learning_rate": 4.245327719882153e-07, "loss": 0.5909, "step": 10294 }, { "epoch": 0.8719034511962735, "grad_norm": 1.4371645124552854, "learning_rate": 4.23979843557199e-07, "loss": 0.5539, "step": 10295 }, { "epoch": 0.8719881431293669, "grad_norm": 1.3539865541154454, "learning_rate": 4.2342725949593047e-07, "loss": 0.6673, "step": 10296 }, { "epoch": 0.8720728350624602, "grad_norm": 1.3887766608848104, "learning_rate": 4.2287501984599467e-07, "loss": 0.5994, "step": 10297 }, { "epoch": 0.8721575269955537, "grad_norm": 1.2325201528649883, "learning_rate": 4.2232312464895174e-07, "loss": 0.6039, "step": 10298 }, { "epoch": 0.8722422189286471, "grad_norm": 3.3625457453499803, "learning_rate": 4.217715739463324e-07, "loss": 0.6073, "step": 10299 }, { "epoch": 0.8723269108617404, "grad_norm": 1.762705238610214, "learning_rate": 4.2122036777964556e-07, "loss": 0.6084, "step": 10300 }, { "epoch": 0.8724116027948338, "grad_norm": 1.400375253489798, "learning_rate": 4.2066950619037206e-07, "loss": 0.6273, "step": 10301 }, { "epoch": 0.8724962947279271, "grad_norm": 1.4657271675392785, "learning_rate": 4.2011898921996643e-07, "loss": 0.6415, "step": 10302 }, { "epoch": 0.8725809866610206, "grad_norm": 1.417402633834075, "learning_rate": 4.1956881690985827e-07, "loss": 0.6356, "step": 10303 }, { "epoch": 0.8726656785941139, "grad_norm": 1.502259678654602, "learning_rate": 4.1901898930145123e-07, "loss": 0.6248, "step": 10304 }, { "epoch": 0.8727503705272073, "grad_norm": 1.7690836195514126, "learning_rate": 4.1846950643612273e-07, "loss": 0.6228, "step": 10305 }, { "epoch": 0.8728350624603006, "grad_norm": 1.4062304626310511, "learning_rate": 4.179203683552252e-07, "loss": 0.6323, "step": 10306 }, { "epoch": 0.872919754393394, "grad_norm": 1.9212660306103564, "learning_rate": 4.173715751000823e-07, "loss": 0.6255, "step": 10307 }, { "epoch": 0.8730044463264874, "grad_norm": 1.4322871359625686, "learning_rate": 4.168231267119943e-07, "loss": 0.6269, "step": 10308 }, { "epoch": 0.8730891382595808, "grad_norm": 1.6513055972791197, "learning_rate": 4.1627502323223655e-07, "loss": 0.6631, "step": 10309 }, { "epoch": 0.8731738301926741, "grad_norm": 1.6490404176830389, "learning_rate": 4.1572726470205273e-07, "loss": 0.5861, "step": 10310 }, { "epoch": 0.8732585221257675, "grad_norm": 1.4637545106525327, "learning_rate": 4.151798511626698e-07, "loss": 0.6543, "step": 10311 }, { "epoch": 0.8733432140588608, "grad_norm": 0.6480918513619853, "learning_rate": 4.146327826552793e-07, "loss": 0.8273, "step": 10312 }, { "epoch": 0.8734279059919543, "grad_norm": 1.6874152565034082, "learning_rate": 4.140860592210527e-07, "loss": 0.5944, "step": 10313 }, { "epoch": 0.8735125979250477, "grad_norm": 1.2234184842063758, "learning_rate": 4.135396809011344e-07, "loss": 0.6687, "step": 10314 }, { "epoch": 0.873597289858141, "grad_norm": 1.2662128126064458, "learning_rate": 4.129936477366409e-07, "loss": 0.6412, "step": 10315 }, { "epoch": 0.8736819817912344, "grad_norm": 1.58810519677743, "learning_rate": 4.124479597686648e-07, "loss": 0.6567, "step": 10316 }, { "epoch": 0.8737666737243277, "grad_norm": 1.5181409646293735, "learning_rate": 4.1190261703827175e-07, "loss": 0.657, "step": 10317 }, { "epoch": 0.8738513656574212, "grad_norm": 1.5831527237368055, "learning_rate": 4.113576195865021e-07, "loss": 0.6036, "step": 10318 }, { "epoch": 0.8739360575905145, "grad_norm": 1.468286418750644, "learning_rate": 4.1081296745437036e-07, "loss": 0.5904, "step": 10319 }, { "epoch": 0.8740207495236079, "grad_norm": 1.2360775353899003, "learning_rate": 4.102686606828632e-07, "loss": 0.6014, "step": 10320 }, { "epoch": 0.8741054414567012, "grad_norm": 2.5210957462469836, "learning_rate": 4.0972469931294277e-07, "loss": 0.6183, "step": 10321 }, { "epoch": 0.8741901333897947, "grad_norm": 1.529943908151827, "learning_rate": 4.091810833855464e-07, "loss": 0.6408, "step": 10322 }, { "epoch": 0.874274825322888, "grad_norm": 1.3585399230205, "learning_rate": 4.086378129415819e-07, "loss": 0.6535, "step": 10323 }, { "epoch": 0.8743595172559814, "grad_norm": 1.9264544500716116, "learning_rate": 4.0809488802193486e-07, "loss": 0.6439, "step": 10324 }, { "epoch": 0.8744442091890747, "grad_norm": 2.0044003960625854, "learning_rate": 4.075523086674621e-07, "loss": 0.6114, "step": 10325 }, { "epoch": 0.8745289011221681, "grad_norm": 1.436076910946242, "learning_rate": 4.07010074918997e-07, "loss": 0.6405, "step": 10326 }, { "epoch": 0.8746135930552615, "grad_norm": 1.3779287678414223, "learning_rate": 4.0646818681734534e-07, "loss": 0.6074, "step": 10327 }, { "epoch": 0.8746982849883549, "grad_norm": 1.5007249964531046, "learning_rate": 4.0592664440328555e-07, "loss": 0.6293, "step": 10328 }, { "epoch": 0.8747829769214482, "grad_norm": 1.428667364908275, "learning_rate": 4.053854477175728e-07, "loss": 0.5796, "step": 10329 }, { "epoch": 0.8748676688545416, "grad_norm": 1.7580151863184408, "learning_rate": 4.0484459680093457e-07, "loss": 0.6326, "step": 10330 }, { "epoch": 0.8749523607876349, "grad_norm": 1.7277463358818252, "learning_rate": 4.0430409169407267e-07, "loss": 0.6179, "step": 10331 }, { "epoch": 0.8750370527207284, "grad_norm": 1.782382419581714, "learning_rate": 4.0376393243766466e-07, "loss": 0.6319, "step": 10332 }, { "epoch": 0.8751217446538218, "grad_norm": 1.4653056328231067, "learning_rate": 4.0322411907235736e-07, "loss": 0.6732, "step": 10333 }, { "epoch": 0.8752064365869151, "grad_norm": 1.605950853988704, "learning_rate": 4.026846516387767e-07, "loss": 0.6569, "step": 10334 }, { "epoch": 0.8752911285200085, "grad_norm": 1.22552231559621, "learning_rate": 4.0214553017752066e-07, "loss": 0.6543, "step": 10335 }, { "epoch": 0.8753758204531018, "grad_norm": 1.6618476578206227, "learning_rate": 4.0160675472915967e-07, "loss": 0.6055, "step": 10336 }, { "epoch": 0.8754605123861953, "grad_norm": 1.2522942606241918, "learning_rate": 4.010683253342401e-07, "loss": 0.5976, "step": 10337 }, { "epoch": 0.8755452043192886, "grad_norm": 1.272619766806782, "learning_rate": 4.005302420332813e-07, "loss": 0.5454, "step": 10338 }, { "epoch": 0.875629896252382, "grad_norm": 1.4481042865484623, "learning_rate": 3.9999250486677686e-07, "loss": 0.6774, "step": 10339 }, { "epoch": 0.8757145881854753, "grad_norm": 1.5164475146661223, "learning_rate": 3.9945511387519564e-07, "loss": 0.6473, "step": 10340 }, { "epoch": 0.8757992801185687, "grad_norm": 1.255322308683901, "learning_rate": 3.9891806909897745e-07, "loss": 0.6177, "step": 10341 }, { "epoch": 0.8758839720516621, "grad_norm": 0.6270474582455818, "learning_rate": 3.983813705785383e-07, "loss": 0.8479, "step": 10342 }, { "epoch": 0.8759686639847555, "grad_norm": 1.3089580556385962, "learning_rate": 3.978450183542687e-07, "loss": 0.6089, "step": 10343 }, { "epoch": 0.8760533559178488, "grad_norm": 1.4370992450445468, "learning_rate": 3.973090124665302e-07, "loss": 0.6146, "step": 10344 }, { "epoch": 0.8761380478509422, "grad_norm": 1.626183644255328, "learning_rate": 3.9677335295566e-07, "loss": 0.625, "step": 10345 }, { "epoch": 0.8762227397840355, "grad_norm": 1.358133523506339, "learning_rate": 3.9623803986197195e-07, "loss": 0.6322, "step": 10346 }, { "epoch": 0.876307431717129, "grad_norm": 1.4188065883991916, "learning_rate": 3.9570307322574885e-07, "loss": 0.6086, "step": 10347 }, { "epoch": 0.8763921236502223, "grad_norm": 1.2691314831014715, "learning_rate": 3.951684530872507e-07, "loss": 0.6317, "step": 10348 }, { "epoch": 0.8764768155833157, "grad_norm": 0.6384592292053006, "learning_rate": 3.946341794867098e-07, "loss": 0.8704, "step": 10349 }, { "epoch": 0.876561507516409, "grad_norm": 2.0342342865247582, "learning_rate": 3.941002524643334e-07, "loss": 0.588, "step": 10350 }, { "epoch": 0.8766461994495024, "grad_norm": 1.7904380189472497, "learning_rate": 3.9356667206030265e-07, "loss": 0.5721, "step": 10351 }, { "epoch": 0.8767308913825959, "grad_norm": 1.3992557908280698, "learning_rate": 3.930334383147716e-07, "loss": 0.6178, "step": 10352 }, { "epoch": 0.8768155833156892, "grad_norm": 1.9882440978633653, "learning_rate": 3.9250055126786923e-07, "loss": 0.6153, "step": 10353 }, { "epoch": 0.8769002752487826, "grad_norm": 1.8164283123410159, "learning_rate": 3.919680109596996e-07, "loss": 0.6316, "step": 10354 }, { "epoch": 0.8769849671818759, "grad_norm": 1.9046836887905787, "learning_rate": 3.9143581743033677e-07, "loss": 0.667, "step": 10355 }, { "epoch": 0.8770696591149693, "grad_norm": 1.3117288413230812, "learning_rate": 3.909039707198331e-07, "loss": 0.5614, "step": 10356 }, { "epoch": 0.8771543510480627, "grad_norm": 1.2353833830840628, "learning_rate": 3.903724708682111e-07, "loss": 0.6069, "step": 10357 }, { "epoch": 0.8772390429811561, "grad_norm": 1.2963864193067818, "learning_rate": 3.898413179154692e-07, "loss": 0.5674, "step": 10358 }, { "epoch": 0.8773237349142494, "grad_norm": 0.6200945162594694, "learning_rate": 3.893105119015811e-07, "loss": 0.7953, "step": 10359 }, { "epoch": 0.8774084268473428, "grad_norm": 1.709250700460072, "learning_rate": 3.887800528664909e-07, "loss": 0.6122, "step": 10360 }, { "epoch": 0.8774931187804361, "grad_norm": 1.694830738345366, "learning_rate": 3.882499408501206e-07, "loss": 0.6496, "step": 10361 }, { "epoch": 0.8775778107135296, "grad_norm": 1.668075732317488, "learning_rate": 3.877201758923615e-07, "loss": 0.6141, "step": 10362 }, { "epoch": 0.8776625026466229, "grad_norm": 1.2076869161662476, "learning_rate": 3.8719075803308247e-07, "loss": 0.6187, "step": 10363 }, { "epoch": 0.8777471945797163, "grad_norm": 1.649539061121302, "learning_rate": 3.8666168731212595e-07, "loss": 0.6097, "step": 10364 }, { "epoch": 0.8778318865128096, "grad_norm": 1.5985189315351984, "learning_rate": 3.861329637693051e-07, "loss": 0.6456, "step": 10365 }, { "epoch": 0.877916578445903, "grad_norm": 3.1431665629770027, "learning_rate": 3.856045874444092e-07, "loss": 0.6626, "step": 10366 }, { "epoch": 0.8780012703789964, "grad_norm": 1.1631998114099942, "learning_rate": 3.850765583772048e-07, "loss": 0.6236, "step": 10367 }, { "epoch": 0.8780859623120898, "grad_norm": 0.6088791154656888, "learning_rate": 3.845488766074257e-07, "loss": 0.801, "step": 10368 }, { "epoch": 0.8781706542451831, "grad_norm": 1.5449539016480787, "learning_rate": 3.8402154217478393e-07, "loss": 0.5954, "step": 10369 }, { "epoch": 0.8782553461782765, "grad_norm": 1.2686694331931734, "learning_rate": 3.834945551189634e-07, "loss": 0.5948, "step": 10370 }, { "epoch": 0.8783400381113698, "grad_norm": 1.3785944893240136, "learning_rate": 3.829679154796229e-07, "loss": 0.6423, "step": 10371 }, { "epoch": 0.8784247300444633, "grad_norm": 1.3455588517173627, "learning_rate": 3.8244162329639513e-07, "loss": 0.6139, "step": 10372 }, { "epoch": 0.8785094219775567, "grad_norm": 1.2543787883673296, "learning_rate": 3.819156786088868e-07, "loss": 0.6966, "step": 10373 }, { "epoch": 0.87859411391065, "grad_norm": 1.3888138598008597, "learning_rate": 3.813900814566768e-07, "loss": 0.6437, "step": 10374 }, { "epoch": 0.8786788058437434, "grad_norm": 2.723521271372662, "learning_rate": 3.808648318793212e-07, "loss": 0.6217, "step": 10375 }, { "epoch": 0.8787634977768367, "grad_norm": 1.568390928578021, "learning_rate": 3.8033992991634574e-07, "loss": 0.6105, "step": 10376 }, { "epoch": 0.8788481897099302, "grad_norm": 1.829422042096072, "learning_rate": 3.7981537560725367e-07, "loss": 0.64, "step": 10377 }, { "epoch": 0.8789328816430235, "grad_norm": 2.492830003853567, "learning_rate": 3.792911689915185e-07, "loss": 0.5879, "step": 10378 }, { "epoch": 0.8790175735761169, "grad_norm": 0.6100408154748932, "learning_rate": 3.7876731010859093e-07, "loss": 0.8566, "step": 10379 }, { "epoch": 0.8791022655092102, "grad_norm": 3.735381844910497, "learning_rate": 3.782437989978932e-07, "loss": 0.5948, "step": 10380 }, { "epoch": 0.8791869574423036, "grad_norm": 1.4825218263619577, "learning_rate": 3.777206356988239e-07, "loss": 0.6645, "step": 10381 }, { "epoch": 0.879271649375397, "grad_norm": 1.4866638524226674, "learning_rate": 3.7719782025075203e-07, "loss": 0.6356, "step": 10382 }, { "epoch": 0.8793563413084904, "grad_norm": 1.3713443850646518, "learning_rate": 3.7667535269302445e-07, "loss": 0.6153, "step": 10383 }, { "epoch": 0.8794410332415837, "grad_norm": 1.9511142330934923, "learning_rate": 3.7615323306495755e-07, "loss": 0.5891, "step": 10384 }, { "epoch": 0.8795257251746771, "grad_norm": 2.0000646453729845, "learning_rate": 3.756314614058448e-07, "loss": 0.5795, "step": 10385 }, { "epoch": 0.8796104171077704, "grad_norm": 0.6245402896108887, "learning_rate": 3.7511003775494993e-07, "loss": 0.8026, "step": 10386 }, { "epoch": 0.8796951090408639, "grad_norm": 0.6001835445555331, "learning_rate": 3.7458896215151584e-07, "loss": 0.8212, "step": 10387 }, { "epoch": 0.8797798009739572, "grad_norm": 1.579213653018229, "learning_rate": 3.740682346347557e-07, "loss": 0.6178, "step": 10388 }, { "epoch": 0.8798644929070506, "grad_norm": 2.37117332324065, "learning_rate": 3.73547855243856e-07, "loss": 0.6264, "step": 10389 }, { "epoch": 0.879949184840144, "grad_norm": 1.3228471919139992, "learning_rate": 3.73027824017978e-07, "loss": 0.6151, "step": 10390 }, { "epoch": 0.8800338767732373, "grad_norm": 1.6765432417632, "learning_rate": 3.725081409962583e-07, "loss": 0.6219, "step": 10391 }, { "epoch": 0.8801185687063308, "grad_norm": 1.566579358167863, "learning_rate": 3.719888062178034e-07, "loss": 0.6001, "step": 10392 }, { "epoch": 0.8802032606394241, "grad_norm": 1.2909473367516775, "learning_rate": 3.714698197216976e-07, "loss": 0.6263, "step": 10393 }, { "epoch": 0.8802879525725175, "grad_norm": 0.6456390317281988, "learning_rate": 3.709511815469974e-07, "loss": 0.8194, "step": 10394 }, { "epoch": 0.8803726445056108, "grad_norm": 1.6563670023681007, "learning_rate": 3.7043289173273265e-07, "loss": 0.6617, "step": 10395 }, { "epoch": 0.8804573364387042, "grad_norm": 1.4842151540290707, "learning_rate": 3.699149503179078e-07, "loss": 0.5865, "step": 10396 }, { "epoch": 0.8805420283717976, "grad_norm": 1.3128858860089805, "learning_rate": 3.693973573415e-07, "loss": 0.6922, "step": 10397 }, { "epoch": 0.880626720304891, "grad_norm": 1.7582164799877051, "learning_rate": 3.688801128424624e-07, "loss": 0.6434, "step": 10398 }, { "epoch": 0.8807114122379843, "grad_norm": 1.5854534111290233, "learning_rate": 3.6836321685971786e-07, "loss": 0.6659, "step": 10399 }, { "epoch": 0.8807961041710777, "grad_norm": 1.2294758290822376, "learning_rate": 3.6784666943216695e-07, "loss": 0.6546, "step": 10400 }, { "epoch": 0.880880796104171, "grad_norm": 1.4942218882303044, "learning_rate": 3.67330470598683e-07, "loss": 0.6451, "step": 10401 }, { "epoch": 0.8809654880372645, "grad_norm": 1.4926671447713074, "learning_rate": 3.668146203981121e-07, "loss": 0.5983, "step": 10402 }, { "epoch": 0.8810501799703578, "grad_norm": 1.7135589160109943, "learning_rate": 3.6629911886927494e-07, "loss": 0.6307, "step": 10403 }, { "epoch": 0.8811348719034512, "grad_norm": 1.4559183530719642, "learning_rate": 3.657839660509666e-07, "loss": 0.6177, "step": 10404 }, { "epoch": 0.8812195638365445, "grad_norm": 0.7292653837983116, "learning_rate": 3.652691619819526e-07, "loss": 0.8163, "step": 10405 }, { "epoch": 0.8813042557696379, "grad_norm": 0.6857768194086273, "learning_rate": 3.647547067009777e-07, "loss": 0.8881, "step": 10406 }, { "epoch": 0.8813889477027314, "grad_norm": 2.0378404336369833, "learning_rate": 3.6424060024675413e-07, "loss": 0.5766, "step": 10407 }, { "epoch": 0.8814736396358247, "grad_norm": 1.512657587378349, "learning_rate": 3.6372684265797373e-07, "loss": 0.686, "step": 10408 }, { "epoch": 0.881558331568918, "grad_norm": 1.2413163170223214, "learning_rate": 3.6321343397329956e-07, "loss": 0.6385, "step": 10409 }, { "epoch": 0.8816430235020114, "grad_norm": 1.3443456379836827, "learning_rate": 3.6270037423136675e-07, "loss": 0.612, "step": 10410 }, { "epoch": 0.8817277154351048, "grad_norm": 2.007972280902897, "learning_rate": 3.6218766347078603e-07, "loss": 0.6073, "step": 10411 }, { "epoch": 0.8818124073681982, "grad_norm": 1.4883067104498744, "learning_rate": 3.616753017301433e-07, "loss": 0.6883, "step": 10412 }, { "epoch": 0.8818970993012916, "grad_norm": 1.926688990737492, "learning_rate": 3.611632890479944e-07, "loss": 0.5712, "step": 10413 }, { "epoch": 0.8819817912343849, "grad_norm": 1.4621071939307762, "learning_rate": 3.606516254628711e-07, "loss": 0.6301, "step": 10414 }, { "epoch": 0.8820664831674783, "grad_norm": 1.6738446754483898, "learning_rate": 3.6014031101328006e-07, "loss": 0.6453, "step": 10415 }, { "epoch": 0.8821511751005716, "grad_norm": 2.4503599486961587, "learning_rate": 3.596293457376998e-07, "loss": 0.6161, "step": 10416 }, { "epoch": 0.8822358670336651, "grad_norm": 1.3765242532178736, "learning_rate": 3.591187296745841e-07, "loss": 0.6065, "step": 10417 }, { "epoch": 0.8823205589667584, "grad_norm": 1.1198712223242697, "learning_rate": 3.586084628623576e-07, "loss": 0.8348, "step": 10418 }, { "epoch": 0.8824052508998518, "grad_norm": 0.6723607915502882, "learning_rate": 3.580985453394215e-07, "loss": 0.8347, "step": 10419 }, { "epoch": 0.8824899428329451, "grad_norm": 1.7573224765429643, "learning_rate": 3.57588977144151e-07, "loss": 0.6303, "step": 10420 }, { "epoch": 0.8825746347660385, "grad_norm": 1.9464060833561339, "learning_rate": 3.5707975831489163e-07, "loss": 0.6393, "step": 10421 }, { "epoch": 0.8826593266991319, "grad_norm": 1.2707746592972526, "learning_rate": 3.5657088888996604e-07, "loss": 0.5732, "step": 10422 }, { "epoch": 0.8827440186322253, "grad_norm": 1.6257131208876159, "learning_rate": 3.560623689076692e-07, "loss": 0.6813, "step": 10423 }, { "epoch": 0.8828287105653186, "grad_norm": 2.335735055308412, "learning_rate": 3.5555419840626994e-07, "loss": 0.6037, "step": 10424 }, { "epoch": 0.882913402498412, "grad_norm": 1.2824334012231027, "learning_rate": 3.550463774240115e-07, "loss": 0.6642, "step": 10425 }, { "epoch": 0.8829980944315055, "grad_norm": 1.3408199083181516, "learning_rate": 3.5453890599910834e-07, "loss": 0.6335, "step": 10426 }, { "epoch": 0.8830827863645988, "grad_norm": 4.411588246926356, "learning_rate": 3.540317841697516e-07, "loss": 0.6142, "step": 10427 }, { "epoch": 0.8831674782976922, "grad_norm": 1.6375667576158504, "learning_rate": 3.535250119741046e-07, "loss": 0.5882, "step": 10428 }, { "epoch": 0.8832521702307855, "grad_norm": 1.3431543956875878, "learning_rate": 3.530185894503052e-07, "loss": 0.6281, "step": 10429 }, { "epoch": 0.8833368621638789, "grad_norm": 1.5259164436491508, "learning_rate": 3.5251251663646404e-07, "loss": 0.618, "step": 10430 }, { "epoch": 0.8834215540969723, "grad_norm": 1.1747169770748347, "learning_rate": 3.5200679357066505e-07, "loss": 0.6474, "step": 10431 }, { "epoch": 0.8835062460300657, "grad_norm": 1.426475912929418, "learning_rate": 3.515014202909672e-07, "loss": 0.5972, "step": 10432 }, { "epoch": 0.883590937963159, "grad_norm": 3.372403519877619, "learning_rate": 3.50996396835403e-07, "loss": 0.5577, "step": 10433 }, { "epoch": 0.8836756298962524, "grad_norm": 1.2615931159937115, "learning_rate": 3.504917232419769e-07, "loss": 0.6226, "step": 10434 }, { "epoch": 0.8837603218293457, "grad_norm": 1.2828716264327604, "learning_rate": 3.499873995486691e-07, "loss": 0.6584, "step": 10435 }, { "epoch": 0.8838450137624392, "grad_norm": 0.5898336702651589, "learning_rate": 3.494834257934321e-07, "loss": 0.8629, "step": 10436 }, { "epoch": 0.8839297056955325, "grad_norm": 2.2707917808896614, "learning_rate": 3.489798020141932e-07, "loss": 0.6125, "step": 10437 }, { "epoch": 0.8840143976286259, "grad_norm": 1.5752234945563164, "learning_rate": 3.4847652824885337e-07, "loss": 0.6491, "step": 10438 }, { "epoch": 0.8840990895617192, "grad_norm": 7.599824131472081, "learning_rate": 3.4797360453528497e-07, "loss": 0.6399, "step": 10439 }, { "epoch": 0.8841837814948126, "grad_norm": 1.7930569857772078, "learning_rate": 3.4747103091133604e-07, "loss": 0.6417, "step": 10440 }, { "epoch": 0.884268473427906, "grad_norm": 1.3274369609068386, "learning_rate": 3.4696880741482973e-07, "loss": 0.6331, "step": 10441 }, { "epoch": 0.8843531653609994, "grad_norm": 1.7670385410124752, "learning_rate": 3.46466934083558e-07, "loss": 0.6796, "step": 10442 }, { "epoch": 0.8844378572940927, "grad_norm": 1.4120295682003154, "learning_rate": 3.4596541095529233e-07, "loss": 0.6309, "step": 10443 }, { "epoch": 0.8845225492271861, "grad_norm": 0.5802580300033501, "learning_rate": 3.4546423806777306e-07, "loss": 0.7976, "step": 10444 }, { "epoch": 0.8846072411602794, "grad_norm": 1.7123054927898698, "learning_rate": 3.4496341545871724e-07, "loss": 0.587, "step": 10445 }, { "epoch": 0.8846919330933729, "grad_norm": 1.2528782339824265, "learning_rate": 3.444629431658142e-07, "loss": 0.6121, "step": 10446 }, { "epoch": 0.8847766250264663, "grad_norm": 1.4177435226689348, "learning_rate": 3.439628212267265e-07, "loss": 0.6073, "step": 10447 }, { "epoch": 0.8848613169595596, "grad_norm": 1.914309724195572, "learning_rate": 3.434630496790914e-07, "loss": 0.6544, "step": 10448 }, { "epoch": 0.884946008892653, "grad_norm": 0.620100517147437, "learning_rate": 3.429636285605192e-07, "loss": 0.7787, "step": 10449 }, { "epoch": 0.8850307008257463, "grad_norm": 3.0273526685601615, "learning_rate": 3.424645579085939e-07, "loss": 0.5694, "step": 10450 }, { "epoch": 0.8851153927588398, "grad_norm": 1.3384678808170185, "learning_rate": 3.419658377608748e-07, "loss": 0.5806, "step": 10451 }, { "epoch": 0.8852000846919331, "grad_norm": 1.611542093437057, "learning_rate": 3.4146746815489017e-07, "loss": 0.6088, "step": 10452 }, { "epoch": 0.8852847766250265, "grad_norm": 1.7399805497390328, "learning_rate": 3.409694491281473e-07, "loss": 0.578, "step": 10453 }, { "epoch": 0.8853694685581198, "grad_norm": 1.7838801370081023, "learning_rate": 3.4047178071812515e-07, "loss": 0.5937, "step": 10454 }, { "epoch": 0.8854541604912132, "grad_norm": 0.6555342188313318, "learning_rate": 3.3997446296227366e-07, "loss": 0.8175, "step": 10455 }, { "epoch": 0.8855388524243066, "grad_norm": 1.215575858217338, "learning_rate": 3.3947749589802013e-07, "loss": 0.5769, "step": 10456 }, { "epoch": 0.8856235443574, "grad_norm": 1.6455536406483868, "learning_rate": 3.389808795627636e-07, "loss": 0.6236, "step": 10457 }, { "epoch": 0.8857082362904933, "grad_norm": 1.8819737616481385, "learning_rate": 3.384846139938769e-07, "loss": 0.6445, "step": 10458 }, { "epoch": 0.8857929282235867, "grad_norm": 1.1584373956127687, "learning_rate": 3.379886992287079e-07, "loss": 0.5938, "step": 10459 }, { "epoch": 0.88587762015668, "grad_norm": 2.805560990174605, "learning_rate": 3.374931353045746e-07, "loss": 0.6022, "step": 10460 }, { "epoch": 0.8859623120897735, "grad_norm": 0.6133215830324461, "learning_rate": 3.369979222587727e-07, "loss": 0.8461, "step": 10461 }, { "epoch": 0.8860470040228668, "grad_norm": 1.2422009534089011, "learning_rate": 3.365030601285685e-07, "loss": 0.6289, "step": 10462 }, { "epoch": 0.8861316959559602, "grad_norm": 1.4370504833321716, "learning_rate": 3.3600854895120326e-07, "loss": 0.6479, "step": 10463 }, { "epoch": 0.8862163878890535, "grad_norm": 1.2820754416793039, "learning_rate": 3.3551438876389285e-07, "loss": 0.5948, "step": 10464 }, { "epoch": 0.8863010798221469, "grad_norm": 1.6006281387595662, "learning_rate": 3.350205796038236e-07, "loss": 0.6426, "step": 10465 }, { "epoch": 0.8863857717552404, "grad_norm": 1.296222441981502, "learning_rate": 3.3452712150815746e-07, "loss": 0.6171, "step": 10466 }, { "epoch": 0.8864704636883337, "grad_norm": 1.8136134461330509, "learning_rate": 3.340340145140314e-07, "loss": 0.6113, "step": 10467 }, { "epoch": 0.8865551556214271, "grad_norm": 1.2502491682276866, "learning_rate": 3.3354125865855236e-07, "loss": 0.6382, "step": 10468 }, { "epoch": 0.8866398475545204, "grad_norm": 1.3434286158061717, "learning_rate": 3.3304885397880407e-07, "loss": 0.6559, "step": 10469 }, { "epoch": 0.8867245394876138, "grad_norm": 1.5425749647083862, "learning_rate": 3.325568005118418e-07, "loss": 0.6728, "step": 10470 }, { "epoch": 0.8868092314207072, "grad_norm": 1.6698242185131362, "learning_rate": 3.3206509829469546e-07, "loss": 0.6014, "step": 10471 }, { "epoch": 0.8868939233538006, "grad_norm": 1.507147009642243, "learning_rate": 3.315737473643693e-07, "loss": 0.6496, "step": 10472 }, { "epoch": 0.8869786152868939, "grad_norm": 1.47309835654955, "learning_rate": 3.3108274775783824e-07, "loss": 0.6417, "step": 10473 }, { "epoch": 0.8870633072199873, "grad_norm": 1.3746674954278617, "learning_rate": 3.3059209951205375e-07, "loss": 0.6518, "step": 10474 }, { "epoch": 0.8871479991530806, "grad_norm": 1.3181861887312791, "learning_rate": 3.301018026639402e-07, "loss": 0.6547, "step": 10475 }, { "epoch": 0.8872326910861741, "grad_norm": 1.3680026856171856, "learning_rate": 3.296118572503931e-07, "loss": 0.6125, "step": 10476 }, { "epoch": 0.8873173830192674, "grad_norm": 1.2380732353806225, "learning_rate": 3.2912226330828466e-07, "loss": 0.6409, "step": 10477 }, { "epoch": 0.8874020749523608, "grad_norm": 0.7043124056545167, "learning_rate": 3.2863302087446035e-07, "loss": 0.8281, "step": 10478 }, { "epoch": 0.8874867668854541, "grad_norm": 1.1324450501178898, "learning_rate": 3.281441299857363e-07, "loss": 0.5866, "step": 10479 }, { "epoch": 0.8875714588185475, "grad_norm": 1.353781681317667, "learning_rate": 3.276555906789064e-07, "loss": 0.6612, "step": 10480 }, { "epoch": 0.887656150751641, "grad_norm": 1.1232328168922545, "learning_rate": 3.2716740299073345e-07, "loss": 0.5262, "step": 10481 }, { "epoch": 0.8877408426847343, "grad_norm": 1.311533612246343, "learning_rate": 3.2667956695795755e-07, "loss": 0.6332, "step": 10482 }, { "epoch": 0.8878255346178276, "grad_norm": 1.5436710007374186, "learning_rate": 3.261920826172904e-07, "loss": 0.6483, "step": 10483 }, { "epoch": 0.887910226550921, "grad_norm": 1.2053411102280218, "learning_rate": 3.257049500054177e-07, "loss": 0.6172, "step": 10484 }, { "epoch": 0.8879949184840144, "grad_norm": 0.5882036515456446, "learning_rate": 3.252181691589995e-07, "loss": 0.8621, "step": 10485 }, { "epoch": 0.8880796104171078, "grad_norm": 0.5876542576409262, "learning_rate": 3.247317401146688e-07, "loss": 0.868, "step": 10486 }, { "epoch": 0.8881643023502012, "grad_norm": 1.3298941582631398, "learning_rate": 3.242456629090307e-07, "loss": 0.588, "step": 10487 }, { "epoch": 0.8882489942832945, "grad_norm": 1.2080680167142757, "learning_rate": 3.237599375786665e-07, "loss": 0.5925, "step": 10488 }, { "epoch": 0.8883336862163879, "grad_norm": 0.5976770949122743, "learning_rate": 3.2327456416012813e-07, "loss": 0.8444, "step": 10489 }, { "epoch": 0.8884183781494812, "grad_norm": 1.4938099720207494, "learning_rate": 3.2278954268994357e-07, "loss": 0.6262, "step": 10490 }, { "epoch": 0.8885030700825747, "grad_norm": 1.4829221131104062, "learning_rate": 3.2230487320461247e-07, "loss": 0.6053, "step": 10491 }, { "epoch": 0.888587762015668, "grad_norm": 0.6099788663828378, "learning_rate": 3.2182055574060956e-07, "loss": 0.8447, "step": 10492 }, { "epoch": 0.8886724539487614, "grad_norm": 1.636086931094383, "learning_rate": 3.2133659033438183e-07, "loss": 0.6732, "step": 10493 }, { "epoch": 0.8887571458818547, "grad_norm": 1.275661362411808, "learning_rate": 3.208529770223506e-07, "loss": 0.6333, "step": 10494 }, { "epoch": 0.8888418378149481, "grad_norm": 0.5898208468899948, "learning_rate": 3.2036971584091025e-07, "loss": 0.8176, "step": 10495 }, { "epoch": 0.8889265297480415, "grad_norm": 1.2682942673307969, "learning_rate": 3.198868068264288e-07, "loss": 0.651, "step": 10496 }, { "epoch": 0.8890112216811349, "grad_norm": 0.6083277182551218, "learning_rate": 3.1940425001524667e-07, "loss": 0.8801, "step": 10497 }, { "epoch": 0.8890959136142282, "grad_norm": 1.210804879324962, "learning_rate": 3.189220454436792e-07, "loss": 0.5827, "step": 10498 }, { "epoch": 0.8891806055473216, "grad_norm": 1.6677983463253896, "learning_rate": 3.184401931480169e-07, "loss": 0.6538, "step": 10499 }, { "epoch": 0.8892652974804149, "grad_norm": 1.5048294661516373, "learning_rate": 3.17958693164519e-07, "loss": 0.6208, "step": 10500 }, { "epoch": 0.8893499894135084, "grad_norm": 1.5253412119920757, "learning_rate": 3.174775455294232e-07, "loss": 0.5834, "step": 10501 }, { "epoch": 0.8894346813466018, "grad_norm": 0.6085719323358255, "learning_rate": 3.1699675027893616e-07, "loss": 0.8802, "step": 10502 }, { "epoch": 0.8895193732796951, "grad_norm": 1.5151821462397848, "learning_rate": 3.165163074492411e-07, "loss": 0.6552, "step": 10503 }, { "epoch": 0.8896040652127885, "grad_norm": 1.175485726690046, "learning_rate": 3.160362170764947e-07, "loss": 0.6341, "step": 10504 }, { "epoch": 0.8896887571458818, "grad_norm": 0.6053083767040477, "learning_rate": 3.155564791968252e-07, "loss": 0.7896, "step": 10505 }, { "epoch": 0.8897734490789753, "grad_norm": 1.3147206484132894, "learning_rate": 3.1507709384633656e-07, "loss": 0.6163, "step": 10506 }, { "epoch": 0.8898581410120686, "grad_norm": 1.9602047806214105, "learning_rate": 3.1459806106110435e-07, "loss": 0.6647, "step": 10507 }, { "epoch": 0.889942832945162, "grad_norm": 0.6183506988293359, "learning_rate": 3.141193808771786e-07, "loss": 0.8321, "step": 10508 }, { "epoch": 0.8900275248782553, "grad_norm": 1.2716021909687893, "learning_rate": 3.1364105333058224e-07, "loss": 0.6163, "step": 10509 }, { "epoch": 0.8901122168113487, "grad_norm": 1.2347578570384192, "learning_rate": 3.1316307845731195e-07, "loss": 0.6731, "step": 10510 }, { "epoch": 0.8901969087444421, "grad_norm": 1.2713592988013942, "learning_rate": 3.126854562933379e-07, "loss": 0.64, "step": 10511 }, { "epoch": 0.8902816006775355, "grad_norm": 1.4096676094188008, "learning_rate": 3.1220818687460355e-07, "loss": 0.6607, "step": 10512 }, { "epoch": 0.8903662926106288, "grad_norm": 1.288169835750137, "learning_rate": 3.117312702370262e-07, "loss": 0.5953, "step": 10513 }, { "epoch": 0.8904509845437222, "grad_norm": 1.2215742729954095, "learning_rate": 3.112547064164967e-07, "loss": 0.6737, "step": 10514 }, { "epoch": 0.8905356764768155, "grad_norm": 1.2549577710918396, "learning_rate": 3.1077849544887905e-07, "loss": 0.5896, "step": 10515 }, { "epoch": 0.890620368409909, "grad_norm": 2.7717218792622615, "learning_rate": 3.103026373700091e-07, "loss": 0.6527, "step": 10516 }, { "epoch": 0.8907050603430023, "grad_norm": 1.3270278084117855, "learning_rate": 3.0982713221570037e-07, "loss": 0.5951, "step": 10517 }, { "epoch": 0.8907897522760957, "grad_norm": 1.2299334320010789, "learning_rate": 3.0935198002173315e-07, "loss": 0.6462, "step": 10518 }, { "epoch": 0.890874444209189, "grad_norm": 2.1569654330844528, "learning_rate": 3.0887718082386886e-07, "loss": 0.6805, "step": 10519 }, { "epoch": 0.8909591361422824, "grad_norm": 1.1710065318534362, "learning_rate": 3.0840273465783834e-07, "loss": 0.5853, "step": 10520 }, { "epoch": 0.8910438280753759, "grad_norm": 1.271427882395717, "learning_rate": 3.079286415593441e-07, "loss": 0.6183, "step": 10521 }, { "epoch": 0.8911285200084692, "grad_norm": 2.491436706524413, "learning_rate": 3.0745490156406545e-07, "loss": 0.6363, "step": 10522 }, { "epoch": 0.8912132119415626, "grad_norm": 1.4024123845423908, "learning_rate": 3.069815147076549e-07, "loss": 0.671, "step": 10523 }, { "epoch": 0.8912979038746559, "grad_norm": 1.8394935705021886, "learning_rate": 3.065084810257346e-07, "loss": 0.6491, "step": 10524 }, { "epoch": 0.8913825958077494, "grad_norm": 1.1741953684563047, "learning_rate": 3.0603580055390435e-07, "loss": 0.6204, "step": 10525 }, { "epoch": 0.8914672877408427, "grad_norm": 1.3633078279099526, "learning_rate": 3.055634733277363e-07, "loss": 0.6652, "step": 10526 }, { "epoch": 0.8915519796739361, "grad_norm": 1.3043227809851508, "learning_rate": 3.050914993827747e-07, "loss": 0.6109, "step": 10527 }, { "epoch": 0.8916366716070294, "grad_norm": 1.44978329140473, "learning_rate": 3.0461987875453956e-07, "loss": 0.654, "step": 10528 }, { "epoch": 0.8917213635401228, "grad_norm": 1.5379111478466942, "learning_rate": 3.041486114785208e-07, "loss": 0.6909, "step": 10529 }, { "epoch": 0.8918060554732162, "grad_norm": 2.680500819407152, "learning_rate": 3.036776975901845e-07, "loss": 0.5984, "step": 10530 }, { "epoch": 0.8918907474063096, "grad_norm": 0.6073408767694927, "learning_rate": 3.032071371249706e-07, "loss": 0.8693, "step": 10531 }, { "epoch": 0.8919754393394029, "grad_norm": 1.7794800009782268, "learning_rate": 3.0273693011828974e-07, "loss": 0.6256, "step": 10532 }, { "epoch": 0.8920601312724963, "grad_norm": 1.685740402925563, "learning_rate": 3.02267076605528e-07, "loss": 0.6542, "step": 10533 }, { "epoch": 0.8921448232055896, "grad_norm": 1.375017886092462, "learning_rate": 3.0179757662204433e-07, "loss": 0.5826, "step": 10534 }, { "epoch": 0.8922295151386831, "grad_norm": 1.2711877358504773, "learning_rate": 3.013284302031716e-07, "loss": 0.6737, "step": 10535 }, { "epoch": 0.8923142070717764, "grad_norm": 1.5909194190850016, "learning_rate": 3.0085963738421543e-07, "loss": 0.6049, "step": 10536 }, { "epoch": 0.8923988990048698, "grad_norm": 0.5998330469752613, "learning_rate": 3.003911982004543e-07, "loss": 0.8558, "step": 10537 }, { "epoch": 0.8924835909379631, "grad_norm": 1.5355638066810366, "learning_rate": 2.9992311268714157e-07, "loss": 0.6345, "step": 10538 }, { "epoch": 0.8925682828710565, "grad_norm": 2.025979235891389, "learning_rate": 2.9945538087950086e-07, "loss": 0.6115, "step": 10539 }, { "epoch": 0.89265297480415, "grad_norm": 2.5232950193234953, "learning_rate": 2.9898800281273453e-07, "loss": 0.6122, "step": 10540 }, { "epoch": 0.8927376667372433, "grad_norm": 2.0133726218052304, "learning_rate": 2.98520978522015e-07, "loss": 0.6523, "step": 10541 }, { "epoch": 0.8928223586703367, "grad_norm": 1.223316729094474, "learning_rate": 2.980543080424858e-07, "loss": 0.6547, "step": 10542 }, { "epoch": 0.89290705060343, "grad_norm": 1.5853689651591076, "learning_rate": 2.975879914092689e-07, "loss": 0.6081, "step": 10543 }, { "epoch": 0.8929917425365234, "grad_norm": 0.6122389795261738, "learning_rate": 2.971220286574561e-07, "loss": 0.8481, "step": 10544 }, { "epoch": 0.8930764344696168, "grad_norm": 1.6159917783446947, "learning_rate": 2.966564198221128e-07, "loss": 0.6353, "step": 10545 }, { "epoch": 0.8931611264027102, "grad_norm": 1.6984451199406643, "learning_rate": 2.9619116493827983e-07, "loss": 0.6345, "step": 10546 }, { "epoch": 0.8932458183358035, "grad_norm": 1.4115869324323416, "learning_rate": 2.9572626404096915e-07, "loss": 0.6284, "step": 10547 }, { "epoch": 0.8933305102688969, "grad_norm": 1.4271087234636393, "learning_rate": 2.952617171651678e-07, "loss": 0.6288, "step": 10548 }, { "epoch": 0.8934152022019902, "grad_norm": 1.5743153965610066, "learning_rate": 2.9479752434583507e-07, "loss": 0.6374, "step": 10549 }, { "epoch": 0.8934998941350837, "grad_norm": 1.8414684174629519, "learning_rate": 2.9433368561790354e-07, "loss": 0.6449, "step": 10550 }, { "epoch": 0.893584586068177, "grad_norm": 0.6191436798010189, "learning_rate": 2.938702010162797e-07, "loss": 0.8294, "step": 10551 }, { "epoch": 0.8936692780012704, "grad_norm": 1.8057104006282474, "learning_rate": 2.934070705758446e-07, "loss": 0.5826, "step": 10552 }, { "epoch": 0.8937539699343637, "grad_norm": 1.1525286328492885, "learning_rate": 2.9294429433144864e-07, "loss": 0.6249, "step": 10553 }, { "epoch": 0.8938386618674571, "grad_norm": 1.169130129129566, "learning_rate": 2.9248187231792016e-07, "loss": 0.6576, "step": 10554 }, { "epoch": 0.8939233538005505, "grad_norm": 1.6627600267869374, "learning_rate": 2.9201980457005785e-07, "loss": 0.6244, "step": 10555 }, { "epoch": 0.8940080457336439, "grad_norm": 1.4067936947116626, "learning_rate": 2.9155809112263513e-07, "loss": 0.6586, "step": 10556 }, { "epoch": 0.8940927376667372, "grad_norm": 1.282908060275024, "learning_rate": 2.9109673201039967e-07, "loss": 0.6343, "step": 10557 }, { "epoch": 0.8941774295998306, "grad_norm": 1.7392202655526825, "learning_rate": 2.9063572726806875e-07, "loss": 0.645, "step": 10558 }, { "epoch": 0.894262121532924, "grad_norm": 1.824346303289469, "learning_rate": 2.9017507693033684e-07, "loss": 0.614, "step": 10559 }, { "epoch": 0.8943468134660174, "grad_norm": 1.1857344667475787, "learning_rate": 2.8971478103187014e-07, "loss": 0.5619, "step": 10560 }, { "epoch": 0.8944315053991108, "grad_norm": 1.2608796638946262, "learning_rate": 2.8925483960730807e-07, "loss": 0.5574, "step": 10561 }, { "epoch": 0.8945161973322041, "grad_norm": 1.6147580800337584, "learning_rate": 2.887952526912646e-07, "loss": 0.6302, "step": 10562 }, { "epoch": 0.8946008892652975, "grad_norm": 1.2839185033582956, "learning_rate": 2.8833602031832495e-07, "loss": 0.61, "step": 10563 }, { "epoch": 0.8946855811983908, "grad_norm": 1.2810751933599602, "learning_rate": 2.878771425230492e-07, "loss": 0.6389, "step": 10564 }, { "epoch": 0.8947702731314843, "grad_norm": 1.1927976983508473, "learning_rate": 2.8741861933997084e-07, "loss": 0.6009, "step": 10565 }, { "epoch": 0.8948549650645776, "grad_norm": 1.6227130032888333, "learning_rate": 2.8696045080359505e-07, "loss": 0.6426, "step": 10566 }, { "epoch": 0.894939656997671, "grad_norm": 1.5525994545027437, "learning_rate": 2.8650263694840194e-07, "loss": 0.627, "step": 10567 }, { "epoch": 0.8950243489307643, "grad_norm": 1.2585424182230542, "learning_rate": 2.8604517780884465e-07, "loss": 0.6309, "step": 10568 }, { "epoch": 0.8951090408638577, "grad_norm": 1.8960387856948313, "learning_rate": 2.8558807341934944e-07, "loss": 0.6438, "step": 10569 }, { "epoch": 0.8951937327969511, "grad_norm": 1.422206308912556, "learning_rate": 2.851313238143161e-07, "loss": 0.5712, "step": 10570 }, { "epoch": 0.8952784247300445, "grad_norm": 1.3995370742588396, "learning_rate": 2.84674929028117e-07, "loss": 0.6388, "step": 10571 }, { "epoch": 0.8953631166631378, "grad_norm": 1.7899990990255934, "learning_rate": 2.8421888909509753e-07, "loss": 0.6256, "step": 10572 }, { "epoch": 0.8954478085962312, "grad_norm": 1.329401982522098, "learning_rate": 2.8376320404957914e-07, "loss": 0.5688, "step": 10573 }, { "epoch": 0.8955325005293245, "grad_norm": 1.3644083240319345, "learning_rate": 2.8330787392585156e-07, "loss": 0.574, "step": 10574 }, { "epoch": 0.895617192462418, "grad_norm": 1.628762302496199, "learning_rate": 2.828528987581841e-07, "loss": 0.644, "step": 10575 }, { "epoch": 0.8957018843955113, "grad_norm": 1.6470245095220235, "learning_rate": 2.823982785808138e-07, "loss": 0.6246, "step": 10576 }, { "epoch": 0.8957865763286047, "grad_norm": 1.8106658062550511, "learning_rate": 2.8194401342795386e-07, "loss": 0.6661, "step": 10577 }, { "epoch": 0.895871268261698, "grad_norm": 1.1206938757025278, "learning_rate": 2.8149010333379077e-07, "loss": 0.5746, "step": 10578 }, { "epoch": 0.8959559601947914, "grad_norm": 1.8055112071233994, "learning_rate": 2.8103654833248283e-07, "loss": 0.662, "step": 10579 }, { "epoch": 0.8960406521278849, "grad_norm": 1.4375520227680416, "learning_rate": 2.8058334845816214e-07, "loss": 0.5652, "step": 10580 }, { "epoch": 0.8961253440609782, "grad_norm": 1.3671925691846978, "learning_rate": 2.8013050374493533e-07, "loss": 0.6516, "step": 10581 }, { "epoch": 0.8962100359940716, "grad_norm": 1.8568065317465332, "learning_rate": 2.7967801422688124e-07, "loss": 0.612, "step": 10582 }, { "epoch": 0.8962947279271649, "grad_norm": 1.4889765374446182, "learning_rate": 2.7922587993805206e-07, "loss": 0.6, "step": 10583 }, { "epoch": 0.8963794198602583, "grad_norm": 1.167728643548217, "learning_rate": 2.787741009124728e-07, "loss": 0.5839, "step": 10584 }, { "epoch": 0.8964641117933517, "grad_norm": 1.4141620503562096, "learning_rate": 2.783226771841424e-07, "loss": 0.6552, "step": 10585 }, { "epoch": 0.8965488037264451, "grad_norm": 1.7013326343813717, "learning_rate": 2.778716087870337e-07, "loss": 0.6415, "step": 10586 }, { "epoch": 0.8966334956595384, "grad_norm": 1.822739874698979, "learning_rate": 2.7742089575509056e-07, "loss": 0.6153, "step": 10587 }, { "epoch": 0.8967181875926318, "grad_norm": 1.3511455321828933, "learning_rate": 2.7697053812223206e-07, "loss": 0.607, "step": 10588 }, { "epoch": 0.8968028795257251, "grad_norm": 1.4157994431947047, "learning_rate": 2.765205359223505e-07, "loss": 0.5976, "step": 10589 }, { "epoch": 0.8968875714588186, "grad_norm": 1.4343349874205262, "learning_rate": 2.7607088918931044e-07, "loss": 0.6325, "step": 10590 }, { "epoch": 0.8969722633919119, "grad_norm": 1.2609237944587923, "learning_rate": 2.75621597956951e-07, "loss": 0.6104, "step": 10591 }, { "epoch": 0.8970569553250053, "grad_norm": 1.4154373618087366, "learning_rate": 2.751726622590828e-07, "loss": 0.6321, "step": 10592 }, { "epoch": 0.8971416472580986, "grad_norm": 1.8906709615265562, "learning_rate": 2.7472408212949053e-07, "loss": 0.6942, "step": 10593 }, { "epoch": 0.897226339191192, "grad_norm": 1.7787068085524635, "learning_rate": 2.7427585760193274e-07, "loss": 0.6869, "step": 10594 }, { "epoch": 0.8973110311242855, "grad_norm": 1.1000548230302836, "learning_rate": 2.7382798871014026e-07, "loss": 0.6129, "step": 10595 }, { "epoch": 0.8973957230573788, "grad_norm": 2.0720527486996305, "learning_rate": 2.733804754878183e-07, "loss": 0.6136, "step": 10596 }, { "epoch": 0.8974804149904722, "grad_norm": 1.164412556529549, "learning_rate": 2.7293331796864497e-07, "loss": 0.6056, "step": 10597 }, { "epoch": 0.8975651069235655, "grad_norm": 1.355863339100404, "learning_rate": 2.724865161862694e-07, "loss": 0.6049, "step": 10598 }, { "epoch": 0.8976497988566589, "grad_norm": 1.8951974575285475, "learning_rate": 2.7204007017431756e-07, "loss": 0.6786, "step": 10599 }, { "epoch": 0.8977344907897523, "grad_norm": 1.4061024868532568, "learning_rate": 2.715939799663858e-07, "loss": 0.6207, "step": 10600 }, { "epoch": 0.8978191827228457, "grad_norm": 1.7496471510104916, "learning_rate": 2.7114824559604515e-07, "loss": 0.681, "step": 10601 }, { "epoch": 0.897903874655939, "grad_norm": 1.785385588308826, "learning_rate": 2.7070286709683924e-07, "loss": 0.6563, "step": 10602 }, { "epoch": 0.8979885665890324, "grad_norm": 1.5409055246730838, "learning_rate": 2.702578445022852e-07, "loss": 0.6467, "step": 10603 }, { "epoch": 0.8980732585221257, "grad_norm": 1.5189536708473765, "learning_rate": 2.6981317784587457e-07, "loss": 0.6904, "step": 10604 }, { "epoch": 0.8981579504552192, "grad_norm": 1.502445389063115, "learning_rate": 2.6936886716106893e-07, "loss": 0.5868, "step": 10605 }, { "epoch": 0.8982426423883125, "grad_norm": 1.5557847863829777, "learning_rate": 2.689249124813065e-07, "loss": 0.6218, "step": 10606 }, { "epoch": 0.8983273343214059, "grad_norm": 1.3563005968898028, "learning_rate": 2.684813138399967e-07, "loss": 0.6704, "step": 10607 }, { "epoch": 0.8984120262544992, "grad_norm": 1.1120706538336222, "learning_rate": 2.6803807127052215e-07, "loss": 0.5465, "step": 10608 }, { "epoch": 0.8984967181875926, "grad_norm": 2.5598415247577604, "learning_rate": 2.6759518480623856e-07, "loss": 0.6061, "step": 10609 }, { "epoch": 0.898581410120686, "grad_norm": 1.678888732935597, "learning_rate": 2.6715265448047864e-07, "loss": 0.6639, "step": 10610 }, { "epoch": 0.8986661020537794, "grad_norm": 1.8025892536879524, "learning_rate": 2.6671048032654187e-07, "loss": 0.6301, "step": 10611 }, { "epoch": 0.8987507939868727, "grad_norm": 1.5973108454758713, "learning_rate": 2.662686623777061e-07, "loss": 0.5962, "step": 10612 }, { "epoch": 0.8988354859199661, "grad_norm": 8.632418127454951, "learning_rate": 2.6582720066721966e-07, "loss": 0.5771, "step": 10613 }, { "epoch": 0.8989201778530594, "grad_norm": 1.2604516652613016, "learning_rate": 2.653860952283044e-07, "loss": 0.6212, "step": 10614 }, { "epoch": 0.8990048697861529, "grad_norm": 1.3472764314483352, "learning_rate": 2.64945346094157e-07, "loss": 0.6945, "step": 10615 }, { "epoch": 0.8990895617192463, "grad_norm": 2.0017300250202568, "learning_rate": 2.645049532979449e-07, "loss": 0.6537, "step": 10616 }, { "epoch": 0.8991742536523396, "grad_norm": 1.2315204751256081, "learning_rate": 2.64064916872811e-07, "loss": 0.5447, "step": 10617 }, { "epoch": 0.899258945585433, "grad_norm": 1.42916654108193, "learning_rate": 2.63625236851871e-07, "loss": 0.5977, "step": 10618 }, { "epoch": 0.8993436375185263, "grad_norm": 4.499800106526928, "learning_rate": 2.631859132682113e-07, "loss": 0.6268, "step": 10619 }, { "epoch": 0.8994283294516198, "grad_norm": 1.2168036817321337, "learning_rate": 2.6274694615489536e-07, "loss": 0.5987, "step": 10620 }, { "epoch": 0.8995130213847131, "grad_norm": 1.1294162452736232, "learning_rate": 2.623083355449557e-07, "loss": 0.6003, "step": 10621 }, { "epoch": 0.8995977133178065, "grad_norm": 1.2535765294421828, "learning_rate": 2.618700814714009e-07, "loss": 0.6307, "step": 10622 }, { "epoch": 0.8996824052508998, "grad_norm": 1.218463130443271, "learning_rate": 2.614321839672118e-07, "loss": 0.6323, "step": 10623 }, { "epoch": 0.8997670971839932, "grad_norm": 1.2783577732654483, "learning_rate": 2.6099464306534316e-07, "loss": 0.6138, "step": 10624 }, { "epoch": 0.8998517891170866, "grad_norm": 0.639249610505894, "learning_rate": 2.60557458798722e-07, "loss": 0.8885, "step": 10625 }, { "epoch": 0.89993648105018, "grad_norm": 1.7511677715599245, "learning_rate": 2.601206312002491e-07, "loss": 0.6673, "step": 10626 }, { "epoch": 0.9000211729832733, "grad_norm": 1.3489716673896963, "learning_rate": 2.5968416030279666e-07, "loss": 0.5992, "step": 10627 }, { "epoch": 0.9001058649163667, "grad_norm": 1.5680885116905474, "learning_rate": 2.592480461392133e-07, "loss": 0.6465, "step": 10628 }, { "epoch": 0.9001905568494601, "grad_norm": 1.9588068826081082, "learning_rate": 2.5881228874231724e-07, "loss": 0.6747, "step": 10629 }, { "epoch": 0.9002752487825535, "grad_norm": 2.194897754423742, "learning_rate": 2.5837688814490113e-07, "loss": 0.6219, "step": 10630 }, { "epoch": 0.9003599407156468, "grad_norm": 2.9782581444650353, "learning_rate": 2.5794184437973436e-07, "loss": 0.6448, "step": 10631 }, { "epoch": 0.9004446326487402, "grad_norm": 1.5349327581546366, "learning_rate": 2.575071574795529e-07, "loss": 0.6127, "step": 10632 }, { "epoch": 0.9005293245818335, "grad_norm": 0.5996546452029775, "learning_rate": 2.570728274770706e-07, "loss": 0.7776, "step": 10633 }, { "epoch": 0.900614016514927, "grad_norm": 1.5759624254200195, "learning_rate": 2.5663885440497415e-07, "loss": 0.6494, "step": 10634 }, { "epoch": 0.9006987084480204, "grad_norm": 1.4009301303014243, "learning_rate": 2.5620523829592015e-07, "loss": 0.6241, "step": 10635 }, { "epoch": 0.9007834003811137, "grad_norm": 1.6263096427554682, "learning_rate": 2.5577197918254137e-07, "loss": 0.5723, "step": 10636 }, { "epoch": 0.9008680923142071, "grad_norm": 1.9345696531780396, "learning_rate": 2.553390770974434e-07, "loss": 0.6377, "step": 10637 }, { "epoch": 0.9009527842473004, "grad_norm": 0.5689462844270251, "learning_rate": 2.5490653207320415e-07, "loss": 0.8755, "step": 10638 }, { "epoch": 0.9010374761803939, "grad_norm": 1.261339444523782, "learning_rate": 2.5447434414237524e-07, "loss": 0.6861, "step": 10639 }, { "epoch": 0.9011221681134872, "grad_norm": 1.276953016538322, "learning_rate": 2.540425133374802e-07, "loss": 0.6058, "step": 10640 }, { "epoch": 0.9012068600465806, "grad_norm": 1.6522397508582396, "learning_rate": 2.5361103969101744e-07, "loss": 0.6555, "step": 10641 }, { "epoch": 0.9012915519796739, "grad_norm": 2.254690447373149, "learning_rate": 2.531799232354565e-07, "loss": 0.6598, "step": 10642 }, { "epoch": 0.9013762439127673, "grad_norm": 1.4276610268383998, "learning_rate": 2.5274916400324257e-07, "loss": 0.6063, "step": 10643 }, { "epoch": 0.9014609358458607, "grad_norm": 1.603318961376529, "learning_rate": 2.523187620267914e-07, "loss": 0.5871, "step": 10644 }, { "epoch": 0.9015456277789541, "grad_norm": 0.6563049623943128, "learning_rate": 2.5188871733849376e-07, "loss": 0.8433, "step": 10645 }, { "epoch": 0.9016303197120474, "grad_norm": 1.2966192334094198, "learning_rate": 2.514590299707126e-07, "loss": 0.5751, "step": 10646 }, { "epoch": 0.9017150116451408, "grad_norm": 1.6556011926016612, "learning_rate": 2.510296999557843e-07, "loss": 0.6049, "step": 10647 }, { "epoch": 0.9017997035782341, "grad_norm": 1.3575344799214886, "learning_rate": 2.5060072732601803e-07, "loss": 0.6266, "step": 10648 }, { "epoch": 0.9018843955113276, "grad_norm": 1.7668806822920582, "learning_rate": 2.5017211211369687e-07, "loss": 0.6458, "step": 10649 }, { "epoch": 0.901969087444421, "grad_norm": 0.6355035402333129, "learning_rate": 2.497438543510744e-07, "loss": 0.8199, "step": 10650 }, { "epoch": 0.9020537793775143, "grad_norm": 1.2732374903406296, "learning_rate": 2.49315954070381e-07, "loss": 0.5817, "step": 10651 }, { "epoch": 0.9021384713106076, "grad_norm": 1.3441626200633499, "learning_rate": 2.4888841130381924e-07, "loss": 0.6033, "step": 10652 }, { "epoch": 0.902223163243701, "grad_norm": 1.2341779199823717, "learning_rate": 2.484612260835623e-07, "loss": 0.6269, "step": 10653 }, { "epoch": 0.9023078551767945, "grad_norm": 1.4883016880293403, "learning_rate": 2.480343984417582e-07, "loss": 0.6218, "step": 10654 }, { "epoch": 0.9023925471098878, "grad_norm": 1.2050214183715913, "learning_rate": 2.4760792841052927e-07, "loss": 0.6116, "step": 10655 }, { "epoch": 0.9024772390429812, "grad_norm": 1.3175586677079563, "learning_rate": 2.4718181602196853e-07, "loss": 0.6908, "step": 10656 }, { "epoch": 0.9025619309760745, "grad_norm": 1.3779775643103789, "learning_rate": 2.467560613081432e-07, "loss": 0.6182, "step": 10657 }, { "epoch": 0.9026466229091679, "grad_norm": 1.2946256338260995, "learning_rate": 2.463306643010938e-07, "loss": 0.5947, "step": 10658 }, { "epoch": 0.9027313148422613, "grad_norm": 2.3617561701177925, "learning_rate": 2.45905625032834e-07, "loss": 0.6031, "step": 10659 }, { "epoch": 0.9028160067753547, "grad_norm": 1.5075316531813154, "learning_rate": 2.454809435353506e-07, "loss": 0.6877, "step": 10660 }, { "epoch": 0.902900698708448, "grad_norm": 1.3548321080284194, "learning_rate": 2.450566198406018e-07, "loss": 0.6647, "step": 10661 }, { "epoch": 0.9029853906415414, "grad_norm": 1.4936245587688726, "learning_rate": 2.446326539805216e-07, "loss": 0.6358, "step": 10662 }, { "epoch": 0.9030700825746347, "grad_norm": 1.5681945020947319, "learning_rate": 2.44209045987015e-07, "loss": 0.628, "step": 10663 }, { "epoch": 0.9031547745077282, "grad_norm": 6.341018770453419, "learning_rate": 2.437857958919604e-07, "loss": 0.6453, "step": 10664 }, { "epoch": 0.9032394664408215, "grad_norm": 1.6946505391998952, "learning_rate": 2.4336290372721005e-07, "loss": 0.613, "step": 10665 }, { "epoch": 0.9033241583739149, "grad_norm": 2.1463251858392622, "learning_rate": 2.4294036952458857e-07, "loss": 0.605, "step": 10666 }, { "epoch": 0.9034088503070082, "grad_norm": 1.2206510946033313, "learning_rate": 2.425181933158943e-07, "loss": 0.6037, "step": 10667 }, { "epoch": 0.9034935422401016, "grad_norm": 1.4765017513087058, "learning_rate": 2.4209637513289863e-07, "loss": 0.676, "step": 10668 }, { "epoch": 0.903578234173195, "grad_norm": 0.617070600955379, "learning_rate": 2.416749150073444e-07, "loss": 0.8462, "step": 10669 }, { "epoch": 0.9036629261062884, "grad_norm": 1.6601486205002836, "learning_rate": 2.412538129709496e-07, "loss": 0.6, "step": 10670 }, { "epoch": 0.9037476180393818, "grad_norm": 1.3087647983578443, "learning_rate": 2.408330690554034e-07, "loss": 0.6284, "step": 10671 }, { "epoch": 0.9038323099724751, "grad_norm": 1.403911575577634, "learning_rate": 2.404126832923703e-07, "loss": 0.6245, "step": 10672 }, { "epoch": 0.9039170019055685, "grad_norm": 0.5744530774077603, "learning_rate": 2.399926557134863e-07, "loss": 0.8534, "step": 10673 }, { "epoch": 0.9040016938386619, "grad_norm": 1.4008991665408024, "learning_rate": 2.395729863503599e-07, "loss": 0.5501, "step": 10674 }, { "epoch": 0.9040863857717553, "grad_norm": 1.756009260845845, "learning_rate": 2.391536752345741e-07, "loss": 0.5893, "step": 10675 }, { "epoch": 0.9041710777048486, "grad_norm": 1.5259881825656854, "learning_rate": 2.3873472239768493e-07, "loss": 0.6712, "step": 10676 }, { "epoch": 0.904255769637942, "grad_norm": 1.3945090656937305, "learning_rate": 2.3831612787121871e-07, "loss": 0.5865, "step": 10677 }, { "epoch": 0.9043404615710353, "grad_norm": 1.8202608314768247, "learning_rate": 2.3789789168667866e-07, "loss": 0.6721, "step": 10678 }, { "epoch": 0.9044251535041288, "grad_norm": 1.38002560701525, "learning_rate": 2.3748001387553844e-07, "loss": 0.635, "step": 10679 }, { "epoch": 0.9045098454372221, "grad_norm": 1.5195520970507583, "learning_rate": 2.3706249446924622e-07, "loss": 0.6579, "step": 10680 }, { "epoch": 0.9045945373703155, "grad_norm": 1.6455215824772145, "learning_rate": 2.3664533349922304e-07, "loss": 0.6346, "step": 10681 }, { "epoch": 0.9046792293034088, "grad_norm": 1.7933633067975494, "learning_rate": 2.3622853099686093e-07, "loss": 0.5979, "step": 10682 }, { "epoch": 0.9047639212365022, "grad_norm": 1.320530358786839, "learning_rate": 2.358120869935271e-07, "loss": 0.6716, "step": 10683 }, { "epoch": 0.9048486131695956, "grad_norm": 2.260560425567856, "learning_rate": 2.3539600152056197e-07, "loss": 0.6872, "step": 10684 }, { "epoch": 0.904933305102689, "grad_norm": 0.6173129978824028, "learning_rate": 2.3498027460927664e-07, "loss": 0.7862, "step": 10685 }, { "epoch": 0.9050179970357823, "grad_norm": 1.3236013184938737, "learning_rate": 2.3456490629095774e-07, "loss": 0.6261, "step": 10686 }, { "epoch": 0.9051026889688757, "grad_norm": 2.168120549577211, "learning_rate": 2.3414989659686416e-07, "loss": 0.6285, "step": 10687 }, { "epoch": 0.905187380901969, "grad_norm": 0.6021264763723236, "learning_rate": 2.3373524555822646e-07, "loss": 0.8194, "step": 10688 }, { "epoch": 0.9052720728350625, "grad_norm": 1.2254194881817562, "learning_rate": 2.3332095320625137e-07, "loss": 0.6385, "step": 10689 }, { "epoch": 0.9053567647681559, "grad_norm": 1.2728682323923513, "learning_rate": 2.3290701957211448e-07, "loss": 0.6693, "step": 10690 }, { "epoch": 0.9054414567012492, "grad_norm": 1.931019381868394, "learning_rate": 2.3249344468696755e-07, "loss": 0.5851, "step": 10691 }, { "epoch": 0.9055261486343426, "grad_norm": 1.3272358264738495, "learning_rate": 2.3208022858193403e-07, "loss": 0.6082, "step": 10692 }, { "epoch": 0.9056108405674359, "grad_norm": 1.5107268375307044, "learning_rate": 2.3166737128811013e-07, "loss": 0.6099, "step": 10693 }, { "epoch": 0.9056955325005294, "grad_norm": 2.0392822862557707, "learning_rate": 2.3125487283656711e-07, "loss": 0.6394, "step": 10694 }, { "epoch": 0.9057802244336227, "grad_norm": 1.2927470670056054, "learning_rate": 2.3084273325834628e-07, "loss": 0.6131, "step": 10695 }, { "epoch": 0.9058649163667161, "grad_norm": 1.0750140329765194, "learning_rate": 2.3043095258446334e-07, "loss": 0.6359, "step": 10696 }, { "epoch": 0.9059496082998094, "grad_norm": 2.0828037509422908, "learning_rate": 2.30019530845908e-07, "loss": 0.5864, "step": 10697 }, { "epoch": 0.9060343002329028, "grad_norm": 2.016512480770246, "learning_rate": 2.296084680736399e-07, "loss": 0.65, "step": 10698 }, { "epoch": 0.9061189921659962, "grad_norm": 1.2948783003497204, "learning_rate": 2.2919776429859598e-07, "loss": 0.653, "step": 10699 }, { "epoch": 0.9062036840990896, "grad_norm": 1.4741726338530445, "learning_rate": 2.2878741955168204e-07, "loss": 0.6258, "step": 10700 }, { "epoch": 0.9062883760321829, "grad_norm": 2.5538243591129004, "learning_rate": 2.2837743386378008e-07, "loss": 0.6345, "step": 10701 }, { "epoch": 0.9063730679652763, "grad_norm": 1.270995184899059, "learning_rate": 2.2796780726574376e-07, "loss": 0.6441, "step": 10702 }, { "epoch": 0.9064577598983696, "grad_norm": 1.2948771623611643, "learning_rate": 2.2755853978839836e-07, "loss": 0.5635, "step": 10703 }, { "epoch": 0.9065424518314631, "grad_norm": 1.5520306404385085, "learning_rate": 2.2714963146254431e-07, "loss": 0.665, "step": 10704 }, { "epoch": 0.9066271437645564, "grad_norm": 1.3771984387719731, "learning_rate": 2.2674108231895419e-07, "loss": 0.5969, "step": 10705 }, { "epoch": 0.9067118356976498, "grad_norm": 1.2463418638024064, "learning_rate": 2.263328923883723e-07, "loss": 0.6059, "step": 10706 }, { "epoch": 0.9067965276307431, "grad_norm": 1.7829491478430433, "learning_rate": 2.2592506170151906e-07, "loss": 0.6646, "step": 10707 }, { "epoch": 0.9068812195638365, "grad_norm": 1.4459990807135084, "learning_rate": 2.2551759028908437e-07, "loss": 0.5955, "step": 10708 }, { "epoch": 0.90696591149693, "grad_norm": 2.950369751325503, "learning_rate": 2.2511047818173258e-07, "loss": 0.6579, "step": 10709 }, { "epoch": 0.9070506034300233, "grad_norm": 3.721738369735835, "learning_rate": 2.247037254101031e-07, "loss": 0.6102, "step": 10710 }, { "epoch": 0.9071352953631167, "grad_norm": 2.5240385990955185, "learning_rate": 2.2429733200480307e-07, "loss": 0.6037, "step": 10711 }, { "epoch": 0.90721998729621, "grad_norm": 0.6489277172431992, "learning_rate": 2.2389129799641806e-07, "loss": 0.8904, "step": 10712 }, { "epoch": 0.9073046792293034, "grad_norm": 1.214998256286453, "learning_rate": 2.2348562341550362e-07, "loss": 0.6157, "step": 10713 }, { "epoch": 0.9073893711623968, "grad_norm": 1.773503519871513, "learning_rate": 2.230803082925881e-07, "loss": 0.6588, "step": 10714 }, { "epoch": 0.9074740630954902, "grad_norm": 1.2770656050934541, "learning_rate": 2.2267535265817597e-07, "loss": 0.5963, "step": 10715 }, { "epoch": 0.9075587550285835, "grad_norm": 1.3292450594668348, "learning_rate": 2.2227075654273954e-07, "loss": 0.5834, "step": 10716 }, { "epoch": 0.9076434469616769, "grad_norm": 1.6713632540275658, "learning_rate": 2.218665199767278e-07, "loss": 0.6057, "step": 10717 }, { "epoch": 0.9077281388947702, "grad_norm": 1.5353684547563573, "learning_rate": 2.2146264299056252e-07, "loss": 0.6139, "step": 10718 }, { "epoch": 0.9078128308278637, "grad_norm": 1.2008985350083614, "learning_rate": 2.210591256146366e-07, "loss": 0.6204, "step": 10719 }, { "epoch": 0.907897522760957, "grad_norm": 1.6336288554364573, "learning_rate": 2.2065596787931687e-07, "loss": 0.6331, "step": 10720 }, { "epoch": 0.9079822146940504, "grad_norm": 2.2654429713232593, "learning_rate": 2.2025316981494349e-07, "loss": 0.6083, "step": 10721 }, { "epoch": 0.9080669066271437, "grad_norm": 1.4312241921936946, "learning_rate": 2.198507314518289e-07, "loss": 0.6088, "step": 10722 }, { "epoch": 0.9081515985602371, "grad_norm": 1.345693403233211, "learning_rate": 2.1944865282025996e-07, "loss": 0.6033, "step": 10723 }, { "epoch": 0.9082362904933305, "grad_norm": 1.5147198874207022, "learning_rate": 2.1904693395049303e-07, "loss": 0.6237, "step": 10724 }, { "epoch": 0.9083209824264239, "grad_norm": 1.1997032836961106, "learning_rate": 2.1864557487276062e-07, "loss": 0.6004, "step": 10725 }, { "epoch": 0.9084056743595172, "grad_norm": 1.8290064089863456, "learning_rate": 2.1824457561726743e-07, "loss": 0.6464, "step": 10726 }, { "epoch": 0.9084903662926106, "grad_norm": 0.6528722305477787, "learning_rate": 2.1784393621419042e-07, "loss": 0.8622, "step": 10727 }, { "epoch": 0.908575058225704, "grad_norm": 2.1670530845507474, "learning_rate": 2.1744365669367996e-07, "loss": 0.615, "step": 10728 }, { "epoch": 0.9086597501587974, "grad_norm": 1.2634119079354156, "learning_rate": 2.1704373708585967e-07, "loss": 0.6433, "step": 10729 }, { "epoch": 0.9087444420918908, "grad_norm": 1.4627940322852502, "learning_rate": 2.16644177420825e-07, "loss": 0.6079, "step": 10730 }, { "epoch": 0.9088291340249841, "grad_norm": 1.218536267210951, "learning_rate": 2.1624497772864517e-07, "loss": 0.6032, "step": 10731 }, { "epoch": 0.9089138259580775, "grad_norm": 2.2010596160542795, "learning_rate": 2.1584613803936115e-07, "loss": 0.6488, "step": 10732 }, { "epoch": 0.9089985178911709, "grad_norm": 3.4507602263892827, "learning_rate": 2.1544765838298898e-07, "loss": 0.6865, "step": 10733 }, { "epoch": 0.9090832098242643, "grad_norm": 2.0551181826825173, "learning_rate": 2.1504953878951573e-07, "loss": 0.6051, "step": 10734 }, { "epoch": 0.9091679017573576, "grad_norm": 1.236426525633688, "learning_rate": 2.1465177928890245e-07, "loss": 0.6162, "step": 10735 }, { "epoch": 0.909252593690451, "grad_norm": 1.4425884284713808, "learning_rate": 2.1425437991108188e-07, "loss": 0.633, "step": 10736 }, { "epoch": 0.9093372856235443, "grad_norm": 1.595320215813006, "learning_rate": 2.1385734068596232e-07, "loss": 0.621, "step": 10737 }, { "epoch": 0.9094219775566378, "grad_norm": 1.5293651761823501, "learning_rate": 2.1346066164342038e-07, "loss": 0.6054, "step": 10738 }, { "epoch": 0.9095066694897311, "grad_norm": 1.2082453942176268, "learning_rate": 2.1306434281331056e-07, "loss": 0.5963, "step": 10739 }, { "epoch": 0.9095913614228245, "grad_norm": 0.6327208563507829, "learning_rate": 2.1266838422545621e-07, "loss": 0.8674, "step": 10740 }, { "epoch": 0.9096760533559178, "grad_norm": 1.7243115790601358, "learning_rate": 2.1227278590965573e-07, "loss": 0.5885, "step": 10741 }, { "epoch": 0.9097607452890112, "grad_norm": 1.4424518249162648, "learning_rate": 2.1187754789568137e-07, "loss": 0.6479, "step": 10742 }, { "epoch": 0.9098454372221046, "grad_norm": 1.5282097367897953, "learning_rate": 2.1148267021327496e-07, "loss": 0.6386, "step": 10743 }, { "epoch": 0.909930129155198, "grad_norm": 1.6740495851955841, "learning_rate": 2.110881528921549e-07, "loss": 0.5481, "step": 10744 }, { "epoch": 0.9100148210882913, "grad_norm": 1.4952802680193753, "learning_rate": 2.1069399596200912e-07, "loss": 0.5847, "step": 10745 }, { "epoch": 0.9100995130213847, "grad_norm": 1.2314012106129415, "learning_rate": 2.1030019945250057e-07, "loss": 0.6323, "step": 10746 }, { "epoch": 0.910184204954478, "grad_norm": 1.8266319721093258, "learning_rate": 2.09906763393265e-07, "loss": 0.5849, "step": 10747 }, { "epoch": 0.9102688968875715, "grad_norm": 1.3372850767634357, "learning_rate": 2.0951368781391034e-07, "loss": 0.622, "step": 10748 }, { "epoch": 0.9103535888206649, "grad_norm": 0.6301728275636785, "learning_rate": 2.091209727440169e-07, "loss": 0.8174, "step": 10749 }, { "epoch": 0.9104382807537582, "grad_norm": 1.8583976617663718, "learning_rate": 2.087286182131404e-07, "loss": 0.6388, "step": 10750 }, { "epoch": 0.9105229726868516, "grad_norm": 1.3320662009469926, "learning_rate": 2.083366242508056e-07, "loss": 0.6576, "step": 10751 }, { "epoch": 0.9106076646199449, "grad_norm": 1.357701266282501, "learning_rate": 2.0794499088651333e-07, "loss": 0.5855, "step": 10752 }, { "epoch": 0.9106923565530384, "grad_norm": 0.6258476449632975, "learning_rate": 2.07553718149735e-07, "loss": 0.8585, "step": 10753 }, { "epoch": 0.9107770484861317, "grad_norm": 1.412987152344415, "learning_rate": 2.0716280606991656e-07, "loss": 0.6334, "step": 10754 }, { "epoch": 0.9108617404192251, "grad_norm": 1.2440609212528888, "learning_rate": 2.067722546764761e-07, "loss": 0.6039, "step": 10755 }, { "epoch": 0.9109464323523184, "grad_norm": 1.2880163580490365, "learning_rate": 2.0638206399880512e-07, "loss": 0.6057, "step": 10756 }, { "epoch": 0.9110311242854118, "grad_norm": 1.3200917426035867, "learning_rate": 2.0599223406626734e-07, "loss": 0.5753, "step": 10757 }, { "epoch": 0.9111158162185052, "grad_norm": 1.3799475320351116, "learning_rate": 2.0560276490819985e-07, "loss": 0.6158, "step": 10758 }, { "epoch": 0.9112005081515986, "grad_norm": 1.9670561381352385, "learning_rate": 2.0521365655391145e-07, "loss": 0.6113, "step": 10759 }, { "epoch": 0.9112852000846919, "grad_norm": 1.7774725004820162, "learning_rate": 2.0482490903268538e-07, "loss": 0.6814, "step": 10760 }, { "epoch": 0.9113698920177853, "grad_norm": 1.2980286914143278, "learning_rate": 2.0443652237377598e-07, "loss": 0.5912, "step": 10761 }, { "epoch": 0.9114545839508786, "grad_norm": 1.4719364479233867, "learning_rate": 2.04048496606411e-07, "loss": 0.6222, "step": 10762 }, { "epoch": 0.9115392758839721, "grad_norm": 2.2227257488720293, "learning_rate": 2.0366083175979433e-07, "loss": 0.6295, "step": 10763 }, { "epoch": 0.9116239678170655, "grad_norm": 0.6656011781769275, "learning_rate": 2.0327352786309706e-07, "loss": 0.8709, "step": 10764 }, { "epoch": 0.9117086597501588, "grad_norm": 1.7292761288372187, "learning_rate": 2.0288658494546642e-07, "loss": 0.5977, "step": 10765 }, { "epoch": 0.9117933516832522, "grad_norm": 1.3923178591453722, "learning_rate": 2.0250000303602302e-07, "loss": 0.634, "step": 10766 }, { "epoch": 0.9118780436163455, "grad_norm": 1.3682352932749942, "learning_rate": 2.0211378216385747e-07, "loss": 0.6134, "step": 10767 }, { "epoch": 0.911962735549439, "grad_norm": 1.3583658705843293, "learning_rate": 2.0172792235803596e-07, "loss": 0.6236, "step": 10768 }, { "epoch": 0.9120474274825323, "grad_norm": 1.1961643212832933, "learning_rate": 2.0134242364759637e-07, "loss": 0.5858, "step": 10769 }, { "epoch": 0.9121321194156257, "grad_norm": 1.4269768992526037, "learning_rate": 2.0095728606154996e-07, "loss": 0.6761, "step": 10770 }, { "epoch": 0.912216811348719, "grad_norm": 0.8322037859659213, "learning_rate": 2.0057250962887964e-07, "loss": 0.8341, "step": 10771 }, { "epoch": 0.9123015032818124, "grad_norm": 1.7671589450619483, "learning_rate": 2.0018809437854224e-07, "loss": 0.5857, "step": 10772 }, { "epoch": 0.9123861952149058, "grad_norm": 0.5954921954550582, "learning_rate": 1.9980404033946743e-07, "loss": 0.8415, "step": 10773 }, { "epoch": 0.9124708871479992, "grad_norm": 1.590116569251489, "learning_rate": 1.9942034754055595e-07, "loss": 0.6524, "step": 10774 }, { "epoch": 0.9125555790810925, "grad_norm": 0.5930976206568199, "learning_rate": 1.990370160106836e-07, "loss": 0.7775, "step": 10775 }, { "epoch": 0.9126402710141859, "grad_norm": 3.569712794461793, "learning_rate": 1.986540457786984e-07, "loss": 0.6448, "step": 10776 }, { "epoch": 0.9127249629472792, "grad_norm": 1.2537270890461656, "learning_rate": 1.9827143687342065e-07, "loss": 0.6185, "step": 10777 }, { "epoch": 0.9128096548803727, "grad_norm": 1.7172460961507745, "learning_rate": 1.9788918932364343e-07, "loss": 0.5823, "step": 10778 }, { "epoch": 0.912894346813466, "grad_norm": 1.7041199441146577, "learning_rate": 1.975073031581337e-07, "loss": 0.6438, "step": 10779 }, { "epoch": 0.9129790387465594, "grad_norm": 2.2628757863116253, "learning_rate": 1.9712577840562907e-07, "loss": 0.6658, "step": 10780 }, { "epoch": 0.9130637306796527, "grad_norm": 1.236567756116464, "learning_rate": 1.9674461509484266e-07, "loss": 0.622, "step": 10781 }, { "epoch": 0.9131484226127461, "grad_norm": 1.9674133721442209, "learning_rate": 1.9636381325445707e-07, "loss": 0.6184, "step": 10782 }, { "epoch": 0.9132331145458396, "grad_norm": 1.1719399019736503, "learning_rate": 1.9598337291313218e-07, "loss": 0.5912, "step": 10783 }, { "epoch": 0.9133178064789329, "grad_norm": 2.0947199292844836, "learning_rate": 1.956032940994973e-07, "loss": 0.5927, "step": 10784 }, { "epoch": 0.9134024984120263, "grad_norm": 1.2160033336731857, "learning_rate": 1.9522357684215398e-07, "loss": 0.659, "step": 10785 }, { "epoch": 0.9134871903451196, "grad_norm": 1.4824394540351495, "learning_rate": 1.948442211696794e-07, "loss": 0.5611, "step": 10786 }, { "epoch": 0.913571882278213, "grad_norm": 1.2748654325314632, "learning_rate": 1.9446522711062234e-07, "loss": 0.6161, "step": 10787 }, { "epoch": 0.9136565742113064, "grad_norm": 1.525466278069894, "learning_rate": 1.9408659469350277e-07, "loss": 0.5821, "step": 10788 }, { "epoch": 0.9137412661443998, "grad_norm": 1.5291913288404735, "learning_rate": 1.9370832394681572e-07, "loss": 0.6263, "step": 10789 }, { "epoch": 0.9138259580774931, "grad_norm": 1.349497846284443, "learning_rate": 1.9333041489902726e-07, "loss": 0.6426, "step": 10790 }, { "epoch": 0.9139106500105865, "grad_norm": 1.255944230393512, "learning_rate": 1.9295286757857802e-07, "loss": 0.5776, "step": 10791 }, { "epoch": 0.9139953419436798, "grad_norm": 4.1959344731672035, "learning_rate": 1.9257568201388022e-07, "loss": 0.5883, "step": 10792 }, { "epoch": 0.9140800338767733, "grad_norm": 1.4573656540063062, "learning_rate": 1.9219885823331896e-07, "loss": 0.6236, "step": 10793 }, { "epoch": 0.9141647258098666, "grad_norm": 1.4049183781386592, "learning_rate": 1.9182239626525156e-07, "loss": 0.6172, "step": 10794 }, { "epoch": 0.91424941774296, "grad_norm": 1.3785395630380743, "learning_rate": 1.914462961380098e-07, "loss": 0.6021, "step": 10795 }, { "epoch": 0.9143341096760533, "grad_norm": 2.2072227931153434, "learning_rate": 1.910705578798966e-07, "loss": 0.6335, "step": 10796 }, { "epoch": 0.9144188016091467, "grad_norm": 1.1704076384235107, "learning_rate": 1.906951815191882e-07, "loss": 0.5933, "step": 10797 }, { "epoch": 0.9145034935422401, "grad_norm": 1.5605466029372235, "learning_rate": 1.903201670841337e-07, "loss": 0.6581, "step": 10798 }, { "epoch": 0.9145881854753335, "grad_norm": 1.3969083923501375, "learning_rate": 1.8994551460295552e-07, "loss": 0.6336, "step": 10799 }, { "epoch": 0.9146728774084268, "grad_norm": 1.755917651202197, "learning_rate": 1.8957122410384832e-07, "loss": 0.6106, "step": 10800 }, { "epoch": 0.9147575693415202, "grad_norm": 1.6854106556587973, "learning_rate": 1.891972956149779e-07, "loss": 0.6704, "step": 10801 }, { "epoch": 0.9148422612746135, "grad_norm": 1.9642530093086203, "learning_rate": 1.8882372916448622e-07, "loss": 0.6387, "step": 10802 }, { "epoch": 0.914926953207707, "grad_norm": 0.6195927415735674, "learning_rate": 1.8845052478048466e-07, "loss": 0.8472, "step": 10803 }, { "epoch": 0.9150116451408004, "grad_norm": 1.6835762722906418, "learning_rate": 1.8807768249105963e-07, "loss": 0.5979, "step": 10804 }, { "epoch": 0.9150963370738937, "grad_norm": 1.1942643044586823, "learning_rate": 1.877052023242698e-07, "loss": 0.6134, "step": 10805 }, { "epoch": 0.9151810290069871, "grad_norm": 1.9171450098469958, "learning_rate": 1.8733308430814502e-07, "loss": 0.5725, "step": 10806 }, { "epoch": 0.9152657209400804, "grad_norm": 1.2292189723324838, "learning_rate": 1.869613284706906e-07, "loss": 0.61, "step": 10807 }, { "epoch": 0.9153504128731739, "grad_norm": 1.7284219238408263, "learning_rate": 1.8658993483988254e-07, "loss": 0.5786, "step": 10808 }, { "epoch": 0.9154351048062672, "grad_norm": 1.4753961725847695, "learning_rate": 1.8621890344366956e-07, "loss": 0.5741, "step": 10809 }, { "epoch": 0.9155197967393606, "grad_norm": 1.4893390854464583, "learning_rate": 1.8584823430997434e-07, "loss": 0.6015, "step": 10810 }, { "epoch": 0.9156044886724539, "grad_norm": 1.2426906324899225, "learning_rate": 1.854779274666918e-07, "loss": 0.6331, "step": 10811 }, { "epoch": 0.9156891806055473, "grad_norm": 1.3818524294328638, "learning_rate": 1.8510798294168907e-07, "loss": 0.6129, "step": 10812 }, { "epoch": 0.9157738725386407, "grad_norm": 1.494974474496922, "learning_rate": 1.8473840076280724e-07, "loss": 0.651, "step": 10813 }, { "epoch": 0.9158585644717341, "grad_norm": 1.8479309409146591, "learning_rate": 1.843691809578585e-07, "loss": 0.5976, "step": 10814 }, { "epoch": 0.9159432564048274, "grad_norm": 1.5042396274067913, "learning_rate": 1.8400032355462837e-07, "loss": 0.5907, "step": 10815 }, { "epoch": 0.9160279483379208, "grad_norm": 1.5356631093699986, "learning_rate": 1.836318285808769e-07, "loss": 0.6083, "step": 10816 }, { "epoch": 0.9161126402710141, "grad_norm": 1.5500340434176074, "learning_rate": 1.8326369606433358e-07, "loss": 0.602, "step": 10817 }, { "epoch": 0.9161973322041076, "grad_norm": 1.5414254497011084, "learning_rate": 1.828959260327029e-07, "loss": 0.6369, "step": 10818 }, { "epoch": 0.916282024137201, "grad_norm": 1.4196216429437705, "learning_rate": 1.8252851851366103e-07, "loss": 0.613, "step": 10819 }, { "epoch": 0.9163667160702943, "grad_norm": 1.4017953894944477, "learning_rate": 1.821614735348587e-07, "loss": 0.6282, "step": 10820 }, { "epoch": 0.9164514080033876, "grad_norm": 1.7823295928416412, "learning_rate": 1.8179479112391706e-07, "loss": 0.5869, "step": 10821 }, { "epoch": 0.916536099936481, "grad_norm": 1.2309697817547218, "learning_rate": 1.8142847130843079e-07, "loss": 0.6403, "step": 10822 }, { "epoch": 0.9166207918695745, "grad_norm": 1.6689949953664778, "learning_rate": 1.8106251411596775e-07, "loss": 0.6526, "step": 10823 }, { "epoch": 0.9167054838026678, "grad_norm": 0.6320169943039983, "learning_rate": 1.8069691957406765e-07, "loss": 0.8146, "step": 10824 }, { "epoch": 0.9167901757357612, "grad_norm": 1.999627677788678, "learning_rate": 1.8033168771024401e-07, "loss": 0.6235, "step": 10825 }, { "epoch": 0.9168748676688545, "grad_norm": 1.5330853267954598, "learning_rate": 1.7996681855198261e-07, "loss": 0.5973, "step": 10826 }, { "epoch": 0.9169595596019479, "grad_norm": 0.6761149760023734, "learning_rate": 1.7960231212674095e-07, "loss": 0.8013, "step": 10827 }, { "epoch": 0.9170442515350413, "grad_norm": 1.6132197769702135, "learning_rate": 1.7923816846195042e-07, "loss": 0.6437, "step": 10828 }, { "epoch": 0.9171289434681347, "grad_norm": 1.6819856304792355, "learning_rate": 1.7887438758501518e-07, "loss": 0.6625, "step": 10829 }, { "epoch": 0.917213635401228, "grad_norm": 1.4338828061062845, "learning_rate": 1.7851096952331114e-07, "loss": 0.6624, "step": 10830 }, { "epoch": 0.9172983273343214, "grad_norm": 2.3182472393773277, "learning_rate": 1.7814791430418755e-07, "loss": 0.6345, "step": 10831 }, { "epoch": 0.9173830192674147, "grad_norm": 1.3151848035593943, "learning_rate": 1.777852219549664e-07, "loss": 0.6087, "step": 10832 }, { "epoch": 0.9174677112005082, "grad_norm": 1.3784317571430178, "learning_rate": 1.7742289250294198e-07, "loss": 0.6165, "step": 10833 }, { "epoch": 0.9175524031336015, "grad_norm": 2.8677754281582506, "learning_rate": 1.770609259753825e-07, "loss": 0.6069, "step": 10834 }, { "epoch": 0.9176370950666949, "grad_norm": 1.310320145884411, "learning_rate": 1.7669932239952613e-07, "loss": 0.6048, "step": 10835 }, { "epoch": 0.9177217869997882, "grad_norm": 1.859531818654468, "learning_rate": 1.7633808180258672e-07, "loss": 0.6481, "step": 10836 }, { "epoch": 0.9178064789328817, "grad_norm": 1.390547482407046, "learning_rate": 1.7597720421174912e-07, "loss": 0.5977, "step": 10837 }, { "epoch": 0.917891170865975, "grad_norm": 1.6049541587231875, "learning_rate": 1.7561668965417055e-07, "loss": 0.63, "step": 10838 }, { "epoch": 0.9179758627990684, "grad_norm": 1.590561061472271, "learning_rate": 1.7525653815698317e-07, "loss": 0.6207, "step": 10839 }, { "epoch": 0.9180605547321617, "grad_norm": 1.6690160787678507, "learning_rate": 1.7489674974728976e-07, "loss": 0.6314, "step": 10840 }, { "epoch": 0.9181452466652551, "grad_norm": 1.2320644162054832, "learning_rate": 1.7453732445216586e-07, "loss": 0.5945, "step": 10841 }, { "epoch": 0.9182299385983486, "grad_norm": 4.97449382863984, "learning_rate": 1.741782622986604e-07, "loss": 0.6368, "step": 10842 }, { "epoch": 0.9183146305314419, "grad_norm": 0.7069024069370561, "learning_rate": 1.7381956331379456e-07, "loss": 0.8649, "step": 10843 }, { "epoch": 0.9183993224645353, "grad_norm": 1.3852722137734028, "learning_rate": 1.7346122752456173e-07, "loss": 0.6485, "step": 10844 }, { "epoch": 0.9184840143976286, "grad_norm": 1.3096129124601663, "learning_rate": 1.731032549579298e-07, "loss": 0.6112, "step": 10845 }, { "epoch": 0.918568706330722, "grad_norm": 1.3881376007820598, "learning_rate": 1.7274564564083774e-07, "loss": 0.6183, "step": 10846 }, { "epoch": 0.9186533982638154, "grad_norm": 1.2240075439057838, "learning_rate": 1.7238839960019737e-07, "loss": 0.6012, "step": 10847 }, { "epoch": 0.9187380901969088, "grad_norm": 1.525324450317209, "learning_rate": 1.7203151686289333e-07, "loss": 0.6845, "step": 10848 }, { "epoch": 0.9188227821300021, "grad_norm": 1.6507200766566341, "learning_rate": 1.7167499745578242e-07, "loss": 0.5816, "step": 10849 }, { "epoch": 0.9189074740630955, "grad_norm": 1.606911362748083, "learning_rate": 1.71318841405696e-07, "loss": 0.6393, "step": 10850 }, { "epoch": 0.9189921659961888, "grad_norm": 2.7368033615828904, "learning_rate": 1.7096304873943537e-07, "loss": 0.6357, "step": 10851 }, { "epoch": 0.9190768579292823, "grad_norm": 1.7692529518991447, "learning_rate": 1.7060761948377637e-07, "loss": 0.58, "step": 10852 }, { "epoch": 0.9191615498623756, "grad_norm": 1.258566810840054, "learning_rate": 1.7025255366546643e-07, "loss": 0.6411, "step": 10853 }, { "epoch": 0.919246241795469, "grad_norm": 1.664936507050744, "learning_rate": 1.6989785131122706e-07, "loss": 0.6531, "step": 10854 }, { "epoch": 0.9193309337285623, "grad_norm": 1.5266087527980094, "learning_rate": 1.6954351244775125e-07, "loss": 0.6132, "step": 10855 }, { "epoch": 0.9194156256616557, "grad_norm": 0.6043149773660091, "learning_rate": 1.6918953710170384e-07, "loss": 0.7731, "step": 10856 }, { "epoch": 0.9195003175947492, "grad_norm": 0.6205966082054091, "learning_rate": 1.688359252997246e-07, "loss": 0.8139, "step": 10857 }, { "epoch": 0.9195850095278425, "grad_norm": 0.6996379642762668, "learning_rate": 1.684826770684239e-07, "loss": 0.8995, "step": 10858 }, { "epoch": 0.9196697014609359, "grad_norm": 2.2170675766109653, "learning_rate": 1.6812979243438554e-07, "loss": 0.6148, "step": 10859 }, { "epoch": 0.9197543933940292, "grad_norm": 1.1794431294078955, "learning_rate": 1.6777727142416656e-07, "loss": 0.6265, "step": 10860 }, { "epoch": 0.9198390853271226, "grad_norm": 2.186009811135045, "learning_rate": 1.6742511406429684e-07, "loss": 0.6495, "step": 10861 }, { "epoch": 0.919923777260216, "grad_norm": 0.6221617626022259, "learning_rate": 1.6707332038127576e-07, "loss": 0.7994, "step": 10862 }, { "epoch": 0.9200084691933094, "grad_norm": 1.3007436823883756, "learning_rate": 1.6672189040157938e-07, "loss": 0.5818, "step": 10863 }, { "epoch": 0.9200931611264027, "grad_norm": 1.8121588673475213, "learning_rate": 1.6637082415165429e-07, "loss": 0.669, "step": 10864 }, { "epoch": 0.9201778530594961, "grad_norm": 1.409698068624576, "learning_rate": 1.6602012165791936e-07, "loss": 0.5859, "step": 10865 }, { "epoch": 0.9202625449925894, "grad_norm": 1.5002189803622907, "learning_rate": 1.6566978294676737e-07, "loss": 0.6342, "step": 10866 }, { "epoch": 0.9203472369256829, "grad_norm": 3.4485185676202814, "learning_rate": 1.6531980804456338e-07, "loss": 0.5768, "step": 10867 }, { "epoch": 0.9204319288587762, "grad_norm": 1.47384997705697, "learning_rate": 1.6497019697764516e-07, "loss": 0.6867, "step": 10868 }, { "epoch": 0.9205166207918696, "grad_norm": 1.2846074868715442, "learning_rate": 1.6462094977232224e-07, "loss": 0.6846, "step": 10869 }, { "epoch": 0.9206013127249629, "grad_norm": 1.2269681456523738, "learning_rate": 1.6427206645487692e-07, "loss": 0.6544, "step": 10870 }, { "epoch": 0.9206860046580563, "grad_norm": 1.2922947493110402, "learning_rate": 1.6392354705156544e-07, "loss": 0.6894, "step": 10871 }, { "epoch": 0.9207706965911497, "grad_norm": 2.6175835993232126, "learning_rate": 1.6357539158861513e-07, "loss": 0.6241, "step": 10872 }, { "epoch": 0.9208553885242431, "grad_norm": 1.3782731125669663, "learning_rate": 1.6322760009222615e-07, "loss": 0.6641, "step": 10873 }, { "epoch": 0.9209400804573364, "grad_norm": 1.523362026031926, "learning_rate": 1.6288017258857313e-07, "loss": 0.6876, "step": 10874 }, { "epoch": 0.9210247723904298, "grad_norm": 0.6859159646162226, "learning_rate": 1.6253310910380014e-07, "loss": 0.9096, "step": 10875 }, { "epoch": 0.9211094643235231, "grad_norm": 4.484192364980288, "learning_rate": 1.6218640966402687e-07, "loss": 0.6419, "step": 10876 }, { "epoch": 0.9211941562566166, "grad_norm": 3.6793565684220377, "learning_rate": 1.6184007429534353e-07, "loss": 0.6434, "step": 10877 }, { "epoch": 0.92127884818971, "grad_norm": 1.284809811342553, "learning_rate": 1.6149410302381373e-07, "loss": 0.6683, "step": 10878 }, { "epoch": 0.9213635401228033, "grad_norm": 1.268430159174693, "learning_rate": 1.6114849587547333e-07, "loss": 0.6174, "step": 10879 }, { "epoch": 0.9214482320558967, "grad_norm": 4.0097256179697744, "learning_rate": 1.6080325287633203e-07, "loss": 0.5882, "step": 10880 }, { "epoch": 0.92153292398899, "grad_norm": 1.3537232210564376, "learning_rate": 1.6045837405237075e-07, "loss": 0.6301, "step": 10881 }, { "epoch": 0.9216176159220835, "grad_norm": 3.1219286741438657, "learning_rate": 1.6011385942954371e-07, "loss": 0.6629, "step": 10882 }, { "epoch": 0.9217023078551768, "grad_norm": 3.064287391869224, "learning_rate": 1.5976970903377632e-07, "loss": 0.64, "step": 10883 }, { "epoch": 0.9217869997882702, "grad_norm": 1.6937131317471217, "learning_rate": 1.5942592289096947e-07, "loss": 0.5722, "step": 10884 }, { "epoch": 0.9218716917213635, "grad_norm": 1.7725768491983658, "learning_rate": 1.5908250102699363e-07, "loss": 0.6652, "step": 10885 }, { "epoch": 0.9219563836544569, "grad_norm": 0.5778059023275671, "learning_rate": 1.587394434676931e-07, "loss": 0.8104, "step": 10886 }, { "epoch": 0.9220410755875503, "grad_norm": 1.503647194504027, "learning_rate": 1.5839675023888556e-07, "loss": 0.636, "step": 10887 }, { "epoch": 0.9221257675206437, "grad_norm": 2.03499523044494, "learning_rate": 1.5805442136635984e-07, "loss": 0.585, "step": 10888 }, { "epoch": 0.922210459453737, "grad_norm": 1.3500784388967138, "learning_rate": 1.5771245687587811e-07, "loss": 0.7142, "step": 10889 }, { "epoch": 0.9222951513868304, "grad_norm": 3.421718024939332, "learning_rate": 1.5737085679317589e-07, "loss": 0.651, "step": 10890 }, { "epoch": 0.9223798433199237, "grad_norm": 1.371998758408083, "learning_rate": 1.570296211439587e-07, "loss": 0.6636, "step": 10891 }, { "epoch": 0.9224645352530172, "grad_norm": 2.088354028259578, "learning_rate": 1.5668874995390825e-07, "loss": 0.6251, "step": 10892 }, { "epoch": 0.9225492271861105, "grad_norm": 1.5368003394018461, "learning_rate": 1.5634824324867514e-07, "loss": 0.6602, "step": 10893 }, { "epoch": 0.9226339191192039, "grad_norm": 1.4711452671792957, "learning_rate": 1.5600810105388442e-07, "loss": 0.6211, "step": 10894 }, { "epoch": 0.9227186110522972, "grad_norm": 1.460643769496105, "learning_rate": 1.556683233951356e-07, "loss": 0.6398, "step": 10895 }, { "epoch": 0.9228033029853906, "grad_norm": 1.176848868526321, "learning_rate": 1.5532891029799711e-07, "loss": 0.5929, "step": 10896 }, { "epoch": 0.9228879949184841, "grad_norm": 2.9586157007053107, "learning_rate": 1.5498986178801133e-07, "loss": 0.6295, "step": 10897 }, { "epoch": 0.9229726868515774, "grad_norm": 1.3932629797078946, "learning_rate": 1.546511778906945e-07, "loss": 0.6686, "step": 10898 }, { "epoch": 0.9230573787846708, "grad_norm": 1.3472235904823424, "learning_rate": 1.54312858631534e-07, "loss": 0.5783, "step": 10899 }, { "epoch": 0.9231420707177641, "grad_norm": 1.165785309325124, "learning_rate": 1.5397490403598947e-07, "loss": 0.5827, "step": 10900 }, { "epoch": 0.9232267626508575, "grad_norm": 1.6144247125177362, "learning_rate": 1.5363731412949447e-07, "loss": 0.6665, "step": 10901 }, { "epoch": 0.9233114545839509, "grad_norm": 0.6694393777884351, "learning_rate": 1.533000889374542e-07, "loss": 0.8498, "step": 10902 }, { "epoch": 0.9233961465170443, "grad_norm": 1.5479788254066496, "learning_rate": 1.5296322848524725e-07, "loss": 0.6475, "step": 10903 }, { "epoch": 0.9234808384501376, "grad_norm": 1.3331628140347667, "learning_rate": 1.5262673279822338e-07, "loss": 0.6206, "step": 10904 }, { "epoch": 0.923565530383231, "grad_norm": 1.2523535914825088, "learning_rate": 1.5229060190170565e-07, "loss": 0.6325, "step": 10905 }, { "epoch": 0.9236502223163243, "grad_norm": 1.5048542446818458, "learning_rate": 1.5195483582099047e-07, "loss": 0.6016, "step": 10906 }, { "epoch": 0.9237349142494178, "grad_norm": 1.3092547921335391, "learning_rate": 1.5161943458134488e-07, "loss": 0.6374, "step": 10907 }, { "epoch": 0.9238196061825111, "grad_norm": 1.9823860052131912, "learning_rate": 1.5128439820801034e-07, "loss": 0.6111, "step": 10908 }, { "epoch": 0.9239042981156045, "grad_norm": 2.20905854969951, "learning_rate": 1.5094972672620002e-07, "loss": 0.6017, "step": 10909 }, { "epoch": 0.9239889900486978, "grad_norm": 1.6476610834191188, "learning_rate": 1.5061542016109986e-07, "loss": 0.6519, "step": 10910 }, { "epoch": 0.9240736819817912, "grad_norm": 1.3874380176664198, "learning_rate": 1.5028147853786868e-07, "loss": 0.6219, "step": 10911 }, { "epoch": 0.9241583739148846, "grad_norm": 1.1177038369070287, "learning_rate": 1.4994790188163578e-07, "loss": 0.6624, "step": 10912 }, { "epoch": 0.924243065847978, "grad_norm": 2.4281397000416356, "learning_rate": 1.4961469021750662e-07, "loss": 0.6167, "step": 10913 }, { "epoch": 0.9243277577810713, "grad_norm": 1.0889252943640801, "learning_rate": 1.4928184357055452e-07, "loss": 0.5873, "step": 10914 }, { "epoch": 0.9244124497141647, "grad_norm": 1.3822554779631735, "learning_rate": 1.4894936196582998e-07, "loss": 0.6237, "step": 10915 }, { "epoch": 0.924497141647258, "grad_norm": 1.3058064640844007, "learning_rate": 1.486172454283541e-07, "loss": 0.6253, "step": 10916 }, { "epoch": 0.9245818335803515, "grad_norm": 1.4517614564530104, "learning_rate": 1.4828549398311908e-07, "loss": 0.5496, "step": 10917 }, { "epoch": 0.9246665255134449, "grad_norm": 2.5201484851432494, "learning_rate": 1.4795410765509165e-07, "loss": 0.6132, "step": 10918 }, { "epoch": 0.9247512174465382, "grad_norm": 1.296640305200984, "learning_rate": 1.476230864692113e-07, "loss": 0.6344, "step": 10919 }, { "epoch": 0.9248359093796316, "grad_norm": 1.2798576110526574, "learning_rate": 1.4729243045038755e-07, "loss": 0.6234, "step": 10920 }, { "epoch": 0.9249206013127249, "grad_norm": 1.4640431982519657, "learning_rate": 1.4696213962350491e-07, "loss": 0.6403, "step": 10921 }, { "epoch": 0.9250052932458184, "grad_norm": 1.3732446769896656, "learning_rate": 1.466322140134191e-07, "loss": 0.6025, "step": 10922 }, { "epoch": 0.9250899851789117, "grad_norm": 1.2786156147332943, "learning_rate": 1.463026536449591e-07, "loss": 0.62, "step": 10923 }, { "epoch": 0.9251746771120051, "grad_norm": 0.5976245406686849, "learning_rate": 1.4597345854292623e-07, "loss": 0.7892, "step": 10924 }, { "epoch": 0.9252593690450984, "grad_norm": 1.4930736070510973, "learning_rate": 1.4564462873209394e-07, "loss": 0.634, "step": 10925 }, { "epoch": 0.9253440609781918, "grad_norm": 2.5528061112489113, "learning_rate": 1.4531616423720752e-07, "loss": 0.5674, "step": 10926 }, { "epoch": 0.9254287529112852, "grad_norm": 2.433540706756421, "learning_rate": 1.4498806508298768e-07, "loss": 0.6151, "step": 10927 }, { "epoch": 0.9255134448443786, "grad_norm": 0.6080820390710403, "learning_rate": 1.446603312941236e-07, "loss": 0.7846, "step": 10928 }, { "epoch": 0.9255981367774719, "grad_norm": 0.6687699060335475, "learning_rate": 1.4433296289528e-07, "loss": 0.8879, "step": 10929 }, { "epoch": 0.9256828287105653, "grad_norm": 1.4740428512042039, "learning_rate": 1.4400595991109324e-07, "loss": 0.5724, "step": 10930 }, { "epoch": 0.9257675206436586, "grad_norm": 1.4894939424238474, "learning_rate": 1.4367932236617145e-07, "loss": 0.635, "step": 10931 }, { "epoch": 0.9258522125767521, "grad_norm": 1.9842078591904055, "learning_rate": 1.4335305028509604e-07, "loss": 0.5851, "step": 10932 }, { "epoch": 0.9259369045098454, "grad_norm": 0.6751855206531283, "learning_rate": 1.4302714369242076e-07, "loss": 0.9141, "step": 10933 }, { "epoch": 0.9260215964429388, "grad_norm": 1.733263160390924, "learning_rate": 1.4270160261267207e-07, "loss": 0.5894, "step": 10934 }, { "epoch": 0.9261062883760321, "grad_norm": 1.2564339229442154, "learning_rate": 1.4237642707034817e-07, "loss": 0.6694, "step": 10935 }, { "epoch": 0.9261909803091256, "grad_norm": 1.204730246678642, "learning_rate": 1.4205161708991998e-07, "loss": 0.6326, "step": 10936 }, { "epoch": 0.926275672242219, "grad_norm": 1.180385917052474, "learning_rate": 1.4172717269583304e-07, "loss": 0.6827, "step": 10937 }, { "epoch": 0.9263603641753123, "grad_norm": 1.4248185183659086, "learning_rate": 1.4140309391250106e-07, "loss": 0.6567, "step": 10938 }, { "epoch": 0.9264450561084057, "grad_norm": 1.2680273171532896, "learning_rate": 1.410793807643135e-07, "loss": 0.6218, "step": 10939 }, { "epoch": 0.926529748041499, "grad_norm": 1.748730941087538, "learning_rate": 1.4075603327563302e-07, "loss": 0.6672, "step": 10940 }, { "epoch": 0.9266144399745925, "grad_norm": 1.2217249098767022, "learning_rate": 1.4043305147079078e-07, "loss": 0.6296, "step": 10941 }, { "epoch": 0.9266991319076858, "grad_norm": 1.2539740998590545, "learning_rate": 1.4011043537409453e-07, "loss": 0.6305, "step": 10942 }, { "epoch": 0.9267838238407792, "grad_norm": 1.5836276474854876, "learning_rate": 1.3978818500982205e-07, "loss": 0.6675, "step": 10943 }, { "epoch": 0.9268685157738725, "grad_norm": 1.7923719679966943, "learning_rate": 1.3946630040222454e-07, "loss": 0.6521, "step": 10944 }, { "epoch": 0.9269532077069659, "grad_norm": 1.372645478743269, "learning_rate": 1.3914478157552645e-07, "loss": 0.6303, "step": 10945 }, { "epoch": 0.9270378996400593, "grad_norm": 1.3241048848538521, "learning_rate": 1.3882362855392238e-07, "loss": 0.6486, "step": 10946 }, { "epoch": 0.9271225915731527, "grad_norm": 2.2081392989917914, "learning_rate": 1.3850284136158189e-07, "loss": 0.6062, "step": 10947 }, { "epoch": 0.927207283506246, "grad_norm": 1.3273896894836603, "learning_rate": 1.3818242002264503e-07, "loss": 0.6509, "step": 10948 }, { "epoch": 0.9272919754393394, "grad_norm": 1.6496574602500125, "learning_rate": 1.3786236456122592e-07, "loss": 0.5211, "step": 10949 }, { "epoch": 0.9273766673724327, "grad_norm": 1.1799335715277888, "learning_rate": 1.3754267500140972e-07, "loss": 0.6022, "step": 10950 }, { "epoch": 0.9274613593055262, "grad_norm": 1.2037360263321024, "learning_rate": 1.372233513672555e-07, "loss": 0.5847, "step": 10951 }, { "epoch": 0.9275460512386196, "grad_norm": 1.759350729052826, "learning_rate": 1.3690439368279296e-07, "loss": 0.6813, "step": 10952 }, { "epoch": 0.9276307431717129, "grad_norm": 0.6589293112147065, "learning_rate": 1.3658580197202732e-07, "loss": 0.8464, "step": 10953 }, { "epoch": 0.9277154351048063, "grad_norm": 0.6844448027984447, "learning_rate": 1.3626757625893216e-07, "loss": 0.8519, "step": 10954 }, { "epoch": 0.9278001270378996, "grad_norm": 1.2751140102486225, "learning_rate": 1.359497165674567e-07, "loss": 0.6388, "step": 10955 }, { "epoch": 0.9278848189709931, "grad_norm": 3.364109564477887, "learning_rate": 1.3563222292152178e-07, "loss": 0.6199, "step": 10956 }, { "epoch": 0.9279695109040864, "grad_norm": 1.5092944604398792, "learning_rate": 1.3531509534501996e-07, "loss": 0.5874, "step": 10957 }, { "epoch": 0.9280542028371798, "grad_norm": 2.8651331513609297, "learning_rate": 1.3499833386181716e-07, "loss": 0.6443, "step": 10958 }, { "epoch": 0.9281388947702731, "grad_norm": 2.3962582079136316, "learning_rate": 1.3468193849575094e-07, "loss": 0.5917, "step": 10959 }, { "epoch": 0.9282235867033665, "grad_norm": 1.4299328694550775, "learning_rate": 1.3436590927063175e-07, "loss": 0.6851, "step": 10960 }, { "epoch": 0.9283082786364599, "grad_norm": 1.2420868301422525, "learning_rate": 1.3405024621024332e-07, "loss": 0.5892, "step": 10961 }, { "epoch": 0.9283929705695533, "grad_norm": 0.6012483237713653, "learning_rate": 1.3373494933833942e-07, "loss": 0.8075, "step": 10962 }, { "epoch": 0.9284776625026466, "grad_norm": 1.529794690523525, "learning_rate": 1.3342001867864883e-07, "loss": 0.5855, "step": 10963 }, { "epoch": 0.92856235443574, "grad_norm": 2.621801876836516, "learning_rate": 1.331054542548721e-07, "loss": 0.6293, "step": 10964 }, { "epoch": 0.9286470463688333, "grad_norm": 1.5089908757986732, "learning_rate": 1.3279125609068077e-07, "loss": 0.6246, "step": 10965 }, { "epoch": 0.9287317383019268, "grad_norm": 1.6376686384933878, "learning_rate": 1.32477424209721e-07, "loss": 0.6454, "step": 10966 }, { "epoch": 0.9288164302350201, "grad_norm": 2.341483884237088, "learning_rate": 1.3216395863560992e-07, "loss": 0.6368, "step": 10967 }, { "epoch": 0.9289011221681135, "grad_norm": 1.8784994698959634, "learning_rate": 1.3185085939193654e-07, "loss": 0.5885, "step": 10968 }, { "epoch": 0.9289858141012068, "grad_norm": 1.2460533201056445, "learning_rate": 1.3153812650226526e-07, "loss": 0.6387, "step": 10969 }, { "epoch": 0.9290705060343002, "grad_norm": 1.3791931037609682, "learning_rate": 1.3122575999012843e-07, "loss": 0.6263, "step": 10970 }, { "epoch": 0.9291551979673937, "grad_norm": 1.2916372467709212, "learning_rate": 1.30913759879035e-07, "loss": 0.6386, "step": 10971 }, { "epoch": 0.929239889900487, "grad_norm": 2.1057463461595485, "learning_rate": 1.306021261924645e-07, "loss": 0.6669, "step": 10972 }, { "epoch": 0.9293245818335804, "grad_norm": 1.3915386760926827, "learning_rate": 1.3029085895386874e-07, "loss": 0.6306, "step": 10973 }, { "epoch": 0.9294092737666737, "grad_norm": 1.417013321435442, "learning_rate": 1.299799581866723e-07, "loss": 0.6262, "step": 10974 }, { "epoch": 0.929493965699767, "grad_norm": 1.3711461424369467, "learning_rate": 1.2966942391427095e-07, "loss": 0.6295, "step": 10975 }, { "epoch": 0.9295786576328605, "grad_norm": 1.29551220672794, "learning_rate": 1.2935925616003599e-07, "loss": 0.6017, "step": 10976 }, { "epoch": 0.9296633495659539, "grad_norm": 2.4820477380421044, "learning_rate": 1.2904945494730758e-07, "loss": 0.6231, "step": 10977 }, { "epoch": 0.9297480414990472, "grad_norm": 1.9579368666896264, "learning_rate": 1.2874002029940102e-07, "loss": 0.6236, "step": 10978 }, { "epoch": 0.9298327334321406, "grad_norm": 1.5254988977358956, "learning_rate": 1.2843095223960268e-07, "loss": 0.6034, "step": 10979 }, { "epoch": 0.9299174253652339, "grad_norm": 0.679690022081082, "learning_rate": 1.2812225079117115e-07, "loss": 0.8445, "step": 10980 }, { "epoch": 0.9300021172983274, "grad_norm": 1.3214570366713894, "learning_rate": 1.278139159773384e-07, "loss": 0.6534, "step": 10981 }, { "epoch": 0.9300868092314207, "grad_norm": 1.3697746171722363, "learning_rate": 1.2750594782130755e-07, "loss": 0.6311, "step": 10982 }, { "epoch": 0.9301715011645141, "grad_norm": 1.3508192716616465, "learning_rate": 1.2719834634625562e-07, "loss": 0.6001, "step": 10983 }, { "epoch": 0.9302561930976074, "grad_norm": 1.3161270394740645, "learning_rate": 1.268911115753302e-07, "loss": 0.6484, "step": 10984 }, { "epoch": 0.9303408850307008, "grad_norm": 1.942634504079466, "learning_rate": 1.2658424353165333e-07, "loss": 0.6159, "step": 10985 }, { "epoch": 0.9304255769637942, "grad_norm": 1.6131036665068235, "learning_rate": 1.2627774223831767e-07, "loss": 0.6695, "step": 10986 }, { "epoch": 0.9305102688968876, "grad_norm": 0.6299196637032358, "learning_rate": 1.2597160771839034e-07, "loss": 0.8388, "step": 10987 }, { "epoch": 0.9305949608299809, "grad_norm": 1.3486603753849926, "learning_rate": 1.2566583999490789e-07, "loss": 0.641, "step": 10988 }, { "epoch": 0.9306796527630743, "grad_norm": 1.2399226967941295, "learning_rate": 1.253604390908819e-07, "loss": 0.616, "step": 10989 }, { "epoch": 0.9307643446961676, "grad_norm": 1.262256465578832, "learning_rate": 1.2505540502929568e-07, "loss": 0.6596, "step": 10990 }, { "epoch": 0.9308490366292611, "grad_norm": 1.1483983642652837, "learning_rate": 1.247507378331042e-07, "loss": 0.6108, "step": 10991 }, { "epoch": 0.9309337285623545, "grad_norm": 1.1728119855764458, "learning_rate": 1.2444643752523523e-07, "loss": 0.6035, "step": 10992 }, { "epoch": 0.9310184204954478, "grad_norm": 1.1555467710798453, "learning_rate": 1.241425041285893e-07, "loss": 0.667, "step": 10993 }, { "epoch": 0.9311031124285412, "grad_norm": 1.2810559202323286, "learning_rate": 1.2383893766603872e-07, "loss": 0.6681, "step": 10994 }, { "epoch": 0.9311878043616345, "grad_norm": 1.6197945900266963, "learning_rate": 1.2353573816042908e-07, "loss": 0.6448, "step": 10995 }, { "epoch": 0.931272496294728, "grad_norm": 1.3337711785589308, "learning_rate": 1.2323290563457657e-07, "loss": 0.6659, "step": 10996 }, { "epoch": 0.9313571882278213, "grad_norm": 2.5281614103929466, "learning_rate": 1.2293044011127187e-07, "loss": 0.6071, "step": 10997 }, { "epoch": 0.9314418801609147, "grad_norm": 1.7596039148656355, "learning_rate": 1.226283416132762e-07, "loss": 0.5992, "step": 10998 }, { "epoch": 0.931526572094008, "grad_norm": 1.511047819066394, "learning_rate": 1.2232661016332526e-07, "loss": 0.5909, "step": 10999 }, { "epoch": 0.9316112640271014, "grad_norm": 1.3004936978950712, "learning_rate": 1.2202524578412534e-07, "loss": 0.6692, "step": 11000 }, { "epoch": 0.9316959559601948, "grad_norm": 1.3112336242064202, "learning_rate": 1.217242484983566e-07, "loss": 0.6332, "step": 11001 }, { "epoch": 0.9317806478932882, "grad_norm": 1.470206333167916, "learning_rate": 1.2142361832866877e-07, "loss": 0.6735, "step": 11002 }, { "epoch": 0.9318653398263815, "grad_norm": 1.3906826792524345, "learning_rate": 1.211233552976876e-07, "loss": 0.6307, "step": 11003 }, { "epoch": 0.9319500317594749, "grad_norm": 1.5906671796218173, "learning_rate": 1.208234594280089e-07, "loss": 0.5989, "step": 11004 }, { "epoch": 0.9320347236925682, "grad_norm": 1.5825799659414646, "learning_rate": 1.2052393074220014e-07, "loss": 0.6108, "step": 11005 }, { "epoch": 0.9321194156256617, "grad_norm": 2.180230716326783, "learning_rate": 1.20224769262805e-07, "loss": 0.5829, "step": 11006 }, { "epoch": 0.932204107558755, "grad_norm": 2.9139179737988763, "learning_rate": 1.1992597501233494e-07, "loss": 0.636, "step": 11007 }, { "epoch": 0.9322887994918484, "grad_norm": 1.4819954116929275, "learning_rate": 1.1962754801327636e-07, "loss": 0.6121, "step": 11008 }, { "epoch": 0.9323734914249417, "grad_norm": 0.6111986996895942, "learning_rate": 1.1932948828808855e-07, "loss": 0.7969, "step": 11009 }, { "epoch": 0.9324581833580351, "grad_norm": 1.2631612704727975, "learning_rate": 1.1903179585920022e-07, "loss": 0.6137, "step": 11010 }, { "epoch": 0.9325428752911286, "grad_norm": 1.1837212969936612, "learning_rate": 1.1873447074901512e-07, "loss": 0.602, "step": 11011 }, { "epoch": 0.9326275672242219, "grad_norm": 1.28770701718936, "learning_rate": 1.1843751297990924e-07, "loss": 0.546, "step": 11012 }, { "epoch": 0.9327122591573153, "grad_norm": 1.3154105434616779, "learning_rate": 1.1814092257422916e-07, "loss": 0.5958, "step": 11013 }, { "epoch": 0.9327969510904086, "grad_norm": 1.280661880345234, "learning_rate": 1.178446995542959e-07, "loss": 0.6659, "step": 11014 }, { "epoch": 0.932881643023502, "grad_norm": 1.1512347754476175, "learning_rate": 1.1754884394240051e-07, "loss": 0.6303, "step": 11015 }, { "epoch": 0.9329663349565954, "grad_norm": 2.043955186898278, "learning_rate": 1.172533557608091e-07, "loss": 0.6275, "step": 11016 }, { "epoch": 0.9330510268896888, "grad_norm": 2.1971862937096662, "learning_rate": 1.1695823503175774e-07, "loss": 0.6256, "step": 11017 }, { "epoch": 0.9331357188227821, "grad_norm": 1.702784008079957, "learning_rate": 1.166634817774559e-07, "loss": 0.6409, "step": 11018 }, { "epoch": 0.9332204107558755, "grad_norm": 1.4555337406636082, "learning_rate": 1.1636909602008529e-07, "loss": 0.6077, "step": 11019 }, { "epoch": 0.9333051026889688, "grad_norm": 1.434854007827536, "learning_rate": 1.1607507778180094e-07, "loss": 0.6549, "step": 11020 }, { "epoch": 0.9333897946220623, "grad_norm": 1.4802652227965607, "learning_rate": 1.1578142708472795e-07, "loss": 0.66, "step": 11021 }, { "epoch": 0.9334744865551556, "grad_norm": 1.7337687651902538, "learning_rate": 1.1548814395096642e-07, "loss": 0.6211, "step": 11022 }, { "epoch": 0.933559178488249, "grad_norm": 1.7685829779716382, "learning_rate": 1.1519522840258646e-07, "loss": 0.6202, "step": 11023 }, { "epoch": 0.9336438704213423, "grad_norm": 1.6076587614608688, "learning_rate": 1.149026804616321e-07, "loss": 0.6898, "step": 11024 }, { "epoch": 0.9337285623544357, "grad_norm": 1.3013589275979978, "learning_rate": 1.1461050015011854e-07, "loss": 0.637, "step": 11025 }, { "epoch": 0.9338132542875291, "grad_norm": 1.2556062577767861, "learning_rate": 1.1431868749003372e-07, "loss": 0.6075, "step": 11026 }, { "epoch": 0.9338979462206225, "grad_norm": 0.6216439061025755, "learning_rate": 1.1402724250333952e-07, "loss": 0.8032, "step": 11027 }, { "epoch": 0.9339826381537158, "grad_norm": 1.1488236755649448, "learning_rate": 1.1373616521196729e-07, "loss": 0.6074, "step": 11028 }, { "epoch": 0.9340673300868092, "grad_norm": 1.495209942433847, "learning_rate": 1.1344545563782227e-07, "loss": 0.6771, "step": 11029 }, { "epoch": 0.9341520220199026, "grad_norm": 1.3755709334343116, "learning_rate": 1.131551138027831e-07, "loss": 0.668, "step": 11030 }, { "epoch": 0.934236713952996, "grad_norm": 3.002750211286419, "learning_rate": 1.1286513972869784e-07, "loss": 0.6389, "step": 11031 }, { "epoch": 0.9343214058860894, "grad_norm": 1.2629578138203956, "learning_rate": 1.1257553343739013e-07, "loss": 0.6154, "step": 11032 }, { "epoch": 0.9344060978191827, "grad_norm": 1.3762478044461863, "learning_rate": 1.1228629495065313e-07, "loss": 0.6571, "step": 11033 }, { "epoch": 0.9344907897522761, "grad_norm": 1.3696595071824578, "learning_rate": 1.1199742429025439e-07, "loss": 0.6334, "step": 11034 }, { "epoch": 0.9345754816853694, "grad_norm": 1.2048526429663542, "learning_rate": 1.1170892147793267e-07, "loss": 0.616, "step": 11035 }, { "epoch": 0.9346601736184629, "grad_norm": 3.5906580739762988, "learning_rate": 1.1142078653539945e-07, "loss": 0.5978, "step": 11036 }, { "epoch": 0.9347448655515562, "grad_norm": 1.488437943257069, "learning_rate": 1.1113301948433796e-07, "loss": 0.6708, "step": 11037 }, { "epoch": 0.9348295574846496, "grad_norm": 1.5785082034890896, "learning_rate": 1.1084562034640477e-07, "loss": 0.6318, "step": 11038 }, { "epoch": 0.9349142494177429, "grad_norm": 3.230668063490743, "learning_rate": 1.1055858914322815e-07, "loss": 0.5919, "step": 11039 }, { "epoch": 0.9349989413508364, "grad_norm": 1.6035922610749047, "learning_rate": 1.1027192589640801e-07, "loss": 0.6084, "step": 11040 }, { "epoch": 0.9350836332839297, "grad_norm": 1.5183952377820964, "learning_rate": 1.0998563062751822e-07, "loss": 0.6161, "step": 11041 }, { "epoch": 0.9351683252170231, "grad_norm": 1.2331656771794428, "learning_rate": 1.0969970335810321e-07, "loss": 0.6315, "step": 11042 }, { "epoch": 0.9352530171501164, "grad_norm": 2.667485952508065, "learning_rate": 1.0941414410968133e-07, "loss": 0.5875, "step": 11043 }, { "epoch": 0.9353377090832098, "grad_norm": 1.448470195881837, "learning_rate": 1.0912895290374148e-07, "loss": 0.6262, "step": 11044 }, { "epoch": 0.9354224010163033, "grad_norm": 1.4114951311245996, "learning_rate": 1.0884412976174652e-07, "loss": 0.634, "step": 11045 }, { "epoch": 0.9355070929493966, "grad_norm": 1.5209140590093426, "learning_rate": 1.085596747051304e-07, "loss": 0.606, "step": 11046 }, { "epoch": 0.93559178488249, "grad_norm": 0.6307136235725131, "learning_rate": 1.0827558775530045e-07, "loss": 0.8741, "step": 11047 }, { "epoch": 0.9356764768155833, "grad_norm": 1.503753762455651, "learning_rate": 1.0799186893363622e-07, "loss": 0.6431, "step": 11048 }, { "epoch": 0.9357611687486767, "grad_norm": 1.604172344718187, "learning_rate": 1.0770851826148732e-07, "loss": 0.6274, "step": 11049 }, { "epoch": 0.9358458606817701, "grad_norm": 1.5205693758366183, "learning_rate": 1.0742553576017834e-07, "loss": 0.6479, "step": 11050 }, { "epoch": 0.9359305526148635, "grad_norm": 1.7389110297772685, "learning_rate": 1.0714292145100558e-07, "loss": 0.5845, "step": 11051 }, { "epoch": 0.9360152445479568, "grad_norm": 1.4948970461815525, "learning_rate": 1.0686067535523648e-07, "loss": 0.6068, "step": 11052 }, { "epoch": 0.9360999364810502, "grad_norm": 8.427147824613046, "learning_rate": 1.0657879749411238e-07, "loss": 0.6264, "step": 11053 }, { "epoch": 0.9361846284141435, "grad_norm": 1.1954548712338076, "learning_rate": 1.0629728788884519e-07, "loss": 0.5858, "step": 11054 }, { "epoch": 0.936269320347237, "grad_norm": 22.761588841887136, "learning_rate": 1.0601614656062076e-07, "loss": 0.6164, "step": 11055 }, { "epoch": 0.9363540122803303, "grad_norm": 1.2550740460704133, "learning_rate": 1.057353735305966e-07, "loss": 0.6519, "step": 11056 }, { "epoch": 0.9364387042134237, "grad_norm": 0.6302956801641132, "learning_rate": 1.0545496881990136e-07, "loss": 0.8536, "step": 11057 }, { "epoch": 0.936523396146517, "grad_norm": 1.7375920549702706, "learning_rate": 1.0517493244963761e-07, "loss": 0.6548, "step": 11058 }, { "epoch": 0.9366080880796104, "grad_norm": 1.3055401320308222, "learning_rate": 1.0489526444087961e-07, "loss": 0.6021, "step": 11059 }, { "epoch": 0.9366927800127038, "grad_norm": 1.6574569883810217, "learning_rate": 1.0461596481467384e-07, "loss": 0.6081, "step": 11060 }, { "epoch": 0.9367774719457972, "grad_norm": 1.401698814423233, "learning_rate": 1.0433703359203906e-07, "loss": 0.6324, "step": 11061 }, { "epoch": 0.9368621638788905, "grad_norm": 0.6768458498116116, "learning_rate": 1.040584707939657e-07, "loss": 0.8855, "step": 11062 }, { "epoch": 0.9369468558119839, "grad_norm": 1.4036609559946889, "learning_rate": 1.0378027644141808e-07, "loss": 0.6381, "step": 11063 }, { "epoch": 0.9370315477450772, "grad_norm": 2.1797570708958527, "learning_rate": 1.035024505553317e-07, "loss": 0.6515, "step": 11064 }, { "epoch": 0.9371162396781707, "grad_norm": 1.453703598546672, "learning_rate": 1.0322499315661372e-07, "loss": 0.6385, "step": 11065 }, { "epoch": 0.937200931611264, "grad_norm": 1.5240063550984426, "learning_rate": 1.0294790426614465e-07, "loss": 0.75, "step": 11066 }, { "epoch": 0.9372856235443574, "grad_norm": 1.6514954529699362, "learning_rate": 1.0267118390477726e-07, "loss": 0.6477, "step": 11067 }, { "epoch": 0.9373703154774508, "grad_norm": 1.2868797400238146, "learning_rate": 1.0239483209333544e-07, "loss": 0.6259, "step": 11068 }, { "epoch": 0.9374550074105441, "grad_norm": 1.2439671908232353, "learning_rate": 1.0211884885261702e-07, "loss": 0.6028, "step": 11069 }, { "epoch": 0.9375396993436376, "grad_norm": 1.5880471705369743, "learning_rate": 1.0184323420339037e-07, "loss": 0.6399, "step": 11070 }, { "epoch": 0.9376243912767309, "grad_norm": 2.295060837988454, "learning_rate": 1.0156798816639724e-07, "loss": 0.679, "step": 11071 }, { "epoch": 0.9377090832098243, "grad_norm": 1.2637746559381304, "learning_rate": 1.012931107623516e-07, "loss": 0.6361, "step": 11072 }, { "epoch": 0.9377937751429176, "grad_norm": 1.7598460924820563, "learning_rate": 1.0101860201193914e-07, "loss": 0.6231, "step": 11073 }, { "epoch": 0.937878467076011, "grad_norm": 0.6224572256367978, "learning_rate": 1.0074446193581833e-07, "loss": 0.8801, "step": 11074 }, { "epoch": 0.9379631590091044, "grad_norm": 1.5030695166338797, "learning_rate": 1.0047069055461933e-07, "loss": 0.6592, "step": 11075 }, { "epoch": 0.9380478509421978, "grad_norm": 1.2160728861896999, "learning_rate": 1.0019728788894512e-07, "loss": 0.6266, "step": 11076 }, { "epoch": 0.9381325428752911, "grad_norm": 1.251931776393921, "learning_rate": 9.992425395937088e-08, "loss": 0.6635, "step": 11077 }, { "epoch": 0.9382172348083845, "grad_norm": 1.1806915505578985, "learning_rate": 9.965158878644354e-08, "loss": 0.5703, "step": 11078 }, { "epoch": 0.9383019267414778, "grad_norm": 1.2492387925938944, "learning_rate": 9.937929239068278e-08, "loss": 0.6001, "step": 11079 }, { "epoch": 0.9383866186745713, "grad_norm": 1.4856283879886762, "learning_rate": 9.910736479258055e-08, "loss": 0.6237, "step": 11080 }, { "epoch": 0.9384713106076646, "grad_norm": 1.5718878676834411, "learning_rate": 9.883580601260046e-08, "loss": 0.6878, "step": 11081 }, { "epoch": 0.938556002540758, "grad_norm": 1.2627426106714184, "learning_rate": 9.85646160711784e-08, "loss": 0.6193, "step": 11082 }, { "epoch": 0.9386406944738513, "grad_norm": 2.003976998905279, "learning_rate": 9.82937949887236e-08, "loss": 0.6503, "step": 11083 }, { "epoch": 0.9387253864069447, "grad_norm": 0.6260879133791191, "learning_rate": 9.802334278561643e-08, "loss": 0.9022, "step": 11084 }, { "epoch": 0.9388100783400382, "grad_norm": 0.6384772586942701, "learning_rate": 9.775325948221059e-08, "loss": 0.8225, "step": 11085 }, { "epoch": 0.9388947702731315, "grad_norm": 0.6739677263856502, "learning_rate": 9.748354509882985e-08, "loss": 0.8248, "step": 11086 }, { "epoch": 0.9389794622062249, "grad_norm": 1.9052922699084072, "learning_rate": 9.721419965577239e-08, "loss": 0.6097, "step": 11087 }, { "epoch": 0.9390641541393182, "grad_norm": 1.2354455864151812, "learning_rate": 9.694522317330812e-08, "loss": 0.6324, "step": 11088 }, { "epoch": 0.9391488460724116, "grad_norm": 1.5192816315996154, "learning_rate": 9.667661567167863e-08, "loss": 0.6296, "step": 11089 }, { "epoch": 0.939233538005505, "grad_norm": 1.1677467238845263, "learning_rate": 9.64083771710983e-08, "loss": 0.6006, "step": 11090 }, { "epoch": 0.9393182299385984, "grad_norm": 1.196447057204599, "learning_rate": 9.61405076917532e-08, "loss": 0.6449, "step": 11091 }, { "epoch": 0.9394029218716917, "grad_norm": 1.6291561062832736, "learning_rate": 9.587300725380223e-08, "loss": 0.6295, "step": 11092 }, { "epoch": 0.9394876138047851, "grad_norm": 1.3455973128470642, "learning_rate": 9.56058758773759e-08, "loss": 0.5899, "step": 11093 }, { "epoch": 0.9395723057378784, "grad_norm": 2.46359110092915, "learning_rate": 9.533911358257763e-08, "loss": 0.6249, "step": 11094 }, { "epoch": 0.9396569976709719, "grad_norm": 1.590060942723094, "learning_rate": 9.507272038948189e-08, "loss": 0.6205, "step": 11095 }, { "epoch": 0.9397416896040652, "grad_norm": 1.8682930881027442, "learning_rate": 9.480669631813711e-08, "loss": 0.6513, "step": 11096 }, { "epoch": 0.9398263815371586, "grad_norm": 1.2440660328143458, "learning_rate": 9.454104138856223e-08, "loss": 0.6203, "step": 11097 }, { "epoch": 0.9399110734702519, "grad_norm": 1.3325452653341905, "learning_rate": 9.427575562075076e-08, "loss": 0.6328, "step": 11098 }, { "epoch": 0.9399957654033453, "grad_norm": 0.6569399768138027, "learning_rate": 9.401083903466501e-08, "loss": 0.893, "step": 11099 }, { "epoch": 0.9400804573364387, "grad_norm": 1.2724116220948702, "learning_rate": 9.374629165024184e-08, "loss": 0.6052, "step": 11100 }, { "epoch": 0.9401651492695321, "grad_norm": 1.9430165128556616, "learning_rate": 9.348211348739033e-08, "loss": 0.6249, "step": 11101 }, { "epoch": 0.9402498412026254, "grad_norm": 1.2533730950937838, "learning_rate": 9.321830456599068e-08, "loss": 0.5867, "step": 11102 }, { "epoch": 0.9403345331357188, "grad_norm": 1.1935820846429241, "learning_rate": 9.295486490589644e-08, "loss": 0.6219, "step": 11103 }, { "epoch": 0.9404192250688121, "grad_norm": 1.1541412300091163, "learning_rate": 9.269179452693288e-08, "loss": 0.6066, "step": 11104 }, { "epoch": 0.9405039170019056, "grad_norm": 1.4174700821560209, "learning_rate": 9.24290934488975e-08, "loss": 0.6295, "step": 11105 }, { "epoch": 0.940588608934999, "grad_norm": 1.6662846068523485, "learning_rate": 9.216676169155947e-08, "loss": 0.6195, "step": 11106 }, { "epoch": 0.9406733008680923, "grad_norm": 1.3407117732364742, "learning_rate": 9.190479927466023e-08, "loss": 0.6564, "step": 11107 }, { "epoch": 0.9407579928011857, "grad_norm": 1.1615644364434603, "learning_rate": 9.164320621791511e-08, "loss": 0.6498, "step": 11108 }, { "epoch": 0.940842684734279, "grad_norm": 2.4884902468251067, "learning_rate": 9.138198254100893e-08, "loss": 0.5828, "step": 11109 }, { "epoch": 0.9409273766673725, "grad_norm": 2.1688510325417267, "learning_rate": 9.112112826360154e-08, "loss": 0.6594, "step": 11110 }, { "epoch": 0.9410120686004658, "grad_norm": 1.3461945716917896, "learning_rate": 9.086064340532275e-08, "loss": 0.6383, "step": 11111 }, { "epoch": 0.9410967605335592, "grad_norm": 1.3620731416500047, "learning_rate": 9.060052798577635e-08, "loss": 0.5994, "step": 11112 }, { "epoch": 0.9411814524666525, "grad_norm": 1.1118832339273967, "learning_rate": 9.034078202453611e-08, "loss": 0.5978, "step": 11113 }, { "epoch": 0.9412661443997459, "grad_norm": 1.6675653385994769, "learning_rate": 9.00814055411503e-08, "loss": 0.6324, "step": 11114 }, { "epoch": 0.9413508363328393, "grad_norm": 1.2896579096267506, "learning_rate": 8.98223985551372e-08, "loss": 0.5887, "step": 11115 }, { "epoch": 0.9414355282659327, "grad_norm": 1.3845370583630734, "learning_rate": 8.956376108598951e-08, "loss": 0.625, "step": 11116 }, { "epoch": 0.941520220199026, "grad_norm": 1.3485005105827326, "learning_rate": 8.930549315317116e-08, "loss": 0.6094, "step": 11117 }, { "epoch": 0.9416049121321194, "grad_norm": 1.4509375686868256, "learning_rate": 8.904759477611768e-08, "loss": 0.592, "step": 11118 }, { "epoch": 0.9416896040652127, "grad_norm": 1.5353876867034502, "learning_rate": 8.879006597423744e-08, "loss": 0.6295, "step": 11119 }, { "epoch": 0.9417742959983062, "grad_norm": 1.498050898648106, "learning_rate": 8.85329067669105e-08, "loss": 0.5956, "step": 11120 }, { "epoch": 0.9418589879313995, "grad_norm": 0.626844342876964, "learning_rate": 8.827611717349027e-08, "loss": 0.8777, "step": 11121 }, { "epoch": 0.9419436798644929, "grad_norm": 1.1407395907501767, "learning_rate": 8.801969721330073e-08, "loss": 0.6103, "step": 11122 }, { "epoch": 0.9420283717975863, "grad_norm": 0.574670434864886, "learning_rate": 8.776364690563866e-08, "loss": 0.8469, "step": 11123 }, { "epoch": 0.9421130637306796, "grad_norm": 1.6295663851964595, "learning_rate": 8.75079662697742e-08, "loss": 0.5946, "step": 11124 }, { "epoch": 0.9421977556637731, "grad_norm": 1.2137265627785718, "learning_rate": 8.725265532494864e-08, "loss": 0.5745, "step": 11125 }, { "epoch": 0.9422824475968664, "grad_norm": 1.3492672121194724, "learning_rate": 8.699771409037438e-08, "loss": 0.6223, "step": 11126 }, { "epoch": 0.9423671395299598, "grad_norm": 1.427950518277461, "learning_rate": 8.67431425852383e-08, "loss": 0.6298, "step": 11127 }, { "epoch": 0.9424518314630531, "grad_norm": 1.417568046887981, "learning_rate": 8.648894082869674e-08, "loss": 0.6653, "step": 11128 }, { "epoch": 0.9425365233961465, "grad_norm": 1.7796648178851617, "learning_rate": 8.623510883988106e-08, "loss": 0.6126, "step": 11129 }, { "epoch": 0.9426212153292399, "grad_norm": 1.4933654928338167, "learning_rate": 8.598164663789322e-08, "loss": 0.6529, "step": 11130 }, { "epoch": 0.9427059072623333, "grad_norm": 25.165496888893355, "learning_rate": 8.572855424180738e-08, "loss": 0.59, "step": 11131 }, { "epoch": 0.9427905991954266, "grad_norm": 1.421537025112058, "learning_rate": 8.547583167066997e-08, "loss": 0.5989, "step": 11132 }, { "epoch": 0.94287529112852, "grad_norm": 1.4784889596483037, "learning_rate": 8.522347894350025e-08, "loss": 0.6001, "step": 11133 }, { "epoch": 0.9429599830616133, "grad_norm": 1.9629685289488346, "learning_rate": 8.497149607928856e-08, "loss": 0.6961, "step": 11134 }, { "epoch": 0.9430446749947068, "grad_norm": 2.240745936402506, "learning_rate": 8.471988309699808e-08, "loss": 0.6357, "step": 11135 }, { "epoch": 0.9431293669278001, "grad_norm": 2.1472059960863525, "learning_rate": 8.446864001556421e-08, "loss": 0.6116, "step": 11136 }, { "epoch": 0.9432140588608935, "grad_norm": 1.4403412677814795, "learning_rate": 8.42177668538935e-08, "loss": 0.5877, "step": 11137 }, { "epoch": 0.9432987507939868, "grad_norm": 1.506001406204199, "learning_rate": 8.396726363086638e-08, "loss": 0.6679, "step": 11138 }, { "epoch": 0.9433834427270802, "grad_norm": 1.6072553017235047, "learning_rate": 8.371713036533446e-08, "loss": 0.5885, "step": 11139 }, { "epoch": 0.9434681346601737, "grad_norm": 1.4810074619540041, "learning_rate": 8.346736707612158e-08, "loss": 0.6277, "step": 11140 }, { "epoch": 0.943552826593267, "grad_norm": 1.562750032567017, "learning_rate": 8.321797378202378e-08, "loss": 0.6227, "step": 11141 }, { "epoch": 0.9436375185263604, "grad_norm": 1.4894718099373199, "learning_rate": 8.296895050180831e-08, "loss": 0.5798, "step": 11142 }, { "epoch": 0.9437222104594537, "grad_norm": 4.533196937388103, "learning_rate": 8.272029725421682e-08, "loss": 0.6178, "step": 11143 }, { "epoch": 0.9438069023925472, "grad_norm": 1.481181527195407, "learning_rate": 8.247201405796102e-08, "loss": 0.6787, "step": 11144 }, { "epoch": 0.9438915943256405, "grad_norm": 1.664596148054096, "learning_rate": 8.222410093172539e-08, "loss": 0.5914, "step": 11145 }, { "epoch": 0.9439762862587339, "grad_norm": 0.7097739944432991, "learning_rate": 8.19765578941678e-08, "loss": 0.8583, "step": 11146 }, { "epoch": 0.9440609781918272, "grad_norm": 1.2952792551515075, "learning_rate": 8.172938496391559e-08, "loss": 0.5904, "step": 11147 }, { "epoch": 0.9441456701249206, "grad_norm": 1.8183211965277726, "learning_rate": 8.148258215957105e-08, "loss": 0.6825, "step": 11148 }, { "epoch": 0.944230362058014, "grad_norm": 1.4487872765579803, "learning_rate": 8.123614949970715e-08, "loss": 0.6416, "step": 11149 }, { "epoch": 0.9443150539911074, "grad_norm": 0.6799196142061049, "learning_rate": 8.099008700286903e-08, "loss": 0.8773, "step": 11150 }, { "epoch": 0.9443997459242007, "grad_norm": 1.349625004502421, "learning_rate": 8.074439468757411e-08, "loss": 0.6118, "step": 11151 }, { "epoch": 0.9444844378572941, "grad_norm": 1.2115588806957132, "learning_rate": 8.049907257231205e-08, "loss": 0.6644, "step": 11152 }, { "epoch": 0.9445691297903874, "grad_norm": 1.180187357416461, "learning_rate": 8.025412067554472e-08, "loss": 0.6214, "step": 11153 }, { "epoch": 0.9446538217234809, "grad_norm": 1.221917724987131, "learning_rate": 8.000953901570629e-08, "loss": 0.6108, "step": 11154 }, { "epoch": 0.9447385136565742, "grad_norm": 1.4371398964776991, "learning_rate": 7.97653276112026e-08, "loss": 0.6815, "step": 11155 }, { "epoch": 0.9448232055896676, "grad_norm": 1.333206248627479, "learning_rate": 7.952148648041225e-08, "loss": 0.6795, "step": 11156 }, { "epoch": 0.9449078975227609, "grad_norm": 1.6552254088573808, "learning_rate": 7.927801564168447e-08, "loss": 0.6543, "step": 11157 }, { "epoch": 0.9449925894558543, "grad_norm": 1.1577046548178025, "learning_rate": 7.903491511334238e-08, "loss": 0.5996, "step": 11158 }, { "epoch": 0.9450772813889478, "grad_norm": 1.3765505268355223, "learning_rate": 7.879218491368191e-08, "loss": 0.6257, "step": 11159 }, { "epoch": 0.9451619733220411, "grad_norm": 1.6997322174246998, "learning_rate": 7.854982506096731e-08, "loss": 0.6334, "step": 11160 }, { "epoch": 0.9452466652551345, "grad_norm": 1.288831822739515, "learning_rate": 7.830783557343901e-08, "loss": 0.6282, "step": 11161 }, { "epoch": 0.9453313571882278, "grad_norm": 1.2885069962717481, "learning_rate": 7.806621646930857e-08, "loss": 0.6118, "step": 11162 }, { "epoch": 0.9454160491213212, "grad_norm": 1.5182070733996993, "learning_rate": 7.782496776675697e-08, "loss": 0.6924, "step": 11163 }, { "epoch": 0.9455007410544146, "grad_norm": 1.1623352085232552, "learning_rate": 7.758408948394136e-08, "loss": 0.6104, "step": 11164 }, { "epoch": 0.945585432987508, "grad_norm": 1.3240707470719981, "learning_rate": 7.734358163898836e-08, "loss": 0.5834, "step": 11165 }, { "epoch": 0.9456701249206013, "grad_norm": 1.469094938294357, "learning_rate": 7.710344424999739e-08, "loss": 0.6221, "step": 11166 }, { "epoch": 0.9457548168536947, "grad_norm": 1.4794669240670846, "learning_rate": 7.686367733504063e-08, "loss": 0.6311, "step": 11167 }, { "epoch": 0.945839508786788, "grad_norm": 1.5028818152467873, "learning_rate": 7.66242809121609e-08, "loss": 0.6545, "step": 11168 }, { "epoch": 0.9459242007198815, "grad_norm": 0.6051594816291687, "learning_rate": 7.638525499937432e-08, "loss": 0.8459, "step": 11169 }, { "epoch": 0.9460088926529748, "grad_norm": 1.6120546006530367, "learning_rate": 7.614659961466985e-08, "loss": 0.6459, "step": 11170 }, { "epoch": 0.9460935845860682, "grad_norm": 4.327327581922652, "learning_rate": 7.590831477600646e-08, "loss": 0.6287, "step": 11171 }, { "epoch": 0.9461782765191615, "grad_norm": 1.9564231823647662, "learning_rate": 7.567040050131646e-08, "loss": 0.6177, "step": 11172 }, { "epoch": 0.9462629684522549, "grad_norm": 0.6758100781793208, "learning_rate": 7.543285680850443e-08, "loss": 0.8446, "step": 11173 }, { "epoch": 0.9463476603853483, "grad_norm": 1.4141315647684536, "learning_rate": 7.519568371544717e-08, "loss": 0.608, "step": 11174 }, { "epoch": 0.9464323523184417, "grad_norm": 1.3710341844292713, "learning_rate": 7.495888123999262e-08, "loss": 0.5998, "step": 11175 }, { "epoch": 0.946517044251535, "grad_norm": 1.252201471103894, "learning_rate": 7.472244939996153e-08, "loss": 0.6328, "step": 11176 }, { "epoch": 0.9466017361846284, "grad_norm": 1.7086828438680293, "learning_rate": 7.448638821314635e-08, "loss": 0.6331, "step": 11177 }, { "epoch": 0.9466864281177217, "grad_norm": 1.464912483102842, "learning_rate": 7.42506976973123e-08, "loss": 0.6731, "step": 11178 }, { "epoch": 0.9467711200508152, "grad_norm": 1.4734679607387982, "learning_rate": 7.401537787019686e-08, "loss": 0.6288, "step": 11179 }, { "epoch": 0.9468558119839086, "grad_norm": 0.5582437282078513, "learning_rate": 7.378042874950864e-08, "loss": 0.8627, "step": 11180 }, { "epoch": 0.9469405039170019, "grad_norm": 1.6159400952233547, "learning_rate": 7.354585035292794e-08, "loss": 0.6774, "step": 11181 }, { "epoch": 0.9470251958500953, "grad_norm": 1.4757777476715654, "learning_rate": 7.331164269810953e-08, "loss": 0.5812, "step": 11182 }, { "epoch": 0.9471098877831886, "grad_norm": 1.8004012340056412, "learning_rate": 7.307780580267765e-08, "loss": 0.6189, "step": 11183 }, { "epoch": 0.9471945797162821, "grad_norm": 1.4315692198228749, "learning_rate": 7.284433968423043e-08, "loss": 0.6687, "step": 11184 }, { "epoch": 0.9472792716493754, "grad_norm": 1.4495850731197113, "learning_rate": 7.261124436033717e-08, "loss": 0.6397, "step": 11185 }, { "epoch": 0.9473639635824688, "grad_norm": 1.7587686153775939, "learning_rate": 7.237851984853883e-08, "loss": 0.6232, "step": 11186 }, { "epoch": 0.9474486555155621, "grad_norm": 1.3378098116804613, "learning_rate": 7.214616616635083e-08, "loss": 0.6351, "step": 11187 }, { "epoch": 0.9475333474486555, "grad_norm": 1.291787435571945, "learning_rate": 7.191418333125755e-08, "loss": 0.5933, "step": 11188 }, { "epoch": 0.9476180393817489, "grad_norm": 1.4302084780633413, "learning_rate": 7.168257136071777e-08, "loss": 0.6051, "step": 11189 }, { "epoch": 0.9477027313148423, "grad_norm": 1.4926596742362053, "learning_rate": 7.145133027216089e-08, "loss": 0.6052, "step": 11190 }, { "epoch": 0.9477874232479356, "grad_norm": 1.5668553047918141, "learning_rate": 7.122046008298967e-08, "loss": 0.6082, "step": 11191 }, { "epoch": 0.947872115181029, "grad_norm": 0.6481433056598777, "learning_rate": 7.09899608105774e-08, "loss": 0.8501, "step": 11192 }, { "epoch": 0.9479568071141223, "grad_norm": 1.6852621646582902, "learning_rate": 7.075983247227136e-08, "loss": 0.595, "step": 11193 }, { "epoch": 0.9480414990472158, "grad_norm": 1.061597569381067, "learning_rate": 7.053007508538879e-08, "loss": 0.6223, "step": 11194 }, { "epoch": 0.9481261909803091, "grad_norm": 0.592004751884198, "learning_rate": 7.03006886672214e-08, "loss": 0.8386, "step": 11195 }, { "epoch": 0.9482108829134025, "grad_norm": 1.6111701283425977, "learning_rate": 7.00716732350315e-08, "loss": 0.6315, "step": 11196 }, { "epoch": 0.9482955748464958, "grad_norm": 1.2586095683258447, "learning_rate": 6.984302880605309e-08, "loss": 0.6231, "step": 11197 }, { "epoch": 0.9483802667795892, "grad_norm": 2.001117063503318, "learning_rate": 6.961475539749296e-08, "loss": 0.6206, "step": 11198 }, { "epoch": 0.9484649587126827, "grad_norm": 1.9764215634702054, "learning_rate": 6.938685302653014e-08, "loss": 0.6527, "step": 11199 }, { "epoch": 0.948549650645776, "grad_norm": 4.103503211574566, "learning_rate": 6.915932171031536e-08, "loss": 0.6502, "step": 11200 }, { "epoch": 0.9486343425788694, "grad_norm": 1.21661173310978, "learning_rate": 6.89321614659727e-08, "loss": 0.579, "step": 11201 }, { "epoch": 0.9487190345119627, "grad_norm": 1.3004751964101893, "learning_rate": 6.870537231059515e-08, "loss": 0.5938, "step": 11202 }, { "epoch": 0.9488037264450561, "grad_norm": 0.6308335398480439, "learning_rate": 6.847895426125074e-08, "loss": 0.8919, "step": 11203 }, { "epoch": 0.9488884183781495, "grad_norm": 1.5501126022245577, "learning_rate": 6.825290733497914e-08, "loss": 0.6164, "step": 11204 }, { "epoch": 0.9489731103112429, "grad_norm": 0.6404498629404664, "learning_rate": 6.802723154879066e-08, "loss": 0.8355, "step": 11205 }, { "epoch": 0.9490578022443362, "grad_norm": 2.233828491562094, "learning_rate": 6.780192691966947e-08, "loss": 0.5523, "step": 11206 }, { "epoch": 0.9491424941774296, "grad_norm": 2.8241465461805917, "learning_rate": 6.75769934645698e-08, "loss": 0.5917, "step": 11207 }, { "epoch": 0.9492271861105229, "grad_norm": 1.6129466858050194, "learning_rate": 6.735243120042034e-08, "loss": 0.6427, "step": 11208 }, { "epoch": 0.9493118780436164, "grad_norm": 1.2837310054552582, "learning_rate": 6.712824014412034e-08, "loss": 0.5834, "step": 11209 }, { "epoch": 0.9493965699767097, "grad_norm": 1.5183671759772528, "learning_rate": 6.690442031254073e-08, "loss": 0.6127, "step": 11210 }, { "epoch": 0.9494812619098031, "grad_norm": 1.8465278649585857, "learning_rate": 6.66809717225253e-08, "loss": 0.6427, "step": 11211 }, { "epoch": 0.9495659538428964, "grad_norm": 1.4829741449058194, "learning_rate": 6.645789439089e-08, "loss": 0.6931, "step": 11212 }, { "epoch": 0.9496506457759898, "grad_norm": 1.6761348304699104, "learning_rate": 6.623518833442255e-08, "loss": 0.668, "step": 11213 }, { "epoch": 0.9497353377090832, "grad_norm": 1.1743738305320501, "learning_rate": 6.601285356988229e-08, "loss": 0.6001, "step": 11214 }, { "epoch": 0.9498200296421766, "grad_norm": 0.6494204729142952, "learning_rate": 6.579089011400253e-08, "loss": 0.8069, "step": 11215 }, { "epoch": 0.94990472157527, "grad_norm": 1.4032452346048008, "learning_rate": 6.556929798348543e-08, "loss": 0.6445, "step": 11216 }, { "epoch": 0.9499894135083633, "grad_norm": 0.6130767013541509, "learning_rate": 6.534807719500768e-08, "loss": 0.8633, "step": 11217 }, { "epoch": 0.9500741054414567, "grad_norm": 1.450725559502451, "learning_rate": 6.512722776521763e-08, "loss": 0.625, "step": 11218 }, { "epoch": 0.9501587973745501, "grad_norm": 1.9766969933015206, "learning_rate": 6.490674971073473e-08, "loss": 0.6151, "step": 11219 }, { "epoch": 0.9502434893076435, "grad_norm": 1.8369581554376009, "learning_rate": 6.468664304815187e-08, "loss": 0.6356, "step": 11220 }, { "epoch": 0.9503281812407368, "grad_norm": 1.4673969605549828, "learning_rate": 6.446690779403241e-08, "loss": 0.6355, "step": 11221 }, { "epoch": 0.9504128731738302, "grad_norm": 1.4665662723808714, "learning_rate": 6.424754396491373e-08, "loss": 0.5512, "step": 11222 }, { "epoch": 0.9504975651069235, "grad_norm": 1.2165374074159314, "learning_rate": 6.40285515773026e-08, "loss": 0.6292, "step": 11223 }, { "epoch": 0.950582257040017, "grad_norm": 1.4117568515315266, "learning_rate": 6.380993064768026e-08, "loss": 0.6332, "step": 11224 }, { "epoch": 0.9506669489731103, "grad_norm": 0.6240804391021771, "learning_rate": 6.35916811924997e-08, "loss": 0.8298, "step": 11225 }, { "epoch": 0.9507516409062037, "grad_norm": 1.4057445101947172, "learning_rate": 6.337380322818387e-08, "loss": 0.5817, "step": 11226 }, { "epoch": 0.950836332839297, "grad_norm": 2.5886016983347537, "learning_rate": 6.315629677113078e-08, "loss": 0.6514, "step": 11227 }, { "epoch": 0.9509210247723904, "grad_norm": 1.231565386400893, "learning_rate": 6.293916183770732e-08, "loss": 0.6035, "step": 11228 }, { "epoch": 0.9510057167054838, "grad_norm": 1.465107322246012, "learning_rate": 6.272239844425543e-08, "loss": 0.5836, "step": 11229 }, { "epoch": 0.9510904086385772, "grad_norm": 1.5555290975627833, "learning_rate": 6.250600660708705e-08, "loss": 0.6986, "step": 11230 }, { "epoch": 0.9511751005716705, "grad_norm": 1.744063234836592, "learning_rate": 6.228998634248696e-08, "loss": 0.6159, "step": 11231 }, { "epoch": 0.9512597925047639, "grad_norm": 1.5377124679374352, "learning_rate": 6.207433766671211e-08, "loss": 0.6085, "step": 11232 }, { "epoch": 0.9513444844378572, "grad_norm": 1.3928933547564646, "learning_rate": 6.185906059599068e-08, "loss": 0.6039, "step": 11233 }, { "epoch": 0.9514291763709507, "grad_norm": 1.6924076826046515, "learning_rate": 6.1644155146523e-08, "loss": 0.5673, "step": 11234 }, { "epoch": 0.951513868304044, "grad_norm": 0.6546779927527399, "learning_rate": 6.142962133448337e-08, "loss": 0.7993, "step": 11235 }, { "epoch": 0.9515985602371374, "grad_norm": 1.3515206977634464, "learning_rate": 6.121545917601557e-08, "loss": 0.5893, "step": 11236 }, { "epoch": 0.9516832521702308, "grad_norm": 1.4170733794240953, "learning_rate": 6.100166868723611e-08, "loss": 0.5702, "step": 11237 }, { "epoch": 0.9517679441033241, "grad_norm": 0.6229114568884865, "learning_rate": 6.078824988423493e-08, "loss": 0.8682, "step": 11238 }, { "epoch": 0.9518526360364176, "grad_norm": 1.7191310437113512, "learning_rate": 6.057520278307194e-08, "loss": 0.6205, "step": 11239 }, { "epoch": 0.9519373279695109, "grad_norm": 1.2662687043953575, "learning_rate": 6.036252739978044e-08, "loss": 0.6106, "step": 11240 }, { "epoch": 0.9520220199026043, "grad_norm": 1.3927102235580473, "learning_rate": 6.015022375036539e-08, "loss": 0.6918, "step": 11241 }, { "epoch": 0.9521067118356976, "grad_norm": 2.2625541866100503, "learning_rate": 5.993829185080402e-08, "loss": 0.6128, "step": 11242 }, { "epoch": 0.9521914037687911, "grad_norm": 1.2120905802794706, "learning_rate": 5.972673171704468e-08, "loss": 0.6411, "step": 11243 }, { "epoch": 0.9522760957018844, "grad_norm": 1.5744483662258983, "learning_rate": 5.951554336500909e-08, "loss": 0.6658, "step": 11244 }, { "epoch": 0.9523607876349778, "grad_norm": 0.6282019345726235, "learning_rate": 5.930472681058952e-08, "loss": 0.8699, "step": 11245 }, { "epoch": 0.9524454795680711, "grad_norm": 1.452090747134798, "learning_rate": 5.909428206965218e-08, "loss": 0.662, "step": 11246 }, { "epoch": 0.9525301715011645, "grad_norm": 1.685278434315219, "learning_rate": 5.888420915803272e-08, "loss": 0.6359, "step": 11247 }, { "epoch": 0.9526148634342579, "grad_norm": 1.303038347185099, "learning_rate": 5.8674508091541295e-08, "loss": 0.6087, "step": 11248 }, { "epoch": 0.9526995553673513, "grad_norm": 1.5535155870677955, "learning_rate": 5.8465178885958596e-08, "loss": 0.5755, "step": 11249 }, { "epoch": 0.9527842473004446, "grad_norm": 1.6123455482632238, "learning_rate": 5.825622155703814e-08, "loss": 0.6094, "step": 11250 }, { "epoch": 0.952868939233538, "grad_norm": 1.2801196180702787, "learning_rate": 5.804763612050402e-08, "loss": 0.6409, "step": 11251 }, { "epoch": 0.9529536311666313, "grad_norm": 1.4118600601865918, "learning_rate": 5.7839422592055326e-08, "loss": 0.5848, "step": 11252 }, { "epoch": 0.9530383230997248, "grad_norm": 1.481660945222961, "learning_rate": 5.763158098735899e-08, "loss": 0.6606, "step": 11253 }, { "epoch": 0.9531230150328182, "grad_norm": 0.682788501899236, "learning_rate": 5.742411132205805e-08, "loss": 0.8361, "step": 11254 }, { "epoch": 0.9532077069659115, "grad_norm": 1.452281068387488, "learning_rate": 5.721701361176446e-08, "loss": 0.6101, "step": 11255 }, { "epoch": 0.9532923988990049, "grad_norm": 1.27131676299878, "learning_rate": 5.701028787206408e-08, "loss": 0.6374, "step": 11256 }, { "epoch": 0.9533770908320982, "grad_norm": 1.4091092472069875, "learning_rate": 5.680393411851393e-08, "loss": 0.58, "step": 11257 }, { "epoch": 0.9534617827651917, "grad_norm": 1.440474263351115, "learning_rate": 5.659795236664267e-08, "loss": 0.6278, "step": 11258 }, { "epoch": 0.953546474698285, "grad_norm": 0.5972832237264515, "learning_rate": 5.639234263195292e-08, "loss": 0.834, "step": 11259 }, { "epoch": 0.9536311666313784, "grad_norm": 1.391866528148326, "learning_rate": 5.618710492991675e-08, "loss": 0.6686, "step": 11260 }, { "epoch": 0.9537158585644717, "grad_norm": 1.3134620745916339, "learning_rate": 5.598223927597901e-08, "loss": 0.6038, "step": 11261 }, { "epoch": 0.9538005504975651, "grad_norm": 1.2075645912073905, "learning_rate": 5.577774568555849e-08, "loss": 0.6122, "step": 11262 }, { "epoch": 0.9538852424306585, "grad_norm": 1.4951061441609632, "learning_rate": 5.557362417404288e-08, "loss": 0.5935, "step": 11263 }, { "epoch": 0.9539699343637519, "grad_norm": 1.888655290412476, "learning_rate": 5.536987475679434e-08, "loss": 0.6272, "step": 11264 }, { "epoch": 0.9540546262968452, "grad_norm": 0.6104149103430776, "learning_rate": 5.5166497449145595e-08, "loss": 0.795, "step": 11265 }, { "epoch": 0.9541393182299386, "grad_norm": 1.3603773601340652, "learning_rate": 5.4963492266402184e-08, "loss": 0.5997, "step": 11266 }, { "epoch": 0.9542240101630319, "grad_norm": 1.4670046884306205, "learning_rate": 5.4760859223841335e-08, "loss": 0.6605, "step": 11267 }, { "epoch": 0.9543087020961254, "grad_norm": 1.1984151524318005, "learning_rate": 5.455859833671195e-08, "loss": 0.6047, "step": 11268 }, { "epoch": 0.9543933940292187, "grad_norm": 4.7956038733774715, "learning_rate": 5.4356709620234646e-08, "loss": 0.6422, "step": 11269 }, { "epoch": 0.9544780859623121, "grad_norm": 0.6252290402424433, "learning_rate": 5.415519308960449e-08, "loss": 0.7952, "step": 11270 }, { "epoch": 0.9545627778954054, "grad_norm": 1.8129858924714886, "learning_rate": 5.395404875998489e-08, "loss": 0.5705, "step": 11271 }, { "epoch": 0.9546474698284988, "grad_norm": 0.6278350218964257, "learning_rate": 5.375327664651431e-08, "loss": 0.8688, "step": 11272 }, { "epoch": 0.9547321617615923, "grad_norm": 2.0419068309026462, "learning_rate": 5.3552876764300655e-08, "loss": 0.6157, "step": 11273 }, { "epoch": 0.9548168536946856, "grad_norm": 1.2387237843925483, "learning_rate": 5.335284912842631e-08, "loss": 0.6794, "step": 11274 }, { "epoch": 0.954901545627779, "grad_norm": 1.2501165197929667, "learning_rate": 5.3153193753943125e-08, "loss": 0.6757, "step": 11275 }, { "epoch": 0.9549862375608723, "grad_norm": 1.5846592896357603, "learning_rate": 5.295391065587741e-08, "loss": 0.609, "step": 11276 }, { "epoch": 0.9550709294939657, "grad_norm": 1.1922336292415965, "learning_rate": 5.27549998492255e-08, "loss": 0.6282, "step": 11277 }, { "epoch": 0.9551556214270591, "grad_norm": 1.6153519448064713, "learning_rate": 5.255646134895709e-08, "loss": 0.6194, "step": 11278 }, { "epoch": 0.9552403133601525, "grad_norm": 1.4590934443411219, "learning_rate": 5.235829517001245e-08, "loss": 0.6216, "step": 11279 }, { "epoch": 0.9553250052932458, "grad_norm": 0.6336588642480556, "learning_rate": 5.2160501327305213e-08, "loss": 0.8661, "step": 11280 }, { "epoch": 0.9554096972263392, "grad_norm": 0.6749725172233213, "learning_rate": 5.1963079835720685e-08, "loss": 0.8301, "step": 11281 }, { "epoch": 0.9554943891594325, "grad_norm": 2.302430129545896, "learning_rate": 5.1766030710115324e-08, "loss": 0.597, "step": 11282 }, { "epoch": 0.955579081092526, "grad_norm": 1.8064579476527323, "learning_rate": 5.1569353965317816e-08, "loss": 0.6528, "step": 11283 }, { "epoch": 0.9556637730256193, "grad_norm": 1.4239274820060623, "learning_rate": 5.137304961613021e-08, "loss": 0.6278, "step": 11284 }, { "epoch": 0.9557484649587127, "grad_norm": 1.851936668240668, "learning_rate": 5.117711767732403e-08, "loss": 0.602, "step": 11285 }, { "epoch": 0.955833156891806, "grad_norm": 2.042531416848292, "learning_rate": 5.098155816364636e-08, "loss": 0.6603, "step": 11286 }, { "epoch": 0.9559178488248994, "grad_norm": 1.2723109560957613, "learning_rate": 5.078637108981155e-08, "loss": 0.5983, "step": 11287 }, { "epoch": 0.9560025407579928, "grad_norm": 1.2430789709139838, "learning_rate": 5.0591556470510065e-08, "loss": 0.6293, "step": 11288 }, { "epoch": 0.9560872326910862, "grad_norm": 1.4746564731523097, "learning_rate": 5.0397114320402396e-08, "loss": 0.6541, "step": 11289 }, { "epoch": 0.9561719246241795, "grad_norm": 1.1641755452705722, "learning_rate": 5.0203044654120734e-08, "loss": 0.5612, "step": 11290 }, { "epoch": 0.9562566165572729, "grad_norm": 1.2063389181689852, "learning_rate": 5.0009347486271175e-08, "loss": 0.5988, "step": 11291 }, { "epoch": 0.9563413084903662, "grad_norm": 0.65184338411995, "learning_rate": 4.981602283142928e-08, "loss": 0.8575, "step": 11292 }, { "epoch": 0.9564260004234597, "grad_norm": 0.6349682498734214, "learning_rate": 4.962307070414396e-08, "loss": 0.8567, "step": 11293 }, { "epoch": 0.9565106923565531, "grad_norm": 1.7080760753532176, "learning_rate": 4.9430491118936384e-08, "loss": 0.6377, "step": 11294 }, { "epoch": 0.9565953842896464, "grad_norm": 1.3832093729501975, "learning_rate": 4.9238284090298846e-08, "loss": 0.5621, "step": 11295 }, { "epoch": 0.9566800762227398, "grad_norm": 1.9444016481604307, "learning_rate": 4.9046449632695894e-08, "loss": 0.629, "step": 11296 }, { "epoch": 0.9567647681558331, "grad_norm": 1.5028386208789761, "learning_rate": 4.885498776056374e-08, "loss": 0.6682, "step": 11297 }, { "epoch": 0.9568494600889266, "grad_norm": 1.3887697188805554, "learning_rate": 4.866389848831199e-08, "loss": 0.5804, "step": 11298 }, { "epoch": 0.9569341520220199, "grad_norm": 2.2920035499532676, "learning_rate": 4.8473181830320234e-08, "loss": 0.6276, "step": 11299 }, { "epoch": 0.9570188439551133, "grad_norm": 1.4118069208686719, "learning_rate": 4.8282837800940896e-08, "loss": 0.6229, "step": 11300 }, { "epoch": 0.9571035358882066, "grad_norm": 0.626579549890329, "learning_rate": 4.809286641449862e-08, "loss": 0.8987, "step": 11301 }, { "epoch": 0.9571882278213, "grad_norm": 2.5014329053227455, "learning_rate": 4.79032676852903e-08, "loss": 0.6447, "step": 11302 }, { "epoch": 0.9572729197543934, "grad_norm": 1.2533834137753666, "learning_rate": 4.7714041627582867e-08, "loss": 0.5637, "step": 11303 }, { "epoch": 0.9573576116874868, "grad_norm": 1.8140286253584936, "learning_rate": 4.752518825561769e-08, "loss": 0.6455, "step": 11304 }, { "epoch": 0.9574423036205801, "grad_norm": 1.3686488696113606, "learning_rate": 4.733670758360676e-08, "loss": 0.6492, "step": 11305 }, { "epoch": 0.9575269955536735, "grad_norm": 1.4500617926063526, "learning_rate": 4.7148599625734256e-08, "loss": 0.6036, "step": 11306 }, { "epoch": 0.9576116874867668, "grad_norm": 1.1913962276078225, "learning_rate": 4.69608643961561e-08, "loss": 0.691, "step": 11307 }, { "epoch": 0.9576963794198603, "grad_norm": 3.3895711133978863, "learning_rate": 4.677350190900043e-08, "loss": 0.6742, "step": 11308 }, { "epoch": 0.9577810713529537, "grad_norm": 1.6405978261327114, "learning_rate": 4.6586512178367624e-08, "loss": 0.695, "step": 11309 }, { "epoch": 0.957865763286047, "grad_norm": 1.3858690416006865, "learning_rate": 4.639989521832866e-08, "loss": 0.6006, "step": 11310 }, { "epoch": 0.9579504552191404, "grad_norm": 1.1477822154177626, "learning_rate": 4.6213651042928964e-08, "loss": 0.5833, "step": 11311 }, { "epoch": 0.9580351471522337, "grad_norm": 1.4791597205377947, "learning_rate": 4.602777966618344e-08, "loss": 0.6436, "step": 11312 }, { "epoch": 0.9581198390853272, "grad_norm": 1.4025291911886428, "learning_rate": 4.584228110207978e-08, "loss": 0.6144, "step": 11313 }, { "epoch": 0.9582045310184205, "grad_norm": 1.2673769734987996, "learning_rate": 4.565715536457793e-08, "loss": 0.6301, "step": 11314 }, { "epoch": 0.9582892229515139, "grad_norm": 1.4007895965942168, "learning_rate": 4.5472402467609536e-08, "loss": 0.5734, "step": 11315 }, { "epoch": 0.9583739148846072, "grad_norm": 0.6225162811369006, "learning_rate": 4.528802242507846e-08, "loss": 0.8411, "step": 11316 }, { "epoch": 0.9584586068177006, "grad_norm": 1.7081591852575086, "learning_rate": 4.5104015250860275e-08, "loss": 0.6056, "step": 11317 }, { "epoch": 0.958543298750794, "grad_norm": 1.7053829859673253, "learning_rate": 4.4920380958802243e-08, "loss": 0.5823, "step": 11318 }, { "epoch": 0.9586279906838874, "grad_norm": 1.116417047504632, "learning_rate": 4.4737119562723864e-08, "loss": 0.5885, "step": 11319 }, { "epoch": 0.9587126826169807, "grad_norm": 1.3458288147179165, "learning_rate": 4.455423107641688e-08, "loss": 0.6193, "step": 11320 }, { "epoch": 0.9587973745500741, "grad_norm": 1.3360037118214647, "learning_rate": 4.437171551364417e-08, "loss": 0.5769, "step": 11321 }, { "epoch": 0.9588820664831674, "grad_norm": 1.4630447367476427, "learning_rate": 4.4189572888140856e-08, "loss": 0.6245, "step": 11322 }, { "epoch": 0.9589667584162609, "grad_norm": 1.75393106567983, "learning_rate": 4.4007803213614866e-08, "loss": 0.6778, "step": 11323 }, { "epoch": 0.9590514503493542, "grad_norm": 2.0133094391077355, "learning_rate": 4.38264065037447e-08, "loss": 0.6363, "step": 11324 }, { "epoch": 0.9591361422824476, "grad_norm": 2.969823330161839, "learning_rate": 4.3645382772181666e-08, "loss": 0.6286, "step": 11325 }, { "epoch": 0.9592208342155409, "grad_norm": 1.316578618343567, "learning_rate": 4.346473203254875e-08, "loss": 0.6061, "step": 11326 }, { "epoch": 0.9593055261486343, "grad_norm": 2.1636897141776377, "learning_rate": 4.3284454298440635e-08, "loss": 0.6632, "step": 11327 }, { "epoch": 0.9593902180817278, "grad_norm": 1.3138776680407358, "learning_rate": 4.3104549583424806e-08, "loss": 0.5815, "step": 11328 }, { "epoch": 0.9594749100148211, "grad_norm": 1.3164492395957914, "learning_rate": 4.292501790103931e-08, "loss": 0.6032, "step": 11329 }, { "epoch": 0.9595596019479145, "grad_norm": 1.1984697744870707, "learning_rate": 4.274585926479502e-08, "loss": 0.6053, "step": 11330 }, { "epoch": 0.9596442938810078, "grad_norm": 1.6690980537565414, "learning_rate": 4.256707368817503e-08, "loss": 0.6407, "step": 11331 }, { "epoch": 0.9597289858141012, "grad_norm": 1.2556480239245436, "learning_rate": 4.2388661184633586e-08, "loss": 0.5838, "step": 11332 }, { "epoch": 0.9598136777471946, "grad_norm": 1.257098927197264, "learning_rate": 4.221062176759716e-08, "loss": 0.5207, "step": 11333 }, { "epoch": 0.959898369680288, "grad_norm": 1.4236705108539625, "learning_rate": 4.203295545046449e-08, "loss": 0.6626, "step": 11334 }, { "epoch": 0.9599830616133813, "grad_norm": 1.7398816717657986, "learning_rate": 4.185566224660487e-08, "loss": 0.6337, "step": 11335 }, { "epoch": 0.9600677535464747, "grad_norm": 1.2277195648060983, "learning_rate": 4.1678742169362077e-08, "loss": 0.6323, "step": 11336 }, { "epoch": 0.960152445479568, "grad_norm": 1.553494071900862, "learning_rate": 4.1502195232048795e-08, "loss": 0.5742, "step": 11337 }, { "epoch": 0.9602371374126615, "grad_norm": 1.739033304900712, "learning_rate": 4.132602144795217e-08, "loss": 0.6637, "step": 11338 }, { "epoch": 0.9603218293457548, "grad_norm": 1.2944302595729473, "learning_rate": 4.115022083032993e-08, "loss": 0.6122, "step": 11339 }, { "epoch": 0.9604065212788482, "grad_norm": 1.4154500201191835, "learning_rate": 4.09747933924115e-08, "loss": 0.6093, "step": 11340 }, { "epoch": 0.9604912132119415, "grad_norm": 1.7871479848921228, "learning_rate": 4.0799739147399655e-08, "loss": 0.6111, "step": 11341 }, { "epoch": 0.9605759051450349, "grad_norm": 1.4539490873906715, "learning_rate": 4.062505810846773e-08, "loss": 0.6403, "step": 11342 }, { "epoch": 0.9606605970781283, "grad_norm": 0.5934407701204745, "learning_rate": 4.0450750288760774e-08, "loss": 0.8519, "step": 11343 }, { "epoch": 0.9607452890112217, "grad_norm": 0.6354961409661111, "learning_rate": 4.027681570139719e-08, "loss": 0.8211, "step": 11344 }, { "epoch": 0.960829980944315, "grad_norm": 1.1475442918598688, "learning_rate": 4.010325435946649e-08, "loss": 0.5824, "step": 11345 }, { "epoch": 0.9609146728774084, "grad_norm": 1.2978878152238413, "learning_rate": 3.99300662760288e-08, "loss": 0.6098, "step": 11346 }, { "epoch": 0.9609993648105019, "grad_norm": 1.8243959668370007, "learning_rate": 3.975725146411979e-08, "loss": 0.6097, "step": 11347 }, { "epoch": 0.9610840567435952, "grad_norm": 2.077005543174936, "learning_rate": 3.958480993674241e-08, "loss": 0.6562, "step": 11348 }, { "epoch": 0.9611687486766886, "grad_norm": 0.5509278841359418, "learning_rate": 3.9412741706875144e-08, "loss": 0.8438, "step": 11349 }, { "epoch": 0.9612534406097819, "grad_norm": 1.5123921865359844, "learning_rate": 3.924104678746654e-08, "loss": 0.6006, "step": 11350 }, { "epoch": 0.9613381325428753, "grad_norm": 1.3253446898965766, "learning_rate": 3.906972519143737e-08, "loss": 0.5809, "step": 11351 }, { "epoch": 0.9614228244759687, "grad_norm": 1.4177449054529376, "learning_rate": 3.88987769316812e-08, "loss": 0.6036, "step": 11352 }, { "epoch": 0.9615075164090621, "grad_norm": 1.5068530103145599, "learning_rate": 3.8728202021061646e-08, "loss": 0.6273, "step": 11353 }, { "epoch": 0.9615922083421554, "grad_norm": 1.5874752147876028, "learning_rate": 3.855800047241676e-08, "loss": 0.5951, "step": 11354 }, { "epoch": 0.9616769002752488, "grad_norm": 1.3559298811590805, "learning_rate": 3.8388172298554075e-08, "loss": 0.6388, "step": 11355 }, { "epoch": 0.9617615922083421, "grad_norm": 1.3348753104960622, "learning_rate": 3.8218717512254476e-08, "loss": 0.6055, "step": 11356 }, { "epoch": 0.9618462841414356, "grad_norm": 2.116407340072604, "learning_rate": 3.804963612627055e-08, "loss": 0.6568, "step": 11357 }, { "epoch": 0.9619309760745289, "grad_norm": 1.3859936490721452, "learning_rate": 3.7880928153326004e-08, "loss": 0.6072, "step": 11358 }, { "epoch": 0.9620156680076223, "grad_norm": 1.3441745141938295, "learning_rate": 3.771259360611734e-08, "loss": 0.6156, "step": 11359 }, { "epoch": 0.9621003599407156, "grad_norm": 1.2336434568596983, "learning_rate": 3.754463249731222e-08, "loss": 0.6293, "step": 11360 }, { "epoch": 0.962185051873809, "grad_norm": 1.2796528976590333, "learning_rate": 3.737704483955107e-08, "loss": 0.5917, "step": 11361 }, { "epoch": 0.9622697438069024, "grad_norm": 2.259194869640902, "learning_rate": 3.7209830645446033e-08, "loss": 0.6025, "step": 11362 }, { "epoch": 0.9623544357399958, "grad_norm": 1.4660245052152978, "learning_rate": 3.704298992758038e-08, "loss": 0.6633, "step": 11363 }, { "epoch": 0.9624391276730891, "grad_norm": 1.6148776684634978, "learning_rate": 3.687652269850961e-08, "loss": 0.6319, "step": 11364 }, { "epoch": 0.9625238196061825, "grad_norm": 1.3881226085669878, "learning_rate": 3.671042897076205e-08, "loss": 0.6588, "step": 11365 }, { "epoch": 0.9626085115392758, "grad_norm": 1.7406720285615478, "learning_rate": 3.6544708756835466e-08, "loss": 0.5969, "step": 11366 }, { "epoch": 0.9626932034723693, "grad_norm": 1.6489984194838374, "learning_rate": 3.637936206920267e-08, "loss": 0.6216, "step": 11367 }, { "epoch": 0.9627778954054627, "grad_norm": 2.4676278654138017, "learning_rate": 3.621438892030704e-08, "loss": 0.6161, "step": 11368 }, { "epoch": 0.962862587338556, "grad_norm": 1.9984312687557473, "learning_rate": 3.604978932256309e-08, "loss": 0.657, "step": 11369 }, { "epoch": 0.9629472792716494, "grad_norm": 1.4472685341924028, "learning_rate": 3.5885563288357574e-08, "loss": 0.6479, "step": 11370 }, { "epoch": 0.9630319712047427, "grad_norm": 1.232858669709652, "learning_rate": 3.5721710830049494e-08, "loss": 0.6095, "step": 11371 }, { "epoch": 0.9631166631378362, "grad_norm": 1.5930328556265445, "learning_rate": 3.5558231959970104e-08, "loss": 0.675, "step": 11372 }, { "epoch": 0.9632013550709295, "grad_norm": 1.3109882504736174, "learning_rate": 3.539512669042178e-08, "loss": 0.5598, "step": 11373 }, { "epoch": 0.9632860470040229, "grad_norm": 1.7576267874030702, "learning_rate": 3.523239503367859e-08, "loss": 0.6114, "step": 11374 }, { "epoch": 0.9633707389371162, "grad_norm": 1.4364550043334567, "learning_rate": 3.507003700198741e-08, "loss": 0.6447, "step": 11375 }, { "epoch": 0.9634554308702096, "grad_norm": 1.2743688458484015, "learning_rate": 3.49080526075668e-08, "loss": 0.6318, "step": 11376 }, { "epoch": 0.963540122803303, "grad_norm": 0.6581960982911907, "learning_rate": 3.474644186260645e-08, "loss": 0.8017, "step": 11377 }, { "epoch": 0.9636248147363964, "grad_norm": 0.611885072774696, "learning_rate": 3.4585204779268856e-08, "loss": 0.8142, "step": 11378 }, { "epoch": 0.9637095066694897, "grad_norm": 0.7175628997023535, "learning_rate": 3.442434136968764e-08, "loss": 0.8538, "step": 11379 }, { "epoch": 0.9637941986025831, "grad_norm": 1.5669057530518846, "learning_rate": 3.4263851645968684e-08, "loss": 0.5895, "step": 11380 }, { "epoch": 0.9638788905356764, "grad_norm": 0.5749196376980876, "learning_rate": 3.410373562018954e-08, "loss": 0.8136, "step": 11381 }, { "epoch": 0.9639635824687699, "grad_norm": 1.1390963118633035, "learning_rate": 3.3943993304400014e-08, "loss": 0.5503, "step": 11382 }, { "epoch": 0.9640482744018632, "grad_norm": 1.1632768997594765, "learning_rate": 3.378462471062161e-08, "loss": 0.6092, "step": 11383 }, { "epoch": 0.9641329663349566, "grad_norm": 1.606655368067877, "learning_rate": 3.36256298508475e-08, "loss": 0.6712, "step": 11384 }, { "epoch": 0.96421765826805, "grad_norm": 1.6172705374188603, "learning_rate": 3.346700873704256e-08, "loss": 0.586, "step": 11385 }, { "epoch": 0.9643023502011433, "grad_norm": 0.5627818101009396, "learning_rate": 3.3308761381145024e-08, "loss": 0.8962, "step": 11386 }, { "epoch": 0.9643870421342368, "grad_norm": 1.3518812644094191, "learning_rate": 3.315088779506259e-08, "loss": 0.6784, "step": 11387 }, { "epoch": 0.9644717340673301, "grad_norm": 0.638273943823097, "learning_rate": 3.2993387990676306e-08, "loss": 0.8326, "step": 11388 }, { "epoch": 0.9645564260004235, "grad_norm": 2.6215703622215605, "learning_rate": 3.283626197984002e-08, "loss": 0.5954, "step": 11389 }, { "epoch": 0.9646411179335168, "grad_norm": 1.7625046175769774, "learning_rate": 3.2679509774376504e-08, "loss": 0.5837, "step": 11390 }, { "epoch": 0.9647258098666102, "grad_norm": 1.4725156782658153, "learning_rate": 3.252313138608354e-08, "loss": 0.6759, "step": 11391 }, { "epoch": 0.9648105017997036, "grad_norm": 1.3489169074361675, "learning_rate": 3.2367126826728934e-08, "loss": 0.6466, "step": 11392 }, { "epoch": 0.964895193732797, "grad_norm": 1.344059715037699, "learning_rate": 3.221149610805274e-08, "loss": 0.5989, "step": 11393 }, { "epoch": 0.9649798856658903, "grad_norm": 1.2671473172531282, "learning_rate": 3.2056239241767264e-08, "loss": 0.6081, "step": 11394 }, { "epoch": 0.9650645775989837, "grad_norm": 2.086441936849962, "learning_rate": 3.190135623955592e-08, "loss": 0.6257, "step": 11395 }, { "epoch": 0.965149269532077, "grad_norm": 1.4276014652244342, "learning_rate": 3.174684711307552e-08, "loss": 0.6555, "step": 11396 }, { "epoch": 0.9652339614651705, "grad_norm": 1.3518328674692328, "learning_rate": 3.159271187395285e-08, "loss": 0.5846, "step": 11397 }, { "epoch": 0.9653186533982638, "grad_norm": 1.6543868715324639, "learning_rate": 3.143895053378698e-08, "loss": 0.6044, "step": 11398 }, { "epoch": 0.9654033453313572, "grad_norm": 1.2505600941793695, "learning_rate": 3.1285563104150876e-08, "loss": 0.6248, "step": 11399 }, { "epoch": 0.9654880372644505, "grad_norm": 1.3950421760275935, "learning_rate": 3.113254959658585e-08, "loss": 0.6103, "step": 11400 }, { "epoch": 0.9655727291975439, "grad_norm": 1.3553671582779354, "learning_rate": 3.0979910022607694e-08, "loss": 0.5838, "step": 11401 }, { "epoch": 0.9656574211306374, "grad_norm": 1.2826163499353398, "learning_rate": 3.082764439370445e-08, "loss": 0.6404, "step": 11402 }, { "epoch": 0.9657421130637307, "grad_norm": 1.513531414910819, "learning_rate": 3.0675752721333605e-08, "loss": 0.6572, "step": 11403 }, { "epoch": 0.965826804996824, "grad_norm": 1.4691361774762544, "learning_rate": 3.052423501692603e-08, "loss": 0.7178, "step": 11404 }, { "epoch": 0.9659114969299174, "grad_norm": 0.5956569431555767, "learning_rate": 3.037309129188426e-08, "loss": 0.8382, "step": 11405 }, { "epoch": 0.9659961888630108, "grad_norm": 1.2991145303267173, "learning_rate": 3.022232155758309e-08, "loss": 0.6224, "step": 11406 }, { "epoch": 0.9660808807961042, "grad_norm": 1.3372304411815439, "learning_rate": 3.0071925825368444e-08, "loss": 0.5615, "step": 11407 }, { "epoch": 0.9661655727291976, "grad_norm": 1.3229364638673908, "learning_rate": 2.992190410655849e-08, "loss": 0.656, "step": 11408 }, { "epoch": 0.9662502646622909, "grad_norm": 1.7851869661400288, "learning_rate": 2.9772256412442523e-08, "loss": 0.6343, "step": 11409 }, { "epoch": 0.9663349565953843, "grad_norm": 1.35574758424368, "learning_rate": 2.9622982754283214e-08, "loss": 0.6873, "step": 11410 }, { "epoch": 0.9664196485284776, "grad_norm": 1.2208128078656313, "learning_rate": 2.9474083143313792e-08, "loss": 0.6195, "step": 11411 }, { "epoch": 0.9665043404615711, "grad_norm": 1.406545868394002, "learning_rate": 2.9325557590740294e-08, "loss": 0.5895, "step": 11412 }, { "epoch": 0.9665890323946644, "grad_norm": 1.6814582102777373, "learning_rate": 2.9177406107739336e-08, "loss": 0.6391, "step": 11413 }, { "epoch": 0.9666737243277578, "grad_norm": 1.184341366071692, "learning_rate": 2.9029628705459777e-08, "loss": 0.5542, "step": 11414 }, { "epoch": 0.9667584162608511, "grad_norm": 1.3297554235430187, "learning_rate": 2.8882225395023277e-08, "loss": 0.6449, "step": 11415 }, { "epoch": 0.9668431081939445, "grad_norm": 1.3875207040323776, "learning_rate": 2.8735196187523183e-08, "loss": 0.6468, "step": 11416 }, { "epoch": 0.9669278001270379, "grad_norm": 1.4275885128174808, "learning_rate": 2.8588541094023425e-08, "loss": 0.6452, "step": 11417 }, { "epoch": 0.9670124920601313, "grad_norm": 1.39943892878679, "learning_rate": 2.8442260125560727e-08, "loss": 0.6415, "step": 11418 }, { "epoch": 0.9670971839932246, "grad_norm": 5.7160683874999, "learning_rate": 2.829635329314351e-08, "loss": 0.6078, "step": 11419 }, { "epoch": 0.967181875926318, "grad_norm": 1.4116511677974422, "learning_rate": 2.8150820607752427e-08, "loss": 0.624, "step": 11420 }, { "epoch": 0.9672665678594113, "grad_norm": 1.732544410966113, "learning_rate": 2.800566208033928e-08, "loss": 0.5956, "step": 11421 }, { "epoch": 0.9673512597925048, "grad_norm": 1.521860800041438, "learning_rate": 2.7860877721827552e-08, "loss": 0.6386, "step": 11422 }, { "epoch": 0.9674359517255982, "grad_norm": 1.5597215379021856, "learning_rate": 2.771646754311408e-08, "loss": 0.6136, "step": 11423 }, { "epoch": 0.9675206436586915, "grad_norm": 1.3795297043638604, "learning_rate": 2.7572431555065172e-08, "loss": 0.6713, "step": 11424 }, { "epoch": 0.9676053355917849, "grad_norm": 7.840821636344363, "learning_rate": 2.7428769768521602e-08, "loss": 0.6342, "step": 11425 }, { "epoch": 0.9676900275248782, "grad_norm": 0.5788760340625382, "learning_rate": 2.7285482194294165e-08, "loss": 0.8295, "step": 11426 }, { "epoch": 0.9677747194579717, "grad_norm": 5.02485628227586, "learning_rate": 2.7142568843165906e-08, "loss": 0.5901, "step": 11427 }, { "epoch": 0.967859411391065, "grad_norm": 1.195564183516055, "learning_rate": 2.7000029725891552e-08, "loss": 0.5828, "step": 11428 }, { "epoch": 0.9679441033241584, "grad_norm": 0.6076975927897779, "learning_rate": 2.6857864853198633e-08, "loss": 0.8508, "step": 11429 }, { "epoch": 0.9680287952572517, "grad_norm": 1.8175742206752459, "learning_rate": 2.6716074235785262e-08, "loss": 0.6319, "step": 11430 }, { "epoch": 0.9681134871903451, "grad_norm": 0.6169257917113534, "learning_rate": 2.657465788432234e-08, "loss": 0.8423, "step": 11431 }, { "epoch": 0.9681981791234385, "grad_norm": 1.4017947245968467, "learning_rate": 2.6433615809451363e-08, "loss": 0.6473, "step": 11432 }, { "epoch": 0.9682828710565319, "grad_norm": 1.4947839690753928, "learning_rate": 2.629294802178772e-08, "loss": 0.6266, "step": 11433 }, { "epoch": 0.9683675629896252, "grad_norm": 1.3098167560842449, "learning_rate": 2.6152654531916288e-08, "loss": 0.6343, "step": 11434 }, { "epoch": 0.9684522549227186, "grad_norm": 1.474174843849787, "learning_rate": 2.6012735350395836e-08, "loss": 0.5956, "step": 11435 }, { "epoch": 0.9685369468558119, "grad_norm": 1.5278738040151392, "learning_rate": 2.5873190487755162e-08, "loss": 0.6436, "step": 11436 }, { "epoch": 0.9686216387889054, "grad_norm": 1.355950241733809, "learning_rate": 2.5734019954495872e-08, "loss": 0.6236, "step": 11437 }, { "epoch": 0.9687063307219987, "grad_norm": 0.612146664321513, "learning_rate": 2.559522376109236e-08, "loss": 0.8502, "step": 11438 }, { "epoch": 0.9687910226550921, "grad_norm": 0.5949023769496335, "learning_rate": 2.5456801917988494e-08, "loss": 0.8582, "step": 11439 }, { "epoch": 0.9688757145881854, "grad_norm": 1.2971458200986448, "learning_rate": 2.5318754435602056e-08, "loss": 0.6255, "step": 11440 }, { "epoch": 0.9689604065212788, "grad_norm": 1.7872352265577933, "learning_rate": 2.5181081324320844e-08, "loss": 0.6624, "step": 11441 }, { "epoch": 0.9690450984543723, "grad_norm": 1.466421112537159, "learning_rate": 2.504378259450657e-08, "loss": 0.5531, "step": 11442 }, { "epoch": 0.9691297903874656, "grad_norm": 1.440095794533484, "learning_rate": 2.490685825649153e-08, "loss": 0.649, "step": 11443 }, { "epoch": 0.969214482320559, "grad_norm": 1.9276523678628634, "learning_rate": 2.4770308320579695e-08, "loss": 0.6462, "step": 11444 }, { "epoch": 0.9692991742536523, "grad_norm": 1.3699132931861884, "learning_rate": 2.4634132797047293e-08, "loss": 0.5588, "step": 11445 }, { "epoch": 0.9693838661867457, "grad_norm": 1.5607596042671448, "learning_rate": 2.449833169614224e-08, "loss": 0.6339, "step": 11446 }, { "epoch": 0.9694685581198391, "grad_norm": 1.7387242582003894, "learning_rate": 2.436290502808414e-08, "loss": 0.6207, "step": 11447 }, { "epoch": 0.9695532500529325, "grad_norm": 1.514528838734091, "learning_rate": 2.4227852803064277e-08, "loss": 0.6215, "step": 11448 }, { "epoch": 0.9696379419860258, "grad_norm": 1.4171234377246922, "learning_rate": 2.4093175031246753e-08, "loss": 0.5836, "step": 11449 }, { "epoch": 0.9697226339191192, "grad_norm": 1.2569762092617107, "learning_rate": 2.3958871722766787e-08, "loss": 0.6153, "step": 11450 }, { "epoch": 0.9698073258522126, "grad_norm": 1.2068464873021387, "learning_rate": 2.382494288773074e-08, "loss": 0.5974, "step": 11451 }, { "epoch": 0.969892017785306, "grad_norm": 1.3971498573390604, "learning_rate": 2.3691388536218328e-08, "loss": 0.6137, "step": 11452 }, { "epoch": 0.9699767097183993, "grad_norm": 1.33654599395842, "learning_rate": 2.3558208678279293e-08, "loss": 0.679, "step": 11453 }, { "epoch": 0.9700614016514927, "grad_norm": 1.4997416272988946, "learning_rate": 2.3425403323936725e-08, "loss": 0.6469, "step": 11454 }, { "epoch": 0.970146093584586, "grad_norm": 1.2040085911219707, "learning_rate": 2.329297248318485e-08, "loss": 0.6724, "step": 11455 }, { "epoch": 0.9702307855176795, "grad_norm": 1.637653482104936, "learning_rate": 2.3160916165989034e-08, "loss": 0.6694, "step": 11456 }, { "epoch": 0.9703154774507728, "grad_norm": 1.7569269854102627, "learning_rate": 2.3029234382288547e-08, "loss": 0.5955, "step": 11457 }, { "epoch": 0.9704001693838662, "grad_norm": 1.5529864929286237, "learning_rate": 2.2897927141992127e-08, "loss": 0.6152, "step": 11458 }, { "epoch": 0.9704848613169595, "grad_norm": 2.093635147623428, "learning_rate": 2.276699445498187e-08, "loss": 0.6471, "step": 11459 }, { "epoch": 0.9705695532500529, "grad_norm": 1.2276830992715804, "learning_rate": 2.2636436331111567e-08, "loss": 0.6533, "step": 11460 }, { "epoch": 0.9706542451831464, "grad_norm": 1.633905873327586, "learning_rate": 2.2506252780205016e-08, "loss": 0.6272, "step": 11461 }, { "epoch": 0.9707389371162397, "grad_norm": 0.6046474925837745, "learning_rate": 2.23764438120605e-08, "loss": 0.898, "step": 11462 }, { "epoch": 0.9708236290493331, "grad_norm": 1.568128972169298, "learning_rate": 2.2247009436445753e-08, "loss": 0.659, "step": 11463 }, { "epoch": 0.9709083209824264, "grad_norm": 1.4624558280098587, "learning_rate": 2.2117949663102433e-08, "loss": 0.6539, "step": 11464 }, { "epoch": 0.9709930129155198, "grad_norm": 1.490863611290988, "learning_rate": 2.198926450174277e-08, "loss": 0.6497, "step": 11465 }, { "epoch": 0.9710777048486132, "grad_norm": 1.5414996832353087, "learning_rate": 2.1860953962050123e-08, "loss": 0.6553, "step": 11466 }, { "epoch": 0.9711623967817066, "grad_norm": 1.4961988018422943, "learning_rate": 2.1733018053681774e-08, "loss": 0.6071, "step": 11467 }, { "epoch": 0.9712470887147999, "grad_norm": 1.2965997062083758, "learning_rate": 2.160545678626502e-08, "loss": 0.6191, "step": 11468 }, { "epoch": 0.9713317806478933, "grad_norm": 1.502996083240341, "learning_rate": 2.1478270169398853e-08, "loss": 0.6353, "step": 11469 }, { "epoch": 0.9714164725809866, "grad_norm": 1.708078527048277, "learning_rate": 2.1351458212655606e-08, "loss": 0.6247, "step": 11470 }, { "epoch": 0.9715011645140801, "grad_norm": 1.8085952387637814, "learning_rate": 2.1225020925578765e-08, "loss": 0.6415, "step": 11471 }, { "epoch": 0.9715858564471734, "grad_norm": 1.4472723369822416, "learning_rate": 2.1098958317682383e-08, "loss": 0.571, "step": 11472 }, { "epoch": 0.9716705483802668, "grad_norm": 1.4131547003504135, "learning_rate": 2.0973270398454425e-08, "loss": 0.6299, "step": 11473 }, { "epoch": 0.9717552403133601, "grad_norm": 1.1964898331067868, "learning_rate": 2.0847957177352885e-08, "loss": 0.5881, "step": 11474 }, { "epoch": 0.9718399322464535, "grad_norm": 1.163354777076928, "learning_rate": 2.0723018663808546e-08, "loss": 0.6381, "step": 11475 }, { "epoch": 0.971924624179547, "grad_norm": 1.1962498007141191, "learning_rate": 2.059845486722334e-08, "loss": 0.6306, "step": 11476 }, { "epoch": 0.9720093161126403, "grad_norm": 1.4004274472595546, "learning_rate": 2.0474265796971428e-08, "loss": 0.6294, "step": 11477 }, { "epoch": 0.9720940080457336, "grad_norm": 1.263217470622027, "learning_rate": 2.0350451462399222e-08, "loss": 0.6015, "step": 11478 }, { "epoch": 0.972178699978827, "grad_norm": 1.2325021098337439, "learning_rate": 2.022701187282372e-08, "loss": 0.5884, "step": 11479 }, { "epoch": 0.9722633919119203, "grad_norm": 1.4402921599313938, "learning_rate": 2.010394703753471e-08, "loss": 0.5975, "step": 11480 }, { "epoch": 0.9723480838450138, "grad_norm": 1.997475852754252, "learning_rate": 1.9981256965794227e-08, "loss": 0.6229, "step": 11481 }, { "epoch": 0.9724327757781072, "grad_norm": 1.3237718653643542, "learning_rate": 1.9858941666833775e-08, "loss": 0.6167, "step": 11482 }, { "epoch": 0.9725174677112005, "grad_norm": 1.2803689050267943, "learning_rate": 1.9737001149859326e-08, "loss": 0.6177, "step": 11483 }, { "epoch": 0.9726021596442939, "grad_norm": 1.2678838923227076, "learning_rate": 1.9615435424047425e-08, "loss": 0.6561, "step": 11484 }, { "epoch": 0.9726868515773872, "grad_norm": 1.341503343736525, "learning_rate": 1.9494244498546312e-08, "loss": 0.6108, "step": 11485 }, { "epoch": 0.9727715435104807, "grad_norm": 1.6123911904410906, "learning_rate": 1.9373428382475912e-08, "loss": 0.6514, "step": 11486 }, { "epoch": 0.972856235443574, "grad_norm": 1.2430553041928825, "learning_rate": 1.925298708492951e-08, "loss": 0.617, "step": 11487 }, { "epoch": 0.9729409273766674, "grad_norm": 1.8643602331536633, "learning_rate": 1.9132920614969297e-08, "loss": 0.6028, "step": 11488 }, { "epoch": 0.9730256193097607, "grad_norm": 1.4502768998925164, "learning_rate": 1.901322898163249e-08, "loss": 0.57, "step": 11489 }, { "epoch": 0.9731103112428541, "grad_norm": 0.6634031834821743, "learning_rate": 1.8893912193925224e-08, "loss": 0.8746, "step": 11490 }, { "epoch": 0.9731950031759475, "grad_norm": 1.6872713325654767, "learning_rate": 1.877497026082753e-08, "loss": 0.584, "step": 11491 }, { "epoch": 0.9732796951090409, "grad_norm": 1.4975534969854658, "learning_rate": 1.8656403191290028e-08, "loss": 0.6243, "step": 11492 }, { "epoch": 0.9733643870421342, "grad_norm": 1.454210326321129, "learning_rate": 1.8538210994236137e-08, "loss": 0.6562, "step": 11493 }, { "epoch": 0.9734490789752276, "grad_norm": 1.4599921686442605, "learning_rate": 1.84203936785593e-08, "loss": 0.6329, "step": 11494 }, { "epoch": 0.9735337709083209, "grad_norm": 1.4621754381806882, "learning_rate": 1.8302951253126865e-08, "loss": 0.6011, "step": 11495 }, { "epoch": 0.9736184628414144, "grad_norm": 0.6688101687452965, "learning_rate": 1.8185883726776765e-08, "loss": 0.8689, "step": 11496 }, { "epoch": 0.9737031547745078, "grad_norm": 2.057900792947836, "learning_rate": 1.806919110831862e-08, "loss": 0.6, "step": 11497 }, { "epoch": 0.9737878467076011, "grad_norm": 2.1208754671181658, "learning_rate": 1.7952873406534288e-08, "loss": 0.6624, "step": 11498 }, { "epoch": 0.9738725386406945, "grad_norm": 1.2965818127020117, "learning_rate": 1.7836930630177885e-08, "loss": 0.6317, "step": 11499 }, { "epoch": 0.9739572305737878, "grad_norm": 1.5176159987531888, "learning_rate": 1.7721362787974094e-08, "loss": 0.581, "step": 11500 }, { "epoch": 0.9740419225068813, "grad_norm": 1.184702983517168, "learning_rate": 1.760616988861985e-08, "loss": 0.6196, "step": 11501 }, { "epoch": 0.9741266144399746, "grad_norm": 1.8206716299239012, "learning_rate": 1.7491351940784884e-08, "loss": 0.6822, "step": 11502 }, { "epoch": 0.974211306373068, "grad_norm": 1.3937065293019542, "learning_rate": 1.7376908953108952e-08, "loss": 0.6265, "step": 11503 }, { "epoch": 0.9742959983061613, "grad_norm": 1.812864927115367, "learning_rate": 1.726284093420516e-08, "loss": 0.6167, "step": 11504 }, { "epoch": 0.9743806902392547, "grad_norm": 1.7135499359216588, "learning_rate": 1.7149147892657203e-08, "loss": 0.6392, "step": 11505 }, { "epoch": 0.9744653821723481, "grad_norm": 2.120299025844211, "learning_rate": 1.703582983702101e-08, "loss": 0.6116, "step": 11506 }, { "epoch": 0.9745500741054415, "grad_norm": 1.5174602789732208, "learning_rate": 1.692288677582532e-08, "loss": 0.6306, "step": 11507 }, { "epoch": 0.9746347660385348, "grad_norm": 0.6019973190087134, "learning_rate": 1.6810318717568886e-08, "loss": 0.7952, "step": 11508 }, { "epoch": 0.9747194579716282, "grad_norm": 2.044207118123446, "learning_rate": 1.669812567072271e-08, "loss": 0.6, "step": 11509 }, { "epoch": 0.9748041499047215, "grad_norm": 1.1740419362606378, "learning_rate": 1.658630764373115e-08, "loss": 0.6355, "step": 11510 }, { "epoch": 0.974888841837815, "grad_norm": 1.206969453953673, "learning_rate": 1.6474864645008026e-08, "loss": 0.6293, "step": 11511 }, { "epoch": 0.9749735337709083, "grad_norm": 0.629381880233526, "learning_rate": 1.636379668293997e-08, "loss": 0.8666, "step": 11512 }, { "epoch": 0.9750582257040017, "grad_norm": 14.126000854942882, "learning_rate": 1.6253103765886403e-08, "loss": 0.5716, "step": 11513 }, { "epoch": 0.975142917637095, "grad_norm": 0.5975544010643127, "learning_rate": 1.6142785902176217e-08, "loss": 0.7986, "step": 11514 }, { "epoch": 0.9752276095701884, "grad_norm": 1.2286124160622904, "learning_rate": 1.603284310011277e-08, "loss": 0.6277, "step": 11515 }, { "epoch": 0.9753123015032819, "grad_norm": 1.4661084070205577, "learning_rate": 1.5923275367969447e-08, "loss": 0.6325, "step": 11516 }, { "epoch": 0.9753969934363752, "grad_norm": 1.1612014902530057, "learning_rate": 1.581408271399132e-08, "loss": 0.6203, "step": 11517 }, { "epoch": 0.9754816853694686, "grad_norm": 1.8822329828749222, "learning_rate": 1.5705265146395698e-08, "loss": 0.647, "step": 11518 }, { "epoch": 0.9755663773025619, "grad_norm": 1.2416305915625914, "learning_rate": 1.5596822673372148e-08, "loss": 0.6323, "step": 11519 }, { "epoch": 0.9756510692356553, "grad_norm": 1.441991349502894, "learning_rate": 1.548875530308136e-08, "loss": 0.618, "step": 11520 }, { "epoch": 0.9757357611687487, "grad_norm": 1.4375482918124654, "learning_rate": 1.5381063043656274e-08, "loss": 0.6948, "step": 11521 }, { "epoch": 0.9758204531018421, "grad_norm": 1.6828633655597744, "learning_rate": 1.527374590320041e-08, "loss": 0.5729, "step": 11522 }, { "epoch": 0.9759051450349354, "grad_norm": 1.3028536262493324, "learning_rate": 1.5166803889791193e-08, "loss": 0.6282, "step": 11523 }, { "epoch": 0.9759898369680288, "grad_norm": 0.5742719783517378, "learning_rate": 1.506023701147552e-08, "loss": 0.8193, "step": 11524 }, { "epoch": 0.9760745289011221, "grad_norm": 1.343699585079682, "learning_rate": 1.4954045276273643e-08, "loss": 0.573, "step": 11525 }, { "epoch": 0.9761592208342156, "grad_norm": 1.1839426739340964, "learning_rate": 1.4848228692176948e-08, "loss": 0.6027, "step": 11526 }, { "epoch": 0.9762439127673089, "grad_norm": 1.2807410005373678, "learning_rate": 1.4742787267148506e-08, "loss": 0.5713, "step": 11527 }, { "epoch": 0.9763286047004023, "grad_norm": 6.736653864569943, "learning_rate": 1.463772100912364e-08, "loss": 0.6399, "step": 11528 }, { "epoch": 0.9764132966334956, "grad_norm": 1.5895854673892063, "learning_rate": 1.4533029926009356e-08, "loss": 0.6058, "step": 11529 }, { "epoch": 0.976497988566589, "grad_norm": 1.2610023361115017, "learning_rate": 1.4428714025683243e-08, "loss": 0.6039, "step": 11530 }, { "epoch": 0.9765826804996824, "grad_norm": 1.6362857654422815, "learning_rate": 1.4324773315996798e-08, "loss": 0.659, "step": 11531 }, { "epoch": 0.9766673724327758, "grad_norm": 0.587092194259508, "learning_rate": 1.4221207804770986e-08, "loss": 0.8311, "step": 11532 }, { "epoch": 0.9767520643658691, "grad_norm": 1.4939644011062647, "learning_rate": 1.4118017499800684e-08, "loss": 0.6202, "step": 11533 }, { "epoch": 0.9768367562989625, "grad_norm": 1.3565998089124967, "learning_rate": 1.4015202408850791e-08, "loss": 0.596, "step": 11534 }, { "epoch": 0.9769214482320558, "grad_norm": 1.6324915597558969, "learning_rate": 1.3912762539659008e-08, "loss": 0.6177, "step": 11535 }, { "epoch": 0.9770061401651493, "grad_norm": 1.7569680392791218, "learning_rate": 1.3810697899934722e-08, "loss": 0.6496, "step": 11536 }, { "epoch": 0.9770908320982427, "grad_norm": 1.3779868340159647, "learning_rate": 1.3709008497358457e-08, "loss": 0.6141, "step": 11537 }, { "epoch": 0.977175524031336, "grad_norm": 3.502926828498986, "learning_rate": 1.3607694339582978e-08, "loss": 0.6109, "step": 11538 }, { "epoch": 0.9772602159644294, "grad_norm": 1.5414280454037443, "learning_rate": 1.3506755434232188e-08, "loss": 0.6613, "step": 11539 }, { "epoch": 0.9773449078975227, "grad_norm": 1.2941971296767474, "learning_rate": 1.3406191788903345e-08, "loss": 0.644, "step": 11540 }, { "epoch": 0.9774295998306162, "grad_norm": 1.4102906096759682, "learning_rate": 1.3306003411163727e-08, "loss": 0.6137, "step": 11541 }, { "epoch": 0.9775142917637095, "grad_norm": 1.1377990258726276, "learning_rate": 1.3206190308552858e-08, "loss": 0.568, "step": 11542 }, { "epoch": 0.9775989836968029, "grad_norm": 1.3278346907586294, "learning_rate": 1.3106752488583063e-08, "loss": 0.6102, "step": 11543 }, { "epoch": 0.9776836756298962, "grad_norm": 1.5065114863279623, "learning_rate": 1.3007689958736137e-08, "loss": 0.6228, "step": 11544 }, { "epoch": 0.9777683675629896, "grad_norm": 1.1432399025479059, "learning_rate": 1.2909002726468889e-08, "loss": 0.6297, "step": 11545 }, { "epoch": 0.977853059496083, "grad_norm": 1.424858209423095, "learning_rate": 1.2810690799206493e-08, "loss": 0.6517, "step": 11546 }, { "epoch": 0.9779377514291764, "grad_norm": 1.1281914976358838, "learning_rate": 1.2712754184348031e-08, "loss": 0.5973, "step": 11547 }, { "epoch": 0.9780224433622697, "grad_norm": 1.2055542932168748, "learning_rate": 1.2615192889263716e-08, "loss": 0.6458, "step": 11548 }, { "epoch": 0.9781071352953631, "grad_norm": 1.4145535056960652, "learning_rate": 1.251800692129601e-08, "loss": 0.6284, "step": 11549 }, { "epoch": 0.9781918272284564, "grad_norm": 1.3181906647429817, "learning_rate": 1.2421196287757952e-08, "loss": 0.6223, "step": 11550 }, { "epoch": 0.9782765191615499, "grad_norm": 1.8582803199583253, "learning_rate": 1.2324760995935381e-08, "loss": 0.6039, "step": 11551 }, { "epoch": 0.9783612110946432, "grad_norm": 1.2655525156868779, "learning_rate": 1.2228701053085823e-08, "loss": 0.609, "step": 11552 }, { "epoch": 0.9784459030277366, "grad_norm": 1.5058442827683642, "learning_rate": 1.2133016466437385e-08, "loss": 0.5838, "step": 11553 }, { "epoch": 0.97853059496083, "grad_norm": 1.252908108069246, "learning_rate": 1.2037707243192087e-08, "loss": 0.6362, "step": 11554 }, { "epoch": 0.9786152868939234, "grad_norm": 1.2472973569222234, "learning_rate": 1.1942773390521412e-08, "loss": 0.6026, "step": 11555 }, { "epoch": 0.9786999788270168, "grad_norm": 1.3760345302383894, "learning_rate": 1.18482149155702e-08, "loss": 0.6274, "step": 11556 }, { "epoch": 0.9787846707601101, "grad_norm": 2.142470674688078, "learning_rate": 1.1754031825454426e-08, "loss": 0.5713, "step": 11557 }, { "epoch": 0.9788693626932035, "grad_norm": 1.262325300698672, "learning_rate": 1.1660224127261754e-08, "loss": 0.6167, "step": 11558 }, { "epoch": 0.9789540546262968, "grad_norm": 1.3393700084125684, "learning_rate": 1.1566791828051538e-08, "loss": 0.5997, "step": 11559 }, { "epoch": 0.9790387465593903, "grad_norm": 1.3679105182924383, "learning_rate": 1.1473734934855373e-08, "loss": 0.5638, "step": 11560 }, { "epoch": 0.9791234384924836, "grad_norm": 1.4481633867597834, "learning_rate": 1.1381053454675995e-08, "loss": 0.6778, "step": 11561 }, { "epoch": 0.979208130425577, "grad_norm": 0.6309850342652724, "learning_rate": 1.128874739448893e-08, "loss": 0.8201, "step": 11562 }, { "epoch": 0.9792928223586703, "grad_norm": 1.472524043389399, "learning_rate": 1.1196816761239736e-08, "loss": 0.6363, "step": 11563 }, { "epoch": 0.9793775142917637, "grad_norm": 1.3232645538549517, "learning_rate": 1.1105261561846769e-08, "loss": 0.6114, "step": 11564 }, { "epoch": 0.9794622062248571, "grad_norm": 1.4294667783418287, "learning_rate": 1.1014081803200626e-08, "loss": 0.6847, "step": 11565 }, { "epoch": 0.9795468981579505, "grad_norm": 1.9869662611263121, "learning_rate": 1.0923277492162487e-08, "loss": 0.6224, "step": 11566 }, { "epoch": 0.9796315900910438, "grad_norm": 1.5352473599014327, "learning_rate": 1.083284863556633e-08, "loss": 0.6202, "step": 11567 }, { "epoch": 0.9797162820241372, "grad_norm": 1.759648568739679, "learning_rate": 1.0742795240217263e-08, "loss": 0.5917, "step": 11568 }, { "epoch": 0.9798009739572305, "grad_norm": 1.4989951986792855, "learning_rate": 1.0653117312892092e-08, "loss": 0.5871, "step": 11569 }, { "epoch": 0.979885665890324, "grad_norm": 1.3692100141443269, "learning_rate": 1.0563814860339861e-08, "loss": 0.6028, "step": 11570 }, { "epoch": 0.9799703578234173, "grad_norm": 1.240730501449062, "learning_rate": 1.0474887889280749e-08, "loss": 0.6716, "step": 11571 }, { "epoch": 0.9800550497565107, "grad_norm": 1.2296387126455894, "learning_rate": 1.0386336406407183e-08, "loss": 0.6246, "step": 11572 }, { "epoch": 0.980139741689604, "grad_norm": 1.4207367536157143, "learning_rate": 1.0298160418382718e-08, "loss": 0.6361, "step": 11573 }, { "epoch": 0.9802244336226974, "grad_norm": 1.6718007802948214, "learning_rate": 1.0210359931843716e-08, "loss": 0.642, "step": 11574 }, { "epoch": 0.9803091255557909, "grad_norm": 2.462399341537974, "learning_rate": 1.0122934953397112e-08, "loss": 0.618, "step": 11575 }, { "epoch": 0.9803938174888842, "grad_norm": 1.369085173717506, "learning_rate": 1.0035885489622643e-08, "loss": 0.656, "step": 11576 }, { "epoch": 0.9804785094219776, "grad_norm": 1.2198674109105934, "learning_rate": 9.949211547070625e-09, "loss": 0.6404, "step": 11577 }, { "epoch": 0.9805632013550709, "grad_norm": 1.608223915371268, "learning_rate": 9.862913132264174e-09, "loss": 0.6443, "step": 11578 }, { "epoch": 0.9806478932881643, "grad_norm": 1.1925964705723628, "learning_rate": 9.776990251697538e-09, "loss": 0.5853, "step": 11579 }, { "epoch": 0.9807325852212577, "grad_norm": 0.7498874089880588, "learning_rate": 9.691442911836658e-09, "loss": 0.849, "step": 11580 }, { "epoch": 0.9808172771543511, "grad_norm": 1.3344182001020717, "learning_rate": 9.606271119119715e-09, "loss": 0.5892, "step": 11581 }, { "epoch": 0.9809019690874444, "grad_norm": 2.2635740301092246, "learning_rate": 9.521474879956582e-09, "loss": 0.6398, "step": 11582 }, { "epoch": 0.9809866610205378, "grad_norm": 1.3425525210023352, "learning_rate": 9.437054200728269e-09, "loss": 0.6355, "step": 11583 }, { "epoch": 0.9810713529536311, "grad_norm": 1.7152260798649202, "learning_rate": 9.353009087787468e-09, "loss": 0.6718, "step": 11584 }, { "epoch": 0.9811560448867246, "grad_norm": 1.3129578822355565, "learning_rate": 9.269339547459677e-09, "loss": 0.5661, "step": 11585 }, { "epoch": 0.9812407368198179, "grad_norm": 1.3126590359352748, "learning_rate": 9.186045586041526e-09, "loss": 0.6362, "step": 11586 }, { "epoch": 0.9813254287529113, "grad_norm": 1.4534952066708104, "learning_rate": 9.103127209800778e-09, "loss": 0.615, "step": 11587 }, { "epoch": 0.9814101206860046, "grad_norm": 1.2227713909449802, "learning_rate": 9.020584424977442e-09, "loss": 0.6391, "step": 11588 }, { "epoch": 0.981494812619098, "grad_norm": 1.4596239459603806, "learning_rate": 8.93841723778377e-09, "loss": 0.6578, "step": 11589 }, { "epoch": 0.9815795045521915, "grad_norm": 1.6101676212736422, "learning_rate": 8.856625654403706e-09, "loss": 0.6055, "step": 11590 }, { "epoch": 0.9816641964852848, "grad_norm": 1.8984393963557797, "learning_rate": 8.775209680991214e-09, "loss": 0.5912, "step": 11591 }, { "epoch": 0.9817488884183782, "grad_norm": 1.3753062326291525, "learning_rate": 8.694169323674729e-09, "loss": 0.6637, "step": 11592 }, { "epoch": 0.9818335803514715, "grad_norm": 0.6228165521134703, "learning_rate": 8.613504588551592e-09, "loss": 0.8735, "step": 11593 }, { "epoch": 0.9819182722845649, "grad_norm": 1.4461783056300828, "learning_rate": 8.533215481693057e-09, "loss": 0.6454, "step": 11594 }, { "epoch": 0.9820029642176583, "grad_norm": 0.6762706759917386, "learning_rate": 8.453302009140963e-09, "loss": 0.8265, "step": 11595 }, { "epoch": 0.9820876561507517, "grad_norm": 1.2334240165409318, "learning_rate": 8.373764176909937e-09, "loss": 0.6225, "step": 11596 }, { "epoch": 0.982172348083845, "grad_norm": 1.3059007033314851, "learning_rate": 8.294601990984641e-09, "loss": 0.6882, "step": 11597 }, { "epoch": 0.9822570400169384, "grad_norm": 1.2137089609498222, "learning_rate": 8.215815457323083e-09, "loss": 0.6214, "step": 11598 }, { "epoch": 0.9823417319500317, "grad_norm": 1.1937166678266382, "learning_rate": 8.137404581853858e-09, "loss": 0.6134, "step": 11599 }, { "epoch": 0.9824264238831252, "grad_norm": 1.2541315959573278, "learning_rate": 8.059369370478354e-09, "loss": 0.6303, "step": 11600 }, { "epoch": 0.9825111158162185, "grad_norm": 2.6876583430012646, "learning_rate": 7.98170982906854e-09, "loss": 0.6119, "step": 11601 }, { "epoch": 0.9825958077493119, "grad_norm": 1.6419830759669516, "learning_rate": 7.904425963469187e-09, "loss": 0.6359, "step": 11602 }, { "epoch": 0.9826804996824052, "grad_norm": 1.584126950702496, "learning_rate": 7.827517779496197e-09, "loss": 0.6634, "step": 11603 }, { "epoch": 0.9827651916154986, "grad_norm": 1.4874465901585114, "learning_rate": 7.750985282937717e-09, "loss": 0.6257, "step": 11604 }, { "epoch": 0.982849883548592, "grad_norm": 1.482230592536834, "learning_rate": 7.674828479552477e-09, "loss": 0.5976, "step": 11605 }, { "epoch": 0.9829345754816854, "grad_norm": 1.294060303615654, "learning_rate": 7.599047375072e-09, "loss": 0.6383, "step": 11606 }, { "epoch": 0.9830192674147787, "grad_norm": 2.08018032737462, "learning_rate": 7.523641975198948e-09, "loss": 0.6278, "step": 11607 }, { "epoch": 0.9831039593478721, "grad_norm": 1.3163504235171708, "learning_rate": 7.44861228560878e-09, "loss": 0.6018, "step": 11608 }, { "epoch": 0.9831886512809654, "grad_norm": 1.5510147635532892, "learning_rate": 7.373958311946982e-09, "loss": 0.6599, "step": 11609 }, { "epoch": 0.9832733432140589, "grad_norm": 1.4574869936946608, "learning_rate": 7.299680059832392e-09, "loss": 0.5929, "step": 11610 }, { "epoch": 0.9833580351471523, "grad_norm": 1.2994134964049198, "learning_rate": 7.225777534854428e-09, "loss": 0.6452, "step": 11611 }, { "epoch": 0.9834427270802456, "grad_norm": 2.402618035992232, "learning_rate": 7.152250742574196e-09, "loss": 0.5855, "step": 11612 }, { "epoch": 0.983527419013339, "grad_norm": 1.215429429975677, "learning_rate": 7.079099688526159e-09, "loss": 0.578, "step": 11613 }, { "epoch": 0.9836121109464323, "grad_norm": 1.3913803766695305, "learning_rate": 7.0063243782142464e-09, "loss": 0.6113, "step": 11614 }, { "epoch": 0.9836968028795258, "grad_norm": 1.1486863563371885, "learning_rate": 6.933924817115189e-09, "loss": 0.6465, "step": 11615 }, { "epoch": 0.9837814948126191, "grad_norm": 1.5960294319319495, "learning_rate": 6.8619010106785174e-09, "loss": 0.609, "step": 11616 }, { "epoch": 0.9838661867457125, "grad_norm": 1.4679183592722738, "learning_rate": 6.79025296432323e-09, "loss": 0.6023, "step": 11617 }, { "epoch": 0.9839508786788058, "grad_norm": 2.2027665497882807, "learning_rate": 6.718980683442233e-09, "loss": 0.6416, "step": 11618 }, { "epoch": 0.9840355706118992, "grad_norm": 1.7546008603111682, "learning_rate": 6.648084173398461e-09, "loss": 0.5528, "step": 11619 }, { "epoch": 0.9841202625449926, "grad_norm": 1.6793231049083344, "learning_rate": 6.57756343952709e-09, "loss": 0.5798, "step": 11620 }, { "epoch": 0.984204954478086, "grad_norm": 1.5630146939006466, "learning_rate": 6.507418487135542e-09, "loss": 0.6497, "step": 11621 }, { "epoch": 0.9842896464111793, "grad_norm": 1.3475966441018485, "learning_rate": 6.437649321502926e-09, "loss": 0.6204, "step": 11622 }, { "epoch": 0.9843743383442727, "grad_norm": 1.5251533301682452, "learning_rate": 6.368255947878932e-09, "loss": 0.608, "step": 11623 }, { "epoch": 0.984459030277366, "grad_norm": 1.6353013087195853, "learning_rate": 6.299238371486604e-09, "loss": 0.6333, "step": 11624 }, { "epoch": 0.9845437222104595, "grad_norm": 1.378135402264547, "learning_rate": 6.230596597519012e-09, "loss": 0.5986, "step": 11625 }, { "epoch": 0.9846284141435528, "grad_norm": 1.4929519754808809, "learning_rate": 6.162330631142577e-09, "loss": 0.6166, "step": 11626 }, { "epoch": 0.9847131060766462, "grad_norm": 1.2732277112228165, "learning_rate": 6.094440477494301e-09, "loss": 0.5913, "step": 11627 }, { "epoch": 0.9847977980097395, "grad_norm": 1.3456540879319279, "learning_rate": 6.026926141683431e-09, "loss": 0.6227, "step": 11628 }, { "epoch": 0.9848824899428329, "grad_norm": 1.6341364453228355, "learning_rate": 5.959787628790903e-09, "loss": 0.5761, "step": 11629 }, { "epoch": 0.9849671818759264, "grad_norm": 1.4007078974408962, "learning_rate": 5.893024943868786e-09, "loss": 0.6986, "step": 11630 }, { "epoch": 0.9850518738090197, "grad_norm": 1.5307071617985226, "learning_rate": 5.826638091941394e-09, "loss": 0.6406, "step": 11631 }, { "epoch": 0.9851365657421131, "grad_norm": 1.6282022238331408, "learning_rate": 5.760627078005288e-09, "loss": 0.6815, "step": 11632 }, { "epoch": 0.9852212576752064, "grad_norm": 1.6451104942955619, "learning_rate": 5.694991907027603e-09, "loss": 0.6263, "step": 11633 }, { "epoch": 0.9853059496082998, "grad_norm": 0.6334206589715795, "learning_rate": 5.629732583947722e-09, "loss": 0.8636, "step": 11634 }, { "epoch": 0.9853906415413932, "grad_norm": 1.6038575616340676, "learning_rate": 5.564849113677273e-09, "loss": 0.6243, "step": 11635 }, { "epoch": 0.9854753334744866, "grad_norm": 1.2864803430964864, "learning_rate": 5.50034150109846e-09, "loss": 0.5969, "step": 11636 }, { "epoch": 0.9855600254075799, "grad_norm": 1.243851935826183, "learning_rate": 5.43620975106629e-09, "loss": 0.627, "step": 11637 }, { "epoch": 0.9856447173406733, "grad_norm": 1.3032261894785793, "learning_rate": 5.372453868406347e-09, "loss": 0.6122, "step": 11638 }, { "epoch": 0.9857294092737666, "grad_norm": 1.750004752815367, "learning_rate": 5.309073857917569e-09, "loss": 0.6398, "step": 11639 }, { "epoch": 0.9858141012068601, "grad_norm": 1.3059055259555108, "learning_rate": 5.246069724368919e-09, "loss": 0.625, "step": 11640 }, { "epoch": 0.9858987931399534, "grad_norm": 1.2138359652037047, "learning_rate": 5.18344147250216e-09, "loss": 0.6588, "step": 11641 }, { "epoch": 0.9859834850730468, "grad_norm": 1.3425358869441077, "learning_rate": 5.121189107030189e-09, "loss": 0.6515, "step": 11642 }, { "epoch": 0.9860681770061401, "grad_norm": 1.296215467739634, "learning_rate": 5.059312632637592e-09, "loss": 0.5984, "step": 11643 }, { "epoch": 0.9861528689392335, "grad_norm": 0.6140289621008794, "learning_rate": 4.9978120539811996e-09, "loss": 0.8031, "step": 11644 }, { "epoch": 0.986237560872327, "grad_norm": 1.406139294745183, "learning_rate": 4.936687375689531e-09, "loss": 0.6283, "step": 11645 }, { "epoch": 0.9863222528054203, "grad_norm": 1.7929278976562475, "learning_rate": 4.8759386023616854e-09, "loss": 0.6421, "step": 11646 }, { "epoch": 0.9864069447385136, "grad_norm": 1.4000403795087424, "learning_rate": 4.815565738570671e-09, "loss": 0.6322, "step": 11647 }, { "epoch": 0.986491636671607, "grad_norm": 1.2491743319436586, "learning_rate": 4.755568788858966e-09, "loss": 0.6197, "step": 11648 }, { "epoch": 0.9865763286047003, "grad_norm": 1.723455548909798, "learning_rate": 4.695947757741292e-09, "loss": 0.636, "step": 11649 }, { "epoch": 0.9866610205377938, "grad_norm": 1.3822822628754425, "learning_rate": 4.636702649705171e-09, "loss": 0.6104, "step": 11650 }, { "epoch": 0.9867457124708872, "grad_norm": 1.3907272426047221, "learning_rate": 4.577833469208703e-09, "loss": 0.6777, "step": 11651 }, { "epoch": 0.9868304044039805, "grad_norm": 0.5902700247301502, "learning_rate": 4.519340220682788e-09, "loss": 0.8583, "step": 11652 }, { "epoch": 0.9869150963370739, "grad_norm": 2.654958685862252, "learning_rate": 4.4612229085283505e-09, "loss": 0.6582, "step": 11653 }, { "epoch": 0.9869997882701673, "grad_norm": 1.6387255350943861, "learning_rate": 4.403481537119669e-09, "loss": 0.6729, "step": 11654 }, { "epoch": 0.9870844802032607, "grad_norm": 1.56381929453525, "learning_rate": 4.346116110802157e-09, "loss": 0.6207, "step": 11655 }, { "epoch": 0.987169172136354, "grad_norm": 1.8727017958704317, "learning_rate": 4.28912663389236e-09, "loss": 0.636, "step": 11656 }, { "epoch": 0.9872538640694474, "grad_norm": 1.1596589147565661, "learning_rate": 4.2325131106796255e-09, "loss": 0.6263, "step": 11657 }, { "epoch": 0.9873385560025407, "grad_norm": 1.1460035982568637, "learning_rate": 4.17627554542388e-09, "loss": 0.5871, "step": 11658 }, { "epoch": 0.9874232479356342, "grad_norm": 1.4217827327056862, "learning_rate": 4.120413942357293e-09, "loss": 0.5655, "step": 11659 }, { "epoch": 0.9875079398687275, "grad_norm": 0.6417264825023119, "learning_rate": 4.064928305684279e-09, "loss": 0.8214, "step": 11660 }, { "epoch": 0.9875926318018209, "grad_norm": 1.343467847748691, "learning_rate": 4.009818639580387e-09, "loss": 0.686, "step": 11661 }, { "epoch": 0.9876773237349142, "grad_norm": 1.6411545826436524, "learning_rate": 3.9550849481923005e-09, "loss": 0.5849, "step": 11662 }, { "epoch": 0.9877620156680076, "grad_norm": 2.1880676260638094, "learning_rate": 3.900727235639501e-09, "loss": 0.5988, "step": 11663 }, { "epoch": 0.987846707601101, "grad_norm": 3.9920303605429868, "learning_rate": 3.846745506013161e-09, "loss": 0.6416, "step": 11664 }, { "epoch": 0.9879313995341944, "grad_norm": 1.6782877830150702, "learning_rate": 3.793139763373921e-09, "loss": 0.6248, "step": 11665 }, { "epoch": 0.9880160914672877, "grad_norm": 1.2605371682434825, "learning_rate": 3.739910011757997e-09, "loss": 0.6309, "step": 11666 }, { "epoch": 0.9881007834003811, "grad_norm": 1.413252232886531, "learning_rate": 3.6870562551699627e-09, "loss": 0.6412, "step": 11667 }, { "epoch": 0.9881854753334745, "grad_norm": 3.1691573103778663, "learning_rate": 3.6345784975877486e-09, "loss": 0.6455, "step": 11668 }, { "epoch": 0.9882701672665679, "grad_norm": 1.6395883975011594, "learning_rate": 3.5824767429604167e-09, "loss": 0.6175, "step": 11669 }, { "epoch": 0.9883548591996613, "grad_norm": 1.4127684277187962, "learning_rate": 3.5307509952092755e-09, "loss": 0.5992, "step": 11670 }, { "epoch": 0.9884395511327546, "grad_norm": 1.229989410283795, "learning_rate": 3.4794012582262117e-09, "loss": 0.601, "step": 11671 }, { "epoch": 0.988524243065848, "grad_norm": 1.3195919846697604, "learning_rate": 3.4284275358764663e-09, "loss": 0.6075, "step": 11672 }, { "epoch": 0.9886089349989413, "grad_norm": 1.4747384304363607, "learning_rate": 3.377829831995305e-09, "loss": 0.5987, "step": 11673 }, { "epoch": 0.9886936269320348, "grad_norm": 2.0750189793680685, "learning_rate": 3.3276081503913483e-09, "loss": 0.5769, "step": 11674 }, { "epoch": 0.9887783188651281, "grad_norm": 1.3294516753245809, "learning_rate": 3.2777624948432395e-09, "loss": 0.5866, "step": 11675 }, { "epoch": 0.9888630107982215, "grad_norm": 1.9898528483193194, "learning_rate": 3.228292869101868e-09, "loss": 0.6011, "step": 11676 }, { "epoch": 0.9889477027313148, "grad_norm": 1.3822241719861494, "learning_rate": 3.179199276891476e-09, "loss": 0.6426, "step": 11677 }, { "epoch": 0.9890323946644082, "grad_norm": 1.4807650024960781, "learning_rate": 3.1304817219052206e-09, "loss": 0.6368, "step": 11678 }, { "epoch": 0.9891170865975016, "grad_norm": 1.4271736546091673, "learning_rate": 3.0821402078101694e-09, "loss": 0.6435, "step": 11679 }, { "epoch": 0.989201778530595, "grad_norm": 1.430278174426622, "learning_rate": 3.0341747382434118e-09, "loss": 0.661, "step": 11680 }, { "epoch": 0.9892864704636883, "grad_norm": 1.6458830739024763, "learning_rate": 2.9865853168159485e-09, "loss": 0.5756, "step": 11681 }, { "epoch": 0.9893711623967817, "grad_norm": 1.3221089145412184, "learning_rate": 2.9393719471082495e-09, "loss": 0.5827, "step": 11682 }, { "epoch": 0.989455854329875, "grad_norm": 1.6611837535022302, "learning_rate": 2.8925346326730274e-09, "loss": 0.6599, "step": 11683 }, { "epoch": 0.9895405462629685, "grad_norm": 0.6365188829403571, "learning_rate": 2.8460733770357962e-09, "loss": 0.871, "step": 11684 }, { "epoch": 0.9896252381960619, "grad_norm": 1.3712618073768874, "learning_rate": 2.7999881836926477e-09, "loss": 0.5793, "step": 11685 }, { "epoch": 0.9897099301291552, "grad_norm": 1.3814527410251078, "learning_rate": 2.754279056111919e-09, "loss": 0.6391, "step": 11686 }, { "epoch": 0.9897946220622486, "grad_norm": 1.4383679886988583, "learning_rate": 2.708945997733636e-09, "loss": 0.5851, "step": 11687 }, { "epoch": 0.9898793139953419, "grad_norm": 1.3387886310993444, "learning_rate": 2.663989011968404e-09, "loss": 0.6438, "step": 11688 }, { "epoch": 0.9899640059284354, "grad_norm": 1.399787530392047, "learning_rate": 2.619408102200738e-09, "loss": 0.6545, "step": 11689 }, { "epoch": 0.9900486978615287, "grad_norm": 1.2166834044645323, "learning_rate": 2.575203271784621e-09, "loss": 0.6754, "step": 11690 }, { "epoch": 0.9901333897946221, "grad_norm": 3.0275443678450475, "learning_rate": 2.531374524047392e-09, "loss": 0.6043, "step": 11691 }, { "epoch": 0.9902180817277154, "grad_norm": 1.3877689255398473, "learning_rate": 2.487921862286968e-09, "loss": 0.6386, "step": 11692 }, { "epoch": 0.9903027736608088, "grad_norm": 1.4183729878679119, "learning_rate": 2.444845289773512e-09, "loss": 0.6077, "step": 11693 }, { "epoch": 0.9903874655939022, "grad_norm": 1.500536512629028, "learning_rate": 2.402144809748874e-09, "loss": 0.6763, "step": 11694 }, { "epoch": 0.9904721575269956, "grad_norm": 1.3229058929213107, "learning_rate": 2.3598204254260406e-09, "loss": 0.6254, "step": 11695 }, { "epoch": 0.9905568494600889, "grad_norm": 1.6712037685393588, "learning_rate": 2.3178721399907955e-09, "loss": 0.653, "step": 11696 }, { "epoch": 0.9906415413931823, "grad_norm": 1.590638093317604, "learning_rate": 2.276299956599504e-09, "loss": 0.6015, "step": 11697 }, { "epoch": 0.9907262333262756, "grad_norm": 1.441025353645278, "learning_rate": 2.235103878380773e-09, "loss": 0.6026, "step": 11698 }, { "epoch": 0.9908109252593691, "grad_norm": 1.319334338387149, "learning_rate": 2.194283908434902e-09, "loss": 0.6513, "step": 11699 }, { "epoch": 0.9908956171924624, "grad_norm": 1.612704260879817, "learning_rate": 2.1538400498338774e-09, "loss": 0.6281, "step": 11700 }, { "epoch": 0.9909803091255558, "grad_norm": 6.425672581493518, "learning_rate": 2.1137723056213757e-09, "loss": 0.6039, "step": 11701 }, { "epoch": 0.9910650010586491, "grad_norm": 1.3779214151393588, "learning_rate": 2.074080678812207e-09, "loss": 0.5811, "step": 11702 }, { "epoch": 0.9911496929917425, "grad_norm": 1.3710435165596133, "learning_rate": 2.034765172393982e-09, "loss": 0.6224, "step": 11703 }, { "epoch": 0.991234384924836, "grad_norm": 1.187390746142767, "learning_rate": 1.995825789325445e-09, "loss": 0.5755, "step": 11704 }, { "epoch": 0.9913190768579293, "grad_norm": 1.7990979506352796, "learning_rate": 1.9572625325364747e-09, "loss": 0.6309, "step": 11705 }, { "epoch": 0.9914037687910227, "grad_norm": 1.7419293002407992, "learning_rate": 1.919075404929194e-09, "loss": 0.6349, "step": 11706 }, { "epoch": 0.991488460724116, "grad_norm": 1.4949903832610465, "learning_rate": 1.8812644093774144e-09, "loss": 0.6009, "step": 11707 }, { "epoch": 0.9915731526572094, "grad_norm": 1.4022606902369754, "learning_rate": 1.843829548727194e-09, "loss": 0.6169, "step": 11708 }, { "epoch": 0.9916578445903028, "grad_norm": 1.2244529068101009, "learning_rate": 1.8067708257951677e-09, "loss": 0.5966, "step": 11709 }, { "epoch": 0.9917425365233962, "grad_norm": 2.82771063020933, "learning_rate": 1.770088243370216e-09, "loss": 0.6, "step": 11710 }, { "epoch": 0.9918272284564895, "grad_norm": 1.26294734560565, "learning_rate": 1.7337818042134636e-09, "loss": 0.6095, "step": 11711 }, { "epoch": 0.9919119203895829, "grad_norm": 1.649107840174775, "learning_rate": 1.697851511056059e-09, "loss": 0.5966, "step": 11712 }, { "epoch": 0.9919966123226762, "grad_norm": 1.3976063082603811, "learning_rate": 1.6622973666030606e-09, "loss": 0.634, "step": 11713 }, { "epoch": 0.9920813042557697, "grad_norm": 0.6551007701191818, "learning_rate": 1.6271193735295509e-09, "loss": 0.8341, "step": 11714 }, { "epoch": 0.992165996188863, "grad_norm": 1.8684158294431248, "learning_rate": 1.5923175344828568e-09, "loss": 0.6858, "step": 11715 }, { "epoch": 0.9922506881219564, "grad_norm": 1.3663381316403238, "learning_rate": 1.5578918520819942e-09, "loss": 0.6231, "step": 11716 }, { "epoch": 0.9923353800550497, "grad_norm": 1.5958756355284534, "learning_rate": 1.5238423289176685e-09, "loss": 0.6335, "step": 11717 }, { "epoch": 0.9924200719881431, "grad_norm": 1.8302014683898271, "learning_rate": 1.4901689675528298e-09, "loss": 0.5997, "step": 11718 }, { "epoch": 0.9925047639212365, "grad_norm": 1.1857210246000371, "learning_rate": 1.4568717705210067e-09, "loss": 0.5997, "step": 11719 }, { "epoch": 0.9925894558543299, "grad_norm": 1.4276615363374274, "learning_rate": 1.4239507403279728e-09, "loss": 0.6479, "step": 11720 }, { "epoch": 0.9926741477874232, "grad_norm": 0.609961923934602, "learning_rate": 1.3914058794511908e-09, "loss": 0.914, "step": 11721 }, { "epoch": 0.9927588397205166, "grad_norm": 2.5544238654314966, "learning_rate": 1.359237190340368e-09, "loss": 0.5796, "step": 11722 }, { "epoch": 0.99284353165361, "grad_norm": 1.8499111646349184, "learning_rate": 1.32744467541579e-09, "loss": 0.6025, "step": 11723 }, { "epoch": 0.9929282235867034, "grad_norm": 1.6917852967333606, "learning_rate": 1.296028337070543e-09, "loss": 0.6502, "step": 11724 }, { "epoch": 0.9930129155197968, "grad_norm": 0.5755055000308656, "learning_rate": 1.2649881776682915e-09, "loss": 0.8541, "step": 11725 }, { "epoch": 0.9930976074528901, "grad_norm": 1.316022208345741, "learning_rate": 1.2343241995455003e-09, "loss": 0.6466, "step": 11726 }, { "epoch": 0.9931822993859835, "grad_norm": 1.381461808407463, "learning_rate": 1.2040364050092124e-09, "loss": 0.6104, "step": 11727 }, { "epoch": 0.9932669913190768, "grad_norm": 1.2155954950288996, "learning_rate": 1.174124796339271e-09, "loss": 0.6368, "step": 11728 }, { "epoch": 0.9933516832521703, "grad_norm": 1.3777742653050646, "learning_rate": 1.1445893757860982e-09, "loss": 0.6873, "step": 11729 }, { "epoch": 0.9934363751852636, "grad_norm": 1.3290383179927643, "learning_rate": 1.1154301455729155e-09, "loss": 0.5908, "step": 11730 }, { "epoch": 0.993521067118357, "grad_norm": 1.5951461392968707, "learning_rate": 1.0866471078940787e-09, "loss": 0.6457, "step": 11731 }, { "epoch": 0.9936057590514503, "grad_norm": 1.3729676439076246, "learning_rate": 1.058240264915633e-09, "loss": 0.6272, "step": 11732 }, { "epoch": 0.9936904509845437, "grad_norm": 1.4911370856481152, "learning_rate": 1.030209618775313e-09, "loss": 0.6538, "step": 11733 }, { "epoch": 0.9937751429176371, "grad_norm": 6.763986431240265, "learning_rate": 1.0025551715825422e-09, "loss": 0.6269, "step": 11734 }, { "epoch": 0.9938598348507305, "grad_norm": 1.321250203995436, "learning_rate": 9.752769254178784e-10, "loss": 0.6583, "step": 11735 }, { "epoch": 0.9939445267838238, "grad_norm": 2.3340349048486466, "learning_rate": 9.483748823352346e-10, "loss": 0.5739, "step": 11736 }, { "epoch": 0.9940292187169172, "grad_norm": 1.9573593244139813, "learning_rate": 9.21849044358547e-10, "loss": 0.6333, "step": 11737 }, { "epoch": 0.9941139106500105, "grad_norm": 0.6039549245807363, "learning_rate": 8.956994134839969e-10, "loss": 0.85, "step": 11738 }, { "epoch": 0.994198602583104, "grad_norm": 1.2740695861020352, "learning_rate": 8.699259916794545e-10, "loss": 0.6475, "step": 11739 }, { "epoch": 0.9942832945161973, "grad_norm": 1.469927348105337, "learning_rate": 8.445287808850345e-10, "loss": 0.6334, "step": 11740 }, { "epoch": 0.9943679864492907, "grad_norm": 1.3057903733888487, "learning_rate": 8.195077830114307e-10, "loss": 0.6387, "step": 11741 }, { "epoch": 0.994452678382384, "grad_norm": 1.211123320935933, "learning_rate": 7.948629999415813e-10, "loss": 0.6383, "step": 11742 }, { "epoch": 0.9945373703154774, "grad_norm": 1.5568411408631326, "learning_rate": 7.705944335306692e-10, "loss": 0.6381, "step": 11743 }, { "epoch": 0.9946220622485709, "grad_norm": 0.6739853759359905, "learning_rate": 7.46702085604456e-10, "loss": 0.8025, "step": 11744 }, { "epoch": 0.9947067541816642, "grad_norm": 1.6533974736473676, "learning_rate": 7.231859579615031e-10, "loss": 0.6402, "step": 11745 }, { "epoch": 0.9947914461147576, "grad_norm": 4.298580079398976, "learning_rate": 7.000460523709506e-10, "loss": 0.5804, "step": 11746 }, { "epoch": 0.9948761380478509, "grad_norm": 0.5654345271782762, "learning_rate": 6.772823705741838e-10, "loss": 0.8345, "step": 11747 }, { "epoch": 0.9949608299809443, "grad_norm": 2.19960620040217, "learning_rate": 6.548949142848315e-10, "loss": 0.6473, "step": 11748 }, { "epoch": 0.9950455219140377, "grad_norm": 1.1374428125135398, "learning_rate": 6.328836851876574e-10, "loss": 0.6068, "step": 11749 }, { "epoch": 0.9951302138471311, "grad_norm": 1.1590950398445221, "learning_rate": 6.112486849391142e-10, "loss": 0.6379, "step": 11750 }, { "epoch": 0.9952149057802244, "grad_norm": 1.3014828409933457, "learning_rate": 5.899899151667887e-10, "loss": 0.6276, "step": 11751 }, { "epoch": 0.9952995977133178, "grad_norm": 1.603957960408444, "learning_rate": 5.691073774705125e-10, "loss": 0.5929, "step": 11752 }, { "epoch": 0.9953842896464111, "grad_norm": 0.633267236790784, "learning_rate": 5.486010734223612e-10, "loss": 0.8893, "step": 11753 }, { "epoch": 0.9954689815795046, "grad_norm": 1.2108163796658236, "learning_rate": 5.28471004565545e-10, "loss": 0.6183, "step": 11754 }, { "epoch": 0.9955536735125979, "grad_norm": 1.3794304663273111, "learning_rate": 5.087171724149631e-10, "loss": 0.6319, "step": 11755 }, { "epoch": 0.9956383654456913, "grad_norm": 1.5595232319857888, "learning_rate": 4.893395784560939e-10, "loss": 0.6384, "step": 11756 }, { "epoch": 0.9957230573787846, "grad_norm": 1.32580647534617, "learning_rate": 4.703382241488808e-10, "loss": 0.6209, "step": 11757 }, { "epoch": 0.9958077493118781, "grad_norm": 1.329949504845795, "learning_rate": 4.5171311092218063e-10, "loss": 0.6243, "step": 11758 }, { "epoch": 0.9958924412449714, "grad_norm": 1.26144753212008, "learning_rate": 4.334642401782052e-10, "loss": 0.6213, "step": 11759 }, { "epoch": 0.9959771331780648, "grad_norm": 1.4032786142732472, "learning_rate": 4.1559161328974527e-10, "loss": 0.6101, "step": 11760 }, { "epoch": 0.9960618251111582, "grad_norm": 0.6319732159900688, "learning_rate": 3.9809523160183603e-10, "loss": 0.8361, "step": 11761 }, { "epoch": 0.9961465170442515, "grad_norm": 1.3152558534154593, "learning_rate": 3.8097509643175713e-10, "loss": 0.6261, "step": 11762 }, { "epoch": 0.996231208977345, "grad_norm": 0.613637358436183, "learning_rate": 3.642312090673672e-10, "loss": 0.8296, "step": 11763 }, { "epoch": 0.9963159009104383, "grad_norm": 0.6105718399832475, "learning_rate": 3.4786357076932456e-10, "loss": 0.8741, "step": 11764 }, { "epoch": 0.9964005928435317, "grad_norm": 1.5000170965742716, "learning_rate": 3.3187218276831135e-10, "loss": 0.656, "step": 11765 }, { "epoch": 0.996485284776625, "grad_norm": 0.6162334603824532, "learning_rate": 3.1625704626891964e-10, "loss": 0.8821, "step": 11766 }, { "epoch": 0.9965699767097184, "grad_norm": 1.0869012157086047, "learning_rate": 3.0101816244576533e-10, "loss": 0.6543, "step": 11767 }, { "epoch": 0.9966546686428118, "grad_norm": 1.441003719836123, "learning_rate": 2.861555324451537e-10, "loss": 0.6036, "step": 11768 }, { "epoch": 0.9967393605759052, "grad_norm": 1.42626806426283, "learning_rate": 2.7166915738674473e-10, "loss": 0.6864, "step": 11769 }, { "epoch": 0.9968240525089985, "grad_norm": 0.5974313154755873, "learning_rate": 2.575590383596671e-10, "loss": 0.8416, "step": 11770 }, { "epoch": 0.9969087444420919, "grad_norm": 1.2093614416712384, "learning_rate": 2.4382517642640435e-10, "loss": 0.6279, "step": 11771 }, { "epoch": 0.9969934363751852, "grad_norm": 1.229135103018499, "learning_rate": 2.3046757262001894e-10, "loss": 0.6126, "step": 11772 }, { "epoch": 0.9970781283082787, "grad_norm": 2.5718860613030325, "learning_rate": 2.1748622794581787e-10, "loss": 0.5865, "step": 11773 }, { "epoch": 0.997162820241372, "grad_norm": 3.2644289902446104, "learning_rate": 2.0488114338135246e-10, "loss": 0.6013, "step": 11774 }, { "epoch": 0.9972475121744654, "grad_norm": 2.8432373159222992, "learning_rate": 1.9265231987419808e-10, "loss": 0.6466, "step": 11775 }, { "epoch": 0.9973322041075587, "grad_norm": 1.714103548610305, "learning_rate": 1.8079975834583986e-10, "loss": 0.6035, "step": 11776 }, { "epoch": 0.9974168960406521, "grad_norm": 1.7464233666892748, "learning_rate": 1.6932345968723174e-10, "loss": 0.5867, "step": 11777 }, { "epoch": 0.9975015879737456, "grad_norm": 1.739431272911183, "learning_rate": 1.5822342476212726e-10, "loss": 0.5652, "step": 11778 }, { "epoch": 0.9975862799068389, "grad_norm": 1.3687926334114986, "learning_rate": 1.4749965440652435e-10, "loss": 0.6187, "step": 11779 }, { "epoch": 0.9976709718399323, "grad_norm": 1.3969358886899779, "learning_rate": 1.3715214942700005e-10, "loss": 0.6038, "step": 11780 }, { "epoch": 0.9977556637730256, "grad_norm": 1.7174999348803588, "learning_rate": 1.271809106018207e-10, "loss": 0.5802, "step": 11781 }, { "epoch": 0.997840355706119, "grad_norm": 1.6162919628469303, "learning_rate": 1.1758593868260725e-10, "loss": 0.6201, "step": 11782 }, { "epoch": 0.9979250476392124, "grad_norm": 0.6191780131056278, "learning_rate": 1.0836723439044961e-10, "loss": 0.8686, "step": 11783 }, { "epoch": 0.9980097395723058, "grad_norm": 0.7120773782773349, "learning_rate": 9.952479841923712e-11, "loss": 0.8811, "step": 11784 }, { "epoch": 0.9980944315053991, "grad_norm": 1.1132073573950065, "learning_rate": 9.105863143454852e-11, "loss": 0.6299, "step": 11785 }, { "epoch": 0.9981791234384925, "grad_norm": 1.5518927533959463, "learning_rate": 8.296873407309669e-11, "loss": 0.6232, "step": 11786 }, { "epoch": 0.9982638153715858, "grad_norm": 1.3285857068168663, "learning_rate": 7.525510694439408e-11, "loss": 0.6162, "step": 11787 }, { "epoch": 0.9983485073046793, "grad_norm": 1.3582542374905353, "learning_rate": 6.791775062853224e-11, "loss": 0.6674, "step": 11788 }, { "epoch": 0.9984331992377726, "grad_norm": 1.3435404721142963, "learning_rate": 6.095666567784708e-11, "loss": 0.5885, "step": 11789 }, { "epoch": 0.998517891170866, "grad_norm": 1.4794286235987348, "learning_rate": 5.4371852616363906e-11, "loss": 0.6273, "step": 11790 }, { "epoch": 0.9986025831039593, "grad_norm": 1.4813973228916244, "learning_rate": 4.816331193924217e-11, "loss": 0.5697, "step": 11791 }, { "epoch": 0.9986872750370527, "grad_norm": 0.6651356663745125, "learning_rate": 4.2331044113330664e-11, "loss": 0.8856, "step": 11792 }, { "epoch": 0.9987719669701461, "grad_norm": 1.3311245067159152, "learning_rate": 3.68750495782777e-11, "loss": 0.6344, "step": 11793 }, { "epoch": 0.9988566589032395, "grad_norm": 1.4722970158978834, "learning_rate": 3.1795328744865796e-11, "loss": 0.6525, "step": 11794 }, { "epoch": 0.9989413508363328, "grad_norm": 1.4999124626406755, "learning_rate": 2.709188199445656e-11, "loss": 0.6255, "step": 11795 }, { "epoch": 0.9990260427694262, "grad_norm": 1.6149309069261748, "learning_rate": 2.276470968176625e-11, "loss": 0.6144, "step": 11796 }, { "epoch": 0.9991107347025195, "grad_norm": 1.2703761425617353, "learning_rate": 1.8813812132090214e-11, "loss": 0.633, "step": 11797 }, { "epoch": 0.999195426635613, "grad_norm": 1.4996321150419496, "learning_rate": 1.523918964296822e-11, "loss": 0.551, "step": 11798 }, { "epoch": 0.9992801185687064, "grad_norm": 1.3953789618728438, "learning_rate": 1.2040842483074244e-11, "loss": 0.6937, "step": 11799 }, { "epoch": 0.9993648105017997, "grad_norm": 0.6176676803955725, "learning_rate": 9.218770893326678e-12, "loss": 0.8783, "step": 11800 }, { "epoch": 0.9994495024348931, "grad_norm": 1.507686226721718, "learning_rate": 6.772975085778122e-12, "loss": 0.6844, "step": 11801 }, { "epoch": 0.9995341943679864, "grad_norm": 1.7302183142190322, "learning_rate": 4.703455245280708e-12, "loss": 0.6632, "step": 11802 }, { "epoch": 0.9996188863010799, "grad_norm": 1.5617148947901496, "learning_rate": 3.0102115267105494e-12, "loss": 0.6201, "step": 11803 }, { "epoch": 0.9997035782341732, "grad_norm": 1.3355912321832013, "learning_rate": 1.6932440582984044e-12, "loss": 0.6465, "step": 11804 }, { "epoch": 0.9997882701672666, "grad_norm": 1.3043682216573431, "learning_rate": 7.525529382990116e-13, "loss": 0.6614, "step": 11805 }, { "epoch": 0.9998729621003599, "grad_norm": 1.332530428633751, "learning_rate": 1.8813823832175558e-13, "loss": 0.6126, "step": 11806 }, { "epoch": 0.9999576540334533, "grad_norm": 1.386809357081698, "learning_rate": 0.0, "loss": 0.6511, "step": 11807 }, { "epoch": 0.9999576540334533, "step": 11807, "tflops": 440.74593363030937, "token/s": 573.8574488486693, "total_flos": 1.6054680467370148e+19, "train_loss": 0.6656127169815361, "train_runtime": 33359.4874, "train_samples_per_second": 90.612, "train_steps_per_second": 0.354 } ], "log_save_evaluate_time": 230.09452104568481, "logging_steps": 1.0, "max_steps": 11807, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2362, "total_flos": 1.6054680467370148e+19, "total_tokens": 19011551.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }