{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 20480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.8828125e-05, "grad_norm": 2.448300838470459, "learning_rate": 1.6666666666666669e-06, "loss": 2.6218, "step": 1 }, { "epoch": 9.765625e-05, "grad_norm": 2.3908615112304688, "learning_rate": 3.3333333333333337e-06, "loss": 2.618, "step": 2 }, { "epoch": 0.000146484375, "grad_norm": 2.4615559577941895, "learning_rate": 5e-06, "loss": 2.615, "step": 3 }, { "epoch": 0.0001953125, "grad_norm": 1.9327112436294556, "learning_rate": 6.6666666666666675e-06, "loss": 2.6483, "step": 4 }, { "epoch": 0.000244140625, "grad_norm": 1.7466607093811035, "learning_rate": 8.333333333333334e-06, "loss": 2.5652, "step": 5 }, { "epoch": 0.00029296875, "grad_norm": 1.9599254131317139, "learning_rate": 1e-05, "loss": 2.5923, "step": 6 }, { "epoch": 0.000341796875, "grad_norm": 2.0255165100097656, "learning_rate": 1.1666666666666668e-05, "loss": 2.5558, "step": 7 }, { "epoch": 0.000390625, "grad_norm": 1.7840938568115234, "learning_rate": 1.3333333333333335e-05, "loss": 2.6149, "step": 8 }, { "epoch": 0.000439453125, "grad_norm": 2.10261869430542, "learning_rate": 1.5e-05, "loss": 2.5912, "step": 9 }, { "epoch": 0.00048828125, "grad_norm": 1.8756349086761475, "learning_rate": 1.6666666666666667e-05, "loss": 2.5796, "step": 10 }, { "epoch": 0.000537109375, "grad_norm": 1.6562258005142212, "learning_rate": 1.8333333333333333e-05, "loss": 2.555, "step": 11 }, { "epoch": 0.0005859375, "grad_norm": 2.142529010772705, "learning_rate": 2e-05, "loss": 2.5616, "step": 12 }, { "epoch": 0.000634765625, "grad_norm": 2.8051650524139404, "learning_rate": 2.1666666666666667e-05, "loss": 2.5233, "step": 13 }, { "epoch": 0.00068359375, "grad_norm": 1.593424916267395, "learning_rate": 2.3333333333333336e-05, "loss": 2.5505, "step": 14 }, { "epoch": 0.000732421875, "grad_norm": 1.639112949371338, "learning_rate": 2.5e-05, "loss": 2.4901, "step": 15 }, { "epoch": 0.00078125, "grad_norm": 2.4062013626098633, "learning_rate": 2.666666666666667e-05, "loss": 2.5407, "step": 16 }, { "epoch": 0.000830078125, "grad_norm": 2.08699369430542, "learning_rate": 2.8333333333333332e-05, "loss": 2.5261, "step": 17 }, { "epoch": 0.00087890625, "grad_norm": 1.760156273841858, "learning_rate": 3e-05, "loss": 2.5438, "step": 18 }, { "epoch": 0.000927734375, "grad_norm": 2.2224390506744385, "learning_rate": 3.166666666666667e-05, "loss": 2.5177, "step": 19 }, { "epoch": 0.0009765625, "grad_norm": 1.714298129081726, "learning_rate": 3.3333333333333335e-05, "loss": 2.5106, "step": 20 }, { "epoch": 0.001025390625, "grad_norm": 2.4258809089660645, "learning_rate": 3.5000000000000004e-05, "loss": 2.4744, "step": 21 }, { "epoch": 0.00107421875, "grad_norm": 2.469712495803833, "learning_rate": 3.6666666666666666e-05, "loss": 2.5207, "step": 22 }, { "epoch": 0.001123046875, "grad_norm": 2.3763790130615234, "learning_rate": 3.8333333333333334e-05, "loss": 2.4904, "step": 23 }, { "epoch": 0.001171875, "grad_norm": 2.626877784729004, "learning_rate": 4e-05, "loss": 2.4859, "step": 24 }, { "epoch": 0.001220703125, "grad_norm": 2.270517110824585, "learning_rate": 4.1666666666666665e-05, "loss": 2.49, "step": 25 }, { "epoch": 0.00126953125, "grad_norm": 2.434091567993164, "learning_rate": 4.3333333333333334e-05, "loss": 2.4918, "step": 26 }, { "epoch": 0.001318359375, "grad_norm": 2.085463047027588, "learning_rate": 4.4999999999999996e-05, "loss": 2.4646, "step": 27 }, { "epoch": 0.0013671875, "grad_norm": 3.035066843032837, "learning_rate": 4.666666666666667e-05, "loss": 2.4704, "step": 28 }, { "epoch": 0.001416015625, "grad_norm": 2.1118831634521484, "learning_rate": 4.8333333333333334e-05, "loss": 2.4965, "step": 29 }, { "epoch": 0.00146484375, "grad_norm": 3.057985305786133, "learning_rate": 5e-05, "loss": 2.5115, "step": 30 }, { "epoch": 0.001513671875, "grad_norm": 2.5942137241363525, "learning_rate": 5.1666666666666664e-05, "loss": 2.4967, "step": 31 }, { "epoch": 0.0015625, "grad_norm": 2.3938307762145996, "learning_rate": 5.333333333333334e-05, "loss": 2.4406, "step": 32 }, { "epoch": 0.001611328125, "grad_norm": 2.798466205596924, "learning_rate": 5.5e-05, "loss": 2.4535, "step": 33 }, { "epoch": 0.00166015625, "grad_norm": 2.9483678340911865, "learning_rate": 5.6666666666666664e-05, "loss": 2.5011, "step": 34 }, { "epoch": 0.001708984375, "grad_norm": 2.3530960083007812, "learning_rate": 5.833333333333333e-05, "loss": 2.4455, "step": 35 }, { "epoch": 0.0017578125, "grad_norm": 2.9818661212921143, "learning_rate": 6e-05, "loss": 2.4078, "step": 36 }, { "epoch": 0.001806640625, "grad_norm": 2.2355690002441406, "learning_rate": 6.166666666666667e-05, "loss": 2.4619, "step": 37 }, { "epoch": 0.00185546875, "grad_norm": 3.023022174835205, "learning_rate": 6.333333333333335e-05, "loss": 2.4681, "step": 38 }, { "epoch": 0.001904296875, "grad_norm": 2.570255994796753, "learning_rate": 6.500000000000001e-05, "loss": 2.406, "step": 39 }, { "epoch": 0.001953125, "grad_norm": 3.1716997623443604, "learning_rate": 6.666666666666667e-05, "loss": 2.4711, "step": 40 }, { "epoch": 0.002001953125, "grad_norm": 2.712688446044922, "learning_rate": 6.833333333333333e-05, "loss": 2.4452, "step": 41 }, { "epoch": 0.00205078125, "grad_norm": 3.085697889328003, "learning_rate": 7.000000000000001e-05, "loss": 2.4889, "step": 42 }, { "epoch": 0.002099609375, "grad_norm": 2.438253402709961, "learning_rate": 7.166666666666667e-05, "loss": 2.4232, "step": 43 }, { "epoch": 0.0021484375, "grad_norm": 2.7191851139068604, "learning_rate": 7.333333333333333e-05, "loss": 2.4239, "step": 44 }, { "epoch": 0.002197265625, "grad_norm": 2.640497922897339, "learning_rate": 7.5e-05, "loss": 2.3933, "step": 45 }, { "epoch": 0.00224609375, "grad_norm": 2.6633174419403076, "learning_rate": 7.666666666666667e-05, "loss": 2.3897, "step": 46 }, { "epoch": 0.002294921875, "grad_norm": 3.131101608276367, "learning_rate": 7.833333333333334e-05, "loss": 2.4219, "step": 47 }, { "epoch": 0.00234375, "grad_norm": 2.8292295932769775, "learning_rate": 8e-05, "loss": 2.4151, "step": 48 }, { "epoch": 0.002392578125, "grad_norm": 3.212761402130127, "learning_rate": 8.166666666666667e-05, "loss": 2.4375, "step": 49 }, { "epoch": 0.00244140625, "grad_norm": 2.6636600494384766, "learning_rate": 8.333333333333333e-05, "loss": 2.4421, "step": 50 }, { "epoch": 0.002490234375, "grad_norm": 2.48830246925354, "learning_rate": 8.5e-05, "loss": 2.416, "step": 51 }, { "epoch": 0.0025390625, "grad_norm": 2.9093830585479736, "learning_rate": 8.666666666666667e-05, "loss": 2.4321, "step": 52 }, { "epoch": 0.002587890625, "grad_norm": 3.0472614765167236, "learning_rate": 8.833333333333333e-05, "loss": 2.4275, "step": 53 }, { "epoch": 0.00263671875, "grad_norm": 3.5212461948394775, "learning_rate": 8.999999999999999e-05, "loss": 2.4677, "step": 54 }, { "epoch": 0.002685546875, "grad_norm": 2.316643238067627, "learning_rate": 9.166666666666667e-05, "loss": 2.4037, "step": 55 }, { "epoch": 0.002734375, "grad_norm": 3.711860179901123, "learning_rate": 9.333333333333334e-05, "loss": 2.3948, "step": 56 }, { "epoch": 0.002783203125, "grad_norm": 2.40781307220459, "learning_rate": 9.5e-05, "loss": 2.4246, "step": 57 }, { "epoch": 0.00283203125, "grad_norm": 2.736288547515869, "learning_rate": 9.666666666666667e-05, "loss": 2.4107, "step": 58 }, { "epoch": 0.002880859375, "grad_norm": 2.8276584148406982, "learning_rate": 9.833333333333333e-05, "loss": 2.4113, "step": 59 }, { "epoch": 0.0029296875, "grad_norm": 2.513413906097412, "learning_rate": 0.0001, "loss": 2.4222, "step": 60 }, { "epoch": 0.002978515625, "grad_norm": 3.1459226608276367, "learning_rate": 0.00010166666666666667, "loss": 2.4197, "step": 61 }, { "epoch": 0.00302734375, "grad_norm": 2.3861896991729736, "learning_rate": 0.00010333333333333333, "loss": 2.3856, "step": 62 }, { "epoch": 0.003076171875, "grad_norm": 2.570279359817505, "learning_rate": 0.000105, "loss": 2.4056, "step": 63 }, { "epoch": 0.003125, "grad_norm": 2.760467767715454, "learning_rate": 0.00010666666666666668, "loss": 2.3716, "step": 64 }, { "epoch": 0.003173828125, "grad_norm": 3.402139186859131, "learning_rate": 0.00010833333333333334, "loss": 2.4045, "step": 65 }, { "epoch": 0.00322265625, "grad_norm": 2.985966205596924, "learning_rate": 0.00011, "loss": 2.4304, "step": 66 }, { "epoch": 0.003271484375, "grad_norm": 3.0482993125915527, "learning_rate": 0.00011166666666666667, "loss": 2.4217, "step": 67 }, { "epoch": 0.0033203125, "grad_norm": 2.9162418842315674, "learning_rate": 0.00011333333333333333, "loss": 2.4275, "step": 68 }, { "epoch": 0.003369140625, "grad_norm": 2.831961154937744, "learning_rate": 0.000115, "loss": 2.4125, "step": 69 }, { "epoch": 0.00341796875, "grad_norm": 3.293828010559082, "learning_rate": 0.00011666666666666667, "loss": 2.4255, "step": 70 }, { "epoch": 0.003466796875, "grad_norm": 2.482706069946289, "learning_rate": 0.00011833333333333334, "loss": 2.4007, "step": 71 }, { "epoch": 0.003515625, "grad_norm": 4.1448893547058105, "learning_rate": 0.00012, "loss": 2.4597, "step": 72 }, { "epoch": 0.003564453125, "grad_norm": 2.2882614135742188, "learning_rate": 0.00012166666666666668, "loss": 2.4331, "step": 73 }, { "epoch": 0.00361328125, "grad_norm": 3.412641763687134, "learning_rate": 0.00012333333333333334, "loss": 2.4507, "step": 74 }, { "epoch": 0.003662109375, "grad_norm": 2.956576108932495, "learning_rate": 0.000125, "loss": 2.4192, "step": 75 }, { "epoch": 0.0037109375, "grad_norm": 3.549410343170166, "learning_rate": 0.0001266666666666667, "loss": 2.3854, "step": 76 }, { "epoch": 0.003759765625, "grad_norm": 2.2454426288604736, "learning_rate": 0.00012833333333333333, "loss": 2.411, "step": 77 }, { "epoch": 0.00380859375, "grad_norm": 2.7541580200195312, "learning_rate": 0.00013000000000000002, "loss": 2.3612, "step": 78 }, { "epoch": 0.003857421875, "grad_norm": 2.477799415588379, "learning_rate": 0.00013166666666666665, "loss": 2.3508, "step": 79 }, { "epoch": 0.00390625, "grad_norm": 2.621974468231201, "learning_rate": 0.00013333333333333334, "loss": 2.4026, "step": 80 }, { "epoch": 0.003955078125, "grad_norm": 2.838858127593994, "learning_rate": 0.000135, "loss": 2.3514, "step": 81 }, { "epoch": 0.00400390625, "grad_norm": 2.676957845687866, "learning_rate": 0.00013666666666666666, "loss": 2.3729, "step": 82 }, { "epoch": 0.004052734375, "grad_norm": 2.9583232402801514, "learning_rate": 0.00013833333333333333, "loss": 2.3985, "step": 83 }, { "epoch": 0.0041015625, "grad_norm": 2.6675074100494385, "learning_rate": 0.00014000000000000001, "loss": 2.3857, "step": 84 }, { "epoch": 0.004150390625, "grad_norm": 2.4405019283294678, "learning_rate": 0.00014166666666666668, "loss": 2.3699, "step": 85 }, { "epoch": 0.00419921875, "grad_norm": 2.809626579284668, "learning_rate": 0.00014333333333333334, "loss": 2.3985, "step": 86 }, { "epoch": 0.004248046875, "grad_norm": 3.125958204269409, "learning_rate": 0.000145, "loss": 2.4182, "step": 87 }, { "epoch": 0.004296875, "grad_norm": 2.9860877990722656, "learning_rate": 0.00014666666666666666, "loss": 2.3894, "step": 88 }, { "epoch": 0.004345703125, "grad_norm": 2.9288225173950195, "learning_rate": 0.00014833333333333335, "loss": 2.3872, "step": 89 }, { "epoch": 0.00439453125, "grad_norm": 2.0713138580322266, "learning_rate": 0.00015, "loss": 2.3606, "step": 90 }, { "epoch": 0.004443359375, "grad_norm": 2.774677276611328, "learning_rate": 0.00015166666666666668, "loss": 2.3798, "step": 91 }, { "epoch": 0.0044921875, "grad_norm": 2.2298028469085693, "learning_rate": 0.00015333333333333334, "loss": 2.3657, "step": 92 }, { "epoch": 0.004541015625, "grad_norm": 3.1723973751068115, "learning_rate": 0.000155, "loss": 2.3639, "step": 93 }, { "epoch": 0.00458984375, "grad_norm": 2.1981260776519775, "learning_rate": 0.0001566666666666667, "loss": 2.3779, "step": 94 }, { "epoch": 0.004638671875, "grad_norm": 3.482799768447876, "learning_rate": 0.00015833333333333332, "loss": 2.3749, "step": 95 }, { "epoch": 0.0046875, "grad_norm": 2.8117873668670654, "learning_rate": 0.00016, "loss": 2.4058, "step": 96 }, { "epoch": 0.004736328125, "grad_norm": 3.0591752529144287, "learning_rate": 0.00016166666666666665, "loss": 2.3457, "step": 97 }, { "epoch": 0.00478515625, "grad_norm": 2.657057046890259, "learning_rate": 0.00016333333333333334, "loss": 2.4036, "step": 98 }, { "epoch": 0.004833984375, "grad_norm": 2.2431693077087402, "learning_rate": 0.000165, "loss": 2.3698, "step": 99 }, { "epoch": 0.0048828125, "grad_norm": 3.0716569423675537, "learning_rate": 0.00016666666666666666, "loss": 2.366, "step": 100 }, { "epoch": 0.004931640625, "grad_norm": 2.4878134727478027, "learning_rate": 0.00016833333333333335, "loss": 2.3324, "step": 101 }, { "epoch": 0.00498046875, "grad_norm": 2.3556339740753174, "learning_rate": 0.00017, "loss": 2.355, "step": 102 }, { "epoch": 0.005029296875, "grad_norm": 2.740074396133423, "learning_rate": 0.00017166666666666667, "loss": 2.3727, "step": 103 }, { "epoch": 0.005078125, "grad_norm": 2.6185343265533447, "learning_rate": 0.00017333333333333334, "loss": 2.3336, "step": 104 }, { "epoch": 0.005126953125, "grad_norm": 2.7117085456848145, "learning_rate": 0.000175, "loss": 2.3771, "step": 105 }, { "epoch": 0.00517578125, "grad_norm": 2.372227191925049, "learning_rate": 0.00017666666666666666, "loss": 2.4055, "step": 106 }, { "epoch": 0.005224609375, "grad_norm": 2.7012741565704346, "learning_rate": 0.00017833333333333335, "loss": 2.382, "step": 107 }, { "epoch": 0.0052734375, "grad_norm": 2.835235834121704, "learning_rate": 0.00017999999999999998, "loss": 2.3455, "step": 108 }, { "epoch": 0.005322265625, "grad_norm": 2.9969232082366943, "learning_rate": 0.00018166666666666667, "loss": 2.3777, "step": 109 }, { "epoch": 0.00537109375, "grad_norm": 2.0129761695861816, "learning_rate": 0.00018333333333333334, "loss": 2.3413, "step": 110 }, { "epoch": 0.005419921875, "grad_norm": 3.2138476371765137, "learning_rate": 0.000185, "loss": 2.4026, "step": 111 }, { "epoch": 0.00546875, "grad_norm": 2.4312984943389893, "learning_rate": 0.0001866666666666667, "loss": 2.3665, "step": 112 }, { "epoch": 0.005517578125, "grad_norm": 2.901214122772217, "learning_rate": 0.00018833333333333332, "loss": 2.4032, "step": 113 }, { "epoch": 0.00556640625, "grad_norm": 2.1150920391082764, "learning_rate": 0.00019, "loss": 2.3054, "step": 114 }, { "epoch": 0.005615234375, "grad_norm": 2.271794319152832, "learning_rate": 0.00019166666666666667, "loss": 2.3741, "step": 115 }, { "epoch": 0.0056640625, "grad_norm": 2.839587926864624, "learning_rate": 0.00019333333333333333, "loss": 2.345, "step": 116 }, { "epoch": 0.005712890625, "grad_norm": 2.8426780700683594, "learning_rate": 0.00019500000000000002, "loss": 2.3504, "step": 117 }, { "epoch": 0.00576171875, "grad_norm": 2.404897928237915, "learning_rate": 0.00019666666666666666, "loss": 2.3752, "step": 118 }, { "epoch": 0.005810546875, "grad_norm": 2.8277604579925537, "learning_rate": 0.00019833333333333335, "loss": 2.3137, "step": 119 }, { "epoch": 0.005859375, "grad_norm": 2.2641243934631348, "learning_rate": 0.0002, "loss": 2.4033, "step": 120 }, { "epoch": 0.005908203125, "grad_norm": 3.0354514122009277, "learning_rate": 0.00020166666666666667, "loss": 2.3603, "step": 121 }, { "epoch": 0.00595703125, "grad_norm": 2.598658800125122, "learning_rate": 0.00020333333333333333, "loss": 2.3463, "step": 122 }, { "epoch": 0.006005859375, "grad_norm": 2.641631603240967, "learning_rate": 0.000205, "loss": 2.365, "step": 123 }, { "epoch": 0.0060546875, "grad_norm": 2.6147561073303223, "learning_rate": 0.00020666666666666666, "loss": 2.3318, "step": 124 }, { "epoch": 0.006103515625, "grad_norm": 2.8690073490142822, "learning_rate": 0.00020833333333333335, "loss": 2.4203, "step": 125 }, { "epoch": 0.00615234375, "grad_norm": 2.604627847671509, "learning_rate": 0.00021, "loss": 2.3828, "step": 126 }, { "epoch": 0.006201171875, "grad_norm": 2.3659303188323975, "learning_rate": 0.00021166666666666667, "loss": 2.401, "step": 127 }, { "epoch": 0.00625, "grad_norm": 2.4328970909118652, "learning_rate": 0.00021333333333333336, "loss": 2.3686, "step": 128 }, { "epoch": 0.006298828125, "grad_norm": 2.082382917404175, "learning_rate": 0.000215, "loss": 2.3201, "step": 129 }, { "epoch": 0.00634765625, "grad_norm": 2.7716994285583496, "learning_rate": 0.00021666666666666668, "loss": 2.3312, "step": 130 }, { "epoch": 0.006396484375, "grad_norm": 2.2915403842926025, "learning_rate": 0.00021833333333333332, "loss": 2.3399, "step": 131 }, { "epoch": 0.0064453125, "grad_norm": 3.4763309955596924, "learning_rate": 0.00022, "loss": 2.4128, "step": 132 }, { "epoch": 0.006494140625, "grad_norm": 2.105339765548706, "learning_rate": 0.00022166666666666667, "loss": 2.3552, "step": 133 }, { "epoch": 0.00654296875, "grad_norm": 2.809354782104492, "learning_rate": 0.00022333333333333333, "loss": 2.3691, "step": 134 }, { "epoch": 0.006591796875, "grad_norm": 2.932333469390869, "learning_rate": 0.00022500000000000002, "loss": 2.3575, "step": 135 }, { "epoch": 0.006640625, "grad_norm": 2.3146536350250244, "learning_rate": 0.00022666666666666666, "loss": 2.3477, "step": 136 }, { "epoch": 0.006689453125, "grad_norm": 1.9505689144134521, "learning_rate": 0.00022833333333333334, "loss": 2.3245, "step": 137 }, { "epoch": 0.00673828125, "grad_norm": 2.5521299839019775, "learning_rate": 0.00023, "loss": 2.3537, "step": 138 }, { "epoch": 0.006787109375, "grad_norm": 2.38706636428833, "learning_rate": 0.00023166666666666667, "loss": 2.3732, "step": 139 }, { "epoch": 0.0068359375, "grad_norm": 2.349370002746582, "learning_rate": 0.00023333333333333333, "loss": 2.3329, "step": 140 }, { "epoch": 0.006884765625, "grad_norm": 2.526292085647583, "learning_rate": 0.000235, "loss": 2.3776, "step": 141 }, { "epoch": 0.00693359375, "grad_norm": 2.145763397216797, "learning_rate": 0.00023666666666666668, "loss": 2.3492, "step": 142 }, { "epoch": 0.006982421875, "grad_norm": 2.3557987213134766, "learning_rate": 0.00023833333333333334, "loss": 2.3868, "step": 143 }, { "epoch": 0.00703125, "grad_norm": 2.290325164794922, "learning_rate": 0.00024, "loss": 2.3431, "step": 144 }, { "epoch": 0.007080078125, "grad_norm": 2.555514097213745, "learning_rate": 0.00024166666666666667, "loss": 2.3227, "step": 145 }, { "epoch": 0.00712890625, "grad_norm": 2.3130176067352295, "learning_rate": 0.00024333333333333336, "loss": 2.3307, "step": 146 }, { "epoch": 0.007177734375, "grad_norm": 2.797921895980835, "learning_rate": 0.000245, "loss": 2.3439, "step": 147 }, { "epoch": 0.0072265625, "grad_norm": 2.041351079940796, "learning_rate": 0.0002466666666666667, "loss": 2.324, "step": 148 }, { "epoch": 0.007275390625, "grad_norm": 2.3200390338897705, "learning_rate": 0.0002483333333333333, "loss": 2.359, "step": 149 }, { "epoch": 0.00732421875, "grad_norm": 2.113550901412964, "learning_rate": 0.00025, "loss": 2.3075, "step": 150 }, { "epoch": 0.007373046875, "grad_norm": 2.2954418659210205, "learning_rate": 0.00025166666666666664, "loss": 2.3366, "step": 151 }, { "epoch": 0.007421875, "grad_norm": 2.6506223678588867, "learning_rate": 0.0002533333333333334, "loss": 2.3284, "step": 152 }, { "epoch": 0.007470703125, "grad_norm": 2.192584753036499, "learning_rate": 0.000255, "loss": 2.3235, "step": 153 }, { "epoch": 0.00751953125, "grad_norm": 2.575141191482544, "learning_rate": 0.00025666666666666665, "loss": 2.3287, "step": 154 }, { "epoch": 0.007568359375, "grad_norm": 3.003913164138794, "learning_rate": 0.00025833333333333334, "loss": 2.3995, "step": 155 }, { "epoch": 0.0076171875, "grad_norm": 2.141767740249634, "learning_rate": 0.00026000000000000003, "loss": 2.393, "step": 156 }, { "epoch": 0.007666015625, "grad_norm": 3.2794554233551025, "learning_rate": 0.00026166666666666667, "loss": 2.3355, "step": 157 }, { "epoch": 0.00771484375, "grad_norm": 1.6003507375717163, "learning_rate": 0.0002633333333333333, "loss": 2.2799, "step": 158 }, { "epoch": 0.007763671875, "grad_norm": 2.8264713287353516, "learning_rate": 0.00026500000000000004, "loss": 2.4284, "step": 159 }, { "epoch": 0.0078125, "grad_norm": 1.8204814195632935, "learning_rate": 0.0002666666666666667, "loss": 2.3304, "step": 160 }, { "epoch": 0.007861328125, "grad_norm": 3.3055431842803955, "learning_rate": 0.0002683333333333333, "loss": 2.3368, "step": 161 }, { "epoch": 0.00791015625, "grad_norm": 1.9580012559890747, "learning_rate": 0.00027, "loss": 2.3485, "step": 162 }, { "epoch": 0.007958984375, "grad_norm": 2.221494197845459, "learning_rate": 0.0002716666666666667, "loss": 2.3319, "step": 163 }, { "epoch": 0.0080078125, "grad_norm": 3.7390456199645996, "learning_rate": 0.00027333333333333333, "loss": 2.3619, "step": 164 }, { "epoch": 0.008056640625, "grad_norm": 1.6563920974731445, "learning_rate": 0.000275, "loss": 2.3509, "step": 165 }, { "epoch": 0.00810546875, "grad_norm": 2.7563695907592773, "learning_rate": 0.00027666666666666665, "loss": 2.401, "step": 166 }, { "epoch": 0.008154296875, "grad_norm": 2.030979633331299, "learning_rate": 0.00027833333333333334, "loss": 2.3594, "step": 167 }, { "epoch": 0.008203125, "grad_norm": 1.91573166847229, "learning_rate": 0.00028000000000000003, "loss": 2.3625, "step": 168 }, { "epoch": 0.008251953125, "grad_norm": 2.622398614883423, "learning_rate": 0.00028166666666666666, "loss": 2.4478, "step": 169 }, { "epoch": 0.00830078125, "grad_norm": 1.960227370262146, "learning_rate": 0.00028333333333333335, "loss": 2.3159, "step": 170 }, { "epoch": 0.008349609375, "grad_norm": 2.3916754722595215, "learning_rate": 0.000285, "loss": 2.3743, "step": 171 }, { "epoch": 0.0083984375, "grad_norm": 2.4802334308624268, "learning_rate": 0.0002866666666666667, "loss": 2.3946, "step": 172 }, { "epoch": 0.008447265625, "grad_norm": 3.170199155807495, "learning_rate": 0.0002883333333333333, "loss": 2.362, "step": 173 }, { "epoch": 0.00849609375, "grad_norm": 2.7041125297546387, "learning_rate": 0.00029, "loss": 2.3649, "step": 174 }, { "epoch": 0.008544921875, "grad_norm": 2.0458993911743164, "learning_rate": 0.0002916666666666667, "loss": 2.3565, "step": 175 }, { "epoch": 0.00859375, "grad_norm": 3.5018844604492188, "learning_rate": 0.0002933333333333333, "loss": 2.3594, "step": 176 }, { "epoch": 0.008642578125, "grad_norm": 2.0064213275909424, "learning_rate": 0.000295, "loss": 2.3496, "step": 177 }, { "epoch": 0.00869140625, "grad_norm": 2.506582736968994, "learning_rate": 0.0002966666666666667, "loss": 2.3935, "step": 178 }, { "epoch": 0.008740234375, "grad_norm": 2.065197467803955, "learning_rate": 0.00029833333333333334, "loss": 2.3303, "step": 179 }, { "epoch": 0.0087890625, "grad_norm": 2.3425021171569824, "learning_rate": 0.0003, "loss": 2.345, "step": 180 }, { "epoch": 0.008837890625, "grad_norm": 2.3031063079833984, "learning_rate": 0.0003016666666666667, "loss": 2.3457, "step": 181 }, { "epoch": 0.00888671875, "grad_norm": 1.9882452487945557, "learning_rate": 0.00030333333333333335, "loss": 2.3344, "step": 182 }, { "epoch": 0.008935546875, "grad_norm": 2.7738137245178223, "learning_rate": 0.000305, "loss": 2.3534, "step": 183 }, { "epoch": 0.008984375, "grad_norm": 2.2017409801483154, "learning_rate": 0.0003066666666666667, "loss": 2.3549, "step": 184 }, { "epoch": 0.009033203125, "grad_norm": 1.9114121198654175, "learning_rate": 0.00030833333333333337, "loss": 2.3132, "step": 185 }, { "epoch": 0.00908203125, "grad_norm": 2.6744792461395264, "learning_rate": 0.00031, "loss": 2.2926, "step": 186 }, { "epoch": 0.009130859375, "grad_norm": 2.091892957687378, "learning_rate": 0.00031166666666666663, "loss": 2.3369, "step": 187 }, { "epoch": 0.0091796875, "grad_norm": 2.3713741302490234, "learning_rate": 0.0003133333333333334, "loss": 2.3372, "step": 188 }, { "epoch": 0.009228515625, "grad_norm": 2.3777894973754883, "learning_rate": 0.000315, "loss": 2.3147, "step": 189 }, { "epoch": 0.00927734375, "grad_norm": 2.0990676879882812, "learning_rate": 0.00031666666666666665, "loss": 2.3213, "step": 190 }, { "epoch": 0.009326171875, "grad_norm": 2.071045160293579, "learning_rate": 0.00031833333333333334, "loss": 2.318, "step": 191 }, { "epoch": 0.009375, "grad_norm": 2.2684216499328613, "learning_rate": 0.00032, "loss": 2.3448, "step": 192 }, { "epoch": 0.009423828125, "grad_norm": 2.3174760341644287, "learning_rate": 0.00032166666666666666, "loss": 2.3854, "step": 193 }, { "epoch": 0.00947265625, "grad_norm": 2.1530802249908447, "learning_rate": 0.0003233333333333333, "loss": 2.3001, "step": 194 }, { "epoch": 0.009521484375, "grad_norm": 2.532703399658203, "learning_rate": 0.00032500000000000004, "loss": 2.3664, "step": 195 }, { "epoch": 0.0095703125, "grad_norm": 2.478719711303711, "learning_rate": 0.0003266666666666667, "loss": 2.3194, "step": 196 }, { "epoch": 0.009619140625, "grad_norm": 2.37851619720459, "learning_rate": 0.0003283333333333333, "loss": 2.3163, "step": 197 }, { "epoch": 0.00966796875, "grad_norm": 2.0137417316436768, "learning_rate": 0.00033, "loss": 2.3007, "step": 198 }, { "epoch": 0.009716796875, "grad_norm": 2.2473931312561035, "learning_rate": 0.0003316666666666667, "loss": 2.3237, "step": 199 }, { "epoch": 0.009765625, "grad_norm": 1.7085808515548706, "learning_rate": 0.0003333333333333333, "loss": 2.3039, "step": 200 }, { "epoch": 0.009814453125, "grad_norm": 2.0760140419006348, "learning_rate": 0.000335, "loss": 2.2995, "step": 201 }, { "epoch": 0.00986328125, "grad_norm": 2.4802536964416504, "learning_rate": 0.0003366666666666667, "loss": 2.3513, "step": 202 }, { "epoch": 0.009912109375, "grad_norm": 2.17691707611084, "learning_rate": 0.00033833333333333334, "loss": 2.3196, "step": 203 }, { "epoch": 0.0099609375, "grad_norm": 2.705263614654541, "learning_rate": 0.00034, "loss": 2.3624, "step": 204 }, { "epoch": 0.010009765625, "grad_norm": 1.9401637315750122, "learning_rate": 0.00034166666666666666, "loss": 2.3864, "step": 205 }, { "epoch": 0.01005859375, "grad_norm": 1.993364691734314, "learning_rate": 0.00034333333333333335, "loss": 2.3316, "step": 206 }, { "epoch": 0.010107421875, "grad_norm": 2.5451107025146484, "learning_rate": 0.000345, "loss": 2.2908, "step": 207 }, { "epoch": 0.01015625, "grad_norm": 2.161508083343506, "learning_rate": 0.00034666666666666667, "loss": 2.3469, "step": 208 }, { "epoch": 0.010205078125, "grad_norm": 2.3414602279663086, "learning_rate": 0.00034833333333333336, "loss": 2.3233, "step": 209 }, { "epoch": 0.01025390625, "grad_norm": 1.6552653312683105, "learning_rate": 0.00035, "loss": 2.3013, "step": 210 }, { "epoch": 0.010302734375, "grad_norm": 2.844667911529541, "learning_rate": 0.0003516666666666667, "loss": 2.3106, "step": 211 }, { "epoch": 0.0103515625, "grad_norm": 1.2507349252700806, "learning_rate": 0.0003533333333333333, "loss": 2.3633, "step": 212 }, { "epoch": 0.010400390625, "grad_norm": 2.7553701400756836, "learning_rate": 0.000355, "loss": 2.3751, "step": 213 }, { "epoch": 0.01044921875, "grad_norm": 1.60267174243927, "learning_rate": 0.0003566666666666667, "loss": 2.3829, "step": 214 }, { "epoch": 0.010498046875, "grad_norm": 2.1744813919067383, "learning_rate": 0.00035833333333333333, "loss": 2.3816, "step": 215 }, { "epoch": 0.010546875, "grad_norm": 2.437032461166382, "learning_rate": 0.00035999999999999997, "loss": 2.3495, "step": 216 }, { "epoch": 0.010595703125, "grad_norm": 2.148350238800049, "learning_rate": 0.0003616666666666667, "loss": 2.3422, "step": 217 }, { "epoch": 0.01064453125, "grad_norm": 2.138033151626587, "learning_rate": 0.00036333333333333335, "loss": 2.3267, "step": 218 }, { "epoch": 0.010693359375, "grad_norm": 3.021000862121582, "learning_rate": 0.000365, "loss": 2.3652, "step": 219 }, { "epoch": 0.0107421875, "grad_norm": 2.49159836769104, "learning_rate": 0.00036666666666666667, "loss": 2.3791, "step": 220 }, { "epoch": 0.010791015625, "grad_norm": 1.8215956687927246, "learning_rate": 0.00036833333333333336, "loss": 2.3226, "step": 221 }, { "epoch": 0.01083984375, "grad_norm": 2.1575467586517334, "learning_rate": 0.00037, "loss": 2.3551, "step": 222 }, { "epoch": 0.010888671875, "grad_norm": 2.3334078788757324, "learning_rate": 0.00037166666666666663, "loss": 2.3636, "step": 223 }, { "epoch": 0.0109375, "grad_norm": 1.9949860572814941, "learning_rate": 0.0003733333333333334, "loss": 2.2964, "step": 224 }, { "epoch": 0.010986328125, "grad_norm": 2.2417726516723633, "learning_rate": 0.000375, "loss": 2.3365, "step": 225 }, { "epoch": 0.01103515625, "grad_norm": 1.9730198383331299, "learning_rate": 0.00037666666666666664, "loss": 2.341, "step": 226 }, { "epoch": 0.011083984375, "grad_norm": 2.8892033100128174, "learning_rate": 0.0003783333333333334, "loss": 2.3392, "step": 227 }, { "epoch": 0.0111328125, "grad_norm": 1.5947840213775635, "learning_rate": 0.00038, "loss": 2.3701, "step": 228 }, { "epoch": 0.011181640625, "grad_norm": 2.3063924312591553, "learning_rate": 0.00038166666666666666, "loss": 2.3602, "step": 229 }, { "epoch": 0.01123046875, "grad_norm": 2.260455369949341, "learning_rate": 0.00038333333333333334, "loss": 2.3752, "step": 230 }, { "epoch": 0.011279296875, "grad_norm": 2.45062518119812, "learning_rate": 0.00038500000000000003, "loss": 2.3195, "step": 231 }, { "epoch": 0.011328125, "grad_norm": 2.0798773765563965, "learning_rate": 0.00038666666666666667, "loss": 2.2955, "step": 232 }, { "epoch": 0.011376953125, "grad_norm": 2.2446303367614746, "learning_rate": 0.0003883333333333333, "loss": 2.2886, "step": 233 }, { "epoch": 0.01142578125, "grad_norm": 2.0845751762390137, "learning_rate": 0.00039000000000000005, "loss": 2.3162, "step": 234 }, { "epoch": 0.011474609375, "grad_norm": 1.8726879358291626, "learning_rate": 0.0003916666666666667, "loss": 2.2943, "step": 235 }, { "epoch": 0.0115234375, "grad_norm": 2.3778045177459717, "learning_rate": 0.0003933333333333333, "loss": 2.3208, "step": 236 }, { "epoch": 0.011572265625, "grad_norm": 1.9091603755950928, "learning_rate": 0.000395, "loss": 2.3776, "step": 237 }, { "epoch": 0.01162109375, "grad_norm": 2.7529587745666504, "learning_rate": 0.0003966666666666667, "loss": 2.3785, "step": 238 }, { "epoch": 0.011669921875, "grad_norm": 1.6701799631118774, "learning_rate": 0.00039833333333333333, "loss": 2.3436, "step": 239 }, { "epoch": 0.01171875, "grad_norm": 2.5356688499450684, "learning_rate": 0.0004, "loss": 2.3244, "step": 240 }, { "epoch": 0.011767578125, "grad_norm": 2.2259910106658936, "learning_rate": 0.00040166666666666665, "loss": 2.3867, "step": 241 }, { "epoch": 0.01181640625, "grad_norm": 1.8133517503738403, "learning_rate": 0.00040333333333333334, "loss": 2.2867, "step": 242 }, { "epoch": 0.011865234375, "grad_norm": 2.01393461227417, "learning_rate": 0.00040500000000000003, "loss": 2.313, "step": 243 }, { "epoch": 0.0119140625, "grad_norm": 1.9792957305908203, "learning_rate": 0.00040666666666666667, "loss": 2.3071, "step": 244 }, { "epoch": 0.011962890625, "grad_norm": 2.523732900619507, "learning_rate": 0.00040833333333333336, "loss": 2.3576, "step": 245 }, { "epoch": 0.01201171875, "grad_norm": 1.8717317581176758, "learning_rate": 0.00041, "loss": 2.3227, "step": 246 }, { "epoch": 0.012060546875, "grad_norm": 1.7978419065475464, "learning_rate": 0.0004116666666666667, "loss": 2.3328, "step": 247 }, { "epoch": 0.012109375, "grad_norm": 1.7979665994644165, "learning_rate": 0.0004133333333333333, "loss": 2.3305, "step": 248 }, { "epoch": 0.012158203125, "grad_norm": 2.0646727085113525, "learning_rate": 0.000415, "loss": 2.3593, "step": 249 }, { "epoch": 0.01220703125, "grad_norm": 1.943149447441101, "learning_rate": 0.0004166666666666667, "loss": 2.292, "step": 250 }, { "epoch": 0.012255859375, "grad_norm": 2.1647486686706543, "learning_rate": 0.00041833333333333333, "loss": 2.3326, "step": 251 }, { "epoch": 0.0123046875, "grad_norm": 2.1849098205566406, "learning_rate": 0.00042, "loss": 2.3204, "step": 252 }, { "epoch": 0.012353515625, "grad_norm": 1.4659898281097412, "learning_rate": 0.0004216666666666667, "loss": 2.2859, "step": 253 }, { "epoch": 0.01240234375, "grad_norm": 2.225933790206909, "learning_rate": 0.00042333333333333334, "loss": 2.3005, "step": 254 }, { "epoch": 0.012451171875, "grad_norm": 1.7187961339950562, "learning_rate": 0.000425, "loss": 2.3116, "step": 255 }, { "epoch": 0.0125, "grad_norm": 1.9951553344726562, "learning_rate": 0.0004266666666666667, "loss": 2.347, "step": 256 }, { "epoch": 0.012548828125, "grad_norm": 2.0290639400482178, "learning_rate": 0.00042833333333333335, "loss": 2.3481, "step": 257 }, { "epoch": 0.01259765625, "grad_norm": 1.7600483894348145, "learning_rate": 0.00043, "loss": 2.2933, "step": 258 }, { "epoch": 0.012646484375, "grad_norm": 1.7255584001541138, "learning_rate": 0.0004316666666666667, "loss": 2.3179, "step": 259 }, { "epoch": 0.0126953125, "grad_norm": 1.9388301372528076, "learning_rate": 0.00043333333333333337, "loss": 2.3031, "step": 260 }, { "epoch": 0.012744140625, "grad_norm": 1.7977020740509033, "learning_rate": 0.000435, "loss": 2.3109, "step": 261 }, { "epoch": 0.01279296875, "grad_norm": 2.315361976623535, "learning_rate": 0.00043666666666666664, "loss": 2.311, "step": 262 }, { "epoch": 0.012841796875, "grad_norm": 2.01888370513916, "learning_rate": 0.0004383333333333334, "loss": 2.3305, "step": 263 }, { "epoch": 0.012890625, "grad_norm": 1.9578895568847656, "learning_rate": 0.00044, "loss": 2.3007, "step": 264 }, { "epoch": 0.012939453125, "grad_norm": 1.7121473550796509, "learning_rate": 0.00044166666666666665, "loss": 2.2974, "step": 265 }, { "epoch": 0.01298828125, "grad_norm": 2.1549670696258545, "learning_rate": 0.00044333333333333334, "loss": 2.3569, "step": 266 }, { "epoch": 0.013037109375, "grad_norm": 1.5895888805389404, "learning_rate": 0.00044500000000000003, "loss": 2.2899, "step": 267 }, { "epoch": 0.0130859375, "grad_norm": 2.1880476474761963, "learning_rate": 0.00044666666666666666, "loss": 2.3141, "step": 268 }, { "epoch": 0.013134765625, "grad_norm": 1.9051223993301392, "learning_rate": 0.0004483333333333333, "loss": 2.2658, "step": 269 }, { "epoch": 0.01318359375, "grad_norm": 1.7932301759719849, "learning_rate": 0.00045000000000000004, "loss": 2.3404, "step": 270 }, { "epoch": 0.013232421875, "grad_norm": 2.0408878326416016, "learning_rate": 0.0004516666666666667, "loss": 2.3019, "step": 271 }, { "epoch": 0.01328125, "grad_norm": 1.4072861671447754, "learning_rate": 0.0004533333333333333, "loss": 2.2843, "step": 272 }, { "epoch": 0.013330078125, "grad_norm": 2.440856456756592, "learning_rate": 0.000455, "loss": 2.3015, "step": 273 }, { "epoch": 0.01337890625, "grad_norm": 1.811324954032898, "learning_rate": 0.0004566666666666667, "loss": 2.285, "step": 274 }, { "epoch": 0.013427734375, "grad_norm": 2.2357184886932373, "learning_rate": 0.0004583333333333333, "loss": 2.3037, "step": 275 }, { "epoch": 0.0134765625, "grad_norm": 2.1845011711120605, "learning_rate": 0.00046, "loss": 2.3133, "step": 276 }, { "epoch": 0.013525390625, "grad_norm": 1.4941364526748657, "learning_rate": 0.0004616666666666667, "loss": 2.298, "step": 277 }, { "epoch": 0.01357421875, "grad_norm": 2.2383196353912354, "learning_rate": 0.00046333333333333334, "loss": 2.3173, "step": 278 }, { "epoch": 0.013623046875, "grad_norm": 1.7457942962646484, "learning_rate": 0.000465, "loss": 2.2823, "step": 279 }, { "epoch": 0.013671875, "grad_norm": 1.809157371520996, "learning_rate": 0.00046666666666666666, "loss": 2.3414, "step": 280 }, { "epoch": 0.013720703125, "grad_norm": 1.8743499517440796, "learning_rate": 0.00046833333333333335, "loss": 2.3062, "step": 281 }, { "epoch": 0.01376953125, "grad_norm": 2.3972299098968506, "learning_rate": 0.00047, "loss": 2.3922, "step": 282 }, { "epoch": 0.013818359375, "grad_norm": 2.6943869590759277, "learning_rate": 0.0004716666666666667, "loss": 2.3845, "step": 283 }, { "epoch": 0.0138671875, "grad_norm": 1.6764538288116455, "learning_rate": 0.00047333333333333336, "loss": 2.3273, "step": 284 }, { "epoch": 0.013916015625, "grad_norm": 2.566230535507202, "learning_rate": 0.000475, "loss": 2.3969, "step": 285 }, { "epoch": 0.01396484375, "grad_norm": 1.5940152406692505, "learning_rate": 0.0004766666666666667, "loss": 2.3352, "step": 286 }, { "epoch": 0.014013671875, "grad_norm": 1.9473210573196411, "learning_rate": 0.0004783333333333333, "loss": 2.3287, "step": 287 }, { "epoch": 0.0140625, "grad_norm": 1.603654384613037, "learning_rate": 0.00048, "loss": 2.2858, "step": 288 }, { "epoch": 0.014111328125, "grad_norm": 2.2394118309020996, "learning_rate": 0.0004816666666666667, "loss": 2.3218, "step": 289 }, { "epoch": 0.01416015625, "grad_norm": 1.748161792755127, "learning_rate": 0.00048333333333333334, "loss": 2.3083, "step": 290 }, { "epoch": 0.014208984375, "grad_norm": 2.0850954055786133, "learning_rate": 0.00048499999999999997, "loss": 2.3533, "step": 291 }, { "epoch": 0.0142578125, "grad_norm": 2.095829963684082, "learning_rate": 0.0004866666666666667, "loss": 2.3098, "step": 292 }, { "epoch": 0.014306640625, "grad_norm": 2.080570697784424, "learning_rate": 0.0004883333333333333, "loss": 2.2999, "step": 293 }, { "epoch": 0.01435546875, "grad_norm": 2.0459952354431152, "learning_rate": 0.00049, "loss": 2.3169, "step": 294 }, { "epoch": 0.014404296875, "grad_norm": 1.920600414276123, "learning_rate": 0.0004916666666666666, "loss": 2.3316, "step": 295 }, { "epoch": 0.014453125, "grad_norm": 1.4309945106506348, "learning_rate": 0.0004933333333333334, "loss": 2.2857, "step": 296 }, { "epoch": 0.014501953125, "grad_norm": 1.5884697437286377, "learning_rate": 0.000495, "loss": 2.3018, "step": 297 }, { "epoch": 0.01455078125, "grad_norm": 1.7954009771347046, "learning_rate": 0.0004966666666666666, "loss": 2.3071, "step": 298 }, { "epoch": 0.014599609375, "grad_norm": 2.015831708908081, "learning_rate": 0.0004983333333333334, "loss": 2.3396, "step": 299 }, { "epoch": 0.0146484375, "grad_norm": 1.6044420003890991, "learning_rate": 0.0005, "loss": 2.315, "step": 300 }, { "epoch": 0.014697265625, "grad_norm": 1.5348933935165405, "learning_rate": 0.0004999999972734721, "loss": 2.324, "step": 301 }, { "epoch": 0.01474609375, "grad_norm": 1.8393815755844116, "learning_rate": 0.0004999999890938886, "loss": 2.255, "step": 302 }, { "epoch": 0.014794921875, "grad_norm": 1.5688097476959229, "learning_rate": 0.0004999999754612495, "loss": 2.3176, "step": 303 }, { "epoch": 0.01484375, "grad_norm": 2.001169443130493, "learning_rate": 0.0004999999563755552, "loss": 2.2774, "step": 304 }, { "epoch": 0.014892578125, "grad_norm": 1.4551414251327515, "learning_rate": 0.0004999999318368063, "loss": 2.2941, "step": 305 }, { "epoch": 0.01494140625, "grad_norm": 2.445831775665283, "learning_rate": 0.0004999999018450032, "loss": 2.2801, "step": 306 }, { "epoch": 0.014990234375, "grad_norm": 1.2696073055267334, "learning_rate": 0.0004999998664001467, "loss": 2.2957, "step": 307 }, { "epoch": 0.0150390625, "grad_norm": 1.7918946743011475, "learning_rate": 0.0004999998255022377, "loss": 2.3043, "step": 308 }, { "epoch": 0.015087890625, "grad_norm": 1.7889060974121094, "learning_rate": 0.0004999997791512773, "loss": 2.3607, "step": 309 }, { "epoch": 0.01513671875, "grad_norm": 1.5789686441421509, "learning_rate": 0.0004999997273472664, "loss": 2.3477, "step": 310 }, { "epoch": 0.015185546875, "grad_norm": 1.6218647956848145, "learning_rate": 0.0004999996700902063, "loss": 2.2985, "step": 311 }, { "epoch": 0.015234375, "grad_norm": 1.723612904548645, "learning_rate": 0.0004999996073800985, "loss": 2.3273, "step": 312 }, { "epoch": 0.015283203125, "grad_norm": 1.4688342809677124, "learning_rate": 0.0004999995392169444, "loss": 2.2644, "step": 313 }, { "epoch": 0.01533203125, "grad_norm": 1.495717167854309, "learning_rate": 0.0004999994656007457, "loss": 2.2923, "step": 314 }, { "epoch": 0.015380859375, "grad_norm": 1.869575023651123, "learning_rate": 0.0004999993865315043, "loss": 2.3483, "step": 315 }, { "epoch": 0.0154296875, "grad_norm": 1.5056192874908447, "learning_rate": 0.0004999993020092219, "loss": 2.3176, "step": 316 }, { "epoch": 0.015478515625, "grad_norm": 2.00551700592041, "learning_rate": 0.0004999992120339005, "loss": 2.3243, "step": 317 }, { "epoch": 0.01552734375, "grad_norm": 1.61916983127594, "learning_rate": 0.0004999991166055426, "loss": 2.2417, "step": 318 }, { "epoch": 0.015576171875, "grad_norm": 1.3153414726257324, "learning_rate": 0.0004999990157241504, "loss": 2.2624, "step": 319 }, { "epoch": 0.015625, "grad_norm": 1.611999750137329, "learning_rate": 0.0004999989093897262, "loss": 2.294, "step": 320 }, { "epoch": 0.015673828125, "grad_norm": 1.4217911958694458, "learning_rate": 0.0004999987976022727, "loss": 2.3171, "step": 321 }, { "epoch": 0.01572265625, "grad_norm": 1.3953319787979126, "learning_rate": 0.0004999986803617926, "loss": 2.324, "step": 322 }, { "epoch": 0.015771484375, "grad_norm": 1.8939422369003296, "learning_rate": 0.0004999985576682887, "loss": 2.3038, "step": 323 }, { "epoch": 0.0158203125, "grad_norm": 1.28023099899292, "learning_rate": 0.0004999984295217641, "loss": 2.2884, "step": 324 }, { "epoch": 0.015869140625, "grad_norm": 1.5818454027175903, "learning_rate": 0.0004999982959222216, "loss": 2.2956, "step": 325 }, { "epoch": 0.01591796875, "grad_norm": 1.2206186056137085, "learning_rate": 0.0004999981568696648, "loss": 2.2968, "step": 326 }, { "epoch": 0.015966796875, "grad_norm": 1.510621428489685, "learning_rate": 0.0004999980123640967, "loss": 2.3189, "step": 327 }, { "epoch": 0.016015625, "grad_norm": 1.076915979385376, "learning_rate": 0.0004999978624055212, "loss": 2.2882, "step": 328 }, { "epoch": 0.016064453125, "grad_norm": 1.7010080814361572, "learning_rate": 0.0004999977069939417, "loss": 2.2958, "step": 329 }, { "epoch": 0.01611328125, "grad_norm": 1.320186972618103, "learning_rate": 0.0004999975461293621, "loss": 2.3169, "step": 330 }, { "epoch": 0.016162109375, "grad_norm": 1.2899388074874878, "learning_rate": 0.000499997379811786, "loss": 2.2783, "step": 331 }, { "epoch": 0.0162109375, "grad_norm": 1.4213272333145142, "learning_rate": 0.0004999972080412177, "loss": 2.3067, "step": 332 }, { "epoch": 0.016259765625, "grad_norm": 1.2603358030319214, "learning_rate": 0.0004999970308176614, "loss": 2.2989, "step": 333 }, { "epoch": 0.01630859375, "grad_norm": 1.2234835624694824, "learning_rate": 0.0004999968481411212, "loss": 2.2856, "step": 334 }, { "epoch": 0.016357421875, "grad_norm": 1.614462971687317, "learning_rate": 0.0004999966600116017, "loss": 2.3289, "step": 335 }, { "epoch": 0.01640625, "grad_norm": 1.5529836416244507, "learning_rate": 0.0004999964664291073, "loss": 2.2379, "step": 336 }, { "epoch": 0.016455078125, "grad_norm": 1.5822420120239258, "learning_rate": 0.0004999962673936429, "loss": 2.2658, "step": 337 }, { "epoch": 0.01650390625, "grad_norm": 1.2184113264083862, "learning_rate": 0.0004999960629052131, "loss": 2.329, "step": 338 }, { "epoch": 0.016552734375, "grad_norm": 1.615167498588562, "learning_rate": 0.0004999958529638231, "loss": 2.2995, "step": 339 }, { "epoch": 0.0166015625, "grad_norm": 1.2446022033691406, "learning_rate": 0.0004999956375694776, "loss": 2.2713, "step": 340 }, { "epoch": 0.016650390625, "grad_norm": 1.413400411605835, "learning_rate": 0.0004999954167221822, "loss": 2.25, "step": 341 }, { "epoch": 0.01669921875, "grad_norm": 1.3605417013168335, "learning_rate": 0.0004999951904219421, "loss": 2.2745, "step": 342 }, { "epoch": 0.016748046875, "grad_norm": 1.1544911861419678, "learning_rate": 0.0004999949586687628, "loss": 2.2467, "step": 343 }, { "epoch": 0.016796875, "grad_norm": 1.2432329654693604, "learning_rate": 0.0004999947214626501, "loss": 2.2545, "step": 344 }, { "epoch": 0.016845703125, "grad_norm": 1.7458783388137817, "learning_rate": 0.0004999944788036093, "loss": 2.2735, "step": 345 }, { "epoch": 0.01689453125, "grad_norm": 1.2078280448913574, "learning_rate": 0.0004999942306916466, "loss": 2.271, "step": 346 }, { "epoch": 0.016943359375, "grad_norm": 1.3425395488739014, "learning_rate": 0.0004999939771267681, "loss": 2.2787, "step": 347 }, { "epoch": 0.0169921875, "grad_norm": 1.1564786434173584, "learning_rate": 0.0004999937181089796, "loss": 2.2967, "step": 348 }, { "epoch": 0.017041015625, "grad_norm": 1.2536113262176514, "learning_rate": 0.0004999934536382876, "loss": 2.2497, "step": 349 }, { "epoch": 0.01708984375, "grad_norm": 1.0669496059417725, "learning_rate": 0.0004999931837146987, "loss": 2.2606, "step": 350 }, { "epoch": 0.017138671875, "grad_norm": 1.2707973718643188, "learning_rate": 0.0004999929083382191, "loss": 2.2735, "step": 351 }, { "epoch": 0.0171875, "grad_norm": 1.3377846479415894, "learning_rate": 0.0004999926275088556, "loss": 2.2428, "step": 352 }, { "epoch": 0.017236328125, "grad_norm": 1.0431816577911377, "learning_rate": 0.0004999923412266151, "loss": 2.2489, "step": 353 }, { "epoch": 0.01728515625, "grad_norm": 1.3960106372833252, "learning_rate": 0.0004999920494915043, "loss": 2.2484, "step": 354 }, { "epoch": 0.017333984375, "grad_norm": 1.2242456674575806, "learning_rate": 0.0004999917523035306, "loss": 2.2956, "step": 355 }, { "epoch": 0.0173828125, "grad_norm": 1.3061434030532837, "learning_rate": 0.0004999914496627009, "loss": 2.2803, "step": 356 }, { "epoch": 0.017431640625, "grad_norm": 1.1237796545028687, "learning_rate": 0.0004999911415690228, "loss": 2.2534, "step": 357 }, { "epoch": 0.01748046875, "grad_norm": 1.4995311498641968, "learning_rate": 0.0004999908280225035, "loss": 2.3072, "step": 358 }, { "epoch": 0.017529296875, "grad_norm": 1.0745128393173218, "learning_rate": 0.0004999905090231508, "loss": 2.2145, "step": 359 }, { "epoch": 0.017578125, "grad_norm": 1.1401152610778809, "learning_rate": 0.0004999901845709722, "loss": 2.2632, "step": 360 }, { "epoch": 0.017626953125, "grad_norm": 1.2056151628494263, "learning_rate": 0.000499989854665976, "loss": 2.2474, "step": 361 }, { "epoch": 0.01767578125, "grad_norm": 1.3121532201766968, "learning_rate": 0.0004999895193081698, "loss": 2.2298, "step": 362 }, { "epoch": 0.017724609375, "grad_norm": 1.3396550416946411, "learning_rate": 0.0004999891784975616, "loss": 2.2918, "step": 363 }, { "epoch": 0.0177734375, "grad_norm": 0.8788726925849915, "learning_rate": 0.0004999888322341602, "loss": 2.2032, "step": 364 }, { "epoch": 0.017822265625, "grad_norm": 1.1981935501098633, "learning_rate": 0.0004999884805179735, "loss": 2.2357, "step": 365 }, { "epoch": 0.01787109375, "grad_norm": 1.2958950996398926, "learning_rate": 0.0004999881233490104, "loss": 2.268, "step": 366 }, { "epoch": 0.017919921875, "grad_norm": 1.0939016342163086, "learning_rate": 0.0004999877607272793, "loss": 2.2046, "step": 367 }, { "epoch": 0.01796875, "grad_norm": 1.0739270448684692, "learning_rate": 0.0004999873926527891, "loss": 2.1923, "step": 368 }, { "epoch": 0.018017578125, "grad_norm": 1.1360911130905151, "learning_rate": 0.0004999870191255487, "loss": 2.2403, "step": 369 }, { "epoch": 0.01806640625, "grad_norm": 1.2470375299453735, "learning_rate": 0.0004999866401455671, "loss": 2.287, "step": 370 }, { "epoch": 0.018115234375, "grad_norm": 1.071744680404663, "learning_rate": 0.0004999862557128535, "loss": 2.2666, "step": 371 }, { "epoch": 0.0181640625, "grad_norm": 1.0374903678894043, "learning_rate": 0.0004999858658274172, "loss": 2.2612, "step": 372 }, { "epoch": 0.018212890625, "grad_norm": 1.1167505979537964, "learning_rate": 0.0004999854704892678, "loss": 2.2567, "step": 373 }, { "epoch": 0.01826171875, "grad_norm": 1.101155161857605, "learning_rate": 0.0004999850696984147, "loss": 2.2757, "step": 374 }, { "epoch": 0.018310546875, "grad_norm": 1.149690866470337, "learning_rate": 0.0004999846634548677, "loss": 2.2963, "step": 375 }, { "epoch": 0.018359375, "grad_norm": 1.0479639768600464, "learning_rate": 0.0004999842517586367, "loss": 2.2626, "step": 376 }, { "epoch": 0.018408203125, "grad_norm": 1.0391684770584106, "learning_rate": 0.0004999838346097314, "loss": 2.2158, "step": 377 }, { "epoch": 0.01845703125, "grad_norm": 1.175155520439148, "learning_rate": 0.0004999834120081624, "loss": 2.2594, "step": 378 }, { "epoch": 0.018505859375, "grad_norm": 0.9731603264808655, "learning_rate": 0.0004999829839539395, "loss": 2.2192, "step": 379 }, { "epoch": 0.0185546875, "grad_norm": 1.1802496910095215, "learning_rate": 0.0004999825504470732, "loss": 2.2302, "step": 380 }, { "epoch": 0.018603515625, "grad_norm": 1.005942702293396, "learning_rate": 0.0004999821114875741, "loss": 2.2489, "step": 381 }, { "epoch": 0.01865234375, "grad_norm": 1.0946671962738037, "learning_rate": 0.0004999816670754527, "loss": 2.2637, "step": 382 }, { "epoch": 0.018701171875, "grad_norm": 0.999866247177124, "learning_rate": 0.0004999812172107199, "loss": 2.234, "step": 383 }, { "epoch": 0.01875, "grad_norm": 1.1667693853378296, "learning_rate": 0.0004999807618933866, "loss": 2.2686, "step": 384 }, { "epoch": 0.018798828125, "grad_norm": 1.0367521047592163, "learning_rate": 0.0004999803011234639, "loss": 2.216, "step": 385 }, { "epoch": 0.01884765625, "grad_norm": 1.026023268699646, "learning_rate": 0.0004999798349009626, "loss": 2.2526, "step": 386 }, { "epoch": 0.018896484375, "grad_norm": 1.0277860164642334, "learning_rate": 0.0004999793632258945, "loss": 2.2188, "step": 387 }, { "epoch": 0.0189453125, "grad_norm": 0.9801796674728394, "learning_rate": 0.0004999788860982706, "loss": 2.2198, "step": 388 }, { "epoch": 0.018994140625, "grad_norm": 1.1303502321243286, "learning_rate": 0.0004999784035181027, "loss": 2.2386, "step": 389 }, { "epoch": 0.01904296875, "grad_norm": 0.9130602478981018, "learning_rate": 0.0004999779154854024, "loss": 2.2418, "step": 390 }, { "epoch": 0.019091796875, "grad_norm": 0.8087050318717957, "learning_rate": 0.0004999774220001817, "loss": 2.2599, "step": 391 }, { "epoch": 0.019140625, "grad_norm": 0.8423206210136414, "learning_rate": 0.0004999769230624524, "loss": 2.2143, "step": 392 }, { "epoch": 0.019189453125, "grad_norm": 0.9421727061271667, "learning_rate": 0.0004999764186722265, "loss": 2.2191, "step": 393 }, { "epoch": 0.01923828125, "grad_norm": 0.8976558446884155, "learning_rate": 0.0004999759088295165, "loss": 2.2544, "step": 394 }, { "epoch": 0.019287109375, "grad_norm": 0.8205500841140747, "learning_rate": 0.0004999753935343345, "loss": 2.185, "step": 395 }, { "epoch": 0.0193359375, "grad_norm": 0.815346896648407, "learning_rate": 0.0004999748727866932, "loss": 2.2463, "step": 396 }, { "epoch": 0.019384765625, "grad_norm": 0.9144156575202942, "learning_rate": 0.000499974346586605, "loss": 2.2429, "step": 397 }, { "epoch": 0.01943359375, "grad_norm": 0.9285619258880615, "learning_rate": 0.0004999738149340828, "loss": 2.2524, "step": 398 }, { "epoch": 0.019482421875, "grad_norm": 0.9746325016021729, "learning_rate": 0.0004999732778291395, "loss": 2.2352, "step": 399 }, { "epoch": 0.01953125, "grad_norm": 1.186003565788269, "learning_rate": 0.000499972735271788, "loss": 2.2285, "step": 400 }, { "epoch": 0.019580078125, "grad_norm": 1.022895097732544, "learning_rate": 0.0004999721872620416, "loss": 2.2275, "step": 401 }, { "epoch": 0.01962890625, "grad_norm": 0.9313697814941406, "learning_rate": 0.0004999716337999135, "loss": 2.2152, "step": 402 }, { "epoch": 0.019677734375, "grad_norm": 0.7810239195823669, "learning_rate": 0.0004999710748854171, "loss": 2.2142, "step": 403 }, { "epoch": 0.0197265625, "grad_norm": 0.7980000376701355, "learning_rate": 0.0004999705105185659, "loss": 2.207, "step": 404 }, { "epoch": 0.019775390625, "grad_norm": 0.8942157030105591, "learning_rate": 0.0004999699406993736, "loss": 2.1903, "step": 405 }, { "epoch": 0.01982421875, "grad_norm": 0.8957229256629944, "learning_rate": 0.0004999693654278542, "loss": 2.2443, "step": 406 }, { "epoch": 0.019873046875, "grad_norm": 0.9681040048599243, "learning_rate": 0.0004999687847040213, "loss": 2.1944, "step": 407 }, { "epoch": 0.019921875, "grad_norm": 0.8693997859954834, "learning_rate": 0.0004999681985278894, "loss": 2.1993, "step": 408 }, { "epoch": 0.019970703125, "grad_norm": 0.605243980884552, "learning_rate": 0.0004999676068994723, "loss": 2.2156, "step": 409 }, { "epoch": 0.02001953125, "grad_norm": 0.6179031729698181, "learning_rate": 0.0004999670098187846, "loss": 2.1941, "step": 410 }, { "epoch": 0.020068359375, "grad_norm": 0.6937376260757446, "learning_rate": 0.0004999664072858406, "loss": 2.1825, "step": 411 }, { "epoch": 0.0201171875, "grad_norm": 0.6646479368209839, "learning_rate": 0.0004999657993006551, "loss": 2.2504, "step": 412 }, { "epoch": 0.020166015625, "grad_norm": 0.6952129006385803, "learning_rate": 0.0004999651858632425, "loss": 2.1959, "step": 413 }, { "epoch": 0.02021484375, "grad_norm": 0.7860904335975647, "learning_rate": 0.0004999645669736181, "loss": 2.1977, "step": 414 }, { "epoch": 0.020263671875, "grad_norm": 0.7402817010879517, "learning_rate": 0.0004999639426317966, "loss": 2.2039, "step": 415 }, { "epoch": 0.0203125, "grad_norm": 0.7312660813331604, "learning_rate": 0.0004999633128377932, "loss": 2.1754, "step": 416 }, { "epoch": 0.020361328125, "grad_norm": 0.7617877721786499, "learning_rate": 0.0004999626775916233, "loss": 2.1794, "step": 417 }, { "epoch": 0.02041015625, "grad_norm": 0.7947528958320618, "learning_rate": 0.000499962036893302, "loss": 2.1924, "step": 418 }, { "epoch": 0.020458984375, "grad_norm": 0.8301486968994141, "learning_rate": 0.0004999613907428451, "loss": 2.1735, "step": 419 }, { "epoch": 0.0205078125, "grad_norm": 0.7679468989372253, "learning_rate": 0.0004999607391402681, "loss": 2.2662, "step": 420 }, { "epoch": 0.020556640625, "grad_norm": 0.7471601366996765, "learning_rate": 0.000499960082085587, "loss": 2.1819, "step": 421 }, { "epoch": 0.02060546875, "grad_norm": 0.7870673537254333, "learning_rate": 0.0004999594195788175, "loss": 2.1662, "step": 422 }, { "epoch": 0.020654296875, "grad_norm": 0.9833211898803711, "learning_rate": 0.0004999587516199757, "loss": 2.238, "step": 423 }, { "epoch": 0.020703125, "grad_norm": 1.162604570388794, "learning_rate": 0.0004999580782090778, "loss": 2.2117, "step": 424 }, { "epoch": 0.020751953125, "grad_norm": 0.8635139465332031, "learning_rate": 0.0004999573993461402, "loss": 2.2199, "step": 425 }, { "epoch": 0.02080078125, "grad_norm": 0.7970139384269714, "learning_rate": 0.0004999567150311793, "loss": 2.1553, "step": 426 }, { "epoch": 0.020849609375, "grad_norm": 0.8810883164405823, "learning_rate": 0.0004999560252642116, "loss": 2.2069, "step": 427 }, { "epoch": 0.0208984375, "grad_norm": 0.9708061218261719, "learning_rate": 0.0004999553300452541, "loss": 2.2211, "step": 428 }, { "epoch": 0.020947265625, "grad_norm": 1.0271644592285156, "learning_rate": 0.0004999546293743233, "loss": 2.2538, "step": 429 }, { "epoch": 0.02099609375, "grad_norm": 0.9244514107704163, "learning_rate": 0.0004999539232514363, "loss": 2.2164, "step": 430 }, { "epoch": 0.021044921875, "grad_norm": 0.9049526453018188, "learning_rate": 0.0004999532116766102, "loss": 2.1859, "step": 431 }, { "epoch": 0.02109375, "grad_norm": 0.9149224758148193, "learning_rate": 0.0004999524946498624, "loss": 2.2179, "step": 432 }, { "epoch": 0.021142578125, "grad_norm": 1.0929995775222778, "learning_rate": 0.0004999517721712102, "loss": 2.1691, "step": 433 }, { "epoch": 0.02119140625, "grad_norm": 0.8563957214355469, "learning_rate": 0.000499951044240671, "loss": 2.1543, "step": 434 }, { "epoch": 0.021240234375, "grad_norm": 0.6833221912384033, "learning_rate": 0.0004999503108582626, "loss": 2.1851, "step": 435 }, { "epoch": 0.0212890625, "grad_norm": 0.6179527044296265, "learning_rate": 0.0004999495720240027, "loss": 2.2119, "step": 436 }, { "epoch": 0.021337890625, "grad_norm": 0.6697141528129578, "learning_rate": 0.0004999488277379091, "loss": 2.178, "step": 437 }, { "epoch": 0.02138671875, "grad_norm": 0.7857320308685303, "learning_rate": 0.000499948078, "loss": 2.2023, "step": 438 }, { "epoch": 0.021435546875, "grad_norm": 0.8651211261749268, "learning_rate": 0.0004999473228102934, "loss": 2.1944, "step": 439 }, { "epoch": 0.021484375, "grad_norm": 0.8215554356575012, "learning_rate": 0.0004999465621688078, "loss": 2.1841, "step": 440 }, { "epoch": 0.021533203125, "grad_norm": 0.8712490200996399, "learning_rate": 0.0004999457960755615, "loss": 2.2254, "step": 441 }, { "epoch": 0.02158203125, "grad_norm": 0.8187093138694763, "learning_rate": 0.0004999450245305732, "loss": 2.1938, "step": 442 }, { "epoch": 0.021630859375, "grad_norm": 0.6408248543739319, "learning_rate": 0.0004999442475338615, "loss": 2.1184, "step": 443 }, { "epoch": 0.0216796875, "grad_norm": 0.6392512917518616, "learning_rate": 0.0004999434650854452, "loss": 2.1873, "step": 444 }, { "epoch": 0.021728515625, "grad_norm": 0.7408367395401001, "learning_rate": 0.0004999426771853432, "loss": 2.1936, "step": 445 }, { "epoch": 0.02177734375, "grad_norm": 0.8407337665557861, "learning_rate": 0.000499941883833575, "loss": 2.2237, "step": 446 }, { "epoch": 0.021826171875, "grad_norm": 0.7074450254440308, "learning_rate": 0.0004999410850301592, "loss": 2.2101, "step": 447 }, { "epoch": 0.021875, "grad_norm": 0.7326095700263977, "learning_rate": 0.0004999402807751157, "loss": 2.192, "step": 448 }, { "epoch": 0.021923828125, "grad_norm": 0.868790864944458, "learning_rate": 0.0004999394710684637, "loss": 2.2082, "step": 449 }, { "epoch": 0.02197265625, "grad_norm": 0.8310099840164185, "learning_rate": 0.000499938655910223, "loss": 2.2207, "step": 450 }, { "epoch": 0.022021484375, "grad_norm": 0.7394064664840698, "learning_rate": 0.0004999378353004131, "loss": 2.1902, "step": 451 }, { "epoch": 0.0220703125, "grad_norm": 0.7682573199272156, "learning_rate": 0.0004999370092390541, "loss": 2.1845, "step": 452 }, { "epoch": 0.022119140625, "grad_norm": 0.5243517756462097, "learning_rate": 0.0004999361777261659, "loss": 2.2298, "step": 453 }, { "epoch": 0.02216796875, "grad_norm": 0.542306125164032, "learning_rate": 0.0004999353407617689, "loss": 2.2155, "step": 454 }, { "epoch": 0.022216796875, "grad_norm": 0.5445544719696045, "learning_rate": 0.0004999344983458831, "loss": 2.1741, "step": 455 }, { "epoch": 0.022265625, "grad_norm": 0.5082871913909912, "learning_rate": 0.000499933650478529, "loss": 2.1946, "step": 456 }, { "epoch": 0.022314453125, "grad_norm": 0.5569180846214294, "learning_rate": 0.0004999327971597272, "loss": 2.1718, "step": 457 }, { "epoch": 0.02236328125, "grad_norm": 0.6935536861419678, "learning_rate": 0.0004999319383894985, "loss": 2.1658, "step": 458 }, { "epoch": 0.022412109375, "grad_norm": 0.9169337749481201, "learning_rate": 0.0004999310741678635, "loss": 2.2267, "step": 459 }, { "epoch": 0.0224609375, "grad_norm": 1.185689926147461, "learning_rate": 0.000499930204494843, "loss": 2.2367, "step": 460 }, { "epoch": 0.022509765625, "grad_norm": 0.6849246025085449, "learning_rate": 0.0004999293293704584, "loss": 2.1848, "step": 461 }, { "epoch": 0.02255859375, "grad_norm": 0.7429744005203247, "learning_rate": 0.0004999284487947308, "loss": 2.1931, "step": 462 }, { "epoch": 0.022607421875, "grad_norm": 0.7490849494934082, "learning_rate": 0.0004999275627676816, "loss": 2.1668, "step": 463 }, { "epoch": 0.02265625, "grad_norm": 0.7807202935218811, "learning_rate": 0.0004999266712893322, "loss": 2.1802, "step": 464 }, { "epoch": 0.022705078125, "grad_norm": 0.7953987121582031, "learning_rate": 0.0004999257743597042, "loss": 2.1969, "step": 465 }, { "epoch": 0.02275390625, "grad_norm": 0.6700525283813477, "learning_rate": 0.0004999248719788193, "loss": 2.1749, "step": 466 }, { "epoch": 0.022802734375, "grad_norm": 0.59281325340271, "learning_rate": 0.0004999239641466995, "loss": 2.1663, "step": 467 }, { "epoch": 0.0228515625, "grad_norm": 0.5844286680221558, "learning_rate": 0.0004999230508633667, "loss": 2.1705, "step": 468 }, { "epoch": 0.022900390625, "grad_norm": 0.5928427577018738, "learning_rate": 0.0004999221321288431, "loss": 2.1821, "step": 469 }, { "epoch": 0.02294921875, "grad_norm": 0.7102261185646057, "learning_rate": 0.0004999212079431507, "loss": 2.1532, "step": 470 }, { "epoch": 0.022998046875, "grad_norm": 0.7195190787315369, "learning_rate": 0.0004999202783063124, "loss": 2.2063, "step": 471 }, { "epoch": 0.023046875, "grad_norm": 0.8541423082351685, "learning_rate": 0.0004999193432183504, "loss": 2.2079, "step": 472 }, { "epoch": 0.023095703125, "grad_norm": 0.7826182246208191, "learning_rate": 0.0004999184026792874, "loss": 2.22, "step": 473 }, { "epoch": 0.02314453125, "grad_norm": 0.821683943271637, "learning_rate": 0.0004999174566891461, "loss": 2.1681, "step": 474 }, { "epoch": 0.023193359375, "grad_norm": 0.9101322889328003, "learning_rate": 0.0004999165052479497, "loss": 2.2059, "step": 475 }, { "epoch": 0.0232421875, "grad_norm": 0.7827659249305725, "learning_rate": 0.000499915548355721, "loss": 2.1815, "step": 476 }, { "epoch": 0.023291015625, "grad_norm": 0.7013096809387207, "learning_rate": 0.0004999145860124834, "loss": 2.2089, "step": 477 }, { "epoch": 0.02333984375, "grad_norm": 0.6963813304901123, "learning_rate": 0.0004999136182182601, "loss": 2.1794, "step": 478 }, { "epoch": 0.023388671875, "grad_norm": 0.7144843339920044, "learning_rate": 0.0004999126449730744, "loss": 2.1221, "step": 479 }, { "epoch": 0.0234375, "grad_norm": 0.6296992301940918, "learning_rate": 0.0004999116662769502, "loss": 2.2053, "step": 480 }, { "epoch": 0.023486328125, "grad_norm": 0.6555426120758057, "learning_rate": 0.000499910682129911, "loss": 2.1826, "step": 481 }, { "epoch": 0.02353515625, "grad_norm": 0.6640546321868896, "learning_rate": 0.0004999096925319808, "loss": 2.1556, "step": 482 }, { "epoch": 0.023583984375, "grad_norm": 0.7139080762863159, "learning_rate": 0.0004999086974831835, "loss": 2.1654, "step": 483 }, { "epoch": 0.0236328125, "grad_norm": 0.7653385400772095, "learning_rate": 0.0004999076969835432, "loss": 2.17, "step": 484 }, { "epoch": 0.023681640625, "grad_norm": 0.7682697772979736, "learning_rate": 0.0004999066910330842, "loss": 2.1361, "step": 485 }, { "epoch": 0.02373046875, "grad_norm": 0.7247856855392456, "learning_rate": 0.0004999056796318308, "loss": 2.1672, "step": 486 }, { "epoch": 0.023779296875, "grad_norm": 1.059450387954712, "learning_rate": 0.0004999046627798076, "loss": 2.1772, "step": 487 }, { "epoch": 0.023828125, "grad_norm": 5.17608642578125, "learning_rate": 0.0004999036404770391, "loss": 2.2075, "step": 488 }, { "epoch": 0.023876953125, "grad_norm": 1.4213244915008545, "learning_rate": 0.0004999026127235504, "loss": 2.1887, "step": 489 }, { "epoch": 0.02392578125, "grad_norm": 2.6587772369384766, "learning_rate": 0.0004999015795193661, "loss": 2.2228, "step": 490 }, { "epoch": 0.023974609375, "grad_norm": 1.7856782674789429, "learning_rate": 0.0004999005408645113, "loss": 2.2534, "step": 491 }, { "epoch": 0.0240234375, "grad_norm": 1.5338081121444702, "learning_rate": 0.0004998994967590113, "loss": 2.224, "step": 492 }, { "epoch": 0.024072265625, "grad_norm": 1.110991358757019, "learning_rate": 0.0004998984472028913, "loss": 2.2364, "step": 493 }, { "epoch": 0.02412109375, "grad_norm": 1.0256321430206299, "learning_rate": 0.0004998973921961767, "loss": 2.2, "step": 494 }, { "epoch": 0.024169921875, "grad_norm": 1.2006289958953857, "learning_rate": 0.0004998963317388931, "loss": 2.2322, "step": 495 }, { "epoch": 0.02421875, "grad_norm": 1.4051496982574463, "learning_rate": 0.0004998952658310662, "loss": 2.2485, "step": 496 }, { "epoch": 0.024267578125, "grad_norm": 0.9794636368751526, "learning_rate": 0.0004998941944727219, "loss": 2.1882, "step": 497 }, { "epoch": 0.02431640625, "grad_norm": 1.352663278579712, "learning_rate": 0.0004998931176638861, "loss": 2.2161, "step": 498 }, { "epoch": 0.024365234375, "grad_norm": 0.9019444584846497, "learning_rate": 0.0004998920354045849, "loss": 2.2157, "step": 499 }, { "epoch": 0.0244140625, "grad_norm": 1.0142747163772583, "learning_rate": 0.0004998909476948446, "loss": 2.235, "step": 500 }, { "epoch": 0.024462890625, "grad_norm": 1.1321306228637695, "learning_rate": 0.0004998898545346915, "loss": 2.1769, "step": 501 }, { "epoch": 0.02451171875, "grad_norm": 0.7704365253448486, "learning_rate": 0.0004998887559241521, "loss": 2.1513, "step": 502 }, { "epoch": 0.024560546875, "grad_norm": 0.8318931460380554, "learning_rate": 0.000499887651863253, "loss": 2.1879, "step": 503 }, { "epoch": 0.024609375, "grad_norm": 0.9760987162590027, "learning_rate": 0.000499886542352021, "loss": 2.2086, "step": 504 }, { "epoch": 0.024658203125, "grad_norm": 1.2452162504196167, "learning_rate": 0.000499885427390483, "loss": 2.1908, "step": 505 }, { "epoch": 0.02470703125, "grad_norm": 0.7341023087501526, "learning_rate": 0.0004998843069786659, "loss": 2.15, "step": 506 }, { "epoch": 0.024755859375, "grad_norm": 0.7947841286659241, "learning_rate": 0.0004998831811165971, "loss": 2.1767, "step": 507 }, { "epoch": 0.0248046875, "grad_norm": 0.8008973598480225, "learning_rate": 0.0004998820498043036, "loss": 2.1801, "step": 508 }, { "epoch": 0.024853515625, "grad_norm": 0.8577893376350403, "learning_rate": 0.000499880913041813, "loss": 2.16, "step": 509 }, { "epoch": 0.02490234375, "grad_norm": 0.8703241348266602, "learning_rate": 0.0004998797708291528, "loss": 2.1886, "step": 510 }, { "epoch": 0.024951171875, "grad_norm": 0.7927240133285522, "learning_rate": 0.0004998786231663507, "loss": 2.1857, "step": 511 }, { "epoch": 0.025, "grad_norm": 0.741110622882843, "learning_rate": 0.0004998774700534346, "loss": 2.2001, "step": 512 }, { "epoch": 0.025048828125, "grad_norm": 0.6619424223899841, "learning_rate": 0.0004998763114904322, "loss": 2.1736, "step": 513 }, { "epoch": 0.02509765625, "grad_norm": 0.6026046872138977, "learning_rate": 0.0004998751474773718, "loss": 2.1487, "step": 514 }, { "epoch": 0.025146484375, "grad_norm": 0.5630130171775818, "learning_rate": 0.0004998739780142815, "loss": 2.1355, "step": 515 }, { "epoch": 0.0251953125, "grad_norm": 0.5647515654563904, "learning_rate": 0.0004998728031011897, "loss": 2.1403, "step": 516 }, { "epoch": 0.025244140625, "grad_norm": 0.46090036630630493, "learning_rate": 0.0004998716227381248, "loss": 2.1736, "step": 517 }, { "epoch": 0.02529296875, "grad_norm": 0.4320344030857086, "learning_rate": 0.0004998704369251156, "loss": 2.1598, "step": 518 }, { "epoch": 0.025341796875, "grad_norm": 0.44122588634490967, "learning_rate": 0.0004998692456621906, "loss": 2.2098, "step": 519 }, { "epoch": 0.025390625, "grad_norm": 1.2436511516571045, "learning_rate": 0.0004998680489493788, "loss": 2.1512, "step": 520 }, { "epoch": 0.025439453125, "grad_norm": 1.054885983467102, "learning_rate": 0.0004998668467867092, "loss": 2.1146, "step": 521 }, { "epoch": 0.02548828125, "grad_norm": 0.9389135241508484, "learning_rate": 0.0004998656391742108, "loss": 2.14, "step": 522 }, { "epoch": 0.025537109375, "grad_norm": 0.8237196803092957, "learning_rate": 0.000499864426111913, "loss": 2.1748, "step": 523 }, { "epoch": 0.0255859375, "grad_norm": 1.1491492986679077, "learning_rate": 0.0004998632075998453, "loss": 2.1802, "step": 524 }, { "epoch": 0.025634765625, "grad_norm": 0.9390500783920288, "learning_rate": 0.000499861983638037, "loss": 2.2112, "step": 525 }, { "epoch": 0.02568359375, "grad_norm": 0.795257031917572, "learning_rate": 0.000499860754226518, "loss": 2.1589, "step": 526 }, { "epoch": 0.025732421875, "grad_norm": 0.6734482049942017, "learning_rate": 0.0004998595193653179, "loss": 2.1446, "step": 527 }, { "epoch": 0.02578125, "grad_norm": 0.7253485918045044, "learning_rate": 0.0004998582790544667, "loss": 2.1958, "step": 528 }, { "epoch": 0.025830078125, "grad_norm": 0.7715002298355103, "learning_rate": 0.0004998570332939944, "loss": 2.1932, "step": 529 }, { "epoch": 0.02587890625, "grad_norm": 0.6904875040054321, "learning_rate": 0.0004998557820839313, "loss": 2.196, "step": 530 }, { "epoch": 0.025927734375, "grad_norm": 0.6510008573532104, "learning_rate": 0.0004998545254243077, "loss": 2.1874, "step": 531 }, { "epoch": 0.0259765625, "grad_norm": 0.6343126893043518, "learning_rate": 0.0004998532633151539, "loss": 2.1516, "step": 532 }, { "epoch": 0.026025390625, "grad_norm": 0.6341608166694641, "learning_rate": 0.0004998519957565009, "loss": 2.1416, "step": 533 }, { "epoch": 0.02607421875, "grad_norm": 0.5568147301673889, "learning_rate": 0.0004998507227483789, "loss": 2.1325, "step": 534 }, { "epoch": 0.026123046875, "grad_norm": 0.536456823348999, "learning_rate": 0.0004998494442908191, "loss": 2.1425, "step": 535 }, { "epoch": 0.026171875, "grad_norm": 0.604306697845459, "learning_rate": 0.0004998481603838523, "loss": 2.1545, "step": 536 }, { "epoch": 0.026220703125, "grad_norm": 0.6171956658363342, "learning_rate": 0.0004998468710275096, "loss": 2.1469, "step": 537 }, { "epoch": 0.02626953125, "grad_norm": 0.5373830795288086, "learning_rate": 0.0004998455762218225, "loss": 2.2194, "step": 538 }, { "epoch": 0.026318359375, "grad_norm": 0.521872878074646, "learning_rate": 0.0004998442759668221, "loss": 2.1245, "step": 539 }, { "epoch": 0.0263671875, "grad_norm": 0.6283167004585266, "learning_rate": 0.0004998429702625401, "loss": 2.1496, "step": 540 }, { "epoch": 0.026416015625, "grad_norm": 0.6790646314620972, "learning_rate": 0.0004998416591090083, "loss": 2.1585, "step": 541 }, { "epoch": 0.02646484375, "grad_norm": 0.6184839606285095, "learning_rate": 0.0004998403425062579, "loss": 2.148, "step": 542 }, { "epoch": 0.026513671875, "grad_norm": 0.6626601815223694, "learning_rate": 0.0004998390204543214, "loss": 2.1305, "step": 543 }, { "epoch": 0.0265625, "grad_norm": 0.5943914651870728, "learning_rate": 0.0004998376929532305, "loss": 2.1607, "step": 544 }, { "epoch": 0.026611328125, "grad_norm": 0.5671437978744507, "learning_rate": 0.0004998363600030174, "loss": 2.1545, "step": 545 }, { "epoch": 0.02666015625, "grad_norm": 0.6315659880638123, "learning_rate": 0.0004998350216037146, "loss": 2.1227, "step": 546 }, { "epoch": 0.026708984375, "grad_norm": 0.6340959072113037, "learning_rate": 0.0004998336777553544, "loss": 2.137, "step": 547 }, { "epoch": 0.0267578125, "grad_norm": 0.5902490019798279, "learning_rate": 0.0004998323284579694, "loss": 2.1643, "step": 548 }, { "epoch": 0.026806640625, "grad_norm": 0.6199833750724792, "learning_rate": 0.0004998309737115924, "loss": 2.1724, "step": 549 }, { "epoch": 0.02685546875, "grad_norm": 0.678092896938324, "learning_rate": 0.000499829613516256, "loss": 2.1388, "step": 550 }, { "epoch": 0.026904296875, "grad_norm": 0.6248672604560852, "learning_rate": 0.0004998282478719932, "loss": 2.1816, "step": 551 }, { "epoch": 0.026953125, "grad_norm": 0.6065245866775513, "learning_rate": 0.0004998268767788373, "loss": 2.1237, "step": 552 }, { "epoch": 0.027001953125, "grad_norm": 0.6169759035110474, "learning_rate": 0.0004998255002368214, "loss": 2.1863, "step": 553 }, { "epoch": 0.02705078125, "grad_norm": 0.607194721698761, "learning_rate": 0.0004998241182459789, "loss": 2.1162, "step": 554 }, { "epoch": 0.027099609375, "grad_norm": 0.6127328276634216, "learning_rate": 0.0004998227308063433, "loss": 2.148, "step": 555 }, { "epoch": 0.0271484375, "grad_norm": 0.60355144739151, "learning_rate": 0.0004998213379179481, "loss": 2.1467, "step": 556 }, { "epoch": 0.027197265625, "grad_norm": 0.6133103966712952, "learning_rate": 0.0004998199395808272, "loss": 2.1368, "step": 557 }, { "epoch": 0.02724609375, "grad_norm": 0.5685338973999023, "learning_rate": 0.0004998185357950144, "loss": 2.1233, "step": 558 }, { "epoch": 0.027294921875, "grad_norm": 0.6033803820610046, "learning_rate": 0.0004998171265605439, "loss": 2.1762, "step": 559 }, { "epoch": 0.02734375, "grad_norm": 0.6573621034622192, "learning_rate": 0.0004998157118774496, "loss": 2.177, "step": 560 }, { "epoch": 0.027392578125, "grad_norm": 0.5595572590827942, "learning_rate": 0.000499814291745766, "loss": 2.1269, "step": 561 }, { "epoch": 0.02744140625, "grad_norm": 0.5516347289085388, "learning_rate": 0.0004998128661655274, "loss": 2.1788, "step": 562 }, { "epoch": 0.027490234375, "grad_norm": 0.6302109956741333, "learning_rate": 0.0004998114351367683, "loss": 2.1747, "step": 563 }, { "epoch": 0.0275390625, "grad_norm": 0.5346150994300842, "learning_rate": 0.0004998099986595235, "loss": 2.1346, "step": 564 }, { "epoch": 0.027587890625, "grad_norm": 0.4540080428123474, "learning_rate": 0.0004998085567338278, "loss": 2.1391, "step": 565 }, { "epoch": 0.02763671875, "grad_norm": 0.44882309436798096, "learning_rate": 0.0004998071093597162, "loss": 2.1857, "step": 566 }, { "epoch": 0.027685546875, "grad_norm": 0.6275193095207214, "learning_rate": 0.0004998056565372235, "loss": 2.1002, "step": 567 }, { "epoch": 0.027734375, "grad_norm": 0.5095242857933044, "learning_rate": 0.0004998041982663851, "loss": 2.1379, "step": 568 }, { "epoch": 0.027783203125, "grad_norm": 0.49436119198799133, "learning_rate": 0.0004998027345472365, "loss": 2.1119, "step": 569 }, { "epoch": 0.02783203125, "grad_norm": 0.5432771444320679, "learning_rate": 0.000499801265379813, "loss": 2.1427, "step": 570 }, { "epoch": 0.027880859375, "grad_norm": 0.5685526728630066, "learning_rate": 0.0004997997907641502, "loss": 2.1529, "step": 571 }, { "epoch": 0.0279296875, "grad_norm": 0.5831946134567261, "learning_rate": 0.0004997983107002838, "loss": 2.1305, "step": 572 }, { "epoch": 0.027978515625, "grad_norm": 0.6116488575935364, "learning_rate": 0.0004997968251882499, "loss": 2.185, "step": 573 }, { "epoch": 0.02802734375, "grad_norm": 0.6442887783050537, "learning_rate": 0.0004997953342280843, "loss": 2.1935, "step": 574 }, { "epoch": 0.028076171875, "grad_norm": 0.6136687994003296, "learning_rate": 0.0004997938378198231, "loss": 2.1608, "step": 575 }, { "epoch": 0.028125, "grad_norm": 0.8197783827781677, "learning_rate": 0.0004997923359635027, "loss": 2.1483, "step": 576 }, { "epoch": 0.028173828125, "grad_norm": 0.9120134115219116, "learning_rate": 0.0004997908286591595, "loss": 2.1672, "step": 577 }, { "epoch": 0.02822265625, "grad_norm": 0.7953088879585266, "learning_rate": 0.0004997893159068297, "loss": 2.1587, "step": 578 }, { "epoch": 0.028271484375, "grad_norm": 0.9028617143630981, "learning_rate": 0.0004997877977065506, "loss": 2.1257, "step": 579 }, { "epoch": 0.0283203125, "grad_norm": 0.8027509450912476, "learning_rate": 0.0004997862740583584, "loss": 2.1106, "step": 580 }, { "epoch": 0.028369140625, "grad_norm": 0.572903037071228, "learning_rate": 0.0004997847449622903, "loss": 2.1168, "step": 581 }, { "epoch": 0.02841796875, "grad_norm": 0.7061600685119629, "learning_rate": 0.0004997832104183833, "loss": 2.1301, "step": 582 }, { "epoch": 0.028466796875, "grad_norm": 0.5434768795967102, "learning_rate": 0.0004997816704266747, "loss": 2.1458, "step": 583 }, { "epoch": 0.028515625, "grad_norm": 0.5579871535301208, "learning_rate": 0.0004997801249872016, "loss": 2.1153, "step": 584 }, { "epoch": 0.028564453125, "grad_norm": 0.6179103851318359, "learning_rate": 0.0004997785741000016, "loss": 2.1681, "step": 585 }, { "epoch": 0.02861328125, "grad_norm": 0.5713271498680115, "learning_rate": 0.0004997770177651123, "loss": 2.1398, "step": 586 }, { "epoch": 0.028662109375, "grad_norm": 0.5448514819145203, "learning_rate": 0.0004997754559825714, "loss": 2.1457, "step": 587 }, { "epoch": 0.0287109375, "grad_norm": 0.577400267124176, "learning_rate": 0.0004997738887524168, "loss": 2.1639, "step": 588 }, { "epoch": 0.028759765625, "grad_norm": 0.6272196769714355, "learning_rate": 0.0004997723160746863, "loss": 2.1383, "step": 589 }, { "epoch": 0.02880859375, "grad_norm": 0.6573303937911987, "learning_rate": 0.0004997707379494183, "loss": 2.1586, "step": 590 }, { "epoch": 0.028857421875, "grad_norm": 0.6859440803527832, "learning_rate": 0.0004997691543766507, "loss": 2.1251, "step": 591 }, { "epoch": 0.02890625, "grad_norm": 0.583711564540863, "learning_rate": 0.0004997675653564223, "loss": 2.1471, "step": 592 }, { "epoch": 0.028955078125, "grad_norm": 0.5477043390274048, "learning_rate": 0.0004997659708887712, "loss": 2.1279, "step": 593 }, { "epoch": 0.02900390625, "grad_norm": 0.4689471423625946, "learning_rate": 0.0004997643709737363, "loss": 2.1148, "step": 594 }, { "epoch": 0.029052734375, "grad_norm": 0.4243263602256775, "learning_rate": 0.0004997627656113562, "loss": 2.1089, "step": 595 }, { "epoch": 0.0291015625, "grad_norm": 0.4764551818370819, "learning_rate": 0.0004997611548016699, "loss": 2.1495, "step": 596 }, { "epoch": 0.029150390625, "grad_norm": 0.5185626149177551, "learning_rate": 0.0004997595385447165, "loss": 2.1436, "step": 597 }, { "epoch": 0.02919921875, "grad_norm": 0.5045749545097351, "learning_rate": 0.0004997579168405352, "loss": 2.1842, "step": 598 }, { "epoch": 0.029248046875, "grad_norm": 0.5627615451812744, "learning_rate": 0.0004997562896891652, "loss": 2.1105, "step": 599 }, { "epoch": 0.029296875, "grad_norm": 0.6212490797042847, "learning_rate": 0.0004997546570906458, "loss": 2.1702, "step": 600 }, { "epoch": 0.029345703125, "grad_norm": 0.7030409574508667, "learning_rate": 0.0004997530190450169, "loss": 2.1318, "step": 601 }, { "epoch": 0.02939453125, "grad_norm": 0.6685683131217957, "learning_rate": 0.0004997513755523179, "loss": 2.1086, "step": 602 }, { "epoch": 0.029443359375, "grad_norm": 0.5304714441299438, "learning_rate": 0.0004997497266125887, "loss": 2.1455, "step": 603 }, { "epoch": 0.0294921875, "grad_norm": 0.5230769515037537, "learning_rate": 0.0004997480722258694, "loss": 2.1255, "step": 604 }, { "epoch": 0.029541015625, "grad_norm": 0.5728501677513123, "learning_rate": 0.0004997464123922001, "loss": 2.1184, "step": 605 }, { "epoch": 0.02958984375, "grad_norm": 0.5562993288040161, "learning_rate": 0.0004997447471116207, "loss": 2.164, "step": 606 }, { "epoch": 0.029638671875, "grad_norm": 0.5390117764472961, "learning_rate": 0.000499743076384172, "loss": 2.1072, "step": 607 }, { "epoch": 0.0296875, "grad_norm": 0.5960462093353271, "learning_rate": 0.0004997414002098941, "loss": 2.1666, "step": 608 }, { "epoch": 0.029736328125, "grad_norm": 0.6347503662109375, "learning_rate": 0.000499739718588828, "loss": 2.174, "step": 609 }, { "epoch": 0.02978515625, "grad_norm": 0.6638168096542358, "learning_rate": 0.0004997380315210142, "loss": 2.1652, "step": 610 }, { "epoch": 0.029833984375, "grad_norm": 0.7307913303375244, "learning_rate": 0.0004997363390064936, "loss": 2.1704, "step": 611 }, { "epoch": 0.0298828125, "grad_norm": 0.7281535267829895, "learning_rate": 0.0004997346410453074, "loss": 2.1, "step": 612 }, { "epoch": 0.029931640625, "grad_norm": 0.6982490420341492, "learning_rate": 0.0004997329376374965, "loss": 2.137, "step": 613 }, { "epoch": 0.02998046875, "grad_norm": 0.5868451595306396, "learning_rate": 0.0004997312287831024, "loss": 2.127, "step": 614 }, { "epoch": 0.030029296875, "grad_norm": 0.6307949423789978, "learning_rate": 0.0004997295144821663, "loss": 2.1578, "step": 615 }, { "epoch": 0.030078125, "grad_norm": 0.6579003930091858, "learning_rate": 0.00049972779473473, "loss": 2.1298, "step": 616 }, { "epoch": 0.030126953125, "grad_norm": 0.5183701515197754, "learning_rate": 0.000499726069540835, "loss": 2.1514, "step": 617 }, { "epoch": 0.03017578125, "grad_norm": 0.5205903649330139, "learning_rate": 0.0004997243389005232, "loss": 2.1156, "step": 618 }, { "epoch": 0.030224609375, "grad_norm": 1.1234761476516724, "learning_rate": 0.0004997226028138364, "loss": 2.1907, "step": 619 }, { "epoch": 0.0302734375, "grad_norm": 0.5276328325271606, "learning_rate": 0.0004997208612808168, "loss": 2.1208, "step": 620 }, { "epoch": 0.030322265625, "grad_norm": 0.6201860904693604, "learning_rate": 0.0004997191143015067, "loss": 2.1317, "step": 621 }, { "epoch": 0.03037109375, "grad_norm": 0.6191005706787109, "learning_rate": 0.0004997173618759482, "loss": 2.2014, "step": 622 }, { "epoch": 0.030419921875, "grad_norm": 0.905637264251709, "learning_rate": 0.000499715604004184, "loss": 2.1255, "step": 623 }, { "epoch": 0.03046875, "grad_norm": 0.6301367878913879, "learning_rate": 0.0004997138406862565, "loss": 2.1568, "step": 624 }, { "epoch": 0.030517578125, "grad_norm": 0.5375446081161499, "learning_rate": 0.0004997120719222087, "loss": 2.0907, "step": 625 }, { "epoch": 0.03056640625, "grad_norm": 0.5843269228935242, "learning_rate": 0.0004997102977120832, "loss": 2.1697, "step": 626 }, { "epoch": 0.030615234375, "grad_norm": 0.9127927422523499, "learning_rate": 0.000499708518055923, "loss": 2.1596, "step": 627 }, { "epoch": 0.0306640625, "grad_norm": 1.03096342086792, "learning_rate": 0.0004997067329537714, "loss": 2.1344, "step": 628 }, { "epoch": 0.030712890625, "grad_norm": 0.8340699672698975, "learning_rate": 0.0004997049424056717, "loss": 2.1361, "step": 629 }, { "epoch": 0.03076171875, "grad_norm": 0.8165284395217896, "learning_rate": 0.0004997031464116672, "loss": 2.1112, "step": 630 }, { "epoch": 0.030810546875, "grad_norm": 0.617124080657959, "learning_rate": 0.0004997013449718013, "loss": 2.1131, "step": 631 }, { "epoch": 0.030859375, "grad_norm": 0.548911988735199, "learning_rate": 0.0004996995380861178, "loss": 2.1072, "step": 632 }, { "epoch": 0.030908203125, "grad_norm": 0.504636287689209, "learning_rate": 0.0004996977257546606, "loss": 2.1158, "step": 633 }, { "epoch": 0.03095703125, "grad_norm": 0.4782217741012573, "learning_rate": 0.0004996959079774734, "loss": 2.1192, "step": 634 }, { "epoch": 0.031005859375, "grad_norm": 0.5245587825775146, "learning_rate": 0.0004996940847546003, "loss": 2.1448, "step": 635 }, { "epoch": 0.0310546875, "grad_norm": 0.47955870628356934, "learning_rate": 0.0004996922560860856, "loss": 2.1457, "step": 636 }, { "epoch": 0.031103515625, "grad_norm": 0.45445069670677185, "learning_rate": 0.0004996904219719735, "loss": 2.1586, "step": 637 }, { "epoch": 0.03115234375, "grad_norm": 0.4998181462287903, "learning_rate": 0.0004996885824123087, "loss": 2.1224, "step": 638 }, { "epoch": 0.031201171875, "grad_norm": 0.5396087765693665, "learning_rate": 0.0004996867374071354, "loss": 2.1779, "step": 639 }, { "epoch": 0.03125, "grad_norm": 0.5774237513542175, "learning_rate": 0.0004996848869564986, "loss": 2.1255, "step": 640 }, { "epoch": 0.031298828125, "grad_norm": 0.6297129988670349, "learning_rate": 0.000499683031060443, "loss": 2.1269, "step": 641 }, { "epoch": 0.03134765625, "grad_norm": 0.6704964637756348, "learning_rate": 0.0004996811697190137, "loss": 2.1509, "step": 642 }, { "epoch": 0.031396484375, "grad_norm": 0.6402686238288879, "learning_rate": 0.0004996793029322556, "loss": 2.1642, "step": 643 }, { "epoch": 0.0314453125, "grad_norm": 0.5949898958206177, "learning_rate": 0.0004996774307002142, "loss": 2.1395, "step": 644 }, { "epoch": 0.031494140625, "grad_norm": 0.5846782922744751, "learning_rate": 0.0004996755530229347, "loss": 2.1338, "step": 645 }, { "epoch": 0.03154296875, "grad_norm": 0.6264969110488892, "learning_rate": 0.0004996736699004628, "loss": 2.1198, "step": 646 }, { "epoch": 0.031591796875, "grad_norm": 0.6304665207862854, "learning_rate": 0.0004996717813328439, "loss": 2.1004, "step": 647 }, { "epoch": 0.031640625, "grad_norm": 0.5714513063430786, "learning_rate": 0.000499669887320124, "loss": 2.1577, "step": 648 }, { "epoch": 0.031689453125, "grad_norm": 0.7276261448860168, "learning_rate": 0.0004996679878623488, "loss": 2.1303, "step": 649 }, { "epoch": 0.03173828125, "grad_norm": 0.7570620775222778, "learning_rate": 0.0004996660829595643, "loss": 2.0717, "step": 650 }, { "epoch": 0.031787109375, "grad_norm": 1.006658911705017, "learning_rate": 0.0004996641726118169, "loss": 2.1314, "step": 651 }, { "epoch": 0.0318359375, "grad_norm": 1.405737280845642, "learning_rate": 0.0004996622568191529, "loss": 2.1658, "step": 652 }, { "epoch": 0.031884765625, "grad_norm": 0.6894755959510803, "learning_rate": 0.0004996603355816183, "loss": 2.1178, "step": 653 }, { "epoch": 0.03193359375, "grad_norm": 0.8076439499855042, "learning_rate": 0.0004996584088992603, "loss": 2.1392, "step": 654 }, { "epoch": 0.031982421875, "grad_norm": 0.9408451318740845, "learning_rate": 0.0004996564767721252, "loss": 2.1423, "step": 655 }, { "epoch": 0.03203125, "grad_norm": 0.8154052495956421, "learning_rate": 0.0004996545392002597, "loss": 2.0668, "step": 656 }, { "epoch": 0.032080078125, "grad_norm": 1.069041132926941, "learning_rate": 0.0004996525961837111, "loss": 2.1498, "step": 657 }, { "epoch": 0.03212890625, "grad_norm": 0.5608811378479004, "learning_rate": 0.0004996506477225264, "loss": 2.1822, "step": 658 }, { "epoch": 0.032177734375, "grad_norm": 0.5256673097610474, "learning_rate": 0.0004996486938167526, "loss": 2.1097, "step": 659 }, { "epoch": 0.0322265625, "grad_norm": 0.5323832631111145, "learning_rate": 0.0004996467344664374, "loss": 2.1315, "step": 660 }, { "epoch": 0.032275390625, "grad_norm": 0.5191957354545593, "learning_rate": 0.000499644769671628, "loss": 2.1425, "step": 661 }, { "epoch": 0.03232421875, "grad_norm": 0.4398641586303711, "learning_rate": 0.0004996427994323723, "loss": 2.1102, "step": 662 }, { "epoch": 0.032373046875, "grad_norm": 0.4730203151702881, "learning_rate": 0.0004996408237487177, "loss": 2.1305, "step": 663 }, { "epoch": 0.032421875, "grad_norm": 0.43958476185798645, "learning_rate": 0.0004996388426207123, "loss": 2.1089, "step": 664 }, { "epoch": 0.032470703125, "grad_norm": 0.45507821440696716, "learning_rate": 0.0004996368560484041, "loss": 2.0969, "step": 665 }, { "epoch": 0.03251953125, "grad_norm": 0.4471484422683716, "learning_rate": 0.0004996348640318412, "loss": 2.1004, "step": 666 }, { "epoch": 0.032568359375, "grad_norm": 0.45631179213523865, "learning_rate": 0.0004996328665710719, "loss": 2.1219, "step": 667 }, { "epoch": 0.0326171875, "grad_norm": 0.4566771388053894, "learning_rate": 0.0004996308636661447, "loss": 2.0979, "step": 668 }, { "epoch": 0.032666015625, "grad_norm": 0.49616310000419617, "learning_rate": 0.0004996288553171078, "loss": 2.1229, "step": 669 }, { "epoch": 0.03271484375, "grad_norm": 0.5865945219993591, "learning_rate": 0.0004996268415240104, "loss": 2.1176, "step": 670 }, { "epoch": 0.032763671875, "grad_norm": 0.6402058005332947, "learning_rate": 0.0004996248222869011, "loss": 2.0952, "step": 671 }, { "epoch": 0.0328125, "grad_norm": 0.7204034924507141, "learning_rate": 0.0004996227976058285, "loss": 2.0994, "step": 672 }, { "epoch": 0.032861328125, "grad_norm": 0.7300109267234802, "learning_rate": 0.0004996207674808421, "loss": 2.145, "step": 673 }, { "epoch": 0.03291015625, "grad_norm": 0.6733285188674927, "learning_rate": 0.0004996187319119908, "loss": 2.1443, "step": 674 }, { "epoch": 0.032958984375, "grad_norm": 0.577370822429657, "learning_rate": 0.0004996166908993242, "loss": 2.1295, "step": 675 }, { "epoch": 0.0330078125, "grad_norm": 0.500438928604126, "learning_rate": 0.0004996146444428916, "loss": 2.124, "step": 676 }, { "epoch": 0.033056640625, "grad_norm": 0.5484062433242798, "learning_rate": 0.0004996125925427427, "loss": 2.1131, "step": 677 }, { "epoch": 0.03310546875, "grad_norm": 0.5395079851150513, "learning_rate": 0.000499610535198927, "loss": 2.0898, "step": 678 }, { "epoch": 0.033154296875, "grad_norm": 0.4269322156906128, "learning_rate": 0.0004996084724114947, "loss": 2.1056, "step": 679 }, { "epoch": 0.033203125, "grad_norm": 0.5007755160331726, "learning_rate": 0.0004996064041804956, "loss": 2.0893, "step": 680 }, { "epoch": 0.033251953125, "grad_norm": 0.4849037528038025, "learning_rate": 0.0004996043305059799, "loss": 2.1363, "step": 681 }, { "epoch": 0.03330078125, "grad_norm": 0.4353083074092865, "learning_rate": 0.0004996022513879977, "loss": 2.0761, "step": 682 }, { "epoch": 0.033349609375, "grad_norm": 0.4140443503856659, "learning_rate": 0.0004996001668265994, "loss": 2.1341, "step": 683 }, { "epoch": 0.0333984375, "grad_norm": 0.435231477022171, "learning_rate": 0.0004995980768218358, "loss": 2.1019, "step": 684 }, { "epoch": 0.033447265625, "grad_norm": 0.43823757767677307, "learning_rate": 0.0004995959813737572, "loss": 2.084, "step": 685 }, { "epoch": 0.03349609375, "grad_norm": 0.43497490882873535, "learning_rate": 0.0004995938804824146, "loss": 2.1018, "step": 686 }, { "epoch": 0.033544921875, "grad_norm": 0.43660426139831543, "learning_rate": 0.000499591774147859, "loss": 2.117, "step": 687 }, { "epoch": 0.03359375, "grad_norm": 0.44860249757766724, "learning_rate": 0.0004995896623701412, "loss": 2.0698, "step": 688 }, { "epoch": 0.033642578125, "grad_norm": 0.5456513166427612, "learning_rate": 0.0004995875451493125, "loss": 2.1017, "step": 689 }, { "epoch": 0.03369140625, "grad_norm": 0.6708536744117737, "learning_rate": 0.0004995854224854243, "loss": 2.1508, "step": 690 }, { "epoch": 0.033740234375, "grad_norm": 0.7600275278091431, "learning_rate": 0.0004995832943785279, "loss": 2.0645, "step": 691 }, { "epoch": 0.0337890625, "grad_norm": 0.7278701663017273, "learning_rate": 0.000499581160828675, "loss": 2.1493, "step": 692 }, { "epoch": 0.033837890625, "grad_norm": 0.7372531294822693, "learning_rate": 0.0004995790218359172, "loss": 2.1081, "step": 693 }, { "epoch": 0.03388671875, "grad_norm": 0.5037508606910706, "learning_rate": 0.0004995768774003062, "loss": 2.0796, "step": 694 }, { "epoch": 0.033935546875, "grad_norm": 0.7004613280296326, "learning_rate": 0.0004995747275218943, "loss": 2.1384, "step": 695 }, { "epoch": 0.033984375, "grad_norm": 0.6165234446525574, "learning_rate": 0.0004995725722007335, "loss": 2.1394, "step": 696 }, { "epoch": 0.034033203125, "grad_norm": 0.47674858570098877, "learning_rate": 0.000499570411436876, "loss": 2.1316, "step": 697 }, { "epoch": 0.03408203125, "grad_norm": 0.6105107069015503, "learning_rate": 0.0004995682452303741, "loss": 2.1003, "step": 698 }, { "epoch": 0.034130859375, "grad_norm": 0.5605298280715942, "learning_rate": 0.0004995660735812804, "loss": 2.1188, "step": 699 }, { "epoch": 0.0341796875, "grad_norm": 0.4669831395149231, "learning_rate": 0.0004995638964896475, "loss": 2.0998, "step": 700 }, { "epoch": 0.034228515625, "grad_norm": 0.5848215818405151, "learning_rate": 0.0004995617139555281, "loss": 2.1386, "step": 701 }, { "epoch": 0.03427734375, "grad_norm": 0.5119794011116028, "learning_rate": 0.0004995595259789752, "loss": 2.1233, "step": 702 }, { "epoch": 0.034326171875, "grad_norm": 0.5389149785041809, "learning_rate": 0.0004995573325600418, "loss": 2.1417, "step": 703 }, { "epoch": 0.034375, "grad_norm": 0.5990896821022034, "learning_rate": 0.0004995551336987809, "loss": 2.1351, "step": 704 }, { "epoch": 0.034423828125, "grad_norm": 0.5800740718841553, "learning_rate": 0.0004995529293952461, "loss": 2.1246, "step": 705 }, { "epoch": 0.03447265625, "grad_norm": 0.621502161026001, "learning_rate": 0.0004995507196494905, "loss": 2.0988, "step": 706 }, { "epoch": 0.034521484375, "grad_norm": 0.5568550825119019, "learning_rate": 0.0004995485044615678, "loss": 2.0919, "step": 707 }, { "epoch": 0.0345703125, "grad_norm": 0.5441731214523315, "learning_rate": 0.0004995462838315319, "loss": 2.1124, "step": 708 }, { "epoch": 0.034619140625, "grad_norm": 0.5140829086303711, "learning_rate": 0.0004995440577594362, "loss": 2.1559, "step": 709 }, { "epoch": 0.03466796875, "grad_norm": 0.4675147831439972, "learning_rate": 0.0004995418262453351, "loss": 2.1145, "step": 710 }, { "epoch": 0.034716796875, "grad_norm": 0.4475621283054352, "learning_rate": 0.0004995395892892822, "loss": 2.1393, "step": 711 }, { "epoch": 0.034765625, "grad_norm": 0.44871285557746887, "learning_rate": 0.000499537346891332, "loss": 2.1448, "step": 712 }, { "epoch": 0.034814453125, "grad_norm": 0.4487797021865845, "learning_rate": 0.000499535099051539, "loss": 2.1117, "step": 713 }, { "epoch": 0.03486328125, "grad_norm": 0.503089189529419, "learning_rate": 0.0004995328457699573, "loss": 2.1274, "step": 714 }, { "epoch": 0.034912109375, "grad_norm": 0.4663471579551697, "learning_rate": 0.0004995305870466417, "loss": 2.1395, "step": 715 }, { "epoch": 0.0349609375, "grad_norm": 0.49429088830947876, "learning_rate": 0.000499528322881647, "loss": 2.0912, "step": 716 }, { "epoch": 0.035009765625, "grad_norm": 0.6452680230140686, "learning_rate": 0.000499526053275028, "loss": 2.1306, "step": 717 }, { "epoch": 0.03505859375, "grad_norm": 0.6763915419578552, "learning_rate": 0.0004995237782268397, "loss": 2.1103, "step": 718 }, { "epoch": 0.035107421875, "grad_norm": 0.5543192625045776, "learning_rate": 0.0004995214977371373, "loss": 2.1216, "step": 719 }, { "epoch": 0.03515625, "grad_norm": 0.5120431780815125, "learning_rate": 0.000499519211805976, "loss": 2.108, "step": 720 }, { "epoch": 0.035205078125, "grad_norm": 0.5524142384529114, "learning_rate": 0.0004995169204334112, "loss": 2.1223, "step": 721 }, { "epoch": 0.03525390625, "grad_norm": 0.5308980345726013, "learning_rate": 0.0004995146236194984, "loss": 2.1006, "step": 722 }, { "epoch": 0.035302734375, "grad_norm": 0.4616377353668213, "learning_rate": 0.0004995123213642935, "loss": 2.1452, "step": 723 }, { "epoch": 0.0353515625, "grad_norm": 0.5592250227928162, "learning_rate": 0.0004995100136678519, "loss": 2.1034, "step": 724 }, { "epoch": 0.035400390625, "grad_norm": 0.7861856818199158, "learning_rate": 0.0004995077005302299, "loss": 2.1363, "step": 725 }, { "epoch": 0.03544921875, "grad_norm": 0.6184245944023132, "learning_rate": 0.0004995053819514834, "loss": 2.0849, "step": 726 }, { "epoch": 0.035498046875, "grad_norm": 0.6829530000686646, "learning_rate": 0.0004995030579316686, "loss": 2.1325, "step": 727 }, { "epoch": 0.035546875, "grad_norm": 0.5725662708282471, "learning_rate": 0.0004995007284708418, "loss": 2.1251, "step": 728 }, { "epoch": 0.035595703125, "grad_norm": 0.5133148431777954, "learning_rate": 0.0004994983935690594, "loss": 2.1205, "step": 729 }, { "epoch": 0.03564453125, "grad_norm": 0.5022404789924622, "learning_rate": 0.0004994960532263782, "loss": 2.0975, "step": 730 }, { "epoch": 0.035693359375, "grad_norm": 0.476207435131073, "learning_rate": 0.0004994937074428548, "loss": 2.0963, "step": 731 }, { "epoch": 0.0357421875, "grad_norm": 0.5253466367721558, "learning_rate": 0.000499491356218546, "loss": 2.0955, "step": 732 }, { "epoch": 0.035791015625, "grad_norm": 0.5419726967811584, "learning_rate": 0.0004994889995535088, "loss": 2.1162, "step": 733 }, { "epoch": 0.03583984375, "grad_norm": 0.5482422709465027, "learning_rate": 0.0004994866374478003, "loss": 2.136, "step": 734 }, { "epoch": 0.035888671875, "grad_norm": 0.5899834036827087, "learning_rate": 0.0004994842699014779, "loss": 2.1591, "step": 735 }, { "epoch": 0.0359375, "grad_norm": 0.6650269031524658, "learning_rate": 0.0004994818969145989, "loss": 2.1359, "step": 736 }, { "epoch": 0.035986328125, "grad_norm": 0.6132761836051941, "learning_rate": 0.0004994795184872207, "loss": 2.1217, "step": 737 }, { "epoch": 0.03603515625, "grad_norm": 0.5833902955055237, "learning_rate": 0.000499477134619401, "loss": 2.0838, "step": 738 }, { "epoch": 0.036083984375, "grad_norm": 0.5023425817489624, "learning_rate": 0.0004994747453111976, "loss": 2.1791, "step": 739 }, { "epoch": 0.0361328125, "grad_norm": 0.6052854061126709, "learning_rate": 0.0004994723505626685, "loss": 2.1325, "step": 740 }, { "epoch": 0.036181640625, "grad_norm": 0.5283750295639038, "learning_rate": 0.0004994699503738715, "loss": 2.0863, "step": 741 }, { "epoch": 0.03623046875, "grad_norm": 0.5734720230102539, "learning_rate": 0.0004994675447448651, "loss": 2.0839, "step": 742 }, { "epoch": 0.036279296875, "grad_norm": 0.544346034526825, "learning_rate": 0.0004994651336757073, "loss": 2.0964, "step": 743 }, { "epoch": 0.036328125, "grad_norm": 0.5930027365684509, "learning_rate": 0.0004994627171664565, "loss": 2.1449, "step": 744 }, { "epoch": 0.036376953125, "grad_norm": 0.5315930247306824, "learning_rate": 0.0004994602952171716, "loss": 2.0899, "step": 745 }, { "epoch": 0.03642578125, "grad_norm": 0.5210869908332825, "learning_rate": 0.0004994578678279112, "loss": 2.0951, "step": 746 }, { "epoch": 0.036474609375, "grad_norm": 0.5115110874176025, "learning_rate": 0.000499455434998734, "loss": 2.0469, "step": 747 }, { "epoch": 0.0365234375, "grad_norm": 0.609618604183197, "learning_rate": 0.0004994529967296989, "loss": 2.0817, "step": 748 }, { "epoch": 0.036572265625, "grad_norm": 0.5120054483413696, "learning_rate": 0.0004994505530208651, "loss": 2.1145, "step": 749 }, { "epoch": 0.03662109375, "grad_norm": 0.41749340295791626, "learning_rate": 0.0004994481038722919, "loss": 2.0943, "step": 750 }, { "epoch": 0.036669921875, "grad_norm": 0.4915337562561035, "learning_rate": 0.0004994456492840386, "loss": 2.1242, "step": 751 }, { "epoch": 0.03671875, "grad_norm": 0.44762954115867615, "learning_rate": 0.0004994431892561646, "loss": 2.1291, "step": 752 }, { "epoch": 0.036767578125, "grad_norm": 0.4503234624862671, "learning_rate": 0.0004994407237887297, "loss": 2.1344, "step": 753 }, { "epoch": 0.03681640625, "grad_norm": 0.5503231287002563, "learning_rate": 0.0004994382528817935, "loss": 2.1303, "step": 754 }, { "epoch": 0.036865234375, "grad_norm": 0.6907973289489746, "learning_rate": 0.0004994357765354158, "loss": 2.1115, "step": 755 }, { "epoch": 0.0369140625, "grad_norm": 0.7923247814178467, "learning_rate": 0.0004994332947496568, "loss": 2.16, "step": 756 }, { "epoch": 0.036962890625, "grad_norm": 0.7328776121139526, "learning_rate": 0.0004994308075245768, "loss": 2.1115, "step": 757 }, { "epoch": 0.03701171875, "grad_norm": 0.6290150880813599, "learning_rate": 0.0004994283148602357, "loss": 2.0633, "step": 758 }, { "epoch": 0.037060546875, "grad_norm": 0.6767076253890991, "learning_rate": 0.000499425816756694, "loss": 2.1109, "step": 759 }, { "epoch": 0.037109375, "grad_norm": 0.7713241577148438, "learning_rate": 0.0004994233132140125, "loss": 2.1008, "step": 760 }, { "epoch": 0.037158203125, "grad_norm": 0.592206597328186, "learning_rate": 0.0004994208042322516, "loss": 2.0985, "step": 761 }, { "epoch": 0.03720703125, "grad_norm": 0.6191748976707458, "learning_rate": 0.0004994182898114722, "loss": 2.1122, "step": 762 }, { "epoch": 0.037255859375, "grad_norm": 0.4879325330257416, "learning_rate": 0.0004994157699517353, "loss": 2.056, "step": 763 }, { "epoch": 0.0373046875, "grad_norm": 0.41964662075042725, "learning_rate": 0.0004994132446531019, "loss": 2.135, "step": 764 }, { "epoch": 0.037353515625, "grad_norm": 0.4826265275478363, "learning_rate": 0.0004994107139156334, "loss": 2.1299, "step": 765 }, { "epoch": 0.03740234375, "grad_norm": 0.5378415584564209, "learning_rate": 0.0004994081777393907, "loss": 2.097, "step": 766 }, { "epoch": 0.037451171875, "grad_norm": 0.5841735005378723, "learning_rate": 0.0004994056361244356, "loss": 2.0955, "step": 767 }, { "epoch": 0.0375, "grad_norm": 0.4858386516571045, "learning_rate": 0.0004994030890708297, "loss": 2.1008, "step": 768 }, { "epoch": 0.037548828125, "grad_norm": 0.4116470217704773, "learning_rate": 0.0004994005365786346, "loss": 2.0922, "step": 769 }, { "epoch": 0.03759765625, "grad_norm": 0.4012560248374939, "learning_rate": 0.0004993979786479121, "loss": 2.0692, "step": 770 }, { "epoch": 0.037646484375, "grad_norm": 0.416734904050827, "learning_rate": 0.0004993954152787245, "loss": 2.0427, "step": 771 }, { "epoch": 0.0376953125, "grad_norm": 0.3777092695236206, "learning_rate": 0.0004993928464711337, "loss": 2.0682, "step": 772 }, { "epoch": 0.037744140625, "grad_norm": 0.44020339846611023, "learning_rate": 0.0004993902722252019, "loss": 2.1009, "step": 773 }, { "epoch": 0.03779296875, "grad_norm": 0.4681757986545563, "learning_rate": 0.0004993876925409917, "loss": 2.1076, "step": 774 }, { "epoch": 0.037841796875, "grad_norm": 0.40327590703964233, "learning_rate": 0.0004993851074185653, "loss": 2.1171, "step": 775 }, { "epoch": 0.037890625, "grad_norm": 0.3685934841632843, "learning_rate": 0.0004993825168579857, "loss": 2.0643, "step": 776 }, { "epoch": 0.037939453125, "grad_norm": 0.38871413469314575, "learning_rate": 0.0004993799208593155, "loss": 2.1091, "step": 777 }, { "epoch": 0.03798828125, "grad_norm": 0.410747230052948, "learning_rate": 0.0004993773194226178, "loss": 2.102, "step": 778 }, { "epoch": 0.038037109375, "grad_norm": 0.43232184648513794, "learning_rate": 0.0004993747125479553, "loss": 2.0905, "step": 779 }, { "epoch": 0.0380859375, "grad_norm": 0.5585849285125732, "learning_rate": 0.0004993721002353915, "loss": 2.1099, "step": 780 }, { "epoch": 0.038134765625, "grad_norm": 0.7981166839599609, "learning_rate": 0.0004993694824849895, "loss": 2.0961, "step": 781 }, { "epoch": 0.03818359375, "grad_norm": 0.8156694173812866, "learning_rate": 0.0004993668592968129, "loss": 2.1343, "step": 782 }, { "epoch": 0.038232421875, "grad_norm": 0.5928887128829956, "learning_rate": 0.0004993642306709251, "loss": 2.0795, "step": 783 }, { "epoch": 0.03828125, "grad_norm": 0.6003465056419373, "learning_rate": 0.0004993615966073902, "loss": 2.1074, "step": 784 }, { "epoch": 0.038330078125, "grad_norm": 0.8945834636688232, "learning_rate": 0.0004993589571062714, "loss": 2.1006, "step": 785 }, { "epoch": 0.03837890625, "grad_norm": 0.5970470309257507, "learning_rate": 0.0004993563121676332, "loss": 2.071, "step": 786 }, { "epoch": 0.038427734375, "grad_norm": 0.6940796375274658, "learning_rate": 0.0004993536617915396, "loss": 2.1126, "step": 787 }, { "epoch": 0.0384765625, "grad_norm": 0.6508753299713135, "learning_rate": 0.0004993510059780546, "loss": 2.1779, "step": 788 }, { "epoch": 0.038525390625, "grad_norm": 0.5325515270233154, "learning_rate": 0.0004993483447272428, "loss": 2.1192, "step": 789 }, { "epoch": 0.03857421875, "grad_norm": 0.6272786855697632, "learning_rate": 0.0004993456780391686, "loss": 2.0841, "step": 790 }, { "epoch": 0.038623046875, "grad_norm": 0.551579475402832, "learning_rate": 0.0004993430059138967, "loss": 2.1461, "step": 791 }, { "epoch": 0.038671875, "grad_norm": 0.48074933886528015, "learning_rate": 0.0004993403283514919, "loss": 2.0531, "step": 792 }, { "epoch": 0.038720703125, "grad_norm": 0.5164564847946167, "learning_rate": 0.0004993376453520189, "loss": 2.0948, "step": 793 }, { "epoch": 0.03876953125, "grad_norm": 0.4478915333747864, "learning_rate": 0.0004993349569155428, "loss": 2.1277, "step": 794 }, { "epoch": 0.038818359375, "grad_norm": 0.44582676887512207, "learning_rate": 0.0004993322630421289, "loss": 2.0926, "step": 795 }, { "epoch": 0.0388671875, "grad_norm": 0.38910186290740967, "learning_rate": 0.0004993295637318423, "loss": 2.0613, "step": 796 }, { "epoch": 0.038916015625, "grad_norm": 0.4249894618988037, "learning_rate": 0.0004993268589847485, "loss": 2.0545, "step": 797 }, { "epoch": 0.03896484375, "grad_norm": 0.4173929691314697, "learning_rate": 0.0004993241488009131, "loss": 2.0783, "step": 798 }, { "epoch": 0.039013671875, "grad_norm": 0.5040072798728943, "learning_rate": 0.0004993214331804016, "loss": 2.0581, "step": 799 }, { "epoch": 0.0390625, "grad_norm": 0.5048104524612427, "learning_rate": 0.0004993187121232801, "loss": 2.0637, "step": 800 }, { "epoch": 0.039111328125, "grad_norm": 0.45227399468421936, "learning_rate": 0.0004993159856296143, "loss": 2.1218, "step": 801 }, { "epoch": 0.03916015625, "grad_norm": 0.5085296034812927, "learning_rate": 0.0004993132536994705, "loss": 2.0787, "step": 802 }, { "epoch": 0.039208984375, "grad_norm": 0.5550543665885925, "learning_rate": 0.0004993105163329147, "loss": 2.1197, "step": 803 }, { "epoch": 0.0392578125, "grad_norm": 0.4379076361656189, "learning_rate": 0.0004993077735300133, "loss": 2.0908, "step": 804 }, { "epoch": 0.039306640625, "grad_norm": 0.38783279061317444, "learning_rate": 0.0004993050252908328, "loss": 2.109, "step": 805 }, { "epoch": 0.03935546875, "grad_norm": 0.4117581248283386, "learning_rate": 0.0004993022716154399, "loss": 2.0594, "step": 806 }, { "epoch": 0.039404296875, "grad_norm": 0.4604656994342804, "learning_rate": 0.0004992995125039011, "loss": 2.149, "step": 807 }, { "epoch": 0.039453125, "grad_norm": 0.4881954491138458, "learning_rate": 0.0004992967479562836, "loss": 2.097, "step": 808 }, { "epoch": 0.039501953125, "grad_norm": 0.5291454195976257, "learning_rate": 0.0004992939779726541, "loss": 2.085, "step": 809 }, { "epoch": 0.03955078125, "grad_norm": 0.5188281536102295, "learning_rate": 0.0004992912025530799, "loss": 2.08, "step": 810 }, { "epoch": 0.039599609375, "grad_norm": 0.5645558834075928, "learning_rate": 0.0004992884216976281, "loss": 2.1076, "step": 811 }, { "epoch": 0.0396484375, "grad_norm": 0.5176679491996765, "learning_rate": 0.0004992856354063663, "loss": 2.0984, "step": 812 }, { "epoch": 0.039697265625, "grad_norm": 0.7213494777679443, "learning_rate": 0.0004992828436793619, "loss": 2.0823, "step": 813 }, { "epoch": 0.03974609375, "grad_norm": 0.7385559678077698, "learning_rate": 0.0004992800465166826, "loss": 2.1061, "step": 814 }, { "epoch": 0.039794921875, "grad_norm": 0.4659675657749176, "learning_rate": 0.0004992772439183962, "loss": 2.1048, "step": 815 }, { "epoch": 0.03984375, "grad_norm": 0.7033932209014893, "learning_rate": 0.0004992744358845707, "loss": 2.1311, "step": 816 }, { "epoch": 0.039892578125, "grad_norm": 0.6362271904945374, "learning_rate": 0.000499271622415274, "loss": 2.0832, "step": 817 }, { "epoch": 0.03994140625, "grad_norm": 0.45633307099342346, "learning_rate": 0.0004992688035105744, "loss": 2.1049, "step": 818 }, { "epoch": 0.039990234375, "grad_norm": 0.5678058862686157, "learning_rate": 0.00049926597917054, "loss": 2.1123, "step": 819 }, { "epoch": 0.0400390625, "grad_norm": 0.4044618308544159, "learning_rate": 0.0004992631493952395, "loss": 2.1324, "step": 820 }, { "epoch": 0.040087890625, "grad_norm": 0.41186562180519104, "learning_rate": 0.0004992603141847414, "loss": 2.0711, "step": 821 }, { "epoch": 0.04013671875, "grad_norm": 0.7596760988235474, "learning_rate": 0.0004992574735391144, "loss": 2.0459, "step": 822 }, { "epoch": 0.040185546875, "grad_norm": 0.379529744386673, "learning_rate": 0.0004992546274584275, "loss": 2.1204, "step": 823 }, { "epoch": 0.040234375, "grad_norm": 0.3958330750465393, "learning_rate": 0.0004992517759427494, "loss": 2.0952, "step": 824 }, { "epoch": 0.040283203125, "grad_norm": 0.43135517835617065, "learning_rate": 0.0004992489189921495, "loss": 2.1075, "step": 825 }, { "epoch": 0.04033203125, "grad_norm": 0.4580707550048828, "learning_rate": 0.0004992460566066967, "loss": 2.0803, "step": 826 }, { "epoch": 0.040380859375, "grad_norm": 0.4798245429992676, "learning_rate": 0.0004992431887864607, "loss": 2.0462, "step": 827 }, { "epoch": 0.0404296875, "grad_norm": 0.4851265549659729, "learning_rate": 0.0004992403155315107, "loss": 2.1036, "step": 828 }, { "epoch": 0.040478515625, "grad_norm": 0.4448799788951874, "learning_rate": 0.0004992374368419167, "loss": 2.0902, "step": 829 }, { "epoch": 0.04052734375, "grad_norm": 0.5231778025627136, "learning_rate": 0.0004992345527177482, "loss": 2.0822, "step": 830 }, { "epoch": 0.040576171875, "grad_norm": 0.6662476658821106, "learning_rate": 0.0004992316631590751, "loss": 2.1255, "step": 831 }, { "epoch": 0.040625, "grad_norm": 0.703859806060791, "learning_rate": 0.0004992287681659676, "loss": 2.1073, "step": 832 }, { "epoch": 0.040673828125, "grad_norm": 0.5122031569480896, "learning_rate": 0.0004992258677384957, "loss": 2.0847, "step": 833 }, { "epoch": 0.04072265625, "grad_norm": 0.5215438008308411, "learning_rate": 0.0004992229618767298, "loss": 2.1249, "step": 834 }, { "epoch": 0.040771484375, "grad_norm": 0.6201410293579102, "learning_rate": 0.0004992200505807402, "loss": 2.0408, "step": 835 }, { "epoch": 0.0408203125, "grad_norm": 0.5178573131561279, "learning_rate": 0.0004992171338505975, "loss": 2.0798, "step": 836 }, { "epoch": 0.040869140625, "grad_norm": 0.4893955886363983, "learning_rate": 0.0004992142116863725, "loss": 2.1172, "step": 837 }, { "epoch": 0.04091796875, "grad_norm": 0.4553849399089813, "learning_rate": 0.0004992112840881359, "loss": 2.1373, "step": 838 }, { "epoch": 0.040966796875, "grad_norm": 0.4222692847251892, "learning_rate": 0.0004992083510559588, "loss": 2.0807, "step": 839 }, { "epoch": 0.041015625, "grad_norm": 0.37883260846138, "learning_rate": 0.0004992054125899122, "loss": 2.0729, "step": 840 }, { "epoch": 0.041064453125, "grad_norm": 0.41221001744270325, "learning_rate": 0.0004992024686900672, "loss": 2.1207, "step": 841 }, { "epoch": 0.04111328125, "grad_norm": 0.42780810594558716, "learning_rate": 0.0004991995193564953, "loss": 2.0866, "step": 842 }, { "epoch": 0.041162109375, "grad_norm": 0.4400468170642853, "learning_rate": 0.0004991965645892678, "loss": 2.1239, "step": 843 }, { "epoch": 0.0412109375, "grad_norm": 0.588583767414093, "learning_rate": 0.0004991936043884566, "loss": 2.1236, "step": 844 }, { "epoch": 0.041259765625, "grad_norm": 2.961707592010498, "learning_rate": 0.0004991906387541333, "loss": 2.1258, "step": 845 }, { "epoch": 0.04130859375, "grad_norm": 1.2887789011001587, "learning_rate": 0.0004991876676863697, "loss": 2.1442, "step": 846 }, { "epoch": 0.041357421875, "grad_norm": 1.108601450920105, "learning_rate": 0.0004991846911852378, "loss": 2.1159, "step": 847 }, { "epoch": 0.04140625, "grad_norm": 0.8812528252601624, "learning_rate": 0.0004991817092508099, "loss": 2.0762, "step": 848 }, { "epoch": 0.041455078125, "grad_norm": 0.587304413318634, "learning_rate": 0.0004991787218831581, "loss": 2.0607, "step": 849 }, { "epoch": 0.04150390625, "grad_norm": 0.5270566940307617, "learning_rate": 0.0004991757290823548, "loss": 2.1316, "step": 850 }, { "epoch": 0.041552734375, "grad_norm": 0.48140454292297363, "learning_rate": 0.0004991727308484728, "loss": 2.1454, "step": 851 }, { "epoch": 0.0416015625, "grad_norm": 0.5097341537475586, "learning_rate": 0.0004991697271815845, "loss": 2.0947, "step": 852 }, { "epoch": 0.041650390625, "grad_norm": 0.50489741563797, "learning_rate": 0.0004991667180817627, "loss": 2.1242, "step": 853 }, { "epoch": 0.04169921875, "grad_norm": 0.4708973169326782, "learning_rate": 0.0004991637035490805, "loss": 2.0387, "step": 854 }, { "epoch": 0.041748046875, "grad_norm": 0.48218095302581787, "learning_rate": 0.0004991606835836107, "loss": 2.0539, "step": 855 }, { "epoch": 0.041796875, "grad_norm": 0.4525936543941498, "learning_rate": 0.0004991576581854267, "loss": 2.118, "step": 856 }, { "epoch": 0.041845703125, "grad_norm": 0.4563722610473633, "learning_rate": 0.0004991546273546018, "loss": 2.0959, "step": 857 }, { "epoch": 0.04189453125, "grad_norm": 0.550678014755249, "learning_rate": 0.0004991515910912095, "loss": 2.1208, "step": 858 }, { "epoch": 0.041943359375, "grad_norm": 0.5663391947746277, "learning_rate": 0.0004991485493953232, "loss": 2.0749, "step": 859 }, { "epoch": 0.0419921875, "grad_norm": 0.451704740524292, "learning_rate": 0.0004991455022670168, "loss": 2.0537, "step": 860 }, { "epoch": 0.042041015625, "grad_norm": 0.5110864043235779, "learning_rate": 0.000499142449706364, "loss": 2.1165, "step": 861 }, { "epoch": 0.04208984375, "grad_norm": 0.4920494258403778, "learning_rate": 0.0004991393917134388, "loss": 2.0638, "step": 862 }, { "epoch": 0.042138671875, "grad_norm": 0.40198516845703125, "learning_rate": 0.0004991363282883155, "loss": 2.0801, "step": 863 }, { "epoch": 0.0421875, "grad_norm": 0.4451534152030945, "learning_rate": 0.0004991332594310682, "loss": 2.0895, "step": 864 }, { "epoch": 0.042236328125, "grad_norm": 0.41318055987358093, "learning_rate": 0.0004991301851417712, "loss": 2.0515, "step": 865 }, { "epoch": 0.04228515625, "grad_norm": 0.6008551716804504, "learning_rate": 0.0004991271054204992, "loss": 2.0795, "step": 866 }, { "epoch": 0.042333984375, "grad_norm": 2.321098804473877, "learning_rate": 0.0004991240202673268, "loss": 2.1225, "step": 867 }, { "epoch": 0.0423828125, "grad_norm": 0.7691654562950134, "learning_rate": 0.0004991209296823285, "loss": 2.1143, "step": 868 }, { "epoch": 0.042431640625, "grad_norm": 0.9477538466453552, "learning_rate": 0.0004991178336655796, "loss": 2.0731, "step": 869 }, { "epoch": 0.04248046875, "grad_norm": 0.9732951521873474, "learning_rate": 0.0004991147322171548, "loss": 2.1297, "step": 870 }, { "epoch": 0.042529296875, "grad_norm": 0.6833594441413879, "learning_rate": 0.0004991116253371295, "loss": 2.0752, "step": 871 }, { "epoch": 0.042578125, "grad_norm": 0.7005627751350403, "learning_rate": 0.0004991085130255789, "loss": 2.093, "step": 872 }, { "epoch": 0.042626953125, "grad_norm": 0.6080561280250549, "learning_rate": 0.0004991053952825784, "loss": 2.1045, "step": 873 }, { "epoch": 0.04267578125, "grad_norm": 0.5129806995391846, "learning_rate": 0.0004991022721082037, "loss": 2.083, "step": 874 }, { "epoch": 0.042724609375, "grad_norm": 0.5049960613250732, "learning_rate": 0.0004990991435025303, "loss": 2.0753, "step": 875 }, { "epoch": 0.0427734375, "grad_norm": 0.44743630290031433, "learning_rate": 0.0004990960094656342, "loss": 2.0897, "step": 876 }, { "epoch": 0.042822265625, "grad_norm": 0.4613926410675049, "learning_rate": 0.0004990928699975911, "loss": 2.0727, "step": 877 }, { "epoch": 0.04287109375, "grad_norm": 0.35535314679145813, "learning_rate": 0.0004990897250984774, "loss": 2.0925, "step": 878 }, { "epoch": 0.042919921875, "grad_norm": 0.3788653612136841, "learning_rate": 0.0004990865747683692, "loss": 2.0598, "step": 879 }, { "epoch": 0.04296875, "grad_norm": 0.3592720627784729, "learning_rate": 0.0004990834190073428, "loss": 1.9706, "step": 880 }, { "epoch": 0.043017578125, "grad_norm": 0.3657750189304352, "learning_rate": 0.0004990802578154746, "loss": 2.0513, "step": 881 }, { "epoch": 0.04306640625, "grad_norm": 0.34784144163131714, "learning_rate": 0.0004990770911928416, "loss": 2.0953, "step": 882 }, { "epoch": 0.043115234375, "grad_norm": 0.34957852959632874, "learning_rate": 0.0004990739191395201, "loss": 2.0656, "step": 883 }, { "epoch": 0.0431640625, "grad_norm": 0.39943912625312805, "learning_rate": 0.0004990707416555871, "loss": 2.1135, "step": 884 }, { "epoch": 0.043212890625, "grad_norm": 0.4182076156139374, "learning_rate": 0.0004990675587411196, "loss": 2.084, "step": 885 }, { "epoch": 0.04326171875, "grad_norm": 0.3541222810745239, "learning_rate": 0.000499064370396195, "loss": 2.0809, "step": 886 }, { "epoch": 0.043310546875, "grad_norm": 0.3274332284927368, "learning_rate": 0.0004990611766208903, "loss": 2.088, "step": 887 }, { "epoch": 0.043359375, "grad_norm": 0.3370121419429779, "learning_rate": 0.000499057977415283, "loss": 2.0721, "step": 888 }, { "epoch": 0.043408203125, "grad_norm": 0.36672237515449524, "learning_rate": 0.0004990547727794505, "loss": 2.0618, "step": 889 }, { "epoch": 0.04345703125, "grad_norm": 0.35474345088005066, "learning_rate": 0.0004990515627134706, "loss": 2.1046, "step": 890 }, { "epoch": 0.043505859375, "grad_norm": 0.3698935806751251, "learning_rate": 0.0004990483472174212, "loss": 2.0888, "step": 891 }, { "epoch": 0.0435546875, "grad_norm": 0.4570077657699585, "learning_rate": 0.00049904512629138, "loss": 2.0169, "step": 892 }, { "epoch": 0.043603515625, "grad_norm": 0.5725699663162231, "learning_rate": 0.0004990418999354252, "loss": 2.0636, "step": 893 }, { "epoch": 0.04365234375, "grad_norm": 0.5376597046852112, "learning_rate": 0.000499038668149635, "loss": 2.0624, "step": 894 }, { "epoch": 0.043701171875, "grad_norm": 0.4877265989780426, "learning_rate": 0.0004990354309340877, "loss": 2.0727, "step": 895 }, { "epoch": 0.04375, "grad_norm": 0.5538619756698608, "learning_rate": 0.0004990321882888617, "loss": 2.0832, "step": 896 }, { "epoch": 0.043798828125, "grad_norm": 0.5348575115203857, "learning_rate": 0.0004990289402140355, "loss": 2.095, "step": 897 }, { "epoch": 0.04384765625, "grad_norm": 0.5151073932647705, "learning_rate": 0.0004990256867096881, "loss": 2.0251, "step": 898 }, { "epoch": 0.043896484375, "grad_norm": 0.52495938539505, "learning_rate": 0.0004990224277758982, "loss": 2.0674, "step": 899 }, { "epoch": 0.0439453125, "grad_norm": 0.48588061332702637, "learning_rate": 0.0004990191634127448, "loss": 2.0635, "step": 900 }, { "epoch": 0.043994140625, "grad_norm": 0.3792704641819, "learning_rate": 0.0004990158936203069, "loss": 2.1138, "step": 901 }, { "epoch": 0.04404296875, "grad_norm": 0.41486361622810364, "learning_rate": 0.0004990126183986639, "loss": 2.096, "step": 902 }, { "epoch": 0.044091796875, "grad_norm": 0.4839034378528595, "learning_rate": 0.0004990093377478951, "loss": 2.0475, "step": 903 }, { "epoch": 0.044140625, "grad_norm": 0.467596173286438, "learning_rate": 0.0004990060516680802, "loss": 2.0469, "step": 904 }, { "epoch": 0.044189453125, "grad_norm": 0.4879370927810669, "learning_rate": 0.0004990027601592985, "loss": 2.0635, "step": 905 }, { "epoch": 0.04423828125, "grad_norm": 0.5305043458938599, "learning_rate": 0.00049899946322163, "loss": 2.0684, "step": 906 }, { "epoch": 0.044287109375, "grad_norm": 0.4850102663040161, "learning_rate": 0.0004989961608551545, "loss": 2.116, "step": 907 }, { "epoch": 0.0443359375, "grad_norm": 0.498407244682312, "learning_rate": 0.0004989928530599521, "loss": 2.0847, "step": 908 }, { "epoch": 0.044384765625, "grad_norm": 0.6186873912811279, "learning_rate": 0.000498989539836103, "loss": 2.0897, "step": 909 }, { "epoch": 0.04443359375, "grad_norm": 0.5399211049079895, "learning_rate": 0.0004989862211836873, "loss": 2.0743, "step": 910 }, { "epoch": 0.044482421875, "grad_norm": 0.38604769110679626, "learning_rate": 0.0004989828971027857, "loss": 2.0727, "step": 911 }, { "epoch": 0.04453125, "grad_norm": 0.4465138614177704, "learning_rate": 0.0004989795675934787, "loss": 2.0848, "step": 912 }, { "epoch": 0.044580078125, "grad_norm": 0.4261457622051239, "learning_rate": 0.0004989762326558469, "loss": 2.0941, "step": 913 }, { "epoch": 0.04462890625, "grad_norm": 0.4954240918159485, "learning_rate": 0.0004989728922899712, "loss": 2.0953, "step": 914 }, { "epoch": 0.044677734375, "grad_norm": 0.5594189763069153, "learning_rate": 0.0004989695464959324, "loss": 2.0531, "step": 915 }, { "epoch": 0.0447265625, "grad_norm": 0.549781322479248, "learning_rate": 0.0004989661952738118, "loss": 2.0846, "step": 916 }, { "epoch": 0.044775390625, "grad_norm": 0.5080304741859436, "learning_rate": 0.0004989628386236905, "loss": 2.0744, "step": 917 }, { "epoch": 0.04482421875, "grad_norm": 0.5925115346908569, "learning_rate": 0.0004989594765456498, "loss": 2.0594, "step": 918 }, { "epoch": 0.044873046875, "grad_norm": 0.49874529242515564, "learning_rate": 0.0004989561090397714, "loss": 2.0487, "step": 919 }, { "epoch": 0.044921875, "grad_norm": 0.41832026839256287, "learning_rate": 0.0004989527361061366, "loss": 2.0191, "step": 920 }, { "epoch": 0.044970703125, "grad_norm": 0.44319745898246765, "learning_rate": 0.0004989493577448275, "loss": 2.047, "step": 921 }, { "epoch": 0.04501953125, "grad_norm": 0.44782042503356934, "learning_rate": 0.0004989459739559257, "loss": 2.0468, "step": 922 }, { "epoch": 0.045068359375, "grad_norm": 0.40020832419395447, "learning_rate": 0.0004989425847395133, "loss": 2.0454, "step": 923 }, { "epoch": 0.0451171875, "grad_norm": 0.4339433014392853, "learning_rate": 0.0004989391900956725, "loss": 2.0684, "step": 924 }, { "epoch": 0.045166015625, "grad_norm": 0.4669035077095032, "learning_rate": 0.0004989357900244854, "loss": 2.0391, "step": 925 }, { "epoch": 0.04521484375, "grad_norm": 0.5125132203102112, "learning_rate": 0.0004989323845260346, "loss": 2.0948, "step": 926 }, { "epoch": 0.045263671875, "grad_norm": 0.499088853597641, "learning_rate": 0.0004989289736004026, "loss": 2.1063, "step": 927 }, { "epoch": 0.0453125, "grad_norm": 0.42751744389533997, "learning_rate": 0.0004989255572476719, "loss": 2.0642, "step": 928 }, { "epoch": 0.045361328125, "grad_norm": 0.4933611750602722, "learning_rate": 0.0004989221354679256, "loss": 2.0498, "step": 929 }, { "epoch": 0.04541015625, "grad_norm": 0.609082043170929, "learning_rate": 0.0004989187082612462, "loss": 2.0649, "step": 930 }, { "epoch": 0.045458984375, "grad_norm": 0.5982219576835632, "learning_rate": 0.0004989152756277173, "loss": 2.0663, "step": 931 }, { "epoch": 0.0455078125, "grad_norm": 0.5546870827674866, "learning_rate": 0.0004989118375674216, "loss": 2.0798, "step": 932 }, { "epoch": 0.045556640625, "grad_norm": 0.5493040680885315, "learning_rate": 0.0004989083940804427, "loss": 2.0989, "step": 933 }, { "epoch": 0.04560546875, "grad_norm": 0.5818493962287903, "learning_rate": 0.0004989049451668639, "loss": 2.0699, "step": 934 }, { "epoch": 0.045654296875, "grad_norm": 0.5194397568702698, "learning_rate": 0.000498901490826769, "loss": 2.1011, "step": 935 }, { "epoch": 0.045703125, "grad_norm": 0.4990486204624176, "learning_rate": 0.0004988980310602415, "loss": 2.0905, "step": 936 }, { "epoch": 0.045751953125, "grad_norm": 0.426910400390625, "learning_rate": 0.0004988945658673654, "loss": 2.0297, "step": 937 }, { "epoch": 0.04580078125, "grad_norm": 0.5152286887168884, "learning_rate": 0.0004988910952482246, "loss": 2.0689, "step": 938 }, { "epoch": 0.045849609375, "grad_norm": 0.501254677772522, "learning_rate": 0.0004988876192029032, "loss": 2.0596, "step": 939 }, { "epoch": 0.0458984375, "grad_norm": 0.3934566378593445, "learning_rate": 0.0004988841377314855, "loss": 2.0728, "step": 940 }, { "epoch": 0.045947265625, "grad_norm": 0.5341922640800476, "learning_rate": 0.0004988806508340559, "loss": 2.0714, "step": 941 }, { "epoch": 0.04599609375, "grad_norm": 0.512946367263794, "learning_rate": 0.0004988771585106988, "loss": 2.0722, "step": 942 }, { "epoch": 0.046044921875, "grad_norm": 0.4391801059246063, "learning_rate": 0.000498873660761499, "loss": 2.06, "step": 943 }, { "epoch": 0.04609375, "grad_norm": 0.5773365497589111, "learning_rate": 0.000498870157586541, "loss": 2.0354, "step": 944 }, { "epoch": 0.046142578125, "grad_norm": 0.42619630694389343, "learning_rate": 0.00049886664898591, "loss": 2.054, "step": 945 }, { "epoch": 0.04619140625, "grad_norm": 0.4060613512992859, "learning_rate": 0.0004988631349596909, "loss": 2.0627, "step": 946 }, { "epoch": 0.046240234375, "grad_norm": 0.4600524306297302, "learning_rate": 0.0004988596155079689, "loss": 2.0758, "step": 947 }, { "epoch": 0.0462890625, "grad_norm": 0.44609352946281433, "learning_rate": 0.0004988560906308291, "loss": 2.11, "step": 948 }, { "epoch": 0.046337890625, "grad_norm": 0.5073021650314331, "learning_rate": 0.0004988525603283573, "loss": 2.0795, "step": 949 }, { "epoch": 0.04638671875, "grad_norm": 0.42845359444618225, "learning_rate": 0.0004988490246006388, "loss": 2.0511, "step": 950 }, { "epoch": 0.046435546875, "grad_norm": 0.49571505188941956, "learning_rate": 0.0004988454834477593, "loss": 2.0723, "step": 951 }, { "epoch": 0.046484375, "grad_norm": 0.4760453701019287, "learning_rate": 0.0004988419368698047, "loss": 2.0924, "step": 952 }, { "epoch": 0.046533203125, "grad_norm": 0.40978163480758667, "learning_rate": 0.000498838384866861, "loss": 2.0485, "step": 953 }, { "epoch": 0.04658203125, "grad_norm": 0.43623682856559753, "learning_rate": 0.0004988348274390141, "loss": 2.0896, "step": 954 }, { "epoch": 0.046630859375, "grad_norm": 0.42137426137924194, "learning_rate": 0.0004988312645863504, "loss": 2.0943, "step": 955 }, { "epoch": 0.0466796875, "grad_norm": 0.41693657636642456, "learning_rate": 0.0004988276963089561, "loss": 2.0366, "step": 956 }, { "epoch": 0.046728515625, "grad_norm": 0.3903106451034546, "learning_rate": 0.0004988241226069178, "loss": 2.0681, "step": 957 }, { "epoch": 0.04677734375, "grad_norm": 0.5361778140068054, "learning_rate": 0.0004988205434803222, "loss": 2.0949, "step": 958 }, { "epoch": 0.046826171875, "grad_norm": 0.6068253517150879, "learning_rate": 0.0004988169589292558, "loss": 2.0319, "step": 959 }, { "epoch": 0.046875, "grad_norm": 0.40342187881469727, "learning_rate": 0.0004988133689538055, "loss": 2.0787, "step": 960 }, { "epoch": 0.046923828125, "grad_norm": 0.3892541527748108, "learning_rate": 0.0004988097735540585, "loss": 2.0674, "step": 961 }, { "epoch": 0.04697265625, "grad_norm": 0.5057629942893982, "learning_rate": 0.0004988061727301019, "loss": 2.0716, "step": 962 }, { "epoch": 0.047021484375, "grad_norm": 0.46489638090133667, "learning_rate": 0.0004988025664820228, "loss": 2.0798, "step": 963 }, { "epoch": 0.0470703125, "grad_norm": 0.36408570408821106, "learning_rate": 0.0004987989548099087, "loss": 2.0762, "step": 964 }, { "epoch": 0.047119140625, "grad_norm": 0.3988438546657562, "learning_rate": 0.0004987953377138471, "loss": 2.1031, "step": 965 }, { "epoch": 0.04716796875, "grad_norm": 0.424559086561203, "learning_rate": 0.0004987917151939257, "loss": 2.0753, "step": 966 }, { "epoch": 0.047216796875, "grad_norm": 0.48426973819732666, "learning_rate": 0.0004987880872502324, "loss": 2.0885, "step": 967 }, { "epoch": 0.047265625, "grad_norm": 0.5335932374000549, "learning_rate": 0.000498784453882855, "loss": 2.0921, "step": 968 }, { "epoch": 0.047314453125, "grad_norm": 0.49771085381507874, "learning_rate": 0.0004987808150918815, "loss": 2.0497, "step": 969 }, { "epoch": 0.04736328125, "grad_norm": 0.4700825810432434, "learning_rate": 0.0004987771708774001, "loss": 2.0322, "step": 970 }, { "epoch": 0.047412109375, "grad_norm": 0.5333424210548401, "learning_rate": 0.0004987735212394994, "loss": 2.0449, "step": 971 }, { "epoch": 0.0474609375, "grad_norm": 0.5406484603881836, "learning_rate": 0.0004987698661782674, "loss": 2.0326, "step": 972 }, { "epoch": 0.047509765625, "grad_norm": 0.47175097465515137, "learning_rate": 0.000498766205693793, "loss": 2.0394, "step": 973 }, { "epoch": 0.04755859375, "grad_norm": 0.3574228882789612, "learning_rate": 0.0004987625397861649, "loss": 2.011, "step": 974 }, { "epoch": 0.047607421875, "grad_norm": 0.35309410095214844, "learning_rate": 0.0004987588684554718, "loss": 2.0797, "step": 975 }, { "epoch": 0.04765625, "grad_norm": 0.3763760030269623, "learning_rate": 0.0004987551917018028, "loss": 2.0738, "step": 976 }, { "epoch": 0.047705078125, "grad_norm": 0.3599834740161896, "learning_rate": 0.0004987515095252469, "loss": 2.0654, "step": 977 }, { "epoch": 0.04775390625, "grad_norm": 0.40405696630477905, "learning_rate": 0.0004987478219258936, "loss": 2.0486, "step": 978 }, { "epoch": 0.047802734375, "grad_norm": 0.3928552567958832, "learning_rate": 0.0004987441289038319, "loss": 2.0727, "step": 979 }, { "epoch": 0.0478515625, "grad_norm": 0.4059995412826538, "learning_rate": 0.0004987404304591514, "loss": 2.098, "step": 980 }, { "epoch": 0.047900390625, "grad_norm": 0.43860921263694763, "learning_rate": 0.000498736726591942, "loss": 2.0808, "step": 981 }, { "epoch": 0.04794921875, "grad_norm": 0.473909854888916, "learning_rate": 0.0004987330173022932, "loss": 2.0839, "step": 982 }, { "epoch": 0.047998046875, "grad_norm": 0.5170091986656189, "learning_rate": 0.000498729302590295, "loss": 2.0868, "step": 983 }, { "epoch": 0.048046875, "grad_norm": 0.5665779709815979, "learning_rate": 0.0004987255824560374, "loss": 2.0603, "step": 984 }, { "epoch": 0.048095703125, "grad_norm": 0.5012559294700623, "learning_rate": 0.0004987218568996106, "loss": 2.0823, "step": 985 }, { "epoch": 0.04814453125, "grad_norm": 0.44728216528892517, "learning_rate": 0.0004987181259211048, "loss": 2.0729, "step": 986 }, { "epoch": 0.048193359375, "grad_norm": 0.39824816584587097, "learning_rate": 0.0004987143895206106, "loss": 2.0585, "step": 987 }, { "epoch": 0.0482421875, "grad_norm": 0.41896215081214905, "learning_rate": 0.0004987106476982184, "loss": 2.0656, "step": 988 }, { "epoch": 0.048291015625, "grad_norm": 0.43255990743637085, "learning_rate": 0.0004987069004540189, "loss": 2.0755, "step": 989 }, { "epoch": 0.04833984375, "grad_norm": 0.351372092962265, "learning_rate": 0.0004987031477881029, "loss": 1.991, "step": 990 }, { "epoch": 0.048388671875, "grad_norm": 0.38594865798950195, "learning_rate": 0.0004986993897005614, "loss": 2.0863, "step": 991 }, { "epoch": 0.0484375, "grad_norm": 0.41431114077568054, "learning_rate": 0.0004986956261914856, "loss": 2.0972, "step": 992 }, { "epoch": 0.048486328125, "grad_norm": 0.4262387454509735, "learning_rate": 0.0004986918572609664, "loss": 2.0718, "step": 993 }, { "epoch": 0.04853515625, "grad_norm": 0.46102678775787354, "learning_rate": 0.0004986880829090954, "loss": 2.1135, "step": 994 }, { "epoch": 0.048583984375, "grad_norm": 0.3869483470916748, "learning_rate": 0.000498684303135964, "loss": 2.0162, "step": 995 }, { "epoch": 0.0486328125, "grad_norm": 0.4042915105819702, "learning_rate": 0.0004986805179416638, "loss": 2.0387, "step": 996 }, { "epoch": 0.048681640625, "grad_norm": 0.4806325435638428, "learning_rate": 0.0004986767273262866, "loss": 2.0595, "step": 997 }, { "epoch": 0.04873046875, "grad_norm": 0.4804823696613312, "learning_rate": 0.0004986729312899242, "loss": 2.0344, "step": 998 }, { "epoch": 0.048779296875, "grad_norm": 0.4408351480960846, "learning_rate": 0.0004986691298326686, "loss": 2.0718, "step": 999 }, { "epoch": 0.048828125, "grad_norm": 0.5123075842857361, "learning_rate": 0.000498665322954612, "loss": 2.0929, "step": 1000 }, { "epoch": 0.048876953125, "grad_norm": 0.623113214969635, "learning_rate": 0.0004986615106558465, "loss": 2.1023, "step": 1001 }, { "epoch": 0.04892578125, "grad_norm": 0.646751344203949, "learning_rate": 0.0004986576929364646, "loss": 2.0648, "step": 1002 }, { "epoch": 0.048974609375, "grad_norm": 0.5305054187774658, "learning_rate": 0.0004986538697965588, "loss": 2.0496, "step": 1003 }, { "epoch": 0.0490234375, "grad_norm": 0.44859132170677185, "learning_rate": 0.0004986500412362219, "loss": 2.0895, "step": 1004 }, { "epoch": 0.049072265625, "grad_norm": 0.40662503242492676, "learning_rate": 0.0004986462072555465, "loss": 2.0821, "step": 1005 }, { "epoch": 0.04912109375, "grad_norm": 0.40959861874580383, "learning_rate": 0.0004986423678546257, "loss": 2.0916, "step": 1006 }, { "epoch": 0.049169921875, "grad_norm": 0.4806528389453888, "learning_rate": 0.0004986385230335524, "loss": 2.0582, "step": 1007 }, { "epoch": 0.04921875, "grad_norm": 0.37030118703842163, "learning_rate": 0.0004986346727924197, "loss": 2.0243, "step": 1008 }, { "epoch": 0.049267578125, "grad_norm": 0.3988148868083954, "learning_rate": 0.0004986308171313212, "loss": 2.0467, "step": 1009 }, { "epoch": 0.04931640625, "grad_norm": 0.48385196924209595, "learning_rate": 0.0004986269560503501, "loss": 2.0784, "step": 1010 }, { "epoch": 0.049365234375, "grad_norm": 0.4621245265007019, "learning_rate": 0.0004986230895496, "loss": 2.0403, "step": 1011 }, { "epoch": 0.0494140625, "grad_norm": 0.3828849196434021, "learning_rate": 0.0004986192176291647, "loss": 2.0395, "step": 1012 }, { "epoch": 0.049462890625, "grad_norm": 0.43419957160949707, "learning_rate": 0.0004986153402891381, "loss": 2.0815, "step": 1013 }, { "epoch": 0.04951171875, "grad_norm": 0.5283540487289429, "learning_rate": 0.0004986114575296141, "loss": 2.0465, "step": 1014 }, { "epoch": 0.049560546875, "grad_norm": 0.49393436312675476, "learning_rate": 0.0004986075693506866, "loss": 2.067, "step": 1015 }, { "epoch": 0.049609375, "grad_norm": 0.40732207894325256, "learning_rate": 0.0004986036757524501, "loss": 2.0304, "step": 1016 }, { "epoch": 0.049658203125, "grad_norm": 0.543363630771637, "learning_rate": 0.000498599776734999, "loss": 2.043, "step": 1017 }, { "epoch": 0.04970703125, "grad_norm": 0.994584321975708, "learning_rate": 0.0004985958722984275, "loss": 2.0432, "step": 1018 }, { "epoch": 0.049755859375, "grad_norm": 0.49402010440826416, "learning_rate": 0.0004985919624428305, "loss": 2.0977, "step": 1019 }, { "epoch": 0.0498046875, "grad_norm": 0.52320796251297, "learning_rate": 0.0004985880471683026, "loss": 2.0609, "step": 1020 }, { "epoch": 0.049853515625, "grad_norm": 0.4481967091560364, "learning_rate": 0.0004985841264749388, "loss": 2.0541, "step": 1021 }, { "epoch": 0.04990234375, "grad_norm": 0.38315093517303467, "learning_rate": 0.000498580200362834, "loss": 2.0592, "step": 1022 }, { "epoch": 0.049951171875, "grad_norm": 0.40149015188217163, "learning_rate": 0.0004985762688320834, "loss": 2.04, "step": 1023 }, { "epoch": 0.05, "grad_norm": 0.39495840668678284, "learning_rate": 0.0004985723318827824, "loss": 2.0684, "step": 1024 }, { "epoch": 0.050048828125, "grad_norm": 0.41885361075401306, "learning_rate": 0.0004985683895150263, "loss": 2.0493, "step": 1025 }, { "epoch": 0.05009765625, "grad_norm": 0.40557533502578735, "learning_rate": 0.0004985644417289107, "loss": 2.0441, "step": 1026 }, { "epoch": 0.050146484375, "grad_norm": 0.4839108884334564, "learning_rate": 0.0004985604885245312, "loss": 2.0255, "step": 1027 }, { "epoch": 0.0501953125, "grad_norm": 0.6561737060546875, "learning_rate": 0.0004985565299019836, "loss": 2.0356, "step": 1028 }, { "epoch": 0.050244140625, "grad_norm": 2.27152419090271, "learning_rate": 0.0004985525658613639, "loss": 2.0443, "step": 1029 }, { "epoch": 0.05029296875, "grad_norm": 0.9262478351593018, "learning_rate": 0.0004985485964027682, "loss": 2.0642, "step": 1030 }, { "epoch": 0.050341796875, "grad_norm": 6.0528974533081055, "learning_rate": 0.0004985446215262927, "loss": 2.1332, "step": 1031 }, { "epoch": 0.050390625, "grad_norm": 2.4308509826660156, "learning_rate": 0.0004985406412320337, "loss": 2.123, "step": 1032 }, { "epoch": 0.050439453125, "grad_norm": 0.5490549206733704, "learning_rate": 0.0004985366555200876, "loss": 2.0555, "step": 1033 }, { "epoch": 0.05048828125, "grad_norm": 4.7198309898376465, "learning_rate": 0.0004985326643905511, "loss": 2.1942, "step": 1034 }, { "epoch": 0.050537109375, "grad_norm": 1.722399353981018, "learning_rate": 0.000498528667843521, "loss": 2.1873, "step": 1035 }, { "epoch": 0.0505859375, "grad_norm": 0.6900448799133301, "learning_rate": 0.0004985246658790939, "loss": 2.0515, "step": 1036 }, { "epoch": 0.050634765625, "grad_norm": 1.15213143825531, "learning_rate": 0.000498520658497367, "loss": 2.0675, "step": 1037 }, { "epoch": 0.05068359375, "grad_norm": 0.6649054884910583, "learning_rate": 0.0004985166456984375, "loss": 2.1057, "step": 1038 }, { "epoch": 0.050732421875, "grad_norm": 0.5801951289176941, "learning_rate": 0.0004985126274824023, "loss": 2.043, "step": 1039 }, { "epoch": 0.05078125, "grad_norm": 0.6153457760810852, "learning_rate": 0.0004985086038493591, "loss": 2.0934, "step": 1040 }, { "epoch": 0.050830078125, "grad_norm": 0.5720345973968506, "learning_rate": 0.0004985045747994053, "loss": 2.0669, "step": 1041 }, { "epoch": 0.05087890625, "grad_norm": 0.402263343334198, "learning_rate": 0.0004985005403326387, "loss": 2.0868, "step": 1042 }, { "epoch": 0.050927734375, "grad_norm": 0.4789344370365143, "learning_rate": 0.0004984965004491568, "loss": 2.0583, "step": 1043 }, { "epoch": 0.0509765625, "grad_norm": 0.38720592856407166, "learning_rate": 0.0004984924551490577, "loss": 2.0494, "step": 1044 }, { "epoch": 0.051025390625, "grad_norm": 0.4060901701450348, "learning_rate": 0.0004984884044324394, "loss": 2.038, "step": 1045 }, { "epoch": 0.05107421875, "grad_norm": 0.38420310616493225, "learning_rate": 0.0004984843482994001, "loss": 2.026, "step": 1046 }, { "epoch": 0.051123046875, "grad_norm": 0.3709697425365448, "learning_rate": 0.000498480286750038, "loss": 2.0387, "step": 1047 }, { "epoch": 0.051171875, "grad_norm": 0.37649744749069214, "learning_rate": 0.0004984762197844516, "loss": 2.0179, "step": 1048 }, { "epoch": 0.051220703125, "grad_norm": 0.3255007565021515, "learning_rate": 0.0004984721474027394, "loss": 2.084, "step": 1049 }, { "epoch": 0.05126953125, "grad_norm": 0.36699578166007996, "learning_rate": 0.0004984680696050004, "loss": 2.0871, "step": 1050 }, { "epoch": 0.051318359375, "grad_norm": 0.42492765188217163, "learning_rate": 0.0004984639863913332, "loss": 2.1062, "step": 1051 }, { "epoch": 0.0513671875, "grad_norm": 0.3712759017944336, "learning_rate": 0.0004984598977618366, "loss": 2.0642, "step": 1052 }, { "epoch": 0.051416015625, "grad_norm": 0.3895083963871002, "learning_rate": 0.0004984558037166098, "loss": 2.0679, "step": 1053 }, { "epoch": 0.05146484375, "grad_norm": 0.3520564138889313, "learning_rate": 0.0004984517042557523, "loss": 2.0658, "step": 1054 }, { "epoch": 0.051513671875, "grad_norm": 0.37385088205337524, "learning_rate": 0.000498447599379363, "loss": 2.054, "step": 1055 }, { "epoch": 0.0515625, "grad_norm": 0.363284707069397, "learning_rate": 0.0004984434890875417, "loss": 2.043, "step": 1056 }, { "epoch": 0.051611328125, "grad_norm": 0.3289850354194641, "learning_rate": 0.0004984393733803881, "loss": 2.0763, "step": 1057 }, { "epoch": 0.05166015625, "grad_norm": 0.43398329615592957, "learning_rate": 0.0004984352522580015, "loss": 2.1052, "step": 1058 }, { "epoch": 0.051708984375, "grad_norm": 0.3892976641654968, "learning_rate": 0.0004984311257204822, "loss": 2.0687, "step": 1059 }, { "epoch": 0.0517578125, "grad_norm": 0.43331557512283325, "learning_rate": 0.0004984269937679301, "loss": 2.043, "step": 1060 }, { "epoch": 0.051806640625, "grad_norm": 0.5257551074028015, "learning_rate": 0.0004984228564004452, "loss": 2.0635, "step": 1061 }, { "epoch": 0.05185546875, "grad_norm": 0.47911423444747925, "learning_rate": 0.000498418713618128, "loss": 2.0779, "step": 1062 }, { "epoch": 0.051904296875, "grad_norm": 0.43737250566482544, "learning_rate": 0.0004984145654210786, "loss": 2.0293, "step": 1063 }, { "epoch": 0.051953125, "grad_norm": 0.4232953190803528, "learning_rate": 0.0004984104118093979, "loss": 2.0495, "step": 1064 }, { "epoch": 0.052001953125, "grad_norm": 0.37089312076568604, "learning_rate": 0.0004984062527831864, "loss": 2.0658, "step": 1065 }, { "epoch": 0.05205078125, "grad_norm": 0.38316604495048523, "learning_rate": 0.0004984020883425447, "loss": 2.0734, "step": 1066 }, { "epoch": 0.052099609375, "grad_norm": 0.3836406469345093, "learning_rate": 0.000498397918487574, "loss": 2.0473, "step": 1067 }, { "epoch": 0.0521484375, "grad_norm": 0.3884745240211487, "learning_rate": 0.0004983937432183754, "loss": 2.0761, "step": 1068 }, { "epoch": 0.052197265625, "grad_norm": 0.3546217679977417, "learning_rate": 0.0004983895625350497, "loss": 2.049, "step": 1069 }, { "epoch": 0.05224609375, "grad_norm": 0.3191382586956024, "learning_rate": 0.0004983853764376987, "loss": 2.0125, "step": 1070 }, { "epoch": 0.052294921875, "grad_norm": 0.3864653408527374, "learning_rate": 0.0004983811849264236, "loss": 2.0518, "step": 1071 }, { "epoch": 0.05234375, "grad_norm": 0.43109649419784546, "learning_rate": 0.000498376988001326, "loss": 2.0711, "step": 1072 }, { "epoch": 0.052392578125, "grad_norm": 0.433426171541214, "learning_rate": 0.0004983727856625075, "loss": 2.0838, "step": 1073 }, { "epoch": 0.05244140625, "grad_norm": 0.587848424911499, "learning_rate": 0.0004983685779100702, "loss": 2.0966, "step": 1074 }, { "epoch": 0.052490234375, "grad_norm": 0.6266502737998962, "learning_rate": 0.0004983643647441159, "loss": 2.0718, "step": 1075 }, { "epoch": 0.0525390625, "grad_norm": 0.4438014030456543, "learning_rate": 0.0004983601461647469, "loss": 1.9891, "step": 1076 }, { "epoch": 0.052587890625, "grad_norm": 0.40020644664764404, "learning_rate": 0.0004983559221720652, "loss": 2.0574, "step": 1077 }, { "epoch": 0.05263671875, "grad_norm": 0.468889445066452, "learning_rate": 0.0004983516927661733, "loss": 2.0518, "step": 1078 }, { "epoch": 0.052685546875, "grad_norm": 0.40471652150154114, "learning_rate": 0.0004983474579471738, "loss": 2.0935, "step": 1079 }, { "epoch": 0.052734375, "grad_norm": 0.41535070538520813, "learning_rate": 0.0004983432177151691, "loss": 2.0503, "step": 1080 }, { "epoch": 0.052783203125, "grad_norm": 0.4316154718399048, "learning_rate": 0.000498338972070262, "loss": 2.0542, "step": 1081 }, { "epoch": 0.05283203125, "grad_norm": 0.4210887849330902, "learning_rate": 0.0004983347210125558, "loss": 2.0373, "step": 1082 }, { "epoch": 0.052880859375, "grad_norm": 0.42850038409233093, "learning_rate": 0.000498330464542153, "loss": 2.0384, "step": 1083 }, { "epoch": 0.0529296875, "grad_norm": 0.4439520835876465, "learning_rate": 0.0004983262026591569, "loss": 2.0421, "step": 1084 }, { "epoch": 0.052978515625, "grad_norm": 0.42737266421318054, "learning_rate": 0.0004983219353636709, "loss": 2.0461, "step": 1085 }, { "epoch": 0.05302734375, "grad_norm": 0.4370054304599762, "learning_rate": 0.0004983176626557986, "loss": 2.0367, "step": 1086 }, { "epoch": 0.053076171875, "grad_norm": 0.4206922650337219, "learning_rate": 0.000498313384535643, "loss": 2.0419, "step": 1087 }, { "epoch": 0.053125, "grad_norm": 0.3858455419540405, "learning_rate": 0.0004983091010033083, "loss": 2.0786, "step": 1088 }, { "epoch": 0.053173828125, "grad_norm": 0.4686727523803711, "learning_rate": 0.0004983048120588981, "loss": 2.0849, "step": 1089 }, { "epoch": 0.05322265625, "grad_norm": 0.5003519654273987, "learning_rate": 0.0004983005177025164, "loss": 2.0318, "step": 1090 }, { "epoch": 0.053271484375, "grad_norm": 0.429099977016449, "learning_rate": 0.0004982962179342673, "loss": 2.0889, "step": 1091 }, { "epoch": 0.0533203125, "grad_norm": 0.4113193452358246, "learning_rate": 0.0004982919127542549, "loss": 2.0931, "step": 1092 }, { "epoch": 0.053369140625, "grad_norm": 0.3376920223236084, "learning_rate": 0.0004982876021625836, "loss": 2.0595, "step": 1093 }, { "epoch": 0.05341796875, "grad_norm": 0.37518227100372314, "learning_rate": 0.0004982832861593579, "loss": 2.0521, "step": 1094 }, { "epoch": 0.053466796875, "grad_norm": 0.42907726764678955, "learning_rate": 0.0004982789647446824, "loss": 2.0371, "step": 1095 }, { "epoch": 0.053515625, "grad_norm": 0.4339234232902527, "learning_rate": 0.0004982746379186616, "loss": 2.0586, "step": 1096 }, { "epoch": 0.053564453125, "grad_norm": 0.3818851411342621, "learning_rate": 0.0004982703056814009, "loss": 2.0153, "step": 1097 }, { "epoch": 0.05361328125, "grad_norm": 0.4312569200992584, "learning_rate": 0.0004982659680330047, "loss": 2.0769, "step": 1098 }, { "epoch": 0.053662109375, "grad_norm": 0.4227607846260071, "learning_rate": 0.0004982616249735785, "loss": 2.0473, "step": 1099 }, { "epoch": 0.0537109375, "grad_norm": 0.4294086992740631, "learning_rate": 0.0004982572765032275, "loss": 2.022, "step": 1100 }, { "epoch": 0.053759765625, "grad_norm": 0.4270496070384979, "learning_rate": 0.000498252922622057, "loss": 2.0804, "step": 1101 }, { "epoch": 0.05380859375, "grad_norm": 0.42451292276382446, "learning_rate": 0.0004982485633301725, "loss": 2.0652, "step": 1102 }, { "epoch": 0.053857421875, "grad_norm": 0.4403752088546753, "learning_rate": 0.0004982441986276797, "loss": 2.0465, "step": 1103 }, { "epoch": 0.05390625, "grad_norm": 0.3987381160259247, "learning_rate": 0.0004982398285146846, "loss": 2.0685, "step": 1104 }, { "epoch": 0.053955078125, "grad_norm": 0.388124942779541, "learning_rate": 0.0004982354529912928, "loss": 2.0089, "step": 1105 }, { "epoch": 0.05400390625, "grad_norm": 0.42558392882347107, "learning_rate": 0.0004982310720576103, "loss": 2.0157, "step": 1106 }, { "epoch": 0.054052734375, "grad_norm": 0.5122731328010559, "learning_rate": 0.0004982266857137436, "loss": 2.0542, "step": 1107 }, { "epoch": 0.0541015625, "grad_norm": 0.5401005744934082, "learning_rate": 0.0004982222939597989, "loss": 2.0542, "step": 1108 }, { "epoch": 0.054150390625, "grad_norm": 0.41256794333457947, "learning_rate": 0.0004982178967958824, "loss": 2.1006, "step": 1109 }, { "epoch": 0.05419921875, "grad_norm": 0.4866516590118408, "learning_rate": 0.0004982134942221009, "loss": 2.0762, "step": 1110 }, { "epoch": 0.054248046875, "grad_norm": 0.5379114151000977, "learning_rate": 0.000498209086238561, "loss": 2.0991, "step": 1111 }, { "epoch": 0.054296875, "grad_norm": 0.4129810333251953, "learning_rate": 0.0004982046728453696, "loss": 2.0228, "step": 1112 }, { "epoch": 0.054345703125, "grad_norm": 0.4045696258544922, "learning_rate": 0.0004982002540426337, "loss": 2.0448, "step": 1113 }, { "epoch": 0.05439453125, "grad_norm": 0.47350195050239563, "learning_rate": 0.0004981958298304602, "loss": 2.0664, "step": 1114 }, { "epoch": 0.054443359375, "grad_norm": 0.4033444821834564, "learning_rate": 0.0004981914002089565, "loss": 2.0896, "step": 1115 }, { "epoch": 0.0544921875, "grad_norm": 0.38660287857055664, "learning_rate": 0.0004981869651782299, "loss": 2.0284, "step": 1116 }, { "epoch": 0.054541015625, "grad_norm": 0.42929232120513916, "learning_rate": 0.000498182524738388, "loss": 1.9815, "step": 1117 }, { "epoch": 0.05458984375, "grad_norm": 0.4384561777114868, "learning_rate": 0.0004981780788895382, "loss": 2.0743, "step": 1118 }, { "epoch": 0.054638671875, "grad_norm": 0.35544440150260925, "learning_rate": 0.0004981736276317883, "loss": 2.0514, "step": 1119 }, { "epoch": 0.0546875, "grad_norm": 0.4638873338699341, "learning_rate": 0.0004981691709652464, "loss": 2.0507, "step": 1120 }, { "epoch": 0.054736328125, "grad_norm": 0.447749525308609, "learning_rate": 0.0004981647088900202, "loss": 2.0528, "step": 1121 }, { "epoch": 0.05478515625, "grad_norm": 0.36032482981681824, "learning_rate": 0.0004981602414062181, "loss": 2.0412, "step": 1122 }, { "epoch": 0.054833984375, "grad_norm": 0.3697761595249176, "learning_rate": 0.0004981557685139481, "loss": 2.0018, "step": 1123 }, { "epoch": 0.0548828125, "grad_norm": 0.38965800404548645, "learning_rate": 0.000498151290213319, "loss": 2.0132, "step": 1124 }, { "epoch": 0.054931640625, "grad_norm": 0.4015759527683258, "learning_rate": 0.000498146806504439, "loss": 2.0079, "step": 1125 }, { "epoch": 0.05498046875, "grad_norm": 0.4142986536026001, "learning_rate": 0.0004981423173874169, "loss": 2.0429, "step": 1126 }, { "epoch": 0.055029296875, "grad_norm": 0.3855539858341217, "learning_rate": 0.0004981378228623614, "loss": 2.0682, "step": 1127 }, { "epoch": 0.055078125, "grad_norm": 0.41385430097579956, "learning_rate": 0.0004981333229293816, "loss": 2.0669, "step": 1128 }, { "epoch": 0.055126953125, "grad_norm": 0.4155343770980835, "learning_rate": 0.0004981288175885863, "loss": 2.0402, "step": 1129 }, { "epoch": 0.05517578125, "grad_norm": 0.5190935730934143, "learning_rate": 0.000498124306840085, "loss": 2.0579, "step": 1130 }, { "epoch": 0.055224609375, "grad_norm": 0.5124707818031311, "learning_rate": 0.0004981197906839868, "loss": 2.0014, "step": 1131 }, { "epoch": 0.0552734375, "grad_norm": 0.4449197053909302, "learning_rate": 0.0004981152691204011, "loss": 2.0317, "step": 1132 }, { "epoch": 0.055322265625, "grad_norm": 0.44166260957717896, "learning_rate": 0.0004981107421494378, "loss": 2.0448, "step": 1133 }, { "epoch": 0.05537109375, "grad_norm": 0.4235713481903076, "learning_rate": 0.0004981062097712064, "loss": 2.0603, "step": 1134 }, { "epoch": 0.055419921875, "grad_norm": 0.3709736764431, "learning_rate": 0.0004981016719858166, "loss": 2.0304, "step": 1135 }, { "epoch": 0.05546875, "grad_norm": 0.3568653464317322, "learning_rate": 0.0004980971287933787, "loss": 2.0488, "step": 1136 }, { "epoch": 0.055517578125, "grad_norm": 0.46187272667884827, "learning_rate": 0.0004980925801940027, "loss": 2.0019, "step": 1137 }, { "epoch": 0.05556640625, "grad_norm": 0.42545637488365173, "learning_rate": 0.0004980880261877986, "loss": 2.0471, "step": 1138 }, { "epoch": 0.055615234375, "grad_norm": 0.48914486169815063, "learning_rate": 0.000498083466774877, "loss": 2.0591, "step": 1139 }, { "epoch": 0.0556640625, "grad_norm": 0.5634473562240601, "learning_rate": 0.0004980789019553484, "loss": 2.0685, "step": 1140 }, { "epoch": 0.055712890625, "grad_norm": 0.47961190342903137, "learning_rate": 0.0004980743317293234, "loss": 2.0873, "step": 1141 }, { "epoch": 0.05576171875, "grad_norm": 0.4319491386413574, "learning_rate": 0.0004980697560969127, "loss": 2.0725, "step": 1142 }, { "epoch": 0.055810546875, "grad_norm": 0.4020498991012573, "learning_rate": 0.0004980651750582274, "loss": 2.0779, "step": 1143 }, { "epoch": 0.055859375, "grad_norm": 0.40014195442199707, "learning_rate": 0.0004980605886133782, "loss": 2.0428, "step": 1144 }, { "epoch": 0.055908203125, "grad_norm": 0.41012799739837646, "learning_rate": 0.0004980559967624766, "loss": 2.0498, "step": 1145 }, { "epoch": 0.05595703125, "grad_norm": 0.3664909899234772, "learning_rate": 0.0004980513995056334, "loss": 2.0401, "step": 1146 }, { "epoch": 0.056005859375, "grad_norm": 0.49614226818084717, "learning_rate": 0.0004980467968429607, "loss": 2.01, "step": 1147 }, { "epoch": 0.0560546875, "grad_norm": 0.40055420994758606, "learning_rate": 0.0004980421887745695, "loss": 2.0146, "step": 1148 }, { "epoch": 0.056103515625, "grad_norm": 0.4262978434562683, "learning_rate": 0.0004980375753005717, "loss": 2.0466, "step": 1149 }, { "epoch": 0.05615234375, "grad_norm": 0.5122435688972473, "learning_rate": 0.0004980329564210792, "loss": 2.0301, "step": 1150 }, { "epoch": 0.056201171875, "grad_norm": 0.45526614785194397, "learning_rate": 0.0004980283321362036, "loss": 2.1092, "step": 1151 }, { "epoch": 0.05625, "grad_norm": 0.38121283054351807, "learning_rate": 0.0004980237024460573, "loss": 2.0293, "step": 1152 }, { "epoch": 0.056298828125, "grad_norm": 0.38212618231773376, "learning_rate": 0.0004980190673507524, "loss": 2.0482, "step": 1153 }, { "epoch": 0.05634765625, "grad_norm": 0.3898129463195801, "learning_rate": 0.0004980144268504012, "loss": 2.0464, "step": 1154 }, { "epoch": 0.056396484375, "grad_norm": 0.40949803590774536, "learning_rate": 0.0004980097809451162, "loss": 2.039, "step": 1155 }, { "epoch": 0.0564453125, "grad_norm": 0.41765204071998596, "learning_rate": 0.00049800512963501, "loss": 2.0255, "step": 1156 }, { "epoch": 0.056494140625, "grad_norm": 0.4563169777393341, "learning_rate": 0.0004980004729201954, "loss": 1.994, "step": 1157 }, { "epoch": 0.05654296875, "grad_norm": 0.43240395188331604, "learning_rate": 0.0004979958108007851, "loss": 2.0518, "step": 1158 }, { "epoch": 0.056591796875, "grad_norm": 0.4024682939052582, "learning_rate": 0.0004979911432768921, "loss": 2.0569, "step": 1159 }, { "epoch": 0.056640625, "grad_norm": 0.42657312750816345, "learning_rate": 0.0004979864703486297, "loss": 2.0082, "step": 1160 }, { "epoch": 0.056689453125, "grad_norm": 0.4169178605079651, "learning_rate": 0.000497981792016111, "loss": 2.034, "step": 1161 }, { "epoch": 0.05673828125, "grad_norm": 0.5156140327453613, "learning_rate": 0.0004979771082794495, "loss": 2.0819, "step": 1162 }, { "epoch": 0.056787109375, "grad_norm": 0.5222786068916321, "learning_rate": 0.0004979724191387586, "loss": 2.0204, "step": 1163 }, { "epoch": 0.0568359375, "grad_norm": 0.43308308720588684, "learning_rate": 0.0004979677245941519, "loss": 2.073, "step": 1164 }, { "epoch": 0.056884765625, "grad_norm": 0.4701938033103943, "learning_rate": 0.0004979630246457434, "loss": 2.0262, "step": 1165 }, { "epoch": 0.05693359375, "grad_norm": 0.39455467462539673, "learning_rate": 0.0004979583192936468, "loss": 1.9797, "step": 1166 }, { "epoch": 0.056982421875, "grad_norm": 0.3551017940044403, "learning_rate": 0.0004979536085379762, "loss": 2.0917, "step": 1167 }, { "epoch": 0.05703125, "grad_norm": 0.37872380018234253, "learning_rate": 0.0004979488923788459, "loss": 1.9902, "step": 1168 }, { "epoch": 0.057080078125, "grad_norm": 0.42795875668525696, "learning_rate": 0.0004979441708163699, "loss": 2.0125, "step": 1169 }, { "epoch": 0.05712890625, "grad_norm": 0.38856005668640137, "learning_rate": 0.0004979394438506629, "loss": 2.0561, "step": 1170 }, { "epoch": 0.057177734375, "grad_norm": 0.39386820793151855, "learning_rate": 0.0004979347114818394, "loss": 2.0616, "step": 1171 }, { "epoch": 0.0572265625, "grad_norm": 0.40400490164756775, "learning_rate": 0.000497929973710014, "loss": 2.0335, "step": 1172 }, { "epoch": 0.057275390625, "grad_norm": 0.4279368817806244, "learning_rate": 0.0004979252305353017, "loss": 2.0627, "step": 1173 }, { "epoch": 0.05732421875, "grad_norm": 0.43895620107650757, "learning_rate": 0.0004979204819578172, "loss": 2.0543, "step": 1174 }, { "epoch": 0.057373046875, "grad_norm": 0.48671093583106995, "learning_rate": 0.0004979157279776758, "loss": 2.0923, "step": 1175 }, { "epoch": 0.057421875, "grad_norm": 0.4875694215297699, "learning_rate": 0.0004979109685949926, "loss": 2.041, "step": 1176 }, { "epoch": 0.057470703125, "grad_norm": 0.6373558640480042, "learning_rate": 0.000497906203809883, "loss": 2.035, "step": 1177 }, { "epoch": 0.05751953125, "grad_norm": 0.6253651976585388, "learning_rate": 0.0004979014336224625, "loss": 2.0286, "step": 1178 }, { "epoch": 0.057568359375, "grad_norm": 0.37548673152923584, "learning_rate": 0.0004978966580328466, "loss": 2.0876, "step": 1179 }, { "epoch": 0.0576171875, "grad_norm": 0.5218214392662048, "learning_rate": 0.0004978918770411513, "loss": 2.0421, "step": 1180 }, { "epoch": 0.057666015625, "grad_norm": 0.40862542390823364, "learning_rate": 0.0004978870906474921, "loss": 2.048, "step": 1181 }, { "epoch": 0.05771484375, "grad_norm": 0.43247613310813904, "learning_rate": 0.0004978822988519853, "loss": 2.0381, "step": 1182 }, { "epoch": 0.057763671875, "grad_norm": 0.5087521076202393, "learning_rate": 0.0004978775016547468, "loss": 2.0554, "step": 1183 }, { "epoch": 0.0578125, "grad_norm": 0.43084025382995605, "learning_rate": 0.0004978726990558931, "loss": 2.0606, "step": 1184 }, { "epoch": 0.057861328125, "grad_norm": 0.4079838693141937, "learning_rate": 0.0004978678910555405, "loss": 1.9912, "step": 1185 }, { "epoch": 0.05791015625, "grad_norm": 0.38082417845726013, "learning_rate": 0.0004978630776538056, "loss": 2.0506, "step": 1186 }, { "epoch": 0.057958984375, "grad_norm": 0.3926604092121124, "learning_rate": 0.0004978582588508047, "loss": 2.0332, "step": 1187 }, { "epoch": 0.0580078125, "grad_norm": 0.4389594495296478, "learning_rate": 0.000497853434646655, "loss": 2.0686, "step": 1188 }, { "epoch": 0.058056640625, "grad_norm": 0.41032347083091736, "learning_rate": 0.0004978486050414734, "loss": 2.0573, "step": 1189 }, { "epoch": 0.05810546875, "grad_norm": 0.31083154678344727, "learning_rate": 0.0004978437700353766, "loss": 2.0758, "step": 1190 }, { "epoch": 0.058154296875, "grad_norm": 0.347037136554718, "learning_rate": 0.0004978389296284821, "loss": 1.9886, "step": 1191 }, { "epoch": 0.058203125, "grad_norm": 0.4342493712902069, "learning_rate": 0.0004978340838209071, "loss": 2.0091, "step": 1192 }, { "epoch": 0.058251953125, "grad_norm": 0.35909199714660645, "learning_rate": 0.000497829232612769, "loss": 2.0413, "step": 1193 }, { "epoch": 0.05830078125, "grad_norm": 0.36676517128944397, "learning_rate": 0.0004978243760041855, "loss": 2.0403, "step": 1194 }, { "epoch": 0.058349609375, "grad_norm": 0.3959878385066986, "learning_rate": 0.0004978195139952742, "loss": 2.0144, "step": 1195 }, { "epoch": 0.0583984375, "grad_norm": 0.42410752177238464, "learning_rate": 0.0004978146465861531, "loss": 1.9885, "step": 1196 }, { "epoch": 0.058447265625, "grad_norm": 0.40046414732933044, "learning_rate": 0.0004978097737769399, "loss": 2.0348, "step": 1197 }, { "epoch": 0.05849609375, "grad_norm": 0.34749898314476013, "learning_rate": 0.0004978048955677529, "loss": 2.0, "step": 1198 }, { "epoch": 0.058544921875, "grad_norm": 0.40454593300819397, "learning_rate": 0.0004978000119587101, "loss": 2.0473, "step": 1199 }, { "epoch": 0.05859375, "grad_norm": 0.39215943217277527, "learning_rate": 0.0004977951229499302, "loss": 2.0347, "step": 1200 }, { "epoch": 0.058642578125, "grad_norm": 0.33684712648391724, "learning_rate": 0.0004977902285415314, "loss": 2.0662, "step": 1201 }, { "epoch": 0.05869140625, "grad_norm": 0.4254910349845886, "learning_rate": 0.0004977853287336325, "loss": 2.0162, "step": 1202 }, { "epoch": 0.058740234375, "grad_norm": 0.4309912323951721, "learning_rate": 0.0004977804235263521, "loss": 2.0549, "step": 1203 }, { "epoch": 0.0587890625, "grad_norm": 0.4236809313297272, "learning_rate": 0.0004977755129198092, "loss": 2.0714, "step": 1204 }, { "epoch": 0.058837890625, "grad_norm": 0.4495530426502228, "learning_rate": 0.0004977705969141228, "loss": 2.0352, "step": 1205 }, { "epoch": 0.05888671875, "grad_norm": 0.4905261993408203, "learning_rate": 0.0004977656755094119, "loss": 2.0162, "step": 1206 }, { "epoch": 0.058935546875, "grad_norm": 0.44943422079086304, "learning_rate": 0.0004977607487057959, "loss": 2.0889, "step": 1207 }, { "epoch": 0.058984375, "grad_norm": 0.4088650047779083, "learning_rate": 0.0004977558165033942, "loss": 2.0716, "step": 1208 }, { "epoch": 0.059033203125, "grad_norm": 0.4961899518966675, "learning_rate": 0.0004977508789023264, "loss": 2.0052, "step": 1209 }, { "epoch": 0.05908203125, "grad_norm": 0.4556625187397003, "learning_rate": 0.0004977459359027121, "loss": 2.0748, "step": 1210 }, { "epoch": 0.059130859375, "grad_norm": 0.4538189768791199, "learning_rate": 0.000497740987504671, "loss": 2.07, "step": 1211 }, { "epoch": 0.0591796875, "grad_norm": 0.6183142066001892, "learning_rate": 0.0004977360337083232, "loss": 2.0255, "step": 1212 }, { "epoch": 0.059228515625, "grad_norm": 0.39629796147346497, "learning_rate": 0.0004977310745137886, "loss": 2.0395, "step": 1213 }, { "epoch": 0.05927734375, "grad_norm": 0.36019790172576904, "learning_rate": 0.0004977261099211876, "loss": 2.0727, "step": 1214 }, { "epoch": 0.059326171875, "grad_norm": 0.4096202850341797, "learning_rate": 0.0004977211399306402, "loss": 2.0093, "step": 1215 }, { "epoch": 0.059375, "grad_norm": 0.4189750850200653, "learning_rate": 0.0004977161645422672, "loss": 2.0038, "step": 1216 }, { "epoch": 0.059423828125, "grad_norm": 0.5519155263900757, "learning_rate": 0.0004977111837561889, "loss": 2.0143, "step": 1217 }, { "epoch": 0.05947265625, "grad_norm": 0.5508014559745789, "learning_rate": 0.0004977061975725264, "loss": 2.0201, "step": 1218 }, { "epoch": 0.059521484375, "grad_norm": 0.4522704780101776, "learning_rate": 0.0004977012059914, "loss": 2.0498, "step": 1219 }, { "epoch": 0.0595703125, "grad_norm": 0.437378853559494, "learning_rate": 0.000497696209012931, "loss": 2.0096, "step": 1220 }, { "epoch": 0.059619140625, "grad_norm": 0.3643661141395569, "learning_rate": 0.0004976912066372405, "loss": 2.085, "step": 1221 }, { "epoch": 0.05966796875, "grad_norm": 0.474759966135025, "learning_rate": 0.0004976861988644498, "loss": 2.0661, "step": 1222 }, { "epoch": 0.059716796875, "grad_norm": 0.4567228853702545, "learning_rate": 0.00049768118569468, "loss": 2.0044, "step": 1223 }, { "epoch": 0.059765625, "grad_norm": 0.3239445984363556, "learning_rate": 0.0004976761671280529, "loss": 2.0432, "step": 1224 }, { "epoch": 0.059814453125, "grad_norm": 0.43669581413269043, "learning_rate": 0.0004976711431646898, "loss": 2.0046, "step": 1225 }, { "epoch": 0.05986328125, "grad_norm": 0.31738758087158203, "learning_rate": 0.0004976661138047128, "loss": 2.0538, "step": 1226 }, { "epoch": 0.059912109375, "grad_norm": 0.48839274048805237, "learning_rate": 0.0004976610790482437, "loss": 2.0923, "step": 1227 }, { "epoch": 0.0599609375, "grad_norm": 0.4523884654045105, "learning_rate": 0.0004976560388954044, "loss": 1.9943, "step": 1228 }, { "epoch": 0.060009765625, "grad_norm": 0.45376238226890564, "learning_rate": 0.0004976509933463171, "loss": 2.0588, "step": 1229 }, { "epoch": 0.06005859375, "grad_norm": 0.45356205105781555, "learning_rate": 0.0004976459424011041, "loss": 1.9888, "step": 1230 }, { "epoch": 0.060107421875, "grad_norm": 0.394033282995224, "learning_rate": 0.0004976408860598878, "loss": 1.9876, "step": 1231 }, { "epoch": 0.06015625, "grad_norm": 0.4284716844558716, "learning_rate": 0.0004976358243227908, "loss": 2.035, "step": 1232 }, { "epoch": 0.060205078125, "grad_norm": 0.39686745405197144, "learning_rate": 0.0004976307571899357, "loss": 2.0069, "step": 1233 }, { "epoch": 0.06025390625, "grad_norm": 0.41421031951904297, "learning_rate": 0.0004976256846614454, "loss": 2.0233, "step": 1234 }, { "epoch": 0.060302734375, "grad_norm": 0.39933305978775024, "learning_rate": 0.0004976206067374427, "loss": 1.9977, "step": 1235 }, { "epoch": 0.0603515625, "grad_norm": 0.38699305057525635, "learning_rate": 0.0004976155234180507, "loss": 2.0371, "step": 1236 }, { "epoch": 0.060400390625, "grad_norm": 0.44300130009651184, "learning_rate": 0.0004976104347033929, "loss": 2.0255, "step": 1237 }, { "epoch": 0.06044921875, "grad_norm": 0.4369603097438812, "learning_rate": 0.0004976053405935921, "loss": 2.0235, "step": 1238 }, { "epoch": 0.060498046875, "grad_norm": 0.4388306438922882, "learning_rate": 0.0004976002410887722, "loss": 2.0657, "step": 1239 }, { "epoch": 0.060546875, "grad_norm": 0.3930123746395111, "learning_rate": 0.0004975951361890565, "loss": 2.0656, "step": 1240 }, { "epoch": 0.060595703125, "grad_norm": 0.39000052213668823, "learning_rate": 0.0004975900258945689, "loss": 2.0751, "step": 1241 }, { "epoch": 0.06064453125, "grad_norm": 0.4229252338409424, "learning_rate": 0.0004975849102054332, "loss": 2.0029, "step": 1242 }, { "epoch": 0.060693359375, "grad_norm": 0.3526880145072937, "learning_rate": 0.0004975797891217733, "loss": 2.0515, "step": 1243 }, { "epoch": 0.0607421875, "grad_norm": 0.505046010017395, "learning_rate": 0.0004975746626437135, "loss": 1.9811, "step": 1244 }, { "epoch": 0.060791015625, "grad_norm": 0.5188003182411194, "learning_rate": 0.0004975695307713778, "loss": 1.9845, "step": 1245 }, { "epoch": 0.06083984375, "grad_norm": 0.5722090601921082, "learning_rate": 0.0004975643935048908, "loss": 2.0836, "step": 1246 }, { "epoch": 0.060888671875, "grad_norm": 0.5077537298202515, "learning_rate": 0.000497559250844377, "loss": 1.9993, "step": 1247 }, { "epoch": 0.0609375, "grad_norm": 0.6063058972358704, "learning_rate": 0.0004975541027899609, "loss": 2.0559, "step": 1248 }, { "epoch": 0.060986328125, "grad_norm": 0.5317792296409607, "learning_rate": 0.0004975489493417673, "loss": 1.9898, "step": 1249 }, { "epoch": 0.06103515625, "grad_norm": 0.44917863607406616, "learning_rate": 0.0004975437904999211, "loss": 2.0243, "step": 1250 }, { "epoch": 0.061083984375, "grad_norm": 0.4764515161514282, "learning_rate": 0.0004975386262645472, "loss": 1.993, "step": 1251 }, { "epoch": 0.0611328125, "grad_norm": 0.5017029047012329, "learning_rate": 0.0004975334566357712, "loss": 2.005, "step": 1252 }, { "epoch": 0.061181640625, "grad_norm": 0.3785611093044281, "learning_rate": 0.0004975282816137179, "loss": 2.0393, "step": 1253 }, { "epoch": 0.06123046875, "grad_norm": 0.32505279779434204, "learning_rate": 0.000497523101198513, "loss": 2.0348, "step": 1254 }, { "epoch": 0.061279296875, "grad_norm": 0.33986350893974304, "learning_rate": 0.000497517915390282, "loss": 2.0207, "step": 1255 }, { "epoch": 0.061328125, "grad_norm": 0.3673478066921234, "learning_rate": 0.0004975127241891505, "loss": 2.039, "step": 1256 }, { "epoch": 0.061376953125, "grad_norm": 0.38864508271217346, "learning_rate": 0.0004975075275952444, "loss": 2.0383, "step": 1257 }, { "epoch": 0.06142578125, "grad_norm": 0.35606950521469116, "learning_rate": 0.0004975023256086896, "loss": 2.0111, "step": 1258 }, { "epoch": 0.061474609375, "grad_norm": 0.4413761794567108, "learning_rate": 0.000497497118229612, "loss": 2.021, "step": 1259 }, { "epoch": 0.0615234375, "grad_norm": 0.5080715417861938, "learning_rate": 0.0004974919054581382, "loss": 2.0258, "step": 1260 }, { "epoch": 0.061572265625, "grad_norm": 0.5152518153190613, "learning_rate": 0.0004974866872943944, "loss": 1.9751, "step": 1261 }, { "epoch": 0.06162109375, "grad_norm": 0.39252498745918274, "learning_rate": 0.0004974814637385067, "loss": 2.0561, "step": 1262 }, { "epoch": 0.061669921875, "grad_norm": 0.37057846784591675, "learning_rate": 0.0004974762347906023, "loss": 1.9804, "step": 1263 }, { "epoch": 0.06171875, "grad_norm": 0.4557764530181885, "learning_rate": 0.0004974710004508073, "loss": 2.0115, "step": 1264 }, { "epoch": 0.061767578125, "grad_norm": 0.3712522089481354, "learning_rate": 0.0004974657607192491, "loss": 2.0277, "step": 1265 }, { "epoch": 0.06181640625, "grad_norm": 0.39153942465782166, "learning_rate": 0.0004974605155960545, "loss": 1.9668, "step": 1266 }, { "epoch": 0.061865234375, "grad_norm": 0.4350894093513489, "learning_rate": 0.0004974552650813504, "loss": 2.0555, "step": 1267 }, { "epoch": 0.0619140625, "grad_norm": 0.3741399943828583, "learning_rate": 0.0004974500091752643, "loss": 2.0152, "step": 1268 }, { "epoch": 0.061962890625, "grad_norm": 0.3703256845474243, "learning_rate": 0.0004974447478779234, "loss": 2.0399, "step": 1269 }, { "epoch": 0.06201171875, "grad_norm": 0.39156264066696167, "learning_rate": 0.0004974394811894555, "loss": 2.0185, "step": 1270 }, { "epoch": 0.062060546875, "grad_norm": 0.3650452494621277, "learning_rate": 0.000497434209109988, "loss": 2.0533, "step": 1271 }, { "epoch": 0.062109375, "grad_norm": 0.35050010681152344, "learning_rate": 0.0004974289316396487, "loss": 2.015, "step": 1272 }, { "epoch": 0.062158203125, "grad_norm": 0.4504726231098175, "learning_rate": 0.0004974236487785657, "loss": 1.9962, "step": 1273 }, { "epoch": 0.06220703125, "grad_norm": 0.4753045439720154, "learning_rate": 0.0004974183605268667, "loss": 2.0312, "step": 1274 }, { "epoch": 0.062255859375, "grad_norm": 0.5019488334655762, "learning_rate": 0.0004974130668846801, "loss": 2.0059, "step": 1275 }, { "epoch": 0.0623046875, "grad_norm": 0.5188403129577637, "learning_rate": 0.0004974077678521343, "loss": 2.0045, "step": 1276 }, { "epoch": 0.062353515625, "grad_norm": 0.4368346035480499, "learning_rate": 0.0004974024634293574, "loss": 2.0299, "step": 1277 }, { "epoch": 0.06240234375, "grad_norm": 0.41943198442459106, "learning_rate": 0.000497397153616478, "loss": 2.0361, "step": 1278 }, { "epoch": 0.062451171875, "grad_norm": 0.4363477826118469, "learning_rate": 0.0004973918384136251, "loss": 2.0474, "step": 1279 }, { "epoch": 0.0625, "grad_norm": 0.4157138168811798, "learning_rate": 0.0004973865178209274, "loss": 2.0261, "step": 1280 }, { "epoch": 0.062548828125, "grad_norm": 0.3760819435119629, "learning_rate": 0.0004973811918385136, "loss": 2.0178, "step": 1281 }, { "epoch": 0.06259765625, "grad_norm": 0.3963093161582947, "learning_rate": 0.000497375860466513, "loss": 2.0035, "step": 1282 }, { "epoch": 0.062646484375, "grad_norm": 0.3574170470237732, "learning_rate": 0.0004973705237050548, "loss": 1.9976, "step": 1283 }, { "epoch": 0.0626953125, "grad_norm": 0.35461530089378357, "learning_rate": 0.0004973651815542682, "loss": 2.0481, "step": 1284 }, { "epoch": 0.062744140625, "grad_norm": 0.3896317183971405, "learning_rate": 0.0004973598340142829, "loss": 2.033, "step": 1285 }, { "epoch": 0.06279296875, "grad_norm": 0.3361157774925232, "learning_rate": 0.0004973544810852284, "loss": 2.0975, "step": 1286 }, { "epoch": 0.062841796875, "grad_norm": 0.35614997148513794, "learning_rate": 0.0004973491227672343, "loss": 2.0271, "step": 1287 }, { "epoch": 0.062890625, "grad_norm": 0.3873710334300995, "learning_rate": 0.0004973437590604307, "loss": 2.0386, "step": 1288 }, { "epoch": 0.062939453125, "grad_norm": 0.5329270958900452, "learning_rate": 0.0004973383899649474, "loss": 2.0439, "step": 1289 }, { "epoch": 0.06298828125, "grad_norm": 0.44913262128829956, "learning_rate": 0.0004973330154809146, "loss": 2.0102, "step": 1290 }, { "epoch": 0.063037109375, "grad_norm": 0.4804060161113739, "learning_rate": 0.0004973276356084626, "loss": 2.0343, "step": 1291 }, { "epoch": 0.0630859375, "grad_norm": 0.5259008407592773, "learning_rate": 0.0004973222503477216, "loss": 2.0097, "step": 1292 }, { "epoch": 0.063134765625, "grad_norm": 0.5071630477905273, "learning_rate": 0.0004973168596988224, "loss": 2.0278, "step": 1293 }, { "epoch": 0.06318359375, "grad_norm": 0.4437233507633209, "learning_rate": 0.0004973114636618954, "loss": 2.0452, "step": 1294 }, { "epoch": 0.063232421875, "grad_norm": 0.41113555431365967, "learning_rate": 0.0004973060622370715, "loss": 2.0651, "step": 1295 }, { "epoch": 0.06328125, "grad_norm": 0.6672564148902893, "learning_rate": 0.0004973006554244816, "loss": 2.0487, "step": 1296 }, { "epoch": 0.063330078125, "grad_norm": 0.4779559373855591, "learning_rate": 0.0004972952432242567, "loss": 2.0008, "step": 1297 }, { "epoch": 0.06337890625, "grad_norm": 0.36571747064590454, "learning_rate": 0.000497289825636528, "loss": 2.0995, "step": 1298 }, { "epoch": 0.063427734375, "grad_norm": 0.404430627822876, "learning_rate": 0.0004972844026614268, "loss": 2.0409, "step": 1299 }, { "epoch": 0.0634765625, "grad_norm": 0.4045650064945221, "learning_rate": 0.0004972789742990846, "loss": 2.0358, "step": 1300 }, { "epoch": 0.063525390625, "grad_norm": 0.3495927155017853, "learning_rate": 0.0004972735405496328, "loss": 2.0459, "step": 1301 }, { "epoch": 0.06357421875, "grad_norm": 0.4306910037994385, "learning_rate": 0.0004972681014132031, "loss": 2.0122, "step": 1302 }, { "epoch": 0.063623046875, "grad_norm": 0.43849045038223267, "learning_rate": 0.0004972626568899275, "loss": 2.0327, "step": 1303 }, { "epoch": 0.063671875, "grad_norm": 0.38042503595352173, "learning_rate": 0.0004972572069799378, "loss": 1.9817, "step": 1304 }, { "epoch": 0.063720703125, "grad_norm": 0.41002100706100464, "learning_rate": 0.0004972517516833661, "loss": 2.0273, "step": 1305 }, { "epoch": 0.06376953125, "grad_norm": 0.47751542925834656, "learning_rate": 0.0004972462910003447, "loss": 2.0171, "step": 1306 }, { "epoch": 0.063818359375, "grad_norm": 0.4204331338405609, "learning_rate": 0.0004972408249310059, "loss": 2.0875, "step": 1307 }, { "epoch": 0.0638671875, "grad_norm": 0.4725339710712433, "learning_rate": 0.0004972353534754821, "loss": 2.064, "step": 1308 }, { "epoch": 0.063916015625, "grad_norm": 0.36547428369522095, "learning_rate": 0.0004972298766339061, "loss": 2.0122, "step": 1309 }, { "epoch": 0.06396484375, "grad_norm": 0.38583076000213623, "learning_rate": 0.0004972243944064103, "loss": 2.0294, "step": 1310 }, { "epoch": 0.064013671875, "grad_norm": 0.36292606592178345, "learning_rate": 0.0004972189067931279, "loss": 2.0425, "step": 1311 }, { "epoch": 0.0640625, "grad_norm": 0.3708207309246063, "learning_rate": 0.0004972134137941918, "loss": 2.0514, "step": 1312 }, { "epoch": 0.064111328125, "grad_norm": 0.3818870186805725, "learning_rate": 0.0004972079154097349, "loss": 2.0705, "step": 1313 }, { "epoch": 0.06416015625, "grad_norm": 0.3814767301082611, "learning_rate": 0.0004972024116398908, "loss": 2.0364, "step": 1314 }, { "epoch": 0.064208984375, "grad_norm": 0.38052985072135925, "learning_rate": 0.0004971969024847927, "loss": 2.0273, "step": 1315 }, { "epoch": 0.0642578125, "grad_norm": 0.4025599956512451, "learning_rate": 0.0004971913879445742, "loss": 2.0546, "step": 1316 }, { "epoch": 0.064306640625, "grad_norm": 0.3987191319465637, "learning_rate": 0.0004971858680193689, "loss": 2.0362, "step": 1317 }, { "epoch": 0.06435546875, "grad_norm": 0.33584463596343994, "learning_rate": 0.0004971803427093105, "loss": 2.0066, "step": 1318 }, { "epoch": 0.064404296875, "grad_norm": 0.4236074686050415, "learning_rate": 0.0004971748120145331, "loss": 2.0562, "step": 1319 }, { "epoch": 0.064453125, "grad_norm": 0.6240241527557373, "learning_rate": 0.0004971692759351705, "loss": 2.016, "step": 1320 }, { "epoch": 0.064501953125, "grad_norm": 0.7135176062583923, "learning_rate": 0.0004971637344713571, "loss": 2.029, "step": 1321 }, { "epoch": 0.06455078125, "grad_norm": 0.4842343330383301, "learning_rate": 0.0004971581876232272, "loss": 2.0676, "step": 1322 }, { "epoch": 0.064599609375, "grad_norm": 0.3762984871864319, "learning_rate": 0.0004971526353909151, "loss": 2.0371, "step": 1323 }, { "epoch": 0.0646484375, "grad_norm": 0.47832393646240234, "learning_rate": 0.0004971470777745553, "loss": 2.0483, "step": 1324 }, { "epoch": 0.064697265625, "grad_norm": 0.45573583245277405, "learning_rate": 0.0004971415147742827, "loss": 1.9949, "step": 1325 }, { "epoch": 0.06474609375, "grad_norm": 0.3782083988189697, "learning_rate": 0.0004971359463902319, "loss": 2.0097, "step": 1326 }, { "epoch": 0.064794921875, "grad_norm": 0.35361889004707336, "learning_rate": 0.0004971303726225381, "loss": 2.0116, "step": 1327 }, { "epoch": 0.06484375, "grad_norm": 0.33323055505752563, "learning_rate": 0.0004971247934713362, "loss": 2.0418, "step": 1328 }, { "epoch": 0.064892578125, "grad_norm": 0.374426931142807, "learning_rate": 0.0004971192089367615, "loss": 2.049, "step": 1329 }, { "epoch": 0.06494140625, "grad_norm": 0.3332066535949707, "learning_rate": 0.0004971136190189494, "loss": 2.0543, "step": 1330 }, { "epoch": 0.064990234375, "grad_norm": 0.3435828983783722, "learning_rate": 0.0004971080237180353, "loss": 2.02, "step": 1331 }, { "epoch": 0.0650390625, "grad_norm": 0.37159690260887146, "learning_rate": 0.0004971024230341546, "loss": 2.0019, "step": 1332 }, { "epoch": 0.065087890625, "grad_norm": 0.3335251808166504, "learning_rate": 0.0004970968169674434, "loss": 1.9867, "step": 1333 }, { "epoch": 0.06513671875, "grad_norm": 0.3716065585613251, "learning_rate": 0.0004970912055180373, "loss": 2.0672, "step": 1334 }, { "epoch": 0.065185546875, "grad_norm": 0.4087601900100708, "learning_rate": 0.0004970855886860725, "loss": 2.0385, "step": 1335 }, { "epoch": 0.065234375, "grad_norm": 0.3295016884803772, "learning_rate": 0.000497079966471685, "loss": 2.0131, "step": 1336 }, { "epoch": 0.065283203125, "grad_norm": 0.3713333010673523, "learning_rate": 0.0004970743388750112, "loss": 2.0392, "step": 1337 }, { "epoch": 0.06533203125, "grad_norm": 0.3663235008716583, "learning_rate": 0.0004970687058961873, "loss": 2.036, "step": 1338 }, { "epoch": 0.065380859375, "grad_norm": 0.38723868131637573, "learning_rate": 0.0004970630675353499, "loss": 1.9668, "step": 1339 }, { "epoch": 0.0654296875, "grad_norm": 0.3602182865142822, "learning_rate": 0.0004970574237926356, "loss": 2.0424, "step": 1340 }, { "epoch": 0.065478515625, "grad_norm": 0.34589001536369324, "learning_rate": 0.0004970517746681814, "loss": 2.0353, "step": 1341 }, { "epoch": 0.06552734375, "grad_norm": 0.40101689100265503, "learning_rate": 0.000497046120162124, "loss": 2.0383, "step": 1342 }, { "epoch": 0.065576171875, "grad_norm": 0.5229674577713013, "learning_rate": 0.0004970404602746004, "loss": 2.0692, "step": 1343 }, { "epoch": 0.065625, "grad_norm": 0.4425562918186188, "learning_rate": 0.0004970347950057478, "loss": 2.0461, "step": 1344 }, { "epoch": 0.065673828125, "grad_norm": 0.36149150133132935, "learning_rate": 0.0004970291243557036, "loss": 2.0127, "step": 1345 }, { "epoch": 0.06572265625, "grad_norm": 0.4153628647327423, "learning_rate": 0.0004970234483246053, "loss": 1.9928, "step": 1346 }, { "epoch": 0.065771484375, "grad_norm": 0.4812498688697815, "learning_rate": 0.0004970177669125903, "loss": 2.0795, "step": 1347 }, { "epoch": 0.0658203125, "grad_norm": 0.4718201756477356, "learning_rate": 0.0004970120801197964, "loss": 2.0768, "step": 1348 }, { "epoch": 0.065869140625, "grad_norm": 0.3427468538284302, "learning_rate": 0.0004970063879463614, "loss": 2.0438, "step": 1349 }, { "epoch": 0.06591796875, "grad_norm": 0.38033831119537354, "learning_rate": 0.0004970006903924231, "loss": 2.0368, "step": 1350 }, { "epoch": 0.065966796875, "grad_norm": 0.4401697814464569, "learning_rate": 0.0004969949874581198, "loss": 1.9967, "step": 1351 }, { "epoch": 0.066015625, "grad_norm": 0.36053165793418884, "learning_rate": 0.0004969892791435896, "loss": 2.0336, "step": 1352 }, { "epoch": 0.066064453125, "grad_norm": 0.46503737568855286, "learning_rate": 0.0004969835654489708, "loss": 2.0099, "step": 1353 }, { "epoch": 0.06611328125, "grad_norm": 0.5399351119995117, "learning_rate": 0.0004969778463744021, "loss": 1.9698, "step": 1354 }, { "epoch": 0.066162109375, "grad_norm": 0.4082862436771393, "learning_rate": 0.000496972121920022, "loss": 2.0284, "step": 1355 }, { "epoch": 0.0662109375, "grad_norm": 0.48971787095069885, "learning_rate": 0.000496966392085969, "loss": 2.0391, "step": 1356 }, { "epoch": 0.066259765625, "grad_norm": 0.47161898016929626, "learning_rate": 0.0004969606568723823, "loss": 1.9996, "step": 1357 }, { "epoch": 0.06630859375, "grad_norm": 0.40319007635116577, "learning_rate": 0.0004969549162794007, "loss": 2.0023, "step": 1358 }, { "epoch": 0.066357421875, "grad_norm": 0.5279390811920166, "learning_rate": 0.0004969491703071633, "loss": 2.0465, "step": 1359 }, { "epoch": 0.06640625, "grad_norm": 0.4664466679096222, "learning_rate": 0.0004969434189558096, "loss": 2.017, "step": 1360 }, { "epoch": 0.066455078125, "grad_norm": 0.43247243762016296, "learning_rate": 0.0004969376622254788, "loss": 2.0414, "step": 1361 }, { "epoch": 0.06650390625, "grad_norm": 0.3528018891811371, "learning_rate": 0.0004969319001163104, "loss": 2.013, "step": 1362 }, { "epoch": 0.066552734375, "grad_norm": 0.41825276613235474, "learning_rate": 0.0004969261326284441, "loss": 2.0293, "step": 1363 }, { "epoch": 0.0666015625, "grad_norm": 0.4487457573413849, "learning_rate": 0.0004969203597620197, "loss": 2.0544, "step": 1364 }, { "epoch": 0.066650390625, "grad_norm": 0.3393575847148895, "learning_rate": 0.0004969145815171772, "loss": 2.0564, "step": 1365 }, { "epoch": 0.06669921875, "grad_norm": 0.4088118076324463, "learning_rate": 0.0004969087978940564, "loss": 2.028, "step": 1366 }, { "epoch": 0.066748046875, "grad_norm": 0.371059387922287, "learning_rate": 0.0004969030088927977, "loss": 2.0111, "step": 1367 }, { "epoch": 0.066796875, "grad_norm": 0.29444706439971924, "learning_rate": 0.0004968972145135412, "loss": 2.0197, "step": 1368 }, { "epoch": 0.066845703125, "grad_norm": 0.41875430941581726, "learning_rate": 0.0004968914147564275, "loss": 2.0466, "step": 1369 }, { "epoch": 0.06689453125, "grad_norm": 0.40749117732048035, "learning_rate": 0.0004968856096215971, "loss": 1.9917, "step": 1370 }, { "epoch": 0.066943359375, "grad_norm": 0.3529854416847229, "learning_rate": 0.0004968797991091907, "loss": 2.0517, "step": 1371 }, { "epoch": 0.0669921875, "grad_norm": 0.4950752258300781, "learning_rate": 0.000496873983219349, "loss": 2.0382, "step": 1372 }, { "epoch": 0.067041015625, "grad_norm": 0.42077332735061646, "learning_rate": 0.0004968681619522132, "loss": 2.0053, "step": 1373 }, { "epoch": 0.06708984375, "grad_norm": 0.33670178055763245, "learning_rate": 0.0004968623353079242, "loss": 2.0116, "step": 1374 }, { "epoch": 0.067138671875, "grad_norm": 0.4673776626586914, "learning_rate": 0.0004968565032866233, "loss": 2.0366, "step": 1375 }, { "epoch": 0.0671875, "grad_norm": 0.48355022072792053, "learning_rate": 0.0004968506658884517, "loss": 2.0222, "step": 1376 }, { "epoch": 0.067236328125, "grad_norm": 0.5167339444160461, "learning_rate": 0.000496844823113551, "loss": 2.0155, "step": 1377 }, { "epoch": 0.06728515625, "grad_norm": 0.5243100523948669, "learning_rate": 0.0004968389749620629, "loss": 2.0481, "step": 1378 }, { "epoch": 0.067333984375, "grad_norm": 0.461652547121048, "learning_rate": 0.0004968331214341289, "loss": 2.0555, "step": 1379 }, { "epoch": 0.0673828125, "grad_norm": 0.46401211619377136, "learning_rate": 0.000496827262529891, "loss": 2.0083, "step": 1380 }, { "epoch": 0.067431640625, "grad_norm": 0.40014657378196716, "learning_rate": 0.0004968213982494913, "loss": 2.0496, "step": 1381 }, { "epoch": 0.06748046875, "grad_norm": 0.3577800691127777, "learning_rate": 0.0004968155285930717, "loss": 2.0276, "step": 1382 }, { "epoch": 0.067529296875, "grad_norm": 0.4729588031768799, "learning_rate": 0.0004968096535607745, "loss": 2.0316, "step": 1383 }, { "epoch": 0.067578125, "grad_norm": 0.4196494221687317, "learning_rate": 0.0004968037731527422, "loss": 2.0397, "step": 1384 }, { "epoch": 0.067626953125, "grad_norm": 0.436186820268631, "learning_rate": 0.0004967978873691173, "loss": 2.0007, "step": 1385 }, { "epoch": 0.06767578125, "grad_norm": 0.42432019114494324, "learning_rate": 0.0004967919962100424, "loss": 2.0126, "step": 1386 }, { "epoch": 0.067724609375, "grad_norm": 0.38221508264541626, "learning_rate": 0.0004967860996756602, "loss": 2.0416, "step": 1387 }, { "epoch": 0.0677734375, "grad_norm": 0.4089238941669464, "learning_rate": 0.0004967801977661138, "loss": 2.0333, "step": 1388 }, { "epoch": 0.067822265625, "grad_norm": 0.40389347076416016, "learning_rate": 0.0004967742904815461, "loss": 1.9916, "step": 1389 }, { "epoch": 0.06787109375, "grad_norm": 0.45895257592201233, "learning_rate": 0.0004967683778221003, "loss": 2.035, "step": 1390 }, { "epoch": 0.067919921875, "grad_norm": 0.5600590705871582, "learning_rate": 0.0004967624597879197, "loss": 2.0493, "step": 1391 }, { "epoch": 0.06796875, "grad_norm": 0.5517451167106628, "learning_rate": 0.0004967565363791478, "loss": 2.0216, "step": 1392 }, { "epoch": 0.068017578125, "grad_norm": 0.5008623003959656, "learning_rate": 0.0004967506075959279, "loss": 2.0045, "step": 1393 }, { "epoch": 0.06806640625, "grad_norm": 0.5755999088287354, "learning_rate": 0.000496744673438404, "loss": 2.0372, "step": 1394 }, { "epoch": 0.068115234375, "grad_norm": 0.3982465863227844, "learning_rate": 0.0004967387339067197, "loss": 2.0094, "step": 1395 }, { "epoch": 0.0681640625, "grad_norm": 0.40657082200050354, "learning_rate": 0.0004967327890010192, "loss": 2.0329, "step": 1396 }, { "epoch": 0.068212890625, "grad_norm": 0.4969734847545624, "learning_rate": 0.0004967268387214463, "loss": 2.0216, "step": 1397 }, { "epoch": 0.06826171875, "grad_norm": 0.2983216643333435, "learning_rate": 0.0004967208830681454, "loss": 2.0233, "step": 1398 }, { "epoch": 0.068310546875, "grad_norm": 0.4369111955165863, "learning_rate": 0.0004967149220412607, "loss": 2.0326, "step": 1399 }, { "epoch": 0.068359375, "grad_norm": 0.42079079151153564, "learning_rate": 0.0004967089556409367, "loss": 2.0134, "step": 1400 }, { "epoch": 0.068408203125, "grad_norm": 0.36479735374450684, "learning_rate": 0.0004967029838673181, "loss": 2.0236, "step": 1401 }, { "epoch": 0.06845703125, "grad_norm": 0.4732339084148407, "learning_rate": 0.0004966970067205496, "loss": 2.0282, "step": 1402 }, { "epoch": 0.068505859375, "grad_norm": 0.4124797284603119, "learning_rate": 0.000496691024200776, "loss": 1.948, "step": 1403 }, { "epoch": 0.0685546875, "grad_norm": 0.4700213372707367, "learning_rate": 0.0004966850363081423, "loss": 2.0116, "step": 1404 }, { "epoch": 0.068603515625, "grad_norm": 0.4005722999572754, "learning_rate": 0.0004966790430427938, "loss": 2.0412, "step": 1405 }, { "epoch": 0.06865234375, "grad_norm": 0.34204381704330444, "learning_rate": 0.0004966730444048754, "loss": 2.0243, "step": 1406 }, { "epoch": 0.068701171875, "grad_norm": 0.3532731831073761, "learning_rate": 0.0004966670403945328, "loss": 2.0443, "step": 1407 }, { "epoch": 0.06875, "grad_norm": 0.3670112192630768, "learning_rate": 0.0004966610310119113, "loss": 2.0142, "step": 1408 }, { "epoch": 0.068798828125, "grad_norm": 0.39114072918891907, "learning_rate": 0.0004966550162571567, "loss": 2.0242, "step": 1409 }, { "epoch": 0.06884765625, "grad_norm": 0.3810185492038727, "learning_rate": 0.0004966489961304147, "loss": 2.0401, "step": 1410 }, { "epoch": 0.068896484375, "grad_norm": 0.38853639364242554, "learning_rate": 0.0004966429706318311, "loss": 1.9944, "step": 1411 }, { "epoch": 0.0689453125, "grad_norm": 0.35634562373161316, "learning_rate": 0.0004966369397615522, "loss": 1.9886, "step": 1412 }, { "epoch": 0.068994140625, "grad_norm": 0.37256920337677, "learning_rate": 0.000496630903519724, "loss": 2.0376, "step": 1413 }, { "epoch": 0.06904296875, "grad_norm": 0.3623563349246979, "learning_rate": 0.0004966248619064927, "loss": 2.0139, "step": 1414 }, { "epoch": 0.069091796875, "grad_norm": 0.3485233187675476, "learning_rate": 0.0004966188149220049, "loss": 2.0368, "step": 1415 }, { "epoch": 0.069140625, "grad_norm": 0.33280128240585327, "learning_rate": 0.000496612762566407, "loss": 1.9805, "step": 1416 }, { "epoch": 0.069189453125, "grad_norm": 0.366985023021698, "learning_rate": 0.0004966067048398458, "loss": 2.0099, "step": 1417 }, { "epoch": 0.06923828125, "grad_norm": 0.37307488918304443, "learning_rate": 0.000496600641742468, "loss": 1.9745, "step": 1418 }, { "epoch": 0.069287109375, "grad_norm": 0.31785789132118225, "learning_rate": 0.0004965945732744206, "loss": 2.0383, "step": 1419 }, { "epoch": 0.0693359375, "grad_norm": 0.38270124793052673, "learning_rate": 0.0004965884994358508, "loss": 2.0231, "step": 1420 }, { "epoch": 0.069384765625, "grad_norm": 0.4320908188819885, "learning_rate": 0.0004965824202269057, "loss": 1.9986, "step": 1421 }, { "epoch": 0.06943359375, "grad_norm": 0.40503114461898804, "learning_rate": 0.0004965763356477326, "loss": 2.0463, "step": 1422 }, { "epoch": 0.069482421875, "grad_norm": 0.426921546459198, "learning_rate": 0.0004965702456984788, "loss": 2.0409, "step": 1423 }, { "epoch": 0.06953125, "grad_norm": 0.4226549565792084, "learning_rate": 0.0004965641503792924, "loss": 2.0483, "step": 1424 }, { "epoch": 0.069580078125, "grad_norm": 0.36776554584503174, "learning_rate": 0.0004965580496903206, "loss": 2.0523, "step": 1425 }, { "epoch": 0.06962890625, "grad_norm": 0.31875136494636536, "learning_rate": 0.0004965519436317115, "loss": 1.9762, "step": 1426 }, { "epoch": 0.069677734375, "grad_norm": 0.32512998580932617, "learning_rate": 0.0004965458322036131, "loss": 2.0176, "step": 1427 }, { "epoch": 0.0697265625, "grad_norm": 0.31613683700561523, "learning_rate": 0.0004965397154061736, "loss": 1.9805, "step": 1428 }, { "epoch": 0.069775390625, "grad_norm": 0.3576909005641937, "learning_rate": 0.0004965335932395409, "loss": 1.9837, "step": 1429 }, { "epoch": 0.06982421875, "grad_norm": 0.37991800904273987, "learning_rate": 0.0004965274657038637, "loss": 2.0416, "step": 1430 }, { "epoch": 0.069873046875, "grad_norm": 0.4322262406349182, "learning_rate": 0.0004965213327992904, "loss": 2.0646, "step": 1431 }, { "epoch": 0.069921875, "grad_norm": 0.3871046006679535, "learning_rate": 0.0004965151945259696, "loss": 2.0164, "step": 1432 }, { "epoch": 0.069970703125, "grad_norm": 0.32861706614494324, "learning_rate": 0.00049650905088405, "loss": 2.0276, "step": 1433 }, { "epoch": 0.07001953125, "grad_norm": 0.3674280643463135, "learning_rate": 0.0004965029018736807, "loss": 2.0274, "step": 1434 }, { "epoch": 0.070068359375, "grad_norm": 0.35889557003974915, "learning_rate": 0.0004964967474950106, "loss": 2.0234, "step": 1435 }, { "epoch": 0.0701171875, "grad_norm": 0.35870394110679626, "learning_rate": 0.0004964905877481889, "loss": 1.9903, "step": 1436 }, { "epoch": 0.070166015625, "grad_norm": 0.3954055607318878, "learning_rate": 0.0004964844226333649, "loss": 2.03, "step": 1437 }, { "epoch": 0.07021484375, "grad_norm": 0.4294931888580322, "learning_rate": 0.0004964782521506879, "loss": 1.9963, "step": 1438 }, { "epoch": 0.070263671875, "grad_norm": 0.4973425567150116, "learning_rate": 0.0004964720763003075, "loss": 2.0354, "step": 1439 }, { "epoch": 0.0703125, "grad_norm": 0.5543261766433716, "learning_rate": 0.0004964658950823734, "loss": 2.0291, "step": 1440 }, { "epoch": 0.070361328125, "grad_norm": 0.5018720030784607, "learning_rate": 0.0004964597084970355, "loss": 2.0119, "step": 1441 }, { "epoch": 0.07041015625, "grad_norm": 0.3651126027107239, "learning_rate": 0.0004964535165444436, "loss": 2.0278, "step": 1442 }, { "epoch": 0.070458984375, "grad_norm": 0.4421127736568451, "learning_rate": 0.0004964473192247479, "loss": 2.0123, "step": 1443 }, { "epoch": 0.0705078125, "grad_norm": 0.39431625604629517, "learning_rate": 0.0004964411165380983, "loss": 2.0467, "step": 1444 }, { "epoch": 0.070556640625, "grad_norm": 0.2898810803890228, "learning_rate": 0.0004964349084846456, "loss": 2.0373, "step": 1445 }, { "epoch": 0.07060546875, "grad_norm": 0.3908224105834961, "learning_rate": 0.0004964286950645397, "loss": 2.0443, "step": 1446 }, { "epoch": 0.070654296875, "grad_norm": 0.4118218421936035, "learning_rate": 0.0004964224762779316, "loss": 2.0191, "step": 1447 }, { "epoch": 0.070703125, "grad_norm": 0.397146612405777, "learning_rate": 0.000496416252124972, "loss": 2.0052, "step": 1448 }, { "epoch": 0.070751953125, "grad_norm": 0.35406309366226196, "learning_rate": 0.0004964100226058116, "loss": 2.0142, "step": 1449 }, { "epoch": 0.07080078125, "grad_norm": 0.410001277923584, "learning_rate": 0.0004964037877206014, "loss": 2.0395, "step": 1450 }, { "epoch": 0.070849609375, "grad_norm": 0.42457571625709534, "learning_rate": 0.0004963975474694925, "loss": 2.0594, "step": 1451 }, { "epoch": 0.0708984375, "grad_norm": 0.547321081161499, "learning_rate": 0.0004963913018526363, "loss": 1.9989, "step": 1452 }, { "epoch": 0.070947265625, "grad_norm": 0.42736610770225525, "learning_rate": 0.0004963850508701838, "loss": 1.9694, "step": 1453 }, { "epoch": 0.07099609375, "grad_norm": 0.463030606508255, "learning_rate": 0.000496378794522287, "loss": 2.0102, "step": 1454 }, { "epoch": 0.071044921875, "grad_norm": 0.3910181224346161, "learning_rate": 0.0004963725328090971, "loss": 2.0544, "step": 1455 }, { "epoch": 0.07109375, "grad_norm": 0.39425352215766907, "learning_rate": 0.0004963662657307661, "loss": 2.0416, "step": 1456 }, { "epoch": 0.071142578125, "grad_norm": 0.5314236283302307, "learning_rate": 0.0004963599932874457, "loss": 2.0159, "step": 1457 }, { "epoch": 0.07119140625, "grad_norm": 0.45140811800956726, "learning_rate": 0.0004963537154792881, "loss": 2.0262, "step": 1458 }, { "epoch": 0.071240234375, "grad_norm": 0.40505924820899963, "learning_rate": 0.0004963474323064453, "loss": 1.9895, "step": 1459 }, { "epoch": 0.0712890625, "grad_norm": 0.4413403868675232, "learning_rate": 0.0004963411437690696, "loss": 1.9394, "step": 1460 }, { "epoch": 0.071337890625, "grad_norm": 0.4454101026058197, "learning_rate": 0.0004963348498673136, "loss": 2.02, "step": 1461 }, { "epoch": 0.07138671875, "grad_norm": 0.5781814455986023, "learning_rate": 0.0004963285506013297, "loss": 2.0282, "step": 1462 }, { "epoch": 0.071435546875, "grad_norm": 0.4038597345352173, "learning_rate": 0.0004963222459712706, "loss": 2.0201, "step": 1463 }, { "epoch": 0.071484375, "grad_norm": 0.38253575563430786, "learning_rate": 0.0004963159359772889, "loss": 1.963, "step": 1464 }, { "epoch": 0.071533203125, "grad_norm": 0.3819660544395447, "learning_rate": 0.0004963096206195378, "loss": 1.9968, "step": 1465 }, { "epoch": 0.07158203125, "grad_norm": 0.399127334356308, "learning_rate": 0.0004963032998981702, "loss": 2.0469, "step": 1466 }, { "epoch": 0.071630859375, "grad_norm": 0.4176925718784332, "learning_rate": 0.0004962969738133393, "loss": 2.0004, "step": 1467 }, { "epoch": 0.0716796875, "grad_norm": 0.36253800988197327, "learning_rate": 0.0004962906423651985, "loss": 1.9841, "step": 1468 }, { "epoch": 0.071728515625, "grad_norm": 0.44841986894607544, "learning_rate": 0.0004962843055539012, "loss": 2.0379, "step": 1469 }, { "epoch": 0.07177734375, "grad_norm": 0.4049123525619507, "learning_rate": 0.000496277963379601, "loss": 1.9656, "step": 1470 }, { "epoch": 0.071826171875, "grad_norm": 0.38804665207862854, "learning_rate": 0.0004962716158424516, "loss": 1.9994, "step": 1471 }, { "epoch": 0.071875, "grad_norm": 0.46197783946990967, "learning_rate": 0.0004962652629426068, "loss": 2.007, "step": 1472 }, { "epoch": 0.071923828125, "grad_norm": 0.29284393787384033, "learning_rate": 0.0004962589046802205, "loss": 2.0607, "step": 1473 }, { "epoch": 0.07197265625, "grad_norm": 0.4483253061771393, "learning_rate": 0.000496252541055447, "loss": 2.0094, "step": 1474 }, { "epoch": 0.072021484375, "grad_norm": 0.4622493088245392, "learning_rate": 0.0004962461720684403, "loss": 2.0521, "step": 1475 }, { "epoch": 0.0720703125, "grad_norm": 0.4051806628704071, "learning_rate": 0.000496239797719355, "loss": 2.0327, "step": 1476 }, { "epoch": 0.072119140625, "grad_norm": 0.40962696075439453, "learning_rate": 0.0004962334180083454, "loss": 2.0106, "step": 1477 }, { "epoch": 0.07216796875, "grad_norm": 0.3326147794723511, "learning_rate": 0.0004962270329355662, "loss": 1.9641, "step": 1478 }, { "epoch": 0.072216796875, "grad_norm": 0.3671652674674988, "learning_rate": 0.0004962206425011721, "loss": 2.0517, "step": 1479 }, { "epoch": 0.072265625, "grad_norm": 0.3064383566379547, "learning_rate": 0.000496214246705318, "loss": 2.0097, "step": 1480 }, { "epoch": 0.072314453125, "grad_norm": 0.358898401260376, "learning_rate": 0.000496207845548159, "loss": 1.9936, "step": 1481 }, { "epoch": 0.07236328125, "grad_norm": 0.8260646462440491, "learning_rate": 0.00049620143902985, "loss": 2.0196, "step": 1482 }, { "epoch": 0.072412109375, "grad_norm": 0.3795277774333954, "learning_rate": 0.0004961950271505465, "loss": 2.0088, "step": 1483 }, { "epoch": 0.0724609375, "grad_norm": 0.4022147059440613, "learning_rate": 0.0004961886099104038, "loss": 2.035, "step": 1484 }, { "epoch": 0.072509765625, "grad_norm": 0.4385221600532532, "learning_rate": 0.0004961821873095773, "loss": 2.0505, "step": 1485 }, { "epoch": 0.07255859375, "grad_norm": 0.4600922167301178, "learning_rate": 0.0004961757593482229, "loss": 2.0249, "step": 1486 }, { "epoch": 0.072607421875, "grad_norm": 0.45234760642051697, "learning_rate": 0.0004961693260264964, "loss": 2.0189, "step": 1487 }, { "epoch": 0.07265625, "grad_norm": 0.39208707213401794, "learning_rate": 0.0004961628873445535, "loss": 1.9834, "step": 1488 }, { "epoch": 0.072705078125, "grad_norm": 0.40047433972358704, "learning_rate": 0.0004961564433025504, "loss": 2.0538, "step": 1489 }, { "epoch": 0.07275390625, "grad_norm": 0.350259006023407, "learning_rate": 0.0004961499939006431, "loss": 2.0117, "step": 1490 }, { "epoch": 0.072802734375, "grad_norm": 0.3649987280368805, "learning_rate": 0.0004961435391389881, "loss": 2.0511, "step": 1491 }, { "epoch": 0.0728515625, "grad_norm": 0.41138580441474915, "learning_rate": 0.0004961370790177418, "loss": 2.0329, "step": 1492 }, { "epoch": 0.072900390625, "grad_norm": 0.35833343863487244, "learning_rate": 0.0004961306135370606, "loss": 2.0513, "step": 1493 }, { "epoch": 0.07294921875, "grad_norm": 0.32790645956993103, "learning_rate": 0.0004961241426971014, "loss": 2.0345, "step": 1494 }, { "epoch": 0.072998046875, "grad_norm": 0.3279031813144684, "learning_rate": 0.0004961176664980211, "loss": 2.0234, "step": 1495 }, { "epoch": 0.073046875, "grad_norm": 0.3789631426334381, "learning_rate": 0.0004961111849399763, "loss": 2.0527, "step": 1496 }, { "epoch": 0.073095703125, "grad_norm": 0.3799320459365845, "learning_rate": 0.0004961046980231244, "loss": 1.994, "step": 1497 }, { "epoch": 0.07314453125, "grad_norm": 0.3288620114326477, "learning_rate": 0.0004960982057476224, "loss": 1.9986, "step": 1498 }, { "epoch": 0.073193359375, "grad_norm": 0.35008925199508667, "learning_rate": 0.0004960917081136279, "loss": 1.9929, "step": 1499 }, { "epoch": 0.0732421875, "grad_norm": 0.33310431241989136, "learning_rate": 0.0004960852051212982, "loss": 2.0491, "step": 1500 }, { "epoch": 0.073291015625, "grad_norm": 0.26787009835243225, "learning_rate": 0.0004960786967707909, "loss": 2.0114, "step": 1501 }, { "epoch": 0.07333984375, "grad_norm": 0.32496964931488037, "learning_rate": 0.0004960721830622637, "loss": 1.9747, "step": 1502 }, { "epoch": 0.073388671875, "grad_norm": 0.44341179728507996, "learning_rate": 0.0004960656639958746, "loss": 2.0042, "step": 1503 }, { "epoch": 0.0734375, "grad_norm": 0.43742021918296814, "learning_rate": 0.0004960591395717816, "loss": 1.9935, "step": 1504 }, { "epoch": 0.073486328125, "grad_norm": 0.4465859532356262, "learning_rate": 0.0004960526097901426, "loss": 1.9971, "step": 1505 }, { "epoch": 0.07353515625, "grad_norm": 0.46678149700164795, "learning_rate": 0.0004960460746511162, "loss": 1.9934, "step": 1506 }, { "epoch": 0.073583984375, "grad_norm": 0.42997509241104126, "learning_rate": 0.0004960395341548605, "loss": 2.0466, "step": 1507 }, { "epoch": 0.0736328125, "grad_norm": 0.4680302143096924, "learning_rate": 0.0004960329883015341, "loss": 1.9771, "step": 1508 }, { "epoch": 0.073681640625, "grad_norm": 0.3659434914588928, "learning_rate": 0.0004960264370912957, "loss": 2.0479, "step": 1509 }, { "epoch": 0.07373046875, "grad_norm": 0.31366339325904846, "learning_rate": 0.0004960198805243039, "loss": 1.9991, "step": 1510 }, { "epoch": 0.073779296875, "grad_norm": 0.436463862657547, "learning_rate": 0.0004960133186007178, "loss": 1.9967, "step": 1511 }, { "epoch": 0.073828125, "grad_norm": 0.37962737679481506, "learning_rate": 0.0004960067513206964, "loss": 2.0253, "step": 1512 }, { "epoch": 0.073876953125, "grad_norm": 0.33848801255226135, "learning_rate": 0.0004960001786843988, "loss": 1.9834, "step": 1513 }, { "epoch": 0.07392578125, "grad_norm": 0.3751761317253113, "learning_rate": 0.0004959936006919843, "loss": 1.9776, "step": 1514 }, { "epoch": 0.073974609375, "grad_norm": 0.33207809925079346, "learning_rate": 0.0004959870173436124, "loss": 2.07, "step": 1515 }, { "epoch": 0.0740234375, "grad_norm": 0.41477230191230774, "learning_rate": 0.0004959804286394425, "loss": 2.011, "step": 1516 }, { "epoch": 0.074072265625, "grad_norm": 0.41093710064888, "learning_rate": 0.0004959738345796345, "loss": 2.0122, "step": 1517 }, { "epoch": 0.07412109375, "grad_norm": 0.37957656383514404, "learning_rate": 0.000495967235164348, "loss": 1.996, "step": 1518 }, { "epoch": 0.074169921875, "grad_norm": 0.3574022650718689, "learning_rate": 0.0004959606303937431, "loss": 2.0356, "step": 1519 }, { "epoch": 0.07421875, "grad_norm": 0.380482017993927, "learning_rate": 0.0004959540202679797, "loss": 1.9725, "step": 1520 }, { "epoch": 0.074267578125, "grad_norm": 0.4064003825187683, "learning_rate": 0.0004959474047872182, "loss": 2.0684, "step": 1521 }, { "epoch": 0.07431640625, "grad_norm": 0.4228363335132599, "learning_rate": 0.0004959407839516188, "loss": 2.0034, "step": 1522 }, { "epoch": 0.074365234375, "grad_norm": 0.4577881395816803, "learning_rate": 0.000495934157761342, "loss": 2.0519, "step": 1523 }, { "epoch": 0.0744140625, "grad_norm": 0.4755101799964905, "learning_rate": 0.0004959275262165485, "loss": 1.9606, "step": 1524 }, { "epoch": 0.074462890625, "grad_norm": 0.5191824436187744, "learning_rate": 0.0004959208893173988, "loss": 1.992, "step": 1525 }, { "epoch": 0.07451171875, "grad_norm": 0.47114306688308716, "learning_rate": 0.0004959142470640539, "loss": 2.0014, "step": 1526 }, { "epoch": 0.074560546875, "grad_norm": 0.44069477915763855, "learning_rate": 0.0004959075994566747, "loss": 2.0574, "step": 1527 }, { "epoch": 0.074609375, "grad_norm": 0.41046974062919617, "learning_rate": 0.0004959009464954224, "loss": 2.052, "step": 1528 }, { "epoch": 0.074658203125, "grad_norm": 0.359337717294693, "learning_rate": 0.0004958942881804581, "loss": 2.0178, "step": 1529 }, { "epoch": 0.07470703125, "grad_norm": 0.41936516761779785, "learning_rate": 0.0004958876245119433, "loss": 2.0181, "step": 1530 }, { "epoch": 0.074755859375, "grad_norm": 0.4237978458404541, "learning_rate": 0.0004958809554900395, "loss": 2.0186, "step": 1531 }, { "epoch": 0.0748046875, "grad_norm": 0.39385607838630676, "learning_rate": 0.0004958742811149083, "loss": 2.0396, "step": 1532 }, { "epoch": 0.074853515625, "grad_norm": 0.3411564230918884, "learning_rate": 0.0004958676013867114, "loss": 2.0486, "step": 1533 }, { "epoch": 0.07490234375, "grad_norm": 0.33106738328933716, "learning_rate": 0.0004958609163056108, "loss": 1.9955, "step": 1534 }, { "epoch": 0.074951171875, "grad_norm": 0.3249313533306122, "learning_rate": 0.0004958542258717683, "loss": 2.003, "step": 1535 }, { "epoch": 0.075, "grad_norm": 0.3062542974948883, "learning_rate": 0.0004958475300853464, "loss": 1.9569, "step": 1536 }, { "epoch": 0.075048828125, "grad_norm": 0.3172348439693451, "learning_rate": 0.000495840828946507, "loss": 2.0492, "step": 1537 }, { "epoch": 0.07509765625, "grad_norm": 0.37296316027641296, "learning_rate": 0.0004958341224554129, "loss": 2.0033, "step": 1538 }, { "epoch": 0.075146484375, "grad_norm": 0.34684810042381287, "learning_rate": 0.0004958274106122262, "loss": 2.0017, "step": 1539 }, { "epoch": 0.0751953125, "grad_norm": 0.3613809645175934, "learning_rate": 0.00049582069341711, "loss": 2.0494, "step": 1540 }, { "epoch": 0.075244140625, "grad_norm": 0.41947123408317566, "learning_rate": 0.0004958139708702268, "loss": 2.03, "step": 1541 }, { "epoch": 0.07529296875, "grad_norm": 0.3826564848423004, "learning_rate": 0.0004958072429717395, "loss": 2.0299, "step": 1542 }, { "epoch": 0.075341796875, "grad_norm": 0.415861040353775, "learning_rate": 0.0004958005097218114, "loss": 2.0397, "step": 1543 }, { "epoch": 0.075390625, "grad_norm": 0.5148950219154358, "learning_rate": 0.0004957937711206055, "loss": 2.004, "step": 1544 }, { "epoch": 0.075439453125, "grad_norm": 0.4050554633140564, "learning_rate": 0.0004957870271682853, "loss": 2.0122, "step": 1545 }, { "epoch": 0.07548828125, "grad_norm": 0.38389983773231506, "learning_rate": 0.000495780277865014, "loss": 2.0029, "step": 1546 }, { "epoch": 0.075537109375, "grad_norm": 0.4240744411945343, "learning_rate": 0.0004957735232109554, "loss": 2.0365, "step": 1547 }, { "epoch": 0.0755859375, "grad_norm": 0.3760871887207031, "learning_rate": 0.000495766763206273, "loss": 1.9951, "step": 1548 }, { "epoch": 0.075634765625, "grad_norm": 0.3367307782173157, "learning_rate": 0.0004957599978511307, "loss": 1.9771, "step": 1549 }, { "epoch": 0.07568359375, "grad_norm": 0.3595333695411682, "learning_rate": 0.0004957532271456926, "loss": 2.0482, "step": 1550 }, { "epoch": 0.075732421875, "grad_norm": 0.42496639490127563, "learning_rate": 0.0004957464510901226, "loss": 1.949, "step": 1551 }, { "epoch": 0.07578125, "grad_norm": 0.37057217955589294, "learning_rate": 0.000495739669684585, "loss": 1.9859, "step": 1552 }, { "epoch": 0.075830078125, "grad_norm": 0.3721277713775635, "learning_rate": 0.0004957328829292443, "loss": 2.0407, "step": 1553 }, { "epoch": 0.07587890625, "grad_norm": 0.3814913332462311, "learning_rate": 0.0004957260908242647, "loss": 2.0034, "step": 1554 }, { "epoch": 0.075927734375, "grad_norm": 0.3515186309814453, "learning_rate": 0.0004957192933698111, "loss": 2.0063, "step": 1555 }, { "epoch": 0.0759765625, "grad_norm": 0.3533896803855896, "learning_rate": 0.000495712490566048, "loss": 2.0296, "step": 1556 }, { "epoch": 0.076025390625, "grad_norm": 0.4057687222957611, "learning_rate": 0.0004957056824131404, "loss": 2.0273, "step": 1557 }, { "epoch": 0.07607421875, "grad_norm": 0.431199848651886, "learning_rate": 0.0004956988689112533, "loss": 1.9973, "step": 1558 }, { "epoch": 0.076123046875, "grad_norm": 0.4192124009132385, "learning_rate": 0.0004956920500605518, "loss": 1.9615, "step": 1559 }, { "epoch": 0.076171875, "grad_norm": 0.33966636657714844, "learning_rate": 0.0004956852258612011, "loss": 2.0434, "step": 1560 }, { "epoch": 0.076220703125, "grad_norm": 0.42416903376579285, "learning_rate": 0.0004956783963133666, "loss": 2.0018, "step": 1561 }, { "epoch": 0.07626953125, "grad_norm": 0.41168737411499023, "learning_rate": 0.0004956715614172141, "loss": 1.9945, "step": 1562 }, { "epoch": 0.076318359375, "grad_norm": 0.306674599647522, "learning_rate": 0.0004956647211729088, "loss": 1.9592, "step": 1563 }, { "epoch": 0.0763671875, "grad_norm": 0.31484168767929077, "learning_rate": 0.0004956578755806168, "loss": 2.0049, "step": 1564 }, { "epoch": 0.076416015625, "grad_norm": 0.3638671040534973, "learning_rate": 0.0004956510246405039, "loss": 1.9834, "step": 1565 }, { "epoch": 0.07646484375, "grad_norm": 0.37961897253990173, "learning_rate": 0.0004956441683527361, "loss": 2.0527, "step": 1566 }, { "epoch": 0.076513671875, "grad_norm": 0.35839909315109253, "learning_rate": 0.0004956373067174797, "loss": 1.9998, "step": 1567 }, { "epoch": 0.0765625, "grad_norm": 0.43123167753219604, "learning_rate": 0.0004956304397349009, "loss": 2.0228, "step": 1568 }, { "epoch": 0.076611328125, "grad_norm": 0.4843272864818573, "learning_rate": 0.0004956235674051661, "loss": 2.0194, "step": 1569 }, { "epoch": 0.07666015625, "grad_norm": 0.34743115305900574, "learning_rate": 0.0004956166897284419, "loss": 2.0062, "step": 1570 }, { "epoch": 0.076708984375, "grad_norm": 0.38970279693603516, "learning_rate": 0.000495609806704895, "loss": 1.9801, "step": 1571 }, { "epoch": 0.0767578125, "grad_norm": 0.4200243055820465, "learning_rate": 0.0004956029183346922, "loss": 1.9812, "step": 1572 }, { "epoch": 0.076806640625, "grad_norm": 0.3252773582935333, "learning_rate": 0.0004955960246180005, "loss": 2.0592, "step": 1573 }, { "epoch": 0.07685546875, "grad_norm": 0.3815264105796814, "learning_rate": 0.0004955891255549868, "loss": 2.0066, "step": 1574 }, { "epoch": 0.076904296875, "grad_norm": 0.3835914433002472, "learning_rate": 0.0004955822211458185, "loss": 2.019, "step": 1575 }, { "epoch": 0.076953125, "grad_norm": 0.3225969970226288, "learning_rate": 0.0004955753113906629, "loss": 2.0027, "step": 1576 }, { "epoch": 0.077001953125, "grad_norm": 0.3442991375923157, "learning_rate": 0.0004955683962896873, "loss": 2.0692, "step": 1577 }, { "epoch": 0.07705078125, "grad_norm": 0.3590145707130432, "learning_rate": 0.0004955614758430594, "loss": 2.0342, "step": 1578 }, { "epoch": 0.077099609375, "grad_norm": 0.39118489623069763, "learning_rate": 0.0004955545500509471, "loss": 2.0069, "step": 1579 }, { "epoch": 0.0771484375, "grad_norm": 0.348967045545578, "learning_rate": 0.0004955476189135179, "loss": 1.9731, "step": 1580 }, { "epoch": 0.077197265625, "grad_norm": 0.3281141221523285, "learning_rate": 0.0004955406824309401, "loss": 1.9751, "step": 1581 }, { "epoch": 0.07724609375, "grad_norm": 0.3882421553134918, "learning_rate": 0.0004955337406033817, "loss": 1.9872, "step": 1582 }, { "epoch": 0.077294921875, "grad_norm": 0.3860599994659424, "learning_rate": 0.0004955267934310108, "loss": 1.9537, "step": 1583 }, { "epoch": 0.07734375, "grad_norm": 0.3459037244319916, "learning_rate": 0.000495519840913996, "loss": 1.9897, "step": 1584 }, { "epoch": 0.077392578125, "grad_norm": 0.28934425115585327, "learning_rate": 0.0004955128830525056, "loss": 2.0041, "step": 1585 }, { "epoch": 0.07744140625, "grad_norm": 0.2934224605560303, "learning_rate": 0.0004955059198467085, "loss": 2.0063, "step": 1586 }, { "epoch": 0.077490234375, "grad_norm": 0.33991265296936035, "learning_rate": 0.000495498951296773, "loss": 1.9914, "step": 1587 }, { "epoch": 0.0775390625, "grad_norm": 0.3111371099948883, "learning_rate": 0.0004954919774028685, "loss": 2.03, "step": 1588 }, { "epoch": 0.077587890625, "grad_norm": 0.3194945454597473, "learning_rate": 0.0004954849981651637, "loss": 2.0436, "step": 1589 }, { "epoch": 0.07763671875, "grad_norm": 0.36184099316596985, "learning_rate": 0.0004954780135838278, "loss": 1.9709, "step": 1590 }, { "epoch": 0.077685546875, "grad_norm": 0.4110976755619049, "learning_rate": 0.0004954710236590302, "loss": 1.9971, "step": 1591 }, { "epoch": 0.077734375, "grad_norm": 0.394187331199646, "learning_rate": 0.0004954640283909401, "loss": 2.0111, "step": 1592 }, { "epoch": 0.077783203125, "grad_norm": 0.44868364930152893, "learning_rate": 0.0004954570277797273, "loss": 2.0298, "step": 1593 }, { "epoch": 0.07783203125, "grad_norm": 0.3886258900165558, "learning_rate": 0.0004954500218255613, "loss": 2.0331, "step": 1594 }, { "epoch": 0.077880859375, "grad_norm": 0.35813745856285095, "learning_rate": 0.0004954430105286118, "loss": 2.0284, "step": 1595 }, { "epoch": 0.0779296875, "grad_norm": 0.4207231402397156, "learning_rate": 0.0004954359938890489, "loss": 1.9962, "step": 1596 }, { "epoch": 0.077978515625, "grad_norm": 0.37038660049438477, "learning_rate": 0.0004954289719070426, "loss": 2.0427, "step": 1597 }, { "epoch": 0.07802734375, "grad_norm": 0.3440232574939728, "learning_rate": 0.000495421944582763, "loss": 1.9925, "step": 1598 }, { "epoch": 0.078076171875, "grad_norm": 0.3453715741634369, "learning_rate": 0.0004954149119163805, "loss": 1.9928, "step": 1599 }, { "epoch": 0.078125, "grad_norm": 0.3323862850666046, "learning_rate": 0.0004954078739080656, "loss": 2.0505, "step": 1600 }, { "epoch": 0.078173828125, "grad_norm": 0.3873893916606903, "learning_rate": 0.0004954008305579888, "loss": 2.0039, "step": 1601 }, { "epoch": 0.07822265625, "grad_norm": 0.3822613060474396, "learning_rate": 0.0004953937818663208, "loss": 2.0099, "step": 1602 }, { "epoch": 0.078271484375, "grad_norm": 0.4816878139972687, "learning_rate": 0.0004953867278332324, "loss": 2.0422, "step": 1603 }, { "epoch": 0.0783203125, "grad_norm": 0.47763770818710327, "learning_rate": 0.0004953796684588946, "loss": 1.9822, "step": 1604 }, { "epoch": 0.078369140625, "grad_norm": 0.37767302989959717, "learning_rate": 0.0004953726037434785, "loss": 1.9734, "step": 1605 }, { "epoch": 0.07841796875, "grad_norm": 0.4230763018131256, "learning_rate": 0.0004953655336871553, "loss": 2.0419, "step": 1606 }, { "epoch": 0.078466796875, "grad_norm": 0.38351452350616455, "learning_rate": 0.0004953584582900963, "loss": 2.0061, "step": 1607 }, { "epoch": 0.078515625, "grad_norm": 0.3019363284111023, "learning_rate": 0.0004953513775524731, "loss": 2.0124, "step": 1608 }, { "epoch": 0.078564453125, "grad_norm": 0.40207934379577637, "learning_rate": 0.0004953442914744572, "loss": 1.9784, "step": 1609 }, { "epoch": 0.07861328125, "grad_norm": 0.4186893105506897, "learning_rate": 0.0004953372000562204, "loss": 2.0225, "step": 1610 }, { "epoch": 0.078662109375, "grad_norm": 0.37711620330810547, "learning_rate": 0.0004953301032979345, "loss": 2.0122, "step": 1611 }, { "epoch": 0.0787109375, "grad_norm": 0.39135095477104187, "learning_rate": 0.0004953230011997716, "loss": 1.9546, "step": 1612 }, { "epoch": 0.078759765625, "grad_norm": 0.43997329473495483, "learning_rate": 0.0004953158937619036, "loss": 2.0786, "step": 1613 }, { "epoch": 0.07880859375, "grad_norm": 0.4445687532424927, "learning_rate": 0.0004953087809845031, "loss": 2.0434, "step": 1614 }, { "epoch": 0.078857421875, "grad_norm": 0.4553406238555908, "learning_rate": 0.0004953016628677423, "loss": 2.0512, "step": 1615 }, { "epoch": 0.07890625, "grad_norm": 0.5167407393455505, "learning_rate": 0.0004952945394117936, "loss": 2.0396, "step": 1616 }, { "epoch": 0.078955078125, "grad_norm": 0.4604794383049011, "learning_rate": 0.0004952874106168298, "loss": 1.9801, "step": 1617 }, { "epoch": 0.07900390625, "grad_norm": 0.35678958892822266, "learning_rate": 0.0004952802764830236, "loss": 1.9903, "step": 1618 }, { "epoch": 0.079052734375, "grad_norm": 0.4534480571746826, "learning_rate": 0.0004952731370105479, "loss": 1.9942, "step": 1619 }, { "epoch": 0.0791015625, "grad_norm": 0.38314834237098694, "learning_rate": 0.0004952659921995758, "loss": 1.964, "step": 1620 }, { "epoch": 0.079150390625, "grad_norm": 0.28562381863594055, "learning_rate": 0.0004952588420502806, "loss": 2.0003, "step": 1621 }, { "epoch": 0.07919921875, "grad_norm": 0.3773391544818878, "learning_rate": 0.0004952516865628352, "loss": 2.0299, "step": 1622 }, { "epoch": 0.079248046875, "grad_norm": 0.27893343567848206, "learning_rate": 0.0004952445257374133, "loss": 2.0116, "step": 1623 }, { "epoch": 0.079296875, "grad_norm": 0.33164215087890625, "learning_rate": 0.0004952373595741883, "loss": 1.9982, "step": 1624 }, { "epoch": 0.079345703125, "grad_norm": 0.3554540276527405, "learning_rate": 0.000495230188073334, "loss": 2.0186, "step": 1625 }, { "epoch": 0.07939453125, "grad_norm": 0.32467198371887207, "learning_rate": 0.0004952230112350241, "loss": 2.0278, "step": 1626 }, { "epoch": 0.079443359375, "grad_norm": 0.3957529067993164, "learning_rate": 0.0004952158290594327, "loss": 2.0152, "step": 1627 }, { "epoch": 0.0794921875, "grad_norm": 0.32731181383132935, "learning_rate": 0.0004952086415467337, "loss": 2.0108, "step": 1628 }, { "epoch": 0.079541015625, "grad_norm": 0.34941330552101135, "learning_rate": 0.0004952014486971014, "loss": 2.014, "step": 1629 }, { "epoch": 0.07958984375, "grad_norm": 0.38958826661109924, "learning_rate": 0.0004951942505107101, "loss": 2.0254, "step": 1630 }, { "epoch": 0.079638671875, "grad_norm": 0.37353745102882385, "learning_rate": 0.0004951870469877341, "loss": 2.009, "step": 1631 }, { "epoch": 0.0796875, "grad_norm": 0.38238322734832764, "learning_rate": 0.0004951798381283482, "loss": 1.9804, "step": 1632 }, { "epoch": 0.079736328125, "grad_norm": 0.24940384924411774, "learning_rate": 0.0004951726239327272, "loss": 1.9674, "step": 1633 }, { "epoch": 0.07978515625, "grad_norm": 0.3329075872898102, "learning_rate": 0.0004951654044010455, "loss": 2.0132, "step": 1634 }, { "epoch": 0.079833984375, "grad_norm": 0.3489891588687897, "learning_rate": 0.0004951581795334785, "loss": 1.9854, "step": 1635 }, { "epoch": 0.0798828125, "grad_norm": 0.3845555782318115, "learning_rate": 0.0004951509493302011, "loss": 2.012, "step": 1636 }, { "epoch": 0.079931640625, "grad_norm": 0.43863198161125183, "learning_rate": 0.0004951437137913886, "loss": 2.0154, "step": 1637 }, { "epoch": 0.07998046875, "grad_norm": 0.4557201564311981, "learning_rate": 0.0004951364729172163, "loss": 2.016, "step": 1638 }, { "epoch": 0.080029296875, "grad_norm": 0.49669116735458374, "learning_rate": 0.0004951292267078598, "loss": 2.0513, "step": 1639 }, { "epoch": 0.080078125, "grad_norm": 0.5140843391418457, "learning_rate": 0.0004951219751634945, "loss": 2.0517, "step": 1640 }, { "epoch": 0.080126953125, "grad_norm": 0.42298775911331177, "learning_rate": 0.0004951147182842965, "loss": 1.9821, "step": 1641 }, { "epoch": 0.08017578125, "grad_norm": 0.3332998752593994, "learning_rate": 0.0004951074560704412, "loss": 2.0017, "step": 1642 }, { "epoch": 0.080224609375, "grad_norm": 0.4277209937572479, "learning_rate": 0.0004951001885221051, "loss": 2.0057, "step": 1643 }, { "epoch": 0.0802734375, "grad_norm": 0.4476911723613739, "learning_rate": 0.0004950929156394639, "loss": 1.9917, "step": 1644 }, { "epoch": 0.080322265625, "grad_norm": 0.348804771900177, "learning_rate": 0.0004950856374226943, "loss": 1.9518, "step": 1645 }, { "epoch": 0.08037109375, "grad_norm": 0.39642059803009033, "learning_rate": 0.0004950783538719723, "loss": 2.049, "step": 1646 }, { "epoch": 0.080419921875, "grad_norm": 0.34756454825401306, "learning_rate": 0.0004950710649874746, "loss": 1.9981, "step": 1647 }, { "epoch": 0.08046875, "grad_norm": 0.37238383293151855, "learning_rate": 0.0004950637707693779, "loss": 2.0515, "step": 1648 }, { "epoch": 0.080517578125, "grad_norm": 0.3934236168861389, "learning_rate": 0.0004950564712178589, "loss": 2.0338, "step": 1649 }, { "epoch": 0.08056640625, "grad_norm": 0.28539347648620605, "learning_rate": 0.0004950491663330946, "loss": 1.9809, "step": 1650 }, { "epoch": 0.080615234375, "grad_norm": 0.3638210594654083, "learning_rate": 0.0004950418561152618, "loss": 2.006, "step": 1651 }, { "epoch": 0.0806640625, "grad_norm": 0.4138581156730652, "learning_rate": 0.000495034540564538, "loss": 2.0212, "step": 1652 }, { "epoch": 0.080712890625, "grad_norm": 0.27903786301612854, "learning_rate": 0.0004950272196811001, "loss": 1.9734, "step": 1653 }, { "epoch": 0.08076171875, "grad_norm": 0.34829768538475037, "learning_rate": 0.000495019893465126, "loss": 1.9961, "step": 1654 }, { "epoch": 0.080810546875, "grad_norm": 0.3563130497932434, "learning_rate": 0.0004950125619167929, "loss": 2.0001, "step": 1655 }, { "epoch": 0.080859375, "grad_norm": 0.358332097530365, "learning_rate": 0.0004950052250362786, "loss": 1.978, "step": 1656 }, { "epoch": 0.080908203125, "grad_norm": 0.3704059422016144, "learning_rate": 0.000494997882823761, "loss": 2.0403, "step": 1657 }, { "epoch": 0.08095703125, "grad_norm": 0.33853858709335327, "learning_rate": 0.000494990535279418, "loss": 2.0369, "step": 1658 }, { "epoch": 0.081005859375, "grad_norm": 0.3739646077156067, "learning_rate": 0.0004949831824034276, "loss": 2.0374, "step": 1659 }, { "epoch": 0.0810546875, "grad_norm": 0.40179312229156494, "learning_rate": 0.0004949758241959679, "loss": 1.9381, "step": 1660 }, { "epoch": 0.081103515625, "grad_norm": 0.34864768385887146, "learning_rate": 0.0004949684606572175, "loss": 1.9802, "step": 1661 }, { "epoch": 0.08115234375, "grad_norm": 0.4154249429702759, "learning_rate": 0.0004949610917873547, "loss": 2.0247, "step": 1662 }, { "epoch": 0.081201171875, "grad_norm": 0.42198893427848816, "learning_rate": 0.0004949537175865582, "loss": 2.0571, "step": 1663 }, { "epoch": 0.08125, "grad_norm": 0.3352776765823364, "learning_rate": 0.0004949463380550065, "loss": 1.9851, "step": 1664 }, { "epoch": 0.081298828125, "grad_norm": 0.435192346572876, "learning_rate": 0.0004949389531928788, "loss": 2.0184, "step": 1665 }, { "epoch": 0.08134765625, "grad_norm": 0.3982267677783966, "learning_rate": 0.0004949315630003537, "loss": 2.0211, "step": 1666 }, { "epoch": 0.081396484375, "grad_norm": 0.33793380856513977, "learning_rate": 0.0004949241674776104, "loss": 2.0244, "step": 1667 }, { "epoch": 0.0814453125, "grad_norm": 0.4049651324748993, "learning_rate": 0.0004949167666248285, "loss": 1.9853, "step": 1668 }, { "epoch": 0.081494140625, "grad_norm": 0.37368136644363403, "learning_rate": 0.0004949093604421869, "loss": 2.0247, "step": 1669 }, { "epoch": 0.08154296875, "grad_norm": 0.37941107153892517, "learning_rate": 0.0004949019489298653, "loss": 1.9873, "step": 1670 }, { "epoch": 0.081591796875, "grad_norm": 0.35323619842529297, "learning_rate": 0.0004948945320880434, "loss": 1.9909, "step": 1671 }, { "epoch": 0.081640625, "grad_norm": 0.38915500044822693, "learning_rate": 0.0004948871099169006, "loss": 2.009, "step": 1672 }, { "epoch": 0.081689453125, "grad_norm": 0.4710172414779663, "learning_rate": 0.0004948796824166173, "loss": 2.0406, "step": 1673 }, { "epoch": 0.08173828125, "grad_norm": 0.40860921144485474, "learning_rate": 0.0004948722495873732, "loss": 1.9982, "step": 1674 }, { "epoch": 0.081787109375, "grad_norm": 0.4226577579975128, "learning_rate": 0.0004948648114293483, "loss": 2.0505, "step": 1675 }, { "epoch": 0.0818359375, "grad_norm": 0.43416279554367065, "learning_rate": 0.0004948573679427233, "loss": 1.9898, "step": 1676 }, { "epoch": 0.081884765625, "grad_norm": 0.44889384508132935, "learning_rate": 0.0004948499191276782, "loss": 1.936, "step": 1677 }, { "epoch": 0.08193359375, "grad_norm": 0.418999582529068, "learning_rate": 0.0004948424649843938, "loss": 1.9971, "step": 1678 }, { "epoch": 0.081982421875, "grad_norm": 0.3089962899684906, "learning_rate": 0.0004948350055130505, "loss": 1.9863, "step": 1679 }, { "epoch": 0.08203125, "grad_norm": 0.39001813530921936, "learning_rate": 0.0004948275407138293, "loss": 2.0193, "step": 1680 }, { "epoch": 0.082080078125, "grad_norm": 0.40348944067955017, "learning_rate": 0.0004948200705869111, "loss": 2.027, "step": 1681 }, { "epoch": 0.08212890625, "grad_norm": 0.34453731775283813, "learning_rate": 0.0004948125951324768, "loss": 2.0, "step": 1682 }, { "epoch": 0.082177734375, "grad_norm": 0.33464014530181885, "learning_rate": 0.0004948051143507076, "loss": 1.988, "step": 1683 }, { "epoch": 0.0822265625, "grad_norm": 0.3284696638584137, "learning_rate": 0.000494797628241785, "loss": 2.0025, "step": 1684 }, { "epoch": 0.082275390625, "grad_norm": 0.3601612448692322, "learning_rate": 0.0004947901368058902, "loss": 1.9948, "step": 1685 }, { "epoch": 0.08232421875, "grad_norm": 0.3125396966934204, "learning_rate": 0.000494782640043205, "loss": 2.0138, "step": 1686 }, { "epoch": 0.082373046875, "grad_norm": 0.3001766800880432, "learning_rate": 0.0004947751379539108, "loss": 2.0007, "step": 1687 }, { "epoch": 0.082421875, "grad_norm": 0.3312426209449768, "learning_rate": 0.0004947676305381897, "loss": 1.9793, "step": 1688 }, { "epoch": 0.082470703125, "grad_norm": 0.327108770608902, "learning_rate": 0.0004947601177962234, "loss": 1.9849, "step": 1689 }, { "epoch": 0.08251953125, "grad_norm": 0.3858231008052826, "learning_rate": 0.0004947525997281941, "loss": 2.0101, "step": 1690 }, { "epoch": 0.082568359375, "grad_norm": 0.3782230019569397, "learning_rate": 0.000494745076334284, "loss": 1.9823, "step": 1691 }, { "epoch": 0.0826171875, "grad_norm": 0.3440355062484741, "learning_rate": 0.0004947375476146755, "loss": 1.9973, "step": 1692 }, { "epoch": 0.082666015625, "grad_norm": 0.3601139783859253, "learning_rate": 0.000494730013569551, "loss": 1.9525, "step": 1693 }, { "epoch": 0.08271484375, "grad_norm": 0.31565341353416443, "learning_rate": 0.000494722474199093, "loss": 1.9931, "step": 1694 }, { "epoch": 0.082763671875, "grad_norm": 0.28828194737434387, "learning_rate": 0.0004947149295034844, "loss": 1.9845, "step": 1695 }, { "epoch": 0.0828125, "grad_norm": 0.3749249279499054, "learning_rate": 0.0004947073794829079, "loss": 2.0232, "step": 1696 }, { "epoch": 0.082861328125, "grad_norm": 0.38257554173469543, "learning_rate": 0.0004946998241375466, "loss": 1.9862, "step": 1697 }, { "epoch": 0.08291015625, "grad_norm": 0.4645141363143921, "learning_rate": 0.0004946922634675836, "loss": 1.9909, "step": 1698 }, { "epoch": 0.082958984375, "grad_norm": 0.44037047028541565, "learning_rate": 0.0004946846974732019, "loss": 1.9445, "step": 1699 }, { "epoch": 0.0830078125, "grad_norm": 0.4570297598838806, "learning_rate": 0.0004946771261545853, "loss": 1.9739, "step": 1700 }, { "epoch": 0.083056640625, "grad_norm": 0.4791626036167145, "learning_rate": 0.000494669549511917, "loss": 1.9979, "step": 1701 }, { "epoch": 0.08310546875, "grad_norm": 0.33594757318496704, "learning_rate": 0.0004946619675453806, "loss": 1.9896, "step": 1702 }, { "epoch": 0.083154296875, "grad_norm": 0.48520439863204956, "learning_rate": 0.0004946543802551599, "loss": 1.9779, "step": 1703 }, { "epoch": 0.083203125, "grad_norm": 0.4010576903820038, "learning_rate": 0.000494646787641439, "loss": 1.9634, "step": 1704 }, { "epoch": 0.083251953125, "grad_norm": 0.3415336608886719, "learning_rate": 0.0004946391897044016, "loss": 1.9731, "step": 1705 }, { "epoch": 0.08330078125, "grad_norm": 0.3595392107963562, "learning_rate": 0.000494631586444232, "loss": 1.9774, "step": 1706 }, { "epoch": 0.083349609375, "grad_norm": 0.3642403483390808, "learning_rate": 0.0004946239778611145, "loss": 2.0383, "step": 1707 }, { "epoch": 0.0833984375, "grad_norm": 0.3573497235774994, "learning_rate": 0.0004946163639552335, "loss": 1.9695, "step": 1708 }, { "epoch": 0.083447265625, "grad_norm": 0.3174579441547394, "learning_rate": 0.0004946087447267734, "loss": 1.9787, "step": 1709 }, { "epoch": 0.08349609375, "grad_norm": 0.31290149688720703, "learning_rate": 0.0004946011201759189, "loss": 1.9501, "step": 1710 }, { "epoch": 0.083544921875, "grad_norm": 0.2875548005104065, "learning_rate": 0.0004945934903028549, "loss": 1.9752, "step": 1711 }, { "epoch": 0.08359375, "grad_norm": 0.39916902780532837, "learning_rate": 0.0004945858551077662, "loss": 1.9943, "step": 1712 }, { "epoch": 0.083642578125, "grad_norm": 0.4518921971321106, "learning_rate": 0.000494578214590838, "loss": 1.9774, "step": 1713 }, { "epoch": 0.08369140625, "grad_norm": 0.3545111119747162, "learning_rate": 0.0004945705687522552, "loss": 1.9674, "step": 1714 }, { "epoch": 0.083740234375, "grad_norm": 0.3381524384021759, "learning_rate": 0.0004945629175922034, "loss": 1.9709, "step": 1715 }, { "epoch": 0.0837890625, "grad_norm": 0.3806746304035187, "learning_rate": 0.0004945552611108679, "loss": 1.9588, "step": 1716 }, { "epoch": 0.083837890625, "grad_norm": 0.3431239426136017, "learning_rate": 0.0004945475993084342, "loss": 1.9941, "step": 1717 }, { "epoch": 0.08388671875, "grad_norm": 0.3790096044540405, "learning_rate": 0.0004945399321850879, "loss": 1.9568, "step": 1718 }, { "epoch": 0.083935546875, "grad_norm": 0.5003384351730347, "learning_rate": 0.0004945322597410152, "loss": 2.0123, "step": 1719 }, { "epoch": 0.083984375, "grad_norm": 0.5041674971580505, "learning_rate": 0.0004945245819764016, "loss": 2.0244, "step": 1720 }, { "epoch": 0.084033203125, "grad_norm": 0.3685785233974457, "learning_rate": 0.0004945168988914337, "loss": 1.9989, "step": 1721 }, { "epoch": 0.08408203125, "grad_norm": 0.46105748414993286, "learning_rate": 0.0004945092104862971, "loss": 1.95, "step": 1722 }, { "epoch": 0.084130859375, "grad_norm": 0.43941932916641235, "learning_rate": 0.0004945015167611786, "loss": 2.0282, "step": 1723 }, { "epoch": 0.0841796875, "grad_norm": 0.3999760150909424, "learning_rate": 0.0004944938177162644, "loss": 2.0609, "step": 1724 }, { "epoch": 0.084228515625, "grad_norm": 0.3448227345943451, "learning_rate": 0.0004944861133517412, "loss": 1.9839, "step": 1725 }, { "epoch": 0.08427734375, "grad_norm": 0.39141902327537537, "learning_rate": 0.0004944784036677958, "loss": 1.9766, "step": 1726 }, { "epoch": 0.084326171875, "grad_norm": 0.4497014582157135, "learning_rate": 0.0004944706886646149, "loss": 1.9755, "step": 1727 }, { "epoch": 0.084375, "grad_norm": 0.32832852005958557, "learning_rate": 0.0004944629683423855, "loss": 1.9911, "step": 1728 }, { "epoch": 0.084423828125, "grad_norm": 0.3656862676143646, "learning_rate": 0.0004944552427012948, "loss": 2.0038, "step": 1729 }, { "epoch": 0.08447265625, "grad_norm": 0.41696226596832275, "learning_rate": 0.00049444751174153, "loss": 1.9683, "step": 1730 }, { "epoch": 0.084521484375, "grad_norm": 0.43101730942726135, "learning_rate": 0.0004944397754632785, "loss": 2.0034, "step": 1731 }, { "epoch": 0.0845703125, "grad_norm": 0.38592004776000977, "learning_rate": 0.0004944320338667276, "loss": 1.9661, "step": 1732 }, { "epoch": 0.084619140625, "grad_norm": 0.2805403172969818, "learning_rate": 0.0004944242869520651, "loss": 2.039, "step": 1733 }, { "epoch": 0.08466796875, "grad_norm": 0.45055922865867615, "learning_rate": 0.0004944165347194788, "loss": 2.0225, "step": 1734 }, { "epoch": 0.084716796875, "grad_norm": 0.3963811695575714, "learning_rate": 0.0004944087771691565, "loss": 2.0383, "step": 1735 }, { "epoch": 0.084765625, "grad_norm": 0.33237046003341675, "learning_rate": 0.0004944010143012861, "loss": 1.9354, "step": 1736 }, { "epoch": 0.084814453125, "grad_norm": 0.371834933757782, "learning_rate": 0.0004943932461160559, "loss": 2.0038, "step": 1737 }, { "epoch": 0.08486328125, "grad_norm": 0.28519946336746216, "learning_rate": 0.0004943854726136542, "loss": 1.9839, "step": 1738 }, { "epoch": 0.084912109375, "grad_norm": 0.34979891777038574, "learning_rate": 0.0004943776937942693, "loss": 1.9522, "step": 1739 }, { "epoch": 0.0849609375, "grad_norm": 0.42985522747039795, "learning_rate": 0.0004943699096580897, "loss": 1.9686, "step": 1740 }, { "epoch": 0.085009765625, "grad_norm": 0.346768856048584, "learning_rate": 0.0004943621202053041, "loss": 2.0243, "step": 1741 }, { "epoch": 0.08505859375, "grad_norm": 0.3463951051235199, "learning_rate": 0.0004943543254361013, "loss": 2.0142, "step": 1742 }, { "epoch": 0.085107421875, "grad_norm": 0.3166508078575134, "learning_rate": 0.0004943465253506702, "loss": 1.9777, "step": 1743 }, { "epoch": 0.08515625, "grad_norm": 0.34264513850212097, "learning_rate": 0.0004943387199491998, "loss": 1.993, "step": 1744 }, { "epoch": 0.085205078125, "grad_norm": 0.33550339937210083, "learning_rate": 0.0004943309092318793, "loss": 1.9559, "step": 1745 }, { "epoch": 0.08525390625, "grad_norm": 0.3144224286079407, "learning_rate": 0.0004943230931988981, "loss": 2.0275, "step": 1746 }, { "epoch": 0.085302734375, "grad_norm": 0.3657968044281006, "learning_rate": 0.0004943152718504455, "loss": 1.9524, "step": 1747 }, { "epoch": 0.0853515625, "grad_norm": 0.26439905166625977, "learning_rate": 0.0004943074451867111, "loss": 1.9629, "step": 1748 }, { "epoch": 0.085400390625, "grad_norm": 0.3211369514465332, "learning_rate": 0.0004942996132078846, "loss": 1.994, "step": 1749 }, { "epoch": 0.08544921875, "grad_norm": 0.2901986837387085, "learning_rate": 0.0004942917759141556, "loss": 2.0136, "step": 1750 }, { "epoch": 0.085498046875, "grad_norm": 0.3304313123226166, "learning_rate": 0.0004942839333057144, "loss": 2.018, "step": 1751 }, { "epoch": 0.085546875, "grad_norm": 0.38512200117111206, "learning_rate": 0.0004942760853827509, "loss": 2.0184, "step": 1752 }, { "epoch": 0.085595703125, "grad_norm": 0.38578730821609497, "learning_rate": 0.0004942682321454553, "loss": 1.9883, "step": 1753 }, { "epoch": 0.08564453125, "grad_norm": 0.34133124351501465, "learning_rate": 0.0004942603735940179, "loss": 2.0199, "step": 1754 }, { "epoch": 0.085693359375, "grad_norm": 0.30130860209465027, "learning_rate": 0.0004942525097286291, "loss": 2.001, "step": 1755 }, { "epoch": 0.0857421875, "grad_norm": 0.3216025233268738, "learning_rate": 0.0004942446405494798, "loss": 2.0127, "step": 1756 }, { "epoch": 0.085791015625, "grad_norm": 0.370415061712265, "learning_rate": 0.0004942367660567603, "loss": 2.0506, "step": 1757 }, { "epoch": 0.08583984375, "grad_norm": 0.37534669041633606, "learning_rate": 0.0004942288862506618, "loss": 1.9989, "step": 1758 }, { "epoch": 0.085888671875, "grad_norm": 0.30365103483200073, "learning_rate": 0.000494221001131375, "loss": 1.9765, "step": 1759 }, { "epoch": 0.0859375, "grad_norm": 0.4237724840641022, "learning_rate": 0.0004942131106990911, "loss": 2.0111, "step": 1760 }, { "epoch": 0.085986328125, "grad_norm": 0.4646960198879242, "learning_rate": 0.0004942052149540015, "loss": 2.0229, "step": 1761 }, { "epoch": 0.08603515625, "grad_norm": 0.37012526392936707, "learning_rate": 0.0004941973138962973, "loss": 1.9967, "step": 1762 }, { "epoch": 0.086083984375, "grad_norm": 0.32851308584213257, "learning_rate": 0.0004941894075261702, "loss": 1.9506, "step": 1763 }, { "epoch": 0.0861328125, "grad_norm": 0.45160263776779175, "learning_rate": 0.0004941814958438115, "loss": 1.9731, "step": 1764 }, { "epoch": 0.086181640625, "grad_norm": 0.3973856568336487, "learning_rate": 0.0004941735788494134, "loss": 1.9834, "step": 1765 }, { "epoch": 0.08623046875, "grad_norm": 0.3978804349899292, "learning_rate": 0.0004941656565431673, "loss": 2.0221, "step": 1766 }, { "epoch": 0.086279296875, "grad_norm": 0.286467969417572, "learning_rate": 0.0004941577289252657, "loss": 2.0173, "step": 1767 }, { "epoch": 0.086328125, "grad_norm": 0.35826367139816284, "learning_rate": 0.0004941497959959003, "loss": 1.9854, "step": 1768 }, { "epoch": 0.086376953125, "grad_norm": 0.37955954670906067, "learning_rate": 0.0004941418577552635, "loss": 1.9959, "step": 1769 }, { "epoch": 0.08642578125, "grad_norm": 0.4117143154144287, "learning_rate": 0.0004941339142035478, "loss": 1.9519, "step": 1770 }, { "epoch": 0.086474609375, "grad_norm": 0.434042364358902, "learning_rate": 0.0004941259653409456, "loss": 2.0121, "step": 1771 }, { "epoch": 0.0865234375, "grad_norm": 0.45739230513572693, "learning_rate": 0.0004941180111676497, "loss": 2.0032, "step": 1772 }, { "epoch": 0.086572265625, "grad_norm": 0.48121607303619385, "learning_rate": 0.0004941100516838526, "loss": 2.0072, "step": 1773 }, { "epoch": 0.08662109375, "grad_norm": 0.40672576427459717, "learning_rate": 0.0004941020868897474, "loss": 2.0183, "step": 1774 }, { "epoch": 0.086669921875, "grad_norm": 0.40231800079345703, "learning_rate": 0.0004940941167855271, "loss": 1.9617, "step": 1775 }, { "epoch": 0.08671875, "grad_norm": 0.3512624204158783, "learning_rate": 0.0004940861413713849, "loss": 1.9772, "step": 1776 }, { "epoch": 0.086767578125, "grad_norm": 0.46989208459854126, "learning_rate": 0.0004940781606475141, "loss": 1.9459, "step": 1777 }, { "epoch": 0.08681640625, "grad_norm": 0.491921603679657, "learning_rate": 0.000494070174614108, "loss": 1.9347, "step": 1778 }, { "epoch": 0.086865234375, "grad_norm": 0.3564983010292053, "learning_rate": 0.0004940621832713603, "loss": 1.9896, "step": 1779 }, { "epoch": 0.0869140625, "grad_norm": 0.503760576248169, "learning_rate": 0.0004940541866194645, "loss": 1.9687, "step": 1780 }, { "epoch": 0.086962890625, "grad_norm": 0.4132084548473358, "learning_rate": 0.0004940461846586145, "loss": 2.0165, "step": 1781 }, { "epoch": 0.08701171875, "grad_norm": 0.3981996774673462, "learning_rate": 0.0004940381773890043, "loss": 2.0416, "step": 1782 }, { "epoch": 0.087060546875, "grad_norm": 0.3924277126789093, "learning_rate": 0.0004940301648108279, "loss": 1.9879, "step": 1783 }, { "epoch": 0.087109375, "grad_norm": 0.3198395371437073, "learning_rate": 0.0004940221469242794, "loss": 1.9897, "step": 1784 }, { "epoch": 0.087158203125, "grad_norm": 0.3103877305984497, "learning_rate": 0.0004940141237295533, "loss": 1.9674, "step": 1785 }, { "epoch": 0.08720703125, "grad_norm": 0.2927185893058777, "learning_rate": 0.000494006095226844, "loss": 1.9932, "step": 1786 }, { "epoch": 0.087255859375, "grad_norm": 0.3113398849964142, "learning_rate": 0.0004939980614163458, "loss": 2.0192, "step": 1787 }, { "epoch": 0.0873046875, "grad_norm": 0.35055533051490784, "learning_rate": 0.0004939900222982539, "loss": 1.9802, "step": 1788 }, { "epoch": 0.087353515625, "grad_norm": 0.3219034671783447, "learning_rate": 0.0004939819778727627, "loss": 1.9701, "step": 1789 }, { "epoch": 0.08740234375, "grad_norm": 0.317462295293808, "learning_rate": 0.0004939739281400674, "loss": 1.9948, "step": 1790 }, { "epoch": 0.087451171875, "grad_norm": 0.28533679246902466, "learning_rate": 0.000493965873100363, "loss": 2.0122, "step": 1791 }, { "epoch": 0.0875, "grad_norm": 0.31208372116088867, "learning_rate": 0.0004939578127538449, "loss": 2.0207, "step": 1792 }, { "epoch": 0.087548828125, "grad_norm": 0.28508374094963074, "learning_rate": 0.0004939497471007082, "loss": 1.9826, "step": 1793 }, { "epoch": 0.08759765625, "grad_norm": 0.3496387004852295, "learning_rate": 0.0004939416761411484, "loss": 1.9943, "step": 1794 }, { "epoch": 0.087646484375, "grad_norm": 0.3373797833919525, "learning_rate": 0.0004939335998753612, "loss": 1.98, "step": 1795 }, { "epoch": 0.0876953125, "grad_norm": 0.302412748336792, "learning_rate": 0.0004939255183035424, "loss": 1.9584, "step": 1796 }, { "epoch": 0.087744140625, "grad_norm": 0.3375414311885834, "learning_rate": 0.0004939174314258877, "loss": 2.0006, "step": 1797 }, { "epoch": 0.08779296875, "grad_norm": 0.35244283080101013, "learning_rate": 0.0004939093392425933, "loss": 2.0022, "step": 1798 }, { "epoch": 0.087841796875, "grad_norm": 0.30867305397987366, "learning_rate": 0.000493901241753855, "loss": 2.022, "step": 1799 }, { "epoch": 0.087890625, "grad_norm": 0.35416239500045776, "learning_rate": 0.0004938931389598695, "loss": 1.9994, "step": 1800 }, { "epoch": 0.087939453125, "grad_norm": 0.4274396598339081, "learning_rate": 0.0004938850308608327, "loss": 1.9454, "step": 1801 }, { "epoch": 0.08798828125, "grad_norm": 0.4037797451019287, "learning_rate": 0.0004938769174569413, "loss": 2.0213, "step": 1802 }, { "epoch": 0.088037109375, "grad_norm": 0.4092946946620941, "learning_rate": 0.0004938687987483921, "loss": 1.9779, "step": 1803 }, { "epoch": 0.0880859375, "grad_norm": 0.31936466693878174, "learning_rate": 0.0004938606747353818, "loss": 1.9508, "step": 1804 }, { "epoch": 0.088134765625, "grad_norm": 0.3803541958332062, "learning_rate": 0.0004938525454181071, "loss": 1.9757, "step": 1805 }, { "epoch": 0.08818359375, "grad_norm": 0.4245985150337219, "learning_rate": 0.0004938444107967651, "loss": 1.9684, "step": 1806 }, { "epoch": 0.088232421875, "grad_norm": 0.32501813769340515, "learning_rate": 0.000493836270871553, "loss": 2.045, "step": 1807 }, { "epoch": 0.08828125, "grad_norm": 0.36552342772483826, "learning_rate": 0.0004938281256426681, "loss": 1.9737, "step": 1808 }, { "epoch": 0.088330078125, "grad_norm": 0.41964003443717957, "learning_rate": 0.0004938199751103078, "loss": 1.9751, "step": 1809 }, { "epoch": 0.08837890625, "grad_norm": 0.3775762915611267, "learning_rate": 0.0004938118192746695, "loss": 1.9383, "step": 1810 }, { "epoch": 0.088427734375, "grad_norm": 0.3612406551837921, "learning_rate": 0.0004938036581359511, "loss": 2.0386, "step": 1811 }, { "epoch": 0.0884765625, "grad_norm": 0.3072544038295746, "learning_rate": 0.0004937954916943502, "loss": 1.9902, "step": 1812 }, { "epoch": 0.088525390625, "grad_norm": 0.3325650691986084, "learning_rate": 0.0004937873199500648, "loss": 1.9607, "step": 1813 }, { "epoch": 0.08857421875, "grad_norm": 0.3405448794364929, "learning_rate": 0.0004937791429032929, "loss": 2.0034, "step": 1814 }, { "epoch": 0.088623046875, "grad_norm": 0.3350667953491211, "learning_rate": 0.0004937709605542327, "loss": 2.0059, "step": 1815 }, { "epoch": 0.088671875, "grad_norm": 0.4018327593803406, "learning_rate": 0.0004937627729030825, "loss": 1.9585, "step": 1816 }, { "epoch": 0.088720703125, "grad_norm": 0.34553346037864685, "learning_rate": 0.0004937545799500408, "loss": 1.9676, "step": 1817 }, { "epoch": 0.08876953125, "grad_norm": 0.267554372549057, "learning_rate": 0.0004937463816953061, "loss": 2.0156, "step": 1818 }, { "epoch": 0.088818359375, "grad_norm": 0.36275961995124817, "learning_rate": 0.000493738178139077, "loss": 1.9978, "step": 1819 }, { "epoch": 0.0888671875, "grad_norm": 0.3925084173679352, "learning_rate": 0.0004937299692815525, "loss": 1.9981, "step": 1820 }, { "epoch": 0.088916015625, "grad_norm": 0.298584520816803, "learning_rate": 0.0004937217551229315, "loss": 1.9973, "step": 1821 }, { "epoch": 0.08896484375, "grad_norm": 0.3366398513317108, "learning_rate": 0.000493713535663413, "loss": 2.0463, "step": 1822 }, { "epoch": 0.089013671875, "grad_norm": 0.4081874489784241, "learning_rate": 0.0004937053109031963, "loss": 2.0108, "step": 1823 }, { "epoch": 0.0890625, "grad_norm": 0.38652503490448, "learning_rate": 0.0004936970808424807, "loss": 1.9915, "step": 1824 }, { "epoch": 0.089111328125, "grad_norm": 0.3551326394081116, "learning_rate": 0.0004936888454814656, "loss": 1.9689, "step": 1825 }, { "epoch": 0.08916015625, "grad_norm": 0.37401312589645386, "learning_rate": 0.0004936806048203506, "loss": 2.0272, "step": 1826 }, { "epoch": 0.089208984375, "grad_norm": 0.4560132920742035, "learning_rate": 0.0004936723588593355, "loss": 1.9652, "step": 1827 }, { "epoch": 0.0892578125, "grad_norm": 0.38599148392677307, "learning_rate": 0.0004936641075986201, "loss": 1.9925, "step": 1828 }, { "epoch": 0.089306640625, "grad_norm": 0.3514255881309509, "learning_rate": 0.0004936558510384043, "loss": 1.9701, "step": 1829 }, { "epoch": 0.08935546875, "grad_norm": 0.41179656982421875, "learning_rate": 0.0004936475891788883, "loss": 1.9774, "step": 1830 }, { "epoch": 0.089404296875, "grad_norm": 0.3345165252685547, "learning_rate": 0.0004936393220202725, "loss": 1.9334, "step": 1831 }, { "epoch": 0.089453125, "grad_norm": 0.3197629153728485, "learning_rate": 0.0004936310495627569, "loss": 1.9664, "step": 1832 }, { "epoch": 0.089501953125, "grad_norm": 0.45697325468063354, "learning_rate": 0.0004936227718065422, "loss": 1.9858, "step": 1833 }, { "epoch": 0.08955078125, "grad_norm": 0.35486844182014465, "learning_rate": 0.0004936144887518291, "loss": 2.0357, "step": 1834 }, { "epoch": 0.089599609375, "grad_norm": 0.3611099421977997, "learning_rate": 0.000493606200398818, "loss": 2.0462, "step": 1835 }, { "epoch": 0.0896484375, "grad_norm": 0.30933746695518494, "learning_rate": 0.0004935979067477103, "loss": 1.9671, "step": 1836 }, { "epoch": 0.089697265625, "grad_norm": 0.30723991990089417, "learning_rate": 0.0004935896077987064, "loss": 1.9487, "step": 1837 }, { "epoch": 0.08974609375, "grad_norm": 0.32328832149505615, "learning_rate": 0.000493581303552008, "loss": 2.0056, "step": 1838 }, { "epoch": 0.089794921875, "grad_norm": 0.3328786790370941, "learning_rate": 0.0004935729940078159, "loss": 1.9933, "step": 1839 }, { "epoch": 0.08984375, "grad_norm": 0.33200299739837646, "learning_rate": 0.0004935646791663318, "loss": 1.9364, "step": 1840 }, { "epoch": 0.089892578125, "grad_norm": 0.3628101646900177, "learning_rate": 0.0004935563590277571, "loss": 1.9564, "step": 1841 }, { "epoch": 0.08994140625, "grad_norm": 0.4080869257450104, "learning_rate": 0.0004935480335922935, "loss": 1.959, "step": 1842 }, { "epoch": 0.089990234375, "grad_norm": 0.36005252599716187, "learning_rate": 0.0004935397028601427, "loss": 1.9932, "step": 1843 }, { "epoch": 0.0900390625, "grad_norm": 0.35334518551826477, "learning_rate": 0.0004935313668315065, "loss": 1.982, "step": 1844 }, { "epoch": 0.090087890625, "grad_norm": 0.3244836628437042, "learning_rate": 0.0004935230255065871, "loss": 2.0006, "step": 1845 }, { "epoch": 0.09013671875, "grad_norm": 0.27293333411216736, "learning_rate": 0.0004935146788855868, "loss": 1.9546, "step": 1846 }, { "epoch": 0.090185546875, "grad_norm": 0.27405449748039246, "learning_rate": 0.0004935063269687075, "loss": 1.9696, "step": 1847 }, { "epoch": 0.090234375, "grad_norm": 0.3262519836425781, "learning_rate": 0.000493497969756152, "loss": 1.9903, "step": 1848 }, { "epoch": 0.090283203125, "grad_norm": 0.3386152684688568, "learning_rate": 0.0004934896072481225, "loss": 1.9959, "step": 1849 }, { "epoch": 0.09033203125, "grad_norm": 0.302157461643219, "learning_rate": 0.000493481239444822, "loss": 1.9586, "step": 1850 }, { "epoch": 0.090380859375, "grad_norm": 0.306186705827713, "learning_rate": 0.0004934728663464529, "loss": 1.9779, "step": 1851 }, { "epoch": 0.0904296875, "grad_norm": 0.26767241954803467, "learning_rate": 0.0004934644879532185, "loss": 2.0348, "step": 1852 }, { "epoch": 0.090478515625, "grad_norm": 0.3202535808086395, "learning_rate": 0.0004934561042653218, "loss": 1.9789, "step": 1853 }, { "epoch": 0.09052734375, "grad_norm": 0.37743401527404785, "learning_rate": 0.0004934477152829658, "loss": 1.9578, "step": 1854 }, { "epoch": 0.090576171875, "grad_norm": 0.3552055060863495, "learning_rate": 0.0004934393210063539, "loss": 1.9628, "step": 1855 }, { "epoch": 0.090625, "grad_norm": 0.29255688190460205, "learning_rate": 0.0004934309214356897, "loss": 1.9465, "step": 1856 }, { "epoch": 0.090673828125, "grad_norm": 0.3447589576244354, "learning_rate": 0.0004934225165711765, "loss": 1.9975, "step": 1857 }, { "epoch": 0.09072265625, "grad_norm": 0.34771186113357544, "learning_rate": 0.0004934141064130181, "loss": 1.9832, "step": 1858 }, { "epoch": 0.090771484375, "grad_norm": 0.33597180247306824, "learning_rate": 0.0004934056909614185, "loss": 2.0278, "step": 1859 }, { "epoch": 0.0908203125, "grad_norm": 0.3858603239059448, "learning_rate": 0.0004933972702165814, "loss": 2.0025, "step": 1860 }, { "epoch": 0.090869140625, "grad_norm": 0.3682931363582611, "learning_rate": 0.0004933888441787111, "loss": 1.9862, "step": 1861 }, { "epoch": 0.09091796875, "grad_norm": 0.31650957465171814, "learning_rate": 0.0004933804128480117, "loss": 2.004, "step": 1862 }, { "epoch": 0.090966796875, "grad_norm": 0.3634696900844574, "learning_rate": 0.0004933719762246875, "loss": 1.9604, "step": 1863 }, { "epoch": 0.091015625, "grad_norm": 0.45695942640304565, "learning_rate": 0.000493363534308943, "loss": 1.9904, "step": 1864 }, { "epoch": 0.091064453125, "grad_norm": 0.5478183627128601, "learning_rate": 0.0004933550871009829, "loss": 2.0337, "step": 1865 }, { "epoch": 0.09111328125, "grad_norm": 0.5112489461898804, "learning_rate": 0.0004933466346010117, "loss": 2.0241, "step": 1866 }, { "epoch": 0.091162109375, "grad_norm": 0.4700581431388855, "learning_rate": 0.0004933381768092345, "loss": 1.9894, "step": 1867 }, { "epoch": 0.0912109375, "grad_norm": 0.516936719417572, "learning_rate": 0.0004933297137258561, "loss": 1.9947, "step": 1868 }, { "epoch": 0.091259765625, "grad_norm": 0.382556289434433, "learning_rate": 0.0004933212453510817, "loss": 2.0109, "step": 1869 }, { "epoch": 0.09130859375, "grad_norm": 0.38803306221961975, "learning_rate": 0.0004933127716851167, "loss": 1.978, "step": 1870 }, { "epoch": 0.091357421875, "grad_norm": 0.4775521755218506, "learning_rate": 0.0004933042927281661, "loss": 1.9715, "step": 1871 }, { "epoch": 0.09140625, "grad_norm": 0.3447911739349365, "learning_rate": 0.0004932958084804356, "loss": 1.9813, "step": 1872 }, { "epoch": 0.091455078125, "grad_norm": 0.374717116355896, "learning_rate": 0.0004932873189421308, "loss": 1.9655, "step": 1873 }, { "epoch": 0.09150390625, "grad_norm": 0.37749895453453064, "learning_rate": 0.0004932788241134576, "loss": 1.9876, "step": 1874 }, { "epoch": 0.091552734375, "grad_norm": 0.32437044382095337, "learning_rate": 0.0004932703239946215, "loss": 2.011, "step": 1875 }, { "epoch": 0.0916015625, "grad_norm": 0.3705669939517975, "learning_rate": 0.0004932618185858288, "loss": 2.0359, "step": 1876 }, { "epoch": 0.091650390625, "grad_norm": 0.36235904693603516, "learning_rate": 0.0004932533078872857, "loss": 1.9725, "step": 1877 }, { "epoch": 0.09169921875, "grad_norm": 0.34875231981277466, "learning_rate": 0.0004932447918991983, "loss": 2.0289, "step": 1878 }, { "epoch": 0.091748046875, "grad_norm": 0.400879830121994, "learning_rate": 0.000493236270621773, "loss": 1.983, "step": 1879 }, { "epoch": 0.091796875, "grad_norm": 0.3309303820133209, "learning_rate": 0.0004932277440552164, "loss": 1.9929, "step": 1880 }, { "epoch": 0.091845703125, "grad_norm": 0.35574495792388916, "learning_rate": 0.000493219212199735, "loss": 1.9748, "step": 1881 }, { "epoch": 0.09189453125, "grad_norm": 0.2920076847076416, "learning_rate": 0.0004932106750555358, "loss": 1.9898, "step": 1882 }, { "epoch": 0.091943359375, "grad_norm": 0.3116266429424286, "learning_rate": 0.0004932021326228255, "loss": 2.0302, "step": 1883 }, { "epoch": 0.0919921875, "grad_norm": 0.3704981505870819, "learning_rate": 0.0004931935849018112, "loss": 2.0416, "step": 1884 }, { "epoch": 0.092041015625, "grad_norm": 0.360151082277298, "learning_rate": 0.0004931850318927001, "loss": 1.9582, "step": 1885 }, { "epoch": 0.09208984375, "grad_norm": 0.470014363527298, "learning_rate": 0.0004931764735956995, "loss": 1.957, "step": 1886 }, { "epoch": 0.092138671875, "grad_norm": 0.4784137010574341, "learning_rate": 0.0004931679100110167, "loss": 1.9574, "step": 1887 }, { "epoch": 0.0921875, "grad_norm": 0.30958130955696106, "learning_rate": 0.0004931593411388594, "loss": 2.0315, "step": 1888 }, { "epoch": 0.092236328125, "grad_norm": 0.4149710237979889, "learning_rate": 0.0004931507669794352, "loss": 2.0039, "step": 1889 }, { "epoch": 0.09228515625, "grad_norm": 0.4009321630001068, "learning_rate": 0.0004931421875329519, "loss": 1.9506, "step": 1890 }, { "epoch": 0.092333984375, "grad_norm": 0.4194198548793793, "learning_rate": 0.0004931336027996174, "loss": 2.0055, "step": 1891 }, { "epoch": 0.0923828125, "grad_norm": 0.4625665247440338, "learning_rate": 0.0004931250127796398, "loss": 2.0154, "step": 1892 }, { "epoch": 0.092431640625, "grad_norm": 0.31214645504951477, "learning_rate": 0.0004931164174732273, "loss": 1.9978, "step": 1893 }, { "epoch": 0.09248046875, "grad_norm": 0.46745428442955017, "learning_rate": 0.0004931078168805881, "loss": 1.9679, "step": 1894 }, { "epoch": 0.092529296875, "grad_norm": 0.3507269024848938, "learning_rate": 0.0004930992110019308, "loss": 1.9582, "step": 1895 }, { "epoch": 0.092578125, "grad_norm": 0.35835668444633484, "learning_rate": 0.0004930905998374639, "loss": 1.964, "step": 1896 }, { "epoch": 0.092626953125, "grad_norm": 0.33816632628440857, "learning_rate": 0.0004930819833873962, "loss": 1.9917, "step": 1897 }, { "epoch": 0.09267578125, "grad_norm": 0.3312014043331146, "learning_rate": 0.0004930733616519363, "loss": 1.9964, "step": 1898 }, { "epoch": 0.092724609375, "grad_norm": 0.31879475712776184, "learning_rate": 0.0004930647346312933, "loss": 1.9743, "step": 1899 }, { "epoch": 0.0927734375, "grad_norm": 0.35172781348228455, "learning_rate": 0.0004930561023256762, "loss": 1.9984, "step": 1900 }, { "epoch": 0.092822265625, "grad_norm": 0.3699686825275421, "learning_rate": 0.0004930474647352943, "loss": 1.9804, "step": 1901 }, { "epoch": 0.09287109375, "grad_norm": 0.2900342643260956, "learning_rate": 0.000493038821860357, "loss": 1.9632, "step": 1902 }, { "epoch": 0.092919921875, "grad_norm": 0.37693220376968384, "learning_rate": 0.0004930301737010737, "loss": 1.9883, "step": 1903 }, { "epoch": 0.09296875, "grad_norm": 0.3229311406612396, "learning_rate": 0.0004930215202576539, "loss": 2.0444, "step": 1904 }, { "epoch": 0.093017578125, "grad_norm": 0.2595221698284149, "learning_rate": 0.0004930128615303074, "loss": 1.9895, "step": 1905 }, { "epoch": 0.09306640625, "grad_norm": 0.32784590125083923, "learning_rate": 0.000493004197519244, "loss": 1.9774, "step": 1906 }, { "epoch": 0.093115234375, "grad_norm": 0.3275638818740845, "learning_rate": 0.0004929955282246739, "loss": 1.9643, "step": 1907 }, { "epoch": 0.0931640625, "grad_norm": 0.3639599084854126, "learning_rate": 0.0004929868536468069, "loss": 1.9997, "step": 1908 }, { "epoch": 0.093212890625, "grad_norm": 0.34182533621788025, "learning_rate": 0.0004929781737858535, "loss": 1.9935, "step": 1909 }, { "epoch": 0.09326171875, "grad_norm": 0.3246154189109802, "learning_rate": 0.0004929694886420239, "loss": 1.9915, "step": 1910 }, { "epoch": 0.093310546875, "grad_norm": 0.3671203553676605, "learning_rate": 0.0004929607982155285, "loss": 1.9666, "step": 1911 }, { "epoch": 0.093359375, "grad_norm": 0.34468623995780945, "learning_rate": 0.0004929521025065782, "loss": 2.0215, "step": 1912 }, { "epoch": 0.093408203125, "grad_norm": 0.273843377828598, "learning_rate": 0.0004929434015153835, "loss": 1.9949, "step": 1913 }, { "epoch": 0.09345703125, "grad_norm": 0.3552624583244324, "learning_rate": 0.0004929346952421553, "loss": 2.0162, "step": 1914 }, { "epoch": 0.093505859375, "grad_norm": 0.37477150559425354, "learning_rate": 0.0004929259836871048, "loss": 1.9836, "step": 1915 }, { "epoch": 0.0935546875, "grad_norm": 0.3257855176925659, "learning_rate": 0.000492917266850443, "loss": 1.9976, "step": 1916 }, { "epoch": 0.093603515625, "grad_norm": 0.3519534170627594, "learning_rate": 0.0004929085447323811, "loss": 1.9763, "step": 1917 }, { "epoch": 0.09365234375, "grad_norm": 0.4474601447582245, "learning_rate": 0.0004928998173331306, "loss": 2.0053, "step": 1918 }, { "epoch": 0.093701171875, "grad_norm": 0.37341299653053284, "learning_rate": 0.000492891084652903, "loss": 1.9597, "step": 1919 }, { "epoch": 0.09375, "grad_norm": 0.348501592874527, "learning_rate": 0.0004928823466919098, "loss": 2.0225, "step": 1920 }, { "epoch": 0.093798828125, "grad_norm": 0.33708158135414124, "learning_rate": 0.0004928736034503629, "loss": 1.9911, "step": 1921 }, { "epoch": 0.09384765625, "grad_norm": 0.34594273567199707, "learning_rate": 0.0004928648549284743, "loss": 1.9479, "step": 1922 }, { "epoch": 0.093896484375, "grad_norm": 0.34447434544563293, "learning_rate": 0.0004928561011264559, "loss": 2.0291, "step": 1923 }, { "epoch": 0.0939453125, "grad_norm": 0.34089311957359314, "learning_rate": 0.0004928473420445198, "loss": 1.9351, "step": 1924 }, { "epoch": 0.093994140625, "grad_norm": 0.3785715401172638, "learning_rate": 0.0004928385776828783, "loss": 1.9503, "step": 1925 }, { "epoch": 0.09404296875, "grad_norm": 0.3344329297542572, "learning_rate": 0.0004928298080417439, "loss": 1.9437, "step": 1926 }, { "epoch": 0.094091796875, "grad_norm": 0.271072655916214, "learning_rate": 0.0004928210331213292, "loss": 1.9945, "step": 1927 }, { "epoch": 0.094140625, "grad_norm": 0.28561556339263916, "learning_rate": 0.0004928122529218467, "loss": 1.9773, "step": 1928 }, { "epoch": 0.094189453125, "grad_norm": 0.2958628833293915, "learning_rate": 0.0004928034674435092, "loss": 1.9135, "step": 1929 }, { "epoch": 0.09423828125, "grad_norm": 0.27257487177848816, "learning_rate": 0.0004927946766865298, "loss": 1.9322, "step": 1930 }, { "epoch": 0.094287109375, "grad_norm": 0.31370994448661804, "learning_rate": 0.0004927858806511214, "loss": 1.9654, "step": 1931 }, { "epoch": 0.0943359375, "grad_norm": 0.4323267936706543, "learning_rate": 0.0004927770793374971, "loss": 2.0039, "step": 1932 }, { "epoch": 0.094384765625, "grad_norm": 0.42473235726356506, "learning_rate": 0.0004927682727458704, "loss": 2.0183, "step": 1933 }, { "epoch": 0.09443359375, "grad_norm": 0.4255903661251068, "learning_rate": 0.0004927594608764546, "loss": 1.9856, "step": 1934 }, { "epoch": 0.094482421875, "grad_norm": 0.5305478572845459, "learning_rate": 0.0004927506437294634, "loss": 2.0033, "step": 1935 }, { "epoch": 0.09453125, "grad_norm": 0.35822373628616333, "learning_rate": 0.0004927418213051104, "loss": 1.9514, "step": 1936 }, { "epoch": 0.094580078125, "grad_norm": 0.3579935133457184, "learning_rate": 0.0004927329936036095, "loss": 1.9673, "step": 1937 }, { "epoch": 0.09462890625, "grad_norm": 0.4445202648639679, "learning_rate": 0.0004927241606251745, "loss": 1.9537, "step": 1938 }, { "epoch": 0.094677734375, "grad_norm": 0.35615962743759155, "learning_rate": 0.0004927153223700195, "loss": 1.9833, "step": 1939 }, { "epoch": 0.0947265625, "grad_norm": 0.3229484558105469, "learning_rate": 0.0004927064788383587, "loss": 1.9724, "step": 1940 }, { "epoch": 0.094775390625, "grad_norm": 0.30006924271583557, "learning_rate": 0.0004926976300304067, "loss": 1.9962, "step": 1941 }, { "epoch": 0.09482421875, "grad_norm": 0.2985495924949646, "learning_rate": 0.0004926887759463776, "loss": 2.0033, "step": 1942 }, { "epoch": 0.094873046875, "grad_norm": 0.3169795274734497, "learning_rate": 0.0004926799165864862, "loss": 2.0213, "step": 1943 }, { "epoch": 0.094921875, "grad_norm": 0.32540345191955566, "learning_rate": 0.000492671051950947, "loss": 2.0189, "step": 1944 }, { "epoch": 0.094970703125, "grad_norm": 0.31255897879600525, "learning_rate": 0.000492662182039975, "loss": 1.9733, "step": 1945 }, { "epoch": 0.09501953125, "grad_norm": 0.3534236252307892, "learning_rate": 0.0004926533068537852, "loss": 1.9705, "step": 1946 }, { "epoch": 0.095068359375, "grad_norm": 0.3357395529747009, "learning_rate": 0.0004926444263925928, "loss": 1.9698, "step": 1947 }, { "epoch": 0.0951171875, "grad_norm": 0.4236257076263428, "learning_rate": 0.0004926355406566127, "loss": 2.0521, "step": 1948 }, { "epoch": 0.095166015625, "grad_norm": 0.29475733637809753, "learning_rate": 0.0004926266496460604, "loss": 1.9794, "step": 1949 }, { "epoch": 0.09521484375, "grad_norm": 0.3298952281475067, "learning_rate": 0.0004926177533611514, "loss": 1.9817, "step": 1950 }, { "epoch": 0.095263671875, "grad_norm": 0.4152366518974304, "learning_rate": 0.0004926088518021016, "loss": 1.98, "step": 1951 }, { "epoch": 0.0953125, "grad_norm": 0.33787164092063904, "learning_rate": 0.0004925999449691261, "loss": 1.9735, "step": 1952 }, { "epoch": 0.095361328125, "grad_norm": 0.34936240315437317, "learning_rate": 0.0004925910328624412, "loss": 1.975, "step": 1953 }, { "epoch": 0.09541015625, "grad_norm": 0.3913620710372925, "learning_rate": 0.000492582115482263, "loss": 2.0098, "step": 1954 }, { "epoch": 0.095458984375, "grad_norm": 0.34397733211517334, "learning_rate": 0.0004925731928288072, "loss": 1.9756, "step": 1955 }, { "epoch": 0.0955078125, "grad_norm": 0.4097355604171753, "learning_rate": 0.0004925642649022903, "loss": 1.9554, "step": 1956 }, { "epoch": 0.095556640625, "grad_norm": 0.48653194308280945, "learning_rate": 0.0004925553317029288, "loss": 1.9745, "step": 1957 }, { "epoch": 0.09560546875, "grad_norm": 0.3132115304470062, "learning_rate": 0.000492546393230939, "loss": 1.9583, "step": 1958 }, { "epoch": 0.095654296875, "grad_norm": 0.33665069937705994, "learning_rate": 0.0004925374494865374, "loss": 1.9495, "step": 1959 }, { "epoch": 0.095703125, "grad_norm": 0.43451863527297974, "learning_rate": 0.0004925285004699411, "loss": 1.9741, "step": 1960 }, { "epoch": 0.095751953125, "grad_norm": 0.37677714228630066, "learning_rate": 0.0004925195461813669, "loss": 1.9556, "step": 1961 }, { "epoch": 0.09580078125, "grad_norm": 0.3678143322467804, "learning_rate": 0.0004925105866210316, "loss": 1.9861, "step": 1962 }, { "epoch": 0.095849609375, "grad_norm": 0.30117395520210266, "learning_rate": 0.0004925016217891526, "loss": 1.958, "step": 1963 }, { "epoch": 0.0958984375, "grad_norm": 0.2937098741531372, "learning_rate": 0.0004924926516859469, "loss": 1.9812, "step": 1964 }, { "epoch": 0.095947265625, "grad_norm": 0.37048882246017456, "learning_rate": 0.0004924836763116323, "loss": 1.9801, "step": 1965 }, { "epoch": 0.09599609375, "grad_norm": 0.3590259850025177, "learning_rate": 0.000492474695666426, "loss": 2.0629, "step": 1966 }, { "epoch": 0.096044921875, "grad_norm": 0.3754998743534088, "learning_rate": 0.0004924657097505456, "loss": 2.015, "step": 1967 }, { "epoch": 0.09609375, "grad_norm": 0.3358076810836792, "learning_rate": 0.0004924567185642091, "loss": 1.9844, "step": 1968 }, { "epoch": 0.096142578125, "grad_norm": 0.34025681018829346, "learning_rate": 0.0004924477221076343, "loss": 1.9729, "step": 1969 }, { "epoch": 0.09619140625, "grad_norm": 0.3950899541378021, "learning_rate": 0.0004924387203810393, "loss": 2.0201, "step": 1970 }, { "epoch": 0.096240234375, "grad_norm": 0.3043922781944275, "learning_rate": 0.0004924297133846422, "loss": 1.9832, "step": 1971 }, { "epoch": 0.0962890625, "grad_norm": 0.3189018666744232, "learning_rate": 0.0004924207011186613, "loss": 1.9866, "step": 1972 }, { "epoch": 0.096337890625, "grad_norm": 0.3640248775482178, "learning_rate": 0.0004924116835833151, "loss": 1.9536, "step": 1973 }, { "epoch": 0.09638671875, "grad_norm": 0.4072916805744171, "learning_rate": 0.0004924026607788219, "loss": 1.9729, "step": 1974 }, { "epoch": 0.096435546875, "grad_norm": 0.390102356672287, "learning_rate": 0.0004923936327054008, "loss": 1.9827, "step": 1975 }, { "epoch": 0.096484375, "grad_norm": 0.2569733262062073, "learning_rate": 0.0004923845993632702, "loss": 1.9984, "step": 1976 }, { "epoch": 0.096533203125, "grad_norm": 0.3729031980037689, "learning_rate": 0.0004923755607526493, "loss": 2.0046, "step": 1977 }, { "epoch": 0.09658203125, "grad_norm": 0.3797363340854645, "learning_rate": 0.000492366516873757, "loss": 1.9881, "step": 1978 }, { "epoch": 0.096630859375, "grad_norm": 0.3017745912075043, "learning_rate": 0.0004923574677268125, "loss": 1.9514, "step": 1979 }, { "epoch": 0.0966796875, "grad_norm": 0.45210397243499756, "learning_rate": 0.0004923484133120351, "loss": 1.9706, "step": 1980 }, { "epoch": 0.096728515625, "grad_norm": 0.4618760049343109, "learning_rate": 0.0004923393536296443, "loss": 1.9843, "step": 1981 }, { "epoch": 0.09677734375, "grad_norm": 0.2826623320579529, "learning_rate": 0.0004923302886798598, "loss": 1.9666, "step": 1982 }, { "epoch": 0.096826171875, "grad_norm": 0.4494277536869049, "learning_rate": 0.000492321218462901, "loss": 1.9513, "step": 1983 }, { "epoch": 0.096875, "grad_norm": 0.45490843057632446, "learning_rate": 0.000492312142978988, "loss": 2.027, "step": 1984 }, { "epoch": 0.096923828125, "grad_norm": 0.40057018399238586, "learning_rate": 0.0004923030622283406, "loss": 1.9966, "step": 1985 }, { "epoch": 0.09697265625, "grad_norm": 0.5186879634857178, "learning_rate": 0.0004922939762111788, "loss": 1.9822, "step": 1986 }, { "epoch": 0.097021484375, "grad_norm": 0.35937508940696716, "learning_rate": 0.000492284884927723, "loss": 1.9734, "step": 1987 }, { "epoch": 0.0970703125, "grad_norm": 0.40504881739616394, "learning_rate": 0.0004922757883781934, "loss": 1.9513, "step": 1988 }, { "epoch": 0.097119140625, "grad_norm": 0.4289425313472748, "learning_rate": 0.0004922666865628105, "loss": 1.9836, "step": 1989 }, { "epoch": 0.09716796875, "grad_norm": 0.41388076543807983, "learning_rate": 0.000492257579481795, "loss": 1.9897, "step": 1990 }, { "epoch": 0.097216796875, "grad_norm": 0.46787843108177185, "learning_rate": 0.0004922484671353674, "loss": 1.9519, "step": 1991 }, { "epoch": 0.097265625, "grad_norm": 0.31353533267974854, "learning_rate": 0.0004922393495237488, "loss": 1.9721, "step": 1992 }, { "epoch": 0.097314453125, "grad_norm": 0.3990669250488281, "learning_rate": 0.0004922302266471599, "loss": 1.9827, "step": 1993 }, { "epoch": 0.09736328125, "grad_norm": 0.3777223229408264, "learning_rate": 0.000492221098505822, "loss": 1.9669, "step": 1994 }, { "epoch": 0.097412109375, "grad_norm": 0.37743014097213745, "learning_rate": 0.0004922119650999563, "loss": 2.0008, "step": 1995 }, { "epoch": 0.0974609375, "grad_norm": 0.32288363575935364, "learning_rate": 0.000492202826429784, "loss": 2.0, "step": 1996 }, { "epoch": 0.097509765625, "grad_norm": 0.3841555118560791, "learning_rate": 0.0004921936824955268, "loss": 1.9935, "step": 1997 }, { "epoch": 0.09755859375, "grad_norm": 0.40182292461395264, "learning_rate": 0.0004921845332974062, "loss": 1.9307, "step": 1998 }, { "epoch": 0.097607421875, "grad_norm": 0.37780362367630005, "learning_rate": 0.0004921753788356439, "loss": 1.9602, "step": 1999 }, { "epoch": 0.09765625, "grad_norm": 0.3731973469257355, "learning_rate": 0.0004921662191104619, "loss": 1.9792, "step": 2000 }, { "epoch": 0.097705078125, "grad_norm": 0.3642726540565491, "learning_rate": 0.0004921570541220822, "loss": 1.9646, "step": 2001 }, { "epoch": 0.09775390625, "grad_norm": 0.3673241436481476, "learning_rate": 0.0004921478838707266, "loss": 1.9551, "step": 2002 }, { "epoch": 0.097802734375, "grad_norm": 0.3618405759334564, "learning_rate": 0.0004921387083566178, "loss": 2.0374, "step": 2003 }, { "epoch": 0.0978515625, "grad_norm": 0.35020241141319275, "learning_rate": 0.0004921295275799778, "loss": 1.9687, "step": 2004 }, { "epoch": 0.097900390625, "grad_norm": 0.3086360692977905, "learning_rate": 0.0004921203415410293, "loss": 1.9363, "step": 2005 }, { "epoch": 0.09794921875, "grad_norm": 0.29098355770111084, "learning_rate": 0.0004921111502399949, "loss": 1.9702, "step": 2006 }, { "epoch": 0.097998046875, "grad_norm": 0.4058879613876343, "learning_rate": 0.0004921019536770974, "loss": 1.9647, "step": 2007 }, { "epoch": 0.098046875, "grad_norm": 0.40279003977775574, "learning_rate": 0.0004920927518525594, "loss": 1.955, "step": 2008 }, { "epoch": 0.098095703125, "grad_norm": 0.3765130043029785, "learning_rate": 0.0004920835447666045, "loss": 1.9453, "step": 2009 }, { "epoch": 0.09814453125, "grad_norm": 0.30417951941490173, "learning_rate": 0.0004920743324194552, "loss": 1.9692, "step": 2010 }, { "epoch": 0.098193359375, "grad_norm": 0.2709481716156006, "learning_rate": 0.0004920651148113351, "loss": 1.9936, "step": 2011 }, { "epoch": 0.0982421875, "grad_norm": 0.3005509674549103, "learning_rate": 0.0004920558919424677, "loss": 1.9634, "step": 2012 }, { "epoch": 0.098291015625, "grad_norm": 0.27394899725914, "learning_rate": 0.0004920466638130763, "loss": 1.972, "step": 2013 }, { "epoch": 0.09833984375, "grad_norm": 0.2668408751487732, "learning_rate": 0.0004920374304233846, "loss": 1.9642, "step": 2014 }, { "epoch": 0.098388671875, "grad_norm": 0.3097430467605591, "learning_rate": 0.0004920281917736164, "loss": 1.9695, "step": 2015 }, { "epoch": 0.0984375, "grad_norm": 0.29355159401893616, "learning_rate": 0.0004920189478639957, "loss": 1.9998, "step": 2016 }, { "epoch": 0.098486328125, "grad_norm": 0.2749842405319214, "learning_rate": 0.0004920096986947464, "loss": 1.9921, "step": 2017 }, { "epoch": 0.09853515625, "grad_norm": 0.34436944127082825, "learning_rate": 0.0004920004442660927, "loss": 2.0006, "step": 2018 }, { "epoch": 0.098583984375, "grad_norm": 0.35370713472366333, "learning_rate": 0.0004919911845782589, "loss": 2.0116, "step": 2019 }, { "epoch": 0.0986328125, "grad_norm": 0.3157117962837219, "learning_rate": 0.0004919819196314695, "loss": 1.9627, "step": 2020 }, { "epoch": 0.098681640625, "grad_norm": 0.32216906547546387, "learning_rate": 0.0004919726494259488, "loss": 1.9786, "step": 2021 }, { "epoch": 0.09873046875, "grad_norm": 0.33504676818847656, "learning_rate": 0.0004919633739619218, "loss": 1.9878, "step": 2022 }, { "epoch": 0.098779296875, "grad_norm": 0.30922454595565796, "learning_rate": 0.000491954093239613, "loss": 1.9909, "step": 2023 }, { "epoch": 0.098828125, "grad_norm": 0.27944380044937134, "learning_rate": 0.0004919448072592474, "loss": 1.963, "step": 2024 }, { "epoch": 0.098876953125, "grad_norm": 0.33847668766975403, "learning_rate": 0.0004919355160210502, "loss": 2.0075, "step": 2025 }, { "epoch": 0.09892578125, "grad_norm": 0.3153333067893982, "learning_rate": 0.0004919262195252465, "loss": 1.99, "step": 2026 }, { "epoch": 0.098974609375, "grad_norm": 0.3190540373325348, "learning_rate": 0.0004919169177720616, "loss": 1.9906, "step": 2027 }, { "epoch": 0.0990234375, "grad_norm": 0.2528114318847656, "learning_rate": 0.0004919076107617209, "loss": 1.9939, "step": 2028 }, { "epoch": 0.099072265625, "grad_norm": 0.27775564789772034, "learning_rate": 0.0004918982984944499, "loss": 1.9658, "step": 2029 }, { "epoch": 0.09912109375, "grad_norm": 0.32900404930114746, "learning_rate": 0.0004918889809704745, "loss": 2.0015, "step": 2030 }, { "epoch": 0.099169921875, "grad_norm": 0.3048715889453888, "learning_rate": 0.0004918796581900204, "loss": 1.9777, "step": 2031 }, { "epoch": 0.09921875, "grad_norm": 0.3565579056739807, "learning_rate": 0.0004918703301533135, "loss": 1.9518, "step": 2032 }, { "epoch": 0.099267578125, "grad_norm": 0.3562955856323242, "learning_rate": 0.0004918609968605799, "loss": 1.9522, "step": 2033 }, { "epoch": 0.09931640625, "grad_norm": 0.34407860040664673, "learning_rate": 0.0004918516583120458, "loss": 1.9493, "step": 2034 }, { "epoch": 0.099365234375, "grad_norm": 0.34036123752593994, "learning_rate": 0.0004918423145079376, "loss": 1.9919, "step": 2035 }, { "epoch": 0.0994140625, "grad_norm": 0.36323443055152893, "learning_rate": 0.0004918329654484817, "loss": 1.9824, "step": 2036 }, { "epoch": 0.099462890625, "grad_norm": 0.29511284828186035, "learning_rate": 0.0004918236111339046, "loss": 1.9691, "step": 2037 }, { "epoch": 0.09951171875, "grad_norm": 0.3193492293357849, "learning_rate": 0.0004918142515644332, "loss": 1.9597, "step": 2038 }, { "epoch": 0.099560546875, "grad_norm": 0.3869037330150604, "learning_rate": 0.0004918048867402941, "loss": 1.9647, "step": 2039 }, { "epoch": 0.099609375, "grad_norm": 0.385174959897995, "learning_rate": 0.0004917955166617146, "loss": 1.9792, "step": 2040 }, { "epoch": 0.099658203125, "grad_norm": 0.3331574499607086, "learning_rate": 0.0004917861413289215, "loss": 1.9469, "step": 2041 }, { "epoch": 0.09970703125, "grad_norm": 0.318821519613266, "learning_rate": 0.000491776760742142, "loss": 1.9679, "step": 2042 }, { "epoch": 0.099755859375, "grad_norm": 0.38204100728034973, "learning_rate": 0.0004917673749016035, "loss": 1.95, "step": 2043 }, { "epoch": 0.0998046875, "grad_norm": 0.3905351459980011, "learning_rate": 0.0004917579838075337, "loss": 2.0024, "step": 2044 }, { "epoch": 0.099853515625, "grad_norm": 0.34175905585289, "learning_rate": 0.0004917485874601599, "loss": 1.9524, "step": 2045 }, { "epoch": 0.09990234375, "grad_norm": 0.457516610622406, "learning_rate": 0.0004917391858597099, "loss": 1.9508, "step": 2046 }, { "epoch": 0.099951171875, "grad_norm": 0.3415912687778473, "learning_rate": 0.0004917297790064118, "loss": 1.9669, "step": 2047 }, { "epoch": 0.1, "grad_norm": 0.30939745903015137, "learning_rate": 0.0004917203669004932, "loss": 1.9494, "step": 2048 }, { "epoch": 0.100048828125, "grad_norm": 0.45661547780036926, "learning_rate": 0.0004917109495421825, "loss": 1.9955, "step": 2049 }, { "epoch": 0.10009765625, "grad_norm": 0.4056677222251892, "learning_rate": 0.0004917015269317079, "loss": 1.9214, "step": 2050 }, { "epoch": 0.100146484375, "grad_norm": 0.36813798546791077, "learning_rate": 0.0004916920990692975, "loss": 1.9639, "step": 2051 }, { "epoch": 0.1001953125, "grad_norm": 0.4963024854660034, "learning_rate": 0.0004916826659551802, "loss": 1.9165, "step": 2052 }, { "epoch": 0.100244140625, "grad_norm": 0.3603087067604065, "learning_rate": 0.0004916732275895843, "loss": 1.9672, "step": 2053 }, { "epoch": 0.10029296875, "grad_norm": 0.3606721758842468, "learning_rate": 0.0004916637839727387, "loss": 1.9692, "step": 2054 }, { "epoch": 0.100341796875, "grad_norm": 0.3717184364795685, "learning_rate": 0.0004916543351048721, "loss": 1.9282, "step": 2055 }, { "epoch": 0.100390625, "grad_norm": 0.3270815908908844, "learning_rate": 0.0004916448809862137, "loss": 1.9779, "step": 2056 }, { "epoch": 0.100439453125, "grad_norm": 0.4106564521789551, "learning_rate": 0.0004916354216169927, "loss": 1.9882, "step": 2057 }, { "epoch": 0.10048828125, "grad_norm": 0.35256168246269226, "learning_rate": 0.000491625956997438, "loss": 2.0047, "step": 2058 }, { "epoch": 0.100537109375, "grad_norm": 0.3215746581554413, "learning_rate": 0.0004916164871277794, "loss": 1.9857, "step": 2059 }, { "epoch": 0.1005859375, "grad_norm": 0.38432052731513977, "learning_rate": 0.000491607012008246, "loss": 1.9719, "step": 2060 }, { "epoch": 0.100634765625, "grad_norm": 0.41848185658454895, "learning_rate": 0.0004915975316390678, "loss": 1.9862, "step": 2061 }, { "epoch": 0.10068359375, "grad_norm": 0.3660518229007721, "learning_rate": 0.0004915880460204743, "loss": 1.9452, "step": 2062 }, { "epoch": 0.100732421875, "grad_norm": 0.4050191342830658, "learning_rate": 0.0004915785551526956, "loss": 1.9816, "step": 2063 }, { "epoch": 0.10078125, "grad_norm": 0.377080500125885, "learning_rate": 0.0004915690590359615, "loss": 2.0002, "step": 2064 }, { "epoch": 0.100830078125, "grad_norm": 0.31874191761016846, "learning_rate": 0.0004915595576705024, "loss": 1.9946, "step": 2065 }, { "epoch": 0.10087890625, "grad_norm": 0.35745352506637573, "learning_rate": 0.0004915500510565483, "loss": 1.9631, "step": 2066 }, { "epoch": 0.100927734375, "grad_norm": 0.41410136222839355, "learning_rate": 0.0004915405391943299, "loss": 1.9641, "step": 2067 }, { "epoch": 0.1009765625, "grad_norm": 0.37158602476119995, "learning_rate": 0.0004915310220840774, "loss": 1.9683, "step": 2068 }, { "epoch": 0.101025390625, "grad_norm": 0.3003555238246918, "learning_rate": 0.0004915214997260217, "loss": 1.9668, "step": 2069 }, { "epoch": 0.10107421875, "grad_norm": 0.273417592048645, "learning_rate": 0.0004915119721203935, "loss": 2.0055, "step": 2070 }, { "epoch": 0.101123046875, "grad_norm": 0.35308703780174255, "learning_rate": 0.0004915024392674238, "loss": 1.9661, "step": 2071 }, { "epoch": 0.101171875, "grad_norm": 0.29280608892440796, "learning_rate": 0.0004914929011673434, "loss": 1.9837, "step": 2072 }, { "epoch": 0.101220703125, "grad_norm": 0.3252936601638794, "learning_rate": 0.0004914833578203837, "loss": 1.9539, "step": 2073 }, { "epoch": 0.10126953125, "grad_norm": 0.39644762873649597, "learning_rate": 0.0004914738092267758, "loss": 1.9701, "step": 2074 }, { "epoch": 0.101318359375, "grad_norm": 0.44311267137527466, "learning_rate": 0.0004914642553867513, "loss": 1.9413, "step": 2075 }, { "epoch": 0.1013671875, "grad_norm": 0.4408668279647827, "learning_rate": 0.0004914546963005416, "loss": 1.9643, "step": 2076 }, { "epoch": 0.101416015625, "grad_norm": 0.36955368518829346, "learning_rate": 0.0004914451319683786, "loss": 1.9636, "step": 2077 }, { "epoch": 0.10146484375, "grad_norm": 0.3167573809623718, "learning_rate": 0.0004914355623904938, "loss": 2.021, "step": 2078 }, { "epoch": 0.101513671875, "grad_norm": 0.6286629438400269, "learning_rate": 0.0004914259875671193, "loss": 2.035, "step": 2079 }, { "epoch": 0.1015625, "grad_norm": 0.3961467444896698, "learning_rate": 0.0004914164074984872, "loss": 1.987, "step": 2080 }, { "epoch": 0.101611328125, "grad_norm": 0.30832087993621826, "learning_rate": 0.0004914068221848294, "loss": 2.0013, "step": 2081 }, { "epoch": 0.10166015625, "grad_norm": 0.33245137333869934, "learning_rate": 0.0004913972316263785, "loss": 1.9448, "step": 2082 }, { "epoch": 0.101708984375, "grad_norm": 0.3080903887748718, "learning_rate": 0.0004913876358233669, "loss": 1.9904, "step": 2083 }, { "epoch": 0.1017578125, "grad_norm": 0.2926611602306366, "learning_rate": 0.000491378034776027, "loss": 1.9711, "step": 2084 }, { "epoch": 0.101806640625, "grad_norm": 0.34127485752105713, "learning_rate": 0.0004913684284845917, "loss": 2.0195, "step": 2085 }, { "epoch": 0.10185546875, "grad_norm": 0.32893654704093933, "learning_rate": 0.0004913588169492937, "loss": 1.9713, "step": 2086 }, { "epoch": 0.101904296875, "grad_norm": 0.32220131158828735, "learning_rate": 0.0004913492001703659, "loss": 1.95, "step": 2087 }, { "epoch": 0.101953125, "grad_norm": 0.28422704339027405, "learning_rate": 0.0004913395781480414, "loss": 1.971, "step": 2088 }, { "epoch": 0.102001953125, "grad_norm": 0.36352789402008057, "learning_rate": 0.0004913299508825535, "loss": 2.0084, "step": 2089 }, { "epoch": 0.10205078125, "grad_norm": 0.4441694915294647, "learning_rate": 0.0004913203183741354, "loss": 1.9693, "step": 2090 }, { "epoch": 0.102099609375, "grad_norm": 0.4812953770160675, "learning_rate": 0.0004913106806230205, "loss": 1.9871, "step": 2091 }, { "epoch": 0.1021484375, "grad_norm": 0.32710176706314087, "learning_rate": 0.0004913010376294425, "loss": 1.9888, "step": 2092 }, { "epoch": 0.102197265625, "grad_norm": 0.3763316869735718, "learning_rate": 0.0004912913893936352, "loss": 1.9732, "step": 2093 }, { "epoch": 0.10224609375, "grad_norm": 0.4661775827407837, "learning_rate": 0.0004912817359158322, "loss": 1.9609, "step": 2094 }, { "epoch": 0.102294921875, "grad_norm": 0.37014174461364746, "learning_rate": 0.0004912720771962677, "loss": 1.982, "step": 2095 }, { "epoch": 0.10234375, "grad_norm": 0.3924505114555359, "learning_rate": 0.0004912624132351755, "loss": 1.9744, "step": 2096 }, { "epoch": 0.102392578125, "grad_norm": 0.2980431616306305, "learning_rate": 0.00049125274403279, "loss": 1.9813, "step": 2097 }, { "epoch": 0.10244140625, "grad_norm": 0.33952268958091736, "learning_rate": 0.0004912430695893456, "loss": 1.9567, "step": 2098 }, { "epoch": 0.102490234375, "grad_norm": 0.3318859934806824, "learning_rate": 0.0004912333899050766, "loss": 1.963, "step": 2099 }, { "epoch": 0.1025390625, "grad_norm": 0.3129656910896301, "learning_rate": 0.0004912237049802178, "loss": 1.9962, "step": 2100 }, { "epoch": 0.102587890625, "grad_norm": 0.3255836069583893, "learning_rate": 0.0004912140148150036, "loss": 1.9733, "step": 2101 }, { "epoch": 0.10263671875, "grad_norm": 0.3462151288986206, "learning_rate": 0.0004912043194096693, "loss": 1.9496, "step": 2102 }, { "epoch": 0.102685546875, "grad_norm": 0.3210708498954773, "learning_rate": 0.0004911946187644494, "loss": 1.9644, "step": 2103 }, { "epoch": 0.102734375, "grad_norm": 0.3568514585494995, "learning_rate": 0.0004911849128795793, "loss": 1.9532, "step": 2104 }, { "epoch": 0.102783203125, "grad_norm": 0.3509374260902405, "learning_rate": 0.0004911752017552942, "loss": 1.9693, "step": 2105 }, { "epoch": 0.10283203125, "grad_norm": 0.3524731397628784, "learning_rate": 0.0004911654853918293, "loss": 1.9702, "step": 2106 }, { "epoch": 0.102880859375, "grad_norm": 0.361454039812088, "learning_rate": 0.0004911557637894203, "loss": 1.9535, "step": 2107 }, { "epoch": 0.1029296875, "grad_norm": 0.3121342062950134, "learning_rate": 0.0004911460369483026, "loss": 1.9175, "step": 2108 }, { "epoch": 0.102978515625, "grad_norm": 0.3175090551376343, "learning_rate": 0.0004911363048687122, "loss": 2.0022, "step": 2109 }, { "epoch": 0.10302734375, "grad_norm": 0.3176109492778778, "learning_rate": 0.0004911265675508847, "loss": 1.9612, "step": 2110 }, { "epoch": 0.103076171875, "grad_norm": 0.3656213581562042, "learning_rate": 0.0004911168249950562, "loss": 2.0109, "step": 2111 }, { "epoch": 0.103125, "grad_norm": 0.4081972539424896, "learning_rate": 0.000491107077201463, "loss": 1.9834, "step": 2112 }, { "epoch": 0.103173828125, "grad_norm": 0.3425123393535614, "learning_rate": 0.0004910973241703409, "loss": 2.0247, "step": 2113 }, { "epoch": 0.10322265625, "grad_norm": 0.3982101380825043, "learning_rate": 0.0004910875659019267, "loss": 1.9515, "step": 2114 }, { "epoch": 0.103271484375, "grad_norm": 0.4695647060871124, "learning_rate": 0.0004910778023964566, "loss": 1.9793, "step": 2115 }, { "epoch": 0.1033203125, "grad_norm": 0.2824002504348755, "learning_rate": 0.0004910680336541676, "loss": 1.9291, "step": 2116 }, { "epoch": 0.103369140625, "grad_norm": 0.4520464241504669, "learning_rate": 0.000491058259675296, "loss": 1.9619, "step": 2117 }, { "epoch": 0.10341796875, "grad_norm": 0.37557780742645264, "learning_rate": 0.000491048480460079, "loss": 1.9729, "step": 2118 }, { "epoch": 0.103466796875, "grad_norm": 0.3628202974796295, "learning_rate": 0.0004910386960087534, "loss": 1.9968, "step": 2119 }, { "epoch": 0.103515625, "grad_norm": 0.3143669366836548, "learning_rate": 0.0004910289063215564, "loss": 1.9229, "step": 2120 }, { "epoch": 0.103564453125, "grad_norm": 0.29275408387184143, "learning_rate": 0.0004910191113987255, "loss": 1.9572, "step": 2121 }, { "epoch": 0.10361328125, "grad_norm": 0.39636126160621643, "learning_rate": 0.0004910093112404978, "loss": 1.9784, "step": 2122 }, { "epoch": 0.103662109375, "grad_norm": 0.37363770604133606, "learning_rate": 0.0004909995058471109, "loss": 1.9668, "step": 2123 }, { "epoch": 0.1037109375, "grad_norm": 0.40026119351387024, "learning_rate": 0.0004909896952188024, "loss": 1.9651, "step": 2124 }, { "epoch": 0.103759765625, "grad_norm": 0.3577127754688263, "learning_rate": 0.0004909798793558103, "loss": 1.9482, "step": 2125 }, { "epoch": 0.10380859375, "grad_norm": 0.3204316198825836, "learning_rate": 0.0004909700582583721, "loss": 1.986, "step": 2126 }, { "epoch": 0.103857421875, "grad_norm": 0.43617352843284607, "learning_rate": 0.0004909602319267261, "loss": 1.9888, "step": 2127 }, { "epoch": 0.10390625, "grad_norm": 0.3170802891254425, "learning_rate": 0.0004909504003611103, "loss": 1.9611, "step": 2128 }, { "epoch": 0.103955078125, "grad_norm": 0.3683609664440155, "learning_rate": 0.0004909405635617632, "loss": 2.0073, "step": 2129 }, { "epoch": 0.10400390625, "grad_norm": 0.32770442962646484, "learning_rate": 0.000490930721528923, "loss": 1.9554, "step": 2130 }, { "epoch": 0.104052734375, "grad_norm": 0.35062336921691895, "learning_rate": 0.0004909208742628282, "loss": 1.9688, "step": 2131 }, { "epoch": 0.1041015625, "grad_norm": 0.3614737093448639, "learning_rate": 0.0004909110217637177, "loss": 1.9816, "step": 2132 }, { "epoch": 0.104150390625, "grad_norm": 0.35853084921836853, "learning_rate": 0.00049090116403183, "loss": 1.9802, "step": 2133 }, { "epoch": 0.10419921875, "grad_norm": 0.31292569637298584, "learning_rate": 0.0004908913010674041, "loss": 1.9484, "step": 2134 }, { "epoch": 0.104248046875, "grad_norm": 0.3209400475025177, "learning_rate": 0.0004908814328706792, "loss": 1.9592, "step": 2135 }, { "epoch": 0.104296875, "grad_norm": 0.39385780692100525, "learning_rate": 0.0004908715594418942, "loss": 1.9591, "step": 2136 }, { "epoch": 0.104345703125, "grad_norm": 0.286230206489563, "learning_rate": 0.0004908616807812886, "loss": 1.9435, "step": 2137 }, { "epoch": 0.10439453125, "grad_norm": 0.28395187854766846, "learning_rate": 0.0004908517968891018, "loss": 1.9822, "step": 2138 }, { "epoch": 0.104443359375, "grad_norm": 0.32364434003829956, "learning_rate": 0.0004908419077655732, "loss": 1.9922, "step": 2139 }, { "epoch": 0.1044921875, "grad_norm": 0.4244288206100464, "learning_rate": 0.0004908320134109427, "loss": 1.9441, "step": 2140 }, { "epoch": 0.104541015625, "grad_norm": 0.5060579180717468, "learning_rate": 0.0004908221138254498, "loss": 1.9958, "step": 2141 }, { "epoch": 0.10458984375, "grad_norm": 0.492122083902359, "learning_rate": 0.0004908122090093347, "loss": 2.005, "step": 2142 }, { "epoch": 0.104638671875, "grad_norm": 0.407682329416275, "learning_rate": 0.0004908022989628373, "loss": 1.9952, "step": 2143 }, { "epoch": 0.1046875, "grad_norm": 0.29349303245544434, "learning_rate": 0.0004907923836861978, "loss": 1.9594, "step": 2144 }, { "epoch": 0.104736328125, "grad_norm": 0.3350693881511688, "learning_rate": 0.0004907824631796565, "loss": 1.9232, "step": 2145 }, { "epoch": 0.10478515625, "grad_norm": 0.3395420014858246, "learning_rate": 0.0004907725374434539, "loss": 2.0244, "step": 2146 }, { "epoch": 0.104833984375, "grad_norm": 0.39946678280830383, "learning_rate": 0.0004907626064778304, "loss": 1.9762, "step": 2147 }, { "epoch": 0.1048828125, "grad_norm": 0.37516269087791443, "learning_rate": 0.0004907526702830268, "loss": 1.9661, "step": 2148 }, { "epoch": 0.104931640625, "grad_norm": 0.34623637795448303, "learning_rate": 0.000490742728859284, "loss": 1.9722, "step": 2149 }, { "epoch": 0.10498046875, "grad_norm": 0.34606024622917175, "learning_rate": 0.0004907327822068427, "loss": 1.9775, "step": 2150 }, { "epoch": 0.105029296875, "grad_norm": 0.41928717494010925, "learning_rate": 0.0004907228303259443, "loss": 1.9758, "step": 2151 }, { "epoch": 0.105078125, "grad_norm": 0.35405731201171875, "learning_rate": 0.0004907128732168297, "loss": 1.9748, "step": 2152 }, { "epoch": 0.105126953125, "grad_norm": 0.3676646649837494, "learning_rate": 0.0004907029108797402, "loss": 1.999, "step": 2153 }, { "epoch": 0.10517578125, "grad_norm": 0.4360298812389374, "learning_rate": 0.0004906929433149175, "loss": 1.9589, "step": 2154 }, { "epoch": 0.105224609375, "grad_norm": 0.4023081660270691, "learning_rate": 0.000490682970522603, "loss": 1.9758, "step": 2155 }, { "epoch": 0.1052734375, "grad_norm": 0.4175964593887329, "learning_rate": 0.0004906729925030385, "loss": 1.9635, "step": 2156 }, { "epoch": 0.105322265625, "grad_norm": 0.3097623586654663, "learning_rate": 0.0004906630092564656, "loss": 1.9632, "step": 2157 }, { "epoch": 0.10537109375, "grad_norm": 0.4070817232131958, "learning_rate": 0.0004906530207831266, "loss": 2.0007, "step": 2158 }, { "epoch": 0.105419921875, "grad_norm": 0.3516834080219269, "learning_rate": 0.0004906430270832632, "loss": 1.9679, "step": 2159 }, { "epoch": 0.10546875, "grad_norm": 0.289353609085083, "learning_rate": 0.0004906330281571179, "loss": 1.9839, "step": 2160 }, { "epoch": 0.105517578125, "grad_norm": 0.3758990466594696, "learning_rate": 0.0004906230240049328, "loss": 1.9851, "step": 2161 }, { "epoch": 0.10556640625, "grad_norm": 0.3345782160758972, "learning_rate": 0.0004906130146269507, "loss": 1.9796, "step": 2162 }, { "epoch": 0.105615234375, "grad_norm": 0.32401278614997864, "learning_rate": 0.0004906030000234138, "loss": 1.9757, "step": 2163 }, { "epoch": 0.1056640625, "grad_norm": 0.31348007917404175, "learning_rate": 0.000490592980194565, "loss": 1.9714, "step": 2164 }, { "epoch": 0.105712890625, "grad_norm": 0.2641567885875702, "learning_rate": 0.0004905829551406472, "loss": 1.9495, "step": 2165 }, { "epoch": 0.10576171875, "grad_norm": 0.26183009147644043, "learning_rate": 0.0004905729248619032, "loss": 2.0607, "step": 2166 }, { "epoch": 0.105810546875, "grad_norm": 0.2901969254016876, "learning_rate": 0.0004905628893585762, "loss": 1.9317, "step": 2167 }, { "epoch": 0.105859375, "grad_norm": 0.3005417287349701, "learning_rate": 0.0004905528486309095, "loss": 1.9978, "step": 2168 }, { "epoch": 0.105908203125, "grad_norm": 0.2759726941585541, "learning_rate": 0.0004905428026791463, "loss": 1.9686, "step": 2169 }, { "epoch": 0.10595703125, "grad_norm": 0.26119494438171387, "learning_rate": 0.00049053275150353, "loss": 1.9959, "step": 2170 }, { "epoch": 0.106005859375, "grad_norm": 0.286298930644989, "learning_rate": 0.0004905226951043044, "loss": 1.9486, "step": 2171 }, { "epoch": 0.1060546875, "grad_norm": 0.3055354654788971, "learning_rate": 0.0004905126334817131, "loss": 2.0011, "step": 2172 }, { "epoch": 0.106103515625, "grad_norm": 0.30583643913269043, "learning_rate": 0.0004905025666359999, "loss": 1.9256, "step": 2173 }, { "epoch": 0.10615234375, "grad_norm": 0.3162594437599182, "learning_rate": 0.000490492494567409, "loss": 2.0493, "step": 2174 }, { "epoch": 0.106201171875, "grad_norm": 0.3017038106918335, "learning_rate": 0.0004904824172761844, "loss": 1.9681, "step": 2175 }, { "epoch": 0.10625, "grad_norm": 0.37395742535591125, "learning_rate": 0.0004904723347625702, "loss": 2.0205, "step": 2176 }, { "epoch": 0.106298828125, "grad_norm": 0.3412434458732605, "learning_rate": 0.0004904622470268109, "loss": 1.9957, "step": 2177 }, { "epoch": 0.10634765625, "grad_norm": 0.36938944458961487, "learning_rate": 0.0004904521540691509, "loss": 1.9505, "step": 2178 }, { "epoch": 0.106396484375, "grad_norm": 0.44367077946662903, "learning_rate": 0.000490442055889835, "loss": 1.9891, "step": 2179 }, { "epoch": 0.1064453125, "grad_norm": 0.28140613436698914, "learning_rate": 0.0004904319524891076, "loss": 1.9966, "step": 2180 }, { "epoch": 0.106494140625, "grad_norm": 0.3542938232421875, "learning_rate": 0.0004904218438672138, "loss": 2.0046, "step": 2181 }, { "epoch": 0.10654296875, "grad_norm": 0.48258620500564575, "learning_rate": 0.0004904117300243986, "loss": 1.9681, "step": 2182 }, { "epoch": 0.106591796875, "grad_norm": 0.3129212260246277, "learning_rate": 0.000490401610960907, "loss": 1.9819, "step": 2183 }, { "epoch": 0.106640625, "grad_norm": 0.331429660320282, "learning_rate": 0.0004903914866769843, "loss": 1.9212, "step": 2184 }, { "epoch": 0.106689453125, "grad_norm": 0.3486151397228241, "learning_rate": 0.0004903813571728759, "loss": 1.9529, "step": 2185 }, { "epoch": 0.10673828125, "grad_norm": 0.29662081599235535, "learning_rate": 0.0004903712224488273, "loss": 1.9627, "step": 2186 }, { "epoch": 0.106787109375, "grad_norm": 0.3856395184993744, "learning_rate": 0.0004903610825050842, "loss": 1.9923, "step": 2187 }, { "epoch": 0.1068359375, "grad_norm": 0.3110054135322571, "learning_rate": 0.0004903509373418921, "loss": 2.0146, "step": 2188 }, { "epoch": 0.106884765625, "grad_norm": 0.3125966489315033, "learning_rate": 0.000490340786959497, "loss": 1.9914, "step": 2189 }, { "epoch": 0.10693359375, "grad_norm": 0.3234255611896515, "learning_rate": 0.000490330631358145, "loss": 1.9976, "step": 2190 }, { "epoch": 0.106982421875, "grad_norm": 0.268209308385849, "learning_rate": 0.0004903204705380822, "loss": 1.9972, "step": 2191 }, { "epoch": 0.10703125, "grad_norm": 0.3446280360221863, "learning_rate": 0.0004903103044995548, "loss": 1.967, "step": 2192 }, { "epoch": 0.107080078125, "grad_norm": 0.3501339852809906, "learning_rate": 0.0004903001332428091, "loss": 1.9526, "step": 2193 }, { "epoch": 0.10712890625, "grad_norm": 0.31145796179771423, "learning_rate": 0.0004902899567680917, "loss": 1.9951, "step": 2194 }, { "epoch": 0.107177734375, "grad_norm": 0.3712383508682251, "learning_rate": 0.0004902797750756492, "loss": 1.9831, "step": 2195 }, { "epoch": 0.1072265625, "grad_norm": 0.36170151829719543, "learning_rate": 0.0004902695881657286, "loss": 2.0214, "step": 2196 }, { "epoch": 0.107275390625, "grad_norm": 0.3772439658641815, "learning_rate": 0.0004902593960385764, "loss": 1.9702, "step": 2197 }, { "epoch": 0.10732421875, "grad_norm": 0.2958740293979645, "learning_rate": 0.0004902491986944399, "loss": 1.9906, "step": 2198 }, { "epoch": 0.107373046875, "grad_norm": 0.29411256313323975, "learning_rate": 0.0004902389961335661, "loss": 2.0082, "step": 2199 }, { "epoch": 0.107421875, "grad_norm": 0.3201168477535248, "learning_rate": 0.0004902287883562023, "loss": 1.9254, "step": 2200 }, { "epoch": 0.107470703125, "grad_norm": 0.35433658957481384, "learning_rate": 0.0004902185753625958, "loss": 1.9675, "step": 2201 }, { "epoch": 0.10751953125, "grad_norm": 0.25811439752578735, "learning_rate": 0.0004902083571529944, "loss": 1.9793, "step": 2202 }, { "epoch": 0.107568359375, "grad_norm": 0.3414652943611145, "learning_rate": 0.0004901981337276455, "loss": 1.9721, "step": 2203 }, { "epoch": 0.1076171875, "grad_norm": 0.3839545249938965, "learning_rate": 0.000490187905086797, "loss": 2.0093, "step": 2204 }, { "epoch": 0.107666015625, "grad_norm": 0.33800366520881653, "learning_rate": 0.0004901776712306966, "loss": 1.9609, "step": 2205 }, { "epoch": 0.10771484375, "grad_norm": 0.32608357071876526, "learning_rate": 0.0004901674321595925, "loss": 2.0126, "step": 2206 }, { "epoch": 0.107763671875, "grad_norm": 0.4173572063446045, "learning_rate": 0.0004901571878737329, "loss": 1.9489, "step": 2207 }, { "epoch": 0.1078125, "grad_norm": 0.32772740721702576, "learning_rate": 0.0004901469383733659, "loss": 2.0052, "step": 2208 }, { "epoch": 0.107861328125, "grad_norm": 0.2918713092803955, "learning_rate": 0.00049013668365874, "loss": 1.952, "step": 2209 }, { "epoch": 0.10791015625, "grad_norm": 0.34892895817756653, "learning_rate": 0.0004901264237301039, "loss": 1.9569, "step": 2210 }, { "epoch": 0.107958984375, "grad_norm": 0.36765846610069275, "learning_rate": 0.000490116158587706, "loss": 1.935, "step": 2211 }, { "epoch": 0.1080078125, "grad_norm": 0.3500542640686035, "learning_rate": 0.0004901058882317951, "loss": 1.9783, "step": 2212 }, { "epoch": 0.108056640625, "grad_norm": 0.4167967736721039, "learning_rate": 0.0004900956126626204, "loss": 2.0364, "step": 2213 }, { "epoch": 0.10810546875, "grad_norm": 0.33348482847213745, "learning_rate": 0.0004900853318804304, "loss": 1.9463, "step": 2214 }, { "epoch": 0.108154296875, "grad_norm": 0.322468101978302, "learning_rate": 0.0004900750458854748, "loss": 1.9419, "step": 2215 }, { "epoch": 0.108203125, "grad_norm": 0.39462828636169434, "learning_rate": 0.0004900647546780026, "loss": 1.9568, "step": 2216 }, { "epoch": 0.108251953125, "grad_norm": 0.43382781744003296, "learning_rate": 0.0004900544582582633, "loss": 1.9428, "step": 2217 }, { "epoch": 0.10830078125, "grad_norm": 0.36003875732421875, "learning_rate": 0.0004900441566265064, "loss": 2.0044, "step": 2218 }, { "epoch": 0.108349609375, "grad_norm": 0.3124561309814453, "learning_rate": 0.0004900338497829817, "loss": 1.9632, "step": 2219 }, { "epoch": 0.1083984375, "grad_norm": 0.30740392208099365, "learning_rate": 0.0004900235377279388, "loss": 1.9764, "step": 2220 }, { "epoch": 0.108447265625, "grad_norm": 0.3046943247318268, "learning_rate": 0.0004900132204616278, "loss": 1.968, "step": 2221 }, { "epoch": 0.10849609375, "grad_norm": 0.29618582129478455, "learning_rate": 0.0004900028979842986, "loss": 1.9636, "step": 2222 }, { "epoch": 0.108544921875, "grad_norm": 0.27580299973487854, "learning_rate": 0.0004899925702962014, "loss": 1.9498, "step": 2223 }, { "epoch": 0.10859375, "grad_norm": 0.29444852471351624, "learning_rate": 0.0004899822373975866, "loss": 1.9921, "step": 2224 }, { "epoch": 0.108642578125, "grad_norm": 0.2860184907913208, "learning_rate": 0.0004899718992887046, "loss": 1.99, "step": 2225 }, { "epoch": 0.10869140625, "grad_norm": 0.35157331824302673, "learning_rate": 0.0004899615559698058, "loss": 1.9545, "step": 2226 }, { "epoch": 0.108740234375, "grad_norm": 0.35482731461524963, "learning_rate": 0.0004899512074411411, "loss": 1.9922, "step": 2227 }, { "epoch": 0.1087890625, "grad_norm": 0.30455976724624634, "learning_rate": 0.0004899408537029611, "loss": 1.9599, "step": 2228 }, { "epoch": 0.108837890625, "grad_norm": 0.34117576479911804, "learning_rate": 0.0004899304947555168, "loss": 1.9409, "step": 2229 }, { "epoch": 0.10888671875, "grad_norm": 0.3327700197696686, "learning_rate": 0.0004899201305990594, "loss": 1.9713, "step": 2230 }, { "epoch": 0.108935546875, "grad_norm": 0.3114486634731293, "learning_rate": 0.0004899097612338399, "loss": 1.9959, "step": 2231 }, { "epoch": 0.108984375, "grad_norm": 0.3270753026008606, "learning_rate": 0.0004898993866601098, "loss": 1.9597, "step": 2232 }, { "epoch": 0.109033203125, "grad_norm": 0.29806438088417053, "learning_rate": 0.0004898890068781202, "loss": 1.9257, "step": 2233 }, { "epoch": 0.10908203125, "grad_norm": 0.2878778874874115, "learning_rate": 0.0004898786218881232, "loss": 1.9558, "step": 2234 }, { "epoch": 0.109130859375, "grad_norm": 0.2515740394592285, "learning_rate": 0.0004898682316903699, "loss": 1.9088, "step": 2235 }, { "epoch": 0.1091796875, "grad_norm": 0.30540674924850464, "learning_rate": 0.0004898578362851124, "loss": 1.9498, "step": 2236 }, { "epoch": 0.109228515625, "grad_norm": 0.3510637581348419, "learning_rate": 0.0004898474356726027, "loss": 2.001, "step": 2237 }, { "epoch": 0.10927734375, "grad_norm": 0.32387855648994446, "learning_rate": 0.0004898370298530928, "loss": 1.9754, "step": 2238 }, { "epoch": 0.109326171875, "grad_norm": 0.3416537046432495, "learning_rate": 0.0004898266188268348, "loss": 1.9541, "step": 2239 }, { "epoch": 0.109375, "grad_norm": 0.29736393690109253, "learning_rate": 0.0004898162025940812, "loss": 1.9567, "step": 2240 }, { "epoch": 0.109423828125, "grad_norm": 0.27171170711517334, "learning_rate": 0.0004898057811550843, "loss": 1.9555, "step": 2241 }, { "epoch": 0.10947265625, "grad_norm": 0.27558228373527527, "learning_rate": 0.0004897953545100966, "loss": 1.8847, "step": 2242 }, { "epoch": 0.109521484375, "grad_norm": 0.2443743646144867, "learning_rate": 0.0004897849226593712, "loss": 1.9494, "step": 2243 }, { "epoch": 0.1095703125, "grad_norm": 0.26035550236701965, "learning_rate": 0.0004897744856031604, "loss": 1.9747, "step": 2244 }, { "epoch": 0.109619140625, "grad_norm": 0.27636006474494934, "learning_rate": 0.0004897640433417174, "loss": 1.9666, "step": 2245 }, { "epoch": 0.10966796875, "grad_norm": 0.2871667444705963, "learning_rate": 0.0004897535958752954, "loss": 1.9572, "step": 2246 }, { "epoch": 0.109716796875, "grad_norm": 0.27803608775138855, "learning_rate": 0.0004897431432041474, "loss": 1.9422, "step": 2247 }, { "epoch": 0.109765625, "grad_norm": 0.2714242935180664, "learning_rate": 0.0004897326853285268, "loss": 1.9217, "step": 2248 }, { "epoch": 0.109814453125, "grad_norm": 0.29447054862976074, "learning_rate": 0.0004897222222486872, "loss": 1.9908, "step": 2249 }, { "epoch": 0.10986328125, "grad_norm": 0.29391855001449585, "learning_rate": 0.0004897117539648818, "loss": 1.9811, "step": 2250 }, { "epoch": 0.109912109375, "grad_norm": 0.3171333074569702, "learning_rate": 0.0004897012804773647, "loss": 1.956, "step": 2251 }, { "epoch": 0.1099609375, "grad_norm": 0.31699663400650024, "learning_rate": 0.0004896908017863895, "loss": 1.9297, "step": 2252 }, { "epoch": 0.110009765625, "grad_norm": 0.2750585079193115, "learning_rate": 0.0004896803178922103, "loss": 1.9692, "step": 2253 }, { "epoch": 0.11005859375, "grad_norm": 0.3031053841114044, "learning_rate": 0.0004896698287950812, "loss": 1.9616, "step": 2254 }, { "epoch": 0.110107421875, "grad_norm": 0.3212302029132843, "learning_rate": 0.0004896593344952561, "loss": 1.9919, "step": 2255 }, { "epoch": 0.11015625, "grad_norm": 0.3013755679130554, "learning_rate": 0.0004896488349929898, "loss": 1.9438, "step": 2256 }, { "epoch": 0.110205078125, "grad_norm": 0.3362472653388977, "learning_rate": 0.0004896383302885364, "loss": 1.9651, "step": 2257 }, { "epoch": 0.11025390625, "grad_norm": 0.3018677830696106, "learning_rate": 0.0004896278203821506, "loss": 1.9552, "step": 2258 }, { "epoch": 0.110302734375, "grad_norm": 0.3841222822666168, "learning_rate": 0.0004896173052740873, "loss": 1.9632, "step": 2259 }, { "epoch": 0.1103515625, "grad_norm": 0.36424434185028076, "learning_rate": 0.0004896067849646011, "loss": 1.9711, "step": 2260 }, { "epoch": 0.110400390625, "grad_norm": 0.3503352701663971, "learning_rate": 0.0004895962594539471, "loss": 1.9217, "step": 2261 }, { "epoch": 0.11044921875, "grad_norm": 0.4577390253543854, "learning_rate": 0.0004895857287423802, "loss": 1.9292, "step": 2262 }, { "epoch": 0.110498046875, "grad_norm": 0.42859309911727905, "learning_rate": 0.0004895751928301559, "loss": 1.9832, "step": 2263 }, { "epoch": 0.110546875, "grad_norm": 0.3850133717060089, "learning_rate": 0.0004895646517175294, "loss": 1.9089, "step": 2264 }, { "epoch": 0.110595703125, "grad_norm": 0.3782637417316437, "learning_rate": 0.0004895541054047561, "loss": 1.9372, "step": 2265 }, { "epoch": 0.11064453125, "grad_norm": 0.3008250296115875, "learning_rate": 0.0004895435538920918, "loss": 1.9598, "step": 2266 }, { "epoch": 0.110693359375, "grad_norm": 0.32964110374450684, "learning_rate": 0.000489532997179792, "loss": 1.9479, "step": 2267 }, { "epoch": 0.1107421875, "grad_norm": 0.3315945863723755, "learning_rate": 0.0004895224352681127, "loss": 2.0099, "step": 2268 }, { "epoch": 0.110791015625, "grad_norm": 0.319620817899704, "learning_rate": 0.0004895118681573099, "loss": 1.976, "step": 2269 }, { "epoch": 0.11083984375, "grad_norm": 0.29355388879776, "learning_rate": 0.0004895012958476396, "loss": 1.9316, "step": 2270 }, { "epoch": 0.110888671875, "grad_norm": 0.2776435315608978, "learning_rate": 0.0004894907183393581, "loss": 1.9554, "step": 2271 }, { "epoch": 0.1109375, "grad_norm": 0.32904937863349915, "learning_rate": 0.0004894801356327217, "loss": 1.9283, "step": 2272 }, { "epoch": 0.110986328125, "grad_norm": 0.37596356868743896, "learning_rate": 0.0004894695477279869, "loss": 1.9567, "step": 2273 }, { "epoch": 0.11103515625, "grad_norm": 0.41760194301605225, "learning_rate": 0.0004894589546254102, "loss": 1.9321, "step": 2274 }, { "epoch": 0.111083984375, "grad_norm": 0.36594638228416443, "learning_rate": 0.0004894483563252486, "loss": 1.992, "step": 2275 }, { "epoch": 0.1111328125, "grad_norm": 0.3889891803264618, "learning_rate": 0.0004894377528277587, "loss": 2.0003, "step": 2276 }, { "epoch": 0.111181640625, "grad_norm": 0.3749793469905853, "learning_rate": 0.0004894271441331975, "loss": 1.9291, "step": 2277 }, { "epoch": 0.11123046875, "grad_norm": 0.3049717843532562, "learning_rate": 0.0004894165302418224, "loss": 1.952, "step": 2278 }, { "epoch": 0.111279296875, "grad_norm": 0.40194782614707947, "learning_rate": 0.0004894059111538902, "loss": 1.989, "step": 2279 }, { "epoch": 0.111328125, "grad_norm": 0.37247610092163086, "learning_rate": 0.0004893952868696588, "loss": 1.9835, "step": 2280 }, { "epoch": 0.111376953125, "grad_norm": 0.3049207031726837, "learning_rate": 0.0004893846573893852, "loss": 1.9794, "step": 2281 }, { "epoch": 0.11142578125, "grad_norm": 0.4149486720561981, "learning_rate": 0.0004893740227133272, "loss": 1.9501, "step": 2282 }, { "epoch": 0.111474609375, "grad_norm": 0.37975215911865234, "learning_rate": 0.0004893633828417425, "loss": 1.9651, "step": 2283 }, { "epoch": 0.1115234375, "grad_norm": 0.3079204559326172, "learning_rate": 0.000489352737774889, "loss": 1.9728, "step": 2284 }, { "epoch": 0.111572265625, "grad_norm": 0.28224045038223267, "learning_rate": 0.0004893420875130247, "loss": 1.9756, "step": 2285 }, { "epoch": 0.11162109375, "grad_norm": 0.33627140522003174, "learning_rate": 0.0004893314320564078, "loss": 1.9284, "step": 2286 }, { "epoch": 0.111669921875, "grad_norm": 0.312232106924057, "learning_rate": 0.0004893207714052963, "loss": 1.9168, "step": 2287 }, { "epoch": 0.11171875, "grad_norm": 0.38728511333465576, "learning_rate": 0.0004893101055599488, "loss": 1.9219, "step": 2288 }, { "epoch": 0.111767578125, "grad_norm": 0.34028860926628113, "learning_rate": 0.0004892994345206238, "loss": 1.9944, "step": 2289 }, { "epoch": 0.11181640625, "grad_norm": 0.30594179034233093, "learning_rate": 0.0004892887582875797, "loss": 1.9658, "step": 2290 }, { "epoch": 0.111865234375, "grad_norm": 0.465905100107193, "learning_rate": 0.0004892780768610754, "loss": 1.9412, "step": 2291 }, { "epoch": 0.1119140625, "grad_norm": 0.43237218260765076, "learning_rate": 0.0004892673902413699, "loss": 1.9617, "step": 2292 }, { "epoch": 0.111962890625, "grad_norm": 0.3554988503456116, "learning_rate": 0.000489256698428722, "loss": 1.9891, "step": 2293 }, { "epoch": 0.11201171875, "grad_norm": 0.35661494731903076, "learning_rate": 0.0004892460014233907, "loss": 1.9931, "step": 2294 }, { "epoch": 0.112060546875, "grad_norm": 0.3354804515838623, "learning_rate": 0.0004892352992256356, "loss": 2.0221, "step": 2295 }, { "epoch": 0.112109375, "grad_norm": 0.3374413847923279, "learning_rate": 0.0004892245918357159, "loss": 1.9512, "step": 2296 }, { "epoch": 0.112158203125, "grad_norm": 0.33562585711479187, "learning_rate": 0.0004892138792538911, "loss": 1.9732, "step": 2297 }, { "epoch": 0.11220703125, "grad_norm": 0.3143165409564972, "learning_rate": 0.0004892031614804208, "loss": 1.9737, "step": 2298 }, { "epoch": 0.112255859375, "grad_norm": 0.3746497333049774, "learning_rate": 0.0004891924385155648, "loss": 1.9441, "step": 2299 }, { "epoch": 0.1123046875, "grad_norm": 0.31125324964523315, "learning_rate": 0.000489181710359583, "loss": 1.9502, "step": 2300 }, { "epoch": 0.112353515625, "grad_norm": 0.3060968518257141, "learning_rate": 0.0004891709770127354, "loss": 1.9045, "step": 2301 }, { "epoch": 0.11240234375, "grad_norm": 0.3904802203178406, "learning_rate": 0.000489160238475282, "loss": 2.0035, "step": 2302 }, { "epoch": 0.112451171875, "grad_norm": 0.30406585335731506, "learning_rate": 0.0004891494947474832, "loss": 1.9708, "step": 2303 }, { "epoch": 0.1125, "grad_norm": 0.2770773470401764, "learning_rate": 0.0004891387458295995, "loss": 1.9362, "step": 2304 }, { "epoch": 0.112548828125, "grad_norm": 0.3581194579601288, "learning_rate": 0.0004891279917218911, "loss": 1.9716, "step": 2305 }, { "epoch": 0.11259765625, "grad_norm": 0.30499961972236633, "learning_rate": 0.0004891172324246189, "loss": 1.9113, "step": 2306 }, { "epoch": 0.112646484375, "grad_norm": 0.2735790014266968, "learning_rate": 0.0004891064679380435, "loss": 1.9578, "step": 2307 }, { "epoch": 0.1126953125, "grad_norm": 0.32315486669540405, "learning_rate": 0.0004890956982624258, "loss": 1.9918, "step": 2308 }, { "epoch": 0.112744140625, "grad_norm": 0.2853093147277832, "learning_rate": 0.0004890849233980269, "loss": 1.9828, "step": 2309 }, { "epoch": 0.11279296875, "grad_norm": 0.34035739302635193, "learning_rate": 0.0004890741433451079, "loss": 1.9987, "step": 2310 }, { "epoch": 0.112841796875, "grad_norm": 0.3279079794883728, "learning_rate": 0.00048906335810393, "loss": 1.9665, "step": 2311 }, { "epoch": 0.112890625, "grad_norm": 0.30116355419158936, "learning_rate": 0.0004890525676747547, "loss": 1.9506, "step": 2312 }, { "epoch": 0.112939453125, "grad_norm": 0.35047778487205505, "learning_rate": 0.0004890417720578433, "loss": 1.9995, "step": 2313 }, { "epoch": 0.11298828125, "grad_norm": 0.3447892367839813, "learning_rate": 0.0004890309712534578, "loss": 1.9362, "step": 2314 }, { "epoch": 0.113037109375, "grad_norm": 0.34657618403434753, "learning_rate": 0.0004890201652618596, "loss": 1.9343, "step": 2315 }, { "epoch": 0.1130859375, "grad_norm": 0.32785651087760925, "learning_rate": 0.0004890093540833108, "loss": 1.9605, "step": 2316 }, { "epoch": 0.113134765625, "grad_norm": 0.3468168079853058, "learning_rate": 0.0004889985377180734, "loss": 1.9526, "step": 2317 }, { "epoch": 0.11318359375, "grad_norm": 0.3416922092437744, "learning_rate": 0.0004889877161664096, "loss": 1.969, "step": 2318 }, { "epoch": 0.113232421875, "grad_norm": 0.3285270035266876, "learning_rate": 0.0004889768894285815, "loss": 1.9658, "step": 2319 }, { "epoch": 0.11328125, "grad_norm": 0.28397336602211, "learning_rate": 0.0004889660575048515, "loss": 1.9404, "step": 2320 }, { "epoch": 0.113330078125, "grad_norm": 0.32895344495773315, "learning_rate": 0.0004889552203954823, "loss": 1.9438, "step": 2321 }, { "epoch": 0.11337890625, "grad_norm": 0.30227339267730713, "learning_rate": 0.0004889443781007364, "loss": 1.9505, "step": 2322 }, { "epoch": 0.113427734375, "grad_norm": 0.27187982201576233, "learning_rate": 0.0004889335306208765, "loss": 2.0099, "step": 2323 }, { "epoch": 0.1134765625, "grad_norm": 0.28368815779685974, "learning_rate": 0.000488922677956166, "loss": 2.0135, "step": 2324 }, { "epoch": 0.113525390625, "grad_norm": 0.28176042437553406, "learning_rate": 0.0004889118201068672, "loss": 1.9537, "step": 2325 }, { "epoch": 0.11357421875, "grad_norm": 0.3114688992500305, "learning_rate": 0.0004889009570732436, "loss": 1.9931, "step": 2326 }, { "epoch": 0.113623046875, "grad_norm": 0.3024570047855377, "learning_rate": 0.0004888900888555585, "loss": 1.9537, "step": 2327 }, { "epoch": 0.113671875, "grad_norm": 0.2855931222438812, "learning_rate": 0.0004888792154540753, "loss": 1.9757, "step": 2328 }, { "epoch": 0.113720703125, "grad_norm": 0.29987475275993347, "learning_rate": 0.0004888683368690574, "loss": 1.9793, "step": 2329 }, { "epoch": 0.11376953125, "grad_norm": 0.3141133785247803, "learning_rate": 0.0004888574531007687, "loss": 1.9298, "step": 2330 }, { "epoch": 0.113818359375, "grad_norm": 0.2830854058265686, "learning_rate": 0.0004888465641494726, "loss": 1.963, "step": 2331 }, { "epoch": 0.1138671875, "grad_norm": 0.26735010743141174, "learning_rate": 0.0004888356700154333, "loss": 1.9438, "step": 2332 }, { "epoch": 0.113916015625, "grad_norm": 0.4083377718925476, "learning_rate": 0.0004888247706989149, "loss": 1.9421, "step": 2333 }, { "epoch": 0.11396484375, "grad_norm": 0.46092554926872253, "learning_rate": 0.0004888138662001813, "loss": 1.9347, "step": 2334 }, { "epoch": 0.114013671875, "grad_norm": 0.31228068470954895, "learning_rate": 0.0004888029565194967, "loss": 1.9646, "step": 2335 }, { "epoch": 0.1140625, "grad_norm": 0.3275686204433441, "learning_rate": 0.0004887920416571259, "loss": 1.9657, "step": 2336 }, { "epoch": 0.114111328125, "grad_norm": 0.338318407535553, "learning_rate": 0.0004887811216133331, "loss": 1.9933, "step": 2337 }, { "epoch": 0.11416015625, "grad_norm": 0.29482874274253845, "learning_rate": 0.0004887701963883831, "loss": 2.0226, "step": 2338 }, { "epoch": 0.114208984375, "grad_norm": 0.3766978979110718, "learning_rate": 0.0004887592659825407, "loss": 1.973, "step": 2339 }, { "epoch": 0.1142578125, "grad_norm": 0.44037315249443054, "learning_rate": 0.0004887483303960706, "loss": 1.9178, "step": 2340 }, { "epoch": 0.114306640625, "grad_norm": 0.3293815553188324, "learning_rate": 0.0004887373896292381, "loss": 1.9435, "step": 2341 }, { "epoch": 0.11435546875, "grad_norm": 0.331946462392807, "learning_rate": 0.0004887264436823083, "loss": 1.9995, "step": 2342 }, { "epoch": 0.114404296875, "grad_norm": 0.3416444957256317, "learning_rate": 0.0004887154925555464, "loss": 1.9179, "step": 2343 }, { "epoch": 0.114453125, "grad_norm": 0.3177088499069214, "learning_rate": 0.0004887045362492178, "loss": 1.9317, "step": 2344 }, { "epoch": 0.114501953125, "grad_norm": 0.34211957454681396, "learning_rate": 0.0004886935747635881, "loss": 1.9736, "step": 2345 }, { "epoch": 0.11455078125, "grad_norm": 0.3717080354690552, "learning_rate": 0.0004886826080989229, "loss": 2.004, "step": 2346 }, { "epoch": 0.114599609375, "grad_norm": 0.37436649203300476, "learning_rate": 0.0004886716362554881, "loss": 1.9711, "step": 2347 }, { "epoch": 0.1146484375, "grad_norm": 0.3063649535179138, "learning_rate": 0.0004886606592335495, "loss": 1.9438, "step": 2348 }, { "epoch": 0.114697265625, "grad_norm": 0.3398238718509674, "learning_rate": 0.0004886496770333731, "loss": 1.9652, "step": 2349 }, { "epoch": 0.11474609375, "grad_norm": 0.33448007702827454, "learning_rate": 0.0004886386896552252, "loss": 1.986, "step": 2350 }, { "epoch": 0.114794921875, "grad_norm": 0.2577715218067169, "learning_rate": 0.000488627697099372, "loss": 1.9332, "step": 2351 }, { "epoch": 0.11484375, "grad_norm": 0.2925163805484772, "learning_rate": 0.0004886166993660799, "loss": 1.9396, "step": 2352 }, { "epoch": 0.114892578125, "grad_norm": 0.294239342212677, "learning_rate": 0.0004886056964556155, "loss": 1.9945, "step": 2353 }, { "epoch": 0.11494140625, "grad_norm": 0.28385257720947266, "learning_rate": 0.0004885946883682455, "loss": 1.9551, "step": 2354 }, { "epoch": 0.114990234375, "grad_norm": 0.3552693724632263, "learning_rate": 0.0004885836751042365, "loss": 1.9795, "step": 2355 }, { "epoch": 0.1150390625, "grad_norm": 0.3538987338542938, "learning_rate": 0.0004885726566638557, "loss": 1.9676, "step": 2356 }, { "epoch": 0.115087890625, "grad_norm": 0.4105052649974823, "learning_rate": 0.0004885616330473699, "loss": 1.964, "step": 2357 }, { "epoch": 0.11513671875, "grad_norm": 0.42081284523010254, "learning_rate": 0.0004885506042550464, "loss": 1.9523, "step": 2358 }, { "epoch": 0.115185546875, "grad_norm": 0.38533151149749756, "learning_rate": 0.0004885395702871523, "loss": 1.9562, "step": 2359 }, { "epoch": 0.115234375, "grad_norm": 0.37547704577445984, "learning_rate": 0.0004885285311439553, "loss": 1.9543, "step": 2360 }, { "epoch": 0.115283203125, "grad_norm": 0.28483378887176514, "learning_rate": 0.0004885174868257228, "loss": 1.9647, "step": 2361 }, { "epoch": 0.11533203125, "grad_norm": 0.34897902607917786, "learning_rate": 0.0004885064373327223, "loss": 1.9295, "step": 2362 }, { "epoch": 0.115380859375, "grad_norm": 0.33308687806129456, "learning_rate": 0.000488495382665222, "loss": 1.9689, "step": 2363 }, { "epoch": 0.1154296875, "grad_norm": 0.27217593789100647, "learning_rate": 0.0004884843228234895, "loss": 1.9309, "step": 2364 }, { "epoch": 0.115478515625, "grad_norm": 0.2743956446647644, "learning_rate": 0.0004884732578077928, "loss": 1.9645, "step": 2365 }, { "epoch": 0.11552734375, "grad_norm": 0.2961999773979187, "learning_rate": 0.0004884621876184004, "loss": 1.9824, "step": 2366 }, { "epoch": 0.115576171875, "grad_norm": 0.2933545410633087, "learning_rate": 0.0004884511122555801, "loss": 1.9751, "step": 2367 }, { "epoch": 0.115625, "grad_norm": 0.2922198474407196, "learning_rate": 0.0004884400317196009, "loss": 1.9494, "step": 2368 }, { "epoch": 0.115673828125, "grad_norm": 0.32015135884284973, "learning_rate": 0.0004884289460107309, "loss": 1.9856, "step": 2369 }, { "epoch": 0.11572265625, "grad_norm": 0.33479946851730347, "learning_rate": 0.000488417855129239, "loss": 1.9299, "step": 2370 }, { "epoch": 0.115771484375, "grad_norm": 0.27902594208717346, "learning_rate": 0.0004884067590753939, "loss": 1.9624, "step": 2371 }, { "epoch": 0.1158203125, "grad_norm": 0.3689446449279785, "learning_rate": 0.0004883956578494645, "loss": 1.9837, "step": 2372 }, { "epoch": 0.115869140625, "grad_norm": 0.38945847749710083, "learning_rate": 0.0004883845514517199, "loss": 1.9865, "step": 2373 }, { "epoch": 0.11591796875, "grad_norm": 0.3918105363845825, "learning_rate": 0.0004883734398824294, "loss": 1.9488, "step": 2374 }, { "epoch": 0.115966796875, "grad_norm": 0.39216843247413635, "learning_rate": 0.000488362323141862, "loss": 1.9764, "step": 2375 }, { "epoch": 0.116015625, "grad_norm": 0.313120573759079, "learning_rate": 0.0004883512012302874, "loss": 1.9662, "step": 2376 }, { "epoch": 0.116064453125, "grad_norm": 0.2687877118587494, "learning_rate": 0.0004883400741479748, "loss": 2.0005, "step": 2377 }, { "epoch": 0.11611328125, "grad_norm": 0.32174965739250183, "learning_rate": 0.0004883289418951943, "loss": 1.9634, "step": 2378 }, { "epoch": 0.116162109375, "grad_norm": 0.3894239068031311, "learning_rate": 0.0004883178044722156, "loss": 1.9331, "step": 2379 }, { "epoch": 0.1162109375, "grad_norm": 0.3265348970890045, "learning_rate": 0.0004883066618793083, "loss": 1.9784, "step": 2380 }, { "epoch": 0.116259765625, "grad_norm": 0.3860044777393341, "learning_rate": 0.0004882955141167428, "loss": 1.9808, "step": 2381 }, { "epoch": 0.11630859375, "grad_norm": 0.3733682632446289, "learning_rate": 0.0004882843611847892, "loss": 1.9638, "step": 2382 }, { "epoch": 0.116357421875, "grad_norm": 0.29082685708999634, "learning_rate": 0.00048827320308371765, "loss": 1.9753, "step": 2383 }, { "epoch": 0.11640625, "grad_norm": 0.3259440064430237, "learning_rate": 0.0004882620398137988, "loss": 1.9627, "step": 2384 }, { "epoch": 0.116455078125, "grad_norm": 0.3537008762359619, "learning_rate": 0.000488250871375303, "loss": 1.9972, "step": 2385 }, { "epoch": 0.11650390625, "grad_norm": 0.3439304828643799, "learning_rate": 0.00048823969776850103, "loss": 1.9169, "step": 2386 }, { "epoch": 0.116552734375, "grad_norm": 0.34360605478286743, "learning_rate": 0.00048822851899366364, "loss": 1.9386, "step": 2387 }, { "epoch": 0.1166015625, "grad_norm": 0.34741997718811035, "learning_rate": 0.00048821733505106186, "loss": 1.9509, "step": 2388 }, { "epoch": 0.116650390625, "grad_norm": 0.336934357881546, "learning_rate": 0.0004882061459409666, "loss": 1.9761, "step": 2389 }, { "epoch": 0.11669921875, "grad_norm": 0.2628616690635681, "learning_rate": 0.0004881949516636491, "loss": 1.9413, "step": 2390 }, { "epoch": 0.116748046875, "grad_norm": 0.34633204340934753, "learning_rate": 0.0004881837522193807, "loss": 1.9604, "step": 2391 }, { "epoch": 0.116796875, "grad_norm": 0.2574157118797302, "learning_rate": 0.0004881725476084328, "loss": 1.9207, "step": 2392 }, { "epoch": 0.116845703125, "grad_norm": 0.3534756004810333, "learning_rate": 0.00048816133783107695, "loss": 1.9326, "step": 2393 }, { "epoch": 0.11689453125, "grad_norm": 0.4288610517978668, "learning_rate": 0.00048815012288758484, "loss": 1.9554, "step": 2394 }, { "epoch": 0.116943359375, "grad_norm": 0.3022312521934509, "learning_rate": 0.0004881389027782282, "loss": 1.9592, "step": 2395 }, { "epoch": 0.1169921875, "grad_norm": 0.29921168088912964, "learning_rate": 0.00048812767750327905, "loss": 1.9557, "step": 2396 }, { "epoch": 0.117041015625, "grad_norm": 0.353693425655365, "learning_rate": 0.0004881164470630094, "loss": 1.9201, "step": 2397 }, { "epoch": 0.11708984375, "grad_norm": 0.33460742235183716, "learning_rate": 0.0004881052114576915, "loss": 1.956, "step": 2398 }, { "epoch": 0.117138671875, "grad_norm": 0.30169937014579773, "learning_rate": 0.00048809397068759746, "loss": 1.9803, "step": 2399 }, { "epoch": 0.1171875, "grad_norm": 0.3299410045146942, "learning_rate": 0.00048808272475299994, "loss": 1.9633, "step": 2400 }, { "epoch": 0.117236328125, "grad_norm": 0.29292818903923035, "learning_rate": 0.00048807147365417136, "loss": 1.9267, "step": 2401 }, { "epoch": 0.11728515625, "grad_norm": 0.3326662480831146, "learning_rate": 0.00048806021739138453, "loss": 1.9502, "step": 2402 }, { "epoch": 0.117333984375, "grad_norm": 0.32479578256607056, "learning_rate": 0.00048804895596491203, "loss": 1.9662, "step": 2403 }, { "epoch": 0.1173828125, "grad_norm": 0.28327980637550354, "learning_rate": 0.000488037689375027, "loss": 1.9962, "step": 2404 }, { "epoch": 0.117431640625, "grad_norm": 0.3676585853099823, "learning_rate": 0.0004880264176220024, "loss": 1.9858, "step": 2405 }, { "epoch": 0.11748046875, "grad_norm": 0.3464736044406891, "learning_rate": 0.00048801514070611143, "loss": 1.996, "step": 2406 }, { "epoch": 0.117529296875, "grad_norm": 0.2984428107738495, "learning_rate": 0.00048800385862762737, "loss": 1.9451, "step": 2407 }, { "epoch": 0.117578125, "grad_norm": 0.29453855752944946, "learning_rate": 0.0004879925713868236, "loss": 1.9377, "step": 2408 }, { "epoch": 0.117626953125, "grad_norm": 0.3220144212245941, "learning_rate": 0.00048798127898397385, "loss": 1.9693, "step": 2409 }, { "epoch": 0.11767578125, "grad_norm": 0.37199854850769043, "learning_rate": 0.0004879699814193517, "loss": 1.9542, "step": 2410 }, { "epoch": 0.117724609375, "grad_norm": 0.34282147884368896, "learning_rate": 0.0004879586786932309, "loss": 1.9987, "step": 2411 }, { "epoch": 0.1177734375, "grad_norm": 0.2784314751625061, "learning_rate": 0.0004879473708058855, "loss": 1.9824, "step": 2412 }, { "epoch": 0.117822265625, "grad_norm": 0.31424322724342346, "learning_rate": 0.0004879360577575894, "loss": 1.9659, "step": 2413 }, { "epoch": 0.11787109375, "grad_norm": 0.3022395968437195, "learning_rate": 0.00048792473954861694, "loss": 1.9569, "step": 2414 }, { "epoch": 0.117919921875, "grad_norm": 0.28447267413139343, "learning_rate": 0.00048791341617924236, "loss": 1.9372, "step": 2415 }, { "epoch": 0.11796875, "grad_norm": 0.28463977575302124, "learning_rate": 0.00048790208764973997, "loss": 1.991, "step": 2416 }, { "epoch": 0.118017578125, "grad_norm": 0.3076293170452118, "learning_rate": 0.0004878907539603845, "loss": 1.9369, "step": 2417 }, { "epoch": 0.11806640625, "grad_norm": 0.2946052849292755, "learning_rate": 0.0004878794151114507, "loss": 1.9586, "step": 2418 }, { "epoch": 0.118115234375, "grad_norm": 0.2984424829483032, "learning_rate": 0.0004878680711032131, "loss": 1.9514, "step": 2419 }, { "epoch": 0.1181640625, "grad_norm": 0.32674872875213623, "learning_rate": 0.0004878567219359469, "loss": 1.9704, "step": 2420 }, { "epoch": 0.118212890625, "grad_norm": 0.3484503924846649, "learning_rate": 0.0004878453676099269, "loss": 1.9698, "step": 2421 }, { "epoch": 0.11826171875, "grad_norm": 0.4053109586238861, "learning_rate": 0.0004878340081254285, "loss": 1.9814, "step": 2422 }, { "epoch": 0.118310546875, "grad_norm": 0.4224664568901062, "learning_rate": 0.00048782264348272685, "loss": 1.913, "step": 2423 }, { "epoch": 0.118359375, "grad_norm": 0.44264310598373413, "learning_rate": 0.0004878112736820976, "loss": 1.9907, "step": 2424 }, { "epoch": 0.118408203125, "grad_norm": 0.37535709142684937, "learning_rate": 0.00048779989872381604, "loss": 1.9658, "step": 2425 }, { "epoch": 0.11845703125, "grad_norm": 0.2784021496772766, "learning_rate": 0.000487788518608158, "loss": 1.9725, "step": 2426 }, { "epoch": 0.118505859375, "grad_norm": 0.3745373487472534, "learning_rate": 0.0004877771333353993, "loss": 1.9203, "step": 2427 }, { "epoch": 0.1185546875, "grad_norm": 0.43512317538261414, "learning_rate": 0.0004877657429058158, "loss": 2.0103, "step": 2428 }, { "epoch": 0.118603515625, "grad_norm": 0.3615601360797882, "learning_rate": 0.00048775434731968356, "loss": 1.9495, "step": 2429 }, { "epoch": 0.11865234375, "grad_norm": 0.4031983017921448, "learning_rate": 0.0004877429465772788, "loss": 1.9374, "step": 2430 }, { "epoch": 0.118701171875, "grad_norm": 0.45616987347602844, "learning_rate": 0.0004877315406788778, "loss": 2.021, "step": 2431 }, { "epoch": 0.11875, "grad_norm": 0.3691353499889374, "learning_rate": 0.000487720129624757, "loss": 1.9264, "step": 2432 }, { "epoch": 0.118798828125, "grad_norm": 0.36886417865753174, "learning_rate": 0.000487708713415193, "loss": 1.9068, "step": 2433 }, { "epoch": 0.11884765625, "grad_norm": 0.3814224600791931, "learning_rate": 0.00048769729205046247, "loss": 1.913, "step": 2434 }, { "epoch": 0.118896484375, "grad_norm": 0.36420077085494995, "learning_rate": 0.00048768586553084217, "loss": 1.9682, "step": 2435 }, { "epoch": 0.1189453125, "grad_norm": 0.3572162091732025, "learning_rate": 0.000487674433856609, "loss": 1.9863, "step": 2436 }, { "epoch": 0.118994140625, "grad_norm": 0.4284845292568207, "learning_rate": 0.00048766299702804013, "loss": 1.9843, "step": 2437 }, { "epoch": 0.11904296875, "grad_norm": 0.30172857642173767, "learning_rate": 0.00048765155504541265, "loss": 1.9116, "step": 2438 }, { "epoch": 0.119091796875, "grad_norm": 0.3140902519226074, "learning_rate": 0.00048764010790900385, "loss": 1.9479, "step": 2439 }, { "epoch": 0.119140625, "grad_norm": 0.37296050786972046, "learning_rate": 0.0004876286556190912, "loss": 1.923, "step": 2440 }, { "epoch": 0.119189453125, "grad_norm": 0.29967477917671204, "learning_rate": 0.0004876171981759523, "loss": 1.9652, "step": 2441 }, { "epoch": 0.11923828125, "grad_norm": 0.3128570020198822, "learning_rate": 0.00048760573557986476, "loss": 1.9944, "step": 2442 }, { "epoch": 0.119287109375, "grad_norm": 0.3766232430934906, "learning_rate": 0.00048759426783110646, "loss": 1.9188, "step": 2443 }, { "epoch": 0.1193359375, "grad_norm": 0.3523433804512024, "learning_rate": 0.00048758279492995527, "loss": 1.9533, "step": 2444 }, { "epoch": 0.119384765625, "grad_norm": 0.3498089909553528, "learning_rate": 0.00048757131687668923, "loss": 1.9208, "step": 2445 }, { "epoch": 0.11943359375, "grad_norm": 0.3781106173992157, "learning_rate": 0.0004875598336715865, "loss": 2.0081, "step": 2446 }, { "epoch": 0.119482421875, "grad_norm": 0.3724043667316437, "learning_rate": 0.00048754834531492555, "loss": 1.9243, "step": 2447 }, { "epoch": 0.11953125, "grad_norm": 0.30051928758621216, "learning_rate": 0.00048753685180698465, "loss": 1.9347, "step": 2448 }, { "epoch": 0.119580078125, "grad_norm": 0.3611786365509033, "learning_rate": 0.0004875253531480423, "loss": 1.9062, "step": 2449 }, { "epoch": 0.11962890625, "grad_norm": 0.3195177912712097, "learning_rate": 0.00048751384933837737, "loss": 1.9708, "step": 2450 }, { "epoch": 0.119677734375, "grad_norm": 0.2706540822982788, "learning_rate": 0.0004875023403782685, "loss": 1.9506, "step": 2451 }, { "epoch": 0.1197265625, "grad_norm": 0.3384338617324829, "learning_rate": 0.0004874908262679948, "loss": 1.9897, "step": 2452 }, { "epoch": 0.119775390625, "grad_norm": 0.3527093231678009, "learning_rate": 0.00048747930700783514, "loss": 1.942, "step": 2453 }, { "epoch": 0.11982421875, "grad_norm": 0.30081403255462646, "learning_rate": 0.00048746778259806876, "loss": 1.9502, "step": 2454 }, { "epoch": 0.119873046875, "grad_norm": 0.34472015500068665, "learning_rate": 0.00048745625303897507, "loss": 1.9863, "step": 2455 }, { "epoch": 0.119921875, "grad_norm": 0.32241955399513245, "learning_rate": 0.0004874447183308333, "loss": 1.93, "step": 2456 }, { "epoch": 0.119970703125, "grad_norm": 0.2593451738357544, "learning_rate": 0.0004874331784739231, "loss": 1.9429, "step": 2457 }, { "epoch": 0.12001953125, "grad_norm": 0.27896618843078613, "learning_rate": 0.0004874216334685242, "loss": 1.9575, "step": 2458 }, { "epoch": 0.120068359375, "grad_norm": 0.32070499658584595, "learning_rate": 0.0004874100833149163, "loss": 1.9607, "step": 2459 }, { "epoch": 0.1201171875, "grad_norm": 0.35027647018432617, "learning_rate": 0.0004873985280133795, "loss": 1.9398, "step": 2460 }, { "epoch": 0.120166015625, "grad_norm": 0.3714000880718231, "learning_rate": 0.0004873869675641936, "loss": 1.9467, "step": 2461 }, { "epoch": 0.12021484375, "grad_norm": 0.33694350719451904, "learning_rate": 0.00048737540196763904, "loss": 1.9813, "step": 2462 }, { "epoch": 0.120263671875, "grad_norm": 0.3250640332698822, "learning_rate": 0.0004873638312239959, "loss": 1.8968, "step": 2463 }, { "epoch": 0.1203125, "grad_norm": 0.3410172462463379, "learning_rate": 0.0004873522553335447, "loss": 1.978, "step": 2464 }, { "epoch": 0.120361328125, "grad_norm": 0.32207977771759033, "learning_rate": 0.00048734067429656596, "loss": 1.9541, "step": 2465 }, { "epoch": 0.12041015625, "grad_norm": 0.3146991729736328, "learning_rate": 0.00048732908811334046, "loss": 1.937, "step": 2466 }, { "epoch": 0.120458984375, "grad_norm": 0.32053783535957336, "learning_rate": 0.0004873174967841489, "loss": 1.9321, "step": 2467 }, { "epoch": 0.1205078125, "grad_norm": 0.3173394799232483, "learning_rate": 0.00048730590030927217, "loss": 1.9764, "step": 2468 }, { "epoch": 0.120556640625, "grad_norm": 0.3501441776752472, "learning_rate": 0.0004872942986889915, "loss": 1.9713, "step": 2469 }, { "epoch": 0.12060546875, "grad_norm": 0.34272459149360657, "learning_rate": 0.0004872826919235879, "loss": 1.9704, "step": 2470 }, { "epoch": 0.120654296875, "grad_norm": 0.29111024737358093, "learning_rate": 0.0004872710800133427, "loss": 1.9354, "step": 2471 }, { "epoch": 0.120703125, "grad_norm": 0.2517109513282776, "learning_rate": 0.00048725946295853737, "loss": 1.9526, "step": 2472 }, { "epoch": 0.120751953125, "grad_norm": 0.3380759060382843, "learning_rate": 0.00048724784075945333, "loss": 2.0021, "step": 2473 }, { "epoch": 0.12080078125, "grad_norm": 0.36941900849342346, "learning_rate": 0.0004872362134163724, "loss": 1.9304, "step": 2474 }, { "epoch": 0.120849609375, "grad_norm": 0.37423238158226013, "learning_rate": 0.0004872245809295764, "loss": 1.9534, "step": 2475 }, { "epoch": 0.1208984375, "grad_norm": 0.3951185941696167, "learning_rate": 0.0004872129432993471, "loss": 1.906, "step": 2476 }, { "epoch": 0.120947265625, "grad_norm": 0.28581738471984863, "learning_rate": 0.00048720130052596673, "loss": 1.9435, "step": 2477 }, { "epoch": 0.12099609375, "grad_norm": 0.30763116478919983, "learning_rate": 0.00048718965260971726, "loss": 1.9676, "step": 2478 }, { "epoch": 0.121044921875, "grad_norm": 0.3323640525341034, "learning_rate": 0.0004871779995508811, "loss": 1.9537, "step": 2479 }, { "epoch": 0.12109375, "grad_norm": 0.28724804520606995, "learning_rate": 0.0004871663413497407, "loss": 1.9467, "step": 2480 }, { "epoch": 0.121142578125, "grad_norm": 0.3173132538795471, "learning_rate": 0.00048715467800657857, "loss": 1.9263, "step": 2481 }, { "epoch": 0.12119140625, "grad_norm": 0.38231101632118225, "learning_rate": 0.0004871430095216773, "loss": 1.9481, "step": 2482 }, { "epoch": 0.121240234375, "grad_norm": 0.40415528416633606, "learning_rate": 0.0004871313358953198, "loss": 1.92, "step": 2483 }, { "epoch": 0.1212890625, "grad_norm": 0.3249583840370178, "learning_rate": 0.000487119657127789, "loss": 1.968, "step": 2484 }, { "epoch": 0.121337890625, "grad_norm": 0.31848961114883423, "learning_rate": 0.0004871079732193679, "loss": 1.9427, "step": 2485 }, { "epoch": 0.12138671875, "grad_norm": 0.3046923577785492, "learning_rate": 0.00048709628417033956, "loss": 1.9757, "step": 2486 }, { "epoch": 0.121435546875, "grad_norm": 0.30875295400619507, "learning_rate": 0.00048708458998098745, "loss": 1.8977, "step": 2487 }, { "epoch": 0.121484375, "grad_norm": 0.3525305390357971, "learning_rate": 0.00048707289065159486, "loss": 1.94, "step": 2488 }, { "epoch": 0.121533203125, "grad_norm": 0.3380098044872284, "learning_rate": 0.00048706118618244544, "loss": 1.9765, "step": 2489 }, { "epoch": 0.12158203125, "grad_norm": 0.26201823353767395, "learning_rate": 0.0004870494765738228, "loss": 1.9523, "step": 2490 }, { "epoch": 0.121630859375, "grad_norm": 0.35228627920150757, "learning_rate": 0.00048703776182601065, "loss": 1.9764, "step": 2491 }, { "epoch": 0.1216796875, "grad_norm": 0.3606533706188202, "learning_rate": 0.0004870260419392931, "loss": 1.8972, "step": 2492 }, { "epoch": 0.121728515625, "grad_norm": 0.32427695393562317, "learning_rate": 0.0004870143169139541, "loss": 1.9602, "step": 2493 }, { "epoch": 0.12177734375, "grad_norm": 0.3474273085594177, "learning_rate": 0.00048700258675027776, "loss": 1.954, "step": 2494 }, { "epoch": 0.121826171875, "grad_norm": 0.32634973526000977, "learning_rate": 0.00048699085144854836, "loss": 1.9555, "step": 2495 }, { "epoch": 0.121875, "grad_norm": 0.32734036445617676, "learning_rate": 0.0004869791110090504, "loss": 1.9667, "step": 2496 }, { "epoch": 0.121923828125, "grad_norm": 0.3978440463542938, "learning_rate": 0.00048696736543206844, "loss": 1.9683, "step": 2497 }, { "epoch": 0.12197265625, "grad_norm": 0.331732839345932, "learning_rate": 0.00048695561471788696, "loss": 1.9338, "step": 2498 }, { "epoch": 0.122021484375, "grad_norm": 0.31905609369277954, "learning_rate": 0.000486943858866791, "loss": 1.934, "step": 2499 }, { "epoch": 0.1220703125, "grad_norm": 0.35091328620910645, "learning_rate": 0.0004869320978790653, "loss": 1.9365, "step": 2500 }, { "epoch": 0.122119140625, "grad_norm": 0.41722509264945984, "learning_rate": 0.00048692033175499496, "loss": 1.9278, "step": 2501 }, { "epoch": 0.12216796875, "grad_norm": 0.2715297341346741, "learning_rate": 0.0004869085604948651, "loss": 1.9552, "step": 2502 }, { "epoch": 0.122216796875, "grad_norm": 0.35265201330184937, "learning_rate": 0.000486896784098961, "loss": 1.9647, "step": 2503 }, { "epoch": 0.122265625, "grad_norm": 0.3816823661327362, "learning_rate": 0.0004868850025675681, "loss": 1.9288, "step": 2504 }, { "epoch": 0.122314453125, "grad_norm": 0.32437393069267273, "learning_rate": 0.000486873215900972, "loss": 1.952, "step": 2505 }, { "epoch": 0.12236328125, "grad_norm": 0.2641966938972473, "learning_rate": 0.0004868614240994583, "loss": 1.9525, "step": 2506 }, { "epoch": 0.122412109375, "grad_norm": 0.3349359631538391, "learning_rate": 0.0004868496271633127, "loss": 1.9779, "step": 2507 }, { "epoch": 0.1224609375, "grad_norm": 0.3851323425769806, "learning_rate": 0.00048683782509282127, "loss": 1.9608, "step": 2508 }, { "epoch": 0.122509765625, "grad_norm": 0.25658246874809265, "learning_rate": 0.0004868260178882699, "loss": 1.954, "step": 2509 }, { "epoch": 0.12255859375, "grad_norm": 0.29346317052841187, "learning_rate": 0.0004868142055499448, "loss": 1.9679, "step": 2510 }, { "epoch": 0.122607421875, "grad_norm": 0.2904811501502991, "learning_rate": 0.00048680238807813234, "loss": 1.9328, "step": 2511 }, { "epoch": 0.12265625, "grad_norm": 0.253656804561615, "learning_rate": 0.0004867905654731187, "loss": 1.9253, "step": 2512 }, { "epoch": 0.122705078125, "grad_norm": 0.32370486855506897, "learning_rate": 0.0004867787377351907, "loss": 1.9598, "step": 2513 }, { "epoch": 0.12275390625, "grad_norm": 0.33687764406204224, "learning_rate": 0.00048676690486463474, "loss": 1.9883, "step": 2514 }, { "epoch": 0.122802734375, "grad_norm": 0.34391114115715027, "learning_rate": 0.00048675506686173784, "loss": 2.0213, "step": 2515 }, { "epoch": 0.1228515625, "grad_norm": 0.3241748511791229, "learning_rate": 0.0004867432237267867, "loss": 1.9679, "step": 2516 }, { "epoch": 0.122900390625, "grad_norm": 0.33956843614578247, "learning_rate": 0.00048673137546006843, "loss": 2.0148, "step": 2517 }, { "epoch": 0.12294921875, "grad_norm": 0.34688493609428406, "learning_rate": 0.00048671952206187007, "loss": 1.9379, "step": 2518 }, { "epoch": 0.122998046875, "grad_norm": 0.34522438049316406, "learning_rate": 0.00048670766353247914, "loss": 1.982, "step": 2519 }, { "epoch": 0.123046875, "grad_norm": 0.37780478596687317, "learning_rate": 0.00048669579987218285, "loss": 1.949, "step": 2520 }, { "epoch": 0.123095703125, "grad_norm": 0.34797346591949463, "learning_rate": 0.0004866839310812688, "loss": 1.9708, "step": 2521 }, { "epoch": 0.12314453125, "grad_norm": 0.30262649059295654, "learning_rate": 0.00048667205716002455, "loss": 1.9731, "step": 2522 }, { "epoch": 0.123193359375, "grad_norm": 0.3320465385913849, "learning_rate": 0.000486660178108738, "loss": 1.9577, "step": 2523 }, { "epoch": 0.1232421875, "grad_norm": 0.4179232716560364, "learning_rate": 0.0004866482939276969, "loss": 1.9543, "step": 2524 }, { "epoch": 0.123291015625, "grad_norm": 0.4148147404193878, "learning_rate": 0.0004866364046171895, "loss": 1.9822, "step": 2525 }, { "epoch": 0.12333984375, "grad_norm": 0.32959797978401184, "learning_rate": 0.00048662451017750377, "loss": 1.9231, "step": 2526 }, { "epoch": 0.123388671875, "grad_norm": 0.32570526003837585, "learning_rate": 0.000486612610608928, "loss": 1.9604, "step": 2527 }, { "epoch": 0.1234375, "grad_norm": 0.3846050500869751, "learning_rate": 0.0004866007059117505, "loss": 1.9128, "step": 2528 }, { "epoch": 0.123486328125, "grad_norm": 0.3598853647708893, "learning_rate": 0.00048658879608626, "loss": 1.9324, "step": 2529 }, { "epoch": 0.12353515625, "grad_norm": 0.2911962568759918, "learning_rate": 0.00048657688113274507, "loss": 1.9799, "step": 2530 }, { "epoch": 0.123583984375, "grad_norm": 0.3436177372932434, "learning_rate": 0.00048656496105149434, "loss": 2.0058, "step": 2531 }, { "epoch": 0.1236328125, "grad_norm": 0.2969202995300293, "learning_rate": 0.00048655303584279686, "loss": 1.9675, "step": 2532 }, { "epoch": 0.123681640625, "grad_norm": 0.26267364621162415, "learning_rate": 0.0004865411055069416, "loss": 1.961, "step": 2533 }, { "epoch": 0.12373046875, "grad_norm": 0.30999690294265747, "learning_rate": 0.0004865291700442177, "loss": 1.9929, "step": 2534 }, { "epoch": 0.123779296875, "grad_norm": 0.30265551805496216, "learning_rate": 0.00048651722945491444, "loss": 1.9413, "step": 2535 }, { "epoch": 0.123828125, "grad_norm": 0.2951200306415558, "learning_rate": 0.0004865052837393212, "loss": 1.9285, "step": 2536 }, { "epoch": 0.123876953125, "grad_norm": 0.3383113443851471, "learning_rate": 0.00048649333289772746, "loss": 1.9261, "step": 2537 }, { "epoch": 0.12392578125, "grad_norm": 0.26945480704307556, "learning_rate": 0.00048648137693042283, "loss": 1.9413, "step": 2538 }, { "epoch": 0.123974609375, "grad_norm": 0.2493751049041748, "learning_rate": 0.00048646941583769724, "loss": 1.9698, "step": 2539 }, { "epoch": 0.1240234375, "grad_norm": 0.2991991341114044, "learning_rate": 0.0004864574496198404, "loss": 1.914, "step": 2540 }, { "epoch": 0.124072265625, "grad_norm": 0.31213292479515076, "learning_rate": 0.00048644547827714235, "loss": 1.934, "step": 2541 }, { "epoch": 0.12412109375, "grad_norm": 0.24150533974170685, "learning_rate": 0.0004864335018098933, "loss": 1.9437, "step": 2542 }, { "epoch": 0.124169921875, "grad_norm": 0.37293708324432373, "learning_rate": 0.00048642152021838346, "loss": 1.9436, "step": 2543 }, { "epoch": 0.12421875, "grad_norm": 0.408235639333725, "learning_rate": 0.00048640953350290324, "loss": 1.9672, "step": 2544 }, { "epoch": 0.124267578125, "grad_norm": 0.31446272134780884, "learning_rate": 0.00048639754166374317, "loss": 1.941, "step": 2545 }, { "epoch": 0.12431640625, "grad_norm": 0.4439429044723511, "learning_rate": 0.0004863855447011938, "loss": 1.9937, "step": 2546 }, { "epoch": 0.124365234375, "grad_norm": 0.3216986358165741, "learning_rate": 0.00048637354261554593, "loss": 1.9274, "step": 2547 }, { "epoch": 0.1244140625, "grad_norm": 0.40352901816368103, "learning_rate": 0.00048636153540709045, "loss": 1.958, "step": 2548 }, { "epoch": 0.124462890625, "grad_norm": 0.4253542721271515, "learning_rate": 0.00048634952307611835, "loss": 1.9325, "step": 2549 }, { "epoch": 0.12451171875, "grad_norm": 0.28463107347488403, "learning_rate": 0.0004863375056229208, "loss": 1.9757, "step": 2550 }, { "epoch": 0.124560546875, "grad_norm": 0.42416971921920776, "learning_rate": 0.0004863254830477889, "loss": 1.9524, "step": 2551 }, { "epoch": 0.124609375, "grad_norm": 0.2886291742324829, "learning_rate": 0.00048631345535101426, "loss": 1.9586, "step": 2552 }, { "epoch": 0.124658203125, "grad_norm": 0.361136794090271, "learning_rate": 0.00048630142253288815, "loss": 1.962, "step": 2553 }, { "epoch": 0.12470703125, "grad_norm": 0.32792994379997253, "learning_rate": 0.0004862893845937024, "loss": 1.9599, "step": 2554 }, { "epoch": 0.124755859375, "grad_norm": 0.3343014717102051, "learning_rate": 0.0004862773415337486, "loss": 1.914, "step": 2555 }, { "epoch": 0.1248046875, "grad_norm": 0.39482980966567993, "learning_rate": 0.0004862652933533188, "loss": 1.9274, "step": 2556 }, { "epoch": 0.124853515625, "grad_norm": 0.27788245677948, "learning_rate": 0.0004862532400527048, "loss": 1.9582, "step": 2557 }, { "epoch": 0.12490234375, "grad_norm": 0.25878629088401794, "learning_rate": 0.00048624118163219875, "loss": 1.9449, "step": 2558 }, { "epoch": 0.124951171875, "grad_norm": 0.29491305351257324, "learning_rate": 0.0004862291180920931, "loss": 1.9917, "step": 2559 }, { "epoch": 0.125, "grad_norm": 0.3165069818496704, "learning_rate": 0.00048621704943267995, "loss": 1.9635, "step": 2560 }, { "epoch": 0.125048828125, "grad_norm": 0.2744191884994507, "learning_rate": 0.00048620497565425195, "loss": 1.9417, "step": 2561 }, { "epoch": 0.12509765625, "grad_norm": 0.26746875047683716, "learning_rate": 0.00048619289675710177, "loss": 1.9442, "step": 2562 }, { "epoch": 0.125146484375, "grad_norm": 0.3874928057193756, "learning_rate": 0.000486180812741522, "loss": 1.9511, "step": 2563 }, { "epoch": 0.1251953125, "grad_norm": 0.4072100520133972, "learning_rate": 0.0004861687236078055, "loss": 1.9191, "step": 2564 }, { "epoch": 0.125244140625, "grad_norm": 0.37201544642448425, "learning_rate": 0.0004861566293562454, "loss": 1.983, "step": 2565 }, { "epoch": 0.12529296875, "grad_norm": 0.3572579026222229, "learning_rate": 0.0004861445299871348, "loss": 1.9397, "step": 2566 }, { "epoch": 0.125341796875, "grad_norm": 0.39208346605300903, "learning_rate": 0.0004861324255007668, "loss": 1.9763, "step": 2567 }, { "epoch": 0.125390625, "grad_norm": 0.2901099920272827, "learning_rate": 0.0004861203158974349, "loss": 1.9267, "step": 2568 }, { "epoch": 0.125439453125, "grad_norm": 0.31990084052085876, "learning_rate": 0.0004861082011774324, "loss": 1.9226, "step": 2569 }, { "epoch": 0.12548828125, "grad_norm": 0.3376406133174896, "learning_rate": 0.00048609608134105324, "loss": 1.9423, "step": 2570 }, { "epoch": 0.125537109375, "grad_norm": 0.31049293279647827, "learning_rate": 0.00048608395638859083, "loss": 1.9434, "step": 2571 }, { "epoch": 0.1255859375, "grad_norm": 0.3145679831504822, "learning_rate": 0.0004860718263203393, "loss": 1.9307, "step": 2572 }, { "epoch": 0.125634765625, "grad_norm": 0.30444326996803284, "learning_rate": 0.00048605969113659224, "loss": 1.9368, "step": 2573 }, { "epoch": 0.12568359375, "grad_norm": 0.2955378293991089, "learning_rate": 0.0004860475508376442, "loss": 1.9308, "step": 2574 }, { "epoch": 0.125732421875, "grad_norm": 0.28644588589668274, "learning_rate": 0.0004860354054237891, "loss": 1.9815, "step": 2575 }, { "epoch": 0.12578125, "grad_norm": 0.2854309678077698, "learning_rate": 0.00048602325489532146, "loss": 1.9141, "step": 2576 }, { "epoch": 0.125830078125, "grad_norm": 0.2763071656227112, "learning_rate": 0.00048601109925253567, "loss": 1.9512, "step": 2577 }, { "epoch": 0.12587890625, "grad_norm": 0.3390391170978546, "learning_rate": 0.00048599893849572646, "loss": 1.9455, "step": 2578 }, { "epoch": 0.125927734375, "grad_norm": 0.28441181778907776, "learning_rate": 0.0004859867726251884, "loss": 1.9427, "step": 2579 }, { "epoch": 0.1259765625, "grad_norm": 0.25041723251342773, "learning_rate": 0.00048597460164121636, "loss": 1.9916, "step": 2580 }, { "epoch": 0.126025390625, "grad_norm": 0.285990446805954, "learning_rate": 0.00048596242554410537, "loss": 1.9588, "step": 2581 }, { "epoch": 0.12607421875, "grad_norm": 0.2763587534427643, "learning_rate": 0.00048595024433415054, "loss": 1.9356, "step": 2582 }, { "epoch": 0.126123046875, "grad_norm": 0.24998420476913452, "learning_rate": 0.000485938058011647, "loss": 1.948, "step": 2583 }, { "epoch": 0.126171875, "grad_norm": 0.29205650091171265, "learning_rate": 0.0004859258665768903, "loss": 1.9727, "step": 2584 }, { "epoch": 0.126220703125, "grad_norm": 0.3379195034503937, "learning_rate": 0.00048591367003017564, "loss": 2.0149, "step": 2585 }, { "epoch": 0.12626953125, "grad_norm": 0.30479851365089417, "learning_rate": 0.00048590146837179876, "loss": 1.9308, "step": 2586 }, { "epoch": 0.126318359375, "grad_norm": 0.23257413506507874, "learning_rate": 0.00048588926160205543, "loss": 1.9194, "step": 2587 }, { "epoch": 0.1263671875, "grad_norm": 0.22597797214984894, "learning_rate": 0.00048587704972124135, "loss": 1.9844, "step": 2588 }, { "epoch": 0.126416015625, "grad_norm": 0.2500629723072052, "learning_rate": 0.00048586483272965256, "loss": 1.9664, "step": 2589 }, { "epoch": 0.12646484375, "grad_norm": 0.27759456634521484, "learning_rate": 0.0004858526106275851, "loss": 1.9708, "step": 2590 }, { "epoch": 0.126513671875, "grad_norm": 0.3112030327320099, "learning_rate": 0.0004858403834153353, "loss": 1.9861, "step": 2591 }, { "epoch": 0.1265625, "grad_norm": 0.37497976422309875, "learning_rate": 0.00048582815109319936, "loss": 1.9517, "step": 2592 }, { "epoch": 0.126611328125, "grad_norm": 0.41083982586860657, "learning_rate": 0.00048581591366147385, "loss": 1.9366, "step": 2593 }, { "epoch": 0.12666015625, "grad_norm": 0.3792741894721985, "learning_rate": 0.0004858036711204553, "loss": 1.9498, "step": 2594 }, { "epoch": 0.126708984375, "grad_norm": 0.30756959319114685, "learning_rate": 0.0004857914234704404, "loss": 1.9293, "step": 2595 }, { "epoch": 0.1267578125, "grad_norm": 0.3013686239719391, "learning_rate": 0.000485779170711726, "loss": 1.9016, "step": 2596 }, { "epoch": 0.126806640625, "grad_norm": 0.30475687980651855, "learning_rate": 0.0004857669128446091, "loss": 1.935, "step": 2597 }, { "epoch": 0.12685546875, "grad_norm": 0.30427220463752747, "learning_rate": 0.00048575464986938674, "loss": 1.8935, "step": 2598 }, { "epoch": 0.126904296875, "grad_norm": 0.30789080262184143, "learning_rate": 0.00048574238178635605, "loss": 1.9675, "step": 2599 }, { "epoch": 0.126953125, "grad_norm": 0.25295355916023254, "learning_rate": 0.0004857301085958145, "loss": 1.9217, "step": 2600 }, { "epoch": 0.127001953125, "grad_norm": 0.28840503096580505, "learning_rate": 0.00048571783029805946, "loss": 1.9569, "step": 2601 }, { "epoch": 0.12705078125, "grad_norm": 0.4125131666660309, "learning_rate": 0.0004857055468933885, "loss": 1.9591, "step": 2602 }, { "epoch": 0.127099609375, "grad_norm": 0.3570202887058258, "learning_rate": 0.00048569325838209934, "loss": 1.9921, "step": 2603 }, { "epoch": 0.1271484375, "grad_norm": 0.30606308579444885, "learning_rate": 0.0004856809647644897, "loss": 1.9872, "step": 2604 }, { "epoch": 0.127197265625, "grad_norm": 0.2996368110179901, "learning_rate": 0.0004856686660408577, "loss": 1.9341, "step": 2605 }, { "epoch": 0.12724609375, "grad_norm": 0.2904329001903534, "learning_rate": 0.00048565636221150135, "loss": 1.9957, "step": 2606 }, { "epoch": 0.127294921875, "grad_norm": 0.281369149684906, "learning_rate": 0.00048564405327671884, "loss": 1.9437, "step": 2607 }, { "epoch": 0.12734375, "grad_norm": 0.30505800247192383, "learning_rate": 0.0004856317392368084, "loss": 1.9317, "step": 2608 }, { "epoch": 0.127392578125, "grad_norm": 0.3160969018936157, "learning_rate": 0.00048561942009206857, "loss": 1.9474, "step": 2609 }, { "epoch": 0.12744140625, "grad_norm": 0.2770478129386902, "learning_rate": 0.0004856070958427979, "loss": 1.9533, "step": 2610 }, { "epoch": 0.127490234375, "grad_norm": 0.24775753915309906, "learning_rate": 0.000485594766489295, "loss": 1.9456, "step": 2611 }, { "epoch": 0.1275390625, "grad_norm": 0.25498297810554504, "learning_rate": 0.0004855824320318589, "loss": 1.9271, "step": 2612 }, { "epoch": 0.127587890625, "grad_norm": 0.2696218490600586, "learning_rate": 0.0004855700924707882, "loss": 1.9803, "step": 2613 }, { "epoch": 0.12763671875, "grad_norm": 0.27652764320373535, "learning_rate": 0.0004855577478063822, "loss": 1.945, "step": 2614 }, { "epoch": 0.127685546875, "grad_norm": 0.29062485694885254, "learning_rate": 0.00048554539803894007, "loss": 1.9499, "step": 2615 }, { "epoch": 0.127734375, "grad_norm": 0.3093293607234955, "learning_rate": 0.000485533043168761, "loss": 1.9544, "step": 2616 }, { "epoch": 0.127783203125, "grad_norm": 0.42320266366004944, "learning_rate": 0.0004855206831961445, "loss": 1.9179, "step": 2617 }, { "epoch": 0.12783203125, "grad_norm": 0.4997412860393524, "learning_rate": 0.0004855083181213902, "loss": 1.9266, "step": 2618 }, { "epoch": 0.127880859375, "grad_norm": 0.37234798073768616, "learning_rate": 0.00048549594794479754, "loss": 2.0049, "step": 2619 }, { "epoch": 0.1279296875, "grad_norm": 0.3560904562473297, "learning_rate": 0.00048548357266666657, "loss": 1.9305, "step": 2620 }, { "epoch": 0.127978515625, "grad_norm": 0.3886494040489197, "learning_rate": 0.00048547119228729716, "loss": 1.994, "step": 2621 }, { "epoch": 0.12802734375, "grad_norm": 0.3021526038646698, "learning_rate": 0.0004854588068069892, "loss": 1.9563, "step": 2622 }, { "epoch": 0.128076171875, "grad_norm": 0.2974371016025543, "learning_rate": 0.000485446416226043, "loss": 1.9635, "step": 2623 }, { "epoch": 0.128125, "grad_norm": 0.30604350566864014, "learning_rate": 0.0004854340205447589, "loss": 1.9407, "step": 2624 }, { "epoch": 0.128173828125, "grad_norm": 0.3186424672603607, "learning_rate": 0.00048542161976343717, "loss": 1.9439, "step": 2625 }, { "epoch": 0.12822265625, "grad_norm": 0.26075148582458496, "learning_rate": 0.00048540921388237856, "loss": 1.972, "step": 2626 }, { "epoch": 0.128271484375, "grad_norm": 0.30541688203811646, "learning_rate": 0.00048539680290188346, "loss": 1.9726, "step": 2627 }, { "epoch": 0.1283203125, "grad_norm": 0.3373078405857086, "learning_rate": 0.0004853843868222529, "loss": 1.9774, "step": 2628 }, { "epoch": 0.128369140625, "grad_norm": 0.3442203104496002, "learning_rate": 0.00048537196564378765, "loss": 1.9675, "step": 2629 }, { "epoch": 0.12841796875, "grad_norm": 0.3551578223705292, "learning_rate": 0.00048535953936678885, "loss": 1.9742, "step": 2630 }, { "epoch": 0.128466796875, "grad_norm": 0.30386850237846375, "learning_rate": 0.0004853471079915576, "loss": 1.9248, "step": 2631 }, { "epoch": 0.128515625, "grad_norm": 0.27662909030914307, "learning_rate": 0.00048533467151839517, "loss": 1.9705, "step": 2632 }, { "epoch": 0.128564453125, "grad_norm": 0.28997212648391724, "learning_rate": 0.00048532222994760306, "loss": 1.9563, "step": 2633 }, { "epoch": 0.12861328125, "grad_norm": 0.4161379933357239, "learning_rate": 0.0004853097832794827, "loss": 1.9592, "step": 2634 }, { "epoch": 0.128662109375, "grad_norm": 0.5349624156951904, "learning_rate": 0.00048529733151433577, "loss": 1.93, "step": 2635 }, { "epoch": 0.1287109375, "grad_norm": 0.399630606174469, "learning_rate": 0.000485284874652464, "loss": 1.9324, "step": 2636 }, { "epoch": 0.128759765625, "grad_norm": 0.36842992901802063, "learning_rate": 0.00048527241269416945, "loss": 1.8926, "step": 2637 }, { "epoch": 0.12880859375, "grad_norm": 0.3969930112361908, "learning_rate": 0.000485259945639754, "loss": 1.9851, "step": 2638 }, { "epoch": 0.128857421875, "grad_norm": 0.34111133217811584, "learning_rate": 0.00048524747348951985, "loss": 1.9602, "step": 2639 }, { "epoch": 0.12890625, "grad_norm": 0.3091104328632355, "learning_rate": 0.00048523499624376925, "loss": 1.9565, "step": 2640 }, { "epoch": 0.128955078125, "grad_norm": 0.2949180603027344, "learning_rate": 0.0004852225139028047, "loss": 1.918, "step": 2641 }, { "epoch": 0.12900390625, "grad_norm": 0.3323022723197937, "learning_rate": 0.00048521002646692855, "loss": 1.9861, "step": 2642 }, { "epoch": 0.129052734375, "grad_norm": 0.30736616253852844, "learning_rate": 0.00048519753393644354, "loss": 1.9768, "step": 2643 }, { "epoch": 0.1291015625, "grad_norm": 0.27663907408714294, "learning_rate": 0.0004851850363116524, "loss": 1.8934, "step": 2644 }, { "epoch": 0.129150390625, "grad_norm": 0.3486502468585968, "learning_rate": 0.0004851725335928581, "loss": 1.9394, "step": 2645 }, { "epoch": 0.12919921875, "grad_norm": 0.31324103474617004, "learning_rate": 0.0004851600257803636, "loss": 1.9613, "step": 2646 }, { "epoch": 0.129248046875, "grad_norm": 0.26974278688430786, "learning_rate": 0.000485147512874472, "loss": 1.9615, "step": 2647 }, { "epoch": 0.129296875, "grad_norm": 0.29994070529937744, "learning_rate": 0.00048513499487548665, "loss": 1.928, "step": 2648 }, { "epoch": 0.129345703125, "grad_norm": 0.29376593232154846, "learning_rate": 0.00048512247178371083, "loss": 1.967, "step": 2649 }, { "epoch": 0.12939453125, "grad_norm": 0.27131256461143494, "learning_rate": 0.00048510994359944804, "loss": 1.9694, "step": 2650 }, { "epoch": 0.129443359375, "grad_norm": 0.2881922125816345, "learning_rate": 0.00048509741032300206, "loss": 1.9442, "step": 2651 }, { "epoch": 0.1294921875, "grad_norm": 0.2579802870750427, "learning_rate": 0.00048508487195467653, "loss": 1.95, "step": 2652 }, { "epoch": 0.129541015625, "grad_norm": 0.26406988501548767, "learning_rate": 0.00048507232849477535, "loss": 1.9274, "step": 2653 }, { "epoch": 0.12958984375, "grad_norm": 0.2724282443523407, "learning_rate": 0.0004850597799436025, "loss": 1.9517, "step": 2654 }, { "epoch": 0.129638671875, "grad_norm": 0.2562747001647949, "learning_rate": 0.00048504722630146217, "loss": 1.9552, "step": 2655 }, { "epoch": 0.1296875, "grad_norm": 0.3463316857814789, "learning_rate": 0.00048503466756865847, "loss": 1.9385, "step": 2656 }, { "epoch": 0.129736328125, "grad_norm": 0.28768643736839294, "learning_rate": 0.00048502210374549586, "loss": 1.9242, "step": 2657 }, { "epoch": 0.12978515625, "grad_norm": 0.285569429397583, "learning_rate": 0.00048500953483227895, "loss": 1.9582, "step": 2658 }, { "epoch": 0.129833984375, "grad_norm": 0.32259494066238403, "learning_rate": 0.0004849969608293122, "loss": 2.0019, "step": 2659 }, { "epoch": 0.1298828125, "grad_norm": 0.38154006004333496, "learning_rate": 0.0004849843817369003, "loss": 1.9627, "step": 2660 }, { "epoch": 0.129931640625, "grad_norm": 0.287798672914505, "learning_rate": 0.0004849717975553483, "loss": 1.9434, "step": 2661 }, { "epoch": 0.12998046875, "grad_norm": 0.284227192401886, "learning_rate": 0.0004849592082849611, "loss": 1.9159, "step": 2662 }, { "epoch": 0.130029296875, "grad_norm": 0.34681206941604614, "learning_rate": 0.0004849466139260438, "loss": 1.9868, "step": 2663 }, { "epoch": 0.130078125, "grad_norm": 0.25580158829689026, "learning_rate": 0.0004849340144789016, "loss": 1.9944, "step": 2664 }, { "epoch": 0.130126953125, "grad_norm": 0.28213152289390564, "learning_rate": 0.0004849214099438399, "loss": 1.9577, "step": 2665 }, { "epoch": 0.13017578125, "grad_norm": 0.26044562458992004, "learning_rate": 0.00048490880032116425, "loss": 1.9485, "step": 2666 }, { "epoch": 0.130224609375, "grad_norm": 0.26660290360450745, "learning_rate": 0.0004848961856111801, "loss": 1.9126, "step": 2667 }, { "epoch": 0.1302734375, "grad_norm": 0.37028783559799194, "learning_rate": 0.0004848835658141934, "loss": 1.9374, "step": 2668 }, { "epoch": 0.130322265625, "grad_norm": 0.42466700077056885, "learning_rate": 0.0004848709409305097, "loss": 1.9538, "step": 2669 }, { "epoch": 0.13037109375, "grad_norm": 0.382070392370224, "learning_rate": 0.00048485831096043526, "loss": 1.9797, "step": 2670 }, { "epoch": 0.130419921875, "grad_norm": 0.3576294183731079, "learning_rate": 0.000484845675904276, "loss": 1.9696, "step": 2671 }, { "epoch": 0.13046875, "grad_norm": 0.3610958158969879, "learning_rate": 0.0004848330357623382, "loss": 1.9317, "step": 2672 }, { "epoch": 0.130517578125, "grad_norm": 0.3147321045398712, "learning_rate": 0.00048482039053492814, "loss": 1.9142, "step": 2673 }, { "epoch": 0.13056640625, "grad_norm": 0.3535750210285187, "learning_rate": 0.0004848077402223524, "loss": 1.9648, "step": 2674 }, { "epoch": 0.130615234375, "grad_norm": 0.41160187125205994, "learning_rate": 0.0004847950848249176, "loss": 1.9718, "step": 2675 }, { "epoch": 0.1306640625, "grad_norm": 0.2957160472869873, "learning_rate": 0.0004847824243429302, "loss": 1.9792, "step": 2676 }, { "epoch": 0.130712890625, "grad_norm": 0.3309563994407654, "learning_rate": 0.0004847697587766973, "loss": 1.9142, "step": 2677 }, { "epoch": 0.13076171875, "grad_norm": 0.36193612217903137, "learning_rate": 0.0004847570881265259, "loss": 1.9591, "step": 2678 }, { "epoch": 0.130810546875, "grad_norm": 0.2632095515727997, "learning_rate": 0.0004847444123927228, "loss": 1.9695, "step": 2679 }, { "epoch": 0.130859375, "grad_norm": 0.34325411915779114, "learning_rate": 0.0004847317315755953, "loss": 1.9282, "step": 2680 }, { "epoch": 0.130908203125, "grad_norm": 0.37307414412498474, "learning_rate": 0.00048471904567545094, "loss": 1.9511, "step": 2681 }, { "epoch": 0.13095703125, "grad_norm": 0.28530657291412354, "learning_rate": 0.00048470635469259697, "loss": 1.9017, "step": 2682 }, { "epoch": 0.131005859375, "grad_norm": 0.3219052851200104, "learning_rate": 0.00048469365862734094, "loss": 1.9452, "step": 2683 }, { "epoch": 0.1310546875, "grad_norm": 0.29376983642578125, "learning_rate": 0.00048468095747999067, "loss": 1.9366, "step": 2684 }, { "epoch": 0.131103515625, "grad_norm": 0.38850560784339905, "learning_rate": 0.000484668251250854, "loss": 1.9606, "step": 2685 }, { "epoch": 0.13115234375, "grad_norm": 0.4430956244468689, "learning_rate": 0.00048465553994023875, "loss": 1.9069, "step": 2686 }, { "epoch": 0.131201171875, "grad_norm": 0.30804160237312317, "learning_rate": 0.0004846428235484531, "loss": 1.9887, "step": 2687 }, { "epoch": 0.13125, "grad_norm": 0.33121687173843384, "learning_rate": 0.00048463010207580517, "loss": 1.9187, "step": 2688 }, { "epoch": 0.131298828125, "grad_norm": 0.32628971338272095, "learning_rate": 0.0004846173755226033, "loss": 1.9309, "step": 2689 }, { "epoch": 0.13134765625, "grad_norm": 0.25765877962112427, "learning_rate": 0.000484604643889156, "loss": 1.9534, "step": 2690 }, { "epoch": 0.131396484375, "grad_norm": 0.2857937216758728, "learning_rate": 0.00048459190717577166, "loss": 1.9426, "step": 2691 }, { "epoch": 0.1314453125, "grad_norm": 0.25007742643356323, "learning_rate": 0.0004845791653827591, "loss": 1.963, "step": 2692 }, { "epoch": 0.131494140625, "grad_norm": 0.28668156266212463, "learning_rate": 0.0004845664185104271, "loss": 1.9509, "step": 2693 }, { "epoch": 0.13154296875, "grad_norm": 0.2508494555950165, "learning_rate": 0.00048455366655908455, "loss": 1.9502, "step": 2694 }, { "epoch": 0.131591796875, "grad_norm": 0.31007641553878784, "learning_rate": 0.00048454090952904056, "loss": 1.9794, "step": 2695 }, { "epoch": 0.131640625, "grad_norm": 0.41099441051483154, "learning_rate": 0.0004845281474206043, "loss": 1.9706, "step": 2696 }, { "epoch": 0.131689453125, "grad_norm": 0.33117562532424927, "learning_rate": 0.00048451538023408503, "loss": 1.9414, "step": 2697 }, { "epoch": 0.13173828125, "grad_norm": 0.33254456520080566, "learning_rate": 0.00048450260796979223, "loss": 1.9183, "step": 2698 }, { "epoch": 0.131787109375, "grad_norm": 0.46011707186698914, "learning_rate": 0.0004844898306280354, "loss": 1.9329, "step": 2699 }, { "epoch": 0.1318359375, "grad_norm": 0.3635769784450531, "learning_rate": 0.0004844770482091242, "loss": 1.9072, "step": 2700 }, { "epoch": 0.131884765625, "grad_norm": 0.44029104709625244, "learning_rate": 0.00048446426071336847, "loss": 1.9658, "step": 2701 }, { "epoch": 0.13193359375, "grad_norm": 0.5665297508239746, "learning_rate": 0.00048445146814107804, "loss": 1.9534, "step": 2702 }, { "epoch": 0.131982421875, "grad_norm": 0.40592139959335327, "learning_rate": 0.00048443867049256307, "loss": 1.9369, "step": 2703 }, { "epoch": 0.13203125, "grad_norm": 0.3791857063770294, "learning_rate": 0.00048442586776813363, "loss": 1.9429, "step": 2704 }, { "epoch": 0.132080078125, "grad_norm": 0.3474803566932678, "learning_rate": 0.0004844130599681001, "loss": 1.884, "step": 2705 }, { "epoch": 0.13212890625, "grad_norm": 0.30125898122787476, "learning_rate": 0.00048440024709277274, "loss": 1.9797, "step": 2706 }, { "epoch": 0.132177734375, "grad_norm": 0.29995083808898926, "learning_rate": 0.0004843874291424622, "loss": 1.9463, "step": 2707 }, { "epoch": 0.1322265625, "grad_norm": 0.29136717319488525, "learning_rate": 0.00048437460611747916, "loss": 1.9589, "step": 2708 }, { "epoch": 0.132275390625, "grad_norm": 0.3408009111881256, "learning_rate": 0.0004843617780181342, "loss": 1.9595, "step": 2709 }, { "epoch": 0.13232421875, "grad_norm": 0.28607192635536194, "learning_rate": 0.0004843489448447385, "loss": 1.9357, "step": 2710 }, { "epoch": 0.132373046875, "grad_norm": 0.28544434905052185, "learning_rate": 0.00048433610659760274, "loss": 1.9488, "step": 2711 }, { "epoch": 0.132421875, "grad_norm": 0.32396015524864197, "learning_rate": 0.0004843232632770384, "loss": 1.9161, "step": 2712 }, { "epoch": 0.132470703125, "grad_norm": 0.29541540145874023, "learning_rate": 0.0004843104148833565, "loss": 1.7987, "step": 2713 }, { "epoch": 0.13251953125, "grad_norm": 0.3414922058582306, "learning_rate": 0.00048429756141686863, "loss": 1.9069, "step": 2714 }, { "epoch": 0.132568359375, "grad_norm": 0.3674732446670532, "learning_rate": 0.0004842847028778862, "loss": 2.0191, "step": 2715 }, { "epoch": 0.1326171875, "grad_norm": 0.2976113557815552, "learning_rate": 0.00048427183926672083, "loss": 1.9842, "step": 2716 }, { "epoch": 0.132666015625, "grad_norm": 0.4438100755214691, "learning_rate": 0.00048425897058368433, "loss": 2.0009, "step": 2717 }, { "epoch": 0.13271484375, "grad_norm": 0.358058363199234, "learning_rate": 0.00048424609682908856, "loss": 1.9279, "step": 2718 }, { "epoch": 0.132763671875, "grad_norm": 0.2905043959617615, "learning_rate": 0.0004842332180032455, "loss": 1.9208, "step": 2719 }, { "epoch": 0.1328125, "grad_norm": 0.4169381856918335, "learning_rate": 0.0004842203341064673, "loss": 1.9943, "step": 2720 }, { "epoch": 0.132861328125, "grad_norm": 0.3086134195327759, "learning_rate": 0.0004842074451390663, "loss": 1.9377, "step": 2721 }, { "epoch": 0.13291015625, "grad_norm": 0.2989913821220398, "learning_rate": 0.0004841945511013547, "loss": 1.9317, "step": 2722 }, { "epoch": 0.132958984375, "grad_norm": 0.380631685256958, "learning_rate": 0.00048418165199364503, "loss": 1.9381, "step": 2723 }, { "epoch": 0.1330078125, "grad_norm": 0.29122987389564514, "learning_rate": 0.00048416874781625016, "loss": 1.9492, "step": 2724 }, { "epoch": 0.133056640625, "grad_norm": 0.32168418169021606, "learning_rate": 0.00048415583856948246, "loss": 1.9457, "step": 2725 }, { "epoch": 0.13310546875, "grad_norm": 0.41019541025161743, "learning_rate": 0.00048414292425365507, "loss": 1.9614, "step": 2726 }, { "epoch": 0.133154296875, "grad_norm": 0.29401734471321106, "learning_rate": 0.0004841300048690809, "loss": 1.9447, "step": 2727 }, { "epoch": 0.133203125, "grad_norm": 0.3441515564918518, "learning_rate": 0.00048411708041607305, "loss": 1.9459, "step": 2728 }, { "epoch": 0.133251953125, "grad_norm": 0.3598680794239044, "learning_rate": 0.00048410415089494477, "loss": 1.9237, "step": 2729 }, { "epoch": 0.13330078125, "grad_norm": 0.3428635597229004, "learning_rate": 0.0004840912163060093, "loss": 1.9522, "step": 2730 }, { "epoch": 0.133349609375, "grad_norm": 0.3365756869316101, "learning_rate": 0.0004840782766495803, "loss": 1.9165, "step": 2731 }, { "epoch": 0.1333984375, "grad_norm": 0.27347198128700256, "learning_rate": 0.00048406533192597124, "loss": 1.9571, "step": 2732 }, { "epoch": 0.133447265625, "grad_norm": 0.32136067748069763, "learning_rate": 0.00048405238213549594, "loss": 1.9535, "step": 2733 }, { "epoch": 0.13349609375, "grad_norm": 0.34407839179039, "learning_rate": 0.0004840394272784682, "loss": 1.9131, "step": 2734 }, { "epoch": 0.133544921875, "grad_norm": 0.29440370202064514, "learning_rate": 0.000484026467355202, "loss": 1.9565, "step": 2735 }, { "epoch": 0.13359375, "grad_norm": 0.28473877906799316, "learning_rate": 0.00048401350236601146, "loss": 1.9146, "step": 2736 }, { "epoch": 0.133642578125, "grad_norm": 0.29761719703674316, "learning_rate": 0.00048400053231121074, "loss": 1.9667, "step": 2737 }, { "epoch": 0.13369140625, "grad_norm": 0.32771578431129456, "learning_rate": 0.00048398755719111417, "loss": 1.9419, "step": 2738 }, { "epoch": 0.133740234375, "grad_norm": 0.38355210423469543, "learning_rate": 0.0004839745770060363, "loss": 1.9225, "step": 2739 }, { "epoch": 0.1337890625, "grad_norm": 0.3469833433628082, "learning_rate": 0.00048396159175629174, "loss": 1.9621, "step": 2740 }, { "epoch": 0.133837890625, "grad_norm": 0.304575651884079, "learning_rate": 0.000483948601442195, "loss": 1.9964, "step": 2741 }, { "epoch": 0.13388671875, "grad_norm": 0.35598430037498474, "learning_rate": 0.00048393560606406114, "loss": 1.9679, "step": 2742 }, { "epoch": 0.133935546875, "grad_norm": 0.26555126905441284, "learning_rate": 0.00048392260562220486, "loss": 1.9783, "step": 2743 }, { "epoch": 0.133984375, "grad_norm": 0.26156026124954224, "learning_rate": 0.0004839096001169416, "loss": 1.9563, "step": 2744 }, { "epoch": 0.134033203125, "grad_norm": 0.3228766918182373, "learning_rate": 0.00048389658954858615, "loss": 1.9732, "step": 2745 }, { "epoch": 0.13408203125, "grad_norm": 0.2956679165363312, "learning_rate": 0.0004838835739174541, "loss": 1.9467, "step": 2746 }, { "epoch": 0.134130859375, "grad_norm": 0.2933870851993561, "learning_rate": 0.00048387055322386083, "loss": 1.8897, "step": 2747 }, { "epoch": 0.1341796875, "grad_norm": 0.3015759587287903, "learning_rate": 0.0004838575274681219, "loss": 1.939, "step": 2748 }, { "epoch": 0.134228515625, "grad_norm": 0.3859144449234009, "learning_rate": 0.00048384449665055297, "loss": 1.9144, "step": 2749 }, { "epoch": 0.13427734375, "grad_norm": 0.3199724853038788, "learning_rate": 0.0004838314607714699, "loss": 1.9389, "step": 2750 }, { "epoch": 0.134326171875, "grad_norm": 0.3055242896080017, "learning_rate": 0.00048381841983118865, "loss": 1.9212, "step": 2751 }, { "epoch": 0.134375, "grad_norm": 0.3420690894126892, "learning_rate": 0.00048380537383002517, "loss": 1.9684, "step": 2752 }, { "epoch": 0.134423828125, "grad_norm": 0.43362897634506226, "learning_rate": 0.0004837923227682957, "loss": 1.919, "step": 2753 }, { "epoch": 0.13447265625, "grad_norm": 0.3559405505657196, "learning_rate": 0.0004837792666463166, "loss": 1.9473, "step": 2754 }, { "epoch": 0.134521484375, "grad_norm": 0.3308442533016205, "learning_rate": 0.0004837662054644041, "loss": 1.9571, "step": 2755 }, { "epoch": 0.1345703125, "grad_norm": 0.29690536856651306, "learning_rate": 0.00048375313922287505, "loss": 1.9773, "step": 2756 }, { "epoch": 0.134619140625, "grad_norm": 0.30477988719940186, "learning_rate": 0.0004837400679220459, "loss": 1.9206, "step": 2757 }, { "epoch": 0.13466796875, "grad_norm": 0.31890615820884705, "learning_rate": 0.00048372699156223355, "loss": 1.9312, "step": 2758 }, { "epoch": 0.134716796875, "grad_norm": 0.2632503807544708, "learning_rate": 0.0004837139101437548, "loss": 1.8936, "step": 2759 }, { "epoch": 0.134765625, "grad_norm": 0.26035577058792114, "learning_rate": 0.0004837008236669268, "loss": 1.903, "step": 2760 }, { "epoch": 0.134814453125, "grad_norm": 0.29139867424964905, "learning_rate": 0.0004836877321320666, "loss": 1.958, "step": 2761 }, { "epoch": 0.13486328125, "grad_norm": 0.32785144448280334, "learning_rate": 0.00048367463553949166, "loss": 1.9351, "step": 2762 }, { "epoch": 0.134912109375, "grad_norm": 0.2700155973434448, "learning_rate": 0.0004836615338895192, "loss": 1.9157, "step": 2763 }, { "epoch": 0.1349609375, "grad_norm": 0.30227431654930115, "learning_rate": 0.00048364842718246685, "loss": 1.9112, "step": 2764 }, { "epoch": 0.135009765625, "grad_norm": 0.2752290666103363, "learning_rate": 0.0004836353154186523, "loss": 1.9355, "step": 2765 }, { "epoch": 0.13505859375, "grad_norm": 0.3006250262260437, "learning_rate": 0.00048362219859839317, "loss": 1.9033, "step": 2766 }, { "epoch": 0.135107421875, "grad_norm": 0.3405635952949524, "learning_rate": 0.00048360907672200757, "loss": 1.9459, "step": 2767 }, { "epoch": 0.13515625, "grad_norm": 0.3239494860172272, "learning_rate": 0.0004835959497898133, "loss": 1.9909, "step": 2768 }, { "epoch": 0.135205078125, "grad_norm": 0.2614613473415375, "learning_rate": 0.0004835828178021287, "loss": 1.9491, "step": 2769 }, { "epoch": 0.13525390625, "grad_norm": 0.30761855840682983, "learning_rate": 0.0004835696807592718, "loss": 1.8793, "step": 2770 }, { "epoch": 0.135302734375, "grad_norm": 0.2930842936038971, "learning_rate": 0.00048355653866156116, "loss": 1.9258, "step": 2771 }, { "epoch": 0.1353515625, "grad_norm": 0.24054855108261108, "learning_rate": 0.0004835433915093153, "loss": 1.9175, "step": 2772 }, { "epoch": 0.135400390625, "grad_norm": 0.28279298543930054, "learning_rate": 0.0004835302393028528, "loss": 1.9238, "step": 2773 }, { "epoch": 0.13544921875, "grad_norm": 0.260881632566452, "learning_rate": 0.00048351708204249247, "loss": 1.9667, "step": 2774 }, { "epoch": 0.135498046875, "grad_norm": 0.2564978301525116, "learning_rate": 0.000483503919728553, "loss": 1.9388, "step": 2775 }, { "epoch": 0.135546875, "grad_norm": 0.3093903064727783, "learning_rate": 0.00048349075236135366, "loss": 1.963, "step": 2776 }, { "epoch": 0.135595703125, "grad_norm": 0.3190198838710785, "learning_rate": 0.00048347757994121333, "loss": 1.9548, "step": 2777 }, { "epoch": 0.13564453125, "grad_norm": 0.33027225732803345, "learning_rate": 0.0004834644024684515, "loss": 1.9334, "step": 2778 }, { "epoch": 0.135693359375, "grad_norm": 0.3581155240535736, "learning_rate": 0.0004834512199433872, "loss": 1.9623, "step": 2779 }, { "epoch": 0.1357421875, "grad_norm": 0.3519496023654938, "learning_rate": 0.00048343803236634023, "loss": 1.9369, "step": 2780 }, { "epoch": 0.135791015625, "grad_norm": 0.4017088711261749, "learning_rate": 0.00048342483973763006, "loss": 1.9762, "step": 2781 }, { "epoch": 0.13583984375, "grad_norm": 0.43068671226501465, "learning_rate": 0.00048341164205757654, "loss": 1.924, "step": 2782 }, { "epoch": 0.135888671875, "grad_norm": 0.28848010301589966, "learning_rate": 0.00048339843932649934, "loss": 1.8925, "step": 2783 }, { "epoch": 0.1359375, "grad_norm": 0.33056962490081787, "learning_rate": 0.0004833852315447186, "loss": 1.8834, "step": 2784 }, { "epoch": 0.135986328125, "grad_norm": 0.37369856238365173, "learning_rate": 0.0004833720187125543, "loss": 1.9464, "step": 2785 }, { "epoch": 0.13603515625, "grad_norm": 0.2799489200115204, "learning_rate": 0.0004833588008303267, "loss": 1.9707, "step": 2786 }, { "epoch": 0.136083984375, "grad_norm": 0.3170425593852997, "learning_rate": 0.00048334557789835627, "loss": 1.9459, "step": 2787 }, { "epoch": 0.1361328125, "grad_norm": 0.3325173854827881, "learning_rate": 0.00048333234991696335, "loss": 1.9471, "step": 2788 }, { "epoch": 0.136181640625, "grad_norm": 0.316687673330307, "learning_rate": 0.0004833191168864685, "loss": 1.9451, "step": 2789 }, { "epoch": 0.13623046875, "grad_norm": 0.29092201590538025, "learning_rate": 0.0004833058788071925, "loss": 1.9492, "step": 2790 }, { "epoch": 0.136279296875, "grad_norm": 0.31663262844085693, "learning_rate": 0.00048329263567945625, "loss": 1.977, "step": 2791 }, { "epoch": 0.136328125, "grad_norm": 0.24035926163196564, "learning_rate": 0.0004832793875035805, "loss": 1.9207, "step": 2792 }, { "epoch": 0.136376953125, "grad_norm": 0.28036943078041077, "learning_rate": 0.00048326613427988657, "loss": 1.9354, "step": 2793 }, { "epoch": 0.13642578125, "grad_norm": 0.2846924960613251, "learning_rate": 0.0004832528760086956, "loss": 1.9839, "step": 2794 }, { "epoch": 0.136474609375, "grad_norm": 0.23764120042324066, "learning_rate": 0.0004832396126903288, "loss": 1.9728, "step": 2795 }, { "epoch": 0.1365234375, "grad_norm": 0.2782424986362457, "learning_rate": 0.00048322634432510766, "loss": 1.9327, "step": 2796 }, { "epoch": 0.136572265625, "grad_norm": 0.34385165572166443, "learning_rate": 0.00048321307091335377, "loss": 1.9719, "step": 2797 }, { "epoch": 0.13662109375, "grad_norm": 0.36322009563446045, "learning_rate": 0.00048319979245538887, "loss": 1.945, "step": 2798 }, { "epoch": 0.136669921875, "grad_norm": 0.281920462846756, "learning_rate": 0.00048318650895153476, "loss": 1.9349, "step": 2799 }, { "epoch": 0.13671875, "grad_norm": 0.3715970814228058, "learning_rate": 0.0004831732204021134, "loss": 1.9317, "step": 2800 }, { "epoch": 0.136767578125, "grad_norm": 0.3810425102710724, "learning_rate": 0.00048315992680744664, "loss": 1.9325, "step": 2801 }, { "epoch": 0.13681640625, "grad_norm": 0.2981497645378113, "learning_rate": 0.00048314662816785687, "loss": 1.9532, "step": 2802 }, { "epoch": 0.136865234375, "grad_norm": 0.43036553263664246, "learning_rate": 0.0004831333244836664, "loss": 1.9185, "step": 2803 }, { "epoch": 0.1369140625, "grad_norm": 0.39123308658599854, "learning_rate": 0.00048312001575519757, "loss": 1.9628, "step": 2804 }, { "epoch": 0.136962890625, "grad_norm": 0.2709375023841858, "learning_rate": 0.000483106701982773, "loss": 1.9694, "step": 2805 }, { "epoch": 0.13701171875, "grad_norm": 0.32040828466415405, "learning_rate": 0.0004830933831667152, "loss": 1.9384, "step": 2806 }, { "epoch": 0.137060546875, "grad_norm": 0.2893323600292206, "learning_rate": 0.0004830800593073472, "loss": 1.9433, "step": 2807 }, { "epoch": 0.137109375, "grad_norm": 0.3141605854034424, "learning_rate": 0.0004830667304049918, "loss": 1.9735, "step": 2808 }, { "epoch": 0.137158203125, "grad_norm": 0.3353452682495117, "learning_rate": 0.00048305339645997195, "loss": 1.9701, "step": 2809 }, { "epoch": 0.13720703125, "grad_norm": 0.32058364152908325, "learning_rate": 0.0004830400574726109, "loss": 1.9791, "step": 2810 }, { "epoch": 0.137255859375, "grad_norm": 0.4249953031539917, "learning_rate": 0.0004830267134432319, "loss": 1.9263, "step": 2811 }, { "epoch": 0.1373046875, "grad_norm": 0.3600628077983856, "learning_rate": 0.00048301336437215844, "loss": 1.9534, "step": 2812 }, { "epoch": 0.137353515625, "grad_norm": 0.3414947986602783, "learning_rate": 0.0004830000102597139, "loss": 1.9887, "step": 2813 }, { "epoch": 0.13740234375, "grad_norm": 0.3680295944213867, "learning_rate": 0.0004829866511062221, "loss": 1.9651, "step": 2814 }, { "epoch": 0.137451171875, "grad_norm": 0.4307950735092163, "learning_rate": 0.00048297328691200667, "loss": 1.6557, "step": 2815 }, { "epoch": 0.1375, "grad_norm": 0.3412806987762451, "learning_rate": 0.0004829599176773916, "loss": 1.9411, "step": 2816 }, { "epoch": 0.137548828125, "grad_norm": 0.3560451865196228, "learning_rate": 0.0004829465434027007, "loss": 1.9134, "step": 2817 }, { "epoch": 0.13759765625, "grad_norm": 0.3333214521408081, "learning_rate": 0.0004829331640882584, "loss": 1.9426, "step": 2818 }, { "epoch": 0.137646484375, "grad_norm": 0.38358694314956665, "learning_rate": 0.00048291977973438877, "loss": 1.9469, "step": 2819 }, { "epoch": 0.1376953125, "grad_norm": 0.28628218173980713, "learning_rate": 0.0004829063903414162, "loss": 1.9567, "step": 2820 }, { "epoch": 0.137744140625, "grad_norm": 0.30209600925445557, "learning_rate": 0.0004828929959096653, "loss": 1.9666, "step": 2821 }, { "epoch": 0.13779296875, "grad_norm": 0.28982114791870117, "learning_rate": 0.00048287959643946056, "loss": 1.9194, "step": 2822 }, { "epoch": 0.137841796875, "grad_norm": 0.2329326570034027, "learning_rate": 0.00048286619193112684, "loss": 1.9238, "step": 2823 }, { "epoch": 0.137890625, "grad_norm": 0.25455278158187866, "learning_rate": 0.0004828527823849889, "loss": 1.918, "step": 2824 }, { "epoch": 0.137939453125, "grad_norm": 0.2574367821216583, "learning_rate": 0.0004828393678013718, "loss": 1.933, "step": 2825 }, { "epoch": 0.13798828125, "grad_norm": 0.30021119117736816, "learning_rate": 0.0004828259481806007, "loss": 1.9031, "step": 2826 }, { "epoch": 0.138037109375, "grad_norm": 0.27443069219589233, "learning_rate": 0.00048281252352300083, "loss": 1.9588, "step": 2827 }, { "epoch": 0.1380859375, "grad_norm": 0.28806763887405396, "learning_rate": 0.00048279909382889735, "loss": 1.9375, "step": 2828 }, { "epoch": 0.138134765625, "grad_norm": 0.32730916142463684, "learning_rate": 0.000482785659098616, "loss": 1.8977, "step": 2829 }, { "epoch": 0.13818359375, "grad_norm": 0.3213173747062683, "learning_rate": 0.00048277221933248226, "loss": 1.9472, "step": 2830 }, { "epoch": 0.138232421875, "grad_norm": 0.24618518352508545, "learning_rate": 0.0004827587745308218, "loss": 1.9432, "step": 2831 }, { "epoch": 0.13828125, "grad_norm": 0.3126327395439148, "learning_rate": 0.0004827453246939606, "loss": 1.947, "step": 2832 }, { "epoch": 0.138330078125, "grad_norm": 0.4310871958732605, "learning_rate": 0.0004827318698222246, "loss": 1.9637, "step": 2833 }, { "epoch": 0.13837890625, "grad_norm": 0.41157373785972595, "learning_rate": 0.00048271840991593966, "loss": 1.9357, "step": 2834 }, { "epoch": 0.138427734375, "grad_norm": 0.34536993503570557, "learning_rate": 0.0004827049449754323, "loss": 1.9451, "step": 2835 }, { "epoch": 0.1384765625, "grad_norm": 0.38459455966949463, "learning_rate": 0.00048269147500102873, "loss": 1.9752, "step": 2836 }, { "epoch": 0.138525390625, "grad_norm": 0.3365647494792938, "learning_rate": 0.0004826779999930554, "loss": 1.9897, "step": 2837 }, { "epoch": 0.13857421875, "grad_norm": 0.280414879322052, "learning_rate": 0.00048266451995183885, "loss": 1.8952, "step": 2838 }, { "epoch": 0.138623046875, "grad_norm": 0.2865367829799652, "learning_rate": 0.0004826510348777059, "loss": 1.9502, "step": 2839 }, { "epoch": 0.138671875, "grad_norm": 0.31575489044189453, "learning_rate": 0.0004826375447709832, "loss": 1.9473, "step": 2840 }, { "epoch": 0.138720703125, "grad_norm": 0.31195423007011414, "learning_rate": 0.00048262404963199786, "loss": 1.9711, "step": 2841 }, { "epoch": 0.13876953125, "grad_norm": 0.34996870160102844, "learning_rate": 0.00048261054946107686, "loss": 1.9608, "step": 2842 }, { "epoch": 0.138818359375, "grad_norm": 0.3060489594936371, "learning_rate": 0.0004825970442585473, "loss": 1.9592, "step": 2843 }, { "epoch": 0.1388671875, "grad_norm": 0.28308191895484924, "learning_rate": 0.0004825835340247368, "loss": 1.9568, "step": 2844 }, { "epoch": 0.138916015625, "grad_norm": 0.3154065012931824, "learning_rate": 0.0004825700187599724, "loss": 1.9514, "step": 2845 }, { "epoch": 0.13896484375, "grad_norm": 0.2420497089624405, "learning_rate": 0.00048255649846458187, "loss": 1.9513, "step": 2846 }, { "epoch": 0.139013671875, "grad_norm": 0.3041324317455292, "learning_rate": 0.0004825429731388929, "loss": 1.9226, "step": 2847 }, { "epoch": 0.1390625, "grad_norm": 0.34453678131103516, "learning_rate": 0.00048252944278323324, "loss": 1.9545, "step": 2848 }, { "epoch": 0.139111328125, "grad_norm": 0.3990999162197113, "learning_rate": 0.00048251590739793076, "loss": 1.9349, "step": 2849 }, { "epoch": 0.13916015625, "grad_norm": 0.34957921504974365, "learning_rate": 0.0004825023669833136, "loss": 1.9575, "step": 2850 }, { "epoch": 0.139208984375, "grad_norm": 0.2750493884086609, "learning_rate": 0.00048248882153970984, "loss": 1.9263, "step": 2851 }, { "epoch": 0.1392578125, "grad_norm": 0.34961846470832825, "learning_rate": 0.00048247527106744784, "loss": 1.8747, "step": 2852 }, { "epoch": 0.139306640625, "grad_norm": 0.28913164138793945, "learning_rate": 0.0004824617155668559, "loss": 1.9437, "step": 2853 }, { "epoch": 0.13935546875, "grad_norm": 0.2866270840167999, "learning_rate": 0.00048244815503826257, "loss": 1.9759, "step": 2854 }, { "epoch": 0.139404296875, "grad_norm": 0.2874768376350403, "learning_rate": 0.0004824345894819966, "loss": 1.9136, "step": 2855 }, { "epoch": 0.139453125, "grad_norm": 0.3575119078159332, "learning_rate": 0.0004824210188983867, "loss": 1.9185, "step": 2856 }, { "epoch": 0.139501953125, "grad_norm": 0.3447951674461365, "learning_rate": 0.00048240744328776177, "loss": 1.9245, "step": 2857 }, { "epoch": 0.13955078125, "grad_norm": 0.2775998115539551, "learning_rate": 0.0004823938626504508, "loss": 1.9333, "step": 2858 }, { "epoch": 0.139599609375, "grad_norm": 0.30933138728141785, "learning_rate": 0.000482380276986783, "loss": 1.9377, "step": 2859 }, { "epoch": 0.1396484375, "grad_norm": 0.3241446912288666, "learning_rate": 0.0004823666862970876, "loss": 1.9421, "step": 2860 }, { "epoch": 0.139697265625, "grad_norm": 0.22767190635204315, "learning_rate": 0.0004823530905816939, "loss": 1.9599, "step": 2861 }, { "epoch": 0.13974609375, "grad_norm": 0.3486478328704834, "learning_rate": 0.00048233948984093147, "loss": 1.9768, "step": 2862 }, { "epoch": 0.139794921875, "grad_norm": 0.39326295256614685, "learning_rate": 0.00048232588407512997, "loss": 1.9348, "step": 2863 }, { "epoch": 0.13984375, "grad_norm": 0.29440805315971375, "learning_rate": 0.000482312273284619, "loss": 1.9364, "step": 2864 }, { "epoch": 0.139892578125, "grad_norm": 0.3240882158279419, "learning_rate": 0.0004822986574697286, "loss": 1.942, "step": 2865 }, { "epoch": 0.13994140625, "grad_norm": 0.30759525299072266, "learning_rate": 0.0004822850366307887, "loss": 1.927, "step": 2866 }, { "epoch": 0.139990234375, "grad_norm": 0.3642910420894623, "learning_rate": 0.00048227141076812935, "loss": 1.8971, "step": 2867 }, { "epoch": 0.1400390625, "grad_norm": 0.3931790888309479, "learning_rate": 0.00048225777988208094, "loss": 1.9684, "step": 2868 }, { "epoch": 0.140087890625, "grad_norm": 0.3580802381038666, "learning_rate": 0.0004822441439729737, "loss": 1.9241, "step": 2869 }, { "epoch": 0.14013671875, "grad_norm": 0.3437618017196655, "learning_rate": 0.0004822305030411381, "loss": 1.9073, "step": 2870 }, { "epoch": 0.140185546875, "grad_norm": 0.3340206742286682, "learning_rate": 0.00048221685708690475, "loss": 1.979, "step": 2871 }, { "epoch": 0.140234375, "grad_norm": 0.34362515807151794, "learning_rate": 0.0004822032061106045, "loss": 1.9448, "step": 2872 }, { "epoch": 0.140283203125, "grad_norm": 0.3216696083545685, "learning_rate": 0.0004821895501125679, "loss": 1.9272, "step": 2873 }, { "epoch": 0.14033203125, "grad_norm": 0.35912665724754333, "learning_rate": 0.00048217588909312627, "loss": 1.879, "step": 2874 }, { "epoch": 0.140380859375, "grad_norm": 0.31767499446868896, "learning_rate": 0.0004821622230526104, "loss": 1.9572, "step": 2875 }, { "epoch": 0.1404296875, "grad_norm": 0.36220455169677734, "learning_rate": 0.0004821485519913518, "loss": 2.0135, "step": 2876 }, { "epoch": 0.140478515625, "grad_norm": 0.38967326283454895, "learning_rate": 0.0004821348759096815, "loss": 1.9458, "step": 2877 }, { "epoch": 0.14052734375, "grad_norm": 0.27750512957572937, "learning_rate": 0.00048212119480793103, "loss": 1.9123, "step": 2878 }, { "epoch": 0.140576171875, "grad_norm": 0.3011779487133026, "learning_rate": 0.00048210750868643207, "loss": 1.9807, "step": 2879 }, { "epoch": 0.140625, "grad_norm": 0.3381567597389221, "learning_rate": 0.00048209381754551616, "loss": 1.8898, "step": 2880 }, { "epoch": 0.140673828125, "grad_norm": 0.23832444846630096, "learning_rate": 0.0004820801213855154, "loss": 1.9578, "step": 2881 }, { "epoch": 0.14072265625, "grad_norm": 0.29568278789520264, "learning_rate": 0.00048206642020676133, "loss": 1.9236, "step": 2882 }, { "epoch": 0.140771484375, "grad_norm": 0.23466087877750397, "learning_rate": 0.0004820527140095863, "loss": 1.9347, "step": 2883 }, { "epoch": 0.1408203125, "grad_norm": 0.2508884370326996, "learning_rate": 0.00048203900279432247, "loss": 1.9193, "step": 2884 }, { "epoch": 0.140869140625, "grad_norm": 0.27473264932632446, "learning_rate": 0.00048202528656130194, "loss": 1.9728, "step": 2885 }, { "epoch": 0.14091796875, "grad_norm": 0.24541006982326508, "learning_rate": 0.0004820115653108573, "loss": 1.9536, "step": 2886 }, { "epoch": 0.140966796875, "grad_norm": 0.22865930199623108, "learning_rate": 0.0004819978390433211, "loss": 1.9331, "step": 2887 }, { "epoch": 0.141015625, "grad_norm": 0.3080562949180603, "learning_rate": 0.00048198410775902596, "loss": 1.9181, "step": 2888 }, { "epoch": 0.141064453125, "grad_norm": 0.3055649399757385, "learning_rate": 0.0004819703714583046, "loss": 1.9049, "step": 2889 }, { "epoch": 0.14111328125, "grad_norm": 0.3640112280845642, "learning_rate": 0.0004819566301414901, "loss": 1.9206, "step": 2890 }, { "epoch": 0.141162109375, "grad_norm": 0.3798571228981018, "learning_rate": 0.00048194288380891544, "loss": 1.922, "step": 2891 }, { "epoch": 0.1412109375, "grad_norm": 0.3195284903049469, "learning_rate": 0.0004819291324609137, "loss": 1.9759, "step": 2892 }, { "epoch": 0.141259765625, "grad_norm": 0.31190261244773865, "learning_rate": 0.0004819153760978181, "loss": 1.9684, "step": 2893 }, { "epoch": 0.14130859375, "grad_norm": 0.32336583733558655, "learning_rate": 0.0004819016147199622, "loss": 1.887, "step": 2894 }, { "epoch": 0.141357421875, "grad_norm": 0.2526874840259552, "learning_rate": 0.00048188784832767944, "loss": 1.9482, "step": 2895 }, { "epoch": 0.14140625, "grad_norm": 0.3288187086582184, "learning_rate": 0.00048187407692130345, "loss": 1.9585, "step": 2896 }, { "epoch": 0.141455078125, "grad_norm": 0.29208338260650635, "learning_rate": 0.00048186030050116803, "loss": 1.9569, "step": 2897 }, { "epoch": 0.14150390625, "grad_norm": 0.3009394705295563, "learning_rate": 0.000481846519067607, "loss": 1.9731, "step": 2898 }, { "epoch": 0.141552734375, "grad_norm": 0.3424535095691681, "learning_rate": 0.0004818327326209545, "loss": 1.9211, "step": 2899 }, { "epoch": 0.1416015625, "grad_norm": 0.2749929130077362, "learning_rate": 0.00048181894116154444, "loss": 1.874, "step": 2900 }, { "epoch": 0.141650390625, "grad_norm": 0.28364285826683044, "learning_rate": 0.0004818051446897112, "loss": 1.8908, "step": 2901 }, { "epoch": 0.14169921875, "grad_norm": 0.2697376608848572, "learning_rate": 0.0004817913432057892, "loss": 1.9266, "step": 2902 }, { "epoch": 0.141748046875, "grad_norm": 0.2493991106748581, "learning_rate": 0.0004817775367101128, "loss": 1.9195, "step": 2903 }, { "epoch": 0.141796875, "grad_norm": 0.24625813961029053, "learning_rate": 0.0004817637252030167, "loss": 1.9653, "step": 2904 }, { "epoch": 0.141845703125, "grad_norm": 0.24565783143043518, "learning_rate": 0.00048174990868483567, "loss": 1.9429, "step": 2905 }, { "epoch": 0.14189453125, "grad_norm": 0.2374296337366104, "learning_rate": 0.00048173608715590437, "loss": 1.9656, "step": 2906 }, { "epoch": 0.141943359375, "grad_norm": 0.32538241147994995, "learning_rate": 0.000481722260616558, "loss": 1.9625, "step": 2907 }, { "epoch": 0.1419921875, "grad_norm": 0.4220999777317047, "learning_rate": 0.0004817084290671315, "loss": 1.918, "step": 2908 }, { "epoch": 0.142041015625, "grad_norm": 0.4798889458179474, "learning_rate": 0.00048169459250796024, "loss": 1.9242, "step": 2909 }, { "epoch": 0.14208984375, "grad_norm": 0.3576270043849945, "learning_rate": 0.00048168075093937943, "loss": 1.9038, "step": 2910 }, { "epoch": 0.142138671875, "grad_norm": 0.24935242533683777, "learning_rate": 0.00048166690436172457, "loss": 1.884, "step": 2911 }, { "epoch": 0.1421875, "grad_norm": 0.2936621308326721, "learning_rate": 0.0004816530527753312, "loss": 1.9562, "step": 2912 }, { "epoch": 0.142236328125, "grad_norm": 0.3408103287220001, "learning_rate": 0.00048163919618053516, "loss": 1.9143, "step": 2913 }, { "epoch": 0.14228515625, "grad_norm": 0.3216996192932129, "learning_rate": 0.0004816253345776722, "loss": 1.924, "step": 2914 }, { "epoch": 0.142333984375, "grad_norm": 0.2880913019180298, "learning_rate": 0.0004816114679670782, "loss": 1.9309, "step": 2915 }, { "epoch": 0.1423828125, "grad_norm": 0.2711474597454071, "learning_rate": 0.0004815975963490893, "loss": 1.9005, "step": 2916 }, { "epoch": 0.142431640625, "grad_norm": 0.308948278427124, "learning_rate": 0.00048158371972404165, "loss": 1.9563, "step": 2917 }, { "epoch": 0.14248046875, "grad_norm": 0.29940176010131836, "learning_rate": 0.0004815698380922716, "loss": 1.9324, "step": 2918 }, { "epoch": 0.142529296875, "grad_norm": 0.2977103888988495, "learning_rate": 0.0004815559514541156, "loss": 1.9292, "step": 2919 }, { "epoch": 0.142578125, "grad_norm": 0.3957459032535553, "learning_rate": 0.0004815420598099101, "loss": 1.9205, "step": 2920 }, { "epoch": 0.142626953125, "grad_norm": 0.36293479800224304, "learning_rate": 0.00048152816315999187, "loss": 1.889, "step": 2921 }, { "epoch": 0.14267578125, "grad_norm": 0.3229316174983978, "learning_rate": 0.00048151426150469767, "loss": 1.9669, "step": 2922 }, { "epoch": 0.142724609375, "grad_norm": 0.39292022585868835, "learning_rate": 0.0004815003548443645, "loss": 1.9414, "step": 2923 }, { "epoch": 0.1427734375, "grad_norm": 0.31583771109580994, "learning_rate": 0.00048148644317932927, "loss": 1.8877, "step": 2924 }, { "epoch": 0.142822265625, "grad_norm": 0.31091582775115967, "learning_rate": 0.00048147252650992923, "loss": 1.9677, "step": 2925 }, { "epoch": 0.14287109375, "grad_norm": 0.32569169998168945, "learning_rate": 0.0004814586048365016, "loss": 1.949, "step": 2926 }, { "epoch": 0.142919921875, "grad_norm": 0.3150377869606018, "learning_rate": 0.0004814446781593839, "loss": 1.9211, "step": 2927 }, { "epoch": 0.14296875, "grad_norm": 0.345165491104126, "learning_rate": 0.00048143074647891344, "loss": 1.9441, "step": 2928 }, { "epoch": 0.143017578125, "grad_norm": 0.296032190322876, "learning_rate": 0.00048141680979542806, "loss": 1.9461, "step": 2929 }, { "epoch": 0.14306640625, "grad_norm": 0.3216593861579895, "learning_rate": 0.00048140286810926543, "loss": 1.9385, "step": 2930 }, { "epoch": 0.143115234375, "grad_norm": 0.32722675800323486, "learning_rate": 0.00048138892142076355, "loss": 1.8977, "step": 2931 }, { "epoch": 0.1431640625, "grad_norm": 0.29203590750694275, "learning_rate": 0.0004813749697302603, "loss": 1.9069, "step": 2932 }, { "epoch": 0.143212890625, "grad_norm": 0.32773441076278687, "learning_rate": 0.0004813610130380938, "loss": 1.9359, "step": 2933 }, { "epoch": 0.14326171875, "grad_norm": 0.2839939296245575, "learning_rate": 0.0004813470513446025, "loss": 1.8803, "step": 2934 }, { "epoch": 0.143310546875, "grad_norm": 0.30237799882888794, "learning_rate": 0.00048133308465012455, "loss": 1.9201, "step": 2935 }, { "epoch": 0.143359375, "grad_norm": 0.25250309705734253, "learning_rate": 0.00048131911295499845, "loss": 1.9197, "step": 2936 }, { "epoch": 0.143408203125, "grad_norm": 0.3477902412414551, "learning_rate": 0.00048130513625956295, "loss": 1.9201, "step": 2937 }, { "epoch": 0.14345703125, "grad_norm": 0.3655267059803009, "learning_rate": 0.00048129115456415684, "loss": 1.9378, "step": 2938 }, { "epoch": 0.143505859375, "grad_norm": 0.37384936213493347, "learning_rate": 0.00048127716786911873, "loss": 1.9695, "step": 2939 }, { "epoch": 0.1435546875, "grad_norm": 0.2912966310977936, "learning_rate": 0.00048126317617478776, "loss": 1.905, "step": 2940 }, { "epoch": 0.143603515625, "grad_norm": 0.27250543236732483, "learning_rate": 0.00048124917948150306, "loss": 1.9331, "step": 2941 }, { "epoch": 0.14365234375, "grad_norm": 0.39197030663490295, "learning_rate": 0.00048123517778960373, "loss": 1.9439, "step": 2942 }, { "epoch": 0.143701171875, "grad_norm": 0.35078194737434387, "learning_rate": 0.0004812211710994292, "loss": 1.9332, "step": 2943 }, { "epoch": 0.14375, "grad_norm": 0.3755006492137909, "learning_rate": 0.00048120715941131893, "loss": 1.9099, "step": 2944 }, { "epoch": 0.143798828125, "grad_norm": 0.343252956867218, "learning_rate": 0.0004811931427256125, "loss": 1.9108, "step": 2945 }, { "epoch": 0.14384765625, "grad_norm": 0.33758771419525146, "learning_rate": 0.00048117912104264957, "loss": 1.9275, "step": 2946 }, { "epoch": 0.143896484375, "grad_norm": 0.3968861401081085, "learning_rate": 0.00048116509436277, "loss": 1.9846, "step": 2947 }, { "epoch": 0.1439453125, "grad_norm": 0.30931127071380615, "learning_rate": 0.00048115106268631374, "loss": 1.9165, "step": 2948 }, { "epoch": 0.143994140625, "grad_norm": 0.3484847843647003, "learning_rate": 0.0004811370260136209, "loss": 1.9767, "step": 2949 }, { "epoch": 0.14404296875, "grad_norm": 0.2705479562282562, "learning_rate": 0.00048112298434503154, "loss": 1.9253, "step": 2950 }, { "epoch": 0.144091796875, "grad_norm": 0.3282467722892761, "learning_rate": 0.0004811089376808862, "loss": 1.9337, "step": 2951 }, { "epoch": 0.144140625, "grad_norm": 0.2769882380962372, "learning_rate": 0.00048109488602152503, "loss": 1.9223, "step": 2952 }, { "epoch": 0.144189453125, "grad_norm": 0.2776065170764923, "learning_rate": 0.0004810808293672887, "loss": 1.9121, "step": 2953 }, { "epoch": 0.14423828125, "grad_norm": 0.2979893386363983, "learning_rate": 0.00048106676771851804, "loss": 1.9426, "step": 2954 }, { "epoch": 0.144287109375, "grad_norm": 0.3236512243747711, "learning_rate": 0.0004810527010755536, "loss": 1.9662, "step": 2955 }, { "epoch": 0.1443359375, "grad_norm": 0.8546280264854431, "learning_rate": 0.00048103862943873647, "loss": 1.9541, "step": 2956 }, { "epoch": 0.144384765625, "grad_norm": 0.26982754468917847, "learning_rate": 0.0004810245528084076, "loss": 1.9253, "step": 2957 }, { "epoch": 0.14443359375, "grad_norm": 0.23970730602741241, "learning_rate": 0.0004810104711849082, "loss": 1.9382, "step": 2958 }, { "epoch": 0.144482421875, "grad_norm": 0.33469581604003906, "learning_rate": 0.0004809963845685795, "loss": 1.8935, "step": 2959 }, { "epoch": 0.14453125, "grad_norm": 0.3131215274333954, "learning_rate": 0.0004809822929597629, "loss": 1.9627, "step": 2960 }, { "epoch": 0.144580078125, "grad_norm": 0.2520798444747925, "learning_rate": 0.0004809681963588, "loss": 1.9938, "step": 2961 }, { "epoch": 0.14462890625, "grad_norm": 0.34048154950141907, "learning_rate": 0.00048095409476603233, "loss": 1.9006, "step": 2962 }, { "epoch": 0.144677734375, "grad_norm": 0.3564125895500183, "learning_rate": 0.0004809399881818018, "loss": 1.9411, "step": 2963 }, { "epoch": 0.1447265625, "grad_norm": 0.3254007399082184, "learning_rate": 0.0004809258766064501, "loss": 1.9372, "step": 2964 }, { "epoch": 0.144775390625, "grad_norm": 0.3652283847332001, "learning_rate": 0.00048091176004031946, "loss": 1.9201, "step": 2965 }, { "epoch": 0.14482421875, "grad_norm": 0.32599228620529175, "learning_rate": 0.0004808976384837518, "loss": 1.9667, "step": 2966 }, { "epoch": 0.144873046875, "grad_norm": 0.29086804389953613, "learning_rate": 0.00048088351193708944, "loss": 1.8956, "step": 2967 }, { "epoch": 0.144921875, "grad_norm": 0.35727357864379883, "learning_rate": 0.00048086938040067486, "loss": 1.9046, "step": 2968 }, { "epoch": 0.144970703125, "grad_norm": 0.32107946276664734, "learning_rate": 0.00048085524387485036, "loss": 1.9616, "step": 2969 }, { "epoch": 0.14501953125, "grad_norm": 0.2779982388019562, "learning_rate": 0.00048084110235995864, "loss": 1.9198, "step": 2970 }, { "epoch": 0.145068359375, "grad_norm": 0.24640674889087677, "learning_rate": 0.00048082695585634244, "loss": 1.9766, "step": 2971 }, { "epoch": 0.1451171875, "grad_norm": 0.2597413957118988, "learning_rate": 0.00048081280436434467, "loss": 1.9267, "step": 2972 }, { "epoch": 0.145166015625, "grad_norm": 0.2966836392879486, "learning_rate": 0.0004807986478843082, "loss": 1.9523, "step": 2973 }, { "epoch": 0.14521484375, "grad_norm": 0.23217947781085968, "learning_rate": 0.0004807844864165761, "loss": 1.9299, "step": 2974 }, { "epoch": 0.145263671875, "grad_norm": 0.35253316164016724, "learning_rate": 0.00048077031996149176, "loss": 1.962, "step": 2975 }, { "epoch": 0.1453125, "grad_norm": 0.34472718834877014, "learning_rate": 0.0004807561485193983, "loss": 1.9285, "step": 2976 }, { "epoch": 0.145361328125, "grad_norm": 0.2611691951751709, "learning_rate": 0.00048074197209063935, "loss": 1.9174, "step": 2977 }, { "epoch": 0.14541015625, "grad_norm": 0.2895244061946869, "learning_rate": 0.00048072779067555847, "loss": 1.9963, "step": 2978 }, { "epoch": 0.145458984375, "grad_norm": 0.2926000952720642, "learning_rate": 0.00048071360427449916, "loss": 1.939, "step": 2979 }, { "epoch": 0.1455078125, "grad_norm": 0.5745974183082581, "learning_rate": 0.00048069941288780545, "loss": 1.9195, "step": 2980 }, { "epoch": 0.145556640625, "grad_norm": 0.29065245389938354, "learning_rate": 0.00048068521651582123, "loss": 1.9439, "step": 2981 }, { "epoch": 0.14560546875, "grad_norm": 0.2687985897064209, "learning_rate": 0.0004806710151588905, "loss": 1.9162, "step": 2982 }, { "epoch": 0.145654296875, "grad_norm": 0.25918659567832947, "learning_rate": 0.00048065680881735753, "loss": 1.9396, "step": 2983 }, { "epoch": 0.145703125, "grad_norm": 0.27516400814056396, "learning_rate": 0.0004806425974915665, "loss": 1.9465, "step": 2984 }, { "epoch": 0.145751953125, "grad_norm": 0.28932276368141174, "learning_rate": 0.000480628381181862, "loss": 1.9301, "step": 2985 }, { "epoch": 0.14580078125, "grad_norm": 0.28692102432250977, "learning_rate": 0.0004806141598885884, "loss": 1.9362, "step": 2986 }, { "epoch": 0.145849609375, "grad_norm": 0.3224214017391205, "learning_rate": 0.00048059993361209053, "loss": 1.8971, "step": 2987 }, { "epoch": 0.1458984375, "grad_norm": 0.7621709704399109, "learning_rate": 0.000480585702352713, "loss": 1.9642, "step": 2988 }, { "epoch": 0.145947265625, "grad_norm": 1.2820180654525757, "learning_rate": 0.0004805714661108009, "loss": 1.9541, "step": 2989 }, { "epoch": 0.14599609375, "grad_norm": 0.4280798137187958, "learning_rate": 0.0004805572248866991, "loss": 1.9778, "step": 2990 }, { "epoch": 0.146044921875, "grad_norm": 0.44649365544319153, "learning_rate": 0.0004805429786807528, "loss": 1.95, "step": 2991 }, { "epoch": 0.14609375, "grad_norm": 0.4498361349105835, "learning_rate": 0.0004805287274933073, "loss": 1.8741, "step": 2992 }, { "epoch": 0.146142578125, "grad_norm": 0.4092569053173065, "learning_rate": 0.00048051447132470797, "loss": 1.9181, "step": 2993 }, { "epoch": 0.14619140625, "grad_norm": 0.4055674076080322, "learning_rate": 0.00048050021017530037, "loss": 1.9191, "step": 2994 }, { "epoch": 0.146240234375, "grad_norm": 0.43335071206092834, "learning_rate": 0.00048048594404543005, "loss": 1.9178, "step": 2995 }, { "epoch": 0.1462890625, "grad_norm": 0.3089121878147125, "learning_rate": 0.0004804716729354428, "loss": 1.9318, "step": 2996 }, { "epoch": 0.146337890625, "grad_norm": 0.32836899161338806, "learning_rate": 0.0004804573968456844, "loss": 1.8971, "step": 2997 }, { "epoch": 0.14638671875, "grad_norm": 0.31778427958488464, "learning_rate": 0.000480443115776501, "loss": 1.9048, "step": 2998 }, { "epoch": 0.146435546875, "grad_norm": 0.32507455348968506, "learning_rate": 0.0004804288297282386, "loss": 1.9564, "step": 2999 }, { "epoch": 0.146484375, "grad_norm": 0.3002340793609619, "learning_rate": 0.0004804145387012435, "loss": 1.915, "step": 3000 }, { "epoch": 0.146533203125, "grad_norm": 0.29859867691993713, "learning_rate": 0.000480400242695862, "loss": 1.9786, "step": 3001 }, { "epoch": 0.14658203125, "grad_norm": 0.3044776916503906, "learning_rate": 0.00048038594171244067, "loss": 1.8952, "step": 3002 }, { "epoch": 0.146630859375, "grad_norm": 0.27846816182136536, "learning_rate": 0.000480371635751326, "loss": 1.9168, "step": 3003 }, { "epoch": 0.1466796875, "grad_norm": 0.32414814829826355, "learning_rate": 0.0004803573248128647, "loss": 1.9194, "step": 3004 }, { "epoch": 0.146728515625, "grad_norm": 0.32533538341522217, "learning_rate": 0.0004803430088974037, "loss": 1.9687, "step": 3005 }, { "epoch": 0.14677734375, "grad_norm": 0.3174189627170563, "learning_rate": 0.0004803286880052899, "loss": 1.9117, "step": 3006 }, { "epoch": 0.146826171875, "grad_norm": 0.28939908742904663, "learning_rate": 0.0004803143621368704, "loss": 1.9582, "step": 3007 }, { "epoch": 0.146875, "grad_norm": 0.3881751596927643, "learning_rate": 0.0004803000312924923, "loss": 1.9358, "step": 3008 }, { "epoch": 0.146923828125, "grad_norm": 0.3564598560333252, "learning_rate": 0.0004802856954725031, "loss": 1.8879, "step": 3009 }, { "epoch": 0.14697265625, "grad_norm": 0.28254085779190063, "learning_rate": 0.00048027135467725013, "loss": 1.9225, "step": 3010 }, { "epoch": 0.147021484375, "grad_norm": 0.2959519624710083, "learning_rate": 0.00048025700890708096, "loss": 1.936, "step": 3011 }, { "epoch": 0.1470703125, "grad_norm": 0.31116557121276855, "learning_rate": 0.00048024265816234323, "loss": 1.9609, "step": 3012 }, { "epoch": 0.147119140625, "grad_norm": 0.26212793588638306, "learning_rate": 0.0004802283024433848, "loss": 1.8979, "step": 3013 }, { "epoch": 0.14716796875, "grad_norm": 0.26191285252571106, "learning_rate": 0.00048021394175055363, "loss": 1.9435, "step": 3014 }, { "epoch": 0.147216796875, "grad_norm": 0.2820715308189392, "learning_rate": 0.0004801995760841977, "loss": 1.8966, "step": 3015 }, { "epoch": 0.147265625, "grad_norm": 0.29983586072921753, "learning_rate": 0.00048018520544466513, "loss": 1.9062, "step": 3016 }, { "epoch": 0.147314453125, "grad_norm": 0.2538764476776123, "learning_rate": 0.00048017082983230436, "loss": 1.9577, "step": 3017 }, { "epoch": 0.14736328125, "grad_norm": 0.2638642191886902, "learning_rate": 0.00048015644924746364, "loss": 1.9685, "step": 3018 }, { "epoch": 0.147412109375, "grad_norm": 0.2928617000579834, "learning_rate": 0.00048014206369049155, "loss": 1.9172, "step": 3019 }, { "epoch": 0.1474609375, "grad_norm": 0.3482005000114441, "learning_rate": 0.00048012767316173675, "loss": 1.9483, "step": 3020 }, { "epoch": 0.147509765625, "grad_norm": 0.3177429735660553, "learning_rate": 0.000480113277661548, "loss": 1.924, "step": 3021 }, { "epoch": 0.14755859375, "grad_norm": 0.24683938920497894, "learning_rate": 0.0004800988771902742, "loss": 1.9431, "step": 3022 }, { "epoch": 0.147607421875, "grad_norm": 0.24600467085838318, "learning_rate": 0.0004800844717482643, "loss": 1.9287, "step": 3023 }, { "epoch": 0.14765625, "grad_norm": 0.29480212926864624, "learning_rate": 0.00048007006133586746, "loss": 1.953, "step": 3024 }, { "epoch": 0.147705078125, "grad_norm": 0.30422794818878174, "learning_rate": 0.000480055645953433, "loss": 1.8975, "step": 3025 }, { "epoch": 0.14775390625, "grad_norm": 0.3113625943660736, "learning_rate": 0.00048004122560131016, "loss": 1.8998, "step": 3026 }, { "epoch": 0.147802734375, "grad_norm": 0.31788623332977295, "learning_rate": 0.00048002680027984845, "loss": 1.9685, "step": 3027 }, { "epoch": 0.1478515625, "grad_norm": 0.2420133501291275, "learning_rate": 0.00048001236998939756, "loss": 1.9331, "step": 3028 }, { "epoch": 0.147900390625, "grad_norm": 0.2975316047668457, "learning_rate": 0.0004799979347303072, "loss": 1.9056, "step": 3029 }, { "epoch": 0.14794921875, "grad_norm": 0.2651990056037903, "learning_rate": 0.0004799834945029272, "loss": 1.9048, "step": 3030 }, { "epoch": 0.147998046875, "grad_norm": 0.25060373544692993, "learning_rate": 0.0004799690493076075, "loss": 1.9315, "step": 3031 }, { "epoch": 0.148046875, "grad_norm": 0.316651314496994, "learning_rate": 0.00047995459914469826, "loss": 1.922, "step": 3032 }, { "epoch": 0.148095703125, "grad_norm": 0.2978883385658264, "learning_rate": 0.00047994014401454957, "loss": 1.9566, "step": 3033 }, { "epoch": 0.14814453125, "grad_norm": 0.2578395903110504, "learning_rate": 0.0004799256839175119, "loss": 1.9467, "step": 3034 }, { "epoch": 0.148193359375, "grad_norm": 0.261135071516037, "learning_rate": 0.00047991121885393565, "loss": 1.906, "step": 3035 }, { "epoch": 0.1482421875, "grad_norm": 0.33123597502708435, "learning_rate": 0.0004798967488241714, "loss": 1.9399, "step": 3036 }, { "epoch": 0.148291015625, "grad_norm": 0.34381619095802307, "learning_rate": 0.00047988227382856973, "loss": 1.9695, "step": 3037 }, { "epoch": 0.14833984375, "grad_norm": 0.295574426651001, "learning_rate": 0.00047986779386748166, "loss": 1.9126, "step": 3038 }, { "epoch": 0.148388671875, "grad_norm": 0.3509497344493866, "learning_rate": 0.00047985330894125797, "loss": 1.9427, "step": 3039 }, { "epoch": 0.1484375, "grad_norm": 0.41036149859428406, "learning_rate": 0.0004798388190502497, "loss": 1.9247, "step": 3040 }, { "epoch": 0.148486328125, "grad_norm": 0.38034340739250183, "learning_rate": 0.0004798243241948082, "loss": 1.9528, "step": 3041 }, { "epoch": 0.14853515625, "grad_norm": 0.3684881925582886, "learning_rate": 0.0004798098243752846, "loss": 1.9975, "step": 3042 }, { "epoch": 0.148583984375, "grad_norm": 0.32781702280044556, "learning_rate": 0.0004797953195920304, "loss": 1.932, "step": 3043 }, { "epoch": 0.1486328125, "grad_norm": 0.34025853872299194, "learning_rate": 0.0004797808098453971, "loss": 1.9437, "step": 3044 }, { "epoch": 0.148681640625, "grad_norm": 0.25677382946014404, "learning_rate": 0.0004797662951357363, "loss": 1.8863, "step": 3045 }, { "epoch": 0.14873046875, "grad_norm": 0.27727431058883667, "learning_rate": 0.0004797517754633998, "loss": 1.9256, "step": 3046 }, { "epoch": 0.148779296875, "grad_norm": 0.26105690002441406, "learning_rate": 0.0004797372508287396, "loss": 1.9128, "step": 3047 }, { "epoch": 0.148828125, "grad_norm": 0.2424224466085434, "learning_rate": 0.0004797227212321076, "loss": 1.9026, "step": 3048 }, { "epoch": 0.148876953125, "grad_norm": 0.27309295535087585, "learning_rate": 0.00047970818667385603, "loss": 1.974, "step": 3049 }, { "epoch": 0.14892578125, "grad_norm": 0.37323471903800964, "learning_rate": 0.00047969364715433707, "loss": 1.914, "step": 3050 }, { "epoch": 0.148974609375, "grad_norm": 0.2529546022415161, "learning_rate": 0.00047967910267390307, "loss": 1.8943, "step": 3051 }, { "epoch": 0.1490234375, "grad_norm": 0.3143264353275299, "learning_rate": 0.0004796645532329066, "loss": 1.9289, "step": 3052 }, { "epoch": 0.149072265625, "grad_norm": 0.2990831732749939, "learning_rate": 0.00047964999883170036, "loss": 1.9044, "step": 3053 }, { "epoch": 0.14912109375, "grad_norm": 0.2463589906692505, "learning_rate": 0.00047963543947063687, "loss": 1.9437, "step": 3054 }, { "epoch": 0.149169921875, "grad_norm": 0.35175997018814087, "learning_rate": 0.0004796208751500691, "loss": 1.9579, "step": 3055 }, { "epoch": 0.14921875, "grad_norm": 0.3267260193824768, "learning_rate": 0.0004796063058703501, "loss": 1.9288, "step": 3056 }, { "epoch": 0.149267578125, "grad_norm": 0.3850342333316803, "learning_rate": 0.00047959173163183283, "loss": 1.9066, "step": 3057 }, { "epoch": 0.14931640625, "grad_norm": 0.3615163564682007, "learning_rate": 0.0004795771524348707, "loss": 1.8701, "step": 3058 }, { "epoch": 0.149365234375, "grad_norm": 0.24297358095645905, "learning_rate": 0.00047956256827981676, "loss": 1.8904, "step": 3059 }, { "epoch": 0.1494140625, "grad_norm": 0.43331217765808105, "learning_rate": 0.0004795479791670247, "loss": 1.883, "step": 3060 }, { "epoch": 0.149462890625, "grad_norm": 0.3385840952396393, "learning_rate": 0.000479533385096848, "loss": 1.944, "step": 3061 }, { "epoch": 0.14951171875, "grad_norm": 0.27151721715927124, "learning_rate": 0.0004795187860696405, "loss": 1.9501, "step": 3062 }, { "epoch": 0.149560546875, "grad_norm": 0.35526931285858154, "learning_rate": 0.0004795041820857559, "loss": 1.9295, "step": 3063 }, { "epoch": 0.149609375, "grad_norm": 0.32127073407173157, "learning_rate": 0.00047948957314554805, "loss": 1.9365, "step": 3064 }, { "epoch": 0.149658203125, "grad_norm": 0.3050878643989563, "learning_rate": 0.0004794749592493712, "loss": 1.9745, "step": 3065 }, { "epoch": 0.14970703125, "grad_norm": 0.31620049476623535, "learning_rate": 0.00047946034039757934, "loss": 1.9444, "step": 3066 }, { "epoch": 0.149755859375, "grad_norm": 0.3607986867427826, "learning_rate": 0.0004794457165905269, "loss": 1.9471, "step": 3067 }, { "epoch": 0.1498046875, "grad_norm": 0.3456646502017975, "learning_rate": 0.0004794310878285683, "loss": 1.9505, "step": 3068 }, { "epoch": 0.149853515625, "grad_norm": 0.2644546329975128, "learning_rate": 0.00047941645411205805, "loss": 1.9357, "step": 3069 }, { "epoch": 0.14990234375, "grad_norm": 0.28019633889198303, "learning_rate": 0.0004794018154413508, "loss": 1.9801, "step": 3070 }, { "epoch": 0.149951171875, "grad_norm": 0.24633881449699402, "learning_rate": 0.00047938717181680136, "loss": 1.9708, "step": 3071 }, { "epoch": 0.15, "grad_norm": 0.31443607807159424, "learning_rate": 0.0004793725232387646, "loss": 1.9479, "step": 3072 }, { "epoch": 0.150048828125, "grad_norm": 0.2708803415298462, "learning_rate": 0.00047935786970759546, "loss": 1.9416, "step": 3073 }, { "epoch": 0.15009765625, "grad_norm": 0.25892534852027893, "learning_rate": 0.00047934321122364927, "loss": 1.9545, "step": 3074 }, { "epoch": 0.150146484375, "grad_norm": 0.3045962154865265, "learning_rate": 0.00047932854778728114, "loss": 1.9531, "step": 3075 }, { "epoch": 0.1501953125, "grad_norm": 0.28753823041915894, "learning_rate": 0.00047931387939884653, "loss": 1.9296, "step": 3076 }, { "epoch": 0.150244140625, "grad_norm": 0.24837715923786163, "learning_rate": 0.00047929920605870084, "loss": 1.9088, "step": 3077 }, { "epoch": 0.15029296875, "grad_norm": 0.30151379108428955, "learning_rate": 0.0004792845277671999, "loss": 1.9369, "step": 3078 }, { "epoch": 0.150341796875, "grad_norm": 0.26017460227012634, "learning_rate": 0.0004792698445246992, "loss": 1.9052, "step": 3079 }, { "epoch": 0.150390625, "grad_norm": 0.21787194907665253, "learning_rate": 0.0004792551563315547, "loss": 1.8966, "step": 3080 }, { "epoch": 0.150439453125, "grad_norm": 0.2580764889717102, "learning_rate": 0.0004792404631881225, "loss": 1.9124, "step": 3081 }, { "epoch": 0.15048828125, "grad_norm": 0.31286340951919556, "learning_rate": 0.00047922576509475844, "loss": 1.9351, "step": 3082 }, { "epoch": 0.150537109375, "grad_norm": 0.29140716791152954, "learning_rate": 0.000479211062051819, "loss": 1.957, "step": 3083 }, { "epoch": 0.1505859375, "grad_norm": 0.28852343559265137, "learning_rate": 0.0004791963540596603, "loss": 1.9433, "step": 3084 }, { "epoch": 0.150634765625, "grad_norm": 0.29732465744018555, "learning_rate": 0.00047918164111863904, "loss": 1.9353, "step": 3085 }, { "epoch": 0.15068359375, "grad_norm": 0.3006944954395294, "learning_rate": 0.0004791669232291116, "loss": 1.9327, "step": 3086 }, { "epoch": 0.150732421875, "grad_norm": 0.2685914933681488, "learning_rate": 0.0004791522003914348, "loss": 1.9381, "step": 3087 }, { "epoch": 0.15078125, "grad_norm": 0.3036808669567108, "learning_rate": 0.00047913747260596535, "loss": 1.9486, "step": 3088 }, { "epoch": 0.150830078125, "grad_norm": 0.3103950619697571, "learning_rate": 0.0004791227398730603, "loss": 1.9674, "step": 3089 }, { "epoch": 0.15087890625, "grad_norm": 0.25475943088531494, "learning_rate": 0.00047910800219307657, "loss": 1.8953, "step": 3090 }, { "epoch": 0.150927734375, "grad_norm": 0.2880955636501312, "learning_rate": 0.00047909325956637153, "loss": 1.9403, "step": 3091 }, { "epoch": 0.1509765625, "grad_norm": 0.33792468905448914, "learning_rate": 0.00047907851199330227, "loss": 1.9239, "step": 3092 }, { "epoch": 0.151025390625, "grad_norm": 0.3953322172164917, "learning_rate": 0.00047906375947422635, "loss": 1.936, "step": 3093 }, { "epoch": 0.15107421875, "grad_norm": 0.32560914754867554, "learning_rate": 0.00047904900200950134, "loss": 1.9081, "step": 3094 }, { "epoch": 0.151123046875, "grad_norm": 0.3052483797073364, "learning_rate": 0.00047903423959948476, "loss": 1.9552, "step": 3095 }, { "epoch": 0.151171875, "grad_norm": 0.41132664680480957, "learning_rate": 0.0004790194722445345, "loss": 1.9126, "step": 3096 }, { "epoch": 0.151220703125, "grad_norm": 0.42090731859207153, "learning_rate": 0.0004790046999450085, "loss": 1.9636, "step": 3097 }, { "epoch": 0.15126953125, "grad_norm": 0.4517715871334076, "learning_rate": 0.0004789899227012646, "loss": 1.9188, "step": 3098 }, { "epoch": 0.151318359375, "grad_norm": 0.4535156786441803, "learning_rate": 0.00047897514051366106, "loss": 1.9299, "step": 3099 }, { "epoch": 0.1513671875, "grad_norm": 0.3082243502140045, "learning_rate": 0.0004789603533825562, "loss": 1.913, "step": 3100 }, { "epoch": 0.151416015625, "grad_norm": 0.36730870604515076, "learning_rate": 0.0004789455613083082, "loss": 1.9429, "step": 3101 }, { "epoch": 0.15146484375, "grad_norm": 0.3528287410736084, "learning_rate": 0.00047893076429127575, "loss": 1.9209, "step": 3102 }, { "epoch": 0.151513671875, "grad_norm": 0.25617554783821106, "learning_rate": 0.00047891596233181727, "loss": 1.9225, "step": 3103 }, { "epoch": 0.1515625, "grad_norm": 0.2578149437904358, "learning_rate": 0.00047890115543029174, "loss": 1.9555, "step": 3104 }, { "epoch": 0.151611328125, "grad_norm": 0.28063416481018066, "learning_rate": 0.00047888634358705793, "loss": 1.9435, "step": 3105 }, { "epoch": 0.15166015625, "grad_norm": 0.2557755708694458, "learning_rate": 0.00047887152680247465, "loss": 1.87, "step": 3106 }, { "epoch": 0.151708984375, "grad_norm": 0.22587540745735168, "learning_rate": 0.0004788567050769012, "loss": 1.8903, "step": 3107 }, { "epoch": 0.1517578125, "grad_norm": 0.3103911876678467, "learning_rate": 0.0004788418784106967, "loss": 1.909, "step": 3108 }, { "epoch": 0.151806640625, "grad_norm": 0.31738969683647156, "learning_rate": 0.0004788270468042205, "loss": 1.9325, "step": 3109 }, { "epoch": 0.15185546875, "grad_norm": 0.2791460156440735, "learning_rate": 0.0004788122102578321, "loss": 1.9866, "step": 3110 }, { "epoch": 0.151904296875, "grad_norm": 0.31924372911453247, "learning_rate": 0.00047879736877189105, "loss": 1.8942, "step": 3111 }, { "epoch": 0.151953125, "grad_norm": 0.2954700291156769, "learning_rate": 0.0004787825223467571, "loss": 1.9124, "step": 3112 }, { "epoch": 0.152001953125, "grad_norm": 0.3418107032775879, "learning_rate": 0.0004787676709827899, "loss": 1.9012, "step": 3113 }, { "epoch": 0.15205078125, "grad_norm": 0.3473219573497772, "learning_rate": 0.0004787528146803495, "loss": 1.9157, "step": 3114 }, { "epoch": 0.152099609375, "grad_norm": 0.32213452458381653, "learning_rate": 0.000478737953439796, "loss": 1.8961, "step": 3115 }, { "epoch": 0.1521484375, "grad_norm": 0.3642258644104004, "learning_rate": 0.00047872308726148945, "loss": 1.9085, "step": 3116 }, { "epoch": 0.152197265625, "grad_norm": 0.2488832324743271, "learning_rate": 0.00047870821614579023, "loss": 1.8917, "step": 3117 }, { "epoch": 0.15224609375, "grad_norm": 0.3139618933200836, "learning_rate": 0.00047869334009305873, "loss": 1.9388, "step": 3118 }, { "epoch": 0.152294921875, "grad_norm": 0.30920401215553284, "learning_rate": 0.0004786784591036555, "loss": 1.935, "step": 3119 }, { "epoch": 0.15234375, "grad_norm": 0.24794569611549377, "learning_rate": 0.00047866357317794125, "loss": 1.934, "step": 3120 }, { "epoch": 0.152392578125, "grad_norm": 0.29621419310569763, "learning_rate": 0.00047864868231627664, "loss": 1.9685, "step": 3121 }, { "epoch": 0.15244140625, "grad_norm": 0.35362890362739563, "learning_rate": 0.0004786337865190225, "loss": 1.9138, "step": 3122 }, { "epoch": 0.152490234375, "grad_norm": 0.4301058053970337, "learning_rate": 0.0004786188857865401, "loss": 1.9273, "step": 3123 }, { "epoch": 0.1525390625, "grad_norm": 0.3325541913509369, "learning_rate": 0.00047860398011919026, "loss": 1.9404, "step": 3124 }, { "epoch": 0.152587890625, "grad_norm": 0.28691384196281433, "learning_rate": 0.0004785890695173345, "loss": 1.936, "step": 3125 }, { "epoch": 0.15263671875, "grad_norm": 0.28398397564888, "learning_rate": 0.00047857415398133403, "loss": 1.9324, "step": 3126 }, { "epoch": 0.152685546875, "grad_norm": 0.3090391755104065, "learning_rate": 0.00047855923351155047, "loss": 1.9242, "step": 3127 }, { "epoch": 0.152734375, "grad_norm": 0.27080196142196655, "learning_rate": 0.0004785443081083452, "loss": 1.8889, "step": 3128 }, { "epoch": 0.152783203125, "grad_norm": 0.3165290653705597, "learning_rate": 0.0004785293777720802, "loss": 1.9752, "step": 3129 }, { "epoch": 0.15283203125, "grad_norm": 0.3480871617794037, "learning_rate": 0.0004785144425031172, "loss": 1.9736, "step": 3130 }, { "epoch": 0.152880859375, "grad_norm": 0.2761017680168152, "learning_rate": 0.00047849950230181815, "loss": 1.9244, "step": 3131 }, { "epoch": 0.1529296875, "grad_norm": 0.29239365458488464, "learning_rate": 0.00047848455716854524, "loss": 1.9582, "step": 3132 }, { "epoch": 0.152978515625, "grad_norm": 0.2786470651626587, "learning_rate": 0.0004784696071036606, "loss": 1.9499, "step": 3133 }, { "epoch": 0.15302734375, "grad_norm": 0.249526709318161, "learning_rate": 0.0004784546521075265, "loss": 1.9144, "step": 3134 }, { "epoch": 0.153076171875, "grad_norm": 0.26531076431274414, "learning_rate": 0.0004784396921805055, "loss": 1.8838, "step": 3135 }, { "epoch": 0.153125, "grad_norm": 0.24837002158164978, "learning_rate": 0.00047842472732296013, "loss": 1.9563, "step": 3136 }, { "epoch": 0.153173828125, "grad_norm": 0.23329861462116241, "learning_rate": 0.00047840975753525307, "loss": 1.9345, "step": 3137 }, { "epoch": 0.15322265625, "grad_norm": 0.2755569815635681, "learning_rate": 0.0004783947828177471, "loss": 1.955, "step": 3138 }, { "epoch": 0.153271484375, "grad_norm": 0.22522315382957458, "learning_rate": 0.0004783798031708051, "loss": 1.9071, "step": 3139 }, { "epoch": 0.1533203125, "grad_norm": 0.28022444248199463, "learning_rate": 0.00047836481859479026, "loss": 1.9087, "step": 3140 }, { "epoch": 0.153369140625, "grad_norm": 0.2784350514411926, "learning_rate": 0.00047834982909006563, "loss": 1.9596, "step": 3141 }, { "epoch": 0.15341796875, "grad_norm": 0.2485807240009308, "learning_rate": 0.0004783348346569945, "loss": 1.9194, "step": 3142 }, { "epoch": 0.153466796875, "grad_norm": 0.29896464943885803, "learning_rate": 0.0004783198352959403, "loss": 1.9011, "step": 3143 }, { "epoch": 0.153515625, "grad_norm": 0.2632581293582916, "learning_rate": 0.00047830483100726656, "loss": 1.9218, "step": 3144 }, { "epoch": 0.153564453125, "grad_norm": 0.24623771011829376, "learning_rate": 0.000478289821791337, "loss": 1.9087, "step": 3145 }, { "epoch": 0.15361328125, "grad_norm": 0.27826979756355286, "learning_rate": 0.0004782748076485151, "loss": 1.9901, "step": 3146 }, { "epoch": 0.153662109375, "grad_norm": 0.2935371696949005, "learning_rate": 0.0004782597885791651, "loss": 1.8984, "step": 3147 }, { "epoch": 0.1537109375, "grad_norm": 0.35573258996009827, "learning_rate": 0.00047824476458365074, "loss": 1.9382, "step": 3148 }, { "epoch": 0.153759765625, "grad_norm": 0.30058637261390686, "learning_rate": 0.0004782297356623362, "loss": 1.9268, "step": 3149 }, { "epoch": 0.15380859375, "grad_norm": 0.3107689619064331, "learning_rate": 0.0004782147018155858, "loss": 1.9236, "step": 3150 }, { "epoch": 0.153857421875, "grad_norm": 0.25121745467185974, "learning_rate": 0.0004781996630437639, "loss": 1.9084, "step": 3151 }, { "epoch": 0.15390625, "grad_norm": 0.29298001527786255, "learning_rate": 0.0004781846193472348, "loss": 1.9461, "step": 3152 }, { "epoch": 0.153955078125, "grad_norm": 0.34264591336250305, "learning_rate": 0.0004781695707263632, "loss": 1.9129, "step": 3153 }, { "epoch": 0.15400390625, "grad_norm": 0.35323676466941833, "learning_rate": 0.00047815451718151387, "loss": 1.9135, "step": 3154 }, { "epoch": 0.154052734375, "grad_norm": 0.4417264759540558, "learning_rate": 0.00047813945871305163, "loss": 1.9185, "step": 3155 }, { "epoch": 0.1541015625, "grad_norm": 0.39362871646881104, "learning_rate": 0.00047812439532134144, "loss": 1.9072, "step": 3156 }, { "epoch": 0.154150390625, "grad_norm": 0.28620702028274536, "learning_rate": 0.00047810932700674823, "loss": 1.9118, "step": 3157 }, { "epoch": 0.15419921875, "grad_norm": 0.2684750258922577, "learning_rate": 0.0004780942537696374, "loss": 1.9376, "step": 3158 }, { "epoch": 0.154248046875, "grad_norm": 0.2509436011314392, "learning_rate": 0.00047807917561037413, "loss": 1.9399, "step": 3159 }, { "epoch": 0.154296875, "grad_norm": 0.28967711329460144, "learning_rate": 0.0004780640925293239, "loss": 1.941, "step": 3160 }, { "epoch": 0.154345703125, "grad_norm": 0.26771867275238037, "learning_rate": 0.00047804900452685223, "loss": 1.9554, "step": 3161 }, { "epoch": 0.15439453125, "grad_norm": 0.3004494607448578, "learning_rate": 0.00047803391160332483, "loss": 1.9134, "step": 3162 }, { "epoch": 0.154443359375, "grad_norm": 0.2806358337402344, "learning_rate": 0.00047801881375910744, "loss": 1.9234, "step": 3163 }, { "epoch": 0.1544921875, "grad_norm": 0.26264527440071106, "learning_rate": 0.000478003710994566, "loss": 1.9329, "step": 3164 }, { "epoch": 0.154541015625, "grad_norm": 0.2663244903087616, "learning_rate": 0.00047798860331006656, "loss": 1.9317, "step": 3165 }, { "epoch": 0.15458984375, "grad_norm": 0.24271610379219055, "learning_rate": 0.0004779734907059752, "loss": 1.925, "step": 3166 }, { "epoch": 0.154638671875, "grad_norm": 0.24901065230369568, "learning_rate": 0.0004779583731826583, "loss": 1.9284, "step": 3167 }, { "epoch": 0.1546875, "grad_norm": 0.2662883698940277, "learning_rate": 0.0004779432507404821, "loss": 1.9447, "step": 3168 }, { "epoch": 0.154736328125, "grad_norm": 0.23717673122882843, "learning_rate": 0.00047792812337981317, "loss": 1.9196, "step": 3169 }, { "epoch": 0.15478515625, "grad_norm": 0.27228468656539917, "learning_rate": 0.00047791299110101814, "loss": 1.8937, "step": 3170 }, { "epoch": 0.154833984375, "grad_norm": 0.3027532398700714, "learning_rate": 0.00047789785390446376, "loss": 1.9324, "step": 3171 }, { "epoch": 0.1548828125, "grad_norm": 0.3277203142642975, "learning_rate": 0.0004778827117905169, "loss": 1.9287, "step": 3172 }, { "epoch": 0.154931640625, "grad_norm": 0.30492499470710754, "learning_rate": 0.0004778675647595445, "loss": 1.9295, "step": 3173 }, { "epoch": 0.15498046875, "grad_norm": 0.4197116494178772, "learning_rate": 0.0004778524128119138, "loss": 1.9204, "step": 3174 }, { "epoch": 0.155029296875, "grad_norm": 0.426797479391098, "learning_rate": 0.00047783725594799175, "loss": 1.9498, "step": 3175 }, { "epoch": 0.155078125, "grad_norm": 0.3055233061313629, "learning_rate": 0.00047782209416814586, "loss": 1.9055, "step": 3176 }, { "epoch": 0.155126953125, "grad_norm": 0.35145682096481323, "learning_rate": 0.0004778069274727436, "loss": 1.8734, "step": 3177 }, { "epoch": 0.15517578125, "grad_norm": 0.31752973794937134, "learning_rate": 0.0004777917558621526, "loss": 1.9349, "step": 3178 }, { "epoch": 0.155224609375, "grad_norm": 0.33173927664756775, "learning_rate": 0.0004777765793367404, "loss": 1.9417, "step": 3179 }, { "epoch": 0.1552734375, "grad_norm": 0.4103988707065582, "learning_rate": 0.0004777613978968749, "loss": 1.9658, "step": 3180 }, { "epoch": 0.155322265625, "grad_norm": 0.31480157375335693, "learning_rate": 0.00047774621154292405, "loss": 1.9372, "step": 3181 }, { "epoch": 0.15537109375, "grad_norm": 0.30125394463539124, "learning_rate": 0.0004777310202752558, "loss": 1.965, "step": 3182 }, { "epoch": 0.155419921875, "grad_norm": 0.3077690899372101, "learning_rate": 0.00047771582409423846, "loss": 1.9595, "step": 3183 }, { "epoch": 0.15546875, "grad_norm": 0.3611440062522888, "learning_rate": 0.00047770062300024033, "loss": 1.913, "step": 3184 }, { "epoch": 0.155517578125, "grad_norm": 0.36909863352775574, "learning_rate": 0.0004776854169936296, "loss": 1.9071, "step": 3185 }, { "epoch": 0.15556640625, "grad_norm": 0.3363829255104065, "learning_rate": 0.00047767020607477505, "loss": 1.9496, "step": 3186 }, { "epoch": 0.155615234375, "grad_norm": 0.34316912293434143, "learning_rate": 0.00047765499024404523, "loss": 1.8997, "step": 3187 }, { "epoch": 0.1556640625, "grad_norm": 0.31927305459976196, "learning_rate": 0.0004776397695018089, "loss": 1.9026, "step": 3188 }, { "epoch": 0.155712890625, "grad_norm": 0.3651661276817322, "learning_rate": 0.000477624543848435, "loss": 1.9406, "step": 3189 }, { "epoch": 0.15576171875, "grad_norm": 0.31919077038764954, "learning_rate": 0.00047760931328429243, "loss": 1.9063, "step": 3190 }, { "epoch": 0.155810546875, "grad_norm": 0.24442006647586823, "learning_rate": 0.00047759407780975047, "loss": 1.9422, "step": 3191 }, { "epoch": 0.155859375, "grad_norm": 0.3914593458175659, "learning_rate": 0.0004775788374251782, "loss": 1.9414, "step": 3192 }, { "epoch": 0.155908203125, "grad_norm": 0.4275575578212738, "learning_rate": 0.000477563592130945, "loss": 1.9419, "step": 3193 }, { "epoch": 0.15595703125, "grad_norm": 0.2920864522457123, "learning_rate": 0.00047754834192742046, "loss": 1.9015, "step": 3194 }, { "epoch": 0.156005859375, "grad_norm": 0.24952945113182068, "learning_rate": 0.0004775330868149741, "loss": 1.8902, "step": 3195 }, { "epoch": 0.1560546875, "grad_norm": 0.30675649642944336, "learning_rate": 0.00047751782679397573, "loss": 1.9386, "step": 3196 }, { "epoch": 0.156103515625, "grad_norm": 0.31249767541885376, "learning_rate": 0.00047750256186479507, "loss": 1.9039, "step": 3197 }, { "epoch": 0.15615234375, "grad_norm": 0.24750681221485138, "learning_rate": 0.0004774872920278022, "loss": 1.9874, "step": 3198 }, { "epoch": 0.156201171875, "grad_norm": 0.2442438155412674, "learning_rate": 0.000477472017283367, "loss": 1.9039, "step": 3199 }, { "epoch": 0.15625, "grad_norm": 0.28377601504325867, "learning_rate": 0.0004774567376318599, "loss": 1.9211, "step": 3200 }, { "epoch": 0.156298828125, "grad_norm": 0.2504023313522339, "learning_rate": 0.0004774414530736512, "loss": 1.9334, "step": 3201 }, { "epoch": 0.15634765625, "grad_norm": 0.24315133690834045, "learning_rate": 0.00047742616360911105, "loss": 1.8756, "step": 3202 }, { "epoch": 0.156396484375, "grad_norm": 0.265828937292099, "learning_rate": 0.00047741086923861034, "loss": 1.9472, "step": 3203 }, { "epoch": 0.1564453125, "grad_norm": 0.23835520446300507, "learning_rate": 0.0004773955699625196, "loss": 1.9244, "step": 3204 }, { "epoch": 0.156494140625, "grad_norm": 0.2483202964067459, "learning_rate": 0.0004773802657812095, "loss": 1.9403, "step": 3205 }, { "epoch": 0.15654296875, "grad_norm": 0.2571318745613098, "learning_rate": 0.0004773649566950512, "loss": 1.9351, "step": 3206 }, { "epoch": 0.156591796875, "grad_norm": 0.2247491031885147, "learning_rate": 0.0004773496427044155, "loss": 1.8789, "step": 3207 }, { "epoch": 0.156640625, "grad_norm": 0.2916520833969116, "learning_rate": 0.0004773343238096737, "loss": 2.0088, "step": 3208 }, { "epoch": 0.156689453125, "grad_norm": 0.28271958231925964, "learning_rate": 0.0004773190000111969, "loss": 1.9536, "step": 3209 }, { "epoch": 0.15673828125, "grad_norm": 0.2650965452194214, "learning_rate": 0.0004773036713093567, "loss": 1.926, "step": 3210 }, { "epoch": 0.156787109375, "grad_norm": 0.2521488070487976, "learning_rate": 0.00047728833770452453, "loss": 1.9528, "step": 3211 }, { "epoch": 0.1568359375, "grad_norm": 0.26713311672210693, "learning_rate": 0.0004772729991970719, "loss": 1.9173, "step": 3212 }, { "epoch": 0.156884765625, "grad_norm": 0.30150851607322693, "learning_rate": 0.0004772576557873706, "loss": 1.9165, "step": 3213 }, { "epoch": 0.15693359375, "grad_norm": 0.2646191418170929, "learning_rate": 0.0004772423074757926, "loss": 1.9693, "step": 3214 }, { "epoch": 0.156982421875, "grad_norm": 0.29648715257644653, "learning_rate": 0.00047722695426270973, "loss": 1.9143, "step": 3215 }, { "epoch": 0.15703125, "grad_norm": 0.336855411529541, "learning_rate": 0.00047721159614849424, "loss": 1.9573, "step": 3216 }, { "epoch": 0.157080078125, "grad_norm": 0.26237601041793823, "learning_rate": 0.0004771962331335182, "loss": 1.9489, "step": 3217 }, { "epoch": 0.15712890625, "grad_norm": 0.4221075475215912, "learning_rate": 0.000477180865218154, "loss": 1.9128, "step": 3218 }, { "epoch": 0.157177734375, "grad_norm": 0.4910816252231598, "learning_rate": 0.00047716549240277414, "loss": 1.9543, "step": 3219 }, { "epoch": 0.1572265625, "grad_norm": 0.32285276055336, "learning_rate": 0.00047715011468775124, "loss": 1.9209, "step": 3220 }, { "epoch": 0.157275390625, "grad_norm": 0.41776710748672485, "learning_rate": 0.0004771347320734578, "loss": 1.8817, "step": 3221 }, { "epoch": 0.15732421875, "grad_norm": 0.34471940994262695, "learning_rate": 0.00047711934456026674, "loss": 1.9296, "step": 3222 }, { "epoch": 0.157373046875, "grad_norm": 0.3411334753036499, "learning_rate": 0.0004771039521485509, "loss": 1.9283, "step": 3223 }, { "epoch": 0.157421875, "grad_norm": 0.45116177201271057, "learning_rate": 0.0004770885548386835, "loss": 1.9342, "step": 3224 }, { "epoch": 0.157470703125, "grad_norm": 0.2684197723865509, "learning_rate": 0.00047707315263103764, "loss": 1.9159, "step": 3225 }, { "epoch": 0.15751953125, "grad_norm": 0.39602571725845337, "learning_rate": 0.0004770577455259865, "loss": 1.9009, "step": 3226 }, { "epoch": 0.157568359375, "grad_norm": 0.3569110929965973, "learning_rate": 0.0004770423335239037, "loss": 1.9779, "step": 3227 }, { "epoch": 0.1576171875, "grad_norm": 0.27012336254119873, "learning_rate": 0.0004770269166251625, "loss": 1.8734, "step": 3228 }, { "epoch": 0.157666015625, "grad_norm": 0.30546802282333374, "learning_rate": 0.0004770114948301367, "loss": 1.9357, "step": 3229 }, { "epoch": 0.15771484375, "grad_norm": 0.3231799602508545, "learning_rate": 0.00047699606813920005, "loss": 1.9475, "step": 3230 }, { "epoch": 0.157763671875, "grad_norm": 0.27372848987579346, "learning_rate": 0.0004769806365527264, "loss": 1.9347, "step": 3231 }, { "epoch": 0.1578125, "grad_norm": 0.2679883539676666, "learning_rate": 0.0004769652000710898, "loss": 1.9267, "step": 3232 }, { "epoch": 0.157861328125, "grad_norm": 0.3271799683570862, "learning_rate": 0.0004769497586946643, "loss": 1.9693, "step": 3233 }, { "epoch": 0.15791015625, "grad_norm": 0.2850671410560608, "learning_rate": 0.00047693431242382405, "loss": 1.9183, "step": 3234 }, { "epoch": 0.157958984375, "grad_norm": 0.31839677691459656, "learning_rate": 0.0004769188612589436, "loss": 1.9407, "step": 3235 }, { "epoch": 0.1580078125, "grad_norm": 0.28460460901260376, "learning_rate": 0.0004769034052003973, "loss": 1.9196, "step": 3236 }, { "epoch": 0.158056640625, "grad_norm": 0.32962566614151, "learning_rate": 0.0004768879442485598, "loss": 1.9205, "step": 3237 }, { "epoch": 0.15810546875, "grad_norm": 0.22497716546058655, "learning_rate": 0.00047687247840380576, "loss": 1.9035, "step": 3238 }, { "epoch": 0.158154296875, "grad_norm": 0.31635966897010803, "learning_rate": 0.00047685700766651, "loss": 1.9392, "step": 3239 }, { "epoch": 0.158203125, "grad_norm": 0.2706034183502197, "learning_rate": 0.00047684153203704754, "loss": 1.8693, "step": 3240 }, { "epoch": 0.158251953125, "grad_norm": 0.2624804973602295, "learning_rate": 0.00047682605151579333, "loss": 1.9443, "step": 3241 }, { "epoch": 0.15830078125, "grad_norm": 0.31015655398368835, "learning_rate": 0.00047681056610312264, "loss": 1.9482, "step": 3242 }, { "epoch": 0.158349609375, "grad_norm": 0.42623189091682434, "learning_rate": 0.0004767950757994108, "loss": 1.9327, "step": 3243 }, { "epoch": 0.1583984375, "grad_norm": 0.3280121088027954, "learning_rate": 0.0004767795806050331, "loss": 1.9081, "step": 3244 }, { "epoch": 0.158447265625, "grad_norm": 0.25043556094169617, "learning_rate": 0.0004767640805203652, "loss": 1.9139, "step": 3245 }, { "epoch": 0.15849609375, "grad_norm": 0.3330765962600708, "learning_rate": 0.00047674857554578273, "loss": 1.8939, "step": 3246 }, { "epoch": 0.158544921875, "grad_norm": 0.31137970089912415, "learning_rate": 0.00047673306568166135, "loss": 1.9147, "step": 3247 }, { "epoch": 0.15859375, "grad_norm": 0.27751263976097107, "learning_rate": 0.0004767175509283772, "loss": 1.8957, "step": 3248 }, { "epoch": 0.158642578125, "grad_norm": 0.27188071608543396, "learning_rate": 0.00047670203128630603, "loss": 1.9185, "step": 3249 }, { "epoch": 0.15869140625, "grad_norm": 0.27606967091560364, "learning_rate": 0.00047668650675582413, "loss": 1.9004, "step": 3250 }, { "epoch": 0.158740234375, "grad_norm": 0.3047439455986023, "learning_rate": 0.0004766709773373077, "loss": 1.9153, "step": 3251 }, { "epoch": 0.1587890625, "grad_norm": 0.4039624035358429, "learning_rate": 0.00047665544303113314, "loss": 1.8895, "step": 3252 }, { "epoch": 0.158837890625, "grad_norm": 0.33891570568084717, "learning_rate": 0.00047663990383767685, "loss": 1.8981, "step": 3253 }, { "epoch": 0.15888671875, "grad_norm": 0.2717687785625458, "learning_rate": 0.0004766243597573155, "loss": 1.9272, "step": 3254 }, { "epoch": 0.158935546875, "grad_norm": 0.29201197624206543, "learning_rate": 0.00047660881079042585, "loss": 1.929, "step": 3255 }, { "epoch": 0.158984375, "grad_norm": 0.27274030447006226, "learning_rate": 0.0004765932569373847, "loss": 1.9088, "step": 3256 }, { "epoch": 0.159033203125, "grad_norm": 0.30791956186294556, "learning_rate": 0.000476577698198569, "loss": 1.9192, "step": 3257 }, { "epoch": 0.15908203125, "grad_norm": 0.32191401720046997, "learning_rate": 0.0004765621345743558, "loss": 1.8774, "step": 3258 }, { "epoch": 0.159130859375, "grad_norm": 0.2420378029346466, "learning_rate": 0.0004765465660651224, "loss": 1.9479, "step": 3259 }, { "epoch": 0.1591796875, "grad_norm": 0.34327399730682373, "learning_rate": 0.000476530992671246, "loss": 1.903, "step": 3260 }, { "epoch": 0.159228515625, "grad_norm": 0.34685876965522766, "learning_rate": 0.0004765154143931041, "loss": 1.9426, "step": 3261 }, { "epoch": 0.15927734375, "grad_norm": 0.32863160967826843, "learning_rate": 0.0004764998312310742, "loss": 1.8976, "step": 3262 }, { "epoch": 0.159326171875, "grad_norm": 0.26727351546287537, "learning_rate": 0.00047648424318553405, "loss": 1.9481, "step": 3263 }, { "epoch": 0.159375, "grad_norm": 0.27921435236930847, "learning_rate": 0.00047646865025686134, "loss": 1.8775, "step": 3264 }, { "epoch": 0.159423828125, "grad_norm": 0.3615817129611969, "learning_rate": 0.0004764530524454341, "loss": 1.9278, "step": 3265 }, { "epoch": 0.15947265625, "grad_norm": 0.5376487374305725, "learning_rate": 0.0004764374497516303, "loss": 1.9626, "step": 3266 }, { "epoch": 0.159521484375, "grad_norm": 0.28911468386650085, "learning_rate": 0.0004764218421758281, "loss": 1.9546, "step": 3267 }, { "epoch": 0.1595703125, "grad_norm": 0.34092968702316284, "learning_rate": 0.0004764062297184056, "loss": 1.8902, "step": 3268 }, { "epoch": 0.159619140625, "grad_norm": 0.3424206078052521, "learning_rate": 0.0004763906123797414, "loss": 1.9362, "step": 3269 }, { "epoch": 0.15966796875, "grad_norm": 0.3244931399822235, "learning_rate": 0.0004763749901602139, "loss": 1.9166, "step": 3270 }, { "epoch": 0.159716796875, "grad_norm": 0.2813383638858795, "learning_rate": 0.00047635936306020184, "loss": 1.9186, "step": 3271 }, { "epoch": 0.159765625, "grad_norm": 0.37207794189453125, "learning_rate": 0.0004763437310800837, "loss": 1.9558, "step": 3272 }, { "epoch": 0.159814453125, "grad_norm": 0.42455577850341797, "learning_rate": 0.00047632809422023853, "loss": 1.8912, "step": 3273 }, { "epoch": 0.15986328125, "grad_norm": 0.2842428982257843, "learning_rate": 0.0004763124524810452, "loss": 1.8993, "step": 3274 }, { "epoch": 0.159912109375, "grad_norm": 0.3391556143760681, "learning_rate": 0.00047629680586288296, "loss": 1.94, "step": 3275 }, { "epoch": 0.1599609375, "grad_norm": 0.33702772855758667, "learning_rate": 0.0004762811543661308, "loss": 1.959, "step": 3276 }, { "epoch": 0.160009765625, "grad_norm": 0.2948959767818451, "learning_rate": 0.0004762654979911682, "loss": 1.9617, "step": 3277 }, { "epoch": 0.16005859375, "grad_norm": 0.31386587023735046, "learning_rate": 0.0004762498367383746, "loss": 1.955, "step": 3278 }, { "epoch": 0.160107421875, "grad_norm": 0.3071090877056122, "learning_rate": 0.0004762341706081294, "loss": 1.8916, "step": 3279 }, { "epoch": 0.16015625, "grad_norm": 0.2679007351398468, "learning_rate": 0.0004762184996008125, "loss": 1.9119, "step": 3280 }, { "epoch": 0.160205078125, "grad_norm": 0.344775915145874, "learning_rate": 0.00047620282371680365, "loss": 1.9644, "step": 3281 }, { "epoch": 0.16025390625, "grad_norm": 0.29769834876060486, "learning_rate": 0.00047618714295648263, "loss": 1.9406, "step": 3282 }, { "epoch": 0.160302734375, "grad_norm": 0.275917649269104, "learning_rate": 0.0004761714573202296, "loss": 1.8935, "step": 3283 }, { "epoch": 0.1603515625, "grad_norm": 0.46834486722946167, "learning_rate": 0.0004761557668084247, "loss": 1.9145, "step": 3284 }, { "epoch": 0.160400390625, "grad_norm": 0.2520643174648285, "learning_rate": 0.0004761400714214482, "loss": 1.9553, "step": 3285 }, { "epoch": 0.16044921875, "grad_norm": 0.27201616764068604, "learning_rate": 0.00047612437115968043, "loss": 1.9196, "step": 3286 }, { "epoch": 0.160498046875, "grad_norm": 0.25188300013542175, "learning_rate": 0.00047610866602350196, "loss": 1.9587, "step": 3287 }, { "epoch": 0.160546875, "grad_norm": 0.27425333857536316, "learning_rate": 0.0004760929560132934, "loss": 1.9383, "step": 3288 }, { "epoch": 0.160595703125, "grad_norm": 0.2671681344509125, "learning_rate": 0.00047607724112943547, "loss": 1.9324, "step": 3289 }, { "epoch": 0.16064453125, "grad_norm": 0.25686177611351013, "learning_rate": 0.00047606152137230905, "loss": 1.9361, "step": 3290 }, { "epoch": 0.160693359375, "grad_norm": 0.28644007444381714, "learning_rate": 0.0004760457967422952, "loss": 1.9207, "step": 3291 }, { "epoch": 0.1607421875, "grad_norm": 0.23318295180797577, "learning_rate": 0.0004760300672397749, "loss": 1.9229, "step": 3292 }, { "epoch": 0.160791015625, "grad_norm": 0.26272234320640564, "learning_rate": 0.0004760143328651295, "loss": 1.9261, "step": 3293 }, { "epoch": 0.16083984375, "grad_norm": 0.25343799591064453, "learning_rate": 0.0004759985936187401, "loss": 1.9345, "step": 3294 }, { "epoch": 0.160888671875, "grad_norm": 0.2636781632900238, "learning_rate": 0.0004759828495009884, "loss": 1.9432, "step": 3295 }, { "epoch": 0.1609375, "grad_norm": 0.32709410786628723, "learning_rate": 0.00047596710051225585, "loss": 1.9513, "step": 3296 }, { "epoch": 0.160986328125, "grad_norm": 0.3224184215068817, "learning_rate": 0.0004759513466529242, "loss": 1.9511, "step": 3297 }, { "epoch": 0.16103515625, "grad_norm": 0.28055456280708313, "learning_rate": 0.00047593558792337516, "loss": 1.9192, "step": 3298 }, { "epoch": 0.161083984375, "grad_norm": 0.32304686307907104, "learning_rate": 0.00047591982432399073, "loss": 1.8905, "step": 3299 }, { "epoch": 0.1611328125, "grad_norm": 0.3646746575832367, "learning_rate": 0.00047590405585515294, "loss": 1.9054, "step": 3300 }, { "epoch": 0.161181640625, "grad_norm": 0.34100842475891113, "learning_rate": 0.000475888282517244, "loss": 1.9564, "step": 3301 }, { "epoch": 0.16123046875, "grad_norm": 0.3870326280593872, "learning_rate": 0.0004758725043106461, "loss": 1.9217, "step": 3302 }, { "epoch": 0.161279296875, "grad_norm": 0.37388452887535095, "learning_rate": 0.0004758567212357417, "loss": 1.891, "step": 3303 }, { "epoch": 0.161328125, "grad_norm": 0.293783575296402, "learning_rate": 0.0004758409332929133, "loss": 1.9105, "step": 3304 }, { "epoch": 0.161376953125, "grad_norm": 0.279044508934021, "learning_rate": 0.0004758251404825435, "loss": 1.9309, "step": 3305 }, { "epoch": 0.16142578125, "grad_norm": 0.2777695059776306, "learning_rate": 0.0004758093428050151, "loss": 1.906, "step": 3306 }, { "epoch": 0.161474609375, "grad_norm": 0.24531801044940948, "learning_rate": 0.00047579354026071086, "loss": 1.8844, "step": 3307 }, { "epoch": 0.1615234375, "grad_norm": 0.269085556268692, "learning_rate": 0.00047577773285001395, "loss": 1.9465, "step": 3308 }, { "epoch": 0.161572265625, "grad_norm": 0.2881282866001129, "learning_rate": 0.00047576192057330736, "loss": 1.9204, "step": 3309 }, { "epoch": 0.16162109375, "grad_norm": 0.29340115189552307, "learning_rate": 0.0004757461034309744, "loss": 1.9321, "step": 3310 }, { "epoch": 0.161669921875, "grad_norm": 0.2407786101102829, "learning_rate": 0.0004757302814233982, "loss": 1.9374, "step": 3311 }, { "epoch": 0.16171875, "grad_norm": 0.3100285828113556, "learning_rate": 0.0004757144545509625, "loss": 1.9125, "step": 3312 }, { "epoch": 0.161767578125, "grad_norm": 0.27709388732910156, "learning_rate": 0.00047569862281405064, "loss": 1.9142, "step": 3313 }, { "epoch": 0.16181640625, "grad_norm": 0.2859870195388794, "learning_rate": 0.0004756827862130465, "loss": 1.9202, "step": 3314 }, { "epoch": 0.161865234375, "grad_norm": 0.33827900886535645, "learning_rate": 0.00047566694474833373, "loss": 1.8998, "step": 3315 }, { "epoch": 0.1619140625, "grad_norm": 0.31451961398124695, "learning_rate": 0.0004756510984202964, "loss": 1.8784, "step": 3316 }, { "epoch": 0.161962890625, "grad_norm": 0.27219337224960327, "learning_rate": 0.00047563524722931853, "loss": 1.9803, "step": 3317 }, { "epoch": 0.16201171875, "grad_norm": 0.3978055417537689, "learning_rate": 0.0004756193911757842, "loss": 1.9361, "step": 3318 }, { "epoch": 0.162060546875, "grad_norm": 0.31779932975769043, "learning_rate": 0.00047560353026007766, "loss": 1.902, "step": 3319 }, { "epoch": 0.162109375, "grad_norm": 0.3173840045928955, "learning_rate": 0.00047558766448258357, "loss": 1.8918, "step": 3320 }, { "epoch": 0.162158203125, "grad_norm": 0.3546499013900757, "learning_rate": 0.00047557179384368614, "loss": 1.9214, "step": 3321 }, { "epoch": 0.16220703125, "grad_norm": 0.2937312424182892, "learning_rate": 0.0004755559183437703, "loss": 1.9027, "step": 3322 }, { "epoch": 0.162255859375, "grad_norm": 0.3191480338573456, "learning_rate": 0.0004755400379832205, "loss": 1.8704, "step": 3323 }, { "epoch": 0.1623046875, "grad_norm": 0.36843445897102356, "learning_rate": 0.00047552415276242186, "loss": 1.8954, "step": 3324 }, { "epoch": 0.162353515625, "grad_norm": 0.256092369556427, "learning_rate": 0.0004755082626817593, "loss": 1.9097, "step": 3325 }, { "epoch": 0.16240234375, "grad_norm": 0.2950100004673004, "learning_rate": 0.0004754923677416178, "loss": 1.9478, "step": 3326 }, { "epoch": 0.162451171875, "grad_norm": 0.2809121906757355, "learning_rate": 0.00047547646794238277, "loss": 1.9608, "step": 3327 }, { "epoch": 0.1625, "grad_norm": 0.3790995478630066, "learning_rate": 0.00047546056328443947, "loss": 1.9199, "step": 3328 }, { "epoch": 0.162548828125, "grad_norm": 0.3500390946865082, "learning_rate": 0.0004754446537681734, "loss": 1.9177, "step": 3329 }, { "epoch": 0.16259765625, "grad_norm": 0.3171538710594177, "learning_rate": 0.00047542873939397006, "loss": 1.9545, "step": 3330 }, { "epoch": 0.162646484375, "grad_norm": 0.346647709608078, "learning_rate": 0.0004754128201622152, "loss": 1.9163, "step": 3331 }, { "epoch": 0.1626953125, "grad_norm": 0.27350494265556335, "learning_rate": 0.00047539689607329467, "loss": 1.9326, "step": 3332 }, { "epoch": 0.162744140625, "grad_norm": 0.32076314091682434, "learning_rate": 0.00047538096712759434, "loss": 1.9122, "step": 3333 }, { "epoch": 0.16279296875, "grad_norm": 0.3112861216068268, "learning_rate": 0.00047536503332550024, "loss": 1.9183, "step": 3334 }, { "epoch": 0.162841796875, "grad_norm": 0.3562770485877991, "learning_rate": 0.00047534909466739867, "loss": 1.9261, "step": 3335 }, { "epoch": 0.162890625, "grad_norm": 0.28466108441352844, "learning_rate": 0.00047533315115367577, "loss": 1.9081, "step": 3336 }, { "epoch": 0.162939453125, "grad_norm": 0.31524670124053955, "learning_rate": 0.000475317202784718, "loss": 1.9328, "step": 3337 }, { "epoch": 0.16298828125, "grad_norm": 0.29324257373809814, "learning_rate": 0.000475301249560912, "loss": 1.935, "step": 3338 }, { "epoch": 0.163037109375, "grad_norm": 0.3544987142086029, "learning_rate": 0.0004752852914826442, "loss": 1.8927, "step": 3339 }, { "epoch": 0.1630859375, "grad_norm": 0.2674236595630646, "learning_rate": 0.0004752693285503014, "loss": 1.9185, "step": 3340 }, { "epoch": 0.163134765625, "grad_norm": 0.27070656418800354, "learning_rate": 0.0004752533607642707, "loss": 1.9419, "step": 3341 }, { "epoch": 0.16318359375, "grad_norm": 0.2966417968273163, "learning_rate": 0.0004752373881249387, "loss": 1.924, "step": 3342 }, { "epoch": 0.163232421875, "grad_norm": 0.2958088219165802, "learning_rate": 0.0004752214106326929, "loss": 1.9461, "step": 3343 }, { "epoch": 0.16328125, "grad_norm": 0.26680490374565125, "learning_rate": 0.0004752054282879203, "loss": 1.9598, "step": 3344 }, { "epoch": 0.163330078125, "grad_norm": 0.3023378551006317, "learning_rate": 0.0004751894410910084, "loss": 1.9006, "step": 3345 }, { "epoch": 0.16337890625, "grad_norm": 0.2941802144050598, "learning_rate": 0.0004751734490423444, "loss": 1.9326, "step": 3346 }, { "epoch": 0.163427734375, "grad_norm": 0.27539491653442383, "learning_rate": 0.0004751574521423161, "loss": 1.9768, "step": 3347 }, { "epoch": 0.1634765625, "grad_norm": 0.3282974362373352, "learning_rate": 0.00047514145039131116, "loss": 1.973, "step": 3348 }, { "epoch": 0.163525390625, "grad_norm": 0.32200369238853455, "learning_rate": 0.0004751254437897174, "loss": 1.8697, "step": 3349 }, { "epoch": 0.16357421875, "grad_norm": 0.29157009720802307, "learning_rate": 0.0004751094323379227, "loss": 1.966, "step": 3350 }, { "epoch": 0.163623046875, "grad_norm": 0.3543739318847656, "learning_rate": 0.00047509341603631515, "loss": 1.8608, "step": 3351 }, { "epoch": 0.163671875, "grad_norm": 0.38656288385391235, "learning_rate": 0.0004750773948852829, "loss": 1.9454, "step": 3352 }, { "epoch": 0.163720703125, "grad_norm": 0.28822702169418335, "learning_rate": 0.0004750613688852143, "loss": 1.9152, "step": 3353 }, { "epoch": 0.16376953125, "grad_norm": 0.4009400010108948, "learning_rate": 0.00047504533803649756, "loss": 1.9023, "step": 3354 }, { "epoch": 0.163818359375, "grad_norm": 0.33781033754348755, "learning_rate": 0.0004750293023395215, "loss": 1.9193, "step": 3355 }, { "epoch": 0.1638671875, "grad_norm": 0.298153817653656, "learning_rate": 0.0004750132617946745, "loss": 1.9083, "step": 3356 }, { "epoch": 0.163916015625, "grad_norm": 0.33000636100769043, "learning_rate": 0.00047499721640234543, "loss": 1.9377, "step": 3357 }, { "epoch": 0.16396484375, "grad_norm": 0.24701333045959473, "learning_rate": 0.0004749811661629231, "loss": 1.9096, "step": 3358 }, { "epoch": 0.164013671875, "grad_norm": 0.3228531777858734, "learning_rate": 0.0004749651110767966, "loss": 1.9273, "step": 3359 }, { "epoch": 0.1640625, "grad_norm": 0.2834751009941101, "learning_rate": 0.000474949051144355, "loss": 1.9394, "step": 3360 }, { "epoch": 0.164111328125, "grad_norm": 0.35435986518859863, "learning_rate": 0.00047493298636598754, "loss": 1.9279, "step": 3361 }, { "epoch": 0.16416015625, "grad_norm": 0.38314715027809143, "learning_rate": 0.00047491691674208343, "loss": 1.9474, "step": 3362 }, { "epoch": 0.164208984375, "grad_norm": 0.2923671305179596, "learning_rate": 0.00047490084227303224, "loss": 1.9272, "step": 3363 }, { "epoch": 0.1642578125, "grad_norm": 0.3433133065700531, "learning_rate": 0.0004748847629592236, "loss": 1.9133, "step": 3364 }, { "epoch": 0.164306640625, "grad_norm": 0.3422880172729492, "learning_rate": 0.00047486867880104714, "loss": 1.9153, "step": 3365 }, { "epoch": 0.16435546875, "grad_norm": 0.2731762230396271, "learning_rate": 0.00047485258979889267, "loss": 1.8906, "step": 3366 }, { "epoch": 0.164404296875, "grad_norm": 0.34708112478256226, "learning_rate": 0.0004748364959531501, "loss": 1.946, "step": 3367 }, { "epoch": 0.164453125, "grad_norm": 0.3383144736289978, "learning_rate": 0.0004748203972642096, "loss": 1.9256, "step": 3368 }, { "epoch": 0.164501953125, "grad_norm": 0.2584901452064514, "learning_rate": 0.0004748042937324611, "loss": 1.9511, "step": 3369 }, { "epoch": 0.16455078125, "grad_norm": 0.2999621033668518, "learning_rate": 0.00047478818535829503, "loss": 1.9429, "step": 3370 }, { "epoch": 0.164599609375, "grad_norm": 0.2516031563282013, "learning_rate": 0.00047477207214210185, "loss": 1.9857, "step": 3371 }, { "epoch": 0.1646484375, "grad_norm": 0.267128586769104, "learning_rate": 0.00047475595408427197, "loss": 1.9284, "step": 3372 }, { "epoch": 0.164697265625, "grad_norm": 0.2865886390209198, "learning_rate": 0.00047473983118519604, "loss": 1.9242, "step": 3373 }, { "epoch": 0.16474609375, "grad_norm": 0.3434985876083374, "learning_rate": 0.00047472370344526496, "loss": 1.9441, "step": 3374 }, { "epoch": 0.164794921875, "grad_norm": 0.35673972964286804, "learning_rate": 0.0004747075708648693, "loss": 1.9368, "step": 3375 }, { "epoch": 0.16484375, "grad_norm": 0.30110883712768555, "learning_rate": 0.0004746914334444003, "loss": 1.9134, "step": 3376 }, { "epoch": 0.164892578125, "grad_norm": 0.28328701853752136, "learning_rate": 0.00047467529118424895, "loss": 1.9218, "step": 3377 }, { "epoch": 0.16494140625, "grad_norm": 0.3191482126712799, "learning_rate": 0.00047465914408480653, "loss": 1.8995, "step": 3378 }, { "epoch": 0.164990234375, "grad_norm": 0.32728874683380127, "learning_rate": 0.0004746429921464643, "loss": 1.9095, "step": 3379 }, { "epoch": 0.1650390625, "grad_norm": 0.3272866904735565, "learning_rate": 0.00047462683536961383, "loss": 1.8822, "step": 3380 }, { "epoch": 0.165087890625, "grad_norm": 0.37859010696411133, "learning_rate": 0.0004746106737546466, "loss": 1.9781, "step": 3381 }, { "epoch": 0.16513671875, "grad_norm": 0.32119083404541016, "learning_rate": 0.0004745945073019543, "loss": 1.9707, "step": 3382 }, { "epoch": 0.165185546875, "grad_norm": 0.33572596311569214, "learning_rate": 0.0004745783360119288, "loss": 1.8977, "step": 3383 }, { "epoch": 0.165234375, "grad_norm": 0.2710478603839874, "learning_rate": 0.000474562159884962, "loss": 1.9067, "step": 3384 }, { "epoch": 0.165283203125, "grad_norm": 0.2986358404159546, "learning_rate": 0.0004745459789214459, "loss": 1.9292, "step": 3385 }, { "epoch": 0.16533203125, "grad_norm": 0.2859049439430237, "learning_rate": 0.0004745297931217727, "loss": 1.9467, "step": 3386 }, { "epoch": 0.165380859375, "grad_norm": 0.31355923414230347, "learning_rate": 0.0004745136024863346, "loss": 1.9269, "step": 3387 }, { "epoch": 0.1654296875, "grad_norm": 0.28401777148246765, "learning_rate": 0.0004744974070155242, "loss": 1.9265, "step": 3388 }, { "epoch": 0.165478515625, "grad_norm": 0.3147432506084442, "learning_rate": 0.0004744812067097337, "loss": 1.9292, "step": 3389 }, { "epoch": 0.16552734375, "grad_norm": 0.30617809295654297, "learning_rate": 0.000474465001569356, "loss": 1.8873, "step": 3390 }, { "epoch": 0.165576171875, "grad_norm": 0.23867276310920715, "learning_rate": 0.00047444879159478374, "loss": 1.9336, "step": 3391 }, { "epoch": 0.165625, "grad_norm": 0.26511263847351074, "learning_rate": 0.0004744325767864097, "loss": 1.9168, "step": 3392 }, { "epoch": 0.165673828125, "grad_norm": 0.24198050796985626, "learning_rate": 0.00047441635714462704, "loss": 1.9201, "step": 3393 }, { "epoch": 0.16572265625, "grad_norm": 0.2934378981590271, "learning_rate": 0.00047440013266982867, "loss": 1.969, "step": 3394 }, { "epoch": 0.165771484375, "grad_norm": 0.2564351260662079, "learning_rate": 0.0004743839033624079, "loss": 1.9294, "step": 3395 }, { "epoch": 0.1658203125, "grad_norm": 0.3008393943309784, "learning_rate": 0.00047436766922275805, "loss": 1.9392, "step": 3396 }, { "epoch": 0.165869140625, "grad_norm": 0.2593735456466675, "learning_rate": 0.0004743514302512726, "loss": 1.9855, "step": 3397 }, { "epoch": 0.16591796875, "grad_norm": 0.29819029569625854, "learning_rate": 0.0004743351864483451, "loss": 1.9073, "step": 3398 }, { "epoch": 0.165966796875, "grad_norm": 0.2950916290283203, "learning_rate": 0.0004743189378143692, "loss": 1.9548, "step": 3399 }, { "epoch": 0.166015625, "grad_norm": 0.31649792194366455, "learning_rate": 0.00047430268434973873, "loss": 1.9204, "step": 3400 }, { "epoch": 0.166064453125, "grad_norm": 0.36165180802345276, "learning_rate": 0.00047428642605484747, "loss": 1.9715, "step": 3401 }, { "epoch": 0.16611328125, "grad_norm": 0.30170369148254395, "learning_rate": 0.0004742701629300896, "loss": 1.8974, "step": 3402 }, { "epoch": 0.166162109375, "grad_norm": 0.3670176565647125, "learning_rate": 0.0004742538949758593, "loss": 1.9022, "step": 3403 }, { "epoch": 0.1662109375, "grad_norm": 0.39769446849823, "learning_rate": 0.0004742376221925508, "loss": 1.9223, "step": 3404 }, { "epoch": 0.166259765625, "grad_norm": 0.3578191101551056, "learning_rate": 0.0004742213445805584, "loss": 1.9706, "step": 3405 }, { "epoch": 0.16630859375, "grad_norm": 0.3940681219100952, "learning_rate": 0.0004742050621402767, "loss": 1.9251, "step": 3406 }, { "epoch": 0.166357421875, "grad_norm": 0.3123006224632263, "learning_rate": 0.00047418877487210025, "loss": 1.9092, "step": 3407 }, { "epoch": 0.16640625, "grad_norm": 0.3513592481613159, "learning_rate": 0.00047417248277642385, "loss": 1.8721, "step": 3408 }, { "epoch": 0.166455078125, "grad_norm": 0.35604000091552734, "learning_rate": 0.0004741561858536423, "loss": 1.8954, "step": 3409 }, { "epoch": 0.16650390625, "grad_norm": 0.36085912585258484, "learning_rate": 0.0004741398841041506, "loss": 1.8847, "step": 3410 }, { "epoch": 0.166552734375, "grad_norm": 0.2629184126853943, "learning_rate": 0.0004741235775283438, "loss": 1.9364, "step": 3411 }, { "epoch": 0.1666015625, "grad_norm": 0.2994205951690674, "learning_rate": 0.0004741072661266171, "loss": 1.9156, "step": 3412 }, { "epoch": 0.166650390625, "grad_norm": 0.24226057529449463, "learning_rate": 0.0004740909498993659, "loss": 1.9162, "step": 3413 }, { "epoch": 0.16669921875, "grad_norm": 0.2753842771053314, "learning_rate": 0.0004740746288469855, "loss": 1.9761, "step": 3414 }, { "epoch": 0.166748046875, "grad_norm": 0.23330911993980408, "learning_rate": 0.00047405830296987165, "loss": 1.917, "step": 3415 }, { "epoch": 0.166796875, "grad_norm": 0.263791561126709, "learning_rate": 0.00047404197226841977, "loss": 1.9689, "step": 3416 }, { "epoch": 0.166845703125, "grad_norm": 0.30610889196395874, "learning_rate": 0.0004740256367430259, "loss": 1.946, "step": 3417 }, { "epoch": 0.16689453125, "grad_norm": 0.317686527967453, "learning_rate": 0.0004740092963940858, "loss": 1.9178, "step": 3418 }, { "epoch": 0.166943359375, "grad_norm": 0.24917952716350555, "learning_rate": 0.0004739929512219955, "loss": 1.9092, "step": 3419 }, { "epoch": 0.1669921875, "grad_norm": 0.3030681908130646, "learning_rate": 0.00047397660122715116, "loss": 1.9413, "step": 3420 }, { "epoch": 0.167041015625, "grad_norm": 0.37553805112838745, "learning_rate": 0.000473960246409949, "loss": 1.8975, "step": 3421 }, { "epoch": 0.16708984375, "grad_norm": 0.3387937545776367, "learning_rate": 0.0004739438867707855, "loss": 1.9425, "step": 3422 }, { "epoch": 0.167138671875, "grad_norm": 0.2374383956193924, "learning_rate": 0.000473927522310057, "loss": 1.8719, "step": 3423 }, { "epoch": 0.1671875, "grad_norm": 0.32030272483825684, "learning_rate": 0.00047391115302816017, "loss": 1.9207, "step": 3424 }, { "epoch": 0.167236328125, "grad_norm": 0.35612398386001587, "learning_rate": 0.0004738947789254918, "loss": 1.9155, "step": 3425 }, { "epoch": 0.16728515625, "grad_norm": 0.2843605577945709, "learning_rate": 0.00047387840000244865, "loss": 1.9576, "step": 3426 }, { "epoch": 0.167333984375, "grad_norm": 0.2664858102798462, "learning_rate": 0.00047386201625942775, "loss": 1.9534, "step": 3427 }, { "epoch": 0.1673828125, "grad_norm": 0.30552852153778076, "learning_rate": 0.000473845627696826, "loss": 1.8778, "step": 3428 }, { "epoch": 0.167431640625, "grad_norm": 0.2876681685447693, "learning_rate": 0.00047382923431504083, "loss": 1.9227, "step": 3429 }, { "epoch": 0.16748046875, "grad_norm": 0.3601766526699066, "learning_rate": 0.0004738128361144693, "loss": 1.9568, "step": 3430 }, { "epoch": 0.167529296875, "grad_norm": 0.3711298406124115, "learning_rate": 0.0004737964330955092, "loss": 1.9159, "step": 3431 }, { "epoch": 0.167578125, "grad_norm": 0.287837415933609, "learning_rate": 0.00047378002525855765, "loss": 1.9383, "step": 3432 }, { "epoch": 0.167626953125, "grad_norm": 0.34194421768188477, "learning_rate": 0.00047376361260401253, "loss": 1.9661, "step": 3433 }, { "epoch": 0.16767578125, "grad_norm": 0.3109811246395111, "learning_rate": 0.00047374719513227156, "loss": 1.9213, "step": 3434 }, { "epoch": 0.167724609375, "grad_norm": 0.2887398600578308, "learning_rate": 0.0004737307728437326, "loss": 1.8944, "step": 3435 }, { "epoch": 0.1677734375, "grad_norm": 0.2749607563018799, "learning_rate": 0.00047371434573879374, "loss": 1.9136, "step": 3436 }, { "epoch": 0.167822265625, "grad_norm": 0.2605856955051422, "learning_rate": 0.0004736979138178532, "loss": 1.9233, "step": 3437 }, { "epoch": 0.16787109375, "grad_norm": 0.3384186625480652, "learning_rate": 0.0004736814770813089, "loss": 1.8828, "step": 3438 }, { "epoch": 0.167919921875, "grad_norm": 0.31046247482299805, "learning_rate": 0.0004736650355295594, "loss": 1.8756, "step": 3439 }, { "epoch": 0.16796875, "grad_norm": 0.3246247470378876, "learning_rate": 0.00047364858916300323, "loss": 1.931, "step": 3440 }, { "epoch": 0.168017578125, "grad_norm": 0.29630354046821594, "learning_rate": 0.0004736321379820389, "loss": 1.9386, "step": 3441 }, { "epoch": 0.16806640625, "grad_norm": 0.25872117280960083, "learning_rate": 0.00047361568198706506, "loss": 1.9892, "step": 3442 }, { "epoch": 0.168115234375, "grad_norm": 0.34113454818725586, "learning_rate": 0.0004735992211784807, "loss": 1.9158, "step": 3443 }, { "epoch": 0.1681640625, "grad_norm": 0.3483835756778717, "learning_rate": 0.0004735827555566846, "loss": 1.9182, "step": 3444 }, { "epoch": 0.168212890625, "grad_norm": 0.26095643639564514, "learning_rate": 0.0004735662851220759, "loss": 1.9269, "step": 3445 }, { "epoch": 0.16826171875, "grad_norm": 0.28011268377304077, "learning_rate": 0.00047354980987505377, "loss": 1.9162, "step": 3446 }, { "epoch": 0.168310546875, "grad_norm": 0.2599872946739197, "learning_rate": 0.00047353332981601743, "loss": 1.9556, "step": 3447 }, { "epoch": 0.168359375, "grad_norm": 0.27058082818984985, "learning_rate": 0.00047351684494536633, "loss": 1.9801, "step": 3448 }, { "epoch": 0.168408203125, "grad_norm": 0.3256082236766815, "learning_rate": 0.0004735003552635001, "loss": 1.9347, "step": 3449 }, { "epoch": 0.16845703125, "grad_norm": 0.31944000720977783, "learning_rate": 0.00047348386077081825, "loss": 1.9317, "step": 3450 }, { "epoch": 0.168505859375, "grad_norm": 0.3244417905807495, "learning_rate": 0.00047346736146772054, "loss": 1.9393, "step": 3451 }, { "epoch": 0.1685546875, "grad_norm": 0.32319340109825134, "learning_rate": 0.0004734508573546069, "loss": 1.8871, "step": 3452 }, { "epoch": 0.168603515625, "grad_norm": 0.2208748161792755, "learning_rate": 0.00047343434843187727, "loss": 1.9129, "step": 3453 }, { "epoch": 0.16865234375, "grad_norm": 0.28212079405784607, "learning_rate": 0.0004734178346999318, "loss": 1.8765, "step": 3454 }, { "epoch": 0.168701171875, "grad_norm": 0.3087945580482483, "learning_rate": 0.00047340131615917076, "loss": 1.8767, "step": 3455 }, { "epoch": 0.16875, "grad_norm": 0.27676844596862793, "learning_rate": 0.00047338479280999434, "loss": 1.937, "step": 3456 }, { "epoch": 0.168798828125, "grad_norm": 0.35171905159950256, "learning_rate": 0.0004733682646528032, "loss": 1.9594, "step": 3457 }, { "epoch": 0.16884765625, "grad_norm": 0.3705417513847351, "learning_rate": 0.0004733517316879977, "loss": 1.9233, "step": 3458 }, { "epoch": 0.168896484375, "grad_norm": 0.3472554385662079, "learning_rate": 0.0004733351939159786, "loss": 1.9756, "step": 3459 }, { "epoch": 0.1689453125, "grad_norm": 0.3237781822681427, "learning_rate": 0.0004733186513371468, "loss": 1.9053, "step": 3460 }, { "epoch": 0.168994140625, "grad_norm": 0.34487593173980713, "learning_rate": 0.00047330210395190317, "loss": 1.9341, "step": 3461 }, { "epoch": 0.16904296875, "grad_norm": 0.27819743752479553, "learning_rate": 0.00047328555176064866, "loss": 1.9564, "step": 3462 }, { "epoch": 0.169091796875, "grad_norm": 0.3169393539428711, "learning_rate": 0.0004732689947637846, "loss": 1.9171, "step": 3463 }, { "epoch": 0.169140625, "grad_norm": 0.3160710334777832, "learning_rate": 0.00047325243296171207, "loss": 1.8888, "step": 3464 }, { "epoch": 0.169189453125, "grad_norm": 0.3059457838535309, "learning_rate": 0.0004732358663548326, "loss": 1.9519, "step": 3465 }, { "epoch": 0.16923828125, "grad_norm": 0.2851097583770752, "learning_rate": 0.00047321929494354765, "loss": 1.9215, "step": 3466 }, { "epoch": 0.169287109375, "grad_norm": 0.31533822417259216, "learning_rate": 0.0004732027187282588, "loss": 1.9161, "step": 3467 }, { "epoch": 0.1693359375, "grad_norm": 0.37033388018608093, "learning_rate": 0.0004731861377093679, "loss": 1.9282, "step": 3468 }, { "epoch": 0.169384765625, "grad_norm": 0.25974780321121216, "learning_rate": 0.00047316955188727666, "loss": 1.9116, "step": 3469 }, { "epoch": 0.16943359375, "grad_norm": 0.36173051595687866, "learning_rate": 0.00047315296126238714, "loss": 1.9512, "step": 3470 }, { "epoch": 0.169482421875, "grad_norm": 0.3316841125488281, "learning_rate": 0.0004731363658351015, "loss": 1.8706, "step": 3471 }, { "epoch": 0.16953125, "grad_norm": 0.2787225544452667, "learning_rate": 0.0004731197656058218, "loss": 1.9179, "step": 3472 }, { "epoch": 0.169580078125, "grad_norm": 0.3073117733001709, "learning_rate": 0.00047310316057495036, "loss": 1.9496, "step": 3473 }, { "epoch": 0.16962890625, "grad_norm": 0.28519973158836365, "learning_rate": 0.0004730865507428897, "loss": 1.929, "step": 3474 }, { "epoch": 0.169677734375, "grad_norm": 0.3205796480178833, "learning_rate": 0.00047306993611004235, "loss": 1.9199, "step": 3475 }, { "epoch": 0.1697265625, "grad_norm": 0.3041728138923645, "learning_rate": 0.00047305331667681094, "loss": 1.9065, "step": 3476 }, { "epoch": 0.169775390625, "grad_norm": 0.25414347648620605, "learning_rate": 0.00047303669244359826, "loss": 1.9091, "step": 3477 }, { "epoch": 0.16982421875, "grad_norm": 0.32518941164016724, "learning_rate": 0.0004730200634108073, "loss": 1.9019, "step": 3478 }, { "epoch": 0.169873046875, "grad_norm": 0.310246080160141, "learning_rate": 0.0004730034295788411, "loss": 1.9167, "step": 3479 }, { "epoch": 0.169921875, "grad_norm": 0.2916777729988098, "learning_rate": 0.00047298679094810253, "loss": 1.9127, "step": 3480 }, { "epoch": 0.169970703125, "grad_norm": 0.279739111661911, "learning_rate": 0.0004729701475189951, "loss": 1.8918, "step": 3481 }, { "epoch": 0.17001953125, "grad_norm": 0.24693146347999573, "learning_rate": 0.0004729534992919221, "loss": 1.9467, "step": 3482 }, { "epoch": 0.170068359375, "grad_norm": 0.2714826464653015, "learning_rate": 0.000472936846267287, "loss": 1.8433, "step": 3483 }, { "epoch": 0.1701171875, "grad_norm": 0.31232890486717224, "learning_rate": 0.00047292018844549347, "loss": 1.8758, "step": 3484 }, { "epoch": 0.170166015625, "grad_norm": 0.3621164858341217, "learning_rate": 0.0004729035258269451, "loss": 1.9735, "step": 3485 }, { "epoch": 0.17021484375, "grad_norm": 0.31091687083244324, "learning_rate": 0.00047288685841204576, "loss": 1.8931, "step": 3486 }, { "epoch": 0.170263671875, "grad_norm": 0.26274579763412476, "learning_rate": 0.0004728701862011995, "loss": 1.87, "step": 3487 }, { "epoch": 0.1703125, "grad_norm": 0.2915117144584656, "learning_rate": 0.0004728535091948103, "loss": 1.9406, "step": 3488 }, { "epoch": 0.170361328125, "grad_norm": 0.25241178274154663, "learning_rate": 0.00047283682739328233, "loss": 1.9004, "step": 3489 }, { "epoch": 0.17041015625, "grad_norm": 0.3255901634693146, "learning_rate": 0.0004728201407970199, "loss": 1.9212, "step": 3490 }, { "epoch": 0.170458984375, "grad_norm": 0.28137266635894775, "learning_rate": 0.00047280344940642743, "loss": 1.9265, "step": 3491 }, { "epoch": 0.1705078125, "grad_norm": 0.2891952097415924, "learning_rate": 0.00047278675322190943, "loss": 1.9204, "step": 3492 }, { "epoch": 0.170556640625, "grad_norm": 0.32661598920822144, "learning_rate": 0.0004727700522438706, "loss": 1.89, "step": 3493 }, { "epoch": 0.17060546875, "grad_norm": 0.2696611285209656, "learning_rate": 0.00047275334647271566, "loss": 1.898, "step": 3494 }, { "epoch": 0.170654296875, "grad_norm": 0.22167058289051056, "learning_rate": 0.0004727366359088495, "loss": 1.8958, "step": 3495 }, { "epoch": 0.170703125, "grad_norm": 0.29441460967063904, "learning_rate": 0.00047271992055267713, "loss": 1.8838, "step": 3496 }, { "epoch": 0.170751953125, "grad_norm": 0.2967529296875, "learning_rate": 0.00047270320040460355, "loss": 1.8895, "step": 3497 }, { "epoch": 0.17080078125, "grad_norm": 0.2868121266365051, "learning_rate": 0.0004726864754650342, "loss": 1.9217, "step": 3498 }, { "epoch": 0.170849609375, "grad_norm": 0.3221467137336731, "learning_rate": 0.0004726697457343742, "loss": 1.9372, "step": 3499 }, { "epoch": 0.1708984375, "grad_norm": 0.2905699610710144, "learning_rate": 0.00047265301121302917, "loss": 1.9265, "step": 3500 }, { "epoch": 0.170947265625, "grad_norm": 0.2632710039615631, "learning_rate": 0.00047263627190140457, "loss": 1.8641, "step": 3501 }, { "epoch": 0.17099609375, "grad_norm": 0.2617713510990143, "learning_rate": 0.0004726195277999062, "loss": 1.9168, "step": 3502 }, { "epoch": 0.171044921875, "grad_norm": 0.2334844321012497, "learning_rate": 0.0004726027789089397, "loss": 1.9005, "step": 3503 }, { "epoch": 0.17109375, "grad_norm": 0.2671392560005188, "learning_rate": 0.00047258602522891124, "loss": 1.9021, "step": 3504 }, { "epoch": 0.171142578125, "grad_norm": 0.3497489392757416, "learning_rate": 0.0004725692667602266, "loss": 1.9173, "step": 3505 }, { "epoch": 0.17119140625, "grad_norm": 0.35967546701431274, "learning_rate": 0.0004725525035032921, "loss": 1.9184, "step": 3506 }, { "epoch": 0.171240234375, "grad_norm": 0.34747084975242615, "learning_rate": 0.000472535735458514, "loss": 1.9215, "step": 3507 }, { "epoch": 0.1712890625, "grad_norm": 0.2736997902393341, "learning_rate": 0.0004725189626262986, "loss": 1.9002, "step": 3508 }, { "epoch": 0.171337890625, "grad_norm": 0.34094589948654175, "learning_rate": 0.0004725021850070525, "loss": 1.9005, "step": 3509 }, { "epoch": 0.17138671875, "grad_norm": 0.2934512197971344, "learning_rate": 0.00047248540260118224, "loss": 1.8869, "step": 3510 }, { "epoch": 0.171435546875, "grad_norm": 0.33802396059036255, "learning_rate": 0.0004724686154090946, "loss": 1.8878, "step": 3511 }, { "epoch": 0.171484375, "grad_norm": 0.30851200222969055, "learning_rate": 0.0004724518234311964, "loss": 1.8637, "step": 3512 }, { "epoch": 0.171533203125, "grad_norm": 0.3149496018886566, "learning_rate": 0.00047243502666789467, "loss": 1.9891, "step": 3513 }, { "epoch": 0.17158203125, "grad_norm": 0.3195419907569885, "learning_rate": 0.0004724182251195964, "loss": 1.9018, "step": 3514 }, { "epoch": 0.171630859375, "grad_norm": 0.30018100142478943, "learning_rate": 0.0004724014187867089, "loss": 1.9139, "step": 3515 }, { "epoch": 0.1716796875, "grad_norm": 0.34423568844795227, "learning_rate": 0.0004723846076696395, "loss": 1.927, "step": 3516 }, { "epoch": 0.171728515625, "grad_norm": 0.2976928651332855, "learning_rate": 0.00047236779176879534, "loss": 1.8871, "step": 3517 }, { "epoch": 0.17177734375, "grad_norm": 0.31142640113830566, "learning_rate": 0.00047235097108458434, "loss": 1.9408, "step": 3518 }, { "epoch": 0.171826171875, "grad_norm": 0.28612640500068665, "learning_rate": 0.00047233414561741394, "loss": 1.904, "step": 3519 }, { "epoch": 0.171875, "grad_norm": 0.2659110128879547, "learning_rate": 0.00047231731536769207, "loss": 1.9176, "step": 3520 }, { "epoch": 0.171923828125, "grad_norm": 0.24785597622394562, "learning_rate": 0.0004723004803358264, "loss": 1.9219, "step": 3521 }, { "epoch": 0.17197265625, "grad_norm": 0.27646204829216003, "learning_rate": 0.00047228364052222515, "loss": 1.9297, "step": 3522 }, { "epoch": 0.172021484375, "grad_norm": 0.38193610310554504, "learning_rate": 0.00047226679592729645, "loss": 1.9627, "step": 3523 }, { "epoch": 0.1720703125, "grad_norm": 0.3481144607067108, "learning_rate": 0.0004722499465514484, "loss": 1.9491, "step": 3524 }, { "epoch": 0.172119140625, "grad_norm": 0.2850179970264435, "learning_rate": 0.0004722330923950894, "loss": 1.881, "step": 3525 }, { "epoch": 0.17216796875, "grad_norm": 0.2616626024246216, "learning_rate": 0.00047221623345862794, "loss": 1.9516, "step": 3526 }, { "epoch": 0.172216796875, "grad_norm": 0.27471989393234253, "learning_rate": 0.00047219936974247267, "loss": 1.9428, "step": 3527 }, { "epoch": 0.172265625, "grad_norm": 0.29683917760849, "learning_rate": 0.00047218250124703224, "loss": 1.9111, "step": 3528 }, { "epoch": 0.172314453125, "grad_norm": 0.2293800413608551, "learning_rate": 0.0004721656279727155, "loss": 1.9376, "step": 3529 }, { "epoch": 0.17236328125, "grad_norm": 0.30866289138793945, "learning_rate": 0.00047214874991993136, "loss": 1.9352, "step": 3530 }, { "epoch": 0.172412109375, "grad_norm": 0.3173173666000366, "learning_rate": 0.0004721318670890888, "loss": 1.9316, "step": 3531 }, { "epoch": 0.1724609375, "grad_norm": 0.35299983620643616, "learning_rate": 0.0004721149794805971, "loss": 1.9455, "step": 3532 }, { "epoch": 0.172509765625, "grad_norm": 0.35022130608558655, "learning_rate": 0.00047209808709486556, "loss": 1.9712, "step": 3533 }, { "epoch": 0.17255859375, "grad_norm": 0.33024466037750244, "learning_rate": 0.0004720811899323035, "loss": 1.9108, "step": 3534 }, { "epoch": 0.172607421875, "grad_norm": 0.40833166241645813, "learning_rate": 0.00047206428799332045, "loss": 1.9252, "step": 3535 }, { "epoch": 0.17265625, "grad_norm": 0.2729322016239166, "learning_rate": 0.00047204738127832615, "loss": 1.9725, "step": 3536 }, { "epoch": 0.172705078125, "grad_norm": 0.31562110781669617, "learning_rate": 0.0004720304697877301, "loss": 1.8778, "step": 3537 }, { "epoch": 0.17275390625, "grad_norm": 0.3229336142539978, "learning_rate": 0.0004720135535219425, "loss": 1.9115, "step": 3538 }, { "epoch": 0.172802734375, "grad_norm": 0.25023600459098816, "learning_rate": 0.000471996632481373, "loss": 1.9332, "step": 3539 }, { "epoch": 0.1728515625, "grad_norm": 0.27198925614356995, "learning_rate": 0.000471979706666432, "loss": 1.8999, "step": 3540 }, { "epoch": 0.172900390625, "grad_norm": 0.24054180085659027, "learning_rate": 0.0004719627760775294, "loss": 1.8818, "step": 3541 }, { "epoch": 0.17294921875, "grad_norm": 0.28962504863739014, "learning_rate": 0.00047194584071507577, "loss": 1.8522, "step": 3542 }, { "epoch": 0.172998046875, "grad_norm": 0.23058804869651794, "learning_rate": 0.00047192890057948147, "loss": 1.9198, "step": 3543 }, { "epoch": 0.173046875, "grad_norm": 0.2741925120353699, "learning_rate": 0.000471911955671157, "loss": 1.9152, "step": 3544 }, { "epoch": 0.173095703125, "grad_norm": 0.25971296429634094, "learning_rate": 0.0004718950059905131, "loss": 1.9811, "step": 3545 }, { "epoch": 0.17314453125, "grad_norm": 0.27585798501968384, "learning_rate": 0.0004718780515379606, "loss": 1.9493, "step": 3546 }, { "epoch": 0.173193359375, "grad_norm": 0.37361374497413635, "learning_rate": 0.00047186109231391033, "loss": 1.8619, "step": 3547 }, { "epoch": 0.1732421875, "grad_norm": 0.31696945428848267, "learning_rate": 0.0004718441283187733, "loss": 1.8773, "step": 3548 }, { "epoch": 0.173291015625, "grad_norm": 0.26611417531967163, "learning_rate": 0.00047182715955296065, "loss": 1.9079, "step": 3549 }, { "epoch": 0.17333984375, "grad_norm": 0.31275761127471924, "learning_rate": 0.0004718101860168837, "loss": 1.926, "step": 3550 }, { "epoch": 0.173388671875, "grad_norm": 0.23184557259082794, "learning_rate": 0.0004717932077109538, "loss": 1.9113, "step": 3551 }, { "epoch": 0.1734375, "grad_norm": 0.33576059341430664, "learning_rate": 0.0004717762246355824, "loss": 1.9453, "step": 3552 }, { "epoch": 0.173486328125, "grad_norm": 0.3645724952220917, "learning_rate": 0.0004717592367911811, "loss": 1.9665, "step": 3553 }, { "epoch": 0.17353515625, "grad_norm": 0.28887516260147095, "learning_rate": 0.0004717422441781616, "loss": 1.9167, "step": 3554 }, { "epoch": 0.173583984375, "grad_norm": 0.29416424036026, "learning_rate": 0.00047172524679693574, "loss": 1.9028, "step": 3555 }, { "epoch": 0.1736328125, "grad_norm": 0.25022709369659424, "learning_rate": 0.0004717082446479156, "loss": 1.8877, "step": 3556 }, { "epoch": 0.173681640625, "grad_norm": 0.2861517369747162, "learning_rate": 0.00047169123773151296, "loss": 1.9398, "step": 3557 }, { "epoch": 0.17373046875, "grad_norm": 0.2930237650871277, "learning_rate": 0.0004716742260481402, "loss": 1.888, "step": 3558 }, { "epoch": 0.173779296875, "grad_norm": 0.27741360664367676, "learning_rate": 0.00047165720959820956, "loss": 1.8935, "step": 3559 }, { "epoch": 0.173828125, "grad_norm": 0.28018489480018616, "learning_rate": 0.00047164018838213346, "loss": 1.9363, "step": 3560 }, { "epoch": 0.173876953125, "grad_norm": 0.5395210385322571, "learning_rate": 0.0004716231624003244, "loss": 1.9604, "step": 3561 }, { "epoch": 0.17392578125, "grad_norm": 0.3253551125526428, "learning_rate": 0.0004716061316531951, "loss": 1.9284, "step": 3562 }, { "epoch": 0.173974609375, "grad_norm": 0.30311769247055054, "learning_rate": 0.00047158909614115825, "loss": 1.9119, "step": 3563 }, { "epoch": 0.1740234375, "grad_norm": 0.45028042793273926, "learning_rate": 0.00047157205586462664, "loss": 1.8779, "step": 3564 }, { "epoch": 0.174072265625, "grad_norm": 0.34339627623558044, "learning_rate": 0.0004715550108240134, "loss": 1.9406, "step": 3565 }, { "epoch": 0.17412109375, "grad_norm": 0.3085564076900482, "learning_rate": 0.0004715379610197315, "loss": 1.9235, "step": 3566 }, { "epoch": 0.174169921875, "grad_norm": 0.3201998770236969, "learning_rate": 0.0004715209064521943, "loss": 1.8977, "step": 3567 }, { "epoch": 0.17421875, "grad_norm": 0.2971876561641693, "learning_rate": 0.000471503847121815, "loss": 1.9307, "step": 3568 }, { "epoch": 0.174267578125, "grad_norm": 0.33045798540115356, "learning_rate": 0.00047148678302900705, "loss": 1.8994, "step": 3569 }, { "epoch": 0.17431640625, "grad_norm": 0.27119332551956177, "learning_rate": 0.00047146971417418417, "loss": 1.9222, "step": 3570 }, { "epoch": 0.174365234375, "grad_norm": 0.31348830461502075, "learning_rate": 0.00047145264055775975, "loss": 1.8556, "step": 3571 }, { "epoch": 0.1744140625, "grad_norm": 0.2816448509693146, "learning_rate": 0.0004714355621801479, "loss": 1.8927, "step": 3572 }, { "epoch": 0.174462890625, "grad_norm": 0.33343759179115295, "learning_rate": 0.0004714184790417624, "loss": 1.9062, "step": 3573 }, { "epoch": 0.17451171875, "grad_norm": 0.36317208409309387, "learning_rate": 0.00047140139114301715, "loss": 1.9364, "step": 3574 }, { "epoch": 0.174560546875, "grad_norm": 0.26195573806762695, "learning_rate": 0.00047138429848432643, "loss": 1.9304, "step": 3575 }, { "epoch": 0.174609375, "grad_norm": 0.28738507628440857, "learning_rate": 0.0004713672010661045, "loss": 1.8987, "step": 3576 }, { "epoch": 0.174658203125, "grad_norm": 0.29095855355262756, "learning_rate": 0.00047135009888876566, "loss": 1.9586, "step": 3577 }, { "epoch": 0.17470703125, "grad_norm": 0.26741477847099304, "learning_rate": 0.0004713329919527245, "loss": 1.9662, "step": 3578 }, { "epoch": 0.174755859375, "grad_norm": 0.2853944003582001, "learning_rate": 0.0004713158802583955, "loss": 1.9335, "step": 3579 }, { "epoch": 0.1748046875, "grad_norm": 0.3292505741119385, "learning_rate": 0.0004712987638061934, "loss": 1.9318, "step": 3580 }, { "epoch": 0.174853515625, "grad_norm": 0.32685989141464233, "learning_rate": 0.0004712816425965331, "loss": 1.9119, "step": 3581 }, { "epoch": 0.17490234375, "grad_norm": 0.3464820384979248, "learning_rate": 0.00047126451662982946, "loss": 1.9629, "step": 3582 }, { "epoch": 0.174951171875, "grad_norm": 0.2905120551586151, "learning_rate": 0.00047124738590649756, "loss": 1.9098, "step": 3583 }, { "epoch": 0.175, "grad_norm": 0.29173555970191956, "learning_rate": 0.00047123025042695267, "loss": 1.8914, "step": 3584 }, { "epoch": 0.175048828125, "grad_norm": 0.23078681528568268, "learning_rate": 0.00047121311019160996, "loss": 1.9126, "step": 3585 }, { "epoch": 0.17509765625, "grad_norm": 0.27035045623779297, "learning_rate": 0.0004711959652008849, "loss": 1.9301, "step": 3586 }, { "epoch": 0.175146484375, "grad_norm": 0.24349842965602875, "learning_rate": 0.00047117881545519306, "loss": 1.9287, "step": 3587 }, { "epoch": 0.1751953125, "grad_norm": 0.32332074642181396, "learning_rate": 0.0004711616609549499, "loss": 1.9257, "step": 3588 }, { "epoch": 0.175244140625, "grad_norm": 0.3034059405326843, "learning_rate": 0.0004711445017005714, "loss": 1.9132, "step": 3589 }, { "epoch": 0.17529296875, "grad_norm": 0.2648470103740692, "learning_rate": 0.0004711273376924732, "loss": 1.9308, "step": 3590 }, { "epoch": 0.175341796875, "grad_norm": 0.32726842164993286, "learning_rate": 0.0004711101689310715, "loss": 1.9432, "step": 3591 }, { "epoch": 0.175390625, "grad_norm": 0.23115578293800354, "learning_rate": 0.0004710929954167823, "loss": 1.9749, "step": 3592 }, { "epoch": 0.175439453125, "grad_norm": 0.33066219091415405, "learning_rate": 0.0004710758171500218, "loss": 1.9354, "step": 3593 }, { "epoch": 0.17548828125, "grad_norm": 0.32178038358688354, "learning_rate": 0.0004710586341312064, "loss": 1.9482, "step": 3594 }, { "epoch": 0.175537109375, "grad_norm": 0.3102787733078003, "learning_rate": 0.0004710414463607524, "loss": 1.9151, "step": 3595 }, { "epoch": 0.1755859375, "grad_norm": 0.4283759891986847, "learning_rate": 0.0004710242538390765, "loss": 1.9355, "step": 3596 }, { "epoch": 0.175634765625, "grad_norm": 0.2685447633266449, "learning_rate": 0.00047100705656659533, "loss": 1.8928, "step": 3597 }, { "epoch": 0.17568359375, "grad_norm": 0.32573631405830383, "learning_rate": 0.0004709898545437256, "loss": 1.903, "step": 3598 }, { "epoch": 0.175732421875, "grad_norm": 0.434416264295578, "learning_rate": 0.0004709726477708844, "loss": 1.9161, "step": 3599 }, { "epoch": 0.17578125, "grad_norm": 0.2959132790565491, "learning_rate": 0.00047095543624848857, "loss": 1.9312, "step": 3600 }, { "epoch": 0.175830078125, "grad_norm": 0.2979394197463989, "learning_rate": 0.0004709382199769553, "loss": 1.889, "step": 3601 }, { "epoch": 0.17587890625, "grad_norm": 0.2850574254989624, "learning_rate": 0.0004709209989567019, "loss": 1.9448, "step": 3602 }, { "epoch": 0.175927734375, "grad_norm": 0.2568104863166809, "learning_rate": 0.00047090377318814567, "loss": 1.911, "step": 3603 }, { "epoch": 0.1759765625, "grad_norm": 0.2655841112136841, "learning_rate": 0.00047088654267170413, "loss": 1.8915, "step": 3604 }, { "epoch": 0.176025390625, "grad_norm": 0.3405348062515259, "learning_rate": 0.00047086930740779476, "loss": 1.9117, "step": 3605 }, { "epoch": 0.17607421875, "grad_norm": 0.28083980083465576, "learning_rate": 0.00047085206739683547, "loss": 1.9072, "step": 3606 }, { "epoch": 0.176123046875, "grad_norm": 0.3334708511829376, "learning_rate": 0.00047083482263924394, "loss": 1.916, "step": 3607 }, { "epoch": 0.176171875, "grad_norm": 0.37264665961265564, "learning_rate": 0.00047081757313543815, "loss": 1.9564, "step": 3608 }, { "epoch": 0.176220703125, "grad_norm": 0.29696446657180786, "learning_rate": 0.0004708003188858362, "loss": 1.9803, "step": 3609 }, { "epoch": 0.17626953125, "grad_norm": 0.30114683508872986, "learning_rate": 0.0004707830598908561, "loss": 1.9071, "step": 3610 }, { "epoch": 0.176318359375, "grad_norm": 0.31149420142173767, "learning_rate": 0.00047076579615091635, "loss": 1.8722, "step": 3611 }, { "epoch": 0.1763671875, "grad_norm": 0.33925333619117737, "learning_rate": 0.0004707485276664353, "loss": 1.8728, "step": 3612 }, { "epoch": 0.176416015625, "grad_norm": 0.38611796498298645, "learning_rate": 0.0004707312544378313, "loss": 1.9257, "step": 3613 }, { "epoch": 0.17646484375, "grad_norm": 0.34690436720848083, "learning_rate": 0.0004707139764655232, "loss": 1.9187, "step": 3614 }, { "epoch": 0.176513671875, "grad_norm": 0.25480255484580994, "learning_rate": 0.00047069669374992957, "loss": 1.897, "step": 3615 }, { "epoch": 0.1765625, "grad_norm": 0.2936360836029053, "learning_rate": 0.0004706794062914694, "loss": 1.9036, "step": 3616 }, { "epoch": 0.176611328125, "grad_norm": 0.3088696599006653, "learning_rate": 0.00047066211409056154, "loss": 1.9646, "step": 3617 }, { "epoch": 0.17666015625, "grad_norm": 0.33505672216415405, "learning_rate": 0.00047064481714762527, "loss": 1.9124, "step": 3618 }, { "epoch": 0.176708984375, "grad_norm": 0.3485592007637024, "learning_rate": 0.00047062751546307955, "loss": 1.9277, "step": 3619 }, { "epoch": 0.1767578125, "grad_norm": 0.29186293482780457, "learning_rate": 0.00047061020903734396, "loss": 1.9029, "step": 3620 }, { "epoch": 0.176806640625, "grad_norm": 0.26468929648399353, "learning_rate": 0.0004705928978708377, "loss": 1.92, "step": 3621 }, { "epoch": 0.17685546875, "grad_norm": 0.33533304929733276, "learning_rate": 0.0004705755819639804, "loss": 1.9099, "step": 3622 }, { "epoch": 0.176904296875, "grad_norm": 0.3550077974796295, "learning_rate": 0.0004705582613171919, "loss": 1.9243, "step": 3623 }, { "epoch": 0.176953125, "grad_norm": 0.35453927516937256, "learning_rate": 0.00047054093593089163, "loss": 1.9311, "step": 3624 }, { "epoch": 0.177001953125, "grad_norm": 0.2818650007247925, "learning_rate": 0.0004705236058054998, "loss": 1.9272, "step": 3625 }, { "epoch": 0.17705078125, "grad_norm": 0.31217557191848755, "learning_rate": 0.0004705062709414363, "loss": 1.9462, "step": 3626 }, { "epoch": 0.177099609375, "grad_norm": 0.3123570382595062, "learning_rate": 0.00047048893133912125, "loss": 1.865, "step": 3627 }, { "epoch": 0.1771484375, "grad_norm": 0.31247270107269287, "learning_rate": 0.00047047158699897485, "loss": 1.9151, "step": 3628 }, { "epoch": 0.177197265625, "grad_norm": 0.2767021954059601, "learning_rate": 0.00047045423792141757, "loss": 1.9181, "step": 3629 }, { "epoch": 0.17724609375, "grad_norm": 0.29090914130210876, "learning_rate": 0.00047043688410686977, "loss": 1.8855, "step": 3630 }, { "epoch": 0.177294921875, "grad_norm": 0.29336056113243103, "learning_rate": 0.000470419525555752, "loss": 1.935, "step": 3631 }, { "epoch": 0.17734375, "grad_norm": 0.2934505045413971, "learning_rate": 0.0004704021622684851, "loss": 1.9099, "step": 3632 }, { "epoch": 0.177392578125, "grad_norm": 0.3256034851074219, "learning_rate": 0.0004703847942454898, "loss": 1.8647, "step": 3633 }, { "epoch": 0.17744140625, "grad_norm": 0.3658676743507385, "learning_rate": 0.0004703674214871871, "loss": 1.909, "step": 3634 }, { "epoch": 0.177490234375, "grad_norm": 0.27232521772384644, "learning_rate": 0.00047035004399399787, "loss": 1.9006, "step": 3635 }, { "epoch": 0.1775390625, "grad_norm": 0.36779582500457764, "learning_rate": 0.00047033266176634356, "loss": 1.927, "step": 3636 }, { "epoch": 0.177587890625, "grad_norm": 0.32347404956817627, "learning_rate": 0.0004703152748046451, "loss": 1.9096, "step": 3637 }, { "epoch": 0.17763671875, "grad_norm": 0.28621360659599304, "learning_rate": 0.00047029788310932406, "loss": 1.9186, "step": 3638 }, { "epoch": 0.177685546875, "grad_norm": 0.24260388314723969, "learning_rate": 0.000470280486680802, "loss": 1.909, "step": 3639 }, { "epoch": 0.177734375, "grad_norm": 0.2638241648674011, "learning_rate": 0.0004702630855195003, "loss": 1.9215, "step": 3640 }, { "epoch": 0.177783203125, "grad_norm": 0.34734681248664856, "learning_rate": 0.000470245679625841, "loss": 1.8482, "step": 3641 }, { "epoch": 0.17783203125, "grad_norm": 0.34363746643066406, "learning_rate": 0.0004702282690002458, "loss": 1.9315, "step": 3642 }, { "epoch": 0.177880859375, "grad_norm": 0.362785667181015, "learning_rate": 0.0004702108536431366, "loss": 1.8499, "step": 3643 }, { "epoch": 0.1779296875, "grad_norm": 0.2782791554927826, "learning_rate": 0.00047019343355493554, "loss": 1.893, "step": 3644 }, { "epoch": 0.177978515625, "grad_norm": 0.45598915219306946, "learning_rate": 0.0004701760087360648, "loss": 1.9435, "step": 3645 }, { "epoch": 0.17802734375, "grad_norm": 0.45537492632865906, "learning_rate": 0.00047015857918694665, "loss": 1.927, "step": 3646 }, { "epoch": 0.178076171875, "grad_norm": 0.2727809250354767, "learning_rate": 0.00047014114490800366, "loss": 1.953, "step": 3647 }, { "epoch": 0.178125, "grad_norm": 0.4218829572200775, "learning_rate": 0.00047012370589965814, "loss": 1.9079, "step": 3648 }, { "epoch": 0.178173828125, "grad_norm": 0.3081490993499756, "learning_rate": 0.0004701062621623329, "loss": 1.9001, "step": 3649 }, { "epoch": 0.17822265625, "grad_norm": 0.28672054409980774, "learning_rate": 0.0004700888136964506, "loss": 1.9525, "step": 3650 }, { "epoch": 0.178271484375, "grad_norm": 0.34789836406707764, "learning_rate": 0.0004700713605024342, "loss": 1.9246, "step": 3651 }, { "epoch": 0.1783203125, "grad_norm": 0.30428022146224976, "learning_rate": 0.00047005390258070663, "loss": 1.937, "step": 3652 }, { "epoch": 0.178369140625, "grad_norm": 0.30729037523269653, "learning_rate": 0.000470036439931691, "loss": 1.9357, "step": 3653 }, { "epoch": 0.17841796875, "grad_norm": 0.30333128571510315, "learning_rate": 0.0004700189725558107, "loss": 1.9324, "step": 3654 }, { "epoch": 0.178466796875, "grad_norm": 0.2857601046562195, "learning_rate": 0.00047000150045348876, "loss": 1.8756, "step": 3655 }, { "epoch": 0.178515625, "grad_norm": 0.2991495132446289, "learning_rate": 0.00046998402362514893, "loss": 1.9023, "step": 3656 }, { "epoch": 0.178564453125, "grad_norm": 0.22884038090705872, "learning_rate": 0.0004699665420712145, "loss": 1.9438, "step": 3657 }, { "epoch": 0.17861328125, "grad_norm": 0.2814713418483734, "learning_rate": 0.00046994905579210937, "loss": 1.9028, "step": 3658 }, { "epoch": 0.178662109375, "grad_norm": 0.22753290832042694, "learning_rate": 0.00046993156478825727, "loss": 1.9434, "step": 3659 }, { "epoch": 0.1787109375, "grad_norm": 0.26667991280555725, "learning_rate": 0.00046991406906008213, "loss": 1.873, "step": 3660 }, { "epoch": 0.178759765625, "grad_norm": 0.29528748989105225, "learning_rate": 0.0004698965686080078, "loss": 1.9166, "step": 3661 }, { "epoch": 0.17880859375, "grad_norm": 0.27387702465057373, "learning_rate": 0.00046987906343245865, "loss": 1.8716, "step": 3662 }, { "epoch": 0.178857421875, "grad_norm": 0.3654647469520569, "learning_rate": 0.0004698615535338588, "loss": 1.9553, "step": 3663 }, { "epoch": 0.17890625, "grad_norm": 0.298397034406662, "learning_rate": 0.0004698440389126327, "loss": 1.9419, "step": 3664 }, { "epoch": 0.178955078125, "grad_norm": 0.2807534337043762, "learning_rate": 0.0004698265195692047, "loss": 1.8971, "step": 3665 }, { "epoch": 0.17900390625, "grad_norm": 0.34571516513824463, "learning_rate": 0.0004698089955039995, "loss": 1.9067, "step": 3666 }, { "epoch": 0.179052734375, "grad_norm": 0.31172481179237366, "learning_rate": 0.0004697914667174418, "loss": 1.8782, "step": 3667 }, { "epoch": 0.1791015625, "grad_norm": 0.29351142048835754, "learning_rate": 0.0004697739332099564, "loss": 1.9041, "step": 3668 }, { "epoch": 0.179150390625, "grad_norm": 0.3429147005081177, "learning_rate": 0.00046975639498196824, "loss": 1.9082, "step": 3669 }, { "epoch": 0.17919921875, "grad_norm": 0.28452420234680176, "learning_rate": 0.00046973885203390245, "loss": 1.9661, "step": 3670 }, { "epoch": 0.179248046875, "grad_norm": 0.3317002058029175, "learning_rate": 0.00046972130436618404, "loss": 1.8643, "step": 3671 }, { "epoch": 0.179296875, "grad_norm": 0.28969281911849976, "learning_rate": 0.0004697037519792384, "loss": 1.9185, "step": 3672 }, { "epoch": 0.179345703125, "grad_norm": 0.3219856321811676, "learning_rate": 0.00046968619487349096, "loss": 1.9463, "step": 3673 }, { "epoch": 0.17939453125, "grad_norm": 0.3354300260543823, "learning_rate": 0.0004696686330493672, "loss": 1.9465, "step": 3674 }, { "epoch": 0.179443359375, "grad_norm": 0.23240408301353455, "learning_rate": 0.0004696510665072926, "loss": 1.9203, "step": 3675 }, { "epoch": 0.1794921875, "grad_norm": 0.33824190497398376, "learning_rate": 0.0004696334952476931, "loss": 1.9406, "step": 3676 }, { "epoch": 0.179541015625, "grad_norm": 0.27322691679000854, "learning_rate": 0.00046961591927099445, "loss": 1.8978, "step": 3677 }, { "epoch": 0.17958984375, "grad_norm": 0.2850175201892853, "learning_rate": 0.0004695983385776227, "loss": 1.9448, "step": 3678 }, { "epoch": 0.179638671875, "grad_norm": 0.3140221834182739, "learning_rate": 0.0004695807531680038, "loss": 1.9123, "step": 3679 }, { "epoch": 0.1796875, "grad_norm": 0.3563169538974762, "learning_rate": 0.0004695631630425641, "loss": 1.944, "step": 3680 }, { "epoch": 0.179736328125, "grad_norm": 0.31335461139678955, "learning_rate": 0.0004695455682017298, "loss": 1.9237, "step": 3681 }, { "epoch": 0.17978515625, "grad_norm": 0.2663716971874237, "learning_rate": 0.00046952796864592727, "loss": 1.8999, "step": 3682 }, { "epoch": 0.179833984375, "grad_norm": 0.33590617775917053, "learning_rate": 0.0004695103643755833, "loss": 1.8714, "step": 3683 }, { "epoch": 0.1798828125, "grad_norm": 0.34617316722869873, "learning_rate": 0.00046949275539112423, "loss": 1.8997, "step": 3684 }, { "epoch": 0.179931640625, "grad_norm": 0.3686439096927643, "learning_rate": 0.0004694751416929771, "loss": 1.8872, "step": 3685 }, { "epoch": 0.17998046875, "grad_norm": 0.334879994392395, "learning_rate": 0.0004694575232815686, "loss": 1.9262, "step": 3686 }, { "epoch": 0.180029296875, "grad_norm": 0.34094420075416565, "learning_rate": 0.0004694399001573258, "loss": 1.8708, "step": 3687 }, { "epoch": 0.180078125, "grad_norm": 0.24468980729579926, "learning_rate": 0.0004694222723206759, "loss": 1.9304, "step": 3688 }, { "epoch": 0.180126953125, "grad_norm": 0.3419586420059204, "learning_rate": 0.0004694046397720458, "loss": 1.8992, "step": 3689 }, { "epoch": 0.18017578125, "grad_norm": 0.37404394149780273, "learning_rate": 0.0004693870025118633, "loss": 1.93, "step": 3690 }, { "epoch": 0.180224609375, "grad_norm": 0.3470974266529083, "learning_rate": 0.00046936936054055555, "loss": 1.9334, "step": 3691 }, { "epoch": 0.1802734375, "grad_norm": 0.2523563802242279, "learning_rate": 0.00046935171385855025, "loss": 1.9185, "step": 3692 }, { "epoch": 0.180322265625, "grad_norm": 0.25116199254989624, "learning_rate": 0.0004693340624662749, "loss": 1.9566, "step": 3693 }, { "epoch": 0.18037109375, "grad_norm": 0.32947805523872375, "learning_rate": 0.00046931640636415755, "loss": 1.9203, "step": 3694 }, { "epoch": 0.180419921875, "grad_norm": 0.27127161622047424, "learning_rate": 0.0004692987455526259, "loss": 1.94, "step": 3695 }, { "epoch": 0.18046875, "grad_norm": 0.2768775224685669, "learning_rate": 0.00046928108003210805, "loss": 1.9172, "step": 3696 }, { "epoch": 0.180517578125, "grad_norm": 0.27372363209724426, "learning_rate": 0.00046926340980303215, "loss": 1.9306, "step": 3697 }, { "epoch": 0.18056640625, "grad_norm": 0.3426954746246338, "learning_rate": 0.0004692457348658265, "loss": 1.9384, "step": 3698 }, { "epoch": 0.180615234375, "grad_norm": 0.3092641532421112, "learning_rate": 0.0004692280552209194, "loss": 1.9362, "step": 3699 }, { "epoch": 0.1806640625, "grad_norm": 0.38794952630996704, "learning_rate": 0.00046921037086873927, "loss": 1.928, "step": 3700 }, { "epoch": 0.180712890625, "grad_norm": 0.2743432819843292, "learning_rate": 0.0004691926818097148, "loss": 1.8788, "step": 3701 }, { "epoch": 0.18076171875, "grad_norm": 0.33266255259513855, "learning_rate": 0.0004691749880442747, "loss": 1.9369, "step": 3702 }, { "epoch": 0.180810546875, "grad_norm": 0.38247671723365784, "learning_rate": 0.0004691572895728478, "loss": 1.911, "step": 3703 }, { "epoch": 0.180859375, "grad_norm": 0.2266256958246231, "learning_rate": 0.00046913958639586295, "loss": 1.8975, "step": 3704 }, { "epoch": 0.180908203125, "grad_norm": 0.3450096547603607, "learning_rate": 0.0004691218785137493, "loss": 1.8968, "step": 3705 }, { "epoch": 0.18095703125, "grad_norm": 0.35587799549102783, "learning_rate": 0.0004691041659269359, "loss": 1.9179, "step": 3706 }, { "epoch": 0.181005859375, "grad_norm": 0.3721602261066437, "learning_rate": 0.0004690864486358521, "loss": 1.9178, "step": 3707 }, { "epoch": 0.1810546875, "grad_norm": 0.27825215458869934, "learning_rate": 0.00046906872664092734, "loss": 1.948, "step": 3708 }, { "epoch": 0.181103515625, "grad_norm": 0.33730846643447876, "learning_rate": 0.000469050999942591, "loss": 1.8994, "step": 3709 }, { "epoch": 0.18115234375, "grad_norm": 0.32204434275627136, "learning_rate": 0.00046903326854127287, "loss": 1.9499, "step": 3710 }, { "epoch": 0.181201171875, "grad_norm": 0.2727659046649933, "learning_rate": 0.0004690155324374025, "loss": 1.9143, "step": 3711 }, { "epoch": 0.18125, "grad_norm": 0.26584887504577637, "learning_rate": 0.0004689977916314099, "loss": 1.8991, "step": 3712 }, { "epoch": 0.181298828125, "grad_norm": 0.2594103515148163, "learning_rate": 0.000468980046123725, "loss": 1.9042, "step": 3713 }, { "epoch": 0.18134765625, "grad_norm": 0.28130751848220825, "learning_rate": 0.0004689622959147778, "loss": 1.9202, "step": 3714 }, { "epoch": 0.181396484375, "grad_norm": 0.2959096431732178, "learning_rate": 0.0004689445410049985, "loss": 1.8923, "step": 3715 }, { "epoch": 0.1814453125, "grad_norm": 0.2684394121170044, "learning_rate": 0.00046892678139481744, "loss": 1.9797, "step": 3716 }, { "epoch": 0.181494140625, "grad_norm": 0.2593652307987213, "learning_rate": 0.00046890901708466506, "loss": 1.9016, "step": 3717 }, { "epoch": 0.18154296875, "grad_norm": 0.2408376932144165, "learning_rate": 0.0004688912480749718, "loss": 1.9312, "step": 3718 }, { "epoch": 0.181591796875, "grad_norm": 0.2808798849582672, "learning_rate": 0.00046887347436616844, "loss": 1.9241, "step": 3719 }, { "epoch": 0.181640625, "grad_norm": 0.30696773529052734, "learning_rate": 0.0004688556959586857, "loss": 1.9286, "step": 3720 }, { "epoch": 0.181689453125, "grad_norm": 0.24694490432739258, "learning_rate": 0.00046883791285295436, "loss": 1.9288, "step": 3721 }, { "epoch": 0.18173828125, "grad_norm": 0.23027199506759644, "learning_rate": 0.0004688201250494055, "loss": 1.8747, "step": 3722 }, { "epoch": 0.181787109375, "grad_norm": 0.26801615953445435, "learning_rate": 0.0004688023325484702, "loss": 1.9198, "step": 3723 }, { "epoch": 0.1818359375, "grad_norm": 0.26151618361473083, "learning_rate": 0.00046878453535057965, "loss": 1.941, "step": 3724 }, { "epoch": 0.181884765625, "grad_norm": 0.25189322233200073, "learning_rate": 0.00046876673345616523, "loss": 1.8885, "step": 3725 }, { "epoch": 0.18193359375, "grad_norm": 0.32362398505210876, "learning_rate": 0.00046874892686565834, "loss": 1.9196, "step": 3726 }, { "epoch": 0.181982421875, "grad_norm": 0.3257061243057251, "learning_rate": 0.00046873111557949054, "loss": 1.8584, "step": 3727 }, { "epoch": 0.18203125, "grad_norm": 0.2881745398044586, "learning_rate": 0.0004687132995980935, "loss": 1.8893, "step": 3728 }, { "epoch": 0.182080078125, "grad_norm": 0.2984360158443451, "learning_rate": 0.00046869547892189914, "loss": 1.9719, "step": 3729 }, { "epoch": 0.18212890625, "grad_norm": 0.26801449060440063, "learning_rate": 0.00046867765355133905, "loss": 1.8667, "step": 3730 }, { "epoch": 0.182177734375, "grad_norm": 0.21552519500255585, "learning_rate": 0.00046865982348684557, "loss": 1.9126, "step": 3731 }, { "epoch": 0.1822265625, "grad_norm": 0.2579406201839447, "learning_rate": 0.0004686419887288506, "loss": 1.9268, "step": 3732 }, { "epoch": 0.182275390625, "grad_norm": 0.2276943325996399, "learning_rate": 0.0004686241492777865, "loss": 1.9085, "step": 3733 }, { "epoch": 0.18232421875, "grad_norm": 0.21315446496009827, "learning_rate": 0.0004686063051340856, "loss": 1.8867, "step": 3734 }, { "epoch": 0.182373046875, "grad_norm": 0.2586243152618408, "learning_rate": 0.0004685884562981803, "loss": 1.915, "step": 3735 }, { "epoch": 0.182421875, "grad_norm": 0.29885631799697876, "learning_rate": 0.00046857060277050324, "loss": 1.9146, "step": 3736 }, { "epoch": 0.182470703125, "grad_norm": 0.30822649598121643, "learning_rate": 0.0004685527445514871, "loss": 1.8931, "step": 3737 }, { "epoch": 0.18251953125, "grad_norm": 0.274018257856369, "learning_rate": 0.00046853488164156476, "loss": 1.9089, "step": 3738 }, { "epoch": 0.182568359375, "grad_norm": 0.25215908885002136, "learning_rate": 0.0004685170140411691, "loss": 1.8731, "step": 3739 }, { "epoch": 0.1826171875, "grad_norm": 0.3326561748981476, "learning_rate": 0.00046849914175073305, "loss": 1.895, "step": 3740 }, { "epoch": 0.182666015625, "grad_norm": 0.347911536693573, "learning_rate": 0.0004684812647706899, "loss": 1.9084, "step": 3741 }, { "epoch": 0.18271484375, "grad_norm": 0.28639164566993713, "learning_rate": 0.0004684633831014728, "loss": 1.944, "step": 3742 }, { "epoch": 0.182763671875, "grad_norm": 0.3701295852661133, "learning_rate": 0.0004684454967435152, "loss": 1.9553, "step": 3743 }, { "epoch": 0.1828125, "grad_norm": 0.36405161023139954, "learning_rate": 0.00046842760569725065, "loss": 1.8807, "step": 3744 }, { "epoch": 0.182861328125, "grad_norm": 0.25677189230918884, "learning_rate": 0.0004684097099631126, "loss": 1.8421, "step": 3745 }, { "epoch": 0.18291015625, "grad_norm": 0.25635120272636414, "learning_rate": 0.00046839180954153485, "loss": 1.9238, "step": 3746 }, { "epoch": 0.182958984375, "grad_norm": 0.27468767762184143, "learning_rate": 0.0004683739044329513, "loss": 1.897, "step": 3747 }, { "epoch": 0.1830078125, "grad_norm": 0.28172382712364197, "learning_rate": 0.00046835599463779573, "loss": 1.9463, "step": 3748 }, { "epoch": 0.183056640625, "grad_norm": 0.25468188524246216, "learning_rate": 0.00046833808015650226, "loss": 1.9071, "step": 3749 }, { "epoch": 0.18310546875, "grad_norm": 0.2877734303474426, "learning_rate": 0.0004683201609895052, "loss": 1.9414, "step": 3750 }, { "epoch": 0.183154296875, "grad_norm": 0.2689729928970337, "learning_rate": 0.00046830223713723864, "loss": 1.9116, "step": 3751 }, { "epoch": 0.183203125, "grad_norm": 0.2610586881637573, "learning_rate": 0.0004682843086001371, "loss": 1.9198, "step": 3752 }, { "epoch": 0.183251953125, "grad_norm": 0.3971535265445709, "learning_rate": 0.00046826637537863514, "loss": 1.9234, "step": 3753 }, { "epoch": 0.18330078125, "grad_norm": 0.41798898577690125, "learning_rate": 0.00046824843747316717, "loss": 1.9238, "step": 3754 }, { "epoch": 0.183349609375, "grad_norm": 0.3898483216762543, "learning_rate": 0.00046823049488416813, "loss": 1.926, "step": 3755 }, { "epoch": 0.1833984375, "grad_norm": 0.4087633192539215, "learning_rate": 0.0004682125476120728, "loss": 1.9062, "step": 3756 }, { "epoch": 0.183447265625, "grad_norm": 0.3588685095310211, "learning_rate": 0.0004681945956573161, "loss": 1.9234, "step": 3757 }, { "epoch": 0.18349609375, "grad_norm": 0.34401869773864746, "learning_rate": 0.00046817663902033323, "loss": 1.8959, "step": 3758 }, { "epoch": 0.183544921875, "grad_norm": 0.3598618805408478, "learning_rate": 0.0004681586777015593, "loss": 1.879, "step": 3759 }, { "epoch": 0.18359375, "grad_norm": 0.3699378967285156, "learning_rate": 0.00046814071170142964, "loss": 1.8915, "step": 3760 }, { "epoch": 0.183642578125, "grad_norm": 0.2694892883300781, "learning_rate": 0.0004681227410203797, "loss": 1.9268, "step": 3761 }, { "epoch": 0.18369140625, "grad_norm": 0.3652883768081665, "learning_rate": 0.0004681047656588449, "loss": 1.9302, "step": 3762 }, { "epoch": 0.183740234375, "grad_norm": 0.34781190752983093, "learning_rate": 0.000468086785617261, "loss": 1.9503, "step": 3763 }, { "epoch": 0.1837890625, "grad_norm": 0.24136298894882202, "learning_rate": 0.00046806880089606375, "loss": 1.9132, "step": 3764 }, { "epoch": 0.183837890625, "grad_norm": 0.3361181318759918, "learning_rate": 0.00046805081149568897, "loss": 1.8976, "step": 3765 }, { "epoch": 0.18388671875, "grad_norm": 0.323021799325943, "learning_rate": 0.00046803281741657264, "loss": 1.9208, "step": 3766 }, { "epoch": 0.183935546875, "grad_norm": 0.2841741442680359, "learning_rate": 0.000468014818659151, "loss": 1.8839, "step": 3767 }, { "epoch": 0.183984375, "grad_norm": 0.3263798654079437, "learning_rate": 0.00046799681522386013, "loss": 1.99, "step": 3768 }, { "epoch": 0.184033203125, "grad_norm": 0.31135231256484985, "learning_rate": 0.00046797880711113636, "loss": 1.9197, "step": 3769 }, { "epoch": 0.18408203125, "grad_norm": 0.3291581869125366, "learning_rate": 0.0004679607943214162, "loss": 1.9016, "step": 3770 }, { "epoch": 0.184130859375, "grad_norm": 0.309817910194397, "learning_rate": 0.0004679427768551361, "loss": 1.8842, "step": 3771 }, { "epoch": 0.1841796875, "grad_norm": 0.34784090518951416, "learning_rate": 0.00046792475471273283, "loss": 1.8823, "step": 3772 }, { "epoch": 0.184228515625, "grad_norm": 0.3109668493270874, "learning_rate": 0.0004679067278946432, "loss": 1.8941, "step": 3773 }, { "epoch": 0.18427734375, "grad_norm": 0.3019338548183441, "learning_rate": 0.0004678886964013039, "loss": 1.8935, "step": 3774 }, { "epoch": 0.184326171875, "grad_norm": 0.30487188696861267, "learning_rate": 0.0004678706602331522, "loss": 1.9199, "step": 3775 }, { "epoch": 0.184375, "grad_norm": 0.33873340487480164, "learning_rate": 0.000467852619390625, "loss": 1.9165, "step": 3776 }, { "epoch": 0.184423828125, "grad_norm": 0.3016669750213623, "learning_rate": 0.0004678345738741597, "loss": 1.9151, "step": 3777 }, { "epoch": 0.18447265625, "grad_norm": 0.3388601243495941, "learning_rate": 0.0004678165236841936, "loss": 1.8935, "step": 3778 }, { "epoch": 0.184521484375, "grad_norm": 0.2838214933872223, "learning_rate": 0.0004677984688211641, "loss": 1.8917, "step": 3779 }, { "epoch": 0.1845703125, "grad_norm": 0.2778550386428833, "learning_rate": 0.0004677804092855088, "loss": 1.8634, "step": 3780 }, { "epoch": 0.184619140625, "grad_norm": 0.29345884919166565, "learning_rate": 0.00046776234507766543, "loss": 1.8791, "step": 3781 }, { "epoch": 0.18466796875, "grad_norm": 0.24534501135349274, "learning_rate": 0.00046774427619807176, "loss": 1.8955, "step": 3782 }, { "epoch": 0.184716796875, "grad_norm": 0.2576075792312622, "learning_rate": 0.0004677262026471657, "loss": 1.909, "step": 3783 }, { "epoch": 0.184765625, "grad_norm": 0.2915971279144287, "learning_rate": 0.0004677081244253853, "loss": 1.9291, "step": 3784 }, { "epoch": 0.184814453125, "grad_norm": 0.23058699071407318, "learning_rate": 0.0004676900415331687, "loss": 1.8952, "step": 3785 }, { "epoch": 0.18486328125, "grad_norm": 0.27666112780570984, "learning_rate": 0.00046767195397095406, "loss": 1.9036, "step": 3786 }, { "epoch": 0.184912109375, "grad_norm": 0.2863306999206543, "learning_rate": 0.00046765386173917996, "loss": 1.9068, "step": 3787 }, { "epoch": 0.1849609375, "grad_norm": 0.2225174456834793, "learning_rate": 0.0004676357648382846, "loss": 1.9403, "step": 3788 }, { "epoch": 0.185009765625, "grad_norm": 0.3006160259246826, "learning_rate": 0.0004676176632687068, "loss": 1.9488, "step": 3789 }, { "epoch": 0.18505859375, "grad_norm": 0.2638607621192932, "learning_rate": 0.0004675995570308852, "loss": 1.8831, "step": 3790 }, { "epoch": 0.185107421875, "grad_norm": 0.2983646094799042, "learning_rate": 0.0004675814461252585, "loss": 1.9003, "step": 3791 }, { "epoch": 0.18515625, "grad_norm": 0.35530713200569153, "learning_rate": 0.0004675633305522658, "loss": 1.9161, "step": 3792 }, { "epoch": 0.185205078125, "grad_norm": 0.3004351556301117, "learning_rate": 0.0004675452103123461, "loss": 1.9188, "step": 3793 }, { "epoch": 0.18525390625, "grad_norm": 0.32782530784606934, "learning_rate": 0.00046752708540593853, "loss": 1.9144, "step": 3794 }, { "epoch": 0.185302734375, "grad_norm": 0.3144591748714447, "learning_rate": 0.00046750895583348235, "loss": 1.9144, "step": 3795 }, { "epoch": 0.1853515625, "grad_norm": 0.27192044258117676, "learning_rate": 0.00046749082159541696, "loss": 1.9291, "step": 3796 }, { "epoch": 0.185400390625, "grad_norm": 0.2891320288181305, "learning_rate": 0.0004674726826921819, "loss": 1.9158, "step": 3797 }, { "epoch": 0.18544921875, "grad_norm": 0.3050483167171478, "learning_rate": 0.0004674545391242167, "loss": 1.9124, "step": 3798 }, { "epoch": 0.185498046875, "grad_norm": 0.3303212821483612, "learning_rate": 0.0004674363908919612, "loss": 1.9158, "step": 3799 }, { "epoch": 0.185546875, "grad_norm": 0.3054337501525879, "learning_rate": 0.0004674182379958551, "loss": 1.8874, "step": 3800 }, { "epoch": 0.185595703125, "grad_norm": 0.2676522433757782, "learning_rate": 0.0004674000804363384, "loss": 1.9208, "step": 3801 }, { "epoch": 0.18564453125, "grad_norm": 0.2987672984600067, "learning_rate": 0.0004673819182138512, "loss": 1.8769, "step": 3802 }, { "epoch": 0.185693359375, "grad_norm": 0.27921363711357117, "learning_rate": 0.00046736375132883367, "loss": 1.8772, "step": 3803 }, { "epoch": 0.1857421875, "grad_norm": 0.34283268451690674, "learning_rate": 0.0004673455797817261, "loss": 1.9028, "step": 3804 }, { "epoch": 0.185791015625, "grad_norm": 0.3068019449710846, "learning_rate": 0.00046732740357296885, "loss": 1.8977, "step": 3805 }, { "epoch": 0.18583984375, "grad_norm": 0.3122882843017578, "learning_rate": 0.0004673092227030024, "loss": 1.9223, "step": 3806 }, { "epoch": 0.185888671875, "grad_norm": 0.3410957157611847, "learning_rate": 0.0004672910371722675, "loss": 1.8863, "step": 3807 }, { "epoch": 0.1859375, "grad_norm": 0.30542558431625366, "learning_rate": 0.0004672728469812049, "loss": 1.8734, "step": 3808 }, { "epoch": 0.185986328125, "grad_norm": 0.2777632772922516, "learning_rate": 0.0004672546521302553, "loss": 1.9162, "step": 3809 }, { "epoch": 0.18603515625, "grad_norm": 0.2638660669326782, "learning_rate": 0.0004672364526198598, "loss": 1.8826, "step": 3810 }, { "epoch": 0.186083984375, "grad_norm": 0.273781955242157, "learning_rate": 0.00046721824845045935, "loss": 1.9162, "step": 3811 }, { "epoch": 0.1861328125, "grad_norm": 0.2478475570678711, "learning_rate": 0.00046720003962249525, "loss": 1.8896, "step": 3812 }, { "epoch": 0.186181640625, "grad_norm": 0.28760528564453125, "learning_rate": 0.0004671818261364088, "loss": 1.9229, "step": 3813 }, { "epoch": 0.18623046875, "grad_norm": 0.3054419755935669, "learning_rate": 0.00046716360799264135, "loss": 1.8761, "step": 3814 }, { "epoch": 0.186279296875, "grad_norm": 0.24509602785110474, "learning_rate": 0.00046714538519163453, "loss": 1.9053, "step": 3815 }, { "epoch": 0.186328125, "grad_norm": 0.2717202305793762, "learning_rate": 0.00046712715773382986, "loss": 1.9062, "step": 3816 }, { "epoch": 0.186376953125, "grad_norm": 0.3724818229675293, "learning_rate": 0.0004671089256196693, "loss": 1.9261, "step": 3817 }, { "epoch": 0.18642578125, "grad_norm": 0.3240197002887726, "learning_rate": 0.0004670906888495945, "loss": 1.856, "step": 3818 }, { "epoch": 0.186474609375, "grad_norm": 0.2875368595123291, "learning_rate": 0.00046707244742404754, "loss": 1.8996, "step": 3819 }, { "epoch": 0.1865234375, "grad_norm": 0.34434109926223755, "learning_rate": 0.0004670542013434705, "loss": 1.9155, "step": 3820 }, { "epoch": 0.186572265625, "grad_norm": 0.28417718410491943, "learning_rate": 0.00046703595060830564, "loss": 1.8967, "step": 3821 }, { "epoch": 0.18662109375, "grad_norm": 0.26513728499412537, "learning_rate": 0.0004670176952189952, "loss": 1.8985, "step": 3822 }, { "epoch": 0.186669921875, "grad_norm": 0.34070107340812683, "learning_rate": 0.00046699943517598163, "loss": 1.8989, "step": 3823 }, { "epoch": 0.18671875, "grad_norm": 0.43746069073677063, "learning_rate": 0.0004669811704797075, "loss": 1.9065, "step": 3824 }, { "epoch": 0.186767578125, "grad_norm": 0.2981468737125397, "learning_rate": 0.0004669629011306155, "loss": 1.8899, "step": 3825 }, { "epoch": 0.18681640625, "grad_norm": 0.2717648446559906, "learning_rate": 0.00046694462712914837, "loss": 1.8965, "step": 3826 }, { "epoch": 0.186865234375, "grad_norm": 0.3477334976196289, "learning_rate": 0.000466926348475749, "loss": 1.8912, "step": 3827 }, { "epoch": 0.1869140625, "grad_norm": 0.20098644495010376, "learning_rate": 0.0004669080651708604, "loss": 1.8818, "step": 3828 }, { "epoch": 0.186962890625, "grad_norm": 0.32733654975891113, "learning_rate": 0.0004668897772149256, "loss": 1.8873, "step": 3829 }, { "epoch": 0.18701171875, "grad_norm": 0.25871041417121887, "learning_rate": 0.0004668714846083879, "loss": 1.8991, "step": 3830 }, { "epoch": 0.187060546875, "grad_norm": 0.27938148379325867, "learning_rate": 0.0004668531873516906, "loss": 1.8772, "step": 3831 }, { "epoch": 0.187109375, "grad_norm": 0.2918061912059784, "learning_rate": 0.0004668348854452772, "loss": 1.9101, "step": 3832 }, { "epoch": 0.187158203125, "grad_norm": 0.2518866956233978, "learning_rate": 0.00046681657888959126, "loss": 1.8757, "step": 3833 }, { "epoch": 0.18720703125, "grad_norm": 0.3488340675830841, "learning_rate": 0.00046679826768507646, "loss": 1.8791, "step": 3834 }, { "epoch": 0.187255859375, "grad_norm": 0.31605252623558044, "learning_rate": 0.00046677995183217646, "loss": 1.8936, "step": 3835 }, { "epoch": 0.1873046875, "grad_norm": 0.35099872946739197, "learning_rate": 0.0004667616313313353, "loss": 1.8919, "step": 3836 }, { "epoch": 0.187353515625, "grad_norm": 0.30577197670936584, "learning_rate": 0.000466743306182997, "loss": 1.9056, "step": 3837 }, { "epoch": 0.18740234375, "grad_norm": 0.3678314685821533, "learning_rate": 0.00046672497638760555, "loss": 1.907, "step": 3838 }, { "epoch": 0.187451171875, "grad_norm": 0.2705540359020233, "learning_rate": 0.0004667066419456053, "loss": 1.8979, "step": 3839 }, { "epoch": 0.1875, "grad_norm": 0.3354896605014801, "learning_rate": 0.0004666883028574405, "loss": 1.9028, "step": 3840 }, { "epoch": 0.187548828125, "grad_norm": 0.30399125814437866, "learning_rate": 0.0004666699591235558, "loss": 1.8838, "step": 3841 }, { "epoch": 0.18759765625, "grad_norm": 0.32809174060821533, "learning_rate": 0.0004666516107443956, "loss": 1.9122, "step": 3842 }, { "epoch": 0.187646484375, "grad_norm": 0.3790653347969055, "learning_rate": 0.0004666332577204046, "loss": 1.8736, "step": 3843 }, { "epoch": 0.1876953125, "grad_norm": 0.2817552983760834, "learning_rate": 0.0004666149000520277, "loss": 1.8851, "step": 3844 }, { "epoch": 0.187744140625, "grad_norm": 0.22787104547023773, "learning_rate": 0.00046659653773970975, "loss": 1.9156, "step": 3845 }, { "epoch": 0.18779296875, "grad_norm": 0.250474750995636, "learning_rate": 0.0004665781707838957, "loss": 1.8751, "step": 3846 }, { "epoch": 0.187841796875, "grad_norm": 0.2765655815601349, "learning_rate": 0.00046655979918503083, "loss": 1.9022, "step": 3847 }, { "epoch": 0.187890625, "grad_norm": 0.2696087062358856, "learning_rate": 0.00046654142294356033, "loss": 1.9158, "step": 3848 }, { "epoch": 0.187939453125, "grad_norm": 0.2808753550052643, "learning_rate": 0.0004665230420599296, "loss": 1.9102, "step": 3849 }, { "epoch": 0.18798828125, "grad_norm": 0.2610603868961334, "learning_rate": 0.00046650465653458404, "loss": 1.8798, "step": 3850 }, { "epoch": 0.188037109375, "grad_norm": 0.2911938428878784, "learning_rate": 0.00046648626636796925, "loss": 1.8874, "step": 3851 }, { "epoch": 0.1880859375, "grad_norm": 0.26807478070259094, "learning_rate": 0.00046646787156053097, "loss": 1.8957, "step": 3852 }, { "epoch": 0.188134765625, "grad_norm": 0.22286823391914368, "learning_rate": 0.00046644947211271504, "loss": 1.8984, "step": 3853 }, { "epoch": 0.18818359375, "grad_norm": 0.34766289591789246, "learning_rate": 0.0004664310680249673, "loss": 1.9134, "step": 3854 }, { "epoch": 0.188232421875, "grad_norm": 0.2797510325908661, "learning_rate": 0.00046641265929773384, "loss": 1.8869, "step": 3855 }, { "epoch": 0.18828125, "grad_norm": 0.35708290338516235, "learning_rate": 0.0004663942459314608, "loss": 1.9208, "step": 3856 }, { "epoch": 0.188330078125, "grad_norm": 0.3727058172225952, "learning_rate": 0.0004663758279265945, "loss": 1.8715, "step": 3857 }, { "epoch": 0.18837890625, "grad_norm": 0.2966122031211853, "learning_rate": 0.00046635740528358125, "loss": 1.9203, "step": 3858 }, { "epoch": 0.188427734375, "grad_norm": 0.31435084342956543, "learning_rate": 0.00046633897800286745, "loss": 1.8902, "step": 3859 }, { "epoch": 0.1884765625, "grad_norm": 0.28563621640205383, "learning_rate": 0.0004663205460848999, "loss": 1.8778, "step": 3860 }, { "epoch": 0.188525390625, "grad_norm": 0.2741090953350067, "learning_rate": 0.00046630210953012513, "loss": 1.9226, "step": 3861 }, { "epoch": 0.18857421875, "grad_norm": 0.254782497882843, "learning_rate": 0.0004662836683389901, "loss": 1.8661, "step": 3862 }, { "epoch": 0.188623046875, "grad_norm": 0.301479697227478, "learning_rate": 0.0004662652225119417, "loss": 1.9109, "step": 3863 }, { "epoch": 0.188671875, "grad_norm": 0.21807150542736053, "learning_rate": 0.0004662467720494269, "loss": 1.9161, "step": 3864 }, { "epoch": 0.188720703125, "grad_norm": 0.28720808029174805, "learning_rate": 0.000466228316951893, "loss": 1.8564, "step": 3865 }, { "epoch": 0.18876953125, "grad_norm": 0.2857954800128937, "learning_rate": 0.0004662098572197872, "loss": 1.8968, "step": 3866 }, { "epoch": 0.188818359375, "grad_norm": 0.28313061594963074, "learning_rate": 0.0004661913928535569, "loss": 1.8658, "step": 3867 }, { "epoch": 0.1888671875, "grad_norm": 0.24764738976955414, "learning_rate": 0.00046617292385364956, "loss": 1.8997, "step": 3868 }, { "epoch": 0.188916015625, "grad_norm": 0.26851174235343933, "learning_rate": 0.00046615445022051287, "loss": 1.8745, "step": 3869 }, { "epoch": 0.18896484375, "grad_norm": 0.2832317352294922, "learning_rate": 0.00046613597195459445, "loss": 1.8705, "step": 3870 }, { "epoch": 0.189013671875, "grad_norm": 0.23898358643054962, "learning_rate": 0.00046611748905634213, "loss": 1.8943, "step": 3871 }, { "epoch": 0.1890625, "grad_norm": 0.31072571873664856, "learning_rate": 0.0004660990015262041, "loss": 1.8979, "step": 3872 }, { "epoch": 0.189111328125, "grad_norm": 0.319807231426239, "learning_rate": 0.0004660805093646281, "loss": 1.8907, "step": 3873 }, { "epoch": 0.18916015625, "grad_norm": 0.21355973184108734, "learning_rate": 0.00046606201257206253, "loss": 1.8894, "step": 3874 }, { "epoch": 0.189208984375, "grad_norm": 0.3097524642944336, "learning_rate": 0.00046604351114895554, "loss": 1.8922, "step": 3875 }, { "epoch": 0.1892578125, "grad_norm": 0.2826918959617615, "learning_rate": 0.0004660250050957556, "loss": 1.8994, "step": 3876 }, { "epoch": 0.189306640625, "grad_norm": 0.4766705632209778, "learning_rate": 0.00046600649441291115, "loss": 1.8947, "step": 3877 }, { "epoch": 0.18935546875, "grad_norm": 0.5777364373207092, "learning_rate": 0.00046598797910087086, "loss": 1.9101, "step": 3878 }, { "epoch": 0.189404296875, "grad_norm": 0.33857765793800354, "learning_rate": 0.00046596945916008354, "loss": 1.8928, "step": 3879 }, { "epoch": 0.189453125, "grad_norm": 0.3176157772541046, "learning_rate": 0.00046595093459099793, "loss": 1.9119, "step": 3880 }, { "epoch": 0.189501953125, "grad_norm": 0.39822259545326233, "learning_rate": 0.00046593240539406295, "loss": 1.8847, "step": 3881 }, { "epoch": 0.18955078125, "grad_norm": 0.3659169673919678, "learning_rate": 0.0004659138715697278, "loss": 1.8934, "step": 3882 }, { "epoch": 0.189599609375, "grad_norm": 0.2978299856185913, "learning_rate": 0.0004658953331184415, "loss": 1.9035, "step": 3883 }, { "epoch": 0.1896484375, "grad_norm": 0.3013443946838379, "learning_rate": 0.0004658767900406535, "loss": 1.9046, "step": 3884 }, { "epoch": 0.189697265625, "grad_norm": 0.24639655649662018, "learning_rate": 0.00046585824233681314, "loss": 1.8786, "step": 3885 }, { "epoch": 0.18974609375, "grad_norm": 0.27966079115867615, "learning_rate": 0.00046583969000737, "loss": 1.8782, "step": 3886 }, { "epoch": 0.189794921875, "grad_norm": 0.24485518038272858, "learning_rate": 0.0004658211330527735, "loss": 1.9004, "step": 3887 }, { "epoch": 0.18984375, "grad_norm": 0.2691141366958618, "learning_rate": 0.00046580257147347366, "loss": 1.8867, "step": 3888 }, { "epoch": 0.189892578125, "grad_norm": 0.24652846157550812, "learning_rate": 0.00046578400526992015, "loss": 1.8758, "step": 3889 }, { "epoch": 0.18994140625, "grad_norm": 0.25510191917419434, "learning_rate": 0.00046576543444256307, "loss": 1.8954, "step": 3890 }, { "epoch": 0.189990234375, "grad_norm": 0.2600933909416199, "learning_rate": 0.0004657468589918524, "loss": 1.8871, "step": 3891 }, { "epoch": 0.1900390625, "grad_norm": 0.22986997663974762, "learning_rate": 0.00046572827891823833, "loss": 1.912, "step": 3892 }, { "epoch": 0.190087890625, "grad_norm": 0.2692010998725891, "learning_rate": 0.00046570969422217125, "loss": 1.8888, "step": 3893 }, { "epoch": 0.19013671875, "grad_norm": 0.2517731487751007, "learning_rate": 0.0004656911049041014, "loss": 1.9076, "step": 3894 }, { "epoch": 0.190185546875, "grad_norm": 0.30432382225990295, "learning_rate": 0.0004656725109644795, "loss": 1.8783, "step": 3895 }, { "epoch": 0.190234375, "grad_norm": 0.2785419225692749, "learning_rate": 0.00046565391240375607, "loss": 1.8707, "step": 3896 }, { "epoch": 0.190283203125, "grad_norm": 0.2921440601348877, "learning_rate": 0.000465635309222382, "loss": 1.8706, "step": 3897 }, { "epoch": 0.19033203125, "grad_norm": 0.3034391403198242, "learning_rate": 0.000465616701420808, "loss": 1.887, "step": 3898 }, { "epoch": 0.190380859375, "grad_norm": 0.23017553985118866, "learning_rate": 0.0004655980889994851, "loss": 1.8894, "step": 3899 }, { "epoch": 0.1904296875, "grad_norm": 0.30625107884407043, "learning_rate": 0.00046557947195886433, "loss": 1.9046, "step": 3900 }, { "epoch": 0.190478515625, "grad_norm": 0.2561984658241272, "learning_rate": 0.000465560850299397, "loss": 1.8934, "step": 3901 }, { "epoch": 0.19052734375, "grad_norm": 0.2100095897912979, "learning_rate": 0.0004655422240215344, "loss": 1.8858, "step": 3902 }, { "epoch": 0.190576171875, "grad_norm": 0.2748933732509613, "learning_rate": 0.0004655235931257279, "loss": 1.9031, "step": 3903 }, { "epoch": 0.190625, "grad_norm": 0.2978511452674866, "learning_rate": 0.00046550495761242907, "loss": 1.897, "step": 3904 }, { "epoch": 0.190673828125, "grad_norm": 0.27544650435447693, "learning_rate": 0.0004654863174820895, "loss": 1.8757, "step": 3905 }, { "epoch": 0.19072265625, "grad_norm": 0.27412426471710205, "learning_rate": 0.000465467672735161, "loss": 1.8908, "step": 3906 }, { "epoch": 0.190771484375, "grad_norm": 0.24418264627456665, "learning_rate": 0.00046544902337209543, "loss": 1.8996, "step": 3907 }, { "epoch": 0.1908203125, "grad_norm": 0.28628867864608765, "learning_rate": 0.00046543036939334476, "loss": 1.9003, "step": 3908 }, { "epoch": 0.190869140625, "grad_norm": 0.2596432566642761, "learning_rate": 0.0004654117107993611, "loss": 1.8998, "step": 3909 }, { "epoch": 0.19091796875, "grad_norm": 0.2499133050441742, "learning_rate": 0.0004653930475905967, "loss": 1.9235, "step": 3910 }, { "epoch": 0.190966796875, "grad_norm": 0.27362340688705444, "learning_rate": 0.00046537437976750386, "loss": 1.913, "step": 3911 }, { "epoch": 0.191015625, "grad_norm": 0.29166021943092346, "learning_rate": 0.0004653557073305349, "loss": 1.9085, "step": 3912 }, { "epoch": 0.191064453125, "grad_norm": 0.31313246488571167, "learning_rate": 0.00046533703028014245, "loss": 1.9069, "step": 3913 }, { "epoch": 0.19111328125, "grad_norm": 0.2600175738334656, "learning_rate": 0.0004653183486167792, "loss": 1.8656, "step": 3914 }, { "epoch": 0.191162109375, "grad_norm": 0.262892484664917, "learning_rate": 0.0004652996623408978, "loss": 1.8901, "step": 3915 }, { "epoch": 0.1912109375, "grad_norm": 0.22986897826194763, "learning_rate": 0.0004652809714529512, "loss": 1.8963, "step": 3916 }, { "epoch": 0.191259765625, "grad_norm": 0.2628546357154846, "learning_rate": 0.0004652622759533925, "loss": 1.8949, "step": 3917 }, { "epoch": 0.19130859375, "grad_norm": 0.2870689332485199, "learning_rate": 0.0004652435758426746, "loss": 1.8945, "step": 3918 }, { "epoch": 0.191357421875, "grad_norm": 0.3190333843231201, "learning_rate": 0.00046522487112125085, "loss": 1.924, "step": 3919 }, { "epoch": 0.19140625, "grad_norm": 0.3145574927330017, "learning_rate": 0.00046520616178957454, "loss": 1.8872, "step": 3920 }, { "epoch": 0.191455078125, "grad_norm": 0.26294076442718506, "learning_rate": 0.000465187447848099, "loss": 1.9054, "step": 3921 }, { "epoch": 0.19150390625, "grad_norm": 0.3963048756122589, "learning_rate": 0.00046516872929727786, "loss": 1.8757, "step": 3922 }, { "epoch": 0.191552734375, "grad_norm": 0.3123570680618286, "learning_rate": 0.00046515000613756485, "loss": 1.9185, "step": 3923 }, { "epoch": 0.1916015625, "grad_norm": 0.28585365414619446, "learning_rate": 0.0004651312783694137, "loss": 1.8647, "step": 3924 }, { "epoch": 0.191650390625, "grad_norm": 0.33265751600265503, "learning_rate": 0.0004651125459932782, "loss": 1.9066, "step": 3925 }, { "epoch": 0.19169921875, "grad_norm": 0.2857452630996704, "learning_rate": 0.0004650938090096125, "loss": 1.8933, "step": 3926 }, { "epoch": 0.191748046875, "grad_norm": 0.32966071367263794, "learning_rate": 0.0004650750674188705, "loss": 1.8789, "step": 3927 }, { "epoch": 0.191796875, "grad_norm": 0.3624846339225769, "learning_rate": 0.0004650563212215066, "loss": 1.9112, "step": 3928 }, { "epoch": 0.191845703125, "grad_norm": 0.2683459520339966, "learning_rate": 0.000465037570417975, "loss": 1.9007, "step": 3929 }, { "epoch": 0.19189453125, "grad_norm": 0.34295621514320374, "learning_rate": 0.00046501881500873036, "loss": 1.8933, "step": 3930 }, { "epoch": 0.191943359375, "grad_norm": 0.3603833317756653, "learning_rate": 0.0004650000549942269, "loss": 1.9084, "step": 3931 }, { "epoch": 0.1919921875, "grad_norm": 0.3482758402824402, "learning_rate": 0.0004649812903749196, "loss": 1.8902, "step": 3932 }, { "epoch": 0.192041015625, "grad_norm": 0.29165855050086975, "learning_rate": 0.000464962521151263, "loss": 1.8722, "step": 3933 }, { "epoch": 0.19208984375, "grad_norm": 0.21898610889911652, "learning_rate": 0.0004649437473237122, "loss": 1.8934, "step": 3934 }, { "epoch": 0.192138671875, "grad_norm": 0.308827668428421, "learning_rate": 0.000464924968892722, "loss": 1.8981, "step": 3935 }, { "epoch": 0.1921875, "grad_norm": 0.29164108633995056, "learning_rate": 0.0004649061858587476, "loss": 1.8811, "step": 3936 }, { "epoch": 0.192236328125, "grad_norm": 0.2278340458869934, "learning_rate": 0.0004648873982222443, "loss": 1.8986, "step": 3937 }, { "epoch": 0.19228515625, "grad_norm": 0.2160012423992157, "learning_rate": 0.0004648686059836672, "loss": 1.8931, "step": 3938 }, { "epoch": 0.192333984375, "grad_norm": 0.22434847056865692, "learning_rate": 0.000464849809143472, "loss": 1.8927, "step": 3939 }, { "epoch": 0.1923828125, "grad_norm": 0.23513616621494293, "learning_rate": 0.0004648310077021141, "loss": 1.8906, "step": 3940 }, { "epoch": 0.192431640625, "grad_norm": 0.2314106822013855, "learning_rate": 0.0004648122016600493, "loss": 1.8996, "step": 3941 }, { "epoch": 0.19248046875, "grad_norm": 0.25575417280197144, "learning_rate": 0.00046479339101773313, "loss": 1.8974, "step": 3942 }, { "epoch": 0.192529296875, "grad_norm": 0.26373422145843506, "learning_rate": 0.00046477457577562183, "loss": 1.8969, "step": 3943 }, { "epoch": 0.192578125, "grad_norm": 0.26129603385925293, "learning_rate": 0.0004647557559341712, "loss": 1.9271, "step": 3944 }, { "epoch": 0.192626953125, "grad_norm": 0.232929527759552, "learning_rate": 0.0004647369314938373, "loss": 1.8911, "step": 3945 }, { "epoch": 0.19267578125, "grad_norm": 0.2746851146221161, "learning_rate": 0.00046471810245507646, "loss": 1.9066, "step": 3946 }, { "epoch": 0.192724609375, "grad_norm": 0.2972863018512726, "learning_rate": 0.00046469926881834503, "loss": 1.9019, "step": 3947 }, { "epoch": 0.1927734375, "grad_norm": 0.24249601364135742, "learning_rate": 0.00046468043058409933, "loss": 1.8822, "step": 3948 }, { "epoch": 0.192822265625, "grad_norm": 0.29998260736465454, "learning_rate": 0.00046466158775279603, "loss": 1.9014, "step": 3949 }, { "epoch": 0.19287109375, "grad_norm": 0.2792489230632782, "learning_rate": 0.0004646427403248919, "loss": 1.8948, "step": 3950 }, { "epoch": 0.192919921875, "grad_norm": 0.3675711750984192, "learning_rate": 0.00046462388830084347, "loss": 1.9224, "step": 3951 }, { "epoch": 0.19296875, "grad_norm": 0.3517060875892639, "learning_rate": 0.0004646050316811078, "loss": 1.8721, "step": 3952 }, { "epoch": 0.193017578125, "grad_norm": 0.3125743567943573, "learning_rate": 0.0004645861704661418, "loss": 1.8858, "step": 3953 }, { "epoch": 0.19306640625, "grad_norm": 0.3114846646785736, "learning_rate": 0.00046456730465640274, "loss": 1.8949, "step": 3954 }, { "epoch": 0.193115234375, "grad_norm": 0.32072779536247253, "learning_rate": 0.0004645484342523478, "loss": 1.9078, "step": 3955 }, { "epoch": 0.1931640625, "grad_norm": 0.3206098973751068, "learning_rate": 0.00046452955925443414, "loss": 1.8952, "step": 3956 }, { "epoch": 0.193212890625, "grad_norm": 0.2944374978542328, "learning_rate": 0.00046451067966311936, "loss": 1.9114, "step": 3957 }, { "epoch": 0.19326171875, "grad_norm": 0.2804959714412689, "learning_rate": 0.00046449179547886104, "loss": 1.8954, "step": 3958 }, { "epoch": 0.193310546875, "grad_norm": 0.2577219009399414, "learning_rate": 0.0004644729067021168, "loss": 1.9134, "step": 3959 }, { "epoch": 0.193359375, "grad_norm": 0.30654487013816833, "learning_rate": 0.00046445401333334457, "loss": 1.8859, "step": 3960 }, { "epoch": 0.193408203125, "grad_norm": 0.30313846468925476, "learning_rate": 0.00046443511537300197, "loss": 1.8975, "step": 3961 }, { "epoch": 0.19345703125, "grad_norm": 0.24052667617797852, "learning_rate": 0.00046441621282154727, "loss": 1.8769, "step": 3962 }, { "epoch": 0.193505859375, "grad_norm": 0.23928560316562653, "learning_rate": 0.00046439730567943843, "loss": 1.8846, "step": 3963 }, { "epoch": 0.1935546875, "grad_norm": 0.2536441385746002, "learning_rate": 0.00046437839394713364, "loss": 1.8851, "step": 3964 }, { "epoch": 0.193603515625, "grad_norm": 0.2973223626613617, "learning_rate": 0.00046435947762509146, "loss": 1.8912, "step": 3965 }, { "epoch": 0.19365234375, "grad_norm": 0.24683135747909546, "learning_rate": 0.0004643405567137702, "loss": 1.901, "step": 3966 }, { "epoch": 0.193701171875, "grad_norm": 0.2885264754295349, "learning_rate": 0.00046432163121362836, "loss": 1.912, "step": 3967 }, { "epoch": 0.19375, "grad_norm": 0.3181796371936798, "learning_rate": 0.00046430270112512474, "loss": 1.8985, "step": 3968 }, { "epoch": 0.193798828125, "grad_norm": 0.3298410475254059, "learning_rate": 0.000464283766448718, "loss": 1.902, "step": 3969 }, { "epoch": 0.19384765625, "grad_norm": 0.28943192958831787, "learning_rate": 0.00046426482718486725, "loss": 1.8829, "step": 3970 }, { "epoch": 0.193896484375, "grad_norm": 0.2801717519760132, "learning_rate": 0.0004642458833340312, "loss": 1.8869, "step": 3971 }, { "epoch": 0.1939453125, "grad_norm": 0.27867835760116577, "learning_rate": 0.00046422693489666923, "loss": 1.8792, "step": 3972 }, { "epoch": 0.193994140625, "grad_norm": 0.2989110052585602, "learning_rate": 0.00046420798187324044, "loss": 1.9064, "step": 3973 }, { "epoch": 0.19404296875, "grad_norm": 0.26757243275642395, "learning_rate": 0.0004641890242642042, "loss": 1.8745, "step": 3974 }, { "epoch": 0.194091796875, "grad_norm": 0.2562633454799652, "learning_rate": 0.00046417006207001994, "loss": 1.8907, "step": 3975 }, { "epoch": 0.194140625, "grad_norm": 0.28731483221054077, "learning_rate": 0.0004641510952911473, "loss": 1.9184, "step": 3976 }, { "epoch": 0.194189453125, "grad_norm": 0.25341248512268066, "learning_rate": 0.0004641321239280459, "loss": 1.8829, "step": 3977 }, { "epoch": 0.19423828125, "grad_norm": 0.26560983061790466, "learning_rate": 0.00046411314798117543, "loss": 1.9081, "step": 3978 }, { "epoch": 0.194287109375, "grad_norm": 0.2598762810230255, "learning_rate": 0.000464094167450996, "loss": 1.886, "step": 3979 }, { "epoch": 0.1943359375, "grad_norm": 0.28805068135261536, "learning_rate": 0.00046407518233796747, "loss": 1.9006, "step": 3980 }, { "epoch": 0.194384765625, "grad_norm": 0.2563306987285614, "learning_rate": 0.00046405619264255, "loss": 1.8819, "step": 3981 }, { "epoch": 0.19443359375, "grad_norm": 0.3413543999195099, "learning_rate": 0.0004640371983652038, "loss": 1.8912, "step": 3982 }, { "epoch": 0.194482421875, "grad_norm": 0.34976550936698914, "learning_rate": 0.00046401819950638923, "loss": 1.8837, "step": 3983 }, { "epoch": 0.19453125, "grad_norm": 0.35827529430389404, "learning_rate": 0.0004639991960665668, "loss": 1.8935, "step": 3984 }, { "epoch": 0.194580078125, "grad_norm": 0.4048691987991333, "learning_rate": 0.0004639801880461969, "loss": 1.9116, "step": 3985 }, { "epoch": 0.19462890625, "grad_norm": 0.29111066460609436, "learning_rate": 0.00046396117544574033, "loss": 1.8993, "step": 3986 }, { "epoch": 0.194677734375, "grad_norm": 0.28403738141059875, "learning_rate": 0.000463942158265658, "loss": 1.8937, "step": 3987 }, { "epoch": 0.1947265625, "grad_norm": 0.31561458110809326, "learning_rate": 0.00046392313650641054, "loss": 1.9119, "step": 3988 }, { "epoch": 0.194775390625, "grad_norm": 0.2483844757080078, "learning_rate": 0.0004639041101684591, "loss": 1.8836, "step": 3989 }, { "epoch": 0.19482421875, "grad_norm": 0.27205127477645874, "learning_rate": 0.0004638850792522649, "loss": 1.8691, "step": 3990 }, { "epoch": 0.194873046875, "grad_norm": 0.34166452288627625, "learning_rate": 0.0004638660437582889, "loss": 1.903, "step": 3991 }, { "epoch": 0.194921875, "grad_norm": 0.31492990255355835, "learning_rate": 0.0004638470036869927, "loss": 1.8699, "step": 3992 }, { "epoch": 0.194970703125, "grad_norm": 0.3267298936843872, "learning_rate": 0.0004638279590388376, "loss": 1.8526, "step": 3993 }, { "epoch": 0.19501953125, "grad_norm": 0.29949912428855896, "learning_rate": 0.00046380890981428523, "loss": 1.8992, "step": 3994 }, { "epoch": 0.195068359375, "grad_norm": 0.31369268894195557, "learning_rate": 0.0004637898560137972, "loss": 1.8812, "step": 3995 }, { "epoch": 0.1951171875, "grad_norm": 0.3317028284072876, "learning_rate": 0.00046377079763783535, "loss": 1.8685, "step": 3996 }, { "epoch": 0.195166015625, "grad_norm": 0.3319056034088135, "learning_rate": 0.0004637517346868616, "loss": 1.8764, "step": 3997 }, { "epoch": 0.19521484375, "grad_norm": 0.2533596158027649, "learning_rate": 0.0004637326671613379, "loss": 1.8702, "step": 3998 }, { "epoch": 0.195263671875, "grad_norm": 0.2824985682964325, "learning_rate": 0.0004637135950617264, "loss": 1.9118, "step": 3999 }, { "epoch": 0.1953125, "grad_norm": 0.3168768882751465, "learning_rate": 0.0004636945183884893, "loss": 1.9138, "step": 4000 }, { "epoch": 0.195361328125, "grad_norm": 0.30968552827835083, "learning_rate": 0.000463675437142089, "loss": 1.8977, "step": 4001 }, { "epoch": 0.19541015625, "grad_norm": 0.34231284260749817, "learning_rate": 0.00046365635132298785, "loss": 1.8957, "step": 4002 }, { "epoch": 0.195458984375, "grad_norm": 0.3158310055732727, "learning_rate": 0.0004636372609316484, "loss": 1.8768, "step": 4003 }, { "epoch": 0.1955078125, "grad_norm": 0.31002065539360046, "learning_rate": 0.0004636181659685335, "loss": 1.9011, "step": 4004 }, { "epoch": 0.195556640625, "grad_norm": 0.25688236951828003, "learning_rate": 0.0004635990664341057, "loss": 1.884, "step": 4005 }, { "epoch": 0.19560546875, "grad_norm": 0.2558180093765259, "learning_rate": 0.00046357996232882805, "loss": 1.9067, "step": 4006 }, { "epoch": 0.195654296875, "grad_norm": 0.3374403715133667, "learning_rate": 0.0004635608536531635, "loss": 1.8923, "step": 4007 }, { "epoch": 0.195703125, "grad_norm": 0.3248530328273773, "learning_rate": 0.00046354174040757524, "loss": 1.8887, "step": 4008 }, { "epoch": 0.195751953125, "grad_norm": 0.28641319274902344, "learning_rate": 0.0004635226225925264, "loss": 1.9017, "step": 4009 }, { "epoch": 0.19580078125, "grad_norm": 0.2732515037059784, "learning_rate": 0.00046350350020848036, "loss": 1.867, "step": 4010 }, { "epoch": 0.195849609375, "grad_norm": 0.24674515426158905, "learning_rate": 0.0004634843732559005, "loss": 1.9045, "step": 4011 }, { "epoch": 0.1958984375, "grad_norm": 0.2506139874458313, "learning_rate": 0.0004634652417352504, "loss": 1.8926, "step": 4012 }, { "epoch": 0.195947265625, "grad_norm": 0.28578218817710876, "learning_rate": 0.0004634461056469938, "loss": 1.8756, "step": 4013 }, { "epoch": 0.19599609375, "grad_norm": 0.37113073468208313, "learning_rate": 0.0004634269649915944, "loss": 1.8743, "step": 4014 }, { "epoch": 0.196044921875, "grad_norm": 0.31151077151298523, "learning_rate": 0.0004634078197695162, "loss": 1.8602, "step": 4015 }, { "epoch": 0.19609375, "grad_norm": 0.2558768391609192, "learning_rate": 0.0004633886699812231, "loss": 1.892, "step": 4016 }, { "epoch": 0.196142578125, "grad_norm": 0.2641793191432953, "learning_rate": 0.00046336951562717923, "loss": 1.9006, "step": 4017 }, { "epoch": 0.19619140625, "grad_norm": 0.24599066376686096, "learning_rate": 0.00046335035670784877, "loss": 1.8748, "step": 4018 }, { "epoch": 0.196240234375, "grad_norm": 0.2849861681461334, "learning_rate": 0.00046333119322369614, "loss": 1.8908, "step": 4019 }, { "epoch": 0.1962890625, "grad_norm": 0.26617974042892456, "learning_rate": 0.00046331202517518573, "loss": 1.8987, "step": 4020 }, { "epoch": 0.196337890625, "grad_norm": 0.2612764537334442, "learning_rate": 0.00046329285256278206, "loss": 1.8793, "step": 4021 }, { "epoch": 0.19638671875, "grad_norm": 0.30484744906425476, "learning_rate": 0.00046327367538694987, "loss": 1.8731, "step": 4022 }, { "epoch": 0.196435546875, "grad_norm": 0.296265184879303, "learning_rate": 0.0004632544936481539, "loss": 1.8813, "step": 4023 }, { "epoch": 0.196484375, "grad_norm": 0.2808147966861725, "learning_rate": 0.00046323530734685906, "loss": 1.8857, "step": 4024 }, { "epoch": 0.196533203125, "grad_norm": 0.25932154059410095, "learning_rate": 0.00046321611648353025, "loss": 1.8885, "step": 4025 }, { "epoch": 0.19658203125, "grad_norm": 0.2525120973587036, "learning_rate": 0.0004631969210586327, "loss": 1.896, "step": 4026 }, { "epoch": 0.196630859375, "grad_norm": 0.29576143622398376, "learning_rate": 0.00046317772107263156, "loss": 1.8831, "step": 4027 }, { "epoch": 0.1966796875, "grad_norm": 0.2742979824542999, "learning_rate": 0.00046315851652599214, "loss": 1.9107, "step": 4028 }, { "epoch": 0.196728515625, "grad_norm": 0.3100084960460663, "learning_rate": 0.0004631393074191799, "loss": 1.8947, "step": 4029 }, { "epoch": 0.19677734375, "grad_norm": 0.39042940735816956, "learning_rate": 0.00046312009375266055, "loss": 1.8812, "step": 4030 }, { "epoch": 0.196826171875, "grad_norm": 0.31383222341537476, "learning_rate": 0.00046310087552689944, "loss": 1.9074, "step": 4031 }, { "epoch": 0.196875, "grad_norm": 0.22830381989479065, "learning_rate": 0.0004630816527423625, "loss": 1.8752, "step": 4032 }, { "epoch": 0.196923828125, "grad_norm": 0.2881971001625061, "learning_rate": 0.00046306242539951567, "loss": 1.8874, "step": 4033 }, { "epoch": 0.19697265625, "grad_norm": 0.3515084683895111, "learning_rate": 0.0004630431934988248, "loss": 1.912, "step": 4034 }, { "epoch": 0.197021484375, "grad_norm": 0.2326962947845459, "learning_rate": 0.0004630239570407561, "loss": 1.9038, "step": 4035 }, { "epoch": 0.1970703125, "grad_norm": 0.2607722580432892, "learning_rate": 0.00046300471602577577, "loss": 1.8709, "step": 4036 }, { "epoch": 0.197119140625, "grad_norm": 0.29884013533592224, "learning_rate": 0.0004629854704543501, "loss": 1.8711, "step": 4037 }, { "epoch": 0.19716796875, "grad_norm": 0.3247200548648834, "learning_rate": 0.0004629662203269455, "loss": 1.8842, "step": 4038 }, { "epoch": 0.197216796875, "grad_norm": 0.3874967694282532, "learning_rate": 0.0004629469656440285, "loss": 1.8815, "step": 4039 }, { "epoch": 0.197265625, "grad_norm": 0.3379119038581848, "learning_rate": 0.00046292770640606593, "loss": 1.9117, "step": 4040 }, { "epoch": 0.197314453125, "grad_norm": 0.3200187683105469, "learning_rate": 0.0004629084426135243, "loss": 1.8835, "step": 4041 }, { "epoch": 0.19736328125, "grad_norm": 0.2891557812690735, "learning_rate": 0.00046288917426687054, "loss": 1.8875, "step": 4042 }, { "epoch": 0.197412109375, "grad_norm": 0.26418039202690125, "learning_rate": 0.00046286990136657185, "loss": 1.8761, "step": 4043 }, { "epoch": 0.1974609375, "grad_norm": 0.3606051504611969, "learning_rate": 0.000462850623913095, "loss": 1.8835, "step": 4044 }, { "epoch": 0.197509765625, "grad_norm": 0.28662124276161194, "learning_rate": 0.0004628313419069075, "loss": 1.8923, "step": 4045 }, { "epoch": 0.19755859375, "grad_norm": 0.2756370007991791, "learning_rate": 0.00046281205534847645, "loss": 1.8756, "step": 4046 }, { "epoch": 0.197607421875, "grad_norm": 0.329831063747406, "learning_rate": 0.0004627927642382694, "loss": 1.8723, "step": 4047 }, { "epoch": 0.19765625, "grad_norm": 0.23502859473228455, "learning_rate": 0.0004627734685767538, "loss": 1.8933, "step": 4048 }, { "epoch": 0.197705078125, "grad_norm": 0.26736485958099365, "learning_rate": 0.00046275416836439736, "loss": 1.893, "step": 4049 }, { "epoch": 0.19775390625, "grad_norm": 0.31085655093193054, "learning_rate": 0.00046273486360166784, "loss": 1.8912, "step": 4050 }, { "epoch": 0.197802734375, "grad_norm": 0.3058494031429291, "learning_rate": 0.000462715554289033, "loss": 1.8786, "step": 4051 }, { "epoch": 0.1978515625, "grad_norm": 0.43467578291893005, "learning_rate": 0.00046269624042696096, "loss": 1.8461, "step": 4052 }, { "epoch": 0.197900390625, "grad_norm": 0.42631733417510986, "learning_rate": 0.0004626769220159197, "loss": 1.8985, "step": 4053 }, { "epoch": 0.19794921875, "grad_norm": 0.39115971326828003, "learning_rate": 0.0004626575990563775, "loss": 1.8626, "step": 4054 }, { "epoch": 0.197998046875, "grad_norm": 0.29656562209129333, "learning_rate": 0.0004626382715488026, "loss": 1.8924, "step": 4055 }, { "epoch": 0.198046875, "grad_norm": 0.2617323100566864, "learning_rate": 0.0004626189394936634, "loss": 1.8901, "step": 4056 }, { "epoch": 0.198095703125, "grad_norm": 0.39188095927238464, "learning_rate": 0.0004625996028914285, "loss": 1.8821, "step": 4057 }, { "epoch": 0.19814453125, "grad_norm": 0.30059319734573364, "learning_rate": 0.0004625802617425665, "loss": 1.9041, "step": 4058 }, { "epoch": 0.198193359375, "grad_norm": 0.2680511176586151, "learning_rate": 0.0004625609160475462, "loss": 1.9064, "step": 4059 }, { "epoch": 0.1982421875, "grad_norm": 0.25266435742378235, "learning_rate": 0.00046254156580683635, "loss": 1.8804, "step": 4060 }, { "epoch": 0.198291015625, "grad_norm": 0.3404048681259155, "learning_rate": 0.00046252221102090603, "loss": 1.8826, "step": 4061 }, { "epoch": 0.19833984375, "grad_norm": 0.2661249339580536, "learning_rate": 0.00046250285169022426, "loss": 1.8918, "step": 4062 }, { "epoch": 0.198388671875, "grad_norm": 0.3054879307746887, "learning_rate": 0.0004624834878152602, "loss": 1.8874, "step": 4063 }, { "epoch": 0.1984375, "grad_norm": 0.32771188020706177, "learning_rate": 0.0004624641193964833, "loss": 1.8865, "step": 4064 }, { "epoch": 0.198486328125, "grad_norm": 0.3019038140773773, "learning_rate": 0.0004624447464343628, "loss": 1.8985, "step": 4065 }, { "epoch": 0.19853515625, "grad_norm": 0.29689809679985046, "learning_rate": 0.0004624253689293682, "loss": 1.8868, "step": 4066 }, { "epoch": 0.198583984375, "grad_norm": 0.2600552439689636, "learning_rate": 0.0004624059868819693, "loss": 1.8997, "step": 4067 }, { "epoch": 0.1986328125, "grad_norm": 0.27924486994743347, "learning_rate": 0.00046238660029263576, "loss": 1.9084, "step": 4068 }, { "epoch": 0.198681640625, "grad_norm": 0.26859501004219055, "learning_rate": 0.00046236720916183736, "loss": 1.8925, "step": 4069 }, { "epoch": 0.19873046875, "grad_norm": 0.2606300711631775, "learning_rate": 0.0004623478134900441, "loss": 1.8774, "step": 4070 }, { "epoch": 0.198779296875, "grad_norm": 0.26932159066200256, "learning_rate": 0.0004623284132777262, "loss": 1.9116, "step": 4071 }, { "epoch": 0.198828125, "grad_norm": 0.2214171439409256, "learning_rate": 0.00046230900852535354, "loss": 1.8857, "step": 4072 }, { "epoch": 0.198876953125, "grad_norm": 0.2677132785320282, "learning_rate": 0.00046228959923339663, "loss": 1.8781, "step": 4073 }, { "epoch": 0.19892578125, "grad_norm": 0.27381742000579834, "learning_rate": 0.00046227018540232585, "loss": 1.8815, "step": 4074 }, { "epoch": 0.198974609375, "grad_norm": 0.2328842133283615, "learning_rate": 0.0004622507670326117, "loss": 1.9076, "step": 4075 }, { "epoch": 0.1990234375, "grad_norm": 0.2578282654285431, "learning_rate": 0.00046223134412472466, "loss": 1.8696, "step": 4076 }, { "epoch": 0.199072265625, "grad_norm": 0.27211982011795044, "learning_rate": 0.00046221191667913567, "loss": 1.8975, "step": 4077 }, { "epoch": 0.19912109375, "grad_norm": 0.2812022268772125, "learning_rate": 0.00046219248469631547, "loss": 1.8932, "step": 4078 }, { "epoch": 0.199169921875, "grad_norm": 0.2718845307826996, "learning_rate": 0.000462173048176735, "loss": 1.8921, "step": 4079 }, { "epoch": 0.19921875, "grad_norm": 0.26076844334602356, "learning_rate": 0.0004621536071208653, "loss": 1.8833, "step": 4080 }, { "epoch": 0.199267578125, "grad_norm": 0.2221163958311081, "learning_rate": 0.00046213416152917757, "loss": 1.8807, "step": 4081 }, { "epoch": 0.19931640625, "grad_norm": 0.2247871458530426, "learning_rate": 0.00046211471140214315, "loss": 1.8864, "step": 4082 }, { "epoch": 0.199365234375, "grad_norm": 0.2817566692829132, "learning_rate": 0.00046209525674023333, "loss": 1.8858, "step": 4083 }, { "epoch": 0.1994140625, "grad_norm": 0.28298214077949524, "learning_rate": 0.0004620757975439197, "loss": 1.9069, "step": 4084 }, { "epoch": 0.199462890625, "grad_norm": 0.25989022850990295, "learning_rate": 0.0004620563338136738, "loss": 1.8788, "step": 4085 }, { "epoch": 0.19951171875, "grad_norm": 0.24249149858951569, "learning_rate": 0.00046203686554996734, "loss": 1.8994, "step": 4086 }, { "epoch": 0.199560546875, "grad_norm": 0.2247573435306549, "learning_rate": 0.0004620173927532722, "loss": 1.9055, "step": 4087 }, { "epoch": 0.199609375, "grad_norm": 0.22096718847751617, "learning_rate": 0.0004619979154240603, "loss": 1.8682, "step": 4088 }, { "epoch": 0.199658203125, "grad_norm": 0.23237518966197968, "learning_rate": 0.00046197843356280365, "loss": 1.8771, "step": 4089 }, { "epoch": 0.19970703125, "grad_norm": 0.2308422178030014, "learning_rate": 0.00046195894716997456, "loss": 1.8695, "step": 4090 }, { "epoch": 0.199755859375, "grad_norm": 0.2379147708415985, "learning_rate": 0.0004619394562460451, "loss": 1.9092, "step": 4091 }, { "epoch": 0.1998046875, "grad_norm": 0.2952437698841095, "learning_rate": 0.0004619199607914877, "loss": 1.8997, "step": 4092 }, { "epoch": 0.199853515625, "grad_norm": 0.27585238218307495, "learning_rate": 0.00046190046080677496, "loss": 1.8684, "step": 4093 }, { "epoch": 0.19990234375, "grad_norm": 0.2165825068950653, "learning_rate": 0.00046188095629237934, "loss": 1.8947, "step": 4094 }, { "epoch": 0.199951171875, "grad_norm": 0.2502520978450775, "learning_rate": 0.00046186144724877365, "loss": 1.8896, "step": 4095 }, { "epoch": 0.2, "grad_norm": 0.2604858875274658, "learning_rate": 0.00046184193367643055, "loss": 1.8922, "step": 4096 }, { "epoch": 0.200048828125, "grad_norm": 0.28255319595336914, "learning_rate": 0.0004618224155758233, "loss": 1.9045, "step": 4097 }, { "epoch": 0.20009765625, "grad_norm": 0.277905136346817, "learning_rate": 0.0004618028929474245, "loss": 1.8954, "step": 4098 }, { "epoch": 0.200146484375, "grad_norm": 0.2751050889492035, "learning_rate": 0.0004617833657917076, "loss": 1.8883, "step": 4099 }, { "epoch": 0.2001953125, "grad_norm": 0.2632584273815155, "learning_rate": 0.00046176383410914576, "loss": 1.8923, "step": 4100 }, { "epoch": 0.200244140625, "grad_norm": 0.2972228229045868, "learning_rate": 0.0004617442979002124, "loss": 1.9132, "step": 4101 }, { "epoch": 0.20029296875, "grad_norm": 0.3713725507259369, "learning_rate": 0.0004617247571653809, "loss": 1.9052, "step": 4102 }, { "epoch": 0.200341796875, "grad_norm": 0.32207897305488586, "learning_rate": 0.00046170521190512493, "loss": 1.8899, "step": 4103 }, { "epoch": 0.200390625, "grad_norm": 0.2689990699291229, "learning_rate": 0.00046168566211991807, "loss": 1.8906, "step": 4104 }, { "epoch": 0.200439453125, "grad_norm": 0.3047080934047699, "learning_rate": 0.0004616661078102343, "loss": 1.8853, "step": 4105 }, { "epoch": 0.20048828125, "grad_norm": 0.27914726734161377, "learning_rate": 0.00046164654897654745, "loss": 1.892, "step": 4106 }, { "epoch": 0.200537109375, "grad_norm": 0.3041841685771942, "learning_rate": 0.00046162698561933146, "loss": 1.896, "step": 4107 }, { "epoch": 0.2005859375, "grad_norm": 0.28174591064453125, "learning_rate": 0.00046160741773906063, "loss": 1.8657, "step": 4108 }, { "epoch": 0.200634765625, "grad_norm": 0.3785693347454071, "learning_rate": 0.00046158784533620903, "loss": 1.9091, "step": 4109 }, { "epoch": 0.20068359375, "grad_norm": 0.34459567070007324, "learning_rate": 0.00046156826841125116, "loss": 1.8923, "step": 4110 }, { "epoch": 0.200732421875, "grad_norm": 0.2853797674179077, "learning_rate": 0.00046154868696466136, "loss": 1.8803, "step": 4111 }, { "epoch": 0.20078125, "grad_norm": 0.29554998874664307, "learning_rate": 0.00046152910099691425, "loss": 1.9107, "step": 4112 }, { "epoch": 0.200830078125, "grad_norm": 0.25606024265289307, "learning_rate": 0.00046150951050848453, "loss": 1.9066, "step": 4113 }, { "epoch": 0.20087890625, "grad_norm": 0.25476765632629395, "learning_rate": 0.00046148991549984703, "loss": 1.883, "step": 4114 }, { "epoch": 0.200927734375, "grad_norm": 0.2848855257034302, "learning_rate": 0.0004614703159714766, "loss": 1.8524, "step": 4115 }, { "epoch": 0.2009765625, "grad_norm": 0.299157053232193, "learning_rate": 0.00046145071192384824, "loss": 1.8923, "step": 4116 }, { "epoch": 0.201025390625, "grad_norm": 0.34171009063720703, "learning_rate": 0.000461431103357437, "loss": 1.8653, "step": 4117 }, { "epoch": 0.20107421875, "grad_norm": 0.31078675389289856, "learning_rate": 0.0004614114902727183, "loss": 1.8957, "step": 4118 }, { "epoch": 0.201123046875, "grad_norm": 0.27347278594970703, "learning_rate": 0.0004613918726701674, "loss": 1.8981, "step": 4119 }, { "epoch": 0.201171875, "grad_norm": 0.23375701904296875, "learning_rate": 0.0004613722505502596, "loss": 1.8896, "step": 4120 }, { "epoch": 0.201220703125, "grad_norm": 0.2915455996990204, "learning_rate": 0.0004613526239134707, "loss": 1.8833, "step": 4121 }, { "epoch": 0.20126953125, "grad_norm": 0.2560201585292816, "learning_rate": 0.0004613329927602762, "loss": 1.9093, "step": 4122 }, { "epoch": 0.201318359375, "grad_norm": 0.2605549097061157, "learning_rate": 0.0004613133570911519, "loss": 1.8858, "step": 4123 }, { "epoch": 0.2013671875, "grad_norm": 0.3032921254634857, "learning_rate": 0.0004612937169065737, "loss": 1.9027, "step": 4124 }, { "epoch": 0.201416015625, "grad_norm": 0.27709606289863586, "learning_rate": 0.00046127407220701756, "loss": 1.9019, "step": 4125 }, { "epoch": 0.20146484375, "grad_norm": 0.2702173590660095, "learning_rate": 0.0004612544229929597, "loss": 1.885, "step": 4126 }, { "epoch": 0.201513671875, "grad_norm": 0.37778565287590027, "learning_rate": 0.0004612347692648763, "loss": 1.8661, "step": 4127 }, { "epoch": 0.2015625, "grad_norm": 0.3482252359390259, "learning_rate": 0.00046121511102324356, "loss": 1.8773, "step": 4128 }, { "epoch": 0.201611328125, "grad_norm": 0.27459418773651123, "learning_rate": 0.0004611954482685381, "loss": 1.8636, "step": 4129 }, { "epoch": 0.20166015625, "grad_norm": 0.298545241355896, "learning_rate": 0.00046117578100123626, "loss": 1.8775, "step": 4130 }, { "epoch": 0.201708984375, "grad_norm": 0.3319898247718811, "learning_rate": 0.00046115610922181486, "loss": 1.9181, "step": 4131 }, { "epoch": 0.2017578125, "grad_norm": 0.2782059907913208, "learning_rate": 0.0004611364329307505, "loss": 1.8908, "step": 4132 }, { "epoch": 0.201806640625, "grad_norm": 0.27185118198394775, "learning_rate": 0.00046111675212852024, "loss": 1.9087, "step": 4133 }, { "epoch": 0.20185546875, "grad_norm": 0.34880349040031433, "learning_rate": 0.000461097066815601, "loss": 1.9045, "step": 4134 }, { "epoch": 0.201904296875, "grad_norm": 0.28377118706703186, "learning_rate": 0.00046107737699246974, "loss": 1.9085, "step": 4135 }, { "epoch": 0.201953125, "grad_norm": 0.24266165494918823, "learning_rate": 0.00046105768265960383, "loss": 1.875, "step": 4136 }, { "epoch": 0.202001953125, "grad_norm": 0.2911596894264221, "learning_rate": 0.0004610379838174804, "loss": 1.9007, "step": 4137 }, { "epoch": 0.20205078125, "grad_norm": 0.2470223754644394, "learning_rate": 0.00046101828046657704, "loss": 1.8896, "step": 4138 }, { "epoch": 0.202099609375, "grad_norm": 0.2977962791919708, "learning_rate": 0.0004609985726073713, "loss": 1.914, "step": 4139 }, { "epoch": 0.2021484375, "grad_norm": 0.2719085216522217, "learning_rate": 0.0004609788602403406, "loss": 1.8973, "step": 4140 }, { "epoch": 0.202197265625, "grad_norm": 0.3311716318130493, "learning_rate": 0.00046095914336596286, "loss": 1.896, "step": 4141 }, { "epoch": 0.20224609375, "grad_norm": 0.3211229145526886, "learning_rate": 0.0004609394219847159, "loss": 1.902, "step": 4142 }, { "epoch": 0.202294921875, "grad_norm": 0.31348371505737305, "learning_rate": 0.00046091969609707767, "loss": 1.8743, "step": 4143 }, { "epoch": 0.20234375, "grad_norm": 0.28774169087409973, "learning_rate": 0.00046089996570352617, "loss": 1.8867, "step": 4144 }, { "epoch": 0.202392578125, "grad_norm": 0.24692760407924652, "learning_rate": 0.00046088023080453964, "loss": 1.8716, "step": 4145 }, { "epoch": 0.20244140625, "grad_norm": 0.2539650797843933, "learning_rate": 0.0004608604914005964, "loss": 1.8603, "step": 4146 }, { "epoch": 0.202490234375, "grad_norm": 0.27627110481262207, "learning_rate": 0.00046084074749217494, "loss": 1.894, "step": 4147 }, { "epoch": 0.2025390625, "grad_norm": 0.3694857954978943, "learning_rate": 0.0004608209990797536, "loss": 1.8673, "step": 4148 }, { "epoch": 0.202587890625, "grad_norm": 0.3014836013317108, "learning_rate": 0.0004608012461638109, "loss": 1.8933, "step": 4149 }, { "epoch": 0.20263671875, "grad_norm": 0.305289089679718, "learning_rate": 0.000460781488744826, "loss": 1.9145, "step": 4150 }, { "epoch": 0.202685546875, "grad_norm": 0.3112064003944397, "learning_rate": 0.00046076172682327725, "loss": 1.8824, "step": 4151 }, { "epoch": 0.202734375, "grad_norm": 0.25988471508026123, "learning_rate": 0.00046074196039964395, "loss": 1.8906, "step": 4152 }, { "epoch": 0.202783203125, "grad_norm": 0.3732872009277344, "learning_rate": 0.00046072218947440497, "loss": 1.8982, "step": 4153 }, { "epoch": 0.20283203125, "grad_norm": 0.3357386887073517, "learning_rate": 0.00046070241404803946, "loss": 1.9078, "step": 4154 }, { "epoch": 0.202880859375, "grad_norm": 0.22762395441532135, "learning_rate": 0.0004606826341210268, "loss": 1.8814, "step": 4155 }, { "epoch": 0.2029296875, "grad_norm": 0.2747178077697754, "learning_rate": 0.00046066284969384635, "loss": 1.8815, "step": 4156 }, { "epoch": 0.202978515625, "grad_norm": 0.2379172295331955, "learning_rate": 0.00046064306076697755, "loss": 1.8947, "step": 4157 }, { "epoch": 0.20302734375, "grad_norm": 0.2822434604167938, "learning_rate": 0.0004606232673409, "loss": 1.8934, "step": 4158 }, { "epoch": 0.203076171875, "grad_norm": 0.24954727292060852, "learning_rate": 0.0004606034694160935, "loss": 1.8803, "step": 4159 }, { "epoch": 0.203125, "grad_norm": 0.23953385651111603, "learning_rate": 0.00046058366699303776, "loss": 1.898, "step": 4160 }, { "epoch": 0.203173828125, "grad_norm": 0.2769727408885956, "learning_rate": 0.0004605638600722128, "loss": 1.9012, "step": 4161 }, { "epoch": 0.20322265625, "grad_norm": 0.25461918115615845, "learning_rate": 0.00046054404865409856, "loss": 1.8699, "step": 4162 }, { "epoch": 0.203271484375, "grad_norm": 0.3046160340309143, "learning_rate": 0.0004605242327391753, "loss": 1.8981, "step": 4163 }, { "epoch": 0.2033203125, "grad_norm": 0.27929648756980896, "learning_rate": 0.0004605044123279232, "loss": 1.8923, "step": 4164 }, { "epoch": 0.203369140625, "grad_norm": 0.2578090727329254, "learning_rate": 0.00046048458742082253, "loss": 1.8929, "step": 4165 }, { "epoch": 0.20341796875, "grad_norm": 0.32173144817352295, "learning_rate": 0.000460464758018354, "loss": 1.8978, "step": 4166 }, { "epoch": 0.203466796875, "grad_norm": 0.284454345703125, "learning_rate": 0.000460444924120998, "loss": 1.9003, "step": 4167 }, { "epoch": 0.203515625, "grad_norm": 0.27051836252212524, "learning_rate": 0.00046042508572923527, "loss": 1.8677, "step": 4168 }, { "epoch": 0.203564453125, "grad_norm": 0.30784541368484497, "learning_rate": 0.0004604052428435466, "loss": 1.8911, "step": 4169 }, { "epoch": 0.20361328125, "grad_norm": 0.27316027879714966, "learning_rate": 0.00046038539546441296, "loss": 1.8842, "step": 4170 }, { "epoch": 0.203662109375, "grad_norm": 0.258872389793396, "learning_rate": 0.0004603655435923153, "loss": 1.8563, "step": 4171 }, { "epoch": 0.2037109375, "grad_norm": 0.29440170526504517, "learning_rate": 0.00046034568722773476, "loss": 1.9034, "step": 4172 }, { "epoch": 0.203759765625, "grad_norm": 0.2790099084377289, "learning_rate": 0.0004603258263711526, "loss": 1.8797, "step": 4173 }, { "epoch": 0.20380859375, "grad_norm": 0.40476423501968384, "learning_rate": 0.0004603059610230502, "loss": 1.8872, "step": 4174 }, { "epoch": 0.203857421875, "grad_norm": 0.29684194922447205, "learning_rate": 0.00046028609118390886, "loss": 1.8766, "step": 4175 }, { "epoch": 0.20390625, "grad_norm": 0.3621717095375061, "learning_rate": 0.0004602662168542103, "loss": 1.8869, "step": 4176 }, { "epoch": 0.203955078125, "grad_norm": 0.429462730884552, "learning_rate": 0.00046024633803443615, "loss": 1.9042, "step": 4177 }, { "epoch": 0.20400390625, "grad_norm": 0.4159896969795227, "learning_rate": 0.00046022645472506814, "loss": 1.8918, "step": 4178 }, { "epoch": 0.204052734375, "grad_norm": 0.28905221819877625, "learning_rate": 0.0004602065669265882, "loss": 1.8797, "step": 4179 }, { "epoch": 0.2041015625, "grad_norm": 0.31127646565437317, "learning_rate": 0.00046018667463947836, "loss": 1.9074, "step": 4180 }, { "epoch": 0.204150390625, "grad_norm": 0.2404821217060089, "learning_rate": 0.0004601667778642206, "loss": 1.9057, "step": 4181 }, { "epoch": 0.20419921875, "grad_norm": 0.260139524936676, "learning_rate": 0.0004601468766012973, "loss": 1.9107, "step": 4182 }, { "epoch": 0.204248046875, "grad_norm": 0.33483102917671204, "learning_rate": 0.0004601269708511906, "loss": 1.8838, "step": 4183 }, { "epoch": 0.204296875, "grad_norm": 0.35519400238990784, "learning_rate": 0.0004601070606143831, "loss": 1.8812, "step": 4184 }, { "epoch": 0.204345703125, "grad_norm": 0.2513984739780426, "learning_rate": 0.0004600871458913573, "loss": 1.9076, "step": 4185 }, { "epoch": 0.20439453125, "grad_norm": 0.3504859507083893, "learning_rate": 0.00046006722668259575, "loss": 1.8941, "step": 4186 }, { "epoch": 0.204443359375, "grad_norm": 0.4238618016242981, "learning_rate": 0.0004600473029885813, "loss": 1.89, "step": 4187 }, { "epoch": 0.2044921875, "grad_norm": 0.43730998039245605, "learning_rate": 0.00046002737480979687, "loss": 1.8812, "step": 4188 }, { "epoch": 0.204541015625, "grad_norm": 0.4444003999233246, "learning_rate": 0.0004600074421467253, "loss": 1.882, "step": 4189 }, { "epoch": 0.20458984375, "grad_norm": 0.28788691759109497, "learning_rate": 0.0004599875049998497, "loss": 1.8724, "step": 4190 }, { "epoch": 0.204638671875, "grad_norm": 0.4027971625328064, "learning_rate": 0.0004599675633696533, "loss": 1.9099, "step": 4191 }, { "epoch": 0.2046875, "grad_norm": 0.4323995113372803, "learning_rate": 0.00045994761725661956, "loss": 1.9117, "step": 4192 }, { "epoch": 0.204736328125, "grad_norm": 0.2445593774318695, "learning_rate": 0.00045992766666123154, "loss": 1.8855, "step": 4193 }, { "epoch": 0.20478515625, "grad_norm": 0.2833675146102905, "learning_rate": 0.000459907711583973, "loss": 1.8804, "step": 4194 }, { "epoch": 0.204833984375, "grad_norm": 0.3588849902153015, "learning_rate": 0.00045988775202532756, "loss": 1.8726, "step": 4195 }, { "epoch": 0.2048828125, "grad_norm": 0.24474726617336273, "learning_rate": 0.0004598677879857789, "loss": 1.8665, "step": 4196 }, { "epoch": 0.204931640625, "grad_norm": 0.2724114656448364, "learning_rate": 0.00045984781946581085, "loss": 1.8764, "step": 4197 }, { "epoch": 0.20498046875, "grad_norm": 0.25769782066345215, "learning_rate": 0.00045982784646590735, "loss": 1.8604, "step": 4198 }, { "epoch": 0.205029296875, "grad_norm": 0.23342518508434296, "learning_rate": 0.0004598078689865526, "loss": 1.8599, "step": 4199 }, { "epoch": 0.205078125, "grad_norm": 0.20983995497226715, "learning_rate": 0.0004597878870282306, "loss": 1.8871, "step": 4200 }, { "epoch": 0.205126953125, "grad_norm": 0.2468387633562088, "learning_rate": 0.00045976790059142574, "loss": 1.8965, "step": 4201 }, { "epoch": 0.20517578125, "grad_norm": 0.255696177482605, "learning_rate": 0.00045974790967662243, "loss": 1.8992, "step": 4202 }, { "epoch": 0.205224609375, "grad_norm": 0.2554589509963989, "learning_rate": 0.00045972791428430506, "loss": 1.8798, "step": 4203 }, { "epoch": 0.2052734375, "grad_norm": 0.2495853453874588, "learning_rate": 0.0004597079144149582, "loss": 1.8722, "step": 4204 }, { "epoch": 0.205322265625, "grad_norm": 0.2504926919937134, "learning_rate": 0.0004596879100690667, "loss": 1.8864, "step": 4205 }, { "epoch": 0.20537109375, "grad_norm": 0.27460145950317383, "learning_rate": 0.0004596679012471153, "loss": 1.8938, "step": 4206 }, { "epoch": 0.205419921875, "grad_norm": 0.23615708947181702, "learning_rate": 0.000459647887949589, "loss": 1.8695, "step": 4207 }, { "epoch": 0.20546875, "grad_norm": 0.2090335339307785, "learning_rate": 0.0004596278701769727, "loss": 1.8765, "step": 4208 }, { "epoch": 0.205517578125, "grad_norm": 0.21829912066459656, "learning_rate": 0.0004596078479297517, "loss": 1.8793, "step": 4209 }, { "epoch": 0.20556640625, "grad_norm": 0.24714700877666473, "learning_rate": 0.0004595878212084112, "loss": 1.8833, "step": 4210 }, { "epoch": 0.205615234375, "grad_norm": 0.29537057876586914, "learning_rate": 0.00045956779001343653, "loss": 1.864, "step": 4211 }, { "epoch": 0.2056640625, "grad_norm": 0.251544326543808, "learning_rate": 0.0004595477543453132, "loss": 1.9091, "step": 4212 }, { "epoch": 0.205712890625, "grad_norm": 0.22365032136440277, "learning_rate": 0.0004595277142045268, "loss": 1.8937, "step": 4213 }, { "epoch": 0.20576171875, "grad_norm": 0.26746001839637756, "learning_rate": 0.00045950766959156297, "loss": 1.8878, "step": 4214 }, { "epoch": 0.205810546875, "grad_norm": 0.2698354125022888, "learning_rate": 0.0004594876205069076, "loss": 1.8862, "step": 4215 }, { "epoch": 0.205859375, "grad_norm": 0.21597473323345184, "learning_rate": 0.0004594675669510464, "loss": 1.8935, "step": 4216 }, { "epoch": 0.205908203125, "grad_norm": 0.2648662328720093, "learning_rate": 0.0004594475089244656, "loss": 1.8562, "step": 4217 }, { "epoch": 0.20595703125, "grad_norm": 0.28697237372398376, "learning_rate": 0.00045942744642765124, "loss": 1.8852, "step": 4218 }, { "epoch": 0.206005859375, "grad_norm": 0.25023895502090454, "learning_rate": 0.0004594073794610895, "loss": 1.877, "step": 4219 }, { "epoch": 0.2060546875, "grad_norm": 0.3560786545276642, "learning_rate": 0.00045938730802526687, "loss": 1.8829, "step": 4220 }, { "epoch": 0.206103515625, "grad_norm": 0.34153512120246887, "learning_rate": 0.0004593672321206696, "loss": 1.867, "step": 4221 }, { "epoch": 0.20615234375, "grad_norm": 0.21498139202594757, "learning_rate": 0.0004593471517477844, "loss": 1.8916, "step": 4222 }, { "epoch": 0.206201171875, "grad_norm": 0.27183252573013306, "learning_rate": 0.0004593270669070978, "loss": 1.8819, "step": 4223 }, { "epoch": 0.20625, "grad_norm": 0.34657022356987, "learning_rate": 0.0004593069775990967, "loss": 1.8952, "step": 4224 }, { "epoch": 0.206298828125, "grad_norm": 0.40214577317237854, "learning_rate": 0.00045928688382426794, "loss": 1.8879, "step": 4225 }, { "epoch": 0.20634765625, "grad_norm": 0.38558247685432434, "learning_rate": 0.00045926678558309847, "loss": 1.8798, "step": 4226 }, { "epoch": 0.206396484375, "grad_norm": 0.3294599950313568, "learning_rate": 0.0004592466828760754, "loss": 1.8933, "step": 4227 }, { "epoch": 0.2064453125, "grad_norm": 0.3280373513698578, "learning_rate": 0.000459226575703686, "loss": 1.8994, "step": 4228 }, { "epoch": 0.206494140625, "grad_norm": 0.3719525933265686, "learning_rate": 0.0004592064640664175, "loss": 1.9029, "step": 4229 }, { "epoch": 0.20654296875, "grad_norm": 0.3275463283061981, "learning_rate": 0.0004591863479647573, "loss": 1.8939, "step": 4230 }, { "epoch": 0.206591796875, "grad_norm": 0.3240432143211365, "learning_rate": 0.00045916622739919306, "loss": 1.9306, "step": 4231 }, { "epoch": 0.206640625, "grad_norm": 0.3321658968925476, "learning_rate": 0.00045914610237021236, "loss": 1.8887, "step": 4232 }, { "epoch": 0.206689453125, "grad_norm": 0.3591693043708801, "learning_rate": 0.0004591259728783028, "loss": 1.8961, "step": 4233 }, { "epoch": 0.20673828125, "grad_norm": 0.31772279739379883, "learning_rate": 0.00045910583892395246, "loss": 1.8693, "step": 4234 }, { "epoch": 0.206787109375, "grad_norm": 0.33150166273117065, "learning_rate": 0.00045908570050764926, "loss": 1.906, "step": 4235 }, { "epoch": 0.2068359375, "grad_norm": 0.31677836179733276, "learning_rate": 0.0004590655576298811, "loss": 1.9002, "step": 4236 }, { "epoch": 0.206884765625, "grad_norm": 0.2591873109340668, "learning_rate": 0.00045904541029113635, "loss": 1.8865, "step": 4237 }, { "epoch": 0.20693359375, "grad_norm": 0.27731287479400635, "learning_rate": 0.0004590252584919031, "loss": 1.8741, "step": 4238 }, { "epoch": 0.206982421875, "grad_norm": 0.23856697976589203, "learning_rate": 0.00045900510223267004, "loss": 1.8804, "step": 4239 }, { "epoch": 0.20703125, "grad_norm": 0.24754391610622406, "learning_rate": 0.00045898494151392537, "loss": 1.8583, "step": 4240 }, { "epoch": 0.207080078125, "grad_norm": 0.25319939851760864, "learning_rate": 0.0004589647763361579, "loss": 1.8858, "step": 4241 }, { "epoch": 0.20712890625, "grad_norm": 0.24685019254684448, "learning_rate": 0.0004589446066998563, "loss": 1.8905, "step": 4242 }, { "epoch": 0.207177734375, "grad_norm": 0.26908183097839355, "learning_rate": 0.0004589244326055093, "loss": 1.9071, "step": 4243 }, { "epoch": 0.2072265625, "grad_norm": 0.2945692241191864, "learning_rate": 0.00045890425405360595, "loss": 1.8874, "step": 4244 }, { "epoch": 0.207275390625, "grad_norm": 0.3429662883281708, "learning_rate": 0.00045888407104463524, "loss": 1.9043, "step": 4245 }, { "epoch": 0.20732421875, "grad_norm": 0.3335816264152527, "learning_rate": 0.00045886388357908636, "loss": 1.8793, "step": 4246 }, { "epoch": 0.207373046875, "grad_norm": 0.28185543417930603, "learning_rate": 0.00045884369165744856, "loss": 1.8624, "step": 4247 }, { "epoch": 0.207421875, "grad_norm": 0.26601603627204895, "learning_rate": 0.0004588234952802112, "loss": 1.8927, "step": 4248 }, { "epoch": 0.207470703125, "grad_norm": 0.3451172113418579, "learning_rate": 0.0004588032944478637, "loss": 1.9012, "step": 4249 }, { "epoch": 0.20751953125, "grad_norm": 0.37294653058052063, "learning_rate": 0.00045878308916089567, "loss": 1.8943, "step": 4250 }, { "epoch": 0.207568359375, "grad_norm": 0.3083263635635376, "learning_rate": 0.0004587628794197969, "loss": 1.8973, "step": 4251 }, { "epoch": 0.2076171875, "grad_norm": 0.2825663387775421, "learning_rate": 0.00045874266522505705, "loss": 1.8618, "step": 4252 }, { "epoch": 0.207666015625, "grad_norm": 0.3134966492652893, "learning_rate": 0.0004587224465771661, "loss": 1.9047, "step": 4253 }, { "epoch": 0.20771484375, "grad_norm": 0.28604811429977417, "learning_rate": 0.0004587022234766141, "loss": 1.8899, "step": 4254 }, { "epoch": 0.207763671875, "grad_norm": 0.2744993269443512, "learning_rate": 0.0004586819959238911, "loss": 1.8952, "step": 4255 }, { "epoch": 0.2078125, "grad_norm": 0.2930297553539276, "learning_rate": 0.0004586617639194873, "loss": 1.8925, "step": 4256 }, { "epoch": 0.207861328125, "grad_norm": 0.2511126697063446, "learning_rate": 0.0004586415274638933, "loss": 1.9059, "step": 4257 }, { "epoch": 0.20791015625, "grad_norm": 0.26788270473480225, "learning_rate": 0.00045862128655759914, "loss": 1.8913, "step": 4258 }, { "epoch": 0.207958984375, "grad_norm": 0.30279308557510376, "learning_rate": 0.00045860104120109564, "loss": 1.8836, "step": 4259 }, { "epoch": 0.2080078125, "grad_norm": 0.3125830292701721, "learning_rate": 0.00045858079139487345, "loss": 1.9028, "step": 4260 }, { "epoch": 0.208056640625, "grad_norm": 0.3056882619857788, "learning_rate": 0.00045856053713942327, "loss": 1.8989, "step": 4261 }, { "epoch": 0.20810546875, "grad_norm": 0.2772828936576843, "learning_rate": 0.0004585402784352359, "loss": 1.8976, "step": 4262 }, { "epoch": 0.208154296875, "grad_norm": 0.32415488362312317, "learning_rate": 0.00045852001528280255, "loss": 1.9017, "step": 4263 }, { "epoch": 0.208203125, "grad_norm": 0.3540818393230438, "learning_rate": 0.00045849974768261413, "loss": 1.8698, "step": 4264 }, { "epoch": 0.208251953125, "grad_norm": 0.3128064274787903, "learning_rate": 0.00045847947563516203, "loss": 1.8798, "step": 4265 }, { "epoch": 0.20830078125, "grad_norm": 0.29593855142593384, "learning_rate": 0.0004584591991409373, "loss": 1.8733, "step": 4266 }, { "epoch": 0.208349609375, "grad_norm": 0.25556841492652893, "learning_rate": 0.00045843891820043146, "loss": 1.8791, "step": 4267 }, { "epoch": 0.2083984375, "grad_norm": 0.2817467451095581, "learning_rate": 0.00045841863281413615, "loss": 1.915, "step": 4268 }, { "epoch": 0.208447265625, "grad_norm": 0.3065771758556366, "learning_rate": 0.0004583983429825429, "loss": 1.879, "step": 4269 }, { "epoch": 0.20849609375, "grad_norm": 0.32414761185646057, "learning_rate": 0.0004583780487061435, "loss": 1.8664, "step": 4270 }, { "epoch": 0.208544921875, "grad_norm": 0.35493215918540955, "learning_rate": 0.0004583577499854297, "loss": 1.9123, "step": 4271 }, { "epoch": 0.20859375, "grad_norm": 0.3087448179721832, "learning_rate": 0.0004583374468208935, "loss": 1.9029, "step": 4272 }, { "epoch": 0.208642578125, "grad_norm": 0.25606516003608704, "learning_rate": 0.00045831713921302714, "loss": 1.8896, "step": 4273 }, { "epoch": 0.20869140625, "grad_norm": 0.31420090794563293, "learning_rate": 0.00045829682716232254, "loss": 1.8774, "step": 4274 }, { "epoch": 0.208740234375, "grad_norm": 0.2496248185634613, "learning_rate": 0.000458276510669272, "loss": 1.8948, "step": 4275 }, { "epoch": 0.2087890625, "grad_norm": 0.2380286604166031, "learning_rate": 0.00045825618973436807, "loss": 1.8898, "step": 4276 }, { "epoch": 0.208837890625, "grad_norm": 0.29381105303764343, "learning_rate": 0.00045823586435810303, "loss": 1.8914, "step": 4277 }, { "epoch": 0.20888671875, "grad_norm": 0.28025224804878235, "learning_rate": 0.00045821553454096975, "loss": 1.9019, "step": 4278 }, { "epoch": 0.208935546875, "grad_norm": 0.27610665559768677, "learning_rate": 0.00045819520028346077, "loss": 1.8679, "step": 4279 }, { "epoch": 0.208984375, "grad_norm": 0.2863152027130127, "learning_rate": 0.0004581748615860689, "loss": 1.8705, "step": 4280 }, { "epoch": 0.209033203125, "grad_norm": 0.21415404975414276, "learning_rate": 0.00045815451844928714, "loss": 1.9027, "step": 4281 }, { "epoch": 0.20908203125, "grad_norm": 0.25063738226890564, "learning_rate": 0.00045813417087360846, "loss": 1.9284, "step": 4282 }, { "epoch": 0.209130859375, "grad_norm": 0.24627314507961273, "learning_rate": 0.00045811381885952604, "loss": 1.8746, "step": 4283 }, { "epoch": 0.2091796875, "grad_norm": 0.22083792090415955, "learning_rate": 0.0004580934624075331, "loss": 1.8903, "step": 4284 }, { "epoch": 0.209228515625, "grad_norm": 0.24798999726772308, "learning_rate": 0.00045807310151812293, "loss": 1.868, "step": 4285 }, { "epoch": 0.20927734375, "grad_norm": 0.24409742653369904, "learning_rate": 0.0004580527361917891, "loss": 1.8671, "step": 4286 }, { "epoch": 0.209326171875, "grad_norm": 0.22546321153640747, "learning_rate": 0.0004580323664290252, "loss": 1.8742, "step": 4287 }, { "epoch": 0.209375, "grad_norm": 0.217474564909935, "learning_rate": 0.0004580119922303248, "loss": 1.9078, "step": 4288 }, { "epoch": 0.209423828125, "grad_norm": 0.2727143466472626, "learning_rate": 0.0004579916135961817, "loss": 1.8876, "step": 4289 }, { "epoch": 0.20947265625, "grad_norm": 0.23618867993354797, "learning_rate": 0.0004579712305270899, "loss": 1.9034, "step": 4290 }, { "epoch": 0.209521484375, "grad_norm": 0.22527430951595306, "learning_rate": 0.0004579508430235434, "loss": 1.8642, "step": 4291 }, { "epoch": 0.2095703125, "grad_norm": 0.2630583345890045, "learning_rate": 0.0004579304510860361, "loss": 1.8605, "step": 4292 }, { "epoch": 0.209619140625, "grad_norm": 0.25301802158355713, "learning_rate": 0.0004579100547150624, "loss": 1.8876, "step": 4293 }, { "epoch": 0.20966796875, "grad_norm": 0.24601110816001892, "learning_rate": 0.0004578896539111166, "loss": 1.8887, "step": 4294 }, { "epoch": 0.209716796875, "grad_norm": 0.26627638936042786, "learning_rate": 0.0004578692486746931, "loss": 1.8986, "step": 4295 }, { "epoch": 0.209765625, "grad_norm": 0.2617560029029846, "learning_rate": 0.00045784883900628644, "loss": 1.8788, "step": 4296 }, { "epoch": 0.209814453125, "grad_norm": 0.29307082295417786, "learning_rate": 0.00045782842490639124, "loss": 1.8782, "step": 4297 }, { "epoch": 0.20986328125, "grad_norm": 0.37233656644821167, "learning_rate": 0.0004578080063755023, "loss": 1.9168, "step": 4298 }, { "epoch": 0.209912109375, "grad_norm": 0.36955252289772034, "learning_rate": 0.00045778758341411446, "loss": 1.879, "step": 4299 }, { "epoch": 0.2099609375, "grad_norm": 0.36611464619636536, "learning_rate": 0.00045776715602272267, "loss": 1.8659, "step": 4300 }, { "epoch": 0.210009765625, "grad_norm": 0.35931676626205444, "learning_rate": 0.0004577467242018221, "loss": 1.8812, "step": 4301 }, { "epoch": 0.21005859375, "grad_norm": 0.35290423035621643, "learning_rate": 0.00045772628795190775, "loss": 1.8882, "step": 4302 }, { "epoch": 0.210107421875, "grad_norm": 0.2540489733219147, "learning_rate": 0.0004577058472734751, "loss": 1.907, "step": 4303 }, { "epoch": 0.21015625, "grad_norm": 0.3660503625869751, "learning_rate": 0.00045768540216701935, "loss": 1.9006, "step": 4304 }, { "epoch": 0.210205078125, "grad_norm": 0.39407631754875183, "learning_rate": 0.0004576649526330362, "loss": 1.899, "step": 4305 }, { "epoch": 0.21025390625, "grad_norm": 0.3510664999485016, "learning_rate": 0.00045764449867202105, "loss": 1.8858, "step": 4306 }, { "epoch": 0.210302734375, "grad_norm": 0.3466684818267822, "learning_rate": 0.00045762404028446983, "loss": 1.8427, "step": 4307 }, { "epoch": 0.2103515625, "grad_norm": 0.24084939062595367, "learning_rate": 0.00045760357747087836, "loss": 1.8645, "step": 4308 }, { "epoch": 0.210400390625, "grad_norm": 0.2901983857154846, "learning_rate": 0.0004575831102317423, "loss": 1.8599, "step": 4309 }, { "epoch": 0.21044921875, "grad_norm": 0.2775486707687378, "learning_rate": 0.000457562638567558, "loss": 1.9032, "step": 4310 }, { "epoch": 0.210498046875, "grad_norm": 0.2627560794353485, "learning_rate": 0.0004575421624788215, "loss": 1.8614, "step": 4311 }, { "epoch": 0.210546875, "grad_norm": 0.24315012991428375, "learning_rate": 0.000457521681966029, "loss": 1.8872, "step": 4312 }, { "epoch": 0.210595703125, "grad_norm": 0.27326223254203796, "learning_rate": 0.0004575011970296769, "loss": 1.9087, "step": 4313 }, { "epoch": 0.21064453125, "grad_norm": 0.32447895407676697, "learning_rate": 0.00045748070767026166, "loss": 1.8994, "step": 4314 }, { "epoch": 0.210693359375, "grad_norm": 0.30998390913009644, "learning_rate": 0.0004574602138882799, "loss": 1.9076, "step": 4315 }, { "epoch": 0.2107421875, "grad_norm": 0.2994157075881958, "learning_rate": 0.00045743971568422827, "loss": 1.8903, "step": 4316 }, { "epoch": 0.210791015625, "grad_norm": 0.3331969082355499, "learning_rate": 0.0004574192130586035, "loss": 1.8523, "step": 4317 }, { "epoch": 0.21083984375, "grad_norm": 0.32188716530799866, "learning_rate": 0.0004573987060119026, "loss": 1.8956, "step": 4318 }, { "epoch": 0.210888671875, "grad_norm": 0.4268534481525421, "learning_rate": 0.0004573781945446225, "loss": 1.8999, "step": 4319 }, { "epoch": 0.2109375, "grad_norm": 0.3631746768951416, "learning_rate": 0.0004573576786572603, "loss": 1.8928, "step": 4320 }, { "epoch": 0.210986328125, "grad_norm": 0.26406794786453247, "learning_rate": 0.0004573371583503134, "loss": 1.9099, "step": 4321 }, { "epoch": 0.21103515625, "grad_norm": 0.3591754734516144, "learning_rate": 0.0004573166336242788, "loss": 1.906, "step": 4322 }, { "epoch": 0.211083984375, "grad_norm": 0.34304529428482056, "learning_rate": 0.00045729610447965414, "loss": 1.8625, "step": 4323 }, { "epoch": 0.2111328125, "grad_norm": 0.24350780248641968, "learning_rate": 0.000457275570916937, "loss": 1.8671, "step": 4324 }, { "epoch": 0.211181640625, "grad_norm": 0.36123034358024597, "learning_rate": 0.00045725503293662494, "loss": 1.9015, "step": 4325 }, { "epoch": 0.21123046875, "grad_norm": 0.3448932468891144, "learning_rate": 0.0004572344905392158, "loss": 1.8921, "step": 4326 }, { "epoch": 0.211279296875, "grad_norm": 0.27031514048576355, "learning_rate": 0.00045721394372520724, "loss": 1.9, "step": 4327 }, { "epoch": 0.211328125, "grad_norm": 0.32208624482154846, "learning_rate": 0.00045719339249509746, "loss": 1.8502, "step": 4328 }, { "epoch": 0.211376953125, "grad_norm": 0.2970709800720215, "learning_rate": 0.0004571728368493844, "loss": 1.8829, "step": 4329 }, { "epoch": 0.21142578125, "grad_norm": 0.32492750883102417, "learning_rate": 0.0004571522767885663, "loss": 1.893, "step": 4330 }, { "epoch": 0.211474609375, "grad_norm": 0.2679835259914398, "learning_rate": 0.0004571317123131414, "loss": 1.8558, "step": 4331 }, { "epoch": 0.2115234375, "grad_norm": 0.26839005947113037, "learning_rate": 0.00045711114342360823, "loss": 1.8832, "step": 4332 }, { "epoch": 0.211572265625, "grad_norm": 0.23861519992351532, "learning_rate": 0.0004570905701204651, "loss": 1.8648, "step": 4333 }, { "epoch": 0.21162109375, "grad_norm": 0.26208776235580444, "learning_rate": 0.0004570699924042108, "loss": 1.8688, "step": 4334 }, { "epoch": 0.211669921875, "grad_norm": 0.22502869367599487, "learning_rate": 0.0004570494102753438, "loss": 1.9016, "step": 4335 }, { "epoch": 0.21171875, "grad_norm": 0.281299352645874, "learning_rate": 0.00045702882373436317, "loss": 1.8541, "step": 4336 }, { "epoch": 0.211767578125, "grad_norm": 0.26654186844825745, "learning_rate": 0.0004570082327817678, "loss": 1.8819, "step": 4337 }, { "epoch": 0.21181640625, "grad_norm": 0.23648710548877716, "learning_rate": 0.00045698763741805666, "loss": 1.8827, "step": 4338 }, { "epoch": 0.211865234375, "grad_norm": 0.23478873074054718, "learning_rate": 0.00045696703764372886, "loss": 1.8665, "step": 4339 }, { "epoch": 0.2119140625, "grad_norm": 0.2905430495738983, "learning_rate": 0.0004569464334592838, "loss": 1.8662, "step": 4340 }, { "epoch": 0.211962890625, "grad_norm": 0.2532639801502228, "learning_rate": 0.00045692582486522073, "loss": 1.9125, "step": 4341 }, { "epoch": 0.21201171875, "grad_norm": 0.32860758900642395, "learning_rate": 0.0004569052118620391, "loss": 1.8653, "step": 4342 }, { "epoch": 0.212060546875, "grad_norm": 0.36664044857025146, "learning_rate": 0.00045688459445023856, "loss": 1.871, "step": 4343 }, { "epoch": 0.212109375, "grad_norm": 0.2548082172870636, "learning_rate": 0.00045686397263031863, "loss": 1.8724, "step": 4344 }, { "epoch": 0.212158203125, "grad_norm": 0.29209572076797485, "learning_rate": 0.00045684334640277936, "loss": 1.8778, "step": 4345 }, { "epoch": 0.21220703125, "grad_norm": 0.3697254955768585, "learning_rate": 0.0004568227157681205, "loss": 1.8771, "step": 4346 }, { "epoch": 0.212255859375, "grad_norm": 0.28919658064842224, "learning_rate": 0.000456802080726842, "loss": 1.8797, "step": 4347 }, { "epoch": 0.2123046875, "grad_norm": 0.3067643940448761, "learning_rate": 0.000456781441279444, "loss": 1.8624, "step": 4348 }, { "epoch": 0.212353515625, "grad_norm": 0.24708938598632812, "learning_rate": 0.00045676079742642666, "loss": 1.8898, "step": 4349 }, { "epoch": 0.21240234375, "grad_norm": 0.27193114161491394, "learning_rate": 0.0004567401491682905, "loss": 1.8627, "step": 4350 }, { "epoch": 0.212451171875, "grad_norm": 0.2859501242637634, "learning_rate": 0.0004567194965055358, "loss": 1.917, "step": 4351 }, { "epoch": 0.2125, "grad_norm": 0.32346311211586, "learning_rate": 0.00045669883943866307, "loss": 1.9011, "step": 4352 }, { "epoch": 0.212548828125, "grad_norm": 0.33120331168174744, "learning_rate": 0.00045667817796817293, "loss": 1.8822, "step": 4353 }, { "epoch": 0.21259765625, "grad_norm": 0.29261308908462524, "learning_rate": 0.0004566575120945663, "loss": 1.886, "step": 4354 }, { "epoch": 0.212646484375, "grad_norm": 0.3029157221317291, "learning_rate": 0.00045663684181834394, "loss": 1.8802, "step": 4355 }, { "epoch": 0.2126953125, "grad_norm": 0.29199978709220886, "learning_rate": 0.0004566161671400067, "loss": 1.8689, "step": 4356 }, { "epoch": 0.212744140625, "grad_norm": 0.2372092753648758, "learning_rate": 0.0004565954880600558, "loss": 1.8804, "step": 4357 }, { "epoch": 0.21279296875, "grad_norm": 0.3451406955718994, "learning_rate": 0.0004565748045789923, "loss": 1.9155, "step": 4358 }, { "epoch": 0.212841796875, "grad_norm": 0.33676308393478394, "learning_rate": 0.0004565541166973176, "loss": 1.8871, "step": 4359 }, { "epoch": 0.212890625, "grad_norm": 0.2524230182170868, "learning_rate": 0.0004565334244155329, "loss": 1.9039, "step": 4360 }, { "epoch": 0.212939453125, "grad_norm": 0.33894577622413635, "learning_rate": 0.00045651272773413994, "loss": 1.863, "step": 4361 }, { "epoch": 0.21298828125, "grad_norm": 0.26776814460754395, "learning_rate": 0.00045649202665364014, "loss": 1.9057, "step": 4362 }, { "epoch": 0.213037109375, "grad_norm": 0.24219515919685364, "learning_rate": 0.0004564713211745353, "loss": 1.8656, "step": 4363 }, { "epoch": 0.2130859375, "grad_norm": 0.2959662675857544, "learning_rate": 0.0004564506112973272, "loss": 1.8818, "step": 4364 }, { "epoch": 0.213134765625, "grad_norm": 0.24380680918693542, "learning_rate": 0.0004564298970225177, "loss": 1.8829, "step": 4365 }, { "epoch": 0.21318359375, "grad_norm": 0.264470636844635, "learning_rate": 0.000456409178350609, "loss": 1.8708, "step": 4366 }, { "epoch": 0.213232421875, "grad_norm": 0.3115828037261963, "learning_rate": 0.00045638845528210304, "loss": 1.8797, "step": 4367 }, { "epoch": 0.21328125, "grad_norm": 0.26929548382759094, "learning_rate": 0.0004563677278175021, "loss": 1.9117, "step": 4368 }, { "epoch": 0.213330078125, "grad_norm": 0.24763493239879608, "learning_rate": 0.0004563469959573086, "loss": 1.9039, "step": 4369 }, { "epoch": 0.21337890625, "grad_norm": 0.3030800223350525, "learning_rate": 0.0004563262597020251, "loss": 1.8824, "step": 4370 }, { "epoch": 0.213427734375, "grad_norm": 0.2403951734304428, "learning_rate": 0.00045630551905215385, "loss": 1.9, "step": 4371 }, { "epoch": 0.2134765625, "grad_norm": 0.2636868357658386, "learning_rate": 0.00045628477400819776, "loss": 1.9003, "step": 4372 }, { "epoch": 0.213525390625, "grad_norm": 0.26028943061828613, "learning_rate": 0.0004562640245706595, "loss": 1.8577, "step": 4373 }, { "epoch": 0.21357421875, "grad_norm": 0.23652468621730804, "learning_rate": 0.0004562432707400419, "loss": 1.8421, "step": 4374 }, { "epoch": 0.213623046875, "grad_norm": 0.29626914858818054, "learning_rate": 0.00045622251251684813, "loss": 1.8844, "step": 4375 }, { "epoch": 0.213671875, "grad_norm": 0.3354378044605255, "learning_rate": 0.0004562017499015812, "loss": 1.8864, "step": 4376 }, { "epoch": 0.213720703125, "grad_norm": 0.33752673864364624, "learning_rate": 0.0004561809828947442, "loss": 1.8905, "step": 4377 }, { "epoch": 0.21376953125, "grad_norm": 0.3763105273246765, "learning_rate": 0.00045616021149684055, "loss": 1.8722, "step": 4378 }, { "epoch": 0.213818359375, "grad_norm": 0.33664923906326294, "learning_rate": 0.00045613943570837367, "loss": 1.8917, "step": 4379 }, { "epoch": 0.2138671875, "grad_norm": 0.2810051739215851, "learning_rate": 0.00045611865552984697, "loss": 1.859, "step": 4380 }, { "epoch": 0.213916015625, "grad_norm": 0.2922183573246002, "learning_rate": 0.00045609787096176417, "loss": 1.9017, "step": 4381 }, { "epoch": 0.21396484375, "grad_norm": 0.3080795705318451, "learning_rate": 0.000456077082004629, "loss": 1.8912, "step": 4382 }, { "epoch": 0.214013671875, "grad_norm": 0.3447812795639038, "learning_rate": 0.00045605628865894526, "loss": 1.8982, "step": 4383 }, { "epoch": 0.2140625, "grad_norm": 0.36110740900039673, "learning_rate": 0.00045603549092521696, "loss": 1.8729, "step": 4384 }, { "epoch": 0.214111328125, "grad_norm": 0.30985817313194275, "learning_rate": 0.000456014688803948, "loss": 1.875, "step": 4385 }, { "epoch": 0.21416015625, "grad_norm": 0.24591828882694244, "learning_rate": 0.0004559938822956427, "loss": 1.8835, "step": 4386 }, { "epoch": 0.214208984375, "grad_norm": 0.2638538181781769, "learning_rate": 0.0004559730714008052, "loss": 1.8829, "step": 4387 }, { "epoch": 0.2142578125, "grad_norm": 0.3279835283756256, "learning_rate": 0.00045595225611993995, "loss": 1.8955, "step": 4388 }, { "epoch": 0.214306640625, "grad_norm": 0.3350362777709961, "learning_rate": 0.0004559314364535514, "loss": 1.8861, "step": 4389 }, { "epoch": 0.21435546875, "grad_norm": 0.26503124833106995, "learning_rate": 0.00045591061240214415, "loss": 1.8801, "step": 4390 }, { "epoch": 0.214404296875, "grad_norm": 0.27444297075271606, "learning_rate": 0.00045588978396622284, "loss": 1.9102, "step": 4391 }, { "epoch": 0.214453125, "grad_norm": 0.23734378814697266, "learning_rate": 0.00045586895114629227, "loss": 1.902, "step": 4392 }, { "epoch": 0.214501953125, "grad_norm": 0.2993130683898926, "learning_rate": 0.00045584811394285734, "loss": 1.8748, "step": 4393 }, { "epoch": 0.21455078125, "grad_norm": 0.3267841339111328, "learning_rate": 0.0004558272723564231, "loss": 1.8649, "step": 4394 }, { "epoch": 0.214599609375, "grad_norm": 0.2959243357181549, "learning_rate": 0.0004558064263874947, "loss": 1.875, "step": 4395 }, { "epoch": 0.2146484375, "grad_norm": 0.3379196524620056, "learning_rate": 0.00045578557603657727, "loss": 1.8642, "step": 4396 }, { "epoch": 0.214697265625, "grad_norm": 0.2824620008468628, "learning_rate": 0.00045576472130417616, "loss": 1.911, "step": 4397 }, { "epoch": 0.21474609375, "grad_norm": 0.2820276916027069, "learning_rate": 0.0004557438621907968, "loss": 1.8451, "step": 4398 }, { "epoch": 0.214794921875, "grad_norm": 0.31883877515792847, "learning_rate": 0.0004557229986969447, "loss": 1.9044, "step": 4399 }, { "epoch": 0.21484375, "grad_norm": 0.3103891611099243, "learning_rate": 0.0004557021308231256, "loss": 1.9063, "step": 4400 }, { "epoch": 0.214892578125, "grad_norm": 0.247249037027359, "learning_rate": 0.0004556812585698451, "loss": 1.8715, "step": 4401 }, { "epoch": 0.21494140625, "grad_norm": 0.33292636275291443, "learning_rate": 0.0004556603819376092, "loss": 1.8722, "step": 4402 }, { "epoch": 0.214990234375, "grad_norm": 0.2819299101829529, "learning_rate": 0.0004556395009269238, "loss": 1.865, "step": 4403 }, { "epoch": 0.2150390625, "grad_norm": 0.3335406184196472, "learning_rate": 0.000455618615538295, "loss": 1.8861, "step": 4404 }, { "epoch": 0.215087890625, "grad_norm": 0.2895556092262268, "learning_rate": 0.00045559772577222885, "loss": 1.8796, "step": 4405 }, { "epoch": 0.21513671875, "grad_norm": 0.29875895380973816, "learning_rate": 0.00045557683162923175, "loss": 1.8874, "step": 4406 }, { "epoch": 0.215185546875, "grad_norm": 0.2581641376018524, "learning_rate": 0.00045555593310981013, "loss": 1.888, "step": 4407 }, { "epoch": 0.215234375, "grad_norm": 0.22525957226753235, "learning_rate": 0.0004555350302144703, "loss": 1.8872, "step": 4408 }, { "epoch": 0.215283203125, "grad_norm": 0.3405015766620636, "learning_rate": 0.00045551412294371913, "loss": 1.8763, "step": 4409 }, { "epoch": 0.21533203125, "grad_norm": 0.3958164155483246, "learning_rate": 0.00045549321129806304, "loss": 1.8593, "step": 4410 }, { "epoch": 0.215380859375, "grad_norm": 0.30206218361854553, "learning_rate": 0.00045547229527800897, "loss": 1.8776, "step": 4411 }, { "epoch": 0.2154296875, "grad_norm": 0.2118964046239853, "learning_rate": 0.0004554513748840639, "loss": 1.9012, "step": 4412 }, { "epoch": 0.215478515625, "grad_norm": 0.29854080080986023, "learning_rate": 0.0004554304501167348, "loss": 1.8748, "step": 4413 }, { "epoch": 0.21552734375, "grad_norm": 0.3352142572402954, "learning_rate": 0.0004554095209765288, "loss": 1.8597, "step": 4414 }, { "epoch": 0.215576171875, "grad_norm": 0.2903798818588257, "learning_rate": 0.000455388587463953, "loss": 1.8836, "step": 4415 }, { "epoch": 0.215625, "grad_norm": 0.25822362303733826, "learning_rate": 0.00045536764957951494, "loss": 1.9077, "step": 4416 }, { "epoch": 0.215673828125, "grad_norm": 0.25766274333000183, "learning_rate": 0.000455346707323722, "loss": 1.8809, "step": 4417 }, { "epoch": 0.21572265625, "grad_norm": 0.2680676579475403, "learning_rate": 0.00045532576069708163, "loss": 1.888, "step": 4418 }, { "epoch": 0.215771484375, "grad_norm": 0.25742897391319275, "learning_rate": 0.00045530480970010163, "loss": 1.8693, "step": 4419 }, { "epoch": 0.2158203125, "grad_norm": 0.22171670198440552, "learning_rate": 0.0004552838543332897, "loss": 1.8875, "step": 4420 }, { "epoch": 0.215869140625, "grad_norm": 0.3225015699863434, "learning_rate": 0.00045526289459715385, "loss": 1.8942, "step": 4421 }, { "epoch": 0.21591796875, "grad_norm": 0.36097463965415955, "learning_rate": 0.00045524193049220174, "loss": 1.8913, "step": 4422 }, { "epoch": 0.215966796875, "grad_norm": 0.2911911606788635, "learning_rate": 0.0004552209620189417, "loss": 1.8793, "step": 4423 }, { "epoch": 0.216015625, "grad_norm": 0.26628899574279785, "learning_rate": 0.0004551999891778819, "loss": 1.8809, "step": 4424 }, { "epoch": 0.216064453125, "grad_norm": 0.2916570007801056, "learning_rate": 0.0004551790119695306, "loss": 1.8738, "step": 4425 }, { "epoch": 0.21611328125, "grad_norm": 0.3378573954105377, "learning_rate": 0.00045515803039439614, "loss": 1.8667, "step": 4426 }, { "epoch": 0.216162109375, "grad_norm": 0.3123818039894104, "learning_rate": 0.00045513704445298707, "loss": 1.8862, "step": 4427 }, { "epoch": 0.2162109375, "grad_norm": 0.2321636974811554, "learning_rate": 0.0004551160541458121, "loss": 1.8542, "step": 4428 }, { "epoch": 0.216259765625, "grad_norm": 0.28493207693099976, "learning_rate": 0.0004550950594733797, "loss": 1.8932, "step": 4429 }, { "epoch": 0.21630859375, "grad_norm": 0.2997889518737793, "learning_rate": 0.000455074060436199, "loss": 1.8816, "step": 4430 }, { "epoch": 0.216357421875, "grad_norm": 0.33421891927719116, "learning_rate": 0.0004550530570347787, "loss": 1.8757, "step": 4431 }, { "epoch": 0.21640625, "grad_norm": 0.2567248046398163, "learning_rate": 0.0004550320492696279, "loss": 1.8578, "step": 4432 }, { "epoch": 0.216455078125, "grad_norm": 0.23948349058628082, "learning_rate": 0.00045501103714125574, "loss": 1.8905, "step": 4433 }, { "epoch": 0.21650390625, "grad_norm": 0.3159269094467163, "learning_rate": 0.0004549900206501716, "loss": 1.8848, "step": 4434 }, { "epoch": 0.216552734375, "grad_norm": 0.2605387568473816, "learning_rate": 0.00045496899979688454, "loss": 1.8596, "step": 4435 }, { "epoch": 0.2166015625, "grad_norm": 0.25831374526023865, "learning_rate": 0.00045494797458190425, "loss": 1.8778, "step": 4436 }, { "epoch": 0.216650390625, "grad_norm": 0.2365652173757553, "learning_rate": 0.00045492694500574025, "loss": 1.8894, "step": 4437 }, { "epoch": 0.21669921875, "grad_norm": 0.2433037906885147, "learning_rate": 0.00045490591106890215, "loss": 1.9032, "step": 4438 }, { "epoch": 0.216748046875, "grad_norm": 0.2749379873275757, "learning_rate": 0.00045488487277189975, "loss": 1.867, "step": 4439 }, { "epoch": 0.216796875, "grad_norm": 0.2447052150964737, "learning_rate": 0.000454863830115243, "loss": 1.8789, "step": 4440 }, { "epoch": 0.216845703125, "grad_norm": 0.2343180775642395, "learning_rate": 0.00045484278309944185, "loss": 1.8757, "step": 4441 }, { "epoch": 0.21689453125, "grad_norm": 0.27925440669059753, "learning_rate": 0.00045482173172500627, "loss": 1.8687, "step": 4442 }, { "epoch": 0.216943359375, "grad_norm": 0.2883366048336029, "learning_rate": 0.0004548006759924466, "loss": 1.8804, "step": 4443 }, { "epoch": 0.2169921875, "grad_norm": 0.26617103815078735, "learning_rate": 0.00045477961590227313, "loss": 1.9111, "step": 4444 }, { "epoch": 0.217041015625, "grad_norm": 0.27795249223709106, "learning_rate": 0.0004547585514549962, "loss": 1.8948, "step": 4445 }, { "epoch": 0.21708984375, "grad_norm": 0.3650432527065277, "learning_rate": 0.0004547374826511263, "loss": 1.9162, "step": 4446 }, { "epoch": 0.217138671875, "grad_norm": 0.44707271456718445, "learning_rate": 0.0004547164094911742, "loss": 1.8887, "step": 4447 }, { "epoch": 0.2171875, "grad_norm": 0.3501872420310974, "learning_rate": 0.00045469533197565044, "loss": 1.8765, "step": 4448 }, { "epoch": 0.217236328125, "grad_norm": 0.2716187536716461, "learning_rate": 0.00045467425010506596, "loss": 1.8731, "step": 4449 }, { "epoch": 0.21728515625, "grad_norm": 0.32012420892715454, "learning_rate": 0.00045465316387993177, "loss": 1.8675, "step": 4450 }, { "epoch": 0.217333984375, "grad_norm": 0.3032674789428711, "learning_rate": 0.00045463207330075886, "loss": 1.8884, "step": 4451 }, { "epoch": 0.2173828125, "grad_norm": 0.27760714292526245, "learning_rate": 0.0004546109783680582, "loss": 1.8868, "step": 4452 }, { "epoch": 0.217431640625, "grad_norm": 0.2832639813423157, "learning_rate": 0.0004545898790823412, "loss": 1.8817, "step": 4453 }, { "epoch": 0.21748046875, "grad_norm": 0.3309480845928192, "learning_rate": 0.0004545687754441192, "loss": 1.9068, "step": 4454 }, { "epoch": 0.217529296875, "grad_norm": 0.2647459805011749, "learning_rate": 0.00045454766745390375, "loss": 1.8896, "step": 4455 }, { "epoch": 0.217578125, "grad_norm": 0.22509852051734924, "learning_rate": 0.0004545265551122063, "loss": 1.889, "step": 4456 }, { "epoch": 0.217626953125, "grad_norm": 0.2966275215148926, "learning_rate": 0.00045450543841953853, "loss": 1.8555, "step": 4457 }, { "epoch": 0.21767578125, "grad_norm": 0.25084733963012695, "learning_rate": 0.0004544843173764122, "loss": 1.8746, "step": 4458 }, { "epoch": 0.217724609375, "grad_norm": 0.2576361298561096, "learning_rate": 0.0004544631919833393, "loss": 1.8714, "step": 4459 }, { "epoch": 0.2177734375, "grad_norm": 0.2788444757461548, "learning_rate": 0.0004544420622408318, "loss": 1.88, "step": 4460 }, { "epoch": 0.217822265625, "grad_norm": 0.23141753673553467, "learning_rate": 0.0004544209281494017, "loss": 1.8913, "step": 4461 }, { "epoch": 0.21787109375, "grad_norm": 0.27711915969848633, "learning_rate": 0.0004543997897095613, "loss": 1.8929, "step": 4462 }, { "epoch": 0.217919921875, "grad_norm": 0.25275570154190063, "learning_rate": 0.00045437864692182277, "loss": 1.8977, "step": 4463 }, { "epoch": 0.21796875, "grad_norm": 0.2431456297636032, "learning_rate": 0.0004543574997866987, "loss": 1.876, "step": 4464 }, { "epoch": 0.218017578125, "grad_norm": 0.29946863651275635, "learning_rate": 0.00045433634830470155, "loss": 1.8912, "step": 4465 }, { "epoch": 0.21806640625, "grad_norm": 0.31154897809028625, "learning_rate": 0.0004543151924763439, "loss": 1.9016, "step": 4466 }, { "epoch": 0.218115234375, "grad_norm": 0.39540043473243713, "learning_rate": 0.0004542940323021385, "loss": 1.8868, "step": 4467 }, { "epoch": 0.2181640625, "grad_norm": 0.3652907907962799, "learning_rate": 0.0004542728677825982, "loss": 1.8836, "step": 4468 }, { "epoch": 0.218212890625, "grad_norm": 0.22518250346183777, "learning_rate": 0.00045425169891823587, "loss": 1.8488, "step": 4469 }, { "epoch": 0.21826171875, "grad_norm": 0.3238357901573181, "learning_rate": 0.00045423052570956466, "loss": 1.8674, "step": 4470 }, { "epoch": 0.218310546875, "grad_norm": 0.3368992209434509, "learning_rate": 0.0004542093481570976, "loss": 1.8743, "step": 4471 }, { "epoch": 0.218359375, "grad_norm": 0.306822270154953, "learning_rate": 0.00045418816626134807, "loss": 1.8581, "step": 4472 }, { "epoch": 0.218408203125, "grad_norm": 0.2560220956802368, "learning_rate": 0.0004541669800228294, "loss": 1.8849, "step": 4473 }, { "epoch": 0.21845703125, "grad_norm": 0.2908066511154175, "learning_rate": 0.000454145789442055, "loss": 1.8619, "step": 4474 }, { "epoch": 0.218505859375, "grad_norm": 0.23396135866641998, "learning_rate": 0.0004541245945195384, "loss": 1.8815, "step": 4475 }, { "epoch": 0.2185546875, "grad_norm": 0.2690432369709015, "learning_rate": 0.00045410339525579334, "loss": 1.8955, "step": 4476 }, { "epoch": 0.218603515625, "grad_norm": 0.24055561423301697, "learning_rate": 0.00045408219165133377, "loss": 1.88, "step": 4477 }, { "epoch": 0.21865234375, "grad_norm": 0.2340317964553833, "learning_rate": 0.0004540609837066733, "loss": 1.8944, "step": 4478 }, { "epoch": 0.218701171875, "grad_norm": 0.28283217549324036, "learning_rate": 0.0004540397714223261, "loss": 1.8767, "step": 4479 }, { "epoch": 0.21875, "grad_norm": 0.29862406849861145, "learning_rate": 0.00045401855479880606, "loss": 1.8667, "step": 4480 }, { "epoch": 0.218798828125, "grad_norm": 0.2639766037464142, "learning_rate": 0.0004539973338366276, "loss": 1.8536, "step": 4481 }, { "epoch": 0.21884765625, "grad_norm": 0.2981272339820862, "learning_rate": 0.000453976108536305, "loss": 1.8756, "step": 4482 }, { "epoch": 0.218896484375, "grad_norm": 0.2922227680683136, "learning_rate": 0.0004539548788983526, "loss": 1.8847, "step": 4483 }, { "epoch": 0.2189453125, "grad_norm": 0.24836932122707367, "learning_rate": 0.00045393364492328487, "loss": 1.8899, "step": 4484 }, { "epoch": 0.218994140625, "grad_norm": 0.22195731103420258, "learning_rate": 0.00045391240661161656, "loss": 1.8684, "step": 4485 }, { "epoch": 0.21904296875, "grad_norm": 0.270717978477478, "learning_rate": 0.0004538911639638623, "loss": 1.8836, "step": 4486 }, { "epoch": 0.219091796875, "grad_norm": 0.27451038360595703, "learning_rate": 0.000453869916980537, "loss": 1.8672, "step": 4487 }, { "epoch": 0.219140625, "grad_norm": 0.25294172763824463, "learning_rate": 0.0004538486656621556, "loss": 1.877, "step": 4488 }, { "epoch": 0.219189453125, "grad_norm": 0.3221990466117859, "learning_rate": 0.000453827410009233, "loss": 1.9117, "step": 4489 }, { "epoch": 0.21923828125, "grad_norm": 0.3128281533718109, "learning_rate": 0.0004538061500222845, "loss": 1.8622, "step": 4490 }, { "epoch": 0.219287109375, "grad_norm": 0.321142315864563, "learning_rate": 0.0004537848857018253, "loss": 1.8789, "step": 4491 }, { "epoch": 0.2193359375, "grad_norm": 0.36277899146080017, "learning_rate": 0.00045376361704837077, "loss": 1.8886, "step": 4492 }, { "epoch": 0.219384765625, "grad_norm": 0.26140904426574707, "learning_rate": 0.00045374234406243634, "loss": 1.8937, "step": 4493 }, { "epoch": 0.21943359375, "grad_norm": 0.2746216058731079, "learning_rate": 0.0004537210667445376, "loss": 1.8978, "step": 4494 }, { "epoch": 0.219482421875, "grad_norm": 0.2807396650314331, "learning_rate": 0.00045369978509519026, "loss": 1.885, "step": 4495 }, { "epoch": 0.21953125, "grad_norm": 0.22843694686889648, "learning_rate": 0.00045367849911491005, "loss": 1.8735, "step": 4496 }, { "epoch": 0.219580078125, "grad_norm": 0.2743361294269562, "learning_rate": 0.0004536572088042129, "loss": 1.8882, "step": 4497 }, { "epoch": 0.21962890625, "grad_norm": 0.26744386553764343, "learning_rate": 0.00045363591416361474, "loss": 1.8793, "step": 4498 }, { "epoch": 0.219677734375, "grad_norm": 0.40649187564849854, "learning_rate": 0.00045361461519363163, "loss": 1.8682, "step": 4499 }, { "epoch": 0.2197265625, "grad_norm": 0.3160495162010193, "learning_rate": 0.0004535933118947799, "loss": 1.8877, "step": 4500 }, { "epoch": 0.219775390625, "grad_norm": 0.3246208131313324, "learning_rate": 0.00045357200426757573, "loss": 1.908, "step": 4501 }, { "epoch": 0.21982421875, "grad_norm": 0.20604856312274933, "learning_rate": 0.0004535506923125355, "loss": 1.87, "step": 4502 }, { "epoch": 0.219873046875, "grad_norm": 0.2970461845397949, "learning_rate": 0.0004535293760301759, "loss": 1.878, "step": 4503 }, { "epoch": 0.219921875, "grad_norm": 0.3362593352794647, "learning_rate": 0.00045350805542101346, "loss": 1.8884, "step": 4504 }, { "epoch": 0.219970703125, "grad_norm": 0.26518598198890686, "learning_rate": 0.00045348673048556485, "loss": 1.861, "step": 4505 }, { "epoch": 0.22001953125, "grad_norm": 0.27057936787605286, "learning_rate": 0.0004534654012243469, "loss": 1.8861, "step": 4506 }, { "epoch": 0.220068359375, "grad_norm": 0.2828996181488037, "learning_rate": 0.00045344406763787653, "loss": 1.8666, "step": 4507 }, { "epoch": 0.2201171875, "grad_norm": 0.18972554802894592, "learning_rate": 0.000453422729726671, "loss": 1.8676, "step": 4508 }, { "epoch": 0.220166015625, "grad_norm": 0.28579869866371155, "learning_rate": 0.0004534013874912471, "loss": 1.8794, "step": 4509 }, { "epoch": 0.22021484375, "grad_norm": 0.2386077493429184, "learning_rate": 0.0004533800409321223, "loss": 1.8904, "step": 4510 }, { "epoch": 0.220263671875, "grad_norm": 0.22111107409000397, "learning_rate": 0.00045335869004981395, "loss": 1.8698, "step": 4511 }, { "epoch": 0.2203125, "grad_norm": 0.2551708221435547, "learning_rate": 0.0004533373348448394, "loss": 1.8973, "step": 4512 }, { "epoch": 0.220361328125, "grad_norm": 0.2908632159233093, "learning_rate": 0.00045331597531771627, "loss": 1.8782, "step": 4513 }, { "epoch": 0.22041015625, "grad_norm": 0.2855885922908783, "learning_rate": 0.00045329461146896224, "loss": 1.8608, "step": 4514 }, { "epoch": 0.220458984375, "grad_norm": 0.34508007764816284, "learning_rate": 0.0004532732432990951, "loss": 1.8938, "step": 4515 }, { "epoch": 0.2205078125, "grad_norm": 0.3477758467197418, "learning_rate": 0.0004532518708086326, "loss": 1.8867, "step": 4516 }, { "epoch": 0.220556640625, "grad_norm": 0.28391218185424805, "learning_rate": 0.00045323049399809286, "loss": 1.874, "step": 4517 }, { "epoch": 0.22060546875, "grad_norm": 0.3235521912574768, "learning_rate": 0.00045320911286799386, "loss": 1.9135, "step": 4518 }, { "epoch": 0.220654296875, "grad_norm": 0.2865099310874939, "learning_rate": 0.0004531877274188539, "loss": 1.863, "step": 4519 }, { "epoch": 0.220703125, "grad_norm": 0.28591054677963257, "learning_rate": 0.00045316633765119115, "loss": 1.8292, "step": 4520 }, { "epoch": 0.220751953125, "grad_norm": 0.3119315207004547, "learning_rate": 0.0004531449435655241, "loss": 1.8617, "step": 4521 }, { "epoch": 0.22080078125, "grad_norm": 0.33157584071159363, "learning_rate": 0.0004531235451623712, "loss": 1.8664, "step": 4522 }, { "epoch": 0.220849609375, "grad_norm": 0.26488009095191956, "learning_rate": 0.0004531021424422511, "loss": 1.8854, "step": 4523 }, { "epoch": 0.2208984375, "grad_norm": 0.3022625148296356, "learning_rate": 0.0004530807354056825, "loss": 1.9156, "step": 4524 }, { "epoch": 0.220947265625, "grad_norm": 0.31044238805770874, "learning_rate": 0.00045305932405318426, "loss": 1.8848, "step": 4525 }, { "epoch": 0.22099609375, "grad_norm": 0.24940894544124603, "learning_rate": 0.0004530379083852752, "loss": 1.8927, "step": 4526 }, { "epoch": 0.221044921875, "grad_norm": 0.31727397441864014, "learning_rate": 0.0004530164884024743, "loss": 1.8996, "step": 4527 }, { "epoch": 0.22109375, "grad_norm": 0.33861494064331055, "learning_rate": 0.0004529950641053009, "loss": 1.8761, "step": 4528 }, { "epoch": 0.221142578125, "grad_norm": 0.2990071773529053, "learning_rate": 0.0004529736354942741, "loss": 1.8833, "step": 4529 }, { "epoch": 0.22119140625, "grad_norm": 0.2864431142807007, "learning_rate": 0.00045295220256991327, "loss": 1.9031, "step": 4530 }, { "epoch": 0.221240234375, "grad_norm": 0.35880863666534424, "learning_rate": 0.0004529307653327379, "loss": 1.8779, "step": 4531 }, { "epoch": 0.2212890625, "grad_norm": 0.30683568120002747, "learning_rate": 0.0004529093237832674, "loss": 1.8534, "step": 4532 }, { "epoch": 0.221337890625, "grad_norm": 0.24746626615524292, "learning_rate": 0.0004528878779220215, "loss": 1.8883, "step": 4533 }, { "epoch": 0.22138671875, "grad_norm": 0.3653567433357239, "learning_rate": 0.00045286642774951995, "loss": 1.8711, "step": 4534 }, { "epoch": 0.221435546875, "grad_norm": 0.2654235064983368, "learning_rate": 0.0004528449732662827, "loss": 1.918, "step": 4535 }, { "epoch": 0.221484375, "grad_norm": 0.28018760681152344, "learning_rate": 0.00045282351447282967, "loss": 1.8771, "step": 4536 }, { "epoch": 0.221533203125, "grad_norm": 0.281676709651947, "learning_rate": 0.0004528020513696808, "loss": 1.8893, "step": 4537 }, { "epoch": 0.22158203125, "grad_norm": 0.21811407804489136, "learning_rate": 0.0004527805839573564, "loss": 1.8605, "step": 4538 }, { "epoch": 0.221630859375, "grad_norm": 0.23527348041534424, "learning_rate": 0.0004527591122363768, "loss": 1.8854, "step": 4539 }, { "epoch": 0.2216796875, "grad_norm": 0.2549058198928833, "learning_rate": 0.0004527376362072622, "loss": 1.8914, "step": 4540 }, { "epoch": 0.221728515625, "grad_norm": 0.23519586026668549, "learning_rate": 0.00045271615587053315, "loss": 1.8959, "step": 4541 }, { "epoch": 0.22177734375, "grad_norm": 0.37467947602272034, "learning_rate": 0.00045269467122671046, "loss": 1.863, "step": 4542 }, { "epoch": 0.221826171875, "grad_norm": 0.4750082194805145, "learning_rate": 0.00045267318227631455, "loss": 1.9023, "step": 4543 }, { "epoch": 0.221875, "grad_norm": 0.3573734164237976, "learning_rate": 0.0004526516890198663, "loss": 1.8959, "step": 4544 }, { "epoch": 0.221923828125, "grad_norm": 0.3007015585899353, "learning_rate": 0.0004526301914578867, "loss": 1.8617, "step": 4545 }, { "epoch": 0.22197265625, "grad_norm": 0.27149397134780884, "learning_rate": 0.00045260868959089666, "loss": 1.8552, "step": 4546 }, { "epoch": 0.222021484375, "grad_norm": 0.26428937911987305, "learning_rate": 0.0004525871834194174, "loss": 1.8799, "step": 4547 }, { "epoch": 0.2220703125, "grad_norm": 0.2906181216239929, "learning_rate": 0.00045256567294397007, "loss": 1.887, "step": 4548 }, { "epoch": 0.222119140625, "grad_norm": 0.26505374908447266, "learning_rate": 0.00045254415816507596, "loss": 1.8736, "step": 4549 }, { "epoch": 0.22216796875, "grad_norm": 0.31007733941078186, "learning_rate": 0.00045252263908325655, "loss": 1.8852, "step": 4550 }, { "epoch": 0.222216796875, "grad_norm": 0.27405333518981934, "learning_rate": 0.00045250111569903337, "loss": 1.892, "step": 4551 }, { "epoch": 0.222265625, "grad_norm": 0.28183868527412415, "learning_rate": 0.00045247958801292805, "loss": 1.8547, "step": 4552 }, { "epoch": 0.222314453125, "grad_norm": 0.32253390550613403, "learning_rate": 0.0004524580560254623, "loss": 1.8729, "step": 4553 }, { "epoch": 0.22236328125, "grad_norm": 0.2735274136066437, "learning_rate": 0.000452436519737158, "loss": 1.9044, "step": 4554 }, { "epoch": 0.222412109375, "grad_norm": 0.2615950405597687, "learning_rate": 0.0004524149791485372, "loss": 1.8758, "step": 4555 }, { "epoch": 0.2224609375, "grad_norm": 0.2502950429916382, "learning_rate": 0.0004523934342601218, "loss": 1.8712, "step": 4556 }, { "epoch": 0.222509765625, "grad_norm": 0.2996269464492798, "learning_rate": 0.0004523718850724339, "loss": 1.896, "step": 4557 }, { "epoch": 0.22255859375, "grad_norm": 0.22766397893428802, "learning_rate": 0.0004523503315859959, "loss": 1.876, "step": 4558 }, { "epoch": 0.222607421875, "grad_norm": 0.24938973784446716, "learning_rate": 0.00045232877380133014, "loss": 1.907, "step": 4559 }, { "epoch": 0.22265625, "grad_norm": 0.22693413496017456, "learning_rate": 0.0004523072117189591, "loss": 1.8932, "step": 4560 }, { "epoch": 0.222705078125, "grad_norm": 0.23136211931705475, "learning_rate": 0.00045228564533940536, "loss": 1.8929, "step": 4561 }, { "epoch": 0.22275390625, "grad_norm": 0.3043670058250427, "learning_rate": 0.0004522640746631916, "loss": 1.8658, "step": 4562 }, { "epoch": 0.222802734375, "grad_norm": 0.23062720894813538, "learning_rate": 0.00045224249969084046, "loss": 1.8863, "step": 4563 }, { "epoch": 0.2228515625, "grad_norm": 0.26632437109947205, "learning_rate": 0.00045222092042287505, "loss": 1.8989, "step": 4564 }, { "epoch": 0.222900390625, "grad_norm": 0.2822737395763397, "learning_rate": 0.0004521993368598182, "loss": 1.8924, "step": 4565 }, { "epoch": 0.22294921875, "grad_norm": 0.26280513405799866, "learning_rate": 0.00045217774900219306, "loss": 1.8835, "step": 4566 }, { "epoch": 0.222998046875, "grad_norm": 0.2868841886520386, "learning_rate": 0.0004521561568505229, "loss": 1.8848, "step": 4567 }, { "epoch": 0.223046875, "grad_norm": 0.3229133188724518, "learning_rate": 0.0004521345604053309, "loss": 1.8996, "step": 4568 }, { "epoch": 0.223095703125, "grad_norm": 0.3860400915145874, "learning_rate": 0.0004521129596671405, "loss": 1.8915, "step": 4569 }, { "epoch": 0.22314453125, "grad_norm": 0.32974815368652344, "learning_rate": 0.00045209135463647525, "loss": 1.8989, "step": 4570 }, { "epoch": 0.223193359375, "grad_norm": 0.24951456487178802, "learning_rate": 0.0004520697453138588, "loss": 1.891, "step": 4571 }, { "epoch": 0.2232421875, "grad_norm": 0.37737008929252625, "learning_rate": 0.00045204813169981477, "loss": 1.8766, "step": 4572 }, { "epoch": 0.223291015625, "grad_norm": 0.3400488793849945, "learning_rate": 0.000452026513794867, "loss": 1.8827, "step": 4573 }, { "epoch": 0.22333984375, "grad_norm": 0.22896742820739746, "learning_rate": 0.0004520048915995395, "loss": 1.9243, "step": 4574 }, { "epoch": 0.223388671875, "grad_norm": 0.34830230474472046, "learning_rate": 0.00045198326511435623, "loss": 1.8876, "step": 4575 }, { "epoch": 0.2234375, "grad_norm": 0.3276212215423584, "learning_rate": 0.00045196163433984125, "loss": 1.8805, "step": 4576 }, { "epoch": 0.223486328125, "grad_norm": 0.2910979986190796, "learning_rate": 0.000451939999276519, "loss": 1.9064, "step": 4577 }, { "epoch": 0.22353515625, "grad_norm": 0.4069819748401642, "learning_rate": 0.00045191835992491376, "loss": 1.8896, "step": 4578 }, { "epoch": 0.223583984375, "grad_norm": 0.3384169042110443, "learning_rate": 0.00045189671628554983, "loss": 1.8709, "step": 4579 }, { "epoch": 0.2236328125, "grad_norm": 0.3024374842643738, "learning_rate": 0.0004518750683589519, "loss": 1.8648, "step": 4580 }, { "epoch": 0.223681640625, "grad_norm": 0.3379307687282562, "learning_rate": 0.0004518534161456446, "loss": 1.8676, "step": 4581 }, { "epoch": 0.22373046875, "grad_norm": 0.2864198386669159, "learning_rate": 0.0004518317596461527, "loss": 1.8554, "step": 4582 }, { "epoch": 0.223779296875, "grad_norm": 0.2563588619232178, "learning_rate": 0.000451810098861001, "loss": 1.8801, "step": 4583 }, { "epoch": 0.223828125, "grad_norm": 0.21224099397659302, "learning_rate": 0.00045178843379071445, "loss": 1.8833, "step": 4584 }, { "epoch": 0.223876953125, "grad_norm": 0.2927469313144684, "learning_rate": 0.00045176676443581834, "loss": 1.8879, "step": 4585 }, { "epoch": 0.22392578125, "grad_norm": 0.3204004466533661, "learning_rate": 0.00045174509079683753, "loss": 1.8694, "step": 4586 }, { "epoch": 0.223974609375, "grad_norm": 0.2983829081058502, "learning_rate": 0.0004517234128742975, "loss": 1.8693, "step": 4587 }, { "epoch": 0.2240234375, "grad_norm": 0.2752986550331116, "learning_rate": 0.00045170173066872354, "loss": 1.8909, "step": 4588 }, { "epoch": 0.224072265625, "grad_norm": 0.3751376271247864, "learning_rate": 0.0004516800441806412, "loss": 1.8841, "step": 4589 }, { "epoch": 0.22412109375, "grad_norm": 0.38226839900016785, "learning_rate": 0.0004516583534105761, "loss": 1.8958, "step": 4590 }, { "epoch": 0.224169921875, "grad_norm": 0.23846691846847534, "learning_rate": 0.0004516366583590539, "loss": 1.9195, "step": 4591 }, { "epoch": 0.22421875, "grad_norm": 0.35466840863227844, "learning_rate": 0.00045161495902660035, "loss": 1.8782, "step": 4592 }, { "epoch": 0.224267578125, "grad_norm": 0.2882896363735199, "learning_rate": 0.0004515932554137413, "loss": 1.8526, "step": 4593 }, { "epoch": 0.22431640625, "grad_norm": 0.28916576504707336, "learning_rate": 0.0004515715475210028, "loss": 1.9005, "step": 4594 }, { "epoch": 0.224365234375, "grad_norm": 0.351917564868927, "learning_rate": 0.00045154983534891107, "loss": 1.9044, "step": 4595 }, { "epoch": 0.2244140625, "grad_norm": 0.2362678349018097, "learning_rate": 0.0004515281188979923, "loss": 1.8651, "step": 4596 }, { "epoch": 0.224462890625, "grad_norm": 0.25495997071266174, "learning_rate": 0.00045150639816877265, "loss": 1.8527, "step": 4597 }, { "epoch": 0.22451171875, "grad_norm": 0.2580544650554657, "learning_rate": 0.00045148467316177864, "loss": 1.8876, "step": 4598 }, { "epoch": 0.224560546875, "grad_norm": 0.26215559244155884, "learning_rate": 0.0004514629438775369, "loss": 1.9124, "step": 4599 }, { "epoch": 0.224609375, "grad_norm": 0.22787365317344666, "learning_rate": 0.0004514412103165738, "loss": 1.8936, "step": 4600 }, { "epoch": 0.224658203125, "grad_norm": 0.26308950781822205, "learning_rate": 0.00045141947247941626, "loss": 1.8679, "step": 4601 }, { "epoch": 0.22470703125, "grad_norm": 0.300678551197052, "learning_rate": 0.00045139773036659113, "loss": 1.8679, "step": 4602 }, { "epoch": 0.224755859375, "grad_norm": 0.273946613073349, "learning_rate": 0.00045137598397862526, "loss": 1.914, "step": 4603 }, { "epoch": 0.2248046875, "grad_norm": 0.2759843170642853, "learning_rate": 0.00045135423331604574, "loss": 1.876, "step": 4604 }, { "epoch": 0.224853515625, "grad_norm": 0.34312349557876587, "learning_rate": 0.00045133247837937966, "loss": 1.8722, "step": 4605 }, { "epoch": 0.22490234375, "grad_norm": 0.2616609036922455, "learning_rate": 0.00045131071916915426, "loss": 1.9008, "step": 4606 }, { "epoch": 0.224951171875, "grad_norm": 0.32693397998809814, "learning_rate": 0.00045128895568589706, "loss": 1.8835, "step": 4607 }, { "epoch": 0.225, "grad_norm": 0.30846741795539856, "learning_rate": 0.00045126718793013525, "loss": 1.8804, "step": 4608 }, { "epoch": 0.225048828125, "grad_norm": 0.3403300344944, "learning_rate": 0.0004512454159023966, "loss": 1.8588, "step": 4609 }, { "epoch": 0.22509765625, "grad_norm": 0.3491627275943756, "learning_rate": 0.0004512236396032087, "loss": 1.892, "step": 4610 }, { "epoch": 0.225146484375, "grad_norm": 0.28076255321502686, "learning_rate": 0.0004512018590330993, "loss": 1.8672, "step": 4611 }, { "epoch": 0.2251953125, "grad_norm": 0.22204315662384033, "learning_rate": 0.00045118007419259627, "loss": 1.8711, "step": 4612 }, { "epoch": 0.225244140625, "grad_norm": 0.23338516056537628, "learning_rate": 0.00045115828508222774, "loss": 1.8791, "step": 4613 }, { "epoch": 0.22529296875, "grad_norm": 0.2315828949213028, "learning_rate": 0.0004511364917025214, "loss": 1.8846, "step": 4614 }, { "epoch": 0.225341796875, "grad_norm": 0.2167331874370575, "learning_rate": 0.00045111469405400585, "loss": 1.8666, "step": 4615 }, { "epoch": 0.225390625, "grad_norm": 0.22999580204486847, "learning_rate": 0.00045109289213720916, "loss": 1.8812, "step": 4616 }, { "epoch": 0.225439453125, "grad_norm": 0.22979514300823212, "learning_rate": 0.0004510710859526598, "loss": 1.8895, "step": 4617 }, { "epoch": 0.22548828125, "grad_norm": 0.2233552187681198, "learning_rate": 0.0004510492755008861, "loss": 1.8883, "step": 4618 }, { "epoch": 0.225537109375, "grad_norm": 0.2728597819805145, "learning_rate": 0.00045102746078241695, "loss": 1.9097, "step": 4619 }, { "epoch": 0.2255859375, "grad_norm": 0.2653622031211853, "learning_rate": 0.0004510056417977807, "loss": 1.8716, "step": 4620 }, { "epoch": 0.225634765625, "grad_norm": 0.2481170892715454, "learning_rate": 0.00045098381854750643, "loss": 1.869, "step": 4621 }, { "epoch": 0.22568359375, "grad_norm": 0.2821645438671112, "learning_rate": 0.0004509619910321229, "loss": 1.8706, "step": 4622 }, { "epoch": 0.225732421875, "grad_norm": 0.29557526111602783, "learning_rate": 0.0004509401592521591, "loss": 1.8987, "step": 4623 }, { "epoch": 0.22578125, "grad_norm": 0.2887794077396393, "learning_rate": 0.00045091832320814424, "loss": 1.8664, "step": 4624 }, { "epoch": 0.225830078125, "grad_norm": 0.26228442788124084, "learning_rate": 0.00045089648290060747, "loss": 1.8932, "step": 4625 }, { "epoch": 0.22587890625, "grad_norm": 0.3093548119068146, "learning_rate": 0.0004508746383300781, "loss": 1.8617, "step": 4626 }, { "epoch": 0.225927734375, "grad_norm": 0.28520694375038147, "learning_rate": 0.00045085278949708567, "loss": 1.9003, "step": 4627 }, { "epoch": 0.2259765625, "grad_norm": 0.2155388593673706, "learning_rate": 0.0004508309364021595, "loss": 1.9074, "step": 4628 }, { "epoch": 0.226025390625, "grad_norm": 0.25932109355926514, "learning_rate": 0.00045080907904582933, "loss": 1.8835, "step": 4629 }, { "epoch": 0.22607421875, "grad_norm": 0.3318248689174652, "learning_rate": 0.000450787217428625, "loss": 1.8706, "step": 4630 }, { "epoch": 0.226123046875, "grad_norm": 0.24984881281852722, "learning_rate": 0.00045076535155107617, "loss": 1.8694, "step": 4631 }, { "epoch": 0.226171875, "grad_norm": 0.2445467710494995, "learning_rate": 0.0004507434814137128, "loss": 1.8733, "step": 4632 }, { "epoch": 0.226220703125, "grad_norm": 0.3047024607658386, "learning_rate": 0.00045072160701706504, "loss": 1.8752, "step": 4633 }, { "epoch": 0.22626953125, "grad_norm": 0.32804131507873535, "learning_rate": 0.00045069972836166286, "loss": 1.8711, "step": 4634 }, { "epoch": 0.226318359375, "grad_norm": 0.34366610646247864, "learning_rate": 0.00045067784544803663, "loss": 1.8732, "step": 4635 }, { "epoch": 0.2263671875, "grad_norm": 0.39354562759399414, "learning_rate": 0.0004506559582767167, "loss": 1.9227, "step": 4636 }, { "epoch": 0.226416015625, "grad_norm": 0.3277514576911926, "learning_rate": 0.00045063406684823354, "loss": 1.8847, "step": 4637 }, { "epoch": 0.22646484375, "grad_norm": 0.3253057897090912, "learning_rate": 0.00045061217116311767, "loss": 1.8908, "step": 4638 }, { "epoch": 0.226513671875, "grad_norm": 0.385871022939682, "learning_rate": 0.0004505902712218997, "loss": 1.8504, "step": 4639 }, { "epoch": 0.2265625, "grad_norm": 0.24696336686611176, "learning_rate": 0.0004505683670251104, "loss": 1.8633, "step": 4640 }, { "epoch": 0.226611328125, "grad_norm": 0.33051714301109314, "learning_rate": 0.0004505464585732807, "loss": 1.8881, "step": 4641 }, { "epoch": 0.22666015625, "grad_norm": 0.3117362856864929, "learning_rate": 0.00045052454586694165, "loss": 1.8981, "step": 4642 }, { "epoch": 0.226708984375, "grad_norm": 0.25970423221588135, "learning_rate": 0.0004505026289066241, "loss": 1.878, "step": 4643 }, { "epoch": 0.2267578125, "grad_norm": 0.25603991746902466, "learning_rate": 0.00045048070769285935, "loss": 1.8759, "step": 4644 }, { "epoch": 0.226806640625, "grad_norm": 0.21388927102088928, "learning_rate": 0.0004504587822261787, "loss": 1.8825, "step": 4645 }, { "epoch": 0.22685546875, "grad_norm": 0.2738982141017914, "learning_rate": 0.0004504368525071135, "loss": 1.8768, "step": 4646 }, { "epoch": 0.226904296875, "grad_norm": 0.27591755986213684, "learning_rate": 0.0004504149185361952, "loss": 1.8998, "step": 4647 }, { "epoch": 0.226953125, "grad_norm": 0.22801116108894348, "learning_rate": 0.0004503929803139555, "loss": 1.8707, "step": 4648 }, { "epoch": 0.227001953125, "grad_norm": 0.28029680252075195, "learning_rate": 0.000450371037840926, "loss": 1.8794, "step": 4649 }, { "epoch": 0.22705078125, "grad_norm": 0.27513715624809265, "learning_rate": 0.0004503490911176384, "loss": 1.8838, "step": 4650 }, { "epoch": 0.227099609375, "grad_norm": 0.236833393573761, "learning_rate": 0.0004503271401446248, "loss": 1.8613, "step": 4651 }, { "epoch": 0.2271484375, "grad_norm": 0.21949796378612518, "learning_rate": 0.0004503051849224171, "loss": 1.8605, "step": 4652 }, { "epoch": 0.227197265625, "grad_norm": 0.2864204943180084, "learning_rate": 0.00045028322545154735, "loss": 1.8817, "step": 4653 }, { "epoch": 0.22724609375, "grad_norm": 0.2941676080226898, "learning_rate": 0.00045026126173254783, "loss": 1.8721, "step": 4654 }, { "epoch": 0.227294921875, "grad_norm": 0.2975645065307617, "learning_rate": 0.0004502392937659508, "loss": 1.9037, "step": 4655 }, { "epoch": 0.22734375, "grad_norm": 0.3067544996738434, "learning_rate": 0.0004502173215522888, "loss": 1.8732, "step": 4656 }, { "epoch": 0.227392578125, "grad_norm": 0.28621891140937805, "learning_rate": 0.00045019534509209414, "loss": 1.8596, "step": 4657 }, { "epoch": 0.22744140625, "grad_norm": 0.28365522623062134, "learning_rate": 0.00045017336438589955, "loss": 1.8771, "step": 4658 }, { "epoch": 0.227490234375, "grad_norm": 0.2714718282222748, "learning_rate": 0.0004501513794342378, "loss": 1.8714, "step": 4659 }, { "epoch": 0.2275390625, "grad_norm": 0.22387108206748962, "learning_rate": 0.00045012939023764164, "loss": 1.8756, "step": 4660 }, { "epoch": 0.227587890625, "grad_norm": 0.2963597774505615, "learning_rate": 0.00045010739679664404, "loss": 1.8709, "step": 4661 }, { "epoch": 0.22763671875, "grad_norm": 0.2751019299030304, "learning_rate": 0.000450085399111778, "loss": 1.8828, "step": 4662 }, { "epoch": 0.227685546875, "grad_norm": 0.3067832887172699, "learning_rate": 0.0004500633971835765, "loss": 1.8866, "step": 4663 }, { "epoch": 0.227734375, "grad_norm": 0.32853206992149353, "learning_rate": 0.00045004139101257305, "loss": 1.8762, "step": 4664 }, { "epoch": 0.227783203125, "grad_norm": 0.3287924528121948, "learning_rate": 0.00045001938059930084, "loss": 1.9203, "step": 4665 }, { "epoch": 0.22783203125, "grad_norm": 0.33098265528678894, "learning_rate": 0.00044999736594429336, "loss": 1.8805, "step": 4666 }, { "epoch": 0.227880859375, "grad_norm": 0.3121209740638733, "learning_rate": 0.0004499753470480841, "loss": 1.8945, "step": 4667 }, { "epoch": 0.2279296875, "grad_norm": 0.3984759747982025, "learning_rate": 0.00044995332391120673, "loss": 1.8911, "step": 4668 }, { "epoch": 0.227978515625, "grad_norm": 0.3244412839412689, "learning_rate": 0.000449931296534195, "loss": 1.8631, "step": 4669 }, { "epoch": 0.22802734375, "grad_norm": 0.26136335730552673, "learning_rate": 0.0004499092649175828, "loss": 1.8635, "step": 4670 }, { "epoch": 0.228076171875, "grad_norm": 0.34656867384910583, "learning_rate": 0.000449887229061904, "loss": 1.8851, "step": 4671 }, { "epoch": 0.228125, "grad_norm": 0.2584525942802429, "learning_rate": 0.0004498651889676927, "loss": 1.8748, "step": 4672 }, { "epoch": 0.228173828125, "grad_norm": 0.2706945836544037, "learning_rate": 0.000449843144635483, "loss": 1.8535, "step": 4673 }, { "epoch": 0.22822265625, "grad_norm": 0.31250643730163574, "learning_rate": 0.0004498210960658093, "loss": 1.8762, "step": 4674 }, { "epoch": 0.228271484375, "grad_norm": 0.2972119152545929, "learning_rate": 0.0004497990432592059, "loss": 1.8753, "step": 4675 }, { "epoch": 0.2283203125, "grad_norm": 0.30886590480804443, "learning_rate": 0.0004497769862162072, "loss": 1.8566, "step": 4676 }, { "epoch": 0.228369140625, "grad_norm": 0.22362832725048065, "learning_rate": 0.00044975492493734787, "loss": 1.8813, "step": 4677 }, { "epoch": 0.22841796875, "grad_norm": 0.27932924032211304, "learning_rate": 0.00044973285942316257, "loss": 1.8749, "step": 4678 }, { "epoch": 0.228466796875, "grad_norm": 0.2812206745147705, "learning_rate": 0.000449710789674186, "loss": 1.8966, "step": 4679 }, { "epoch": 0.228515625, "grad_norm": 0.31192779541015625, "learning_rate": 0.00044968871569095307, "loss": 1.8848, "step": 4680 }, { "epoch": 0.228564453125, "grad_norm": 0.2893208861351013, "learning_rate": 0.00044966663747399876, "loss": 1.8898, "step": 4681 }, { "epoch": 0.22861328125, "grad_norm": 0.27424439787864685, "learning_rate": 0.00044964455502385817, "loss": 1.9019, "step": 4682 }, { "epoch": 0.228662109375, "grad_norm": 0.32577791810035706, "learning_rate": 0.0004496224683410665, "loss": 1.8901, "step": 4683 }, { "epoch": 0.2287109375, "grad_norm": 0.2562309205532074, "learning_rate": 0.000449600377426159, "loss": 1.8869, "step": 4684 }, { "epoch": 0.228759765625, "grad_norm": 0.20864292979240417, "learning_rate": 0.0004495782822796711, "loss": 1.8821, "step": 4685 }, { "epoch": 0.22880859375, "grad_norm": 0.2923653721809387, "learning_rate": 0.0004495561829021383, "loss": 1.8729, "step": 4686 }, { "epoch": 0.228857421875, "grad_norm": 0.3033794164657593, "learning_rate": 0.0004495340792940961, "loss": 1.8732, "step": 4687 }, { "epoch": 0.22890625, "grad_norm": 0.263455331325531, "learning_rate": 0.0004495119714560804, "loss": 1.8899, "step": 4688 }, { "epoch": 0.228955078125, "grad_norm": 0.3222441077232361, "learning_rate": 0.0004494898593886267, "loss": 1.8556, "step": 4689 }, { "epoch": 0.22900390625, "grad_norm": 0.39278143644332886, "learning_rate": 0.00044946774309227115, "loss": 1.8864, "step": 4690 }, { "epoch": 0.229052734375, "grad_norm": 0.3170289695262909, "learning_rate": 0.0004494456225675497, "loss": 1.8984, "step": 4691 }, { "epoch": 0.2291015625, "grad_norm": 0.28465673327445984, "learning_rate": 0.00044942349781499843, "loss": 1.895, "step": 4692 }, { "epoch": 0.229150390625, "grad_norm": 0.36473533511161804, "learning_rate": 0.00044940136883515354, "loss": 1.8753, "step": 4693 }, { "epoch": 0.22919921875, "grad_norm": 0.29130345582962036, "learning_rate": 0.00044937923562855136, "loss": 1.8525, "step": 4694 }, { "epoch": 0.229248046875, "grad_norm": 0.27737316489219666, "learning_rate": 0.00044935709819572835, "loss": 1.8807, "step": 4695 }, { "epoch": 0.229296875, "grad_norm": 0.30752745270729065, "learning_rate": 0.000449334956537221, "loss": 1.8551, "step": 4696 }, { "epoch": 0.229345703125, "grad_norm": 0.25722548365592957, "learning_rate": 0.0004493128106535658, "loss": 1.8755, "step": 4697 }, { "epoch": 0.22939453125, "grad_norm": 0.34325718879699707, "learning_rate": 0.0004492906605452997, "loss": 1.8652, "step": 4698 }, { "epoch": 0.229443359375, "grad_norm": 0.2564796805381775, "learning_rate": 0.0004492685062129594, "loss": 1.8958, "step": 4699 }, { "epoch": 0.2294921875, "grad_norm": 0.27028974890708923, "learning_rate": 0.0004492463476570818, "loss": 1.8838, "step": 4700 }, { "epoch": 0.229541015625, "grad_norm": 0.3024457097053528, "learning_rate": 0.000449224184878204, "loss": 1.8859, "step": 4701 }, { "epoch": 0.22958984375, "grad_norm": 0.2000988870859146, "learning_rate": 0.00044920201787686313, "loss": 1.8915, "step": 4702 }, { "epoch": 0.229638671875, "grad_norm": 0.365144819021225, "learning_rate": 0.0004491798466535965, "loss": 1.8623, "step": 4703 }, { "epoch": 0.2296875, "grad_norm": 0.35440734028816223, "learning_rate": 0.0004491576712089412, "loss": 1.8651, "step": 4704 }, { "epoch": 0.229736328125, "grad_norm": 0.2353055775165558, "learning_rate": 0.00044913549154343484, "loss": 1.8843, "step": 4705 }, { "epoch": 0.22978515625, "grad_norm": 0.3248212933540344, "learning_rate": 0.00044911330765761494, "loss": 1.8686, "step": 4706 }, { "epoch": 0.229833984375, "grad_norm": 0.27803605794906616, "learning_rate": 0.0004490911195520192, "loss": 1.8959, "step": 4707 }, { "epoch": 0.2298828125, "grad_norm": 0.3061840534210205, "learning_rate": 0.0004490689272271853, "loss": 1.8827, "step": 4708 }, { "epoch": 0.229931640625, "grad_norm": 0.39730724692344666, "learning_rate": 0.0004490467306836511, "loss": 1.8641, "step": 4709 }, { "epoch": 0.22998046875, "grad_norm": 0.31689751148223877, "learning_rate": 0.0004490245299219546, "loss": 1.8553, "step": 4710 }, { "epoch": 0.230029296875, "grad_norm": 0.3677610754966736, "learning_rate": 0.0004490023249426338, "loss": 1.8911, "step": 4711 }, { "epoch": 0.230078125, "grad_norm": 0.30504804849624634, "learning_rate": 0.00044898011574622676, "loss": 1.8775, "step": 4712 }, { "epoch": 0.230126953125, "grad_norm": 0.2175193428993225, "learning_rate": 0.0004489579023332719, "loss": 1.8927, "step": 4713 }, { "epoch": 0.23017578125, "grad_norm": 0.28405794501304626, "learning_rate": 0.00044893568470430754, "loss": 1.8824, "step": 4714 }, { "epoch": 0.230224609375, "grad_norm": 0.23342619836330414, "learning_rate": 0.0004489134628598721, "loss": 1.8747, "step": 4715 }, { "epoch": 0.2302734375, "grad_norm": 0.2815980315208435, "learning_rate": 0.00044889123680050415, "loss": 1.8888, "step": 4716 }, { "epoch": 0.230322265625, "grad_norm": 0.21983957290649414, "learning_rate": 0.0004488690065267424, "loss": 1.8843, "step": 4717 }, { "epoch": 0.23037109375, "grad_norm": 0.2845339775085449, "learning_rate": 0.0004488467720391256, "loss": 1.8695, "step": 4718 }, { "epoch": 0.230419921875, "grad_norm": 0.3503659963607788, "learning_rate": 0.0004488245333381926, "loss": 1.8764, "step": 4719 }, { "epoch": 0.23046875, "grad_norm": 0.2782078683376312, "learning_rate": 0.0004488022904244824, "loss": 1.8929, "step": 4720 }, { "epoch": 0.230517578125, "grad_norm": 0.2787877023220062, "learning_rate": 0.00044878004329853405, "loss": 1.879, "step": 4721 }, { "epoch": 0.23056640625, "grad_norm": 0.30374976992607117, "learning_rate": 0.0004487577919608867, "loss": 1.8643, "step": 4722 }, { "epoch": 0.230615234375, "grad_norm": 0.2798115611076355, "learning_rate": 0.00044873553641207976, "loss": 1.8929, "step": 4723 }, { "epoch": 0.2306640625, "grad_norm": 0.379297137260437, "learning_rate": 0.00044871327665265244, "loss": 1.8736, "step": 4724 }, { "epoch": 0.230712890625, "grad_norm": 0.278249591588974, "learning_rate": 0.00044869101268314433, "loss": 1.867, "step": 4725 }, { "epoch": 0.23076171875, "grad_norm": 0.2978440821170807, "learning_rate": 0.0004486687445040949, "loss": 1.881, "step": 4726 }, { "epoch": 0.230810546875, "grad_norm": 0.2995496094226837, "learning_rate": 0.0004486464721160441, "loss": 1.8615, "step": 4727 }, { "epoch": 0.230859375, "grad_norm": 0.28384992480278015, "learning_rate": 0.00044862419551953145, "loss": 1.8632, "step": 4728 }, { "epoch": 0.230908203125, "grad_norm": 0.29071661829948425, "learning_rate": 0.0004486019147150969, "loss": 1.8833, "step": 4729 }, { "epoch": 0.23095703125, "grad_norm": 0.3349510729312897, "learning_rate": 0.00044857962970328046, "loss": 1.904, "step": 4730 }, { "epoch": 0.231005859375, "grad_norm": 0.337515652179718, "learning_rate": 0.0004485573404846223, "loss": 1.8875, "step": 4731 }, { "epoch": 0.2310546875, "grad_norm": 0.27837175130844116, "learning_rate": 0.00044853504705966255, "loss": 1.8364, "step": 4732 }, { "epoch": 0.231103515625, "grad_norm": 0.24916616082191467, "learning_rate": 0.00044851274942894157, "loss": 1.8966, "step": 4733 }, { "epoch": 0.23115234375, "grad_norm": 0.33001509308815, "learning_rate": 0.00044849044759299957, "loss": 1.8637, "step": 4734 }, { "epoch": 0.231201171875, "grad_norm": 0.3619769215583801, "learning_rate": 0.00044846814155237723, "loss": 1.868, "step": 4735 }, { "epoch": 0.23125, "grad_norm": 0.22776369750499725, "learning_rate": 0.0004484458313076152, "loss": 1.8848, "step": 4736 }, { "epoch": 0.231298828125, "grad_norm": 0.30362507700920105, "learning_rate": 0.00044842351685925406, "loss": 1.881, "step": 4737 }, { "epoch": 0.23134765625, "grad_norm": 0.40549135208129883, "learning_rate": 0.00044840119820783466, "loss": 1.8777, "step": 4738 }, { "epoch": 0.231396484375, "grad_norm": 0.3048206567764282, "learning_rate": 0.0004483788753538979, "loss": 1.8518, "step": 4739 }, { "epoch": 0.2314453125, "grad_norm": 0.290577232837677, "learning_rate": 0.00044835654829798483, "loss": 1.8531, "step": 4740 }, { "epoch": 0.231494140625, "grad_norm": 0.3105572760105133, "learning_rate": 0.00044833421704063654, "loss": 1.862, "step": 4741 }, { "epoch": 0.23154296875, "grad_norm": 0.25628241896629333, "learning_rate": 0.00044831188158239423, "loss": 1.8816, "step": 4742 }, { "epoch": 0.231591796875, "grad_norm": 0.29043275117874146, "learning_rate": 0.00044828954192379923, "loss": 1.8754, "step": 4743 }, { "epoch": 0.231640625, "grad_norm": 0.26563358306884766, "learning_rate": 0.00044826719806539294, "loss": 1.9, "step": 4744 }, { "epoch": 0.231689453125, "grad_norm": 0.2944371700286865, "learning_rate": 0.0004482448500077169, "loss": 1.878, "step": 4745 }, { "epoch": 0.23173828125, "grad_norm": 0.23608043789863586, "learning_rate": 0.0004482224977513128, "loss": 1.8583, "step": 4746 }, { "epoch": 0.231787109375, "grad_norm": 0.2499222457408905, "learning_rate": 0.0004482001412967223, "loss": 1.8775, "step": 4747 }, { "epoch": 0.2318359375, "grad_norm": 0.32426801323890686, "learning_rate": 0.00044817778064448717, "loss": 1.8771, "step": 4748 }, { "epoch": 0.231884765625, "grad_norm": 0.25174736976623535, "learning_rate": 0.0004481554157951494, "loss": 1.8615, "step": 4749 }, { "epoch": 0.23193359375, "grad_norm": 0.25327199697494507, "learning_rate": 0.00044813304674925104, "loss": 1.8774, "step": 4750 }, { "epoch": 0.231982421875, "grad_norm": 0.2986818253993988, "learning_rate": 0.0004481106735073342, "loss": 1.8476, "step": 4751 }, { "epoch": 0.23203125, "grad_norm": 0.28396713733673096, "learning_rate": 0.0004480882960699411, "loss": 1.8746, "step": 4752 }, { "epoch": 0.232080078125, "grad_norm": 0.21839602291584015, "learning_rate": 0.0004480659144376141, "loss": 1.873, "step": 4753 }, { "epoch": 0.23212890625, "grad_norm": 0.2975209057331085, "learning_rate": 0.0004480435286108956, "loss": 1.8783, "step": 4754 }, { "epoch": 0.232177734375, "grad_norm": 0.30452418327331543, "learning_rate": 0.00044802113859032823, "loss": 1.8948, "step": 4755 }, { "epoch": 0.2322265625, "grad_norm": 0.29815205931663513, "learning_rate": 0.00044799874437645453, "loss": 1.8705, "step": 4756 }, { "epoch": 0.232275390625, "grad_norm": 0.3451012372970581, "learning_rate": 0.00044797634596981724, "loss": 1.8549, "step": 4757 }, { "epoch": 0.23232421875, "grad_norm": 0.3234756886959076, "learning_rate": 0.0004479539433709592, "loss": 1.8795, "step": 4758 }, { "epoch": 0.232373046875, "grad_norm": 0.2874232530593872, "learning_rate": 0.00044793153658042353, "loss": 1.9133, "step": 4759 }, { "epoch": 0.232421875, "grad_norm": 0.28161776065826416, "learning_rate": 0.000447909125598753, "loss": 1.8689, "step": 4760 }, { "epoch": 0.232470703125, "grad_norm": 0.2901633381843567, "learning_rate": 0.000447886710426491, "loss": 1.8943, "step": 4761 }, { "epoch": 0.23251953125, "grad_norm": 0.2401876002550125, "learning_rate": 0.00044786429106418064, "loss": 1.8617, "step": 4762 }, { "epoch": 0.232568359375, "grad_norm": 0.2676357924938202, "learning_rate": 0.00044784186751236526, "loss": 1.8886, "step": 4763 }, { "epoch": 0.2326171875, "grad_norm": 0.2639598846435547, "learning_rate": 0.00044781943977158847, "loss": 1.865, "step": 4764 }, { "epoch": 0.232666015625, "grad_norm": 0.29706960916519165, "learning_rate": 0.00044779700784239356, "loss": 1.8723, "step": 4765 }, { "epoch": 0.23271484375, "grad_norm": 0.3957176208496094, "learning_rate": 0.0004477745717253245, "loss": 1.8787, "step": 4766 }, { "epoch": 0.232763671875, "grad_norm": 0.4270346760749817, "learning_rate": 0.0004477521314209248, "loss": 1.8794, "step": 4767 }, { "epoch": 0.2328125, "grad_norm": 0.286319762468338, "learning_rate": 0.00044772968692973836, "loss": 1.8797, "step": 4768 }, { "epoch": 0.232861328125, "grad_norm": 0.30068346858024597, "learning_rate": 0.00044770723825230936, "loss": 1.8382, "step": 4769 }, { "epoch": 0.23291015625, "grad_norm": 0.3370182514190674, "learning_rate": 0.0004476847853891815, "loss": 1.8724, "step": 4770 }, { "epoch": 0.232958984375, "grad_norm": 0.25431299209594727, "learning_rate": 0.0004476623283408992, "loss": 1.8906, "step": 4771 }, { "epoch": 0.2330078125, "grad_norm": 0.37132543325424194, "learning_rate": 0.0004476398671080067, "loss": 1.8556, "step": 4772 }, { "epoch": 0.233056640625, "grad_norm": 0.38497528433799744, "learning_rate": 0.0004476174016910483, "loss": 1.9058, "step": 4773 }, { "epoch": 0.23310546875, "grad_norm": 0.29864928126335144, "learning_rate": 0.0004475949320905685, "loss": 1.8938, "step": 4774 }, { "epoch": 0.233154296875, "grad_norm": 0.436649888753891, "learning_rate": 0.00044757245830711186, "loss": 1.8536, "step": 4775 }, { "epoch": 0.233203125, "grad_norm": 0.3367980122566223, "learning_rate": 0.000447549980341223, "loss": 1.8878, "step": 4776 }, { "epoch": 0.233251953125, "grad_norm": 0.27590277791023254, "learning_rate": 0.0004475274981934468, "loss": 1.8778, "step": 4777 }, { "epoch": 0.23330078125, "grad_norm": 0.36668407917022705, "learning_rate": 0.00044750501186432805, "loss": 1.8654, "step": 4778 }, { "epoch": 0.233349609375, "grad_norm": 0.2543782591819763, "learning_rate": 0.0004474825213544117, "loss": 1.8637, "step": 4779 }, { "epoch": 0.2333984375, "grad_norm": 0.33146604895591736, "learning_rate": 0.00044746002666424297, "loss": 1.9055, "step": 4780 }, { "epoch": 0.233447265625, "grad_norm": 0.2744317054748535, "learning_rate": 0.00044743752779436693, "loss": 1.88, "step": 4781 }, { "epoch": 0.23349609375, "grad_norm": 0.339959055185318, "learning_rate": 0.0004474150247453287, "loss": 1.8945, "step": 4782 }, { "epoch": 0.233544921875, "grad_norm": 0.3209676146507263, "learning_rate": 0.0004473925175176741, "loss": 1.8897, "step": 4783 }, { "epoch": 0.23359375, "grad_norm": 0.28051871061325073, "learning_rate": 0.00044737000611194813, "loss": 1.8905, "step": 4784 }, { "epoch": 0.233642578125, "grad_norm": 0.331017404794693, "learning_rate": 0.00044734749052869667, "loss": 1.8974, "step": 4785 }, { "epoch": 0.23369140625, "grad_norm": 0.2267080694437027, "learning_rate": 0.0004473249707684652, "loss": 1.872, "step": 4786 }, { "epoch": 0.233740234375, "grad_norm": 0.3153071999549866, "learning_rate": 0.0004473024468317998, "loss": 1.8687, "step": 4787 }, { "epoch": 0.2337890625, "grad_norm": 0.2707361578941345, "learning_rate": 0.0004472799187192461, "loss": 1.8858, "step": 4788 }, { "epoch": 0.233837890625, "grad_norm": 0.2589961886405945, "learning_rate": 0.0004472573864313501, "loss": 1.8564, "step": 4789 }, { "epoch": 0.23388671875, "grad_norm": 0.34084153175354004, "learning_rate": 0.00044723484996865803, "loss": 1.8778, "step": 4790 }, { "epoch": 0.233935546875, "grad_norm": 0.29471421241760254, "learning_rate": 0.00044721230933171595, "loss": 1.8555, "step": 4791 }, { "epoch": 0.233984375, "grad_norm": 0.3101121485233307, "learning_rate": 0.0004471897645210702, "loss": 1.873, "step": 4792 }, { "epoch": 0.234033203125, "grad_norm": 0.29476651549339294, "learning_rate": 0.00044716721553726723, "loss": 1.8772, "step": 4793 }, { "epoch": 0.23408203125, "grad_norm": 0.29492947459220886, "learning_rate": 0.0004471446623808534, "loss": 1.873, "step": 4794 }, { "epoch": 0.234130859375, "grad_norm": 0.33560246229171753, "learning_rate": 0.00044712210505237543, "loss": 1.8506, "step": 4795 }, { "epoch": 0.2341796875, "grad_norm": 0.3047308623790741, "learning_rate": 0.00044709954355238, "loss": 1.92, "step": 4796 }, { "epoch": 0.234228515625, "grad_norm": 0.26151958107948303, "learning_rate": 0.0004470769778814138, "loss": 1.8783, "step": 4797 }, { "epoch": 0.23427734375, "grad_norm": 0.24090814590454102, "learning_rate": 0.00044705440804002376, "loss": 1.8723, "step": 4798 }, { "epoch": 0.234326171875, "grad_norm": 0.24595174193382263, "learning_rate": 0.00044703183402875694, "loss": 1.8495, "step": 4799 }, { "epoch": 0.234375, "grad_norm": 0.26877427101135254, "learning_rate": 0.00044700925584816053, "loss": 1.8708, "step": 4800 }, { "epoch": 0.234423828125, "grad_norm": 0.28093743324279785, "learning_rate": 0.00044698667349878145, "loss": 1.871, "step": 4801 }, { "epoch": 0.23447265625, "grad_norm": 0.27469581365585327, "learning_rate": 0.0004469640869811673, "loss": 1.8499, "step": 4802 }, { "epoch": 0.234521484375, "grad_norm": 0.29770803451538086, "learning_rate": 0.0004469414962958652, "loss": 1.8767, "step": 4803 }, { "epoch": 0.2345703125, "grad_norm": 0.3417205512523651, "learning_rate": 0.000446918901443423, "loss": 1.858, "step": 4804 }, { "epoch": 0.234619140625, "grad_norm": 0.29146498441696167, "learning_rate": 0.0004468963024243879, "loss": 1.896, "step": 4805 }, { "epoch": 0.23466796875, "grad_norm": 0.21472778916358948, "learning_rate": 0.0004468736992393079, "loss": 1.9134, "step": 4806 }, { "epoch": 0.234716796875, "grad_norm": 0.24380870163440704, "learning_rate": 0.00044685109188873074, "loss": 1.8579, "step": 4807 }, { "epoch": 0.234765625, "grad_norm": 0.3002604842185974, "learning_rate": 0.0004468284803732043, "loss": 1.8618, "step": 4808 }, { "epoch": 0.234814453125, "grad_norm": 0.2929013967514038, "learning_rate": 0.0004468058646932765, "loss": 1.8734, "step": 4809 }, { "epoch": 0.23486328125, "grad_norm": 0.2990531325340271, "learning_rate": 0.0004467832448494957, "loss": 1.8837, "step": 4810 }, { "epoch": 0.234912109375, "grad_norm": 0.36506006121635437, "learning_rate": 0.0004467606208424098, "loss": 1.8765, "step": 4811 }, { "epoch": 0.2349609375, "grad_norm": 0.3331315517425537, "learning_rate": 0.0004467379926725673, "loss": 1.8802, "step": 4812 }, { "epoch": 0.235009765625, "grad_norm": 0.2181369960308075, "learning_rate": 0.0004467153603405167, "loss": 1.8585, "step": 4813 }, { "epoch": 0.23505859375, "grad_norm": 0.3098841905593872, "learning_rate": 0.00044669272384680633, "loss": 1.8816, "step": 4814 }, { "epoch": 0.235107421875, "grad_norm": 0.2712537348270416, "learning_rate": 0.00044667008319198475, "loss": 1.9015, "step": 4815 }, { "epoch": 0.23515625, "grad_norm": 0.26625415682792664, "learning_rate": 0.0004466474383766008, "loss": 1.8826, "step": 4816 }, { "epoch": 0.235205078125, "grad_norm": 0.27938467264175415, "learning_rate": 0.0004466247894012034, "loss": 1.8392, "step": 4817 }, { "epoch": 0.23525390625, "grad_norm": 0.2693087160587311, "learning_rate": 0.0004466021362663413, "loss": 1.8748, "step": 4818 }, { "epoch": 0.235302734375, "grad_norm": 0.30550286173820496, "learning_rate": 0.0004465794789725635, "loss": 1.8898, "step": 4819 }, { "epoch": 0.2353515625, "grad_norm": 0.25432708859443665, "learning_rate": 0.0004465568175204193, "loss": 1.8587, "step": 4820 }, { "epoch": 0.235400390625, "grad_norm": 0.23053652048110962, "learning_rate": 0.00044653415191045763, "loss": 1.8627, "step": 4821 }, { "epoch": 0.23544921875, "grad_norm": 0.23058511316776276, "learning_rate": 0.0004465114821432282, "loss": 1.8763, "step": 4822 }, { "epoch": 0.235498046875, "grad_norm": 0.20578189194202423, "learning_rate": 0.00044648880821928006, "loss": 1.8717, "step": 4823 }, { "epoch": 0.235546875, "grad_norm": 0.22944431006908417, "learning_rate": 0.00044646613013916286, "loss": 1.8708, "step": 4824 }, { "epoch": 0.235595703125, "grad_norm": 0.26846474409103394, "learning_rate": 0.00044644344790342635, "loss": 1.8746, "step": 4825 }, { "epoch": 0.23564453125, "grad_norm": 0.2609626352787018, "learning_rate": 0.00044642076151262005, "loss": 1.8758, "step": 4826 }, { "epoch": 0.235693359375, "grad_norm": 0.21578945219516754, "learning_rate": 0.0004463980709672939, "loss": 1.8476, "step": 4827 }, { "epoch": 0.2357421875, "grad_norm": 0.21219490468502045, "learning_rate": 0.00044637537626799776, "loss": 1.8967, "step": 4828 }, { "epoch": 0.235791015625, "grad_norm": 0.26005470752716064, "learning_rate": 0.0004463526774152818, "loss": 1.8837, "step": 4829 }, { "epoch": 0.23583984375, "grad_norm": 0.25571519136428833, "learning_rate": 0.00044632997440969597, "loss": 1.8562, "step": 4830 }, { "epoch": 0.235888671875, "grad_norm": 0.22416041791439056, "learning_rate": 0.00044630726725179053, "loss": 1.8753, "step": 4831 }, { "epoch": 0.2359375, "grad_norm": 0.26070839166641235, "learning_rate": 0.000446284555942116, "loss": 1.8636, "step": 4832 }, { "epoch": 0.235986328125, "grad_norm": 0.3462459444999695, "learning_rate": 0.0004462618404812225, "loss": 1.8891, "step": 4833 }, { "epoch": 0.23603515625, "grad_norm": 0.3505041301250458, "learning_rate": 0.0004462391208696607, "loss": 1.8845, "step": 4834 }, { "epoch": 0.236083984375, "grad_norm": 0.30662524700164795, "learning_rate": 0.00044621639710798136, "loss": 1.8738, "step": 4835 }, { "epoch": 0.2361328125, "grad_norm": 0.3229389488697052, "learning_rate": 0.0004461936691967349, "loss": 1.8897, "step": 4836 }, { "epoch": 0.236181640625, "grad_norm": 0.28168049454689026, "learning_rate": 0.0004461709371364725, "loss": 1.8528, "step": 4837 }, { "epoch": 0.23623046875, "grad_norm": 0.22756625711917877, "learning_rate": 0.00044614820092774487, "loss": 1.8937, "step": 4838 }, { "epoch": 0.236279296875, "grad_norm": 0.27505654096603394, "learning_rate": 0.000446125460571103, "loss": 1.8708, "step": 4839 }, { "epoch": 0.236328125, "grad_norm": 0.3475116491317749, "learning_rate": 0.00044610271606709823, "loss": 1.8652, "step": 4840 }, { "epoch": 0.236376953125, "grad_norm": 0.34366360306739807, "learning_rate": 0.00044607996741628163, "loss": 1.8787, "step": 4841 }, { "epoch": 0.23642578125, "grad_norm": 0.25874096155166626, "learning_rate": 0.0004460572146192046, "loss": 1.8504, "step": 4842 }, { "epoch": 0.236474609375, "grad_norm": 0.23879621922969818, "learning_rate": 0.0004460344576764185, "loss": 1.8508, "step": 4843 }, { "epoch": 0.2365234375, "grad_norm": 0.2561609745025635, "learning_rate": 0.00044601169658847495, "loss": 1.866, "step": 4844 }, { "epoch": 0.236572265625, "grad_norm": 0.27706700563430786, "learning_rate": 0.00044598893135592557, "loss": 1.8805, "step": 4845 }, { "epoch": 0.23662109375, "grad_norm": 0.23675911128520966, "learning_rate": 0.000445966161979322, "loss": 1.8493, "step": 4846 }, { "epoch": 0.236669921875, "grad_norm": 0.21159926056861877, "learning_rate": 0.00044594338845921615, "loss": 1.8622, "step": 4847 }, { "epoch": 0.23671875, "grad_norm": 0.26707690954208374, "learning_rate": 0.00044592061079616, "loss": 1.8853, "step": 4848 }, { "epoch": 0.236767578125, "grad_norm": 0.261123925447464, "learning_rate": 0.00044589782899070546, "loss": 1.859, "step": 4849 }, { "epoch": 0.23681640625, "grad_norm": 0.2183540314435959, "learning_rate": 0.00044587504304340476, "loss": 1.8798, "step": 4850 }, { "epoch": 0.236865234375, "grad_norm": 0.22499121725559235, "learning_rate": 0.0004458522529548102, "loss": 1.8515, "step": 4851 }, { "epoch": 0.2369140625, "grad_norm": 0.21862201392650604, "learning_rate": 0.0004458294587254739, "loss": 1.8905, "step": 4852 }, { "epoch": 0.236962890625, "grad_norm": 0.21624429523944855, "learning_rate": 0.0004458066603559485, "loss": 1.8812, "step": 4853 }, { "epoch": 0.23701171875, "grad_norm": 0.2232380360364914, "learning_rate": 0.00044578385784678644, "loss": 1.8788, "step": 4854 }, { "epoch": 0.237060546875, "grad_norm": 0.23997807502746582, "learning_rate": 0.0004457610511985404, "loss": 1.8636, "step": 4855 }, { "epoch": 0.237109375, "grad_norm": 0.20722626149654388, "learning_rate": 0.00044573824041176303, "loss": 1.8785, "step": 4856 }, { "epoch": 0.237158203125, "grad_norm": 0.24518465995788574, "learning_rate": 0.00044571542548700735, "loss": 1.8531, "step": 4857 }, { "epoch": 0.23720703125, "grad_norm": 0.3245370090007782, "learning_rate": 0.0004456926064248261, "loss": 1.8647, "step": 4858 }, { "epoch": 0.237255859375, "grad_norm": 0.3877509534358978, "learning_rate": 0.00044566978322577237, "loss": 1.861, "step": 4859 }, { "epoch": 0.2373046875, "grad_norm": 0.41350457072257996, "learning_rate": 0.0004456469558903994, "loss": 1.8745, "step": 4860 }, { "epoch": 0.237353515625, "grad_norm": 0.3158990442752838, "learning_rate": 0.0004456241244192603, "loss": 1.9043, "step": 4861 }, { "epoch": 0.23740234375, "grad_norm": 0.2407805323600769, "learning_rate": 0.00044560128881290844, "loss": 1.8928, "step": 4862 }, { "epoch": 0.237451171875, "grad_norm": 0.2902960777282715, "learning_rate": 0.00044557844907189737, "loss": 1.8903, "step": 4863 }, { "epoch": 0.2375, "grad_norm": 0.2930952310562134, "learning_rate": 0.00044555560519678053, "loss": 1.8848, "step": 4864 }, { "epoch": 0.237548828125, "grad_norm": 0.2661633789539337, "learning_rate": 0.00044553275718811153, "loss": 1.8711, "step": 4865 }, { "epoch": 0.23759765625, "grad_norm": 0.26924562454223633, "learning_rate": 0.0004455099050464442, "loss": 1.8581, "step": 4866 }, { "epoch": 0.237646484375, "grad_norm": 0.27415117621421814, "learning_rate": 0.0004454870487723323, "loss": 1.8933, "step": 4867 }, { "epoch": 0.2376953125, "grad_norm": 0.34904322028160095, "learning_rate": 0.00044546418836632993, "loss": 1.8771, "step": 4868 }, { "epoch": 0.237744140625, "grad_norm": 0.3073510527610779, "learning_rate": 0.00044544132382899084, "loss": 1.8957, "step": 4869 }, { "epoch": 0.23779296875, "grad_norm": 0.2755628824234009, "learning_rate": 0.0004454184551608694, "loss": 1.8577, "step": 4870 }, { "epoch": 0.237841796875, "grad_norm": 0.3134610056877136, "learning_rate": 0.0004453955823625198, "loss": 1.8688, "step": 4871 }, { "epoch": 0.237890625, "grad_norm": 0.34410226345062256, "learning_rate": 0.00044537270543449633, "loss": 1.8788, "step": 4872 }, { "epoch": 0.237939453125, "grad_norm": 0.3243359625339508, "learning_rate": 0.0004453498243773535, "loss": 1.881, "step": 4873 }, { "epoch": 0.23798828125, "grad_norm": 0.2170715034008026, "learning_rate": 0.0004453269391916458, "loss": 1.8585, "step": 4874 }, { "epoch": 0.238037109375, "grad_norm": 0.30666568875312805, "learning_rate": 0.0004453040498779279, "loss": 1.8589, "step": 4875 }, { "epoch": 0.2380859375, "grad_norm": 0.34609830379486084, "learning_rate": 0.0004452811564367545, "loss": 1.8868, "step": 4876 }, { "epoch": 0.238134765625, "grad_norm": 0.29684653878211975, "learning_rate": 0.0004452582588686805, "loss": 1.8894, "step": 4877 }, { "epoch": 0.23818359375, "grad_norm": 0.22955767810344696, "learning_rate": 0.00044523535717426086, "loss": 1.864, "step": 4878 }, { "epoch": 0.238232421875, "grad_norm": 0.3467799723148346, "learning_rate": 0.0004452124513540505, "loss": 1.8773, "step": 4879 }, { "epoch": 0.23828125, "grad_norm": 0.380484938621521, "learning_rate": 0.00044518954140860455, "loss": 1.878, "step": 4880 }, { "epoch": 0.238330078125, "grad_norm": 0.28838562965393066, "learning_rate": 0.0004451666273384785, "loss": 1.8657, "step": 4881 }, { "epoch": 0.23837890625, "grad_norm": 0.25066423416137695, "learning_rate": 0.00044514370914422745, "loss": 1.8645, "step": 4882 }, { "epoch": 0.238427734375, "grad_norm": 0.2996842861175537, "learning_rate": 0.00044512078682640695, "loss": 1.8668, "step": 4883 }, { "epoch": 0.2384765625, "grad_norm": 0.261156290769577, "learning_rate": 0.00044509786038557256, "loss": 1.855, "step": 4884 }, { "epoch": 0.238525390625, "grad_norm": 0.29384443163871765, "learning_rate": 0.0004450749298222798, "loss": 1.8722, "step": 4885 }, { "epoch": 0.23857421875, "grad_norm": 0.32191163301467896, "learning_rate": 0.00044505199513708446, "loss": 1.8778, "step": 4886 }, { "epoch": 0.238623046875, "grad_norm": 0.2616666257381439, "learning_rate": 0.0004450290563305424, "loss": 1.888, "step": 4887 }, { "epoch": 0.238671875, "grad_norm": 0.2515113353729248, "learning_rate": 0.0004450061134032096, "loss": 1.838, "step": 4888 }, { "epoch": 0.238720703125, "grad_norm": 0.3088722825050354, "learning_rate": 0.000444983166355642, "loss": 1.8823, "step": 4889 }, { "epoch": 0.23876953125, "grad_norm": 0.263468861579895, "learning_rate": 0.00044496021518839585, "loss": 1.8616, "step": 4890 }, { "epoch": 0.238818359375, "grad_norm": 0.39104512333869934, "learning_rate": 0.00044493725990202727, "loss": 1.8937, "step": 4891 }, { "epoch": 0.2388671875, "grad_norm": 0.4601041376590729, "learning_rate": 0.0004449143004970928, "loss": 1.8767, "step": 4892 }, { "epoch": 0.238916015625, "grad_norm": 0.2416108250617981, "learning_rate": 0.00044489133697414866, "loss": 1.872, "step": 4893 }, { "epoch": 0.23896484375, "grad_norm": 0.3385879099369049, "learning_rate": 0.0004448683693337515, "loss": 1.8731, "step": 4894 }, { "epoch": 0.239013671875, "grad_norm": 0.3170311450958252, "learning_rate": 0.00044484539757645795, "loss": 1.8881, "step": 4895 }, { "epoch": 0.2390625, "grad_norm": 0.2544320523738861, "learning_rate": 0.0004448224217028247, "loss": 1.8859, "step": 4896 }, { "epoch": 0.239111328125, "grad_norm": 0.3035712242126465, "learning_rate": 0.00044479944171340864, "loss": 1.8693, "step": 4897 }, { "epoch": 0.23916015625, "grad_norm": 0.3442285656929016, "learning_rate": 0.0004447764576087667, "loss": 1.8778, "step": 4898 }, { "epoch": 0.239208984375, "grad_norm": 0.30513429641723633, "learning_rate": 0.00044475346938945595, "loss": 1.8515, "step": 4899 }, { "epoch": 0.2392578125, "grad_norm": 0.30096593499183655, "learning_rate": 0.00044473047705603346, "loss": 1.8707, "step": 4900 }, { "epoch": 0.239306640625, "grad_norm": 0.34476137161254883, "learning_rate": 0.00044470748060905654, "loss": 1.8519, "step": 4901 }, { "epoch": 0.23935546875, "grad_norm": 0.22441169619560242, "learning_rate": 0.0004446844800490824, "loss": 1.8786, "step": 4902 }, { "epoch": 0.239404296875, "grad_norm": 0.24822549521923065, "learning_rate": 0.00044466147537666864, "loss": 1.8726, "step": 4903 }, { "epoch": 0.239453125, "grad_norm": 0.27521246671676636, "learning_rate": 0.00044463846659237267, "loss": 1.8795, "step": 4904 }, { "epoch": 0.239501953125, "grad_norm": 0.2408868819475174, "learning_rate": 0.00044461545369675214, "loss": 1.8847, "step": 4905 }, { "epoch": 0.23955078125, "grad_norm": 0.29901936650276184, "learning_rate": 0.0004445924366903649, "loss": 1.8614, "step": 4906 }, { "epoch": 0.239599609375, "grad_norm": 0.251191109418869, "learning_rate": 0.00044456941557376867, "loss": 1.9018, "step": 4907 }, { "epoch": 0.2396484375, "grad_norm": 0.24168360233306885, "learning_rate": 0.00044454639034752143, "loss": 1.8639, "step": 4908 }, { "epoch": 0.239697265625, "grad_norm": 0.259988009929657, "learning_rate": 0.0004445233610121812, "loss": 1.854, "step": 4909 }, { "epoch": 0.23974609375, "grad_norm": 0.2438904047012329, "learning_rate": 0.0004445003275683062, "loss": 1.8543, "step": 4910 }, { "epoch": 0.239794921875, "grad_norm": 0.2053946703672409, "learning_rate": 0.00044447729001645446, "loss": 1.8873, "step": 4911 }, { "epoch": 0.23984375, "grad_norm": 0.2647416293621063, "learning_rate": 0.0004444542483571846, "loss": 1.8741, "step": 4912 }, { "epoch": 0.239892578125, "grad_norm": 0.2821706533432007, "learning_rate": 0.0004444312025910547, "loss": 1.8679, "step": 4913 }, { "epoch": 0.23994140625, "grad_norm": 0.23774248361587524, "learning_rate": 0.0004444081527186236, "loss": 1.8761, "step": 4914 }, { "epoch": 0.239990234375, "grad_norm": 0.2861906886100769, "learning_rate": 0.0004443850987404498, "loss": 1.8678, "step": 4915 }, { "epoch": 0.2400390625, "grad_norm": 0.3156861364841461, "learning_rate": 0.000444362040657092, "loss": 1.8923, "step": 4916 }, { "epoch": 0.240087890625, "grad_norm": 0.3214239776134491, "learning_rate": 0.00044433897846910914, "loss": 1.8567, "step": 4917 }, { "epoch": 0.24013671875, "grad_norm": 0.27301260828971863, "learning_rate": 0.0004443159121770601, "loss": 1.8694, "step": 4918 }, { "epoch": 0.240185546875, "grad_norm": 0.31402787566185, "learning_rate": 0.00044429284178150387, "loss": 1.8631, "step": 4919 }, { "epoch": 0.240234375, "grad_norm": 0.3126353621482849, "learning_rate": 0.0004442697672829997, "loss": 1.8892, "step": 4920 }, { "epoch": 0.240283203125, "grad_norm": 0.2597823739051819, "learning_rate": 0.0004442466886821066, "loss": 1.8692, "step": 4921 }, { "epoch": 0.24033203125, "grad_norm": 0.3004777133464813, "learning_rate": 0.0004442236059793841, "loss": 1.8708, "step": 4922 }, { "epoch": 0.240380859375, "grad_norm": 0.29620835185050964, "learning_rate": 0.0004442005191753915, "loss": 1.869, "step": 4923 }, { "epoch": 0.2404296875, "grad_norm": 0.2767051160335541, "learning_rate": 0.0004441774282706884, "loss": 1.8936, "step": 4924 }, { "epoch": 0.240478515625, "grad_norm": 0.25150009989738464, "learning_rate": 0.0004441543332658344, "loss": 1.8743, "step": 4925 }, { "epoch": 0.24052734375, "grad_norm": 0.2553045451641083, "learning_rate": 0.0004441312341613893, "loss": 1.8629, "step": 4926 }, { "epoch": 0.240576171875, "grad_norm": 0.26212775707244873, "learning_rate": 0.00044410813095791283, "loss": 1.8834, "step": 4927 }, { "epoch": 0.240625, "grad_norm": 0.28167784214019775, "learning_rate": 0.0004440850236559649, "loss": 1.8658, "step": 4928 }, { "epoch": 0.240673828125, "grad_norm": 0.2640528976917267, "learning_rate": 0.00044406191225610566, "loss": 1.8829, "step": 4929 }, { "epoch": 0.24072265625, "grad_norm": 0.27934756875038147, "learning_rate": 0.0004440387967588951, "loss": 1.8778, "step": 4930 }, { "epoch": 0.240771484375, "grad_norm": 0.2881578803062439, "learning_rate": 0.0004440156771648935, "loss": 1.8654, "step": 4931 }, { "epoch": 0.2408203125, "grad_norm": 0.28308624029159546, "learning_rate": 0.0004439925534746612, "loss": 1.884, "step": 4932 }, { "epoch": 0.240869140625, "grad_norm": 0.2640046179294586, "learning_rate": 0.0004439694256887586, "loss": 1.8688, "step": 4933 }, { "epoch": 0.24091796875, "grad_norm": 0.28041404485702515, "learning_rate": 0.0004439462938077462, "loss": 1.8751, "step": 4934 }, { "epoch": 0.240966796875, "grad_norm": 0.27910172939300537, "learning_rate": 0.0004439231578321847, "loss": 1.8537, "step": 4935 }, { "epoch": 0.241015625, "grad_norm": 0.3064216673374176, "learning_rate": 0.0004439000177626347, "loss": 1.8801, "step": 4936 }, { "epoch": 0.241064453125, "grad_norm": 0.3528411090373993, "learning_rate": 0.0004438768735996571, "loss": 1.8724, "step": 4937 }, { "epoch": 0.24111328125, "grad_norm": 0.3462006747722626, "learning_rate": 0.0004438537253438127, "loss": 1.9029, "step": 4938 }, { "epoch": 0.241162109375, "grad_norm": 0.3230524957180023, "learning_rate": 0.0004438305729956628, "loss": 1.8838, "step": 4939 }, { "epoch": 0.2412109375, "grad_norm": 0.27595391869544983, "learning_rate": 0.0004438074165557682, "loss": 1.8892, "step": 4940 }, { "epoch": 0.241259765625, "grad_norm": 0.35676810145378113, "learning_rate": 0.0004437842560246902, "loss": 1.8665, "step": 4941 }, { "epoch": 0.24130859375, "grad_norm": 0.2989819347858429, "learning_rate": 0.0004437610914029902, "loss": 1.869, "step": 4942 }, { "epoch": 0.241357421875, "grad_norm": 0.26832279562950134, "learning_rate": 0.0004437379226912296, "loss": 1.8764, "step": 4943 }, { "epoch": 0.24140625, "grad_norm": 0.3310162127017975, "learning_rate": 0.00044371474988996984, "loss": 1.8695, "step": 4944 }, { "epoch": 0.241455078125, "grad_norm": 0.2577512264251709, "learning_rate": 0.0004436915729997726, "loss": 1.8804, "step": 4945 }, { "epoch": 0.24150390625, "grad_norm": 0.3795963227748871, "learning_rate": 0.00044366839202119955, "loss": 1.9037, "step": 4946 }, { "epoch": 0.241552734375, "grad_norm": 0.31185483932495117, "learning_rate": 0.00044364520695481255, "loss": 1.8646, "step": 4947 }, { "epoch": 0.2416015625, "grad_norm": 0.2586994767189026, "learning_rate": 0.0004436220178011734, "loss": 1.8835, "step": 4948 }, { "epoch": 0.241650390625, "grad_norm": 0.32291343808174133, "learning_rate": 0.0004435988245608442, "loss": 1.8452, "step": 4949 }, { "epoch": 0.24169921875, "grad_norm": 0.254151314496994, "learning_rate": 0.0004435756272343871, "loss": 1.8742, "step": 4950 }, { "epoch": 0.241748046875, "grad_norm": 0.28796911239624023, "learning_rate": 0.0004435524258223642, "loss": 1.8858, "step": 4951 }, { "epoch": 0.241796875, "grad_norm": 0.30102524161338806, "learning_rate": 0.0004435292203253378, "loss": 1.871, "step": 4952 }, { "epoch": 0.241845703125, "grad_norm": 0.2833251655101776, "learning_rate": 0.0004435060107438704, "loss": 1.8629, "step": 4953 }, { "epoch": 0.24189453125, "grad_norm": 0.2832144498825073, "learning_rate": 0.0004434827970785245, "loss": 1.8484, "step": 4954 }, { "epoch": 0.241943359375, "grad_norm": 0.30905407667160034, "learning_rate": 0.00044345957932986256, "loss": 1.8745, "step": 4955 }, { "epoch": 0.2419921875, "grad_norm": 0.2681049704551697, "learning_rate": 0.00044343635749844747, "loss": 1.8749, "step": 4956 }, { "epoch": 0.242041015625, "grad_norm": 0.21803271770477295, "learning_rate": 0.0004434131315848418, "loss": 1.8672, "step": 4957 }, { "epoch": 0.24208984375, "grad_norm": 0.26758572459220886, "learning_rate": 0.0004433899015896087, "loss": 1.8325, "step": 4958 }, { "epoch": 0.242138671875, "grad_norm": 0.24138355255126953, "learning_rate": 0.000443366667513311, "loss": 1.8739, "step": 4959 }, { "epoch": 0.2421875, "grad_norm": 0.23780414462089539, "learning_rate": 0.0004433434293565119, "loss": 1.902, "step": 4960 }, { "epoch": 0.242236328125, "grad_norm": 0.23620955646038055, "learning_rate": 0.0004433201871197745, "loss": 1.8664, "step": 4961 }, { "epoch": 0.24228515625, "grad_norm": 0.31730446219444275, "learning_rate": 0.00044329694080366217, "loss": 1.8722, "step": 4962 }, { "epoch": 0.242333984375, "grad_norm": 0.34171220660209656, "learning_rate": 0.00044327369040873826, "loss": 1.8644, "step": 4963 }, { "epoch": 0.2423828125, "grad_norm": 0.29332679510116577, "learning_rate": 0.0004432504359355663, "loss": 1.8735, "step": 4964 }, { "epoch": 0.242431640625, "grad_norm": 0.26592835783958435, "learning_rate": 0.0004432271773847098, "loss": 1.8812, "step": 4965 }, { "epoch": 0.24248046875, "grad_norm": 0.3303449749946594, "learning_rate": 0.00044320391475673247, "loss": 1.8552, "step": 4966 }, { "epoch": 0.242529296875, "grad_norm": 0.336025208234787, "learning_rate": 0.00044318064805219816, "loss": 1.8944, "step": 4967 }, { "epoch": 0.242578125, "grad_norm": 0.2829013168811798, "learning_rate": 0.0004431573772716708, "loss": 1.8684, "step": 4968 }, { "epoch": 0.242626953125, "grad_norm": 0.32067281007766724, "learning_rate": 0.0004431341024157143, "loss": 1.8647, "step": 4969 }, { "epoch": 0.24267578125, "grad_norm": 0.3180464208126068, "learning_rate": 0.00044311082348489267, "loss": 1.8587, "step": 4970 }, { "epoch": 0.242724609375, "grad_norm": 0.3077254593372345, "learning_rate": 0.0004430875404797702, "loss": 1.8769, "step": 4971 }, { "epoch": 0.2427734375, "grad_norm": 0.30099669098854065, "learning_rate": 0.00044306425340091116, "loss": 1.8662, "step": 4972 }, { "epoch": 0.242822265625, "grad_norm": 0.2560100257396698, "learning_rate": 0.00044304096224887993, "loss": 1.8763, "step": 4973 }, { "epoch": 0.24287109375, "grad_norm": 0.3032388985157013, "learning_rate": 0.00044301766702424094, "loss": 1.8608, "step": 4974 }, { "epoch": 0.242919921875, "grad_norm": 0.3161863684654236, "learning_rate": 0.00044299436772755885, "loss": 1.9008, "step": 4975 }, { "epoch": 0.24296875, "grad_norm": 0.2443462461233139, "learning_rate": 0.00044297106435939825, "loss": 1.868, "step": 4976 }, { "epoch": 0.243017578125, "grad_norm": 0.32576844096183777, "learning_rate": 0.000442947756920324, "loss": 1.879, "step": 4977 }, { "epoch": 0.24306640625, "grad_norm": 0.3073829412460327, "learning_rate": 0.00044292444541090096, "loss": 1.8894, "step": 4978 }, { "epoch": 0.243115234375, "grad_norm": 0.22064052522182465, "learning_rate": 0.000442901129831694, "loss": 1.8673, "step": 4979 }, { "epoch": 0.2431640625, "grad_norm": 0.2843286395072937, "learning_rate": 0.0004428778101832683, "loss": 1.8723, "step": 4980 }, { "epoch": 0.243212890625, "grad_norm": 0.2327749878168106, "learning_rate": 0.00044285448646618896, "loss": 1.8483, "step": 4981 }, { "epoch": 0.24326171875, "grad_norm": 0.2587781548500061, "learning_rate": 0.00044283115868102137, "loss": 1.8738, "step": 4982 }, { "epoch": 0.243310546875, "grad_norm": 0.3033539354801178, "learning_rate": 0.0004428078268283307, "loss": 1.869, "step": 4983 }, { "epoch": 0.243359375, "grad_norm": 0.24374093115329742, "learning_rate": 0.0004427844909086827, "loss": 1.8776, "step": 4984 }, { "epoch": 0.243408203125, "grad_norm": 0.33882927894592285, "learning_rate": 0.0004427611509226426, "loss": 1.9083, "step": 4985 }, { "epoch": 0.24345703125, "grad_norm": 0.37391167879104614, "learning_rate": 0.00044273780687077637, "loss": 1.862, "step": 4986 }, { "epoch": 0.243505859375, "grad_norm": 0.2729862630367279, "learning_rate": 0.00044271445875364947, "loss": 1.9123, "step": 4987 }, { "epoch": 0.2435546875, "grad_norm": 0.28598007559776306, "learning_rate": 0.000442691106571828, "loss": 1.8896, "step": 4988 }, { "epoch": 0.243603515625, "grad_norm": 0.27842947840690613, "learning_rate": 0.0004426677503258779, "loss": 1.8844, "step": 4989 }, { "epoch": 0.24365234375, "grad_norm": 0.3100188970565796, "learning_rate": 0.00044264439001636514, "loss": 1.8804, "step": 4990 }, { "epoch": 0.243701171875, "grad_norm": 0.32302314043045044, "learning_rate": 0.0004426210256438559, "loss": 1.8456, "step": 4991 }, { "epoch": 0.24375, "grad_norm": 0.3219097852706909, "learning_rate": 0.0004425976572089164, "loss": 1.8516, "step": 4992 }, { "epoch": 0.243798828125, "grad_norm": 0.2787076532840729, "learning_rate": 0.00044257428471211304, "loss": 1.8457, "step": 4993 }, { "epoch": 0.24384765625, "grad_norm": 0.26823341846466064, "learning_rate": 0.0004425509081540123, "loss": 1.8879, "step": 4994 }, { "epoch": 0.243896484375, "grad_norm": 0.48234039545059204, "learning_rate": 0.0004425275275351806, "loss": 1.8878, "step": 4995 }, { "epoch": 0.2439453125, "grad_norm": 0.2863769829273224, "learning_rate": 0.00044250414285618487, "loss": 1.8908, "step": 4996 }, { "epoch": 0.243994140625, "grad_norm": 0.3057452142238617, "learning_rate": 0.0004424807541175915, "loss": 1.8674, "step": 4997 }, { "epoch": 0.24404296875, "grad_norm": 0.2659650146961212, "learning_rate": 0.0004424573613199675, "loss": 1.8731, "step": 4998 }, { "epoch": 0.244091796875, "grad_norm": 0.2685467004776001, "learning_rate": 0.0004424339644638799, "loss": 1.8532, "step": 4999 }, { "epoch": 0.244140625, "grad_norm": 0.25597453117370605, "learning_rate": 0.00044241056354989557, "loss": 1.8773, "step": 5000 }, { "epoch": 0.244189453125, "grad_norm": 0.2553762197494507, "learning_rate": 0.00044238715857858183, "loss": 1.8835, "step": 5001 }, { "epoch": 0.24423828125, "grad_norm": 0.2729444205760956, "learning_rate": 0.00044236374955050574, "loss": 1.8528, "step": 5002 }, { "epoch": 0.244287109375, "grad_norm": 0.31665149331092834, "learning_rate": 0.00044234033646623475, "loss": 1.8822, "step": 5003 }, { "epoch": 0.2443359375, "grad_norm": 0.28579026460647583, "learning_rate": 0.0004423169193263363, "loss": 1.853, "step": 5004 }, { "epoch": 0.244384765625, "grad_norm": 0.22006875276565552, "learning_rate": 0.00044229349813137785, "loss": 1.8589, "step": 5005 }, { "epoch": 0.24443359375, "grad_norm": 0.2937517464160919, "learning_rate": 0.0004422700728819271, "loss": 1.8759, "step": 5006 }, { "epoch": 0.244482421875, "grad_norm": 0.32668718695640564, "learning_rate": 0.0004422466435785517, "loss": 1.8509, "step": 5007 }, { "epoch": 0.24453125, "grad_norm": 0.32135868072509766, "learning_rate": 0.00044222321022181955, "loss": 1.8805, "step": 5008 }, { "epoch": 0.244580078125, "grad_norm": 0.24299439787864685, "learning_rate": 0.00044219977281229864, "loss": 1.8848, "step": 5009 }, { "epoch": 0.24462890625, "grad_norm": 0.33529600501060486, "learning_rate": 0.0004421763313505568, "loss": 1.8485, "step": 5010 }, { "epoch": 0.244677734375, "grad_norm": 0.35839903354644775, "learning_rate": 0.0004421528858371622, "loss": 1.8565, "step": 5011 }, { "epoch": 0.2447265625, "grad_norm": 0.21029074490070343, "learning_rate": 0.00044212943627268327, "loss": 1.8469, "step": 5012 }, { "epoch": 0.244775390625, "grad_norm": 0.3268072009086609, "learning_rate": 0.00044210598265768817, "loss": 1.8651, "step": 5013 }, { "epoch": 0.24482421875, "grad_norm": 0.32092809677124023, "learning_rate": 0.00044208252499274524, "loss": 1.8841, "step": 5014 }, { "epoch": 0.244873046875, "grad_norm": 0.28836750984191895, "learning_rate": 0.0004420590632784231, "loss": 1.8389, "step": 5015 }, { "epoch": 0.244921875, "grad_norm": 0.3569374680519104, "learning_rate": 0.0004420355975152904, "loss": 1.8863, "step": 5016 }, { "epoch": 0.244970703125, "grad_norm": 0.3801896870136261, "learning_rate": 0.0004420121277039158, "loss": 1.861, "step": 5017 }, { "epoch": 0.24501953125, "grad_norm": 0.37951886653900146, "learning_rate": 0.0004419886538448681, "loss": 1.8661, "step": 5018 }, { "epoch": 0.245068359375, "grad_norm": 0.3394325077533722, "learning_rate": 0.00044196517593871623, "loss": 1.8691, "step": 5019 }, { "epoch": 0.2451171875, "grad_norm": 0.335874080657959, "learning_rate": 0.00044194169398602913, "loss": 1.8618, "step": 5020 }, { "epoch": 0.245166015625, "grad_norm": 0.3266218602657318, "learning_rate": 0.000441918207987376, "loss": 1.9018, "step": 5021 }, { "epoch": 0.24521484375, "grad_norm": 0.2936822175979614, "learning_rate": 0.0004418947179433259, "loss": 1.8754, "step": 5022 }, { "epoch": 0.245263671875, "grad_norm": 0.32269325852394104, "learning_rate": 0.00044187122385444836, "loss": 1.886, "step": 5023 }, { "epoch": 0.2453125, "grad_norm": 0.22159695625305176, "learning_rate": 0.0004418477257213126, "loss": 1.8571, "step": 5024 }, { "epoch": 0.245361328125, "grad_norm": 0.2721232771873474, "learning_rate": 0.0004418242235444882, "loss": 1.8573, "step": 5025 }, { "epoch": 0.24541015625, "grad_norm": 0.3383371829986572, "learning_rate": 0.0004418007173245447, "loss": 1.8329, "step": 5026 }, { "epoch": 0.245458984375, "grad_norm": 0.23793037235736847, "learning_rate": 0.0004417772070620517, "loss": 1.9032, "step": 5027 }, { "epoch": 0.2455078125, "grad_norm": 0.3019699156284332, "learning_rate": 0.0004417536927575792, "loss": 1.8383, "step": 5028 }, { "epoch": 0.245556640625, "grad_norm": 0.31123360991477966, "learning_rate": 0.000441730174411697, "loss": 1.8831, "step": 5029 }, { "epoch": 0.24560546875, "grad_norm": 0.23625050485134125, "learning_rate": 0.000441706652024975, "loss": 1.8653, "step": 5030 }, { "epoch": 0.245654296875, "grad_norm": 0.3122534155845642, "learning_rate": 0.00044168312559798346, "loss": 1.8951, "step": 5031 }, { "epoch": 0.245703125, "grad_norm": 0.3289574384689331, "learning_rate": 0.00044165959513129245, "loss": 1.8564, "step": 5032 }, { "epoch": 0.245751953125, "grad_norm": 0.315053790807724, "learning_rate": 0.00044163606062547217, "loss": 1.8789, "step": 5033 }, { "epoch": 0.24580078125, "grad_norm": 0.29242879152297974, "learning_rate": 0.0004416125220810932, "loss": 1.8747, "step": 5034 }, { "epoch": 0.245849609375, "grad_norm": 0.31804659962654114, "learning_rate": 0.0004415889794987258, "loss": 1.8727, "step": 5035 }, { "epoch": 0.2458984375, "grad_norm": 0.32344672083854675, "learning_rate": 0.0004415654328789407, "loss": 1.8859, "step": 5036 }, { "epoch": 0.245947265625, "grad_norm": 0.34826457500457764, "learning_rate": 0.00044154188222230844, "loss": 1.8649, "step": 5037 }, { "epoch": 0.24599609375, "grad_norm": 0.3087301552295685, "learning_rate": 0.00044151832752939993, "loss": 1.891, "step": 5038 }, { "epoch": 0.246044921875, "grad_norm": 0.2060529738664627, "learning_rate": 0.0004414947688007859, "loss": 1.8582, "step": 5039 }, { "epoch": 0.24609375, "grad_norm": 0.2839168310165405, "learning_rate": 0.0004414712060370375, "loss": 1.8897, "step": 5040 }, { "epoch": 0.246142578125, "grad_norm": 0.29287928342819214, "learning_rate": 0.0004414476392387257, "loss": 1.8778, "step": 5041 }, { "epoch": 0.24619140625, "grad_norm": 0.3299199938774109, "learning_rate": 0.00044142406840642147, "loss": 1.8799, "step": 5042 }, { "epoch": 0.246240234375, "grad_norm": 0.3414135277271271, "learning_rate": 0.00044140049354069637, "loss": 1.8592, "step": 5043 }, { "epoch": 0.2462890625, "grad_norm": 0.2716069221496582, "learning_rate": 0.00044137691464212164, "loss": 1.8577, "step": 5044 }, { "epoch": 0.246337890625, "grad_norm": 0.349123477935791, "learning_rate": 0.0004413533317112686, "loss": 1.8807, "step": 5045 }, { "epoch": 0.24638671875, "grad_norm": 0.33007922768592834, "learning_rate": 0.000441329744748709, "loss": 1.8851, "step": 5046 }, { "epoch": 0.246435546875, "grad_norm": 0.2593992352485657, "learning_rate": 0.00044130615375501445, "loss": 1.845, "step": 5047 }, { "epoch": 0.246484375, "grad_norm": 0.41666463017463684, "learning_rate": 0.0004412825587307566, "loss": 1.8782, "step": 5048 }, { "epoch": 0.246533203125, "grad_norm": 0.2968021035194397, "learning_rate": 0.0004412589596765073, "loss": 1.8626, "step": 5049 }, { "epoch": 0.24658203125, "grad_norm": 0.37167009711265564, "learning_rate": 0.0004412353565928387, "loss": 1.8665, "step": 5050 }, { "epoch": 0.246630859375, "grad_norm": 0.4328637719154358, "learning_rate": 0.0004412117494803225, "loss": 1.8845, "step": 5051 }, { "epoch": 0.2466796875, "grad_norm": 0.2288678139448166, "learning_rate": 0.00044118813833953115, "loss": 1.8519, "step": 5052 }, { "epoch": 0.246728515625, "grad_norm": 0.3922203481197357, "learning_rate": 0.0004411645231710367, "loss": 1.8655, "step": 5053 }, { "epoch": 0.24677734375, "grad_norm": 0.2910563349723816, "learning_rate": 0.00044114090397541153, "loss": 1.8506, "step": 5054 }, { "epoch": 0.246826171875, "grad_norm": 0.2903216481208801, "learning_rate": 0.000441117280753228, "loss": 1.888, "step": 5055 }, { "epoch": 0.246875, "grad_norm": 0.30219268798828125, "learning_rate": 0.00044109365350505886, "loss": 1.8541, "step": 5056 }, { "epoch": 0.246923828125, "grad_norm": 0.3123689293861389, "learning_rate": 0.00044107002223147654, "loss": 1.8512, "step": 5057 }, { "epoch": 0.24697265625, "grad_norm": 0.34703367948532104, "learning_rate": 0.00044104638693305375, "loss": 1.8507, "step": 5058 }, { "epoch": 0.247021484375, "grad_norm": 0.2887507379055023, "learning_rate": 0.00044102274761036343, "loss": 1.8957, "step": 5059 }, { "epoch": 0.2470703125, "grad_norm": 0.31294241547584534, "learning_rate": 0.00044099910426397844, "loss": 1.8488, "step": 5060 }, { "epoch": 0.247119140625, "grad_norm": 0.2389763593673706, "learning_rate": 0.00044097545689447177, "loss": 1.8428, "step": 5061 }, { "epoch": 0.24716796875, "grad_norm": 0.34127986431121826, "learning_rate": 0.0004409518055024166, "loss": 1.8896, "step": 5062 }, { "epoch": 0.247216796875, "grad_norm": 0.24298147857189178, "learning_rate": 0.00044092815008838605, "loss": 1.859, "step": 5063 }, { "epoch": 0.247265625, "grad_norm": 0.24697861075401306, "learning_rate": 0.00044090449065295353, "loss": 1.8703, "step": 5064 }, { "epoch": 0.247314453125, "grad_norm": 0.27920761704444885, "learning_rate": 0.0004408808271966923, "loss": 1.8563, "step": 5065 }, { "epoch": 0.24736328125, "grad_norm": 0.22467809915542603, "learning_rate": 0.00044085715972017606, "loss": 1.8705, "step": 5066 }, { "epoch": 0.247412109375, "grad_norm": 0.23493614792823792, "learning_rate": 0.00044083348822397824, "loss": 1.8775, "step": 5067 }, { "epoch": 0.2474609375, "grad_norm": 0.2904694676399231, "learning_rate": 0.0004408098127086726, "loss": 1.8406, "step": 5068 }, { "epoch": 0.247509765625, "grad_norm": 0.28167036175727844, "learning_rate": 0.00044078613317483296, "loss": 1.8664, "step": 5069 }, { "epoch": 0.24755859375, "grad_norm": 0.3049875497817993, "learning_rate": 0.00044076244962303323, "loss": 1.8764, "step": 5070 }, { "epoch": 0.247607421875, "grad_norm": 0.2902607321739197, "learning_rate": 0.00044073876205384725, "loss": 1.8641, "step": 5071 }, { "epoch": 0.24765625, "grad_norm": 0.2647269070148468, "learning_rate": 0.0004407150704678494, "loss": 1.8676, "step": 5072 }, { "epoch": 0.247705078125, "grad_norm": 0.26057377457618713, "learning_rate": 0.0004406913748656135, "loss": 1.8732, "step": 5073 }, { "epoch": 0.24775390625, "grad_norm": 0.25016555190086365, "learning_rate": 0.00044066767524771414, "loss": 1.9101, "step": 5074 }, { "epoch": 0.247802734375, "grad_norm": 0.23524506390094757, "learning_rate": 0.00044064397161472553, "loss": 1.8724, "step": 5075 }, { "epoch": 0.2478515625, "grad_norm": 0.23932306468486786, "learning_rate": 0.0004406202639672222, "loss": 1.8689, "step": 5076 }, { "epoch": 0.247900390625, "grad_norm": 0.2468663454055786, "learning_rate": 0.00044059655230577866, "loss": 1.8478, "step": 5077 }, { "epoch": 0.24794921875, "grad_norm": 0.2777966856956482, "learning_rate": 0.0004405728366309697, "loss": 1.8821, "step": 5078 }, { "epoch": 0.247998046875, "grad_norm": 0.2854156196117401, "learning_rate": 0.00044054911694337, "loss": 1.8923, "step": 5079 }, { "epoch": 0.248046875, "grad_norm": 0.350376695394516, "learning_rate": 0.0004405253932435545, "loss": 1.8659, "step": 5080 }, { "epoch": 0.248095703125, "grad_norm": 0.36521947383880615, "learning_rate": 0.000440501665532098, "loss": 1.8707, "step": 5081 }, { "epoch": 0.24814453125, "grad_norm": 0.3064037561416626, "learning_rate": 0.00044047793380957577, "loss": 1.8451, "step": 5082 }, { "epoch": 0.248193359375, "grad_norm": 0.38382452726364136, "learning_rate": 0.00044045419807656286, "loss": 1.8491, "step": 5083 }, { "epoch": 0.2482421875, "grad_norm": 0.3187410831451416, "learning_rate": 0.0004404304583336345, "loss": 1.8359, "step": 5084 }, { "epoch": 0.248291015625, "grad_norm": 0.2924506366252899, "learning_rate": 0.00044040671458136604, "loss": 1.8385, "step": 5085 }, { "epoch": 0.24833984375, "grad_norm": 0.3317908048629761, "learning_rate": 0.00044038296682033306, "loss": 1.8863, "step": 5086 }, { "epoch": 0.248388671875, "grad_norm": 0.4102923274040222, "learning_rate": 0.000440359215051111, "loss": 1.8881, "step": 5087 }, { "epoch": 0.2484375, "grad_norm": 0.3008342683315277, "learning_rate": 0.0004403354592742755, "loss": 1.8743, "step": 5088 }, { "epoch": 0.248486328125, "grad_norm": 0.3876064121723175, "learning_rate": 0.00044031169949040227, "loss": 1.8296, "step": 5089 }, { "epoch": 0.24853515625, "grad_norm": 0.3728554844856262, "learning_rate": 0.00044028793570006727, "loss": 1.8704, "step": 5090 }, { "epoch": 0.248583984375, "grad_norm": 0.276154488325119, "learning_rate": 0.00044026416790384626, "loss": 1.9006, "step": 5091 }, { "epoch": 0.2486328125, "grad_norm": 0.3470548093318939, "learning_rate": 0.00044024039610231544, "loss": 1.8697, "step": 5092 }, { "epoch": 0.248681640625, "grad_norm": 0.2873291075229645, "learning_rate": 0.00044021662029605085, "loss": 1.8716, "step": 5093 }, { "epoch": 0.24873046875, "grad_norm": 0.2802225649356842, "learning_rate": 0.0004401928404856287, "loss": 1.8841, "step": 5094 }, { "epoch": 0.248779296875, "grad_norm": 0.33316996693611145, "learning_rate": 0.0004401690566716254, "loss": 1.8647, "step": 5095 }, { "epoch": 0.248828125, "grad_norm": 0.23898650705814362, "learning_rate": 0.0004401452688546173, "loss": 1.8568, "step": 5096 }, { "epoch": 0.248876953125, "grad_norm": 0.30051225423812866, "learning_rate": 0.0004401214770351809, "loss": 1.8686, "step": 5097 }, { "epoch": 0.24892578125, "grad_norm": 0.2819689214229584, "learning_rate": 0.0004400976812138929, "loss": 1.8679, "step": 5098 }, { "epoch": 0.248974609375, "grad_norm": 0.28040027618408203, "learning_rate": 0.0004400738813913299, "loss": 1.8544, "step": 5099 }, { "epoch": 0.2490234375, "grad_norm": 0.28905582427978516, "learning_rate": 0.0004400500775680688, "loss": 1.8601, "step": 5100 }, { "epoch": 0.249072265625, "grad_norm": 0.21708141267299652, "learning_rate": 0.00044002626974468643, "loss": 1.8681, "step": 5101 }, { "epoch": 0.24912109375, "grad_norm": 0.2673826217651367, "learning_rate": 0.00044000245792175997, "loss": 1.8761, "step": 5102 }, { "epoch": 0.249169921875, "grad_norm": 0.2721775770187378, "learning_rate": 0.00043997864209986627, "loss": 1.8847, "step": 5103 }, { "epoch": 0.24921875, "grad_norm": 0.22239434719085693, "learning_rate": 0.00043995482227958264, "loss": 1.8689, "step": 5104 }, { "epoch": 0.249267578125, "grad_norm": 0.30632126331329346, "learning_rate": 0.0004399309984614864, "loss": 1.8737, "step": 5105 }, { "epoch": 0.24931640625, "grad_norm": 0.3248952627182007, "learning_rate": 0.00043990717064615483, "loss": 1.8335, "step": 5106 }, { "epoch": 0.249365234375, "grad_norm": 0.22284452617168427, "learning_rate": 0.00043988333883416555, "loss": 1.8648, "step": 5107 }, { "epoch": 0.2494140625, "grad_norm": 0.2929984927177429, "learning_rate": 0.00043985950302609606, "loss": 1.8708, "step": 5108 }, { "epoch": 0.249462890625, "grad_norm": 0.3341735005378723, "learning_rate": 0.0004398356632225241, "loss": 1.8741, "step": 5109 }, { "epoch": 0.24951171875, "grad_norm": 0.3459677994251251, "learning_rate": 0.0004398118194240274, "loss": 1.8724, "step": 5110 }, { "epoch": 0.249560546875, "grad_norm": 0.3811725378036499, "learning_rate": 0.00043978797163118384, "loss": 1.8897, "step": 5111 }, { "epoch": 0.249609375, "grad_norm": 0.2815454304218292, "learning_rate": 0.00043976411984457143, "loss": 1.8348, "step": 5112 }, { "epoch": 0.249658203125, "grad_norm": 0.2205946147441864, "learning_rate": 0.0004397402640647682, "loss": 1.8594, "step": 5113 }, { "epoch": 0.24970703125, "grad_norm": 0.29691874980926514, "learning_rate": 0.00043971640429235224, "loss": 1.8601, "step": 5114 }, { "epoch": 0.249755859375, "grad_norm": 0.356955349445343, "learning_rate": 0.00043969254052790205, "loss": 1.8932, "step": 5115 }, { "epoch": 0.2498046875, "grad_norm": 0.33274999260902405, "learning_rate": 0.00043966867277199566, "loss": 1.8525, "step": 5116 }, { "epoch": 0.249853515625, "grad_norm": 0.25864994525909424, "learning_rate": 0.0004396448010252118, "loss": 1.8926, "step": 5117 }, { "epoch": 0.24990234375, "grad_norm": 0.26202157139778137, "learning_rate": 0.0004396209252881289, "loss": 1.887, "step": 5118 }, { "epoch": 0.249951171875, "grad_norm": 0.36792007088661194, "learning_rate": 0.00043959704556132556, "loss": 1.883, "step": 5119 }, { "epoch": 0.25, "grad_norm": 0.35090532898902893, "learning_rate": 0.0004395731618453806, "loss": 1.8777, "step": 5120 }, { "epoch": 0.250048828125, "grad_norm": 0.22585457563400269, "learning_rate": 0.00043954927414087294, "loss": 1.8608, "step": 5121 }, { "epoch": 0.25009765625, "grad_norm": 0.3194408118724823, "learning_rate": 0.0004395253824483813, "loss": 1.8505, "step": 5122 }, { "epoch": 0.250146484375, "grad_norm": 0.35345447063446045, "learning_rate": 0.0004395014867684849, "loss": 1.8935, "step": 5123 }, { "epoch": 0.2501953125, "grad_norm": 0.24554190039634705, "learning_rate": 0.0004394775871017628, "loss": 1.8842, "step": 5124 }, { "epoch": 0.250244140625, "grad_norm": 0.4207732379436493, "learning_rate": 0.00043945368344879427, "loss": 1.8734, "step": 5125 }, { "epoch": 0.25029296875, "grad_norm": 0.34187403321266174, "learning_rate": 0.0004394297758101586, "loss": 1.8878, "step": 5126 }, { "epoch": 0.250341796875, "grad_norm": 0.28407084941864014, "learning_rate": 0.0004394058641864351, "loss": 1.8545, "step": 5127 }, { "epoch": 0.250390625, "grad_norm": 0.35291847586631775, "learning_rate": 0.0004393819485782036, "loss": 1.8484, "step": 5128 }, { "epoch": 0.250439453125, "grad_norm": 0.2795839309692383, "learning_rate": 0.0004393580289860433, "loss": 1.8974, "step": 5129 }, { "epoch": 0.25048828125, "grad_norm": 0.28320416808128357, "learning_rate": 0.00043933410541053424, "loss": 1.8558, "step": 5130 }, { "epoch": 0.250537109375, "grad_norm": 0.35097169876098633, "learning_rate": 0.0004393101778522561, "loss": 1.8784, "step": 5131 }, { "epoch": 0.2505859375, "grad_norm": 0.2615499496459961, "learning_rate": 0.0004392862463117888, "loss": 1.8404, "step": 5132 }, { "epoch": 0.250634765625, "grad_norm": 0.303792268037796, "learning_rate": 0.0004392623107897122, "loss": 1.8742, "step": 5133 }, { "epoch": 0.25068359375, "grad_norm": 0.29034626483917236, "learning_rate": 0.0004392383712866067, "loss": 1.8457, "step": 5134 }, { "epoch": 0.250732421875, "grad_norm": 0.23624226450920105, "learning_rate": 0.0004392144278030523, "loss": 1.8504, "step": 5135 }, { "epoch": 0.25078125, "grad_norm": 0.3055577278137207, "learning_rate": 0.0004391904803396293, "loss": 1.8754, "step": 5136 }, { "epoch": 0.250830078125, "grad_norm": 0.30762678384780884, "learning_rate": 0.000439166528896918, "loss": 1.8617, "step": 5137 }, { "epoch": 0.25087890625, "grad_norm": 0.26063182950019836, "learning_rate": 0.00043914257347549913, "loss": 1.8701, "step": 5138 }, { "epoch": 0.250927734375, "grad_norm": 0.25897377729415894, "learning_rate": 0.00043911861407595307, "loss": 1.8796, "step": 5139 }, { "epoch": 0.2509765625, "grad_norm": 0.28010302782058716, "learning_rate": 0.0004390946506988605, "loss": 1.8665, "step": 5140 }, { "epoch": 0.251025390625, "grad_norm": 0.24992693960666656, "learning_rate": 0.00043907068334480225, "loss": 1.8897, "step": 5141 }, { "epoch": 0.25107421875, "grad_norm": 0.2865423858165741, "learning_rate": 0.00043904671201435927, "loss": 1.8372, "step": 5142 }, { "epoch": 0.251123046875, "grad_norm": 0.32123708724975586, "learning_rate": 0.00043902273670811233, "loss": 1.865, "step": 5143 }, { "epoch": 0.251171875, "grad_norm": 0.2888452112674713, "learning_rate": 0.0004389987574266426, "loss": 1.8573, "step": 5144 }, { "epoch": 0.251220703125, "grad_norm": 0.23782341182231903, "learning_rate": 0.0004389747741705313, "loss": 1.8697, "step": 5145 }, { "epoch": 0.25126953125, "grad_norm": 0.2670332193374634, "learning_rate": 0.0004389507869403595, "loss": 1.8432, "step": 5146 }, { "epoch": 0.251318359375, "grad_norm": 0.26569676399230957, "learning_rate": 0.0004389267957367087, "loss": 1.8694, "step": 5147 }, { "epoch": 0.2513671875, "grad_norm": 0.22754041850566864, "learning_rate": 0.0004389028005601604, "loss": 1.861, "step": 5148 }, { "epoch": 0.251416015625, "grad_norm": 0.23837368190288544, "learning_rate": 0.00043887880141129596, "loss": 1.8774, "step": 5149 }, { "epoch": 0.25146484375, "grad_norm": 0.27818024158477783, "learning_rate": 0.0004388547982906971, "loss": 1.875, "step": 5150 }, { "epoch": 0.251513671875, "grad_norm": 0.3949007987976074, "learning_rate": 0.0004388307911989456, "loss": 1.857, "step": 5151 }, { "epoch": 0.2515625, "grad_norm": 0.3686492443084717, "learning_rate": 0.00043880678013662324, "loss": 1.8561, "step": 5152 }, { "epoch": 0.251611328125, "grad_norm": 0.35639381408691406, "learning_rate": 0.00043878276510431197, "loss": 1.8572, "step": 5153 }, { "epoch": 0.25166015625, "grad_norm": 0.34001582860946655, "learning_rate": 0.0004387587461025938, "loss": 1.8729, "step": 5154 }, { "epoch": 0.251708984375, "grad_norm": 0.3301718533039093, "learning_rate": 0.00043873472313205085, "loss": 1.8651, "step": 5155 }, { "epoch": 0.2517578125, "grad_norm": 0.4164426326751709, "learning_rate": 0.00043871069619326545, "loss": 1.8761, "step": 5156 }, { "epoch": 0.251806640625, "grad_norm": 0.3424311578273773, "learning_rate": 0.00043868666528681965, "loss": 1.8566, "step": 5157 }, { "epoch": 0.25185546875, "grad_norm": 0.2957718074321747, "learning_rate": 0.0004386626304132961, "loss": 1.8422, "step": 5158 }, { "epoch": 0.251904296875, "grad_norm": 0.3632572889328003, "learning_rate": 0.00043863859157327716, "loss": 1.8926, "step": 5159 }, { "epoch": 0.251953125, "grad_norm": 0.2661210596561432, "learning_rate": 0.0004386145487673455, "loss": 1.8559, "step": 5160 }, { "epoch": 0.252001953125, "grad_norm": 0.33604851365089417, "learning_rate": 0.0004385905019960838, "loss": 1.8762, "step": 5161 }, { "epoch": 0.25205078125, "grad_norm": 0.3086228668689728, "learning_rate": 0.0004385664512600749, "loss": 1.8794, "step": 5162 }, { "epoch": 0.252099609375, "grad_norm": 0.29357200860977173, "learning_rate": 0.00043854239655990166, "loss": 1.8543, "step": 5163 }, { "epoch": 0.2521484375, "grad_norm": 0.23486943542957306, "learning_rate": 0.000438518337896147, "loss": 1.8601, "step": 5164 }, { "epoch": 0.252197265625, "grad_norm": 0.27739349007606506, "learning_rate": 0.000438494275269394, "loss": 1.8617, "step": 5165 }, { "epoch": 0.25224609375, "grad_norm": 0.2990482747554779, "learning_rate": 0.000438470208680226, "loss": 1.8672, "step": 5166 }, { "epoch": 0.252294921875, "grad_norm": 0.29021647572517395, "learning_rate": 0.00043844613812922604, "loss": 1.8807, "step": 5167 }, { "epoch": 0.25234375, "grad_norm": 0.23786602914333344, "learning_rate": 0.0004384220636169778, "loss": 1.8439, "step": 5168 }, { "epoch": 0.252392578125, "grad_norm": 0.26682206988334656, "learning_rate": 0.00043839798514406434, "loss": 1.8812, "step": 5169 }, { "epoch": 0.25244140625, "grad_norm": 0.26492220163345337, "learning_rate": 0.0004383739027110695, "loss": 1.86, "step": 5170 }, { "epoch": 0.252490234375, "grad_norm": 0.24112536013126373, "learning_rate": 0.000438349816318577, "loss": 1.8601, "step": 5171 }, { "epoch": 0.2525390625, "grad_norm": 0.24480225145816803, "learning_rate": 0.00043832572596717043, "loss": 1.8554, "step": 5172 }, { "epoch": 0.252587890625, "grad_norm": 0.28068774938583374, "learning_rate": 0.00043830163165743354, "loss": 1.8305, "step": 5173 }, { "epoch": 0.25263671875, "grad_norm": 0.2448117583990097, "learning_rate": 0.0004382775333899505, "loss": 1.8532, "step": 5174 }, { "epoch": 0.252685546875, "grad_norm": 0.26335838437080383, "learning_rate": 0.00043825343116530533, "loss": 1.8653, "step": 5175 }, { "epoch": 0.252734375, "grad_norm": 0.285971462726593, "learning_rate": 0.000438229324984082, "loss": 1.8741, "step": 5176 }, { "epoch": 0.252783203125, "grad_norm": 0.23213547468185425, "learning_rate": 0.00043820521484686485, "loss": 1.8491, "step": 5177 }, { "epoch": 0.25283203125, "grad_norm": 0.25819650292396545, "learning_rate": 0.00043818110075423823, "loss": 1.8564, "step": 5178 }, { "epoch": 0.252880859375, "grad_norm": 0.263791024684906, "learning_rate": 0.00043815698270678653, "loss": 1.8266, "step": 5179 }, { "epoch": 0.2529296875, "grad_norm": 0.2699660360813141, "learning_rate": 0.00043813286070509426, "loss": 1.8656, "step": 5180 }, { "epoch": 0.252978515625, "grad_norm": 0.22470661997795105, "learning_rate": 0.00043810873474974606, "loss": 1.8688, "step": 5181 }, { "epoch": 0.25302734375, "grad_norm": 0.26552337408065796, "learning_rate": 0.0004380846048413267, "loss": 1.8685, "step": 5182 }, { "epoch": 0.253076171875, "grad_norm": 0.29058435559272766, "learning_rate": 0.00043806047098042083, "loss": 1.8827, "step": 5183 }, { "epoch": 0.253125, "grad_norm": 0.27043482661247253, "learning_rate": 0.0004380363331676135, "loss": 1.8505, "step": 5184 }, { "epoch": 0.253173828125, "grad_norm": 0.2716500759124756, "learning_rate": 0.00043801219140348957, "loss": 1.8791, "step": 5185 }, { "epoch": 0.25322265625, "grad_norm": 0.2807293236255646, "learning_rate": 0.0004379880456886343, "loss": 1.8667, "step": 5186 }, { "epoch": 0.253271484375, "grad_norm": 0.3004014790058136, "learning_rate": 0.0004379638960236327, "loss": 1.8564, "step": 5187 }, { "epoch": 0.2533203125, "grad_norm": 0.26173698902130127, "learning_rate": 0.0004379397424090703, "loss": 1.8735, "step": 5188 }, { "epoch": 0.253369140625, "grad_norm": 0.2568732500076294, "learning_rate": 0.00043791558484553223, "loss": 1.8973, "step": 5189 }, { "epoch": 0.25341796875, "grad_norm": 0.27012351155281067, "learning_rate": 0.00043789142333360416, "loss": 1.855, "step": 5190 }, { "epoch": 0.253466796875, "grad_norm": 0.22957684099674225, "learning_rate": 0.00043786725787387155, "loss": 1.8499, "step": 5191 }, { "epoch": 0.253515625, "grad_norm": 0.20947162806987762, "learning_rate": 0.0004378430884669201, "loss": 1.8485, "step": 5192 }, { "epoch": 0.253564453125, "grad_norm": 0.29232144355773926, "learning_rate": 0.00043781891511333556, "loss": 1.875, "step": 5193 }, { "epoch": 0.25361328125, "grad_norm": 0.31775739789009094, "learning_rate": 0.00043779473781370377, "loss": 1.8548, "step": 5194 }, { "epoch": 0.253662109375, "grad_norm": 0.3766670823097229, "learning_rate": 0.00043777055656861073, "loss": 1.8527, "step": 5195 }, { "epoch": 0.2537109375, "grad_norm": 0.5322206020355225, "learning_rate": 0.0004377463713786426, "loss": 1.8857, "step": 5196 }, { "epoch": 0.253759765625, "grad_norm": 0.4392569959163666, "learning_rate": 0.00043772218224438525, "loss": 1.8651, "step": 5197 }, { "epoch": 0.25380859375, "grad_norm": 0.32712703943252563, "learning_rate": 0.00043769798916642517, "loss": 1.8954, "step": 5198 }, { "epoch": 0.253857421875, "grad_norm": 0.36191481351852417, "learning_rate": 0.0004376737921453486, "loss": 1.9086, "step": 5199 }, { "epoch": 0.25390625, "grad_norm": 0.2856948971748352, "learning_rate": 0.000437649591181742, "loss": 1.8648, "step": 5200 }, { "epoch": 0.253955078125, "grad_norm": 0.3706534504890442, "learning_rate": 0.00043762538627619183, "loss": 1.8601, "step": 5201 }, { "epoch": 0.25400390625, "grad_norm": 0.27943170070648193, "learning_rate": 0.0004376011774292848, "loss": 1.8712, "step": 5202 }, { "epoch": 0.254052734375, "grad_norm": 0.2145606130361557, "learning_rate": 0.0004375769646416076, "loss": 1.8762, "step": 5203 }, { "epoch": 0.2541015625, "grad_norm": 0.31798920035362244, "learning_rate": 0.0004375527479137471, "loss": 1.8851, "step": 5204 }, { "epoch": 0.254150390625, "grad_norm": 0.21439436078071594, "learning_rate": 0.00043752852724629015, "loss": 1.8639, "step": 5205 }, { "epoch": 0.25419921875, "grad_norm": 0.2612505257129669, "learning_rate": 0.0004375043026398237, "loss": 1.8738, "step": 5206 }, { "epoch": 0.254248046875, "grad_norm": 0.26334816217422485, "learning_rate": 0.000437480074094935, "loss": 1.8602, "step": 5207 }, { "epoch": 0.254296875, "grad_norm": 0.2407357096672058, "learning_rate": 0.0004374558416122111, "loss": 1.8583, "step": 5208 }, { "epoch": 0.254345703125, "grad_norm": 0.30457454919815063, "learning_rate": 0.00043743160519223943, "loss": 1.8531, "step": 5209 }, { "epoch": 0.25439453125, "grad_norm": 0.2546022832393646, "learning_rate": 0.0004374073648356072, "loss": 1.8634, "step": 5210 }, { "epoch": 0.254443359375, "grad_norm": 0.3322831392288208, "learning_rate": 0.0004373831205429021, "loss": 1.8346, "step": 5211 }, { "epoch": 0.2544921875, "grad_norm": 0.3033141791820526, "learning_rate": 0.00043735887231471156, "loss": 1.8671, "step": 5212 }, { "epoch": 0.254541015625, "grad_norm": 0.2327217310667038, "learning_rate": 0.0004373346201516234, "loss": 1.8534, "step": 5213 }, { "epoch": 0.25458984375, "grad_norm": 0.31034815311431885, "learning_rate": 0.00043731036405422524, "loss": 1.8692, "step": 5214 }, { "epoch": 0.254638671875, "grad_norm": 0.2956205904483795, "learning_rate": 0.000437286104023105, "loss": 1.875, "step": 5215 }, { "epoch": 0.2546875, "grad_norm": 0.2448834478855133, "learning_rate": 0.00043726184005885065, "loss": 1.8709, "step": 5216 }, { "epoch": 0.254736328125, "grad_norm": 0.23008939623832703, "learning_rate": 0.00043723757216205037, "loss": 1.8573, "step": 5217 }, { "epoch": 0.25478515625, "grad_norm": 0.2444574236869812, "learning_rate": 0.000437213300333292, "loss": 1.8714, "step": 5218 }, { "epoch": 0.254833984375, "grad_norm": 0.2954114079475403, "learning_rate": 0.00043718902457316413, "loss": 1.8768, "step": 5219 }, { "epoch": 0.2548828125, "grad_norm": 0.23749610781669617, "learning_rate": 0.0004371647448822548, "loss": 1.8846, "step": 5220 }, { "epoch": 0.254931640625, "grad_norm": 0.2606501579284668, "learning_rate": 0.00043714046126115273, "loss": 1.876, "step": 5221 }, { "epoch": 0.25498046875, "grad_norm": 0.2676614820957184, "learning_rate": 0.0004371161737104463, "loss": 1.8525, "step": 5222 }, { "epoch": 0.255029296875, "grad_norm": 0.22390347719192505, "learning_rate": 0.00043709188223072404, "loss": 1.8631, "step": 5223 }, { "epoch": 0.255078125, "grad_norm": 0.2737003266811371, "learning_rate": 0.0004370675868225749, "loss": 1.8712, "step": 5224 }, { "epoch": 0.255126953125, "grad_norm": 0.22639624774456024, "learning_rate": 0.00043704328748658756, "loss": 1.8891, "step": 5225 }, { "epoch": 0.25517578125, "grad_norm": 0.2577382028102875, "learning_rate": 0.000437018984223351, "loss": 1.8614, "step": 5226 }, { "epoch": 0.255224609375, "grad_norm": 0.3350638747215271, "learning_rate": 0.00043699467703345416, "loss": 1.8897, "step": 5227 }, { "epoch": 0.2552734375, "grad_norm": 0.26679906249046326, "learning_rate": 0.0004369703659174861, "loss": 1.8614, "step": 5228 }, { "epoch": 0.255322265625, "grad_norm": 0.23995640873908997, "learning_rate": 0.00043694605087603615, "loss": 1.8794, "step": 5229 }, { "epoch": 0.25537109375, "grad_norm": 0.29326605796813965, "learning_rate": 0.0004369217319096936, "loss": 1.8727, "step": 5230 }, { "epoch": 0.255419921875, "grad_norm": 0.27914556860923767, "learning_rate": 0.0004368974090190478, "loss": 1.8419, "step": 5231 }, { "epoch": 0.25546875, "grad_norm": 0.23044221103191376, "learning_rate": 0.0004368730822046882, "loss": 1.8527, "step": 5232 }, { "epoch": 0.255517578125, "grad_norm": 0.2975548803806305, "learning_rate": 0.0004368487514672043, "loss": 1.8529, "step": 5233 }, { "epoch": 0.25556640625, "grad_norm": 0.33560439944267273, "learning_rate": 0.00043682441680718603, "loss": 1.8592, "step": 5234 }, { "epoch": 0.255615234375, "grad_norm": 0.3371022045612335, "learning_rate": 0.00043680007822522295, "loss": 1.8607, "step": 5235 }, { "epoch": 0.2556640625, "grad_norm": 0.24057653546333313, "learning_rate": 0.000436775735721905, "loss": 1.8703, "step": 5236 }, { "epoch": 0.255712890625, "grad_norm": 0.2929406464099884, "learning_rate": 0.0004367513892978221, "loss": 1.8661, "step": 5237 }, { "epoch": 0.25576171875, "grad_norm": 0.39646491408348083, "learning_rate": 0.00043672703895356437, "loss": 1.8934, "step": 5238 }, { "epoch": 0.255810546875, "grad_norm": 0.29307854175567627, "learning_rate": 0.0004367026846897219, "loss": 1.8732, "step": 5239 }, { "epoch": 0.255859375, "grad_norm": 0.2602323889732361, "learning_rate": 0.0004366783265068849, "loss": 1.8434, "step": 5240 }, { "epoch": 0.255908203125, "grad_norm": 0.30587852001190186, "learning_rate": 0.0004366539644056438, "loss": 1.8552, "step": 5241 }, { "epoch": 0.25595703125, "grad_norm": 0.21622604131698608, "learning_rate": 0.00043662959838658904, "loss": 1.8611, "step": 5242 }, { "epoch": 0.256005859375, "grad_norm": 0.2203238159418106, "learning_rate": 0.00043660522845031114, "loss": 1.8736, "step": 5243 }, { "epoch": 0.2560546875, "grad_norm": 0.2827144265174866, "learning_rate": 0.0004365808545974006, "loss": 1.8642, "step": 5244 }, { "epoch": 0.256103515625, "grad_norm": 0.31077420711517334, "learning_rate": 0.0004365564768284483, "loss": 1.8503, "step": 5245 }, { "epoch": 0.25615234375, "grad_norm": 0.2551892399787903, "learning_rate": 0.0004365320951440449, "loss": 1.8739, "step": 5246 }, { "epoch": 0.256201171875, "grad_norm": 0.2833108901977539, "learning_rate": 0.00043650770954478153, "loss": 1.8429, "step": 5247 }, { "epoch": 0.25625, "grad_norm": 0.3240698575973511, "learning_rate": 0.000436483320031249, "loss": 1.8763, "step": 5248 }, { "epoch": 0.256298828125, "grad_norm": 0.3372794985771179, "learning_rate": 0.00043645892660403853, "loss": 1.8768, "step": 5249 }, { "epoch": 0.25634765625, "grad_norm": 0.32035064697265625, "learning_rate": 0.00043643452926374115, "loss": 1.8792, "step": 5250 }, { "epoch": 0.256396484375, "grad_norm": 0.3110550343990326, "learning_rate": 0.00043641012801094834, "loss": 1.8852, "step": 5251 }, { "epoch": 0.2564453125, "grad_norm": 0.288455069065094, "learning_rate": 0.0004363857228462514, "loss": 1.8449, "step": 5252 }, { "epoch": 0.256494140625, "grad_norm": 0.3199804425239563, "learning_rate": 0.00043636131377024174, "loss": 1.9015, "step": 5253 }, { "epoch": 0.25654296875, "grad_norm": 0.3423040807247162, "learning_rate": 0.00043633690078351105, "loss": 1.8715, "step": 5254 }, { "epoch": 0.256591796875, "grad_norm": 0.28593751788139343, "learning_rate": 0.000436312483886651, "loss": 1.8474, "step": 5255 }, { "epoch": 0.256640625, "grad_norm": 0.30574145913124084, "learning_rate": 0.0004362880630802533, "loss": 1.8854, "step": 5256 }, { "epoch": 0.256689453125, "grad_norm": 0.31764712929725647, "learning_rate": 0.00043626363836490975, "loss": 1.8695, "step": 5257 }, { "epoch": 0.25673828125, "grad_norm": 0.2535068988800049, "learning_rate": 0.00043623920974121235, "loss": 1.8519, "step": 5258 }, { "epoch": 0.256787109375, "grad_norm": 0.33131343126296997, "learning_rate": 0.0004362147772097533, "loss": 1.8986, "step": 5259 }, { "epoch": 0.2568359375, "grad_norm": 0.29775387048721313, "learning_rate": 0.00043619034077112446, "loss": 1.8643, "step": 5260 }, { "epoch": 0.256884765625, "grad_norm": 0.31013187766075134, "learning_rate": 0.0004361659004259183, "loss": 1.8515, "step": 5261 }, { "epoch": 0.25693359375, "grad_norm": 0.26274651288986206, "learning_rate": 0.000436141456174727, "loss": 1.8648, "step": 5262 }, { "epoch": 0.256982421875, "grad_norm": 0.2636200487613678, "learning_rate": 0.00043611700801814314, "loss": 1.844, "step": 5263 }, { "epoch": 0.25703125, "grad_norm": 0.34057727456092834, "learning_rate": 0.000436092555956759, "loss": 1.8545, "step": 5264 }, { "epoch": 0.257080078125, "grad_norm": 0.3222498893737793, "learning_rate": 0.0004360680999911675, "loss": 1.8433, "step": 5265 }, { "epoch": 0.25712890625, "grad_norm": 0.3035983741283417, "learning_rate": 0.00043604364012196113, "loss": 1.8246, "step": 5266 }, { "epoch": 0.257177734375, "grad_norm": 0.2918285131454468, "learning_rate": 0.0004360191763497328, "loss": 1.8645, "step": 5267 }, { "epoch": 0.2572265625, "grad_norm": 0.32976609468460083, "learning_rate": 0.0004359947086750753, "loss": 1.8724, "step": 5268 }, { "epoch": 0.257275390625, "grad_norm": 0.2986690402030945, "learning_rate": 0.0004359702370985817, "loss": 1.8729, "step": 5269 }, { "epoch": 0.25732421875, "grad_norm": 0.38979658484458923, "learning_rate": 0.00043594576162084514, "loss": 1.8489, "step": 5270 }, { "epoch": 0.257373046875, "grad_norm": 0.3305032551288605, "learning_rate": 0.0004359212822424587, "loss": 1.8614, "step": 5271 }, { "epoch": 0.257421875, "grad_norm": 0.23878207802772522, "learning_rate": 0.0004358967989640157, "loss": 1.8815, "step": 5272 }, { "epoch": 0.257470703125, "grad_norm": 0.2670815587043762, "learning_rate": 0.00043587231178610953, "loss": 1.8812, "step": 5273 }, { "epoch": 0.25751953125, "grad_norm": 0.2390599101781845, "learning_rate": 0.00043584782070933367, "loss": 1.8738, "step": 5274 }, { "epoch": 0.257568359375, "grad_norm": 0.3277471959590912, "learning_rate": 0.00043582332573428157, "loss": 1.8752, "step": 5275 }, { "epoch": 0.2576171875, "grad_norm": 0.3627271354198456, "learning_rate": 0.000435798826861547, "loss": 1.8958, "step": 5276 }, { "epoch": 0.257666015625, "grad_norm": 0.31960704922676086, "learning_rate": 0.0004357743240917237, "loss": 1.8502, "step": 5277 }, { "epoch": 0.25771484375, "grad_norm": 0.2592199444770813, "learning_rate": 0.00043574981742540555, "loss": 1.8539, "step": 5278 }, { "epoch": 0.257763671875, "grad_norm": 0.3420127034187317, "learning_rate": 0.00043572530686318637, "loss": 1.8717, "step": 5279 }, { "epoch": 0.2578125, "grad_norm": 0.2735220789909363, "learning_rate": 0.00043570079240566026, "loss": 1.855, "step": 5280 }, { "epoch": 0.257861328125, "grad_norm": 0.24024486541748047, "learning_rate": 0.00043567627405342125, "loss": 1.8631, "step": 5281 }, { "epoch": 0.25791015625, "grad_norm": 0.31285175681114197, "learning_rate": 0.0004356517518070637, "loss": 1.8715, "step": 5282 }, { "epoch": 0.257958984375, "grad_norm": 0.2637968957424164, "learning_rate": 0.000435627225667182, "loss": 1.8727, "step": 5283 }, { "epoch": 0.2580078125, "grad_norm": 0.22642824053764343, "learning_rate": 0.00043560269563437025, "loss": 1.8648, "step": 5284 }, { "epoch": 0.258056640625, "grad_norm": 0.25016286969184875, "learning_rate": 0.0004355781617092233, "loss": 1.8573, "step": 5285 }, { "epoch": 0.25810546875, "grad_norm": 0.2672956585884094, "learning_rate": 0.00043555362389233556, "loss": 1.8583, "step": 5286 }, { "epoch": 0.258154296875, "grad_norm": 0.26843929290771484, "learning_rate": 0.0004355290821843017, "loss": 1.8457, "step": 5287 }, { "epoch": 0.258203125, "grad_norm": 0.2734709680080414, "learning_rate": 0.0004355045365857166, "loss": 1.8712, "step": 5288 }, { "epoch": 0.258251953125, "grad_norm": 0.2828058898448944, "learning_rate": 0.0004354799870971751, "loss": 1.873, "step": 5289 }, { "epoch": 0.25830078125, "grad_norm": 0.3143850564956665, "learning_rate": 0.0004354554337192722, "loss": 1.8498, "step": 5290 }, { "epoch": 0.258349609375, "grad_norm": 0.3184623718261719, "learning_rate": 0.00043543087645260293, "loss": 1.8595, "step": 5291 }, { "epoch": 0.2583984375, "grad_norm": 0.35951948165893555, "learning_rate": 0.00043540631529776245, "loss": 1.8913, "step": 5292 }, { "epoch": 0.258447265625, "grad_norm": 0.2536933720111847, "learning_rate": 0.0004353817502553462, "loss": 1.8545, "step": 5293 }, { "epoch": 0.25849609375, "grad_norm": 0.26347148418426514, "learning_rate": 0.00043535718132594925, "loss": 1.8473, "step": 5294 }, { "epoch": 0.258544921875, "grad_norm": 0.2573700547218323, "learning_rate": 0.0004353326085101672, "loss": 1.8677, "step": 5295 }, { "epoch": 0.25859375, "grad_norm": 0.24431149661540985, "learning_rate": 0.0004353080318085956, "loss": 1.846, "step": 5296 }, { "epoch": 0.258642578125, "grad_norm": 0.27656257152557373, "learning_rate": 0.00043528345122183004, "loss": 1.878, "step": 5297 }, { "epoch": 0.25869140625, "grad_norm": 0.26576244831085205, "learning_rate": 0.00043525886675046627, "loss": 1.8453, "step": 5298 }, { "epoch": 0.258740234375, "grad_norm": 0.27402463555336, "learning_rate": 0.00043523427839510016, "loss": 1.8397, "step": 5299 }, { "epoch": 0.2587890625, "grad_norm": 0.2564134895801544, "learning_rate": 0.00043520968615632744, "loss": 1.8727, "step": 5300 }, { "epoch": 0.258837890625, "grad_norm": 0.2286337912082672, "learning_rate": 0.00043518509003474446, "loss": 1.849, "step": 5301 }, { "epoch": 0.25888671875, "grad_norm": 0.27734676003456116, "learning_rate": 0.000435160490030947, "loss": 1.8612, "step": 5302 }, { "epoch": 0.258935546875, "grad_norm": 0.37542393803596497, "learning_rate": 0.0004351358861455314, "loss": 1.8694, "step": 5303 }, { "epoch": 0.258984375, "grad_norm": 0.4087728261947632, "learning_rate": 0.000435111278379094, "loss": 1.8448, "step": 5304 }, { "epoch": 0.259033203125, "grad_norm": 0.33810487389564514, "learning_rate": 0.0004350866667322311, "loss": 1.8665, "step": 5305 }, { "epoch": 0.25908203125, "grad_norm": 0.3067789673805237, "learning_rate": 0.00043506205120553927, "loss": 1.8707, "step": 5306 }, { "epoch": 0.259130859375, "grad_norm": 0.23458638787269592, "learning_rate": 0.000435037431799615, "loss": 1.8729, "step": 5307 }, { "epoch": 0.2591796875, "grad_norm": 0.3003320097923279, "learning_rate": 0.00043501280851505496, "loss": 1.8597, "step": 5308 }, { "epoch": 0.259228515625, "grad_norm": 0.3112507164478302, "learning_rate": 0.000434988181352456, "loss": 1.8789, "step": 5309 }, { "epoch": 0.25927734375, "grad_norm": 0.22211924195289612, "learning_rate": 0.0004349635503124149, "loss": 1.8404, "step": 5310 }, { "epoch": 0.259326171875, "grad_norm": 0.2959253191947937, "learning_rate": 0.0004349389153955287, "loss": 1.8729, "step": 5311 }, { "epoch": 0.259375, "grad_norm": 0.2924632430076599, "learning_rate": 0.00043491427660239437, "loss": 1.8581, "step": 5312 }, { "epoch": 0.259423828125, "grad_norm": 0.23559388518333435, "learning_rate": 0.00043488963393360905, "loss": 1.8867, "step": 5313 }, { "epoch": 0.25947265625, "grad_norm": 0.2857638895511627, "learning_rate": 0.00043486498738976997, "loss": 1.8658, "step": 5314 }, { "epoch": 0.259521484375, "grad_norm": 0.29047131538391113, "learning_rate": 0.0004348403369714745, "loss": 1.882, "step": 5315 }, { "epoch": 0.2595703125, "grad_norm": 0.29049527645111084, "learning_rate": 0.0004348156826793201, "loss": 1.8838, "step": 5316 }, { "epoch": 0.259619140625, "grad_norm": 0.2999834716320038, "learning_rate": 0.00043479102451390416, "loss": 1.8744, "step": 5317 }, { "epoch": 0.25966796875, "grad_norm": 0.3346216082572937, "learning_rate": 0.0004347663624758245, "loss": 1.8558, "step": 5318 }, { "epoch": 0.259716796875, "grad_norm": 0.3194577395915985, "learning_rate": 0.00043474169656567855, "loss": 1.8755, "step": 5319 }, { "epoch": 0.259765625, "grad_norm": 0.2919371426105499, "learning_rate": 0.0004347170267840643, "loss": 1.9146, "step": 5320 }, { "epoch": 0.259814453125, "grad_norm": 0.37069636583328247, "learning_rate": 0.0004346923531315796, "loss": 1.8821, "step": 5321 }, { "epoch": 0.25986328125, "grad_norm": 0.3461071848869324, "learning_rate": 0.00043466767560882243, "loss": 1.844, "step": 5322 }, { "epoch": 0.259912109375, "grad_norm": 0.27989596128463745, "learning_rate": 0.00043464299421639086, "loss": 1.8675, "step": 5323 }, { "epoch": 0.2599609375, "grad_norm": 0.31979021430015564, "learning_rate": 0.00043461830895488306, "loss": 1.8628, "step": 5324 }, { "epoch": 0.260009765625, "grad_norm": 0.2296009659767151, "learning_rate": 0.00043459361982489724, "loss": 1.8794, "step": 5325 }, { "epoch": 0.26005859375, "grad_norm": 0.2851089537143707, "learning_rate": 0.00043456892682703193, "loss": 1.906, "step": 5326 }, { "epoch": 0.260107421875, "grad_norm": 0.31838521361351013, "learning_rate": 0.00043454422996188545, "loss": 1.901, "step": 5327 }, { "epoch": 0.26015625, "grad_norm": 0.23730583488941193, "learning_rate": 0.0004345195292300564, "loss": 1.8633, "step": 5328 }, { "epoch": 0.260205078125, "grad_norm": 0.295958936214447, "learning_rate": 0.0004344948246321434, "loss": 1.8472, "step": 5329 }, { "epoch": 0.26025390625, "grad_norm": 0.2965766489505768, "learning_rate": 0.0004344701161687452, "loss": 1.8704, "step": 5330 }, { "epoch": 0.260302734375, "grad_norm": 0.23174850642681122, "learning_rate": 0.0004344454038404606, "loss": 1.892, "step": 5331 }, { "epoch": 0.2603515625, "grad_norm": 0.28213873505592346, "learning_rate": 0.0004344206876478884, "loss": 1.8927, "step": 5332 }, { "epoch": 0.260400390625, "grad_norm": 0.26580482721328735, "learning_rate": 0.000434395967591628, "loss": 1.8615, "step": 5333 }, { "epoch": 0.26044921875, "grad_norm": 0.23513224720954895, "learning_rate": 0.00043437124367227807, "loss": 1.8731, "step": 5334 }, { "epoch": 0.260498046875, "grad_norm": 0.29533904790878296, "learning_rate": 0.0004343465158904382, "loss": 1.8569, "step": 5335 }, { "epoch": 0.260546875, "grad_norm": 0.3191894292831421, "learning_rate": 0.0004343217842467074, "loss": 1.8649, "step": 5336 }, { "epoch": 0.260595703125, "grad_norm": 0.2378954440355301, "learning_rate": 0.0004342970487416851, "loss": 1.8587, "step": 5337 }, { "epoch": 0.26064453125, "grad_norm": 0.3365674316883087, "learning_rate": 0.0004342723093759709, "loss": 1.838, "step": 5338 }, { "epoch": 0.260693359375, "grad_norm": 0.4147617518901825, "learning_rate": 0.00043424756615016437, "loss": 1.8628, "step": 5339 }, { "epoch": 0.2607421875, "grad_norm": 0.28186699748039246, "learning_rate": 0.00043422281906486504, "loss": 1.8753, "step": 5340 }, { "epoch": 0.260791015625, "grad_norm": 0.2726995348930359, "learning_rate": 0.00043419806812067285, "loss": 1.8862, "step": 5341 }, { "epoch": 0.26083984375, "grad_norm": 0.3489168882369995, "learning_rate": 0.0004341733133181875, "loss": 1.8514, "step": 5342 }, { "epoch": 0.260888671875, "grad_norm": 0.30359357595443726, "learning_rate": 0.00043414855465800917, "loss": 1.8797, "step": 5343 }, { "epoch": 0.2609375, "grad_norm": 0.26666367053985596, "learning_rate": 0.0004341237921407377, "loss": 1.8785, "step": 5344 }, { "epoch": 0.260986328125, "grad_norm": 0.35187551379203796, "learning_rate": 0.0004340990257669732, "loss": 1.8594, "step": 5345 }, { "epoch": 0.26103515625, "grad_norm": 0.2896427512168884, "learning_rate": 0.00043407425553731603, "loss": 1.8898, "step": 5346 }, { "epoch": 0.261083984375, "grad_norm": 0.29746687412261963, "learning_rate": 0.0004340494814523665, "loss": 1.8978, "step": 5347 }, { "epoch": 0.2611328125, "grad_norm": 0.3856874406337738, "learning_rate": 0.000434024703512725, "loss": 1.8707, "step": 5348 }, { "epoch": 0.261181640625, "grad_norm": 0.29106616973876953, "learning_rate": 0.00043399992171899205, "loss": 1.871, "step": 5349 }, { "epoch": 0.26123046875, "grad_norm": 0.3074832260608673, "learning_rate": 0.0004339751360717683, "loss": 1.8767, "step": 5350 }, { "epoch": 0.261279296875, "grad_norm": 0.41977939009666443, "learning_rate": 0.0004339503465716544, "loss": 1.8445, "step": 5351 }, { "epoch": 0.261328125, "grad_norm": 0.286295622587204, "learning_rate": 0.0004339255532192511, "loss": 1.8677, "step": 5352 }, { "epoch": 0.261376953125, "grad_norm": 0.2986844480037689, "learning_rate": 0.0004339007560151593, "loss": 1.8823, "step": 5353 }, { "epoch": 0.26142578125, "grad_norm": 0.3321342468261719, "learning_rate": 0.0004338759549599802, "loss": 1.8773, "step": 5354 }, { "epoch": 0.261474609375, "grad_norm": 0.3225744962692261, "learning_rate": 0.0004338511500543145, "loss": 1.878, "step": 5355 }, { "epoch": 0.2615234375, "grad_norm": 0.3365250825881958, "learning_rate": 0.0004338263412987636, "loss": 1.8743, "step": 5356 }, { "epoch": 0.261572265625, "grad_norm": 0.28168076276779175, "learning_rate": 0.0004338015286939288, "loss": 1.8678, "step": 5357 }, { "epoch": 0.26162109375, "grad_norm": 0.27094054222106934, "learning_rate": 0.00043377671224041137, "loss": 1.8636, "step": 5358 }, { "epoch": 0.261669921875, "grad_norm": 0.30517610907554626, "learning_rate": 0.00043375189193881265, "loss": 1.8659, "step": 5359 }, { "epoch": 0.26171875, "grad_norm": 0.25207337737083435, "learning_rate": 0.00043372706778973435, "loss": 1.8787, "step": 5360 }, { "epoch": 0.261767578125, "grad_norm": 0.2774644196033478, "learning_rate": 0.00043370223979377804, "loss": 1.8835, "step": 5361 }, { "epoch": 0.26181640625, "grad_norm": 0.23143088817596436, "learning_rate": 0.00043367740795154547, "loss": 1.871, "step": 5362 }, { "epoch": 0.261865234375, "grad_norm": 0.334613174200058, "learning_rate": 0.00043365257226363836, "loss": 1.8577, "step": 5363 }, { "epoch": 0.2619140625, "grad_norm": 0.28142625093460083, "learning_rate": 0.0004336277327306588, "loss": 1.8761, "step": 5364 }, { "epoch": 0.261962890625, "grad_norm": 0.3304632604122162, "learning_rate": 0.00043360288935320864, "loss": 1.8746, "step": 5365 }, { "epoch": 0.26201171875, "grad_norm": 0.3365062475204468, "learning_rate": 0.0004335780421318901, "loss": 1.8667, "step": 5366 }, { "epoch": 0.262060546875, "grad_norm": 0.25889408588409424, "learning_rate": 0.00043355319106730525, "loss": 1.8503, "step": 5367 }, { "epoch": 0.262109375, "grad_norm": 0.2986155152320862, "learning_rate": 0.00043352833616005647, "loss": 1.854, "step": 5368 }, { "epoch": 0.262158203125, "grad_norm": 0.22128649055957794, "learning_rate": 0.0004335034774107461, "loss": 1.8789, "step": 5369 }, { "epoch": 0.26220703125, "grad_norm": 0.27628129720687866, "learning_rate": 0.0004334786148199765, "loss": 1.8746, "step": 5370 }, { "epoch": 0.262255859375, "grad_norm": 0.20833082497119904, "learning_rate": 0.0004334537483883505, "loss": 1.8537, "step": 5371 }, { "epoch": 0.2623046875, "grad_norm": 0.2732321321964264, "learning_rate": 0.0004334288781164705, "loss": 1.8397, "step": 5372 }, { "epoch": 0.262353515625, "grad_norm": 0.2926616966724396, "learning_rate": 0.0004334040040049395, "loss": 1.8578, "step": 5373 }, { "epoch": 0.26240234375, "grad_norm": 0.28429025411605835, "learning_rate": 0.0004333791260543601, "loss": 1.891, "step": 5374 }, { "epoch": 0.262451171875, "grad_norm": 0.29053717851638794, "learning_rate": 0.00043335424426533533, "loss": 1.8612, "step": 5375 }, { "epoch": 0.2625, "grad_norm": 0.23277947306632996, "learning_rate": 0.00043332935863846825, "loss": 1.8702, "step": 5376 }, { "epoch": 0.262548828125, "grad_norm": 0.25350284576416016, "learning_rate": 0.000433304469174362, "loss": 1.8758, "step": 5377 }, { "epoch": 0.26259765625, "grad_norm": 0.2781599164009094, "learning_rate": 0.00043327957587361963, "loss": 1.8715, "step": 5378 }, { "epoch": 0.262646484375, "grad_norm": 0.23111388087272644, "learning_rate": 0.0004332546787368447, "loss": 1.8863, "step": 5379 }, { "epoch": 0.2626953125, "grad_norm": 0.23046816885471344, "learning_rate": 0.00043322977776464043, "loss": 1.8735, "step": 5380 }, { "epoch": 0.262744140625, "grad_norm": 0.2154463231563568, "learning_rate": 0.00043320487295761046, "loss": 1.8713, "step": 5381 }, { "epoch": 0.26279296875, "grad_norm": 0.20518390834331512, "learning_rate": 0.0004331799643163582, "loss": 1.8283, "step": 5382 }, { "epoch": 0.262841796875, "grad_norm": 0.22404226660728455, "learning_rate": 0.0004331550518414875, "loss": 1.8874, "step": 5383 }, { "epoch": 0.262890625, "grad_norm": 0.2689679265022278, "learning_rate": 0.000433130135533602, "loss": 1.881, "step": 5384 }, { "epoch": 0.262939453125, "grad_norm": 0.2746425271034241, "learning_rate": 0.00043310521539330566, "loss": 1.8578, "step": 5385 }, { "epoch": 0.26298828125, "grad_norm": 0.2555278539657593, "learning_rate": 0.0004330802914212024, "loss": 1.8704, "step": 5386 }, { "epoch": 0.263037109375, "grad_norm": 0.23614929616451263, "learning_rate": 0.00043305536361789627, "loss": 1.8598, "step": 5387 }, { "epoch": 0.2630859375, "grad_norm": 0.22617189586162567, "learning_rate": 0.0004330304319839914, "loss": 1.8649, "step": 5388 }, { "epoch": 0.263134765625, "grad_norm": 0.2703016996383667, "learning_rate": 0.00043300549652009206, "loss": 1.9087, "step": 5389 }, { "epoch": 0.26318359375, "grad_norm": 0.3361594080924988, "learning_rate": 0.0004329805572268026, "loss": 1.8741, "step": 5390 }, { "epoch": 0.263232421875, "grad_norm": 0.37145325541496277, "learning_rate": 0.0004329556141047274, "loss": 1.8716, "step": 5391 }, { "epoch": 0.26328125, "grad_norm": 0.33997541666030884, "learning_rate": 0.000432930667154471, "loss": 1.8648, "step": 5392 }, { "epoch": 0.263330078125, "grad_norm": 0.3032677471637726, "learning_rate": 0.000432905716376638, "loss": 1.8808, "step": 5393 }, { "epoch": 0.26337890625, "grad_norm": 0.29442906379699707, "learning_rate": 0.0004328807617718331, "loss": 1.8747, "step": 5394 }, { "epoch": 0.263427734375, "grad_norm": 0.32120832800865173, "learning_rate": 0.00043285580334066103, "loss": 1.8713, "step": 5395 }, { "epoch": 0.2634765625, "grad_norm": 0.29757094383239746, "learning_rate": 0.0004328308410837269, "loss": 1.867, "step": 5396 }, { "epoch": 0.263525390625, "grad_norm": 0.2974194884300232, "learning_rate": 0.00043280587500163536, "loss": 1.8695, "step": 5397 }, { "epoch": 0.26357421875, "grad_norm": 0.3466792404651642, "learning_rate": 0.00043278090509499175, "loss": 1.8743, "step": 5398 }, { "epoch": 0.263623046875, "grad_norm": 0.3352547883987427, "learning_rate": 0.00043275593136440113, "loss": 1.8744, "step": 5399 }, { "epoch": 0.263671875, "grad_norm": 0.2704928517341614, "learning_rate": 0.0004327309538104688, "loss": 1.862, "step": 5400 }, { "epoch": 0.263720703125, "grad_norm": 0.4057883024215698, "learning_rate": 0.00043270597243380007, "loss": 1.8721, "step": 5401 }, { "epoch": 0.26376953125, "grad_norm": 0.37553349137306213, "learning_rate": 0.00043268098723500036, "loss": 1.86, "step": 5402 }, { "epoch": 0.263818359375, "grad_norm": 0.2646079957485199, "learning_rate": 0.0004326559982146753, "loss": 1.8635, "step": 5403 }, { "epoch": 0.2638671875, "grad_norm": 0.3316333293914795, "learning_rate": 0.00043263100537343054, "loss": 1.8651, "step": 5404 }, { "epoch": 0.263916015625, "grad_norm": 0.32310017943382263, "learning_rate": 0.00043260600871187154, "loss": 1.8849, "step": 5405 }, { "epoch": 0.26396484375, "grad_norm": 0.24399518966674805, "learning_rate": 0.00043258100823060443, "loss": 1.8494, "step": 5406 }, { "epoch": 0.264013671875, "grad_norm": 0.3033832013607025, "learning_rate": 0.000432556003930235, "loss": 1.87, "step": 5407 }, { "epoch": 0.2640625, "grad_norm": 0.2704663574695587, "learning_rate": 0.0004325309958113691, "loss": 1.8637, "step": 5408 }, { "epoch": 0.264111328125, "grad_norm": 0.27798035740852356, "learning_rate": 0.00043250598387461304, "loss": 1.8772, "step": 5409 }, { "epoch": 0.26416015625, "grad_norm": 0.2904926538467407, "learning_rate": 0.0004324809681205729, "loss": 1.8552, "step": 5410 }, { "epoch": 0.264208984375, "grad_norm": 0.2694591283798218, "learning_rate": 0.00043245594854985503, "loss": 1.8718, "step": 5411 }, { "epoch": 0.2642578125, "grad_norm": 0.26021096110343933, "learning_rate": 0.00043243092516306576, "loss": 1.86, "step": 5412 }, { "epoch": 0.264306640625, "grad_norm": 0.2750384509563446, "learning_rate": 0.0004324058979608114, "loss": 1.8635, "step": 5413 }, { "epoch": 0.26435546875, "grad_norm": 0.23426872491836548, "learning_rate": 0.00043238086694369875, "loss": 1.8629, "step": 5414 }, { "epoch": 0.264404296875, "grad_norm": 0.22430574893951416, "learning_rate": 0.0004323558321123344, "loss": 1.8541, "step": 5415 }, { "epoch": 0.264453125, "grad_norm": 0.2564699351787567, "learning_rate": 0.00043233079346732497, "loss": 1.8744, "step": 5416 }, { "epoch": 0.264501953125, "grad_norm": 0.23851844668388367, "learning_rate": 0.0004323057510092774, "loss": 1.8543, "step": 5417 }, { "epoch": 0.26455078125, "grad_norm": 0.24820378422737122, "learning_rate": 0.00043228070473879857, "loss": 1.8576, "step": 5418 }, { "epoch": 0.264599609375, "grad_norm": 0.27668920159339905, "learning_rate": 0.00043225565465649545, "loss": 1.8656, "step": 5419 }, { "epoch": 0.2646484375, "grad_norm": 0.2656530737876892, "learning_rate": 0.00043223060076297526, "loss": 1.8527, "step": 5420 }, { "epoch": 0.264697265625, "grad_norm": 0.29046469926834106, "learning_rate": 0.00043220554305884514, "loss": 1.8688, "step": 5421 }, { "epoch": 0.26474609375, "grad_norm": 0.3575761616230011, "learning_rate": 0.0004321804815447123, "loss": 1.8611, "step": 5422 }, { "epoch": 0.264794921875, "grad_norm": 0.2907367944717407, "learning_rate": 0.0004321554162211843, "loss": 1.8567, "step": 5423 }, { "epoch": 0.26484375, "grad_norm": 0.2596026360988617, "learning_rate": 0.00043213034708886847, "loss": 1.857, "step": 5424 }, { "epoch": 0.264892578125, "grad_norm": 0.35810452699661255, "learning_rate": 0.0004321052741483725, "loss": 1.8617, "step": 5425 }, { "epoch": 0.26494140625, "grad_norm": 0.2817716896533966, "learning_rate": 0.000432080197400304, "loss": 1.8503, "step": 5426 }, { "epoch": 0.264990234375, "grad_norm": 0.2854205071926117, "learning_rate": 0.0004320551168452707, "loss": 1.8833, "step": 5427 }, { "epoch": 0.2650390625, "grad_norm": 0.3317159116268158, "learning_rate": 0.00043203003248388046, "loss": 1.8878, "step": 5428 }, { "epoch": 0.265087890625, "grad_norm": 0.2647876441478729, "learning_rate": 0.0004320049443167412, "loss": 1.8987, "step": 5429 }, { "epoch": 0.26513671875, "grad_norm": 0.34070396423339844, "learning_rate": 0.00043197985234446094, "loss": 1.8785, "step": 5430 }, { "epoch": 0.265185546875, "grad_norm": 0.31732726097106934, "learning_rate": 0.00043195475656764796, "loss": 1.8734, "step": 5431 }, { "epoch": 0.265234375, "grad_norm": 0.244368776679039, "learning_rate": 0.0004319296569869103, "loss": 1.8526, "step": 5432 }, { "epoch": 0.265283203125, "grad_norm": 0.2961980104446411, "learning_rate": 0.00043190455360285636, "loss": 1.8789, "step": 5433 }, { "epoch": 0.26533203125, "grad_norm": 0.23916691541671753, "learning_rate": 0.00043187944641609444, "loss": 1.8585, "step": 5434 }, { "epoch": 0.265380859375, "grad_norm": 0.25803279876708984, "learning_rate": 0.00043185433542723315, "loss": 1.833, "step": 5435 }, { "epoch": 0.2654296875, "grad_norm": 0.2261461615562439, "learning_rate": 0.000431829220636881, "loss": 1.9021, "step": 5436 }, { "epoch": 0.265478515625, "grad_norm": 0.27904394268989563, "learning_rate": 0.0004318041020456467, "loss": 1.8386, "step": 5437 }, { "epoch": 0.26552734375, "grad_norm": 0.24374698102474213, "learning_rate": 0.000431778979654139, "loss": 1.8744, "step": 5438 }, { "epoch": 0.265576171875, "grad_norm": 0.258201003074646, "learning_rate": 0.00043175385346296685, "loss": 1.87, "step": 5439 }, { "epoch": 0.265625, "grad_norm": 0.2834412157535553, "learning_rate": 0.000431728723472739, "loss": 1.8546, "step": 5440 }, { "epoch": 0.265673828125, "grad_norm": 0.2716384828090668, "learning_rate": 0.0004317035896840647, "loss": 1.858, "step": 5441 }, { "epoch": 0.26572265625, "grad_norm": 0.27002403140068054, "learning_rate": 0.0004316784520975531, "loss": 1.8502, "step": 5442 }, { "epoch": 0.265771484375, "grad_norm": 0.29587215185165405, "learning_rate": 0.0004316533107138132, "loss": 1.8703, "step": 5443 }, { "epoch": 0.2658203125, "grad_norm": 0.2535116374492645, "learning_rate": 0.00043162816553345444, "loss": 1.8657, "step": 5444 }, { "epoch": 0.265869140625, "grad_norm": 0.2544131577014923, "learning_rate": 0.0004316030165570864, "loss": 1.856, "step": 5445 }, { "epoch": 0.26591796875, "grad_norm": 0.2674049437046051, "learning_rate": 0.0004315778637853183, "loss": 1.8249, "step": 5446 }, { "epoch": 0.265966796875, "grad_norm": 0.21516650915145874, "learning_rate": 0.00043155270721875993, "loss": 1.845, "step": 5447 }, { "epoch": 0.266015625, "grad_norm": 0.2826240658760071, "learning_rate": 0.0004315275468580209, "loss": 1.8542, "step": 5448 }, { "epoch": 0.266064453125, "grad_norm": 0.29713183641433716, "learning_rate": 0.00043150238270371106, "loss": 1.8601, "step": 5449 }, { "epoch": 0.26611328125, "grad_norm": 0.22838808596134186, "learning_rate": 0.00043147721475644027, "loss": 1.8482, "step": 5450 }, { "epoch": 0.266162109375, "grad_norm": 0.2441319078207016, "learning_rate": 0.00043145204301681844, "loss": 1.8527, "step": 5451 }, { "epoch": 0.2662109375, "grad_norm": 0.2683185040950775, "learning_rate": 0.00043142686748545565, "loss": 1.8466, "step": 5452 }, { "epoch": 0.266259765625, "grad_norm": 0.23166286945343018, "learning_rate": 0.00043140168816296204, "loss": 1.832, "step": 5453 }, { "epoch": 0.26630859375, "grad_norm": 0.2955226004123688, "learning_rate": 0.00043137650504994785, "loss": 1.8637, "step": 5454 }, { "epoch": 0.266357421875, "grad_norm": 0.2959707975387573, "learning_rate": 0.00043135131814702344, "loss": 1.8636, "step": 5455 }, { "epoch": 0.26640625, "grad_norm": 0.22340357303619385, "learning_rate": 0.00043132612745479926, "loss": 1.8569, "step": 5456 }, { "epoch": 0.266455078125, "grad_norm": 0.23294506967067719, "learning_rate": 0.0004313009329738857, "loss": 1.894, "step": 5457 }, { "epoch": 0.26650390625, "grad_norm": 0.25296854972839355, "learning_rate": 0.00043127573470489356, "loss": 1.858, "step": 5458 }, { "epoch": 0.266552734375, "grad_norm": 0.30316513776779175, "learning_rate": 0.0004312505326484334, "loss": 1.8719, "step": 5459 }, { "epoch": 0.2666015625, "grad_norm": 0.3341226875782013, "learning_rate": 0.00043122532680511604, "loss": 1.8752, "step": 5460 }, { "epoch": 0.266650390625, "grad_norm": 0.2708250880241394, "learning_rate": 0.00043120011717555243, "loss": 1.8533, "step": 5461 }, { "epoch": 0.26669921875, "grad_norm": 0.30090028047561646, "learning_rate": 0.0004311749037603534, "loss": 1.8661, "step": 5462 }, { "epoch": 0.266748046875, "grad_norm": 0.348918616771698, "learning_rate": 0.00043114968656013023, "loss": 1.8607, "step": 5463 }, { "epoch": 0.266796875, "grad_norm": 0.3012344539165497, "learning_rate": 0.00043112446557549386, "loss": 1.8677, "step": 5464 }, { "epoch": 0.266845703125, "grad_norm": 0.3160187304019928, "learning_rate": 0.00043109924080705567, "loss": 1.8745, "step": 5465 }, { "epoch": 0.26689453125, "grad_norm": 0.3737541437149048, "learning_rate": 0.0004310740122554269, "loss": 1.8571, "step": 5466 }, { "epoch": 0.266943359375, "grad_norm": 0.33349257707595825, "learning_rate": 0.00043104877992121914, "loss": 1.8654, "step": 5467 }, { "epoch": 0.2669921875, "grad_norm": 0.21208786964416504, "learning_rate": 0.0004310235438050439, "loss": 1.8763, "step": 5468 }, { "epoch": 0.267041015625, "grad_norm": 0.2756386697292328, "learning_rate": 0.00043099830390751253, "loss": 1.8507, "step": 5469 }, { "epoch": 0.26708984375, "grad_norm": 0.2276211380958557, "learning_rate": 0.0004309730602292371, "loss": 1.8427, "step": 5470 }, { "epoch": 0.267138671875, "grad_norm": 0.24372319877147675, "learning_rate": 0.00043094781277082915, "loss": 1.885, "step": 5471 }, { "epoch": 0.2671875, "grad_norm": 0.2679544687271118, "learning_rate": 0.00043092256153290067, "loss": 1.8604, "step": 5472 }, { "epoch": 0.267236328125, "grad_norm": 0.2370128184556961, "learning_rate": 0.00043089730651606374, "loss": 1.8813, "step": 5473 }, { "epoch": 0.26728515625, "grad_norm": 0.23691630363464355, "learning_rate": 0.0004308720477209303, "loss": 1.8608, "step": 5474 }, { "epoch": 0.267333984375, "grad_norm": 0.2693430185317993, "learning_rate": 0.0004308467851481125, "loss": 1.8421, "step": 5475 }, { "epoch": 0.2673828125, "grad_norm": 0.24905772507190704, "learning_rate": 0.0004308215187982227, "loss": 1.8523, "step": 5476 }, { "epoch": 0.267431640625, "grad_norm": 0.22082382440567017, "learning_rate": 0.00043079624867187324, "loss": 1.8589, "step": 5477 }, { "epoch": 0.26748046875, "grad_norm": 0.26023972034454346, "learning_rate": 0.00043077097476967646, "loss": 1.8758, "step": 5478 }, { "epoch": 0.267529296875, "grad_norm": 0.3344576060771942, "learning_rate": 0.000430745697092245, "loss": 1.8509, "step": 5479 }, { "epoch": 0.267578125, "grad_norm": 0.2905609607696533, "learning_rate": 0.0004307204156401914, "loss": 1.8801, "step": 5480 }, { "epoch": 0.267626953125, "grad_norm": 0.2528096139431, "learning_rate": 0.0004306951304141285, "loss": 1.882, "step": 5481 }, { "epoch": 0.26767578125, "grad_norm": 0.2777235507965088, "learning_rate": 0.00043066984141466896, "loss": 1.8638, "step": 5482 }, { "epoch": 0.267724609375, "grad_norm": 0.27309420704841614, "learning_rate": 0.00043064454864242575, "loss": 1.857, "step": 5483 }, { "epoch": 0.2677734375, "grad_norm": 0.2467314451932907, "learning_rate": 0.000430619252098012, "loss": 1.8501, "step": 5484 }, { "epoch": 0.267822265625, "grad_norm": 0.23204989731311798, "learning_rate": 0.00043059395178204055, "loss": 1.8419, "step": 5485 }, { "epoch": 0.26787109375, "grad_norm": 0.2782917320728302, "learning_rate": 0.0004305686476951246, "loss": 1.8843, "step": 5486 }, { "epoch": 0.267919921875, "grad_norm": 0.2574631869792938, "learning_rate": 0.0004305433398378776, "loss": 1.8671, "step": 5487 }, { "epoch": 0.26796875, "grad_norm": 0.289490282535553, "learning_rate": 0.00043051802821091273, "loss": 1.8363, "step": 5488 }, { "epoch": 0.268017578125, "grad_norm": 0.29021310806274414, "learning_rate": 0.00043049271281484354, "loss": 1.8699, "step": 5489 }, { "epoch": 0.26806640625, "grad_norm": 0.27793624997138977, "learning_rate": 0.00043046739365028346, "loss": 1.8567, "step": 5490 }, { "epoch": 0.268115234375, "grad_norm": 0.2994944751262665, "learning_rate": 0.0004304420707178463, "loss": 1.8216, "step": 5491 }, { "epoch": 0.2681640625, "grad_norm": 0.27307918667793274, "learning_rate": 0.00043041674401814563, "loss": 1.885, "step": 5492 }, { "epoch": 0.268212890625, "grad_norm": 0.3279667794704437, "learning_rate": 0.00043039141355179537, "loss": 1.8709, "step": 5493 }, { "epoch": 0.26826171875, "grad_norm": 0.3294979929924011, "learning_rate": 0.0004303660793194093, "loss": 1.86, "step": 5494 }, { "epoch": 0.268310546875, "grad_norm": 0.2854495644569397, "learning_rate": 0.00043034074132160147, "loss": 1.875, "step": 5495 }, { "epoch": 0.268359375, "grad_norm": 0.3209783434867859, "learning_rate": 0.0004303153995589859, "loss": 1.8548, "step": 5496 }, { "epoch": 0.268408203125, "grad_norm": 0.411073237657547, "learning_rate": 0.00043029005403217696, "loss": 1.8679, "step": 5497 }, { "epoch": 0.26845703125, "grad_norm": 0.2849827706813812, "learning_rate": 0.0004302647047417888, "loss": 1.8757, "step": 5498 }, { "epoch": 0.268505859375, "grad_norm": 0.2726452350616455, "learning_rate": 0.0004302393516884357, "loss": 1.8411, "step": 5499 }, { "epoch": 0.2685546875, "grad_norm": 0.3518570065498352, "learning_rate": 0.00043021399487273225, "loss": 1.8773, "step": 5500 }, { "epoch": 0.268603515625, "grad_norm": 0.2931723892688751, "learning_rate": 0.00043018863429529296, "loss": 1.8584, "step": 5501 }, { "epoch": 0.26865234375, "grad_norm": 0.2828073501586914, "learning_rate": 0.0004301632699567324, "loss": 1.8736, "step": 5502 }, { "epoch": 0.268701171875, "grad_norm": 0.33115532994270325, "learning_rate": 0.0004301379018576654, "loss": 1.8479, "step": 5503 }, { "epoch": 0.26875, "grad_norm": 0.2927585244178772, "learning_rate": 0.0004301125299987066, "loss": 1.8499, "step": 5504 }, { "epoch": 0.268798828125, "grad_norm": 0.26134878396987915, "learning_rate": 0.00043008715438047114, "loss": 1.8832, "step": 5505 }, { "epoch": 0.26884765625, "grad_norm": 0.2963932454586029, "learning_rate": 0.0004300617750035738, "loss": 1.8705, "step": 5506 }, { "epoch": 0.268896484375, "grad_norm": 0.2494628131389618, "learning_rate": 0.00043003639186862984, "loss": 1.8839, "step": 5507 }, { "epoch": 0.2689453125, "grad_norm": 0.25338974595069885, "learning_rate": 0.00043001100497625427, "loss": 1.8511, "step": 5508 }, { "epoch": 0.268994140625, "grad_norm": 0.22024281322956085, "learning_rate": 0.0004299856143270625, "loss": 1.8615, "step": 5509 }, { "epoch": 0.26904296875, "grad_norm": 0.2629911005496979, "learning_rate": 0.00042996021992166997, "loss": 1.8484, "step": 5510 }, { "epoch": 0.269091796875, "grad_norm": 0.21720263361930847, "learning_rate": 0.0004299348217606919, "loss": 1.8912, "step": 5511 }, { "epoch": 0.269140625, "grad_norm": 0.23312020301818848, "learning_rate": 0.0004299094198447439, "loss": 1.8631, "step": 5512 }, { "epoch": 0.269189453125, "grad_norm": 0.2582831382751465, "learning_rate": 0.00042988401417444185, "loss": 1.8314, "step": 5513 }, { "epoch": 0.26923828125, "grad_norm": 0.20578497648239136, "learning_rate": 0.00042985860475040105, "loss": 1.887, "step": 5514 }, { "epoch": 0.269287109375, "grad_norm": 0.2509283423423767, "learning_rate": 0.00042983319157323776, "loss": 1.8296, "step": 5515 }, { "epoch": 0.2693359375, "grad_norm": 0.29251351952552795, "learning_rate": 0.00042980777464356754, "loss": 1.8911, "step": 5516 }, { "epoch": 0.269384765625, "grad_norm": 0.24568375945091248, "learning_rate": 0.00042978235396200654, "loss": 1.8615, "step": 5517 }, { "epoch": 0.26943359375, "grad_norm": 0.30288365483283997, "learning_rate": 0.0004297569295291709, "loss": 1.8715, "step": 5518 }, { "epoch": 0.269482421875, "grad_norm": 0.29765409231185913, "learning_rate": 0.0004297315013456768, "loss": 1.8907, "step": 5519 }, { "epoch": 0.26953125, "grad_norm": 0.2617616355419159, "learning_rate": 0.0004297060694121403, "loss": 1.874, "step": 5520 }, { "epoch": 0.269580078125, "grad_norm": 0.28755974769592285, "learning_rate": 0.00042968063372917796, "loss": 1.8828, "step": 5521 }, { "epoch": 0.26962890625, "grad_norm": 0.29899635910987854, "learning_rate": 0.0004296551942974062, "loss": 1.8473, "step": 5522 }, { "epoch": 0.269677734375, "grad_norm": 0.2489636093378067, "learning_rate": 0.0004296297511174417, "loss": 1.8776, "step": 5523 }, { "epoch": 0.2697265625, "grad_norm": 0.292833149433136, "learning_rate": 0.00042960430418990074, "loss": 1.8617, "step": 5524 }, { "epoch": 0.269775390625, "grad_norm": 0.3154178857803345, "learning_rate": 0.0004295788535154004, "loss": 1.8888, "step": 5525 }, { "epoch": 0.26982421875, "grad_norm": 0.22526797652244568, "learning_rate": 0.0004295533990945573, "loss": 1.8749, "step": 5526 }, { "epoch": 0.269873046875, "grad_norm": 0.22905398905277252, "learning_rate": 0.0004295279409279885, "loss": 1.8552, "step": 5527 }, { "epoch": 0.269921875, "grad_norm": 0.2615602910518646, "learning_rate": 0.0004295024790163108, "loss": 1.8394, "step": 5528 }, { "epoch": 0.269970703125, "grad_norm": 0.27564552426338196, "learning_rate": 0.00042947701336014144, "loss": 1.8596, "step": 5529 }, { "epoch": 0.27001953125, "grad_norm": 0.2257210612297058, "learning_rate": 0.0004294515439600975, "loss": 1.8753, "step": 5530 }, { "epoch": 0.270068359375, "grad_norm": 0.2607210874557495, "learning_rate": 0.0004294260708167963, "loss": 1.8493, "step": 5531 }, { "epoch": 0.2701171875, "grad_norm": 0.31644630432128906, "learning_rate": 0.00042940059393085526, "loss": 1.8665, "step": 5532 }, { "epoch": 0.270166015625, "grad_norm": 0.28049027919769287, "learning_rate": 0.0004293751133028918, "loss": 1.8552, "step": 5533 }, { "epoch": 0.27021484375, "grad_norm": 0.22369936108589172, "learning_rate": 0.0004293496289335234, "loss": 1.8645, "step": 5534 }, { "epoch": 0.270263671875, "grad_norm": 0.2272195965051651, "learning_rate": 0.0004293241408233678, "loss": 1.8796, "step": 5535 }, { "epoch": 0.2703125, "grad_norm": 0.26348423957824707, "learning_rate": 0.0004292986489730426, "loss": 1.8369, "step": 5536 }, { "epoch": 0.270361328125, "grad_norm": 0.3289259672164917, "learning_rate": 0.0004292731533831657, "loss": 1.8769, "step": 5537 }, { "epoch": 0.27041015625, "grad_norm": 0.36981791257858276, "learning_rate": 0.000429247654054355, "loss": 1.8617, "step": 5538 }, { "epoch": 0.270458984375, "grad_norm": 0.32291555404663086, "learning_rate": 0.0004292221509872285, "loss": 1.8579, "step": 5539 }, { "epoch": 0.2705078125, "grad_norm": 0.34921136498451233, "learning_rate": 0.0004291966441824042, "loss": 1.8606, "step": 5540 }, { "epoch": 0.270556640625, "grad_norm": 0.4330902397632599, "learning_rate": 0.00042917113364050036, "loss": 1.8833, "step": 5541 }, { "epoch": 0.27060546875, "grad_norm": 0.249424546957016, "learning_rate": 0.00042914561936213534, "loss": 1.8548, "step": 5542 }, { "epoch": 0.270654296875, "grad_norm": 0.3640264570713043, "learning_rate": 0.0004291201013479273, "loss": 1.8677, "step": 5543 }, { "epoch": 0.270703125, "grad_norm": 0.28657612204551697, "learning_rate": 0.0004290945795984948, "loss": 1.8554, "step": 5544 }, { "epoch": 0.270751953125, "grad_norm": 0.28005823493003845, "learning_rate": 0.00042906905411445637, "loss": 1.854, "step": 5545 }, { "epoch": 0.27080078125, "grad_norm": 0.3576778769493103, "learning_rate": 0.0004290435248964306, "loss": 1.88, "step": 5546 }, { "epoch": 0.270849609375, "grad_norm": 0.23417288064956665, "learning_rate": 0.0004290179919450363, "loss": 1.8828, "step": 5547 }, { "epoch": 0.2708984375, "grad_norm": 0.32071131467819214, "learning_rate": 0.00042899245526089225, "loss": 1.8807, "step": 5548 }, { "epoch": 0.270947265625, "grad_norm": 0.2680037021636963, "learning_rate": 0.00042896691484461724, "loss": 1.8575, "step": 5549 }, { "epoch": 0.27099609375, "grad_norm": 0.25167542695999146, "learning_rate": 0.00042894137069683045, "loss": 1.8519, "step": 5550 }, { "epoch": 0.271044921875, "grad_norm": 0.2910742461681366, "learning_rate": 0.0004289158228181508, "loss": 1.8526, "step": 5551 }, { "epoch": 0.27109375, "grad_norm": 0.3137010633945465, "learning_rate": 0.0004288902712091976, "loss": 1.8613, "step": 5552 }, { "epoch": 0.271142578125, "grad_norm": 0.28587380051612854, "learning_rate": 0.00042886471587058996, "loss": 1.8778, "step": 5553 }, { "epoch": 0.27119140625, "grad_norm": 0.29448872804641724, "learning_rate": 0.0004288391568029474, "loss": 1.8734, "step": 5554 }, { "epoch": 0.271240234375, "grad_norm": 0.34815579652786255, "learning_rate": 0.00042881359400688914, "loss": 1.863, "step": 5555 }, { "epoch": 0.2712890625, "grad_norm": 0.3057989478111267, "learning_rate": 0.000428788027483035, "loss": 1.8551, "step": 5556 }, { "epoch": 0.271337890625, "grad_norm": 0.2752630412578583, "learning_rate": 0.0004287624572320044, "loss": 1.8479, "step": 5557 }, { "epoch": 0.27138671875, "grad_norm": 0.3627280592918396, "learning_rate": 0.0004287368832544172, "loss": 1.8679, "step": 5558 }, { "epoch": 0.271435546875, "grad_norm": 0.25986701250076294, "learning_rate": 0.000428711305550893, "loss": 1.8628, "step": 5559 }, { "epoch": 0.271484375, "grad_norm": 0.2913649380207062, "learning_rate": 0.00042868572412205186, "loss": 1.8646, "step": 5560 }, { "epoch": 0.271533203125, "grad_norm": 0.2926522493362427, "learning_rate": 0.00042866013896851376, "loss": 1.8384, "step": 5561 }, { "epoch": 0.27158203125, "grad_norm": 0.26360318064689636, "learning_rate": 0.0004286345500908987, "loss": 1.8751, "step": 5562 }, { "epoch": 0.271630859375, "grad_norm": 0.28652626276016235, "learning_rate": 0.0004286089574898269, "loss": 1.8719, "step": 5563 }, { "epoch": 0.2716796875, "grad_norm": 0.2877010107040405, "learning_rate": 0.00042858336116591864, "loss": 1.8827, "step": 5564 }, { "epoch": 0.271728515625, "grad_norm": 0.3167484998703003, "learning_rate": 0.0004285577611197942, "loss": 1.8544, "step": 5565 }, { "epoch": 0.27177734375, "grad_norm": 0.23806586861610413, "learning_rate": 0.00042853215735207405, "loss": 1.8648, "step": 5566 }, { "epoch": 0.271826171875, "grad_norm": 0.27283230423927307, "learning_rate": 0.0004285065498633787, "loss": 1.8754, "step": 5567 }, { "epoch": 0.271875, "grad_norm": 0.25286269187927246, "learning_rate": 0.00042848093865432887, "loss": 1.8764, "step": 5568 }, { "epoch": 0.271923828125, "grad_norm": 0.3045633137226105, "learning_rate": 0.0004284553237255452, "loss": 1.9074, "step": 5569 }, { "epoch": 0.27197265625, "grad_norm": 0.33956238627433777, "learning_rate": 0.00042842970507764837, "loss": 1.8812, "step": 5570 }, { "epoch": 0.272021484375, "grad_norm": 0.29280704259872437, "learning_rate": 0.0004284040827112594, "loss": 1.8587, "step": 5571 }, { "epoch": 0.2720703125, "grad_norm": 0.30355024337768555, "learning_rate": 0.00042837845662699927, "loss": 1.8694, "step": 5572 }, { "epoch": 0.272119140625, "grad_norm": 0.3142327070236206, "learning_rate": 0.00042835282682548905, "loss": 1.8792, "step": 5573 }, { "epoch": 0.27216796875, "grad_norm": 0.32248252630233765, "learning_rate": 0.00042832719330734983, "loss": 1.8541, "step": 5574 }, { "epoch": 0.272216796875, "grad_norm": 0.25239887833595276, "learning_rate": 0.00042830155607320286, "loss": 1.8732, "step": 5575 }, { "epoch": 0.272265625, "grad_norm": 0.2774185240268707, "learning_rate": 0.00042827591512366954, "loss": 1.8835, "step": 5576 }, { "epoch": 0.272314453125, "grad_norm": 0.29210442304611206, "learning_rate": 0.0004282502704593713, "loss": 1.8624, "step": 5577 }, { "epoch": 0.27236328125, "grad_norm": 0.2635522782802582, "learning_rate": 0.0004282246220809297, "loss": 1.8429, "step": 5578 }, { "epoch": 0.272412109375, "grad_norm": 0.26573169231414795, "learning_rate": 0.0004281989699889662, "loss": 1.8501, "step": 5579 }, { "epoch": 0.2724609375, "grad_norm": 0.2867535352706909, "learning_rate": 0.00042817331418410257, "loss": 1.8612, "step": 5580 }, { "epoch": 0.272509765625, "grad_norm": 0.23922806978225708, "learning_rate": 0.00042814765466696065, "loss": 1.8702, "step": 5581 }, { "epoch": 0.27255859375, "grad_norm": 0.2571413218975067, "learning_rate": 0.0004281219914381622, "loss": 1.8589, "step": 5582 }, { "epoch": 0.272607421875, "grad_norm": 0.3148411214351654, "learning_rate": 0.00042809632449832935, "loss": 1.8674, "step": 5583 }, { "epoch": 0.27265625, "grad_norm": 0.23323963582515717, "learning_rate": 0.0004280706538480841, "loss": 1.8898, "step": 5584 }, { "epoch": 0.272705078125, "grad_norm": 0.25514739751815796, "learning_rate": 0.0004280449794880485, "loss": 1.837, "step": 5585 }, { "epoch": 0.27275390625, "grad_norm": 0.31751227378845215, "learning_rate": 0.00042801930141884493, "loss": 1.8726, "step": 5586 }, { "epoch": 0.272802734375, "grad_norm": 0.2764038145542145, "learning_rate": 0.0004279936196410957, "loss": 1.8457, "step": 5587 }, { "epoch": 0.2728515625, "grad_norm": 0.2722364366054535, "learning_rate": 0.00042796793415542306, "loss": 1.8493, "step": 5588 }, { "epoch": 0.272900390625, "grad_norm": 0.3453310430049896, "learning_rate": 0.0004279422449624497, "loss": 1.845, "step": 5589 }, { "epoch": 0.27294921875, "grad_norm": 0.21688394248485565, "learning_rate": 0.0004279165520627982, "loss": 1.8649, "step": 5590 }, { "epoch": 0.272998046875, "grad_norm": 0.2970576584339142, "learning_rate": 0.0004278908554570912, "loss": 1.8719, "step": 5591 }, { "epoch": 0.273046875, "grad_norm": 0.3944854438304901, "learning_rate": 0.0004278651551459514, "loss": 1.8725, "step": 5592 }, { "epoch": 0.273095703125, "grad_norm": 0.35775744915008545, "learning_rate": 0.00042783945113000185, "loss": 1.8481, "step": 5593 }, { "epoch": 0.27314453125, "grad_norm": 0.310284823179245, "learning_rate": 0.0004278137434098654, "loss": 1.8608, "step": 5594 }, { "epoch": 0.273193359375, "grad_norm": 0.2534835934638977, "learning_rate": 0.00042778803198616504, "loss": 1.8734, "step": 5595 }, { "epoch": 0.2732421875, "grad_norm": 0.21687257289886475, "learning_rate": 0.0004277623168595241, "loss": 1.8545, "step": 5596 }, { "epoch": 0.273291015625, "grad_norm": 0.30505916476249695, "learning_rate": 0.00042773659803056556, "loss": 1.8691, "step": 5597 }, { "epoch": 0.27333984375, "grad_norm": 0.2845875024795532, "learning_rate": 0.0004277108754999129, "loss": 1.8421, "step": 5598 }, { "epoch": 0.273388671875, "grad_norm": 0.2558421790599823, "learning_rate": 0.0004276851492681895, "loss": 1.8606, "step": 5599 }, { "epoch": 0.2734375, "grad_norm": 0.22545504570007324, "learning_rate": 0.00042765941933601886, "loss": 1.8673, "step": 5600 }, { "epoch": 0.273486328125, "grad_norm": 0.3381122350692749, "learning_rate": 0.0004276336857040245, "loss": 1.8799, "step": 5601 }, { "epoch": 0.27353515625, "grad_norm": 0.39972952008247375, "learning_rate": 0.00042760794837283023, "loss": 1.8724, "step": 5602 }, { "epoch": 0.273583984375, "grad_norm": 0.2520521581172943, "learning_rate": 0.00042758220734305963, "loss": 1.8596, "step": 5603 }, { "epoch": 0.2736328125, "grad_norm": 0.2974465787410736, "learning_rate": 0.00042755646261533666, "loss": 1.8796, "step": 5604 }, { "epoch": 0.273681640625, "grad_norm": 0.3306296169757843, "learning_rate": 0.0004275307141902853, "loss": 1.8699, "step": 5605 }, { "epoch": 0.27373046875, "grad_norm": 0.28601253032684326, "learning_rate": 0.0004275049620685295, "loss": 1.8572, "step": 5606 }, { "epoch": 0.273779296875, "grad_norm": 0.28912022709846497, "learning_rate": 0.0004274792062506935, "loss": 1.8656, "step": 5607 }, { "epoch": 0.273828125, "grad_norm": 0.2791818678379059, "learning_rate": 0.0004274534467374013, "loss": 1.8431, "step": 5608 }, { "epoch": 0.273876953125, "grad_norm": 0.3262842297554016, "learning_rate": 0.0004274276835292775, "loss": 1.8529, "step": 5609 }, { "epoch": 0.27392578125, "grad_norm": 0.2857052981853485, "learning_rate": 0.00042740191662694616, "loss": 1.8557, "step": 5610 }, { "epoch": 0.273974609375, "grad_norm": 0.2766442596912384, "learning_rate": 0.0004273761460310321, "loss": 1.8529, "step": 5611 }, { "epoch": 0.2740234375, "grad_norm": 0.35224005579948425, "learning_rate": 0.00042735037174215963, "loss": 1.8661, "step": 5612 }, { "epoch": 0.274072265625, "grad_norm": 0.3161827027797699, "learning_rate": 0.0004273245937609535, "loss": 1.8831, "step": 5613 }, { "epoch": 0.27412109375, "grad_norm": 0.2897830009460449, "learning_rate": 0.00042729881208803847, "loss": 1.8755, "step": 5614 }, { "epoch": 0.274169921875, "grad_norm": 0.3368292450904846, "learning_rate": 0.00042727302672403936, "loss": 1.8833, "step": 5615 }, { "epoch": 0.27421875, "grad_norm": 0.2017897516489029, "learning_rate": 0.00042724723766958113, "loss": 1.8699, "step": 5616 }, { "epoch": 0.274267578125, "grad_norm": 0.3071552515029907, "learning_rate": 0.0004272214449252888, "loss": 1.8827, "step": 5617 }, { "epoch": 0.27431640625, "grad_norm": 0.30332720279693604, "learning_rate": 0.00042719564849178746, "loss": 1.8812, "step": 5618 }, { "epoch": 0.274365234375, "grad_norm": 0.2558891177177429, "learning_rate": 0.0004271698483697023, "loss": 1.8244, "step": 5619 }, { "epoch": 0.2744140625, "grad_norm": 0.24398453533649445, "learning_rate": 0.0004271440445596586, "loss": 1.855, "step": 5620 }, { "epoch": 0.274462890625, "grad_norm": 0.2542794942855835, "learning_rate": 0.00042711823706228174, "loss": 1.8744, "step": 5621 }, { "epoch": 0.27451171875, "grad_norm": 0.29373040795326233, "learning_rate": 0.0004270924258781972, "loss": 1.8506, "step": 5622 }, { "epoch": 0.274560546875, "grad_norm": 0.29606419801712036, "learning_rate": 0.00042706661100803056, "loss": 1.8723, "step": 5623 }, { "epoch": 0.274609375, "grad_norm": 0.28751346468925476, "learning_rate": 0.0004270407924524074, "loss": 1.8677, "step": 5624 }, { "epoch": 0.274658203125, "grad_norm": 0.26026350259780884, "learning_rate": 0.0004270149702119535, "loss": 1.8516, "step": 5625 }, { "epoch": 0.27470703125, "grad_norm": 0.26478272676467896, "learning_rate": 0.00042698914428729464, "loss": 1.8611, "step": 5626 }, { "epoch": 0.274755859375, "grad_norm": 0.27649375796318054, "learning_rate": 0.0004269633146790568, "loss": 1.8436, "step": 5627 }, { "epoch": 0.2748046875, "grad_norm": 0.2149287611246109, "learning_rate": 0.00042693748138786594, "loss": 1.837, "step": 5628 }, { "epoch": 0.274853515625, "grad_norm": 0.3081371784210205, "learning_rate": 0.0004269116444143482, "loss": 1.8445, "step": 5629 }, { "epoch": 0.27490234375, "grad_norm": 0.35379260778427124, "learning_rate": 0.0004268858037591296, "loss": 1.8511, "step": 5630 }, { "epoch": 0.274951171875, "grad_norm": 0.28675970435142517, "learning_rate": 0.00042685995942283656, "loss": 1.8475, "step": 5631 }, { "epoch": 0.275, "grad_norm": 0.2163475751876831, "learning_rate": 0.00042683411140609546, "loss": 1.8603, "step": 5632 }, { "epoch": 0.275048828125, "grad_norm": 0.3209209144115448, "learning_rate": 0.0004268082597095326, "loss": 1.847, "step": 5633 }, { "epoch": 0.27509765625, "grad_norm": 0.268065482378006, "learning_rate": 0.0004267824043337747, "loss": 1.8725, "step": 5634 }, { "epoch": 0.275146484375, "grad_norm": 0.2396736592054367, "learning_rate": 0.0004267565452794482, "loss": 1.873, "step": 5635 }, { "epoch": 0.2751953125, "grad_norm": 0.3015369474887848, "learning_rate": 0.00042673068254718, "loss": 1.852, "step": 5636 }, { "epoch": 0.275244140625, "grad_norm": 0.3005540370941162, "learning_rate": 0.00042670481613759677, "loss": 1.8494, "step": 5637 }, { "epoch": 0.27529296875, "grad_norm": 0.21900539100170135, "learning_rate": 0.00042667894605132533, "loss": 1.8543, "step": 5638 }, { "epoch": 0.275341796875, "grad_norm": 0.2602889835834503, "learning_rate": 0.0004266530722889929, "loss": 1.837, "step": 5639 }, { "epoch": 0.275390625, "grad_norm": 0.29526999592781067, "learning_rate": 0.0004266271948512264, "loss": 1.8528, "step": 5640 }, { "epoch": 0.275439453125, "grad_norm": 0.306522011756897, "learning_rate": 0.00042660131373865304, "loss": 1.85, "step": 5641 }, { "epoch": 0.27548828125, "grad_norm": 0.27377381920814514, "learning_rate": 0.0004265754289519, "loss": 1.8526, "step": 5642 }, { "epoch": 0.275537109375, "grad_norm": 0.26968827843666077, "learning_rate": 0.0004265495404915947, "loss": 1.8335, "step": 5643 }, { "epoch": 0.2755859375, "grad_norm": 0.2945333421230316, "learning_rate": 0.00042652364835836455, "loss": 1.8733, "step": 5644 }, { "epoch": 0.275634765625, "grad_norm": 0.2641691267490387, "learning_rate": 0.000426497752552837, "loss": 1.8694, "step": 5645 }, { "epoch": 0.27568359375, "grad_norm": 0.2203356921672821, "learning_rate": 0.00042647185307563974, "loss": 1.8673, "step": 5646 }, { "epoch": 0.275732421875, "grad_norm": 0.24723078310489655, "learning_rate": 0.00042644594992740037, "loss": 1.8704, "step": 5647 }, { "epoch": 0.27578125, "grad_norm": 0.29234007000923157, "learning_rate": 0.0004264200431087468, "loss": 1.8868, "step": 5648 }, { "epoch": 0.275830078125, "grad_norm": 0.31755760312080383, "learning_rate": 0.0004263941326203068, "loss": 1.8693, "step": 5649 }, { "epoch": 0.27587890625, "grad_norm": 0.23530511558055878, "learning_rate": 0.00042636821846270837, "loss": 1.8619, "step": 5650 }, { "epoch": 0.275927734375, "grad_norm": 0.29744860529899597, "learning_rate": 0.00042634230063657956, "loss": 1.8497, "step": 5651 }, { "epoch": 0.2759765625, "grad_norm": 0.23865535855293274, "learning_rate": 0.00042631637914254856, "loss": 1.8724, "step": 5652 }, { "epoch": 0.276025390625, "grad_norm": 0.2597830593585968, "learning_rate": 0.0004262904539812435, "loss": 1.867, "step": 5653 }, { "epoch": 0.27607421875, "grad_norm": 0.32400545477867126, "learning_rate": 0.00042626452515329274, "loss": 1.8572, "step": 5654 }, { "epoch": 0.276123046875, "grad_norm": 0.30361583828926086, "learning_rate": 0.00042623859265932467, "loss": 1.8584, "step": 5655 }, { "epoch": 0.276171875, "grad_norm": 0.2728385329246521, "learning_rate": 0.0004262126564999678, "loss": 1.8497, "step": 5656 }, { "epoch": 0.276220703125, "grad_norm": 0.332051545381546, "learning_rate": 0.0004261867166758507, "loss": 1.8757, "step": 5657 }, { "epoch": 0.27626953125, "grad_norm": 0.2883819043636322, "learning_rate": 0.0004261607731876021, "loss": 1.8692, "step": 5658 }, { "epoch": 0.276318359375, "grad_norm": 0.2442641407251358, "learning_rate": 0.0004261348260358507, "loss": 1.8498, "step": 5659 }, { "epoch": 0.2763671875, "grad_norm": 0.2411593496799469, "learning_rate": 0.00042610887522122543, "loss": 1.8673, "step": 5660 }, { "epoch": 0.276416015625, "grad_norm": 0.3288181722164154, "learning_rate": 0.0004260829207443551, "loss": 1.8666, "step": 5661 }, { "epoch": 0.27646484375, "grad_norm": 0.37908995151519775, "learning_rate": 0.0004260569626058689, "loss": 1.8713, "step": 5662 }, { "epoch": 0.276513671875, "grad_norm": 0.299584299325943, "learning_rate": 0.0004260310008063958, "loss": 1.8306, "step": 5663 }, { "epoch": 0.2765625, "grad_norm": 0.31950998306274414, "learning_rate": 0.00042600503534656506, "loss": 1.8788, "step": 5664 }, { "epoch": 0.276611328125, "grad_norm": 0.33036771416664124, "learning_rate": 0.00042597906622700597, "loss": 1.8444, "step": 5665 }, { "epoch": 0.27666015625, "grad_norm": 0.3494766354560852, "learning_rate": 0.0004259530934483479, "loss": 1.8726, "step": 5666 }, { "epoch": 0.276708984375, "grad_norm": 0.37812340259552, "learning_rate": 0.00042592711701122026, "loss": 1.8418, "step": 5667 }, { "epoch": 0.2767578125, "grad_norm": 0.3350487947463989, "learning_rate": 0.0004259011369162528, "loss": 1.8531, "step": 5668 }, { "epoch": 0.276806640625, "grad_norm": 0.2715495228767395, "learning_rate": 0.00042587515316407505, "loss": 1.8571, "step": 5669 }, { "epoch": 0.27685546875, "grad_norm": 0.28782352805137634, "learning_rate": 0.00042584916575531676, "loss": 1.8579, "step": 5670 }, { "epoch": 0.276904296875, "grad_norm": 0.3054461181163788, "learning_rate": 0.00042582317469060766, "loss": 1.8747, "step": 5671 }, { "epoch": 0.276953125, "grad_norm": 0.30080920457839966, "learning_rate": 0.0004257971799705778, "loss": 1.8376, "step": 5672 }, { "epoch": 0.277001953125, "grad_norm": 0.2794157564640045, "learning_rate": 0.00042577118159585714, "loss": 1.8518, "step": 5673 }, { "epoch": 0.27705078125, "grad_norm": 0.2056008279323578, "learning_rate": 0.0004257451795670758, "loss": 1.8606, "step": 5674 }, { "epoch": 0.277099609375, "grad_norm": 0.265088826417923, "learning_rate": 0.00042571917388486383, "loss": 1.8263, "step": 5675 }, { "epoch": 0.2771484375, "grad_norm": 0.253460556268692, "learning_rate": 0.0004256931645498517, "loss": 1.8625, "step": 5676 }, { "epoch": 0.277197265625, "grad_norm": 0.2547469437122345, "learning_rate": 0.00042566715156266955, "loss": 1.844, "step": 5677 }, { "epoch": 0.27724609375, "grad_norm": 0.2568104863166809, "learning_rate": 0.00042564113492394803, "loss": 1.8727, "step": 5678 }, { "epoch": 0.277294921875, "grad_norm": 0.21920163929462433, "learning_rate": 0.00042561511463431757, "loss": 1.8564, "step": 5679 }, { "epoch": 0.27734375, "grad_norm": 0.27821922302246094, "learning_rate": 0.0004255890906944087, "loss": 1.8757, "step": 5680 }, { "epoch": 0.277392578125, "grad_norm": 0.30448833107948303, "learning_rate": 0.0004255630631048523, "loss": 1.8571, "step": 5681 }, { "epoch": 0.27744140625, "grad_norm": 0.28746455907821655, "learning_rate": 0.0004255370318662792, "loss": 1.8592, "step": 5682 }, { "epoch": 0.277490234375, "grad_norm": 0.31809476017951965, "learning_rate": 0.0004255109969793201, "loss": 1.833, "step": 5683 }, { "epoch": 0.2775390625, "grad_norm": 0.2994731366634369, "learning_rate": 0.0004254849584446061, "loss": 1.8739, "step": 5684 }, { "epoch": 0.277587890625, "grad_norm": 0.2538754940032959, "learning_rate": 0.00042545891626276816, "loss": 1.8754, "step": 5685 }, { "epoch": 0.27763671875, "grad_norm": 0.24241745471954346, "learning_rate": 0.0004254328704344376, "loss": 1.8724, "step": 5686 }, { "epoch": 0.277685546875, "grad_norm": 0.28128060698509216, "learning_rate": 0.0004254068209602455, "loss": 1.8633, "step": 5687 }, { "epoch": 0.277734375, "grad_norm": 0.3154356777667999, "learning_rate": 0.0004253807678408233, "loss": 1.8539, "step": 5688 }, { "epoch": 0.277783203125, "grad_norm": 0.2802368700504303, "learning_rate": 0.0004253547110768024, "loss": 1.8645, "step": 5689 }, { "epoch": 0.27783203125, "grad_norm": 0.23400941491127014, "learning_rate": 0.0004253286506688143, "loss": 1.829, "step": 5690 }, { "epoch": 0.277880859375, "grad_norm": 0.2521915137767792, "learning_rate": 0.00042530258661749044, "loss": 1.8578, "step": 5691 }, { "epoch": 0.2779296875, "grad_norm": 0.312826007604599, "learning_rate": 0.0004252765189234627, "loss": 1.8791, "step": 5692 }, { "epoch": 0.277978515625, "grad_norm": 0.3269122242927551, "learning_rate": 0.00042525044758736285, "loss": 1.8561, "step": 5693 }, { "epoch": 0.27802734375, "grad_norm": 0.30177682638168335, "learning_rate": 0.00042522437260982264, "loss": 1.8411, "step": 5694 }, { "epoch": 0.278076171875, "grad_norm": 0.2871256172657013, "learning_rate": 0.0004251982939914741, "loss": 1.8731, "step": 5695 }, { "epoch": 0.278125, "grad_norm": 0.24991019070148468, "learning_rate": 0.0004251722117329493, "loss": 1.8735, "step": 5696 }, { "epoch": 0.278173828125, "grad_norm": 0.3264785706996918, "learning_rate": 0.0004251461258348803, "loss": 1.8729, "step": 5697 }, { "epoch": 0.27822265625, "grad_norm": 0.26710787415504456, "learning_rate": 0.0004251200362978991, "loss": 1.8405, "step": 5698 }, { "epoch": 0.278271484375, "grad_norm": 0.2730340361595154, "learning_rate": 0.00042509394312263845, "loss": 1.8779, "step": 5699 }, { "epoch": 0.2783203125, "grad_norm": 0.23499946296215057, "learning_rate": 0.0004250678463097304, "loss": 1.8464, "step": 5700 }, { "epoch": 0.278369140625, "grad_norm": 0.23303088545799255, "learning_rate": 0.00042504174585980754, "loss": 1.8567, "step": 5701 }, { "epoch": 0.27841796875, "grad_norm": 0.25049299001693726, "learning_rate": 0.00042501564177350245, "loss": 1.8463, "step": 5702 }, { "epoch": 0.278466796875, "grad_norm": 0.2982906401157379, "learning_rate": 0.0004249895340514477, "loss": 1.857, "step": 5703 }, { "epoch": 0.278515625, "grad_norm": 0.29691198468208313, "learning_rate": 0.0004249634226942762, "loss": 1.8875, "step": 5704 }, { "epoch": 0.278564453125, "grad_norm": 0.2844417095184326, "learning_rate": 0.0004249373077026206, "loss": 1.8405, "step": 5705 }, { "epoch": 0.27861328125, "grad_norm": 0.3392249345779419, "learning_rate": 0.0004249111890771139, "loss": 1.8316, "step": 5706 }, { "epoch": 0.278662109375, "grad_norm": 0.3598851263523102, "learning_rate": 0.00042488506681838905, "loss": 1.8743, "step": 5707 }, { "epoch": 0.2787109375, "grad_norm": 0.28524720668792725, "learning_rate": 0.00042485894092707927, "loss": 1.8748, "step": 5708 }, { "epoch": 0.278759765625, "grad_norm": 0.32487937808036804, "learning_rate": 0.00042483281140381766, "loss": 1.8801, "step": 5709 }, { "epoch": 0.27880859375, "grad_norm": 0.30342817306518555, "learning_rate": 0.0004248066782492374, "loss": 1.8744, "step": 5710 }, { "epoch": 0.278857421875, "grad_norm": 0.2538738548755646, "learning_rate": 0.000424780541463972, "loss": 1.823, "step": 5711 }, { "epoch": 0.27890625, "grad_norm": 0.2842814326286316, "learning_rate": 0.00042475440104865485, "loss": 1.8776, "step": 5712 }, { "epoch": 0.278955078125, "grad_norm": 0.2884630858898163, "learning_rate": 0.00042472825700391953, "loss": 1.889, "step": 5713 }, { "epoch": 0.27900390625, "grad_norm": 0.25035691261291504, "learning_rate": 0.00042470210933039955, "loss": 1.8616, "step": 5714 }, { "epoch": 0.279052734375, "grad_norm": 0.22946368157863617, "learning_rate": 0.00042467595802872873, "loss": 1.8371, "step": 5715 }, { "epoch": 0.2791015625, "grad_norm": 0.2272026389837265, "learning_rate": 0.00042464980309954073, "loss": 1.8768, "step": 5716 }, { "epoch": 0.279150390625, "grad_norm": 0.21709029376506805, "learning_rate": 0.00042462364454346954, "loss": 1.8631, "step": 5717 }, { "epoch": 0.27919921875, "grad_norm": 0.22959594428539276, "learning_rate": 0.00042459748236114915, "loss": 1.8537, "step": 5718 }, { "epoch": 0.279248046875, "grad_norm": 0.2980477213859558, "learning_rate": 0.00042457131655321354, "loss": 1.8676, "step": 5719 }, { "epoch": 0.279296875, "grad_norm": 0.27402034401893616, "learning_rate": 0.00042454514712029703, "loss": 1.8479, "step": 5720 }, { "epoch": 0.279345703125, "grad_norm": 0.28451061248779297, "learning_rate": 0.00042451897406303366, "loss": 1.8841, "step": 5721 }, { "epoch": 0.27939453125, "grad_norm": 0.2703280746936798, "learning_rate": 0.0004244927973820578, "loss": 1.8684, "step": 5722 }, { "epoch": 0.279443359375, "grad_norm": 0.2647377550601959, "learning_rate": 0.00042446661707800395, "loss": 1.8551, "step": 5723 }, { "epoch": 0.2794921875, "grad_norm": 0.24052563309669495, "learning_rate": 0.00042444043315150647, "loss": 1.8454, "step": 5724 }, { "epoch": 0.279541015625, "grad_norm": 0.22706089913845062, "learning_rate": 0.0004244142456032001, "loss": 1.8665, "step": 5725 }, { "epoch": 0.27958984375, "grad_norm": 0.24278974533081055, "learning_rate": 0.0004243880544337194, "loss": 1.8542, "step": 5726 }, { "epoch": 0.279638671875, "grad_norm": 0.2620956599712372, "learning_rate": 0.00042436185964369927, "loss": 1.8668, "step": 5727 }, { "epoch": 0.2796875, "grad_norm": 0.2458297461271286, "learning_rate": 0.0004243356612337744, "loss": 1.8487, "step": 5728 }, { "epoch": 0.279736328125, "grad_norm": 0.3041878640651703, "learning_rate": 0.0004243094592045799, "loss": 1.8649, "step": 5729 }, { "epoch": 0.27978515625, "grad_norm": 0.3220462501049042, "learning_rate": 0.00042428325355675065, "loss": 1.8781, "step": 5730 }, { "epoch": 0.279833984375, "grad_norm": 0.27609536051750183, "learning_rate": 0.0004242570442909218, "loss": 1.879, "step": 5731 }, { "epoch": 0.2798828125, "grad_norm": 0.32636764645576477, "learning_rate": 0.00042423083140772863, "loss": 1.8623, "step": 5732 }, { "epoch": 0.279931640625, "grad_norm": 0.33307307958602905, "learning_rate": 0.0004242046149078062, "loss": 1.8572, "step": 5733 }, { "epoch": 0.27998046875, "grad_norm": 0.2604809105396271, "learning_rate": 0.00042417839479179023, "loss": 1.854, "step": 5734 }, { "epoch": 0.280029296875, "grad_norm": 0.2819962799549103, "learning_rate": 0.00042415217106031594, "loss": 1.8577, "step": 5735 }, { "epoch": 0.280078125, "grad_norm": 0.2670150101184845, "learning_rate": 0.00042412594371401895, "loss": 1.8921, "step": 5736 }, { "epoch": 0.280126953125, "grad_norm": 0.19628627598285675, "learning_rate": 0.000424099712753535, "loss": 1.8711, "step": 5737 }, { "epoch": 0.28017578125, "grad_norm": 0.25904765725135803, "learning_rate": 0.0004240734781794996, "loss": 1.8294, "step": 5738 }, { "epoch": 0.280224609375, "grad_norm": 0.23121008276939392, "learning_rate": 0.0004240472399925487, "loss": 1.8671, "step": 5739 }, { "epoch": 0.2802734375, "grad_norm": 0.23412510752677917, "learning_rate": 0.0004240209981933183, "loss": 1.843, "step": 5740 }, { "epoch": 0.280322265625, "grad_norm": 0.26163575053215027, "learning_rate": 0.00042399475278244426, "loss": 1.8651, "step": 5741 }, { "epoch": 0.28037109375, "grad_norm": 0.2838466167449951, "learning_rate": 0.0004239685037605626, "loss": 1.8447, "step": 5742 }, { "epoch": 0.280419921875, "grad_norm": 0.25497353076934814, "learning_rate": 0.0004239422511283097, "loss": 1.8579, "step": 5743 }, { "epoch": 0.28046875, "grad_norm": 0.2538010776042938, "learning_rate": 0.0004239159948863216, "loss": 1.8468, "step": 5744 }, { "epoch": 0.280517578125, "grad_norm": 0.24734888970851898, "learning_rate": 0.00042388973503523466, "loss": 1.8443, "step": 5745 }, { "epoch": 0.28056640625, "grad_norm": 0.2580288052558899, "learning_rate": 0.0004238634715756855, "loss": 1.8499, "step": 5746 }, { "epoch": 0.280615234375, "grad_norm": 0.2237313687801361, "learning_rate": 0.0004238372045083105, "loss": 1.8884, "step": 5747 }, { "epoch": 0.2806640625, "grad_norm": 0.2268279492855072, "learning_rate": 0.00042381093383374617, "loss": 1.8306, "step": 5748 }, { "epoch": 0.280712890625, "grad_norm": 0.21860919892787933, "learning_rate": 0.0004237846595526294, "loss": 1.8248, "step": 5749 }, { "epoch": 0.28076171875, "grad_norm": 0.24432100355625153, "learning_rate": 0.0004237583816655969, "loss": 1.8454, "step": 5750 }, { "epoch": 0.280810546875, "grad_norm": 0.27891141176223755, "learning_rate": 0.0004237321001732854, "loss": 1.9049, "step": 5751 }, { "epoch": 0.280859375, "grad_norm": 0.3708457946777344, "learning_rate": 0.00042370581507633197, "loss": 1.8633, "step": 5752 }, { "epoch": 0.280908203125, "grad_norm": 0.3869527578353882, "learning_rate": 0.0004236795263753738, "loss": 1.8468, "step": 5753 }, { "epoch": 0.28095703125, "grad_norm": 0.38065409660339355, "learning_rate": 0.00042365323407104766, "loss": 1.8734, "step": 5754 }, { "epoch": 0.281005859375, "grad_norm": 0.33680129051208496, "learning_rate": 0.0004236269381639911, "loss": 1.872, "step": 5755 }, { "epoch": 0.2810546875, "grad_norm": 0.31837621331214905, "learning_rate": 0.00042360063865484125, "loss": 1.8672, "step": 5756 }, { "epoch": 0.281103515625, "grad_norm": 0.3312147557735443, "learning_rate": 0.00042357433554423554, "loss": 1.8645, "step": 5757 }, { "epoch": 0.28115234375, "grad_norm": 0.45421111583709717, "learning_rate": 0.0004235480288328114, "loss": 1.8462, "step": 5758 }, { "epoch": 0.281201171875, "grad_norm": 0.3705991506576538, "learning_rate": 0.0004235217185212065, "loss": 1.891, "step": 5759 }, { "epoch": 0.28125, "grad_norm": 0.2575342357158661, "learning_rate": 0.00042349540461005837, "loss": 1.8232, "step": 5760 }, { "epoch": 0.281298828125, "grad_norm": 0.29415830969810486, "learning_rate": 0.0004234690871000049, "loss": 1.8555, "step": 5761 }, { "epoch": 0.28134765625, "grad_norm": 0.2627907693386078, "learning_rate": 0.00042344276599168377, "loss": 1.8776, "step": 5762 }, { "epoch": 0.281396484375, "grad_norm": 0.2816172242164612, "learning_rate": 0.0004234164412857329, "loss": 1.8444, "step": 5763 }, { "epoch": 0.2814453125, "grad_norm": 0.2251715511083603, "learning_rate": 0.00042339011298279044, "loss": 1.8492, "step": 5764 }, { "epoch": 0.281494140625, "grad_norm": 0.2504105567932129, "learning_rate": 0.00042336378108349433, "loss": 1.8768, "step": 5765 }, { "epoch": 0.28154296875, "grad_norm": 0.2799117863178253, "learning_rate": 0.0004233374455884828, "loss": 1.8604, "step": 5766 }, { "epoch": 0.281591796875, "grad_norm": 0.288057416677475, "learning_rate": 0.00042331110649839403, "loss": 1.853, "step": 5767 }, { "epoch": 0.281640625, "grad_norm": 0.23485711216926575, "learning_rate": 0.00042328476381386655, "loss": 1.8577, "step": 5768 }, { "epoch": 0.281689453125, "grad_norm": 0.36416175961494446, "learning_rate": 0.0004232584175355386, "loss": 1.8705, "step": 5769 }, { "epoch": 0.28173828125, "grad_norm": 0.34462055563926697, "learning_rate": 0.00042323206766404885, "loss": 1.8817, "step": 5770 }, { "epoch": 0.281787109375, "grad_norm": 0.26172906160354614, "learning_rate": 0.0004232057142000358, "loss": 1.8652, "step": 5771 }, { "epoch": 0.2818359375, "grad_norm": 0.4128842055797577, "learning_rate": 0.0004231793571441383, "loss": 1.8618, "step": 5772 }, { "epoch": 0.281884765625, "grad_norm": 0.2836674749851227, "learning_rate": 0.0004231529964969949, "loss": 1.8823, "step": 5773 }, { "epoch": 0.28193359375, "grad_norm": 0.2969575822353363, "learning_rate": 0.0004231266322592447, "loss": 1.8482, "step": 5774 }, { "epoch": 0.281982421875, "grad_norm": 0.397379070520401, "learning_rate": 0.0004231002644315265, "loss": 1.8675, "step": 5775 }, { "epoch": 0.28203125, "grad_norm": 0.26300618052482605, "learning_rate": 0.00042307389301447947, "loss": 1.851, "step": 5776 }, { "epoch": 0.282080078125, "grad_norm": 0.31598204374313354, "learning_rate": 0.0004230475180087427, "loss": 1.8591, "step": 5777 }, { "epoch": 0.28212890625, "grad_norm": 0.2582678198814392, "learning_rate": 0.0004230211394149553, "loss": 1.8893, "step": 5778 }, { "epoch": 0.282177734375, "grad_norm": 0.26770710945129395, "learning_rate": 0.00042299475723375676, "loss": 1.8687, "step": 5779 }, { "epoch": 0.2822265625, "grad_norm": 0.3024587333202362, "learning_rate": 0.00042296837146578627, "loss": 1.8709, "step": 5780 }, { "epoch": 0.282275390625, "grad_norm": 0.2169741988182068, "learning_rate": 0.0004229419821116835, "loss": 1.8902, "step": 5781 }, { "epoch": 0.28232421875, "grad_norm": 0.29994505643844604, "learning_rate": 0.00042291558917208796, "loss": 1.826, "step": 5782 }, { "epoch": 0.282373046875, "grad_norm": 0.30426645278930664, "learning_rate": 0.0004228891926476392, "loss": 1.8376, "step": 5783 }, { "epoch": 0.282421875, "grad_norm": 0.2470012754201889, "learning_rate": 0.0004228627925389771, "loss": 1.8662, "step": 5784 }, { "epoch": 0.282470703125, "grad_norm": 0.2506422698497772, "learning_rate": 0.00042283638884674143, "loss": 1.8195, "step": 5785 }, { "epoch": 0.28251953125, "grad_norm": 0.2650343179702759, "learning_rate": 0.000422809981571572, "loss": 1.8571, "step": 5786 }, { "epoch": 0.282568359375, "grad_norm": 0.2926687002182007, "learning_rate": 0.00042278357071410904, "loss": 1.8522, "step": 5787 }, { "epoch": 0.2826171875, "grad_norm": 0.28957223892211914, "learning_rate": 0.0004227571562749925, "loss": 1.8556, "step": 5788 }, { "epoch": 0.282666015625, "grad_norm": 0.24465444684028625, "learning_rate": 0.00042273073825486264, "loss": 1.8469, "step": 5789 }, { "epoch": 0.28271484375, "grad_norm": 0.2877347469329834, "learning_rate": 0.00042270431665435956, "loss": 1.8737, "step": 5790 }, { "epoch": 0.282763671875, "grad_norm": 0.28459295630455017, "learning_rate": 0.0004226778914741237, "loss": 1.839, "step": 5791 }, { "epoch": 0.2828125, "grad_norm": 0.19561702013015747, "learning_rate": 0.0004226514627147954, "loss": 1.8518, "step": 5792 }, { "epoch": 0.282861328125, "grad_norm": 0.29720330238342285, "learning_rate": 0.00042262503037701545, "loss": 1.8791, "step": 5793 }, { "epoch": 0.28291015625, "grad_norm": 0.3428441882133484, "learning_rate": 0.00042259859446142425, "loss": 1.847, "step": 5794 }, { "epoch": 0.282958984375, "grad_norm": 0.28087061643600464, "learning_rate": 0.00042257215496866254, "loss": 1.8458, "step": 5795 }, { "epoch": 0.2830078125, "grad_norm": 0.3298932611942291, "learning_rate": 0.000422545711899371, "loss": 1.8414, "step": 5796 }, { "epoch": 0.283056640625, "grad_norm": 0.3032383322715759, "learning_rate": 0.00042251926525419073, "loss": 1.8542, "step": 5797 }, { "epoch": 0.28310546875, "grad_norm": 0.18870821595191956, "learning_rate": 0.00042249281503376244, "loss": 1.851, "step": 5798 }, { "epoch": 0.283154296875, "grad_norm": 0.26649877429008484, "learning_rate": 0.00042246636123872733, "loss": 1.8525, "step": 5799 }, { "epoch": 0.283203125, "grad_norm": 0.23878167569637299, "learning_rate": 0.00042243990386972645, "loss": 1.8373, "step": 5800 }, { "epoch": 0.283251953125, "grad_norm": 0.1988511085510254, "learning_rate": 0.0004224134429274011, "loss": 1.85, "step": 5801 }, { "epoch": 0.28330078125, "grad_norm": 0.2640785574913025, "learning_rate": 0.0004223869784123925, "loss": 1.869, "step": 5802 }, { "epoch": 0.283349609375, "grad_norm": 0.19171541929244995, "learning_rate": 0.0004223605103253421, "loss": 1.8693, "step": 5803 }, { "epoch": 0.2833984375, "grad_norm": 0.23216235637664795, "learning_rate": 0.0004223340386668913, "loss": 1.8415, "step": 5804 }, { "epoch": 0.283447265625, "grad_norm": 0.29344823956489563, "learning_rate": 0.00042230756343768173, "loss": 1.8602, "step": 5805 }, { "epoch": 0.28349609375, "grad_norm": 0.32699891924858093, "learning_rate": 0.00042228108463835496, "loss": 1.8538, "step": 5806 }, { "epoch": 0.283544921875, "grad_norm": 0.30811887979507446, "learning_rate": 0.0004222546022695529, "loss": 1.8476, "step": 5807 }, { "epoch": 0.28359375, "grad_norm": 0.24803638458251953, "learning_rate": 0.00042222811633191716, "loss": 1.8439, "step": 5808 }, { "epoch": 0.283642578125, "grad_norm": 0.27324041724205017, "learning_rate": 0.00042220162682608976, "loss": 1.8652, "step": 5809 }, { "epoch": 0.28369140625, "grad_norm": 0.23239666223526, "learning_rate": 0.0004221751337527127, "loss": 1.8567, "step": 5810 }, { "epoch": 0.283740234375, "grad_norm": 0.22703386843204498, "learning_rate": 0.00042214863711242804, "loss": 1.8364, "step": 5811 }, { "epoch": 0.2837890625, "grad_norm": 0.32496556639671326, "learning_rate": 0.00042212213690587784, "loss": 1.8388, "step": 5812 }, { "epoch": 0.283837890625, "grad_norm": 0.4149886667728424, "learning_rate": 0.00042209563313370447, "loss": 1.8268, "step": 5813 }, { "epoch": 0.28388671875, "grad_norm": 0.3360079228878021, "learning_rate": 0.00042206912579655033, "loss": 1.8407, "step": 5814 }, { "epoch": 0.283935546875, "grad_norm": 0.23490992188453674, "learning_rate": 0.00042204261489505775, "loss": 1.8341, "step": 5815 }, { "epoch": 0.283984375, "grad_norm": 0.32999494671821594, "learning_rate": 0.0004220161004298693, "loss": 1.8615, "step": 5816 }, { "epoch": 0.284033203125, "grad_norm": 0.3416467010974884, "learning_rate": 0.0004219895824016274, "loss": 1.848, "step": 5817 }, { "epoch": 0.28408203125, "grad_norm": 0.33328530192375183, "learning_rate": 0.0004219630608109751, "loss": 1.8787, "step": 5818 }, { "epoch": 0.284130859375, "grad_norm": 0.35296300053596497, "learning_rate": 0.00042193653565855476, "loss": 1.8641, "step": 5819 }, { "epoch": 0.2841796875, "grad_norm": 0.28623855113983154, "learning_rate": 0.00042191000694500957, "loss": 1.8437, "step": 5820 }, { "epoch": 0.284228515625, "grad_norm": 0.30990180373191833, "learning_rate": 0.0004218834746709823, "loss": 1.854, "step": 5821 }, { "epoch": 0.28427734375, "grad_norm": 0.3343527019023895, "learning_rate": 0.00042185693883711603, "loss": 1.8493, "step": 5822 }, { "epoch": 0.284326171875, "grad_norm": 0.22128254175186157, "learning_rate": 0.0004218303994440538, "loss": 1.8591, "step": 5823 }, { "epoch": 0.284375, "grad_norm": 0.2754362225532532, "learning_rate": 0.00042180385649243893, "loss": 1.8471, "step": 5824 }, { "epoch": 0.284423828125, "grad_norm": 0.29652583599090576, "learning_rate": 0.0004217773099829146, "loss": 1.8575, "step": 5825 }, { "epoch": 0.28447265625, "grad_norm": 0.300748348236084, "learning_rate": 0.0004217507599161242, "loss": 1.8567, "step": 5826 }, { "epoch": 0.284521484375, "grad_norm": 0.3307240605354309, "learning_rate": 0.0004217242062927113, "loss": 1.8461, "step": 5827 }, { "epoch": 0.2845703125, "grad_norm": 0.28966838121414185, "learning_rate": 0.0004216976491133195, "loss": 1.8556, "step": 5828 }, { "epoch": 0.284619140625, "grad_norm": 0.26862767338752747, "learning_rate": 0.0004216710883785922, "loss": 1.8443, "step": 5829 }, { "epoch": 0.28466796875, "grad_norm": 0.306730717420578, "learning_rate": 0.00042164452408917325, "loss": 1.8436, "step": 5830 }, { "epoch": 0.284716796875, "grad_norm": 0.3491697907447815, "learning_rate": 0.0004216179562457064, "loss": 1.8421, "step": 5831 }, { "epoch": 0.284765625, "grad_norm": 0.2979002594947815, "learning_rate": 0.00042159138484883566, "loss": 1.8341, "step": 5832 }, { "epoch": 0.284814453125, "grad_norm": 0.28149324655532837, "learning_rate": 0.00042156480989920486, "loss": 1.8259, "step": 5833 }, { "epoch": 0.28486328125, "grad_norm": 0.27277278900146484, "learning_rate": 0.0004215382313974582, "loss": 1.883, "step": 5834 }, { "epoch": 0.284912109375, "grad_norm": 0.25682342052459717, "learning_rate": 0.0004215116493442398, "loss": 1.865, "step": 5835 }, { "epoch": 0.2849609375, "grad_norm": 0.24882586300373077, "learning_rate": 0.0004214850637401939, "loss": 1.8568, "step": 5836 }, { "epoch": 0.285009765625, "grad_norm": 0.24342788755893707, "learning_rate": 0.0004214584745859647, "loss": 1.8483, "step": 5837 }, { "epoch": 0.28505859375, "grad_norm": 0.23973092436790466, "learning_rate": 0.0004214318818821967, "loss": 1.8463, "step": 5838 }, { "epoch": 0.285107421875, "grad_norm": 0.2488202601671219, "learning_rate": 0.0004214052856295345, "loss": 1.8716, "step": 5839 }, { "epoch": 0.28515625, "grad_norm": 0.19674520194530487, "learning_rate": 0.00042137868582862255, "loss": 1.8522, "step": 5840 }, { "epoch": 0.285205078125, "grad_norm": 0.25156694650650024, "learning_rate": 0.0004213520824801055, "loss": 1.8262, "step": 5841 }, { "epoch": 0.28525390625, "grad_norm": 0.25685200095176697, "learning_rate": 0.0004213254755846281, "loss": 1.8578, "step": 5842 }, { "epoch": 0.285302734375, "grad_norm": 0.26818346977233887, "learning_rate": 0.00042129886514283537, "loss": 1.8725, "step": 5843 }, { "epoch": 0.2853515625, "grad_norm": 0.2598773241043091, "learning_rate": 0.00042127225115537204, "loss": 1.8589, "step": 5844 }, { "epoch": 0.285400390625, "grad_norm": 0.2606641948223114, "learning_rate": 0.0004212456336228832, "loss": 1.8609, "step": 5845 }, { "epoch": 0.28544921875, "grad_norm": 0.21932974457740784, "learning_rate": 0.0004212190125460139, "loss": 1.8654, "step": 5846 }, { "epoch": 0.285498046875, "grad_norm": 0.2399248331785202, "learning_rate": 0.0004211923879254095, "loss": 1.8382, "step": 5847 }, { "epoch": 0.285546875, "grad_norm": 0.2838203012943268, "learning_rate": 0.00042116575976171495, "loss": 1.8683, "step": 5848 }, { "epoch": 0.285595703125, "grad_norm": 0.2573049068450928, "learning_rate": 0.0004211391280555759, "loss": 1.8709, "step": 5849 }, { "epoch": 0.28564453125, "grad_norm": 0.2146390825510025, "learning_rate": 0.0004211124928076377, "loss": 1.8678, "step": 5850 }, { "epoch": 0.285693359375, "grad_norm": 0.23097573220729828, "learning_rate": 0.0004210858540185457, "loss": 1.8595, "step": 5851 }, { "epoch": 0.2857421875, "grad_norm": 0.23823103308677673, "learning_rate": 0.0004210592116889458, "loss": 1.8591, "step": 5852 }, { "epoch": 0.285791015625, "grad_norm": 0.27302539348602295, "learning_rate": 0.0004210325658194835, "loss": 1.8753, "step": 5853 }, { "epoch": 0.28583984375, "grad_norm": 0.3570581078529358, "learning_rate": 0.0004210059164108047, "loss": 1.853, "step": 5854 }, { "epoch": 0.285888671875, "grad_norm": 0.38068100810050964, "learning_rate": 0.0004209792634635552, "loss": 1.8346, "step": 5855 }, { "epoch": 0.2859375, "grad_norm": 0.2793118953704834, "learning_rate": 0.0004209526069783809, "loss": 1.8412, "step": 5856 }, { "epoch": 0.285986328125, "grad_norm": 0.24360795319080353, "learning_rate": 0.00042092594695592795, "loss": 1.8668, "step": 5857 }, { "epoch": 0.28603515625, "grad_norm": 0.2831730544567108, "learning_rate": 0.0004208992833968425, "loss": 1.8795, "step": 5858 }, { "epoch": 0.286083984375, "grad_norm": 0.2456052601337433, "learning_rate": 0.00042087261630177066, "loss": 1.8634, "step": 5859 }, { "epoch": 0.2861328125, "grad_norm": 0.29075735807418823, "learning_rate": 0.00042084594567135875, "loss": 1.8632, "step": 5860 }, { "epoch": 0.286181640625, "grad_norm": 0.22859834134578705, "learning_rate": 0.00042081927150625315, "loss": 1.859, "step": 5861 }, { "epoch": 0.28623046875, "grad_norm": 0.25533321499824524, "learning_rate": 0.00042079259380710046, "loss": 1.8677, "step": 5862 }, { "epoch": 0.286279296875, "grad_norm": 0.26070040464401245, "learning_rate": 0.00042076591257454704, "loss": 1.8467, "step": 5863 }, { "epoch": 0.286328125, "grad_norm": 0.21229352056980133, "learning_rate": 0.0004207392278092397, "loss": 1.869, "step": 5864 }, { "epoch": 0.286376953125, "grad_norm": 0.3235326111316681, "learning_rate": 0.000420712539511825, "loss": 1.8501, "step": 5865 }, { "epoch": 0.28642578125, "grad_norm": 0.3023001253604889, "learning_rate": 0.0004206858476829499, "loss": 1.8621, "step": 5866 }, { "epoch": 0.286474609375, "grad_norm": 0.3123144209384918, "learning_rate": 0.00042065915232326125, "loss": 1.8586, "step": 5867 }, { "epoch": 0.2865234375, "grad_norm": 0.40562281012535095, "learning_rate": 0.0004206324534334059, "loss": 1.8687, "step": 5868 }, { "epoch": 0.286572265625, "grad_norm": 0.435366153717041, "learning_rate": 0.00042060575101403113, "loss": 1.8715, "step": 5869 }, { "epoch": 0.28662109375, "grad_norm": 0.2810698449611664, "learning_rate": 0.000420579045065784, "loss": 1.8473, "step": 5870 }, { "epoch": 0.286669921875, "grad_norm": 0.26902082562446594, "learning_rate": 0.0004205523355893118, "loss": 1.8586, "step": 5871 }, { "epoch": 0.28671875, "grad_norm": 0.33674395084381104, "learning_rate": 0.00042052562258526176, "loss": 1.8603, "step": 5872 }, { "epoch": 0.286767578125, "grad_norm": 0.32213249802589417, "learning_rate": 0.00042049890605428135, "loss": 1.8719, "step": 5873 }, { "epoch": 0.28681640625, "grad_norm": 0.29787981510162354, "learning_rate": 0.0004204721859970181, "loss": 1.8386, "step": 5874 }, { "epoch": 0.286865234375, "grad_norm": 0.3253142237663269, "learning_rate": 0.0004204454624141195, "loss": 1.8744, "step": 5875 }, { "epoch": 0.2869140625, "grad_norm": 0.2932067811489105, "learning_rate": 0.00042041873530623326, "loss": 1.8802, "step": 5876 }, { "epoch": 0.286962890625, "grad_norm": 0.24594993889331818, "learning_rate": 0.0004203920046740071, "loss": 1.8575, "step": 5877 }, { "epoch": 0.28701171875, "grad_norm": 0.2725224792957306, "learning_rate": 0.000420365270518089, "loss": 1.8548, "step": 5878 }, { "epoch": 0.287060546875, "grad_norm": 0.2844351828098297, "learning_rate": 0.00042033853283912674, "loss": 1.8626, "step": 5879 }, { "epoch": 0.287109375, "grad_norm": 0.22808276116847992, "learning_rate": 0.00042031179163776845, "loss": 1.8635, "step": 5880 }, { "epoch": 0.287158203125, "grad_norm": 0.2750243544578552, "learning_rate": 0.0004202850469146621, "loss": 1.8631, "step": 5881 }, { "epoch": 0.28720703125, "grad_norm": 0.2834615409374237, "learning_rate": 0.00042025829867045584, "loss": 1.8794, "step": 5882 }, { "epoch": 0.287255859375, "grad_norm": 0.22627496719360352, "learning_rate": 0.0004202315469057981, "loss": 1.8708, "step": 5883 }, { "epoch": 0.2873046875, "grad_norm": 0.23140601813793182, "learning_rate": 0.00042020479162133714, "loss": 1.8453, "step": 5884 }, { "epoch": 0.287353515625, "grad_norm": 0.28608494997024536, "learning_rate": 0.0004201780328177213, "loss": 1.8671, "step": 5885 }, { "epoch": 0.28740234375, "grad_norm": 0.25574082136154175, "learning_rate": 0.0004201512704955993, "loss": 1.8644, "step": 5886 }, { "epoch": 0.287451171875, "grad_norm": 0.23444001376628876, "learning_rate": 0.00042012450465561956, "loss": 1.8716, "step": 5887 }, { "epoch": 0.2875, "grad_norm": 0.3088957667350769, "learning_rate": 0.000420097735298431, "loss": 1.8696, "step": 5888 }, { "epoch": 0.287548828125, "grad_norm": 0.3023104667663574, "learning_rate": 0.0004200709624246822, "loss": 1.8558, "step": 5889 }, { "epoch": 0.28759765625, "grad_norm": 0.26357972621917725, "learning_rate": 0.00042004418603502203, "loss": 1.8402, "step": 5890 }, { "epoch": 0.287646484375, "grad_norm": 0.2371443510055542, "learning_rate": 0.00042001740613009945, "loss": 1.8671, "step": 5891 }, { "epoch": 0.2876953125, "grad_norm": 0.3125104606151581, "learning_rate": 0.00041999062271056364, "loss": 1.8601, "step": 5892 }, { "epoch": 0.287744140625, "grad_norm": 0.26288145780563354, "learning_rate": 0.0004199638357770635, "loss": 1.8609, "step": 5893 }, { "epoch": 0.28779296875, "grad_norm": 0.284155935049057, "learning_rate": 0.0004199370453302484, "loss": 1.8518, "step": 5894 }, { "epoch": 0.287841796875, "grad_norm": 0.331642746925354, "learning_rate": 0.00041991025137076747, "loss": 1.8737, "step": 5895 }, { "epoch": 0.287890625, "grad_norm": 0.2980753481388092, "learning_rate": 0.0004198834538992703, "loss": 1.8569, "step": 5896 }, { "epoch": 0.287939453125, "grad_norm": 0.21248306334018707, "learning_rate": 0.0004198566529164062, "loss": 1.8514, "step": 5897 }, { "epoch": 0.28798828125, "grad_norm": 0.2502215802669525, "learning_rate": 0.00041982984842282476, "loss": 1.8649, "step": 5898 }, { "epoch": 0.288037109375, "grad_norm": 0.2771085202693939, "learning_rate": 0.0004198030404191755, "loss": 1.8643, "step": 5899 }, { "epoch": 0.2880859375, "grad_norm": 0.29088518023490906, "learning_rate": 0.0004197762289061083, "loss": 1.8736, "step": 5900 }, { "epoch": 0.288134765625, "grad_norm": 0.2825155258178711, "learning_rate": 0.0004197494138842728, "loss": 1.8753, "step": 5901 }, { "epoch": 0.28818359375, "grad_norm": 0.24953234195709229, "learning_rate": 0.000419722595354319, "loss": 1.8484, "step": 5902 }, { "epoch": 0.288232421875, "grad_norm": 0.2792736291885376, "learning_rate": 0.00041969577331689683, "loss": 1.8734, "step": 5903 }, { "epoch": 0.28828125, "grad_norm": 0.30914410948753357, "learning_rate": 0.00041966894777265636, "loss": 1.8589, "step": 5904 }, { "epoch": 0.288330078125, "grad_norm": 0.22003182768821716, "learning_rate": 0.00041964211872224776, "loss": 1.8606, "step": 5905 }, { "epoch": 0.28837890625, "grad_norm": 0.29087862372398376, "learning_rate": 0.00041961528616632124, "loss": 1.8426, "step": 5906 }, { "epoch": 0.288427734375, "grad_norm": 0.33773329854011536, "learning_rate": 0.00041958845010552694, "loss": 1.862, "step": 5907 }, { "epoch": 0.2884765625, "grad_norm": 0.282086580991745, "learning_rate": 0.00041956161054051543, "loss": 1.8796, "step": 5908 }, { "epoch": 0.288525390625, "grad_norm": 0.3085750639438629, "learning_rate": 0.00041953476747193724, "loss": 1.8737, "step": 5909 }, { "epoch": 0.28857421875, "grad_norm": 0.34039631485939026, "learning_rate": 0.00041950792090044277, "loss": 1.8396, "step": 5910 }, { "epoch": 0.288623046875, "grad_norm": 0.24282421171665192, "learning_rate": 0.0004194810708266827, "loss": 1.8526, "step": 5911 }, { "epoch": 0.288671875, "grad_norm": 0.33240315318107605, "learning_rate": 0.00041945421725130786, "loss": 1.8715, "step": 5912 }, { "epoch": 0.288720703125, "grad_norm": 0.31557497382164, "learning_rate": 0.00041942736017496895, "loss": 1.848, "step": 5913 }, { "epoch": 0.28876953125, "grad_norm": 0.24053603410720825, "learning_rate": 0.00041940049959831704, "loss": 1.8566, "step": 5914 }, { "epoch": 0.288818359375, "grad_norm": 0.2812685966491699, "learning_rate": 0.00041937363552200304, "loss": 1.8427, "step": 5915 }, { "epoch": 0.2888671875, "grad_norm": 0.2983727753162384, "learning_rate": 0.00041934676794667784, "loss": 1.8468, "step": 5916 }, { "epoch": 0.288916015625, "grad_norm": 0.23161189258098602, "learning_rate": 0.00041931989687299284, "loss": 1.8367, "step": 5917 }, { "epoch": 0.28896484375, "grad_norm": 0.24041086435317993, "learning_rate": 0.0004192930223015991, "loss": 1.8287, "step": 5918 }, { "epoch": 0.289013671875, "grad_norm": 0.273355633020401, "learning_rate": 0.0004192661442331481, "loss": 1.8389, "step": 5919 }, { "epoch": 0.2890625, "grad_norm": 0.24726234376430511, "learning_rate": 0.00041923926266829127, "loss": 1.8612, "step": 5920 }, { "epoch": 0.289111328125, "grad_norm": 0.20615176856517792, "learning_rate": 0.00041921237760767996, "loss": 1.8613, "step": 5921 }, { "epoch": 0.28916015625, "grad_norm": 0.24903123080730438, "learning_rate": 0.00041918548905196584, "loss": 1.8641, "step": 5922 }, { "epoch": 0.289208984375, "grad_norm": 0.2528405785560608, "learning_rate": 0.0004191585970018006, "loss": 1.8705, "step": 5923 }, { "epoch": 0.2892578125, "grad_norm": 0.2842909097671509, "learning_rate": 0.0004191317014578359, "loss": 1.8745, "step": 5924 }, { "epoch": 0.289306640625, "grad_norm": 0.33656787872314453, "learning_rate": 0.00041910480242072357, "loss": 1.8432, "step": 5925 }, { "epoch": 0.28935546875, "grad_norm": 0.2529006004333496, "learning_rate": 0.00041907789989111563, "loss": 1.84, "step": 5926 }, { "epoch": 0.289404296875, "grad_norm": 0.268276572227478, "learning_rate": 0.00041905099386966405, "loss": 1.8326, "step": 5927 }, { "epoch": 0.289453125, "grad_norm": 0.3311121165752411, "learning_rate": 0.0004190240843570208, "loss": 1.8633, "step": 5928 }, { "epoch": 0.289501953125, "grad_norm": 0.26997843384742737, "learning_rate": 0.00041899717135383824, "loss": 1.8633, "step": 5929 }, { "epoch": 0.28955078125, "grad_norm": 0.30183741450309753, "learning_rate": 0.0004189702548607685, "loss": 1.8484, "step": 5930 }, { "epoch": 0.289599609375, "grad_norm": 0.3349006175994873, "learning_rate": 0.00041894333487846417, "loss": 1.8672, "step": 5931 }, { "epoch": 0.2896484375, "grad_norm": 0.32994547486305237, "learning_rate": 0.00041891641140757725, "loss": 1.8724, "step": 5932 }, { "epoch": 0.289697265625, "grad_norm": 0.4010109007358551, "learning_rate": 0.0004188894844487605, "loss": 1.8559, "step": 5933 }, { "epoch": 0.28974609375, "grad_norm": 0.30491065979003906, "learning_rate": 0.0004188625540026666, "loss": 1.8779, "step": 5934 }, { "epoch": 0.289794921875, "grad_norm": 0.2674127519130707, "learning_rate": 0.000418835620069948, "loss": 1.8569, "step": 5935 }, { "epoch": 0.28984375, "grad_norm": 0.28661656379699707, "learning_rate": 0.0004188086826512577, "loss": 1.846, "step": 5936 }, { "epoch": 0.289892578125, "grad_norm": 0.2169773429632187, "learning_rate": 0.00041878174174724844, "loss": 1.8583, "step": 5937 }, { "epoch": 0.28994140625, "grad_norm": 0.3241456151008606, "learning_rate": 0.00041875479735857307, "loss": 1.8515, "step": 5938 }, { "epoch": 0.289990234375, "grad_norm": 0.29660165309906006, "learning_rate": 0.00041872784948588477, "loss": 1.8751, "step": 5939 }, { "epoch": 0.2900390625, "grad_norm": 0.2766478359699249, "learning_rate": 0.00041870089812983655, "loss": 1.8674, "step": 5940 }, { "epoch": 0.290087890625, "grad_norm": 0.2679663598537445, "learning_rate": 0.0004186739432910816, "loss": 1.878, "step": 5941 }, { "epoch": 0.29013671875, "grad_norm": 0.2712416350841522, "learning_rate": 0.00041864698497027315, "loss": 1.8409, "step": 5942 }, { "epoch": 0.290185546875, "grad_norm": 0.2714209258556366, "learning_rate": 0.0004186200231680647, "loss": 1.8702, "step": 5943 }, { "epoch": 0.290234375, "grad_norm": 0.37759074568748474, "learning_rate": 0.0004185930578851096, "loss": 1.8664, "step": 5944 }, { "epoch": 0.290283203125, "grad_norm": 0.315296471118927, "learning_rate": 0.00041856608912206134, "loss": 1.8761, "step": 5945 }, { "epoch": 0.29033203125, "grad_norm": 0.23445647954940796, "learning_rate": 0.0004185391168795736, "loss": 1.8642, "step": 5946 }, { "epoch": 0.290380859375, "grad_norm": 0.3199787437915802, "learning_rate": 0.0004185121411583001, "loss": 1.8405, "step": 5947 }, { "epoch": 0.2904296875, "grad_norm": 0.3702246844768524, "learning_rate": 0.00041848516195889445, "loss": 1.8859, "step": 5948 }, { "epoch": 0.290478515625, "grad_norm": 0.25395888090133667, "learning_rate": 0.00041845817928201076, "loss": 1.8693, "step": 5949 }, { "epoch": 0.29052734375, "grad_norm": 0.35151761770248413, "learning_rate": 0.0004184311931283026, "loss": 1.856, "step": 5950 }, { "epoch": 0.290576171875, "grad_norm": 0.3220142722129822, "learning_rate": 0.00041840420349842453, "loss": 1.8474, "step": 5951 }, { "epoch": 0.290625, "grad_norm": 0.252423495054245, "learning_rate": 0.0004183772103930302, "loss": 1.8926, "step": 5952 }, { "epoch": 0.290673828125, "grad_norm": 0.3806728720664978, "learning_rate": 0.0004183502138127741, "loss": 1.8595, "step": 5953 }, { "epoch": 0.29072265625, "grad_norm": 0.30329737067222595, "learning_rate": 0.0004183232137583102, "loss": 1.8606, "step": 5954 }, { "epoch": 0.290771484375, "grad_norm": 0.2830688953399658, "learning_rate": 0.0004182962102302933, "loss": 1.8463, "step": 5955 }, { "epoch": 0.2908203125, "grad_norm": 0.3489900231361389, "learning_rate": 0.00041826920322937753, "loss": 1.8457, "step": 5956 }, { "epoch": 0.290869140625, "grad_norm": 0.24711520969867706, "learning_rate": 0.00041824219275621755, "loss": 1.8409, "step": 5957 }, { "epoch": 0.29091796875, "grad_norm": 0.29345160722732544, "learning_rate": 0.00041821517881146785, "loss": 1.862, "step": 5958 }, { "epoch": 0.290966796875, "grad_norm": 0.3426625728607178, "learning_rate": 0.00041818816139578335, "loss": 1.8669, "step": 5959 }, { "epoch": 0.291015625, "grad_norm": 0.24783186614513397, "learning_rate": 0.00041816114050981863, "loss": 1.8443, "step": 5960 }, { "epoch": 0.291064453125, "grad_norm": 0.3483213782310486, "learning_rate": 0.00041813411615422867, "loss": 1.8345, "step": 5961 }, { "epoch": 0.29111328125, "grad_norm": 0.3218459188938141, "learning_rate": 0.0004181070883296685, "loss": 1.8693, "step": 5962 }, { "epoch": 0.291162109375, "grad_norm": 0.27098819613456726, "learning_rate": 0.00041808005703679297, "loss": 1.8845, "step": 5963 }, { "epoch": 0.2912109375, "grad_norm": 0.2880007326602936, "learning_rate": 0.0004180530222762574, "loss": 1.8552, "step": 5964 }, { "epoch": 0.291259765625, "grad_norm": 0.292128324508667, "learning_rate": 0.00041802598404871684, "loss": 1.8734, "step": 5965 }, { "epoch": 0.29130859375, "grad_norm": 0.28159698843955994, "learning_rate": 0.00041799894235482665, "loss": 1.8734, "step": 5966 }, { "epoch": 0.291357421875, "grad_norm": 0.2734799087047577, "learning_rate": 0.0004179718971952423, "loss": 1.8873, "step": 5967 }, { "epoch": 0.29140625, "grad_norm": 0.2318652868270874, "learning_rate": 0.0004179448485706191, "loss": 1.8979, "step": 5968 }, { "epoch": 0.291455078125, "grad_norm": 0.23805910348892212, "learning_rate": 0.0004179177964816126, "loss": 1.8235, "step": 5969 }, { "epoch": 0.29150390625, "grad_norm": 0.24150030314922333, "learning_rate": 0.0004178907409288785, "loss": 1.8198, "step": 5970 }, { "epoch": 0.291552734375, "grad_norm": 0.2571336328983307, "learning_rate": 0.00041786368191307254, "loss": 1.8558, "step": 5971 }, { "epoch": 0.2916015625, "grad_norm": 0.2767156958580017, "learning_rate": 0.0004178366194348504, "loss": 1.8396, "step": 5972 }, { "epoch": 0.291650390625, "grad_norm": 0.2849787473678589, "learning_rate": 0.00041780955349486815, "loss": 1.8782, "step": 5973 }, { "epoch": 0.29169921875, "grad_norm": 0.29075464606285095, "learning_rate": 0.0004177824840937816, "loss": 1.8865, "step": 5974 }, { "epoch": 0.291748046875, "grad_norm": 0.26066380739212036, "learning_rate": 0.0004177554112322468, "loss": 1.8679, "step": 5975 }, { "epoch": 0.291796875, "grad_norm": 0.26008471846580505, "learning_rate": 0.00041772833491091993, "loss": 1.8615, "step": 5976 }, { "epoch": 0.291845703125, "grad_norm": 0.2690640687942505, "learning_rate": 0.0004177012551304572, "loss": 1.8197, "step": 5977 }, { "epoch": 0.29189453125, "grad_norm": 0.3361062705516815, "learning_rate": 0.0004176741718915149, "loss": 1.8647, "step": 5978 }, { "epoch": 0.291943359375, "grad_norm": 0.28335127234458923, "learning_rate": 0.0004176470851947494, "loss": 1.8204, "step": 5979 }, { "epoch": 0.2919921875, "grad_norm": 0.23811258375644684, "learning_rate": 0.00041761999504081725, "loss": 1.8746, "step": 5980 }, { "epoch": 0.292041015625, "grad_norm": 0.26539406180381775, "learning_rate": 0.00041759290143037495, "loss": 1.8603, "step": 5981 }, { "epoch": 0.29208984375, "grad_norm": 0.3018334209918976, "learning_rate": 0.0004175658043640791, "loss": 1.8286, "step": 5982 }, { "epoch": 0.292138671875, "grad_norm": 0.21713002026081085, "learning_rate": 0.0004175387038425864, "loss": 1.8685, "step": 5983 }, { "epoch": 0.2921875, "grad_norm": 0.21812157332897186, "learning_rate": 0.00041751159986655375, "loss": 1.8247, "step": 5984 }, { "epoch": 0.292236328125, "grad_norm": 0.3052917718887329, "learning_rate": 0.00041748449243663794, "loss": 1.8627, "step": 5985 }, { "epoch": 0.29228515625, "grad_norm": 0.30987662076950073, "learning_rate": 0.000417457381553496, "loss": 1.8415, "step": 5986 }, { "epoch": 0.292333984375, "grad_norm": 0.265992134809494, "learning_rate": 0.000417430267217785, "loss": 1.866, "step": 5987 }, { "epoch": 0.2923828125, "grad_norm": 0.2637771666049957, "learning_rate": 0.000417403149430162, "loss": 1.8864, "step": 5988 }, { "epoch": 0.292431640625, "grad_norm": 0.3284306824207306, "learning_rate": 0.0004173760281912843, "loss": 1.8652, "step": 5989 }, { "epoch": 0.29248046875, "grad_norm": 0.28847068548202515, "learning_rate": 0.0004173489035018092, "loss": 1.8464, "step": 5990 }, { "epoch": 0.292529296875, "grad_norm": 0.26643645763397217, "learning_rate": 0.00041732177536239404, "loss": 1.8313, "step": 5991 }, { "epoch": 0.292578125, "grad_norm": 0.29796814918518066, "learning_rate": 0.0004172946437736963, "loss": 1.8675, "step": 5992 }, { "epoch": 0.292626953125, "grad_norm": 0.31673842668533325, "learning_rate": 0.0004172675087363735, "loss": 1.8414, "step": 5993 }, { "epoch": 0.29267578125, "grad_norm": 0.2871735692024231, "learning_rate": 0.0004172403702510834, "loss": 1.8773, "step": 5994 }, { "epoch": 0.292724609375, "grad_norm": 0.27635055780410767, "learning_rate": 0.00041721322831848363, "loss": 1.8559, "step": 5995 }, { "epoch": 0.2927734375, "grad_norm": 0.377162903547287, "learning_rate": 0.00041718608293923203, "loss": 1.8299, "step": 5996 }, { "epoch": 0.292822265625, "grad_norm": 0.36490997672080994, "learning_rate": 0.00041715893411398646, "loss": 1.8738, "step": 5997 }, { "epoch": 0.29287109375, "grad_norm": 0.32939720153808594, "learning_rate": 0.00041713178184340496, "loss": 1.8558, "step": 5998 }, { "epoch": 0.292919921875, "grad_norm": 0.3538917303085327, "learning_rate": 0.00041710462612814554, "loss": 1.8806, "step": 5999 }, { "epoch": 0.29296875, "grad_norm": 0.2983965277671814, "learning_rate": 0.0004170774669688662, "loss": 1.8279, "step": 6000 }, { "epoch": 0.293017578125, "grad_norm": 0.27992457151412964, "learning_rate": 0.0004170503043662255, "loss": 1.8498, "step": 6001 }, { "epoch": 0.29306640625, "grad_norm": 0.3165068030357361, "learning_rate": 0.0004170231383208814, "loss": 1.8577, "step": 6002 }, { "epoch": 0.293115234375, "grad_norm": 0.2682267725467682, "learning_rate": 0.0004169959688334925, "loss": 1.8453, "step": 6003 }, { "epoch": 0.2931640625, "grad_norm": 0.2754788100719452, "learning_rate": 0.0004169687959047173, "loss": 1.8482, "step": 6004 }, { "epoch": 0.293212890625, "grad_norm": 0.37217435240745544, "learning_rate": 0.0004169416195352141, "loss": 1.8298, "step": 6005 }, { "epoch": 0.29326171875, "grad_norm": 0.29167887568473816, "learning_rate": 0.0004169144397256418, "loss": 1.8712, "step": 6006 }, { "epoch": 0.293310546875, "grad_norm": 0.2531486749649048, "learning_rate": 0.0004168872564766591, "loss": 1.8473, "step": 6007 }, { "epoch": 0.293359375, "grad_norm": 0.3402627110481262, "learning_rate": 0.0004168600697889247, "loss": 1.8469, "step": 6008 }, { "epoch": 0.293408203125, "grad_norm": 0.3296284079551697, "learning_rate": 0.00041683287966309753, "loss": 1.8352, "step": 6009 }, { "epoch": 0.29345703125, "grad_norm": 0.21497267484664917, "learning_rate": 0.0004168056860998366, "loss": 1.8742, "step": 6010 }, { "epoch": 0.293505859375, "grad_norm": 0.3073688745498657, "learning_rate": 0.00041677848909980094, "loss": 1.8425, "step": 6011 }, { "epoch": 0.2935546875, "grad_norm": 0.2905511260032654, "learning_rate": 0.00041675128866364966, "loss": 1.8541, "step": 6012 }, { "epoch": 0.293603515625, "grad_norm": 0.21700142323970795, "learning_rate": 0.000416724084792042, "loss": 1.8669, "step": 6013 }, { "epoch": 0.29365234375, "grad_norm": 0.2638050615787506, "learning_rate": 0.0004166968774856373, "loss": 1.843, "step": 6014 }, { "epoch": 0.293701171875, "grad_norm": 0.23075425624847412, "learning_rate": 0.000416669666745095, "loss": 1.855, "step": 6015 }, { "epoch": 0.29375, "grad_norm": 0.2912154495716095, "learning_rate": 0.0004166424525710744, "loss": 1.8238, "step": 6016 }, { "epoch": 0.293798828125, "grad_norm": 0.26595181226730347, "learning_rate": 0.00041661523496423525, "loss": 1.8269, "step": 6017 }, { "epoch": 0.29384765625, "grad_norm": 0.2641473412513733, "learning_rate": 0.0004165880139252371, "loss": 1.866, "step": 6018 }, { "epoch": 0.293896484375, "grad_norm": 0.30185404419898987, "learning_rate": 0.00041656078945473976, "loss": 1.8363, "step": 6019 }, { "epoch": 0.2939453125, "grad_norm": 0.32766929268836975, "learning_rate": 0.00041653356155340275, "loss": 1.8511, "step": 6020 }, { "epoch": 0.293994140625, "grad_norm": 0.2634750306606293, "learning_rate": 0.0004165063302218863, "loss": 1.8705, "step": 6021 }, { "epoch": 0.29404296875, "grad_norm": 0.2535545825958252, "learning_rate": 0.0004164790954608502, "loss": 1.8794, "step": 6022 }, { "epoch": 0.294091796875, "grad_norm": 0.34955546259880066, "learning_rate": 0.0004164518572709546, "loss": 1.8357, "step": 6023 }, { "epoch": 0.294140625, "grad_norm": 0.3090803921222687, "learning_rate": 0.0004164246156528597, "loss": 1.8753, "step": 6024 }, { "epoch": 0.294189453125, "grad_norm": 0.32559874653816223, "learning_rate": 0.0004163973706072255, "loss": 1.8726, "step": 6025 }, { "epoch": 0.29423828125, "grad_norm": 0.25925761461257935, "learning_rate": 0.00041637012213471245, "loss": 1.8617, "step": 6026 }, { "epoch": 0.294287109375, "grad_norm": 0.3078628182411194, "learning_rate": 0.00041634287023598087, "loss": 1.8657, "step": 6027 }, { "epoch": 0.2943359375, "grad_norm": 0.27124324440956116, "learning_rate": 0.00041631561491169134, "loss": 1.8443, "step": 6028 }, { "epoch": 0.294384765625, "grad_norm": 0.21561706066131592, "learning_rate": 0.00041628835616250434, "loss": 1.864, "step": 6029 }, { "epoch": 0.29443359375, "grad_norm": 0.3024269640445709, "learning_rate": 0.00041626109398908053, "loss": 1.8686, "step": 6030 }, { "epoch": 0.294482421875, "grad_norm": 0.2623787820339203, "learning_rate": 0.00041623382839208053, "loss": 1.8665, "step": 6031 }, { "epoch": 0.29453125, "grad_norm": 0.35613200068473816, "learning_rate": 0.0004162065593721653, "loss": 1.8376, "step": 6032 }, { "epoch": 0.294580078125, "grad_norm": 0.27353692054748535, "learning_rate": 0.0004161792869299957, "loss": 1.8585, "step": 6033 }, { "epoch": 0.29462890625, "grad_norm": 0.2601346969604492, "learning_rate": 0.00041615201106623255, "loss": 1.8394, "step": 6034 }, { "epoch": 0.294677734375, "grad_norm": 0.31750622391700745, "learning_rate": 0.00041612473178153714, "loss": 1.834, "step": 6035 }, { "epoch": 0.2947265625, "grad_norm": 0.29975372552871704, "learning_rate": 0.0004160974490765704, "loss": 1.8472, "step": 6036 }, { "epoch": 0.294775390625, "grad_norm": 0.24824558198451996, "learning_rate": 0.00041607016295199363, "loss": 1.8875, "step": 6037 }, { "epoch": 0.29482421875, "grad_norm": 0.3031201958656311, "learning_rate": 0.0004160428734084681, "loss": 1.8604, "step": 6038 }, { "epoch": 0.294873046875, "grad_norm": 0.3179837465286255, "learning_rate": 0.00041601558044665525, "loss": 1.8449, "step": 6039 }, { "epoch": 0.294921875, "grad_norm": 0.2738080322742462, "learning_rate": 0.0004159882840672166, "loss": 1.8544, "step": 6040 }, { "epoch": 0.294970703125, "grad_norm": 0.35408005118370056, "learning_rate": 0.0004159609842708135, "loss": 1.8545, "step": 6041 }, { "epoch": 0.29501953125, "grad_norm": 0.346027135848999, "learning_rate": 0.00041593368105810775, "loss": 1.8477, "step": 6042 }, { "epoch": 0.295068359375, "grad_norm": 0.24517711997032166, "learning_rate": 0.00041590637442976097, "loss": 1.8436, "step": 6043 }, { "epoch": 0.2951171875, "grad_norm": 0.3239712119102478, "learning_rate": 0.00041587906438643506, "loss": 1.8552, "step": 6044 }, { "epoch": 0.295166015625, "grad_norm": 0.2698994576931, "learning_rate": 0.0004158517509287918, "loss": 1.8419, "step": 6045 }, { "epoch": 0.29521484375, "grad_norm": 0.24801425635814667, "learning_rate": 0.0004158244340574932, "loss": 1.8302, "step": 6046 }, { "epoch": 0.295263671875, "grad_norm": 0.27338412404060364, "learning_rate": 0.0004157971137732013, "loss": 1.848, "step": 6047 }, { "epoch": 0.2953125, "grad_norm": 0.28065165877342224, "learning_rate": 0.0004157697900765783, "loss": 1.8519, "step": 6048 }, { "epoch": 0.295361328125, "grad_norm": 0.21480709314346313, "learning_rate": 0.0004157424629682863, "loss": 1.8613, "step": 6049 }, { "epoch": 0.29541015625, "grad_norm": 0.2974233329296112, "learning_rate": 0.00041571513244898764, "loss": 1.8305, "step": 6050 }, { "epoch": 0.295458984375, "grad_norm": 0.2503338158130646, "learning_rate": 0.0004156877985193447, "loss": 1.8414, "step": 6051 }, { "epoch": 0.2955078125, "grad_norm": 0.2998024821281433, "learning_rate": 0.00041566046118001994, "loss": 1.8619, "step": 6052 }, { "epoch": 0.295556640625, "grad_norm": 0.2849915027618408, "learning_rate": 0.00041563312043167586, "loss": 1.8563, "step": 6053 }, { "epoch": 0.29560546875, "grad_norm": 0.23040848970413208, "learning_rate": 0.00041560577627497517, "loss": 1.8638, "step": 6054 }, { "epoch": 0.295654296875, "grad_norm": 0.2665606737136841, "learning_rate": 0.00041557842871058043, "loss": 1.8507, "step": 6055 }, { "epoch": 0.295703125, "grad_norm": 0.2839956283569336, "learning_rate": 0.00041555107773915464, "loss": 1.8539, "step": 6056 }, { "epoch": 0.295751953125, "grad_norm": 0.33067360520362854, "learning_rate": 0.00041552372336136056, "loss": 1.8291, "step": 6057 }, { "epoch": 0.29580078125, "grad_norm": 0.32121217250823975, "learning_rate": 0.000415496365577861, "loss": 1.8492, "step": 6058 }, { "epoch": 0.295849609375, "grad_norm": 0.2747727632522583, "learning_rate": 0.0004154690043893194, "loss": 1.8453, "step": 6059 }, { "epoch": 0.2958984375, "grad_norm": 0.24163438379764557, "learning_rate": 0.0004154416397963985, "loss": 1.8412, "step": 6060 }, { "epoch": 0.295947265625, "grad_norm": 0.28662794828414917, "learning_rate": 0.00041541427179976153, "loss": 1.8572, "step": 6061 }, { "epoch": 0.29599609375, "grad_norm": 0.2978740930557251, "learning_rate": 0.000415386900400072, "loss": 1.8336, "step": 6062 }, { "epoch": 0.296044921875, "grad_norm": 0.3052942156791687, "learning_rate": 0.0004153595255979931, "loss": 1.8553, "step": 6063 }, { "epoch": 0.29609375, "grad_norm": 0.3339909017086029, "learning_rate": 0.00041533214739418835, "loss": 1.8405, "step": 6064 }, { "epoch": 0.296142578125, "grad_norm": 0.23884044587612152, "learning_rate": 0.00041530476578932124, "loss": 1.8312, "step": 6065 }, { "epoch": 0.29619140625, "grad_norm": 0.3482411801815033, "learning_rate": 0.0004152773807840555, "loss": 1.874, "step": 6066 }, { "epoch": 0.296240234375, "grad_norm": 0.3773943781852722, "learning_rate": 0.0004152499923790547, "loss": 1.8311, "step": 6067 }, { "epoch": 0.2962890625, "grad_norm": 0.30415186285972595, "learning_rate": 0.0004152226005749826, "loss": 1.861, "step": 6068 }, { "epoch": 0.296337890625, "grad_norm": 0.3276391327381134, "learning_rate": 0.0004151952053725032, "loss": 1.8511, "step": 6069 }, { "epoch": 0.29638671875, "grad_norm": 0.3505615293979645, "learning_rate": 0.00041516780677228025, "loss": 1.8727, "step": 6070 }, { "epoch": 0.296435546875, "grad_norm": 0.3173997700214386, "learning_rate": 0.00041514040477497806, "loss": 1.8268, "step": 6071 }, { "epoch": 0.296484375, "grad_norm": 0.29646432399749756, "learning_rate": 0.00041511299938126045, "loss": 1.8515, "step": 6072 }, { "epoch": 0.296533203125, "grad_norm": 0.33298224210739136, "learning_rate": 0.0004150855905917917, "loss": 1.8549, "step": 6073 }, { "epoch": 0.29658203125, "grad_norm": 0.3143113851547241, "learning_rate": 0.00041505817840723625, "loss": 1.8546, "step": 6074 }, { "epoch": 0.296630859375, "grad_norm": 0.41033726930618286, "learning_rate": 0.00041503076282825823, "loss": 1.8396, "step": 6075 }, { "epoch": 0.2966796875, "grad_norm": 0.33167117834091187, "learning_rate": 0.00041500334385552223, "loss": 1.8515, "step": 6076 }, { "epoch": 0.296728515625, "grad_norm": 0.3133736848831177, "learning_rate": 0.0004149759214896927, "loss": 1.853, "step": 6077 }, { "epoch": 0.29677734375, "grad_norm": 0.3509826064109802, "learning_rate": 0.00041494849573143425, "loss": 1.8518, "step": 6078 }, { "epoch": 0.296826171875, "grad_norm": 0.2507186830043793, "learning_rate": 0.0004149210665814116, "loss": 1.8561, "step": 6079 }, { "epoch": 0.296875, "grad_norm": 0.3082042634487152, "learning_rate": 0.0004148936340402894, "loss": 1.8551, "step": 6080 }, { "epoch": 0.296923828125, "grad_norm": 0.3035234808921814, "learning_rate": 0.00041486619810873266, "loss": 1.8493, "step": 6081 }, { "epoch": 0.29697265625, "grad_norm": 0.23234552145004272, "learning_rate": 0.00041483875878740623, "loss": 1.8266, "step": 6082 }, { "epoch": 0.297021484375, "grad_norm": 0.3260993957519531, "learning_rate": 0.00041481131607697515, "loss": 1.8599, "step": 6083 }, { "epoch": 0.2970703125, "grad_norm": 0.26226067543029785, "learning_rate": 0.0004147838699781045, "loss": 1.8349, "step": 6084 }, { "epoch": 0.297119140625, "grad_norm": 0.2563171982765198, "learning_rate": 0.00041475642049145937, "loss": 1.8432, "step": 6085 }, { "epoch": 0.29716796875, "grad_norm": 0.2502158582210541, "learning_rate": 0.00041472896761770526, "loss": 1.8521, "step": 6086 }, { "epoch": 0.297216796875, "grad_norm": 0.25822684168815613, "learning_rate": 0.0004147015113575073, "loss": 1.8456, "step": 6087 }, { "epoch": 0.297265625, "grad_norm": 0.2510616183280945, "learning_rate": 0.00041467405171153097, "loss": 1.8518, "step": 6088 }, { "epoch": 0.297314453125, "grad_norm": 0.20638683438301086, "learning_rate": 0.00041464658868044176, "loss": 1.8624, "step": 6089 }, { "epoch": 0.29736328125, "grad_norm": 0.25517216324806213, "learning_rate": 0.0004146191222649053, "loss": 1.8639, "step": 6090 }, { "epoch": 0.297412109375, "grad_norm": 0.26854339241981506, "learning_rate": 0.0004145916524655872, "loss": 1.8581, "step": 6091 }, { "epoch": 0.2974609375, "grad_norm": 0.3196415901184082, "learning_rate": 0.00041456417928315334, "loss": 1.8607, "step": 6092 }, { "epoch": 0.297509765625, "grad_norm": 0.29290688037872314, "learning_rate": 0.00041453670271826944, "loss": 1.8242, "step": 6093 }, { "epoch": 0.29755859375, "grad_norm": 0.24702514708042145, "learning_rate": 0.00041450922277160143, "loss": 1.8718, "step": 6094 }, { "epoch": 0.297607421875, "grad_norm": 0.24669113755226135, "learning_rate": 0.0004144817394438153, "loss": 1.8496, "step": 6095 }, { "epoch": 0.29765625, "grad_norm": 0.26367485523223877, "learning_rate": 0.0004144542527355772, "loss": 1.8414, "step": 6096 }, { "epoch": 0.297705078125, "grad_norm": 0.2694566547870636, "learning_rate": 0.00041442676264755326, "loss": 1.8345, "step": 6097 }, { "epoch": 0.29775390625, "grad_norm": 0.2507360875606537, "learning_rate": 0.00041439926918040965, "loss": 1.8482, "step": 6098 }, { "epoch": 0.297802734375, "grad_norm": 0.2740941047668457, "learning_rate": 0.0004143717723348128, "loss": 1.8371, "step": 6099 }, { "epoch": 0.2978515625, "grad_norm": 0.3117503225803375, "learning_rate": 0.0004143442721114291, "loss": 1.8628, "step": 6100 }, { "epoch": 0.297900390625, "grad_norm": 0.2775120437145233, "learning_rate": 0.00041431676851092506, "loss": 1.8325, "step": 6101 }, { "epoch": 0.29794921875, "grad_norm": 0.2584889829158783, "learning_rate": 0.00041428926153396704, "loss": 1.8628, "step": 6102 }, { "epoch": 0.297998046875, "grad_norm": 0.31933897733688354, "learning_rate": 0.000414261751181222, "loss": 1.8712, "step": 6103 }, { "epoch": 0.298046875, "grad_norm": 0.31070709228515625, "learning_rate": 0.00041423423745335654, "loss": 1.8429, "step": 6104 }, { "epoch": 0.298095703125, "grad_norm": 0.27514803409576416, "learning_rate": 0.0004142067203510374, "loss": 1.8525, "step": 6105 }, { "epoch": 0.29814453125, "grad_norm": 0.26670047640800476, "learning_rate": 0.00041417919987493164, "loss": 1.8492, "step": 6106 }, { "epoch": 0.298193359375, "grad_norm": 0.30431365966796875, "learning_rate": 0.0004141516760257061, "loss": 1.8441, "step": 6107 }, { "epoch": 0.2982421875, "grad_norm": 0.31823301315307617, "learning_rate": 0.00041412414880402795, "loss": 1.8587, "step": 6108 }, { "epoch": 0.298291015625, "grad_norm": 0.2558130919933319, "learning_rate": 0.0004140966182105642, "loss": 1.8254, "step": 6109 }, { "epoch": 0.29833984375, "grad_norm": 0.3210791349411011, "learning_rate": 0.00041406908424598224, "loss": 1.8327, "step": 6110 }, { "epoch": 0.298388671875, "grad_norm": 0.2865303158760071, "learning_rate": 0.00041404154691094923, "loss": 1.8499, "step": 6111 }, { "epoch": 0.2984375, "grad_norm": 0.2643422782421112, "learning_rate": 0.00041401400620613265, "loss": 1.8558, "step": 6112 }, { "epoch": 0.298486328125, "grad_norm": 0.31507280468940735, "learning_rate": 0.00041398646213219994, "loss": 1.8341, "step": 6113 }, { "epoch": 0.29853515625, "grad_norm": 0.26299259066581726, "learning_rate": 0.00041395891468981865, "loss": 1.8945, "step": 6114 }, { "epoch": 0.298583984375, "grad_norm": 0.28011730313301086, "learning_rate": 0.0004139313638796565, "loss": 1.8536, "step": 6115 }, { "epoch": 0.2986328125, "grad_norm": 0.2813335061073303, "learning_rate": 0.00041390380970238107, "loss": 1.874, "step": 6116 }, { "epoch": 0.298681640625, "grad_norm": 0.26229047775268555, "learning_rate": 0.0004138762521586602, "loss": 1.8592, "step": 6117 }, { "epoch": 0.29873046875, "grad_norm": 0.2676534354686737, "learning_rate": 0.0004138486912491618, "loss": 1.8538, "step": 6118 }, { "epoch": 0.298779296875, "grad_norm": 0.2913895845413208, "learning_rate": 0.0004138211269745539, "loss": 1.8736, "step": 6119 }, { "epoch": 0.298828125, "grad_norm": 0.27367472648620605, "learning_rate": 0.0004137935593355043, "loss": 1.8454, "step": 6120 }, { "epoch": 0.298876953125, "grad_norm": 0.2814062833786011, "learning_rate": 0.0004137659883326814, "loss": 1.8669, "step": 6121 }, { "epoch": 0.29892578125, "grad_norm": 0.3263419270515442, "learning_rate": 0.0004137384139667532, "loss": 1.8628, "step": 6122 }, { "epoch": 0.298974609375, "grad_norm": 0.3480551540851593, "learning_rate": 0.0004137108362383882, "loss": 1.8411, "step": 6123 }, { "epoch": 0.2990234375, "grad_norm": 0.25082162022590637, "learning_rate": 0.0004136832551482545, "loss": 1.8502, "step": 6124 }, { "epoch": 0.299072265625, "grad_norm": 0.2912696599960327, "learning_rate": 0.00041365567069702084, "loss": 1.8597, "step": 6125 }, { "epoch": 0.29912109375, "grad_norm": 0.3165445625782013, "learning_rate": 0.0004136280828853555, "loss": 1.8314, "step": 6126 }, { "epoch": 0.299169921875, "grad_norm": 0.24269258975982666, "learning_rate": 0.0004136004917139273, "loss": 1.8125, "step": 6127 }, { "epoch": 0.29921875, "grad_norm": 0.25039830803871155, "learning_rate": 0.00041357289718340473, "loss": 1.8651, "step": 6128 }, { "epoch": 0.299267578125, "grad_norm": 0.27934738993644714, "learning_rate": 0.00041354529929445665, "loss": 1.8786, "step": 6129 }, { "epoch": 0.29931640625, "grad_norm": 0.24848708510398865, "learning_rate": 0.00041351769804775205, "loss": 1.8275, "step": 6130 }, { "epoch": 0.299365234375, "grad_norm": 0.2219008505344391, "learning_rate": 0.0004134900934439597, "loss": 1.8495, "step": 6131 }, { "epoch": 0.2994140625, "grad_norm": 0.2828231751918793, "learning_rate": 0.00041346248548374865, "loss": 1.8715, "step": 6132 }, { "epoch": 0.299462890625, "grad_norm": 0.25512412190437317, "learning_rate": 0.0004134348741677881, "loss": 1.845, "step": 6133 }, { "epoch": 0.29951171875, "grad_norm": 0.2834383547306061, "learning_rate": 0.00041340725949674714, "loss": 1.873, "step": 6134 }, { "epoch": 0.299560546875, "grad_norm": 0.2680399715900421, "learning_rate": 0.000413379641471295, "loss": 1.8909, "step": 6135 }, { "epoch": 0.299609375, "grad_norm": 0.2620023787021637, "learning_rate": 0.0004133520200921011, "loss": 1.8484, "step": 6136 }, { "epoch": 0.299658203125, "grad_norm": 0.29474514722824097, "learning_rate": 0.0004133243953598348, "loss": 1.8542, "step": 6137 }, { "epoch": 0.29970703125, "grad_norm": 0.28165575861930847, "learning_rate": 0.0004132967672751656, "loss": 1.8889, "step": 6138 }, { "epoch": 0.299755859375, "grad_norm": 0.2722609043121338, "learning_rate": 0.0004132691358387633, "loss": 1.8371, "step": 6139 }, { "epoch": 0.2998046875, "grad_norm": 0.27508148550987244, "learning_rate": 0.0004132415010512973, "loss": 1.8449, "step": 6140 }, { "epoch": 0.299853515625, "grad_norm": 0.29411184787750244, "learning_rate": 0.00041321386291343754, "loss": 1.8554, "step": 6141 }, { "epoch": 0.29990234375, "grad_norm": 0.3124532997608185, "learning_rate": 0.00041318622142585374, "loss": 1.8567, "step": 6142 }, { "epoch": 0.299951171875, "grad_norm": 0.3310800790786743, "learning_rate": 0.0004131585765892158, "loss": 1.8283, "step": 6143 }, { "epoch": 0.3, "grad_norm": 0.3725448548793793, "learning_rate": 0.00041313092840419374, "loss": 1.859, "step": 6144 }, { "epoch": 0.300048828125, "grad_norm": 0.3349009156227112, "learning_rate": 0.00041310327687145773, "loss": 1.8564, "step": 6145 }, { "epoch": 0.30009765625, "grad_norm": 0.23935629427433014, "learning_rate": 0.0004130756219916779, "loss": 1.8503, "step": 6146 }, { "epoch": 0.300146484375, "grad_norm": 0.35409876704216003, "learning_rate": 0.0004130479637655243, "loss": 1.8338, "step": 6147 }, { "epoch": 0.3001953125, "grad_norm": 0.3483249545097351, "learning_rate": 0.00041302030219366755, "loss": 1.8464, "step": 6148 }, { "epoch": 0.300244140625, "grad_norm": 0.32822415232658386, "learning_rate": 0.00041299263727677773, "loss": 1.8419, "step": 6149 }, { "epoch": 0.30029296875, "grad_norm": 0.28622934222221375, "learning_rate": 0.00041296496901552563, "loss": 1.8594, "step": 6150 }, { "epoch": 0.300341796875, "grad_norm": 0.29281941056251526, "learning_rate": 0.00041293729741058156, "loss": 1.8674, "step": 6151 }, { "epoch": 0.300390625, "grad_norm": 0.3817397356033325, "learning_rate": 0.0004129096224626163, "loss": 1.849, "step": 6152 }, { "epoch": 0.300439453125, "grad_norm": 0.3327037990093231, "learning_rate": 0.0004128819441723006, "loss": 1.8566, "step": 6153 }, { "epoch": 0.30048828125, "grad_norm": 0.31139498949050903, "learning_rate": 0.00041285426254030513, "loss": 1.8696, "step": 6154 }, { "epoch": 0.300537109375, "grad_norm": 0.3104103207588196, "learning_rate": 0.0004128265775673009, "loss": 1.8778, "step": 6155 }, { "epoch": 0.3005859375, "grad_norm": 0.30181097984313965, "learning_rate": 0.00041279888925395875, "loss": 1.8502, "step": 6156 }, { "epoch": 0.300634765625, "grad_norm": 0.32499971985816956, "learning_rate": 0.0004127711976009499, "loss": 1.8632, "step": 6157 }, { "epoch": 0.30068359375, "grad_norm": 0.2356053590774536, "learning_rate": 0.00041274350260894543, "loss": 1.8719, "step": 6158 }, { "epoch": 0.300732421875, "grad_norm": 0.26056089997291565, "learning_rate": 0.00041271580427861644, "loss": 1.8738, "step": 6159 }, { "epoch": 0.30078125, "grad_norm": 0.31266844272613525, "learning_rate": 0.0004126881026106343, "loss": 1.8693, "step": 6160 }, { "epoch": 0.300830078125, "grad_norm": 0.23784783482551575, "learning_rate": 0.00041266039760567036, "loss": 1.8134, "step": 6161 }, { "epoch": 0.30087890625, "grad_norm": 0.23153391480445862, "learning_rate": 0.0004126326892643961, "loss": 1.8559, "step": 6162 }, { "epoch": 0.300927734375, "grad_norm": 0.2865602672100067, "learning_rate": 0.00041260497758748303, "loss": 1.814, "step": 6163 }, { "epoch": 0.3009765625, "grad_norm": 0.26721957325935364, "learning_rate": 0.00041257726257560293, "loss": 1.8747, "step": 6164 }, { "epoch": 0.301025390625, "grad_norm": 0.2646118700504303, "learning_rate": 0.00041254954422942716, "loss": 1.8558, "step": 6165 }, { "epoch": 0.30107421875, "grad_norm": 0.2711467146873474, "learning_rate": 0.0004125218225496277, "loss": 1.8525, "step": 6166 }, { "epoch": 0.301123046875, "grad_norm": 0.24042321741580963, "learning_rate": 0.00041249409753687643, "loss": 1.8431, "step": 6167 }, { "epoch": 0.301171875, "grad_norm": 0.258758544921875, "learning_rate": 0.0004124663691918453, "loss": 1.8592, "step": 6168 }, { "epoch": 0.301220703125, "grad_norm": 0.344786673784256, "learning_rate": 0.00041243863751520615, "loss": 1.8593, "step": 6169 }, { "epoch": 0.30126953125, "grad_norm": 0.3356286585330963, "learning_rate": 0.0004124109025076313, "loss": 1.8465, "step": 6170 }, { "epoch": 0.301318359375, "grad_norm": 0.3028925359249115, "learning_rate": 0.00041238316416979283, "loss": 1.8672, "step": 6171 }, { "epoch": 0.3013671875, "grad_norm": 0.29463574290275574, "learning_rate": 0.00041235542250236295, "loss": 1.848, "step": 6172 }, { "epoch": 0.301416015625, "grad_norm": 0.3728436231613159, "learning_rate": 0.00041232767750601406, "loss": 1.8606, "step": 6173 }, { "epoch": 0.30146484375, "grad_norm": 0.3190580904483795, "learning_rate": 0.00041229992918141863, "loss": 1.8572, "step": 6174 }, { "epoch": 0.301513671875, "grad_norm": 0.3138558864593506, "learning_rate": 0.000412272177529249, "loss": 1.852, "step": 6175 }, { "epoch": 0.3015625, "grad_norm": 0.3047286570072174, "learning_rate": 0.000412244422550178, "loss": 1.864, "step": 6176 }, { "epoch": 0.301611328125, "grad_norm": 0.27760347723960876, "learning_rate": 0.0004122166642448781, "loss": 1.84, "step": 6177 }, { "epoch": 0.30166015625, "grad_norm": 0.36652883887290955, "learning_rate": 0.00041218890261402206, "loss": 1.8415, "step": 6178 }, { "epoch": 0.301708984375, "grad_norm": 0.27158215641975403, "learning_rate": 0.00041216113765828286, "loss": 1.8322, "step": 6179 }, { "epoch": 0.3017578125, "grad_norm": 0.29149821400642395, "learning_rate": 0.00041213336937833317, "loss": 1.8505, "step": 6180 }, { "epoch": 0.301806640625, "grad_norm": 0.3871263563632965, "learning_rate": 0.00041210559777484613, "loss": 1.8446, "step": 6181 }, { "epoch": 0.30185546875, "grad_norm": 0.2649684250354767, "learning_rate": 0.00041207782284849475, "loss": 1.8522, "step": 6182 }, { "epoch": 0.301904296875, "grad_norm": 0.7599014639854431, "learning_rate": 0.0004120500445999523, "loss": 1.8526, "step": 6183 }, { "epoch": 0.301953125, "grad_norm": 0.27656152844429016, "learning_rate": 0.00041202226302989193, "loss": 1.8576, "step": 6184 }, { "epoch": 0.302001953125, "grad_norm": 0.26582464575767517, "learning_rate": 0.0004119944781389869, "loss": 1.8349, "step": 6185 }, { "epoch": 0.30205078125, "grad_norm": 0.33160635828971863, "learning_rate": 0.00041196668992791064, "loss": 1.8351, "step": 6186 }, { "epoch": 0.302099609375, "grad_norm": 0.31273674964904785, "learning_rate": 0.00041193889839733654, "loss": 1.8465, "step": 6187 }, { "epoch": 0.3021484375, "grad_norm": 0.307248592376709, "learning_rate": 0.0004119111035479383, "loss": 1.8713, "step": 6188 }, { "epoch": 0.302197265625, "grad_norm": 0.33551472425460815, "learning_rate": 0.00041188330538038937, "loss": 1.861, "step": 6189 }, { "epoch": 0.30224609375, "grad_norm": 0.2798084020614624, "learning_rate": 0.0004118555038953636, "loss": 1.8404, "step": 6190 }, { "epoch": 0.302294921875, "grad_norm": 0.2580626606941223, "learning_rate": 0.00041182769909353477, "loss": 1.8459, "step": 6191 }, { "epoch": 0.30234375, "grad_norm": 0.30497461557388306, "learning_rate": 0.00041179989097557676, "loss": 1.8757, "step": 6192 }, { "epoch": 0.302392578125, "grad_norm": 0.2621091902256012, "learning_rate": 0.0004117720795421635, "loss": 1.8591, "step": 6193 }, { "epoch": 0.30244140625, "grad_norm": 0.2563592791557312, "learning_rate": 0.00041174426479396894, "loss": 1.8586, "step": 6194 }, { "epoch": 0.302490234375, "grad_norm": 0.2705029845237732, "learning_rate": 0.0004117164467316673, "loss": 1.8446, "step": 6195 }, { "epoch": 0.3025390625, "grad_norm": 0.24134087562561035, "learning_rate": 0.00041168862535593273, "loss": 1.8859, "step": 6196 }, { "epoch": 0.302587890625, "grad_norm": 0.2856534421443939, "learning_rate": 0.00041166080066743947, "loss": 1.8502, "step": 6197 }, { "epoch": 0.30263671875, "grad_norm": 0.29932454228401184, "learning_rate": 0.00041163297266686203, "loss": 1.8519, "step": 6198 }, { "epoch": 0.302685546875, "grad_norm": 0.2804478704929352, "learning_rate": 0.00041160514135487453, "loss": 1.8703, "step": 6199 }, { "epoch": 0.302734375, "grad_norm": 0.2613031566143036, "learning_rate": 0.0004115773067321518, "loss": 1.8462, "step": 6200 }, { "epoch": 0.302783203125, "grad_norm": 0.2356191873550415, "learning_rate": 0.0004115494687993684, "loss": 1.8576, "step": 6201 }, { "epoch": 0.30283203125, "grad_norm": 0.27307969331741333, "learning_rate": 0.0004115216275571988, "loss": 1.8479, "step": 6202 }, { "epoch": 0.302880859375, "grad_norm": 0.3097607493400574, "learning_rate": 0.0004114937830063179, "loss": 1.8574, "step": 6203 }, { "epoch": 0.3029296875, "grad_norm": 0.3091468811035156, "learning_rate": 0.00041146593514740056, "loss": 1.8526, "step": 6204 }, { "epoch": 0.302978515625, "grad_norm": 0.2896428108215332, "learning_rate": 0.0004114380839811216, "loss": 1.8362, "step": 6205 }, { "epoch": 0.30302734375, "grad_norm": 0.2398175150156021, "learning_rate": 0.00041141022950815604, "loss": 1.8615, "step": 6206 }, { "epoch": 0.303076171875, "grad_norm": 0.27148374915122986, "learning_rate": 0.000411382371729179, "loss": 1.8547, "step": 6207 }, { "epoch": 0.303125, "grad_norm": 0.2991926670074463, "learning_rate": 0.0004113545106448657, "loss": 1.8274, "step": 6208 }, { "epoch": 0.303173828125, "grad_norm": 0.29041022062301636, "learning_rate": 0.00041132664625589125, "loss": 1.8652, "step": 6209 }, { "epoch": 0.30322265625, "grad_norm": 0.2621786594390869, "learning_rate": 0.0004112987785629309, "loss": 1.8259, "step": 6210 }, { "epoch": 0.303271484375, "grad_norm": 0.22449764609336853, "learning_rate": 0.00041127090756666026, "loss": 1.8245, "step": 6211 }, { "epoch": 0.3033203125, "grad_norm": 0.27975133061408997, "learning_rate": 0.00041124303326775465, "loss": 1.8571, "step": 6212 }, { "epoch": 0.303369140625, "grad_norm": 0.28678837418556213, "learning_rate": 0.00041121515566688963, "loss": 1.8504, "step": 6213 }, { "epoch": 0.30341796875, "grad_norm": 0.21564055979251862, "learning_rate": 0.000411187274764741, "loss": 1.848, "step": 6214 }, { "epoch": 0.303466796875, "grad_norm": 0.32435742020606995, "learning_rate": 0.0004111593905619842, "loss": 1.842, "step": 6215 }, { "epoch": 0.303515625, "grad_norm": 0.34414613246917725, "learning_rate": 0.00041113150305929525, "loss": 1.852, "step": 6216 }, { "epoch": 0.303564453125, "grad_norm": 0.25369754433631897, "learning_rate": 0.00041110361225735, "loss": 1.8377, "step": 6217 }, { "epoch": 0.30361328125, "grad_norm": 0.2680782079696655, "learning_rate": 0.00041107571815682427, "loss": 1.8537, "step": 6218 }, { "epoch": 0.303662109375, "grad_norm": 0.2980824410915375, "learning_rate": 0.0004110478207583943, "loss": 1.835, "step": 6219 }, { "epoch": 0.3037109375, "grad_norm": 0.22609733045101166, "learning_rate": 0.00041101992006273606, "loss": 1.8527, "step": 6220 }, { "epoch": 0.303759765625, "grad_norm": 0.29431235790252686, "learning_rate": 0.0004109920160705257, "loss": 1.8712, "step": 6221 }, { "epoch": 0.30380859375, "grad_norm": 0.31014207005500793, "learning_rate": 0.0004109641087824396, "loss": 1.8428, "step": 6222 }, { "epoch": 0.303857421875, "grad_norm": 0.25480005145072937, "learning_rate": 0.0004109361981991542, "loss": 1.8588, "step": 6223 }, { "epoch": 0.30390625, "grad_norm": 0.24809370934963226, "learning_rate": 0.00041090828432134574, "loss": 1.8655, "step": 6224 }, { "epoch": 0.303955078125, "grad_norm": 0.2953343987464905, "learning_rate": 0.00041088036714969083, "loss": 1.8428, "step": 6225 }, { "epoch": 0.30400390625, "grad_norm": 0.30546867847442627, "learning_rate": 0.00041085244668486603, "loss": 1.8522, "step": 6226 }, { "epoch": 0.304052734375, "grad_norm": 0.2666206955909729, "learning_rate": 0.00041082452292754806, "loss": 1.8353, "step": 6227 }, { "epoch": 0.3041015625, "grad_norm": 0.3496520221233368, "learning_rate": 0.0004107965958784137, "loss": 1.8517, "step": 6228 }, { "epoch": 0.304150390625, "grad_norm": 0.2437373250722885, "learning_rate": 0.0004107686655381397, "loss": 1.8387, "step": 6229 }, { "epoch": 0.30419921875, "grad_norm": 0.3015970289707184, "learning_rate": 0.00041074073190740305, "loss": 1.8222, "step": 6230 }, { "epoch": 0.304248046875, "grad_norm": 0.263644814491272, "learning_rate": 0.00041071279498688067, "loss": 1.8446, "step": 6231 }, { "epoch": 0.304296875, "grad_norm": 0.31404659152030945, "learning_rate": 0.0004106848547772497, "loss": 1.8735, "step": 6232 }, { "epoch": 0.304345703125, "grad_norm": 0.30374857783317566, "learning_rate": 0.0004106569112791873, "loss": 1.8248, "step": 6233 }, { "epoch": 0.30439453125, "grad_norm": 0.23654481768608093, "learning_rate": 0.0004106289644933706, "loss": 1.8356, "step": 6234 }, { "epoch": 0.304443359375, "grad_norm": 0.3088642358779907, "learning_rate": 0.000410601014420477, "loss": 1.8505, "step": 6235 }, { "epoch": 0.3044921875, "grad_norm": 0.23553788661956787, "learning_rate": 0.0004105730610611839, "loss": 1.8302, "step": 6236 }, { "epoch": 0.304541015625, "grad_norm": 0.23991809785366058, "learning_rate": 0.0004105451044161687, "loss": 1.8433, "step": 6237 }, { "epoch": 0.30458984375, "grad_norm": 0.3397555947303772, "learning_rate": 0.000410517144486109, "loss": 1.8632, "step": 6238 }, { "epoch": 0.304638671875, "grad_norm": 0.2590096890926361, "learning_rate": 0.00041048918127168244, "loss": 1.8507, "step": 6239 }, { "epoch": 0.3046875, "grad_norm": 0.27116915583610535, "learning_rate": 0.0004104612147735668, "loss": 1.843, "step": 6240 }, { "epoch": 0.304736328125, "grad_norm": 0.2801123857498169, "learning_rate": 0.0004104332449924397, "loss": 1.8596, "step": 6241 }, { "epoch": 0.30478515625, "grad_norm": 0.3153025209903717, "learning_rate": 0.00041040527192897914, "loss": 1.86, "step": 6242 }, { "epoch": 0.304833984375, "grad_norm": 0.30410706996917725, "learning_rate": 0.000410377295583863, "loss": 1.8699, "step": 6243 }, { "epoch": 0.3048828125, "grad_norm": 0.2345842868089676, "learning_rate": 0.00041034931595776934, "loss": 1.8457, "step": 6244 }, { "epoch": 0.304931640625, "grad_norm": 0.3111841678619385, "learning_rate": 0.0004103213330513763, "loss": 1.8296, "step": 6245 }, { "epoch": 0.30498046875, "grad_norm": 0.2935961186885834, "learning_rate": 0.00041029334686536206, "loss": 1.823, "step": 6246 }, { "epoch": 0.305029296875, "grad_norm": 0.27022168040275574, "learning_rate": 0.00041026535740040483, "loss": 1.8483, "step": 6247 }, { "epoch": 0.305078125, "grad_norm": 0.27769479155540466, "learning_rate": 0.00041023736465718296, "loss": 1.8247, "step": 6248 }, { "epoch": 0.305126953125, "grad_norm": 0.21699152886867523, "learning_rate": 0.000410209368636375, "loss": 1.8477, "step": 6249 }, { "epoch": 0.30517578125, "grad_norm": 0.29143211245536804, "learning_rate": 0.0004101813693386593, "loss": 1.8482, "step": 6250 }, { "epoch": 0.305224609375, "grad_norm": 0.28834667801856995, "learning_rate": 0.00041015336676471453, "loss": 1.8503, "step": 6251 }, { "epoch": 0.3052734375, "grad_norm": 0.2826392948627472, "learning_rate": 0.00041012536091521927, "loss": 1.8497, "step": 6252 }, { "epoch": 0.305322265625, "grad_norm": 0.2710035741329193, "learning_rate": 0.0004100973517908524, "loss": 1.8477, "step": 6253 }, { "epoch": 0.30537109375, "grad_norm": 0.21848994493484497, "learning_rate": 0.00041006933939229264, "loss": 1.8607, "step": 6254 }, { "epoch": 0.305419921875, "grad_norm": 0.22789454460144043, "learning_rate": 0.00041004132372021897, "loss": 1.8662, "step": 6255 }, { "epoch": 0.30546875, "grad_norm": 0.273720920085907, "learning_rate": 0.0004100133047753103, "loss": 1.8772, "step": 6256 }, { "epoch": 0.305517578125, "grad_norm": 0.20769865810871124, "learning_rate": 0.00040998528255824576, "loss": 1.859, "step": 6257 }, { "epoch": 0.30556640625, "grad_norm": 0.27116891741752625, "learning_rate": 0.00040995725706970436, "loss": 1.8478, "step": 6258 }, { "epoch": 0.305615234375, "grad_norm": 0.32274994254112244, "learning_rate": 0.0004099292283103655, "loss": 1.8617, "step": 6259 }, { "epoch": 0.3056640625, "grad_norm": 0.3655678629875183, "learning_rate": 0.00040990119628090836, "loss": 1.8382, "step": 6260 }, { "epoch": 0.305712890625, "grad_norm": 0.3404575288295746, "learning_rate": 0.0004098731609820124, "loss": 1.8382, "step": 6261 }, { "epoch": 0.30576171875, "grad_norm": 0.2399924099445343, "learning_rate": 0.00040984512241435696, "loss": 1.863, "step": 6262 }, { "epoch": 0.305810546875, "grad_norm": 0.2971963882446289, "learning_rate": 0.0004098170805786216, "loss": 1.8365, "step": 6263 }, { "epoch": 0.305859375, "grad_norm": 0.2928027808666229, "learning_rate": 0.0004097890354754861, "loss": 1.8614, "step": 6264 }, { "epoch": 0.305908203125, "grad_norm": 0.3257724642753601, "learning_rate": 0.00040976098710562994, "loss": 1.8582, "step": 6265 }, { "epoch": 0.30595703125, "grad_norm": 0.3653707206249237, "learning_rate": 0.00040973293546973303, "loss": 1.8563, "step": 6266 }, { "epoch": 0.306005859375, "grad_norm": 0.2760549485683441, "learning_rate": 0.00040970488056847517, "loss": 1.8632, "step": 6267 }, { "epoch": 0.3060546875, "grad_norm": 0.34538331627845764, "learning_rate": 0.0004096768224025364, "loss": 1.8556, "step": 6268 }, { "epoch": 0.306103515625, "grad_norm": 0.3341052830219269, "learning_rate": 0.0004096487609725965, "loss": 1.8259, "step": 6269 }, { "epoch": 0.30615234375, "grad_norm": 0.298459529876709, "learning_rate": 0.0004096206962793358, "loss": 1.8263, "step": 6270 }, { "epoch": 0.306201171875, "grad_norm": 0.38439372181892395, "learning_rate": 0.00040959262832343437, "loss": 1.8649, "step": 6271 }, { "epoch": 0.30625, "grad_norm": 0.3087610900402069, "learning_rate": 0.00040956455710557235, "loss": 1.8459, "step": 6272 }, { "epoch": 0.306298828125, "grad_norm": 0.3294806182384491, "learning_rate": 0.0004095364826264303, "loss": 1.8686, "step": 6273 }, { "epoch": 0.30634765625, "grad_norm": 0.3986267149448395, "learning_rate": 0.0004095084048866885, "loss": 1.851, "step": 6274 }, { "epoch": 0.306396484375, "grad_norm": 0.2861795425415039, "learning_rate": 0.0004094803238870273, "loss": 1.8483, "step": 6275 }, { "epoch": 0.3064453125, "grad_norm": 0.3104221224784851, "learning_rate": 0.00040945223962812754, "loss": 1.8408, "step": 6276 }, { "epoch": 0.306494140625, "grad_norm": 0.2802399694919586, "learning_rate": 0.0004094241521106698, "loss": 1.8477, "step": 6277 }, { "epoch": 0.30654296875, "grad_norm": 0.3064103126525879, "learning_rate": 0.0004093960613353346, "loss": 1.8345, "step": 6278 }, { "epoch": 0.306591796875, "grad_norm": 0.35616230964660645, "learning_rate": 0.00040936796730280296, "loss": 1.8516, "step": 6279 }, { "epoch": 0.306640625, "grad_norm": 0.288698673248291, "learning_rate": 0.00040933987001375564, "loss": 1.843, "step": 6280 }, { "epoch": 0.306689453125, "grad_norm": 0.29447105526924133, "learning_rate": 0.00040931176946887364, "loss": 1.8283, "step": 6281 }, { "epoch": 0.30673828125, "grad_norm": 0.31311002373695374, "learning_rate": 0.00040928366566883806, "loss": 1.8661, "step": 6282 }, { "epoch": 0.306787109375, "grad_norm": 0.3258682191371918, "learning_rate": 0.00040925555861432995, "loss": 1.8594, "step": 6283 }, { "epoch": 0.3068359375, "grad_norm": 0.25742173194885254, "learning_rate": 0.0004092274483060305, "loss": 1.844, "step": 6284 }, { "epoch": 0.306884765625, "grad_norm": 0.32944101095199585, "learning_rate": 0.00040919933474462103, "loss": 1.8498, "step": 6285 }, { "epoch": 0.30693359375, "grad_norm": 0.3452976644039154, "learning_rate": 0.0004091712179307828, "loss": 1.8738, "step": 6286 }, { "epoch": 0.306982421875, "grad_norm": 0.3207565248012543, "learning_rate": 0.0004091430978651974, "loss": 1.8508, "step": 6287 }, { "epoch": 0.30703125, "grad_norm": 0.2750515937805176, "learning_rate": 0.0004091149745485461, "loss": 1.8427, "step": 6288 }, { "epoch": 0.307080078125, "grad_norm": 0.3491295874118805, "learning_rate": 0.00040908684798151077, "loss": 1.8295, "step": 6289 }, { "epoch": 0.30712890625, "grad_norm": 0.2746192216873169, "learning_rate": 0.0004090587181647729, "loss": 1.8436, "step": 6290 }, { "epoch": 0.307177734375, "grad_norm": 0.3729904592037201, "learning_rate": 0.00040903058509901433, "loss": 1.8287, "step": 6291 }, { "epoch": 0.3072265625, "grad_norm": 0.26731449365615845, "learning_rate": 0.00040900244878491683, "loss": 1.8575, "step": 6292 }, { "epoch": 0.307275390625, "grad_norm": 0.2789233326911926, "learning_rate": 0.00040897430922316237, "loss": 1.8455, "step": 6293 }, { "epoch": 0.30732421875, "grad_norm": 0.26293110847473145, "learning_rate": 0.00040894616641443284, "loss": 1.8814, "step": 6294 }, { "epoch": 0.307373046875, "grad_norm": 0.2526845335960388, "learning_rate": 0.00040891802035941037, "loss": 1.8281, "step": 6295 }, { "epoch": 0.307421875, "grad_norm": 0.26974695920944214, "learning_rate": 0.0004088898710587772, "loss": 1.8333, "step": 6296 }, { "epoch": 0.307470703125, "grad_norm": 0.25062504410743713, "learning_rate": 0.0004088617185132153, "loss": 1.8383, "step": 6297 }, { "epoch": 0.30751953125, "grad_norm": 0.2914745509624481, "learning_rate": 0.0004088335627234071, "loss": 1.8406, "step": 6298 }, { "epoch": 0.307568359375, "grad_norm": 0.29561319947242737, "learning_rate": 0.00040880540369003505, "loss": 1.8658, "step": 6299 }, { "epoch": 0.3076171875, "grad_norm": 0.24185234308242798, "learning_rate": 0.00040877724141378147, "loss": 1.8382, "step": 6300 }, { "epoch": 0.307666015625, "grad_norm": 0.31656214594841003, "learning_rate": 0.0004087490758953291, "loss": 1.8462, "step": 6301 }, { "epoch": 0.30771484375, "grad_norm": 0.2818854749202728, "learning_rate": 0.0004087209071353604, "loss": 1.8498, "step": 6302 }, { "epoch": 0.307763671875, "grad_norm": 0.25587648153305054, "learning_rate": 0.00040869273513455793, "loss": 1.8744, "step": 6303 }, { "epoch": 0.3078125, "grad_norm": 0.2579011619091034, "learning_rate": 0.0004086645598936047, "loss": 1.8498, "step": 6304 }, { "epoch": 0.307861328125, "grad_norm": 0.24662546813488007, "learning_rate": 0.00040863638141318357, "loss": 1.8466, "step": 6305 }, { "epoch": 0.30791015625, "grad_norm": 0.2744612991809845, "learning_rate": 0.00040860819969397726, "loss": 1.8495, "step": 6306 }, { "epoch": 0.307958984375, "grad_norm": 0.28027254343032837, "learning_rate": 0.0004085800147366689, "loss": 1.8366, "step": 6307 }, { "epoch": 0.3080078125, "grad_norm": 0.26716023683547974, "learning_rate": 0.0004085518265419416, "loss": 1.833, "step": 6308 }, { "epoch": 0.308056640625, "grad_norm": 0.2798502445220947, "learning_rate": 0.00040852363511047847, "loss": 1.8155, "step": 6309 }, { "epoch": 0.30810546875, "grad_norm": 0.2583712041378021, "learning_rate": 0.0004084954404429629, "loss": 1.8664, "step": 6310 }, { "epoch": 0.308154296875, "grad_norm": 0.2527560889720917, "learning_rate": 0.0004084672425400779, "loss": 1.8626, "step": 6311 }, { "epoch": 0.308203125, "grad_norm": 0.26247936487197876, "learning_rate": 0.0004084390414025071, "loss": 1.8303, "step": 6312 }, { "epoch": 0.308251953125, "grad_norm": 0.2381865531206131, "learning_rate": 0.00040841083703093393, "loss": 1.8114, "step": 6313 }, { "epoch": 0.30830078125, "grad_norm": 0.2547239661216736, "learning_rate": 0.00040838262942604193, "loss": 1.8247, "step": 6314 }, { "epoch": 0.308349609375, "grad_norm": 0.23467864096164703, "learning_rate": 0.0004083544185885148, "loss": 1.8543, "step": 6315 }, { "epoch": 0.3083984375, "grad_norm": 0.2665019631385803, "learning_rate": 0.0004083262045190362, "loss": 1.8371, "step": 6316 }, { "epoch": 0.308447265625, "grad_norm": 0.3076275587081909, "learning_rate": 0.0004082979872182899, "loss": 1.8263, "step": 6317 }, { "epoch": 0.30849609375, "grad_norm": 0.33074817061424255, "learning_rate": 0.0004082697666869598, "loss": 1.8453, "step": 6318 }, { "epoch": 0.308544921875, "grad_norm": 0.27391475439071655, "learning_rate": 0.0004082415429257298, "loss": 1.8481, "step": 6319 }, { "epoch": 0.30859375, "grad_norm": 0.26532337069511414, "learning_rate": 0.00040821331593528395, "loss": 1.8565, "step": 6320 }, { "epoch": 0.308642578125, "grad_norm": 0.29897865653038025, "learning_rate": 0.00040818508571630643, "loss": 1.8377, "step": 6321 }, { "epoch": 0.30869140625, "grad_norm": 0.30613401532173157, "learning_rate": 0.00040815685226948135, "loss": 1.8605, "step": 6322 }, { "epoch": 0.308740234375, "grad_norm": 0.2598994970321655, "learning_rate": 0.00040812861559549294, "loss": 1.8512, "step": 6323 }, { "epoch": 0.3087890625, "grad_norm": 0.2389792948961258, "learning_rate": 0.00040810037569502554, "loss": 1.8435, "step": 6324 }, { "epoch": 0.308837890625, "grad_norm": 0.25436192750930786, "learning_rate": 0.00040807213256876363, "loss": 1.8611, "step": 6325 }, { "epoch": 0.30888671875, "grad_norm": 0.2824532091617584, "learning_rate": 0.00040804388621739176, "loss": 1.8141, "step": 6326 }, { "epoch": 0.308935546875, "grad_norm": 0.24748541414737701, "learning_rate": 0.00040801563664159433, "loss": 1.8512, "step": 6327 }, { "epoch": 0.308984375, "grad_norm": 0.2868073284626007, "learning_rate": 0.0004079873838420562, "loss": 1.8341, "step": 6328 }, { "epoch": 0.309033203125, "grad_norm": 0.34076249599456787, "learning_rate": 0.0004079591278194619, "loss": 1.8473, "step": 6329 }, { "epoch": 0.30908203125, "grad_norm": 0.25941312313079834, "learning_rate": 0.00040793086857449627, "loss": 1.8466, "step": 6330 }, { "epoch": 0.309130859375, "grad_norm": 0.30895960330963135, "learning_rate": 0.0004079026061078443, "loss": 1.8435, "step": 6331 }, { "epoch": 0.3091796875, "grad_norm": 0.3660851716995239, "learning_rate": 0.00040787434042019086, "loss": 1.857, "step": 6332 }, { "epoch": 0.309228515625, "grad_norm": 0.32556384801864624, "learning_rate": 0.000407846071512221, "loss": 1.8357, "step": 6333 }, { "epoch": 0.30927734375, "grad_norm": 0.2395176738500595, "learning_rate": 0.00040781779938462, "loss": 1.832, "step": 6334 }, { "epoch": 0.309326171875, "grad_norm": 0.31385308504104614, "learning_rate": 0.00040778952403807285, "loss": 1.8711, "step": 6335 }, { "epoch": 0.309375, "grad_norm": 0.2858041524887085, "learning_rate": 0.00040776124547326495, "loss": 1.8531, "step": 6336 }, { "epoch": 0.309423828125, "grad_norm": 0.22030116617679596, "learning_rate": 0.0004077329636908815, "loss": 1.8243, "step": 6337 }, { "epoch": 0.30947265625, "grad_norm": 0.29410579800605774, "learning_rate": 0.0004077046786916081, "loss": 1.8533, "step": 6338 }, { "epoch": 0.309521484375, "grad_norm": 0.25772714614868164, "learning_rate": 0.0004076763904761302, "loss": 1.8349, "step": 6339 }, { "epoch": 0.3095703125, "grad_norm": 0.2654486894607544, "learning_rate": 0.0004076480990451334, "loss": 1.8571, "step": 6340 }, { "epoch": 0.309619140625, "grad_norm": 0.2629469931125641, "learning_rate": 0.00040761980439930337, "loss": 1.8422, "step": 6341 }, { "epoch": 0.30966796875, "grad_norm": 0.25443315505981445, "learning_rate": 0.00040759150653932574, "loss": 1.8469, "step": 6342 }, { "epoch": 0.309716796875, "grad_norm": 0.2952684760093689, "learning_rate": 0.00040756320546588655, "loss": 1.8619, "step": 6343 }, { "epoch": 0.309765625, "grad_norm": 0.2928388714790344, "learning_rate": 0.0004075349011796715, "loss": 1.8551, "step": 6344 }, { "epoch": 0.309814453125, "grad_norm": 0.30483511090278625, "learning_rate": 0.0004075065936813667, "loss": 1.8429, "step": 6345 }, { "epoch": 0.30986328125, "grad_norm": 0.3217712640762329, "learning_rate": 0.00040747828297165806, "loss": 1.8402, "step": 6346 }, { "epoch": 0.309912109375, "grad_norm": 0.2539752721786499, "learning_rate": 0.0004074499690512319, "loss": 1.8568, "step": 6347 }, { "epoch": 0.3099609375, "grad_norm": 0.26583582162857056, "learning_rate": 0.0004074216519207742, "loss": 1.8333, "step": 6348 }, { "epoch": 0.310009765625, "grad_norm": 0.24406324326992035, "learning_rate": 0.0004073933315809715, "loss": 1.8419, "step": 6349 }, { "epoch": 0.31005859375, "grad_norm": 0.2214704155921936, "learning_rate": 0.00040736500803250996, "loss": 1.8323, "step": 6350 }, { "epoch": 0.310107421875, "grad_norm": 0.25889086723327637, "learning_rate": 0.0004073366812760761, "loss": 1.8457, "step": 6351 }, { "epoch": 0.31015625, "grad_norm": 0.2432575672864914, "learning_rate": 0.0004073083513123566, "loss": 1.8601, "step": 6352 }, { "epoch": 0.310205078125, "grad_norm": 0.24270957708358765, "learning_rate": 0.0004072800181420377, "loss": 1.8495, "step": 6353 }, { "epoch": 0.31025390625, "grad_norm": 0.26411202549934387, "learning_rate": 0.0004072516817658065, "loss": 1.8842, "step": 6354 }, { "epoch": 0.310302734375, "grad_norm": 0.3737993538379669, "learning_rate": 0.0004072233421843494, "loss": 1.8618, "step": 6355 }, { "epoch": 0.3103515625, "grad_norm": 0.47415876388549805, "learning_rate": 0.0004071949993983534, "loss": 1.8646, "step": 6356 }, { "epoch": 0.310400390625, "grad_norm": 0.3845615088939667, "learning_rate": 0.00040716665340850545, "loss": 1.8437, "step": 6357 }, { "epoch": 0.31044921875, "grad_norm": 0.24837075173854828, "learning_rate": 0.00040713830421549235, "loss": 1.8171, "step": 6358 }, { "epoch": 0.310498046875, "grad_norm": 0.34345704317092896, "learning_rate": 0.0004071099518200014, "loss": 1.8575, "step": 6359 }, { "epoch": 0.310546875, "grad_norm": 0.3468643128871918, "learning_rate": 0.00040708159622271963, "loss": 1.8398, "step": 6360 }, { "epoch": 0.310595703125, "grad_norm": 0.2624472677707672, "learning_rate": 0.0004070532374243342, "loss": 1.8557, "step": 6361 }, { "epoch": 0.31064453125, "grad_norm": 0.3476787805557251, "learning_rate": 0.00040702487542553244, "loss": 1.8127, "step": 6362 }, { "epoch": 0.310693359375, "grad_norm": 0.26864826679229736, "learning_rate": 0.0004069965102270019, "loss": 1.8216, "step": 6363 }, { "epoch": 0.3107421875, "grad_norm": 0.3345113694667816, "learning_rate": 0.0004069681418294298, "loss": 1.849, "step": 6364 }, { "epoch": 0.310791015625, "grad_norm": 0.2738223373889923, "learning_rate": 0.00040693977023350376, "loss": 1.8452, "step": 6365 }, { "epoch": 0.31083984375, "grad_norm": 0.2417435646057129, "learning_rate": 0.00040691139543991143, "loss": 1.8385, "step": 6366 }, { "epoch": 0.310888671875, "grad_norm": 0.274847149848938, "learning_rate": 0.00040688301744934034, "loss": 1.8438, "step": 6367 }, { "epoch": 0.3109375, "grad_norm": 0.23758383095264435, "learning_rate": 0.0004068546362624784, "loss": 1.8514, "step": 6368 }, { "epoch": 0.310986328125, "grad_norm": 0.2735053300857544, "learning_rate": 0.00040682625188001353, "loss": 1.8433, "step": 6369 }, { "epoch": 0.31103515625, "grad_norm": 0.3053426146507263, "learning_rate": 0.0004067978643026335, "loss": 1.8556, "step": 6370 }, { "epoch": 0.311083984375, "grad_norm": 0.21531519293785095, "learning_rate": 0.0004067694735310263, "loss": 1.8524, "step": 6371 }, { "epoch": 0.3111328125, "grad_norm": 0.26253679394721985, "learning_rate": 0.00040674107956588005, "loss": 1.8633, "step": 6372 }, { "epoch": 0.311181640625, "grad_norm": 0.241708442568779, "learning_rate": 0.0004067126824078829, "loss": 1.8645, "step": 6373 }, { "epoch": 0.31123046875, "grad_norm": 0.21452590823173523, "learning_rate": 0.0004066842820577231, "loss": 1.8684, "step": 6374 }, { "epoch": 0.311279296875, "grad_norm": 0.22409915924072266, "learning_rate": 0.0004066558785160889, "loss": 1.8617, "step": 6375 }, { "epoch": 0.311328125, "grad_norm": 0.2529079020023346, "learning_rate": 0.0004066274717836688, "loss": 1.823, "step": 6376 }, { "epoch": 0.311376953125, "grad_norm": 0.3364427089691162, "learning_rate": 0.0004065990618611511, "loss": 1.8744, "step": 6377 }, { "epoch": 0.31142578125, "grad_norm": 0.3362365961074829, "learning_rate": 0.0004065706487492244, "loss": 1.8653, "step": 6378 }, { "epoch": 0.311474609375, "grad_norm": 0.2290637493133545, "learning_rate": 0.00040654223244857734, "loss": 1.8455, "step": 6379 }, { "epoch": 0.3115234375, "grad_norm": 0.29013505578041077, "learning_rate": 0.00040651381295989857, "loss": 1.8587, "step": 6380 }, { "epoch": 0.311572265625, "grad_norm": 0.31632453203201294, "learning_rate": 0.0004064853902838769, "loss": 1.8595, "step": 6381 }, { "epoch": 0.31162109375, "grad_norm": 0.269570916891098, "learning_rate": 0.0004064569644212012, "loss": 1.8622, "step": 6382 }, { "epoch": 0.311669921875, "grad_norm": 0.26879236102104187, "learning_rate": 0.0004064285353725603, "loss": 1.8433, "step": 6383 }, { "epoch": 0.31171875, "grad_norm": 0.24096499383449554, "learning_rate": 0.00040640010313864325, "loss": 1.8404, "step": 6384 }, { "epoch": 0.311767578125, "grad_norm": 0.28851351141929626, "learning_rate": 0.00040637166772013915, "loss": 1.8486, "step": 6385 }, { "epoch": 0.31181640625, "grad_norm": 0.30698978900909424, "learning_rate": 0.00040634322911773724, "loss": 1.857, "step": 6386 }, { "epoch": 0.311865234375, "grad_norm": 0.31643983721733093, "learning_rate": 0.0004063147873321266, "loss": 1.8832, "step": 6387 }, { "epoch": 0.3119140625, "grad_norm": 0.3023664057254791, "learning_rate": 0.00040628634236399653, "loss": 1.8091, "step": 6388 }, { "epoch": 0.311962890625, "grad_norm": 0.3165188133716583, "learning_rate": 0.0004062578942140365, "loss": 1.839, "step": 6389 }, { "epoch": 0.31201171875, "grad_norm": 0.23710612952709198, "learning_rate": 0.000406229442882936, "loss": 1.878, "step": 6390 }, { "epoch": 0.312060546875, "grad_norm": 0.3344832956790924, "learning_rate": 0.00040620098837138447, "loss": 1.8159, "step": 6391 }, { "epoch": 0.312109375, "grad_norm": 0.3021858036518097, "learning_rate": 0.00040617253068007156, "loss": 1.8515, "step": 6392 }, { "epoch": 0.312158203125, "grad_norm": 0.2782846987247467, "learning_rate": 0.000406144069809687, "loss": 1.8515, "step": 6393 }, { "epoch": 0.31220703125, "grad_norm": 0.3289432227611542, "learning_rate": 0.0004061156057609206, "loss": 1.8617, "step": 6394 }, { "epoch": 0.312255859375, "grad_norm": 0.2613390386104584, "learning_rate": 0.00040608713853446213, "loss": 1.8739, "step": 6395 }, { "epoch": 0.3123046875, "grad_norm": 0.24233493208885193, "learning_rate": 0.00040605866813100153, "loss": 1.8432, "step": 6396 }, { "epoch": 0.312353515625, "grad_norm": 0.3088131546974182, "learning_rate": 0.0004060301945512288, "loss": 1.847, "step": 6397 }, { "epoch": 0.31240234375, "grad_norm": 0.313270628452301, "learning_rate": 0.0004060017177958341, "loss": 1.8414, "step": 6398 }, { "epoch": 0.312451171875, "grad_norm": 0.2872362732887268, "learning_rate": 0.0004059732378655074, "loss": 1.8268, "step": 6399 }, { "epoch": 0.3125, "grad_norm": 0.3441224694252014, "learning_rate": 0.0004059447547609392, "loss": 1.8325, "step": 6400 }, { "epoch": 0.312548828125, "grad_norm": 0.29833269119262695, "learning_rate": 0.00040591626848281967, "loss": 1.845, "step": 6401 }, { "epoch": 0.31259765625, "grad_norm": 0.27336522936820984, "learning_rate": 0.0004058877790318391, "loss": 1.8312, "step": 6402 }, { "epoch": 0.312646484375, "grad_norm": 0.3685483932495117, "learning_rate": 0.0004058592864086881, "loss": 1.8478, "step": 6403 }, { "epoch": 0.3126953125, "grad_norm": 0.30214375257492065, "learning_rate": 0.00040583079061405717, "loss": 1.8381, "step": 6404 }, { "epoch": 0.312744140625, "grad_norm": 0.2532203197479248, "learning_rate": 0.0004058022916486369, "loss": 1.8423, "step": 6405 }, { "epoch": 0.31279296875, "grad_norm": 0.32957974076271057, "learning_rate": 0.000405773789513118, "loss": 1.8488, "step": 6406 }, { "epoch": 0.312841796875, "grad_norm": 0.23293691873550415, "learning_rate": 0.0004057452842081912, "loss": 1.8269, "step": 6407 }, { "epoch": 0.312890625, "grad_norm": 0.3057972192764282, "learning_rate": 0.00040571677573454754, "loss": 1.8429, "step": 6408 }, { "epoch": 0.312939453125, "grad_norm": 0.32736849784851074, "learning_rate": 0.0004056882640928777, "loss": 1.8478, "step": 6409 }, { "epoch": 0.31298828125, "grad_norm": 0.3269091546535492, "learning_rate": 0.00040565974928387285, "loss": 1.8398, "step": 6410 }, { "epoch": 0.313037109375, "grad_norm": 0.2835990786552429, "learning_rate": 0.0004056312313082239, "loss": 1.8602, "step": 6411 }, { "epoch": 0.3130859375, "grad_norm": 0.30756112933158875, "learning_rate": 0.0004056027101666222, "loss": 1.8646, "step": 6412 }, { "epoch": 0.313134765625, "grad_norm": 0.3308284878730774, "learning_rate": 0.0004055741858597589, "loss": 1.8408, "step": 6413 }, { "epoch": 0.31318359375, "grad_norm": 0.24626727402210236, "learning_rate": 0.00040554565838832524, "loss": 1.8504, "step": 6414 }, { "epoch": 0.313232421875, "grad_norm": 0.25226274132728577, "learning_rate": 0.0004055171277530127, "loss": 1.8471, "step": 6415 }, { "epoch": 0.31328125, "grad_norm": 0.2946144640445709, "learning_rate": 0.0004054885939545127, "loss": 1.8745, "step": 6416 }, { "epoch": 0.313330078125, "grad_norm": 0.21495001018047333, "learning_rate": 0.0004054600569935168, "loss": 1.8526, "step": 6417 }, { "epoch": 0.31337890625, "grad_norm": 0.26977109909057617, "learning_rate": 0.0004054315168707166, "loss": 1.8595, "step": 6418 }, { "epoch": 0.313427734375, "grad_norm": 0.21836429834365845, "learning_rate": 0.00040540297358680386, "loss": 1.8699, "step": 6419 }, { "epoch": 0.3134765625, "grad_norm": 0.2387019842863083, "learning_rate": 0.00040537442714247017, "loss": 1.834, "step": 6420 }, { "epoch": 0.313525390625, "grad_norm": 0.24977736175060272, "learning_rate": 0.00040534587753840765, "loss": 1.8337, "step": 6421 }, { "epoch": 0.31357421875, "grad_norm": 0.21588803827762604, "learning_rate": 0.000405317324775308, "loss": 1.8288, "step": 6422 }, { "epoch": 0.313623046875, "grad_norm": 0.24943183362483978, "learning_rate": 0.00040528876885386324, "loss": 1.828, "step": 6423 }, { "epoch": 0.313671875, "grad_norm": 0.25776007771492004, "learning_rate": 0.00040526020977476554, "loss": 1.854, "step": 6424 }, { "epoch": 0.313720703125, "grad_norm": 0.2679702639579773, "learning_rate": 0.00040523164753870694, "loss": 1.866, "step": 6425 }, { "epoch": 0.31376953125, "grad_norm": 0.21471305191516876, "learning_rate": 0.0004052030821463798, "loss": 1.846, "step": 6426 }, { "epoch": 0.313818359375, "grad_norm": 0.22605329751968384, "learning_rate": 0.00040517451359847636, "loss": 1.8233, "step": 6427 }, { "epoch": 0.3138671875, "grad_norm": 0.33567678928375244, "learning_rate": 0.00040514594189568903, "loss": 1.8597, "step": 6428 }, { "epoch": 0.313916015625, "grad_norm": 0.2667553126811981, "learning_rate": 0.0004051173670387102, "loss": 1.8725, "step": 6429 }, { "epoch": 0.31396484375, "grad_norm": 0.2974162697792053, "learning_rate": 0.00040508878902823246, "loss": 1.8315, "step": 6430 }, { "epoch": 0.314013671875, "grad_norm": 0.32247865200042725, "learning_rate": 0.0004050602078649484, "loss": 1.8526, "step": 6431 }, { "epoch": 0.3140625, "grad_norm": 0.3041776418685913, "learning_rate": 0.0004050316235495506, "loss": 1.8545, "step": 6432 }, { "epoch": 0.314111328125, "grad_norm": 0.27911216020584106, "learning_rate": 0.0004050030360827321, "loss": 1.8547, "step": 6433 }, { "epoch": 0.31416015625, "grad_norm": 0.2230914831161499, "learning_rate": 0.0004049744454651855, "loss": 1.8581, "step": 6434 }, { "epoch": 0.314208984375, "grad_norm": 0.21743446588516235, "learning_rate": 0.00040494585169760386, "loss": 1.8405, "step": 6435 }, { "epoch": 0.3142578125, "grad_norm": 0.25346964597702026, "learning_rate": 0.00040491725478068003, "loss": 1.8302, "step": 6436 }, { "epoch": 0.314306640625, "grad_norm": 0.32025226950645447, "learning_rate": 0.0004048886547151072, "loss": 1.8306, "step": 6437 }, { "epoch": 0.31435546875, "grad_norm": 0.30683282017707825, "learning_rate": 0.0004048600515015785, "loss": 1.8328, "step": 6438 }, { "epoch": 0.314404296875, "grad_norm": 0.31700587272644043, "learning_rate": 0.00040483144514078705, "loss": 1.8507, "step": 6439 }, { "epoch": 0.314453125, "grad_norm": 0.3060368597507477, "learning_rate": 0.0004048028356334263, "loss": 1.8308, "step": 6440 }, { "epoch": 0.314501953125, "grad_norm": 0.22490523755550385, "learning_rate": 0.00040477422298018943, "loss": 1.8689, "step": 6441 }, { "epoch": 0.31455078125, "grad_norm": 0.27952852845191956, "learning_rate": 0.0004047456071817701, "loss": 1.879, "step": 6442 }, { "epoch": 0.314599609375, "grad_norm": 0.31865236163139343, "learning_rate": 0.00040471698823886173, "loss": 1.8326, "step": 6443 }, { "epoch": 0.3146484375, "grad_norm": 0.28308433294296265, "learning_rate": 0.00040468836615215785, "loss": 1.8251, "step": 6444 }, { "epoch": 0.314697265625, "grad_norm": 0.2510574460029602, "learning_rate": 0.00040465974092235236, "loss": 1.8391, "step": 6445 }, { "epoch": 0.31474609375, "grad_norm": 0.29886919260025024, "learning_rate": 0.00040463111255013883, "loss": 1.8427, "step": 6446 }, { "epoch": 0.314794921875, "grad_norm": 0.2820591926574707, "learning_rate": 0.00040460248103621124, "loss": 1.8305, "step": 6447 }, { "epoch": 0.31484375, "grad_norm": 0.31444987654685974, "learning_rate": 0.00040457384638126323, "loss": 1.8417, "step": 6448 }, { "epoch": 0.314892578125, "grad_norm": 0.2583030164241791, "learning_rate": 0.00040454520858598905, "loss": 1.8601, "step": 6449 }, { "epoch": 0.31494140625, "grad_norm": 0.24292103946208954, "learning_rate": 0.0004045165676510826, "loss": 1.848, "step": 6450 }, { "epoch": 0.314990234375, "grad_norm": 0.23855018615722656, "learning_rate": 0.00040448792357723805, "loss": 1.8358, "step": 6451 }, { "epoch": 0.3150390625, "grad_norm": 0.2672047019004822, "learning_rate": 0.0004044592763651498, "loss": 1.8394, "step": 6452 }, { "epoch": 0.315087890625, "grad_norm": 0.23560282588005066, "learning_rate": 0.00040443062601551184, "loss": 1.835, "step": 6453 }, { "epoch": 0.31513671875, "grad_norm": 0.21927829086780548, "learning_rate": 0.0004044019725290187, "loss": 1.8433, "step": 6454 }, { "epoch": 0.315185546875, "grad_norm": 0.24114356935024261, "learning_rate": 0.0004043733159063649, "loss": 1.8414, "step": 6455 }, { "epoch": 0.315234375, "grad_norm": 0.323826402425766, "learning_rate": 0.0004043446561482448, "loss": 1.8649, "step": 6456 }, { "epoch": 0.315283203125, "grad_norm": 0.2824292778968811, "learning_rate": 0.0004043159932553529, "loss": 1.8214, "step": 6457 }, { "epoch": 0.31533203125, "grad_norm": 0.2506413161754608, "learning_rate": 0.0004042873272283842, "loss": 1.8543, "step": 6458 }, { "epoch": 0.315380859375, "grad_norm": 0.260970801115036, "learning_rate": 0.0004042586580680331, "loss": 1.8841, "step": 6459 }, { "epoch": 0.3154296875, "grad_norm": 0.23850731551647186, "learning_rate": 0.0004042299857749947, "loss": 1.8347, "step": 6460 }, { "epoch": 0.315478515625, "grad_norm": 0.22913222014904022, "learning_rate": 0.0004042013103499637, "loss": 1.8442, "step": 6461 }, { "epoch": 0.31552734375, "grad_norm": 0.26103073358535767, "learning_rate": 0.0004041726317936352, "loss": 1.8721, "step": 6462 }, { "epoch": 0.315576171875, "grad_norm": 0.2230840027332306, "learning_rate": 0.0004041439501067042, "loss": 1.8383, "step": 6463 }, { "epoch": 0.315625, "grad_norm": 0.2231300175189972, "learning_rate": 0.0004041152652898658, "loss": 1.8561, "step": 6464 }, { "epoch": 0.315673828125, "grad_norm": 0.24241895973682404, "learning_rate": 0.00040408657734381523, "loss": 1.8782, "step": 6465 }, { "epoch": 0.31572265625, "grad_norm": 0.31190013885498047, "learning_rate": 0.00040405788626924764, "loss": 1.8551, "step": 6466 }, { "epoch": 0.315771484375, "grad_norm": 0.30694931745529175, "learning_rate": 0.0004040291920668586, "loss": 1.8555, "step": 6467 }, { "epoch": 0.3158203125, "grad_norm": 0.27560535073280334, "learning_rate": 0.0004040004947373434, "loss": 1.8598, "step": 6468 }, { "epoch": 0.315869140625, "grad_norm": 0.23603525757789612, "learning_rate": 0.0004039717942813976, "loss": 1.8331, "step": 6469 }, { "epoch": 0.31591796875, "grad_norm": 0.35641446709632874, "learning_rate": 0.00040394309069971676, "loss": 1.8373, "step": 6470 }, { "epoch": 0.315966796875, "grad_norm": 0.4135001301765442, "learning_rate": 0.0004039143839929964, "loss": 1.8356, "step": 6471 }, { "epoch": 0.316015625, "grad_norm": 0.30670568346977234, "learning_rate": 0.0004038856741619325, "loss": 1.8382, "step": 6472 }, { "epoch": 0.316064453125, "grad_norm": 0.28247740864753723, "learning_rate": 0.0004038569612072207, "loss": 1.8704, "step": 6473 }, { "epoch": 0.31611328125, "grad_norm": 0.31750693917274475, "learning_rate": 0.00040382824512955696, "loss": 1.8264, "step": 6474 }, { "epoch": 0.316162109375, "grad_norm": 0.3624558746814728, "learning_rate": 0.0004037995259296371, "loss": 1.8447, "step": 6475 }, { "epoch": 0.3162109375, "grad_norm": 0.3172079622745514, "learning_rate": 0.00040377080360815736, "loss": 1.8543, "step": 6476 }, { "epoch": 0.316259765625, "grad_norm": 0.2135012000799179, "learning_rate": 0.00040374207816581373, "loss": 1.7884, "step": 6477 }, { "epoch": 0.31630859375, "grad_norm": 0.28356724977493286, "learning_rate": 0.0004037133496033024, "loss": 1.8423, "step": 6478 }, { "epoch": 0.316357421875, "grad_norm": 0.2949542999267578, "learning_rate": 0.0004036846179213196, "loss": 1.8673, "step": 6479 }, { "epoch": 0.31640625, "grad_norm": 0.29803964495658875, "learning_rate": 0.0004036558831205617, "loss": 1.8432, "step": 6480 }, { "epoch": 0.316455078125, "grad_norm": 0.3741019666194916, "learning_rate": 0.0004036271452017251, "loss": 1.8269, "step": 6481 }, { "epoch": 0.31650390625, "grad_norm": 0.3035012483596802, "learning_rate": 0.0004035984041655063, "loss": 1.8557, "step": 6482 }, { "epoch": 0.316552734375, "grad_norm": 0.3400670886039734, "learning_rate": 0.0004035696600126019, "loss": 1.8652, "step": 6483 }, { "epoch": 0.3166015625, "grad_norm": 0.42255473136901855, "learning_rate": 0.00040354091274370844, "loss": 1.8336, "step": 6484 }, { "epoch": 0.316650390625, "grad_norm": 0.28636905550956726, "learning_rate": 0.0004035121623595228, "loss": 1.845, "step": 6485 }, { "epoch": 0.31669921875, "grad_norm": 0.3478671610355377, "learning_rate": 0.0004034834088607416, "loss": 1.8609, "step": 6486 }, { "epoch": 0.316748046875, "grad_norm": 0.3144395053386688, "learning_rate": 0.00040345465224806174, "loss": 1.8366, "step": 6487 }, { "epoch": 0.316796875, "grad_norm": 0.2550588846206665, "learning_rate": 0.0004034258925221802, "loss": 1.8043, "step": 6488 }, { "epoch": 0.316845703125, "grad_norm": 0.2776491045951843, "learning_rate": 0.000403397129683794, "loss": 1.8445, "step": 6489 }, { "epoch": 0.31689453125, "grad_norm": 0.23623332381248474, "learning_rate": 0.0004033683637336002, "loss": 1.8379, "step": 6490 }, { "epoch": 0.316943359375, "grad_norm": 0.2890010476112366, "learning_rate": 0.00040333959467229595, "loss": 1.8184, "step": 6491 }, { "epoch": 0.3169921875, "grad_norm": 0.2479446828365326, "learning_rate": 0.0004033108225005785, "loss": 1.8228, "step": 6492 }, { "epoch": 0.317041015625, "grad_norm": 0.2671288549900055, "learning_rate": 0.0004032820472191452, "loss": 1.8172, "step": 6493 }, { "epoch": 0.31708984375, "grad_norm": 0.3408938944339752, "learning_rate": 0.00040325326882869353, "loss": 1.8432, "step": 6494 }, { "epoch": 0.317138671875, "grad_norm": 0.35132262110710144, "learning_rate": 0.0004032244873299207, "loss": 1.8754, "step": 6495 }, { "epoch": 0.3171875, "grad_norm": 0.2594212293624878, "learning_rate": 0.00040319570272352446, "loss": 1.8283, "step": 6496 }, { "epoch": 0.317236328125, "grad_norm": 0.24574699997901917, "learning_rate": 0.00040316691501020237, "loss": 1.8306, "step": 6497 }, { "epoch": 0.31728515625, "grad_norm": 0.29894450306892395, "learning_rate": 0.0004031381241906521, "loss": 1.8411, "step": 6498 }, { "epoch": 0.317333984375, "grad_norm": 0.24431400001049042, "learning_rate": 0.0004031093302655716, "loss": 1.8415, "step": 6499 }, { "epoch": 0.3173828125, "grad_norm": 0.23385612666606903, "learning_rate": 0.00040308053323565837, "loss": 1.8044, "step": 6500 }, { "epoch": 0.317431640625, "grad_norm": 0.2646171748638153, "learning_rate": 0.0004030517331016106, "loss": 1.8556, "step": 6501 }, { "epoch": 0.31748046875, "grad_norm": 0.2769233286380768, "learning_rate": 0.00040302292986412613, "loss": 1.8381, "step": 6502 }, { "epoch": 0.317529296875, "grad_norm": 0.21151234209537506, "learning_rate": 0.0004029941235239031, "loss": 1.8612, "step": 6503 }, { "epoch": 0.317578125, "grad_norm": 0.2639584541320801, "learning_rate": 0.0004029653140816398, "loss": 1.8566, "step": 6504 }, { "epoch": 0.317626953125, "grad_norm": 0.28900277614593506, "learning_rate": 0.00040293650153803425, "loss": 1.851, "step": 6505 }, { "epoch": 0.31767578125, "grad_norm": 0.21922913193702698, "learning_rate": 0.00040290768589378473, "loss": 1.8151, "step": 6506 }, { "epoch": 0.317724609375, "grad_norm": 0.2565918266773224, "learning_rate": 0.0004028788671495898, "loss": 1.8557, "step": 6507 }, { "epoch": 0.3177734375, "grad_norm": 0.27558213472366333, "learning_rate": 0.0004028500453061477, "loss": 1.8332, "step": 6508 }, { "epoch": 0.317822265625, "grad_norm": 0.23013925552368164, "learning_rate": 0.000402821220364157, "loss": 1.842, "step": 6509 }, { "epoch": 0.31787109375, "grad_norm": 0.2993154525756836, "learning_rate": 0.00040279239232431644, "loss": 1.8653, "step": 6510 }, { "epoch": 0.317919921875, "grad_norm": 0.2916245460510254, "learning_rate": 0.00040276356118732447, "loss": 1.8361, "step": 6511 }, { "epoch": 0.31796875, "grad_norm": 0.26233619451522827, "learning_rate": 0.0004027347269538801, "loss": 1.8627, "step": 6512 }, { "epoch": 0.318017578125, "grad_norm": 0.2839605510234833, "learning_rate": 0.00040270588962468186, "loss": 1.8361, "step": 6513 }, { "epoch": 0.31806640625, "grad_norm": 0.31277814507484436, "learning_rate": 0.0004026770492004289, "loss": 1.8478, "step": 6514 }, { "epoch": 0.318115234375, "grad_norm": 0.2944476306438446, "learning_rate": 0.0004026482056818199, "loss": 1.8365, "step": 6515 }, { "epoch": 0.3181640625, "grad_norm": 0.313126802444458, "learning_rate": 0.0004026193590695542, "loss": 1.836, "step": 6516 }, { "epoch": 0.318212890625, "grad_norm": 0.2445065826177597, "learning_rate": 0.00040259050936433086, "loss": 1.8306, "step": 6517 }, { "epoch": 0.31826171875, "grad_norm": 0.26504790782928467, "learning_rate": 0.00040256165656684897, "loss": 1.8546, "step": 6518 }, { "epoch": 0.318310546875, "grad_norm": 0.2707119286060333, "learning_rate": 0.00040253280067780784, "loss": 1.8356, "step": 6519 }, { "epoch": 0.318359375, "grad_norm": 0.30847638845443726, "learning_rate": 0.0004025039416979069, "loss": 1.8161, "step": 6520 }, { "epoch": 0.318408203125, "grad_norm": 0.3615027070045471, "learning_rate": 0.0004024750796278454, "loss": 1.8468, "step": 6521 }, { "epoch": 0.31845703125, "grad_norm": 0.27665048837661743, "learning_rate": 0.00040244621446832297, "loss": 1.8456, "step": 6522 }, { "epoch": 0.318505859375, "grad_norm": 0.32643380761146545, "learning_rate": 0.00040241734622003915, "loss": 1.8485, "step": 6523 }, { "epoch": 0.3185546875, "grad_norm": 0.338756799697876, "learning_rate": 0.0004023884748836935, "loss": 1.8413, "step": 6524 }, { "epoch": 0.318603515625, "grad_norm": 0.23536938428878784, "learning_rate": 0.0004023596004599859, "loss": 1.8412, "step": 6525 }, { "epoch": 0.31865234375, "grad_norm": 0.3270176947116852, "learning_rate": 0.00040233072294961603, "loss": 1.8476, "step": 6526 }, { "epoch": 0.318701171875, "grad_norm": 0.28918495774269104, "learning_rate": 0.00040230184235328384, "loss": 1.8499, "step": 6527 }, { "epoch": 0.31875, "grad_norm": 0.2720367908477783, "learning_rate": 0.0004022729586716892, "loss": 1.8451, "step": 6528 }, { "epoch": 0.318798828125, "grad_norm": 0.34317365288734436, "learning_rate": 0.0004022440719055322, "loss": 1.8348, "step": 6529 }, { "epoch": 0.31884765625, "grad_norm": 0.2889581620693207, "learning_rate": 0.00040221518205551285, "loss": 1.8562, "step": 6530 }, { "epoch": 0.318896484375, "grad_norm": 0.2772253155708313, "learning_rate": 0.0004021862891223313, "loss": 1.8421, "step": 6531 }, { "epoch": 0.3189453125, "grad_norm": 0.27842971682548523, "learning_rate": 0.0004021573931066879, "loss": 1.8482, "step": 6532 }, { "epoch": 0.318994140625, "grad_norm": 0.2970322370529175, "learning_rate": 0.00040212849400928294, "loss": 1.834, "step": 6533 }, { "epoch": 0.31904296875, "grad_norm": 0.3067307472229004, "learning_rate": 0.0004020995918308168, "loss": 1.8291, "step": 6534 }, { "epoch": 0.319091796875, "grad_norm": 0.29604989290237427, "learning_rate": 0.00040207068657198994, "loss": 1.8379, "step": 6535 }, { "epoch": 0.319140625, "grad_norm": 0.2949371635913849, "learning_rate": 0.00040204177823350293, "loss": 1.8581, "step": 6536 }, { "epoch": 0.319189453125, "grad_norm": 0.2676762640476227, "learning_rate": 0.0004020128668160563, "loss": 1.8692, "step": 6537 }, { "epoch": 0.31923828125, "grad_norm": 0.2595878839492798, "learning_rate": 0.0004019839523203508, "loss": 1.8075, "step": 6538 }, { "epoch": 0.319287109375, "grad_norm": 0.3307282626628876, "learning_rate": 0.00040195503474708716, "loss": 1.8453, "step": 6539 }, { "epoch": 0.3193359375, "grad_norm": 0.2850724458694458, "learning_rate": 0.0004019261140969664, "loss": 1.8362, "step": 6540 }, { "epoch": 0.319384765625, "grad_norm": 0.2931298315525055, "learning_rate": 0.0004018971903706892, "loss": 1.8258, "step": 6541 }, { "epoch": 0.31943359375, "grad_norm": 0.28027990460395813, "learning_rate": 0.0004018682635689566, "loss": 1.8416, "step": 6542 }, { "epoch": 0.319482421875, "grad_norm": 0.22728875279426575, "learning_rate": 0.0004018393336924698, "loss": 1.838, "step": 6543 }, { "epoch": 0.31953125, "grad_norm": 0.27601873874664307, "learning_rate": 0.00040181040074192977, "loss": 1.8548, "step": 6544 }, { "epoch": 0.319580078125, "grad_norm": 0.24255824089050293, "learning_rate": 0.0004017814647180378, "loss": 1.8446, "step": 6545 }, { "epoch": 0.31962890625, "grad_norm": 0.33679497241973877, "learning_rate": 0.0004017525256214953, "loss": 1.845, "step": 6546 }, { "epoch": 0.319677734375, "grad_norm": 0.3169373869895935, "learning_rate": 0.00040172358345300335, "loss": 1.8326, "step": 6547 }, { "epoch": 0.3197265625, "grad_norm": 0.23419438302516937, "learning_rate": 0.0004016946382132636, "loss": 1.8347, "step": 6548 }, { "epoch": 0.319775390625, "grad_norm": 0.2797180116176605, "learning_rate": 0.00040166568990297757, "loss": 1.8536, "step": 6549 }, { "epoch": 0.31982421875, "grad_norm": 0.3077120780944824, "learning_rate": 0.00040163673852284675, "loss": 1.8433, "step": 6550 }, { "epoch": 0.319873046875, "grad_norm": 0.2747962176799774, "learning_rate": 0.0004016077840735728, "loss": 1.8665, "step": 6551 }, { "epoch": 0.319921875, "grad_norm": 0.277017205953598, "learning_rate": 0.00040157882655585754, "loss": 1.8533, "step": 6552 }, { "epoch": 0.319970703125, "grad_norm": 0.2135210484266281, "learning_rate": 0.00040154986597040266, "loss": 1.8561, "step": 6553 }, { "epoch": 0.32001953125, "grad_norm": 0.24566498398780823, "learning_rate": 0.0004015209023179102, "loss": 1.8415, "step": 6554 }, { "epoch": 0.320068359375, "grad_norm": 0.24603962898254395, "learning_rate": 0.0004014919355990819, "loss": 1.8226, "step": 6555 }, { "epoch": 0.3201171875, "grad_norm": 0.26197579503059387, "learning_rate": 0.00040146296581462007, "loss": 1.8621, "step": 6556 }, { "epoch": 0.320166015625, "grad_norm": 0.27047351002693176, "learning_rate": 0.0004014339929652265, "loss": 1.8427, "step": 6557 }, { "epoch": 0.32021484375, "grad_norm": 0.2551276981830597, "learning_rate": 0.00040140501705160364, "loss": 1.8609, "step": 6558 }, { "epoch": 0.320263671875, "grad_norm": 0.2618183195590973, "learning_rate": 0.0004013760380744536, "loss": 1.8673, "step": 6559 }, { "epoch": 0.3203125, "grad_norm": 0.2755248248577118, "learning_rate": 0.00040134705603447874, "loss": 1.8566, "step": 6560 }, { "epoch": 0.320361328125, "grad_norm": 0.2433103322982788, "learning_rate": 0.0004013180709323816, "loss": 1.8353, "step": 6561 }, { "epoch": 0.32041015625, "grad_norm": 0.30579903721809387, "learning_rate": 0.0004012890827688644, "loss": 1.8416, "step": 6562 }, { "epoch": 0.320458984375, "grad_norm": 0.3275390565395355, "learning_rate": 0.0004012600915446298, "loss": 1.8454, "step": 6563 }, { "epoch": 0.3205078125, "grad_norm": 0.34271240234375, "learning_rate": 0.00040123109726038046, "loss": 1.8432, "step": 6564 }, { "epoch": 0.320556640625, "grad_norm": 0.35742199420928955, "learning_rate": 0.00040120209991681915, "loss": 1.8415, "step": 6565 }, { "epoch": 0.32060546875, "grad_norm": 0.32091787457466125, "learning_rate": 0.0004011730995146485, "loss": 1.8326, "step": 6566 }, { "epoch": 0.320654296875, "grad_norm": 0.40343591570854187, "learning_rate": 0.0004011440960545714, "loss": 1.8446, "step": 6567 }, { "epoch": 0.320703125, "grad_norm": 0.3522791266441345, "learning_rate": 0.0004011150895372908, "loss": 1.8229, "step": 6568 }, { "epoch": 0.320751953125, "grad_norm": 0.30795320868492126, "learning_rate": 0.0004010860799635097, "loss": 1.8537, "step": 6569 }, { "epoch": 0.32080078125, "grad_norm": 0.395616352558136, "learning_rate": 0.0004010570673339311, "loss": 1.8482, "step": 6570 }, { "epoch": 0.320849609375, "grad_norm": 0.23019014298915863, "learning_rate": 0.0004010280516492583, "loss": 1.8432, "step": 6571 }, { "epoch": 0.3208984375, "grad_norm": 0.3502538800239563, "learning_rate": 0.0004009990329101944, "loss": 1.8271, "step": 6572 }, { "epoch": 0.320947265625, "grad_norm": 0.34426987171173096, "learning_rate": 0.0004009700111174427, "loss": 1.8408, "step": 6573 }, { "epoch": 0.32099609375, "grad_norm": 0.30247870087623596, "learning_rate": 0.0004009409862717066, "loss": 1.8516, "step": 6574 }, { "epoch": 0.321044921875, "grad_norm": 0.3992679715156555, "learning_rate": 0.0004009119583736894, "loss": 1.863, "step": 6575 }, { "epoch": 0.32109375, "grad_norm": 0.22510525584220886, "learning_rate": 0.00040088292742409474, "loss": 1.8478, "step": 6576 }, { "epoch": 0.321142578125, "grad_norm": 0.38941532373428345, "learning_rate": 0.0004008538934236263, "loss": 1.7789, "step": 6577 }, { "epoch": 0.32119140625, "grad_norm": 0.3770432770252228, "learning_rate": 0.0004008248563729876, "loss": 1.8494, "step": 6578 }, { "epoch": 0.321240234375, "grad_norm": 0.3053756058216095, "learning_rate": 0.00040079581627288247, "loss": 1.8151, "step": 6579 }, { "epoch": 0.3212890625, "grad_norm": 0.3426252603530884, "learning_rate": 0.0004007667731240146, "loss": 1.8605, "step": 6580 }, { "epoch": 0.321337890625, "grad_norm": 0.23997311294078827, "learning_rate": 0.00040073772692708805, "loss": 1.8299, "step": 6581 }, { "epoch": 0.32138671875, "grad_norm": 0.40063562989234924, "learning_rate": 0.0004007086776828065, "loss": 1.8479, "step": 6582 }, { "epoch": 0.321435546875, "grad_norm": 0.29762521386146545, "learning_rate": 0.0004006796253918742, "loss": 1.8668, "step": 6583 }, { "epoch": 0.321484375, "grad_norm": 0.25928524136543274, "learning_rate": 0.00040065057005499535, "loss": 1.8555, "step": 6584 }, { "epoch": 0.321533203125, "grad_norm": 0.3405742049217224, "learning_rate": 0.00040062151167287387, "loss": 1.8561, "step": 6585 }, { "epoch": 0.32158203125, "grad_norm": 0.20935383439064026, "learning_rate": 0.00040059245024621413, "loss": 1.824, "step": 6586 }, { "epoch": 0.321630859375, "grad_norm": 0.25666871666908264, "learning_rate": 0.0004005633857757205, "loss": 1.842, "step": 6587 }, { "epoch": 0.3216796875, "grad_norm": 0.28138288855552673, "learning_rate": 0.00040053431826209736, "loss": 1.846, "step": 6588 }, { "epoch": 0.321728515625, "grad_norm": 0.24716119468212128, "learning_rate": 0.0004005052477060491, "loss": 1.8562, "step": 6589 }, { "epoch": 0.32177734375, "grad_norm": 0.2390487641096115, "learning_rate": 0.0004004761741082803, "loss": 1.8482, "step": 6590 }, { "epoch": 0.321826171875, "grad_norm": 0.22424350678920746, "learning_rate": 0.00040044709746949565, "loss": 1.8376, "step": 6591 }, { "epoch": 0.321875, "grad_norm": 0.23378624022006989, "learning_rate": 0.0004004180177903999, "loss": 1.8556, "step": 6592 }, { "epoch": 0.321923828125, "grad_norm": 0.276327908039093, "learning_rate": 0.0004003889350716976, "loss": 1.8186, "step": 6593 }, { "epoch": 0.32197265625, "grad_norm": 0.26780086755752563, "learning_rate": 0.00040035984931409375, "loss": 1.8482, "step": 6594 }, { "epoch": 0.322021484375, "grad_norm": 0.23027601838111877, "learning_rate": 0.0004003307605182933, "loss": 1.8209, "step": 6595 }, { "epoch": 0.3220703125, "grad_norm": 0.26230090856552124, "learning_rate": 0.00040030166868500124, "loss": 1.8363, "step": 6596 }, { "epoch": 0.322119140625, "grad_norm": 0.2950122654438019, "learning_rate": 0.0004002725738149224, "loss": 1.8649, "step": 6597 }, { "epoch": 0.32216796875, "grad_norm": 0.28552716970443726, "learning_rate": 0.00040024347590876216, "loss": 1.8267, "step": 6598 }, { "epoch": 0.322216796875, "grad_norm": 0.26697948575019836, "learning_rate": 0.00040021437496722575, "loss": 1.8441, "step": 6599 }, { "epoch": 0.322265625, "grad_norm": 0.29579824209213257, "learning_rate": 0.00040018527099101826, "loss": 1.8226, "step": 6600 }, { "epoch": 0.322314453125, "grad_norm": 0.27991315722465515, "learning_rate": 0.0004001561639808452, "loss": 1.811, "step": 6601 }, { "epoch": 0.32236328125, "grad_norm": 0.22398880124092102, "learning_rate": 0.00040012705393741195, "loss": 1.8319, "step": 6602 }, { "epoch": 0.322412109375, "grad_norm": 0.2713748514652252, "learning_rate": 0.00040009794086142396, "loss": 1.8447, "step": 6603 }, { "epoch": 0.3224609375, "grad_norm": 0.3403099775314331, "learning_rate": 0.00040006882475358694, "loss": 1.8505, "step": 6604 }, { "epoch": 0.322509765625, "grad_norm": 0.2610081732273102, "learning_rate": 0.00040003970561460644, "loss": 1.8436, "step": 6605 }, { "epoch": 0.32255859375, "grad_norm": 0.30599191784858704, "learning_rate": 0.0004000105834451883, "loss": 1.8499, "step": 6606 }, { "epoch": 0.322607421875, "grad_norm": 0.29501020908355713, "learning_rate": 0.00039998145824603817, "loss": 1.8563, "step": 6607 }, { "epoch": 0.32265625, "grad_norm": 0.30983999371528625, "learning_rate": 0.0003999523300178619, "loss": 1.8417, "step": 6608 }, { "epoch": 0.322705078125, "grad_norm": 0.2231898307800293, "learning_rate": 0.00039992319876136566, "loss": 1.838, "step": 6609 }, { "epoch": 0.32275390625, "grad_norm": 0.31024235486984253, "learning_rate": 0.00039989406447725527, "loss": 1.8403, "step": 6610 }, { "epoch": 0.322802734375, "grad_norm": 0.31695541739463806, "learning_rate": 0.00039986492716623695, "loss": 1.851, "step": 6611 }, { "epoch": 0.3228515625, "grad_norm": 0.26220521330833435, "learning_rate": 0.00039983578682901673, "loss": 1.8494, "step": 6612 }, { "epoch": 0.322900390625, "grad_norm": 0.27775833010673523, "learning_rate": 0.000399806643466301, "loss": 1.848, "step": 6613 }, { "epoch": 0.32294921875, "grad_norm": 0.23295259475708008, "learning_rate": 0.0003997774970787959, "loss": 1.8507, "step": 6614 }, { "epoch": 0.322998046875, "grad_norm": 0.2681960165500641, "learning_rate": 0.000399748347667208, "loss": 1.8524, "step": 6615 }, { "epoch": 0.323046875, "grad_norm": 0.3248581886291504, "learning_rate": 0.0003997191952322437, "loss": 1.8304, "step": 6616 }, { "epoch": 0.323095703125, "grad_norm": 0.3141895532608032, "learning_rate": 0.00039969003977460946, "loss": 1.8497, "step": 6617 }, { "epoch": 0.32314453125, "grad_norm": 0.27319490909576416, "learning_rate": 0.0003996608812950119, "loss": 1.8452, "step": 6618 }, { "epoch": 0.323193359375, "grad_norm": 0.2937765419483185, "learning_rate": 0.0003996317197941577, "loss": 1.8522, "step": 6619 }, { "epoch": 0.3232421875, "grad_norm": 0.22082503139972687, "learning_rate": 0.00039960255527275376, "loss": 1.8409, "step": 6620 }, { "epoch": 0.323291015625, "grad_norm": 0.2461077868938446, "learning_rate": 0.0003995733877315067, "loss": 1.8512, "step": 6621 }, { "epoch": 0.32333984375, "grad_norm": 0.26274287700653076, "learning_rate": 0.00039954421717112353, "loss": 1.8457, "step": 6622 }, { "epoch": 0.323388671875, "grad_norm": 0.23242124915122986, "learning_rate": 0.00039951504359231116, "loss": 1.865, "step": 6623 }, { "epoch": 0.3234375, "grad_norm": 0.2307821810245514, "learning_rate": 0.00039948586699577675, "loss": 1.8263, "step": 6624 }, { "epoch": 0.323486328125, "grad_norm": 0.23322132229804993, "learning_rate": 0.00039945668738222733, "loss": 1.8426, "step": 6625 }, { "epoch": 0.32353515625, "grad_norm": 0.26234254240989685, "learning_rate": 0.00039942750475237, "loss": 1.8504, "step": 6626 }, { "epoch": 0.323583984375, "grad_norm": 0.23836320638656616, "learning_rate": 0.0003993983191069122, "loss": 1.8478, "step": 6627 }, { "epoch": 0.3236328125, "grad_norm": 0.26388707756996155, "learning_rate": 0.00039936913044656127, "loss": 1.8331, "step": 6628 }, { "epoch": 0.323681640625, "grad_norm": 0.25840136408805847, "learning_rate": 0.00039933993877202444, "loss": 1.8171, "step": 6629 }, { "epoch": 0.32373046875, "grad_norm": 0.2461990863084793, "learning_rate": 0.0003993107440840094, "loss": 1.8265, "step": 6630 }, { "epoch": 0.323779296875, "grad_norm": 0.25430577993392944, "learning_rate": 0.0003992815463832235, "loss": 1.8298, "step": 6631 }, { "epoch": 0.323828125, "grad_norm": 0.26505807042121887, "learning_rate": 0.0003992523456703746, "loss": 1.8567, "step": 6632 }, { "epoch": 0.323876953125, "grad_norm": 0.22326336801052094, "learning_rate": 0.00039922314194617013, "loss": 1.8448, "step": 6633 }, { "epoch": 0.32392578125, "grad_norm": 0.23425275087356567, "learning_rate": 0.0003991939352113181, "loss": 1.8525, "step": 6634 }, { "epoch": 0.323974609375, "grad_norm": 0.24943336844444275, "learning_rate": 0.00039916472546652624, "loss": 1.8309, "step": 6635 }, { "epoch": 0.3240234375, "grad_norm": 0.2509312629699707, "learning_rate": 0.00039913551271250256, "loss": 1.8322, "step": 6636 }, { "epoch": 0.324072265625, "grad_norm": 0.26233795285224915, "learning_rate": 0.00039910629694995495, "loss": 1.8272, "step": 6637 }, { "epoch": 0.32412109375, "grad_norm": 0.3032820522785187, "learning_rate": 0.00039907707817959156, "loss": 1.8289, "step": 6638 }, { "epoch": 0.324169921875, "grad_norm": 0.2815743386745453, "learning_rate": 0.0003990478564021205, "loss": 1.8451, "step": 6639 }, { "epoch": 0.32421875, "grad_norm": 0.2267923504114151, "learning_rate": 0.0003990186316182499, "loss": 1.8591, "step": 6640 }, { "epoch": 0.324267578125, "grad_norm": 0.2769520878791809, "learning_rate": 0.0003989894038286882, "loss": 1.8488, "step": 6641 }, { "epoch": 0.32431640625, "grad_norm": 0.3530632555484772, "learning_rate": 0.0003989601730341437, "loss": 1.8238, "step": 6642 }, { "epoch": 0.324365234375, "grad_norm": 0.31656593084335327, "learning_rate": 0.00039893093923532475, "loss": 1.8294, "step": 6643 }, { "epoch": 0.3244140625, "grad_norm": 0.2773861289024353, "learning_rate": 0.00039890170243294, "loss": 1.8382, "step": 6644 }, { "epoch": 0.324462890625, "grad_norm": 0.329078733921051, "learning_rate": 0.00039887246262769796, "loss": 1.841, "step": 6645 }, { "epoch": 0.32451171875, "grad_norm": 0.27193954586982727, "learning_rate": 0.0003988432198203072, "loss": 1.8352, "step": 6646 }, { "epoch": 0.324560546875, "grad_norm": 0.37838014960289, "learning_rate": 0.00039881397401147654, "loss": 1.8329, "step": 6647 }, { "epoch": 0.324609375, "grad_norm": 0.3322335481643677, "learning_rate": 0.00039878472520191475, "loss": 1.8608, "step": 6648 }, { "epoch": 0.324658203125, "grad_norm": 0.2820621132850647, "learning_rate": 0.00039875547339233075, "loss": 1.8307, "step": 6649 }, { "epoch": 0.32470703125, "grad_norm": 0.33550024032592773, "learning_rate": 0.0003987262185834334, "loss": 1.8338, "step": 6650 }, { "epoch": 0.324755859375, "grad_norm": 0.22097627818584442, "learning_rate": 0.00039869696077593167, "loss": 1.869, "step": 6651 }, { "epoch": 0.3248046875, "grad_norm": 0.29137229919433594, "learning_rate": 0.0003986676999705348, "loss": 1.8324, "step": 6652 }, { "epoch": 0.324853515625, "grad_norm": 0.2186134159564972, "learning_rate": 0.0003986384361679518, "loss": 1.8342, "step": 6653 }, { "epoch": 0.32490234375, "grad_norm": 0.29372870922088623, "learning_rate": 0.000398609169368892, "loss": 1.8484, "step": 6654 }, { "epoch": 0.324951171875, "grad_norm": 0.33985817432403564, "learning_rate": 0.0003985798995740647, "loss": 1.8872, "step": 6655 }, { "epoch": 0.325, "grad_norm": 0.3923836052417755, "learning_rate": 0.0003985506267841792, "loss": 1.8327, "step": 6656 }, { "epoch": 0.325048828125, "grad_norm": 0.3693123757839203, "learning_rate": 0.000398521350999945, "loss": 1.8643, "step": 6657 }, { "epoch": 0.32509765625, "grad_norm": 0.2888534665107727, "learning_rate": 0.0003984920722220716, "loss": 1.8252, "step": 6658 }, { "epoch": 0.325146484375, "grad_norm": 0.35342586040496826, "learning_rate": 0.0003984627904512687, "loss": 1.8575, "step": 6659 }, { "epoch": 0.3251953125, "grad_norm": 0.34130528569221497, "learning_rate": 0.0003984335056882459, "loss": 1.8144, "step": 6660 }, { "epoch": 0.325244140625, "grad_norm": 0.35409703850746155, "learning_rate": 0.00039840421793371284, "loss": 1.8539, "step": 6661 }, { "epoch": 0.32529296875, "grad_norm": 0.2788334786891937, "learning_rate": 0.00039837492718837946, "loss": 1.835, "step": 6662 }, { "epoch": 0.325341796875, "grad_norm": 0.31517845392227173, "learning_rate": 0.0003983456334529556, "loss": 1.8483, "step": 6663 }, { "epoch": 0.325390625, "grad_norm": 0.33151301741600037, "learning_rate": 0.0003983163367281512, "loss": 1.8572, "step": 6664 }, { "epoch": 0.325439453125, "grad_norm": 0.24999795854091644, "learning_rate": 0.0003982870370146763, "loss": 1.8498, "step": 6665 }, { "epoch": 0.32548828125, "grad_norm": 0.3290036916732788, "learning_rate": 0.000398257734313241, "loss": 1.8364, "step": 6666 }, { "epoch": 0.325537109375, "grad_norm": 0.2367064207792282, "learning_rate": 0.0003982284286245556, "loss": 1.8673, "step": 6667 }, { "epoch": 0.3255859375, "grad_norm": 0.3367582857608795, "learning_rate": 0.0003981991199493301, "loss": 1.8481, "step": 6668 }, { "epoch": 0.325634765625, "grad_norm": 0.26671352982521057, "learning_rate": 0.000398169808288275, "loss": 1.827, "step": 6669 }, { "epoch": 0.32568359375, "grad_norm": 0.2681376338005066, "learning_rate": 0.0003981404936421006, "loss": 1.8465, "step": 6670 }, { "epoch": 0.325732421875, "grad_norm": 0.2539014518260956, "learning_rate": 0.0003981111760115174, "loss": 1.838, "step": 6671 }, { "epoch": 0.32578125, "grad_norm": 0.30468231439590454, "learning_rate": 0.0003980818553972361, "loss": 1.8388, "step": 6672 }, { "epoch": 0.325830078125, "grad_norm": 0.2698807716369629, "learning_rate": 0.00039805253179996705, "loss": 1.8583, "step": 6673 }, { "epoch": 0.32587890625, "grad_norm": 0.24240325391292572, "learning_rate": 0.000398023205220421, "loss": 1.8341, "step": 6674 }, { "epoch": 0.325927734375, "grad_norm": 0.2510271668434143, "learning_rate": 0.00039799387565930876, "loss": 1.8388, "step": 6675 }, { "epoch": 0.3259765625, "grad_norm": 0.2995648980140686, "learning_rate": 0.0003979645431173411, "loss": 1.8281, "step": 6676 }, { "epoch": 0.326025390625, "grad_norm": 0.2376679629087448, "learning_rate": 0.00039793520759522907, "loss": 1.8296, "step": 6677 }, { "epoch": 0.32607421875, "grad_norm": 0.26603758335113525, "learning_rate": 0.00039790586909368335, "loss": 1.8438, "step": 6678 }, { "epoch": 0.326123046875, "grad_norm": 0.22713695466518402, "learning_rate": 0.0003978765276134152, "loss": 1.836, "step": 6679 }, { "epoch": 0.326171875, "grad_norm": 0.2308286726474762, "learning_rate": 0.0003978471831551358, "loss": 1.8449, "step": 6680 }, { "epoch": 0.326220703125, "grad_norm": 0.2677105665206909, "learning_rate": 0.0003978178357195561, "loss": 1.8404, "step": 6681 }, { "epoch": 0.32626953125, "grad_norm": 0.28905659914016724, "learning_rate": 0.00039778848530738753, "loss": 1.8306, "step": 6682 }, { "epoch": 0.326318359375, "grad_norm": 0.28413498401641846, "learning_rate": 0.0003977591319193413, "loss": 1.8392, "step": 6683 }, { "epoch": 0.3263671875, "grad_norm": 0.3790974020957947, "learning_rate": 0.000397729775556129, "loss": 1.8463, "step": 6684 }, { "epoch": 0.326416015625, "grad_norm": 0.3366568386554718, "learning_rate": 0.0003977004162184619, "loss": 1.8422, "step": 6685 }, { "epoch": 0.32646484375, "grad_norm": 0.31119608879089355, "learning_rate": 0.00039767105390705167, "loss": 1.8376, "step": 6686 }, { "epoch": 0.326513671875, "grad_norm": 0.32053300738334656, "learning_rate": 0.00039764168862260985, "loss": 1.8391, "step": 6687 }, { "epoch": 0.3265625, "grad_norm": 0.23817656934261322, "learning_rate": 0.00039761232036584826, "loss": 1.8546, "step": 6688 }, { "epoch": 0.326611328125, "grad_norm": 0.2893178164958954, "learning_rate": 0.0003975829491374785, "loss": 1.84, "step": 6689 }, { "epoch": 0.32666015625, "grad_norm": 0.2866966724395752, "learning_rate": 0.0003975535749382125, "loss": 1.852, "step": 6690 }, { "epoch": 0.326708984375, "grad_norm": 0.20979757606983185, "learning_rate": 0.00039752419776876226, "loss": 1.8397, "step": 6691 }, { "epoch": 0.3267578125, "grad_norm": 0.3194981515407562, "learning_rate": 0.00039749481762983957, "loss": 1.8177, "step": 6692 }, { "epoch": 0.326806640625, "grad_norm": 0.24819420278072357, "learning_rate": 0.00039746543452215654, "loss": 1.8322, "step": 6693 }, { "epoch": 0.32685546875, "grad_norm": 0.2354804128408432, "learning_rate": 0.00039743604844642535, "loss": 1.8459, "step": 6694 }, { "epoch": 0.326904296875, "grad_norm": 0.24090376496315002, "learning_rate": 0.00039740665940335814, "loss": 1.8604, "step": 6695 }, { "epoch": 0.326953125, "grad_norm": 0.23176299035549164, "learning_rate": 0.0003973772673936672, "loss": 1.8527, "step": 6696 }, { "epoch": 0.327001953125, "grad_norm": 0.27140116691589355, "learning_rate": 0.0003973478724180649, "loss": 1.825, "step": 6697 }, { "epoch": 0.32705078125, "grad_norm": 0.2791403532028198, "learning_rate": 0.0003973184744772636, "loss": 1.8333, "step": 6698 }, { "epoch": 0.327099609375, "grad_norm": 0.29251551628112793, "learning_rate": 0.0003972890735719758, "loss": 1.8637, "step": 6699 }, { "epoch": 0.3271484375, "grad_norm": 0.26838237047195435, "learning_rate": 0.000397259669702914, "loss": 1.8434, "step": 6700 }, { "epoch": 0.327197265625, "grad_norm": 0.3194736838340759, "learning_rate": 0.000397230262870791, "loss": 1.8423, "step": 6701 }, { "epoch": 0.32724609375, "grad_norm": 0.29599541425704956, "learning_rate": 0.00039720085307631933, "loss": 1.8525, "step": 6702 }, { "epoch": 0.327294921875, "grad_norm": 0.2625702917575836, "learning_rate": 0.00039717144032021175, "loss": 1.8106, "step": 6703 }, { "epoch": 0.32734375, "grad_norm": 0.27132001519203186, "learning_rate": 0.0003971420246031812, "loss": 1.8881, "step": 6704 }, { "epoch": 0.327392578125, "grad_norm": 0.24689318239688873, "learning_rate": 0.0003971126059259406, "loss": 1.8158, "step": 6705 }, { "epoch": 0.32744140625, "grad_norm": 0.2840946316719055, "learning_rate": 0.00039708318428920283, "loss": 1.8594, "step": 6706 }, { "epoch": 0.327490234375, "grad_norm": 0.31148669123649597, "learning_rate": 0.00039705375969368114, "loss": 1.8316, "step": 6707 }, { "epoch": 0.3275390625, "grad_norm": 0.401507705450058, "learning_rate": 0.0003970243321400883, "loss": 1.8434, "step": 6708 }, { "epoch": 0.327587890625, "grad_norm": 0.3074403703212738, "learning_rate": 0.0003969949016291379, "loss": 1.8372, "step": 6709 }, { "epoch": 0.32763671875, "grad_norm": 0.24280856549739838, "learning_rate": 0.000396965468161543, "loss": 1.8266, "step": 6710 }, { "epoch": 0.327685546875, "grad_norm": 0.366835355758667, "learning_rate": 0.000396936031738017, "loss": 1.8565, "step": 6711 }, { "epoch": 0.327734375, "grad_norm": 0.3573281764984131, "learning_rate": 0.00039690659235927327, "loss": 1.8481, "step": 6712 }, { "epoch": 0.327783203125, "grad_norm": 0.25144314765930176, "learning_rate": 0.00039687715002602536, "loss": 1.872, "step": 6713 }, { "epoch": 0.32783203125, "grad_norm": 0.2868261933326721, "learning_rate": 0.00039684770473898687, "loss": 1.8421, "step": 6714 }, { "epoch": 0.327880859375, "grad_norm": 0.2912626564502716, "learning_rate": 0.0003968182564988713, "loss": 1.8704, "step": 6715 }, { "epoch": 0.3279296875, "grad_norm": 0.28992119431495667, "learning_rate": 0.0003967888053063925, "loss": 1.8413, "step": 6716 }, { "epoch": 0.327978515625, "grad_norm": 0.3893255293369293, "learning_rate": 0.0003967593511622641, "loss": 1.8167, "step": 6717 }, { "epoch": 0.32802734375, "grad_norm": 0.33440470695495605, "learning_rate": 0.00039672989406719996, "loss": 1.8447, "step": 6718 }, { "epoch": 0.328076171875, "grad_norm": 0.2301725298166275, "learning_rate": 0.00039670043402191405, "loss": 1.8495, "step": 6719 }, { "epoch": 0.328125, "grad_norm": 0.34473446011543274, "learning_rate": 0.0003966709710271204, "loss": 1.8092, "step": 6720 }, { "epoch": 0.328173828125, "grad_norm": 0.25278112292289734, "learning_rate": 0.0003966415050835329, "loss": 1.8344, "step": 6721 }, { "epoch": 0.32822265625, "grad_norm": 0.26453855633735657, "learning_rate": 0.000396612036191866, "loss": 1.8665, "step": 6722 }, { "epoch": 0.328271484375, "grad_norm": 0.28107717633247375, "learning_rate": 0.00039658256435283354, "loss": 1.8241, "step": 6723 }, { "epoch": 0.3283203125, "grad_norm": 0.26090675592422485, "learning_rate": 0.00039655308956715004, "loss": 1.8616, "step": 6724 }, { "epoch": 0.328369140625, "grad_norm": 0.2771492302417755, "learning_rate": 0.0003965236118355297, "loss": 1.8385, "step": 6725 }, { "epoch": 0.32841796875, "grad_norm": 0.2848857343196869, "learning_rate": 0.000396494131158687, "loss": 1.8392, "step": 6726 }, { "epoch": 0.328466796875, "grad_norm": 0.21424205601215363, "learning_rate": 0.00039646464753733643, "loss": 1.8341, "step": 6727 }, { "epoch": 0.328515625, "grad_norm": 0.32760801911354065, "learning_rate": 0.00039643516097219255, "loss": 1.8303, "step": 6728 }, { "epoch": 0.328564453125, "grad_norm": 0.25838562846183777, "learning_rate": 0.00039640567146397, "loss": 1.8452, "step": 6729 }, { "epoch": 0.32861328125, "grad_norm": 0.23207370936870575, "learning_rate": 0.0003963761790133835, "loss": 1.8434, "step": 6730 }, { "epoch": 0.328662109375, "grad_norm": 0.29332876205444336, "learning_rate": 0.0003963466836211477, "loss": 1.8172, "step": 6731 }, { "epoch": 0.3287109375, "grad_norm": 0.2824487090110779, "learning_rate": 0.00039631718528797754, "loss": 1.8447, "step": 6732 }, { "epoch": 0.328759765625, "grad_norm": 0.2804636061191559, "learning_rate": 0.000396287684014588, "loss": 1.8382, "step": 6733 }, { "epoch": 0.32880859375, "grad_norm": 0.3406933844089508, "learning_rate": 0.00039625817980169396, "loss": 1.8418, "step": 6734 }, { "epoch": 0.328857421875, "grad_norm": 0.33312225341796875, "learning_rate": 0.0003962286726500105, "loss": 1.8184, "step": 6735 }, { "epoch": 0.32890625, "grad_norm": 0.24386221170425415, "learning_rate": 0.00039619916256025277, "loss": 1.8134, "step": 6736 }, { "epoch": 0.328955078125, "grad_norm": 0.3451048731803894, "learning_rate": 0.000396169649533136, "loss": 1.8127, "step": 6737 }, { "epoch": 0.32900390625, "grad_norm": 0.37467193603515625, "learning_rate": 0.0003961401335693754, "loss": 1.8455, "step": 6738 }, { "epoch": 0.329052734375, "grad_norm": 0.29919567704200745, "learning_rate": 0.0003961106146696864, "loss": 1.851, "step": 6739 }, { "epoch": 0.3291015625, "grad_norm": 0.3162961006164551, "learning_rate": 0.00039608109283478435, "loss": 1.8358, "step": 6740 }, { "epoch": 0.329150390625, "grad_norm": 0.2677229642868042, "learning_rate": 0.0003960515680653847, "loss": 1.8501, "step": 6741 }, { "epoch": 0.32919921875, "grad_norm": 0.27246981859207153, "learning_rate": 0.00039602204036220294, "loss": 1.8625, "step": 6742 }, { "epoch": 0.329248046875, "grad_norm": 0.26943209767341614, "learning_rate": 0.0003959925097259549, "loss": 1.8642, "step": 6743 }, { "epoch": 0.329296875, "grad_norm": 0.251084566116333, "learning_rate": 0.0003959629761573562, "loss": 1.839, "step": 6744 }, { "epoch": 0.329345703125, "grad_norm": 0.2937658429145813, "learning_rate": 0.00039593343965712256, "loss": 1.8272, "step": 6745 }, { "epoch": 0.32939453125, "grad_norm": 0.20290575921535492, "learning_rate": 0.0003959039002259699, "loss": 1.8539, "step": 6746 }, { "epoch": 0.329443359375, "grad_norm": 0.2829497158527374, "learning_rate": 0.0003958743578646141, "loss": 1.8651, "step": 6747 }, { "epoch": 0.3294921875, "grad_norm": 0.30809950828552246, "learning_rate": 0.00039584481257377103, "loss": 1.8657, "step": 6748 }, { "epoch": 0.329541015625, "grad_norm": 0.2919938862323761, "learning_rate": 0.0003958152643541569, "loss": 1.8416, "step": 6749 }, { "epoch": 0.32958984375, "grad_norm": 0.2791389524936676, "learning_rate": 0.00039578571320648774, "loss": 1.853, "step": 6750 }, { "epoch": 0.329638671875, "grad_norm": 0.3078036606311798, "learning_rate": 0.00039575615913147984, "loss": 1.864, "step": 6751 }, { "epoch": 0.3296875, "grad_norm": 0.2632100284099579, "learning_rate": 0.00039572660212984934, "loss": 1.8469, "step": 6752 }, { "epoch": 0.329736328125, "grad_norm": 0.2718328833580017, "learning_rate": 0.0003956970422023127, "loss": 1.8621, "step": 6753 }, { "epoch": 0.32978515625, "grad_norm": 0.2760842740535736, "learning_rate": 0.0003956674793495862, "loss": 1.8793, "step": 6754 }, { "epoch": 0.329833984375, "grad_norm": 0.2674649953842163, "learning_rate": 0.0003956379135723865, "loss": 1.822, "step": 6755 }, { "epoch": 0.3298828125, "grad_norm": 0.29070815443992615, "learning_rate": 0.00039560834487143006, "loss": 1.8383, "step": 6756 }, { "epoch": 0.329931640625, "grad_norm": 0.27577683329582214, "learning_rate": 0.00039557877324743337, "loss": 1.8405, "step": 6757 }, { "epoch": 0.32998046875, "grad_norm": 0.2741076350212097, "learning_rate": 0.00039554919870111327, "loss": 1.8461, "step": 6758 }, { "epoch": 0.330029296875, "grad_norm": 0.2821826934814453, "learning_rate": 0.0003955196212331866, "loss": 1.8437, "step": 6759 }, { "epoch": 0.330078125, "grad_norm": 0.2946246266365051, "learning_rate": 0.0003954900408443699, "loss": 1.841, "step": 6760 }, { "epoch": 0.330126953125, "grad_norm": 0.24029900133609772, "learning_rate": 0.0003954604575353804, "loss": 1.8595, "step": 6761 }, { "epoch": 0.33017578125, "grad_norm": 0.28227078914642334, "learning_rate": 0.0003954308713069349, "loss": 1.8274, "step": 6762 }, { "epoch": 0.330224609375, "grad_norm": 0.27248722314834595, "learning_rate": 0.00039540128215975046, "loss": 1.8426, "step": 6763 }, { "epoch": 0.3302734375, "grad_norm": 0.23833701014518738, "learning_rate": 0.0003953716900945442, "loss": 1.8608, "step": 6764 }, { "epoch": 0.330322265625, "grad_norm": 0.2663269340991974, "learning_rate": 0.0003953420951120334, "loss": 1.8255, "step": 6765 }, { "epoch": 0.33037109375, "grad_norm": 0.31538844108581543, "learning_rate": 0.0003953124972129352, "loss": 1.8628, "step": 6766 }, { "epoch": 0.330419921875, "grad_norm": 0.3172428011894226, "learning_rate": 0.000395282896397967, "loss": 1.7923, "step": 6767 }, { "epoch": 0.33046875, "grad_norm": 0.31808847188949585, "learning_rate": 0.0003952532926678461, "loss": 1.8128, "step": 6768 }, { "epoch": 0.330517578125, "grad_norm": 0.34081539511680603, "learning_rate": 0.00039522368602329014, "loss": 1.8445, "step": 6769 }, { "epoch": 0.33056640625, "grad_norm": 0.29003316164016724, "learning_rate": 0.00039519407646501647, "loss": 1.8444, "step": 6770 }, { "epoch": 0.330615234375, "grad_norm": 0.3045824468135834, "learning_rate": 0.0003951644639937428, "loss": 1.8251, "step": 6771 }, { "epoch": 0.3306640625, "grad_norm": 0.34676051139831543, "learning_rate": 0.0003951348486101869, "loss": 1.8263, "step": 6772 }, { "epoch": 0.330712890625, "grad_norm": 0.2928166687488556, "learning_rate": 0.0003951052303150664, "loss": 1.8463, "step": 6773 }, { "epoch": 0.33076171875, "grad_norm": 0.3273351192474365, "learning_rate": 0.0003950756091090991, "loss": 1.8709, "step": 6774 }, { "epoch": 0.330810546875, "grad_norm": 0.3303337097167969, "learning_rate": 0.0003950459849930029, "loss": 1.8695, "step": 6775 }, { "epoch": 0.330859375, "grad_norm": 0.31907567381858826, "learning_rate": 0.0003950163579674959, "loss": 1.8599, "step": 6776 }, { "epoch": 0.330908203125, "grad_norm": 0.34198620915412903, "learning_rate": 0.00039498672803329603, "loss": 1.8417, "step": 6777 }, { "epoch": 0.33095703125, "grad_norm": 0.2431255280971527, "learning_rate": 0.0003949570951911214, "loss": 1.8548, "step": 6778 }, { "epoch": 0.331005859375, "grad_norm": 0.3087032437324524, "learning_rate": 0.0003949274594416902, "loss": 1.8711, "step": 6779 }, { "epoch": 0.3310546875, "grad_norm": 0.33190569281578064, "learning_rate": 0.0003948978207857206, "loss": 1.8301, "step": 6780 }, { "epoch": 0.331103515625, "grad_norm": 0.21313032507896423, "learning_rate": 0.00039486817922393104, "loss": 1.8241, "step": 6781 }, { "epoch": 0.33115234375, "grad_norm": 0.29930755496025085, "learning_rate": 0.00039483853475704, "loss": 1.8645, "step": 6782 }, { "epoch": 0.331201171875, "grad_norm": 0.34228816628456116, "learning_rate": 0.0003948088873857656, "loss": 1.8577, "step": 6783 }, { "epoch": 0.33125, "grad_norm": 0.3025856614112854, "learning_rate": 0.0003947792371108266, "loss": 1.8536, "step": 6784 }, { "epoch": 0.331298828125, "grad_norm": 0.28572192788124084, "learning_rate": 0.00039474958393294156, "loss": 1.8146, "step": 6785 }, { "epoch": 0.33134765625, "grad_norm": 0.24695441126823425, "learning_rate": 0.0003947199278528292, "loss": 1.8568, "step": 6786 }, { "epoch": 0.331396484375, "grad_norm": 0.25777003169059753, "learning_rate": 0.0003946902688712081, "loss": 1.8464, "step": 6787 }, { "epoch": 0.3314453125, "grad_norm": 0.26896417140960693, "learning_rate": 0.0003946606069887973, "loss": 1.8464, "step": 6788 }, { "epoch": 0.331494140625, "grad_norm": 0.24364306032657623, "learning_rate": 0.0003946309422063155, "loss": 1.8361, "step": 6789 }, { "epoch": 0.33154296875, "grad_norm": 0.26218387484550476, "learning_rate": 0.00039460127452448166, "loss": 1.8245, "step": 6790 }, { "epoch": 0.331591796875, "grad_norm": 0.30718183517456055, "learning_rate": 0.000394571603944015, "loss": 1.8352, "step": 6791 }, { "epoch": 0.331640625, "grad_norm": 0.3080516457557678, "learning_rate": 0.0003945419304656343, "loss": 1.8429, "step": 6792 }, { "epoch": 0.331689453125, "grad_norm": 0.2439969778060913, "learning_rate": 0.00039451225409005896, "loss": 1.8445, "step": 6793 }, { "epoch": 0.33173828125, "grad_norm": 0.27208003401756287, "learning_rate": 0.0003944825748180081, "loss": 1.87, "step": 6794 }, { "epoch": 0.331787109375, "grad_norm": 0.29654639959335327, "learning_rate": 0.00039445289265020104, "loss": 1.8549, "step": 6795 }, { "epoch": 0.3318359375, "grad_norm": 0.30869221687316895, "learning_rate": 0.00039442320758735716, "loss": 1.8651, "step": 6796 }, { "epoch": 0.331884765625, "grad_norm": 0.2611965537071228, "learning_rate": 0.00039439351963019585, "loss": 1.801, "step": 6797 }, { "epoch": 0.33193359375, "grad_norm": 0.2637183368206024, "learning_rate": 0.0003943638287794368, "loss": 1.8222, "step": 6798 }, { "epoch": 0.331982421875, "grad_norm": 0.2795068621635437, "learning_rate": 0.00039433413503579936, "loss": 1.8489, "step": 6799 }, { "epoch": 0.33203125, "grad_norm": 0.30826982855796814, "learning_rate": 0.0003943044384000033, "loss": 1.8345, "step": 6800 }, { "epoch": 0.332080078125, "grad_norm": 0.3597230613231659, "learning_rate": 0.00039427473887276827, "loss": 1.8655, "step": 6801 }, { "epoch": 0.33212890625, "grad_norm": 0.34042274951934814, "learning_rate": 0.0003942450364548143, "loss": 1.8542, "step": 6802 }, { "epoch": 0.332177734375, "grad_norm": 0.32673409581184387, "learning_rate": 0.00039421533114686097, "loss": 1.8555, "step": 6803 }, { "epoch": 0.3322265625, "grad_norm": 0.3489316999912262, "learning_rate": 0.00039418562294962826, "loss": 1.8583, "step": 6804 }, { "epoch": 0.332275390625, "grad_norm": 0.2560132145881653, "learning_rate": 0.00039415591186383627, "loss": 1.8469, "step": 6805 }, { "epoch": 0.33232421875, "grad_norm": 0.28528735041618347, "learning_rate": 0.00039412619789020505, "loss": 1.8625, "step": 6806 }, { "epoch": 0.332373046875, "grad_norm": 0.2895843982696533, "learning_rate": 0.0003940964810294547, "loss": 1.8075, "step": 6807 }, { "epoch": 0.332421875, "grad_norm": 0.2544899880886078, "learning_rate": 0.0003940667612823054, "loss": 1.8597, "step": 6808 }, { "epoch": 0.332470703125, "grad_norm": 0.3042367696762085, "learning_rate": 0.0003940370386494775, "loss": 1.8324, "step": 6809 }, { "epoch": 0.33251953125, "grad_norm": 0.2997209131717682, "learning_rate": 0.0003940073131316914, "loss": 1.8078, "step": 6810 }, { "epoch": 0.332568359375, "grad_norm": 0.32399430871009827, "learning_rate": 0.0003939775847296674, "loss": 1.8308, "step": 6811 }, { "epoch": 0.3326171875, "grad_norm": 0.3120138645172119, "learning_rate": 0.00039394785344412615, "loss": 1.8723, "step": 6812 }, { "epoch": 0.332666015625, "grad_norm": 0.2769121527671814, "learning_rate": 0.00039391811927578797, "loss": 1.8316, "step": 6813 }, { "epoch": 0.33271484375, "grad_norm": 0.2950575053691864, "learning_rate": 0.00039388838222537375, "loss": 1.829, "step": 6814 }, { "epoch": 0.332763671875, "grad_norm": 0.2931157052516937, "learning_rate": 0.000393858642293604, "loss": 1.8298, "step": 6815 }, { "epoch": 0.3328125, "grad_norm": 0.31202974915504456, "learning_rate": 0.0003938288994811995, "loss": 1.8399, "step": 6816 }, { "epoch": 0.332861328125, "grad_norm": 0.30677247047424316, "learning_rate": 0.0003937991537888813, "loss": 1.8086, "step": 6817 }, { "epoch": 0.33291015625, "grad_norm": 0.25304263830184937, "learning_rate": 0.0003937694052173701, "loss": 1.8398, "step": 6818 }, { "epoch": 0.332958984375, "grad_norm": 0.2529876232147217, "learning_rate": 0.00039373965376738695, "loss": 1.8492, "step": 6819 }, { "epoch": 0.3330078125, "grad_norm": 0.24303889274597168, "learning_rate": 0.00039370989943965286, "loss": 1.8253, "step": 6820 }, { "epoch": 0.333056640625, "grad_norm": 0.2968252897262573, "learning_rate": 0.000393680142234889, "loss": 1.8288, "step": 6821 }, { "epoch": 0.33310546875, "grad_norm": 0.30012035369873047, "learning_rate": 0.0003936503821538166, "loss": 1.8627, "step": 6822 }, { "epoch": 0.333154296875, "grad_norm": 0.33044105768203735, "learning_rate": 0.0003936206191971568, "loss": 1.8622, "step": 6823 }, { "epoch": 0.333203125, "grad_norm": 0.25040027499198914, "learning_rate": 0.00039359085336563107, "loss": 1.8144, "step": 6824 }, { "epoch": 0.333251953125, "grad_norm": 0.3056745231151581, "learning_rate": 0.0003935610846599607, "loss": 1.8372, "step": 6825 }, { "epoch": 0.33330078125, "grad_norm": 0.26910683512687683, "learning_rate": 0.00039353131308086714, "loss": 1.8617, "step": 6826 }, { "epoch": 0.333349609375, "grad_norm": 0.2519243061542511, "learning_rate": 0.000393501538629072, "loss": 1.8396, "step": 6827 }, { "epoch": 0.3333984375, "grad_norm": 0.2815163731575012, "learning_rate": 0.0003934717613052969, "loss": 1.8417, "step": 6828 }, { "epoch": 0.333447265625, "grad_norm": 0.24773544073104858, "learning_rate": 0.00039344198111026347, "loss": 1.8073, "step": 6829 }, { "epoch": 0.33349609375, "grad_norm": 0.30847886204719543, "learning_rate": 0.0003934121980446935, "loss": 1.8377, "step": 6830 }, { "epoch": 0.333544921875, "grad_norm": 0.28940001130104065, "learning_rate": 0.00039338241210930876, "loss": 1.853, "step": 6831 }, { "epoch": 0.33359375, "grad_norm": 0.3470637798309326, "learning_rate": 0.0003933526233048311, "loss": 1.824, "step": 6832 }, { "epoch": 0.333642578125, "grad_norm": 0.23879873752593994, "learning_rate": 0.00039332283163198267, "loss": 1.8724, "step": 6833 }, { "epoch": 0.33369140625, "grad_norm": 0.26795658469200134, "learning_rate": 0.00039329303709148523, "loss": 1.8473, "step": 6834 }, { "epoch": 0.333740234375, "grad_norm": 0.31233274936676025, "learning_rate": 0.000393263239684061, "loss": 1.8141, "step": 6835 }, { "epoch": 0.3337890625, "grad_norm": 0.24080681800842285, "learning_rate": 0.0003932334394104322, "loss": 1.8354, "step": 6836 }, { "epoch": 0.333837890625, "grad_norm": 0.26181694865226746, "learning_rate": 0.000393203636271321, "loss": 1.8377, "step": 6837 }, { "epoch": 0.33388671875, "grad_norm": 0.24049320816993713, "learning_rate": 0.0003931738302674497, "loss": 1.8112, "step": 6838 }, { "epoch": 0.333935546875, "grad_norm": 0.2691023349761963, "learning_rate": 0.0003931440213995406, "loss": 1.8337, "step": 6839 }, { "epoch": 0.333984375, "grad_norm": 0.2619325816631317, "learning_rate": 0.0003931142096683164, "loss": 1.8107, "step": 6840 }, { "epoch": 0.334033203125, "grad_norm": 0.2605139911174774, "learning_rate": 0.00039308439507449935, "loss": 1.8283, "step": 6841 }, { "epoch": 0.33408203125, "grad_norm": 0.2812184989452362, "learning_rate": 0.0003930545776188121, "loss": 1.8392, "step": 6842 }, { "epoch": 0.334130859375, "grad_norm": 0.26344963908195496, "learning_rate": 0.0003930247573019773, "loss": 1.8493, "step": 6843 }, { "epoch": 0.3341796875, "grad_norm": 0.2907324433326721, "learning_rate": 0.00039299493412471776, "loss": 1.8493, "step": 6844 }, { "epoch": 0.334228515625, "grad_norm": 0.2375200390815735, "learning_rate": 0.0003929651080877562, "loss": 1.8548, "step": 6845 }, { "epoch": 0.33427734375, "grad_norm": 0.2121773362159729, "learning_rate": 0.0003929352791918154, "loss": 1.8422, "step": 6846 }, { "epoch": 0.334326171875, "grad_norm": 0.2858632504940033, "learning_rate": 0.00039290544743761835, "loss": 1.8371, "step": 6847 }, { "epoch": 0.334375, "grad_norm": 0.3431737422943115, "learning_rate": 0.00039287561282588807, "loss": 1.8409, "step": 6848 }, { "epoch": 0.334423828125, "grad_norm": 0.3410768508911133, "learning_rate": 0.0003928457753573477, "loss": 1.8477, "step": 6849 }, { "epoch": 0.33447265625, "grad_norm": 0.27856820821762085, "learning_rate": 0.0003928159350327202, "loss": 1.8388, "step": 6850 }, { "epoch": 0.334521484375, "grad_norm": 0.27390405535697937, "learning_rate": 0.0003927860918527289, "loss": 1.8229, "step": 6851 }, { "epoch": 0.3345703125, "grad_norm": 0.28210654854774475, "learning_rate": 0.00039275624581809707, "loss": 1.8389, "step": 6852 }, { "epoch": 0.334619140625, "grad_norm": 0.2494465857744217, "learning_rate": 0.0003927263969295479, "loss": 1.8391, "step": 6853 }, { "epoch": 0.33466796875, "grad_norm": 0.3057619631290436, "learning_rate": 0.000392696545187805, "loss": 1.8331, "step": 6854 }, { "epoch": 0.334716796875, "grad_norm": 0.2850116491317749, "learning_rate": 0.00039266669059359176, "loss": 1.826, "step": 6855 }, { "epoch": 0.334765625, "grad_norm": 0.22886890172958374, "learning_rate": 0.00039263683314763175, "loss": 1.8453, "step": 6856 }, { "epoch": 0.334814453125, "grad_norm": 0.2139883190393448, "learning_rate": 0.0003926069728506486, "loss": 1.827, "step": 6857 }, { "epoch": 0.33486328125, "grad_norm": 0.2299552857875824, "learning_rate": 0.000392577109703366, "loss": 1.8357, "step": 6858 }, { "epoch": 0.334912109375, "grad_norm": 0.2263425588607788, "learning_rate": 0.0003925472437065076, "loss": 1.8349, "step": 6859 }, { "epoch": 0.3349609375, "grad_norm": 0.2456604689359665, "learning_rate": 0.00039251737486079735, "loss": 1.8188, "step": 6860 }, { "epoch": 0.335009765625, "grad_norm": 0.2561821937561035, "learning_rate": 0.00039248750316695906, "loss": 1.8251, "step": 6861 }, { "epoch": 0.33505859375, "grad_norm": 0.2745426297187805, "learning_rate": 0.00039245762862571683, "loss": 1.8481, "step": 6862 }, { "epoch": 0.335107421875, "grad_norm": 0.2258012294769287, "learning_rate": 0.00039242775123779456, "loss": 1.8446, "step": 6863 }, { "epoch": 0.33515625, "grad_norm": 0.21887171268463135, "learning_rate": 0.0003923978710039164, "loss": 1.8636, "step": 6864 }, { "epoch": 0.335205078125, "grad_norm": 0.25173696875572205, "learning_rate": 0.00039236798792480654, "loss": 1.8372, "step": 6865 }, { "epoch": 0.33525390625, "grad_norm": 0.3545736074447632, "learning_rate": 0.00039233810200118923, "loss": 1.8332, "step": 6866 }, { "epoch": 0.335302734375, "grad_norm": 0.3697248101234436, "learning_rate": 0.00039230821323378866, "loss": 1.8456, "step": 6867 }, { "epoch": 0.3353515625, "grad_norm": 0.25351080298423767, "learning_rate": 0.00039227832162332934, "loss": 1.8318, "step": 6868 }, { "epoch": 0.335400390625, "grad_norm": 0.21796829998493195, "learning_rate": 0.00039224842717053564, "loss": 1.8559, "step": 6869 }, { "epoch": 0.33544921875, "grad_norm": 0.24515610933303833, "learning_rate": 0.00039221852987613223, "loss": 1.8412, "step": 6870 }, { "epoch": 0.335498046875, "grad_norm": 0.2639360725879669, "learning_rate": 0.0003921886297408434, "loss": 1.8201, "step": 6871 }, { "epoch": 0.335546875, "grad_norm": 0.27097824215888977, "learning_rate": 0.0003921587267653941, "loss": 1.8396, "step": 6872 }, { "epoch": 0.335595703125, "grad_norm": 0.35919228196144104, "learning_rate": 0.00039212882095050896, "loss": 1.8174, "step": 6873 }, { "epoch": 0.33564453125, "grad_norm": 0.33568063378334045, "learning_rate": 0.00039209891229691267, "loss": 1.8194, "step": 6874 }, { "epoch": 0.335693359375, "grad_norm": 0.30768248438835144, "learning_rate": 0.00039206900080533013, "loss": 1.8405, "step": 6875 }, { "epoch": 0.3357421875, "grad_norm": 0.268564373254776, "learning_rate": 0.00039203908647648646, "loss": 1.8171, "step": 6876 }, { "epoch": 0.335791015625, "grad_norm": 0.2945076525211334, "learning_rate": 0.0003920091693111064, "loss": 1.8424, "step": 6877 }, { "epoch": 0.33583984375, "grad_norm": 0.3591303527355194, "learning_rate": 0.0003919792493099151, "loss": 1.846, "step": 6878 }, { "epoch": 0.335888671875, "grad_norm": 0.23307518661022186, "learning_rate": 0.00039194932647363767, "loss": 1.8553, "step": 6879 }, { "epoch": 0.3359375, "grad_norm": 0.32709258794784546, "learning_rate": 0.00039191940080299947, "loss": 1.8484, "step": 6880 }, { "epoch": 0.335986328125, "grad_norm": 0.38139480352401733, "learning_rate": 0.00039188947229872555, "loss": 1.8368, "step": 6881 }, { "epoch": 0.33603515625, "grad_norm": 0.2899235188961029, "learning_rate": 0.00039185954096154145, "loss": 1.843, "step": 6882 }, { "epoch": 0.336083984375, "grad_norm": 0.3087846338748932, "learning_rate": 0.0003918296067921725, "loss": 1.8247, "step": 6883 }, { "epoch": 0.3361328125, "grad_norm": 0.25627756118774414, "learning_rate": 0.00039179966979134405, "loss": 1.8605, "step": 6884 }, { "epoch": 0.336181640625, "grad_norm": 0.26906099915504456, "learning_rate": 0.00039176972995978195, "loss": 1.8379, "step": 6885 }, { "epoch": 0.33623046875, "grad_norm": 0.3623809814453125, "learning_rate": 0.00039173978729821143, "loss": 1.8177, "step": 6886 }, { "epoch": 0.336279296875, "grad_norm": 0.2798311412334442, "learning_rate": 0.00039170984180735853, "loss": 1.8255, "step": 6887 }, { "epoch": 0.336328125, "grad_norm": 0.2667446434497833, "learning_rate": 0.00039167989348794875, "loss": 1.8319, "step": 6888 }, { "epoch": 0.336376953125, "grad_norm": 0.3148050308227539, "learning_rate": 0.00039164994234070806, "loss": 1.8431, "step": 6889 }, { "epoch": 0.33642578125, "grad_norm": 0.3413771986961365, "learning_rate": 0.0003916199883663623, "loss": 1.8512, "step": 6890 }, { "epoch": 0.336474609375, "grad_norm": 0.3631232976913452, "learning_rate": 0.00039159003156563746, "loss": 1.8183, "step": 6891 }, { "epoch": 0.3365234375, "grad_norm": 0.2536538243293762, "learning_rate": 0.00039156007193925945, "loss": 1.8499, "step": 6892 }, { "epoch": 0.336572265625, "grad_norm": 0.32169613242149353, "learning_rate": 0.0003915301094879545, "loss": 1.848, "step": 6893 }, { "epoch": 0.33662109375, "grad_norm": 0.36511632800102234, "learning_rate": 0.00039150014421244876, "loss": 1.8501, "step": 6894 }, { "epoch": 0.336669921875, "grad_norm": 0.24180887639522552, "learning_rate": 0.00039147017611346847, "loss": 1.8078, "step": 6895 }, { "epoch": 0.33671875, "grad_norm": 0.22327353060245514, "learning_rate": 0.00039144020519173975, "loss": 1.8214, "step": 6896 }, { "epoch": 0.336767578125, "grad_norm": 0.32665717601776123, "learning_rate": 0.0003914102314479892, "loss": 1.8238, "step": 6897 }, { "epoch": 0.33681640625, "grad_norm": 0.26335909962654114, "learning_rate": 0.00039138025488294313, "loss": 1.8107, "step": 6898 }, { "epoch": 0.336865234375, "grad_norm": 0.2894197702407837, "learning_rate": 0.0003913502754973282, "loss": 1.8219, "step": 6899 }, { "epoch": 0.3369140625, "grad_norm": 0.26503732800483704, "learning_rate": 0.00039132029329187073, "loss": 1.8434, "step": 6900 }, { "epoch": 0.336962890625, "grad_norm": 0.2526193857192993, "learning_rate": 0.00039129030826729757, "loss": 1.8333, "step": 6901 }, { "epoch": 0.33701171875, "grad_norm": 0.28106117248535156, "learning_rate": 0.00039126032042433533, "loss": 1.8426, "step": 6902 }, { "epoch": 0.337060546875, "grad_norm": 0.23392683267593384, "learning_rate": 0.0003912303297637108, "loss": 1.8639, "step": 6903 }, { "epoch": 0.337109375, "grad_norm": 0.2954167127609253, "learning_rate": 0.00039120033628615086, "loss": 1.8594, "step": 6904 }, { "epoch": 0.337158203125, "grad_norm": 0.2521724998950958, "learning_rate": 0.00039117033999238243, "loss": 1.8236, "step": 6905 }, { "epoch": 0.33720703125, "grad_norm": 0.24616695940494537, "learning_rate": 0.0003911403408831325, "loss": 1.8374, "step": 6906 }, { "epoch": 0.337255859375, "grad_norm": 0.2928994297981262, "learning_rate": 0.0003911103389591281, "loss": 1.8325, "step": 6907 }, { "epoch": 0.3373046875, "grad_norm": 0.23793265223503113, "learning_rate": 0.0003910803342210964, "loss": 1.8381, "step": 6908 }, { "epoch": 0.337353515625, "grad_norm": 0.2602624297142029, "learning_rate": 0.0003910503266697644, "loss": 1.8506, "step": 6909 }, { "epoch": 0.33740234375, "grad_norm": 0.2705543041229248, "learning_rate": 0.00039102031630585966, "loss": 1.8235, "step": 6910 }, { "epoch": 0.337451171875, "grad_norm": 0.28355780243873596, "learning_rate": 0.0003909903031301092, "loss": 1.8443, "step": 6911 }, { "epoch": 0.3375, "grad_norm": 0.23875921964645386, "learning_rate": 0.00039096028714324065, "loss": 1.8291, "step": 6912 }, { "epoch": 0.337548828125, "grad_norm": 0.24221262335777283, "learning_rate": 0.00039093026834598134, "loss": 1.8421, "step": 6913 }, { "epoch": 0.33759765625, "grad_norm": 0.23905061185359955, "learning_rate": 0.0003909002467390589, "loss": 1.8345, "step": 6914 }, { "epoch": 0.337646484375, "grad_norm": 0.21535412967205048, "learning_rate": 0.00039087022232320073, "loss": 1.8458, "step": 6915 }, { "epoch": 0.3376953125, "grad_norm": 0.23187045753002167, "learning_rate": 0.00039084019509913486, "loss": 1.8497, "step": 6916 }, { "epoch": 0.337744140625, "grad_norm": 0.24245072901248932, "learning_rate": 0.0003908101650675886, "loss": 1.8189, "step": 6917 }, { "epoch": 0.33779296875, "grad_norm": 0.2259555160999298, "learning_rate": 0.00039078013222929004, "loss": 1.8027, "step": 6918 }, { "epoch": 0.337841796875, "grad_norm": 0.28672370314598083, "learning_rate": 0.00039075009658496695, "loss": 1.8601, "step": 6919 }, { "epoch": 0.337890625, "grad_norm": 0.3338945209980011, "learning_rate": 0.00039072005813534723, "loss": 1.8601, "step": 6920 }, { "epoch": 0.337939453125, "grad_norm": 0.23850354552268982, "learning_rate": 0.000390690016881159, "loss": 1.8273, "step": 6921 }, { "epoch": 0.33798828125, "grad_norm": 0.23052941262722015, "learning_rate": 0.00039065997282313017, "loss": 1.8171, "step": 6922 }, { "epoch": 0.338037109375, "grad_norm": 0.2615436017513275, "learning_rate": 0.00039062992596198897, "loss": 1.8087, "step": 6923 }, { "epoch": 0.3380859375, "grad_norm": 0.209340438246727, "learning_rate": 0.00039059987629846374, "loss": 1.8433, "step": 6924 }, { "epoch": 0.338134765625, "grad_norm": 0.22190925478935242, "learning_rate": 0.0003905698238332826, "loss": 1.8324, "step": 6925 }, { "epoch": 0.33818359375, "grad_norm": 0.23623162508010864, "learning_rate": 0.0003905397685671739, "loss": 1.8558, "step": 6926 }, { "epoch": 0.338232421875, "grad_norm": 0.27424922585487366, "learning_rate": 0.0003905097105008661, "loss": 1.8289, "step": 6927 }, { "epoch": 0.33828125, "grad_norm": 0.2241477519273758, "learning_rate": 0.00039047964963508764, "loss": 1.8112, "step": 6928 }, { "epoch": 0.338330078125, "grad_norm": 0.23868998885154724, "learning_rate": 0.0003904495859705671, "loss": 1.8489, "step": 6929 }, { "epoch": 0.33837890625, "grad_norm": 0.30123719573020935, "learning_rate": 0.00039041951950803314, "loss": 1.8715, "step": 6930 }, { "epoch": 0.338427734375, "grad_norm": 0.21812935173511505, "learning_rate": 0.00039038945024821436, "loss": 1.8678, "step": 6931 }, { "epoch": 0.3384765625, "grad_norm": 0.25213584303855896, "learning_rate": 0.00039035937819183945, "loss": 1.8455, "step": 6932 }, { "epoch": 0.338525390625, "grad_norm": 0.36085382103919983, "learning_rate": 0.00039032930333963756, "loss": 1.8373, "step": 6933 }, { "epoch": 0.33857421875, "grad_norm": 0.34334632754325867, "learning_rate": 0.0003902992256923372, "loss": 1.8395, "step": 6934 }, { "epoch": 0.338623046875, "grad_norm": 0.2867465913295746, "learning_rate": 0.0003902691452506675, "loss": 1.8343, "step": 6935 }, { "epoch": 0.338671875, "grad_norm": 0.317059189081192, "learning_rate": 0.0003902390620153575, "loss": 1.8257, "step": 6936 }, { "epoch": 0.338720703125, "grad_norm": 0.26617076992988586, "learning_rate": 0.0003902089759871362, "loss": 1.8194, "step": 6937 }, { "epoch": 0.33876953125, "grad_norm": 0.29203951358795166, "learning_rate": 0.0003901788871667328, "loss": 1.8606, "step": 6938 }, { "epoch": 0.338818359375, "grad_norm": 0.2761162221431732, "learning_rate": 0.0003901487955548765, "loss": 1.8336, "step": 6939 }, { "epoch": 0.3388671875, "grad_norm": 0.26068854331970215, "learning_rate": 0.0003901187011522967, "loss": 1.8029, "step": 6940 }, { "epoch": 0.338916015625, "grad_norm": 0.29716232419013977, "learning_rate": 0.00039008860395972275, "loss": 1.8644, "step": 6941 }, { "epoch": 0.33896484375, "grad_norm": 0.3082146644592285, "learning_rate": 0.0003900585039778839, "loss": 1.839, "step": 6942 }, { "epoch": 0.339013671875, "grad_norm": 0.31637632846832275, "learning_rate": 0.0003900284012075098, "loss": 1.8364, "step": 6943 }, { "epoch": 0.3390625, "grad_norm": 0.2685092091560364, "learning_rate": 0.00038999829564932996, "loss": 1.835, "step": 6944 }, { "epoch": 0.339111328125, "grad_norm": 0.29190489649772644, "learning_rate": 0.00038996818730407414, "loss": 1.8496, "step": 6945 }, { "epoch": 0.33916015625, "grad_norm": 0.2790977954864502, "learning_rate": 0.00038993807617247186, "loss": 1.8386, "step": 6946 }, { "epoch": 0.339208984375, "grad_norm": 0.2718336880207062, "learning_rate": 0.000389907962255253, "loss": 1.8185, "step": 6947 }, { "epoch": 0.3392578125, "grad_norm": 0.2533761262893677, "learning_rate": 0.0003898778455531473, "loss": 1.826, "step": 6948 }, { "epoch": 0.339306640625, "grad_norm": 0.26513969898223877, "learning_rate": 0.0003898477260668848, "loss": 1.8553, "step": 6949 }, { "epoch": 0.33935546875, "grad_norm": 0.30476289987564087, "learning_rate": 0.00038981760379719533, "loss": 1.8271, "step": 6950 }, { "epoch": 0.339404296875, "grad_norm": 0.26778557896614075, "learning_rate": 0.00038978747874480904, "loss": 1.8546, "step": 6951 }, { "epoch": 0.339453125, "grad_norm": 0.3411663770675659, "learning_rate": 0.00038975735091045593, "loss": 1.8592, "step": 6952 }, { "epoch": 0.339501953125, "grad_norm": 0.2956528961658478, "learning_rate": 0.00038972722029486626, "loss": 1.819, "step": 6953 }, { "epoch": 0.33955078125, "grad_norm": 0.2566750645637512, "learning_rate": 0.0003896970868987702, "loss": 1.8299, "step": 6954 }, { "epoch": 0.339599609375, "grad_norm": 0.28191664814949036, "learning_rate": 0.0003896669507228981, "loss": 1.8373, "step": 6955 }, { "epoch": 0.3396484375, "grad_norm": 0.2625175416469574, "learning_rate": 0.0003896368117679803, "loss": 1.8303, "step": 6956 }, { "epoch": 0.339697265625, "grad_norm": 0.28025227785110474, "learning_rate": 0.0003896066700347474, "loss": 1.8496, "step": 6957 }, { "epoch": 0.33974609375, "grad_norm": 0.30353039503097534, "learning_rate": 0.0003895765255239297, "loss": 1.8406, "step": 6958 }, { "epoch": 0.339794921875, "grad_norm": 0.29066571593284607, "learning_rate": 0.00038954637823625785, "loss": 1.8739, "step": 6959 }, { "epoch": 0.33984375, "grad_norm": 0.2349512279033661, "learning_rate": 0.00038951622817246244, "loss": 1.8259, "step": 6960 }, { "epoch": 0.339892578125, "grad_norm": 0.27692240476608276, "learning_rate": 0.0003894860753332743, "loss": 1.8399, "step": 6961 }, { "epoch": 0.33994140625, "grad_norm": 0.2431856095790863, "learning_rate": 0.00038945591971942413, "loss": 1.8577, "step": 6962 }, { "epoch": 0.339990234375, "grad_norm": 0.23681071400642395, "learning_rate": 0.00038942576133164274, "loss": 1.838, "step": 6963 }, { "epoch": 0.3400390625, "grad_norm": 0.24496552348136902, "learning_rate": 0.0003893956001706612, "loss": 1.8047, "step": 6964 }, { "epoch": 0.340087890625, "grad_norm": 0.2286761999130249, "learning_rate": 0.00038936543623721033, "loss": 1.8473, "step": 6965 }, { "epoch": 0.34013671875, "grad_norm": 0.2682534456253052, "learning_rate": 0.00038933526953202125, "loss": 1.8342, "step": 6966 }, { "epoch": 0.340185546875, "grad_norm": 0.2944652736186981, "learning_rate": 0.0003893051000558251, "loss": 1.8331, "step": 6967 }, { "epoch": 0.340234375, "grad_norm": 0.24575935304164886, "learning_rate": 0.0003892749278093529, "loss": 1.8475, "step": 6968 }, { "epoch": 0.340283203125, "grad_norm": 0.32531964778900146, "learning_rate": 0.0003892447527933361, "loss": 1.8321, "step": 6969 }, { "epoch": 0.34033203125, "grad_norm": 0.3068033754825592, "learning_rate": 0.00038921457500850596, "loss": 1.8402, "step": 6970 }, { "epoch": 0.340380859375, "grad_norm": 0.3024789094924927, "learning_rate": 0.0003891843944555937, "loss": 1.8399, "step": 6971 }, { "epoch": 0.3404296875, "grad_norm": 0.36607715487480164, "learning_rate": 0.00038915421113533107, "loss": 1.8504, "step": 6972 }, { "epoch": 0.340478515625, "grad_norm": 0.2832017242908478, "learning_rate": 0.0003891240250484493, "loss": 1.8285, "step": 6973 }, { "epoch": 0.34052734375, "grad_norm": 0.35426101088523865, "learning_rate": 0.00038909383619568016, "loss": 1.8338, "step": 6974 }, { "epoch": 0.340576171875, "grad_norm": 0.3475309908390045, "learning_rate": 0.00038906364457775525, "loss": 1.8562, "step": 6975 }, { "epoch": 0.340625, "grad_norm": 0.2993617355823517, "learning_rate": 0.0003890334501954062, "loss": 1.8345, "step": 6976 }, { "epoch": 0.340673828125, "grad_norm": 0.34630128741264343, "learning_rate": 0.00038900325304936495, "loss": 1.8676, "step": 6977 }, { "epoch": 0.34072265625, "grad_norm": 0.27099835872650146, "learning_rate": 0.0003889730531403633, "loss": 1.8051, "step": 6978 }, { "epoch": 0.340771484375, "grad_norm": 0.2867193818092346, "learning_rate": 0.00038894285046913303, "loss": 1.8842, "step": 6979 }, { "epoch": 0.3408203125, "grad_norm": 0.26960888504981995, "learning_rate": 0.0003889126450364063, "loss": 1.8514, "step": 6980 }, { "epoch": 0.340869140625, "grad_norm": 0.2510501742362976, "learning_rate": 0.00038888243684291515, "loss": 1.8375, "step": 6981 }, { "epoch": 0.34091796875, "grad_norm": 0.2970637083053589, "learning_rate": 0.0003888522258893915, "loss": 1.8683, "step": 6982 }, { "epoch": 0.340966796875, "grad_norm": 0.2516656816005707, "learning_rate": 0.00038882201217656784, "loss": 1.8395, "step": 6983 }, { "epoch": 0.341015625, "grad_norm": 0.24725881218910217, "learning_rate": 0.0003887917957051762, "loss": 1.8286, "step": 6984 }, { "epoch": 0.341064453125, "grad_norm": 0.3065013289451599, "learning_rate": 0.000388761576475949, "loss": 1.8294, "step": 6985 }, { "epoch": 0.34111328125, "grad_norm": 0.26314353942871094, "learning_rate": 0.00038873135448961856, "loss": 1.8419, "step": 6986 }, { "epoch": 0.341162109375, "grad_norm": 0.24401433765888214, "learning_rate": 0.0003887011297469174, "loss": 1.8398, "step": 6987 }, { "epoch": 0.3412109375, "grad_norm": 0.25017648935317993, "learning_rate": 0.000388670902248578, "loss": 1.845, "step": 6988 }, { "epoch": 0.341259765625, "grad_norm": 0.2170119732618332, "learning_rate": 0.00038864067199533296, "loss": 1.8351, "step": 6989 }, { "epoch": 0.34130859375, "grad_norm": 0.2458675503730774, "learning_rate": 0.000388610438987915, "loss": 1.8446, "step": 6990 }, { "epoch": 0.341357421875, "grad_norm": 0.33303242921829224, "learning_rate": 0.0003885802032270567, "loss": 1.8397, "step": 6991 }, { "epoch": 0.34140625, "grad_norm": 0.3249567151069641, "learning_rate": 0.00038854996471349097, "loss": 1.8003, "step": 6992 }, { "epoch": 0.341455078125, "grad_norm": 0.2389289289712906, "learning_rate": 0.0003885197234479506, "loss": 1.8253, "step": 6993 }, { "epoch": 0.34150390625, "grad_norm": 0.2544572651386261, "learning_rate": 0.0003884894794311686, "loss": 1.8246, "step": 6994 }, { "epoch": 0.341552734375, "grad_norm": 0.24852542579174042, "learning_rate": 0.00038845923266387776, "loss": 1.8332, "step": 6995 }, { "epoch": 0.3416015625, "grad_norm": 0.26962149143218994, "learning_rate": 0.0003884289831468114, "loss": 1.8412, "step": 6996 }, { "epoch": 0.341650390625, "grad_norm": 0.2762397229671478, "learning_rate": 0.0003883987308807024, "loss": 1.7971, "step": 6997 }, { "epoch": 0.34169921875, "grad_norm": 0.24963568150997162, "learning_rate": 0.00038836847586628403, "loss": 1.8574, "step": 6998 }, { "epoch": 0.341748046875, "grad_norm": 0.22275348007678986, "learning_rate": 0.00038833821810428963, "loss": 1.8279, "step": 6999 }, { "epoch": 0.341796875, "grad_norm": 0.28293555974960327, "learning_rate": 0.00038830795759545243, "loss": 1.8189, "step": 7000 }, { "epoch": 0.341845703125, "grad_norm": 0.3094676733016968, "learning_rate": 0.0003882776943405059, "loss": 1.87, "step": 7001 }, { "epoch": 0.34189453125, "grad_norm": 0.30405986309051514, "learning_rate": 0.00038824742834018346, "loss": 1.8398, "step": 7002 }, { "epoch": 0.341943359375, "grad_norm": 0.22433623671531677, "learning_rate": 0.00038821715959521855, "loss": 1.8284, "step": 7003 }, { "epoch": 0.3419921875, "grad_norm": 0.25232842564582825, "learning_rate": 0.0003881868881063448, "loss": 1.8299, "step": 7004 }, { "epoch": 0.342041015625, "grad_norm": 0.225753515958786, "learning_rate": 0.0003881566138742959, "loss": 1.8751, "step": 7005 }, { "epoch": 0.34208984375, "grad_norm": 0.2776884138584137, "learning_rate": 0.0003881263368998056, "loss": 1.8103, "step": 7006 }, { "epoch": 0.342138671875, "grad_norm": 0.30431175231933594, "learning_rate": 0.0003880960571836076, "loss": 1.8366, "step": 7007 }, { "epoch": 0.3421875, "grad_norm": 0.2453516125679016, "learning_rate": 0.00038806577472643583, "loss": 1.8465, "step": 7008 }, { "epoch": 0.342236328125, "grad_norm": 0.22261777520179749, "learning_rate": 0.0003880354895290241, "loss": 1.8306, "step": 7009 }, { "epoch": 0.34228515625, "grad_norm": 0.3357419967651367, "learning_rate": 0.0003880052015921066, "loss": 1.8482, "step": 7010 }, { "epoch": 0.342333984375, "grad_norm": 0.3460255265235901, "learning_rate": 0.00038797491091641713, "loss": 1.8654, "step": 7011 }, { "epoch": 0.3423828125, "grad_norm": 0.24317540228366852, "learning_rate": 0.00038794461750269, "loss": 1.852, "step": 7012 }, { "epoch": 0.342431640625, "grad_norm": 0.2840670347213745, "learning_rate": 0.0003879143213516593, "loss": 1.8179, "step": 7013 }, { "epoch": 0.34248046875, "grad_norm": 0.3536067605018616, "learning_rate": 0.0003878840224640592, "loss": 1.8411, "step": 7014 }, { "epoch": 0.342529296875, "grad_norm": 0.2905305325984955, "learning_rate": 0.0003878537208406243, "loss": 1.8374, "step": 7015 }, { "epoch": 0.342578125, "grad_norm": 0.31491297483444214, "learning_rate": 0.00038782341648208876, "loss": 1.8505, "step": 7016 }, { "epoch": 0.342626953125, "grad_norm": 0.3634396493434906, "learning_rate": 0.00038779310938918704, "loss": 1.8535, "step": 7017 }, { "epoch": 0.34267578125, "grad_norm": 0.33370882272720337, "learning_rate": 0.0003877627995626537, "loss": 1.8173, "step": 7018 }, { "epoch": 0.342724609375, "grad_norm": 0.2979428172111511, "learning_rate": 0.0003877324870032234, "loss": 1.8705, "step": 7019 }, { "epoch": 0.3427734375, "grad_norm": 0.24873045086860657, "learning_rate": 0.0003877021717116306, "loss": 1.8552, "step": 7020 }, { "epoch": 0.342822265625, "grad_norm": 0.3172726631164551, "learning_rate": 0.00038767185368861017, "loss": 1.8492, "step": 7021 }, { "epoch": 0.34287109375, "grad_norm": 0.2541744112968445, "learning_rate": 0.0003876415329348968, "loss": 1.8264, "step": 7022 }, { "epoch": 0.342919921875, "grad_norm": 0.24573343992233276, "learning_rate": 0.0003876112094512255, "loss": 1.8699, "step": 7023 }, { "epoch": 0.34296875, "grad_norm": 0.31843721866607666, "learning_rate": 0.000387580883238331, "loss": 1.8183, "step": 7024 }, { "epoch": 0.343017578125, "grad_norm": 0.24415308237075806, "learning_rate": 0.0003875505542969483, "loss": 1.8404, "step": 7025 }, { "epoch": 0.34306640625, "grad_norm": 0.29110395908355713, "learning_rate": 0.0003875202226278126, "loss": 1.8134, "step": 7026 }, { "epoch": 0.343115234375, "grad_norm": 0.2902772128582001, "learning_rate": 0.0003874898882316589, "loss": 1.8322, "step": 7027 }, { "epoch": 0.3431640625, "grad_norm": 0.1957399696111679, "learning_rate": 0.0003874595511092223, "loss": 1.8498, "step": 7028 }, { "epoch": 0.343212890625, "grad_norm": 0.28107210993766785, "learning_rate": 0.00038742921126123817, "loss": 1.8509, "step": 7029 }, { "epoch": 0.34326171875, "grad_norm": 0.24536770582199097, "learning_rate": 0.0003873988686884417, "loss": 1.8394, "step": 7030 }, { "epoch": 0.343310546875, "grad_norm": 0.24781303107738495, "learning_rate": 0.00038736852339156844, "loss": 1.8307, "step": 7031 }, { "epoch": 0.343359375, "grad_norm": 0.24017132818698883, "learning_rate": 0.00038733817537135376, "loss": 1.8418, "step": 7032 }, { "epoch": 0.343408203125, "grad_norm": 0.307027667760849, "learning_rate": 0.00038730782462853303, "loss": 1.8332, "step": 7033 }, { "epoch": 0.34345703125, "grad_norm": 0.2761068642139435, "learning_rate": 0.0003872774711638421, "loss": 1.8225, "step": 7034 }, { "epoch": 0.343505859375, "grad_norm": 0.23145046830177307, "learning_rate": 0.0003872471149780163, "loss": 1.8237, "step": 7035 }, { "epoch": 0.3435546875, "grad_norm": 0.2862267792224884, "learning_rate": 0.0003872167560717916, "loss": 1.8205, "step": 7036 }, { "epoch": 0.343603515625, "grad_norm": 0.29256466031074524, "learning_rate": 0.00038718639444590365, "loss": 1.861, "step": 7037 }, { "epoch": 0.34365234375, "grad_norm": 0.2402983009815216, "learning_rate": 0.0003871560301010883, "loss": 1.8265, "step": 7038 }, { "epoch": 0.343701171875, "grad_norm": 0.2817365229129791, "learning_rate": 0.0003871256630380814, "loss": 1.8408, "step": 7039 }, { "epoch": 0.34375, "grad_norm": 0.31147703528404236, "learning_rate": 0.000387095293257619, "loss": 1.8384, "step": 7040 }, { "epoch": 0.343798828125, "grad_norm": 0.2457161843776703, "learning_rate": 0.00038706492076043714, "loss": 1.8354, "step": 7041 }, { "epoch": 0.34384765625, "grad_norm": 0.33006227016448975, "learning_rate": 0.0003870345455472719, "loss": 1.8306, "step": 7042 }, { "epoch": 0.343896484375, "grad_norm": 0.4590865969657898, "learning_rate": 0.00038700416761885937, "loss": 1.8287, "step": 7043 }, { "epoch": 0.3439453125, "grad_norm": 0.33173587918281555, "learning_rate": 0.00038697378697593595, "loss": 1.8089, "step": 7044 }, { "epoch": 0.343994140625, "grad_norm": 0.27500712871551514, "learning_rate": 0.0003869434036192378, "loss": 1.8033, "step": 7045 }, { "epoch": 0.34404296875, "grad_norm": 0.2969324588775635, "learning_rate": 0.0003869130175495013, "loss": 1.8253, "step": 7046 }, { "epoch": 0.344091796875, "grad_norm": 0.25613296031951904, "learning_rate": 0.00038688262876746286, "loss": 1.8381, "step": 7047 }, { "epoch": 0.344140625, "grad_norm": 0.29287847876548767, "learning_rate": 0.0003868522372738591, "loss": 1.8056, "step": 7048 }, { "epoch": 0.344189453125, "grad_norm": 0.26884880661964417, "learning_rate": 0.00038682184306942653, "loss": 1.8442, "step": 7049 }, { "epoch": 0.34423828125, "grad_norm": 0.2797262370586395, "learning_rate": 0.0003867914461549018, "loss": 1.8239, "step": 7050 }, { "epoch": 0.344287109375, "grad_norm": 0.3096011281013489, "learning_rate": 0.00038676104653102145, "loss": 1.8341, "step": 7051 }, { "epoch": 0.3443359375, "grad_norm": 0.3056800365447998, "learning_rate": 0.00038673064419852244, "loss": 1.8447, "step": 7052 }, { "epoch": 0.344384765625, "grad_norm": 0.34650734066963196, "learning_rate": 0.00038670023915814145, "loss": 1.836, "step": 7053 }, { "epoch": 0.34443359375, "grad_norm": 0.2704240679740906, "learning_rate": 0.0003866698314106154, "loss": 1.8419, "step": 7054 }, { "epoch": 0.344482421875, "grad_norm": 0.27448269724845886, "learning_rate": 0.00038663942095668133, "loss": 1.8297, "step": 7055 }, { "epoch": 0.34453125, "grad_norm": 0.3122590184211731, "learning_rate": 0.00038660900779707613, "loss": 1.8263, "step": 7056 }, { "epoch": 0.344580078125, "grad_norm": 0.28667765855789185, "learning_rate": 0.000386578591932537, "loss": 1.7906, "step": 7057 }, { "epoch": 0.34462890625, "grad_norm": 0.2457980364561081, "learning_rate": 0.0003865481733638011, "loss": 1.8358, "step": 7058 }, { "epoch": 0.344677734375, "grad_norm": 0.2925074100494385, "learning_rate": 0.00038651775209160553, "loss": 1.8572, "step": 7059 }, { "epoch": 0.3447265625, "grad_norm": 0.2660309672355652, "learning_rate": 0.0003864873281166877, "loss": 1.8702, "step": 7060 }, { "epoch": 0.344775390625, "grad_norm": 0.24757561087608337, "learning_rate": 0.0003864569014397849, "loss": 1.8395, "step": 7061 }, { "epoch": 0.34482421875, "grad_norm": 0.24597525596618652, "learning_rate": 0.00038642647206163456, "loss": 1.863, "step": 7062 }, { "epoch": 0.344873046875, "grad_norm": 0.2234640121459961, "learning_rate": 0.00038639603998297413, "loss": 1.8447, "step": 7063 }, { "epoch": 0.344921875, "grad_norm": 0.25350749492645264, "learning_rate": 0.00038636560520454116, "loss": 1.8204, "step": 7064 }, { "epoch": 0.344970703125, "grad_norm": 0.243569016456604, "learning_rate": 0.00038633516772707324, "loss": 1.8252, "step": 7065 }, { "epoch": 0.34501953125, "grad_norm": 0.25234758853912354, "learning_rate": 0.0003863047275513081, "loss": 1.8506, "step": 7066 }, { "epoch": 0.345068359375, "grad_norm": 0.2587752640247345, "learning_rate": 0.0003862742846779835, "loss": 1.8237, "step": 7067 }, { "epoch": 0.3451171875, "grad_norm": 0.25530263781547546, "learning_rate": 0.00038624383910783723, "loss": 1.8133, "step": 7068 }, { "epoch": 0.345166015625, "grad_norm": 0.2675708532333374, "learning_rate": 0.0003862133908416072, "loss": 1.8413, "step": 7069 }, { "epoch": 0.34521484375, "grad_norm": 0.2962895631790161, "learning_rate": 0.00038618293988003117, "loss": 1.8265, "step": 7070 }, { "epoch": 0.345263671875, "grad_norm": 0.2706916332244873, "learning_rate": 0.0003861524862238473, "loss": 1.8498, "step": 7071 }, { "epoch": 0.3453125, "grad_norm": 0.21465696394443512, "learning_rate": 0.0003861220298737936, "loss": 1.8614, "step": 7072 }, { "epoch": 0.345361328125, "grad_norm": 0.3079456686973572, "learning_rate": 0.00038609157083060834, "loss": 1.8421, "step": 7073 }, { "epoch": 0.34541015625, "grad_norm": 0.2968042492866516, "learning_rate": 0.00038606110909502955, "loss": 1.8521, "step": 7074 }, { "epoch": 0.345458984375, "grad_norm": 0.24895361065864563, "learning_rate": 0.0003860306446677956, "loss": 1.8516, "step": 7075 }, { "epoch": 0.3455078125, "grad_norm": 0.31341034173965454, "learning_rate": 0.00038600017754964475, "loss": 1.8505, "step": 7076 }, { "epoch": 0.345556640625, "grad_norm": 0.28346049785614014, "learning_rate": 0.00038596970774131537, "loss": 1.8354, "step": 7077 }, { "epoch": 0.34560546875, "grad_norm": 0.25525009632110596, "learning_rate": 0.00038593923524354596, "loss": 1.7995, "step": 7078 }, { "epoch": 0.345654296875, "grad_norm": 0.29278168082237244, "learning_rate": 0.0003859087600570751, "loss": 1.8341, "step": 7079 }, { "epoch": 0.345703125, "grad_norm": 0.3458227813243866, "learning_rate": 0.00038587828218264133, "loss": 1.8223, "step": 7080 }, { "epoch": 0.345751953125, "grad_norm": 0.33681440353393555, "learning_rate": 0.00038584780162098333, "loss": 1.8223, "step": 7081 }, { "epoch": 0.34580078125, "grad_norm": 0.25089311599731445, "learning_rate": 0.0003858173183728398, "loss": 1.8192, "step": 7082 }, { "epoch": 0.345849609375, "grad_norm": 0.28404366970062256, "learning_rate": 0.00038578683243894953, "loss": 1.8387, "step": 7083 }, { "epoch": 0.3458984375, "grad_norm": 0.3029991388320923, "learning_rate": 0.0003857563438200514, "loss": 1.8438, "step": 7084 }, { "epoch": 0.345947265625, "grad_norm": 0.3786722421646118, "learning_rate": 0.0003857258525168842, "loss": 1.8567, "step": 7085 }, { "epoch": 0.34599609375, "grad_norm": 0.40823954343795776, "learning_rate": 0.0003856953585301871, "loss": 1.8277, "step": 7086 }, { "epoch": 0.346044921875, "grad_norm": 0.23704968392848969, "learning_rate": 0.000385664861860699, "loss": 1.8287, "step": 7087 }, { "epoch": 0.34609375, "grad_norm": 0.41524171829223633, "learning_rate": 0.00038563436250915907, "loss": 1.8503, "step": 7088 }, { "epoch": 0.346142578125, "grad_norm": 0.39304009079933167, "learning_rate": 0.0003856038604763065, "loss": 1.8506, "step": 7089 }, { "epoch": 0.34619140625, "grad_norm": 0.25020262598991394, "learning_rate": 0.00038557335576288053, "loss": 1.8097, "step": 7090 }, { "epoch": 0.346240234375, "grad_norm": 0.36010560393333435, "learning_rate": 0.00038554284836962043, "loss": 1.8225, "step": 7091 }, { "epoch": 0.3462890625, "grad_norm": 0.2793896794319153, "learning_rate": 0.0003855123382972656, "loss": 1.8404, "step": 7092 }, { "epoch": 0.346337890625, "grad_norm": 0.3352983295917511, "learning_rate": 0.00038548182554655545, "loss": 1.8723, "step": 7093 }, { "epoch": 0.34638671875, "grad_norm": 0.24611519277095795, "learning_rate": 0.0003854513101182295, "loss": 1.8589, "step": 7094 }, { "epoch": 0.346435546875, "grad_norm": 0.32730698585510254, "learning_rate": 0.0003854207920130273, "loss": 1.8457, "step": 7095 }, { "epoch": 0.346484375, "grad_norm": 0.38881346583366394, "learning_rate": 0.0003853902712316886, "loss": 1.832, "step": 7096 }, { "epoch": 0.346533203125, "grad_norm": 0.1987835168838501, "learning_rate": 0.0003853597477749528, "loss": 1.8707, "step": 7097 }, { "epoch": 0.34658203125, "grad_norm": 0.3533618152141571, "learning_rate": 0.00038532922164356, "loss": 1.8267, "step": 7098 }, { "epoch": 0.346630859375, "grad_norm": 0.30227452516555786, "learning_rate": 0.0003852986928382499, "loss": 1.8254, "step": 7099 }, { "epoch": 0.3466796875, "grad_norm": 0.23286029696464539, "learning_rate": 0.0003852681613597622, "loss": 1.8202, "step": 7100 }, { "epoch": 0.346728515625, "grad_norm": 0.3345184326171875, "learning_rate": 0.00038523762720883714, "loss": 1.8622, "step": 7101 }, { "epoch": 0.34677734375, "grad_norm": 0.28368592262268066, "learning_rate": 0.00038520709038621467, "loss": 1.8067, "step": 7102 }, { "epoch": 0.346826171875, "grad_norm": 0.2561042010784149, "learning_rate": 0.0003851765508926347, "loss": 1.8226, "step": 7103 }, { "epoch": 0.346875, "grad_norm": 0.24293582141399384, "learning_rate": 0.0003851460087288376, "loss": 1.8542, "step": 7104 }, { "epoch": 0.346923828125, "grad_norm": 0.2766352593898773, "learning_rate": 0.00038511546389556346, "loss": 1.8485, "step": 7105 }, { "epoch": 0.34697265625, "grad_norm": 0.2478395253419876, "learning_rate": 0.0003850849163935525, "loss": 1.8636, "step": 7106 }, { "epoch": 0.347021484375, "grad_norm": 0.28612735867500305, "learning_rate": 0.00038505436622354527, "loss": 1.8248, "step": 7107 }, { "epoch": 0.3470703125, "grad_norm": 0.2559512257575989, "learning_rate": 0.000385023813386282, "loss": 1.8259, "step": 7108 }, { "epoch": 0.347119140625, "grad_norm": 0.21761444211006165, "learning_rate": 0.0003849932578825032, "loss": 1.8491, "step": 7109 }, { "epoch": 0.34716796875, "grad_norm": 0.3293704688549042, "learning_rate": 0.0003849626997129495, "loss": 1.8273, "step": 7110 }, { "epoch": 0.347216796875, "grad_norm": 0.2696406841278076, "learning_rate": 0.0003849321388783614, "loss": 1.8558, "step": 7111 }, { "epoch": 0.347265625, "grad_norm": 0.27398771047592163, "learning_rate": 0.00038490157537947957, "loss": 1.8465, "step": 7112 }, { "epoch": 0.347314453125, "grad_norm": 0.3296750783920288, "learning_rate": 0.0003848710092170448, "loss": 1.8447, "step": 7113 }, { "epoch": 0.34736328125, "grad_norm": 0.23661479353904724, "learning_rate": 0.0003848404403917978, "loss": 1.8269, "step": 7114 }, { "epoch": 0.347412109375, "grad_norm": 0.2909473478794098, "learning_rate": 0.00038480986890447957, "loss": 1.8675, "step": 7115 }, { "epoch": 0.3474609375, "grad_norm": 0.33415108919143677, "learning_rate": 0.00038477929475583077, "loss": 1.8177, "step": 7116 }, { "epoch": 0.347509765625, "grad_norm": 0.24694685637950897, "learning_rate": 0.0003847487179465927, "loss": 1.8443, "step": 7117 }, { "epoch": 0.34755859375, "grad_norm": 0.32811740040779114, "learning_rate": 0.0003847181384775063, "loss": 1.8591, "step": 7118 }, { "epoch": 0.347607421875, "grad_norm": 0.34253057837486267, "learning_rate": 0.0003846875563493127, "loss": 1.8475, "step": 7119 }, { "epoch": 0.34765625, "grad_norm": 0.27424904704093933, "learning_rate": 0.000384656971562753, "loss": 1.8233, "step": 7120 }, { "epoch": 0.347705078125, "grad_norm": 0.3089292049407959, "learning_rate": 0.00038462638411856844, "loss": 1.8677, "step": 7121 }, { "epoch": 0.34775390625, "grad_norm": 0.31705594062805176, "learning_rate": 0.0003845957940175004, "loss": 1.8247, "step": 7122 }, { "epoch": 0.347802734375, "grad_norm": 0.2949482500553131, "learning_rate": 0.0003845652012602903, "loss": 1.8215, "step": 7123 }, { "epoch": 0.3478515625, "grad_norm": 0.271278977394104, "learning_rate": 0.00038453460584767944, "loss": 1.8416, "step": 7124 }, { "epoch": 0.347900390625, "grad_norm": 0.266472190618515, "learning_rate": 0.0003845040077804095, "loss": 1.8532, "step": 7125 }, { "epoch": 0.34794921875, "grad_norm": 0.30202192068099976, "learning_rate": 0.0003844734070592219, "loss": 1.8284, "step": 7126 }, { "epoch": 0.347998046875, "grad_norm": 0.2765442132949829, "learning_rate": 0.00038444280368485833, "loss": 1.8298, "step": 7127 }, { "epoch": 0.348046875, "grad_norm": 0.285257488489151, "learning_rate": 0.0003844121976580606, "loss": 1.8076, "step": 7128 }, { "epoch": 0.348095703125, "grad_norm": 0.2700105607509613, "learning_rate": 0.0003843815889795702, "loss": 1.8376, "step": 7129 }, { "epoch": 0.34814453125, "grad_norm": 0.22407634556293488, "learning_rate": 0.0003843509776501292, "loss": 1.8525, "step": 7130 }, { "epoch": 0.348193359375, "grad_norm": 0.24459347128868103, "learning_rate": 0.0003843203636704794, "loss": 1.8351, "step": 7131 }, { "epoch": 0.3482421875, "grad_norm": 0.22752533853054047, "learning_rate": 0.0003842897470413627, "loss": 1.8557, "step": 7132 }, { "epoch": 0.348291015625, "grad_norm": 0.22960810363292694, "learning_rate": 0.00038425912776352125, "loss": 1.8573, "step": 7133 }, { "epoch": 0.34833984375, "grad_norm": 0.227980837225914, "learning_rate": 0.00038422850583769705, "loss": 1.8117, "step": 7134 }, { "epoch": 0.348388671875, "grad_norm": 0.2371140867471695, "learning_rate": 0.0003841978812646323, "loss": 1.8412, "step": 7135 }, { "epoch": 0.3484375, "grad_norm": 0.22485345602035522, "learning_rate": 0.00038416725404506904, "loss": 1.8487, "step": 7136 }, { "epoch": 0.348486328125, "grad_norm": 0.24902372062206268, "learning_rate": 0.00038413662417974975, "loss": 1.8423, "step": 7137 }, { "epoch": 0.34853515625, "grad_norm": 0.2832794189453125, "learning_rate": 0.00038410599166941665, "loss": 1.8504, "step": 7138 }, { "epoch": 0.348583984375, "grad_norm": 0.24501869082450867, "learning_rate": 0.0003840753565148123, "loss": 1.832, "step": 7139 }, { "epoch": 0.3486328125, "grad_norm": 0.25079628825187683, "learning_rate": 0.0003840447187166789, "loss": 1.8213, "step": 7140 }, { "epoch": 0.348681640625, "grad_norm": 0.2544405460357666, "learning_rate": 0.00038401407827575926, "loss": 1.846, "step": 7141 }, { "epoch": 0.34873046875, "grad_norm": 0.287706196308136, "learning_rate": 0.00038398343519279577, "loss": 1.8492, "step": 7142 }, { "epoch": 0.348779296875, "grad_norm": 0.33094534277915955, "learning_rate": 0.00038395278946853125, "loss": 1.859, "step": 7143 }, { "epoch": 0.348828125, "grad_norm": 0.34776952862739563, "learning_rate": 0.00038392214110370834, "loss": 1.8809, "step": 7144 }, { "epoch": 0.348876953125, "grad_norm": 0.4498388171195984, "learning_rate": 0.00038389149009906975, "loss": 1.8166, "step": 7145 }, { "epoch": 0.34892578125, "grad_norm": 0.4145906865596771, "learning_rate": 0.0003838608364553585, "loss": 1.8506, "step": 7146 }, { "epoch": 0.348974609375, "grad_norm": 0.2972877025604248, "learning_rate": 0.00038383018017331734, "loss": 1.8399, "step": 7147 }, { "epoch": 0.3490234375, "grad_norm": 0.28907015919685364, "learning_rate": 0.0003837995212536894, "loss": 1.8249, "step": 7148 }, { "epoch": 0.349072265625, "grad_norm": 0.27344512939453125, "learning_rate": 0.00038376885969721765, "loss": 1.8329, "step": 7149 }, { "epoch": 0.34912109375, "grad_norm": 0.29539552330970764, "learning_rate": 0.0003837381955046452, "loss": 1.8379, "step": 7150 }, { "epoch": 0.349169921875, "grad_norm": 0.2777046859264374, "learning_rate": 0.00038370752867671515, "loss": 1.8318, "step": 7151 }, { "epoch": 0.34921875, "grad_norm": 0.23419582843780518, "learning_rate": 0.0003836768592141709, "loss": 1.8214, "step": 7152 }, { "epoch": 0.349267578125, "grad_norm": 0.26430314779281616, "learning_rate": 0.0003836461871177556, "loss": 1.8454, "step": 7153 }, { "epoch": 0.34931640625, "grad_norm": 0.2898761034011841, "learning_rate": 0.00038361551238821266, "loss": 1.8178, "step": 7154 }, { "epoch": 0.349365234375, "grad_norm": 0.2268541306257248, "learning_rate": 0.00038358483502628555, "loss": 1.848, "step": 7155 }, { "epoch": 0.3494140625, "grad_norm": 0.2969076335430145, "learning_rate": 0.0003835541550327177, "loss": 1.8434, "step": 7156 }, { "epoch": 0.349462890625, "grad_norm": 0.3087930679321289, "learning_rate": 0.00038352347240825274, "loss": 1.8343, "step": 7157 }, { "epoch": 0.34951171875, "grad_norm": 0.23555712401866913, "learning_rate": 0.00038349278715363425, "loss": 1.8195, "step": 7158 }, { "epoch": 0.349560546875, "grad_norm": 0.31976252794265747, "learning_rate": 0.00038346209926960577, "loss": 1.8075, "step": 7159 }, { "epoch": 0.349609375, "grad_norm": 0.25610849261283875, "learning_rate": 0.0003834314087569113, "loss": 1.8205, "step": 7160 }, { "epoch": 0.349658203125, "grad_norm": 0.22571209073066711, "learning_rate": 0.0003834007156162945, "loss": 1.8345, "step": 7161 }, { "epoch": 0.34970703125, "grad_norm": 0.29032525420188904, "learning_rate": 0.00038337001984849927, "loss": 1.8234, "step": 7162 }, { "epoch": 0.349755859375, "grad_norm": 0.2875204384326935, "learning_rate": 0.00038333932145426957, "loss": 1.8415, "step": 7163 }, { "epoch": 0.3498046875, "grad_norm": 0.2034546434879303, "learning_rate": 0.0003833086204343493, "loss": 1.8299, "step": 7164 }, { "epoch": 0.349853515625, "grad_norm": 0.25868868827819824, "learning_rate": 0.00038327791678948266, "loss": 1.8188, "step": 7165 }, { "epoch": 0.34990234375, "grad_norm": 0.23619629442691803, "learning_rate": 0.00038324721052041374, "loss": 1.8704, "step": 7166 }, { "epoch": 0.349951171875, "grad_norm": 0.22989752888679504, "learning_rate": 0.0003832165016278866, "loss": 1.8411, "step": 7167 }, { "epoch": 0.35, "grad_norm": 0.2521588206291199, "learning_rate": 0.0003831857901126457, "loss": 1.8388, "step": 7168 }, { "epoch": 0.350048828125, "grad_norm": 0.2601926922798157, "learning_rate": 0.00038315507597543524, "loss": 1.8538, "step": 7169 }, { "epoch": 0.35009765625, "grad_norm": 0.25914549827575684, "learning_rate": 0.00038312435921699967, "loss": 1.8436, "step": 7170 }, { "epoch": 0.350146484375, "grad_norm": 0.2236206978559494, "learning_rate": 0.0003830936398380833, "loss": 1.8352, "step": 7171 }, { "epoch": 0.3501953125, "grad_norm": 0.23023618757724762, "learning_rate": 0.00038306291783943075, "loss": 1.8423, "step": 7172 }, { "epoch": 0.350244140625, "grad_norm": 0.23918263614177704, "learning_rate": 0.00038303219322178655, "loss": 1.8524, "step": 7173 }, { "epoch": 0.35029296875, "grad_norm": 0.2804960608482361, "learning_rate": 0.0003830014659858954, "loss": 1.8249, "step": 7174 }, { "epoch": 0.350341796875, "grad_norm": 0.25250208377838135, "learning_rate": 0.00038297073613250196, "loss": 1.8384, "step": 7175 }, { "epoch": 0.350390625, "grad_norm": 0.2999930679798126, "learning_rate": 0.00038294000366235093, "loss": 1.872, "step": 7176 }, { "epoch": 0.350439453125, "grad_norm": 0.3727535605430603, "learning_rate": 0.0003829092685761873, "loss": 1.806, "step": 7177 }, { "epoch": 0.35048828125, "grad_norm": 0.3469402492046356, "learning_rate": 0.0003828785308747558, "loss": 1.8427, "step": 7178 }, { "epoch": 0.350537109375, "grad_norm": 0.25310370326042175, "learning_rate": 0.0003828477905588014, "loss": 1.8196, "step": 7179 }, { "epoch": 0.3505859375, "grad_norm": 0.3091880679130554, "learning_rate": 0.0003828170476290692, "loss": 1.8218, "step": 7180 }, { "epoch": 0.350634765625, "grad_norm": 0.3421233594417572, "learning_rate": 0.0003827863020863041, "loss": 1.8354, "step": 7181 }, { "epoch": 0.35068359375, "grad_norm": 0.3361113965511322, "learning_rate": 0.0003827555539312515, "loss": 1.8443, "step": 7182 }, { "epoch": 0.350732421875, "grad_norm": 0.41580691933631897, "learning_rate": 0.00038272480316465645, "loss": 1.848, "step": 7183 }, { "epoch": 0.35078125, "grad_norm": 0.3713535666465759, "learning_rate": 0.00038269404978726426, "loss": 1.8653, "step": 7184 }, { "epoch": 0.350830078125, "grad_norm": 0.2852887213230133, "learning_rate": 0.0003826632937998203, "loss": 1.8371, "step": 7185 }, { "epoch": 0.35087890625, "grad_norm": 0.36836400628089905, "learning_rate": 0.00038263253520306984, "loss": 1.8198, "step": 7186 }, { "epoch": 0.350927734375, "grad_norm": 0.35803937911987305, "learning_rate": 0.00038260177399775845, "loss": 1.8291, "step": 7187 }, { "epoch": 0.3509765625, "grad_norm": 0.29203730821609497, "learning_rate": 0.00038257101018463163, "loss": 1.8289, "step": 7188 }, { "epoch": 0.351025390625, "grad_norm": 0.3019964098930359, "learning_rate": 0.0003825402437644349, "loss": 1.8332, "step": 7189 }, { "epoch": 0.35107421875, "grad_norm": 0.3413294851779938, "learning_rate": 0.000382509474737914, "loss": 1.8207, "step": 7190 }, { "epoch": 0.351123046875, "grad_norm": 0.27309450507164, "learning_rate": 0.0003824787031058147, "loss": 1.8372, "step": 7191 }, { "epoch": 0.351171875, "grad_norm": 0.28090769052505493, "learning_rate": 0.00038244792886888253, "loss": 1.8437, "step": 7192 }, { "epoch": 0.351220703125, "grad_norm": 0.27003926038742065, "learning_rate": 0.00038241715202786357, "loss": 1.8339, "step": 7193 }, { "epoch": 0.35126953125, "grad_norm": 0.24678272008895874, "learning_rate": 0.0003823863725835036, "loss": 1.8422, "step": 7194 }, { "epoch": 0.351318359375, "grad_norm": 0.31315672397613525, "learning_rate": 0.0003823555905365486, "loss": 1.8371, "step": 7195 }, { "epoch": 0.3513671875, "grad_norm": 0.2717477083206177, "learning_rate": 0.0003823248058877447, "loss": 1.8295, "step": 7196 }, { "epoch": 0.351416015625, "grad_norm": 0.2124742567539215, "learning_rate": 0.00038229401863783784, "loss": 1.8433, "step": 7197 }, { "epoch": 0.35146484375, "grad_norm": 0.2456681877374649, "learning_rate": 0.00038226322878757426, "loss": 1.8713, "step": 7198 }, { "epoch": 0.351513671875, "grad_norm": 0.1916365921497345, "learning_rate": 0.00038223243633770017, "loss": 1.842, "step": 7199 }, { "epoch": 0.3515625, "grad_norm": 0.24762722849845886, "learning_rate": 0.00038220164128896175, "loss": 1.8311, "step": 7200 }, { "epoch": 0.351611328125, "grad_norm": 0.23046638071537018, "learning_rate": 0.0003821708436421055, "loss": 1.8123, "step": 7201 }, { "epoch": 0.35166015625, "grad_norm": 0.2634343206882477, "learning_rate": 0.0003821400433978778, "loss": 1.8005, "step": 7202 }, { "epoch": 0.351708984375, "grad_norm": 0.2682304084300995, "learning_rate": 0.0003821092405570249, "loss": 1.812, "step": 7203 }, { "epoch": 0.3517578125, "grad_norm": 0.2635926306247711, "learning_rate": 0.00038207843512029374, "loss": 1.827, "step": 7204 }, { "epoch": 0.351806640625, "grad_norm": 0.284970223903656, "learning_rate": 0.00038204762708843055, "loss": 1.8537, "step": 7205 }, { "epoch": 0.35185546875, "grad_norm": 0.23895689845085144, "learning_rate": 0.0003820168164621821, "loss": 1.8211, "step": 7206 }, { "epoch": 0.351904296875, "grad_norm": 0.2775740623474121, "learning_rate": 0.00038198600324229517, "loss": 1.819, "step": 7207 }, { "epoch": 0.351953125, "grad_norm": 0.28441718220710754, "learning_rate": 0.00038195518742951647, "loss": 1.852, "step": 7208 }, { "epoch": 0.352001953125, "grad_norm": 0.29557862877845764, "learning_rate": 0.0003819243690245928, "loss": 1.8638, "step": 7209 }, { "epoch": 0.35205078125, "grad_norm": 0.2762874960899353, "learning_rate": 0.0003818935480282713, "loss": 1.834, "step": 7210 }, { "epoch": 0.352099609375, "grad_norm": 0.2510543465614319, "learning_rate": 0.00038186272444129874, "loss": 1.8368, "step": 7211 }, { "epoch": 0.3521484375, "grad_norm": 0.25689515471458435, "learning_rate": 0.00038183189826442217, "loss": 1.8255, "step": 7212 }, { "epoch": 0.352197265625, "grad_norm": 0.2560872435569763, "learning_rate": 0.00038180106949838873, "loss": 1.8449, "step": 7213 }, { "epoch": 0.35224609375, "grad_norm": 0.20477606356143951, "learning_rate": 0.00038177023814394556, "loss": 1.8178, "step": 7214 }, { "epoch": 0.352294921875, "grad_norm": 0.2067962884902954, "learning_rate": 0.0003817394042018399, "loss": 1.8371, "step": 7215 }, { "epoch": 0.35234375, "grad_norm": 0.24350938200950623, "learning_rate": 0.000381708567672819, "loss": 1.8133, "step": 7216 }, { "epoch": 0.352392578125, "grad_norm": 0.2797580659389496, "learning_rate": 0.00038167772855763027, "loss": 1.8256, "step": 7217 }, { "epoch": 0.35244140625, "grad_norm": 0.29341667890548706, "learning_rate": 0.00038164688685702106, "loss": 1.8089, "step": 7218 }, { "epoch": 0.352490234375, "grad_norm": 0.31899338960647583, "learning_rate": 0.0003816160425717389, "loss": 1.8309, "step": 7219 }, { "epoch": 0.3525390625, "grad_norm": 0.266282856464386, "learning_rate": 0.00038158519570253126, "loss": 1.8259, "step": 7220 }, { "epoch": 0.352587890625, "grad_norm": 0.2760241627693176, "learning_rate": 0.0003815543462501458, "loss": 1.866, "step": 7221 }, { "epoch": 0.35263671875, "grad_norm": 0.25270095467567444, "learning_rate": 0.0003815234942153301, "loss": 1.8368, "step": 7222 }, { "epoch": 0.352685546875, "grad_norm": 0.28023022413253784, "learning_rate": 0.000381492639598832, "loss": 1.8329, "step": 7223 }, { "epoch": 0.352734375, "grad_norm": 0.2992701530456543, "learning_rate": 0.0003814617824013992, "loss": 1.815, "step": 7224 }, { "epoch": 0.352783203125, "grad_norm": 0.38001877069473267, "learning_rate": 0.00038143092262377955, "loss": 1.8443, "step": 7225 }, { "epoch": 0.35283203125, "grad_norm": 0.3794831931591034, "learning_rate": 0.00038140006026672103, "loss": 1.8307, "step": 7226 }, { "epoch": 0.352880859375, "grad_norm": 0.2773469090461731, "learning_rate": 0.0003813691953309715, "loss": 1.8548, "step": 7227 }, { "epoch": 0.3529296875, "grad_norm": 0.4716448187828064, "learning_rate": 0.00038133832781727913, "loss": 1.8242, "step": 7228 }, { "epoch": 0.352978515625, "grad_norm": 0.36353158950805664, "learning_rate": 0.0003813074577263919, "loss": 1.8362, "step": 7229 }, { "epoch": 0.35302734375, "grad_norm": 0.33750495314598083, "learning_rate": 0.0003812765850590581, "loss": 1.8668, "step": 7230 }, { "epoch": 0.353076171875, "grad_norm": 0.34546583890914917, "learning_rate": 0.00038124570981602583, "loss": 1.8208, "step": 7231 }, { "epoch": 0.353125, "grad_norm": 0.23278099298477173, "learning_rate": 0.0003812148319980435, "loss": 1.8216, "step": 7232 }, { "epoch": 0.353173828125, "grad_norm": 0.31264346837997437, "learning_rate": 0.0003811839516058592, "loss": 1.8381, "step": 7233 }, { "epoch": 0.35322265625, "grad_norm": 0.21872718632221222, "learning_rate": 0.0003811530686402217, "loss": 1.8069, "step": 7234 }, { "epoch": 0.353271484375, "grad_norm": 0.30625584721565247, "learning_rate": 0.0003811221831018792, "loss": 1.8504, "step": 7235 }, { "epoch": 0.3533203125, "grad_norm": 0.27088767290115356, "learning_rate": 0.00038109129499158044, "loss": 1.8722, "step": 7236 }, { "epoch": 0.353369140625, "grad_norm": 0.26792845129966736, "learning_rate": 0.0003810604043100738, "loss": 1.8437, "step": 7237 }, { "epoch": 0.35341796875, "grad_norm": 0.30643215775489807, "learning_rate": 0.00038102951105810817, "loss": 1.8369, "step": 7238 }, { "epoch": 0.353466796875, "grad_norm": 0.27508077025413513, "learning_rate": 0.00038099861523643207, "loss": 1.8107, "step": 7239 }, { "epoch": 0.353515625, "grad_norm": 0.25780773162841797, "learning_rate": 0.0003809677168457944, "loss": 1.7942, "step": 7240 }, { "epoch": 0.353564453125, "grad_norm": 0.2312232404947281, "learning_rate": 0.0003809368158869439, "loss": 1.8373, "step": 7241 }, { "epoch": 0.35361328125, "grad_norm": 0.3089824914932251, "learning_rate": 0.00038090591236062966, "loss": 1.8183, "step": 7242 }, { "epoch": 0.353662109375, "grad_norm": 0.23480162024497986, "learning_rate": 0.0003808750062676005, "loss": 1.8248, "step": 7243 }, { "epoch": 0.3537109375, "grad_norm": 0.3147464394569397, "learning_rate": 0.0003808440976086056, "loss": 1.8311, "step": 7244 }, { "epoch": 0.353759765625, "grad_norm": 0.30190497636795044, "learning_rate": 0.0003808131863843939, "loss": 1.8469, "step": 7245 }, { "epoch": 0.35380859375, "grad_norm": 0.28266555070877075, "learning_rate": 0.0003807822725957146, "loss": 1.8364, "step": 7246 }, { "epoch": 0.353857421875, "grad_norm": 0.327831506729126, "learning_rate": 0.00038075135624331694, "loss": 1.8292, "step": 7247 }, { "epoch": 0.35390625, "grad_norm": 0.27238425612449646, "learning_rate": 0.00038072043732795014, "loss": 1.8262, "step": 7248 }, { "epoch": 0.353955078125, "grad_norm": 0.2518717646598816, "learning_rate": 0.00038068951585036365, "loss": 1.8384, "step": 7249 }, { "epoch": 0.35400390625, "grad_norm": 0.229821115732193, "learning_rate": 0.00038065859181130686, "loss": 1.8166, "step": 7250 }, { "epoch": 0.354052734375, "grad_norm": 0.24177488684654236, "learning_rate": 0.00038062766521152917, "loss": 1.8418, "step": 7251 }, { "epoch": 0.3541015625, "grad_norm": 0.2868764400482178, "learning_rate": 0.0003805967360517802, "loss": 1.8123, "step": 7252 }, { "epoch": 0.354150390625, "grad_norm": 0.2407413274049759, "learning_rate": 0.00038056580433280953, "loss": 1.8402, "step": 7253 }, { "epoch": 0.35419921875, "grad_norm": 0.26131755113601685, "learning_rate": 0.00038053487005536675, "loss": 1.8149, "step": 7254 }, { "epoch": 0.354248046875, "grad_norm": 0.2868610918521881, "learning_rate": 0.00038050393322020155, "loss": 1.8241, "step": 7255 }, { "epoch": 0.354296875, "grad_norm": 0.27686232328414917, "learning_rate": 0.0003804729938280638, "loss": 1.8457, "step": 7256 }, { "epoch": 0.354345703125, "grad_norm": 0.32361745834350586, "learning_rate": 0.0003804420518797034, "loss": 1.8486, "step": 7257 }, { "epoch": 0.35439453125, "grad_norm": 0.3222311735153198, "learning_rate": 0.00038041110737587, "loss": 1.8184, "step": 7258 }, { "epoch": 0.354443359375, "grad_norm": 0.26336902379989624, "learning_rate": 0.0003803801603173139, "loss": 1.8437, "step": 7259 }, { "epoch": 0.3544921875, "grad_norm": 0.27081549167633057, "learning_rate": 0.00038034921070478477, "loss": 1.8622, "step": 7260 }, { "epoch": 0.354541015625, "grad_norm": 0.2516447603702545, "learning_rate": 0.000380318258539033, "loss": 1.8472, "step": 7261 }, { "epoch": 0.35458984375, "grad_norm": 0.2641756236553192, "learning_rate": 0.0003802873038208086, "loss": 1.8328, "step": 7262 }, { "epoch": 0.354638671875, "grad_norm": 0.23880018293857574, "learning_rate": 0.00038025634655086183, "loss": 1.81, "step": 7263 }, { "epoch": 0.3546875, "grad_norm": 0.2394212782382965, "learning_rate": 0.00038022538672994287, "loss": 1.8408, "step": 7264 }, { "epoch": 0.354736328125, "grad_norm": 0.26431581377983093, "learning_rate": 0.00038019442435880215, "loss": 1.8316, "step": 7265 }, { "epoch": 0.35478515625, "grad_norm": 0.2828350365161896, "learning_rate": 0.00038016345943819, "loss": 1.8546, "step": 7266 }, { "epoch": 0.354833984375, "grad_norm": 0.25596854090690613, "learning_rate": 0.0003801324919688571, "loss": 1.8503, "step": 7267 }, { "epoch": 0.3548828125, "grad_norm": 0.28158751130104065, "learning_rate": 0.0003801015219515536, "loss": 1.8077, "step": 7268 }, { "epoch": 0.354931640625, "grad_norm": 0.2951284945011139, "learning_rate": 0.0003800705493870303, "loss": 1.8075, "step": 7269 }, { "epoch": 0.35498046875, "grad_norm": 0.2524617314338684, "learning_rate": 0.00038003957427603795, "loss": 1.8169, "step": 7270 }, { "epoch": 0.355029296875, "grad_norm": 0.32187801599502563, "learning_rate": 0.00038000859661932706, "loss": 1.819, "step": 7271 }, { "epoch": 0.355078125, "grad_norm": 0.24984432756900787, "learning_rate": 0.00037997761641764846, "loss": 1.7979, "step": 7272 }, { "epoch": 0.355126953125, "grad_norm": 0.314735472202301, "learning_rate": 0.00037994663367175297, "loss": 1.8453, "step": 7273 }, { "epoch": 0.35517578125, "grad_norm": 0.3209612965583801, "learning_rate": 0.00037991564838239154, "loss": 1.8205, "step": 7274 }, { "epoch": 0.355224609375, "grad_norm": 0.26626065373420715, "learning_rate": 0.00037988466055031506, "loss": 1.8436, "step": 7275 }, { "epoch": 0.3552734375, "grad_norm": 0.33813950419425964, "learning_rate": 0.00037985367017627454, "loss": 1.8197, "step": 7276 }, { "epoch": 0.355322265625, "grad_norm": 0.2687554359436035, "learning_rate": 0.0003798226772610212, "loss": 1.835, "step": 7277 }, { "epoch": 0.35537109375, "grad_norm": 0.2717287540435791, "learning_rate": 0.000379791681805306, "loss": 1.818, "step": 7278 }, { "epoch": 0.355419921875, "grad_norm": 0.2991907000541687, "learning_rate": 0.0003797606838098801, "loss": 1.8266, "step": 7279 }, { "epoch": 0.35546875, "grad_norm": 0.26051807403564453, "learning_rate": 0.000379729683275495, "loss": 1.8413, "step": 7280 }, { "epoch": 0.355517578125, "grad_norm": 0.26533666253089905, "learning_rate": 0.0003796986802029018, "loss": 1.805, "step": 7281 }, { "epoch": 0.35556640625, "grad_norm": 0.2742096185684204, "learning_rate": 0.00037966767459285194, "loss": 1.8461, "step": 7282 }, { "epoch": 0.355615234375, "grad_norm": 0.27816253900527954, "learning_rate": 0.000379636666446097, "loss": 1.7979, "step": 7283 }, { "epoch": 0.3556640625, "grad_norm": 0.30064618587493896, "learning_rate": 0.00037960565576338834, "loss": 1.7965, "step": 7284 }, { "epoch": 0.355712890625, "grad_norm": 0.314267098903656, "learning_rate": 0.0003795746425454776, "loss": 1.8329, "step": 7285 }, { "epoch": 0.35576171875, "grad_norm": 0.31303802132606506, "learning_rate": 0.0003795436267931163, "loss": 1.8506, "step": 7286 }, { "epoch": 0.355810546875, "grad_norm": 0.2950424253940582, "learning_rate": 0.00037951260850705636, "loss": 1.8399, "step": 7287 }, { "epoch": 0.355859375, "grad_norm": 0.3179405629634857, "learning_rate": 0.0003794815876880493, "loss": 1.8493, "step": 7288 }, { "epoch": 0.355908203125, "grad_norm": 0.36632490158081055, "learning_rate": 0.00037945056433684704, "loss": 1.8567, "step": 7289 }, { "epoch": 0.35595703125, "grad_norm": 0.41901326179504395, "learning_rate": 0.0003794195384542014, "loss": 1.8335, "step": 7290 }, { "epoch": 0.356005859375, "grad_norm": 0.3414709270000458, "learning_rate": 0.0003793885100408644, "loss": 1.8336, "step": 7291 }, { "epoch": 0.3560546875, "grad_norm": 0.33878040313720703, "learning_rate": 0.0003793574790975879, "loss": 1.8132, "step": 7292 }, { "epoch": 0.356103515625, "grad_norm": 0.364108681678772, "learning_rate": 0.00037932644562512413, "loss": 1.8241, "step": 7293 }, { "epoch": 0.35615234375, "grad_norm": 0.33115777373313904, "learning_rate": 0.00037929540962422514, "loss": 1.8404, "step": 7294 }, { "epoch": 0.356201171875, "grad_norm": 0.3153758645057678, "learning_rate": 0.0003792643710956432, "loss": 1.852, "step": 7295 }, { "epoch": 0.35625, "grad_norm": 0.30198362469673157, "learning_rate": 0.00037923333004013035, "loss": 1.8316, "step": 7296 }, { "epoch": 0.356298828125, "grad_norm": 0.3748912811279297, "learning_rate": 0.000379202286458439, "loss": 1.8343, "step": 7297 }, { "epoch": 0.35634765625, "grad_norm": 0.3128708600997925, "learning_rate": 0.00037917124035132156, "loss": 1.82, "step": 7298 }, { "epoch": 0.356396484375, "grad_norm": 0.34639278054237366, "learning_rate": 0.0003791401917195304, "loss": 1.8171, "step": 7299 }, { "epoch": 0.3564453125, "grad_norm": 0.37606900930404663, "learning_rate": 0.00037910914056381794, "loss": 1.8317, "step": 7300 }, { "epoch": 0.356494140625, "grad_norm": 0.2380758672952652, "learning_rate": 0.000379078086884937, "loss": 1.8464, "step": 7301 }, { "epoch": 0.35654296875, "grad_norm": 0.37127554416656494, "learning_rate": 0.00037904703068363996, "loss": 1.8243, "step": 7302 }, { "epoch": 0.356591796875, "grad_norm": 0.2108307033777237, "learning_rate": 0.00037901597196067944, "loss": 1.8117, "step": 7303 }, { "epoch": 0.356640625, "grad_norm": 0.3183583915233612, "learning_rate": 0.00037898491071680835, "loss": 1.8323, "step": 7304 }, { "epoch": 0.356689453125, "grad_norm": 0.29368123412132263, "learning_rate": 0.0003789538469527794, "loss": 1.8398, "step": 7305 }, { "epoch": 0.35673828125, "grad_norm": 0.24565580487251282, "learning_rate": 0.0003789227806693454, "loss": 1.8452, "step": 7306 }, { "epoch": 0.356787109375, "grad_norm": 0.29624834656715393, "learning_rate": 0.0003788917118672594, "loss": 1.8286, "step": 7307 }, { "epoch": 0.3568359375, "grad_norm": 0.26775941252708435, "learning_rate": 0.0003788606405472742, "loss": 1.8249, "step": 7308 }, { "epoch": 0.356884765625, "grad_norm": 0.3034159541130066, "learning_rate": 0.0003788295667101431, "loss": 1.8386, "step": 7309 }, { "epoch": 0.35693359375, "grad_norm": 0.32570284605026245, "learning_rate": 0.0003787984903566188, "loss": 1.843, "step": 7310 }, { "epoch": 0.356982421875, "grad_norm": 0.30172795057296753, "learning_rate": 0.00037876741148745486, "loss": 1.8403, "step": 7311 }, { "epoch": 0.35703125, "grad_norm": 0.30117732286453247, "learning_rate": 0.00037873633010340437, "loss": 1.8539, "step": 7312 }, { "epoch": 0.357080078125, "grad_norm": 0.2526842951774597, "learning_rate": 0.0003787052462052205, "loss": 1.836, "step": 7313 }, { "epoch": 0.35712890625, "grad_norm": 0.2599608898162842, "learning_rate": 0.00037867415979365673, "loss": 1.8218, "step": 7314 }, { "epoch": 0.357177734375, "grad_norm": 0.24900087714195251, "learning_rate": 0.0003786430708694663, "loss": 1.8287, "step": 7315 }, { "epoch": 0.3572265625, "grad_norm": 0.22737671434879303, "learning_rate": 0.00037861197943340287, "loss": 1.8463, "step": 7316 }, { "epoch": 0.357275390625, "grad_norm": 0.2540125548839569, "learning_rate": 0.00037858088548621983, "loss": 1.8175, "step": 7317 }, { "epoch": 0.35732421875, "grad_norm": 0.2405012547969818, "learning_rate": 0.00037854978902867085, "loss": 1.8223, "step": 7318 }, { "epoch": 0.357373046875, "grad_norm": 0.2192947268486023, "learning_rate": 0.0003785186900615095, "loss": 1.8486, "step": 7319 }, { "epoch": 0.357421875, "grad_norm": 0.2338804006576538, "learning_rate": 0.00037848758858548953, "loss": 1.8467, "step": 7320 }, { "epoch": 0.357470703125, "grad_norm": 0.236971914768219, "learning_rate": 0.00037845648460136473, "loss": 1.8479, "step": 7321 }, { "epoch": 0.35751953125, "grad_norm": 0.26404452323913574, "learning_rate": 0.000378425378109889, "loss": 1.8302, "step": 7322 }, { "epoch": 0.357568359375, "grad_norm": 0.2265174835920334, "learning_rate": 0.000378394269111816, "loss": 1.8087, "step": 7323 }, { "epoch": 0.3576171875, "grad_norm": 0.21677856147289276, "learning_rate": 0.0003783631576078999, "loss": 1.8258, "step": 7324 }, { "epoch": 0.357666015625, "grad_norm": 0.21198534965515137, "learning_rate": 0.0003783320435988946, "loss": 1.8248, "step": 7325 }, { "epoch": 0.35771484375, "grad_norm": 0.2280738353729248, "learning_rate": 0.0003783009270855542, "loss": 1.8493, "step": 7326 }, { "epoch": 0.357763671875, "grad_norm": 0.21011324226856232, "learning_rate": 0.0003782698080686329, "loss": 1.8261, "step": 7327 }, { "epoch": 0.3578125, "grad_norm": 0.22316671907901764, "learning_rate": 0.0003782386865488848, "loss": 1.8454, "step": 7328 }, { "epoch": 0.357861328125, "grad_norm": 0.300645649433136, "learning_rate": 0.0003782075625270642, "loss": 1.839, "step": 7329 }, { "epoch": 0.35791015625, "grad_norm": 0.29922255873680115, "learning_rate": 0.0003781764360039253, "loss": 1.8462, "step": 7330 }, { "epoch": 0.357958984375, "grad_norm": 0.25867760181427, "learning_rate": 0.0003781453069802227, "loss": 1.8279, "step": 7331 }, { "epoch": 0.3580078125, "grad_norm": 0.27119407057762146, "learning_rate": 0.0003781141754567107, "loss": 1.8184, "step": 7332 }, { "epoch": 0.358056640625, "grad_norm": 0.3285558223724365, "learning_rate": 0.0003780830414341439, "loss": 1.8392, "step": 7333 }, { "epoch": 0.35810546875, "grad_norm": 0.3518846035003662, "learning_rate": 0.00037805190491327665, "loss": 1.8446, "step": 7334 }, { "epoch": 0.358154296875, "grad_norm": 0.24039053916931152, "learning_rate": 0.0003780207658948637, "loss": 1.8341, "step": 7335 }, { "epoch": 0.358203125, "grad_norm": 0.34967881441116333, "learning_rate": 0.00037798962437965975, "loss": 1.8493, "step": 7336 }, { "epoch": 0.358251953125, "grad_norm": 0.3777039051055908, "learning_rate": 0.00037795848036841954, "loss": 1.8502, "step": 7337 }, { "epoch": 0.35830078125, "grad_norm": 0.22099484503269196, "learning_rate": 0.00037792733386189783, "loss": 1.8562, "step": 7338 }, { "epoch": 0.358349609375, "grad_norm": 0.3636554181575775, "learning_rate": 0.00037789618486084946, "loss": 1.817, "step": 7339 }, { "epoch": 0.3583984375, "grad_norm": 0.30876126885414124, "learning_rate": 0.0003778650333660294, "loss": 1.834, "step": 7340 }, { "epoch": 0.358447265625, "grad_norm": 0.2388109266757965, "learning_rate": 0.0003778338793781926, "loss": 1.8321, "step": 7341 }, { "epoch": 0.35849609375, "grad_norm": 0.33237794041633606, "learning_rate": 0.0003778027228980942, "loss": 1.816, "step": 7342 }, { "epoch": 0.358544921875, "grad_norm": 0.2933278977870941, "learning_rate": 0.00037777156392648916, "loss": 1.827, "step": 7343 }, { "epoch": 0.35859375, "grad_norm": 0.2609318792819977, "learning_rate": 0.00037774040246413266, "loss": 1.8289, "step": 7344 }, { "epoch": 0.358642578125, "grad_norm": 0.31216874718666077, "learning_rate": 0.00037770923851178, "loss": 1.8415, "step": 7345 }, { "epoch": 0.35869140625, "grad_norm": 0.2460956573486328, "learning_rate": 0.00037767807207018646, "loss": 1.8276, "step": 7346 }, { "epoch": 0.358740234375, "grad_norm": 0.28292593359947205, "learning_rate": 0.0003776469031401073, "loss": 1.8165, "step": 7347 }, { "epoch": 0.3587890625, "grad_norm": 0.2740638852119446, "learning_rate": 0.000377615731722298, "loss": 1.8122, "step": 7348 }, { "epoch": 0.358837890625, "grad_norm": 0.21472322940826416, "learning_rate": 0.000377584557817514, "loss": 1.8407, "step": 7349 }, { "epoch": 0.35888671875, "grad_norm": 0.31139445304870605, "learning_rate": 0.0003775533814265108, "loss": 1.8349, "step": 7350 }, { "epoch": 0.358935546875, "grad_norm": 0.22285622358322144, "learning_rate": 0.00037752220255004405, "loss": 1.8153, "step": 7351 }, { "epoch": 0.358984375, "grad_norm": 0.28013116121292114, "learning_rate": 0.00037749102118886943, "loss": 1.8334, "step": 7352 }, { "epoch": 0.359033203125, "grad_norm": 0.21866649389266968, "learning_rate": 0.0003774598373437424, "loss": 1.8365, "step": 7353 }, { "epoch": 0.35908203125, "grad_norm": 0.2489159256219864, "learning_rate": 0.000377428651015419, "loss": 1.8441, "step": 7354 }, { "epoch": 0.359130859375, "grad_norm": 0.26586827635765076, "learning_rate": 0.00037739746220465494, "loss": 1.8391, "step": 7355 }, { "epoch": 0.3591796875, "grad_norm": 0.3083546459674835, "learning_rate": 0.00037736627091220615, "loss": 1.8372, "step": 7356 }, { "epoch": 0.359228515625, "grad_norm": 0.298092782497406, "learning_rate": 0.0003773350771388285, "loss": 1.8279, "step": 7357 }, { "epoch": 0.35927734375, "grad_norm": 0.30360105633735657, "learning_rate": 0.000377303880885278, "loss": 1.8486, "step": 7358 }, { "epoch": 0.359326171875, "grad_norm": 0.2938053011894226, "learning_rate": 0.0003772726821523108, "loss": 1.8519, "step": 7359 }, { "epoch": 0.359375, "grad_norm": 0.29026147723197937, "learning_rate": 0.00037724148094068295, "loss": 1.8126, "step": 7360 }, { "epoch": 0.359423828125, "grad_norm": 0.3119409382343292, "learning_rate": 0.0003772102772511507, "loss": 1.8416, "step": 7361 }, { "epoch": 0.35947265625, "grad_norm": 0.3414170742034912, "learning_rate": 0.00037717907108447034, "loss": 1.8418, "step": 7362 }, { "epoch": 0.359521484375, "grad_norm": 0.2427220195531845, "learning_rate": 0.000377147862441398, "loss": 1.831, "step": 7363 }, { "epoch": 0.3595703125, "grad_norm": 0.32114073634147644, "learning_rate": 0.0003771166513226902, "loss": 1.8285, "step": 7364 }, { "epoch": 0.359619140625, "grad_norm": 0.29104650020599365, "learning_rate": 0.00037708543772910333, "loss": 1.8467, "step": 7365 }, { "epoch": 0.35966796875, "grad_norm": 0.23457969725131989, "learning_rate": 0.0003770542216613938, "loss": 1.8255, "step": 7366 }, { "epoch": 0.359716796875, "grad_norm": 0.27370545268058777, "learning_rate": 0.0003770230031203183, "loss": 1.8375, "step": 7367 }, { "epoch": 0.359765625, "grad_norm": 0.25415611267089844, "learning_rate": 0.0003769917821066333, "loss": 1.8486, "step": 7368 }, { "epoch": 0.359814453125, "grad_norm": 0.21619123220443726, "learning_rate": 0.0003769605586210955, "loss": 1.8273, "step": 7369 }, { "epoch": 0.35986328125, "grad_norm": 0.2665881812572479, "learning_rate": 0.00037692933266446165, "loss": 1.8055, "step": 7370 }, { "epoch": 0.359912109375, "grad_norm": 0.27352648973464966, "learning_rate": 0.0003768981042374886, "loss": 1.8457, "step": 7371 }, { "epoch": 0.3599609375, "grad_norm": 0.31793004274368286, "learning_rate": 0.00037686687334093305, "loss": 1.7949, "step": 7372 }, { "epoch": 0.360009765625, "grad_norm": 0.36618301272392273, "learning_rate": 0.00037683563997555205, "loss": 1.83, "step": 7373 }, { "epoch": 0.36005859375, "grad_norm": 0.3334938585758209, "learning_rate": 0.0003768044041421025, "loss": 1.8312, "step": 7374 }, { "epoch": 0.360107421875, "grad_norm": 0.2472175508737564, "learning_rate": 0.00037677316584134137, "loss": 1.8117, "step": 7375 }, { "epoch": 0.36015625, "grad_norm": 0.2596663236618042, "learning_rate": 0.00037674192507402584, "loss": 1.8486, "step": 7376 }, { "epoch": 0.360205078125, "grad_norm": 0.3759767413139343, "learning_rate": 0.000376710681840913, "loss": 1.8302, "step": 7377 }, { "epoch": 0.36025390625, "grad_norm": 0.2763359844684601, "learning_rate": 0.0003766794361427601, "loss": 1.8438, "step": 7378 }, { "epoch": 0.360302734375, "grad_norm": 0.36157676577568054, "learning_rate": 0.0003766481879803243, "loss": 1.8455, "step": 7379 }, { "epoch": 0.3603515625, "grad_norm": 0.3733830153942108, "learning_rate": 0.00037661693735436316, "loss": 1.7979, "step": 7380 }, { "epoch": 0.360400390625, "grad_norm": 0.24346019327640533, "learning_rate": 0.0003765856842656337, "loss": 1.8349, "step": 7381 }, { "epoch": 0.36044921875, "grad_norm": 0.2628331184387207, "learning_rate": 0.0003765544287148937, "loss": 1.8144, "step": 7382 }, { "epoch": 0.360498046875, "grad_norm": 0.2310580313205719, "learning_rate": 0.0003765231707029005, "loss": 1.8203, "step": 7383 }, { "epoch": 0.360546875, "grad_norm": 0.2317466139793396, "learning_rate": 0.0003764919102304117, "loss": 1.8148, "step": 7384 }, { "epoch": 0.360595703125, "grad_norm": 0.23230287432670593, "learning_rate": 0.00037646064729818495, "loss": 1.8099, "step": 7385 }, { "epoch": 0.36064453125, "grad_norm": 0.24136902391910553, "learning_rate": 0.00037642938190697784, "loss": 1.7958, "step": 7386 }, { "epoch": 0.360693359375, "grad_norm": 0.2272147238254547, "learning_rate": 0.0003763981140575482, "loss": 1.813, "step": 7387 }, { "epoch": 0.3607421875, "grad_norm": 0.21884918212890625, "learning_rate": 0.0003763668437506538, "loss": 1.8128, "step": 7388 }, { "epoch": 0.360791015625, "grad_norm": 0.2673225700855255, "learning_rate": 0.00037633557098705256, "loss": 1.8355, "step": 7389 }, { "epoch": 0.36083984375, "grad_norm": 0.2502068281173706, "learning_rate": 0.0003763042957675023, "loss": 1.805, "step": 7390 }, { "epoch": 0.360888671875, "grad_norm": 0.28848591446876526, "learning_rate": 0.00037627301809276106, "loss": 1.8161, "step": 7391 }, { "epoch": 0.3609375, "grad_norm": 0.236707404255867, "learning_rate": 0.00037624173796358686, "loss": 1.8105, "step": 7392 }, { "epoch": 0.360986328125, "grad_norm": 0.2884017527103424, "learning_rate": 0.0003762104553807377, "loss": 1.7963, "step": 7393 }, { "epoch": 0.36103515625, "grad_norm": 0.30955207347869873, "learning_rate": 0.00037617917034497194, "loss": 1.8133, "step": 7394 }, { "epoch": 0.361083984375, "grad_norm": 0.29246559739112854, "learning_rate": 0.0003761478828570477, "loss": 1.8447, "step": 7395 }, { "epoch": 0.3611328125, "grad_norm": 0.23275941610336304, "learning_rate": 0.0003761165929177233, "loss": 1.8133, "step": 7396 }, { "epoch": 0.361181640625, "grad_norm": 0.2810044288635254, "learning_rate": 0.000376085300527757, "loss": 1.864, "step": 7397 }, { "epoch": 0.36123046875, "grad_norm": 0.29870039224624634, "learning_rate": 0.00037605400568790714, "loss": 1.8382, "step": 7398 }, { "epoch": 0.361279296875, "grad_norm": 0.2792796492576599, "learning_rate": 0.0003760227083989324, "loss": 1.8444, "step": 7399 }, { "epoch": 0.361328125, "grad_norm": 0.2791227698326111, "learning_rate": 0.00037599140866159106, "loss": 1.8375, "step": 7400 }, { "epoch": 0.361376953125, "grad_norm": 0.23343609273433685, "learning_rate": 0.00037596010647664185, "loss": 1.8553, "step": 7401 }, { "epoch": 0.36142578125, "grad_norm": 0.2829454839229584, "learning_rate": 0.0003759288018448433, "loss": 1.8388, "step": 7402 }, { "epoch": 0.361474609375, "grad_norm": 0.31370818614959717, "learning_rate": 0.00037589749476695417, "loss": 1.8419, "step": 7403 }, { "epoch": 0.3615234375, "grad_norm": 0.243280827999115, "learning_rate": 0.0003758661852437332, "loss": 1.8158, "step": 7404 }, { "epoch": 0.361572265625, "grad_norm": 0.3236267566680908, "learning_rate": 0.00037583487327593927, "loss": 1.8239, "step": 7405 }, { "epoch": 0.36162109375, "grad_norm": 0.29878202080726624, "learning_rate": 0.00037580355886433114, "loss": 1.8021, "step": 7406 }, { "epoch": 0.361669921875, "grad_norm": 0.27024492621421814, "learning_rate": 0.0003757722420096677, "loss": 1.8225, "step": 7407 }, { "epoch": 0.36171875, "grad_norm": 0.30221453309059143, "learning_rate": 0.00037574092271270806, "loss": 1.8361, "step": 7408 }, { "epoch": 0.361767578125, "grad_norm": 0.3083574175834656, "learning_rate": 0.00037570960097421126, "loss": 1.8181, "step": 7409 }, { "epoch": 0.36181640625, "grad_norm": 0.3306373953819275, "learning_rate": 0.0003756782767949363, "loss": 1.8462, "step": 7410 }, { "epoch": 0.361865234375, "grad_norm": 0.32403722405433655, "learning_rate": 0.0003756469501756424, "loss": 1.8397, "step": 7411 }, { "epoch": 0.3619140625, "grad_norm": 0.31214094161987305, "learning_rate": 0.0003756156211170889, "loss": 1.8271, "step": 7412 }, { "epoch": 0.361962890625, "grad_norm": 0.2940070629119873, "learning_rate": 0.000375584289620035, "loss": 1.8309, "step": 7413 }, { "epoch": 0.36201171875, "grad_norm": 0.31704068183898926, "learning_rate": 0.00037555295568523995, "loss": 1.8363, "step": 7414 }, { "epoch": 0.362060546875, "grad_norm": 0.32288581132888794, "learning_rate": 0.0003755216193134633, "loss": 1.8621, "step": 7415 }, { "epoch": 0.362109375, "grad_norm": 0.26844003796577454, "learning_rate": 0.0003754902805054644, "loss": 1.8626, "step": 7416 }, { "epoch": 0.362158203125, "grad_norm": 0.34751057624816895, "learning_rate": 0.0003754589392620028, "loss": 1.8364, "step": 7417 }, { "epoch": 0.36220703125, "grad_norm": 0.31799498200416565, "learning_rate": 0.00037542759558383817, "loss": 1.844, "step": 7418 }, { "epoch": 0.362255859375, "grad_norm": 0.29587292671203613, "learning_rate": 0.00037539624947173, "loss": 1.8412, "step": 7419 }, { "epoch": 0.3623046875, "grad_norm": 0.30743491649627686, "learning_rate": 0.0003753649009264381, "loss": 1.8254, "step": 7420 }, { "epoch": 0.362353515625, "grad_norm": 0.32430288195610046, "learning_rate": 0.0003753335499487222, "loss": 1.8282, "step": 7421 }, { "epoch": 0.36240234375, "grad_norm": 0.310478538274765, "learning_rate": 0.0003753021965393421, "loss": 1.8314, "step": 7422 }, { "epoch": 0.362451171875, "grad_norm": 0.30101653933525085, "learning_rate": 0.00037527084069905776, "loss": 1.8259, "step": 7423 }, { "epoch": 0.3625, "grad_norm": 0.23013965785503387, "learning_rate": 0.000375239482428629, "loss": 1.822, "step": 7424 }, { "epoch": 0.362548828125, "grad_norm": 0.28547486662864685, "learning_rate": 0.0003752081217288158, "loss": 1.8113, "step": 7425 }, { "epoch": 0.36259765625, "grad_norm": 0.22562305629253387, "learning_rate": 0.0003751767586003783, "loss": 1.8214, "step": 7426 }, { "epoch": 0.362646484375, "grad_norm": 0.30501726269721985, "learning_rate": 0.0003751453930440765, "loss": 1.8479, "step": 7427 }, { "epoch": 0.3626953125, "grad_norm": 0.29296350479125977, "learning_rate": 0.00037511402506067073, "loss": 1.8178, "step": 7428 }, { "epoch": 0.362744140625, "grad_norm": 0.27276331186294556, "learning_rate": 0.00037508265465092114, "loss": 1.8158, "step": 7429 }, { "epoch": 0.36279296875, "grad_norm": 0.28596484661102295, "learning_rate": 0.00037505128181558795, "loss": 1.8415, "step": 7430 }, { "epoch": 0.362841796875, "grad_norm": 0.2708981931209564, "learning_rate": 0.0003750199065554316, "loss": 1.8024, "step": 7431 }, { "epoch": 0.362890625, "grad_norm": 0.264913946390152, "learning_rate": 0.00037498852887121243, "loss": 1.826, "step": 7432 }, { "epoch": 0.362939453125, "grad_norm": 0.2232087254524231, "learning_rate": 0.00037495714876369096, "loss": 1.817, "step": 7433 }, { "epoch": 0.36298828125, "grad_norm": 0.32943034172058105, "learning_rate": 0.00037492576623362773, "loss": 1.852, "step": 7434 }, { "epoch": 0.363037109375, "grad_norm": 0.2550012171268463, "learning_rate": 0.00037489438128178324, "loss": 1.8188, "step": 7435 }, { "epoch": 0.3630859375, "grad_norm": 0.33394014835357666, "learning_rate": 0.0003748629939089181, "loss": 1.8214, "step": 7436 }, { "epoch": 0.363134765625, "grad_norm": 0.28685262799263, "learning_rate": 0.00037483160411579315, "loss": 1.8236, "step": 7437 }, { "epoch": 0.36318359375, "grad_norm": 0.26105672121047974, "learning_rate": 0.00037480021190316906, "loss": 1.8394, "step": 7438 }, { "epoch": 0.363232421875, "grad_norm": 0.30284518003463745, "learning_rate": 0.00037476881727180664, "loss": 1.8295, "step": 7439 }, { "epoch": 0.36328125, "grad_norm": 0.22944730520248413, "learning_rate": 0.0003747374202224668, "loss": 1.8278, "step": 7440 }, { "epoch": 0.363330078125, "grad_norm": 0.26735827326774597, "learning_rate": 0.00037470602075591043, "loss": 1.823, "step": 7441 }, { "epoch": 0.36337890625, "grad_norm": 0.2313823401927948, "learning_rate": 0.0003746746188728985, "loss": 1.8481, "step": 7442 }, { "epoch": 0.363427734375, "grad_norm": 0.27007681131362915, "learning_rate": 0.00037464321457419215, "loss": 1.8258, "step": 7443 }, { "epoch": 0.3634765625, "grad_norm": 0.3701314926147461, "learning_rate": 0.00037461180786055244, "loss": 1.8346, "step": 7444 }, { "epoch": 0.363525390625, "grad_norm": 0.29601404070854187, "learning_rate": 0.0003745803987327405, "loss": 1.8399, "step": 7445 }, { "epoch": 0.36357421875, "grad_norm": 0.24810218811035156, "learning_rate": 0.00037454898719151765, "loss": 1.8063, "step": 7446 }, { "epoch": 0.363623046875, "grad_norm": 0.30889931321144104, "learning_rate": 0.0003745175732376451, "loss": 1.8478, "step": 7447 }, { "epoch": 0.363671875, "grad_norm": 0.22886796295642853, "learning_rate": 0.00037448615687188424, "loss": 1.8541, "step": 7448 }, { "epoch": 0.363720703125, "grad_norm": 0.24973125755786896, "learning_rate": 0.0003744547380949964, "loss": 1.8258, "step": 7449 }, { "epoch": 0.36376953125, "grad_norm": 0.29708966612815857, "learning_rate": 0.00037442331690774313, "loss": 1.8192, "step": 7450 }, { "epoch": 0.363818359375, "grad_norm": 0.25606638193130493, "learning_rate": 0.00037439189331088584, "loss": 1.8384, "step": 7451 }, { "epoch": 0.3638671875, "grad_norm": 0.25673699378967285, "learning_rate": 0.0003743604673051862, "loss": 1.8285, "step": 7452 }, { "epoch": 0.363916015625, "grad_norm": 0.3117835223674774, "learning_rate": 0.00037432903889140584, "loss": 1.8306, "step": 7453 }, { "epoch": 0.36396484375, "grad_norm": 0.28401416540145874, "learning_rate": 0.00037429760807030634, "loss": 1.8421, "step": 7454 }, { "epoch": 0.364013671875, "grad_norm": 0.27163273096084595, "learning_rate": 0.00037426617484264963, "loss": 1.8485, "step": 7455 }, { "epoch": 0.3640625, "grad_norm": 0.27057021856307983, "learning_rate": 0.00037423473920919736, "loss": 1.8427, "step": 7456 }, { "epoch": 0.364111328125, "grad_norm": 0.243181049823761, "learning_rate": 0.0003742033011707115, "loss": 1.8227, "step": 7457 }, { "epoch": 0.36416015625, "grad_norm": 0.3271198868751526, "learning_rate": 0.0003741718607279539, "loss": 1.829, "step": 7458 }, { "epoch": 0.364208984375, "grad_norm": 0.3652975857257843, "learning_rate": 0.0003741404178816866, "loss": 1.8133, "step": 7459 }, { "epoch": 0.3642578125, "grad_norm": 0.31154683232307434, "learning_rate": 0.0003741089726326716, "loss": 1.8525, "step": 7460 }, { "epoch": 0.364306640625, "grad_norm": 0.3954624533653259, "learning_rate": 0.00037407752498167104, "loss": 1.8413, "step": 7461 }, { "epoch": 0.36435546875, "grad_norm": 0.2576228678226471, "learning_rate": 0.00037404607492944704, "loss": 1.8282, "step": 7462 }, { "epoch": 0.364404296875, "grad_norm": 0.2901429235935211, "learning_rate": 0.00037401462247676185, "loss": 1.8376, "step": 7463 }, { "epoch": 0.364453125, "grad_norm": 0.39193668961524963, "learning_rate": 0.0003739831676243778, "loss": 1.8205, "step": 7464 }, { "epoch": 0.364501953125, "grad_norm": 0.2697351276874542, "learning_rate": 0.00037395171037305706, "loss": 1.8304, "step": 7465 }, { "epoch": 0.36455078125, "grad_norm": 0.41444969177246094, "learning_rate": 0.0003739202507235622, "loss": 1.8035, "step": 7466 }, { "epoch": 0.364599609375, "grad_norm": 0.38540297746658325, "learning_rate": 0.0003738887886766556, "loss": 1.8156, "step": 7467 }, { "epoch": 0.3646484375, "grad_norm": 0.2780788540840149, "learning_rate": 0.0003738573242330997, "loss": 1.8432, "step": 7468 }, { "epoch": 0.364697265625, "grad_norm": 0.49641236662864685, "learning_rate": 0.00037382585739365715, "loss": 1.8301, "step": 7469 }, { "epoch": 0.36474609375, "grad_norm": 0.2882884442806244, "learning_rate": 0.0003737943881590906, "loss": 1.8208, "step": 7470 }, { "epoch": 0.364794921875, "grad_norm": 0.3848716616630554, "learning_rate": 0.0003737629165301626, "loss": 1.8368, "step": 7471 }, { "epoch": 0.36484375, "grad_norm": 0.2867090702056885, "learning_rate": 0.00037373144250763603, "loss": 1.8448, "step": 7472 }, { "epoch": 0.364892578125, "grad_norm": 0.3513070046901703, "learning_rate": 0.0003736999660922736, "loss": 1.8285, "step": 7473 }, { "epoch": 0.36494140625, "grad_norm": 0.3286915719509125, "learning_rate": 0.0003736684872848382, "loss": 1.8298, "step": 7474 }, { "epoch": 0.364990234375, "grad_norm": 0.26090940833091736, "learning_rate": 0.00037363700608609274, "loss": 1.8269, "step": 7475 }, { "epoch": 0.3650390625, "grad_norm": 0.3191932141780853, "learning_rate": 0.0003736055224968002, "loss": 1.8106, "step": 7476 }, { "epoch": 0.365087890625, "grad_norm": 0.24622167646884918, "learning_rate": 0.0003735740365177236, "loss": 1.8007, "step": 7477 }, { "epoch": 0.36513671875, "grad_norm": 0.28931286931037903, "learning_rate": 0.00037354254814962604, "loss": 1.8184, "step": 7478 }, { "epoch": 0.365185546875, "grad_norm": 0.25697562098503113, "learning_rate": 0.0003735110573932706, "loss": 1.8491, "step": 7479 }, { "epoch": 0.365234375, "grad_norm": 0.28103721141815186, "learning_rate": 0.0003734795642494206, "loss": 1.8069, "step": 7480 }, { "epoch": 0.365283203125, "grad_norm": 0.27383697032928467, "learning_rate": 0.0003734480687188393, "loss": 1.8488, "step": 7481 }, { "epoch": 0.36533203125, "grad_norm": 0.2746807336807251, "learning_rate": 0.00037341657080228993, "loss": 1.8355, "step": 7482 }, { "epoch": 0.365380859375, "grad_norm": 0.2745009660720825, "learning_rate": 0.0003733850705005358, "loss": 1.8337, "step": 7483 }, { "epoch": 0.3654296875, "grad_norm": 0.26091110706329346, "learning_rate": 0.00037335356781434056, "loss": 1.8227, "step": 7484 }, { "epoch": 0.365478515625, "grad_norm": 0.31863197684288025, "learning_rate": 0.0003733220627444675, "loss": 1.8412, "step": 7485 }, { "epoch": 0.36552734375, "grad_norm": 0.2984262704849243, "learning_rate": 0.0003732905552916803, "loss": 1.8352, "step": 7486 }, { "epoch": 0.365576171875, "grad_norm": 0.22734321653842926, "learning_rate": 0.0003732590454567425, "loss": 1.8201, "step": 7487 }, { "epoch": 0.365625, "grad_norm": 0.3532279133796692, "learning_rate": 0.00037322753324041787, "loss": 1.8453, "step": 7488 }, { "epoch": 0.365673828125, "grad_norm": 0.24580928683280945, "learning_rate": 0.00037319601864346997, "loss": 1.8584, "step": 7489 }, { "epoch": 0.36572265625, "grad_norm": 0.2696783244609833, "learning_rate": 0.00037316450166666274, "loss": 1.8088, "step": 7490 }, { "epoch": 0.365771484375, "grad_norm": 0.2545325458049774, "learning_rate": 0.00037313298231075995, "loss": 1.8199, "step": 7491 }, { "epoch": 0.3658203125, "grad_norm": 0.2812657654285431, "learning_rate": 0.00037310146057652546, "loss": 1.8151, "step": 7492 }, { "epoch": 0.365869140625, "grad_norm": 0.35566550493240356, "learning_rate": 0.0003730699364647232, "loss": 1.8295, "step": 7493 }, { "epoch": 0.36591796875, "grad_norm": 0.2998044490814209, "learning_rate": 0.00037303840997611725, "loss": 1.8258, "step": 7494 }, { "epoch": 0.365966796875, "grad_norm": 0.32367926836013794, "learning_rate": 0.00037300688111147176, "loss": 1.8158, "step": 7495 }, { "epoch": 0.366015625, "grad_norm": 0.2298031598329544, "learning_rate": 0.0003729753498715507, "loss": 1.813, "step": 7496 }, { "epoch": 0.366064453125, "grad_norm": 0.3015722930431366, "learning_rate": 0.0003729438162571184, "loss": 1.8275, "step": 7497 }, { "epoch": 0.36611328125, "grad_norm": 0.28511205315589905, "learning_rate": 0.00037291228026893895, "loss": 1.8336, "step": 7498 }, { "epoch": 0.366162109375, "grad_norm": 0.26492759585380554, "learning_rate": 0.00037288074190777677, "loss": 1.801, "step": 7499 }, { "epoch": 0.3662109375, "grad_norm": 0.2640310525894165, "learning_rate": 0.0003728492011743961, "loss": 1.8341, "step": 7500 }, { "epoch": 0.366259765625, "grad_norm": 0.31725072860717773, "learning_rate": 0.0003728176580695615, "loss": 1.8254, "step": 7501 }, { "epoch": 0.36630859375, "grad_norm": 0.256382018327713, "learning_rate": 0.0003727861125940374, "loss": 1.8262, "step": 7502 }, { "epoch": 0.366357421875, "grad_norm": 0.27249571681022644, "learning_rate": 0.0003727545647485882, "loss": 1.8264, "step": 7503 }, { "epoch": 0.36640625, "grad_norm": 0.2952299118041992, "learning_rate": 0.00037272301453397866, "loss": 1.8377, "step": 7504 }, { "epoch": 0.366455078125, "grad_norm": 0.27618157863616943, "learning_rate": 0.0003726914619509733, "loss": 1.8277, "step": 7505 }, { "epoch": 0.36650390625, "grad_norm": 0.2783345580101013, "learning_rate": 0.00037265990700033693, "loss": 1.8228, "step": 7506 }, { "epoch": 0.366552734375, "grad_norm": 0.35025787353515625, "learning_rate": 0.00037262834968283424, "loss": 1.8357, "step": 7507 }, { "epoch": 0.3666015625, "grad_norm": 0.2999204993247986, "learning_rate": 0.0003725967899992301, "loss": 1.8244, "step": 7508 }, { "epoch": 0.366650390625, "grad_norm": 0.30028435587882996, "learning_rate": 0.0003725652279502893, "loss": 1.8398, "step": 7509 }, { "epoch": 0.36669921875, "grad_norm": 0.2536691427230835, "learning_rate": 0.0003725336635367768, "loss": 1.8462, "step": 7510 }, { "epoch": 0.366748046875, "grad_norm": 0.30857905745506287, "learning_rate": 0.00037250209675945763, "loss": 1.8161, "step": 7511 }, { "epoch": 0.366796875, "grad_norm": 0.22803665697574615, "learning_rate": 0.00037247052761909676, "loss": 1.8364, "step": 7512 }, { "epoch": 0.366845703125, "grad_norm": 0.2904404401779175, "learning_rate": 0.0003724389561164594, "loss": 1.8286, "step": 7513 }, { "epoch": 0.36689453125, "grad_norm": 0.3331552743911743, "learning_rate": 0.0003724073822523107, "loss": 1.8293, "step": 7514 }, { "epoch": 0.366943359375, "grad_norm": 0.2652762830257416, "learning_rate": 0.00037237580602741575, "loss": 1.8123, "step": 7515 }, { "epoch": 0.3669921875, "grad_norm": 0.270358681678772, "learning_rate": 0.00037234422744253994, "loss": 1.8333, "step": 7516 }, { "epoch": 0.367041015625, "grad_norm": 0.26221925020217896, "learning_rate": 0.0003723126464984486, "loss": 1.8153, "step": 7517 }, { "epoch": 0.36708984375, "grad_norm": 0.21016235649585724, "learning_rate": 0.0003722810631959071, "loss": 1.8497, "step": 7518 }, { "epoch": 0.367138671875, "grad_norm": 0.23511070013046265, "learning_rate": 0.0003722494775356808, "loss": 1.8256, "step": 7519 }, { "epoch": 0.3671875, "grad_norm": 0.25766441226005554, "learning_rate": 0.00037221788951853537, "loss": 1.8337, "step": 7520 }, { "epoch": 0.367236328125, "grad_norm": 0.2180469036102295, "learning_rate": 0.0003721862991452362, "loss": 1.8315, "step": 7521 }, { "epoch": 0.36728515625, "grad_norm": 0.2569188177585602, "learning_rate": 0.00037215470641654904, "loss": 1.8249, "step": 7522 }, { "epoch": 0.367333984375, "grad_norm": 0.3069956302642822, "learning_rate": 0.0003721231113332396, "loss": 1.8219, "step": 7523 }, { "epoch": 0.3673828125, "grad_norm": 0.24935206770896912, "learning_rate": 0.0003720915138960734, "loss": 1.8042, "step": 7524 }, { "epoch": 0.367431640625, "grad_norm": 0.2439158409833908, "learning_rate": 0.00037205991410581643, "loss": 1.8218, "step": 7525 }, { "epoch": 0.36748046875, "grad_norm": 0.2460411936044693, "learning_rate": 0.0003720283119632343, "loss": 1.8205, "step": 7526 }, { "epoch": 0.367529296875, "grad_norm": 0.33397579193115234, "learning_rate": 0.00037199670746909327, "loss": 1.8236, "step": 7527 }, { "epoch": 0.367578125, "grad_norm": 0.2763116657733917, "learning_rate": 0.00037196510062415905, "loss": 1.8474, "step": 7528 }, { "epoch": 0.367626953125, "grad_norm": 0.2769278883934021, "learning_rate": 0.00037193349142919776, "loss": 1.8476, "step": 7529 }, { "epoch": 0.36767578125, "grad_norm": 0.2790844142436981, "learning_rate": 0.0003719018798849753, "loss": 1.855, "step": 7530 }, { "epoch": 0.367724609375, "grad_norm": 0.27202844619750977, "learning_rate": 0.0003718702659922581, "loss": 1.8273, "step": 7531 }, { "epoch": 0.3677734375, "grad_norm": 0.22652371227741241, "learning_rate": 0.0003718386497518121, "loss": 1.8446, "step": 7532 }, { "epoch": 0.367822265625, "grad_norm": 0.25537657737731934, "learning_rate": 0.00037180703116440356, "loss": 1.8311, "step": 7533 }, { "epoch": 0.36787109375, "grad_norm": 0.2801816165447235, "learning_rate": 0.0003717754102307989, "loss": 1.8397, "step": 7534 }, { "epoch": 0.367919921875, "grad_norm": 0.23278063535690308, "learning_rate": 0.00037174378695176445, "loss": 1.8622, "step": 7535 }, { "epoch": 0.36796875, "grad_norm": 0.25055181980133057, "learning_rate": 0.00037171216132806664, "loss": 1.8206, "step": 7536 }, { "epoch": 0.368017578125, "grad_norm": 0.25735414028167725, "learning_rate": 0.0003716805333604718, "loss": 1.8203, "step": 7537 }, { "epoch": 0.36806640625, "grad_norm": 0.28990742564201355, "learning_rate": 0.00037164890304974656, "loss": 1.8212, "step": 7538 }, { "epoch": 0.368115234375, "grad_norm": 0.3344483971595764, "learning_rate": 0.0003716172703966576, "loss": 1.8272, "step": 7539 }, { "epoch": 0.3681640625, "grad_norm": 0.2888253927230835, "learning_rate": 0.00037158563540197145, "loss": 1.8252, "step": 7540 }, { "epoch": 0.368212890625, "grad_norm": 0.31513580679893494, "learning_rate": 0.0003715539980664548, "loss": 1.8442, "step": 7541 }, { "epoch": 0.36826171875, "grad_norm": 0.2580944001674652, "learning_rate": 0.00037152235839087443, "loss": 1.8428, "step": 7542 }, { "epoch": 0.368310546875, "grad_norm": 0.3170838952064514, "learning_rate": 0.0003714907163759972, "loss": 1.7809, "step": 7543 }, { "epoch": 0.368359375, "grad_norm": 0.3782224953174591, "learning_rate": 0.0003714590720225898, "loss": 1.8054, "step": 7544 }, { "epoch": 0.368408203125, "grad_norm": 0.24010781943798065, "learning_rate": 0.0003714274253314194, "loss": 1.8247, "step": 7545 }, { "epoch": 0.36845703125, "grad_norm": 0.25618281960487366, "learning_rate": 0.00037139577630325286, "loss": 1.8132, "step": 7546 }, { "epoch": 0.368505859375, "grad_norm": 0.29712608456611633, "learning_rate": 0.0003713641249388573, "loss": 1.8376, "step": 7547 }, { "epoch": 0.3685546875, "grad_norm": 0.28565794229507446, "learning_rate": 0.0003713324712389997, "loss": 1.8331, "step": 7548 }, { "epoch": 0.368603515625, "grad_norm": 0.3491212725639343, "learning_rate": 0.0003713008152044474, "loss": 1.8429, "step": 7549 }, { "epoch": 0.36865234375, "grad_norm": 0.32894518971443176, "learning_rate": 0.0003712691568359673, "loss": 1.8252, "step": 7550 }, { "epoch": 0.368701171875, "grad_norm": 0.26907628774642944, "learning_rate": 0.00037123749613432683, "loss": 1.8482, "step": 7551 }, { "epoch": 0.36875, "grad_norm": 0.2653082609176636, "learning_rate": 0.00037120583310029344, "loss": 1.8425, "step": 7552 }, { "epoch": 0.368798828125, "grad_norm": 0.30662235617637634, "learning_rate": 0.00037117416773463434, "loss": 1.8313, "step": 7553 }, { "epoch": 0.36884765625, "grad_norm": 0.26038578152656555, "learning_rate": 0.00037114250003811696, "loss": 1.828, "step": 7554 }, { "epoch": 0.368896484375, "grad_norm": 0.24469594657421112, "learning_rate": 0.000371110830011509, "loss": 1.8209, "step": 7555 }, { "epoch": 0.3689453125, "grad_norm": 0.3858356475830078, "learning_rate": 0.00037107915765557774, "loss": 1.8254, "step": 7556 }, { "epoch": 0.368994140625, "grad_norm": 0.3152356445789337, "learning_rate": 0.00037104748297109096, "loss": 1.8239, "step": 7557 }, { "epoch": 0.36904296875, "grad_norm": 0.2505708932876587, "learning_rate": 0.0003710158059588162, "loss": 1.8165, "step": 7558 }, { "epoch": 0.369091796875, "grad_norm": 0.28066369891166687, "learning_rate": 0.00037098412661952133, "loss": 1.832, "step": 7559 }, { "epoch": 0.369140625, "grad_norm": 0.24215473234653473, "learning_rate": 0.00037095244495397397, "loss": 1.8282, "step": 7560 }, { "epoch": 0.369189453125, "grad_norm": 0.2230573296546936, "learning_rate": 0.0003709207609629421, "loss": 1.8204, "step": 7561 }, { "epoch": 0.36923828125, "grad_norm": 0.23770570755004883, "learning_rate": 0.0003708890746471934, "loss": 1.839, "step": 7562 }, { "epoch": 0.369287109375, "grad_norm": 0.2891751229763031, "learning_rate": 0.0003708573860074961, "loss": 1.8321, "step": 7563 }, { "epoch": 0.3693359375, "grad_norm": 0.2281082570552826, "learning_rate": 0.00037082569504461786, "loss": 1.8356, "step": 7564 }, { "epoch": 0.369384765625, "grad_norm": 0.28647613525390625, "learning_rate": 0.00037079400175932703, "loss": 1.8407, "step": 7565 }, { "epoch": 0.36943359375, "grad_norm": 0.3341693878173828, "learning_rate": 0.0003707623061523916, "loss": 1.8066, "step": 7566 }, { "epoch": 0.369482421875, "grad_norm": 0.28304123878479004, "learning_rate": 0.00037073060822457975, "loss": 1.8181, "step": 7567 }, { "epoch": 0.36953125, "grad_norm": 0.3364592492580414, "learning_rate": 0.0003706989079766597, "loss": 1.8349, "step": 7568 }, { "epoch": 0.369580078125, "grad_norm": 0.2873595356941223, "learning_rate": 0.0003706672054093997, "loss": 1.8637, "step": 7569 }, { "epoch": 0.36962890625, "grad_norm": 0.2545204162597656, "learning_rate": 0.00037063550052356814, "loss": 1.8586, "step": 7570 }, { "epoch": 0.369677734375, "grad_norm": 0.2650291323661804, "learning_rate": 0.0003706037933199334, "loss": 1.8342, "step": 7571 }, { "epoch": 0.3697265625, "grad_norm": 0.2606204152107239, "learning_rate": 0.0003705720837992638, "loss": 1.8164, "step": 7572 }, { "epoch": 0.369775390625, "grad_norm": 0.2429511547088623, "learning_rate": 0.00037054037196232817, "loss": 1.8283, "step": 7573 }, { "epoch": 0.36982421875, "grad_norm": 0.6159670352935791, "learning_rate": 0.00037050865780989474, "loss": 1.8205, "step": 7574 }, { "epoch": 0.369873046875, "grad_norm": 0.27369141578674316, "learning_rate": 0.0003704769413427323, "loss": 1.8195, "step": 7575 }, { "epoch": 0.369921875, "grad_norm": 0.3193378746509552, "learning_rate": 0.00037044522256160945, "loss": 1.8185, "step": 7576 }, { "epoch": 0.369970703125, "grad_norm": 0.276210218667984, "learning_rate": 0.000370413501467295, "loss": 1.825, "step": 7577 }, { "epoch": 0.37001953125, "grad_norm": 0.22955931723117828, "learning_rate": 0.0003703817780605577, "loss": 1.8395, "step": 7578 }, { "epoch": 0.370068359375, "grad_norm": 0.2625614404678345, "learning_rate": 0.0003703500523421663, "loss": 1.8447, "step": 7579 }, { "epoch": 0.3701171875, "grad_norm": 0.28802403807640076, "learning_rate": 0.0003703183243128898, "loss": 1.821, "step": 7580 }, { "epoch": 0.370166015625, "grad_norm": 0.20754268765449524, "learning_rate": 0.0003702865939734971, "loss": 1.8094, "step": 7581 }, { "epoch": 0.37021484375, "grad_norm": 0.2848266661167145, "learning_rate": 0.0003702548613247573, "loss": 1.8488, "step": 7582 }, { "epoch": 0.370263671875, "grad_norm": 0.29047495126724243, "learning_rate": 0.00037022312636743944, "loss": 1.8367, "step": 7583 }, { "epoch": 0.3703125, "grad_norm": 0.24187400937080383, "learning_rate": 0.0003701913891023126, "loss": 1.8395, "step": 7584 }, { "epoch": 0.370361328125, "grad_norm": 0.3239099085330963, "learning_rate": 0.00037015964953014593, "loss": 1.8355, "step": 7585 }, { "epoch": 0.37041015625, "grad_norm": 0.26585423946380615, "learning_rate": 0.00037012790765170866, "loss": 1.852, "step": 7586 }, { "epoch": 0.370458984375, "grad_norm": 0.28742462396621704, "learning_rate": 0.0003700961634677702, "loss": 1.8216, "step": 7587 }, { "epoch": 0.3705078125, "grad_norm": 0.3084433376789093, "learning_rate": 0.00037006441697909977, "loss": 1.8409, "step": 7588 }, { "epoch": 0.370556640625, "grad_norm": 0.36994650959968567, "learning_rate": 0.00037003266818646683, "loss": 1.8409, "step": 7589 }, { "epoch": 0.37060546875, "grad_norm": 0.27290642261505127, "learning_rate": 0.0003700009170906409, "loss": 1.8236, "step": 7590 }, { "epoch": 0.370654296875, "grad_norm": 0.2204694151878357, "learning_rate": 0.00036996916369239134, "loss": 1.8488, "step": 7591 }, { "epoch": 0.370703125, "grad_norm": 0.2563524544239044, "learning_rate": 0.00036993740799248786, "loss": 1.8359, "step": 7592 }, { "epoch": 0.370751953125, "grad_norm": 0.27005621790885925, "learning_rate": 0.0003699056499917, "loss": 1.7959, "step": 7593 }, { "epoch": 0.37080078125, "grad_norm": 0.3093854486942291, "learning_rate": 0.0003698738896907975, "loss": 1.835, "step": 7594 }, { "epoch": 0.370849609375, "grad_norm": 0.35969480872154236, "learning_rate": 0.00036984212709055, "loss": 1.8177, "step": 7595 }, { "epoch": 0.3708984375, "grad_norm": 0.3173467516899109, "learning_rate": 0.00036981036219172733, "loss": 1.8217, "step": 7596 }, { "epoch": 0.370947265625, "grad_norm": 0.2979578673839569, "learning_rate": 0.00036977859499509944, "loss": 1.8636, "step": 7597 }, { "epoch": 0.37099609375, "grad_norm": 0.2926958203315735, "learning_rate": 0.00036974682550143615, "loss": 1.8539, "step": 7598 }, { "epoch": 0.371044921875, "grad_norm": 0.3615927994251251, "learning_rate": 0.0003697150537115073, "loss": 1.8319, "step": 7599 }, { "epoch": 0.37109375, "grad_norm": 0.3149760365486145, "learning_rate": 0.0003696832796260832, "loss": 1.8458, "step": 7600 }, { "epoch": 0.371142578125, "grad_norm": 0.2563896179199219, "learning_rate": 0.0003696515032459336, "loss": 1.8365, "step": 7601 }, { "epoch": 0.37119140625, "grad_norm": 0.31175145506858826, "learning_rate": 0.00036961972457182893, "loss": 1.8262, "step": 7602 }, { "epoch": 0.371240234375, "grad_norm": 0.2885615825653076, "learning_rate": 0.0003695879436045391, "loss": 1.8263, "step": 7603 }, { "epoch": 0.3712890625, "grad_norm": 0.3122859597206116, "learning_rate": 0.0003695561603448345, "loss": 1.8089, "step": 7604 }, { "epoch": 0.371337890625, "grad_norm": 0.3382733166217804, "learning_rate": 0.00036952437479348545, "loss": 1.8086, "step": 7605 }, { "epoch": 0.37138671875, "grad_norm": 0.2642497420310974, "learning_rate": 0.00036949258695126214, "loss": 1.8347, "step": 7606 }, { "epoch": 0.371435546875, "grad_norm": 0.3147086501121521, "learning_rate": 0.0003694607968189351, "loss": 1.8144, "step": 7607 }, { "epoch": 0.371484375, "grad_norm": 0.3430258631706238, "learning_rate": 0.00036942900439727476, "loss": 1.8317, "step": 7608 }, { "epoch": 0.371533203125, "grad_norm": 0.27547353506088257, "learning_rate": 0.0003693972096870517, "loss": 1.818, "step": 7609 }, { "epoch": 0.37158203125, "grad_norm": 0.2959018647670746, "learning_rate": 0.00036936541268903634, "loss": 1.8548, "step": 7610 }, { "epoch": 0.371630859375, "grad_norm": 0.32811886072158813, "learning_rate": 0.0003693336134039995, "loss": 1.8178, "step": 7611 }, { "epoch": 0.3716796875, "grad_norm": 0.2489013522863388, "learning_rate": 0.0003693018118327116, "loss": 1.838, "step": 7612 }, { "epoch": 0.371728515625, "grad_norm": 0.3408755362033844, "learning_rate": 0.00036927000797594363, "loss": 1.8505, "step": 7613 }, { "epoch": 0.37177734375, "grad_norm": 0.3452344238758087, "learning_rate": 0.00036923820183446627, "loss": 1.7996, "step": 7614 }, { "epoch": 0.371826171875, "grad_norm": 0.23725692927837372, "learning_rate": 0.00036920639340905036, "loss": 1.8258, "step": 7615 }, { "epoch": 0.371875, "grad_norm": 0.27004384994506836, "learning_rate": 0.0003691745827004668, "loss": 1.8311, "step": 7616 }, { "epoch": 0.371923828125, "grad_norm": 0.2778221666812897, "learning_rate": 0.0003691427697094866, "loss": 1.8188, "step": 7617 }, { "epoch": 0.37197265625, "grad_norm": 0.25464141368865967, "learning_rate": 0.00036911095443688073, "loss": 1.8587, "step": 7618 }, { "epoch": 0.372021484375, "grad_norm": 0.2854223847389221, "learning_rate": 0.0003690791368834203, "loss": 1.8359, "step": 7619 }, { "epoch": 0.3720703125, "grad_norm": 0.21128179132938385, "learning_rate": 0.0003690473170498764, "loss": 1.8223, "step": 7620 }, { "epoch": 0.372119140625, "grad_norm": 0.29112133383750916, "learning_rate": 0.0003690154949370202, "loss": 1.8155, "step": 7621 }, { "epoch": 0.37216796875, "grad_norm": 0.3308970034122467, "learning_rate": 0.0003689836705456229, "loss": 1.8341, "step": 7622 }, { "epoch": 0.372216796875, "grad_norm": 0.2337830662727356, "learning_rate": 0.000368951843876456, "loss": 1.8332, "step": 7623 }, { "epoch": 0.372265625, "grad_norm": 0.2680107057094574, "learning_rate": 0.00036892001493029047, "loss": 1.8216, "step": 7624 }, { "epoch": 0.372314453125, "grad_norm": 0.332068532705307, "learning_rate": 0.00036888818370789806, "loss": 1.8349, "step": 7625 }, { "epoch": 0.37236328125, "grad_norm": 0.23361945152282715, "learning_rate": 0.0003688563502100501, "loss": 1.8261, "step": 7626 }, { "epoch": 0.372412109375, "grad_norm": 0.2633451521396637, "learning_rate": 0.000368824514437518, "loss": 1.8325, "step": 7627 }, { "epoch": 0.3724609375, "grad_norm": 0.26900288462638855, "learning_rate": 0.00036879267639107347, "loss": 1.8183, "step": 7628 }, { "epoch": 0.372509765625, "grad_norm": 0.2569616734981537, "learning_rate": 0.000368760836071488, "loss": 1.8264, "step": 7629 }, { "epoch": 0.37255859375, "grad_norm": 0.31200870871543884, "learning_rate": 0.0003687289934795335, "loss": 1.8425, "step": 7630 }, { "epoch": 0.372607421875, "grad_norm": 0.3043605089187622, "learning_rate": 0.0003686971486159814, "loss": 1.8204, "step": 7631 }, { "epoch": 0.37265625, "grad_norm": 0.37914207577705383, "learning_rate": 0.0003686653014816037, "loss": 1.8437, "step": 7632 }, { "epoch": 0.372705078125, "grad_norm": 0.3375913202762604, "learning_rate": 0.0003686334520771721, "loss": 1.8268, "step": 7633 }, { "epoch": 0.37275390625, "grad_norm": 0.2651040554046631, "learning_rate": 0.00036860160040345864, "loss": 1.8268, "step": 7634 }, { "epoch": 0.372802734375, "grad_norm": 0.46686142683029175, "learning_rate": 0.0003685697464612351, "loss": 1.8125, "step": 7635 }, { "epoch": 0.3728515625, "grad_norm": 0.3578263521194458, "learning_rate": 0.00036853789025127363, "loss": 1.8115, "step": 7636 }, { "epoch": 0.372900390625, "grad_norm": 0.2776489853858948, "learning_rate": 0.00036850603177434634, "loss": 1.83, "step": 7637 }, { "epoch": 0.37294921875, "grad_norm": 0.47498011589050293, "learning_rate": 0.00036847417103122513, "loss": 1.8476, "step": 7638 }, { "epoch": 0.372998046875, "grad_norm": 0.2605825960636139, "learning_rate": 0.0003684423080226823, "loss": 1.8373, "step": 7639 }, { "epoch": 0.373046875, "grad_norm": 0.35388559103012085, "learning_rate": 0.00036841044274949007, "loss": 1.8249, "step": 7640 }, { "epoch": 0.373095703125, "grad_norm": 0.24429430067539215, "learning_rate": 0.00036837857521242087, "loss": 1.8235, "step": 7641 }, { "epoch": 0.37314453125, "grad_norm": 0.2947210967540741, "learning_rate": 0.0003683467054122467, "loss": 1.8213, "step": 7642 }, { "epoch": 0.373193359375, "grad_norm": 0.33983367681503296, "learning_rate": 0.00036831483334974014, "loss": 1.8446, "step": 7643 }, { "epoch": 0.3732421875, "grad_norm": 0.2553727626800537, "learning_rate": 0.00036828295902567365, "loss": 1.8426, "step": 7644 }, { "epoch": 0.373291015625, "grad_norm": 0.33399438858032227, "learning_rate": 0.00036825108244081974, "loss": 1.8367, "step": 7645 }, { "epoch": 0.37333984375, "grad_norm": 0.28166747093200684, "learning_rate": 0.0003682192035959509, "loss": 1.8395, "step": 7646 }, { "epoch": 0.373388671875, "grad_norm": 0.31981298327445984, "learning_rate": 0.00036818732249183973, "loss": 1.8352, "step": 7647 }, { "epoch": 0.3734375, "grad_norm": 0.27107861638069153, "learning_rate": 0.000368155439129259, "loss": 1.8201, "step": 7648 }, { "epoch": 0.373486328125, "grad_norm": 0.34147095680236816, "learning_rate": 0.00036812355350898136, "loss": 1.8072, "step": 7649 }, { "epoch": 0.37353515625, "grad_norm": 0.3047454357147217, "learning_rate": 0.00036809166563177954, "loss": 1.8758, "step": 7650 }, { "epoch": 0.373583984375, "grad_norm": 0.2649138569831848, "learning_rate": 0.00036805977549842644, "loss": 1.8306, "step": 7651 }, { "epoch": 0.3736328125, "grad_norm": 0.34248974919319153, "learning_rate": 0.0003680278831096949, "loss": 1.8093, "step": 7652 }, { "epoch": 0.373681640625, "grad_norm": 0.2212296724319458, "learning_rate": 0.0003679959884663579, "loss": 1.8383, "step": 7653 }, { "epoch": 0.37373046875, "grad_norm": 0.31882160902023315, "learning_rate": 0.0003679640915691884, "loss": 1.8325, "step": 7654 }, { "epoch": 0.373779296875, "grad_norm": 0.26769784092903137, "learning_rate": 0.00036793219241895947, "loss": 1.8555, "step": 7655 }, { "epoch": 0.373828125, "grad_norm": 0.2766674757003784, "learning_rate": 0.0003679002910164441, "loss": 1.8321, "step": 7656 }, { "epoch": 0.373876953125, "grad_norm": 0.31999802589416504, "learning_rate": 0.0003678683873624157, "loss": 1.8351, "step": 7657 }, { "epoch": 0.37392578125, "grad_norm": 0.23072712123394012, "learning_rate": 0.00036783648145764726, "loss": 1.8148, "step": 7658 }, { "epoch": 0.373974609375, "grad_norm": 0.31117352843284607, "learning_rate": 0.00036780457330291204, "loss": 1.8093, "step": 7659 }, { "epoch": 0.3740234375, "grad_norm": 0.265482097864151, "learning_rate": 0.0003677726628989835, "loss": 1.8468, "step": 7660 }, { "epoch": 0.374072265625, "grad_norm": 0.2940639555454254, "learning_rate": 0.0003677407502466349, "loss": 1.8159, "step": 7661 }, { "epoch": 0.37412109375, "grad_norm": 0.6689140200614929, "learning_rate": 0.00036770883534663974, "loss": 1.828, "step": 7662 }, { "epoch": 0.374169921875, "grad_norm": 0.26614663004875183, "learning_rate": 0.0003676769181997715, "loss": 1.827, "step": 7663 }, { "epoch": 0.37421875, "grad_norm": 0.27090147137641907, "learning_rate": 0.00036764499880680363, "loss": 1.8354, "step": 7664 }, { "epoch": 0.374267578125, "grad_norm": 0.2421882301568985, "learning_rate": 0.0003676130771685098, "loss": 1.8447, "step": 7665 }, { "epoch": 0.37431640625, "grad_norm": 0.23194099962711334, "learning_rate": 0.00036758115328566364, "loss": 1.825, "step": 7666 }, { "epoch": 0.374365234375, "grad_norm": 0.21694770455360413, "learning_rate": 0.00036754922715903887, "loss": 1.8228, "step": 7667 }, { "epoch": 0.3744140625, "grad_norm": 0.2804086208343506, "learning_rate": 0.00036751729878940927, "loss": 1.8425, "step": 7668 }, { "epoch": 0.374462890625, "grad_norm": 0.311996728181839, "learning_rate": 0.00036748536817754853, "loss": 1.8182, "step": 7669 }, { "epoch": 0.37451171875, "grad_norm": 0.3047553598880768, "learning_rate": 0.00036745343532423066, "loss": 1.8303, "step": 7670 }, { "epoch": 0.374560546875, "grad_norm": 0.23124977946281433, "learning_rate": 0.0003674215002302294, "loss": 1.8275, "step": 7671 }, { "epoch": 0.374609375, "grad_norm": 0.2488262802362442, "learning_rate": 0.00036738956289631894, "loss": 1.8278, "step": 7672 }, { "epoch": 0.374658203125, "grad_norm": 0.24317237734794617, "learning_rate": 0.0003673576233232731, "loss": 1.8136, "step": 7673 }, { "epoch": 0.37470703125, "grad_norm": 0.2705746293067932, "learning_rate": 0.0003673256815118662, "loss": 1.8146, "step": 7674 }, { "epoch": 0.374755859375, "grad_norm": 0.26707929372787476, "learning_rate": 0.0003672937374628721, "loss": 1.8326, "step": 7675 }, { "epoch": 0.3748046875, "grad_norm": 0.2401120513677597, "learning_rate": 0.0003672617911770651, "loss": 1.8009, "step": 7676 }, { "epoch": 0.374853515625, "grad_norm": 0.21974118053913116, "learning_rate": 0.0003672298426552196, "loss": 1.8466, "step": 7677 }, { "epoch": 0.37490234375, "grad_norm": 0.23820753395557404, "learning_rate": 0.0003671978918981097, "loss": 1.8284, "step": 7678 }, { "epoch": 0.374951171875, "grad_norm": 0.23753482103347778, "learning_rate": 0.00036716593890650986, "loss": 1.8142, "step": 7679 }, { "epoch": 0.375, "grad_norm": 0.2278328835964203, "learning_rate": 0.0003671339836811944, "loss": 1.8202, "step": 7680 }, { "epoch": 0.375048828125, "grad_norm": 0.26043039560317993, "learning_rate": 0.00036710202622293784, "loss": 1.8321, "step": 7681 }, { "epoch": 0.37509765625, "grad_norm": 0.2634241580963135, "learning_rate": 0.0003670700665325147, "loss": 1.8365, "step": 7682 }, { "epoch": 0.375146484375, "grad_norm": 0.2153044492006302, "learning_rate": 0.0003670381046106995, "loss": 1.8693, "step": 7683 }, { "epoch": 0.3751953125, "grad_norm": 0.24384871125221252, "learning_rate": 0.00036700614045826683, "loss": 1.8258, "step": 7684 }, { "epoch": 0.375244140625, "grad_norm": 0.3022485673427582, "learning_rate": 0.0003669741740759915, "loss": 1.8043, "step": 7685 }, { "epoch": 0.37529296875, "grad_norm": 0.2468213140964508, "learning_rate": 0.00036694220546464826, "loss": 1.8117, "step": 7686 }, { "epoch": 0.375341796875, "grad_norm": 0.23723936080932617, "learning_rate": 0.0003669102346250116, "loss": 1.8264, "step": 7687 }, { "epoch": 0.375390625, "grad_norm": 0.28459644317626953, "learning_rate": 0.0003668782615578567, "loss": 1.8052, "step": 7688 }, { "epoch": 0.375439453125, "grad_norm": 0.2637026011943817, "learning_rate": 0.0003668462862639583, "loss": 1.8072, "step": 7689 }, { "epoch": 0.37548828125, "grad_norm": 0.28606489300727844, "learning_rate": 0.0003668143087440914, "loss": 1.8154, "step": 7690 }, { "epoch": 0.375537109375, "grad_norm": 0.24941901862621307, "learning_rate": 0.0003667823289990309, "loss": 1.8308, "step": 7691 }, { "epoch": 0.3755859375, "grad_norm": 0.2243649661540985, "learning_rate": 0.00036675034702955196, "loss": 1.8203, "step": 7692 }, { "epoch": 0.375634765625, "grad_norm": 0.2881010174751282, "learning_rate": 0.0003667183628364296, "loss": 1.8338, "step": 7693 }, { "epoch": 0.37568359375, "grad_norm": 0.3197973668575287, "learning_rate": 0.00036668637642043905, "loss": 1.8183, "step": 7694 }, { "epoch": 0.375732421875, "grad_norm": 0.22323426604270935, "learning_rate": 0.0003666543877823555, "loss": 1.8139, "step": 7695 }, { "epoch": 0.37578125, "grad_norm": 0.3067862391471863, "learning_rate": 0.0003666223969229543, "loss": 1.8258, "step": 7696 }, { "epoch": 0.375830078125, "grad_norm": 0.3745500147342682, "learning_rate": 0.00036659040384301054, "loss": 1.802, "step": 7697 }, { "epoch": 0.37587890625, "grad_norm": 0.2338843196630478, "learning_rate": 0.0003665584085432999, "loss": 1.8313, "step": 7698 }, { "epoch": 0.375927734375, "grad_norm": 0.33118706941604614, "learning_rate": 0.00036652641102459765, "loss": 1.8351, "step": 7699 }, { "epoch": 0.3759765625, "grad_norm": 0.32046279311180115, "learning_rate": 0.00036649441128767935, "loss": 1.8102, "step": 7700 }, { "epoch": 0.376025390625, "grad_norm": 0.23599062860012054, "learning_rate": 0.0003664624093333204, "loss": 1.8436, "step": 7701 }, { "epoch": 0.37607421875, "grad_norm": 0.36832723021507263, "learning_rate": 0.00036643040516229645, "loss": 1.8528, "step": 7702 }, { "epoch": 0.376123046875, "grad_norm": 0.3778837025165558, "learning_rate": 0.0003663983987753833, "loss": 1.8249, "step": 7703 }, { "epoch": 0.376171875, "grad_norm": 0.27301785349845886, "learning_rate": 0.00036636639017335643, "loss": 1.8288, "step": 7704 }, { "epoch": 0.376220703125, "grad_norm": 0.35078126192092896, "learning_rate": 0.00036633437935699174, "loss": 1.8312, "step": 7705 }, { "epoch": 0.37626953125, "grad_norm": 0.32411664724349976, "learning_rate": 0.0003663023663270649, "loss": 1.864, "step": 7706 }, { "epoch": 0.376318359375, "grad_norm": 0.27311670780181885, "learning_rate": 0.000366270351084352, "loss": 1.8218, "step": 7707 }, { "epoch": 0.3763671875, "grad_norm": 0.27892544865608215, "learning_rate": 0.0003662383336296287, "loss": 1.8256, "step": 7708 }, { "epoch": 0.376416015625, "grad_norm": 0.32857227325439453, "learning_rate": 0.00036620631396367114, "loss": 1.8177, "step": 7709 }, { "epoch": 0.37646484375, "grad_norm": 0.3569096624851227, "learning_rate": 0.0003661742920872553, "loss": 1.7843, "step": 7710 }, { "epoch": 0.376513671875, "grad_norm": 0.28093627095222473, "learning_rate": 0.0003661422680011572, "loss": 1.8296, "step": 7711 }, { "epoch": 0.3765625, "grad_norm": 0.23857995867729187, "learning_rate": 0.000366110241706153, "loss": 1.8394, "step": 7712 }, { "epoch": 0.376611328125, "grad_norm": 0.32765135169029236, "learning_rate": 0.0003660782132030189, "loss": 1.8322, "step": 7713 }, { "epoch": 0.37666015625, "grad_norm": 0.26899150013923645, "learning_rate": 0.0003660461824925312, "loss": 1.8417, "step": 7714 }, { "epoch": 0.376708984375, "grad_norm": 0.23875872790813446, "learning_rate": 0.000366014149575466, "loss": 1.8523, "step": 7715 }, { "epoch": 0.3767578125, "grad_norm": 0.3316110074520111, "learning_rate": 0.00036598211445259995, "loss": 1.8366, "step": 7716 }, { "epoch": 0.376806640625, "grad_norm": 0.2725440263748169, "learning_rate": 0.0003659500771247091, "loss": 1.8315, "step": 7717 }, { "epoch": 0.37685546875, "grad_norm": 0.22944077849388123, "learning_rate": 0.00036591803759257013, "loss": 1.8366, "step": 7718 }, { "epoch": 0.376904296875, "grad_norm": 0.28141075372695923, "learning_rate": 0.0003658859958569595, "loss": 1.8473, "step": 7719 }, { "epoch": 0.376953125, "grad_norm": 0.2816134989261627, "learning_rate": 0.0003658539519186537, "loss": 1.8131, "step": 7720 }, { "epoch": 0.377001953125, "grad_norm": 0.22523953020572662, "learning_rate": 0.0003658219057784293, "loss": 1.7919, "step": 7721 }, { "epoch": 0.37705078125, "grad_norm": 0.24122291803359985, "learning_rate": 0.0003657898574370632, "loss": 1.8075, "step": 7722 }, { "epoch": 0.377099609375, "grad_norm": 0.2619668245315552, "learning_rate": 0.0003657578068953319, "loss": 1.8401, "step": 7723 }, { "epoch": 0.3771484375, "grad_norm": 0.27565622329711914, "learning_rate": 0.00036572575415401215, "loss": 1.8153, "step": 7724 }, { "epoch": 0.377197265625, "grad_norm": 0.2352880984544754, "learning_rate": 0.000365693699213881, "loss": 1.8227, "step": 7725 }, { "epoch": 0.37724609375, "grad_norm": 0.199904665350914, "learning_rate": 0.00036566164207571517, "loss": 1.8218, "step": 7726 }, { "epoch": 0.377294921875, "grad_norm": 0.23613758385181427, "learning_rate": 0.0003656295827402916, "loss": 1.8576, "step": 7727 }, { "epoch": 0.37734375, "grad_norm": 0.2752116620540619, "learning_rate": 0.00036559752120838716, "loss": 1.8271, "step": 7728 }, { "epoch": 0.377392578125, "grad_norm": 0.2627987265586853, "learning_rate": 0.0003655654574807791, "loss": 1.8061, "step": 7729 }, { "epoch": 0.37744140625, "grad_norm": 0.21404114365577698, "learning_rate": 0.00036553339155824446, "loss": 1.8354, "step": 7730 }, { "epoch": 0.377490234375, "grad_norm": 0.2543472349643707, "learning_rate": 0.0003655013234415603, "loss": 1.8298, "step": 7731 }, { "epoch": 0.3775390625, "grad_norm": 0.29479965567588806, "learning_rate": 0.00036546925313150387, "loss": 1.8205, "step": 7732 }, { "epoch": 0.377587890625, "grad_norm": 0.23863248527050018, "learning_rate": 0.00036543718062885246, "loss": 1.8388, "step": 7733 }, { "epoch": 0.37763671875, "grad_norm": 0.24016815423965454, "learning_rate": 0.0003654051059343832, "loss": 1.8005, "step": 7734 }, { "epoch": 0.377685546875, "grad_norm": 0.31139227747917175, "learning_rate": 0.00036537302904887366, "loss": 1.8364, "step": 7735 }, { "epoch": 0.377734375, "grad_norm": 0.3647208511829376, "learning_rate": 0.0003653409499731011, "loss": 1.8251, "step": 7736 }, { "epoch": 0.377783203125, "grad_norm": 0.33330750465393066, "learning_rate": 0.0003653088687078431, "loss": 1.8138, "step": 7737 }, { "epoch": 0.37783203125, "grad_norm": 0.28928717970848083, "learning_rate": 0.00036527678525387705, "loss": 1.8659, "step": 7738 }, { "epoch": 0.377880859375, "grad_norm": 0.3125384747982025, "learning_rate": 0.0003652446996119806, "loss": 1.8429, "step": 7739 }, { "epoch": 0.3779296875, "grad_norm": 0.35736602544784546, "learning_rate": 0.0003652126117829313, "loss": 1.8424, "step": 7740 }, { "epoch": 0.377978515625, "grad_norm": 0.31041449308395386, "learning_rate": 0.00036518052176750696, "loss": 1.8093, "step": 7741 }, { "epoch": 0.37802734375, "grad_norm": 0.30431848764419556, "learning_rate": 0.00036514842956648523, "loss": 1.8366, "step": 7742 }, { "epoch": 0.378076171875, "grad_norm": 0.2521316111087799, "learning_rate": 0.00036511633518064384, "loss": 1.8351, "step": 7743 }, { "epoch": 0.378125, "grad_norm": 0.2851801812648773, "learning_rate": 0.00036508423861076066, "loss": 1.8332, "step": 7744 }, { "epoch": 0.378173828125, "grad_norm": 0.31098097562789917, "learning_rate": 0.0003650521398576136, "loss": 1.8382, "step": 7745 }, { "epoch": 0.37822265625, "grad_norm": 0.19905923306941986, "learning_rate": 0.0003650200389219806, "loss": 1.7663, "step": 7746 }, { "epoch": 0.378271484375, "grad_norm": 0.26078054308891296, "learning_rate": 0.0003649879358046396, "loss": 1.8227, "step": 7747 }, { "epoch": 0.3783203125, "grad_norm": 0.2272801548242569, "learning_rate": 0.0003649558305063688, "loss": 1.7949, "step": 7748 }, { "epoch": 0.378369140625, "grad_norm": 0.25000783801078796, "learning_rate": 0.000364923723027946, "loss": 1.8413, "step": 7749 }, { "epoch": 0.37841796875, "grad_norm": 0.2585432827472687, "learning_rate": 0.00036489161337014965, "loss": 1.8409, "step": 7750 }, { "epoch": 0.378466796875, "grad_norm": 0.2425219863653183, "learning_rate": 0.0003648595015337578, "loss": 1.8166, "step": 7751 }, { "epoch": 0.378515625, "grad_norm": 0.2568461000919342, "learning_rate": 0.00036482738751954875, "loss": 1.8173, "step": 7752 }, { "epoch": 0.378564453125, "grad_norm": 0.24456530809402466, "learning_rate": 0.00036479527132830076, "loss": 1.8568, "step": 7753 }, { "epoch": 0.37861328125, "grad_norm": 0.3014076352119446, "learning_rate": 0.00036476315296079224, "loss": 1.8021, "step": 7754 }, { "epoch": 0.378662109375, "grad_norm": 0.2578064799308777, "learning_rate": 0.0003647310324178016, "loss": 1.8359, "step": 7755 }, { "epoch": 0.3787109375, "grad_norm": 0.22021596133708954, "learning_rate": 0.0003646989097001073, "loss": 1.8261, "step": 7756 }, { "epoch": 0.378759765625, "grad_norm": 0.27568334341049194, "learning_rate": 0.00036466678480848787, "loss": 1.8365, "step": 7757 }, { "epoch": 0.37880859375, "grad_norm": 0.23745973408222198, "learning_rate": 0.0003646346577437219, "loss": 1.8406, "step": 7758 }, { "epoch": 0.378857421875, "grad_norm": 0.2516661286354065, "learning_rate": 0.00036460252850658794, "loss": 1.8088, "step": 7759 }, { "epoch": 0.37890625, "grad_norm": 0.24355685710906982, "learning_rate": 0.0003645703970978647, "loss": 1.8365, "step": 7760 }, { "epoch": 0.378955078125, "grad_norm": 0.21995244920253754, "learning_rate": 0.000364538263518331, "loss": 1.8277, "step": 7761 }, { "epoch": 0.37900390625, "grad_norm": 0.2285740226507187, "learning_rate": 0.00036450612776876547, "loss": 1.832, "step": 7762 }, { "epoch": 0.379052734375, "grad_norm": 0.20818237960338593, "learning_rate": 0.00036447398984994705, "loss": 1.8269, "step": 7763 }, { "epoch": 0.3791015625, "grad_norm": 0.23410558700561523, "learning_rate": 0.00036444184976265457, "loss": 1.8335, "step": 7764 }, { "epoch": 0.379150390625, "grad_norm": 0.2931320369243622, "learning_rate": 0.00036440970750766704, "loss": 1.8331, "step": 7765 }, { "epoch": 0.37919921875, "grad_norm": 0.295243501663208, "learning_rate": 0.0003643775630857635, "loss": 1.8067, "step": 7766 }, { "epoch": 0.379248046875, "grad_norm": 0.34564855694770813, "learning_rate": 0.0003643454164977228, "loss": 1.8271, "step": 7767 }, { "epoch": 0.379296875, "grad_norm": 0.31259897351264954, "learning_rate": 0.0003643132677443242, "loss": 1.8156, "step": 7768 }, { "epoch": 0.379345703125, "grad_norm": 0.22899580001831055, "learning_rate": 0.0003642811168263468, "loss": 1.8359, "step": 7769 }, { "epoch": 0.37939453125, "grad_norm": 0.28618428111076355, "learning_rate": 0.0003642489637445698, "loss": 1.8282, "step": 7770 }, { "epoch": 0.379443359375, "grad_norm": 0.287574827671051, "learning_rate": 0.0003642168084997724, "loss": 1.8208, "step": 7771 }, { "epoch": 0.3794921875, "grad_norm": 0.22920820116996765, "learning_rate": 0.00036418465109273407, "loss": 1.8367, "step": 7772 }, { "epoch": 0.379541015625, "grad_norm": 0.26157575845718384, "learning_rate": 0.00036415249152423404, "loss": 1.8306, "step": 7773 }, { "epoch": 0.37958984375, "grad_norm": 0.3082767724990845, "learning_rate": 0.00036412032979505173, "loss": 1.8426, "step": 7774 }, { "epoch": 0.379638671875, "grad_norm": 0.26631537079811096, "learning_rate": 0.0003640881659059667, "loss": 1.8236, "step": 7775 }, { "epoch": 0.3796875, "grad_norm": 0.23631301522254944, "learning_rate": 0.0003640559998577583, "loss": 1.8259, "step": 7776 }, { "epoch": 0.379736328125, "grad_norm": 0.37981534004211426, "learning_rate": 0.00036402383165120623, "loss": 1.8501, "step": 7777 }, { "epoch": 0.37978515625, "grad_norm": 0.35964077711105347, "learning_rate": 0.00036399166128709006, "loss": 1.8377, "step": 7778 }, { "epoch": 0.379833984375, "grad_norm": 0.27843451499938965, "learning_rate": 0.0003639594887661895, "loss": 1.8143, "step": 7779 }, { "epoch": 0.3798828125, "grad_norm": 0.4058297872543335, "learning_rate": 0.00036392731408928426, "loss": 1.8417, "step": 7780 }, { "epoch": 0.379931640625, "grad_norm": 0.42060384154319763, "learning_rate": 0.00036389513725715414, "loss": 1.8383, "step": 7781 }, { "epoch": 0.37998046875, "grad_norm": 0.23179766535758972, "learning_rate": 0.0003638629582705789, "loss": 1.831, "step": 7782 }, { "epoch": 0.380029296875, "grad_norm": 0.4348006248474121, "learning_rate": 0.00036383077713033853, "loss": 1.8582, "step": 7783 }, { "epoch": 0.380078125, "grad_norm": 0.34648242592811584, "learning_rate": 0.000363798593837213, "loss": 1.8204, "step": 7784 }, { "epoch": 0.380126953125, "grad_norm": 0.2764705419540405, "learning_rate": 0.00036376640839198203, "loss": 1.8056, "step": 7785 }, { "epoch": 0.38017578125, "grad_norm": 0.41312330961227417, "learning_rate": 0.00036373422079542594, "loss": 1.8209, "step": 7786 }, { "epoch": 0.380224609375, "grad_norm": 0.2549189329147339, "learning_rate": 0.00036370203104832465, "loss": 1.8148, "step": 7787 }, { "epoch": 0.3802734375, "grad_norm": 0.437811940908432, "learning_rate": 0.0003636698391514584, "loss": 1.8522, "step": 7788 }, { "epoch": 0.380322265625, "grad_norm": 0.3225247263908386, "learning_rate": 0.0003636376451056074, "loss": 1.8264, "step": 7789 }, { "epoch": 0.38037109375, "grad_norm": 0.33512061834335327, "learning_rate": 0.00036360544891155184, "loss": 1.8311, "step": 7790 }, { "epoch": 0.380419921875, "grad_norm": 0.3228941261768341, "learning_rate": 0.000363573250570072, "loss": 1.849, "step": 7791 }, { "epoch": 0.38046875, "grad_norm": 0.27980682253837585, "learning_rate": 0.00036354105008194846, "loss": 1.8189, "step": 7792 }, { "epoch": 0.380517578125, "grad_norm": 0.35477784276008606, "learning_rate": 0.0003635088474479612, "loss": 1.8364, "step": 7793 }, { "epoch": 0.38056640625, "grad_norm": 0.2814371883869171, "learning_rate": 0.00036347664266889103, "loss": 1.8252, "step": 7794 }, { "epoch": 0.380615234375, "grad_norm": 0.3634689152240753, "learning_rate": 0.00036344443574551827, "loss": 1.8223, "step": 7795 }, { "epoch": 0.3806640625, "grad_norm": 0.2519494891166687, "learning_rate": 0.0003634122266786236, "loss": 1.8115, "step": 7796 }, { "epoch": 0.380712890625, "grad_norm": 0.3066188097000122, "learning_rate": 0.0003633800154689876, "loss": 1.8369, "step": 7797 }, { "epoch": 0.38076171875, "grad_norm": 0.30934616923332214, "learning_rate": 0.0003633478021173909, "loss": 1.8294, "step": 7798 }, { "epoch": 0.380810546875, "grad_norm": 0.3230258524417877, "learning_rate": 0.00036331558662461424, "loss": 1.8484, "step": 7799 }, { "epoch": 0.380859375, "grad_norm": 0.26647552847862244, "learning_rate": 0.00036328336899143837, "loss": 1.8376, "step": 7800 }, { "epoch": 0.380908203125, "grad_norm": 0.2658616900444031, "learning_rate": 0.0003632511492186442, "loss": 1.8208, "step": 7801 }, { "epoch": 0.38095703125, "grad_norm": 0.3462955355644226, "learning_rate": 0.0003632189273070125, "loss": 1.8423, "step": 7802 }, { "epoch": 0.381005859375, "grad_norm": 0.27634626626968384, "learning_rate": 0.00036318670325732416, "loss": 1.8346, "step": 7803 }, { "epoch": 0.3810546875, "grad_norm": 0.2942219078540802, "learning_rate": 0.0003631544770703603, "loss": 1.8125, "step": 7804 }, { "epoch": 0.381103515625, "grad_norm": 0.26669901609420776, "learning_rate": 0.00036312224874690183, "loss": 1.8382, "step": 7805 }, { "epoch": 0.38115234375, "grad_norm": 0.28167760372161865, "learning_rate": 0.00036309001828772983, "loss": 1.8372, "step": 7806 }, { "epoch": 0.381201171875, "grad_norm": 0.3254542350769043, "learning_rate": 0.0003630577856936255, "loss": 1.8387, "step": 7807 }, { "epoch": 0.38125, "grad_norm": 0.273070365190506, "learning_rate": 0.00036302555096537, "loss": 1.8249, "step": 7808 }, { "epoch": 0.381298828125, "grad_norm": 0.2706347703933716, "learning_rate": 0.0003629933141037445, "loss": 1.8356, "step": 7809 }, { "epoch": 0.38134765625, "grad_norm": 0.2951316833496094, "learning_rate": 0.00036296107510953044, "loss": 1.8477, "step": 7810 }, { "epoch": 0.381396484375, "grad_norm": 0.36450836062431335, "learning_rate": 0.000362928833983509, "loss": 1.8065, "step": 7811 }, { "epoch": 0.3814453125, "grad_norm": 0.40523630380630493, "learning_rate": 0.0003628965907264616, "loss": 1.8269, "step": 7812 }, { "epoch": 0.381494140625, "grad_norm": 0.19379061460494995, "learning_rate": 0.00036286434533916966, "loss": 1.8173, "step": 7813 }, { "epoch": 0.38154296875, "grad_norm": 0.32504209876060486, "learning_rate": 0.0003628320978224148, "loss": 1.8365, "step": 7814 }, { "epoch": 0.381591796875, "grad_norm": 0.29051896929740906, "learning_rate": 0.00036279984817697854, "loss": 1.8379, "step": 7815 }, { "epoch": 0.381640625, "grad_norm": 0.24249592423439026, "learning_rate": 0.0003627675964036423, "loss": 1.8475, "step": 7816 }, { "epoch": 0.381689453125, "grad_norm": 0.2826695144176483, "learning_rate": 0.00036273534250318796, "loss": 1.8221, "step": 7817 }, { "epoch": 0.38173828125, "grad_norm": 0.31172728538513184, "learning_rate": 0.00036270308647639703, "loss": 1.8225, "step": 7818 }, { "epoch": 0.381787109375, "grad_norm": 0.29685178399086, "learning_rate": 0.0003626708283240514, "loss": 1.8365, "step": 7819 }, { "epoch": 0.3818359375, "grad_norm": 0.20220060646533966, "learning_rate": 0.00036263856804693277, "loss": 1.8007, "step": 7820 }, { "epoch": 0.381884765625, "grad_norm": 0.29701682925224304, "learning_rate": 0.000362606305645823, "loss": 1.8584, "step": 7821 }, { "epoch": 0.38193359375, "grad_norm": 0.2465946078300476, "learning_rate": 0.0003625740411215041, "loss": 1.8056, "step": 7822 }, { "epoch": 0.381982421875, "grad_norm": 0.22212733328342438, "learning_rate": 0.00036254177447475795, "loss": 1.839, "step": 7823 }, { "epoch": 0.38203125, "grad_norm": 0.27265700697898865, "learning_rate": 0.00036250950570636655, "loss": 1.8315, "step": 7824 }, { "epoch": 0.382080078125, "grad_norm": 0.284225195646286, "learning_rate": 0.00036247723481711193, "loss": 1.8244, "step": 7825 }, { "epoch": 0.38212890625, "grad_norm": 0.2173154056072235, "learning_rate": 0.00036244496180777634, "loss": 1.7956, "step": 7826 }, { "epoch": 0.382177734375, "grad_norm": 0.28153863549232483, "learning_rate": 0.00036241268667914177, "loss": 1.8294, "step": 7827 }, { "epoch": 0.3822265625, "grad_norm": 0.2951352298259735, "learning_rate": 0.0003623804094319905, "loss": 1.8116, "step": 7828 }, { "epoch": 0.382275390625, "grad_norm": 0.26792585849761963, "learning_rate": 0.0003623481300671049, "loss": 1.8274, "step": 7829 }, { "epoch": 0.38232421875, "grad_norm": 0.2549096345901489, "learning_rate": 0.0003623158485852671, "loss": 1.8316, "step": 7830 }, { "epoch": 0.382373046875, "grad_norm": 0.26872798800468445, "learning_rate": 0.00036228356498725964, "loss": 1.83, "step": 7831 }, { "epoch": 0.382421875, "grad_norm": 0.2488584816455841, "learning_rate": 0.0003622512792738648, "loss": 1.8282, "step": 7832 }, { "epoch": 0.382470703125, "grad_norm": 0.30581134557724, "learning_rate": 0.0003622189914458651, "loss": 1.8067, "step": 7833 }, { "epoch": 0.38251953125, "grad_norm": 0.2782798111438751, "learning_rate": 0.00036218670150404313, "loss": 1.8234, "step": 7834 }, { "epoch": 0.382568359375, "grad_norm": 0.2415330410003662, "learning_rate": 0.0003621544094491814, "loss": 1.8222, "step": 7835 }, { "epoch": 0.3826171875, "grad_norm": 0.25964418053627014, "learning_rate": 0.0003621221152820625, "loss": 1.8205, "step": 7836 }, { "epoch": 0.382666015625, "grad_norm": 0.24597807228565216, "learning_rate": 0.00036208981900346916, "loss": 1.8256, "step": 7837 }, { "epoch": 0.38271484375, "grad_norm": 0.2585979104042053, "learning_rate": 0.0003620575206141841, "loss": 1.7861, "step": 7838 }, { "epoch": 0.382763671875, "grad_norm": 0.2477140873670578, "learning_rate": 0.00036202522011499, "loss": 1.8085, "step": 7839 }, { "epoch": 0.3828125, "grad_norm": 0.2439325451850891, "learning_rate": 0.00036199291750666987, "loss": 1.8367, "step": 7840 }, { "epoch": 0.382861328125, "grad_norm": 0.20986776053905487, "learning_rate": 0.00036196061279000644, "loss": 1.8416, "step": 7841 }, { "epoch": 0.38291015625, "grad_norm": 0.2456616461277008, "learning_rate": 0.0003619283059657827, "loss": 1.858, "step": 7842 }, { "epoch": 0.382958984375, "grad_norm": 0.2819916009902954, "learning_rate": 0.0003618959970347817, "loss": 1.8173, "step": 7843 }, { "epoch": 0.3830078125, "grad_norm": 0.24594062566757202, "learning_rate": 0.00036186368599778633, "loss": 1.8291, "step": 7844 }, { "epoch": 0.383056640625, "grad_norm": 0.2545650005340576, "learning_rate": 0.00036183137285557976, "loss": 1.8265, "step": 7845 }, { "epoch": 0.38310546875, "grad_norm": 0.3075276017189026, "learning_rate": 0.0003617990576089451, "loss": 1.833, "step": 7846 }, { "epoch": 0.383154296875, "grad_norm": 0.24975872039794922, "learning_rate": 0.0003617667402586656, "loss": 1.8418, "step": 7847 }, { "epoch": 0.383203125, "grad_norm": 0.227182537317276, "learning_rate": 0.0003617344208055244, "loss": 1.8334, "step": 7848 }, { "epoch": 0.383251953125, "grad_norm": 0.27214664220809937, "learning_rate": 0.00036170209925030485, "loss": 1.8188, "step": 7849 }, { "epoch": 0.38330078125, "grad_norm": 0.23958665132522583, "learning_rate": 0.00036166977559379016, "loss": 1.8202, "step": 7850 }, { "epoch": 0.383349609375, "grad_norm": 0.25463247299194336, "learning_rate": 0.000361637449836764, "loss": 1.8307, "step": 7851 }, { "epoch": 0.3833984375, "grad_norm": 0.27961423993110657, "learning_rate": 0.0003616051219800095, "loss": 1.8208, "step": 7852 }, { "epoch": 0.383447265625, "grad_norm": 0.30743566155433655, "learning_rate": 0.0003615727920243104, "loss": 1.824, "step": 7853 }, { "epoch": 0.38349609375, "grad_norm": 0.28943079710006714, "learning_rate": 0.0003615404599704501, "loss": 1.8487, "step": 7854 }, { "epoch": 0.383544921875, "grad_norm": 0.26004257798194885, "learning_rate": 0.00036150812581921217, "loss": 1.8218, "step": 7855 }, { "epoch": 0.38359375, "grad_norm": 0.2837675213813782, "learning_rate": 0.00036147578957138033, "loss": 1.8399, "step": 7856 }, { "epoch": 0.383642578125, "grad_norm": 0.26553499698638916, "learning_rate": 0.00036144345122773836, "loss": 1.8175, "step": 7857 }, { "epoch": 0.38369140625, "grad_norm": 0.3005940020084381, "learning_rate": 0.00036141111078906977, "loss": 1.8389, "step": 7858 }, { "epoch": 0.383740234375, "grad_norm": 0.2578183710575104, "learning_rate": 0.0003613787682561585, "loss": 1.8302, "step": 7859 }, { "epoch": 0.3837890625, "grad_norm": 0.24566121399402618, "learning_rate": 0.00036134642362978844, "loss": 1.8408, "step": 7860 }, { "epoch": 0.383837890625, "grad_norm": 0.3093588948249817, "learning_rate": 0.0003613140769107434, "loss": 1.8162, "step": 7861 }, { "epoch": 0.38388671875, "grad_norm": 0.31832677125930786, "learning_rate": 0.0003612817280998074, "loss": 1.8195, "step": 7862 }, { "epoch": 0.383935546875, "grad_norm": 0.22265784442424774, "learning_rate": 0.0003612493771977644, "loss": 1.8313, "step": 7863 }, { "epoch": 0.383984375, "grad_norm": 0.2604995667934418, "learning_rate": 0.0003612170242053984, "loss": 1.8241, "step": 7864 }, { "epoch": 0.384033203125, "grad_norm": 0.33232730627059937, "learning_rate": 0.00036118466912349355, "loss": 1.8374, "step": 7865 }, { "epoch": 0.38408203125, "grad_norm": 0.348908394575119, "learning_rate": 0.0003611523119528341, "loss": 1.8441, "step": 7866 }, { "epoch": 0.384130859375, "grad_norm": 0.38825535774230957, "learning_rate": 0.00036111995269420404, "loss": 1.8377, "step": 7867 }, { "epoch": 0.3841796875, "grad_norm": 0.2589156925678253, "learning_rate": 0.0003610875913483878, "loss": 1.8126, "step": 7868 }, { "epoch": 0.384228515625, "grad_norm": 0.26911598443984985, "learning_rate": 0.0003610552279161697, "loss": 1.8125, "step": 7869 }, { "epoch": 0.38427734375, "grad_norm": 0.32103782892227173, "learning_rate": 0.00036102286239833386, "loss": 1.8213, "step": 7870 }, { "epoch": 0.384326171875, "grad_norm": 0.286276251077652, "learning_rate": 0.0003609904947956649, "loss": 1.8321, "step": 7871 }, { "epoch": 0.384375, "grad_norm": 0.3465215861797333, "learning_rate": 0.0003609581251089472, "loss": 1.8122, "step": 7872 }, { "epoch": 0.384423828125, "grad_norm": 0.3209342360496521, "learning_rate": 0.0003609257533389654, "loss": 1.8453, "step": 7873 }, { "epoch": 0.38447265625, "grad_norm": 0.2574216425418854, "learning_rate": 0.0003608933794865038, "loss": 1.8334, "step": 7874 }, { "epoch": 0.384521484375, "grad_norm": 0.2733338177204132, "learning_rate": 0.0003608610035523472, "loss": 1.7818, "step": 7875 }, { "epoch": 0.3845703125, "grad_norm": 0.26257607340812683, "learning_rate": 0.0003608286255372801, "loss": 1.8069, "step": 7876 }, { "epoch": 0.384619140625, "grad_norm": 0.24358515441417694, "learning_rate": 0.0003607962454420874, "loss": 1.8169, "step": 7877 }, { "epoch": 0.38466796875, "grad_norm": 0.2645532190799713, "learning_rate": 0.00036076386326755374, "loss": 1.8508, "step": 7878 }, { "epoch": 0.384716796875, "grad_norm": 0.26980823278427124, "learning_rate": 0.00036073147901446404, "loss": 1.8353, "step": 7879 }, { "epoch": 0.384765625, "grad_norm": 0.2851669490337372, "learning_rate": 0.0003606990926836029, "loss": 1.841, "step": 7880 }, { "epoch": 0.384814453125, "grad_norm": 0.28344881534576416, "learning_rate": 0.0003606667042757555, "loss": 1.82, "step": 7881 }, { "epoch": 0.38486328125, "grad_norm": 0.24128802120685577, "learning_rate": 0.0003606343137917067, "loss": 1.8063, "step": 7882 }, { "epoch": 0.384912109375, "grad_norm": 0.30712011456489563, "learning_rate": 0.0003606019212322416, "loss": 1.828, "step": 7883 }, { "epoch": 0.3849609375, "grad_norm": 0.23093007504940033, "learning_rate": 0.00036056952659814496, "loss": 1.8241, "step": 7884 }, { "epoch": 0.385009765625, "grad_norm": 0.23165035247802734, "learning_rate": 0.0003605371298902022, "loss": 1.8147, "step": 7885 }, { "epoch": 0.38505859375, "grad_norm": 0.2919559180736542, "learning_rate": 0.0003605047311091984, "loss": 1.7933, "step": 7886 }, { "epoch": 0.385107421875, "grad_norm": 0.2659439444541931, "learning_rate": 0.00036047233025591867, "loss": 1.8423, "step": 7887 }, { "epoch": 0.38515625, "grad_norm": 0.3182993233203888, "learning_rate": 0.00036043992733114844, "loss": 1.8362, "step": 7888 }, { "epoch": 0.385205078125, "grad_norm": 0.30490928888320923, "learning_rate": 0.00036040752233567285, "loss": 1.8464, "step": 7889 }, { "epoch": 0.38525390625, "grad_norm": 0.24184349179267883, "learning_rate": 0.0003603751152702774, "loss": 1.8483, "step": 7890 }, { "epoch": 0.385302734375, "grad_norm": 0.39677199721336365, "learning_rate": 0.0003603427061357474, "loss": 1.8343, "step": 7891 }, { "epoch": 0.3853515625, "grad_norm": 0.3654630184173584, "learning_rate": 0.0003603102949328684, "loss": 1.8215, "step": 7892 }, { "epoch": 0.385400390625, "grad_norm": 0.2486788034439087, "learning_rate": 0.00036027788166242584, "loss": 1.8479, "step": 7893 }, { "epoch": 0.38544921875, "grad_norm": 0.3509880304336548, "learning_rate": 0.00036024546632520537, "loss": 1.8479, "step": 7894 }, { "epoch": 0.385498046875, "grad_norm": 0.31475332379341125, "learning_rate": 0.00036021304892199245, "loss": 1.8087, "step": 7895 }, { "epoch": 0.385546875, "grad_norm": 0.2654896676540375, "learning_rate": 0.00036018062945357285, "loss": 1.8316, "step": 7896 }, { "epoch": 0.385595703125, "grad_norm": 0.28697896003723145, "learning_rate": 0.00036014820792073225, "loss": 1.8056, "step": 7897 }, { "epoch": 0.38564453125, "grad_norm": 0.26045656204223633, "learning_rate": 0.00036011578432425647, "loss": 1.8279, "step": 7898 }, { "epoch": 0.385693359375, "grad_norm": 0.25773996114730835, "learning_rate": 0.00036008335866493117, "loss": 1.8287, "step": 7899 }, { "epoch": 0.3857421875, "grad_norm": 0.28367504477500916, "learning_rate": 0.0003600509309435424, "loss": 1.8234, "step": 7900 }, { "epoch": 0.385791015625, "grad_norm": 0.21824456751346588, "learning_rate": 0.000360018501160876, "loss": 1.8291, "step": 7901 }, { "epoch": 0.38583984375, "grad_norm": 0.24706056714057922, "learning_rate": 0.000359986069317718, "loss": 1.7965, "step": 7902 }, { "epoch": 0.385888671875, "grad_norm": 0.2287280261516571, "learning_rate": 0.00035995363541485413, "loss": 1.8522, "step": 7903 }, { "epoch": 0.3859375, "grad_norm": 0.2420113980770111, "learning_rate": 0.00035992119945307084, "loss": 1.8226, "step": 7904 }, { "epoch": 0.385986328125, "grad_norm": 0.2820189893245697, "learning_rate": 0.000359888761433154, "loss": 1.8289, "step": 7905 }, { "epoch": 0.38603515625, "grad_norm": 0.22906966507434845, "learning_rate": 0.00035985632135588974, "loss": 1.8052, "step": 7906 }, { "epoch": 0.386083984375, "grad_norm": 0.24799004197120667, "learning_rate": 0.00035982387922206446, "loss": 1.8238, "step": 7907 }, { "epoch": 0.3861328125, "grad_norm": 0.2809152603149414, "learning_rate": 0.0003597914350324643, "loss": 1.7986, "step": 7908 }, { "epoch": 0.386181640625, "grad_norm": 0.28111353516578674, "learning_rate": 0.00035975898878787553, "loss": 1.8072, "step": 7909 }, { "epoch": 0.38623046875, "grad_norm": 0.2387804239988327, "learning_rate": 0.0003597265404890847, "loss": 1.8433, "step": 7910 }, { "epoch": 0.386279296875, "grad_norm": 0.33293595910072327, "learning_rate": 0.00035969409013687797, "loss": 1.8158, "step": 7911 }, { "epoch": 0.386328125, "grad_norm": 0.2884279191493988, "learning_rate": 0.000359661637732042, "loss": 1.8168, "step": 7912 }, { "epoch": 0.386376953125, "grad_norm": 0.2990912199020386, "learning_rate": 0.00035962918327536313, "loss": 1.8224, "step": 7913 }, { "epoch": 0.38642578125, "grad_norm": 0.3077740967273712, "learning_rate": 0.00035959672676762807, "loss": 1.8106, "step": 7914 }, { "epoch": 0.386474609375, "grad_norm": 0.21833503246307373, "learning_rate": 0.00035956426820962334, "loss": 1.7905, "step": 7915 }, { "epoch": 0.3865234375, "grad_norm": 0.2766486704349518, "learning_rate": 0.00035953180760213573, "loss": 1.8018, "step": 7916 }, { "epoch": 0.386572265625, "grad_norm": 0.29007771611213684, "learning_rate": 0.00035949934494595173, "loss": 1.8339, "step": 7917 }, { "epoch": 0.38662109375, "grad_norm": 0.23233507573604584, "learning_rate": 0.0003594668802418583, "loss": 1.839, "step": 7918 }, { "epoch": 0.386669921875, "grad_norm": 0.2880328595638275, "learning_rate": 0.00035943441349064217, "loss": 1.8311, "step": 7919 }, { "epoch": 0.38671875, "grad_norm": 0.32404467463493347, "learning_rate": 0.00035940194469309016, "loss": 1.8248, "step": 7920 }, { "epoch": 0.386767578125, "grad_norm": 0.25970545411109924, "learning_rate": 0.0003593694738499892, "loss": 1.828, "step": 7921 }, { "epoch": 0.38681640625, "grad_norm": 0.24775543808937073, "learning_rate": 0.0003593370009621263, "loss": 1.8402, "step": 7922 }, { "epoch": 0.386865234375, "grad_norm": 0.24661299586296082, "learning_rate": 0.00035930452603028835, "loss": 1.8245, "step": 7923 }, { "epoch": 0.3869140625, "grad_norm": 0.23612748086452484, "learning_rate": 0.00035927204905526256, "loss": 1.824, "step": 7924 }, { "epoch": 0.386962890625, "grad_norm": 0.34198328852653503, "learning_rate": 0.00035923957003783597, "loss": 1.818, "step": 7925 }, { "epoch": 0.38701171875, "grad_norm": 0.3011922240257263, "learning_rate": 0.00035920708897879564, "loss": 1.816, "step": 7926 }, { "epoch": 0.387060546875, "grad_norm": 0.3564125895500183, "learning_rate": 0.000359174605878929, "loss": 1.8195, "step": 7927 }, { "epoch": 0.387109375, "grad_norm": 0.4075597822666168, "learning_rate": 0.00035914212073902307, "loss": 1.8157, "step": 7928 }, { "epoch": 0.387158203125, "grad_norm": 0.20465171337127686, "learning_rate": 0.0003591096335598652, "loss": 1.8256, "step": 7929 }, { "epoch": 0.38720703125, "grad_norm": 0.4270763397216797, "learning_rate": 0.0003590771443422428, "loss": 1.8422, "step": 7930 }, { "epoch": 0.387255859375, "grad_norm": 0.3585347831249237, "learning_rate": 0.0003590446530869433, "loss": 1.8393, "step": 7931 }, { "epoch": 0.3873046875, "grad_norm": 0.32402992248535156, "learning_rate": 0.0003590121597947541, "loss": 1.8422, "step": 7932 }, { "epoch": 0.387353515625, "grad_norm": 0.3973870873451233, "learning_rate": 0.0003589796644664627, "loss": 1.8052, "step": 7933 }, { "epoch": 0.38740234375, "grad_norm": 0.342545747756958, "learning_rate": 0.0003589471671028567, "loss": 1.8216, "step": 7934 }, { "epoch": 0.387451171875, "grad_norm": 0.2830633819103241, "learning_rate": 0.0003589146677047237, "loss": 1.8319, "step": 7935 }, { "epoch": 0.3875, "grad_norm": 0.3302830159664154, "learning_rate": 0.0003588821662728512, "loss": 1.8063, "step": 7936 }, { "epoch": 0.387548828125, "grad_norm": 0.2849458158016205, "learning_rate": 0.00035884966280802706, "loss": 1.8295, "step": 7937 }, { "epoch": 0.38759765625, "grad_norm": 0.29233285784721375, "learning_rate": 0.00035881715731103897, "loss": 1.8208, "step": 7938 }, { "epoch": 0.387646484375, "grad_norm": 0.30651649832725525, "learning_rate": 0.00035878464978267473, "loss": 1.8271, "step": 7939 }, { "epoch": 0.3876953125, "grad_norm": 0.2901909351348877, "learning_rate": 0.0003587521402237222, "loss": 1.8396, "step": 7940 }, { "epoch": 0.387744140625, "grad_norm": 0.2830854058265686, "learning_rate": 0.00035871962863496924, "loss": 1.8607, "step": 7941 }, { "epoch": 0.38779296875, "grad_norm": 0.2819029688835144, "learning_rate": 0.00035868711501720393, "loss": 1.8221, "step": 7942 }, { "epoch": 0.387841796875, "grad_norm": 0.2633644938468933, "learning_rate": 0.00035865459937121404, "loss": 1.8259, "step": 7943 }, { "epoch": 0.387890625, "grad_norm": 0.24591782689094543, "learning_rate": 0.0003586220816977878, "loss": 1.8507, "step": 7944 }, { "epoch": 0.387939453125, "grad_norm": 0.283230185508728, "learning_rate": 0.00035858956199771316, "loss": 1.818, "step": 7945 }, { "epoch": 0.38798828125, "grad_norm": 0.22214491665363312, "learning_rate": 0.0003585570402717784, "loss": 1.836, "step": 7946 }, { "epoch": 0.388037109375, "grad_norm": 0.26776811480522156, "learning_rate": 0.00035852451652077156, "loss": 1.7975, "step": 7947 }, { "epoch": 0.3880859375, "grad_norm": 0.24027620255947113, "learning_rate": 0.000358491990745481, "loss": 1.8292, "step": 7948 }, { "epoch": 0.388134765625, "grad_norm": 0.3229255974292755, "learning_rate": 0.00035845946294669497, "loss": 1.8158, "step": 7949 }, { "epoch": 0.38818359375, "grad_norm": 0.33388984203338623, "learning_rate": 0.0003584269331252018, "loss": 1.8186, "step": 7950 }, { "epoch": 0.388232421875, "grad_norm": 0.28455549478530884, "learning_rate": 0.00035839440128178986, "loss": 1.8444, "step": 7951 }, { "epoch": 0.38828125, "grad_norm": 0.2578040361404419, "learning_rate": 0.0003583618674172477, "loss": 1.8401, "step": 7952 }, { "epoch": 0.388330078125, "grad_norm": 0.27290812134742737, "learning_rate": 0.00035832933153236363, "loss": 1.8405, "step": 7953 }, { "epoch": 0.38837890625, "grad_norm": 0.28032225370407104, "learning_rate": 0.00035829679362792626, "loss": 1.8432, "step": 7954 }, { "epoch": 0.388427734375, "grad_norm": 0.25683262944221497, "learning_rate": 0.0003582642537047242, "loss": 1.8212, "step": 7955 }, { "epoch": 0.3884765625, "grad_norm": 0.2683854103088379, "learning_rate": 0.00035823171176354603, "loss": 1.8081, "step": 7956 }, { "epoch": 0.388525390625, "grad_norm": 0.20380647480487823, "learning_rate": 0.00035819916780518047, "loss": 1.8304, "step": 7957 }, { "epoch": 0.38857421875, "grad_norm": 0.2617913782596588, "learning_rate": 0.00035816662183041625, "loss": 1.8433, "step": 7958 }, { "epoch": 0.388623046875, "grad_norm": 0.2980010211467743, "learning_rate": 0.00035813407384004207, "loss": 1.8278, "step": 7959 }, { "epoch": 0.388671875, "grad_norm": 0.23309466242790222, "learning_rate": 0.0003581015238348469, "loss": 1.8227, "step": 7960 }, { "epoch": 0.388720703125, "grad_norm": 0.22489053010940552, "learning_rate": 0.00035806897181561953, "loss": 1.8065, "step": 7961 }, { "epoch": 0.38876953125, "grad_norm": 0.2702218294143677, "learning_rate": 0.0003580364177831489, "loss": 1.8379, "step": 7962 }, { "epoch": 0.388818359375, "grad_norm": 0.24641425907611847, "learning_rate": 0.00035800386173822393, "loss": 1.8269, "step": 7963 }, { "epoch": 0.3888671875, "grad_norm": 0.23822948336601257, "learning_rate": 0.0003579713036816337, "loss": 1.8265, "step": 7964 }, { "epoch": 0.388916015625, "grad_norm": 0.25050023198127747, "learning_rate": 0.00035793874361416734, "loss": 1.8108, "step": 7965 }, { "epoch": 0.38896484375, "grad_norm": 0.2802926301956177, "learning_rate": 0.00035790618153661377, "loss": 1.8262, "step": 7966 }, { "epoch": 0.389013671875, "grad_norm": 0.254313200712204, "learning_rate": 0.00035787361744976236, "loss": 1.8325, "step": 7967 }, { "epoch": 0.3890625, "grad_norm": 0.1905793398618698, "learning_rate": 0.0003578410513544022, "loss": 1.8281, "step": 7968 }, { "epoch": 0.389111328125, "grad_norm": 0.2362508624792099, "learning_rate": 0.0003578084832513227, "loss": 1.8022, "step": 7969 }, { "epoch": 0.38916015625, "grad_norm": 0.3125852644443512, "learning_rate": 0.000357775913141313, "loss": 1.8371, "step": 7970 }, { "epoch": 0.389208984375, "grad_norm": 0.30860236287117004, "learning_rate": 0.0003577433410251626, "loss": 1.8402, "step": 7971 }, { "epoch": 0.3892578125, "grad_norm": 0.34598708152770996, "learning_rate": 0.0003577107669036608, "loss": 1.8259, "step": 7972 }, { "epoch": 0.389306640625, "grad_norm": 0.35675740242004395, "learning_rate": 0.00035767819077759717, "loss": 1.8192, "step": 7973 }, { "epoch": 0.38935546875, "grad_norm": 0.2381032556295395, "learning_rate": 0.00035764561264776105, "loss": 1.8231, "step": 7974 }, { "epoch": 0.389404296875, "grad_norm": 0.29926666617393494, "learning_rate": 0.00035761303251494224, "loss": 1.806, "step": 7975 }, { "epoch": 0.389453125, "grad_norm": 0.29365274310112, "learning_rate": 0.00035758045037993016, "loss": 1.8149, "step": 7976 }, { "epoch": 0.389501953125, "grad_norm": 0.21769067645072937, "learning_rate": 0.0003575478662435145, "loss": 1.847, "step": 7977 }, { "epoch": 0.38955078125, "grad_norm": 0.2559305727481842, "learning_rate": 0.000357515280106485, "loss": 1.8103, "step": 7978 }, { "epoch": 0.389599609375, "grad_norm": 0.2652186155319214, "learning_rate": 0.0003574826919696315, "loss": 1.8489, "step": 7979 }, { "epoch": 0.3896484375, "grad_norm": 0.2467580884695053, "learning_rate": 0.0003574501018337435, "loss": 1.8114, "step": 7980 }, { "epoch": 0.389697265625, "grad_norm": 0.20332133769989014, "learning_rate": 0.0003574175096996112, "loss": 1.8277, "step": 7981 }, { "epoch": 0.38974609375, "grad_norm": 0.24261847138404846, "learning_rate": 0.00035738491556802426, "loss": 1.844, "step": 7982 }, { "epoch": 0.389794921875, "grad_norm": 0.2488616555929184, "learning_rate": 0.0003573523194397727, "loss": 1.8426, "step": 7983 }, { "epoch": 0.38984375, "grad_norm": 0.2247202843427658, "learning_rate": 0.0003573197213156466, "loss": 1.8371, "step": 7984 }, { "epoch": 0.389892578125, "grad_norm": 0.22837118804454803, "learning_rate": 0.00035728712119643583, "loss": 1.8406, "step": 7985 }, { "epoch": 0.38994140625, "grad_norm": 0.2739196717739105, "learning_rate": 0.00035725451908293066, "loss": 1.8017, "step": 7986 }, { "epoch": 0.389990234375, "grad_norm": 0.24082954227924347, "learning_rate": 0.00035722191497592105, "loss": 1.8145, "step": 7987 }, { "epoch": 0.3900390625, "grad_norm": 0.26503247022628784, "learning_rate": 0.0003571893088761973, "loss": 1.8378, "step": 7988 }, { "epoch": 0.390087890625, "grad_norm": 0.22556568682193756, "learning_rate": 0.0003571567007845496, "loss": 1.8457, "step": 7989 }, { "epoch": 0.39013671875, "grad_norm": 0.23648512363433838, "learning_rate": 0.00035712409070176826, "loss": 1.826, "step": 7990 }, { "epoch": 0.390185546875, "grad_norm": 0.26325973868370056, "learning_rate": 0.00035709147862864364, "loss": 1.8485, "step": 7991 }, { "epoch": 0.390234375, "grad_norm": 0.22471961379051208, "learning_rate": 0.000357058864565966, "loss": 1.8054, "step": 7992 }, { "epoch": 0.390283203125, "grad_norm": 0.2952626049518585, "learning_rate": 0.000357026248514526, "loss": 1.8395, "step": 7993 }, { "epoch": 0.39033203125, "grad_norm": 0.26157110929489136, "learning_rate": 0.00035699363047511385, "loss": 1.8389, "step": 7994 }, { "epoch": 0.390380859375, "grad_norm": 0.23114146292209625, "learning_rate": 0.00035696101044852024, "loss": 1.8193, "step": 7995 }, { "epoch": 0.3904296875, "grad_norm": 0.2636825740337372, "learning_rate": 0.0003569283884355357, "loss": 1.8295, "step": 7996 }, { "epoch": 0.390478515625, "grad_norm": 0.31821179389953613, "learning_rate": 0.0003568957644369508, "loss": 1.8537, "step": 7997 }, { "epoch": 0.39052734375, "grad_norm": 0.27535349130630493, "learning_rate": 0.0003568631384535563, "loss": 1.8377, "step": 7998 }, { "epoch": 0.390576171875, "grad_norm": 0.3102201223373413, "learning_rate": 0.00035683051048614287, "loss": 1.8274, "step": 7999 }, { "epoch": 0.390625, "grad_norm": 0.28750184178352356, "learning_rate": 0.00035679788053550124, "loss": 1.8179, "step": 8000 }, { "epoch": 0.390673828125, "grad_norm": 0.2330588698387146, "learning_rate": 0.0003567652486024223, "loss": 1.8102, "step": 8001 }, { "epoch": 0.39072265625, "grad_norm": 0.2555520832538605, "learning_rate": 0.00035673261468769675, "loss": 1.823, "step": 8002 }, { "epoch": 0.390771484375, "grad_norm": 0.29856258630752563, "learning_rate": 0.00035669997879211575, "loss": 1.8084, "step": 8003 }, { "epoch": 0.3908203125, "grad_norm": 0.27462446689605713, "learning_rate": 0.00035666734091647015, "loss": 1.8347, "step": 8004 }, { "epoch": 0.390869140625, "grad_norm": 0.27678948640823364, "learning_rate": 0.0003566347010615508, "loss": 1.8094, "step": 8005 }, { "epoch": 0.39091796875, "grad_norm": 0.3587842881679535, "learning_rate": 0.00035660205922814905, "loss": 1.8245, "step": 8006 }, { "epoch": 0.390966796875, "grad_norm": 0.33617302775382996, "learning_rate": 0.0003565694154170557, "loss": 1.8429, "step": 8007 }, { "epoch": 0.391015625, "grad_norm": 0.23752616345882416, "learning_rate": 0.00035653676962906205, "loss": 1.8354, "step": 8008 }, { "epoch": 0.391064453125, "grad_norm": 0.3523954749107361, "learning_rate": 0.00035650412186495926, "loss": 1.832, "step": 8009 }, { "epoch": 0.39111328125, "grad_norm": 0.27140313386917114, "learning_rate": 0.00035647147212553867, "loss": 1.8214, "step": 8010 }, { "epoch": 0.391162109375, "grad_norm": 0.2981569170951843, "learning_rate": 0.0003564388204115915, "loss": 1.8193, "step": 8011 }, { "epoch": 0.3912109375, "grad_norm": 0.34567010402679443, "learning_rate": 0.0003564061667239091, "loss": 1.8002, "step": 8012 }, { "epoch": 0.391259765625, "grad_norm": 0.2600046694278717, "learning_rate": 0.0003563735110632828, "loss": 1.8158, "step": 8013 }, { "epoch": 0.39130859375, "grad_norm": 0.26816749572753906, "learning_rate": 0.0003563408534305041, "loss": 1.8177, "step": 8014 }, { "epoch": 0.391357421875, "grad_norm": 0.2673277258872986, "learning_rate": 0.00035630819382636447, "loss": 1.799, "step": 8015 }, { "epoch": 0.39140625, "grad_norm": 0.24095045030117035, "learning_rate": 0.00035627553225165543, "loss": 1.8199, "step": 8016 }, { "epoch": 0.391455078125, "grad_norm": 0.2377951443195343, "learning_rate": 0.0003562428687071686, "loss": 1.8225, "step": 8017 }, { "epoch": 0.39150390625, "grad_norm": 0.28879493474960327, "learning_rate": 0.0003562102031936955, "loss": 1.8099, "step": 8018 }, { "epoch": 0.391552734375, "grad_norm": 0.23042485117912292, "learning_rate": 0.00035617753571202796, "loss": 1.8104, "step": 8019 }, { "epoch": 0.3916015625, "grad_norm": 0.2516573667526245, "learning_rate": 0.00035614486626295766, "loss": 1.829, "step": 8020 }, { "epoch": 0.391650390625, "grad_norm": 0.2908271253108978, "learning_rate": 0.00035611219484727623, "loss": 1.8451, "step": 8021 }, { "epoch": 0.39169921875, "grad_norm": 0.32303380966186523, "learning_rate": 0.0003560795214657757, "loss": 1.8534, "step": 8022 }, { "epoch": 0.391748046875, "grad_norm": 0.28168314695358276, "learning_rate": 0.00035604684611924774, "loss": 1.8052, "step": 8023 }, { "epoch": 0.391796875, "grad_norm": 0.30841657519340515, "learning_rate": 0.0003560141688084844, "loss": 1.8392, "step": 8024 }, { "epoch": 0.391845703125, "grad_norm": 0.339677095413208, "learning_rate": 0.00035598148953427754, "loss": 1.8268, "step": 8025 }, { "epoch": 0.39189453125, "grad_norm": 0.3502306044101715, "learning_rate": 0.00035594880829741926, "loss": 1.8289, "step": 8026 }, { "epoch": 0.391943359375, "grad_norm": 0.3429849445819855, "learning_rate": 0.00035591612509870165, "loss": 1.8123, "step": 8027 }, { "epoch": 0.3919921875, "grad_norm": 0.28976741433143616, "learning_rate": 0.00035588343993891666, "loss": 1.852, "step": 8028 }, { "epoch": 0.392041015625, "grad_norm": 0.27371183037757874, "learning_rate": 0.0003558507528188565, "loss": 1.8286, "step": 8029 }, { "epoch": 0.39208984375, "grad_norm": 0.22199444472789764, "learning_rate": 0.0003558180637393134, "loss": 1.8186, "step": 8030 }, { "epoch": 0.392138671875, "grad_norm": 0.24354568123817444, "learning_rate": 0.0003557853727010797, "loss": 1.8014, "step": 8031 }, { "epoch": 0.3921875, "grad_norm": 0.2368495911359787, "learning_rate": 0.0003557526797049474, "loss": 1.7987, "step": 8032 }, { "epoch": 0.392236328125, "grad_norm": 0.24609136581420898, "learning_rate": 0.00035571998475170916, "loss": 1.8347, "step": 8033 }, { "epoch": 0.39228515625, "grad_norm": 0.2817348837852478, "learning_rate": 0.00035568728784215727, "loss": 1.841, "step": 8034 }, { "epoch": 0.392333984375, "grad_norm": 0.23334059119224548, "learning_rate": 0.000355654588977084, "loss": 1.8448, "step": 8035 }, { "epoch": 0.3923828125, "grad_norm": 0.28155386447906494, "learning_rate": 0.0003556218881572821, "loss": 1.8157, "step": 8036 }, { "epoch": 0.392431640625, "grad_norm": 0.3305833041667938, "learning_rate": 0.000355589185383544, "loss": 1.8534, "step": 8037 }, { "epoch": 0.39248046875, "grad_norm": 0.2944068908691406, "learning_rate": 0.0003555564806566621, "loss": 1.8011, "step": 8038 }, { "epoch": 0.392529296875, "grad_norm": 0.30486100912094116, "learning_rate": 0.00035552377397742924, "loss": 1.8358, "step": 8039 }, { "epoch": 0.392578125, "grad_norm": 0.33694103360176086, "learning_rate": 0.00035549106534663803, "loss": 1.8388, "step": 8040 }, { "epoch": 0.392626953125, "grad_norm": 0.27596762776374817, "learning_rate": 0.0003554583547650812, "loss": 1.8184, "step": 8041 }, { "epoch": 0.39267578125, "grad_norm": 0.3008583188056946, "learning_rate": 0.0003554256422335515, "loss": 1.8047, "step": 8042 }, { "epoch": 0.392724609375, "grad_norm": 0.26274532079696655, "learning_rate": 0.00035539292775284173, "loss": 1.8366, "step": 8043 }, { "epoch": 0.3927734375, "grad_norm": 0.3200088441371918, "learning_rate": 0.0003553602113237447, "loss": 1.8334, "step": 8044 }, { "epoch": 0.392822265625, "grad_norm": 0.29008251428604126, "learning_rate": 0.0003553274929470535, "loss": 1.8162, "step": 8045 }, { "epoch": 0.39287109375, "grad_norm": 0.25262364745140076, "learning_rate": 0.0003552947726235609, "loss": 1.8097, "step": 8046 }, { "epoch": 0.392919921875, "grad_norm": 0.29898470640182495, "learning_rate": 0.0003552620503540601, "loss": 1.8455, "step": 8047 }, { "epoch": 0.39296875, "grad_norm": 0.2281382530927658, "learning_rate": 0.0003552293261393438, "loss": 1.8258, "step": 8048 }, { "epoch": 0.393017578125, "grad_norm": 0.2827511727809906, "learning_rate": 0.00035519659998020555, "loss": 1.8381, "step": 8049 }, { "epoch": 0.39306640625, "grad_norm": 0.28294461965560913, "learning_rate": 0.00035516387187743817, "loss": 1.8344, "step": 8050 }, { "epoch": 0.393115234375, "grad_norm": 0.24273733794689178, "learning_rate": 0.00035513114183183497, "loss": 1.8186, "step": 8051 }, { "epoch": 0.3931640625, "grad_norm": 0.31790241599082947, "learning_rate": 0.00035509840984418917, "loss": 1.8154, "step": 8052 }, { "epoch": 0.393212890625, "grad_norm": 0.27492791414260864, "learning_rate": 0.0003550656759152941, "loss": 1.824, "step": 8053 }, { "epoch": 0.39326171875, "grad_norm": 0.30183863639831543, "learning_rate": 0.00035503294004594297, "loss": 1.8208, "step": 8054 }, { "epoch": 0.393310546875, "grad_norm": 0.2625916004180908, "learning_rate": 0.00035500020223692925, "loss": 1.8258, "step": 8055 }, { "epoch": 0.393359375, "grad_norm": 0.23496897518634796, "learning_rate": 0.0003549674624890464, "loss": 1.833, "step": 8056 }, { "epoch": 0.393408203125, "grad_norm": 0.270099014043808, "learning_rate": 0.0003549347208030878, "loss": 1.8066, "step": 8057 }, { "epoch": 0.39345703125, "grad_norm": 0.22233135998249054, "learning_rate": 0.00035490197717984704, "loss": 1.8147, "step": 8058 }, { "epoch": 0.393505859375, "grad_norm": 0.2553105056285858, "learning_rate": 0.0003548692316201177, "loss": 1.8213, "step": 8059 }, { "epoch": 0.3935546875, "grad_norm": 0.27687451243400574, "learning_rate": 0.0003548364841246934, "loss": 1.8189, "step": 8060 }, { "epoch": 0.393603515625, "grad_norm": 0.28470873832702637, "learning_rate": 0.0003548037346943677, "loss": 1.8314, "step": 8061 }, { "epoch": 0.39365234375, "grad_norm": 0.33445701003074646, "learning_rate": 0.0003547709833299344, "loss": 1.8449, "step": 8062 }, { "epoch": 0.393701171875, "grad_norm": 0.3016308844089508, "learning_rate": 0.0003547382300321872, "loss": 1.8101, "step": 8063 }, { "epoch": 0.39375, "grad_norm": 0.3294866979122162, "learning_rate": 0.0003547054748019199, "loss": 1.8229, "step": 8064 }, { "epoch": 0.393798828125, "grad_norm": 0.29469841718673706, "learning_rate": 0.0003546727176399265, "loss": 1.8326, "step": 8065 }, { "epoch": 0.39384765625, "grad_norm": 0.21421749889850616, "learning_rate": 0.0003546399585470007, "loss": 1.8059, "step": 8066 }, { "epoch": 0.393896484375, "grad_norm": 0.2707156836986542, "learning_rate": 0.00035460719752393655, "loss": 1.8227, "step": 8067 }, { "epoch": 0.3939453125, "grad_norm": 0.29483357071876526, "learning_rate": 0.00035457443457152804, "loss": 1.8299, "step": 8068 }, { "epoch": 0.393994140625, "grad_norm": 0.30120036005973816, "learning_rate": 0.0003545416696905691, "loss": 1.7933, "step": 8069 }, { "epoch": 0.39404296875, "grad_norm": 0.23817460238933563, "learning_rate": 0.000354508902881854, "loss": 1.8068, "step": 8070 }, { "epoch": 0.394091796875, "grad_norm": 0.2938162386417389, "learning_rate": 0.0003544761341461767, "loss": 1.8323, "step": 8071 }, { "epoch": 0.394140625, "grad_norm": 0.23432737588882446, "learning_rate": 0.0003544433634843314, "loss": 1.8317, "step": 8072 }, { "epoch": 0.394189453125, "grad_norm": 0.2206127643585205, "learning_rate": 0.0003544105908971124, "loss": 1.8223, "step": 8073 }, { "epoch": 0.39423828125, "grad_norm": 0.28835341334342957, "learning_rate": 0.00035437781638531396, "loss": 1.8047, "step": 8074 }, { "epoch": 0.394287109375, "grad_norm": 0.22218598425388336, "learning_rate": 0.0003543450399497303, "loss": 1.8004, "step": 8075 }, { "epoch": 0.3943359375, "grad_norm": 0.23486445844173431, "learning_rate": 0.00035431226159115593, "loss": 1.8158, "step": 8076 }, { "epoch": 0.394384765625, "grad_norm": 0.22125492990016937, "learning_rate": 0.00035427948131038514, "loss": 1.833, "step": 8077 }, { "epoch": 0.39443359375, "grad_norm": 0.283671498298645, "learning_rate": 0.0003542466991082124, "loss": 1.8416, "step": 8078 }, { "epoch": 0.394482421875, "grad_norm": 0.2731476426124573, "learning_rate": 0.00035421391498543234, "loss": 1.8263, "step": 8079 }, { "epoch": 0.39453125, "grad_norm": 0.27713167667388916, "learning_rate": 0.0003541811289428394, "loss": 1.8231, "step": 8080 }, { "epoch": 0.394580078125, "grad_norm": 0.24280637502670288, "learning_rate": 0.0003541483409812281, "loss": 1.7969, "step": 8081 }, { "epoch": 0.39462890625, "grad_norm": 0.23165088891983032, "learning_rate": 0.00035411555110139315, "loss": 1.8091, "step": 8082 }, { "epoch": 0.394677734375, "grad_norm": 0.261091947555542, "learning_rate": 0.0003540827593041293, "loss": 1.8136, "step": 8083 }, { "epoch": 0.3947265625, "grad_norm": 0.2832632064819336, "learning_rate": 0.0003540499655902313, "loss": 1.829, "step": 8084 }, { "epoch": 0.394775390625, "grad_norm": 0.24814698100090027, "learning_rate": 0.0003540171699604938, "loss": 1.8281, "step": 8085 }, { "epoch": 0.39482421875, "grad_norm": 0.23503437638282776, "learning_rate": 0.0003539843724157117, "loss": 1.8244, "step": 8086 }, { "epoch": 0.394873046875, "grad_norm": 0.2488030642271042, "learning_rate": 0.00035395157295667996, "loss": 1.7892, "step": 8087 }, { "epoch": 0.394921875, "grad_norm": 0.33308297395706177, "learning_rate": 0.0003539187715841934, "loss": 1.834, "step": 8088 }, { "epoch": 0.394970703125, "grad_norm": 0.3265773355960846, "learning_rate": 0.000353885968299047, "loss": 1.8243, "step": 8089 }, { "epoch": 0.39501953125, "grad_norm": 0.2643173635005951, "learning_rate": 0.0003538531631020357, "loss": 1.7799, "step": 8090 }, { "epoch": 0.395068359375, "grad_norm": 0.30000245571136475, "learning_rate": 0.00035382035599395466, "loss": 1.838, "step": 8091 }, { "epoch": 0.3951171875, "grad_norm": 0.23414693772792816, "learning_rate": 0.00035378754697559903, "loss": 1.8453, "step": 8092 }, { "epoch": 0.395166015625, "grad_norm": 0.22288450598716736, "learning_rate": 0.0003537547360477639, "loss": 1.7985, "step": 8093 }, { "epoch": 0.39521484375, "grad_norm": 0.2631744146347046, "learning_rate": 0.0003537219232112444, "loss": 1.8184, "step": 8094 }, { "epoch": 0.395263671875, "grad_norm": 0.25647905468940735, "learning_rate": 0.000353689108466836, "loss": 1.804, "step": 8095 }, { "epoch": 0.3953125, "grad_norm": 0.2669857442378998, "learning_rate": 0.0003536562918153337, "loss": 1.824, "step": 8096 }, { "epoch": 0.395361328125, "grad_norm": 0.25939860939979553, "learning_rate": 0.000353623473257533, "loss": 1.8113, "step": 8097 }, { "epoch": 0.39541015625, "grad_norm": 0.26181718707084656, "learning_rate": 0.0003535906527942293, "loss": 1.8306, "step": 8098 }, { "epoch": 0.395458984375, "grad_norm": 0.26013222336769104, "learning_rate": 0.000353557830426218, "loss": 1.8224, "step": 8099 }, { "epoch": 0.3955078125, "grad_norm": 0.25653424859046936, "learning_rate": 0.00035352500615429445, "loss": 1.7846, "step": 8100 }, { "epoch": 0.395556640625, "grad_norm": 0.23665006458759308, "learning_rate": 0.0003534921799792544, "loss": 1.8179, "step": 8101 }, { "epoch": 0.39560546875, "grad_norm": 0.23579634726047516, "learning_rate": 0.0003534593519018933, "loss": 1.8395, "step": 8102 }, { "epoch": 0.395654296875, "grad_norm": 0.2502692639827728, "learning_rate": 0.0003534265219230067, "loss": 1.8232, "step": 8103 }, { "epoch": 0.395703125, "grad_norm": 0.22359418869018555, "learning_rate": 0.00035339369004339045, "loss": 1.8075, "step": 8104 }, { "epoch": 0.395751953125, "grad_norm": 0.20197154581546783, "learning_rate": 0.00035336085626384004, "loss": 1.7983, "step": 8105 }, { "epoch": 0.39580078125, "grad_norm": 0.2577521800994873, "learning_rate": 0.0003533280205851514, "loss": 1.8056, "step": 8106 }, { "epoch": 0.395849609375, "grad_norm": 0.32421231269836426, "learning_rate": 0.0003532951830081202, "loss": 1.8072, "step": 8107 }, { "epoch": 0.3958984375, "grad_norm": 0.3321079909801483, "learning_rate": 0.00035326234353354235, "loss": 1.8431, "step": 8108 }, { "epoch": 0.395947265625, "grad_norm": 0.2822667956352234, "learning_rate": 0.0003532295021622137, "loss": 1.8201, "step": 8109 }, { "epoch": 0.39599609375, "grad_norm": 0.2696734368801117, "learning_rate": 0.0003531966588949302, "loss": 1.8385, "step": 8110 }, { "epoch": 0.396044921875, "grad_norm": 0.30188632011413574, "learning_rate": 0.00035316381373248795, "loss": 1.7943, "step": 8111 }, { "epoch": 0.39609375, "grad_norm": 0.26501035690307617, "learning_rate": 0.0003531309666756828, "loss": 1.8187, "step": 8112 }, { "epoch": 0.396142578125, "grad_norm": 0.23966428637504578, "learning_rate": 0.000353098117725311, "loss": 1.8334, "step": 8113 }, { "epoch": 0.39619140625, "grad_norm": 0.2735019624233246, "learning_rate": 0.0003530652668821685, "loss": 1.7866, "step": 8114 }, { "epoch": 0.396240234375, "grad_norm": 0.31739649176597595, "learning_rate": 0.0003530324141470515, "loss": 1.8471, "step": 8115 }, { "epoch": 0.3962890625, "grad_norm": 0.38390424847602844, "learning_rate": 0.00035299955952075633, "loss": 1.8338, "step": 8116 }, { "epoch": 0.396337890625, "grad_norm": 0.344927579164505, "learning_rate": 0.0003529667030040791, "loss": 1.8305, "step": 8117 }, { "epoch": 0.39638671875, "grad_norm": 0.28806576132774353, "learning_rate": 0.00035293384459781626, "loss": 1.8221, "step": 8118 }, { "epoch": 0.396435546875, "grad_norm": 0.2612631022930145, "learning_rate": 0.0003529009843027641, "loss": 1.8087, "step": 8119 }, { "epoch": 0.396484375, "grad_norm": 0.2961721122264862, "learning_rate": 0.00035286812211971885, "loss": 1.8269, "step": 8120 }, { "epoch": 0.396533203125, "grad_norm": 0.33859020471572876, "learning_rate": 0.0003528352580494772, "loss": 1.8096, "step": 8121 }, { "epoch": 0.39658203125, "grad_norm": 0.29139137268066406, "learning_rate": 0.0003528023920928355, "loss": 1.8243, "step": 8122 }, { "epoch": 0.396630859375, "grad_norm": 0.24958816170692444, "learning_rate": 0.00035276952425059043, "loss": 1.8078, "step": 8123 }, { "epoch": 0.3966796875, "grad_norm": 0.27277034521102905, "learning_rate": 0.0003527366545235384, "loss": 1.8148, "step": 8124 }, { "epoch": 0.396728515625, "grad_norm": 0.29166606068611145, "learning_rate": 0.00035270378291247606, "loss": 1.8349, "step": 8125 }, { "epoch": 0.39677734375, "grad_norm": 0.3486558496952057, "learning_rate": 0.0003526709094182001, "loss": 1.8319, "step": 8126 }, { "epoch": 0.396826171875, "grad_norm": 0.26427024602890015, "learning_rate": 0.00035263803404150736, "loss": 1.8329, "step": 8127 }, { "epoch": 0.396875, "grad_norm": 0.3264957666397095, "learning_rate": 0.00035260515678319437, "loss": 1.8015, "step": 8128 }, { "epoch": 0.396923828125, "grad_norm": 0.24187035858631134, "learning_rate": 0.0003525722776440581, "loss": 1.8299, "step": 8129 }, { "epoch": 0.39697265625, "grad_norm": 0.2704024314880371, "learning_rate": 0.0003525393966248954, "loss": 1.8523, "step": 8130 }, { "epoch": 0.397021484375, "grad_norm": 0.27686652541160583, "learning_rate": 0.00035250651372650306, "loss": 1.8127, "step": 8131 }, { "epoch": 0.3970703125, "grad_norm": 0.2140515148639679, "learning_rate": 0.00035247362894967816, "loss": 1.8392, "step": 8132 }, { "epoch": 0.397119140625, "grad_norm": 0.2581416070461273, "learning_rate": 0.00035244074229521757, "loss": 1.8403, "step": 8133 }, { "epoch": 0.39716796875, "grad_norm": 0.24132008850574493, "learning_rate": 0.00035240785376391845, "loss": 1.8238, "step": 8134 }, { "epoch": 0.397216796875, "grad_norm": 0.2546932101249695, "learning_rate": 0.0003523749633565777, "loss": 1.8496, "step": 8135 }, { "epoch": 0.397265625, "grad_norm": 0.29465749859809875, "learning_rate": 0.00035234207107399257, "loss": 1.8129, "step": 8136 }, { "epoch": 0.397314453125, "grad_norm": 0.24984917044639587, "learning_rate": 0.0003523091769169602, "loss": 1.8279, "step": 8137 }, { "epoch": 0.39736328125, "grad_norm": 0.22220277786254883, "learning_rate": 0.0003522762808862779, "loss": 1.8288, "step": 8138 }, { "epoch": 0.397412109375, "grad_norm": 0.3093608617782593, "learning_rate": 0.0003522433829827428, "loss": 1.8272, "step": 8139 }, { "epoch": 0.3974609375, "grad_norm": 0.31477195024490356, "learning_rate": 0.0003522104832071523, "loss": 1.8066, "step": 8140 }, { "epoch": 0.397509765625, "grad_norm": 0.25379109382629395, "learning_rate": 0.0003521775815603036, "loss": 1.7975, "step": 8141 }, { "epoch": 0.39755859375, "grad_norm": 0.26997217535972595, "learning_rate": 0.00035214467804299435, "loss": 1.7913, "step": 8142 }, { "epoch": 0.397607421875, "grad_norm": 0.28585270047187805, "learning_rate": 0.0003521117726560218, "loss": 1.8314, "step": 8143 }, { "epoch": 0.39765625, "grad_norm": 0.3027687668800354, "learning_rate": 0.00035207886540018345, "loss": 1.8202, "step": 8144 }, { "epoch": 0.397705078125, "grad_norm": 0.2802959084510803, "learning_rate": 0.0003520459562762769, "loss": 1.8065, "step": 8145 }, { "epoch": 0.39775390625, "grad_norm": 0.269378125667572, "learning_rate": 0.0003520130452850997, "loss": 1.854, "step": 8146 }, { "epoch": 0.397802734375, "grad_norm": 0.26218780875205994, "learning_rate": 0.0003519801324274495, "loss": 1.8296, "step": 8147 }, { "epoch": 0.3978515625, "grad_norm": 0.26230132579803467, "learning_rate": 0.00035194721770412385, "loss": 1.8236, "step": 8148 }, { "epoch": 0.397900390625, "grad_norm": 0.241347074508667, "learning_rate": 0.00035191430111592066, "loss": 1.8152, "step": 8149 }, { "epoch": 0.39794921875, "grad_norm": 0.20247118175029755, "learning_rate": 0.00035188138266363755, "loss": 1.8376, "step": 8150 }, { "epoch": 0.397998046875, "grad_norm": 0.2313014417886734, "learning_rate": 0.0003518484623480724, "loss": 1.814, "step": 8151 }, { "epoch": 0.398046875, "grad_norm": 0.21550966799259186, "learning_rate": 0.0003518155401700229, "loss": 1.8547, "step": 8152 }, { "epoch": 0.398095703125, "grad_norm": 0.22295421361923218, "learning_rate": 0.0003517826161302873, "loss": 1.812, "step": 8153 }, { "epoch": 0.39814453125, "grad_norm": 0.24243660271167755, "learning_rate": 0.00035174969022966313, "loss": 1.8145, "step": 8154 }, { "epoch": 0.398193359375, "grad_norm": 0.23927970230579376, "learning_rate": 0.0003517167624689486, "loss": 1.7922, "step": 8155 }, { "epoch": 0.3982421875, "grad_norm": 0.27300405502319336, "learning_rate": 0.00035168383284894165, "loss": 1.8143, "step": 8156 }, { "epoch": 0.398291015625, "grad_norm": 0.26234763860702515, "learning_rate": 0.0003516509013704405, "loss": 1.8307, "step": 8157 }, { "epoch": 0.39833984375, "grad_norm": 0.23417192697525024, "learning_rate": 0.00035161796803424313, "loss": 1.8455, "step": 8158 }, { "epoch": 0.398388671875, "grad_norm": 0.2615744173526764, "learning_rate": 0.00035158503284114773, "loss": 1.8145, "step": 8159 }, { "epoch": 0.3984375, "grad_norm": 0.2282068133354187, "learning_rate": 0.0003515520957919526, "loss": 1.8269, "step": 8160 }, { "epoch": 0.398486328125, "grad_norm": 0.24794594943523407, "learning_rate": 0.00035151915688745583, "loss": 1.8193, "step": 8161 }, { "epoch": 0.39853515625, "grad_norm": 0.25100675225257874, "learning_rate": 0.00035148621612845593, "loss": 1.7991, "step": 8162 }, { "epoch": 0.398583984375, "grad_norm": 0.2516922354698181, "learning_rate": 0.00035145327351575106, "loss": 1.7837, "step": 8163 }, { "epoch": 0.3986328125, "grad_norm": 0.2357475757598877, "learning_rate": 0.0003514203290501397, "loss": 1.8524, "step": 8164 }, { "epoch": 0.398681640625, "grad_norm": 0.26606982946395874, "learning_rate": 0.0003513873827324203, "loss": 1.8053, "step": 8165 }, { "epoch": 0.39873046875, "grad_norm": 0.2860620617866516, "learning_rate": 0.0003513544345633912, "loss": 1.788, "step": 8166 }, { "epoch": 0.398779296875, "grad_norm": 0.30910125374794006, "learning_rate": 0.00035132148454385115, "loss": 1.8495, "step": 8167 }, { "epoch": 0.398828125, "grad_norm": 0.2834242582321167, "learning_rate": 0.00035128853267459855, "loss": 1.8166, "step": 8168 }, { "epoch": 0.398876953125, "grad_norm": 0.2557981312274933, "learning_rate": 0.00035125557895643214, "loss": 1.8292, "step": 8169 }, { "epoch": 0.39892578125, "grad_norm": 0.28153347969055176, "learning_rate": 0.00035122262339015037, "loss": 1.8368, "step": 8170 }, { "epoch": 0.398974609375, "grad_norm": 0.36669832468032837, "learning_rate": 0.00035118966597655224, "loss": 1.8089, "step": 8171 }, { "epoch": 0.3990234375, "grad_norm": 0.34115907549858093, "learning_rate": 0.0003511567067164363, "loss": 1.8189, "step": 8172 }, { "epoch": 0.399072265625, "grad_norm": 0.26563912630081177, "learning_rate": 0.0003511237456106013, "loss": 1.8352, "step": 8173 }, { "epoch": 0.39912109375, "grad_norm": 0.3457096219062805, "learning_rate": 0.00035109078265984636, "loss": 1.8034, "step": 8174 }, { "epoch": 0.399169921875, "grad_norm": 0.29646122455596924, "learning_rate": 0.00035105781786497, "loss": 1.8136, "step": 8175 }, { "epoch": 0.39921875, "grad_norm": 0.28731805086135864, "learning_rate": 0.0003510248512267714, "loss": 1.7872, "step": 8176 }, { "epoch": 0.399267578125, "grad_norm": 0.2876127064228058, "learning_rate": 0.00035099188274604937, "loss": 1.8081, "step": 8177 }, { "epoch": 0.39931640625, "grad_norm": 0.26347172260284424, "learning_rate": 0.0003509589124236031, "loss": 1.8235, "step": 8178 }, { "epoch": 0.399365234375, "grad_norm": 0.3038730323314667, "learning_rate": 0.0003509259402602315, "loss": 1.8196, "step": 8179 }, { "epoch": 0.3994140625, "grad_norm": 0.2899210751056671, "learning_rate": 0.00035089296625673386, "loss": 1.8117, "step": 8180 }, { "epoch": 0.399462890625, "grad_norm": 0.29502934217453003, "learning_rate": 0.00035085999041390906, "loss": 1.8398, "step": 8181 }, { "epoch": 0.39951171875, "grad_norm": 0.25595617294311523, "learning_rate": 0.00035082701273255644, "loss": 1.8272, "step": 8182 }, { "epoch": 0.399560546875, "grad_norm": 0.2666119635105133, "learning_rate": 0.00035079403321347535, "loss": 1.8, "step": 8183 }, { "epoch": 0.399609375, "grad_norm": 0.32144877314567566, "learning_rate": 0.00035076105185746493, "loss": 1.8056, "step": 8184 }, { "epoch": 0.399658203125, "grad_norm": 0.2575407326221466, "learning_rate": 0.00035072806866532453, "loss": 1.8461, "step": 8185 }, { "epoch": 0.39970703125, "grad_norm": 0.26442980766296387, "learning_rate": 0.0003506950836378536, "loss": 1.8228, "step": 8186 }, { "epoch": 0.399755859375, "grad_norm": 0.30153363943099976, "learning_rate": 0.0003506620967758515, "loss": 1.8279, "step": 8187 }, { "epoch": 0.3998046875, "grad_norm": 0.24292662739753723, "learning_rate": 0.0003506291080801177, "loss": 1.816, "step": 8188 }, { "epoch": 0.399853515625, "grad_norm": 0.24039319157600403, "learning_rate": 0.00035059611755145165, "loss": 1.8146, "step": 8189 }, { "epoch": 0.39990234375, "grad_norm": 0.27816441655158997, "learning_rate": 0.00035056312519065295, "loss": 1.796, "step": 8190 }, { "epoch": 0.399951171875, "grad_norm": 0.21807801723480225, "learning_rate": 0.00035053013099852127, "loss": 1.834, "step": 8191 }, { "epoch": 0.4, "grad_norm": 0.25919678807258606, "learning_rate": 0.0003504971349758562, "loss": 1.8148, "step": 8192 }, { "epoch": 0.400048828125, "grad_norm": 0.32242637872695923, "learning_rate": 0.0003504641371234574, "loss": 1.8092, "step": 8193 }, { "epoch": 0.40009765625, "grad_norm": 0.2274196892976761, "learning_rate": 0.0003504311374421246, "loss": 1.8211, "step": 8194 }, { "epoch": 0.400146484375, "grad_norm": 0.2386527806520462, "learning_rate": 0.00035039813593265747, "loss": 1.8256, "step": 8195 }, { "epoch": 0.4001953125, "grad_norm": 0.24702297151088715, "learning_rate": 0.00035036513259585606, "loss": 1.825, "step": 8196 }, { "epoch": 0.400244140625, "grad_norm": 0.23040419816970825, "learning_rate": 0.00035033212743252007, "loss": 1.8232, "step": 8197 }, { "epoch": 0.40029296875, "grad_norm": 0.21977053582668304, "learning_rate": 0.0003502991204434495, "loss": 1.8334, "step": 8198 }, { "epoch": 0.400341796875, "grad_norm": 0.24137242138385773, "learning_rate": 0.0003502661116294442, "loss": 1.8175, "step": 8199 }, { "epoch": 0.400390625, "grad_norm": 0.2953343093395233, "learning_rate": 0.0003502331009913042, "loss": 1.8159, "step": 8200 }, { "epoch": 0.400439453125, "grad_norm": 0.27795320749282837, "learning_rate": 0.0003502000885298296, "loss": 1.8301, "step": 8201 }, { "epoch": 0.40048828125, "grad_norm": 0.3115060031414032, "learning_rate": 0.00035016707424582045, "loss": 1.8447, "step": 8202 }, { "epoch": 0.400537109375, "grad_norm": 0.31063389778137207, "learning_rate": 0.0003501340581400769, "loss": 1.8364, "step": 8203 }, { "epoch": 0.4005859375, "grad_norm": 0.3165314495563507, "learning_rate": 0.000350101040213399, "loss": 1.8257, "step": 8204 }, { "epoch": 0.400634765625, "grad_norm": 0.28747913241386414, "learning_rate": 0.00035006802046658707, "loss": 1.8336, "step": 8205 }, { "epoch": 0.40068359375, "grad_norm": 0.29763829708099365, "learning_rate": 0.0003500349989004414, "loss": 1.809, "step": 8206 }, { "epoch": 0.400732421875, "grad_norm": 0.3565085232257843, "learning_rate": 0.00035000197551576226, "loss": 1.83, "step": 8207 }, { "epoch": 0.40078125, "grad_norm": 0.30621668696403503, "learning_rate": 0.00034996895031334997, "loss": 1.8141, "step": 8208 }, { "epoch": 0.400830078125, "grad_norm": 0.316310316324234, "learning_rate": 0.00034993592329400484, "loss": 1.8255, "step": 8209 }, { "epoch": 0.40087890625, "grad_norm": 0.3463835120201111, "learning_rate": 0.0003499028944585275, "loss": 1.8481, "step": 8210 }, { "epoch": 0.400927734375, "grad_norm": 0.2567107081413269, "learning_rate": 0.00034986986380771833, "loss": 1.8011, "step": 8211 }, { "epoch": 0.4009765625, "grad_norm": 0.3222580552101135, "learning_rate": 0.00034983683134237793, "loss": 1.808, "step": 8212 }, { "epoch": 0.401025390625, "grad_norm": 0.3041970729827881, "learning_rate": 0.0003498037970633067, "loss": 1.8326, "step": 8213 }, { "epoch": 0.40107421875, "grad_norm": 0.2506358027458191, "learning_rate": 0.0003497707609713054, "loss": 1.8095, "step": 8214 }, { "epoch": 0.401123046875, "grad_norm": 0.3239516615867615, "learning_rate": 0.00034973772306717463, "loss": 1.816, "step": 8215 }, { "epoch": 0.401171875, "grad_norm": 0.30584022402763367, "learning_rate": 0.00034970468335171506, "loss": 1.8229, "step": 8216 }, { "epoch": 0.401220703125, "grad_norm": 0.2803587317466736, "learning_rate": 0.00034967164182572746, "loss": 1.8443, "step": 8217 }, { "epoch": 0.40126953125, "grad_norm": 0.2904149889945984, "learning_rate": 0.0003496385984900127, "loss": 1.8394, "step": 8218 }, { "epoch": 0.401318359375, "grad_norm": 0.270506888628006, "learning_rate": 0.0003496055533453715, "loss": 1.818, "step": 8219 }, { "epoch": 0.4013671875, "grad_norm": 0.28069067001342773, "learning_rate": 0.0003495725063926048, "loss": 1.839, "step": 8220 }, { "epoch": 0.401416015625, "grad_norm": 0.287564754486084, "learning_rate": 0.0003495394576325134, "loss": 1.7949, "step": 8221 }, { "epoch": 0.40146484375, "grad_norm": 0.22896189987659454, "learning_rate": 0.0003495064070658985, "loss": 1.8192, "step": 8222 }, { "epoch": 0.401513671875, "grad_norm": 0.2966395318508148, "learning_rate": 0.0003494733546935609, "loss": 1.7886, "step": 8223 }, { "epoch": 0.4015625, "grad_norm": 0.25792044401168823, "learning_rate": 0.0003494403005163017, "loss": 1.8146, "step": 8224 }, { "epoch": 0.401611328125, "grad_norm": 0.247869074344635, "learning_rate": 0.0003494072445349221, "loss": 1.8264, "step": 8225 }, { "epoch": 0.40166015625, "grad_norm": 0.28744494915008545, "learning_rate": 0.00034937418675022304, "loss": 1.8251, "step": 8226 }, { "epoch": 0.401708984375, "grad_norm": 0.2576946020126343, "learning_rate": 0.00034934112716300585, "loss": 1.7971, "step": 8227 }, { "epoch": 0.4017578125, "grad_norm": 0.37481629848480225, "learning_rate": 0.00034930806577407173, "loss": 1.8411, "step": 8228 }, { "epoch": 0.401806640625, "grad_norm": 0.31660324335098267, "learning_rate": 0.0003492750025842219, "loss": 1.8067, "step": 8229 }, { "epoch": 0.40185546875, "grad_norm": 0.3086322546005249, "learning_rate": 0.0003492419375942578, "loss": 1.8018, "step": 8230 }, { "epoch": 0.401904296875, "grad_norm": 0.29579368233680725, "learning_rate": 0.00034920887080498064, "loss": 1.8179, "step": 8231 }, { "epoch": 0.401953125, "grad_norm": 0.36354610323905945, "learning_rate": 0.00034917580221719194, "loss": 1.8535, "step": 8232 }, { "epoch": 0.402001953125, "grad_norm": 0.34261104464530945, "learning_rate": 0.000349142731831693, "loss": 1.8324, "step": 8233 }, { "epoch": 0.40205078125, "grad_norm": 0.25637874007225037, "learning_rate": 0.0003491096596492854, "loss": 1.8218, "step": 8234 }, { "epoch": 0.402099609375, "grad_norm": 0.3923896849155426, "learning_rate": 0.00034907658567077065, "loss": 1.8233, "step": 8235 }, { "epoch": 0.4021484375, "grad_norm": 0.30192938446998596, "learning_rate": 0.00034904350989695034, "loss": 1.8225, "step": 8236 }, { "epoch": 0.402197265625, "grad_norm": 0.30459269881248474, "learning_rate": 0.0003490104323286261, "loss": 1.8174, "step": 8237 }, { "epoch": 0.40224609375, "grad_norm": 0.273771196603775, "learning_rate": 0.0003489773529665996, "loss": 1.8175, "step": 8238 }, { "epoch": 0.402294921875, "grad_norm": 0.2982422709465027, "learning_rate": 0.00034894427181167247, "loss": 1.8289, "step": 8239 }, { "epoch": 0.40234375, "grad_norm": 0.2651934325695038, "learning_rate": 0.0003489111888646465, "loss": 1.8308, "step": 8240 }, { "epoch": 0.402392578125, "grad_norm": 0.23350033164024353, "learning_rate": 0.00034887810412632356, "loss": 1.8308, "step": 8241 }, { "epoch": 0.40244140625, "grad_norm": 0.24872498214244843, "learning_rate": 0.0003488450175975053, "loss": 1.8353, "step": 8242 }, { "epoch": 0.402490234375, "grad_norm": 0.2322542518377304, "learning_rate": 0.0003488119292789938, "loss": 1.8296, "step": 8243 }, { "epoch": 0.4025390625, "grad_norm": 0.2579250633716583, "learning_rate": 0.0003487788391715909, "loss": 1.7852, "step": 8244 }, { "epoch": 0.402587890625, "grad_norm": 0.22047407925128937, "learning_rate": 0.00034874574727609855, "loss": 1.8462, "step": 8245 }, { "epoch": 0.40263671875, "grad_norm": 0.25517311692237854, "learning_rate": 0.00034871265359331867, "loss": 1.783, "step": 8246 }, { "epoch": 0.402685546875, "grad_norm": 0.26986968517303467, "learning_rate": 0.00034867955812405355, "loss": 1.8488, "step": 8247 }, { "epoch": 0.402734375, "grad_norm": 0.29420143365859985, "learning_rate": 0.00034864646086910507, "loss": 1.8223, "step": 8248 }, { "epoch": 0.402783203125, "grad_norm": 0.2896799147129059, "learning_rate": 0.0003486133618292755, "loss": 1.8146, "step": 8249 }, { "epoch": 0.40283203125, "grad_norm": 0.2947719991207123, "learning_rate": 0.0003485802610053668, "loss": 1.8164, "step": 8250 }, { "epoch": 0.402880859375, "grad_norm": 0.2609063684940338, "learning_rate": 0.00034854715839818156, "loss": 1.7944, "step": 8251 }, { "epoch": 0.4029296875, "grad_norm": 0.2630797326564789, "learning_rate": 0.00034851405400852174, "loss": 1.8289, "step": 8252 }, { "epoch": 0.402978515625, "grad_norm": 0.27599748969078064, "learning_rate": 0.00034848094783718984, "loss": 1.8256, "step": 8253 }, { "epoch": 0.40302734375, "grad_norm": 0.25409072637557983, "learning_rate": 0.00034844783988498805, "loss": 1.8125, "step": 8254 }, { "epoch": 0.403076171875, "grad_norm": 0.24908949434757233, "learning_rate": 0.0003484147301527189, "loss": 1.8052, "step": 8255 }, { "epoch": 0.403125, "grad_norm": 0.30270740389823914, "learning_rate": 0.00034838161864118476, "loss": 1.8109, "step": 8256 }, { "epoch": 0.403173828125, "grad_norm": 0.28498750925064087, "learning_rate": 0.0003483485053511882, "loss": 1.8129, "step": 8257 }, { "epoch": 0.40322265625, "grad_norm": 0.2615811228752136, "learning_rate": 0.00034831539028353165, "loss": 1.7989, "step": 8258 }, { "epoch": 0.403271484375, "grad_norm": 0.3389129936695099, "learning_rate": 0.0003482822734390177, "loss": 1.8312, "step": 8259 }, { "epoch": 0.4033203125, "grad_norm": 0.33913934230804443, "learning_rate": 0.000348249154818449, "loss": 1.8468, "step": 8260 }, { "epoch": 0.403369140625, "grad_norm": 0.25082361698150635, "learning_rate": 0.0003482160344226281, "loss": 1.8424, "step": 8261 }, { "epoch": 0.40341796875, "grad_norm": 0.31133508682250977, "learning_rate": 0.00034818291225235785, "loss": 1.8259, "step": 8262 }, { "epoch": 0.403466796875, "grad_norm": 0.3126937448978424, "learning_rate": 0.0003481497883084409, "loss": 1.8273, "step": 8263 }, { "epoch": 0.403515625, "grad_norm": 0.23906996846199036, "learning_rate": 0.0003481166625916802, "loss": 1.8022, "step": 8264 }, { "epoch": 0.403564453125, "grad_norm": 0.275346577167511, "learning_rate": 0.00034808353510287825, "loss": 1.8087, "step": 8265 }, { "epoch": 0.40361328125, "grad_norm": 0.22670163214206696, "learning_rate": 0.00034805040584283826, "loss": 1.8372, "step": 8266 }, { "epoch": 0.403662109375, "grad_norm": 0.2167787253856659, "learning_rate": 0.0003480172748123629, "loss": 1.8105, "step": 8267 }, { "epoch": 0.4037109375, "grad_norm": 0.22067895531654358, "learning_rate": 0.0003479841420122553, "loss": 1.8224, "step": 8268 }, { "epoch": 0.403759765625, "grad_norm": 0.21722526848316193, "learning_rate": 0.00034795100744331825, "loss": 1.8192, "step": 8269 }, { "epoch": 0.40380859375, "grad_norm": 0.2612757682800293, "learning_rate": 0.000347917871106355, "loss": 1.8339, "step": 8270 }, { "epoch": 0.403857421875, "grad_norm": 0.25586366653442383, "learning_rate": 0.0003478847330021686, "loss": 1.8338, "step": 8271 }, { "epoch": 0.40390625, "grad_norm": 0.29680636525154114, "learning_rate": 0.0003478515931315622, "loss": 1.805, "step": 8272 }, { "epoch": 0.403955078125, "grad_norm": 0.2811892330646515, "learning_rate": 0.0003478184514953388, "loss": 1.7778, "step": 8273 }, { "epoch": 0.40400390625, "grad_norm": 0.3712189793586731, "learning_rate": 0.00034778530809430173, "loss": 1.8126, "step": 8274 }, { "epoch": 0.404052734375, "grad_norm": 0.3486725986003876, "learning_rate": 0.0003477521629292543, "loss": 1.786, "step": 8275 }, { "epoch": 0.4041015625, "grad_norm": 0.2556104063987732, "learning_rate": 0.0003477190160009997, "loss": 1.8385, "step": 8276 }, { "epoch": 0.404150390625, "grad_norm": 0.25891241431236267, "learning_rate": 0.00034768586731034136, "loss": 1.8292, "step": 8277 }, { "epoch": 0.40419921875, "grad_norm": 0.3545498251914978, "learning_rate": 0.0003476527168580826, "loss": 1.8238, "step": 8278 }, { "epoch": 0.404248046875, "grad_norm": 0.28199124336242676, "learning_rate": 0.0003476195646450269, "loss": 1.7935, "step": 8279 }, { "epoch": 0.404296875, "grad_norm": 0.30420300364494324, "learning_rate": 0.00034758641067197764, "loss": 1.8249, "step": 8280 }, { "epoch": 0.404345703125, "grad_norm": 0.3188720643520355, "learning_rate": 0.00034755325493973855, "loss": 1.8306, "step": 8281 }, { "epoch": 0.40439453125, "grad_norm": 0.27540695667266846, "learning_rate": 0.00034752009744911294, "loss": 1.8026, "step": 8282 }, { "epoch": 0.404443359375, "grad_norm": 0.334586501121521, "learning_rate": 0.00034748693820090454, "loss": 1.8119, "step": 8283 }, { "epoch": 0.4044921875, "grad_norm": 0.2100050449371338, "learning_rate": 0.0003474537771959169, "loss": 1.8215, "step": 8284 }, { "epoch": 0.404541015625, "grad_norm": 0.3083016574382782, "learning_rate": 0.0003474206144349538, "loss": 1.8209, "step": 8285 }, { "epoch": 0.40458984375, "grad_norm": 0.23151953518390656, "learning_rate": 0.00034738744991881894, "loss": 1.7862, "step": 8286 }, { "epoch": 0.404638671875, "grad_norm": 0.25151118636131287, "learning_rate": 0.000347354283648316, "loss": 1.828, "step": 8287 }, { "epoch": 0.4046875, "grad_norm": 0.28202709555625916, "learning_rate": 0.00034732111562424894, "loss": 1.8137, "step": 8288 }, { "epoch": 0.404736328125, "grad_norm": 0.23534336686134338, "learning_rate": 0.0003472879458474216, "loss": 1.7811, "step": 8289 }, { "epoch": 0.40478515625, "grad_norm": 0.28537434339523315, "learning_rate": 0.0003472547743186377, "loss": 1.8088, "step": 8290 }, { "epoch": 0.404833984375, "grad_norm": 0.26034918427467346, "learning_rate": 0.0003472216010387014, "loss": 1.8209, "step": 8291 }, { "epoch": 0.4048828125, "grad_norm": 0.3042124807834625, "learning_rate": 0.0003471884260084165, "loss": 1.8088, "step": 8292 }, { "epoch": 0.404931640625, "grad_norm": 0.29876571893692017, "learning_rate": 0.0003471552492285871, "loss": 1.8278, "step": 8293 }, { "epoch": 0.40498046875, "grad_norm": 0.31333059072494507, "learning_rate": 0.00034712207070001735, "loss": 1.8453, "step": 8294 }, { "epoch": 0.405029296875, "grad_norm": 0.2908715009689331, "learning_rate": 0.0003470888904235111, "loss": 1.8201, "step": 8295 }, { "epoch": 0.405078125, "grad_norm": 0.2850976288318634, "learning_rate": 0.00034705570839987276, "loss": 1.8036, "step": 8296 }, { "epoch": 0.405126953125, "grad_norm": 0.2932301461696625, "learning_rate": 0.0003470225246299065, "loss": 1.8302, "step": 8297 }, { "epoch": 0.40517578125, "grad_norm": 0.3253802955150604, "learning_rate": 0.0003469893391144165, "loss": 1.8135, "step": 8298 }, { "epoch": 0.405224609375, "grad_norm": 0.31289780139923096, "learning_rate": 0.000346956151854207, "loss": 1.7966, "step": 8299 }, { "epoch": 0.4052734375, "grad_norm": 0.268371045589447, "learning_rate": 0.0003469229628500823, "loss": 1.8353, "step": 8300 }, { "epoch": 0.405322265625, "grad_norm": 0.2373555600643158, "learning_rate": 0.00034688977210284685, "loss": 1.8271, "step": 8301 }, { "epoch": 0.40537109375, "grad_norm": 0.2925189435482025, "learning_rate": 0.00034685657961330504, "loss": 1.8161, "step": 8302 }, { "epoch": 0.405419921875, "grad_norm": 0.2442227452993393, "learning_rate": 0.00034682338538226127, "loss": 1.8499, "step": 8303 }, { "epoch": 0.40546875, "grad_norm": 0.2471315711736679, "learning_rate": 0.00034679018941052, "loss": 1.8294, "step": 8304 }, { "epoch": 0.405517578125, "grad_norm": 0.2861884832382202, "learning_rate": 0.00034675699169888586, "loss": 1.8001, "step": 8305 }, { "epoch": 0.40556640625, "grad_norm": 0.33595946431159973, "learning_rate": 0.00034672379224816346, "loss": 1.8199, "step": 8306 }, { "epoch": 0.405615234375, "grad_norm": 0.24232222139835358, "learning_rate": 0.0003466905910591572, "loss": 1.8217, "step": 8307 }, { "epoch": 0.4056640625, "grad_norm": 0.3237062990665436, "learning_rate": 0.00034665738813267194, "loss": 1.8171, "step": 8308 }, { "epoch": 0.405712890625, "grad_norm": 0.29645270109176636, "learning_rate": 0.00034662418346951234, "loss": 1.7912, "step": 8309 }, { "epoch": 0.40576171875, "grad_norm": 0.29188966751098633, "learning_rate": 0.00034659097707048303, "loss": 1.8254, "step": 8310 }, { "epoch": 0.405810546875, "grad_norm": 0.29664626717567444, "learning_rate": 0.0003465577689363889, "loss": 1.8322, "step": 8311 }, { "epoch": 0.405859375, "grad_norm": 0.19170555472373962, "learning_rate": 0.00034652455906803483, "loss": 1.822, "step": 8312 }, { "epoch": 0.405908203125, "grad_norm": 0.3081602156162262, "learning_rate": 0.00034649134746622554, "loss": 1.8159, "step": 8313 }, { "epoch": 0.40595703125, "grad_norm": 0.2705433666706085, "learning_rate": 0.00034645813413176603, "loss": 1.822, "step": 8314 }, { "epoch": 0.406005859375, "grad_norm": 0.2518172264099121, "learning_rate": 0.0003464249190654612, "loss": 1.8059, "step": 8315 }, { "epoch": 0.4060546875, "grad_norm": 0.29090389609336853, "learning_rate": 0.00034639170226811604, "loss": 1.8561, "step": 8316 }, { "epoch": 0.406103515625, "grad_norm": 0.223241925239563, "learning_rate": 0.0003463584837405357, "loss": 1.8314, "step": 8317 }, { "epoch": 0.40615234375, "grad_norm": 0.29574212431907654, "learning_rate": 0.0003463252634835252, "loss": 1.8381, "step": 8318 }, { "epoch": 0.406201171875, "grad_norm": 0.27544739842414856, "learning_rate": 0.0003462920414978895, "loss": 1.8275, "step": 8319 }, { "epoch": 0.40625, "grad_norm": 0.21604160964488983, "learning_rate": 0.000346258817784434, "loss": 1.7813, "step": 8320 }, { "epoch": 0.406298828125, "grad_norm": 0.27617090940475464, "learning_rate": 0.0003462255923439638, "loss": 1.8322, "step": 8321 }, { "epoch": 0.40634765625, "grad_norm": 0.29037296772003174, "learning_rate": 0.0003461923651772841, "loss": 1.8404, "step": 8322 }, { "epoch": 0.406396484375, "grad_norm": 0.2681865394115448, "learning_rate": 0.0003461591362852003, "loss": 1.8062, "step": 8323 }, { "epoch": 0.4064453125, "grad_norm": 0.23029281198978424, "learning_rate": 0.0003461259056685176, "loss": 1.8521, "step": 8324 }, { "epoch": 0.406494140625, "grad_norm": 0.2795652449131012, "learning_rate": 0.0003460926733280415, "loss": 1.8403, "step": 8325 }, { "epoch": 0.40654296875, "grad_norm": 0.31138578057289124, "learning_rate": 0.0003460594392645773, "loss": 1.8168, "step": 8326 }, { "epoch": 0.406591796875, "grad_norm": 0.32371944189071655, "learning_rate": 0.00034602620347893047, "loss": 1.8261, "step": 8327 }, { "epoch": 0.406640625, "grad_norm": 0.26528871059417725, "learning_rate": 0.0003459929659719066, "loss": 1.789, "step": 8328 }, { "epoch": 0.406689453125, "grad_norm": 0.31129002571105957, "learning_rate": 0.0003459597267443111, "loss": 1.8029, "step": 8329 }, { "epoch": 0.40673828125, "grad_norm": 0.3346090018749237, "learning_rate": 0.0003459264857969497, "loss": 1.8118, "step": 8330 }, { "epoch": 0.406787109375, "grad_norm": 0.25932055711746216, "learning_rate": 0.00034589324313062794, "loss": 1.8194, "step": 8331 }, { "epoch": 0.4068359375, "grad_norm": 0.257973313331604, "learning_rate": 0.0003458599987461514, "loss": 1.8115, "step": 8332 }, { "epoch": 0.406884765625, "grad_norm": 0.2598167657852173, "learning_rate": 0.0003458267526443259, "loss": 1.8193, "step": 8333 }, { "epoch": 0.40693359375, "grad_norm": 0.2571287155151367, "learning_rate": 0.00034579350482595713, "loss": 1.8031, "step": 8334 }, { "epoch": 0.406982421875, "grad_norm": 0.2277364730834961, "learning_rate": 0.0003457602552918509, "loss": 1.8486, "step": 8335 }, { "epoch": 0.40703125, "grad_norm": 0.2603326141834259, "learning_rate": 0.000345727004042813, "loss": 1.8084, "step": 8336 }, { "epoch": 0.407080078125, "grad_norm": 0.24756279587745667, "learning_rate": 0.0003456937510796494, "loss": 1.792, "step": 8337 }, { "epoch": 0.40712890625, "grad_norm": 0.25560957193374634, "learning_rate": 0.0003456604964031659, "loss": 1.7955, "step": 8338 }, { "epoch": 0.407177734375, "grad_norm": 0.2512604892253876, "learning_rate": 0.0003456272400141685, "loss": 1.7958, "step": 8339 }, { "epoch": 0.4072265625, "grad_norm": 0.2672517001628876, "learning_rate": 0.0003455939819134633, "loss": 1.797, "step": 8340 }, { "epoch": 0.407275390625, "grad_norm": 0.2738495171070099, "learning_rate": 0.00034556072210185614, "loss": 1.8256, "step": 8341 }, { "epoch": 0.40732421875, "grad_norm": 0.25113311409950256, "learning_rate": 0.00034552746058015316, "loss": 1.8124, "step": 8342 }, { "epoch": 0.407373046875, "grad_norm": 0.3042376637458801, "learning_rate": 0.0003454941973491606, "loss": 1.8226, "step": 8343 }, { "epoch": 0.407421875, "grad_norm": 0.30659952759742737, "learning_rate": 0.00034546093240968447, "loss": 1.8063, "step": 8344 }, { "epoch": 0.407470703125, "grad_norm": 0.26711463928222656, "learning_rate": 0.000345427665762531, "loss": 1.8188, "step": 8345 }, { "epoch": 0.40751953125, "grad_norm": 0.26950061321258545, "learning_rate": 0.00034539439740850655, "loss": 1.8154, "step": 8346 }, { "epoch": 0.407568359375, "grad_norm": 0.24790361523628235, "learning_rate": 0.0003453611273484173, "loss": 1.8189, "step": 8347 }, { "epoch": 0.4076171875, "grad_norm": 0.27154600620269775, "learning_rate": 0.0003453278555830696, "loss": 1.8247, "step": 8348 }, { "epoch": 0.407666015625, "grad_norm": 0.21262633800506592, "learning_rate": 0.00034529458211326986, "loss": 1.8344, "step": 8349 }, { "epoch": 0.40771484375, "grad_norm": 0.27434593439102173, "learning_rate": 0.00034526130693982444, "loss": 1.8248, "step": 8350 }, { "epoch": 0.407763671875, "grad_norm": 0.28263401985168457, "learning_rate": 0.00034522803006353976, "loss": 1.8191, "step": 8351 }, { "epoch": 0.4078125, "grad_norm": 0.23082540929317474, "learning_rate": 0.00034519475148522236, "loss": 1.8046, "step": 8352 }, { "epoch": 0.407861328125, "grad_norm": 0.3108607232570648, "learning_rate": 0.0003451614712056788, "loss": 1.8329, "step": 8353 }, { "epoch": 0.40791015625, "grad_norm": 0.2837824523448944, "learning_rate": 0.0003451281892257155, "loss": 1.825, "step": 8354 }, { "epoch": 0.407958984375, "grad_norm": 0.29135024547576904, "learning_rate": 0.00034509490554613927, "loss": 1.8058, "step": 8355 }, { "epoch": 0.4080078125, "grad_norm": 0.2869505286216736, "learning_rate": 0.0003450616201677568, "loss": 1.8109, "step": 8356 }, { "epoch": 0.408056640625, "grad_norm": 0.27341213822364807, "learning_rate": 0.00034502833309137457, "loss": 1.8425, "step": 8357 }, { "epoch": 0.40810546875, "grad_norm": 0.31987378001213074, "learning_rate": 0.0003449950443177994, "loss": 1.8132, "step": 8358 }, { "epoch": 0.408154296875, "grad_norm": 0.2727740406990051, "learning_rate": 0.0003449617538478381, "loss": 1.8408, "step": 8359 }, { "epoch": 0.408203125, "grad_norm": 0.2787408232688904, "learning_rate": 0.00034492846168229747, "loss": 1.813, "step": 8360 }, { "epoch": 0.408251953125, "grad_norm": 0.28532978892326355, "learning_rate": 0.0003448951678219845, "loss": 1.8253, "step": 8361 }, { "epoch": 0.40830078125, "grad_norm": 0.25217390060424805, "learning_rate": 0.0003448618722677059, "loss": 1.8182, "step": 8362 }, { "epoch": 0.408349609375, "grad_norm": 0.2451595515012741, "learning_rate": 0.00034482857502026864, "loss": 1.8163, "step": 8363 }, { "epoch": 0.4083984375, "grad_norm": 0.2709900736808777, "learning_rate": 0.0003447952760804799, "loss": 1.8269, "step": 8364 }, { "epoch": 0.408447265625, "grad_norm": 0.2793959975242615, "learning_rate": 0.00034476197544914655, "loss": 1.8128, "step": 8365 }, { "epoch": 0.40849609375, "grad_norm": 0.26111292839050293, "learning_rate": 0.00034472867312707565, "loss": 1.8394, "step": 8366 }, { "epoch": 0.408544921875, "grad_norm": 0.25377437472343445, "learning_rate": 0.00034469536911507434, "loss": 1.8124, "step": 8367 }, { "epoch": 0.40859375, "grad_norm": 0.33461180329322815, "learning_rate": 0.00034466206341394974, "loss": 1.8098, "step": 8368 }, { "epoch": 0.408642578125, "grad_norm": 0.33506473898887634, "learning_rate": 0.00034462875602450904, "loss": 1.8218, "step": 8369 }, { "epoch": 0.40869140625, "grad_norm": 0.3121296167373657, "learning_rate": 0.0003445954469475596, "loss": 1.7875, "step": 8370 }, { "epoch": 0.408740234375, "grad_norm": 0.25870880484580994, "learning_rate": 0.0003445621361839085, "loss": 1.7925, "step": 8371 }, { "epoch": 0.4087890625, "grad_norm": 0.292266309261322, "learning_rate": 0.0003445288237343632, "loss": 1.8106, "step": 8372 }, { "epoch": 0.408837890625, "grad_norm": 0.36115750670433044, "learning_rate": 0.00034449550959973096, "loss": 1.8272, "step": 8373 }, { "epoch": 0.40888671875, "grad_norm": 0.28367000818252563, "learning_rate": 0.00034446219378081927, "loss": 1.8071, "step": 8374 }, { "epoch": 0.408935546875, "grad_norm": 0.30537083745002747, "learning_rate": 0.0003444288762784354, "loss": 1.7951, "step": 8375 }, { "epoch": 0.408984375, "grad_norm": 0.33041802048683167, "learning_rate": 0.00034439555709338704, "loss": 1.8212, "step": 8376 }, { "epoch": 0.409033203125, "grad_norm": 0.2912524342536926, "learning_rate": 0.0003443622362264816, "loss": 1.8549, "step": 8377 }, { "epoch": 0.40908203125, "grad_norm": 0.24093399941921234, "learning_rate": 0.0003443289136785266, "loss": 1.8146, "step": 8378 }, { "epoch": 0.409130859375, "grad_norm": 0.27381622791290283, "learning_rate": 0.0003442955894503297, "loss": 1.8168, "step": 8379 }, { "epoch": 0.4091796875, "grad_norm": 0.26814520359039307, "learning_rate": 0.0003442622635426985, "loss": 1.831, "step": 8380 }, { "epoch": 0.409228515625, "grad_norm": 0.2685510516166687, "learning_rate": 0.0003442289359564407, "loss": 1.8511, "step": 8381 }, { "epoch": 0.40927734375, "grad_norm": 0.26312515139579773, "learning_rate": 0.000344195606692364, "loss": 1.8272, "step": 8382 }, { "epoch": 0.409326171875, "grad_norm": 0.20071539282798767, "learning_rate": 0.00034416227575127616, "loss": 1.8186, "step": 8383 }, { "epoch": 0.409375, "grad_norm": 0.29275399446487427, "learning_rate": 0.00034412894313398505, "loss": 1.8475, "step": 8384 }, { "epoch": 0.409423828125, "grad_norm": 0.29890620708465576, "learning_rate": 0.0003440956088412985, "loss": 1.8268, "step": 8385 }, { "epoch": 0.40947265625, "grad_norm": 0.2608203589916229, "learning_rate": 0.00034406227287402433, "loss": 1.7901, "step": 8386 }, { "epoch": 0.409521484375, "grad_norm": 0.21040843427181244, "learning_rate": 0.00034402893523297046, "loss": 1.8089, "step": 8387 }, { "epoch": 0.4095703125, "grad_norm": 0.2517189681529999, "learning_rate": 0.00034399559591894493, "loss": 1.8063, "step": 8388 }, { "epoch": 0.409619140625, "grad_norm": 0.24133704602718353, "learning_rate": 0.0003439622549327557, "loss": 1.8028, "step": 8389 }, { "epoch": 0.40966796875, "grad_norm": 0.23244404792785645, "learning_rate": 0.00034392891227521074, "loss": 1.7943, "step": 8390 }, { "epoch": 0.409716796875, "grad_norm": 0.22532543540000916, "learning_rate": 0.0003438955679471183, "loss": 1.8217, "step": 8391 }, { "epoch": 0.409765625, "grad_norm": 0.2090921401977539, "learning_rate": 0.0003438622219492863, "loss": 1.812, "step": 8392 }, { "epoch": 0.409814453125, "grad_norm": 0.24136850237846375, "learning_rate": 0.0003438288742825232, "loss": 1.8137, "step": 8393 }, { "epoch": 0.40986328125, "grad_norm": 0.2339431345462799, "learning_rate": 0.00034379552494763694, "loss": 1.8108, "step": 8394 }, { "epoch": 0.409912109375, "grad_norm": 0.2418747991323471, "learning_rate": 0.0003437621739454359, "loss": 1.8198, "step": 8395 }, { "epoch": 0.4099609375, "grad_norm": 0.24679508805274963, "learning_rate": 0.00034372882127672833, "loss": 1.8306, "step": 8396 }, { "epoch": 0.410009765625, "grad_norm": 0.25491318106651306, "learning_rate": 0.0003436954669423225, "loss": 1.8143, "step": 8397 }, { "epoch": 0.41005859375, "grad_norm": 0.2222188115119934, "learning_rate": 0.0003436621109430269, "loss": 1.8188, "step": 8398 }, { "epoch": 0.410107421875, "grad_norm": 0.2505212426185608, "learning_rate": 0.0003436287532796499, "loss": 1.8231, "step": 8399 }, { "epoch": 0.41015625, "grad_norm": 0.2968711853027344, "learning_rate": 0.00034359539395299987, "loss": 1.8228, "step": 8400 }, { "epoch": 0.410205078125, "grad_norm": 0.29730361700057983, "learning_rate": 0.0003435620329638853, "loss": 1.8325, "step": 8401 }, { "epoch": 0.41025390625, "grad_norm": 0.2633632719516754, "learning_rate": 0.0003435286703131149, "loss": 1.8195, "step": 8402 }, { "epoch": 0.410302734375, "grad_norm": 0.32203084230422974, "learning_rate": 0.0003434953060014971, "loss": 1.7937, "step": 8403 }, { "epoch": 0.4103515625, "grad_norm": 0.2990414798259735, "learning_rate": 0.00034346194002984057, "loss": 1.7983, "step": 8404 }, { "epoch": 0.410400390625, "grad_norm": 0.3105506896972656, "learning_rate": 0.0003434285723989538, "loss": 1.8015, "step": 8405 }, { "epoch": 0.41044921875, "grad_norm": 0.28465592861175537, "learning_rate": 0.0003433952031096456, "loss": 1.85, "step": 8406 }, { "epoch": 0.410498046875, "grad_norm": 0.23002038896083832, "learning_rate": 0.00034336183216272483, "loss": 1.8096, "step": 8407 }, { "epoch": 0.410546875, "grad_norm": 0.2572599947452545, "learning_rate": 0.00034332845955900007, "loss": 1.835, "step": 8408 }, { "epoch": 0.410595703125, "grad_norm": 0.23929570615291595, "learning_rate": 0.00034329508529928014, "loss": 1.7953, "step": 8409 }, { "epoch": 0.41064453125, "grad_norm": 0.22163398563861847, "learning_rate": 0.0003432617093843741, "loss": 1.8069, "step": 8410 }, { "epoch": 0.410693359375, "grad_norm": 0.22084808349609375, "learning_rate": 0.00034322833181509054, "loss": 1.813, "step": 8411 }, { "epoch": 0.4107421875, "grad_norm": 0.21599939465522766, "learning_rate": 0.0003431949525922386, "loss": 1.8037, "step": 8412 }, { "epoch": 0.410791015625, "grad_norm": 0.2567177414894104, "learning_rate": 0.00034316157171662725, "loss": 1.8193, "step": 8413 }, { "epoch": 0.41083984375, "grad_norm": 0.36976176500320435, "learning_rate": 0.0003431281891890654, "loss": 1.8296, "step": 8414 }, { "epoch": 0.410888671875, "grad_norm": 0.29752588272094727, "learning_rate": 0.0003430948050103621, "loss": 1.822, "step": 8415 }, { "epoch": 0.4109375, "grad_norm": 0.2035011351108551, "learning_rate": 0.0003430614191813266, "loss": 1.8197, "step": 8416 }, { "epoch": 0.410986328125, "grad_norm": 0.26761454343795776, "learning_rate": 0.00034302803170276785, "loss": 1.8101, "step": 8417 }, { "epoch": 0.41103515625, "grad_norm": 0.28262490034103394, "learning_rate": 0.0003429946425754951, "loss": 1.8007, "step": 8418 }, { "epoch": 0.411083984375, "grad_norm": 0.27919310331344604, "learning_rate": 0.00034296125180031755, "loss": 1.8239, "step": 8419 }, { "epoch": 0.4111328125, "grad_norm": 0.256512850522995, "learning_rate": 0.00034292785937804456, "loss": 1.8342, "step": 8420 }, { "epoch": 0.411181640625, "grad_norm": 0.2670685648918152, "learning_rate": 0.0003428944653094852, "loss": 1.7767, "step": 8421 }, { "epoch": 0.41123046875, "grad_norm": 0.2316499799489975, "learning_rate": 0.000342861069595449, "loss": 1.8087, "step": 8422 }, { "epoch": 0.411279296875, "grad_norm": 0.2585970163345337, "learning_rate": 0.00034282767223674525, "loss": 1.806, "step": 8423 }, { "epoch": 0.411328125, "grad_norm": 0.28675445914268494, "learning_rate": 0.00034279427323418343, "loss": 1.8101, "step": 8424 }, { "epoch": 0.411376953125, "grad_norm": 0.23164477944374084, "learning_rate": 0.00034276087258857285, "loss": 1.8182, "step": 8425 }, { "epoch": 0.41142578125, "grad_norm": 0.30414730310440063, "learning_rate": 0.0003427274703007231, "loss": 1.8084, "step": 8426 }, { "epoch": 0.411474609375, "grad_norm": 0.29467543959617615, "learning_rate": 0.00034269406637144366, "loss": 1.8125, "step": 8427 }, { "epoch": 0.4115234375, "grad_norm": 0.24923036992549896, "learning_rate": 0.0003426606608015442, "loss": 1.82, "step": 8428 }, { "epoch": 0.411572265625, "grad_norm": 0.32275357842445374, "learning_rate": 0.00034262725359183424, "loss": 1.8349, "step": 8429 }, { "epoch": 0.41162109375, "grad_norm": 0.2701428532600403, "learning_rate": 0.00034259384474312346, "loss": 1.8356, "step": 8430 }, { "epoch": 0.411669921875, "grad_norm": 0.2770915627479553, "learning_rate": 0.00034256043425622153, "loss": 1.8246, "step": 8431 }, { "epoch": 0.41171875, "grad_norm": 0.3086055815219879, "learning_rate": 0.0003425270221319383, "loss": 1.8179, "step": 8432 }, { "epoch": 0.411767578125, "grad_norm": 0.21907299757003784, "learning_rate": 0.00034249360837108336, "loss": 1.8061, "step": 8433 }, { "epoch": 0.41181640625, "grad_norm": 0.29921114444732666, "learning_rate": 0.0003424601929744666, "loss": 1.8242, "step": 8434 }, { "epoch": 0.411865234375, "grad_norm": 0.29137539863586426, "learning_rate": 0.00034242677594289783, "loss": 1.7991, "step": 8435 }, { "epoch": 0.4119140625, "grad_norm": 0.22705449163913727, "learning_rate": 0.00034239335727718703, "loss": 1.8259, "step": 8436 }, { "epoch": 0.411962890625, "grad_norm": 0.3130621910095215, "learning_rate": 0.000342359936978144, "loss": 1.8302, "step": 8437 }, { "epoch": 0.41201171875, "grad_norm": 0.311628520488739, "learning_rate": 0.0003423265150465788, "loss": 1.8432, "step": 8438 }, { "epoch": 0.412060546875, "grad_norm": 0.2689840793609619, "learning_rate": 0.0003422930914833015, "loss": 1.8003, "step": 8439 }, { "epoch": 0.412109375, "grad_norm": 0.23505830764770508, "learning_rate": 0.000342259666289122, "loss": 1.7963, "step": 8440 }, { "epoch": 0.412158203125, "grad_norm": 0.3420777916908264, "learning_rate": 0.0003422262394648505, "loss": 1.8269, "step": 8441 }, { "epoch": 0.41220703125, "grad_norm": 0.3381449282169342, "learning_rate": 0.000342192811011297, "loss": 1.8263, "step": 8442 }, { "epoch": 0.412255859375, "grad_norm": 0.25229114294052124, "learning_rate": 0.00034215938092927177, "loss": 1.8225, "step": 8443 }, { "epoch": 0.4123046875, "grad_norm": 0.3253041207790375, "learning_rate": 0.0003421259492195851, "loss": 1.816, "step": 8444 }, { "epoch": 0.412353515625, "grad_norm": 0.25695550441741943, "learning_rate": 0.00034209251588304695, "loss": 1.8351, "step": 8445 }, { "epoch": 0.41240234375, "grad_norm": 0.274305135011673, "learning_rate": 0.00034205908092046784, "loss": 1.8077, "step": 8446 }, { "epoch": 0.412451171875, "grad_norm": 0.35506242513656616, "learning_rate": 0.00034202564433265806, "loss": 1.8137, "step": 8447 }, { "epoch": 0.4125, "grad_norm": 0.28208693861961365, "learning_rate": 0.0003419922061204279, "loss": 1.8309, "step": 8448 }, { "epoch": 0.412548828125, "grad_norm": 0.36462825536727905, "learning_rate": 0.0003419587662845879, "loss": 1.8164, "step": 8449 }, { "epoch": 0.41259765625, "grad_norm": 0.32832419872283936, "learning_rate": 0.0003419253248259483, "loss": 1.8403, "step": 8450 }, { "epoch": 0.412646484375, "grad_norm": 0.251881867647171, "learning_rate": 0.0003418918817453197, "loss": 1.827, "step": 8451 }, { "epoch": 0.4126953125, "grad_norm": 0.41216716170310974, "learning_rate": 0.00034185843704351266, "loss": 1.8551, "step": 8452 }, { "epoch": 0.412744140625, "grad_norm": 0.3049843907356262, "learning_rate": 0.0003418249907213376, "loss": 1.8192, "step": 8453 }, { "epoch": 0.41279296875, "grad_norm": 0.28574103116989136, "learning_rate": 0.00034179154277960526, "loss": 1.8056, "step": 8454 }, { "epoch": 0.412841796875, "grad_norm": 0.2964179515838623, "learning_rate": 0.0003417580932191262, "loss": 1.8315, "step": 8455 }, { "epoch": 0.412890625, "grad_norm": 0.27573361992836, "learning_rate": 0.00034172464204071113, "loss": 1.8317, "step": 8456 }, { "epoch": 0.412939453125, "grad_norm": 0.32569417357444763, "learning_rate": 0.00034169118924517073, "loss": 1.8461, "step": 8457 }, { "epoch": 0.41298828125, "grad_norm": 0.27852663397789, "learning_rate": 0.00034165773483331587, "loss": 1.8017, "step": 8458 }, { "epoch": 0.413037109375, "grad_norm": 0.3100586533546448, "learning_rate": 0.00034162427880595717, "loss": 1.8176, "step": 8459 }, { "epoch": 0.4130859375, "grad_norm": 0.26840826869010925, "learning_rate": 0.00034159082116390555, "loss": 1.7936, "step": 8460 }, { "epoch": 0.413134765625, "grad_norm": 0.27654725313186646, "learning_rate": 0.0003415573619079719, "loss": 1.8487, "step": 8461 }, { "epoch": 0.41318359375, "grad_norm": 0.2793533504009247, "learning_rate": 0.00034152390103896706, "loss": 1.8078, "step": 8462 }, { "epoch": 0.413232421875, "grad_norm": 0.2521665394306183, "learning_rate": 0.0003414904385577021, "loss": 1.8334, "step": 8463 }, { "epoch": 0.41328125, "grad_norm": 0.29007017612457275, "learning_rate": 0.0003414569744649879, "loss": 1.8172, "step": 8464 }, { "epoch": 0.413330078125, "grad_norm": 0.23113051056861877, "learning_rate": 0.0003414235087616356, "loss": 1.8113, "step": 8465 }, { "epoch": 0.41337890625, "grad_norm": 0.2652663290500641, "learning_rate": 0.0003413900414484562, "loss": 1.8172, "step": 8466 }, { "epoch": 0.413427734375, "grad_norm": 0.23082825541496277, "learning_rate": 0.0003413565725262608, "loss": 1.8216, "step": 8467 }, { "epoch": 0.4134765625, "grad_norm": 0.2727552652359009, "learning_rate": 0.0003413231019958605, "loss": 1.805, "step": 8468 }, { "epoch": 0.413525390625, "grad_norm": 0.23692141473293304, "learning_rate": 0.0003412896298580666, "loss": 1.7908, "step": 8469 }, { "epoch": 0.41357421875, "grad_norm": 0.2338085025548935, "learning_rate": 0.0003412561561136902, "loss": 1.7989, "step": 8470 }, { "epoch": 0.413623046875, "grad_norm": 0.2674480080604553, "learning_rate": 0.0003412226807635426, "loss": 1.8187, "step": 8471 }, { "epoch": 0.413671875, "grad_norm": 0.23272891342639923, "learning_rate": 0.00034118920380843523, "loss": 1.8101, "step": 8472 }, { "epoch": 0.413720703125, "grad_norm": 0.2710433602333069, "learning_rate": 0.00034115572524917923, "loss": 1.8136, "step": 8473 }, { "epoch": 0.41376953125, "grad_norm": 0.3076707720756531, "learning_rate": 0.0003411222450865862, "loss": 1.8139, "step": 8474 }, { "epoch": 0.413818359375, "grad_norm": 0.2969793975353241, "learning_rate": 0.0003410887633214673, "loss": 1.8206, "step": 8475 }, { "epoch": 0.4138671875, "grad_norm": 0.2529924809932709, "learning_rate": 0.0003410552799546342, "loss": 1.7836, "step": 8476 }, { "epoch": 0.413916015625, "grad_norm": 0.24503161013126373, "learning_rate": 0.00034102179498689836, "loss": 1.8093, "step": 8477 }, { "epoch": 0.41396484375, "grad_norm": 0.25476425886154175, "learning_rate": 0.0003409883084190712, "loss": 1.8513, "step": 8478 }, { "epoch": 0.414013671875, "grad_norm": 0.2544201612472534, "learning_rate": 0.0003409548202519644, "loss": 1.7836, "step": 8479 }, { "epoch": 0.4140625, "grad_norm": 0.3165973722934723, "learning_rate": 0.00034092133048638955, "loss": 1.8009, "step": 8480 }, { "epoch": 0.414111328125, "grad_norm": 0.31829410791397095, "learning_rate": 0.0003408878391231583, "loss": 1.7979, "step": 8481 }, { "epoch": 0.41416015625, "grad_norm": 0.2381305694580078, "learning_rate": 0.00034085434616308233, "loss": 1.8159, "step": 8482 }, { "epoch": 0.414208984375, "grad_norm": 0.2513675391674042, "learning_rate": 0.0003408208516069734, "loss": 1.8397, "step": 8483 }, { "epoch": 0.4142578125, "grad_norm": 0.29321157932281494, "learning_rate": 0.0003407873554556432, "loss": 1.8297, "step": 8484 }, { "epoch": 0.414306640625, "grad_norm": 0.34627798199653625, "learning_rate": 0.00034075385770990365, "loss": 1.8053, "step": 8485 }, { "epoch": 0.41435546875, "grad_norm": 0.24059180915355682, "learning_rate": 0.0003407203583705665, "loss": 1.822, "step": 8486 }, { "epoch": 0.414404296875, "grad_norm": 0.32835808396339417, "learning_rate": 0.00034068685743844364, "loss": 1.8318, "step": 8487 }, { "epoch": 0.414453125, "grad_norm": 0.33725014328956604, "learning_rate": 0.000340653354914347, "loss": 1.8333, "step": 8488 }, { "epoch": 0.414501953125, "grad_norm": 0.37063896656036377, "learning_rate": 0.00034061985079908864, "loss": 1.8331, "step": 8489 }, { "epoch": 0.41455078125, "grad_norm": 0.319656640291214, "learning_rate": 0.0003405863450934804, "loss": 1.7787, "step": 8490 }, { "epoch": 0.414599609375, "grad_norm": 0.2460232526063919, "learning_rate": 0.00034055283779833444, "loss": 1.8184, "step": 8491 }, { "epoch": 0.4146484375, "grad_norm": 0.3231859505176544, "learning_rate": 0.0003405193289144628, "loss": 1.8165, "step": 8492 }, { "epoch": 0.414697265625, "grad_norm": 0.3138924837112427, "learning_rate": 0.0003404858184426775, "loss": 1.8223, "step": 8493 }, { "epoch": 0.41474609375, "grad_norm": 0.2909587025642395, "learning_rate": 0.0003404523063837909, "loss": 1.8344, "step": 8494 }, { "epoch": 0.414794921875, "grad_norm": 0.3265673518180847, "learning_rate": 0.000340418792738615, "loss": 1.8156, "step": 8495 }, { "epoch": 0.41484375, "grad_norm": 0.2954567074775696, "learning_rate": 0.0003403852775079621, "loss": 1.7825, "step": 8496 }, { "epoch": 0.414892578125, "grad_norm": 0.30163154006004333, "learning_rate": 0.00034035176069264453, "loss": 1.8109, "step": 8497 }, { "epoch": 0.41494140625, "grad_norm": 0.2727010250091553, "learning_rate": 0.00034031824229347444, "loss": 1.8173, "step": 8498 }, { "epoch": 0.414990234375, "grad_norm": 0.2983124852180481, "learning_rate": 0.00034028472231126435, "loss": 1.8252, "step": 8499 }, { "epoch": 0.4150390625, "grad_norm": 0.30942562222480774, "learning_rate": 0.00034025120074682657, "loss": 1.8529, "step": 8500 }, { "epoch": 0.415087890625, "grad_norm": 0.2559092938899994, "learning_rate": 0.0003402176776009735, "loss": 1.8126, "step": 8501 }, { "epoch": 0.41513671875, "grad_norm": 0.313806414604187, "learning_rate": 0.0003401841528745176, "loss": 1.7986, "step": 8502 }, { "epoch": 0.415185546875, "grad_norm": 0.3060494363307953, "learning_rate": 0.0003401506265682714, "loss": 1.8174, "step": 8503 }, { "epoch": 0.415234375, "grad_norm": 0.30659806728363037, "learning_rate": 0.0003401170986830474, "loss": 1.8093, "step": 8504 }, { "epoch": 0.415283203125, "grad_norm": 0.3345370292663574, "learning_rate": 0.00034008356921965815, "loss": 1.7986, "step": 8505 }, { "epoch": 0.41533203125, "grad_norm": 0.2870822548866272, "learning_rate": 0.00034005003817891646, "loss": 1.8269, "step": 8506 }, { "epoch": 0.415380859375, "grad_norm": 0.2873004674911499, "learning_rate": 0.00034001650556163466, "loss": 1.7888, "step": 8507 }, { "epoch": 0.4154296875, "grad_norm": 0.2535567879676819, "learning_rate": 0.0003399829713686258, "loss": 1.8129, "step": 8508 }, { "epoch": 0.415478515625, "grad_norm": 0.234212264418602, "learning_rate": 0.0003399494356007023, "loss": 1.8183, "step": 8509 }, { "epoch": 0.41552734375, "grad_norm": 0.24309170246124268, "learning_rate": 0.00033991589825867706, "loss": 1.8049, "step": 8510 }, { "epoch": 0.415576171875, "grad_norm": 0.30191725492477417, "learning_rate": 0.00033988235934336286, "loss": 1.8176, "step": 8511 }, { "epoch": 0.415625, "grad_norm": 0.24246369302272797, "learning_rate": 0.0003398488188555726, "loss": 1.8205, "step": 8512 }, { "epoch": 0.415673828125, "grad_norm": 0.2616364359855652, "learning_rate": 0.00033981527679611905, "loss": 1.8086, "step": 8513 }, { "epoch": 0.41572265625, "grad_norm": 0.2995586395263672, "learning_rate": 0.00033978173316581527, "loss": 1.8121, "step": 8514 }, { "epoch": 0.415771484375, "grad_norm": 0.2159099280834198, "learning_rate": 0.00033974818796547415, "loss": 1.8254, "step": 8515 }, { "epoch": 0.4158203125, "grad_norm": 0.2687738537788391, "learning_rate": 0.0003397146411959086, "loss": 1.8157, "step": 8516 }, { "epoch": 0.415869140625, "grad_norm": 0.24996130168437958, "learning_rate": 0.0003396810928579318, "loss": 1.8013, "step": 8517 }, { "epoch": 0.41591796875, "grad_norm": 0.21032758057117462, "learning_rate": 0.00033964754295235674, "loss": 1.828, "step": 8518 }, { "epoch": 0.415966796875, "grad_norm": 0.29874172806739807, "learning_rate": 0.0003396139914799966, "loss": 1.806, "step": 8519 }, { "epoch": 0.416015625, "grad_norm": 0.2834281921386719, "learning_rate": 0.0003395804384416643, "loss": 1.8612, "step": 8520 }, { "epoch": 0.416064453125, "grad_norm": 0.30728498101234436, "learning_rate": 0.0003395468838381733, "loss": 1.8078, "step": 8521 }, { "epoch": 0.41611328125, "grad_norm": 0.28137627243995667, "learning_rate": 0.0003395133276703367, "loss": 1.8208, "step": 8522 }, { "epoch": 0.416162109375, "grad_norm": 0.2758216857910156, "learning_rate": 0.00033947976993896773, "loss": 1.8206, "step": 8523 }, { "epoch": 0.4162109375, "grad_norm": 0.2657955586910248, "learning_rate": 0.00033944621064487977, "loss": 1.8169, "step": 8524 }, { "epoch": 0.416259765625, "grad_norm": 0.26917019486427307, "learning_rate": 0.00033941264978888615, "loss": 1.8083, "step": 8525 }, { "epoch": 0.41630859375, "grad_norm": 0.2636278569698334, "learning_rate": 0.0003393790873718002, "loss": 1.7995, "step": 8526 }, { "epoch": 0.416357421875, "grad_norm": 0.340619295835495, "learning_rate": 0.00033934552339443533, "loss": 1.8184, "step": 8527 }, { "epoch": 0.41640625, "grad_norm": 0.3435891270637512, "learning_rate": 0.000339311957857605, "loss": 1.8229, "step": 8528 }, { "epoch": 0.416455078125, "grad_norm": 0.2354326993227005, "learning_rate": 0.0003392783907621227, "loss": 1.8261, "step": 8529 }, { "epoch": 0.41650390625, "grad_norm": 0.3009648025035858, "learning_rate": 0.000339244822108802, "loss": 1.8158, "step": 8530 }, { "epoch": 0.416552734375, "grad_norm": 0.2473926842212677, "learning_rate": 0.0003392112518984564, "loss": 1.8053, "step": 8531 }, { "epoch": 0.4166015625, "grad_norm": 0.2502731382846832, "learning_rate": 0.0003391776801318995, "loss": 1.8089, "step": 8532 }, { "epoch": 0.416650390625, "grad_norm": 0.2291051149368286, "learning_rate": 0.00033914410680994494, "loss": 1.8083, "step": 8533 }, { "epoch": 0.41669921875, "grad_norm": 0.22380134463310242, "learning_rate": 0.00033911053193340645, "loss": 1.8228, "step": 8534 }, { "epoch": 0.416748046875, "grad_norm": 0.20702187716960907, "learning_rate": 0.00033907695550309776, "loss": 1.8149, "step": 8535 }, { "epoch": 0.416796875, "grad_norm": 0.27205973863601685, "learning_rate": 0.0003390433775198325, "loss": 1.8313, "step": 8536 }, { "epoch": 0.416845703125, "grad_norm": 0.2405623495578766, "learning_rate": 0.00033900979798442456, "loss": 1.8003, "step": 8537 }, { "epoch": 0.41689453125, "grad_norm": 0.24474328756332397, "learning_rate": 0.00033897621689768775, "loss": 1.8112, "step": 8538 }, { "epoch": 0.416943359375, "grad_norm": 0.2894098162651062, "learning_rate": 0.0003389426342604359, "loss": 1.8289, "step": 8539 }, { "epoch": 0.4169921875, "grad_norm": 0.22348947823047638, "learning_rate": 0.000338909050073483, "loss": 1.8483, "step": 8540 }, { "epoch": 0.417041015625, "grad_norm": 0.2459537237882614, "learning_rate": 0.0003388754643376428, "loss": 1.8511, "step": 8541 }, { "epoch": 0.41708984375, "grad_norm": 0.2713260054588318, "learning_rate": 0.0003388418770537296, "loss": 1.8285, "step": 8542 }, { "epoch": 0.417138671875, "grad_norm": 0.26281386613845825, "learning_rate": 0.0003388082882225571, "loss": 1.8272, "step": 8543 }, { "epoch": 0.4171875, "grad_norm": 0.2620062828063965, "learning_rate": 0.00033877469784493946, "loss": 1.79, "step": 8544 }, { "epoch": 0.417236328125, "grad_norm": 0.27999335527420044, "learning_rate": 0.00033874110592169084, "loss": 1.8286, "step": 8545 }, { "epoch": 0.41728515625, "grad_norm": 0.2716372609138489, "learning_rate": 0.00033870751245362526, "loss": 1.8534, "step": 8546 }, { "epoch": 0.417333984375, "grad_norm": 0.2692275643348694, "learning_rate": 0.00033867391744155697, "loss": 1.8149, "step": 8547 }, { "epoch": 0.4173828125, "grad_norm": 0.28356122970581055, "learning_rate": 0.0003386403208863001, "loss": 1.8439, "step": 8548 }, { "epoch": 0.417431640625, "grad_norm": 0.28176358342170715, "learning_rate": 0.0003386067227886689, "loss": 1.8026, "step": 8549 }, { "epoch": 0.41748046875, "grad_norm": 0.23958957195281982, "learning_rate": 0.00033857312314947776, "loss": 1.8218, "step": 8550 }, { "epoch": 0.417529296875, "grad_norm": 0.24893809854984283, "learning_rate": 0.0003385395219695409, "loss": 1.8071, "step": 8551 }, { "epoch": 0.417578125, "grad_norm": 0.2609039545059204, "learning_rate": 0.00033850591924967254, "loss": 1.8149, "step": 8552 }, { "epoch": 0.417626953125, "grad_norm": 0.3024348318576813, "learning_rate": 0.0003384723149906873, "loss": 1.7981, "step": 8553 }, { "epoch": 0.41767578125, "grad_norm": 0.31123316287994385, "learning_rate": 0.0003384387091933995, "loss": 1.813, "step": 8554 }, { "epoch": 0.417724609375, "grad_norm": 0.27993541955947876, "learning_rate": 0.00033840510185862365, "loss": 1.798, "step": 8555 }, { "epoch": 0.4177734375, "grad_norm": 0.3059673011302948, "learning_rate": 0.0003383714929871742, "loss": 1.7939, "step": 8556 }, { "epoch": 0.417822265625, "grad_norm": 0.3035159409046173, "learning_rate": 0.0003383378825798657, "loss": 1.822, "step": 8557 }, { "epoch": 0.41787109375, "grad_norm": 0.23283962905406952, "learning_rate": 0.0003383042706375127, "loss": 1.8182, "step": 8558 }, { "epoch": 0.417919921875, "grad_norm": 0.2764996290206909, "learning_rate": 0.00033827065716092995, "loss": 1.8041, "step": 8559 }, { "epoch": 0.41796875, "grad_norm": 0.25843894481658936, "learning_rate": 0.00033823704215093196, "loss": 1.8274, "step": 8560 }, { "epoch": 0.418017578125, "grad_norm": 0.2149251103401184, "learning_rate": 0.0003382034256083334, "loss": 1.7894, "step": 8561 }, { "epoch": 0.41806640625, "grad_norm": 0.24653466045856476, "learning_rate": 0.00033816980753394903, "loss": 1.8119, "step": 8562 }, { "epoch": 0.418115234375, "grad_norm": 0.2448531687259674, "learning_rate": 0.00033813618792859364, "loss": 1.798, "step": 8563 }, { "epoch": 0.4181640625, "grad_norm": 0.24078701436519623, "learning_rate": 0.000338102566793082, "loss": 1.802, "step": 8564 }, { "epoch": 0.418212890625, "grad_norm": 0.33431440591812134, "learning_rate": 0.000338068944128229, "loss": 1.812, "step": 8565 }, { "epoch": 0.41826171875, "grad_norm": 0.26177486777305603, "learning_rate": 0.00033803531993484943, "loss": 1.8051, "step": 8566 }, { "epoch": 0.418310546875, "grad_norm": 0.22339527308940887, "learning_rate": 0.0003380016942137583, "loss": 1.8098, "step": 8567 }, { "epoch": 0.418359375, "grad_norm": 0.26661500334739685, "learning_rate": 0.0003379680669657704, "loss": 1.8059, "step": 8568 }, { "epoch": 0.418408203125, "grad_norm": 0.24169068038463593, "learning_rate": 0.0003379344381917009, "loss": 1.8079, "step": 8569 }, { "epoch": 0.41845703125, "grad_norm": 0.22313588857650757, "learning_rate": 0.0003379008078923647, "loss": 1.8101, "step": 8570 }, { "epoch": 0.418505859375, "grad_norm": 0.3366188108921051, "learning_rate": 0.00033786717606857687, "loss": 1.8102, "step": 8571 }, { "epoch": 0.4185546875, "grad_norm": 0.33135154843330383, "learning_rate": 0.0003378335427211525, "loss": 1.7887, "step": 8572 }, { "epoch": 0.418603515625, "grad_norm": 0.24099139869213104, "learning_rate": 0.0003377999078509068, "loss": 1.8125, "step": 8573 }, { "epoch": 0.41865234375, "grad_norm": 0.3083128333091736, "learning_rate": 0.00033776627145865485, "loss": 1.7962, "step": 8574 }, { "epoch": 0.418701171875, "grad_norm": 0.3015475869178772, "learning_rate": 0.0003377326335452119, "loss": 1.8209, "step": 8575 }, { "epoch": 0.41875, "grad_norm": 0.2943730056285858, "learning_rate": 0.0003376989941113932, "loss": 1.7998, "step": 8576 }, { "epoch": 0.418798828125, "grad_norm": 0.2985170781612396, "learning_rate": 0.00033766535315801397, "loss": 1.8198, "step": 8577 }, { "epoch": 0.41884765625, "grad_norm": 0.30446428060531616, "learning_rate": 0.00033763171068588955, "loss": 1.8162, "step": 8578 }, { "epoch": 0.418896484375, "grad_norm": 0.2753290832042694, "learning_rate": 0.00033759806669583526, "loss": 1.8381, "step": 8579 }, { "epoch": 0.4189453125, "grad_norm": 0.2825518846511841, "learning_rate": 0.00033756442118866657, "loss": 1.818, "step": 8580 }, { "epoch": 0.418994140625, "grad_norm": 0.2559652328491211, "learning_rate": 0.00033753077416519896, "loss": 1.8039, "step": 8581 }, { "epoch": 0.41904296875, "grad_norm": 0.3070569634437561, "learning_rate": 0.00033749712562624766, "loss": 1.8136, "step": 8582 }, { "epoch": 0.419091796875, "grad_norm": 0.28178104758262634, "learning_rate": 0.00033746347557262835, "loss": 1.8036, "step": 8583 }, { "epoch": 0.419140625, "grad_norm": 0.2428000569343567, "learning_rate": 0.0003374298240051566, "loss": 1.826, "step": 8584 }, { "epoch": 0.419189453125, "grad_norm": 0.2335439920425415, "learning_rate": 0.00033739617092464787, "loss": 1.8214, "step": 8585 }, { "epoch": 0.41923828125, "grad_norm": 0.26014187932014465, "learning_rate": 0.0003373625163319178, "loss": 1.7934, "step": 8586 }, { "epoch": 0.419287109375, "grad_norm": 0.24110092222690582, "learning_rate": 0.0003373288602277821, "loss": 1.7852, "step": 8587 }, { "epoch": 0.4193359375, "grad_norm": 0.24865250289440155, "learning_rate": 0.0003372952026130563, "loss": 1.8325, "step": 8588 }, { "epoch": 0.419384765625, "grad_norm": 0.24608586728572845, "learning_rate": 0.00033726154348855624, "loss": 1.8033, "step": 8589 }, { "epoch": 0.41943359375, "grad_norm": 0.24844712018966675, "learning_rate": 0.0003372278828550977, "loss": 1.8154, "step": 8590 }, { "epoch": 0.419482421875, "grad_norm": 0.24868282675743103, "learning_rate": 0.00033719422071349644, "loss": 1.7847, "step": 8591 }, { "epoch": 0.41953125, "grad_norm": 0.2503913342952728, "learning_rate": 0.0003371605570645682, "loss": 1.8088, "step": 8592 }, { "epoch": 0.419580078125, "grad_norm": 0.2644003629684448, "learning_rate": 0.000337126891909129, "loss": 1.821, "step": 8593 }, { "epoch": 0.41962890625, "grad_norm": 0.23545299470424652, "learning_rate": 0.00033709322524799463, "loss": 1.84, "step": 8594 }, { "epoch": 0.419677734375, "grad_norm": 0.22378423810005188, "learning_rate": 0.00033705955708198104, "loss": 1.7918, "step": 8595 }, { "epoch": 0.4197265625, "grad_norm": 0.25804880261421204, "learning_rate": 0.00033702588741190423, "loss": 1.8057, "step": 8596 }, { "epoch": 0.419775390625, "grad_norm": 0.23604954779148102, "learning_rate": 0.0003369922162385802, "loss": 1.8177, "step": 8597 }, { "epoch": 0.41982421875, "grad_norm": 0.30457955598831177, "learning_rate": 0.00033695854356282495, "loss": 1.8077, "step": 8598 }, { "epoch": 0.419873046875, "grad_norm": 0.33344417810440063, "learning_rate": 0.00033692486938545474, "loss": 1.8387, "step": 8599 }, { "epoch": 0.419921875, "grad_norm": 0.23614619672298431, "learning_rate": 0.0003368911937072855, "loss": 1.8218, "step": 8600 }, { "epoch": 0.419970703125, "grad_norm": 0.2863539457321167, "learning_rate": 0.00033685751652913357, "loss": 1.8002, "step": 8601 }, { "epoch": 0.42001953125, "grad_norm": 0.2822449803352356, "learning_rate": 0.00033682383785181493, "loss": 1.832, "step": 8602 }, { "epoch": 0.420068359375, "grad_norm": 0.24005845189094543, "learning_rate": 0.00033679015767614585, "loss": 1.7974, "step": 8603 }, { "epoch": 0.4201171875, "grad_norm": 0.2507474422454834, "learning_rate": 0.0003367564760029428, "loss": 1.7868, "step": 8604 }, { "epoch": 0.420166015625, "grad_norm": 0.3366287052631378, "learning_rate": 0.0003367227928330218, "loss": 1.814, "step": 8605 }, { "epoch": 0.42021484375, "grad_norm": 0.2899138629436493, "learning_rate": 0.00033668910816719936, "loss": 1.8412, "step": 8606 }, { "epoch": 0.420263671875, "grad_norm": 0.2588040232658386, "learning_rate": 0.0003366554220062919, "loss": 1.7941, "step": 8607 }, { "epoch": 0.4203125, "grad_norm": 0.29512977600097656, "learning_rate": 0.0003366217343511158, "loss": 1.8067, "step": 8608 }, { "epoch": 0.420361328125, "grad_norm": 0.24157823622226715, "learning_rate": 0.0003365880452024874, "loss": 1.8012, "step": 8609 }, { "epoch": 0.42041015625, "grad_norm": 0.23243309557437897, "learning_rate": 0.0003365543545612232, "loss": 1.8153, "step": 8610 }, { "epoch": 0.420458984375, "grad_norm": 0.23090314865112305, "learning_rate": 0.0003365206624281397, "loss": 1.8223, "step": 8611 }, { "epoch": 0.4205078125, "grad_norm": 0.220833420753479, "learning_rate": 0.0003364869688040536, "loss": 1.8034, "step": 8612 }, { "epoch": 0.420556640625, "grad_norm": 0.23751474916934967, "learning_rate": 0.00033645327368978146, "loss": 1.8036, "step": 8613 }, { "epoch": 0.42060546875, "grad_norm": 0.23442161083221436, "learning_rate": 0.0003364195770861398, "loss": 1.8203, "step": 8614 }, { "epoch": 0.420654296875, "grad_norm": 0.26003775000572205, "learning_rate": 0.00033638587899394536, "loss": 1.8431, "step": 8615 }, { "epoch": 0.420703125, "grad_norm": 0.27723148465156555, "learning_rate": 0.0003363521794140148, "loss": 1.7873, "step": 8616 }, { "epoch": 0.420751953125, "grad_norm": 0.24095571041107178, "learning_rate": 0.0003363184783471649, "loss": 1.8252, "step": 8617 }, { "epoch": 0.42080078125, "grad_norm": 0.2735576033592224, "learning_rate": 0.00033628477579421246, "loss": 1.8064, "step": 8618 }, { "epoch": 0.420849609375, "grad_norm": 0.3364335894584656, "learning_rate": 0.00033625107175597424, "loss": 1.8299, "step": 8619 }, { "epoch": 0.4208984375, "grad_norm": 0.29675137996673584, "learning_rate": 0.00033621736623326706, "loss": 1.8008, "step": 8620 }, { "epoch": 0.420947265625, "grad_norm": 0.25920310616493225, "learning_rate": 0.0003361836592269078, "loss": 1.8137, "step": 8621 }, { "epoch": 0.42099609375, "grad_norm": 0.2934381365776062, "learning_rate": 0.00033614995073771343, "loss": 1.817, "step": 8622 }, { "epoch": 0.421044921875, "grad_norm": 0.3469793498516083, "learning_rate": 0.00033611624076650083, "loss": 1.8292, "step": 8623 }, { "epoch": 0.42109375, "grad_norm": 0.36049336194992065, "learning_rate": 0.0003360825293140871, "loss": 1.814, "step": 8624 }, { "epoch": 0.421142578125, "grad_norm": 0.3278675079345703, "learning_rate": 0.0003360488163812892, "loss": 1.822, "step": 8625 }, { "epoch": 0.42119140625, "grad_norm": 0.2755569815635681, "learning_rate": 0.0003360151019689242, "loss": 1.7764, "step": 8626 }, { "epoch": 0.421240234375, "grad_norm": 0.2568809688091278, "learning_rate": 0.00033598138607780916, "loss": 1.8037, "step": 8627 }, { "epoch": 0.4212890625, "grad_norm": 0.27499571442604065, "learning_rate": 0.0003359476687087612, "loss": 1.7988, "step": 8628 }, { "epoch": 0.421337890625, "grad_norm": 0.29198792576789856, "learning_rate": 0.00033591394986259756, "loss": 1.8526, "step": 8629 }, { "epoch": 0.42138671875, "grad_norm": 0.2793978154659271, "learning_rate": 0.00033588022954013537, "loss": 1.8135, "step": 8630 }, { "epoch": 0.421435546875, "grad_norm": 0.3264940679073334, "learning_rate": 0.00033584650774219193, "loss": 1.8031, "step": 8631 }, { "epoch": 0.421484375, "grad_norm": 0.3188088834285736, "learning_rate": 0.0003358127844695845, "loss": 1.8128, "step": 8632 }, { "epoch": 0.421533203125, "grad_norm": 0.297213613986969, "learning_rate": 0.0003357790597231304, "loss": 1.8013, "step": 8633 }, { "epoch": 0.42158203125, "grad_norm": 0.27937525510787964, "learning_rate": 0.000335745333503647, "loss": 1.8229, "step": 8634 }, { "epoch": 0.421630859375, "grad_norm": 0.2663393020629883, "learning_rate": 0.00033571160581195153, "loss": 1.7945, "step": 8635 }, { "epoch": 0.4216796875, "grad_norm": 0.29246246814727783, "learning_rate": 0.0003356778766488615, "loss": 1.8384, "step": 8636 }, { "epoch": 0.421728515625, "grad_norm": 0.2956347167491913, "learning_rate": 0.00033564414601519447, "loss": 1.8333, "step": 8637 }, { "epoch": 0.42177734375, "grad_norm": 0.2611077129840851, "learning_rate": 0.00033561041391176784, "loss": 1.8251, "step": 8638 }, { "epoch": 0.421826171875, "grad_norm": 0.2882649302482605, "learning_rate": 0.0003355766803393991, "loss": 1.8184, "step": 8639 }, { "epoch": 0.421875, "grad_norm": 0.26567503809928894, "learning_rate": 0.00033554294529890586, "loss": 1.7956, "step": 8640 }, { "epoch": 0.421923828125, "grad_norm": 0.35999706387519836, "learning_rate": 0.0003355092087911057, "loss": 1.8063, "step": 8641 }, { "epoch": 0.42197265625, "grad_norm": 0.2109619528055191, "learning_rate": 0.0003354754708168162, "loss": 1.8334, "step": 8642 }, { "epoch": 0.422021484375, "grad_norm": 0.3391174376010895, "learning_rate": 0.0003354417313768552, "loss": 1.8343, "step": 8643 }, { "epoch": 0.4220703125, "grad_norm": 0.31088316440582275, "learning_rate": 0.00033540799047204024, "loss": 1.8037, "step": 8644 }, { "epoch": 0.422119140625, "grad_norm": 0.22935211658477783, "learning_rate": 0.0003353742481031891, "loss": 1.8142, "step": 8645 }, { "epoch": 0.42216796875, "grad_norm": 0.29565417766571045, "learning_rate": 0.0003353405042711195, "loss": 1.8397, "step": 8646 }, { "epoch": 0.422216796875, "grad_norm": 0.24610058963298798, "learning_rate": 0.0003353067589766493, "loss": 1.8168, "step": 8647 }, { "epoch": 0.422265625, "grad_norm": 0.22378554940223694, "learning_rate": 0.00033527301222059636, "loss": 1.8083, "step": 8648 }, { "epoch": 0.422314453125, "grad_norm": 0.22833773493766785, "learning_rate": 0.0003352392640037785, "loss": 1.8065, "step": 8649 }, { "epoch": 0.42236328125, "grad_norm": 0.2248845100402832, "learning_rate": 0.0003352055143270138, "loss": 1.8205, "step": 8650 }, { "epoch": 0.422412109375, "grad_norm": 0.23839344084262848, "learning_rate": 0.00033517176319112005, "loss": 1.7999, "step": 8651 }, { "epoch": 0.4224609375, "grad_norm": 0.24345993995666504, "learning_rate": 0.00033513801059691523, "loss": 1.8133, "step": 8652 }, { "epoch": 0.422509765625, "grad_norm": 0.24302366375923157, "learning_rate": 0.00033510425654521744, "loss": 1.8059, "step": 8653 }, { "epoch": 0.42255859375, "grad_norm": 0.26588425040245056, "learning_rate": 0.00033507050103684475, "loss": 1.8176, "step": 8654 }, { "epoch": 0.422607421875, "grad_norm": 0.2711332142353058, "learning_rate": 0.00033503674407261514, "loss": 1.8279, "step": 8655 }, { "epoch": 0.42265625, "grad_norm": 0.2101210206747055, "learning_rate": 0.0003350029856533468, "loss": 1.8052, "step": 8656 }, { "epoch": 0.422705078125, "grad_norm": 0.27234646677970886, "learning_rate": 0.0003349692257798579, "loss": 1.8211, "step": 8657 }, { "epoch": 0.42275390625, "grad_norm": 0.23292487859725952, "learning_rate": 0.0003349354644529666, "loss": 1.8209, "step": 8658 }, { "epoch": 0.422802734375, "grad_norm": 0.2312295138835907, "learning_rate": 0.0003349017016734913, "loss": 1.8157, "step": 8659 }, { "epoch": 0.4228515625, "grad_norm": 0.29871654510498047, "learning_rate": 0.00033486793744225005, "loss": 1.8207, "step": 8660 }, { "epoch": 0.422900390625, "grad_norm": 0.22083571553230286, "learning_rate": 0.0003348341717600612, "loss": 1.8073, "step": 8661 }, { "epoch": 0.42294921875, "grad_norm": 0.2690308392047882, "learning_rate": 0.0003348004046277432, "loss": 1.8048, "step": 8662 }, { "epoch": 0.422998046875, "grad_norm": 0.2916368246078491, "learning_rate": 0.00033476663604611434, "loss": 1.7953, "step": 8663 }, { "epoch": 0.423046875, "grad_norm": 0.2662011981010437, "learning_rate": 0.00033473286601599304, "loss": 1.8173, "step": 8664 }, { "epoch": 0.423095703125, "grad_norm": 0.2920880913734436, "learning_rate": 0.00033469909453819773, "loss": 1.8149, "step": 8665 }, { "epoch": 0.42314453125, "grad_norm": 0.3030267059803009, "learning_rate": 0.0003346653216135469, "loss": 1.8244, "step": 8666 }, { "epoch": 0.423193359375, "grad_norm": 0.237665593624115, "learning_rate": 0.0003346315472428591, "loss": 1.842, "step": 8667 }, { "epoch": 0.4232421875, "grad_norm": 0.2876582443714142, "learning_rate": 0.00033459777142695276, "loss": 1.8293, "step": 8668 }, { "epoch": 0.423291015625, "grad_norm": 0.28981149196624756, "learning_rate": 0.00033456399416664665, "loss": 1.804, "step": 8669 }, { "epoch": 0.42333984375, "grad_norm": 0.2606841027736664, "learning_rate": 0.0003345302154627592, "loss": 1.8127, "step": 8670 }, { "epoch": 0.423388671875, "grad_norm": 0.30494925379753113, "learning_rate": 0.00033449643531610916, "loss": 1.8192, "step": 8671 }, { "epoch": 0.4234375, "grad_norm": 0.23564676940441132, "learning_rate": 0.00033446265372751526, "loss": 1.8018, "step": 8672 }, { "epoch": 0.423486328125, "grad_norm": 0.2615583837032318, "learning_rate": 0.00033442887069779615, "loss": 1.788, "step": 8673 }, { "epoch": 0.42353515625, "grad_norm": 0.24191927909851074, "learning_rate": 0.00033439508622777066, "loss": 1.8285, "step": 8674 }, { "epoch": 0.423583984375, "grad_norm": 0.284742534160614, "learning_rate": 0.0003343613003182575, "loss": 1.8199, "step": 8675 }, { "epoch": 0.4236328125, "grad_norm": 0.27637940645217896, "learning_rate": 0.00033432751297007553, "loss": 1.8165, "step": 8676 }, { "epoch": 0.423681640625, "grad_norm": 0.26521188020706177, "learning_rate": 0.00033429372418404356, "loss": 1.8115, "step": 8677 }, { "epoch": 0.42373046875, "grad_norm": 0.3047151267528534, "learning_rate": 0.0003342599339609807, "loss": 1.7977, "step": 8678 }, { "epoch": 0.423779296875, "grad_norm": 0.23466995358467102, "learning_rate": 0.00033422614230170554, "loss": 1.7889, "step": 8679 }, { "epoch": 0.423828125, "grad_norm": 0.2844550609588623, "learning_rate": 0.0003341923492070374, "loss": 1.8195, "step": 8680 }, { "epoch": 0.423876953125, "grad_norm": 0.25203245878219604, "learning_rate": 0.00033415855467779505, "loss": 1.8249, "step": 8681 }, { "epoch": 0.42392578125, "grad_norm": 0.26273640990257263, "learning_rate": 0.00033412475871479763, "loss": 1.7981, "step": 8682 }, { "epoch": 0.423974609375, "grad_norm": 0.29075461626052856, "learning_rate": 0.00033409096131886413, "loss": 1.8123, "step": 8683 }, { "epoch": 0.4240234375, "grad_norm": 0.25122320652008057, "learning_rate": 0.0003340571624908138, "loss": 1.7934, "step": 8684 }, { "epoch": 0.424072265625, "grad_norm": 0.2837999761104584, "learning_rate": 0.00033402336223146566, "loss": 1.8193, "step": 8685 }, { "epoch": 0.42412109375, "grad_norm": 0.262736439704895, "learning_rate": 0.00033398956054163885, "loss": 1.8356, "step": 8686 }, { "epoch": 0.424169921875, "grad_norm": 0.33725738525390625, "learning_rate": 0.0003339557574221527, "loss": 1.8284, "step": 8687 }, { "epoch": 0.42421875, "grad_norm": 0.2794765830039978, "learning_rate": 0.00033392195287382644, "loss": 1.8288, "step": 8688 }, { "epoch": 0.424267578125, "grad_norm": 0.2671462297439575, "learning_rate": 0.0003338881468974792, "loss": 1.796, "step": 8689 }, { "epoch": 0.42431640625, "grad_norm": 0.3023369610309601, "learning_rate": 0.00033385433949393055, "loss": 1.8036, "step": 8690 }, { "epoch": 0.424365234375, "grad_norm": 0.2510404586791992, "learning_rate": 0.0003338205306639996, "loss": 1.8425, "step": 8691 }, { "epoch": 0.4244140625, "grad_norm": 0.26785629987716675, "learning_rate": 0.0003337867204085059, "loss": 1.8252, "step": 8692 }, { "epoch": 0.424462890625, "grad_norm": 0.3010748326778412, "learning_rate": 0.0003337529087282688, "loss": 1.799, "step": 8693 }, { "epoch": 0.42451171875, "grad_norm": 0.25514674186706543, "learning_rate": 0.00033371909562410776, "loss": 1.8396, "step": 8694 }, { "epoch": 0.424560546875, "grad_norm": 0.2987736761569977, "learning_rate": 0.00033368528109684216, "loss": 1.8237, "step": 8695 }, { "epoch": 0.424609375, "grad_norm": 0.2765420973300934, "learning_rate": 0.0003336514651472917, "loss": 1.8207, "step": 8696 }, { "epoch": 0.424658203125, "grad_norm": 0.2619200050830841, "learning_rate": 0.00033361764777627584, "loss": 1.8241, "step": 8697 }, { "epoch": 0.42470703125, "grad_norm": 0.2820422947406769, "learning_rate": 0.0003335838289846142, "loss": 1.8001, "step": 8698 }, { "epoch": 0.424755859375, "grad_norm": 0.25443464517593384, "learning_rate": 0.00033355000877312643, "loss": 1.7861, "step": 8699 }, { "epoch": 0.4248046875, "grad_norm": 0.266242116689682, "learning_rate": 0.0003335161871426321, "loss": 1.8408, "step": 8700 }, { "epoch": 0.424853515625, "grad_norm": 0.29002854228019714, "learning_rate": 0.000333482364093951, "loss": 1.8001, "step": 8701 }, { "epoch": 0.42490234375, "grad_norm": 0.26217764616012573, "learning_rate": 0.0003334485396279029, "loss": 1.8288, "step": 8702 }, { "epoch": 0.424951171875, "grad_norm": 0.2672111392021179, "learning_rate": 0.00033341471374530735, "loss": 1.8202, "step": 8703 }, { "epoch": 0.425, "grad_norm": 0.310904860496521, "learning_rate": 0.0003333808864469843, "loss": 1.8315, "step": 8704 }, { "epoch": 0.425048828125, "grad_norm": 0.2862207889556885, "learning_rate": 0.0003333470577337536, "loss": 1.8112, "step": 8705 }, { "epoch": 0.42509765625, "grad_norm": 0.304633229970932, "learning_rate": 0.0003333132276064351, "loss": 1.8255, "step": 8706 }, { "epoch": 0.425146484375, "grad_norm": 0.28143200278282166, "learning_rate": 0.00033327939606584865, "loss": 1.8085, "step": 8707 }, { "epoch": 0.4251953125, "grad_norm": 0.2550804913043976, "learning_rate": 0.00033324556311281427, "loss": 1.8129, "step": 8708 }, { "epoch": 0.425244140625, "grad_norm": 0.2774297893047333, "learning_rate": 0.0003332117287481518, "loss": 1.8101, "step": 8709 }, { "epoch": 0.42529296875, "grad_norm": 0.26148396730422974, "learning_rate": 0.00033317789297268135, "loss": 1.8257, "step": 8710 }, { "epoch": 0.425341796875, "grad_norm": 0.3868115544319153, "learning_rate": 0.00033314405578722294, "loss": 1.8059, "step": 8711 }, { "epoch": 0.425390625, "grad_norm": 0.3563820421695709, "learning_rate": 0.0003331102171925966, "loss": 1.8117, "step": 8712 }, { "epoch": 0.425439453125, "grad_norm": 0.2294691503047943, "learning_rate": 0.00033307637718962255, "loss": 1.7872, "step": 8713 }, { "epoch": 0.42548828125, "grad_norm": 0.32746243476867676, "learning_rate": 0.0003330425357791207, "loss": 1.8259, "step": 8714 }, { "epoch": 0.425537109375, "grad_norm": 0.29037153720855713, "learning_rate": 0.0003330086929619114, "loss": 1.8243, "step": 8715 }, { "epoch": 0.4255859375, "grad_norm": 0.28002050518989563, "learning_rate": 0.00033297484873881484, "loss": 1.8179, "step": 8716 }, { "epoch": 0.425634765625, "grad_norm": 0.2587750554084778, "learning_rate": 0.00033294100311065123, "loss": 1.8239, "step": 8717 }, { "epoch": 0.42568359375, "grad_norm": 0.2704682946205139, "learning_rate": 0.0003329071560782409, "loss": 1.8041, "step": 8718 }, { "epoch": 0.425732421875, "grad_norm": 0.2950729727745056, "learning_rate": 0.00033287330764240414, "loss": 1.838, "step": 8719 }, { "epoch": 0.42578125, "grad_norm": 0.21601466834545135, "learning_rate": 0.0003328394578039612, "loss": 1.8147, "step": 8720 }, { "epoch": 0.425830078125, "grad_norm": 0.3194323778152466, "learning_rate": 0.0003328056065637326, "loss": 1.8042, "step": 8721 }, { "epoch": 0.42587890625, "grad_norm": 0.26614660024642944, "learning_rate": 0.00033277175392253865, "loss": 1.8415, "step": 8722 }, { "epoch": 0.425927734375, "grad_norm": 0.2501033544540405, "learning_rate": 0.00033273789988119977, "loss": 1.8078, "step": 8723 }, { "epoch": 0.4259765625, "grad_norm": 0.3088621497154236, "learning_rate": 0.0003327040444405366, "loss": 1.8276, "step": 8724 }, { "epoch": 0.426025390625, "grad_norm": 0.27197039127349854, "learning_rate": 0.0003326701876013696, "loss": 1.8115, "step": 8725 }, { "epoch": 0.42607421875, "grad_norm": 0.22622135281562805, "learning_rate": 0.0003326363293645191, "loss": 1.8215, "step": 8726 }, { "epoch": 0.426123046875, "grad_norm": 0.2757985591888428, "learning_rate": 0.00033260246973080605, "loss": 1.8395, "step": 8727 }, { "epoch": 0.426171875, "grad_norm": 0.2632364332675934, "learning_rate": 0.0003325686087010507, "loss": 1.8314, "step": 8728 }, { "epoch": 0.426220703125, "grad_norm": 0.22375138103961945, "learning_rate": 0.0003325347462760739, "loss": 1.8304, "step": 8729 }, { "epoch": 0.42626953125, "grad_norm": 0.25342804193496704, "learning_rate": 0.00033250088245669637, "loss": 1.8106, "step": 8730 }, { "epoch": 0.426318359375, "grad_norm": 0.2534637749195099, "learning_rate": 0.00033246701724373874, "loss": 1.7921, "step": 8731 }, { "epoch": 0.4263671875, "grad_norm": 0.2639237940311432, "learning_rate": 0.00033243315063802173, "loss": 1.8328, "step": 8732 }, { "epoch": 0.426416015625, "grad_norm": 0.24659986793994904, "learning_rate": 0.0003323992826403662, "loss": 1.8029, "step": 8733 }, { "epoch": 0.42646484375, "grad_norm": 0.28096669912338257, "learning_rate": 0.00033236541325159293, "loss": 1.8088, "step": 8734 }, { "epoch": 0.426513671875, "grad_norm": 0.3088182210922241, "learning_rate": 0.0003323315424725228, "loss": 1.8282, "step": 8735 }, { "epoch": 0.4265625, "grad_norm": 0.2739441990852356, "learning_rate": 0.00033229767030397666, "loss": 1.8176, "step": 8736 }, { "epoch": 0.426611328125, "grad_norm": 0.28400135040283203, "learning_rate": 0.0003322637967467755, "loss": 1.8247, "step": 8737 }, { "epoch": 0.42666015625, "grad_norm": 0.23631340265274048, "learning_rate": 0.00033222992180174016, "loss": 1.8101, "step": 8738 }, { "epoch": 0.426708984375, "grad_norm": 0.2577185034751892, "learning_rate": 0.00033219604546969175, "loss": 1.7905, "step": 8739 }, { "epoch": 0.4267578125, "grad_norm": 0.2826632559299469, "learning_rate": 0.0003321621677514511, "loss": 1.8045, "step": 8740 }, { "epoch": 0.426806640625, "grad_norm": 0.22918865084648132, "learning_rate": 0.0003321282886478395, "loss": 1.8111, "step": 8741 }, { "epoch": 0.42685546875, "grad_norm": 0.27699247002601624, "learning_rate": 0.0003320944081596779, "loss": 1.7973, "step": 8742 }, { "epoch": 0.426904296875, "grad_norm": 0.3559968173503876, "learning_rate": 0.0003320605262877875, "loss": 1.8266, "step": 8743 }, { "epoch": 0.426953125, "grad_norm": 0.2827608287334442, "learning_rate": 0.00033202664303298935, "loss": 1.7858, "step": 8744 }, { "epoch": 0.427001953125, "grad_norm": 0.23985150456428528, "learning_rate": 0.00033199275839610473, "loss": 1.8195, "step": 8745 }, { "epoch": 0.42705078125, "grad_norm": 0.3274606466293335, "learning_rate": 0.00033195887237795474, "loss": 1.8299, "step": 8746 }, { "epoch": 0.427099609375, "grad_norm": 0.2737382650375366, "learning_rate": 0.0003319249849793608, "loss": 1.8072, "step": 8747 }, { "epoch": 0.4271484375, "grad_norm": 0.25187912583351135, "learning_rate": 0.00033189109620114407, "loss": 1.8204, "step": 8748 }, { "epoch": 0.427197265625, "grad_norm": 0.25909435749053955, "learning_rate": 0.0003318572060441259, "loss": 1.7957, "step": 8749 }, { "epoch": 0.42724609375, "grad_norm": 0.24915549159049988, "learning_rate": 0.00033182331450912775, "loss": 1.8215, "step": 8750 }, { "epoch": 0.427294921875, "grad_norm": 0.28253504633903503, "learning_rate": 0.00033178942159697085, "loss": 1.818, "step": 8751 }, { "epoch": 0.42734375, "grad_norm": 0.309091180562973, "learning_rate": 0.0003317555273084767, "loss": 1.8118, "step": 8752 }, { "epoch": 0.427392578125, "grad_norm": 0.24833929538726807, "learning_rate": 0.00033172163164446676, "loss": 1.7968, "step": 8753 }, { "epoch": 0.42744140625, "grad_norm": 0.30787894129753113, "learning_rate": 0.0003316877346057625, "loss": 1.8017, "step": 8754 }, { "epoch": 0.427490234375, "grad_norm": 0.2468925565481186, "learning_rate": 0.00033165383619318544, "loss": 1.8203, "step": 8755 }, { "epoch": 0.4275390625, "grad_norm": 0.2498231679201126, "learning_rate": 0.0003316199364075572, "loss": 1.8191, "step": 8756 }, { "epoch": 0.427587890625, "grad_norm": 0.2565898895263672, "learning_rate": 0.00033158603524969924, "loss": 1.7805, "step": 8757 }, { "epoch": 0.42763671875, "grad_norm": 0.2664254307746887, "learning_rate": 0.00033155213272043326, "loss": 1.8237, "step": 8758 }, { "epoch": 0.427685546875, "grad_norm": 0.264730840921402, "learning_rate": 0.00033151822882058097, "loss": 1.8069, "step": 8759 }, { "epoch": 0.427734375, "grad_norm": 0.33040210604667664, "learning_rate": 0.00033148432355096396, "loss": 1.828, "step": 8760 }, { "epoch": 0.427783203125, "grad_norm": 0.24358360469341278, "learning_rate": 0.000331450416912404, "loss": 1.802, "step": 8761 }, { "epoch": 0.42783203125, "grad_norm": 0.3652702867984772, "learning_rate": 0.00033141650890572286, "loss": 1.8367, "step": 8762 }, { "epoch": 0.427880859375, "grad_norm": 0.3428432047367096, "learning_rate": 0.00033138259953174225, "loss": 1.7981, "step": 8763 }, { "epoch": 0.4279296875, "grad_norm": 0.29554376006126404, "learning_rate": 0.00033134868879128407, "loss": 1.8186, "step": 8764 }, { "epoch": 0.427978515625, "grad_norm": 0.3859771192073822, "learning_rate": 0.00033131477668517015, "loss": 1.8001, "step": 8765 }, { "epoch": 0.42802734375, "grad_norm": 0.274942547082901, "learning_rate": 0.00033128086321422236, "loss": 1.821, "step": 8766 }, { "epoch": 0.428076171875, "grad_norm": 0.3248843550682068, "learning_rate": 0.00033124694837926265, "loss": 1.7777, "step": 8767 }, { "epoch": 0.428125, "grad_norm": 0.36759674549102783, "learning_rate": 0.00033121303218111293, "loss": 1.7859, "step": 8768 }, { "epoch": 0.428173828125, "grad_norm": 0.25141504406929016, "learning_rate": 0.00033117911462059526, "loss": 1.7964, "step": 8769 }, { "epoch": 0.42822265625, "grad_norm": 0.3539213240146637, "learning_rate": 0.0003311451956985316, "loss": 1.7956, "step": 8770 }, { "epoch": 0.428271484375, "grad_norm": 0.234230637550354, "learning_rate": 0.00033111127541574396, "loss": 1.8138, "step": 8771 }, { "epoch": 0.4283203125, "grad_norm": 0.34380823373794556, "learning_rate": 0.00033107735377305457, "loss": 1.834, "step": 8772 }, { "epoch": 0.428369140625, "grad_norm": 0.33485665917396545, "learning_rate": 0.0003310434307712854, "loss": 1.8232, "step": 8773 }, { "epoch": 0.42841796875, "grad_norm": 0.2863435745239258, "learning_rate": 0.00033100950641125863, "loss": 1.7985, "step": 8774 }, { "epoch": 0.428466796875, "grad_norm": 0.343677818775177, "learning_rate": 0.00033097558069379655, "loss": 1.8041, "step": 8775 }, { "epoch": 0.428515625, "grad_norm": 0.2984845042228699, "learning_rate": 0.00033094165361972124, "loss": 1.8355, "step": 8776 }, { "epoch": 0.428564453125, "grad_norm": 0.44500699639320374, "learning_rate": 0.000330907725189855, "loss": 1.8336, "step": 8777 }, { "epoch": 0.42861328125, "grad_norm": 0.265399307012558, "learning_rate": 0.00033087379540502025, "loss": 1.8176, "step": 8778 }, { "epoch": 0.428662109375, "grad_norm": 0.3786154091358185, "learning_rate": 0.0003308398642660391, "loss": 1.8018, "step": 8779 }, { "epoch": 0.4287109375, "grad_norm": 0.2947595417499542, "learning_rate": 0.00033080593177373395, "loss": 1.8025, "step": 8780 }, { "epoch": 0.428759765625, "grad_norm": 0.25989001989364624, "learning_rate": 0.00033077199792892726, "loss": 1.819, "step": 8781 }, { "epoch": 0.42880859375, "grad_norm": 0.3156087100505829, "learning_rate": 0.00033073806273244133, "loss": 1.8284, "step": 8782 }, { "epoch": 0.428857421875, "grad_norm": 0.28149962425231934, "learning_rate": 0.0003307041261850987, "loss": 1.8464, "step": 8783 }, { "epoch": 0.42890625, "grad_norm": 0.3387649655342102, "learning_rate": 0.0003306701882877218, "loss": 1.8231, "step": 8784 }, { "epoch": 0.428955078125, "grad_norm": 0.2942274808883667, "learning_rate": 0.0003306362490411332, "loss": 1.8348, "step": 8785 }, { "epoch": 0.42900390625, "grad_norm": 0.3143314719200134, "learning_rate": 0.0003306023084461555, "loss": 1.8146, "step": 8786 }, { "epoch": 0.429052734375, "grad_norm": 0.2574766278266907, "learning_rate": 0.00033056836650361104, "loss": 1.8286, "step": 8787 }, { "epoch": 0.4291015625, "grad_norm": 0.33439382910728455, "learning_rate": 0.0003305344232143226, "loss": 1.838, "step": 8788 }, { "epoch": 0.429150390625, "grad_norm": 0.24344344437122345, "learning_rate": 0.00033050047857911276, "loss": 1.8228, "step": 8789 }, { "epoch": 0.42919921875, "grad_norm": 0.26576942205429077, "learning_rate": 0.0003304665325988043, "loss": 1.8102, "step": 8790 }, { "epoch": 0.429248046875, "grad_norm": 0.2898833453655243, "learning_rate": 0.00033043258527421985, "loss": 1.8058, "step": 8791 }, { "epoch": 0.429296875, "grad_norm": 0.25719746947288513, "learning_rate": 0.00033039863660618213, "loss": 1.8083, "step": 8792 }, { "epoch": 0.429345703125, "grad_norm": 0.27693262696266174, "learning_rate": 0.000330364686595514, "loss": 1.8101, "step": 8793 }, { "epoch": 0.42939453125, "grad_norm": 0.27263832092285156, "learning_rate": 0.00033033073524303826, "loss": 1.8331, "step": 8794 }, { "epoch": 0.429443359375, "grad_norm": 0.2629797160625458, "learning_rate": 0.00033029678254957754, "loss": 1.8071, "step": 8795 }, { "epoch": 0.4294921875, "grad_norm": 0.26135173439979553, "learning_rate": 0.00033026282851595496, "loss": 1.8061, "step": 8796 }, { "epoch": 0.429541015625, "grad_norm": 0.24374531209468842, "learning_rate": 0.00033022887314299336, "loss": 1.8323, "step": 8797 }, { "epoch": 0.42958984375, "grad_norm": 0.3307033181190491, "learning_rate": 0.0003301949164315156, "loss": 1.8101, "step": 8798 }, { "epoch": 0.429638671875, "grad_norm": 0.2715199589729309, "learning_rate": 0.00033016095838234465, "loss": 1.8183, "step": 8799 }, { "epoch": 0.4296875, "grad_norm": 0.2621591091156006, "learning_rate": 0.0003301269989963037, "loss": 1.8163, "step": 8800 }, { "epoch": 0.429736328125, "grad_norm": 0.3297303318977356, "learning_rate": 0.0003300930382742155, "loss": 1.8103, "step": 8801 }, { "epoch": 0.42978515625, "grad_norm": 0.2104712575674057, "learning_rate": 0.00033005907621690335, "loss": 1.7923, "step": 8802 }, { "epoch": 0.429833984375, "grad_norm": 0.267940878868103, "learning_rate": 0.00033002511282519023, "loss": 1.7911, "step": 8803 }, { "epoch": 0.4298828125, "grad_norm": 0.2542383372783661, "learning_rate": 0.0003299911480998993, "loss": 1.8448, "step": 8804 }, { "epoch": 0.429931640625, "grad_norm": 0.2351885288953781, "learning_rate": 0.0003299571820418537, "loss": 1.8186, "step": 8805 }, { "epoch": 0.42998046875, "grad_norm": 0.29570314288139343, "learning_rate": 0.00032992321465187666, "loss": 1.8137, "step": 8806 }, { "epoch": 0.430029296875, "grad_norm": 0.22967688739299774, "learning_rate": 0.00032988924593079134, "loss": 1.82, "step": 8807 }, { "epoch": 0.430078125, "grad_norm": 0.25493136048316956, "learning_rate": 0.000329855275879421, "loss": 1.8357, "step": 8808 }, { "epoch": 0.430126953125, "grad_norm": 0.28218430280685425, "learning_rate": 0.0003298213044985891, "loss": 1.8121, "step": 8809 }, { "epoch": 0.43017578125, "grad_norm": 0.20387545228004456, "learning_rate": 0.0003297873317891188, "loss": 1.8006, "step": 8810 }, { "epoch": 0.430224609375, "grad_norm": 0.2585762143135071, "learning_rate": 0.0003297533577518336, "loss": 1.811, "step": 8811 }, { "epoch": 0.4302734375, "grad_norm": 0.1996128112077713, "learning_rate": 0.0003297193823875566, "loss": 1.813, "step": 8812 }, { "epoch": 0.430322265625, "grad_norm": 0.27776244282722473, "learning_rate": 0.00032968540569711146, "loss": 1.8445, "step": 8813 }, { "epoch": 0.43037109375, "grad_norm": 0.2569442391395569, "learning_rate": 0.0003296514276813216, "loss": 1.8383, "step": 8814 }, { "epoch": 0.430419921875, "grad_norm": 0.3162379264831543, "learning_rate": 0.0003296174483410105, "loss": 1.8457, "step": 8815 }, { "epoch": 0.43046875, "grad_norm": 0.3046342134475708, "learning_rate": 0.00032958346767700166, "loss": 1.8239, "step": 8816 }, { "epoch": 0.430517578125, "grad_norm": 0.2182677835226059, "learning_rate": 0.0003295494856901186, "loss": 1.8162, "step": 8817 }, { "epoch": 0.43056640625, "grad_norm": 0.307380735874176, "learning_rate": 0.000329515502381185, "loss": 1.8066, "step": 8818 }, { "epoch": 0.430615234375, "grad_norm": 0.3495343327522278, "learning_rate": 0.0003294815177510243, "loss": 1.8077, "step": 8819 }, { "epoch": 0.4306640625, "grad_norm": 0.2711291015148163, "learning_rate": 0.00032944753180046035, "loss": 1.7918, "step": 8820 }, { "epoch": 0.430712890625, "grad_norm": 0.2796745002269745, "learning_rate": 0.0003294135445303167, "loss": 1.7969, "step": 8821 }, { "epoch": 0.43076171875, "grad_norm": 0.22138771414756775, "learning_rate": 0.00032937955594141707, "loss": 1.8257, "step": 8822 }, { "epoch": 0.430810546875, "grad_norm": 0.28754693269729614, "learning_rate": 0.00032934556603458513, "loss": 1.8006, "step": 8823 }, { "epoch": 0.430859375, "grad_norm": 0.26136553287506104, "learning_rate": 0.0003293115748106448, "loss": 1.7934, "step": 8824 }, { "epoch": 0.430908203125, "grad_norm": 0.2506689429283142, "learning_rate": 0.00032927758227041987, "loss": 1.8074, "step": 8825 }, { "epoch": 0.43095703125, "grad_norm": 0.3173237144947052, "learning_rate": 0.00032924358841473404, "loss": 1.7988, "step": 8826 }, { "epoch": 0.431005859375, "grad_norm": 0.2531201243400574, "learning_rate": 0.0003292095932444113, "loss": 1.8109, "step": 8827 }, { "epoch": 0.4310546875, "grad_norm": 0.25786152482032776, "learning_rate": 0.0003291755967602756, "loss": 1.8081, "step": 8828 }, { "epoch": 0.431103515625, "grad_norm": 0.29750141501426697, "learning_rate": 0.0003291415989631507, "loss": 1.8085, "step": 8829 }, { "epoch": 0.43115234375, "grad_norm": 0.2861655652523041, "learning_rate": 0.0003291075998538607, "loss": 1.8384, "step": 8830 }, { "epoch": 0.431201171875, "grad_norm": 0.3438264727592468, "learning_rate": 0.0003290735994332295, "loss": 1.7951, "step": 8831 }, { "epoch": 0.43125, "grad_norm": 0.3157578706741333, "learning_rate": 0.0003290395977020812, "loss": 1.7884, "step": 8832 }, { "epoch": 0.431298828125, "grad_norm": 0.2609429657459259, "learning_rate": 0.0003290055946612398, "loss": 1.8007, "step": 8833 }, { "epoch": 0.43134765625, "grad_norm": 0.2912408709526062, "learning_rate": 0.0003289715903115294, "loss": 1.8122, "step": 8834 }, { "epoch": 0.431396484375, "grad_norm": 0.26632553339004517, "learning_rate": 0.0003289375846537742, "loss": 1.8397, "step": 8835 }, { "epoch": 0.4314453125, "grad_norm": 0.2547721564769745, "learning_rate": 0.0003289035776887983, "loss": 1.8083, "step": 8836 }, { "epoch": 0.431494140625, "grad_norm": 0.2993237376213074, "learning_rate": 0.00032886956941742594, "loss": 1.8386, "step": 8837 }, { "epoch": 0.43154296875, "grad_norm": 0.3021325170993805, "learning_rate": 0.00032883555984048124, "loss": 1.7921, "step": 8838 }, { "epoch": 0.431591796875, "grad_norm": 0.2998517155647278, "learning_rate": 0.0003288015489587885, "loss": 1.8256, "step": 8839 }, { "epoch": 0.431640625, "grad_norm": 0.24338731169700623, "learning_rate": 0.000328767536773172, "loss": 1.8001, "step": 8840 }, { "epoch": 0.431689453125, "grad_norm": 0.3443777561187744, "learning_rate": 0.0003287335232844561, "loss": 1.815, "step": 8841 }, { "epoch": 0.43173828125, "grad_norm": 0.28409433364868164, "learning_rate": 0.00032869950849346495, "loss": 1.8065, "step": 8842 }, { "epoch": 0.431787109375, "grad_norm": 0.2714436948299408, "learning_rate": 0.0003286654924010232, "loss": 1.8313, "step": 8843 }, { "epoch": 0.4318359375, "grad_norm": 0.28173357248306274, "learning_rate": 0.000328631475007955, "loss": 1.7986, "step": 8844 }, { "epoch": 0.431884765625, "grad_norm": 0.22776316106319427, "learning_rate": 0.0003285974563150851, "loss": 1.7887, "step": 8845 }, { "epoch": 0.43193359375, "grad_norm": 0.24198134243488312, "learning_rate": 0.00032856343632323766, "loss": 1.8147, "step": 8846 }, { "epoch": 0.431982421875, "grad_norm": 0.2584144175052643, "learning_rate": 0.00032852941503323736, "loss": 1.7941, "step": 8847 }, { "epoch": 0.43203125, "grad_norm": 0.26809874176979065, "learning_rate": 0.00032849539244590866, "loss": 1.8002, "step": 8848 }, { "epoch": 0.432080078125, "grad_norm": 0.3312723934650421, "learning_rate": 0.00032846136856207615, "loss": 1.8017, "step": 8849 }, { "epoch": 0.43212890625, "grad_norm": 0.31735798716545105, "learning_rate": 0.00032842734338256436, "loss": 1.8007, "step": 8850 }, { "epoch": 0.432177734375, "grad_norm": 0.21591830253601074, "learning_rate": 0.00032839331690819805, "loss": 1.7923, "step": 8851 }, { "epoch": 0.4322265625, "grad_norm": 0.21680909395217896, "learning_rate": 0.0003283592891398018, "loss": 1.7993, "step": 8852 }, { "epoch": 0.432275390625, "grad_norm": 0.27951401472091675, "learning_rate": 0.0003283252600782004, "loss": 1.8057, "step": 8853 }, { "epoch": 0.43232421875, "grad_norm": 0.335245281457901, "learning_rate": 0.0003282912297242183, "loss": 1.8343, "step": 8854 }, { "epoch": 0.432373046875, "grad_norm": 0.256043016910553, "learning_rate": 0.00032825719807868056, "loss": 1.8056, "step": 8855 }, { "epoch": 0.432421875, "grad_norm": 0.28459909558296204, "learning_rate": 0.0003282231651424117, "loss": 1.8237, "step": 8856 }, { "epoch": 0.432470703125, "grad_norm": 0.3886682987213135, "learning_rate": 0.00032818913091623683, "loss": 1.8295, "step": 8857 }, { "epoch": 0.43251953125, "grad_norm": 0.28114232420921326, "learning_rate": 0.0003281550954009806, "loss": 1.8115, "step": 8858 }, { "epoch": 0.432568359375, "grad_norm": 0.299262672662735, "learning_rate": 0.00032812105859746787, "loss": 1.8006, "step": 8859 }, { "epoch": 0.4326171875, "grad_norm": 0.3287855386734009, "learning_rate": 0.0003280870205065237, "loss": 1.7914, "step": 8860 }, { "epoch": 0.432666015625, "grad_norm": 0.334139883518219, "learning_rate": 0.0003280529811289728, "loss": 1.8032, "step": 8861 }, { "epoch": 0.43271484375, "grad_norm": 0.3775726854801178, "learning_rate": 0.0003280189404656404, "loss": 1.8053, "step": 8862 }, { "epoch": 0.432763671875, "grad_norm": 0.2929494380950928, "learning_rate": 0.00032798489851735137, "loss": 1.8256, "step": 8863 }, { "epoch": 0.4328125, "grad_norm": 0.3008347451686859, "learning_rate": 0.00032795085528493074, "loss": 1.8238, "step": 8864 }, { "epoch": 0.432861328125, "grad_norm": 0.2777714133262634, "learning_rate": 0.00032791681076920355, "loss": 1.8162, "step": 8865 }, { "epoch": 0.43291015625, "grad_norm": 0.32368525862693787, "learning_rate": 0.000327882764970995, "loss": 1.8172, "step": 8866 }, { "epoch": 0.432958984375, "grad_norm": 0.34323790669441223, "learning_rate": 0.0003278487178911301, "loss": 1.8241, "step": 8867 }, { "epoch": 0.4330078125, "grad_norm": 0.26899006962776184, "learning_rate": 0.0003278146695304341, "loss": 1.8189, "step": 8868 }, { "epoch": 0.433056640625, "grad_norm": 0.3289594352245331, "learning_rate": 0.0003277806198897321, "loss": 1.7837, "step": 8869 }, { "epoch": 0.43310546875, "grad_norm": 0.34693899750709534, "learning_rate": 0.0003277465689698495, "loss": 1.7735, "step": 8870 }, { "epoch": 0.433154296875, "grad_norm": 0.2665594816207886, "learning_rate": 0.00032771251677161137, "loss": 1.8131, "step": 8871 }, { "epoch": 0.433203125, "grad_norm": 0.30824026465415955, "learning_rate": 0.000327678463295843, "loss": 1.8142, "step": 8872 }, { "epoch": 0.433251953125, "grad_norm": 0.2806300222873688, "learning_rate": 0.0003276444085433698, "loss": 1.8241, "step": 8873 }, { "epoch": 0.43330078125, "grad_norm": 0.23458582162857056, "learning_rate": 0.000327610352515017, "loss": 1.7963, "step": 8874 }, { "epoch": 0.433349609375, "grad_norm": 0.25069621205329895, "learning_rate": 0.0003275762952116101, "loss": 1.8051, "step": 8875 }, { "epoch": 0.4333984375, "grad_norm": 0.21731166541576385, "learning_rate": 0.0003275422366339744, "loss": 1.8189, "step": 8876 }, { "epoch": 0.433447265625, "grad_norm": 0.2366601526737213, "learning_rate": 0.0003275081767829354, "loss": 1.8151, "step": 8877 }, { "epoch": 0.43349609375, "grad_norm": 0.21986274421215057, "learning_rate": 0.00032747411565931856, "loss": 1.8062, "step": 8878 }, { "epoch": 0.433544921875, "grad_norm": 0.23717360198497772, "learning_rate": 0.0003274400532639494, "loss": 1.8223, "step": 8879 }, { "epoch": 0.43359375, "grad_norm": 0.19689320027828217, "learning_rate": 0.00032740598959765346, "loss": 1.8102, "step": 8880 }, { "epoch": 0.433642578125, "grad_norm": 0.22030587494373322, "learning_rate": 0.00032737192466125617, "loss": 1.8042, "step": 8881 }, { "epoch": 0.43369140625, "grad_norm": 0.23324239253997803, "learning_rate": 0.0003273378584555832, "loss": 1.7928, "step": 8882 }, { "epoch": 0.433740234375, "grad_norm": 0.25912895798683167, "learning_rate": 0.00032730379098146016, "loss": 1.8391, "step": 8883 }, { "epoch": 0.4337890625, "grad_norm": 0.2441352903842926, "learning_rate": 0.0003272697222397128, "loss": 1.8355, "step": 8884 }, { "epoch": 0.433837890625, "grad_norm": 0.23759184777736664, "learning_rate": 0.0003272356522311667, "loss": 1.809, "step": 8885 }, { "epoch": 0.43388671875, "grad_norm": 0.24515539407730103, "learning_rate": 0.0003272015809566476, "loss": 1.788, "step": 8886 }, { "epoch": 0.433935546875, "grad_norm": 0.245094895362854, "learning_rate": 0.00032716750841698127, "loss": 1.796, "step": 8887 }, { "epoch": 0.433984375, "grad_norm": 0.22721920907497406, "learning_rate": 0.00032713343461299345, "loss": 1.8025, "step": 8888 }, { "epoch": 0.434033203125, "grad_norm": 0.23251450061798096, "learning_rate": 0.00032709935954551, "loss": 1.8189, "step": 8889 }, { "epoch": 0.43408203125, "grad_norm": 0.2572985887527466, "learning_rate": 0.00032706528321535663, "loss": 1.8055, "step": 8890 }, { "epoch": 0.434130859375, "grad_norm": 0.24928610026836395, "learning_rate": 0.0003270312056233593, "loss": 1.7862, "step": 8891 }, { "epoch": 0.4341796875, "grad_norm": 0.19397613406181335, "learning_rate": 0.00032699712677034394, "loss": 1.8026, "step": 8892 }, { "epoch": 0.434228515625, "grad_norm": 0.21721987426280975, "learning_rate": 0.0003269630466571364, "loss": 1.7945, "step": 8893 }, { "epoch": 0.43427734375, "grad_norm": 0.22360888123512268, "learning_rate": 0.0003269289652845627, "loss": 1.8164, "step": 8894 }, { "epoch": 0.434326171875, "grad_norm": 0.22392572462558746, "learning_rate": 0.00032689488265344886, "loss": 1.7844, "step": 8895 }, { "epoch": 0.434375, "grad_norm": 0.20983490347862244, "learning_rate": 0.00032686079876462085, "loss": 1.8077, "step": 8896 }, { "epoch": 0.434423828125, "grad_norm": 0.21877454221248627, "learning_rate": 0.00032682671361890464, "loss": 1.8065, "step": 8897 }, { "epoch": 0.43447265625, "grad_norm": 0.21979352831840515, "learning_rate": 0.00032679262721712645, "loss": 1.7953, "step": 8898 }, { "epoch": 0.434521484375, "grad_norm": 0.21327438950538635, "learning_rate": 0.0003267585395601123, "loss": 1.8108, "step": 8899 }, { "epoch": 0.4345703125, "grad_norm": 0.2578732371330261, "learning_rate": 0.0003267244506486883, "loss": 1.8262, "step": 8900 }, { "epoch": 0.434619140625, "grad_norm": 0.33097201585769653, "learning_rate": 0.0003266903604836808, "loss": 1.8334, "step": 8901 }, { "epoch": 0.43466796875, "grad_norm": 0.28613072633743286, "learning_rate": 0.0003266562690659158, "loss": 1.8142, "step": 8902 }, { "epoch": 0.434716796875, "grad_norm": 0.35619503259658813, "learning_rate": 0.00032662217639621967, "loss": 1.829, "step": 8903 }, { "epoch": 0.434765625, "grad_norm": 0.30110427737236023, "learning_rate": 0.00032658808247541864, "loss": 1.7992, "step": 8904 }, { "epoch": 0.434814453125, "grad_norm": 0.35176366567611694, "learning_rate": 0.00032655398730433886, "loss": 1.8431, "step": 8905 }, { "epoch": 0.43486328125, "grad_norm": 0.38969385623931885, "learning_rate": 0.0003265198908838069, "loss": 1.8011, "step": 8906 }, { "epoch": 0.434912109375, "grad_norm": 0.22529324889183044, "learning_rate": 0.000326485793214649, "loss": 1.8158, "step": 8907 }, { "epoch": 0.4349609375, "grad_norm": 0.36794939637184143, "learning_rate": 0.0003264516942976915, "loss": 1.8265, "step": 8908 }, { "epoch": 0.435009765625, "grad_norm": 0.3705633580684662, "learning_rate": 0.0003264175941337608, "loss": 1.796, "step": 8909 }, { "epoch": 0.43505859375, "grad_norm": 0.2981257140636444, "learning_rate": 0.00032638349272368344, "loss": 1.7998, "step": 8910 }, { "epoch": 0.435107421875, "grad_norm": 0.4151122570037842, "learning_rate": 0.00032634939006828586, "loss": 1.7946, "step": 8911 }, { "epoch": 0.43515625, "grad_norm": 0.30524203181266785, "learning_rate": 0.0003263152861683945, "loss": 1.7887, "step": 8912 }, { "epoch": 0.435205078125, "grad_norm": 0.276285856962204, "learning_rate": 0.00032628118102483606, "loss": 1.8182, "step": 8913 }, { "epoch": 0.43525390625, "grad_norm": 0.3526410162448883, "learning_rate": 0.00032624707463843703, "loss": 1.8038, "step": 8914 }, { "epoch": 0.435302734375, "grad_norm": 0.24901781976222992, "learning_rate": 0.0003262129670100239, "loss": 1.8179, "step": 8915 }, { "epoch": 0.4353515625, "grad_norm": 0.3134523928165436, "learning_rate": 0.0003261788581404233, "loss": 1.8202, "step": 8916 }, { "epoch": 0.435400390625, "grad_norm": 0.25055432319641113, "learning_rate": 0.00032614474803046206, "loss": 1.8298, "step": 8917 }, { "epoch": 0.43544921875, "grad_norm": 0.26131898164749146, "learning_rate": 0.0003261106366809667, "loss": 1.8129, "step": 8918 }, { "epoch": 0.435498046875, "grad_norm": 0.2424444705247879, "learning_rate": 0.000326076524092764, "loss": 1.7887, "step": 8919 }, { "epoch": 0.435546875, "grad_norm": 0.21799390017986298, "learning_rate": 0.00032604241026668073, "loss": 1.7867, "step": 8920 }, { "epoch": 0.435595703125, "grad_norm": 0.2418781816959381, "learning_rate": 0.00032600829520354366, "loss": 1.8184, "step": 8921 }, { "epoch": 0.43564453125, "grad_norm": 0.25648507475852966, "learning_rate": 0.00032597417890417954, "loss": 1.8366, "step": 8922 }, { "epoch": 0.435693359375, "grad_norm": 0.2896358370780945, "learning_rate": 0.00032594006136941523, "loss": 1.8225, "step": 8923 }, { "epoch": 0.4357421875, "grad_norm": 0.28966671228408813, "learning_rate": 0.0003259059426000777, "loss": 1.8091, "step": 8924 }, { "epoch": 0.435791015625, "grad_norm": 0.23818184435367584, "learning_rate": 0.00032587182259699365, "loss": 1.8104, "step": 8925 }, { "epoch": 0.43583984375, "grad_norm": 0.2930639982223511, "learning_rate": 0.00032583770136099016, "loss": 1.8538, "step": 8926 }, { "epoch": 0.435888671875, "grad_norm": 0.28388094902038574, "learning_rate": 0.0003258035788928941, "loss": 1.8176, "step": 8927 }, { "epoch": 0.4359375, "grad_norm": 0.2629093825817108, "learning_rate": 0.0003257694551935325, "loss": 1.8204, "step": 8928 }, { "epoch": 0.435986328125, "grad_norm": 0.2765708863735199, "learning_rate": 0.00032573533026373244, "loss": 1.8287, "step": 8929 }, { "epoch": 0.43603515625, "grad_norm": 0.29494816064834595, "learning_rate": 0.00032570120410432076, "loss": 1.8052, "step": 8930 }, { "epoch": 0.436083984375, "grad_norm": 0.24582365155220032, "learning_rate": 0.0003256670767161248, "loss": 1.7955, "step": 8931 }, { "epoch": 0.4361328125, "grad_norm": 0.2528734505176544, "learning_rate": 0.00032563294809997143, "loss": 1.7945, "step": 8932 }, { "epoch": 0.436181640625, "grad_norm": 0.24854031205177307, "learning_rate": 0.0003255988182566879, "loss": 1.7905, "step": 8933 }, { "epoch": 0.43623046875, "grad_norm": 0.2630135715007782, "learning_rate": 0.00032556468718710147, "loss": 1.8045, "step": 8934 }, { "epoch": 0.436279296875, "grad_norm": 0.24960915744304657, "learning_rate": 0.0003255305548920391, "loss": 1.803, "step": 8935 }, { "epoch": 0.436328125, "grad_norm": 0.29949161410331726, "learning_rate": 0.00032549642137232813, "loss": 1.8206, "step": 8936 }, { "epoch": 0.436376953125, "grad_norm": 0.29098737239837646, "learning_rate": 0.00032546228662879583, "loss": 1.8041, "step": 8937 }, { "epoch": 0.43642578125, "grad_norm": 0.28464365005493164, "learning_rate": 0.00032542815066226955, "loss": 1.8409, "step": 8938 }, { "epoch": 0.436474609375, "grad_norm": 0.2994776964187622, "learning_rate": 0.00032539401347357643, "loss": 1.8274, "step": 8939 }, { "epoch": 0.4365234375, "grad_norm": 0.2061275690793991, "learning_rate": 0.000325359875063544, "loss": 1.7937, "step": 8940 }, { "epoch": 0.436572265625, "grad_norm": 0.3240450620651245, "learning_rate": 0.0003253257354329995, "loss": 1.7971, "step": 8941 }, { "epoch": 0.43662109375, "grad_norm": 0.3370078504085541, "learning_rate": 0.0003252915945827703, "loss": 1.8041, "step": 8942 }, { "epoch": 0.436669921875, "grad_norm": 0.2911677658557892, "learning_rate": 0.00032525745251368393, "loss": 1.8197, "step": 8943 }, { "epoch": 0.43671875, "grad_norm": 0.2777900695800781, "learning_rate": 0.00032522330922656794, "loss": 1.7814, "step": 8944 }, { "epoch": 0.436767578125, "grad_norm": 0.2759968340396881, "learning_rate": 0.00032518916472224965, "loss": 1.7932, "step": 8945 }, { "epoch": 0.43681640625, "grad_norm": 0.2698253393173218, "learning_rate": 0.00032515501900155655, "loss": 1.8328, "step": 8946 }, { "epoch": 0.436865234375, "grad_norm": 0.28155726194381714, "learning_rate": 0.0003251208720653163, "loss": 1.8089, "step": 8947 }, { "epoch": 0.4369140625, "grad_norm": 0.21305181086063385, "learning_rate": 0.00032508672391435645, "loss": 1.8147, "step": 8948 }, { "epoch": 0.436962890625, "grad_norm": 0.32537534832954407, "learning_rate": 0.00032505257454950464, "loss": 1.8411, "step": 8949 }, { "epoch": 0.43701171875, "grad_norm": 0.2945479452610016, "learning_rate": 0.0003250184239715884, "loss": 1.8063, "step": 8950 }, { "epoch": 0.437060546875, "grad_norm": 0.2338726967573166, "learning_rate": 0.00032498427218143554, "loss": 1.8078, "step": 8951 }, { "epoch": 0.437109375, "grad_norm": 0.3235224783420563, "learning_rate": 0.0003249501191798737, "loss": 1.8062, "step": 8952 }, { "epoch": 0.437158203125, "grad_norm": 0.22056709229946136, "learning_rate": 0.00032491596496773056, "loss": 1.7902, "step": 8953 }, { "epoch": 0.43720703125, "grad_norm": 0.27720072865486145, "learning_rate": 0.00032488180954583397, "loss": 1.8354, "step": 8954 }, { "epoch": 0.437255859375, "grad_norm": 0.32924550771713257, "learning_rate": 0.00032484765291501157, "loss": 1.8046, "step": 8955 }, { "epoch": 0.4373046875, "grad_norm": 0.24043728411197662, "learning_rate": 0.00032481349507609126, "loss": 1.8146, "step": 8956 }, { "epoch": 0.437353515625, "grad_norm": 0.2818576693534851, "learning_rate": 0.00032477933602990093, "loss": 1.7931, "step": 8957 }, { "epoch": 0.43740234375, "grad_norm": 0.2570163607597351, "learning_rate": 0.0003247451757772684, "loss": 1.8084, "step": 8958 }, { "epoch": 0.437451171875, "grad_norm": 0.3225618600845337, "learning_rate": 0.0003247110143190215, "loss": 1.7931, "step": 8959 }, { "epoch": 0.4375, "grad_norm": 0.298991322517395, "learning_rate": 0.0003246768516559883, "loss": 1.7896, "step": 8960 }, { "epoch": 0.437548828125, "grad_norm": 0.23249641060829163, "learning_rate": 0.0003246426877889966, "loss": 1.8244, "step": 8961 }, { "epoch": 0.43759765625, "grad_norm": 0.2858259975910187, "learning_rate": 0.0003246085227188745, "loss": 1.8312, "step": 8962 }, { "epoch": 0.437646484375, "grad_norm": 0.23345331847667694, "learning_rate": 0.0003245743564464501, "loss": 1.7985, "step": 8963 }, { "epoch": 0.4376953125, "grad_norm": 0.2877310514450073, "learning_rate": 0.00032454018897255126, "loss": 1.8117, "step": 8964 }, { "epoch": 0.437744140625, "grad_norm": 0.28961873054504395, "learning_rate": 0.0003245060202980062, "loss": 1.7933, "step": 8965 }, { "epoch": 0.43779296875, "grad_norm": 0.19492530822753906, "learning_rate": 0.00032447185042364286, "loss": 1.8168, "step": 8966 }, { "epoch": 0.437841796875, "grad_norm": 0.2745726704597473, "learning_rate": 0.0003244376793502896, "loss": 1.8106, "step": 8967 }, { "epoch": 0.437890625, "grad_norm": 0.27588269114494324, "learning_rate": 0.00032440350707877433, "loss": 1.8217, "step": 8968 }, { "epoch": 0.437939453125, "grad_norm": 0.27603501081466675, "learning_rate": 0.00032436933360992546, "loss": 1.8071, "step": 8969 }, { "epoch": 0.43798828125, "grad_norm": 0.3068810701370239, "learning_rate": 0.00032433515894457113, "loss": 1.8254, "step": 8970 }, { "epoch": 0.438037109375, "grad_norm": 0.26895254850387573, "learning_rate": 0.0003243009830835396, "loss": 1.8295, "step": 8971 }, { "epoch": 0.4380859375, "grad_norm": 0.27143582701683044, "learning_rate": 0.00032426680602765914, "loss": 1.813, "step": 8972 }, { "epoch": 0.438134765625, "grad_norm": 0.27745485305786133, "learning_rate": 0.000324232627777758, "loss": 1.8084, "step": 8973 }, { "epoch": 0.43818359375, "grad_norm": 0.24051961302757263, "learning_rate": 0.00032419844833466463, "loss": 1.8387, "step": 8974 }, { "epoch": 0.438232421875, "grad_norm": 0.2663915455341339, "learning_rate": 0.00032416426769920725, "loss": 1.8, "step": 8975 }, { "epoch": 0.43828125, "grad_norm": 0.28605058789253235, "learning_rate": 0.00032413008587221445, "loss": 1.8218, "step": 8976 }, { "epoch": 0.438330078125, "grad_norm": 0.24019859731197357, "learning_rate": 0.00032409590285451445, "loss": 1.8173, "step": 8977 }, { "epoch": 0.43837890625, "grad_norm": 0.23834310472011566, "learning_rate": 0.00032406171864693585, "loss": 1.8209, "step": 8978 }, { "epoch": 0.438427734375, "grad_norm": 0.2804831564426422, "learning_rate": 0.00032402753325030704, "loss": 1.8158, "step": 8979 }, { "epoch": 0.4384765625, "grad_norm": 0.27442196011543274, "learning_rate": 0.0003239933466654567, "loss": 1.8076, "step": 8980 }, { "epoch": 0.438525390625, "grad_norm": 0.2503378391265869, "learning_rate": 0.0003239591588932132, "loss": 1.8013, "step": 8981 }, { "epoch": 0.43857421875, "grad_norm": 0.28486913442611694, "learning_rate": 0.00032392496993440505, "loss": 1.8052, "step": 8982 }, { "epoch": 0.438623046875, "grad_norm": 0.26642701029777527, "learning_rate": 0.000323890779789861, "loss": 1.7917, "step": 8983 }, { "epoch": 0.438671875, "grad_norm": 0.3169712722301483, "learning_rate": 0.0003238565884604096, "loss": 1.8037, "step": 8984 }, { "epoch": 0.438720703125, "grad_norm": 0.26668980717658997, "learning_rate": 0.0003238223959468796, "loss": 1.7981, "step": 8985 }, { "epoch": 0.43876953125, "grad_norm": 0.23758631944656372, "learning_rate": 0.0003237882022500995, "loss": 1.8346, "step": 8986 }, { "epoch": 0.438818359375, "grad_norm": 0.2680847942829132, "learning_rate": 0.00032375400737089826, "loss": 1.8031, "step": 8987 }, { "epoch": 0.4388671875, "grad_norm": 0.27898913621902466, "learning_rate": 0.0003237198113101045, "loss": 1.8121, "step": 8988 }, { "epoch": 0.438916015625, "grad_norm": 0.25096991658210754, "learning_rate": 0.00032368561406854687, "loss": 1.7947, "step": 8989 }, { "epoch": 0.43896484375, "grad_norm": 0.29611530900001526, "learning_rate": 0.00032365141564705426, "loss": 1.8035, "step": 8990 }, { "epoch": 0.439013671875, "grad_norm": 0.33135518431663513, "learning_rate": 0.0003236172160464556, "loss": 1.8211, "step": 8991 }, { "epoch": 0.4390625, "grad_norm": 0.27450451254844666, "learning_rate": 0.00032358301526757947, "loss": 1.804, "step": 8992 }, { "epoch": 0.439111328125, "grad_norm": 0.26419293880462646, "learning_rate": 0.0003235488133112551, "loss": 1.7784, "step": 8993 }, { "epoch": 0.43916015625, "grad_norm": 0.32598960399627686, "learning_rate": 0.0003235146101783112, "loss": 1.8057, "step": 8994 }, { "epoch": 0.439208984375, "grad_norm": 0.3425842225551605, "learning_rate": 0.00032348040586957673, "loss": 1.8107, "step": 8995 }, { "epoch": 0.4392578125, "grad_norm": 0.2622252106666565, "learning_rate": 0.00032344620038588065, "loss": 1.8151, "step": 8996 }, { "epoch": 0.439306640625, "grad_norm": 0.31685006618499756, "learning_rate": 0.000323411993728052, "loss": 1.8326, "step": 8997 }, { "epoch": 0.43935546875, "grad_norm": 0.43769821524620056, "learning_rate": 0.0003233777858969198, "loss": 1.8045, "step": 8998 }, { "epoch": 0.439404296875, "grad_norm": 0.30174458026885986, "learning_rate": 0.00032334357689331304, "loss": 1.8127, "step": 8999 }, { "epoch": 0.439453125, "grad_norm": 0.27575790882110596, "learning_rate": 0.00032330936671806095, "loss": 1.7981, "step": 9000 }, { "epoch": 0.439501953125, "grad_norm": 0.3961799740791321, "learning_rate": 0.0003232751553719925, "loss": 1.794, "step": 9001 }, { "epoch": 0.43955078125, "grad_norm": 0.27315837144851685, "learning_rate": 0.00032324094285593685, "loss": 1.8086, "step": 9002 }, { "epoch": 0.439599609375, "grad_norm": 0.289907306432724, "learning_rate": 0.0003232067291707232, "loss": 1.7784, "step": 9003 }, { "epoch": 0.4396484375, "grad_norm": 0.33506453037261963, "learning_rate": 0.0003231725143171808, "loss": 1.8573, "step": 9004 }, { "epoch": 0.439697265625, "grad_norm": 0.2751420736312866, "learning_rate": 0.0003231382982961388, "loss": 1.797, "step": 9005 }, { "epoch": 0.43974609375, "grad_norm": 0.2765732705593109, "learning_rate": 0.00032310408110842645, "loss": 1.8086, "step": 9006 }, { "epoch": 0.439794921875, "grad_norm": 0.31801730394363403, "learning_rate": 0.000323069862754873, "loss": 1.8296, "step": 9007 }, { "epoch": 0.43984375, "grad_norm": 0.22412939369678497, "learning_rate": 0.00032303564323630786, "loss": 1.833, "step": 9008 }, { "epoch": 0.439892578125, "grad_norm": 0.27754291892051697, "learning_rate": 0.00032300142255356024, "loss": 1.7964, "step": 9009 }, { "epoch": 0.43994140625, "grad_norm": 0.2911137044429779, "learning_rate": 0.00032296720070745967, "loss": 1.7949, "step": 9010 }, { "epoch": 0.439990234375, "grad_norm": 0.20403537154197693, "learning_rate": 0.0003229329776988354, "loss": 1.8109, "step": 9011 }, { "epoch": 0.4400390625, "grad_norm": 0.3274000883102417, "learning_rate": 0.00032289875352851694, "loss": 1.8267, "step": 9012 }, { "epoch": 0.440087890625, "grad_norm": 0.2711639106273651, "learning_rate": 0.00032286452819733376, "loss": 1.782, "step": 9013 }, { "epoch": 0.44013671875, "grad_norm": 0.27480119466781616, "learning_rate": 0.0003228303017061152, "loss": 1.811, "step": 9014 }, { "epoch": 0.440185546875, "grad_norm": 0.34449172019958496, "learning_rate": 0.0003227960740556908, "loss": 1.8208, "step": 9015 }, { "epoch": 0.440234375, "grad_norm": 0.22540418803691864, "learning_rate": 0.0003227618452468903, "loss": 1.81, "step": 9016 }, { "epoch": 0.440283203125, "grad_norm": 0.30200105905532837, "learning_rate": 0.00032272761528054307, "loss": 1.8028, "step": 9017 }, { "epoch": 0.44033203125, "grad_norm": 0.3078186810016632, "learning_rate": 0.0003226933841574786, "loss": 1.7993, "step": 9018 }, { "epoch": 0.440380859375, "grad_norm": 0.22782136499881744, "learning_rate": 0.0003226591518785268, "loss": 1.8305, "step": 9019 }, { "epoch": 0.4404296875, "grad_norm": 0.23533281683921814, "learning_rate": 0.0003226249184445171, "loss": 1.8034, "step": 9020 }, { "epoch": 0.440478515625, "grad_norm": 0.2453976422548294, "learning_rate": 0.0003225906838562792, "loss": 1.8107, "step": 9021 }, { "epoch": 0.44052734375, "grad_norm": 0.23577868938446045, "learning_rate": 0.00032255644811464304, "loss": 1.8285, "step": 9022 }, { "epoch": 0.440576171875, "grad_norm": 0.22489416599273682, "learning_rate": 0.0003225222112204379, "loss": 1.8037, "step": 9023 }, { "epoch": 0.440625, "grad_norm": 0.23001404106616974, "learning_rate": 0.00032248797317449394, "loss": 1.8364, "step": 9024 }, { "epoch": 0.440673828125, "grad_norm": 0.21574483811855316, "learning_rate": 0.0003224537339776407, "loss": 1.8247, "step": 9025 }, { "epoch": 0.44072265625, "grad_norm": 0.21378055214881897, "learning_rate": 0.0003224194936307082, "loss": 1.8097, "step": 9026 }, { "epoch": 0.440771484375, "grad_norm": 0.2433730959892273, "learning_rate": 0.0003223852521345261, "loss": 1.803, "step": 9027 }, { "epoch": 0.4408203125, "grad_norm": 0.2831786572933197, "learning_rate": 0.0003223510094899244, "loss": 1.8194, "step": 9028 }, { "epoch": 0.440869140625, "grad_norm": 0.2643970549106598, "learning_rate": 0.0003223167656977329, "loss": 1.7981, "step": 9029 }, { "epoch": 0.44091796875, "grad_norm": 0.21524693071842194, "learning_rate": 0.0003222825207587816, "loss": 1.7951, "step": 9030 }, { "epoch": 0.440966796875, "grad_norm": 0.25222131609916687, "learning_rate": 0.0003222482746739004, "loss": 1.8199, "step": 9031 }, { "epoch": 0.441015625, "grad_norm": 0.3096858561038971, "learning_rate": 0.00032221402744391934, "loss": 1.7972, "step": 9032 }, { "epoch": 0.441064453125, "grad_norm": 0.22384822368621826, "learning_rate": 0.00032217977906966834, "loss": 1.7921, "step": 9033 }, { "epoch": 0.44111328125, "grad_norm": 0.24392083287239075, "learning_rate": 0.0003221455295519775, "loss": 1.8212, "step": 9034 }, { "epoch": 0.441162109375, "grad_norm": 0.27301767468452454, "learning_rate": 0.000322111278891677, "loss": 1.8214, "step": 9035 }, { "epoch": 0.4412109375, "grad_norm": 0.23608280718326569, "learning_rate": 0.0003220770270895966, "loss": 1.8052, "step": 9036 }, { "epoch": 0.441259765625, "grad_norm": 0.3221118152141571, "learning_rate": 0.00032204277414656676, "loss": 1.8175, "step": 9037 }, { "epoch": 0.44130859375, "grad_norm": 0.3031010329723358, "learning_rate": 0.00032200852006341744, "loss": 1.8219, "step": 9038 }, { "epoch": 0.441357421875, "grad_norm": 0.28139111399650574, "learning_rate": 0.0003219742648409789, "loss": 1.7815, "step": 9039 }, { "epoch": 0.44140625, "grad_norm": 0.24481897056102753, "learning_rate": 0.0003219400084800814, "loss": 1.8062, "step": 9040 }, { "epoch": 0.441455078125, "grad_norm": 0.26145273447036743, "learning_rate": 0.0003219057509815549, "loss": 1.8028, "step": 9041 }, { "epoch": 0.44150390625, "grad_norm": 0.3380846381187439, "learning_rate": 0.0003218714923462299, "loss": 1.8122, "step": 9042 }, { "epoch": 0.441552734375, "grad_norm": 0.272869735956192, "learning_rate": 0.0003218372325749367, "loss": 1.8008, "step": 9043 }, { "epoch": 0.4416015625, "grad_norm": 0.24359838664531708, "learning_rate": 0.00032180297166850547, "loss": 1.8178, "step": 9044 }, { "epoch": 0.441650390625, "grad_norm": 0.2989461421966553, "learning_rate": 0.0003217687096277666, "loss": 1.8304, "step": 9045 }, { "epoch": 0.44169921875, "grad_norm": 0.2839514911174774, "learning_rate": 0.0003217344464535506, "loss": 1.8104, "step": 9046 }, { "epoch": 0.441748046875, "grad_norm": 0.2143738567829132, "learning_rate": 0.00032170018214668767, "loss": 1.8134, "step": 9047 }, { "epoch": 0.441796875, "grad_norm": 0.290414035320282, "learning_rate": 0.00032166591670800834, "loss": 1.7928, "step": 9048 }, { "epoch": 0.441845703125, "grad_norm": 0.315510094165802, "learning_rate": 0.00032163165013834297, "loss": 1.7957, "step": 9049 }, { "epoch": 0.44189453125, "grad_norm": 0.2601642906665802, "learning_rate": 0.0003215973824385221, "loss": 1.8089, "step": 9050 }, { "epoch": 0.441943359375, "grad_norm": 0.24404172599315643, "learning_rate": 0.0003215631136093762, "loss": 1.8101, "step": 9051 }, { "epoch": 0.4419921875, "grad_norm": 0.2419821321964264, "learning_rate": 0.00032152884365173594, "loss": 1.8186, "step": 9052 }, { "epoch": 0.442041015625, "grad_norm": 0.27949419617652893, "learning_rate": 0.00032149457256643174, "loss": 1.8462, "step": 9053 }, { "epoch": 0.44208984375, "grad_norm": 0.2480122596025467, "learning_rate": 0.00032146030035429415, "loss": 1.8183, "step": 9054 }, { "epoch": 0.442138671875, "grad_norm": 0.2555403411388397, "learning_rate": 0.00032142602701615394, "loss": 1.8224, "step": 9055 }, { "epoch": 0.4421875, "grad_norm": 0.317143052816391, "learning_rate": 0.0003213917525528416, "loss": 1.8234, "step": 9056 }, { "epoch": 0.442236328125, "grad_norm": 0.28210362792015076, "learning_rate": 0.0003213574769651879, "loss": 1.8013, "step": 9057 }, { "epoch": 0.44228515625, "grad_norm": 0.2573387622833252, "learning_rate": 0.00032132320025402353, "loss": 1.814, "step": 9058 }, { "epoch": 0.442333984375, "grad_norm": 0.30002668499946594, "learning_rate": 0.00032128892242017917, "loss": 1.8188, "step": 9059 }, { "epoch": 0.4423828125, "grad_norm": 0.31045058369636536, "learning_rate": 0.00032125464346448556, "loss": 1.8092, "step": 9060 }, { "epoch": 0.442431640625, "grad_norm": 0.23609234392642975, "learning_rate": 0.0003212203633877734, "loss": 1.7972, "step": 9061 }, { "epoch": 0.44248046875, "grad_norm": 0.24903708696365356, "learning_rate": 0.00032118608219087373, "loss": 1.7871, "step": 9062 }, { "epoch": 0.442529296875, "grad_norm": 0.2537330687046051, "learning_rate": 0.00032115179987461727, "loss": 1.8151, "step": 9063 }, { "epoch": 0.442578125, "grad_norm": 0.21050071716308594, "learning_rate": 0.0003211175164398349, "loss": 1.8107, "step": 9064 }, { "epoch": 0.442626953125, "grad_norm": 0.2530438005924225, "learning_rate": 0.0003210832318873574, "loss": 1.8124, "step": 9065 }, { "epoch": 0.44267578125, "grad_norm": 0.2709430456161499, "learning_rate": 0.00032104894621801577, "loss": 1.7894, "step": 9066 }, { "epoch": 0.442724609375, "grad_norm": 0.22101537883281708, "learning_rate": 0.0003210146594326409, "loss": 1.8192, "step": 9067 }, { "epoch": 0.4427734375, "grad_norm": 0.21056626737117767, "learning_rate": 0.0003209803715320638, "loss": 1.7934, "step": 9068 }, { "epoch": 0.442822265625, "grad_norm": 0.22199265658855438, "learning_rate": 0.00032094608251711544, "loss": 1.7868, "step": 9069 }, { "epoch": 0.44287109375, "grad_norm": 0.19560422003269196, "learning_rate": 0.0003209117923886269, "loss": 1.8388, "step": 9070 }, { "epoch": 0.442919921875, "grad_norm": 0.20932221412658691, "learning_rate": 0.0003208775011474291, "loss": 1.8159, "step": 9071 }, { "epoch": 0.44296875, "grad_norm": 0.2204579859972, "learning_rate": 0.0003208432087943533, "loss": 1.8019, "step": 9072 }, { "epoch": 0.443017578125, "grad_norm": 0.24673885107040405, "learning_rate": 0.0003208089153302305, "loss": 1.7935, "step": 9073 }, { "epoch": 0.44306640625, "grad_norm": 0.2730766236782074, "learning_rate": 0.0003207746207558918, "loss": 1.8063, "step": 9074 }, { "epoch": 0.443115234375, "grad_norm": 0.2628431022167206, "learning_rate": 0.00032074032507216837, "loss": 1.8012, "step": 9075 }, { "epoch": 0.4431640625, "grad_norm": 0.21435429155826569, "learning_rate": 0.0003207060282798915, "loss": 1.7889, "step": 9076 }, { "epoch": 0.443212890625, "grad_norm": 0.21728180348873138, "learning_rate": 0.0003206717303798922, "loss": 1.805, "step": 9077 }, { "epoch": 0.44326171875, "grad_norm": 0.25750938057899475, "learning_rate": 0.0003206374313730019, "loss": 1.841, "step": 9078 }, { "epoch": 0.443310546875, "grad_norm": 0.303343802690506, "learning_rate": 0.00032060313126005174, "loss": 1.7951, "step": 9079 }, { "epoch": 0.443359375, "grad_norm": 0.31632697582244873, "learning_rate": 0.0003205688300418731, "loss": 1.8175, "step": 9080 }, { "epoch": 0.443408203125, "grad_norm": 0.25094956159591675, "learning_rate": 0.0003205345277192973, "loss": 1.7997, "step": 9081 }, { "epoch": 0.44345703125, "grad_norm": 0.3360791802406311, "learning_rate": 0.00032050022429315554, "loss": 1.8348, "step": 9082 }, { "epoch": 0.443505859375, "grad_norm": 0.34648996591567993, "learning_rate": 0.00032046591976427936, "loss": 1.8101, "step": 9083 }, { "epoch": 0.4435546875, "grad_norm": 0.2796393036842346, "learning_rate": 0.0003204316141335, "loss": 1.8085, "step": 9084 }, { "epoch": 0.443603515625, "grad_norm": 0.28897011280059814, "learning_rate": 0.000320397307401649, "loss": 1.8023, "step": 9085 }, { "epoch": 0.44365234375, "grad_norm": 0.3294229209423065, "learning_rate": 0.00032036299956955786, "loss": 1.7988, "step": 9086 }, { "epoch": 0.443701171875, "grad_norm": 0.32109740376472473, "learning_rate": 0.00032032869063805785, "loss": 1.8236, "step": 9087 }, { "epoch": 0.44375, "grad_norm": 0.3127068877220154, "learning_rate": 0.0003202943806079807, "loss": 1.8184, "step": 9088 }, { "epoch": 0.443798828125, "grad_norm": 0.2914387583732605, "learning_rate": 0.0003202600694801579, "loss": 1.8179, "step": 9089 }, { "epoch": 0.44384765625, "grad_norm": 0.24169720709323883, "learning_rate": 0.0003202257572554209, "loss": 1.7858, "step": 9090 }, { "epoch": 0.443896484375, "grad_norm": 0.3082515299320221, "learning_rate": 0.00032019144393460126, "loss": 1.7783, "step": 9091 }, { "epoch": 0.4439453125, "grad_norm": 0.22373421490192413, "learning_rate": 0.0003201571295185308, "loss": 1.8195, "step": 9092 }, { "epoch": 0.443994140625, "grad_norm": 0.25862422585487366, "learning_rate": 0.0003201228140080409, "loss": 1.8053, "step": 9093 }, { "epoch": 0.44404296875, "grad_norm": 0.2691529095172882, "learning_rate": 0.00032008849740396345, "loss": 1.8177, "step": 9094 }, { "epoch": 0.444091796875, "grad_norm": 0.24259601533412933, "learning_rate": 0.00032005417970713, "loss": 1.7983, "step": 9095 }, { "epoch": 0.444140625, "grad_norm": 0.2516557276248932, "learning_rate": 0.00032001986091837224, "loss": 1.8078, "step": 9096 }, { "epoch": 0.444189453125, "grad_norm": 0.25819525122642517, "learning_rate": 0.00031998554103852204, "loss": 1.8058, "step": 9097 }, { "epoch": 0.44423828125, "grad_norm": 0.2639573812484741, "learning_rate": 0.0003199512200684112, "loss": 1.7927, "step": 9098 }, { "epoch": 0.444287109375, "grad_norm": 0.23852761089801788, "learning_rate": 0.00031991689800887137, "loss": 1.8044, "step": 9099 }, { "epoch": 0.4443359375, "grad_norm": 0.26222777366638184, "learning_rate": 0.0003198825748607344, "loss": 1.7976, "step": 9100 }, { "epoch": 0.444384765625, "grad_norm": 0.2523859143257141, "learning_rate": 0.0003198482506248322, "loss": 1.8116, "step": 9101 }, { "epoch": 0.44443359375, "grad_norm": 0.2527049481868744, "learning_rate": 0.0003198139253019966, "loss": 1.7976, "step": 9102 }, { "epoch": 0.444482421875, "grad_norm": 0.20831257104873657, "learning_rate": 0.00031977959889305946, "loss": 1.802, "step": 9103 }, { "epoch": 0.44453125, "grad_norm": 0.2683294117450714, "learning_rate": 0.00031974527139885285, "loss": 1.809, "step": 9104 }, { "epoch": 0.444580078125, "grad_norm": 0.20936788618564606, "learning_rate": 0.00031971094282020864, "loss": 1.8045, "step": 9105 }, { "epoch": 0.44462890625, "grad_norm": 0.2758963108062744, "learning_rate": 0.00031967661315795883, "loss": 1.7994, "step": 9106 }, { "epoch": 0.444677734375, "grad_norm": 0.2413489818572998, "learning_rate": 0.00031964228241293535, "loss": 1.8116, "step": 9107 }, { "epoch": 0.4447265625, "grad_norm": 0.254524827003479, "learning_rate": 0.0003196079505859703, "loss": 1.8067, "step": 9108 }, { "epoch": 0.444775390625, "grad_norm": 0.2641805410385132, "learning_rate": 0.0003195736176778957, "loss": 1.8056, "step": 9109 }, { "epoch": 0.44482421875, "grad_norm": 0.2656175196170807, "learning_rate": 0.0003195392836895437, "loss": 1.8351, "step": 9110 }, { "epoch": 0.444873046875, "grad_norm": 0.2516224980354309, "learning_rate": 0.00031950494862174634, "loss": 1.8145, "step": 9111 }, { "epoch": 0.444921875, "grad_norm": 0.23183944821357727, "learning_rate": 0.0003194706124753359, "loss": 1.8182, "step": 9112 }, { "epoch": 0.444970703125, "grad_norm": 0.2733907699584961, "learning_rate": 0.00031943627525114434, "loss": 1.8198, "step": 9113 }, { "epoch": 0.44501953125, "grad_norm": 0.32752102613449097, "learning_rate": 0.000319401936950004, "loss": 1.7901, "step": 9114 }, { "epoch": 0.445068359375, "grad_norm": 0.27731019258499146, "learning_rate": 0.000319367597572747, "loss": 1.786, "step": 9115 }, { "epoch": 0.4451171875, "grad_norm": 0.33441677689552307, "learning_rate": 0.0003193332571202057, "loss": 1.8015, "step": 9116 }, { "epoch": 0.445166015625, "grad_norm": 0.3188410699367523, "learning_rate": 0.0003192989155932122, "loss": 1.8048, "step": 9117 }, { "epoch": 0.44521484375, "grad_norm": 0.22584623098373413, "learning_rate": 0.00031926457299259894, "loss": 1.8072, "step": 9118 }, { "epoch": 0.445263671875, "grad_norm": 0.2467917948961258, "learning_rate": 0.0003192302293191982, "loss": 1.8171, "step": 9119 }, { "epoch": 0.4453125, "grad_norm": 0.3040795624256134, "learning_rate": 0.0003191958845738423, "loss": 1.8174, "step": 9120 }, { "epoch": 0.445361328125, "grad_norm": 0.2829764783382416, "learning_rate": 0.00031916153875736363, "loss": 1.8011, "step": 9121 }, { "epoch": 0.44541015625, "grad_norm": 0.1928102970123291, "learning_rate": 0.00031912719187059466, "loss": 1.7817, "step": 9122 }, { "epoch": 0.445458984375, "grad_norm": 0.2986619174480438, "learning_rate": 0.0003190928439143677, "loss": 1.8069, "step": 9123 }, { "epoch": 0.4455078125, "grad_norm": 0.2945336103439331, "learning_rate": 0.00031905849488951516, "loss": 1.8306, "step": 9124 }, { "epoch": 0.445556640625, "grad_norm": 0.2813860774040222, "learning_rate": 0.0003190241447968697, "loss": 1.8217, "step": 9125 }, { "epoch": 0.44560546875, "grad_norm": 0.3668298125267029, "learning_rate": 0.00031898979363726365, "loss": 1.7945, "step": 9126 }, { "epoch": 0.445654296875, "grad_norm": 0.22937986254692078, "learning_rate": 0.00031895544141152967, "loss": 1.8002, "step": 9127 }, { "epoch": 0.445703125, "grad_norm": 0.33111777901649475, "learning_rate": 0.00031892108812050027, "loss": 1.8123, "step": 9128 }, { "epoch": 0.445751953125, "grad_norm": 0.37164437770843506, "learning_rate": 0.00031888673376500795, "loss": 1.8026, "step": 9129 }, { "epoch": 0.44580078125, "grad_norm": 0.34270331263542175, "learning_rate": 0.0003188523783458854, "loss": 1.824, "step": 9130 }, { "epoch": 0.445849609375, "grad_norm": 0.30741384625434875, "learning_rate": 0.0003188180218639653, "loss": 1.799, "step": 9131 }, { "epoch": 0.4458984375, "grad_norm": 0.361571341753006, "learning_rate": 0.0003187836643200802, "loss": 1.8135, "step": 9132 }, { "epoch": 0.445947265625, "grad_norm": 0.2963132858276367, "learning_rate": 0.0003187493057150627, "loss": 1.8364, "step": 9133 }, { "epoch": 0.44599609375, "grad_norm": 0.3288489580154419, "learning_rate": 0.00031871494604974576, "loss": 1.8051, "step": 9134 }, { "epoch": 0.446044921875, "grad_norm": 0.4059455394744873, "learning_rate": 0.0003186805853249619, "loss": 1.8054, "step": 9135 }, { "epoch": 0.44609375, "grad_norm": 0.2802565395832062, "learning_rate": 0.00031864622354154407, "loss": 1.8124, "step": 9136 }, { "epoch": 0.446142578125, "grad_norm": 0.3835495412349701, "learning_rate": 0.0003186118607003248, "loss": 1.8113, "step": 9137 }, { "epoch": 0.44619140625, "grad_norm": 0.2845182716846466, "learning_rate": 0.00031857749680213714, "loss": 1.8206, "step": 9138 }, { "epoch": 0.446240234375, "grad_norm": 0.293788880109787, "learning_rate": 0.0003185431318478139, "loss": 1.8184, "step": 9139 }, { "epoch": 0.4462890625, "grad_norm": 0.36875179409980774, "learning_rate": 0.0003185087658381878, "loss": 1.8225, "step": 9140 }, { "epoch": 0.446337890625, "grad_norm": 0.27901461720466614, "learning_rate": 0.0003184743987740918, "loss": 1.8131, "step": 9141 }, { "epoch": 0.44638671875, "grad_norm": 0.35296452045440674, "learning_rate": 0.00031844003065635877, "loss": 1.818, "step": 9142 }, { "epoch": 0.446435546875, "grad_norm": 0.31960105895996094, "learning_rate": 0.0003184056614858218, "loss": 1.8129, "step": 9143 }, { "epoch": 0.446484375, "grad_norm": 0.25170987844467163, "learning_rate": 0.0003183712912633137, "loss": 1.7902, "step": 9144 }, { "epoch": 0.446533203125, "grad_norm": 0.31126680970191956, "learning_rate": 0.0003183369199896675, "loss": 1.8258, "step": 9145 }, { "epoch": 0.44658203125, "grad_norm": 0.25075212121009827, "learning_rate": 0.0003183025476657163, "loss": 1.8189, "step": 9146 }, { "epoch": 0.446630859375, "grad_norm": 0.324682354927063, "learning_rate": 0.00031826817429229297, "loss": 1.8118, "step": 9147 }, { "epoch": 0.4466796875, "grad_norm": 0.29957863688468933, "learning_rate": 0.0003182337998702308, "loss": 1.8359, "step": 9148 }, { "epoch": 0.446728515625, "grad_norm": 0.26935049891471863, "learning_rate": 0.00031819942440036264, "loss": 1.8389, "step": 9149 }, { "epoch": 0.44677734375, "grad_norm": 0.25321146845817566, "learning_rate": 0.0003181650478835217, "loss": 1.8063, "step": 9150 }, { "epoch": 0.446826171875, "grad_norm": 0.23945902287960052, "learning_rate": 0.0003181306703205413, "loss": 1.8049, "step": 9151 }, { "epoch": 0.446875, "grad_norm": 0.2678757309913635, "learning_rate": 0.0003180962917122543, "loss": 1.8048, "step": 9152 }, { "epoch": 0.446923828125, "grad_norm": 0.29819992184638977, "learning_rate": 0.0003180619120594941, "loss": 1.8032, "step": 9153 }, { "epoch": 0.44697265625, "grad_norm": 0.23902273178100586, "learning_rate": 0.00031802753136309385, "loss": 1.7842, "step": 9154 }, { "epoch": 0.447021484375, "grad_norm": 0.2856190800666809, "learning_rate": 0.0003179931496238869, "loss": 1.8239, "step": 9155 }, { "epoch": 0.4470703125, "grad_norm": 0.25678396224975586, "learning_rate": 0.0003179587668427064, "loss": 1.7885, "step": 9156 }, { "epoch": 0.447119140625, "grad_norm": 0.24745352566242218, "learning_rate": 0.0003179243830203856, "loss": 1.7797, "step": 9157 }, { "epoch": 0.44716796875, "grad_norm": 0.25167518854141235, "learning_rate": 0.0003178899981577579, "loss": 1.8227, "step": 9158 }, { "epoch": 0.447216796875, "grad_norm": 0.24483272433280945, "learning_rate": 0.0003178556122556567, "loss": 1.8033, "step": 9159 }, { "epoch": 0.447265625, "grad_norm": 0.22172079980373383, "learning_rate": 0.0003178212253149153, "loss": 1.8169, "step": 9160 }, { "epoch": 0.447314453125, "grad_norm": 0.2143300473690033, "learning_rate": 0.00031778683733636704, "loss": 1.7884, "step": 9161 }, { "epoch": 0.44736328125, "grad_norm": 0.24249054491519928, "learning_rate": 0.00031775244832084543, "loss": 1.8035, "step": 9162 }, { "epoch": 0.447412109375, "grad_norm": 0.22980356216430664, "learning_rate": 0.0003177180582691839, "loss": 1.787, "step": 9163 }, { "epoch": 0.4474609375, "grad_norm": 0.21653641760349274, "learning_rate": 0.00031768366718221586, "loss": 1.8087, "step": 9164 }, { "epoch": 0.447509765625, "grad_norm": 0.21478025615215302, "learning_rate": 0.000317649275060775, "loss": 1.8074, "step": 9165 }, { "epoch": 0.44755859375, "grad_norm": 0.2296736091375351, "learning_rate": 0.0003176148819056946, "loss": 1.8078, "step": 9166 }, { "epoch": 0.447607421875, "grad_norm": 0.23772108554840088, "learning_rate": 0.00031758048771780823, "loss": 1.8085, "step": 9167 }, { "epoch": 0.44765625, "grad_norm": 0.24223992228507996, "learning_rate": 0.0003175460924979496, "loss": 1.807, "step": 9168 }, { "epoch": 0.447705078125, "grad_norm": 0.2552187740802765, "learning_rate": 0.0003175116962469522, "loss": 1.8007, "step": 9169 }, { "epoch": 0.44775390625, "grad_norm": 0.22888405621051788, "learning_rate": 0.00031747729896564975, "loss": 1.7845, "step": 9170 }, { "epoch": 0.447802734375, "grad_norm": 0.23438696563243866, "learning_rate": 0.0003174429006548758, "loss": 1.7969, "step": 9171 }, { "epoch": 0.4478515625, "grad_norm": 0.22627003490924835, "learning_rate": 0.00031740850131546406, "loss": 1.8044, "step": 9172 }, { "epoch": 0.447900390625, "grad_norm": 0.19841429591178894, "learning_rate": 0.0003173741009482482, "loss": 1.8143, "step": 9173 }, { "epoch": 0.44794921875, "grad_norm": 0.22702763974666595, "learning_rate": 0.000317339699554062, "loss": 1.8049, "step": 9174 }, { "epoch": 0.447998046875, "grad_norm": 0.3259626626968384, "learning_rate": 0.00031730529713373915, "loss": 1.8328, "step": 9175 }, { "epoch": 0.448046875, "grad_norm": 0.3365328907966614, "learning_rate": 0.0003172708936881134, "loss": 1.8101, "step": 9176 }, { "epoch": 0.448095703125, "grad_norm": 0.2981555163860321, "learning_rate": 0.0003172364892180186, "loss": 1.7957, "step": 9177 }, { "epoch": 0.44814453125, "grad_norm": 0.279069185256958, "learning_rate": 0.0003172020837242886, "loss": 1.7993, "step": 9178 }, { "epoch": 0.448193359375, "grad_norm": 0.26336508989334106, "learning_rate": 0.00031716767720775714, "loss": 1.8125, "step": 9179 }, { "epoch": 0.4482421875, "grad_norm": 0.3597520887851715, "learning_rate": 0.00031713326966925816, "loss": 1.8081, "step": 9180 }, { "epoch": 0.448291015625, "grad_norm": 0.3454259932041168, "learning_rate": 0.00031709886110962553, "loss": 1.7984, "step": 9181 }, { "epoch": 0.44833984375, "grad_norm": 0.23561061918735504, "learning_rate": 0.00031706445152969323, "loss": 1.8093, "step": 9182 }, { "epoch": 0.448388671875, "grad_norm": 0.31948143243789673, "learning_rate": 0.00031703004093029506, "loss": 1.8201, "step": 9183 }, { "epoch": 0.4484375, "grad_norm": 0.3385539650917053, "learning_rate": 0.00031699562931226515, "loss": 1.7875, "step": 9184 }, { "epoch": 0.448486328125, "grad_norm": 0.30385449528694153, "learning_rate": 0.00031696121667643747, "loss": 1.7991, "step": 9185 }, { "epoch": 0.44853515625, "grad_norm": 0.26928526163101196, "learning_rate": 0.0003169268030236459, "loss": 1.8127, "step": 9186 }, { "epoch": 0.448583984375, "grad_norm": 0.2599039673805237, "learning_rate": 0.00031689238835472467, "loss": 1.7822, "step": 9187 }, { "epoch": 0.4486328125, "grad_norm": 0.2547436058521271, "learning_rate": 0.0003168579726705077, "loss": 1.7928, "step": 9188 }, { "epoch": 0.448681640625, "grad_norm": 0.28759583830833435, "learning_rate": 0.0003168235559718291, "loss": 1.785, "step": 9189 }, { "epoch": 0.44873046875, "grad_norm": 0.22924168407917023, "learning_rate": 0.00031678913825952307, "loss": 1.8238, "step": 9190 }, { "epoch": 0.448779296875, "grad_norm": 0.3082193434238434, "learning_rate": 0.00031675471953442356, "loss": 1.7892, "step": 9191 }, { "epoch": 0.448828125, "grad_norm": 0.2827984690666199, "learning_rate": 0.000316720299797365, "loss": 1.8151, "step": 9192 }, { "epoch": 0.448876953125, "grad_norm": 0.2604532837867737, "learning_rate": 0.0003166858790491815, "loss": 1.8148, "step": 9193 }, { "epoch": 0.44892578125, "grad_norm": 0.3212272822856903, "learning_rate": 0.0003166514572907072, "loss": 1.8028, "step": 9194 }, { "epoch": 0.448974609375, "grad_norm": 0.22671395540237427, "learning_rate": 0.00031661703452277634, "loss": 1.7868, "step": 9195 }, { "epoch": 0.4490234375, "grad_norm": 0.295337975025177, "learning_rate": 0.0003165826107462232, "loss": 1.8294, "step": 9196 }, { "epoch": 0.449072265625, "grad_norm": 0.28208687901496887, "learning_rate": 0.00031654818596188217, "loss": 1.8316, "step": 9197 }, { "epoch": 0.44912109375, "grad_norm": 0.20683160424232483, "learning_rate": 0.0003165137601705874, "loss": 1.8106, "step": 9198 }, { "epoch": 0.449169921875, "grad_norm": 0.28847363591194153, "learning_rate": 0.00031647933337317324, "loss": 1.7881, "step": 9199 }, { "epoch": 0.44921875, "grad_norm": 0.23992401361465454, "learning_rate": 0.0003164449055704742, "loss": 1.7825, "step": 9200 }, { "epoch": 0.449267578125, "grad_norm": 0.22993652522563934, "learning_rate": 0.00031641047676332455, "loss": 1.849, "step": 9201 }, { "epoch": 0.44931640625, "grad_norm": 0.23413346707820892, "learning_rate": 0.0003163760469525588, "loss": 1.7912, "step": 9202 }, { "epoch": 0.449365234375, "grad_norm": 0.24886535108089447, "learning_rate": 0.0003163416161390112, "loss": 1.8103, "step": 9203 }, { "epoch": 0.4494140625, "grad_norm": 0.27453354001045227, "learning_rate": 0.0003163071843235164, "loss": 1.8124, "step": 9204 }, { "epoch": 0.449462890625, "grad_norm": 0.22200128436088562, "learning_rate": 0.0003162727515069088, "loss": 1.8001, "step": 9205 }, { "epoch": 0.44951171875, "grad_norm": 0.27039799094200134, "learning_rate": 0.0003162383176900229, "loss": 1.8071, "step": 9206 }, { "epoch": 0.449560546875, "grad_norm": 0.32065683603286743, "learning_rate": 0.00031620388287369324, "loss": 1.7948, "step": 9207 }, { "epoch": 0.449609375, "grad_norm": 0.271322101354599, "learning_rate": 0.00031616944705875426, "loss": 1.8242, "step": 9208 }, { "epoch": 0.449658203125, "grad_norm": 0.26310136914253235, "learning_rate": 0.0003161350102460407, "loss": 1.8126, "step": 9209 }, { "epoch": 0.44970703125, "grad_norm": 0.2545687258243561, "learning_rate": 0.00031610057243638723, "loss": 1.7901, "step": 9210 }, { "epoch": 0.449755859375, "grad_norm": 0.2625712454319, "learning_rate": 0.0003160661336306283, "loss": 1.8117, "step": 9211 }, { "epoch": 0.4498046875, "grad_norm": 0.2760826349258423, "learning_rate": 0.00031603169382959863, "loss": 1.8082, "step": 9212 }, { "epoch": 0.449853515625, "grad_norm": 0.29139310121536255, "learning_rate": 0.0003159972530341329, "loss": 1.7984, "step": 9213 }, { "epoch": 0.44990234375, "grad_norm": 0.23953352868556976, "learning_rate": 0.00031596281124506584, "loss": 1.791, "step": 9214 }, { "epoch": 0.449951171875, "grad_norm": 0.2175801396369934, "learning_rate": 0.00031592836846323214, "loss": 1.8286, "step": 9215 }, { "epoch": 0.45, "grad_norm": 0.26674684882164, "learning_rate": 0.0003158939246894665, "loss": 1.8029, "step": 9216 }, { "epoch": 0.450048828125, "grad_norm": 0.24701689183712006, "learning_rate": 0.00031585947992460373, "loss": 1.8269, "step": 9217 }, { "epoch": 0.45009765625, "grad_norm": 0.27073973417282104, "learning_rate": 0.00031582503416947865, "loss": 1.8126, "step": 9218 }, { "epoch": 0.450146484375, "grad_norm": 0.28755760192871094, "learning_rate": 0.0003157905874249261, "loss": 1.8105, "step": 9219 }, { "epoch": 0.4501953125, "grad_norm": 0.25802522897720337, "learning_rate": 0.00031575613969178087, "loss": 1.7975, "step": 9220 }, { "epoch": 0.450244140625, "grad_norm": 0.28736957907676697, "learning_rate": 0.0003157216909708778, "loss": 1.7975, "step": 9221 }, { "epoch": 0.45029296875, "grad_norm": 0.3103814125061035, "learning_rate": 0.00031568724126305195, "loss": 1.8231, "step": 9222 }, { "epoch": 0.450341796875, "grad_norm": 0.2656412124633789, "learning_rate": 0.000315652790569138, "loss": 1.8058, "step": 9223 }, { "epoch": 0.450390625, "grad_norm": 0.295291543006897, "learning_rate": 0.00031561833888997114, "loss": 1.7867, "step": 9224 }, { "epoch": 0.450439453125, "grad_norm": 0.3359649181365967, "learning_rate": 0.0003155838862263862, "loss": 1.7992, "step": 9225 }, { "epoch": 0.45048828125, "grad_norm": 0.33258306980133057, "learning_rate": 0.00031554943257921804, "loss": 1.7955, "step": 9226 }, { "epoch": 0.450537109375, "grad_norm": 0.3827780783176422, "learning_rate": 0.0003155149779493019, "loss": 1.8088, "step": 9227 }, { "epoch": 0.4505859375, "grad_norm": 0.21153396368026733, "learning_rate": 0.00031548052233747274, "loss": 1.8232, "step": 9228 }, { "epoch": 0.450634765625, "grad_norm": 0.329586386680603, "learning_rate": 0.0003154460657445655, "loss": 1.7776, "step": 9229 }, { "epoch": 0.45068359375, "grad_norm": 0.2603997588157654, "learning_rate": 0.0003154116081714154, "loss": 1.7921, "step": 9230 }, { "epoch": 0.450732421875, "grad_norm": 0.29212668538093567, "learning_rate": 0.00031537714961885755, "loss": 1.7995, "step": 9231 }, { "epoch": 0.45078125, "grad_norm": 0.21538510918617249, "learning_rate": 0.000315342690087727, "loss": 1.8131, "step": 9232 }, { "epoch": 0.450830078125, "grad_norm": 0.2915816605091095, "learning_rate": 0.000315308229578859, "loss": 1.8031, "step": 9233 }, { "epoch": 0.45087890625, "grad_norm": 0.2905765473842621, "learning_rate": 0.0003152737680930886, "loss": 1.8009, "step": 9234 }, { "epoch": 0.450927734375, "grad_norm": 0.27468734979629517, "learning_rate": 0.00031523930563125114, "loss": 1.7869, "step": 9235 }, { "epoch": 0.4509765625, "grad_norm": 0.3245835304260254, "learning_rate": 0.0003152048421941817, "loss": 1.7978, "step": 9236 }, { "epoch": 0.451025390625, "grad_norm": 0.3590414226055145, "learning_rate": 0.0003151703777827157, "loss": 1.8035, "step": 9237 }, { "epoch": 0.45107421875, "grad_norm": 0.2807811200618744, "learning_rate": 0.00031513591239768824, "loss": 1.8094, "step": 9238 }, { "epoch": 0.451123046875, "grad_norm": 0.3025124669075012, "learning_rate": 0.00031510144603993473, "loss": 1.8331, "step": 9239 }, { "epoch": 0.451171875, "grad_norm": 0.2960816025733948, "learning_rate": 0.00031506697871029046, "loss": 1.814, "step": 9240 }, { "epoch": 0.451220703125, "grad_norm": 0.24562789499759674, "learning_rate": 0.0003150325104095908, "loss": 1.8145, "step": 9241 }, { "epoch": 0.45126953125, "grad_norm": 0.2614452838897705, "learning_rate": 0.00031499804113867106, "loss": 1.8042, "step": 9242 }, { "epoch": 0.451318359375, "grad_norm": 0.2676730751991272, "learning_rate": 0.0003149635708983667, "loss": 1.8046, "step": 9243 }, { "epoch": 0.4513671875, "grad_norm": 0.21160787343978882, "learning_rate": 0.0003149290996895131, "loss": 1.7983, "step": 9244 }, { "epoch": 0.451416015625, "grad_norm": 0.2569763660430908, "learning_rate": 0.00031489462751294555, "loss": 1.8081, "step": 9245 }, { "epoch": 0.45146484375, "grad_norm": 0.242111474275589, "learning_rate": 0.0003148601543694998, "loss": 1.7774, "step": 9246 }, { "epoch": 0.451513671875, "grad_norm": 0.23622676730155945, "learning_rate": 0.00031482568026001105, "loss": 1.8244, "step": 9247 }, { "epoch": 0.4515625, "grad_norm": 0.22969810664653778, "learning_rate": 0.00031479120518531506, "loss": 1.8189, "step": 9248 }, { "epoch": 0.451611328125, "grad_norm": 0.24798771739006042, "learning_rate": 0.0003147567291462473, "loss": 1.786, "step": 9249 }, { "epoch": 0.45166015625, "grad_norm": 0.2330668419599533, "learning_rate": 0.0003147222521436431, "loss": 1.8018, "step": 9250 }, { "epoch": 0.451708984375, "grad_norm": 0.2452295422554016, "learning_rate": 0.0003146877741783383, "loss": 1.8015, "step": 9251 }, { "epoch": 0.4517578125, "grad_norm": 0.2358589470386505, "learning_rate": 0.00031465329525116843, "loss": 1.8067, "step": 9252 }, { "epoch": 0.451806640625, "grad_norm": 0.2490999400615692, "learning_rate": 0.00031461881536296905, "loss": 1.7995, "step": 9253 }, { "epoch": 0.45185546875, "grad_norm": 0.22571150958538055, "learning_rate": 0.00031458433451457584, "loss": 1.8106, "step": 9254 }, { "epoch": 0.451904296875, "grad_norm": 0.23690713942050934, "learning_rate": 0.0003145498527068245, "loss": 1.8127, "step": 9255 }, { "epoch": 0.451953125, "grad_norm": 0.27383896708488464, "learning_rate": 0.0003145153699405507, "loss": 1.7982, "step": 9256 }, { "epoch": 0.452001953125, "grad_norm": 0.28843259811401367, "learning_rate": 0.00031448088621659014, "loss": 1.7879, "step": 9257 }, { "epoch": 0.45205078125, "grad_norm": 0.29091140627861023, "learning_rate": 0.00031444640153577866, "loss": 1.7891, "step": 9258 }, { "epoch": 0.452099609375, "grad_norm": 0.24432042241096497, "learning_rate": 0.0003144119158989519, "loss": 1.8209, "step": 9259 }, { "epoch": 0.4521484375, "grad_norm": 0.26408839225769043, "learning_rate": 0.00031437742930694575, "loss": 1.7763, "step": 9260 }, { "epoch": 0.452197265625, "grad_norm": 0.2903217375278473, "learning_rate": 0.00031434294176059584, "loss": 1.8222, "step": 9261 }, { "epoch": 0.45224609375, "grad_norm": 0.30336418747901917, "learning_rate": 0.0003143084532607382, "loss": 1.819, "step": 9262 }, { "epoch": 0.452294921875, "grad_norm": 0.313700407743454, "learning_rate": 0.00031427396380820865, "loss": 1.8063, "step": 9263 }, { "epoch": 0.45234375, "grad_norm": 0.2676842212677002, "learning_rate": 0.00031423947340384294, "loss": 1.8432, "step": 9264 }, { "epoch": 0.452392578125, "grad_norm": 0.3069399893283844, "learning_rate": 0.00031420498204847715, "loss": 1.7929, "step": 9265 }, { "epoch": 0.45244140625, "grad_norm": 0.2776414155960083, "learning_rate": 0.00031417048974294716, "loss": 1.793, "step": 9266 }, { "epoch": 0.452490234375, "grad_norm": 0.25979211926460266, "learning_rate": 0.00031413599648808877, "loss": 1.7878, "step": 9267 }, { "epoch": 0.4525390625, "grad_norm": 0.28992632031440735, "learning_rate": 0.0003141015022847381, "loss": 1.8067, "step": 9268 }, { "epoch": 0.452587890625, "grad_norm": 0.32394924759864807, "learning_rate": 0.0003140670071337312, "loss": 1.8125, "step": 9269 }, { "epoch": 0.45263671875, "grad_norm": 0.27708715200424194, "learning_rate": 0.00031403251103590387, "loss": 1.7864, "step": 9270 }, { "epoch": 0.452685546875, "grad_norm": 0.2794702351093292, "learning_rate": 0.0003139980139920923, "loss": 1.8108, "step": 9271 }, { "epoch": 0.452734375, "grad_norm": 0.28588640689849854, "learning_rate": 0.0003139635160031326, "loss": 1.7826, "step": 9272 }, { "epoch": 0.452783203125, "grad_norm": 0.23698222637176514, "learning_rate": 0.0003139290170698607, "loss": 1.7998, "step": 9273 }, { "epoch": 0.45283203125, "grad_norm": 0.2442900687456131, "learning_rate": 0.0003138945171931128, "loss": 1.8108, "step": 9274 }, { "epoch": 0.452880859375, "grad_norm": 0.24547262489795685, "learning_rate": 0.0003138600163737252, "loss": 1.8054, "step": 9275 }, { "epoch": 0.4529296875, "grad_norm": 0.2698270082473755, "learning_rate": 0.0003138255146125337, "loss": 1.7889, "step": 9276 }, { "epoch": 0.452978515625, "grad_norm": 0.2505856156349182, "learning_rate": 0.0003137910119103747, "loss": 1.7983, "step": 9277 }, { "epoch": 0.45302734375, "grad_norm": 0.2825734615325928, "learning_rate": 0.0003137565082680844, "loss": 1.797, "step": 9278 }, { "epoch": 0.453076171875, "grad_norm": 0.23599229753017426, "learning_rate": 0.00031372200368649897, "loss": 1.8102, "step": 9279 }, { "epoch": 0.453125, "grad_norm": 0.25019821524620056, "learning_rate": 0.00031368749816645464, "loss": 1.8201, "step": 9280 }, { "epoch": 0.453173828125, "grad_norm": 0.3404862880706787, "learning_rate": 0.0003136529917087877, "loss": 1.8028, "step": 9281 }, { "epoch": 0.45322265625, "grad_norm": 0.3212023079395294, "learning_rate": 0.0003136184843143345, "loss": 1.8313, "step": 9282 }, { "epoch": 0.453271484375, "grad_norm": 0.2310871183872223, "learning_rate": 0.00031358397598393135, "loss": 1.8149, "step": 9283 }, { "epoch": 0.4533203125, "grad_norm": 0.3932369351387024, "learning_rate": 0.00031354946671841445, "loss": 1.799, "step": 9284 }, { "epoch": 0.453369140625, "grad_norm": 0.35352635383605957, "learning_rate": 0.00031351495651862026, "loss": 1.8071, "step": 9285 }, { "epoch": 0.45341796875, "grad_norm": 0.325234591960907, "learning_rate": 0.00031348044538538524, "loss": 1.7993, "step": 9286 }, { "epoch": 0.453466796875, "grad_norm": 0.43705764412879944, "learning_rate": 0.00031344593331954565, "loss": 1.8057, "step": 9287 }, { "epoch": 0.453515625, "grad_norm": 0.25744110345840454, "learning_rate": 0.00031341142032193794, "loss": 1.8255, "step": 9288 }, { "epoch": 0.453564453125, "grad_norm": 0.31156083941459656, "learning_rate": 0.0003133769063933987, "loss": 1.8089, "step": 9289 }, { "epoch": 0.45361328125, "grad_norm": 0.33607715368270874, "learning_rate": 0.0003133423915347642, "loss": 1.8192, "step": 9290 }, { "epoch": 0.453662109375, "grad_norm": 0.32241278886795044, "learning_rate": 0.0003133078757468711, "loss": 1.832, "step": 9291 }, { "epoch": 0.4537109375, "grad_norm": 0.26683738827705383, "learning_rate": 0.0003132733590305558, "loss": 1.832, "step": 9292 }, { "epoch": 0.453759765625, "grad_norm": 0.29133257269859314, "learning_rate": 0.000313238841386655, "loss": 1.8119, "step": 9293 }, { "epoch": 0.45380859375, "grad_norm": 0.38162761926651, "learning_rate": 0.0003132043228160051, "loss": 1.7927, "step": 9294 }, { "epoch": 0.453857421875, "grad_norm": 0.27031323313713074, "learning_rate": 0.0003131698033194427, "loss": 1.8231, "step": 9295 }, { "epoch": 0.45390625, "grad_norm": 0.2667831480503082, "learning_rate": 0.00031313528289780444, "loss": 1.8266, "step": 9296 }, { "epoch": 0.453955078125, "grad_norm": 0.3162441849708557, "learning_rate": 0.000313100761551927, "loss": 1.8017, "step": 9297 }, { "epoch": 0.45400390625, "grad_norm": 0.30921971797943115, "learning_rate": 0.00031306623928264706, "loss": 1.8055, "step": 9298 }, { "epoch": 0.454052734375, "grad_norm": 0.2268049120903015, "learning_rate": 0.0003130317160908011, "loss": 1.7893, "step": 9299 }, { "epoch": 0.4541015625, "grad_norm": 0.3008536994457245, "learning_rate": 0.0003129971919772261, "loss": 1.7926, "step": 9300 }, { "epoch": 0.454150390625, "grad_norm": 0.3086618483066559, "learning_rate": 0.0003129626669427585, "loss": 1.7997, "step": 9301 }, { "epoch": 0.45419921875, "grad_norm": 0.31470251083374023, "learning_rate": 0.00031292814098823527, "loss": 1.8162, "step": 9302 }, { "epoch": 0.454248046875, "grad_norm": 0.34299495816230774, "learning_rate": 0.00031289361411449294, "loss": 1.8043, "step": 9303 }, { "epoch": 0.454296875, "grad_norm": 0.2695029377937317, "learning_rate": 0.00031285908632236855, "loss": 1.8261, "step": 9304 }, { "epoch": 0.454345703125, "grad_norm": 0.30479076504707336, "learning_rate": 0.00031282455761269876, "loss": 1.805, "step": 9305 }, { "epoch": 0.45439453125, "grad_norm": 0.27260029315948486, "learning_rate": 0.0003127900279863203, "loss": 1.8009, "step": 9306 }, { "epoch": 0.454443359375, "grad_norm": 0.2552044987678528, "learning_rate": 0.0003127554974440703, "loss": 1.8327, "step": 9307 }, { "epoch": 0.4544921875, "grad_norm": 0.2810738980770111, "learning_rate": 0.00031272096598678546, "loss": 1.7877, "step": 9308 }, { "epoch": 0.454541015625, "grad_norm": 0.2231530249118805, "learning_rate": 0.0003126864336153027, "loss": 1.8111, "step": 9309 }, { "epoch": 0.45458984375, "grad_norm": 0.26941877603530884, "learning_rate": 0.00031265190033045895, "loss": 1.7974, "step": 9310 }, { "epoch": 0.454638671875, "grad_norm": 0.23736825585365295, "learning_rate": 0.0003126173661330911, "loss": 1.827, "step": 9311 }, { "epoch": 0.4546875, "grad_norm": 0.22201940417289734, "learning_rate": 0.00031258283102403615, "loss": 1.8102, "step": 9312 }, { "epoch": 0.454736328125, "grad_norm": 0.25501397252082825, "learning_rate": 0.0003125482950041312, "loss": 1.8249, "step": 9313 }, { "epoch": 0.45478515625, "grad_norm": 0.19904166460037231, "learning_rate": 0.00031251375807421304, "loss": 1.8224, "step": 9314 }, { "epoch": 0.454833984375, "grad_norm": 0.25624412298202515, "learning_rate": 0.0003124792202351189, "loss": 1.8054, "step": 9315 }, { "epoch": 0.4548828125, "grad_norm": 0.2361958622932434, "learning_rate": 0.00031244468148768567, "loss": 1.8228, "step": 9316 }, { "epoch": 0.454931640625, "grad_norm": 0.23985770344734192, "learning_rate": 0.0003124101418327506, "loss": 1.791, "step": 9317 }, { "epoch": 0.45498046875, "grad_norm": 0.28357672691345215, "learning_rate": 0.00031237560127115064, "loss": 1.8033, "step": 9318 }, { "epoch": 0.455029296875, "grad_norm": 0.2298499196767807, "learning_rate": 0.00031234105980372295, "loss": 1.7923, "step": 9319 }, { "epoch": 0.455078125, "grad_norm": 0.26628267765045166, "learning_rate": 0.0003123065174313047, "loss": 1.8039, "step": 9320 }, { "epoch": 0.455126953125, "grad_norm": 0.23656325042247772, "learning_rate": 0.000312271974154733, "loss": 1.7674, "step": 9321 }, { "epoch": 0.45517578125, "grad_norm": 0.26270875334739685, "learning_rate": 0.000312237429974845, "loss": 1.8023, "step": 9322 }, { "epoch": 0.455224609375, "grad_norm": 0.28654584288597107, "learning_rate": 0.00031220288489247806, "loss": 1.7979, "step": 9323 }, { "epoch": 0.4552734375, "grad_norm": 0.24976566433906555, "learning_rate": 0.0003121683389084693, "loss": 1.7823, "step": 9324 }, { "epoch": 0.455322265625, "grad_norm": 0.23074473440647125, "learning_rate": 0.00031213379202365605, "loss": 1.8288, "step": 9325 }, { "epoch": 0.45537109375, "grad_norm": 0.24951519072055817, "learning_rate": 0.0003120992442388754, "loss": 1.8033, "step": 9326 }, { "epoch": 0.455419921875, "grad_norm": 0.31379812955856323, "learning_rate": 0.0003120646955549648, "loss": 1.828, "step": 9327 }, { "epoch": 0.45546875, "grad_norm": 0.21640628576278687, "learning_rate": 0.0003120301459727615, "loss": 1.814, "step": 9328 }, { "epoch": 0.455517578125, "grad_norm": 0.27622276544570923, "learning_rate": 0.00031199559549310293, "loss": 1.8139, "step": 9329 }, { "epoch": 0.45556640625, "grad_norm": 0.2623804807662964, "learning_rate": 0.0003119610441168264, "loss": 1.7621, "step": 9330 }, { "epoch": 0.455615234375, "grad_norm": 0.2317955046892166, "learning_rate": 0.00031192649184476914, "loss": 1.8504, "step": 9331 }, { "epoch": 0.4556640625, "grad_norm": 0.27206718921661377, "learning_rate": 0.0003118919386777687, "loss": 1.8063, "step": 9332 }, { "epoch": 0.455712890625, "grad_norm": 0.21755991876125336, "learning_rate": 0.00031185738461666265, "loss": 1.8007, "step": 9333 }, { "epoch": 0.45576171875, "grad_norm": 0.25242945551872253, "learning_rate": 0.00031182282966228815, "loss": 1.8006, "step": 9334 }, { "epoch": 0.455810546875, "grad_norm": 0.30846938490867615, "learning_rate": 0.0003117882738154828, "loss": 1.8061, "step": 9335 }, { "epoch": 0.455859375, "grad_norm": 0.23163524270057678, "learning_rate": 0.00031175371707708413, "loss": 1.8175, "step": 9336 }, { "epoch": 0.455908203125, "grad_norm": 0.26151347160339355, "learning_rate": 0.0003117191594479295, "loss": 1.8197, "step": 9337 }, { "epoch": 0.45595703125, "grad_norm": 0.27716967463493347, "learning_rate": 0.0003116846009288566, "loss": 1.8254, "step": 9338 }, { "epoch": 0.456005859375, "grad_norm": 0.22856181859970093, "learning_rate": 0.0003116500415207029, "loss": 1.8029, "step": 9339 }, { "epoch": 0.4560546875, "grad_norm": 0.23018161952495575, "learning_rate": 0.000311615481224306, "loss": 1.8238, "step": 9340 }, { "epoch": 0.456103515625, "grad_norm": 0.2278890162706375, "learning_rate": 0.0003115809200405035, "loss": 1.8224, "step": 9341 }, { "epoch": 0.45615234375, "grad_norm": 0.28207018971443176, "learning_rate": 0.0003115463579701331, "loss": 1.8141, "step": 9342 }, { "epoch": 0.456201171875, "grad_norm": 0.20312340557575226, "learning_rate": 0.0003115117950140323, "loss": 1.8151, "step": 9343 }, { "epoch": 0.45625, "grad_norm": 0.2565305233001709, "learning_rate": 0.0003114772311730388, "loss": 1.8316, "step": 9344 }, { "epoch": 0.456298828125, "grad_norm": 0.25455242395401, "learning_rate": 0.0003114426664479903, "loss": 1.796, "step": 9345 }, { "epoch": 0.45634765625, "grad_norm": 0.25628647208213806, "learning_rate": 0.0003114081008397245, "loss": 1.8168, "step": 9346 }, { "epoch": 0.456396484375, "grad_norm": 0.2763369679450989, "learning_rate": 0.0003113735343490791, "loss": 1.8052, "step": 9347 }, { "epoch": 0.4564453125, "grad_norm": 0.23575547337532043, "learning_rate": 0.0003113389669768918, "loss": 1.7812, "step": 9348 }, { "epoch": 0.456494140625, "grad_norm": 0.3006943166255951, "learning_rate": 0.0003113043987240006, "loss": 1.8083, "step": 9349 }, { "epoch": 0.45654296875, "grad_norm": 0.27907899022102356, "learning_rate": 0.0003112698295912431, "loss": 1.7901, "step": 9350 }, { "epoch": 0.456591796875, "grad_norm": 0.25201255083084106, "learning_rate": 0.0003112352595794571, "loss": 1.791, "step": 9351 }, { "epoch": 0.456640625, "grad_norm": 0.31939366459846497, "learning_rate": 0.00031120068868948047, "loss": 1.8099, "step": 9352 }, { "epoch": 0.456689453125, "grad_norm": 0.28286227583885193, "learning_rate": 0.0003111661169221511, "loss": 1.7908, "step": 9353 }, { "epoch": 0.45673828125, "grad_norm": 0.2767813801765442, "learning_rate": 0.0003111315442783068, "loss": 1.8105, "step": 9354 }, { "epoch": 0.456787109375, "grad_norm": 0.3145875334739685, "learning_rate": 0.0003110969707587855, "loss": 1.806, "step": 9355 }, { "epoch": 0.4568359375, "grad_norm": 0.24500061571598053, "learning_rate": 0.00031106239636442514, "loss": 1.7811, "step": 9356 }, { "epoch": 0.456884765625, "grad_norm": 0.2527020573616028, "learning_rate": 0.0003110278210960636, "loss": 1.7929, "step": 9357 }, { "epoch": 0.45693359375, "grad_norm": 0.2755396366119385, "learning_rate": 0.0003109932449545388, "loss": 1.8263, "step": 9358 }, { "epoch": 0.456982421875, "grad_norm": 0.2599535882472992, "learning_rate": 0.00031095866794068894, "loss": 1.7728, "step": 9359 }, { "epoch": 0.45703125, "grad_norm": 0.26744115352630615, "learning_rate": 0.00031092409005535187, "loss": 1.8016, "step": 9360 }, { "epoch": 0.457080078125, "grad_norm": 0.28956836462020874, "learning_rate": 0.0003108895112993655, "loss": 1.8231, "step": 9361 }, { "epoch": 0.45712890625, "grad_norm": 0.3356463611125946, "learning_rate": 0.0003108549316735681, "loss": 1.8047, "step": 9362 }, { "epoch": 0.457177734375, "grad_norm": 0.31625375151634216, "learning_rate": 0.00031082035117879753, "loss": 1.8169, "step": 9363 }, { "epoch": 0.4572265625, "grad_norm": 0.3156173825263977, "learning_rate": 0.000310785769815892, "loss": 1.7955, "step": 9364 }, { "epoch": 0.457275390625, "grad_norm": 0.2819218337535858, "learning_rate": 0.0003107511875856896, "loss": 1.8073, "step": 9365 }, { "epoch": 0.45732421875, "grad_norm": 0.28514745831489563, "learning_rate": 0.00031071660448902844, "loss": 1.8242, "step": 9366 }, { "epoch": 0.457373046875, "grad_norm": 0.3223285973072052, "learning_rate": 0.0003106820205267467, "loss": 1.813, "step": 9367 }, { "epoch": 0.457421875, "grad_norm": 0.2625775933265686, "learning_rate": 0.00031064743569968255, "loss": 1.8235, "step": 9368 }, { "epoch": 0.457470703125, "grad_norm": 0.2732101082801819, "learning_rate": 0.00031061285000867406, "loss": 1.8317, "step": 9369 }, { "epoch": 0.45751953125, "grad_norm": 0.2955951690673828, "learning_rate": 0.00031057826345455965, "loss": 1.8043, "step": 9370 }, { "epoch": 0.457568359375, "grad_norm": 0.2572139501571655, "learning_rate": 0.00031054367603817735, "loss": 1.8104, "step": 9371 }, { "epoch": 0.4576171875, "grad_norm": 0.25908464193344116, "learning_rate": 0.00031050908776036553, "loss": 1.7835, "step": 9372 }, { "epoch": 0.457666015625, "grad_norm": 0.22959889471530914, "learning_rate": 0.0003104744986219625, "loss": 1.8148, "step": 9373 }, { "epoch": 0.45771484375, "grad_norm": 0.30591753125190735, "learning_rate": 0.0003104399086238064, "loss": 1.8068, "step": 9374 }, { "epoch": 0.457763671875, "grad_norm": 0.29574912786483765, "learning_rate": 0.00031040531776673565, "loss": 1.8068, "step": 9375 }, { "epoch": 0.4578125, "grad_norm": 0.2883168160915375, "learning_rate": 0.0003103707260515887, "loss": 1.8235, "step": 9376 }, { "epoch": 0.457861328125, "grad_norm": 0.2502690255641937, "learning_rate": 0.0003103361334792037, "loss": 1.8068, "step": 9377 }, { "epoch": 0.45791015625, "grad_norm": 0.23834830522537231, "learning_rate": 0.0003103015400504191, "loss": 1.7934, "step": 9378 }, { "epoch": 0.457958984375, "grad_norm": 0.24507655203342438, "learning_rate": 0.0003102669457660733, "loss": 1.779, "step": 9379 }, { "epoch": 0.4580078125, "grad_norm": 0.229928657412529, "learning_rate": 0.00031023235062700483, "loss": 1.7839, "step": 9380 }, { "epoch": 0.458056640625, "grad_norm": 0.21499140560626984, "learning_rate": 0.000310197754634052, "loss": 1.8158, "step": 9381 }, { "epoch": 0.45810546875, "grad_norm": 0.2572002112865448, "learning_rate": 0.00031016315778805326, "loss": 1.8013, "step": 9382 }, { "epoch": 0.458154296875, "grad_norm": 0.20766568183898926, "learning_rate": 0.0003101285600898472, "loss": 1.8174, "step": 9383 }, { "epoch": 0.458203125, "grad_norm": 0.26580899953842163, "learning_rate": 0.00031009396154027226, "loss": 1.8131, "step": 9384 }, { "epoch": 0.458251953125, "grad_norm": 0.274951308965683, "learning_rate": 0.00031005936214016693, "loss": 1.8042, "step": 9385 }, { "epoch": 0.45830078125, "grad_norm": 0.22492137551307678, "learning_rate": 0.00031002476189036975, "loss": 1.8143, "step": 9386 }, { "epoch": 0.458349609375, "grad_norm": 0.2078874260187149, "learning_rate": 0.00030999016079171935, "loss": 1.8468, "step": 9387 }, { "epoch": 0.4583984375, "grad_norm": 0.24454711377620697, "learning_rate": 0.00030995555884505443, "loss": 1.8203, "step": 9388 }, { "epoch": 0.458447265625, "grad_norm": 0.27080631256103516, "learning_rate": 0.0003099209560512133, "loss": 1.7717, "step": 9389 }, { "epoch": 0.45849609375, "grad_norm": 0.2750251591205597, "learning_rate": 0.0003098863524110348, "loss": 1.8206, "step": 9390 }, { "epoch": 0.458544921875, "grad_norm": 0.2531554400920868, "learning_rate": 0.0003098517479253575, "loss": 1.8035, "step": 9391 }, { "epoch": 0.45859375, "grad_norm": 0.268710732460022, "learning_rate": 0.0003098171425950202, "loss": 1.792, "step": 9392 }, { "epoch": 0.458642578125, "grad_norm": 0.24786493182182312, "learning_rate": 0.0003097825364208614, "loss": 1.8039, "step": 9393 }, { "epoch": 0.45869140625, "grad_norm": 0.2712526321411133, "learning_rate": 0.0003097479294037198, "loss": 1.8024, "step": 9394 }, { "epoch": 0.458740234375, "grad_norm": 0.24911491572856903, "learning_rate": 0.00030971332154443434, "loss": 1.8186, "step": 9395 }, { "epoch": 0.4587890625, "grad_norm": 0.2662110924720764, "learning_rate": 0.0003096787128438436, "loss": 1.7768, "step": 9396 }, { "epoch": 0.458837890625, "grad_norm": 0.31385523080825806, "learning_rate": 0.0003096441033027863, "loss": 1.8081, "step": 9397 }, { "epoch": 0.45888671875, "grad_norm": 0.2666410803794861, "learning_rate": 0.00030960949292210143, "loss": 1.7944, "step": 9398 }, { "epoch": 0.458935546875, "grad_norm": 0.28978246450424194, "learning_rate": 0.0003095748817026277, "loss": 1.7922, "step": 9399 }, { "epoch": 0.458984375, "grad_norm": 0.29241809248924255, "learning_rate": 0.00030954026964520386, "loss": 1.7977, "step": 9400 }, { "epoch": 0.459033203125, "grad_norm": 0.2921225428581238, "learning_rate": 0.000309505656750669, "loss": 1.8086, "step": 9401 }, { "epoch": 0.45908203125, "grad_norm": 0.3414348363876343, "learning_rate": 0.00030947104301986167, "loss": 1.816, "step": 9402 }, { "epoch": 0.459130859375, "grad_norm": 0.2954314947128296, "learning_rate": 0.00030943642845362095, "loss": 1.8274, "step": 9403 }, { "epoch": 0.4591796875, "grad_norm": 0.2722761034965515, "learning_rate": 0.00030940181305278574, "loss": 1.8176, "step": 9404 }, { "epoch": 0.459228515625, "grad_norm": 0.33768370747566223, "learning_rate": 0.00030936719681819494, "loss": 1.807, "step": 9405 }, { "epoch": 0.45927734375, "grad_norm": 0.3101472854614258, "learning_rate": 0.0003093325797506875, "loss": 1.7919, "step": 9406 }, { "epoch": 0.459326171875, "grad_norm": 0.28007179498672485, "learning_rate": 0.00030929796185110244, "loss": 1.7778, "step": 9407 }, { "epoch": 0.459375, "grad_norm": 0.24246057868003845, "learning_rate": 0.0003092633431202787, "loss": 1.7773, "step": 9408 }, { "epoch": 0.459423828125, "grad_norm": 0.26801639795303345, "learning_rate": 0.0003092287235590553, "loss": 1.8291, "step": 9409 }, { "epoch": 0.45947265625, "grad_norm": 0.25110191106796265, "learning_rate": 0.00030919410316827134, "loss": 1.7818, "step": 9410 }, { "epoch": 0.459521484375, "grad_norm": 0.24412982165813446, "learning_rate": 0.00030915948194876577, "loss": 1.809, "step": 9411 }, { "epoch": 0.4595703125, "grad_norm": 0.2567910850048065, "learning_rate": 0.00030912485990137773, "loss": 1.8222, "step": 9412 }, { "epoch": 0.459619140625, "grad_norm": 0.2398315817117691, "learning_rate": 0.0003090902370269463, "loss": 1.7814, "step": 9413 }, { "epoch": 0.45966796875, "grad_norm": 0.24505744874477386, "learning_rate": 0.00030905561332631056, "loss": 1.797, "step": 9414 }, { "epoch": 0.459716796875, "grad_norm": 0.27587634325027466, "learning_rate": 0.0003090209888003097, "loss": 1.777, "step": 9415 }, { "epoch": 0.459765625, "grad_norm": 0.24462322890758514, "learning_rate": 0.00030898636344978283, "loss": 1.8132, "step": 9416 }, { "epoch": 0.459814453125, "grad_norm": 0.25911346077919006, "learning_rate": 0.00030895173727556906, "loss": 1.8203, "step": 9417 }, { "epoch": 0.45986328125, "grad_norm": 0.27447572350502014, "learning_rate": 0.0003089171102785078, "loss": 1.8018, "step": 9418 }, { "epoch": 0.459912109375, "grad_norm": 0.21091611683368683, "learning_rate": 0.00030888248245943814, "loss": 1.8019, "step": 9419 }, { "epoch": 0.4599609375, "grad_norm": 0.2658613920211792, "learning_rate": 0.00030884785381919916, "loss": 1.7868, "step": 9420 }, { "epoch": 0.460009765625, "grad_norm": 0.20939898490905762, "learning_rate": 0.0003088132243586303, "loss": 1.8319, "step": 9421 }, { "epoch": 0.46005859375, "grad_norm": 0.3147512376308441, "learning_rate": 0.0003087785940785708, "loss": 1.8268, "step": 9422 }, { "epoch": 0.460107421875, "grad_norm": 0.25734150409698486, "learning_rate": 0.0003087439629798599, "loss": 1.7904, "step": 9423 }, { "epoch": 0.46015625, "grad_norm": 0.2611233592033386, "learning_rate": 0.000308709331063337, "loss": 1.8145, "step": 9424 }, { "epoch": 0.460205078125, "grad_norm": 0.2585258483886719, "learning_rate": 0.0003086746983298414, "loss": 1.797, "step": 9425 }, { "epoch": 0.46025390625, "grad_norm": 0.2493029683828354, "learning_rate": 0.00030864006478021234, "loss": 1.8081, "step": 9426 }, { "epoch": 0.460302734375, "grad_norm": 0.2950190603733063, "learning_rate": 0.00030860543041528934, "loss": 1.7925, "step": 9427 }, { "epoch": 0.4603515625, "grad_norm": 0.27952390909194946, "learning_rate": 0.00030857079523591177, "loss": 1.7921, "step": 9428 }, { "epoch": 0.460400390625, "grad_norm": 0.3373166620731354, "learning_rate": 0.00030853615924291897, "loss": 1.8246, "step": 9429 }, { "epoch": 0.46044921875, "grad_norm": 0.2599526345729828, "learning_rate": 0.0003085015224371503, "loss": 1.7869, "step": 9430 }, { "epoch": 0.460498046875, "grad_norm": 0.20816995203495026, "learning_rate": 0.00030846688481944544, "loss": 1.8326, "step": 9431 }, { "epoch": 0.460546875, "grad_norm": 0.2728835940361023, "learning_rate": 0.0003084322463906437, "loss": 1.8114, "step": 9432 }, { "epoch": 0.460595703125, "grad_norm": 0.2705363631248474, "learning_rate": 0.0003083976071515847, "loss": 1.7756, "step": 9433 }, { "epoch": 0.46064453125, "grad_norm": 0.2952271103858948, "learning_rate": 0.00030836296710310773, "loss": 1.8137, "step": 9434 }, { "epoch": 0.460693359375, "grad_norm": 0.28706449270248413, "learning_rate": 0.00030832832624605255, "loss": 1.8008, "step": 9435 }, { "epoch": 0.4607421875, "grad_norm": 0.2438448816537857, "learning_rate": 0.00030829368458125855, "loss": 1.8031, "step": 9436 }, { "epoch": 0.460791015625, "grad_norm": 0.23291808366775513, "learning_rate": 0.0003082590421095654, "loss": 1.772, "step": 9437 }, { "epoch": 0.46083984375, "grad_norm": 0.3038685917854309, "learning_rate": 0.00030822439883181264, "loss": 1.8207, "step": 9438 }, { "epoch": 0.460888671875, "grad_norm": 0.2930144965648651, "learning_rate": 0.00030818975474883976, "loss": 1.8044, "step": 9439 }, { "epoch": 0.4609375, "grad_norm": 0.25112417340278625, "learning_rate": 0.00030815510986148667, "loss": 1.8161, "step": 9440 }, { "epoch": 0.460986328125, "grad_norm": 0.31241798400878906, "learning_rate": 0.00030812046417059275, "loss": 1.7983, "step": 9441 }, { "epoch": 0.46103515625, "grad_norm": 0.20662789046764374, "learning_rate": 0.0003080858176769978, "loss": 1.8186, "step": 9442 }, { "epoch": 0.461083984375, "grad_norm": 0.29778730869293213, "learning_rate": 0.0003080511703815415, "loss": 1.8018, "step": 9443 }, { "epoch": 0.4611328125, "grad_norm": 0.19058680534362793, "learning_rate": 0.0003080165222850635, "loss": 1.7787, "step": 9444 }, { "epoch": 0.461181640625, "grad_norm": 0.32454821467399597, "learning_rate": 0.0003079818733884037, "loss": 1.7978, "step": 9445 }, { "epoch": 0.46123046875, "grad_norm": 0.2683747112751007, "learning_rate": 0.0003079472236924015, "loss": 1.8133, "step": 9446 }, { "epoch": 0.461279296875, "grad_norm": 0.22366133332252502, "learning_rate": 0.000307912573197897, "loss": 1.8133, "step": 9447 }, { "epoch": 0.461328125, "grad_norm": 0.26966211199760437, "learning_rate": 0.0003078779219057298, "loss": 1.8156, "step": 9448 }, { "epoch": 0.461376953125, "grad_norm": 0.30072906613349915, "learning_rate": 0.0003078432698167397, "loss": 1.7819, "step": 9449 }, { "epoch": 0.46142578125, "grad_norm": 0.23857225477695465, "learning_rate": 0.0003078086169317666, "loss": 1.7975, "step": 9450 }, { "epoch": 0.461474609375, "grad_norm": 0.2429293841123581, "learning_rate": 0.00030777396325165033, "loss": 1.8049, "step": 9451 }, { "epoch": 0.4615234375, "grad_norm": 0.2560526430606842, "learning_rate": 0.00030773930877723074, "loss": 1.8152, "step": 9452 }, { "epoch": 0.461572265625, "grad_norm": 0.24928651750087738, "learning_rate": 0.00030770465350934766, "loss": 1.8164, "step": 9453 }, { "epoch": 0.46162109375, "grad_norm": 0.29504579305648804, "learning_rate": 0.000307669997448841, "loss": 1.7921, "step": 9454 }, { "epoch": 0.461669921875, "grad_norm": 0.21907609701156616, "learning_rate": 0.00030763534059655086, "loss": 1.815, "step": 9455 }, { "epoch": 0.46171875, "grad_norm": 0.2995103597640991, "learning_rate": 0.0003076006829533169, "loss": 1.8057, "step": 9456 }, { "epoch": 0.461767578125, "grad_norm": 0.2680104374885559, "learning_rate": 0.0003075660245199792, "loss": 1.822, "step": 9457 }, { "epoch": 0.46181640625, "grad_norm": 0.21501632034778595, "learning_rate": 0.00030753136529737774, "loss": 1.8096, "step": 9458 }, { "epoch": 0.461865234375, "grad_norm": 0.2736198902130127, "learning_rate": 0.0003074967052863525, "loss": 1.8137, "step": 9459 }, { "epoch": 0.4619140625, "grad_norm": 0.24944770336151123, "learning_rate": 0.0003074620444877435, "loss": 1.8211, "step": 9460 }, { "epoch": 0.461962890625, "grad_norm": 0.3223314881324768, "learning_rate": 0.0003074273829023908, "loss": 1.8081, "step": 9461 }, { "epoch": 0.46201171875, "grad_norm": 0.37431013584136963, "learning_rate": 0.00030739272053113435, "loss": 1.8336, "step": 9462 }, { "epoch": 0.462060546875, "grad_norm": 0.3611792325973511, "learning_rate": 0.0003073580573748144, "loss": 1.8375, "step": 9463 }, { "epoch": 0.462109375, "grad_norm": 0.2977839708328247, "learning_rate": 0.0003073233934342709, "loss": 1.7969, "step": 9464 }, { "epoch": 0.462158203125, "grad_norm": 0.26522648334503174, "learning_rate": 0.00030728872871034404, "loss": 1.7896, "step": 9465 }, { "epoch": 0.46220703125, "grad_norm": 0.3072567582130432, "learning_rate": 0.00030725406320387385, "loss": 1.7868, "step": 9466 }, { "epoch": 0.462255859375, "grad_norm": 0.2843022346496582, "learning_rate": 0.00030721939691570046, "loss": 1.8308, "step": 9467 }, { "epoch": 0.4623046875, "grad_norm": 0.34894225001335144, "learning_rate": 0.0003071847298466641, "loss": 1.7622, "step": 9468 }, { "epoch": 0.462353515625, "grad_norm": 0.30918243527412415, "learning_rate": 0.00030715006199760503, "loss": 1.8153, "step": 9469 }, { "epoch": 0.46240234375, "grad_norm": 0.24665242433547974, "learning_rate": 0.0003071153933693634, "loss": 1.8091, "step": 9470 }, { "epoch": 0.462451171875, "grad_norm": 0.3485569357872009, "learning_rate": 0.00030708072396277937, "loss": 1.7987, "step": 9471 }, { "epoch": 0.4625, "grad_norm": 0.2437325268983841, "learning_rate": 0.0003070460537786932, "loss": 1.8187, "step": 9472 }, { "epoch": 0.462548828125, "grad_norm": 0.29409801959991455, "learning_rate": 0.00030701138281794514, "loss": 1.7975, "step": 9473 }, { "epoch": 0.46259765625, "grad_norm": 0.23949246108531952, "learning_rate": 0.0003069767110813756, "loss": 1.8327, "step": 9474 }, { "epoch": 0.462646484375, "grad_norm": 0.28306519985198975, "learning_rate": 0.0003069420385698248, "loss": 1.7867, "step": 9475 }, { "epoch": 0.4626953125, "grad_norm": 0.26365700364112854, "learning_rate": 0.0003069073652841329, "loss": 1.829, "step": 9476 }, { "epoch": 0.462744140625, "grad_norm": 0.25794151425361633, "learning_rate": 0.00030687269122514047, "loss": 1.8336, "step": 9477 }, { "epoch": 0.46279296875, "grad_norm": 0.2721090614795685, "learning_rate": 0.00030683801639368766, "loss": 1.8267, "step": 9478 }, { "epoch": 0.462841796875, "grad_norm": 0.25418663024902344, "learning_rate": 0.000306803340790615, "loss": 1.8241, "step": 9479 }, { "epoch": 0.462890625, "grad_norm": 0.23394042253494263, "learning_rate": 0.00030676866441676283, "loss": 1.8073, "step": 9480 }, { "epoch": 0.462939453125, "grad_norm": 0.2671656310558319, "learning_rate": 0.00030673398727297155, "loss": 1.8131, "step": 9481 }, { "epoch": 0.46298828125, "grad_norm": 0.2580883502960205, "learning_rate": 0.0003066993093600816, "loss": 1.8171, "step": 9482 }, { "epoch": 0.463037109375, "grad_norm": 0.2523982524871826, "learning_rate": 0.0003066646306789334, "loss": 1.8026, "step": 9483 }, { "epoch": 0.4630859375, "grad_norm": 0.22881954908370972, "learning_rate": 0.0003066299512303675, "loss": 1.7975, "step": 9484 }, { "epoch": 0.463134765625, "grad_norm": 0.21094352006912231, "learning_rate": 0.0003065952710152243, "loss": 1.8258, "step": 9485 }, { "epoch": 0.46318359375, "grad_norm": 0.25721293687820435, "learning_rate": 0.00030656059003434435, "loss": 1.8059, "step": 9486 }, { "epoch": 0.463232421875, "grad_norm": 0.25481030344963074, "learning_rate": 0.0003065259082885681, "loss": 1.7995, "step": 9487 }, { "epoch": 0.46328125, "grad_norm": 0.227497398853302, "learning_rate": 0.0003064912257787361, "loss": 1.8234, "step": 9488 }, { "epoch": 0.463330078125, "grad_norm": 0.2646510899066925, "learning_rate": 0.00030645654250568895, "loss": 1.8264, "step": 9489 }, { "epoch": 0.46337890625, "grad_norm": 0.25976666808128357, "learning_rate": 0.00030642185847026723, "loss": 1.8229, "step": 9490 }, { "epoch": 0.463427734375, "grad_norm": 0.26025041937828064, "learning_rate": 0.00030638717367331155, "loss": 1.7893, "step": 9491 }, { "epoch": 0.4634765625, "grad_norm": 0.23516663908958435, "learning_rate": 0.00030635248811566244, "loss": 1.8217, "step": 9492 }, { "epoch": 0.463525390625, "grad_norm": 0.23042742908000946, "learning_rate": 0.00030631780179816065, "loss": 1.8123, "step": 9493 }, { "epoch": 0.46357421875, "grad_norm": 0.2797010540962219, "learning_rate": 0.00030628311472164675, "loss": 1.8099, "step": 9494 }, { "epoch": 0.463623046875, "grad_norm": 0.25777432322502136, "learning_rate": 0.0003062484268869615, "loss": 1.8138, "step": 9495 }, { "epoch": 0.463671875, "grad_norm": 0.24112997949123383, "learning_rate": 0.0003062137382949455, "loss": 1.7927, "step": 9496 }, { "epoch": 0.463720703125, "grad_norm": 0.22479520738124847, "learning_rate": 0.00030617904894643947, "loss": 1.7921, "step": 9497 }, { "epoch": 0.46376953125, "grad_norm": 0.30784347653388977, "learning_rate": 0.000306144358842284, "loss": 1.814, "step": 9498 }, { "epoch": 0.463818359375, "grad_norm": 0.3044992685317993, "learning_rate": 0.0003061096679833201, "loss": 1.7916, "step": 9499 }, { "epoch": 0.4638671875, "grad_norm": 0.2150285840034485, "learning_rate": 0.0003060749763703884, "loss": 1.8152, "step": 9500 }, { "epoch": 0.463916015625, "grad_norm": 0.2826870381832123, "learning_rate": 0.00030604028400432965, "loss": 1.7916, "step": 9501 }, { "epoch": 0.46396484375, "grad_norm": 0.3543710708618164, "learning_rate": 0.00030600559088598476, "loss": 1.8084, "step": 9502 }, { "epoch": 0.464013671875, "grad_norm": 0.326772540807724, "learning_rate": 0.0003059708970161944, "loss": 1.7985, "step": 9503 }, { "epoch": 0.4640625, "grad_norm": 0.2422584593296051, "learning_rate": 0.0003059362023957994, "loss": 1.7941, "step": 9504 }, { "epoch": 0.464111328125, "grad_norm": 0.3628550171852112, "learning_rate": 0.00030590150702564084, "loss": 1.8148, "step": 9505 }, { "epoch": 0.46416015625, "grad_norm": 0.26072701811790466, "learning_rate": 0.0003058668109065593, "loss": 1.7836, "step": 9506 }, { "epoch": 0.464208984375, "grad_norm": 0.277715265750885, "learning_rate": 0.0003058321140393959, "loss": 1.7918, "step": 9507 }, { "epoch": 0.4642578125, "grad_norm": 0.3438257873058319, "learning_rate": 0.00030579741642499135, "loss": 1.8279, "step": 9508 }, { "epoch": 0.464306640625, "grad_norm": 0.3147253692150116, "learning_rate": 0.00030576271806418673, "loss": 1.8015, "step": 9509 }, { "epoch": 0.46435546875, "grad_norm": 0.37030547857284546, "learning_rate": 0.00030572801895782295, "loss": 1.7995, "step": 9510 }, { "epoch": 0.464404296875, "grad_norm": 0.3277863562107086, "learning_rate": 0.00030569331910674087, "loss": 1.8091, "step": 9511 }, { "epoch": 0.464453125, "grad_norm": 0.28290101885795593, "learning_rate": 0.00030565861851178155, "loss": 1.8046, "step": 9512 }, { "epoch": 0.464501953125, "grad_norm": 0.23801816999912262, "learning_rate": 0.000305623917173786, "loss": 1.7991, "step": 9513 }, { "epoch": 0.46455078125, "grad_norm": 0.32087576389312744, "learning_rate": 0.0003055892150935952, "loss": 1.8192, "step": 9514 }, { "epoch": 0.464599609375, "grad_norm": 0.2669762670993805, "learning_rate": 0.0003055545122720503, "loss": 1.8096, "step": 9515 }, { "epoch": 0.4646484375, "grad_norm": 0.29454031586647034, "learning_rate": 0.00030551980870999207, "loss": 1.8074, "step": 9516 }, { "epoch": 0.464697265625, "grad_norm": 0.24662569165229797, "learning_rate": 0.0003054851044082619, "loss": 1.7967, "step": 9517 }, { "epoch": 0.46474609375, "grad_norm": 0.260053426027298, "learning_rate": 0.00030545039936770066, "loss": 1.777, "step": 9518 }, { "epoch": 0.464794921875, "grad_norm": 0.28158044815063477, "learning_rate": 0.00030541569358914956, "loss": 1.8034, "step": 9519 }, { "epoch": 0.46484375, "grad_norm": 0.2470269352197647, "learning_rate": 0.0003053809870734497, "loss": 1.7893, "step": 9520 }, { "epoch": 0.464892578125, "grad_norm": 0.2989785075187683, "learning_rate": 0.0003053462798214422, "loss": 1.7896, "step": 9521 }, { "epoch": 0.46494140625, "grad_norm": 0.23555363714694977, "learning_rate": 0.00030531157183396826, "loss": 1.7938, "step": 9522 }, { "epoch": 0.464990234375, "grad_norm": 0.2750599980354309, "learning_rate": 0.00030527686311186886, "loss": 1.7958, "step": 9523 }, { "epoch": 0.4650390625, "grad_norm": 0.2879371643066406, "learning_rate": 0.0003052421536559855, "loss": 1.8084, "step": 9524 }, { "epoch": 0.465087890625, "grad_norm": 0.20885337889194489, "learning_rate": 0.00030520744346715926, "loss": 1.8171, "step": 9525 }, { "epoch": 0.46513671875, "grad_norm": 0.289072185754776, "learning_rate": 0.0003051727325462313, "loss": 1.8001, "step": 9526 }, { "epoch": 0.465185546875, "grad_norm": 0.26987433433532715, "learning_rate": 0.000305138020894043, "loss": 1.8187, "step": 9527 }, { "epoch": 0.465234375, "grad_norm": 0.20288623869419098, "learning_rate": 0.0003051033085114355, "loss": 1.7896, "step": 9528 }, { "epoch": 0.465283203125, "grad_norm": 0.2574993073940277, "learning_rate": 0.0003050685953992501, "loss": 1.7703, "step": 9529 }, { "epoch": 0.46533203125, "grad_norm": 0.2599439322948456, "learning_rate": 0.0003050338815583282, "loss": 1.8098, "step": 9530 }, { "epoch": 0.465380859375, "grad_norm": 0.32517001032829285, "learning_rate": 0.000304999166989511, "loss": 1.8063, "step": 9531 }, { "epoch": 0.4654296875, "grad_norm": 0.2820371687412262, "learning_rate": 0.00030496445169363997, "loss": 1.8122, "step": 9532 }, { "epoch": 0.465478515625, "grad_norm": 0.22000938653945923, "learning_rate": 0.0003049297356715563, "loss": 1.7994, "step": 9533 }, { "epoch": 0.46552734375, "grad_norm": 0.2691391706466675, "learning_rate": 0.00030489501892410145, "loss": 1.7851, "step": 9534 }, { "epoch": 0.465576171875, "grad_norm": 0.19274692237377167, "learning_rate": 0.0003048603014521168, "loss": 1.7917, "step": 9535 }, { "epoch": 0.465625, "grad_norm": 0.23258128762245178, "learning_rate": 0.0003048255832564438, "loss": 1.7996, "step": 9536 }, { "epoch": 0.465673828125, "grad_norm": 0.2254265695810318, "learning_rate": 0.0003047908643379238, "loss": 1.7826, "step": 9537 }, { "epoch": 0.46572265625, "grad_norm": 0.23618213832378387, "learning_rate": 0.00030475614469739826, "loss": 1.8159, "step": 9538 }, { "epoch": 0.465771484375, "grad_norm": 0.26982253789901733, "learning_rate": 0.00030472142433570865, "loss": 1.8101, "step": 9539 }, { "epoch": 0.4658203125, "grad_norm": 0.22301529347896576, "learning_rate": 0.0003046867032536964, "loss": 1.805, "step": 9540 }, { "epoch": 0.465869140625, "grad_norm": 0.22639116644859314, "learning_rate": 0.0003046519814522031, "loss": 1.7852, "step": 9541 }, { "epoch": 0.46591796875, "grad_norm": 0.269713819026947, "learning_rate": 0.0003046172589320702, "loss": 1.8185, "step": 9542 }, { "epoch": 0.465966796875, "grad_norm": 0.2596305012702942, "learning_rate": 0.00030458253569413924, "loss": 1.8064, "step": 9543 }, { "epoch": 0.466015625, "grad_norm": 0.2592725455760956, "learning_rate": 0.0003045478117392518, "loss": 1.8022, "step": 9544 }, { "epoch": 0.466064453125, "grad_norm": 0.2345273196697235, "learning_rate": 0.0003045130870682494, "loss": 1.8187, "step": 9545 }, { "epoch": 0.46611328125, "grad_norm": 0.2680106461048126, "learning_rate": 0.00030447836168197355, "loss": 1.8082, "step": 9546 }, { "epoch": 0.466162109375, "grad_norm": 0.262224018573761, "learning_rate": 0.000304443635581266, "loss": 1.7908, "step": 9547 }, { "epoch": 0.4662109375, "grad_norm": 0.22368963062763214, "learning_rate": 0.0003044089087669682, "loss": 1.8015, "step": 9548 }, { "epoch": 0.466259765625, "grad_norm": 0.24016419053077698, "learning_rate": 0.0003043741812399219, "loss": 1.7889, "step": 9549 }, { "epoch": 0.46630859375, "grad_norm": 0.2527083158493042, "learning_rate": 0.0003043394530009687, "loss": 1.7925, "step": 9550 }, { "epoch": 0.466357421875, "grad_norm": 0.21412211656570435, "learning_rate": 0.00030430472405095036, "loss": 1.7994, "step": 9551 }, { "epoch": 0.46640625, "grad_norm": 0.2513786852359772, "learning_rate": 0.00030426999439070847, "loss": 1.8015, "step": 9552 }, { "epoch": 0.466455078125, "grad_norm": 0.29323074221611023, "learning_rate": 0.0003042352640210847, "loss": 1.806, "step": 9553 }, { "epoch": 0.46650390625, "grad_norm": 0.27730074524879456, "learning_rate": 0.00030420053294292083, "loss": 1.7926, "step": 9554 }, { "epoch": 0.466552734375, "grad_norm": 0.21395817399024963, "learning_rate": 0.0003041658011570586, "loss": 1.7986, "step": 9555 }, { "epoch": 0.4666015625, "grad_norm": 0.287124902009964, "learning_rate": 0.00030413106866433975, "loss": 1.829, "step": 9556 }, { "epoch": 0.466650390625, "grad_norm": 0.2739701569080353, "learning_rate": 0.00030409633546560604, "loss": 1.7947, "step": 9557 }, { "epoch": 0.46669921875, "grad_norm": 0.2334243506193161, "learning_rate": 0.00030406160156169927, "loss": 1.8004, "step": 9558 }, { "epoch": 0.466748046875, "grad_norm": 0.2687149941921234, "learning_rate": 0.0003040268669534612, "loss": 1.7971, "step": 9559 }, { "epoch": 0.466796875, "grad_norm": 0.2524808347225189, "learning_rate": 0.0003039921316417337, "loss": 1.8105, "step": 9560 }, { "epoch": 0.466845703125, "grad_norm": 0.23219561576843262, "learning_rate": 0.00030395739562735877, "loss": 1.7946, "step": 9561 }, { "epoch": 0.46689453125, "grad_norm": 0.2606005370616913, "learning_rate": 0.0003039226589111779, "loss": 1.8044, "step": 9562 }, { "epoch": 0.466943359375, "grad_norm": 0.32613644003868103, "learning_rate": 0.0003038879214940332, "loss": 1.8168, "step": 9563 }, { "epoch": 0.4669921875, "grad_norm": 0.23882333934307098, "learning_rate": 0.0003038531833767665, "loss": 1.8133, "step": 9564 }, { "epoch": 0.467041015625, "grad_norm": 0.31337007880210876, "learning_rate": 0.0003038184445602197, "loss": 1.7865, "step": 9565 }, { "epoch": 0.46708984375, "grad_norm": 0.3209889829158783, "learning_rate": 0.00030378370504523483, "loss": 1.7937, "step": 9566 }, { "epoch": 0.467138671875, "grad_norm": 0.25428569316864014, "learning_rate": 0.00030374896483265366, "loss": 1.8079, "step": 9567 }, { "epoch": 0.4671875, "grad_norm": 0.31329530477523804, "learning_rate": 0.00030371422392331827, "loss": 1.8079, "step": 9568 }, { "epoch": 0.467236328125, "grad_norm": 0.25695711374282837, "learning_rate": 0.00030367948231807063, "loss": 1.7972, "step": 9569 }, { "epoch": 0.46728515625, "grad_norm": 0.29519397020339966, "learning_rate": 0.0003036447400177526, "loss": 1.7864, "step": 9570 }, { "epoch": 0.467333984375, "grad_norm": 0.30391183495521545, "learning_rate": 0.00030360999702320633, "loss": 1.8289, "step": 9571 }, { "epoch": 0.4673828125, "grad_norm": 0.2636163532733917, "learning_rate": 0.00030357525333527387, "loss": 1.8143, "step": 9572 }, { "epoch": 0.467431640625, "grad_norm": 0.23480357229709625, "learning_rate": 0.0003035405089547971, "loss": 1.8145, "step": 9573 }, { "epoch": 0.46748046875, "grad_norm": 0.26620054244995117, "learning_rate": 0.00030350576388261816, "loss": 1.8007, "step": 9574 }, { "epoch": 0.467529296875, "grad_norm": 0.26702889800071716, "learning_rate": 0.00030347101811957917, "loss": 1.7837, "step": 9575 }, { "epoch": 0.467578125, "grad_norm": 0.22871963679790497, "learning_rate": 0.00030343627166652215, "loss": 1.7962, "step": 9576 }, { "epoch": 0.467626953125, "grad_norm": 0.2355208843946457, "learning_rate": 0.00030340152452428925, "loss": 1.8131, "step": 9577 }, { "epoch": 0.46767578125, "grad_norm": 0.2936619520187378, "learning_rate": 0.00030336677669372264, "loss": 1.8042, "step": 9578 }, { "epoch": 0.467724609375, "grad_norm": 0.23521895706653595, "learning_rate": 0.00030333202817566435, "loss": 1.8116, "step": 9579 }, { "epoch": 0.4677734375, "grad_norm": 0.2655757963657379, "learning_rate": 0.00030329727897095664, "loss": 1.8305, "step": 9580 }, { "epoch": 0.467822265625, "grad_norm": 0.28761014342308044, "learning_rate": 0.0003032625290804416, "loss": 1.8181, "step": 9581 }, { "epoch": 0.46787109375, "grad_norm": 0.2700929045677185, "learning_rate": 0.0003032277785049615, "loss": 1.8097, "step": 9582 }, { "epoch": 0.467919921875, "grad_norm": 0.26193079352378845, "learning_rate": 0.00030319302724535847, "loss": 1.7804, "step": 9583 }, { "epoch": 0.46796875, "grad_norm": 0.2597575783729553, "learning_rate": 0.00030315827530247476, "loss": 1.7871, "step": 9584 }, { "epoch": 0.468017578125, "grad_norm": 0.2824573218822479, "learning_rate": 0.0003031235226771527, "loss": 1.8041, "step": 9585 }, { "epoch": 0.46806640625, "grad_norm": 0.3136156499385834, "learning_rate": 0.00030308876937023443, "loss": 1.7873, "step": 9586 }, { "epoch": 0.468115234375, "grad_norm": 0.2622126638889313, "learning_rate": 0.0003030540153825623, "loss": 1.7937, "step": 9587 }, { "epoch": 0.4681640625, "grad_norm": 0.31508660316467285, "learning_rate": 0.0003030192607149785, "loss": 1.7717, "step": 9588 }, { "epoch": 0.468212890625, "grad_norm": 0.3078702688217163, "learning_rate": 0.0003029845053683254, "loss": 1.7854, "step": 9589 }, { "epoch": 0.46826171875, "grad_norm": 0.23704740405082703, "learning_rate": 0.00030294974934344537, "loss": 1.8056, "step": 9590 }, { "epoch": 0.468310546875, "grad_norm": 0.3818141222000122, "learning_rate": 0.0003029149926411807, "loss": 1.8178, "step": 9591 }, { "epoch": 0.468359375, "grad_norm": 0.3134456276893616, "learning_rate": 0.0003028802352623738, "loss": 1.8037, "step": 9592 }, { "epoch": 0.468408203125, "grad_norm": 0.3426326811313629, "learning_rate": 0.0003028454772078669, "loss": 1.7885, "step": 9593 }, { "epoch": 0.46845703125, "grad_norm": 0.2915235161781311, "learning_rate": 0.00030281071847850253, "loss": 1.7767, "step": 9594 }, { "epoch": 0.468505859375, "grad_norm": 0.37980028986930847, "learning_rate": 0.00030277595907512317, "loss": 1.8034, "step": 9595 }, { "epoch": 0.4685546875, "grad_norm": 0.34157201647758484, "learning_rate": 0.0003027411989985711, "loss": 1.7869, "step": 9596 }, { "epoch": 0.468603515625, "grad_norm": 0.2854444086551666, "learning_rate": 0.0003027064382496886, "loss": 1.8006, "step": 9597 }, { "epoch": 0.46865234375, "grad_norm": 0.30594170093536377, "learning_rate": 0.0003026716768293185, "loss": 1.8189, "step": 9598 }, { "epoch": 0.468701171875, "grad_norm": 0.2773781418800354, "learning_rate": 0.000302636914738303, "loss": 1.8109, "step": 9599 }, { "epoch": 0.46875, "grad_norm": 0.2372666746377945, "learning_rate": 0.0003026021519774847, "loss": 1.7841, "step": 9600 }, { "epoch": 0.468798828125, "grad_norm": 0.2849695086479187, "learning_rate": 0.000302567388547706, "loss": 1.811, "step": 9601 }, { "epoch": 0.46884765625, "grad_norm": 0.30267220735549927, "learning_rate": 0.00030253262444980957, "loss": 1.804, "step": 9602 }, { "epoch": 0.468896484375, "grad_norm": 0.2232198566198349, "learning_rate": 0.00030249785968463786, "loss": 1.7948, "step": 9603 }, { "epoch": 0.4689453125, "grad_norm": 0.2765170931816101, "learning_rate": 0.00030246309425303343, "loss": 1.8004, "step": 9604 }, { "epoch": 0.468994140625, "grad_norm": 0.314291387796402, "learning_rate": 0.0003024283281558388, "loss": 1.8073, "step": 9605 }, { "epoch": 0.46904296875, "grad_norm": 0.3128766119480133, "learning_rate": 0.0003023935613938966, "loss": 1.7997, "step": 9606 }, { "epoch": 0.469091796875, "grad_norm": 0.2675880491733551, "learning_rate": 0.0003023587939680495, "loss": 1.8077, "step": 9607 }, { "epoch": 0.469140625, "grad_norm": 0.2336798906326294, "learning_rate": 0.00030232402587914, "loss": 1.7978, "step": 9608 }, { "epoch": 0.469189453125, "grad_norm": 0.22649326920509338, "learning_rate": 0.0003022892571280108, "loss": 1.8009, "step": 9609 }, { "epoch": 0.46923828125, "grad_norm": 0.2613120675086975, "learning_rate": 0.00030225448771550444, "loss": 1.7943, "step": 9610 }, { "epoch": 0.469287109375, "grad_norm": 0.2435918003320694, "learning_rate": 0.00030221971764246385, "loss": 1.8067, "step": 9611 }, { "epoch": 0.4693359375, "grad_norm": 0.22359387576580048, "learning_rate": 0.0003021849469097315, "loss": 1.8193, "step": 9612 }, { "epoch": 0.469384765625, "grad_norm": 0.22421857714653015, "learning_rate": 0.00030215017551815, "loss": 1.813, "step": 9613 }, { "epoch": 0.46943359375, "grad_norm": 0.20642760396003723, "learning_rate": 0.00030211540346856226, "loss": 1.819, "step": 9614 }, { "epoch": 0.469482421875, "grad_norm": 0.24696017801761627, "learning_rate": 0.0003020806307618109, "loss": 1.8156, "step": 9615 }, { "epoch": 0.46953125, "grad_norm": 0.2691773474216461, "learning_rate": 0.0003020458573987387, "loss": 1.7969, "step": 9616 }, { "epoch": 0.469580078125, "grad_norm": 0.30625665187835693, "learning_rate": 0.0003020110833801884, "loss": 1.827, "step": 9617 }, { "epoch": 0.46962890625, "grad_norm": 0.29164835810661316, "learning_rate": 0.0003019763087070028, "loss": 1.8054, "step": 9618 }, { "epoch": 0.469677734375, "grad_norm": 0.2587231397628784, "learning_rate": 0.00030194153338002473, "loss": 1.7985, "step": 9619 }, { "epoch": 0.4697265625, "grad_norm": 0.2823719382286072, "learning_rate": 0.00030190675740009696, "loss": 1.841, "step": 9620 }, { "epoch": 0.469775390625, "grad_norm": 0.2611360251903534, "learning_rate": 0.00030187198076806225, "loss": 1.8143, "step": 9621 }, { "epoch": 0.46982421875, "grad_norm": 0.28585949540138245, "learning_rate": 0.0003018372034847636, "loss": 1.7826, "step": 9622 }, { "epoch": 0.469873046875, "grad_norm": 0.2391578108072281, "learning_rate": 0.0003018024255510437, "loss": 1.7967, "step": 9623 }, { "epoch": 0.469921875, "grad_norm": 0.2714047431945801, "learning_rate": 0.0003017676469677454, "loss": 1.7901, "step": 9624 }, { "epoch": 0.469970703125, "grad_norm": 0.29840362071990967, "learning_rate": 0.00030173286773571177, "loss": 1.8174, "step": 9625 }, { "epoch": 0.47001953125, "grad_norm": 0.2572774291038513, "learning_rate": 0.0003016980878557856, "loss": 1.7919, "step": 9626 }, { "epoch": 0.470068359375, "grad_norm": 0.2665165662765503, "learning_rate": 0.00030166330732880984, "loss": 1.7957, "step": 9627 }, { "epoch": 0.4701171875, "grad_norm": 0.2628830671310425, "learning_rate": 0.0003016285261556274, "loss": 1.8043, "step": 9628 }, { "epoch": 0.470166015625, "grad_norm": 0.28619712591171265, "learning_rate": 0.0003015937443370812, "loss": 1.8116, "step": 9629 }, { "epoch": 0.47021484375, "grad_norm": 0.2290203720331192, "learning_rate": 0.00030155896187401425, "loss": 1.7537, "step": 9630 }, { "epoch": 0.470263671875, "grad_norm": 0.2626398801803589, "learning_rate": 0.0003015241787672695, "loss": 1.8105, "step": 9631 }, { "epoch": 0.4703125, "grad_norm": 0.2478499859571457, "learning_rate": 0.00030148939501768994, "loss": 1.802, "step": 9632 }, { "epoch": 0.470361328125, "grad_norm": 0.2551132142543793, "learning_rate": 0.0003014546106261187, "loss": 1.8081, "step": 9633 }, { "epoch": 0.47041015625, "grad_norm": 0.23612958192825317, "learning_rate": 0.0003014198255933987, "loss": 1.8308, "step": 9634 }, { "epoch": 0.470458984375, "grad_norm": 0.27698299288749695, "learning_rate": 0.00030138503992037304, "loss": 1.8035, "step": 9635 }, { "epoch": 0.4705078125, "grad_norm": 0.28268012404441833, "learning_rate": 0.0003013502536078846, "loss": 1.7858, "step": 9636 }, { "epoch": 0.470556640625, "grad_norm": 0.26529407501220703, "learning_rate": 0.0003013154666567768, "loss": 1.8006, "step": 9637 }, { "epoch": 0.47060546875, "grad_norm": 0.25712230801582336, "learning_rate": 0.0003012806790678923, "loss": 1.7938, "step": 9638 }, { "epoch": 0.470654296875, "grad_norm": 0.2822849750518799, "learning_rate": 0.0003012458908420746, "loss": 1.7928, "step": 9639 }, { "epoch": 0.470703125, "grad_norm": 0.27755826711654663, "learning_rate": 0.00030121110198016657, "loss": 1.8074, "step": 9640 }, { "epoch": 0.470751953125, "grad_norm": 0.2691858410835266, "learning_rate": 0.0003011763124830114, "loss": 1.801, "step": 9641 }, { "epoch": 0.47080078125, "grad_norm": 0.26378363370895386, "learning_rate": 0.0003011415223514523, "loss": 1.8152, "step": 9642 }, { "epoch": 0.470849609375, "grad_norm": 0.29216596484184265, "learning_rate": 0.00030110673158633246, "loss": 1.8082, "step": 9643 }, { "epoch": 0.4708984375, "grad_norm": 0.23511649668216705, "learning_rate": 0.00030107194018849485, "loss": 1.8074, "step": 9644 }, { "epoch": 0.470947265625, "grad_norm": 0.29884007573127747, "learning_rate": 0.000301037148158783, "loss": 1.7908, "step": 9645 }, { "epoch": 0.47099609375, "grad_norm": 0.3512073755264282, "learning_rate": 0.0003010023554980399, "loss": 1.7898, "step": 9646 }, { "epoch": 0.471044921875, "grad_norm": 0.29526710510253906, "learning_rate": 0.00030096756220710874, "loss": 1.8036, "step": 9647 }, { "epoch": 0.47109375, "grad_norm": 0.30746379494667053, "learning_rate": 0.00030093276828683293, "loss": 1.8191, "step": 9648 }, { "epoch": 0.471142578125, "grad_norm": 0.3431416153907776, "learning_rate": 0.00030089797373805563, "loss": 1.8231, "step": 9649 }, { "epoch": 0.47119140625, "grad_norm": 0.23848463594913483, "learning_rate": 0.0003008631785616201, "loss": 1.8163, "step": 9650 }, { "epoch": 0.471240234375, "grad_norm": 0.28621795773506165, "learning_rate": 0.0003008283827583697, "loss": 1.8227, "step": 9651 }, { "epoch": 0.4712890625, "grad_norm": 0.2910573482513428, "learning_rate": 0.00030079358632914766, "loss": 1.8165, "step": 9652 }, { "epoch": 0.471337890625, "grad_norm": 0.25201523303985596, "learning_rate": 0.0003007587892747973, "loss": 1.7948, "step": 9653 }, { "epoch": 0.47138671875, "grad_norm": 0.2697595953941345, "learning_rate": 0.00030072399159616214, "loss": 1.8096, "step": 9654 }, { "epoch": 0.471435546875, "grad_norm": 0.24223758280277252, "learning_rate": 0.0003006891932940853, "loss": 1.8043, "step": 9655 }, { "epoch": 0.471484375, "grad_norm": 0.2370288074016571, "learning_rate": 0.0003006543943694101, "loss": 1.8055, "step": 9656 }, { "epoch": 0.471533203125, "grad_norm": 0.22668907046318054, "learning_rate": 0.00030061959482298017, "loss": 1.7805, "step": 9657 }, { "epoch": 0.47158203125, "grad_norm": 0.23184199631214142, "learning_rate": 0.00030058479465563877, "loss": 1.8115, "step": 9658 }, { "epoch": 0.471630859375, "grad_norm": 0.2674553394317627, "learning_rate": 0.0003005499938682293, "loss": 1.7982, "step": 9659 }, { "epoch": 0.4716796875, "grad_norm": 0.22283610701560974, "learning_rate": 0.00030051519246159517, "loss": 1.8044, "step": 9660 }, { "epoch": 0.471728515625, "grad_norm": 0.26788920164108276, "learning_rate": 0.0003004803904365799, "loss": 1.806, "step": 9661 }, { "epoch": 0.47177734375, "grad_norm": 0.274607390165329, "learning_rate": 0.00030044558779402687, "loss": 1.799, "step": 9662 }, { "epoch": 0.471826171875, "grad_norm": 0.23775677382946014, "learning_rate": 0.0003004107845347796, "loss": 1.7989, "step": 9663 }, { "epoch": 0.471875, "grad_norm": 0.2383987158536911, "learning_rate": 0.00030037598065968147, "loss": 1.7664, "step": 9664 }, { "epoch": 0.471923828125, "grad_norm": 0.25950661301612854, "learning_rate": 0.0003003411761695762, "loss": 1.8132, "step": 9665 }, { "epoch": 0.47197265625, "grad_norm": 0.26328304409980774, "learning_rate": 0.0003003063710653071, "loss": 1.8079, "step": 9666 }, { "epoch": 0.472021484375, "grad_norm": 0.25240811705589294, "learning_rate": 0.00030027156534771765, "loss": 1.7995, "step": 9667 }, { "epoch": 0.4720703125, "grad_norm": 0.2732413411140442, "learning_rate": 0.00030023675901765163, "loss": 1.8211, "step": 9668 }, { "epoch": 0.472119140625, "grad_norm": 0.26378461718559265, "learning_rate": 0.00030020195207595247, "loss": 1.7932, "step": 9669 }, { "epoch": 0.47216796875, "grad_norm": 0.2966855466365814, "learning_rate": 0.00030016714452346384, "loss": 1.8037, "step": 9670 }, { "epoch": 0.472216796875, "grad_norm": 0.34007057547569275, "learning_rate": 0.0003001323363610291, "loss": 1.7879, "step": 9671 }, { "epoch": 0.472265625, "grad_norm": 0.1979457139968872, "learning_rate": 0.00030009752758949204, "loss": 1.8284, "step": 9672 }, { "epoch": 0.472314453125, "grad_norm": 0.3147798180580139, "learning_rate": 0.0003000627182096963, "loss": 1.7805, "step": 9673 }, { "epoch": 0.47236328125, "grad_norm": 0.2992672324180603, "learning_rate": 0.00030002790822248536, "loss": 1.7948, "step": 9674 }, { "epoch": 0.472412109375, "grad_norm": 0.3066997230052948, "learning_rate": 0.000299993097628703, "loss": 1.8323, "step": 9675 }, { "epoch": 0.4724609375, "grad_norm": 0.28652191162109375, "learning_rate": 0.0002999582864291928, "loss": 1.8046, "step": 9676 }, { "epoch": 0.472509765625, "grad_norm": 0.2748797535896301, "learning_rate": 0.0002999234746247985, "loss": 1.813, "step": 9677 }, { "epoch": 0.47255859375, "grad_norm": 0.2765333652496338, "learning_rate": 0.0002998886622163638, "loss": 1.804, "step": 9678 }, { "epoch": 0.472607421875, "grad_norm": 0.32943248748779297, "learning_rate": 0.0002998538492047324, "loss": 1.7793, "step": 9679 }, { "epoch": 0.47265625, "grad_norm": 0.2464950680732727, "learning_rate": 0.0002998190355907479, "loss": 1.8268, "step": 9680 }, { "epoch": 0.472705078125, "grad_norm": 0.289761483669281, "learning_rate": 0.0002997842213752542, "loss": 1.801, "step": 9681 }, { "epoch": 0.47275390625, "grad_norm": 0.26457616686820984, "learning_rate": 0.000299749406559095, "loss": 1.7939, "step": 9682 }, { "epoch": 0.472802734375, "grad_norm": 0.2371671497821808, "learning_rate": 0.000299714591143114, "loss": 1.8029, "step": 9683 }, { "epoch": 0.4728515625, "grad_norm": 0.32926198840141296, "learning_rate": 0.00029967977512815504, "loss": 1.767, "step": 9684 }, { "epoch": 0.472900390625, "grad_norm": 0.2466972917318344, "learning_rate": 0.0002996449585150619, "loss": 1.8144, "step": 9685 }, { "epoch": 0.47294921875, "grad_norm": 0.28979843854904175, "learning_rate": 0.0002996101413046785, "loss": 1.7775, "step": 9686 }, { "epoch": 0.472998046875, "grad_norm": 0.25754404067993164, "learning_rate": 0.00029957532349784836, "loss": 1.8038, "step": 9687 }, { "epoch": 0.473046875, "grad_norm": 0.21268810331821442, "learning_rate": 0.00029954050509541563, "loss": 1.7938, "step": 9688 }, { "epoch": 0.473095703125, "grad_norm": 0.2873605489730835, "learning_rate": 0.00029950568609822406, "loss": 1.786, "step": 9689 }, { "epoch": 0.47314453125, "grad_norm": 0.2085786908864975, "learning_rate": 0.0002994708665071174, "loss": 1.7874, "step": 9690 }, { "epoch": 0.473193359375, "grad_norm": 0.2960359752178192, "learning_rate": 0.0002994360463229397, "loss": 1.8014, "step": 9691 }, { "epoch": 0.4732421875, "grad_norm": 0.2673926055431366, "learning_rate": 0.00029940122554653477, "loss": 1.8188, "step": 9692 }, { "epoch": 0.473291015625, "grad_norm": 0.26409098505973816, "learning_rate": 0.00029936640417874654, "loss": 1.7983, "step": 9693 }, { "epoch": 0.47333984375, "grad_norm": 0.24729442596435547, "learning_rate": 0.0002993315822204189, "loss": 1.7891, "step": 9694 }, { "epoch": 0.473388671875, "grad_norm": 0.21080990135669708, "learning_rate": 0.00029929675967239585, "loss": 1.7792, "step": 9695 }, { "epoch": 0.4734375, "grad_norm": 0.2532891631126404, "learning_rate": 0.0002992619365355214, "loss": 1.7936, "step": 9696 }, { "epoch": 0.473486328125, "grad_norm": 0.24312688410282135, "learning_rate": 0.0002992271128106393, "loss": 1.805, "step": 9697 }, { "epoch": 0.47353515625, "grad_norm": 0.2526322305202484, "learning_rate": 0.00029919228849859366, "loss": 1.7872, "step": 9698 }, { "epoch": 0.473583984375, "grad_norm": 0.2324790060520172, "learning_rate": 0.0002991574636002285, "loss": 1.7939, "step": 9699 }, { "epoch": 0.4736328125, "grad_norm": 0.23190303146839142, "learning_rate": 0.0002991226381163878, "loss": 1.7998, "step": 9700 }, { "epoch": 0.473681640625, "grad_norm": 0.23574163019657135, "learning_rate": 0.0002990878120479156, "loss": 1.81, "step": 9701 }, { "epoch": 0.47373046875, "grad_norm": 0.23186524212360382, "learning_rate": 0.00029905298539565587, "loss": 1.7869, "step": 9702 }, { "epoch": 0.473779296875, "grad_norm": 0.23557725548744202, "learning_rate": 0.00029901815816045274, "loss": 1.7966, "step": 9703 }, { "epoch": 0.473828125, "grad_norm": 0.2350667417049408, "learning_rate": 0.0002989833303431503, "loss": 1.7954, "step": 9704 }, { "epoch": 0.473876953125, "grad_norm": 0.26157525181770325, "learning_rate": 0.00029894850194459255, "loss": 1.7968, "step": 9705 }, { "epoch": 0.47392578125, "grad_norm": 0.28319939970970154, "learning_rate": 0.0002989136729656236, "loss": 1.7998, "step": 9706 }, { "epoch": 0.473974609375, "grad_norm": 0.23311860859394073, "learning_rate": 0.00029887884340708764, "loss": 1.8188, "step": 9707 }, { "epoch": 0.4740234375, "grad_norm": 0.28684672713279724, "learning_rate": 0.00029884401326982864, "loss": 1.7979, "step": 9708 }, { "epoch": 0.474072265625, "grad_norm": 0.23859752714633942, "learning_rate": 0.00029880918255469085, "loss": 1.7959, "step": 9709 }, { "epoch": 0.47412109375, "grad_norm": 0.20705090463161469, "learning_rate": 0.0002987743512625184, "loss": 1.8115, "step": 9710 }, { "epoch": 0.474169921875, "grad_norm": 0.24778462946414948, "learning_rate": 0.00029873951939415547, "loss": 1.7926, "step": 9711 }, { "epoch": 0.47421875, "grad_norm": 0.2690025568008423, "learning_rate": 0.0002987046869504462, "loss": 1.8009, "step": 9712 }, { "epoch": 0.474267578125, "grad_norm": 0.21820388734340668, "learning_rate": 0.0002986698539322347, "loss": 1.7974, "step": 9713 }, { "epoch": 0.47431640625, "grad_norm": 0.28095701336860657, "learning_rate": 0.0002986350203403654, "loss": 1.7908, "step": 9714 }, { "epoch": 0.474365234375, "grad_norm": 0.3386375606060028, "learning_rate": 0.00029860018617568234, "loss": 1.8, "step": 9715 }, { "epoch": 0.4744140625, "grad_norm": 0.3637373447418213, "learning_rate": 0.0002985653514390298, "loss": 1.8224, "step": 9716 }, { "epoch": 0.474462890625, "grad_norm": 0.3265771269798279, "learning_rate": 0.00029853051613125213, "loss": 1.7922, "step": 9717 }, { "epoch": 0.47451171875, "grad_norm": 0.21430040895938873, "learning_rate": 0.0002984956802531935, "loss": 1.7884, "step": 9718 }, { "epoch": 0.474560546875, "grad_norm": 0.3175324499607086, "learning_rate": 0.000298460843805698, "loss": 1.7855, "step": 9719 }, { "epoch": 0.474609375, "grad_norm": 0.3557313084602356, "learning_rate": 0.00029842600678961023, "loss": 1.8029, "step": 9720 }, { "epoch": 0.474658203125, "grad_norm": 0.21800567209720612, "learning_rate": 0.0002983911692057743, "loss": 1.8112, "step": 9721 }, { "epoch": 0.47470703125, "grad_norm": 0.3031827509403229, "learning_rate": 0.0002983563310550347, "loss": 1.7911, "step": 9722 }, { "epoch": 0.474755859375, "grad_norm": 0.2959132194519043, "learning_rate": 0.00029832149233823555, "loss": 1.8097, "step": 9723 }, { "epoch": 0.4748046875, "grad_norm": 0.21690259873867035, "learning_rate": 0.00029828665305622133, "loss": 1.7811, "step": 9724 }, { "epoch": 0.474853515625, "grad_norm": 0.30515116453170776, "learning_rate": 0.0002982518132098364, "loss": 1.8139, "step": 9725 }, { "epoch": 0.47490234375, "grad_norm": 0.2821372449398041, "learning_rate": 0.0002982169727999251, "loss": 1.818, "step": 9726 }, { "epoch": 0.474951171875, "grad_norm": 0.22816352546215057, "learning_rate": 0.0002981821318273318, "loss": 1.8131, "step": 9727 }, { "epoch": 0.475, "grad_norm": 0.27475377917289734, "learning_rate": 0.00029814729029290085, "loss": 1.8035, "step": 9728 }, { "epoch": 0.475048828125, "grad_norm": 0.2409011274576187, "learning_rate": 0.00029811244819747675, "loss": 1.8031, "step": 9729 }, { "epoch": 0.47509765625, "grad_norm": 0.3239923119544983, "learning_rate": 0.00029807760554190395, "loss": 1.8204, "step": 9730 }, { "epoch": 0.475146484375, "grad_norm": 0.26694029569625854, "learning_rate": 0.00029804276232702674, "loss": 1.7982, "step": 9731 }, { "epoch": 0.4751953125, "grad_norm": 0.24472686648368835, "learning_rate": 0.00029800791855368974, "loss": 1.8041, "step": 9732 }, { "epoch": 0.475244140625, "grad_norm": 0.3240121304988861, "learning_rate": 0.0002979730742227373, "loss": 1.7846, "step": 9733 }, { "epoch": 0.47529296875, "grad_norm": 0.32070398330688477, "learning_rate": 0.000297938229335014, "loss": 1.8061, "step": 9734 }, { "epoch": 0.475341796875, "grad_norm": 0.25652483105659485, "learning_rate": 0.0002979033838913644, "loss": 1.8161, "step": 9735 }, { "epoch": 0.475390625, "grad_norm": 0.28734973073005676, "learning_rate": 0.00029786853789263274, "loss": 1.7932, "step": 9736 }, { "epoch": 0.475439453125, "grad_norm": 0.24870046973228455, "learning_rate": 0.00029783369133966375, "loss": 1.8008, "step": 9737 }, { "epoch": 0.47548828125, "grad_norm": 0.27237388491630554, "learning_rate": 0.00029779884423330186, "loss": 1.7978, "step": 9738 }, { "epoch": 0.475537109375, "grad_norm": 0.24941110610961914, "learning_rate": 0.0002977639965743917, "loss": 1.8204, "step": 9739 }, { "epoch": 0.4755859375, "grad_norm": 0.233229398727417, "learning_rate": 0.0002977291483637777, "loss": 1.8013, "step": 9740 }, { "epoch": 0.475634765625, "grad_norm": 0.2733632028102875, "learning_rate": 0.0002976942996023046, "loss": 1.7851, "step": 9741 }, { "epoch": 0.47568359375, "grad_norm": 0.2598375976085663, "learning_rate": 0.000297659450290817, "loss": 1.793, "step": 9742 }, { "epoch": 0.475732421875, "grad_norm": 0.29455116391181946, "learning_rate": 0.0002976246004301593, "loss": 1.7919, "step": 9743 }, { "epoch": 0.47578125, "grad_norm": 0.2943340539932251, "learning_rate": 0.0002975897500211762, "loss": 1.7974, "step": 9744 }, { "epoch": 0.475830078125, "grad_norm": 0.2988465130329132, "learning_rate": 0.0002975548990647125, "loss": 1.8194, "step": 9745 }, { "epoch": 0.47587890625, "grad_norm": 0.24812829494476318, "learning_rate": 0.00029752004756161265, "loss": 1.8227, "step": 9746 }, { "epoch": 0.475927734375, "grad_norm": 0.304236501455307, "learning_rate": 0.00029748519551272136, "loss": 1.8123, "step": 9747 }, { "epoch": 0.4759765625, "grad_norm": 0.25078776478767395, "learning_rate": 0.0002974503429188832, "loss": 1.8123, "step": 9748 }, { "epoch": 0.476025390625, "grad_norm": 0.28791284561157227, "learning_rate": 0.000297415489780943, "loss": 1.7706, "step": 9749 }, { "epoch": 0.47607421875, "grad_norm": 0.2458752542734146, "learning_rate": 0.00029738063609974544, "loss": 1.815, "step": 9750 }, { "epoch": 0.476123046875, "grad_norm": 0.2613685131072998, "learning_rate": 0.00029734578187613515, "loss": 1.8127, "step": 9751 }, { "epoch": 0.476171875, "grad_norm": 0.2895277738571167, "learning_rate": 0.0002973109271109569, "loss": 1.795, "step": 9752 }, { "epoch": 0.476220703125, "grad_norm": 0.27382710576057434, "learning_rate": 0.0002972760718050554, "loss": 1.8108, "step": 9753 }, { "epoch": 0.47626953125, "grad_norm": 0.2377520054578781, "learning_rate": 0.0002972412159592753, "loss": 1.8021, "step": 9754 }, { "epoch": 0.476318359375, "grad_norm": 0.2709229290485382, "learning_rate": 0.0002972063595744617, "loss": 1.7993, "step": 9755 }, { "epoch": 0.4763671875, "grad_norm": 0.2501039505004883, "learning_rate": 0.00029717150265145897, "loss": 1.8109, "step": 9756 }, { "epoch": 0.476416015625, "grad_norm": 0.23515020310878754, "learning_rate": 0.0002971366451911121, "loss": 1.7966, "step": 9757 }, { "epoch": 0.47646484375, "grad_norm": 0.2426547408103943, "learning_rate": 0.0002971017871942658, "loss": 1.8109, "step": 9758 }, { "epoch": 0.476513671875, "grad_norm": 0.25306540727615356, "learning_rate": 0.00029706692866176496, "loss": 1.7976, "step": 9759 }, { "epoch": 0.4765625, "grad_norm": 0.2747340798377991, "learning_rate": 0.0002970320695944544, "loss": 1.8091, "step": 9760 }, { "epoch": 0.476611328125, "grad_norm": 0.2273220270872116, "learning_rate": 0.00029699720999317884, "loss": 1.8247, "step": 9761 }, { "epoch": 0.47666015625, "grad_norm": 0.23923948407173157, "learning_rate": 0.00029696234985878334, "loss": 1.8135, "step": 9762 }, { "epoch": 0.476708984375, "grad_norm": 0.24023975431919098, "learning_rate": 0.00029692748919211265, "loss": 1.8258, "step": 9763 }, { "epoch": 0.4767578125, "grad_norm": 0.22124789655208588, "learning_rate": 0.00029689262799401155, "loss": 1.8013, "step": 9764 }, { "epoch": 0.476806640625, "grad_norm": 0.2672775089740753, "learning_rate": 0.0002968577662653251, "loss": 1.7954, "step": 9765 }, { "epoch": 0.47685546875, "grad_norm": 0.2874123752117157, "learning_rate": 0.0002968229040068982, "loss": 1.7866, "step": 9766 }, { "epoch": 0.476904296875, "grad_norm": 0.31148433685302734, "learning_rate": 0.0002967880412195755, "loss": 1.8066, "step": 9767 }, { "epoch": 0.476953125, "grad_norm": 0.27699166536331177, "learning_rate": 0.0002967531779042022, "loss": 1.7902, "step": 9768 }, { "epoch": 0.477001953125, "grad_norm": 0.25912874937057495, "learning_rate": 0.00029671831406162313, "loss": 1.8184, "step": 9769 }, { "epoch": 0.47705078125, "grad_norm": 0.2692389488220215, "learning_rate": 0.00029668344969268327, "loss": 1.811, "step": 9770 }, { "epoch": 0.477099609375, "grad_norm": 0.2407335340976715, "learning_rate": 0.0002966485847982277, "loss": 1.8066, "step": 9771 }, { "epoch": 0.4771484375, "grad_norm": 0.252442866563797, "learning_rate": 0.0002966137193791012, "loss": 1.7629, "step": 9772 }, { "epoch": 0.477197265625, "grad_norm": 0.30970609188079834, "learning_rate": 0.0002965788534361488, "loss": 1.8114, "step": 9773 }, { "epoch": 0.47724609375, "grad_norm": 0.22588716447353363, "learning_rate": 0.0002965439869702156, "loss": 1.8222, "step": 9774 }, { "epoch": 0.477294921875, "grad_norm": 0.25582006573677063, "learning_rate": 0.0002965091199821466, "loss": 1.8129, "step": 9775 }, { "epoch": 0.47734375, "grad_norm": 0.25359398126602173, "learning_rate": 0.00029647425247278673, "loss": 1.7836, "step": 9776 }, { "epoch": 0.477392578125, "grad_norm": 0.23674267530441284, "learning_rate": 0.00029643938444298115, "loss": 1.7976, "step": 9777 }, { "epoch": 0.47744140625, "grad_norm": 0.24164731800556183, "learning_rate": 0.0002964045158935748, "loss": 1.8004, "step": 9778 }, { "epoch": 0.477490234375, "grad_norm": 0.21679946780204773, "learning_rate": 0.00029636964682541283, "loss": 1.7992, "step": 9779 }, { "epoch": 0.4775390625, "grad_norm": 0.2502896785736084, "learning_rate": 0.0002963347772393404, "loss": 1.7704, "step": 9780 }, { "epoch": 0.477587890625, "grad_norm": 0.22763608396053314, "learning_rate": 0.0002962999071362024, "loss": 1.8014, "step": 9781 }, { "epoch": 0.47763671875, "grad_norm": 0.25141850113868713, "learning_rate": 0.0002962650365168441, "loss": 1.7653, "step": 9782 }, { "epoch": 0.477685546875, "grad_norm": 0.23627465963363647, "learning_rate": 0.0002962301653821105, "loss": 1.8173, "step": 9783 }, { "epoch": 0.477734375, "grad_norm": 0.2422734797000885, "learning_rate": 0.0002961952937328468, "loss": 1.8086, "step": 9784 }, { "epoch": 0.477783203125, "grad_norm": 0.2373104691505432, "learning_rate": 0.00029616042156989814, "loss": 1.8017, "step": 9785 }, { "epoch": 0.47783203125, "grad_norm": 0.2537926137447357, "learning_rate": 0.0002961255488941097, "loss": 1.8227, "step": 9786 }, { "epoch": 0.477880859375, "grad_norm": 0.22447146475315094, "learning_rate": 0.00029609067570632655, "loss": 1.8015, "step": 9787 }, { "epoch": 0.4779296875, "grad_norm": 0.25716057419776917, "learning_rate": 0.000296055802007394, "loss": 1.8042, "step": 9788 }, { "epoch": 0.477978515625, "grad_norm": 0.25863805413246155, "learning_rate": 0.00029602092779815707, "loss": 1.8172, "step": 9789 }, { "epoch": 0.47802734375, "grad_norm": 0.2807866334915161, "learning_rate": 0.00029598605307946116, "loss": 1.8217, "step": 9790 }, { "epoch": 0.478076171875, "grad_norm": 0.27312979102134705, "learning_rate": 0.0002959511778521513, "loss": 1.8217, "step": 9791 }, { "epoch": 0.478125, "grad_norm": 0.24250246584415436, "learning_rate": 0.0002959163021170729, "loss": 1.8131, "step": 9792 }, { "epoch": 0.478173828125, "grad_norm": 0.265669584274292, "learning_rate": 0.00029588142587507106, "loss": 1.798, "step": 9793 }, { "epoch": 0.47822265625, "grad_norm": 0.26833558082580566, "learning_rate": 0.0002958465491269911, "loss": 1.8153, "step": 9794 }, { "epoch": 0.478271484375, "grad_norm": 0.2832411825656891, "learning_rate": 0.0002958116718736783, "loss": 1.8171, "step": 9795 }, { "epoch": 0.4783203125, "grad_norm": 0.27536261081695557, "learning_rate": 0.0002957767941159779, "loss": 1.8168, "step": 9796 }, { "epoch": 0.478369140625, "grad_norm": 0.304451584815979, "learning_rate": 0.00029574191585473523, "loss": 1.8047, "step": 9797 }, { "epoch": 0.47841796875, "grad_norm": 0.24325060844421387, "learning_rate": 0.0002957070370907955, "loss": 1.7706, "step": 9798 }, { "epoch": 0.478466796875, "grad_norm": 0.3234080672264099, "learning_rate": 0.0002956721578250042, "loss": 1.8105, "step": 9799 }, { "epoch": 0.478515625, "grad_norm": 0.31588926911354065, "learning_rate": 0.00029563727805820647, "loss": 1.8016, "step": 9800 }, { "epoch": 0.478564453125, "grad_norm": 0.2797642648220062, "learning_rate": 0.0002956023977912477, "loss": 1.7975, "step": 9801 }, { "epoch": 0.47861328125, "grad_norm": 0.24479223787784576, "learning_rate": 0.00029556751702497323, "loss": 1.7861, "step": 9802 }, { "epoch": 0.478662109375, "grad_norm": 0.2661975026130676, "learning_rate": 0.0002955326357602285, "loss": 1.7946, "step": 9803 }, { "epoch": 0.4787109375, "grad_norm": 0.21623016893863678, "learning_rate": 0.0002954977539978589, "loss": 1.7829, "step": 9804 }, { "epoch": 0.478759765625, "grad_norm": 0.24484694004058838, "learning_rate": 0.0002954628717387098, "loss": 1.7747, "step": 9805 }, { "epoch": 0.47880859375, "grad_norm": 0.26093393564224243, "learning_rate": 0.0002954279889836265, "loss": 1.8164, "step": 9806 }, { "epoch": 0.478857421875, "grad_norm": 0.27157795429229736, "learning_rate": 0.00029539310573345446, "loss": 1.798, "step": 9807 }, { "epoch": 0.47890625, "grad_norm": 0.2356078326702118, "learning_rate": 0.00029535822198903914, "loss": 1.8139, "step": 9808 }, { "epoch": 0.478955078125, "grad_norm": 0.21945816278457642, "learning_rate": 0.0002953233377512259, "loss": 1.7793, "step": 9809 }, { "epoch": 0.47900390625, "grad_norm": 0.2579301595687866, "learning_rate": 0.0002952884530208604, "loss": 1.8097, "step": 9810 }, { "epoch": 0.479052734375, "grad_norm": 0.23159383237361908, "learning_rate": 0.0002952535677987878, "loss": 1.8053, "step": 9811 }, { "epoch": 0.4791015625, "grad_norm": 0.2551743686199188, "learning_rate": 0.0002952186820858537, "loss": 1.7892, "step": 9812 }, { "epoch": 0.479150390625, "grad_norm": 0.25367432832717896, "learning_rate": 0.0002951837958829036, "loss": 1.8006, "step": 9813 }, { "epoch": 0.47919921875, "grad_norm": 0.20646809041500092, "learning_rate": 0.0002951489091907831, "loss": 1.7939, "step": 9814 }, { "epoch": 0.479248046875, "grad_norm": 0.26607152819633484, "learning_rate": 0.00029511402201033753, "loss": 1.7963, "step": 9815 }, { "epoch": 0.479296875, "grad_norm": 0.2500280737876892, "learning_rate": 0.00029507913434241253, "loss": 1.8011, "step": 9816 }, { "epoch": 0.479345703125, "grad_norm": 0.26471593976020813, "learning_rate": 0.00029504424618785347, "loss": 1.7756, "step": 9817 }, { "epoch": 0.47939453125, "grad_norm": 0.2459592968225479, "learning_rate": 0.0002950093575475061, "loss": 1.7957, "step": 9818 }, { "epoch": 0.479443359375, "grad_norm": 0.29905590415000916, "learning_rate": 0.0002949744684222158, "loss": 1.8076, "step": 9819 }, { "epoch": 0.4794921875, "grad_norm": 0.3417394161224365, "learning_rate": 0.0002949395788128282, "loss": 1.8001, "step": 9820 }, { "epoch": 0.479541015625, "grad_norm": 0.29450106620788574, "learning_rate": 0.0002949046887201889, "loss": 1.7757, "step": 9821 }, { "epoch": 0.47958984375, "grad_norm": 0.26096075773239136, "learning_rate": 0.0002948697981451436, "loss": 1.7931, "step": 9822 }, { "epoch": 0.479638671875, "grad_norm": 0.3203551471233368, "learning_rate": 0.00029483490708853764, "loss": 1.8142, "step": 9823 }, { "epoch": 0.4796875, "grad_norm": 0.28446030616760254, "learning_rate": 0.00029480001555121685, "loss": 1.805, "step": 9824 }, { "epoch": 0.479736328125, "grad_norm": 0.2780652642250061, "learning_rate": 0.00029476512353402676, "loss": 1.8259, "step": 9825 }, { "epoch": 0.47978515625, "grad_norm": 0.25641360878944397, "learning_rate": 0.000294730231037813, "loss": 1.817, "step": 9826 }, { "epoch": 0.479833984375, "grad_norm": 0.30751991271972656, "learning_rate": 0.00029469533806342126, "loss": 1.788, "step": 9827 }, { "epoch": 0.4798828125, "grad_norm": 0.2476295679807663, "learning_rate": 0.0002946604446116971, "loss": 1.8023, "step": 9828 }, { "epoch": 0.479931640625, "grad_norm": 0.3234526813030243, "learning_rate": 0.00029462555068348635, "loss": 1.7788, "step": 9829 }, { "epoch": 0.47998046875, "grad_norm": 0.27530163526535034, "learning_rate": 0.00029459065627963465, "loss": 1.8131, "step": 9830 }, { "epoch": 0.480029296875, "grad_norm": 0.24374307692050934, "learning_rate": 0.0002945557614009876, "loss": 1.7606, "step": 9831 }, { "epoch": 0.480078125, "grad_norm": 0.24881723523139954, "learning_rate": 0.000294520866048391, "loss": 1.792, "step": 9832 }, { "epoch": 0.480126953125, "grad_norm": 0.2718912363052368, "learning_rate": 0.0002944859702226905, "loss": 1.784, "step": 9833 }, { "epoch": 0.48017578125, "grad_norm": 0.2940138578414917, "learning_rate": 0.00029445107392473183, "loss": 1.7957, "step": 9834 }, { "epoch": 0.480224609375, "grad_norm": 0.2760500907897949, "learning_rate": 0.00029441617715536073, "loss": 1.8138, "step": 9835 }, { "epoch": 0.4802734375, "grad_norm": 0.3251338303089142, "learning_rate": 0.000294381279915423, "loss": 1.7978, "step": 9836 }, { "epoch": 0.480322265625, "grad_norm": 0.350252240896225, "learning_rate": 0.00029434638220576445, "loss": 1.7993, "step": 9837 }, { "epoch": 0.48037109375, "grad_norm": 0.2368185967206955, "learning_rate": 0.00029431148402723074, "loss": 1.8119, "step": 9838 }, { "epoch": 0.480419921875, "grad_norm": 0.3127939701080322, "learning_rate": 0.00029427658538066777, "loss": 1.7945, "step": 9839 }, { "epoch": 0.48046875, "grad_norm": 0.29845714569091797, "learning_rate": 0.00029424168626692116, "loss": 1.7547, "step": 9840 }, { "epoch": 0.480517578125, "grad_norm": 0.2380504012107849, "learning_rate": 0.0002942067866868368, "loss": 1.786, "step": 9841 }, { "epoch": 0.48056640625, "grad_norm": 0.23891325294971466, "learning_rate": 0.0002941718866412606, "loss": 1.7928, "step": 9842 }, { "epoch": 0.480615234375, "grad_norm": 0.24681609869003296, "learning_rate": 0.0002941369861310383, "loss": 1.8031, "step": 9843 }, { "epoch": 0.4806640625, "grad_norm": 0.24944892525672913, "learning_rate": 0.0002941020851570158, "loss": 1.8012, "step": 9844 }, { "epoch": 0.480712890625, "grad_norm": 0.2590774595737457, "learning_rate": 0.0002940671837200389, "loss": 1.8156, "step": 9845 }, { "epoch": 0.48076171875, "grad_norm": 0.20564937591552734, "learning_rate": 0.00029403228182095346, "loss": 1.7847, "step": 9846 }, { "epoch": 0.480810546875, "grad_norm": 0.2815397381782532, "learning_rate": 0.0002939973794606055, "loss": 1.8177, "step": 9847 }, { "epoch": 0.480859375, "grad_norm": 0.2432486116886139, "learning_rate": 0.00029396247663984065, "loss": 1.7789, "step": 9848 }, { "epoch": 0.480908203125, "grad_norm": 0.21542194485664368, "learning_rate": 0.00029392757335950504, "loss": 1.8121, "step": 9849 }, { "epoch": 0.48095703125, "grad_norm": 0.27068597078323364, "learning_rate": 0.0002938926696204444, "loss": 1.7984, "step": 9850 }, { "epoch": 0.481005859375, "grad_norm": 0.2202564775943756, "learning_rate": 0.00029385776542350476, "loss": 1.7804, "step": 9851 }, { "epoch": 0.4810546875, "grad_norm": 0.2857850193977356, "learning_rate": 0.000293822860769532, "loss": 1.7645, "step": 9852 }, { "epoch": 0.481103515625, "grad_norm": 0.30347999930381775, "learning_rate": 0.0002937879556593721, "loss": 1.8002, "step": 9853 }, { "epoch": 0.48115234375, "grad_norm": 0.2345440536737442, "learning_rate": 0.00029375305009387106, "loss": 1.7847, "step": 9854 }, { "epoch": 0.481201171875, "grad_norm": 0.3067375123500824, "learning_rate": 0.0002937181440738747, "loss": 1.8071, "step": 9855 }, { "epoch": 0.48125, "grad_norm": 0.22153043746948242, "learning_rate": 0.0002936832376002292, "loss": 1.7829, "step": 9856 }, { "epoch": 0.481298828125, "grad_norm": 0.22582659125328064, "learning_rate": 0.00029364833067378033, "loss": 1.8037, "step": 9857 }, { "epoch": 0.48134765625, "grad_norm": 0.19605447351932526, "learning_rate": 0.0002936134232953742, "loss": 1.7927, "step": 9858 }, { "epoch": 0.481396484375, "grad_norm": 0.22776615619659424, "learning_rate": 0.00029357851546585686, "loss": 1.796, "step": 9859 }, { "epoch": 0.4814453125, "grad_norm": 0.24251319468021393, "learning_rate": 0.00029354360718607416, "loss": 1.8127, "step": 9860 }, { "epoch": 0.481494140625, "grad_norm": 0.24630630016326904, "learning_rate": 0.0002935086984568723, "loss": 1.7856, "step": 9861 }, { "epoch": 0.48154296875, "grad_norm": 0.21470770239830017, "learning_rate": 0.00029347378927909725, "loss": 1.7999, "step": 9862 }, { "epoch": 0.481591796875, "grad_norm": 0.2979525327682495, "learning_rate": 0.0002934388796535951, "loss": 1.7981, "step": 9863 }, { "epoch": 0.481640625, "grad_norm": 0.30051347613334656, "learning_rate": 0.00029340396958121195, "loss": 1.808, "step": 9864 }, { "epoch": 0.481689453125, "grad_norm": 0.2909068465232849, "learning_rate": 0.0002933690590627937, "loss": 1.8062, "step": 9865 }, { "epoch": 0.48173828125, "grad_norm": 0.32051005959510803, "learning_rate": 0.0002933341480991866, "loss": 1.7903, "step": 9866 }, { "epoch": 0.481787109375, "grad_norm": 0.26453956961631775, "learning_rate": 0.0002932992366912367, "loss": 1.7952, "step": 9867 }, { "epoch": 0.4818359375, "grad_norm": 0.2671327292919159, "learning_rate": 0.00029326432483979005, "loss": 1.8092, "step": 9868 }, { "epoch": 0.481884765625, "grad_norm": 0.23592694103717804, "learning_rate": 0.00029322941254569286, "loss": 1.7951, "step": 9869 }, { "epoch": 0.48193359375, "grad_norm": 0.2805989384651184, "learning_rate": 0.0002931944998097912, "loss": 1.7627, "step": 9870 }, { "epoch": 0.481982421875, "grad_norm": 0.23793306946754456, "learning_rate": 0.0002931595866329311, "loss": 1.7995, "step": 9871 }, { "epoch": 0.48203125, "grad_norm": 0.23949366807937622, "learning_rate": 0.000293124673015959, "loss": 1.8353, "step": 9872 }, { "epoch": 0.482080078125, "grad_norm": 0.28723010420799255, "learning_rate": 0.0002930897589597209, "loss": 1.7888, "step": 9873 }, { "epoch": 0.48212890625, "grad_norm": 0.2390584796667099, "learning_rate": 0.0002930548444650629, "loss": 1.8275, "step": 9874 }, { "epoch": 0.482177734375, "grad_norm": 0.26030233502388, "learning_rate": 0.0002930199295328312, "loss": 1.8271, "step": 9875 }, { "epoch": 0.4822265625, "grad_norm": 0.2611759305000305, "learning_rate": 0.000292985014163872, "loss": 1.8316, "step": 9876 }, { "epoch": 0.482275390625, "grad_norm": 0.21055983006954193, "learning_rate": 0.00029295009835903163, "loss": 1.8144, "step": 9877 }, { "epoch": 0.48232421875, "grad_norm": 0.277005136013031, "learning_rate": 0.0002929151821191561, "loss": 1.8026, "step": 9878 }, { "epoch": 0.482373046875, "grad_norm": 0.28450697660446167, "learning_rate": 0.00029288026544509177, "loss": 1.7759, "step": 9879 }, { "epoch": 0.482421875, "grad_norm": 0.23411493003368378, "learning_rate": 0.0002928453483376849, "loss": 1.7971, "step": 9880 }, { "epoch": 0.482470703125, "grad_norm": 0.30301839113235474, "learning_rate": 0.00029281043079778174, "loss": 1.8057, "step": 9881 }, { "epoch": 0.48251953125, "grad_norm": 0.2502041757106781, "learning_rate": 0.0002927755128262283, "loss": 1.8149, "step": 9882 }, { "epoch": 0.482568359375, "grad_norm": 0.2289862036705017, "learning_rate": 0.0002927405944238711, "loss": 1.7736, "step": 9883 }, { "epoch": 0.4826171875, "grad_norm": 0.2530671954154968, "learning_rate": 0.0002927056755915564, "loss": 1.7783, "step": 9884 }, { "epoch": 0.482666015625, "grad_norm": 0.29099974036216736, "learning_rate": 0.0002926707563301304, "loss": 1.8053, "step": 9885 }, { "epoch": 0.48271484375, "grad_norm": 0.24387453496456146, "learning_rate": 0.0002926358366404394, "loss": 1.8146, "step": 9886 }, { "epoch": 0.482763671875, "grad_norm": 0.2614687383174896, "learning_rate": 0.0002926009165233298, "loss": 1.8113, "step": 9887 }, { "epoch": 0.4828125, "grad_norm": 0.34961801767349243, "learning_rate": 0.0002925659959796477, "loss": 1.7708, "step": 9888 }, { "epoch": 0.482861328125, "grad_norm": 0.2942320704460144, "learning_rate": 0.00029253107501023973, "loss": 1.7797, "step": 9889 }, { "epoch": 0.48291015625, "grad_norm": 0.24638567864894867, "learning_rate": 0.000292496153615952, "loss": 1.788, "step": 9890 }, { "epoch": 0.482958984375, "grad_norm": 0.2802046835422516, "learning_rate": 0.00029246123179763094, "loss": 1.8092, "step": 9891 }, { "epoch": 0.4830078125, "grad_norm": 0.27251529693603516, "learning_rate": 0.0002924263095561229, "loss": 1.7883, "step": 9892 }, { "epoch": 0.483056640625, "grad_norm": 0.1918981820344925, "learning_rate": 0.0002923913868922742, "loss": 1.7945, "step": 9893 }, { "epoch": 0.48310546875, "grad_norm": 0.2622709572315216, "learning_rate": 0.0002923564638069313, "loss": 1.8053, "step": 9894 }, { "epoch": 0.483154296875, "grad_norm": 0.29552382230758667, "learning_rate": 0.0002923215403009406, "loss": 1.8173, "step": 9895 }, { "epoch": 0.483203125, "grad_norm": 0.23861843347549438, "learning_rate": 0.0002922866163751484, "loss": 1.8103, "step": 9896 }, { "epoch": 0.483251953125, "grad_norm": 0.2624778151512146, "learning_rate": 0.0002922516920304012, "loss": 1.7873, "step": 9897 }, { "epoch": 0.48330078125, "grad_norm": 0.3449486494064331, "learning_rate": 0.0002922167672675454, "loss": 1.8098, "step": 9898 }, { "epoch": 0.483349609375, "grad_norm": 0.2682177424430847, "learning_rate": 0.0002921818420874274, "loss": 1.7835, "step": 9899 }, { "epoch": 0.4833984375, "grad_norm": 0.24945653975009918, "learning_rate": 0.0002921469164908936, "loss": 1.7928, "step": 9900 }, { "epoch": 0.483447265625, "grad_norm": 0.3213101029396057, "learning_rate": 0.0002921119904787906, "loss": 1.7955, "step": 9901 }, { "epoch": 0.48349609375, "grad_norm": 0.27205371856689453, "learning_rate": 0.00029207706405196465, "loss": 1.7696, "step": 9902 }, { "epoch": 0.483544921875, "grad_norm": 0.23565241694450378, "learning_rate": 0.0002920421372112623, "loss": 1.8205, "step": 9903 }, { "epoch": 0.48359375, "grad_norm": 0.24839553236961365, "learning_rate": 0.0002920072099575302, "loss": 1.7942, "step": 9904 }, { "epoch": 0.483642578125, "grad_norm": 0.21612055599689484, "learning_rate": 0.00029197228229161466, "loss": 1.805, "step": 9905 }, { "epoch": 0.48369140625, "grad_norm": 0.23788319528102875, "learning_rate": 0.00029193735421436225, "loss": 1.7751, "step": 9906 }, { "epoch": 0.483740234375, "grad_norm": 0.23422788083553314, "learning_rate": 0.00029190242572661946, "loss": 1.8004, "step": 9907 }, { "epoch": 0.4837890625, "grad_norm": 0.21403497457504272, "learning_rate": 0.0002918674968292327, "loss": 1.7967, "step": 9908 }, { "epoch": 0.483837890625, "grad_norm": 0.22623316943645477, "learning_rate": 0.00029183256752304864, "loss": 1.7891, "step": 9909 }, { "epoch": 0.48388671875, "grad_norm": 0.23820166289806366, "learning_rate": 0.0002917976378089138, "loss": 1.787, "step": 9910 }, { "epoch": 0.483935546875, "grad_norm": 0.2766866683959961, "learning_rate": 0.0002917627076876748, "loss": 1.7967, "step": 9911 }, { "epoch": 0.483984375, "grad_norm": 0.25344356894493103, "learning_rate": 0.000291727777160178, "loss": 1.7934, "step": 9912 }, { "epoch": 0.484033203125, "grad_norm": 0.22622078657150269, "learning_rate": 0.0002916928462272701, "loss": 1.7788, "step": 9913 }, { "epoch": 0.48408203125, "grad_norm": 0.21040907502174377, "learning_rate": 0.00029165791488979767, "loss": 1.7828, "step": 9914 }, { "epoch": 0.484130859375, "grad_norm": 0.24479779601097107, "learning_rate": 0.0002916229831486073, "loss": 1.7827, "step": 9915 }, { "epoch": 0.4841796875, "grad_norm": 0.25724178552627563, "learning_rate": 0.0002915880510045456, "loss": 1.8051, "step": 9916 }, { "epoch": 0.484228515625, "grad_norm": 0.20137961208820343, "learning_rate": 0.0002915531184584591, "loss": 1.803, "step": 9917 }, { "epoch": 0.48427734375, "grad_norm": 0.22686652839183807, "learning_rate": 0.0002915181855111945, "loss": 1.8003, "step": 9918 }, { "epoch": 0.484326171875, "grad_norm": 0.258314847946167, "learning_rate": 0.0002914832521635984, "loss": 1.813, "step": 9919 }, { "epoch": 0.484375, "grad_norm": 0.22786132991313934, "learning_rate": 0.00029144831841651747, "loss": 1.7964, "step": 9920 }, { "epoch": 0.484423828125, "grad_norm": 0.22767286002635956, "learning_rate": 0.0002914133842707983, "loss": 1.7776, "step": 9921 }, { "epoch": 0.48447265625, "grad_norm": 0.21240054070949554, "learning_rate": 0.0002913784497272876, "loss": 1.7978, "step": 9922 }, { "epoch": 0.484521484375, "grad_norm": 0.22696459293365479, "learning_rate": 0.00029134351478683204, "loss": 1.7914, "step": 9923 }, { "epoch": 0.4845703125, "grad_norm": 0.2886315882205963, "learning_rate": 0.00029130857945027823, "loss": 1.8082, "step": 9924 }, { "epoch": 0.484619140625, "grad_norm": 0.3046523630619049, "learning_rate": 0.0002912736437184729, "loss": 1.7959, "step": 9925 }, { "epoch": 0.48466796875, "grad_norm": 0.2218378335237503, "learning_rate": 0.00029123870759226276, "loss": 1.7963, "step": 9926 }, { "epoch": 0.484716796875, "grad_norm": 0.28765323758125305, "learning_rate": 0.0002912037710724944, "loss": 1.7936, "step": 9927 }, { "epoch": 0.484765625, "grad_norm": 0.3090532124042511, "learning_rate": 0.0002911688341600147, "loss": 1.8069, "step": 9928 }, { "epoch": 0.484814453125, "grad_norm": 0.25284260511398315, "learning_rate": 0.00029113389685567025, "loss": 1.7735, "step": 9929 }, { "epoch": 0.48486328125, "grad_norm": 0.2673301100730896, "learning_rate": 0.0002910989591603079, "loss": 1.786, "step": 9930 }, { "epoch": 0.484912109375, "grad_norm": 0.24838627874851227, "learning_rate": 0.0002910640210747744, "loss": 1.7802, "step": 9931 }, { "epoch": 0.4849609375, "grad_norm": 0.2315889596939087, "learning_rate": 0.0002910290825999164, "loss": 1.7951, "step": 9932 }, { "epoch": 0.485009765625, "grad_norm": 0.2759148180484772, "learning_rate": 0.00029099414373658073, "loss": 1.7632, "step": 9933 }, { "epoch": 0.48505859375, "grad_norm": 0.22971443831920624, "learning_rate": 0.0002909592044856141, "loss": 1.7828, "step": 9934 }, { "epoch": 0.485107421875, "grad_norm": 0.32220974564552307, "learning_rate": 0.00029092426484786333, "loss": 1.8233, "step": 9935 }, { "epoch": 0.48515625, "grad_norm": 0.2871551215648651, "learning_rate": 0.00029088932482417517, "loss": 1.7742, "step": 9936 }, { "epoch": 0.485205078125, "grad_norm": 0.30032262206077576, "learning_rate": 0.00029085438441539655, "loss": 1.8239, "step": 9937 }, { "epoch": 0.48525390625, "grad_norm": 0.3061654567718506, "learning_rate": 0.00029081944362237413, "loss": 1.7982, "step": 9938 }, { "epoch": 0.485302734375, "grad_norm": 0.2675061523914337, "learning_rate": 0.00029078450244595473, "loss": 1.8019, "step": 9939 }, { "epoch": 0.4853515625, "grad_norm": 0.3583717346191406, "learning_rate": 0.0002907495608869854, "loss": 1.7989, "step": 9940 }, { "epoch": 0.485400390625, "grad_norm": 0.25924915075302124, "learning_rate": 0.00029071461894631263, "loss": 1.8322, "step": 9941 }, { "epoch": 0.48544921875, "grad_norm": 0.321650892496109, "learning_rate": 0.0002906796766247835, "loss": 1.7885, "step": 9942 }, { "epoch": 0.485498046875, "grad_norm": 0.33204445242881775, "learning_rate": 0.0002906447339232449, "loss": 1.7979, "step": 9943 }, { "epoch": 0.485546875, "grad_norm": 0.3147396445274353, "learning_rate": 0.0002906097908425435, "loss": 1.8297, "step": 9944 }, { "epoch": 0.485595703125, "grad_norm": 0.26746895909309387, "learning_rate": 0.0002905748473835263, "loss": 1.809, "step": 9945 }, { "epoch": 0.48564453125, "grad_norm": 0.38071078062057495, "learning_rate": 0.0002905399035470401, "loss": 1.7998, "step": 9946 }, { "epoch": 0.485693359375, "grad_norm": 0.2821538746356964, "learning_rate": 0.000290504959333932, "loss": 1.8009, "step": 9947 }, { "epoch": 0.4857421875, "grad_norm": 0.31408074498176575, "learning_rate": 0.0002904700147450487, "loss": 1.8011, "step": 9948 }, { "epoch": 0.485791015625, "grad_norm": 0.3649604618549347, "learning_rate": 0.0002904350697812371, "loss": 1.8148, "step": 9949 }, { "epoch": 0.48583984375, "grad_norm": 0.3126862347126007, "learning_rate": 0.00029040012444334426, "loss": 1.8023, "step": 9950 }, { "epoch": 0.485888671875, "grad_norm": 0.29463115334510803, "learning_rate": 0.00029036517873221696, "loss": 1.8151, "step": 9951 }, { "epoch": 0.4859375, "grad_norm": 0.3237072229385376, "learning_rate": 0.00029033023264870227, "loss": 1.7895, "step": 9952 }, { "epoch": 0.485986328125, "grad_norm": 0.2956814467906952, "learning_rate": 0.00029029528619364705, "loss": 1.8119, "step": 9953 }, { "epoch": 0.48603515625, "grad_norm": 0.2747967839241028, "learning_rate": 0.00029026033936789823, "loss": 1.7839, "step": 9954 }, { "epoch": 0.486083984375, "grad_norm": 0.2794943153858185, "learning_rate": 0.00029022539217230295, "loss": 1.8215, "step": 9955 }, { "epoch": 0.4861328125, "grad_norm": 0.3068987727165222, "learning_rate": 0.0002901904446077079, "loss": 1.806, "step": 9956 }, { "epoch": 0.486181640625, "grad_norm": 0.29626360535621643, "learning_rate": 0.0002901554966749604, "loss": 1.7902, "step": 9957 }, { "epoch": 0.48623046875, "grad_norm": 0.27694275975227356, "learning_rate": 0.00029012054837490716, "loss": 1.8169, "step": 9958 }, { "epoch": 0.486279296875, "grad_norm": 0.26009389758110046, "learning_rate": 0.0002900855997083953, "loss": 1.7651, "step": 9959 }, { "epoch": 0.486328125, "grad_norm": 0.2695169746875763, "learning_rate": 0.00029005065067627174, "loss": 1.7826, "step": 9960 }, { "epoch": 0.486376953125, "grad_norm": 0.25251877307891846, "learning_rate": 0.00029001570127938365, "loss": 1.789, "step": 9961 }, { "epoch": 0.48642578125, "grad_norm": 0.27571383118629456, "learning_rate": 0.00028998075151857796, "loss": 1.8045, "step": 9962 }, { "epoch": 0.486474609375, "grad_norm": 0.23728209733963013, "learning_rate": 0.00028994580139470174, "loss": 1.8103, "step": 9963 }, { "epoch": 0.4865234375, "grad_norm": 0.26717934012413025, "learning_rate": 0.000289910850908602, "loss": 1.8121, "step": 9964 }, { "epoch": 0.486572265625, "grad_norm": 0.2703065872192383, "learning_rate": 0.0002898759000611258, "loss": 1.7999, "step": 9965 }, { "epoch": 0.48662109375, "grad_norm": 0.2659280002117157, "learning_rate": 0.00028984094885312024, "loss": 1.7771, "step": 9966 }, { "epoch": 0.486669921875, "grad_norm": 0.2474212348461151, "learning_rate": 0.0002898059972854323, "loss": 1.8109, "step": 9967 }, { "epoch": 0.48671875, "grad_norm": 0.24202977120876312, "learning_rate": 0.0002897710453589092, "loss": 1.7898, "step": 9968 }, { "epoch": 0.486767578125, "grad_norm": 0.25267931818962097, "learning_rate": 0.00028973609307439787, "loss": 1.797, "step": 9969 }, { "epoch": 0.48681640625, "grad_norm": 0.23331832885742188, "learning_rate": 0.0002897011404327455, "loss": 1.7977, "step": 9970 }, { "epoch": 0.486865234375, "grad_norm": 0.2513820230960846, "learning_rate": 0.00028966618743479924, "loss": 1.8077, "step": 9971 }, { "epoch": 0.4869140625, "grad_norm": 0.24154545366764069, "learning_rate": 0.0002896312340814061, "loss": 1.8045, "step": 9972 }, { "epoch": 0.486962890625, "grad_norm": 0.22576040029525757, "learning_rate": 0.0002895962803734132, "loss": 1.7971, "step": 9973 }, { "epoch": 0.48701171875, "grad_norm": 0.23441539704799652, "learning_rate": 0.0002895613263116678, "loss": 1.8123, "step": 9974 }, { "epoch": 0.487060546875, "grad_norm": 0.24996386468410492, "learning_rate": 0.00028952637189701694, "loss": 1.7828, "step": 9975 }, { "epoch": 0.487109375, "grad_norm": 0.2101401686668396, "learning_rate": 0.0002894914171303078, "loss": 1.8119, "step": 9976 }, { "epoch": 0.487158203125, "grad_norm": 0.22605007886886597, "learning_rate": 0.00028945646201238755, "loss": 1.8074, "step": 9977 }, { "epoch": 0.48720703125, "grad_norm": 0.29468539357185364, "learning_rate": 0.0002894215065441032, "loss": 1.7995, "step": 9978 }, { "epoch": 0.487255859375, "grad_norm": 0.3408827483654022, "learning_rate": 0.0002893865507263022, "loss": 1.7605, "step": 9979 }, { "epoch": 0.4873046875, "grad_norm": 0.26497048139572144, "learning_rate": 0.00028935159455983146, "loss": 1.7928, "step": 9980 }, { "epoch": 0.487353515625, "grad_norm": 0.2543618977069855, "learning_rate": 0.00028931663804553835, "loss": 1.7875, "step": 9981 }, { "epoch": 0.48740234375, "grad_norm": 0.31898584961891174, "learning_rate": 0.0002892816811842701, "loss": 1.7956, "step": 9982 }, { "epoch": 0.487451171875, "grad_norm": 0.2512539029121399, "learning_rate": 0.0002892467239768737, "loss": 1.8253, "step": 9983 }, { "epoch": 0.4875, "grad_norm": 0.21783329546451569, "learning_rate": 0.00028921176642419656, "loss": 1.7624, "step": 9984 }, { "epoch": 0.487548828125, "grad_norm": 0.3041636645793915, "learning_rate": 0.0002891768085270858, "loss": 1.7828, "step": 9985 }, { "epoch": 0.48759765625, "grad_norm": 0.2940656840801239, "learning_rate": 0.0002891418502863888, "loss": 1.7683, "step": 9986 }, { "epoch": 0.487646484375, "grad_norm": 0.23692309856414795, "learning_rate": 0.0002891068917029526, "loss": 1.8258, "step": 9987 }, { "epoch": 0.4876953125, "grad_norm": 0.24273362755775452, "learning_rate": 0.0002890719327776246, "loss": 1.7948, "step": 9988 }, { "epoch": 0.487744140625, "grad_norm": 0.2151438295841217, "learning_rate": 0.000289036973511252, "loss": 1.7851, "step": 9989 }, { "epoch": 0.48779296875, "grad_norm": 0.23118847608566284, "learning_rate": 0.00028900201390468194, "loss": 1.8017, "step": 9990 }, { "epoch": 0.487841796875, "grad_norm": 0.23133698105812073, "learning_rate": 0.000288967053958762, "loss": 1.7837, "step": 9991 }, { "epoch": 0.487890625, "grad_norm": 0.21674811840057373, "learning_rate": 0.0002889320936743392, "loss": 1.7938, "step": 9992 }, { "epoch": 0.487939453125, "grad_norm": 0.23355408012866974, "learning_rate": 0.000288897133052261, "loss": 1.7988, "step": 9993 }, { "epoch": 0.48798828125, "grad_norm": 0.2573094666004181, "learning_rate": 0.00028886217209337446, "loss": 1.7892, "step": 9994 }, { "epoch": 0.488037109375, "grad_norm": 0.23846620321273804, "learning_rate": 0.0002888272107985272, "loss": 1.791, "step": 9995 }, { "epoch": 0.4880859375, "grad_norm": 0.22998221218585968, "learning_rate": 0.00028879224916856627, "loss": 1.8197, "step": 9996 }, { "epoch": 0.488134765625, "grad_norm": 0.2732134461402893, "learning_rate": 0.0002887572872043392, "loss": 1.7932, "step": 9997 }, { "epoch": 0.48818359375, "grad_norm": 0.33236539363861084, "learning_rate": 0.0002887223249066931, "loss": 1.7872, "step": 9998 }, { "epoch": 0.488232421875, "grad_norm": 0.2304680347442627, "learning_rate": 0.0002886873622764755, "loss": 1.7969, "step": 9999 }, { "epoch": 0.48828125, "grad_norm": 0.2703477740287781, "learning_rate": 0.00028865239931453367, "loss": 1.7793, "step": 10000 }, { "epoch": 0.488330078125, "grad_norm": 0.38166552782058716, "learning_rate": 0.00028861743602171495, "loss": 1.8042, "step": 10001 }, { "epoch": 0.48837890625, "grad_norm": 0.2777378261089325, "learning_rate": 0.00028858247239886683, "loss": 1.8077, "step": 10002 }, { "epoch": 0.488427734375, "grad_norm": 0.27653759717941284, "learning_rate": 0.0002885475084468364, "loss": 1.8026, "step": 10003 }, { "epoch": 0.4884765625, "grad_norm": 0.35141122341156006, "learning_rate": 0.0002885125441664713, "loss": 1.7851, "step": 10004 }, { "epoch": 0.488525390625, "grad_norm": 0.20807954668998718, "learning_rate": 0.00028847757955861896, "loss": 1.7825, "step": 10005 }, { "epoch": 0.48857421875, "grad_norm": 0.3439771831035614, "learning_rate": 0.0002884426146241265, "loss": 1.8113, "step": 10006 }, { "epoch": 0.488623046875, "grad_norm": 0.3313394784927368, "learning_rate": 0.0002884076493638416, "loss": 1.7781, "step": 10007 }, { "epoch": 0.488671875, "grad_norm": 0.27243006229400635, "learning_rate": 0.00028837268377861143, "loss": 1.8026, "step": 10008 }, { "epoch": 0.488720703125, "grad_norm": 0.3113704025745392, "learning_rate": 0.0002883377178692836, "loss": 1.803, "step": 10009 }, { "epoch": 0.48876953125, "grad_norm": 0.26098158955574036, "learning_rate": 0.0002883027516367054, "loss": 1.7889, "step": 10010 }, { "epoch": 0.488818359375, "grad_norm": 0.3026031255722046, "learning_rate": 0.00028826778508172433, "loss": 1.7723, "step": 10011 }, { "epoch": 0.4888671875, "grad_norm": 0.22948722541332245, "learning_rate": 0.0002882328182051879, "loss": 1.7941, "step": 10012 }, { "epoch": 0.488916015625, "grad_norm": 0.28160372376441956, "learning_rate": 0.0002881978510079434, "loss": 1.792, "step": 10013 }, { "epoch": 0.48896484375, "grad_norm": 0.2892118990421295, "learning_rate": 0.00028816288349083844, "loss": 1.7783, "step": 10014 }, { "epoch": 0.489013671875, "grad_norm": 0.2665007710456848, "learning_rate": 0.0002881279156547204, "loss": 1.7872, "step": 10015 }, { "epoch": 0.4890625, "grad_norm": 0.2616799473762512, "learning_rate": 0.00028809294750043677, "loss": 1.8166, "step": 10016 }, { "epoch": 0.489111328125, "grad_norm": 0.3036845326423645, "learning_rate": 0.00028805797902883514, "loss": 1.7935, "step": 10017 }, { "epoch": 0.48916015625, "grad_norm": 0.25501686334609985, "learning_rate": 0.00028802301024076276, "loss": 1.8051, "step": 10018 }, { "epoch": 0.489208984375, "grad_norm": 0.3019835948944092, "learning_rate": 0.0002879880411370673, "loss": 1.7814, "step": 10019 }, { "epoch": 0.4892578125, "grad_norm": 0.2769480347633362, "learning_rate": 0.00028795307171859626, "loss": 1.7936, "step": 10020 }, { "epoch": 0.489306640625, "grad_norm": 0.28821027278900146, "learning_rate": 0.0002879181019861971, "loss": 1.7965, "step": 10021 }, { "epoch": 0.48935546875, "grad_norm": 0.2829681932926178, "learning_rate": 0.00028788313194071737, "loss": 1.8123, "step": 10022 }, { "epoch": 0.489404296875, "grad_norm": 0.23079855740070343, "learning_rate": 0.00028784816158300457, "loss": 1.7938, "step": 10023 }, { "epoch": 0.489453125, "grad_norm": 0.26432421803474426, "learning_rate": 0.0002878131909139063, "loss": 1.806, "step": 10024 }, { "epoch": 0.489501953125, "grad_norm": 0.27536308765411377, "learning_rate": 0.00028777821993427, "loss": 1.7643, "step": 10025 }, { "epoch": 0.48955078125, "grad_norm": 0.23309916257858276, "learning_rate": 0.00028774324864494326, "loss": 1.8223, "step": 10026 }, { "epoch": 0.489599609375, "grad_norm": 0.2317882478237152, "learning_rate": 0.0002877082770467737, "loss": 1.7876, "step": 10027 }, { "epoch": 0.4896484375, "grad_norm": 0.31666630506515503, "learning_rate": 0.00028767330514060886, "loss": 1.8005, "step": 10028 }, { "epoch": 0.489697265625, "grad_norm": 0.21737174689769745, "learning_rate": 0.0002876383329272962, "loss": 1.7811, "step": 10029 }, { "epoch": 0.48974609375, "grad_norm": 0.25734996795654297, "learning_rate": 0.0002876033604076834, "loss": 1.8086, "step": 10030 }, { "epoch": 0.489794921875, "grad_norm": 0.31033191084861755, "learning_rate": 0.0002875683875826181, "loss": 1.8044, "step": 10031 }, { "epoch": 0.48984375, "grad_norm": 0.18523947894573212, "learning_rate": 0.0002875334144529478, "loss": 1.7987, "step": 10032 }, { "epoch": 0.489892578125, "grad_norm": 0.29154449701309204, "learning_rate": 0.0002874984410195202, "loss": 1.7962, "step": 10033 }, { "epoch": 0.48994140625, "grad_norm": 0.2543443739414215, "learning_rate": 0.00028746346728318275, "loss": 1.8083, "step": 10034 }, { "epoch": 0.489990234375, "grad_norm": 0.23388339579105377, "learning_rate": 0.0002874284932447831, "loss": 1.833, "step": 10035 }, { "epoch": 0.4900390625, "grad_norm": 0.27631935477256775, "learning_rate": 0.0002873935189051691, "loss": 1.8043, "step": 10036 }, { "epoch": 0.490087890625, "grad_norm": 0.25513210892677307, "learning_rate": 0.0002873585442651881, "loss": 1.7802, "step": 10037 }, { "epoch": 0.49013671875, "grad_norm": 0.2622489333152771, "learning_rate": 0.00028732356932568786, "loss": 1.7861, "step": 10038 }, { "epoch": 0.490185546875, "grad_norm": 0.3078816831111908, "learning_rate": 0.000287288594087516, "loss": 1.7758, "step": 10039 }, { "epoch": 0.490234375, "grad_norm": 0.29713112115859985, "learning_rate": 0.0002872536185515203, "loss": 1.7924, "step": 10040 }, { "epoch": 0.490283203125, "grad_norm": 0.2928237020969391, "learning_rate": 0.0002872186427185482, "loss": 1.7867, "step": 10041 }, { "epoch": 0.49033203125, "grad_norm": 0.2862407863140106, "learning_rate": 0.00028718366658944753, "loss": 1.8039, "step": 10042 }, { "epoch": 0.490380859375, "grad_norm": 0.28042253851890564, "learning_rate": 0.0002871486901650658, "loss": 1.7902, "step": 10043 }, { "epoch": 0.4904296875, "grad_norm": 0.30435821413993835, "learning_rate": 0.000287113713446251, "loss": 1.7988, "step": 10044 }, { "epoch": 0.490478515625, "grad_norm": 0.288055956363678, "learning_rate": 0.00028707873643385044, "loss": 1.8221, "step": 10045 }, { "epoch": 0.49052734375, "grad_norm": 0.24251225590705872, "learning_rate": 0.0002870437591287121, "loss": 1.7875, "step": 10046 }, { "epoch": 0.490576171875, "grad_norm": 0.2726694345474243, "learning_rate": 0.0002870087815316836, "loss": 1.7794, "step": 10047 }, { "epoch": 0.490625, "grad_norm": 0.28283295035362244, "learning_rate": 0.00028697380364361253, "loss": 1.8004, "step": 10048 }, { "epoch": 0.490673828125, "grad_norm": 0.2109626829624176, "learning_rate": 0.00028693882546534683, "loss": 1.75, "step": 10049 }, { "epoch": 0.49072265625, "grad_norm": 0.25281038880348206, "learning_rate": 0.000286903846997734, "loss": 1.7933, "step": 10050 }, { "epoch": 0.490771484375, "grad_norm": 0.2713511288166046, "learning_rate": 0.00028686886824162196, "loss": 1.8077, "step": 10051 }, { "epoch": 0.4908203125, "grad_norm": 0.2625810205936432, "learning_rate": 0.0002868338891978583, "loss": 1.7994, "step": 10052 }, { "epoch": 0.490869140625, "grad_norm": 0.26784297823905945, "learning_rate": 0.00028679890986729083, "loss": 1.7747, "step": 10053 }, { "epoch": 0.49091796875, "grad_norm": 0.25347527861595154, "learning_rate": 0.0002867639302507673, "loss": 1.7867, "step": 10054 }, { "epoch": 0.490966796875, "grad_norm": 0.28277409076690674, "learning_rate": 0.00028672895034913547, "loss": 1.8235, "step": 10055 }, { "epoch": 0.491015625, "grad_norm": 0.23969973623752594, "learning_rate": 0.0002866939701632431, "loss": 1.7719, "step": 10056 }, { "epoch": 0.491064453125, "grad_norm": 0.25481316447257996, "learning_rate": 0.000286658989693938, "loss": 1.8344, "step": 10057 }, { "epoch": 0.49111328125, "grad_norm": 0.2815608084201813, "learning_rate": 0.000286624008942068, "loss": 1.82, "step": 10058 }, { "epoch": 0.491162109375, "grad_norm": 0.23505640029907227, "learning_rate": 0.00028658902790848066, "loss": 1.7977, "step": 10059 }, { "epoch": 0.4912109375, "grad_norm": 0.30632948875427246, "learning_rate": 0.00028655404659402395, "loss": 1.7874, "step": 10060 }, { "epoch": 0.491259765625, "grad_norm": 0.2870742082595825, "learning_rate": 0.0002865190649995457, "loss": 1.7884, "step": 10061 }, { "epoch": 0.49130859375, "grad_norm": 0.2464676946401596, "learning_rate": 0.0002864840831258936, "loss": 1.8185, "step": 10062 }, { "epoch": 0.491357421875, "grad_norm": 0.31593260169029236, "learning_rate": 0.0002864491009739155, "loss": 1.7921, "step": 10063 }, { "epoch": 0.49140625, "grad_norm": 0.28922170400619507, "learning_rate": 0.0002864141185444593, "loss": 1.7645, "step": 10064 }, { "epoch": 0.491455078125, "grad_norm": 0.23443768918514252, "learning_rate": 0.00028637913583837274, "loss": 1.7971, "step": 10065 }, { "epoch": 0.49150390625, "grad_norm": 0.3088008463382721, "learning_rate": 0.00028634415285650367, "loss": 1.8112, "step": 10066 }, { "epoch": 0.491552734375, "grad_norm": 0.225654736161232, "learning_rate": 0.0002863091695997, "loss": 1.7917, "step": 10067 }, { "epoch": 0.4916015625, "grad_norm": 0.33493924140930176, "learning_rate": 0.00028627418606880944, "loss": 1.8295, "step": 10068 }, { "epoch": 0.491650390625, "grad_norm": 0.2748018801212311, "learning_rate": 0.00028623920226468, "loss": 1.7849, "step": 10069 }, { "epoch": 0.49169921875, "grad_norm": 0.24597397446632385, "learning_rate": 0.00028620421818815935, "loss": 1.7924, "step": 10070 }, { "epoch": 0.491748046875, "grad_norm": 0.37605491280555725, "learning_rate": 0.0002861692338400955, "loss": 1.8186, "step": 10071 }, { "epoch": 0.491796875, "grad_norm": 0.27388471364974976, "learning_rate": 0.0002861342492213364, "loss": 1.805, "step": 10072 }, { "epoch": 0.491845703125, "grad_norm": 0.3290787637233734, "learning_rate": 0.00028609926433272975, "loss": 1.8191, "step": 10073 }, { "epoch": 0.49189453125, "grad_norm": 0.29111775755882263, "learning_rate": 0.00028606427917512344, "loss": 1.7771, "step": 10074 }, { "epoch": 0.491943359375, "grad_norm": 0.27198782563209534, "learning_rate": 0.0002860292937493656, "loss": 1.7821, "step": 10075 }, { "epoch": 0.4919921875, "grad_norm": 0.2580801248550415, "learning_rate": 0.0002859943080563039, "loss": 1.8015, "step": 10076 }, { "epoch": 0.492041015625, "grad_norm": 0.27888354659080505, "learning_rate": 0.00028595932209678627, "loss": 1.826, "step": 10077 }, { "epoch": 0.49208984375, "grad_norm": 0.2506711184978485, "learning_rate": 0.0002859243358716607, "loss": 1.7571, "step": 10078 }, { "epoch": 0.492138671875, "grad_norm": 0.287472665309906, "learning_rate": 0.0002858893493817751, "loss": 1.7735, "step": 10079 }, { "epoch": 0.4921875, "grad_norm": 0.2877965569496155, "learning_rate": 0.0002858543626279773, "loss": 1.777, "step": 10080 }, { "epoch": 0.492236328125, "grad_norm": 0.22344309091567993, "learning_rate": 0.0002858193756111153, "loss": 1.7876, "step": 10081 }, { "epoch": 0.49228515625, "grad_norm": 0.3254357874393463, "learning_rate": 0.00028578438833203707, "loss": 1.7973, "step": 10082 }, { "epoch": 0.492333984375, "grad_norm": 0.21181367337703705, "learning_rate": 0.0002857494007915906, "loss": 1.8279, "step": 10083 }, { "epoch": 0.4923828125, "grad_norm": 0.2859530746936798, "learning_rate": 0.0002857144129906237, "loss": 1.7869, "step": 10084 }, { "epoch": 0.492431640625, "grad_norm": 0.2708185613155365, "learning_rate": 0.0002856794249299843, "loss": 1.8058, "step": 10085 }, { "epoch": 0.49248046875, "grad_norm": 0.27093398571014404, "learning_rate": 0.0002856444366105205, "loss": 1.7879, "step": 10086 }, { "epoch": 0.492529296875, "grad_norm": 0.2724718749523163, "learning_rate": 0.0002856094480330803, "loss": 1.8121, "step": 10087 }, { "epoch": 0.492578125, "grad_norm": 0.2683425545692444, "learning_rate": 0.0002855744591985116, "loss": 1.8002, "step": 10088 }, { "epoch": 0.492626953125, "grad_norm": 0.26884353160858154, "learning_rate": 0.00028553947010766234, "loss": 1.8032, "step": 10089 }, { "epoch": 0.49267578125, "grad_norm": 0.25099319219589233, "learning_rate": 0.0002855044807613806, "loss": 1.7624, "step": 10090 }, { "epoch": 0.492724609375, "grad_norm": 0.2289115935564041, "learning_rate": 0.0002854694911605142, "loss": 1.7828, "step": 10091 }, { "epoch": 0.4927734375, "grad_norm": 0.267946720123291, "learning_rate": 0.00028543450130591154, "loss": 1.776, "step": 10092 }, { "epoch": 0.492822265625, "grad_norm": 0.2560650408267975, "learning_rate": 0.0002853995111984201, "loss": 1.7906, "step": 10093 }, { "epoch": 0.49287109375, "grad_norm": 0.27870041131973267, "learning_rate": 0.0002853645208388883, "loss": 1.8067, "step": 10094 }, { "epoch": 0.492919921875, "grad_norm": 0.25930652022361755, "learning_rate": 0.0002853295302281639, "loss": 1.8123, "step": 10095 }, { "epoch": 0.49296875, "grad_norm": 0.25516965985298157, "learning_rate": 0.0002852945393670951, "loss": 1.7983, "step": 10096 }, { "epoch": 0.493017578125, "grad_norm": 0.28618520498275757, "learning_rate": 0.00028525954825652986, "loss": 1.8014, "step": 10097 }, { "epoch": 0.49306640625, "grad_norm": 0.20603321492671967, "learning_rate": 0.0002852245568973162, "loss": 1.8438, "step": 10098 }, { "epoch": 0.493115234375, "grad_norm": 0.25348979234695435, "learning_rate": 0.00028518956529030224, "loss": 1.7918, "step": 10099 }, { "epoch": 0.4931640625, "grad_norm": 0.24607539176940918, "learning_rate": 0.00028515457343633593, "loss": 1.801, "step": 10100 }, { "epoch": 0.493212890625, "grad_norm": 0.2553078532218933, "learning_rate": 0.00028511958133626544, "loss": 1.7921, "step": 10101 }, { "epoch": 0.49326171875, "grad_norm": 0.2546004354953766, "learning_rate": 0.0002850845889909387, "loss": 1.798, "step": 10102 }, { "epoch": 0.493310546875, "grad_norm": 0.22336477041244507, "learning_rate": 0.00028504959640120387, "loss": 1.7872, "step": 10103 }, { "epoch": 0.493359375, "grad_norm": 0.2508370876312256, "learning_rate": 0.00028501460356790897, "loss": 1.7887, "step": 10104 }, { "epoch": 0.493408203125, "grad_norm": 0.2403479516506195, "learning_rate": 0.00028497961049190213, "loss": 1.7715, "step": 10105 }, { "epoch": 0.49345703125, "grad_norm": 0.2404627799987793, "learning_rate": 0.0002849446171740314, "loss": 1.8082, "step": 10106 }, { "epoch": 0.493505859375, "grad_norm": 0.29712310433387756, "learning_rate": 0.0002849096236151449, "loss": 1.8003, "step": 10107 }, { "epoch": 0.4935546875, "grad_norm": 0.35511845350265503, "learning_rate": 0.0002848746298160908, "loss": 1.7901, "step": 10108 }, { "epoch": 0.493603515625, "grad_norm": 0.2910117506980896, "learning_rate": 0.000284839635777717, "loss": 1.7878, "step": 10109 }, { "epoch": 0.49365234375, "grad_norm": 0.28669974207878113, "learning_rate": 0.00028480464150087173, "loss": 1.7959, "step": 10110 }, { "epoch": 0.493701171875, "grad_norm": 0.22136136889457703, "learning_rate": 0.0002847696469864031, "loss": 1.7882, "step": 10111 }, { "epoch": 0.49375, "grad_norm": 0.29957515001296997, "learning_rate": 0.0002847346522351592, "loss": 1.8167, "step": 10112 }, { "epoch": 0.493798828125, "grad_norm": 0.3664534389972687, "learning_rate": 0.00028469965724798826, "loss": 1.8107, "step": 10113 }, { "epoch": 0.49384765625, "grad_norm": 0.21490108966827393, "learning_rate": 0.0002846646620257383, "loss": 1.7846, "step": 10114 }, { "epoch": 0.493896484375, "grad_norm": 0.2851812541484833, "learning_rate": 0.0002846296665692574, "loss": 1.7907, "step": 10115 }, { "epoch": 0.4939453125, "grad_norm": 0.30672764778137207, "learning_rate": 0.0002845946708793939, "loss": 1.7973, "step": 10116 }, { "epoch": 0.493994140625, "grad_norm": 0.25139063596725464, "learning_rate": 0.0002845596749569958, "loss": 1.7915, "step": 10117 }, { "epoch": 0.49404296875, "grad_norm": 0.2971017062664032, "learning_rate": 0.00028452467880291137, "loss": 1.8238, "step": 10118 }, { "epoch": 0.494091796875, "grad_norm": 0.2959168553352356, "learning_rate": 0.0002844896824179887, "loss": 1.7754, "step": 10119 }, { "epoch": 0.494140625, "grad_norm": 0.30222275853157043, "learning_rate": 0.0002844546858030758, "loss": 1.8121, "step": 10120 }, { "epoch": 0.494189453125, "grad_norm": 0.26281285285949707, "learning_rate": 0.0002844196889590211, "loss": 1.7952, "step": 10121 }, { "epoch": 0.49423828125, "grad_norm": 0.24821442365646362, "learning_rate": 0.0002843846918866727, "loss": 1.7842, "step": 10122 }, { "epoch": 0.494287109375, "grad_norm": 0.27503395080566406, "learning_rate": 0.00028434969458687867, "loss": 1.7973, "step": 10123 }, { "epoch": 0.4943359375, "grad_norm": 0.21184512972831726, "learning_rate": 0.00028431469706048727, "loss": 1.7905, "step": 10124 }, { "epoch": 0.494384765625, "grad_norm": 0.29447415471076965, "learning_rate": 0.0002842796993083469, "loss": 1.7628, "step": 10125 }, { "epoch": 0.49443359375, "grad_norm": 0.28307393193244934, "learning_rate": 0.0002842447013313054, "loss": 1.7834, "step": 10126 }, { "epoch": 0.494482421875, "grad_norm": 0.22858606278896332, "learning_rate": 0.0002842097031302111, "loss": 1.8205, "step": 10127 }, { "epoch": 0.49453125, "grad_norm": 0.30767735838890076, "learning_rate": 0.00028417470470591227, "loss": 1.8277, "step": 10128 }, { "epoch": 0.494580078125, "grad_norm": 0.23478277027606964, "learning_rate": 0.00028413970605925716, "loss": 1.7951, "step": 10129 }, { "epoch": 0.49462890625, "grad_norm": 0.2438567727804184, "learning_rate": 0.00028410470719109385, "loss": 1.7811, "step": 10130 }, { "epoch": 0.494677734375, "grad_norm": 0.2605186402797699, "learning_rate": 0.00028406970810227074, "loss": 1.8064, "step": 10131 }, { "epoch": 0.4947265625, "grad_norm": 0.2069489061832428, "learning_rate": 0.0002840347087936359, "loss": 1.7987, "step": 10132 }, { "epoch": 0.494775390625, "grad_norm": 0.2820732593536377, "learning_rate": 0.00028399970926603767, "loss": 1.8156, "step": 10133 }, { "epoch": 0.49482421875, "grad_norm": 0.20167872309684753, "learning_rate": 0.00028396470952032427, "loss": 1.7847, "step": 10134 }, { "epoch": 0.494873046875, "grad_norm": 0.20653480291366577, "learning_rate": 0.0002839297095573439, "loss": 1.7827, "step": 10135 }, { "epoch": 0.494921875, "grad_norm": 0.1935661882162094, "learning_rate": 0.00028389470937794487, "loss": 1.8019, "step": 10136 }, { "epoch": 0.494970703125, "grad_norm": 0.22042390704154968, "learning_rate": 0.0002838597089829754, "loss": 1.7952, "step": 10137 }, { "epoch": 0.49501953125, "grad_norm": 0.22260184586048126, "learning_rate": 0.0002838247083732837, "loss": 1.8077, "step": 10138 }, { "epoch": 0.495068359375, "grad_norm": 0.24375957250595093, "learning_rate": 0.0002837897075497182, "loss": 1.7929, "step": 10139 }, { "epoch": 0.4951171875, "grad_norm": 0.23885607719421387, "learning_rate": 0.000283754706513127, "loss": 1.8004, "step": 10140 }, { "epoch": 0.495166015625, "grad_norm": 0.2665223479270935, "learning_rate": 0.00028371970526435845, "loss": 1.7871, "step": 10141 }, { "epoch": 0.49521484375, "grad_norm": 0.24602577090263367, "learning_rate": 0.000283684703804261, "loss": 1.7898, "step": 10142 }, { "epoch": 0.495263671875, "grad_norm": 0.29127761721611023, "learning_rate": 0.0002836497021336827, "loss": 1.7835, "step": 10143 }, { "epoch": 0.4953125, "grad_norm": 0.3111700415611267, "learning_rate": 0.0002836147002534718, "loss": 1.7884, "step": 10144 }, { "epoch": 0.495361328125, "grad_norm": 0.24405622482299805, "learning_rate": 0.0002835796981644768, "loss": 1.8065, "step": 10145 }, { "epoch": 0.49541015625, "grad_norm": 0.36743953824043274, "learning_rate": 0.0002835446958675459, "loss": 1.784, "step": 10146 }, { "epoch": 0.495458984375, "grad_norm": 0.23512518405914307, "learning_rate": 0.00028350969336352744, "loss": 1.7811, "step": 10147 }, { "epoch": 0.4955078125, "grad_norm": 0.32147321105003357, "learning_rate": 0.00028347469065326974, "loss": 1.7914, "step": 10148 }, { "epoch": 0.495556640625, "grad_norm": 0.2932731807231903, "learning_rate": 0.00028343968773762105, "loss": 1.819, "step": 10149 }, { "epoch": 0.49560546875, "grad_norm": 0.22718337178230286, "learning_rate": 0.00028340468461742987, "loss": 1.7954, "step": 10150 }, { "epoch": 0.495654296875, "grad_norm": 0.27740031480789185, "learning_rate": 0.0002833696812935443, "loss": 1.7639, "step": 10151 }, { "epoch": 0.495703125, "grad_norm": 0.25377750396728516, "learning_rate": 0.00028333467776681283, "loss": 1.7799, "step": 10152 }, { "epoch": 0.495751953125, "grad_norm": 0.29183459281921387, "learning_rate": 0.0002832996740380837, "loss": 1.772, "step": 10153 }, { "epoch": 0.49580078125, "grad_norm": 0.24495768547058105, "learning_rate": 0.0002832646701082054, "loss": 1.8057, "step": 10154 }, { "epoch": 0.495849609375, "grad_norm": 0.24889706075191498, "learning_rate": 0.000283229665978026, "loss": 1.8072, "step": 10155 }, { "epoch": 0.4958984375, "grad_norm": 0.2504142224788666, "learning_rate": 0.0002831946616483942, "loss": 1.7749, "step": 10156 }, { "epoch": 0.495947265625, "grad_norm": 0.270499050617218, "learning_rate": 0.0002831596571201581, "loss": 1.8069, "step": 10157 }, { "epoch": 0.49599609375, "grad_norm": 0.2582266330718994, "learning_rate": 0.00028312465239416625, "loss": 1.8078, "step": 10158 }, { "epoch": 0.496044921875, "grad_norm": 0.24885550141334534, "learning_rate": 0.00028308964747126685, "loss": 1.7933, "step": 10159 }, { "epoch": 0.49609375, "grad_norm": 0.21540111303329468, "learning_rate": 0.00028305464235230837, "loss": 1.7985, "step": 10160 }, { "epoch": 0.496142578125, "grad_norm": 0.26607292890548706, "learning_rate": 0.00028301963703813917, "loss": 1.7853, "step": 10161 }, { "epoch": 0.49619140625, "grad_norm": 0.2482011318206787, "learning_rate": 0.0002829846315296076, "loss": 1.7965, "step": 10162 }, { "epoch": 0.496240234375, "grad_norm": 0.2649032175540924, "learning_rate": 0.00028294962582756203, "loss": 1.7808, "step": 10163 }, { "epoch": 0.4962890625, "grad_norm": 0.278649240732193, "learning_rate": 0.00028291461993285087, "loss": 1.7875, "step": 10164 }, { "epoch": 0.496337890625, "grad_norm": 0.3145754635334015, "learning_rate": 0.00028287961384632266, "loss": 1.7972, "step": 10165 }, { "epoch": 0.49638671875, "grad_norm": 0.30963295698165894, "learning_rate": 0.0002828446075688256, "loss": 1.8055, "step": 10166 }, { "epoch": 0.496435546875, "grad_norm": 0.25759732723236084, "learning_rate": 0.0002828096011012082, "loss": 1.7942, "step": 10167 }, { "epoch": 0.496484375, "grad_norm": 0.2529895007610321, "learning_rate": 0.00028277459444431887, "loss": 1.8048, "step": 10168 }, { "epoch": 0.496533203125, "grad_norm": 0.3111872673034668, "learning_rate": 0.00028273958759900597, "loss": 1.7911, "step": 10169 }, { "epoch": 0.49658203125, "grad_norm": 0.2974274456501007, "learning_rate": 0.0002827045805661179, "loss": 1.8104, "step": 10170 }, { "epoch": 0.496630859375, "grad_norm": 0.2644731104373932, "learning_rate": 0.0002826695733465032, "loss": 1.7994, "step": 10171 }, { "epoch": 0.4966796875, "grad_norm": 0.2758263349533081, "learning_rate": 0.0002826345659410102, "loss": 1.7975, "step": 10172 }, { "epoch": 0.496728515625, "grad_norm": 0.28264474868774414, "learning_rate": 0.0002825995583504873, "loss": 1.8054, "step": 10173 }, { "epoch": 0.49677734375, "grad_norm": 0.23564712703227997, "learning_rate": 0.00028256455057578306, "loss": 1.7992, "step": 10174 }, { "epoch": 0.496826171875, "grad_norm": 0.3087165057659149, "learning_rate": 0.0002825295426177458, "loss": 1.793, "step": 10175 }, { "epoch": 0.496875, "grad_norm": 0.2520882487297058, "learning_rate": 0.0002824945344772242, "loss": 1.8188, "step": 10176 }, { "epoch": 0.496923828125, "grad_norm": 0.27272361516952515, "learning_rate": 0.0002824595261550664, "loss": 1.7974, "step": 10177 }, { "epoch": 0.49697265625, "grad_norm": 0.28986141085624695, "learning_rate": 0.00028242451765212096, "loss": 1.7988, "step": 10178 }, { "epoch": 0.497021484375, "grad_norm": 0.2998816967010498, "learning_rate": 0.0002823895089692364, "loss": 1.7898, "step": 10179 }, { "epoch": 0.4970703125, "grad_norm": 0.28196337819099426, "learning_rate": 0.0002823545001072612, "loss": 1.8147, "step": 10180 }, { "epoch": 0.497119140625, "grad_norm": 0.26623204350471497, "learning_rate": 0.00028231949106704375, "loss": 1.8265, "step": 10181 }, { "epoch": 0.49716796875, "grad_norm": 0.2881239354610443, "learning_rate": 0.00028228448184943253, "loss": 1.7611, "step": 10182 }, { "epoch": 0.497216796875, "grad_norm": 0.2960749864578247, "learning_rate": 0.00028224947245527606, "loss": 1.8095, "step": 10183 }, { "epoch": 0.497265625, "grad_norm": 0.26340779662132263, "learning_rate": 0.00028221446288542284, "loss": 1.81, "step": 10184 }, { "epoch": 0.497314453125, "grad_norm": 0.27052590250968933, "learning_rate": 0.0002821794531407213, "loss": 1.7919, "step": 10185 }, { "epoch": 0.49736328125, "grad_norm": 0.2816900610923767, "learning_rate": 0.00028214444322201986, "loss": 1.789, "step": 10186 }, { "epoch": 0.497412109375, "grad_norm": 0.2803470492362976, "learning_rate": 0.00028210943313016715, "loss": 1.8039, "step": 10187 }, { "epoch": 0.4974609375, "grad_norm": 0.25546103715896606, "learning_rate": 0.0002820744228660117, "loss": 1.8183, "step": 10188 }, { "epoch": 0.497509765625, "grad_norm": 0.3239974081516266, "learning_rate": 0.00028203941243040183, "loss": 1.7847, "step": 10189 }, { "epoch": 0.49755859375, "grad_norm": 0.2540731430053711, "learning_rate": 0.0002820044018241862, "loss": 1.82, "step": 10190 }, { "epoch": 0.497607421875, "grad_norm": 0.3043220043182373, "learning_rate": 0.00028196939104821326, "loss": 1.8152, "step": 10191 }, { "epoch": 0.49765625, "grad_norm": 0.31724295020103455, "learning_rate": 0.0002819343801033314, "loss": 1.7993, "step": 10192 }, { "epoch": 0.497705078125, "grad_norm": 0.25799673795700073, "learning_rate": 0.00028189936899038946, "loss": 1.7979, "step": 10193 }, { "epoch": 0.49775390625, "grad_norm": 0.2985391318798065, "learning_rate": 0.0002818643577102357, "loss": 1.7805, "step": 10194 }, { "epoch": 0.497802734375, "grad_norm": 0.3070010244846344, "learning_rate": 0.0002818293462637187, "loss": 1.7863, "step": 10195 }, { "epoch": 0.4978515625, "grad_norm": 0.23408794403076172, "learning_rate": 0.000281794334651687, "loss": 1.7936, "step": 10196 }, { "epoch": 0.497900390625, "grad_norm": 0.3002418577671051, "learning_rate": 0.00028175932287498914, "loss": 1.8003, "step": 10197 }, { "epoch": 0.49794921875, "grad_norm": 0.22647984325885773, "learning_rate": 0.00028172431093447366, "loss": 1.7885, "step": 10198 }, { "epoch": 0.497998046875, "grad_norm": 0.2507069706916809, "learning_rate": 0.0002816892988309891, "loss": 1.7689, "step": 10199 }, { "epoch": 0.498046875, "grad_norm": 0.27787649631500244, "learning_rate": 0.000281654286565384, "loss": 1.7937, "step": 10200 }, { "epoch": 0.498095703125, "grad_norm": 0.2380613088607788, "learning_rate": 0.000281619274138507, "loss": 1.7881, "step": 10201 }, { "epoch": 0.49814453125, "grad_norm": 0.2690231204032898, "learning_rate": 0.00028158426155120645, "loss": 1.8047, "step": 10202 }, { "epoch": 0.498193359375, "grad_norm": 0.27076297998428345, "learning_rate": 0.0002815492488043311, "loss": 1.7983, "step": 10203 }, { "epoch": 0.4982421875, "grad_norm": 0.24468466639518738, "learning_rate": 0.0002815142358987295, "loss": 1.7963, "step": 10204 }, { "epoch": 0.498291015625, "grad_norm": 0.2639090120792389, "learning_rate": 0.0002814792228352501, "loss": 1.8077, "step": 10205 }, { "epoch": 0.49833984375, "grad_norm": 0.24168257415294647, "learning_rate": 0.00028144420961474147, "loss": 1.8114, "step": 10206 }, { "epoch": 0.498388671875, "grad_norm": 0.2655012607574463, "learning_rate": 0.00028140919623805227, "loss": 1.8046, "step": 10207 }, { "epoch": 0.4984375, "grad_norm": 0.2874071002006531, "learning_rate": 0.0002813741827060311, "loss": 1.8161, "step": 10208 }, { "epoch": 0.498486328125, "grad_norm": 0.23590052127838135, "learning_rate": 0.0002813391690195265, "loss": 1.7787, "step": 10209 }, { "epoch": 0.49853515625, "grad_norm": 0.31348568201065063, "learning_rate": 0.000281304155179387, "loss": 1.8057, "step": 10210 }, { "epoch": 0.498583984375, "grad_norm": 0.30515673756599426, "learning_rate": 0.0002812691411864613, "loss": 1.8298, "step": 10211 }, { "epoch": 0.4986328125, "grad_norm": 0.24686110019683838, "learning_rate": 0.00028123412704159783, "loss": 1.7966, "step": 10212 }, { "epoch": 0.498681640625, "grad_norm": 0.33482885360717773, "learning_rate": 0.00028119911274564533, "loss": 1.7902, "step": 10213 }, { "epoch": 0.49873046875, "grad_norm": 0.3034195303916931, "learning_rate": 0.0002811640982994523, "loss": 1.7894, "step": 10214 }, { "epoch": 0.498779296875, "grad_norm": 0.35064685344696045, "learning_rate": 0.0002811290837038675, "loss": 1.815, "step": 10215 }, { "epoch": 0.498828125, "grad_norm": 0.24459128081798553, "learning_rate": 0.00028109406895973936, "loss": 1.8074, "step": 10216 }, { "epoch": 0.498876953125, "grad_norm": 0.31280243396759033, "learning_rate": 0.0002810590540679166, "loss": 1.773, "step": 10217 }, { "epoch": 0.49892578125, "grad_norm": 0.26807424426078796, "learning_rate": 0.00028102403902924777, "loss": 1.8025, "step": 10218 }, { "epoch": 0.498974609375, "grad_norm": 0.24693402647972107, "learning_rate": 0.0002809890238445815, "loss": 1.8037, "step": 10219 }, { "epoch": 0.4990234375, "grad_norm": 0.33164647221565247, "learning_rate": 0.0002809540085147665, "loss": 1.811, "step": 10220 }, { "epoch": 0.499072265625, "grad_norm": 0.33472326397895813, "learning_rate": 0.0002809189930406513, "loss": 1.8057, "step": 10221 }, { "epoch": 0.49912109375, "grad_norm": 0.31788191199302673, "learning_rate": 0.0002808839774230845, "loss": 1.8168, "step": 10222 }, { "epoch": 0.499169921875, "grad_norm": 0.34075939655303955, "learning_rate": 0.0002808489616629147, "loss": 1.8029, "step": 10223 }, { "epoch": 0.49921875, "grad_norm": 0.27551183104515076, "learning_rate": 0.0002808139457609907, "loss": 1.7784, "step": 10224 }, { "epoch": 0.499267578125, "grad_norm": 0.3412671685218811, "learning_rate": 0.00028077892971816116, "loss": 1.8019, "step": 10225 }, { "epoch": 0.49931640625, "grad_norm": 0.28462928533554077, "learning_rate": 0.00028074391353527457, "loss": 1.8195, "step": 10226 }, { "epoch": 0.499365234375, "grad_norm": 0.3090519607067108, "learning_rate": 0.0002807088972131795, "loss": 1.7738, "step": 10227 }, { "epoch": 0.4994140625, "grad_norm": 0.3023638129234314, "learning_rate": 0.0002806738807527248, "loss": 1.7969, "step": 10228 }, { "epoch": 0.499462890625, "grad_norm": 0.2727789580821991, "learning_rate": 0.00028063886415475903, "loss": 1.8007, "step": 10229 }, { "epoch": 0.49951171875, "grad_norm": 0.32301777601242065, "learning_rate": 0.00028060384742013085, "loss": 1.793, "step": 10230 }, { "epoch": 0.499560546875, "grad_norm": 0.24733196198940277, "learning_rate": 0.00028056883054968895, "loss": 1.8182, "step": 10231 }, { "epoch": 0.499609375, "grad_norm": 0.30409738421440125, "learning_rate": 0.000280533813544282, "loss": 1.8066, "step": 10232 }, { "epoch": 0.499658203125, "grad_norm": 0.2640385925769806, "learning_rate": 0.00028049879640475856, "loss": 1.8106, "step": 10233 }, { "epoch": 0.49970703125, "grad_norm": 0.2951575517654419, "learning_rate": 0.00028046377913196733, "loss": 1.7989, "step": 10234 }, { "epoch": 0.499755859375, "grad_norm": 0.27021461725234985, "learning_rate": 0.00028042876172675713, "loss": 1.7932, "step": 10235 }, { "epoch": 0.4998046875, "grad_norm": 0.26622673869132996, "learning_rate": 0.00028039374418997655, "loss": 1.7907, "step": 10236 }, { "epoch": 0.499853515625, "grad_norm": 0.2896062135696411, "learning_rate": 0.0002803587265224742, "loss": 1.7718, "step": 10237 }, { "epoch": 0.49990234375, "grad_norm": 0.31425538659095764, "learning_rate": 0.00028032370872509876, "loss": 1.819, "step": 10238 }, { "epoch": 0.499951171875, "grad_norm": 0.27347978949546814, "learning_rate": 0.00028028869079869896, "loss": 1.775, "step": 10239 }, { "epoch": 0.5, "grad_norm": 0.32691261172294617, "learning_rate": 0.00028025367274412354, "loss": 1.8014, "step": 10240 }, { "epoch": 0.500048828125, "grad_norm": 0.2959631383419037, "learning_rate": 0.00028021865456222115, "loss": 1.7623, "step": 10241 }, { "epoch": 0.50009765625, "grad_norm": 0.2748745381832123, "learning_rate": 0.0002801836362538404, "loss": 1.8022, "step": 10242 }, { "epoch": 0.500146484375, "grad_norm": 0.33158010244369507, "learning_rate": 0.0002801486178198301, "loss": 1.7851, "step": 10243 }, { "epoch": 0.5001953125, "grad_norm": 0.26129335165023804, "learning_rate": 0.0002801135992610389, "loss": 1.7852, "step": 10244 }, { "epoch": 0.500244140625, "grad_norm": 0.28126367926597595, "learning_rate": 0.0002800785805783156, "loss": 1.7807, "step": 10245 }, { "epoch": 0.50029296875, "grad_norm": 0.2449120730161667, "learning_rate": 0.0002800435617725088, "loss": 1.7917, "step": 10246 }, { "epoch": 0.500341796875, "grad_norm": 0.2528541684150696, "learning_rate": 0.0002800085428444672, "loss": 1.7952, "step": 10247 }, { "epoch": 0.500390625, "grad_norm": 0.26173827052116394, "learning_rate": 0.0002799735237950395, "loss": 1.7953, "step": 10248 }, { "epoch": 0.500439453125, "grad_norm": 0.27691274881362915, "learning_rate": 0.00027993850462507445, "loss": 1.8018, "step": 10249 }, { "epoch": 0.50048828125, "grad_norm": 0.25589078664779663, "learning_rate": 0.0002799034853354208, "loss": 1.7854, "step": 10250 }, { "epoch": 0.500537109375, "grad_norm": 0.21774812042713165, "learning_rate": 0.0002798684659269272, "loss": 1.7982, "step": 10251 }, { "epoch": 0.5005859375, "grad_norm": 0.2471896857023239, "learning_rate": 0.0002798334464004425, "loss": 1.7942, "step": 10252 }, { "epoch": 0.500634765625, "grad_norm": 0.22502319514751434, "learning_rate": 0.0002797984267568153, "loss": 1.7996, "step": 10253 }, { "epoch": 0.50068359375, "grad_norm": 0.22640448808670044, "learning_rate": 0.0002797634069968944, "loss": 1.7937, "step": 10254 }, { "epoch": 0.500732421875, "grad_norm": 0.2471988946199417, "learning_rate": 0.0002797283871215285, "loss": 1.7767, "step": 10255 }, { "epoch": 0.50078125, "grad_norm": 0.23899197578430176, "learning_rate": 0.00027969336713156627, "loss": 1.7728, "step": 10256 }, { "epoch": 0.500830078125, "grad_norm": 0.2305116057395935, "learning_rate": 0.0002796583470278566, "loss": 1.7802, "step": 10257 }, { "epoch": 0.50087890625, "grad_norm": 0.2663472294807434, "learning_rate": 0.0002796233268112481, "loss": 1.8214, "step": 10258 }, { "epoch": 0.500927734375, "grad_norm": 0.22410164773464203, "learning_rate": 0.00027958830648258957, "loss": 1.8058, "step": 10259 }, { "epoch": 0.5009765625, "grad_norm": 0.2513119578361511, "learning_rate": 0.0002795532860427298, "loss": 1.7906, "step": 10260 }, { "epoch": 0.501025390625, "grad_norm": 0.227890282869339, "learning_rate": 0.00027951826549251743, "loss": 1.8055, "step": 10261 }, { "epoch": 0.50107421875, "grad_norm": 0.2812036871910095, "learning_rate": 0.00027948324483280124, "loss": 1.7992, "step": 10262 }, { "epoch": 0.501123046875, "grad_norm": 0.23753975331783295, "learning_rate": 0.00027944822406443005, "loss": 1.7826, "step": 10263 }, { "epoch": 0.501171875, "grad_norm": 0.2849946618080139, "learning_rate": 0.0002794132031882525, "loss": 1.7953, "step": 10264 }, { "epoch": 0.501220703125, "grad_norm": 0.2535715401172638, "learning_rate": 0.0002793781822051176, "loss": 1.7934, "step": 10265 }, { "epoch": 0.50126953125, "grad_norm": 0.24165533483028412, "learning_rate": 0.0002793431611158738, "loss": 1.8249, "step": 10266 }, { "epoch": 0.501318359375, "grad_norm": 0.31915202736854553, "learning_rate": 0.00027930813992137004, "loss": 1.8068, "step": 10267 }, { "epoch": 0.5013671875, "grad_norm": 0.23575744032859802, "learning_rate": 0.00027927311862245503, "loss": 1.8025, "step": 10268 }, { "epoch": 0.501416015625, "grad_norm": 0.2474510818719864, "learning_rate": 0.0002792380972199776, "loss": 1.818, "step": 10269 }, { "epoch": 0.50146484375, "grad_norm": 0.35181158781051636, "learning_rate": 0.00027920307571478644, "loss": 1.7752, "step": 10270 }, { "epoch": 0.501513671875, "grad_norm": 0.2468734085559845, "learning_rate": 0.00027916805410773033, "loss": 1.8012, "step": 10271 }, { "epoch": 0.5015625, "grad_norm": 0.2887323796749115, "learning_rate": 0.0002791330323996581, "loss": 1.803, "step": 10272 }, { "epoch": 0.501611328125, "grad_norm": 0.34386274218559265, "learning_rate": 0.00027909801059141856, "loss": 1.7683, "step": 10273 }, { "epoch": 0.50166015625, "grad_norm": 0.22373241186141968, "learning_rate": 0.0002790629886838604, "loss": 1.8021, "step": 10274 }, { "epoch": 0.501708984375, "grad_norm": 0.34103426337242126, "learning_rate": 0.0002790279666778325, "loss": 1.7996, "step": 10275 }, { "epoch": 0.5017578125, "grad_norm": 0.2320319414138794, "learning_rate": 0.0002789929445741835, "loss": 1.7817, "step": 10276 }, { "epoch": 0.501806640625, "grad_norm": 0.24765518307685852, "learning_rate": 0.0002789579223737623, "loss": 1.8138, "step": 10277 }, { "epoch": 0.50185546875, "grad_norm": 0.3287985026836395, "learning_rate": 0.00027892290007741773, "loss": 1.7877, "step": 10278 }, { "epoch": 0.501904296875, "grad_norm": 0.25345078110694885, "learning_rate": 0.0002788878776859985, "loss": 1.8183, "step": 10279 }, { "epoch": 0.501953125, "grad_norm": 0.3192475140094757, "learning_rate": 0.0002788528552003534, "loss": 1.7696, "step": 10280 }, { "epoch": 0.502001953125, "grad_norm": 0.2702324092388153, "learning_rate": 0.00027881783262133125, "loss": 1.7973, "step": 10281 }, { "epoch": 0.50205078125, "grad_norm": 0.3702698349952698, "learning_rate": 0.0002787828099497809, "loss": 1.7942, "step": 10282 }, { "epoch": 0.502099609375, "grad_norm": 0.33784642815589905, "learning_rate": 0.0002787477871865511, "loss": 1.7801, "step": 10283 }, { "epoch": 0.5021484375, "grad_norm": 0.31701308488845825, "learning_rate": 0.0002787127643324907, "loss": 1.8015, "step": 10284 }, { "epoch": 0.502197265625, "grad_norm": 0.3181675970554352, "learning_rate": 0.0002786777413884485, "loss": 1.8097, "step": 10285 }, { "epoch": 0.50224609375, "grad_norm": 0.2866346538066864, "learning_rate": 0.0002786427183552732, "loss": 1.8055, "step": 10286 }, { "epoch": 0.502294921875, "grad_norm": 0.28658270835876465, "learning_rate": 0.00027860769523381377, "loss": 1.8042, "step": 10287 }, { "epoch": 0.50234375, "grad_norm": 0.2381601780653, "learning_rate": 0.0002785726720249189, "loss": 1.8022, "step": 10288 }, { "epoch": 0.502392578125, "grad_norm": 0.27723291516304016, "learning_rate": 0.0002785376487294374, "loss": 1.7768, "step": 10289 }, { "epoch": 0.50244140625, "grad_norm": 0.2810715138912201, "learning_rate": 0.00027850262534821817, "loss": 1.8239, "step": 10290 }, { "epoch": 0.502490234375, "grad_norm": 0.26426661014556885, "learning_rate": 0.0002784676018821101, "loss": 1.7969, "step": 10291 }, { "epoch": 0.5025390625, "grad_norm": 0.3167368769645691, "learning_rate": 0.00027843257833196175, "loss": 1.7852, "step": 10292 }, { "epoch": 0.502587890625, "grad_norm": 0.27157339453697205, "learning_rate": 0.00027839755469862216, "loss": 1.7901, "step": 10293 }, { "epoch": 0.50263671875, "grad_norm": 0.2463444173336029, "learning_rate": 0.0002783625309829402, "loss": 1.7894, "step": 10294 }, { "epoch": 0.502685546875, "grad_norm": 0.26299765706062317, "learning_rate": 0.00027832750718576456, "loss": 1.7727, "step": 10295 }, { "epoch": 0.502734375, "grad_norm": 0.24019302427768707, "learning_rate": 0.00027829248330794404, "loss": 1.7942, "step": 10296 }, { "epoch": 0.502783203125, "grad_norm": 0.2561659812927246, "learning_rate": 0.0002782574593503276, "loss": 1.7963, "step": 10297 }, { "epoch": 0.50283203125, "grad_norm": 0.25006306171417236, "learning_rate": 0.000278222435313764, "loss": 1.801, "step": 10298 }, { "epoch": 0.502880859375, "grad_norm": 0.23116445541381836, "learning_rate": 0.00027818741119910206, "loss": 1.802, "step": 10299 }, { "epoch": 0.5029296875, "grad_norm": 0.24182192981243134, "learning_rate": 0.0002781523870071907, "loss": 1.7928, "step": 10300 }, { "epoch": 0.502978515625, "grad_norm": 0.24582213163375854, "learning_rate": 0.0002781173627388787, "loss": 1.7963, "step": 10301 }, { "epoch": 0.50302734375, "grad_norm": 0.26072391867637634, "learning_rate": 0.00027808233839501494, "loss": 1.7716, "step": 10302 }, { "epoch": 0.503076171875, "grad_norm": 0.20054583251476288, "learning_rate": 0.00027804731397644816, "loss": 1.7968, "step": 10303 }, { "epoch": 0.503125, "grad_norm": 0.320273220539093, "learning_rate": 0.0002780122894840273, "loss": 1.7879, "step": 10304 }, { "epoch": 0.503173828125, "grad_norm": 0.24194642901420593, "learning_rate": 0.00027797726491860127, "loss": 1.8144, "step": 10305 }, { "epoch": 0.50322265625, "grad_norm": 0.27988141775131226, "learning_rate": 0.00027794224028101865, "loss": 1.7973, "step": 10306 }, { "epoch": 0.503271484375, "grad_norm": 0.28279805183410645, "learning_rate": 0.0002779072155721286, "loss": 1.8034, "step": 10307 }, { "epoch": 0.5033203125, "grad_norm": 0.3155752122402191, "learning_rate": 0.0002778721907927799, "loss": 1.8056, "step": 10308 }, { "epoch": 0.503369140625, "grad_norm": 0.3221874535083771, "learning_rate": 0.0002778371659438212, "loss": 1.7981, "step": 10309 }, { "epoch": 0.50341796875, "grad_norm": 0.2620776891708374, "learning_rate": 0.0002778021410261016, "loss": 1.8152, "step": 10310 }, { "epoch": 0.503466796875, "grad_norm": 0.2716071903705597, "learning_rate": 0.0002777671160404699, "loss": 1.7914, "step": 10311 }, { "epoch": 0.503515625, "grad_norm": 0.26199400424957275, "learning_rate": 0.00027773209098777487, "loss": 1.7706, "step": 10312 }, { "epoch": 0.503564453125, "grad_norm": 0.2577771842479706, "learning_rate": 0.00027769706586886536, "loss": 1.8117, "step": 10313 }, { "epoch": 0.50361328125, "grad_norm": 0.2760865092277527, "learning_rate": 0.0002776620406845904, "loss": 1.7789, "step": 10314 }, { "epoch": 0.503662109375, "grad_norm": 0.3778751790523529, "learning_rate": 0.0002776270154357986, "loss": 1.7849, "step": 10315 }, { "epoch": 0.5037109375, "grad_norm": 0.29216063022613525, "learning_rate": 0.0002775919901233391, "loss": 1.7933, "step": 10316 }, { "epoch": 0.503759765625, "grad_norm": 0.33144035935401917, "learning_rate": 0.00027755696474806056, "loss": 1.8035, "step": 10317 }, { "epoch": 0.50380859375, "grad_norm": 0.3363924026489258, "learning_rate": 0.000277521939310812, "loss": 1.8043, "step": 10318 }, { "epoch": 0.503857421875, "grad_norm": 0.44113075733184814, "learning_rate": 0.0002774869138124422, "loss": 1.788, "step": 10319 }, { "epoch": 0.50390625, "grad_norm": 0.30861979722976685, "learning_rate": 0.0002774518882538, "loss": 1.7902, "step": 10320 }, { "epoch": 0.503955078125, "grad_norm": 0.3034624457359314, "learning_rate": 0.00027741686263573433, "loss": 1.794, "step": 10321 }, { "epoch": 0.50400390625, "grad_norm": 0.308701753616333, "learning_rate": 0.0002773818369590941, "loss": 1.7811, "step": 10322 }, { "epoch": 0.504052734375, "grad_norm": 0.25400587916374207, "learning_rate": 0.00027734681122472806, "loss": 1.806, "step": 10323 }, { "epoch": 0.5041015625, "grad_norm": 0.3515242338180542, "learning_rate": 0.00027731178543348515, "loss": 1.7966, "step": 10324 }, { "epoch": 0.504150390625, "grad_norm": 0.21413713693618774, "learning_rate": 0.00027727675958621437, "loss": 1.8101, "step": 10325 }, { "epoch": 0.50419921875, "grad_norm": 0.2906690239906311, "learning_rate": 0.00027724173368376436, "loss": 1.8121, "step": 10326 }, { "epoch": 0.504248046875, "grad_norm": 0.23853355646133423, "learning_rate": 0.00027720670772698433, "loss": 1.8007, "step": 10327 }, { "epoch": 0.504296875, "grad_norm": 0.20466165244579315, "learning_rate": 0.0002771716817167228, "loss": 1.7812, "step": 10328 }, { "epoch": 0.504345703125, "grad_norm": 0.25467389822006226, "learning_rate": 0.00027713665565382885, "loss": 1.7851, "step": 10329 }, { "epoch": 0.50439453125, "grad_norm": 0.25396928191185, "learning_rate": 0.00027710162953915136, "loss": 1.8109, "step": 10330 }, { "epoch": 0.504443359375, "grad_norm": 0.2609468698501587, "learning_rate": 0.0002770666033735392, "loss": 1.7873, "step": 10331 }, { "epoch": 0.5044921875, "grad_norm": 0.23587143421173096, "learning_rate": 0.0002770315771578412, "loss": 1.7869, "step": 10332 }, { "epoch": 0.504541015625, "grad_norm": 0.24767708778381348, "learning_rate": 0.00027699655089290633, "loss": 1.7783, "step": 10333 }, { "epoch": 0.50458984375, "grad_norm": 0.2926667034626007, "learning_rate": 0.0002769615245795834, "loss": 1.8111, "step": 10334 }, { "epoch": 0.504638671875, "grad_norm": 0.24492482841014862, "learning_rate": 0.00027692649821872136, "loss": 1.7764, "step": 10335 }, { "epoch": 0.5046875, "grad_norm": 0.2876880466938019, "learning_rate": 0.0002768914718111692, "loss": 1.7615, "step": 10336 }, { "epoch": 0.504736328125, "grad_norm": 0.28290241956710815, "learning_rate": 0.0002768564453577756, "loss": 1.8123, "step": 10337 }, { "epoch": 0.50478515625, "grad_norm": 0.25807300209999084, "learning_rate": 0.00027682141885938957, "loss": 1.8174, "step": 10338 }, { "epoch": 0.504833984375, "grad_norm": 0.3116143047809601, "learning_rate": 0.00027678639231686, "loss": 1.8062, "step": 10339 }, { "epoch": 0.5048828125, "grad_norm": 0.23812536895275116, "learning_rate": 0.00027675136573103573, "loss": 1.7893, "step": 10340 }, { "epoch": 0.504931640625, "grad_norm": 0.2937409579753876, "learning_rate": 0.0002767163391027657, "loss": 1.7737, "step": 10341 }, { "epoch": 0.50498046875, "grad_norm": 0.29886069893836975, "learning_rate": 0.0002766813124328989, "loss": 1.7744, "step": 10342 }, { "epoch": 0.505029296875, "grad_norm": 0.33950722217559814, "learning_rate": 0.00027664628572228407, "loss": 1.7658, "step": 10343 }, { "epoch": 0.505078125, "grad_norm": 0.2392999529838562, "learning_rate": 0.00027661125897177026, "loss": 1.7769, "step": 10344 }, { "epoch": 0.505126953125, "grad_norm": 0.3013325333595276, "learning_rate": 0.00027657623218220624, "loss": 1.8079, "step": 10345 }, { "epoch": 0.50517578125, "grad_norm": 0.30392730236053467, "learning_rate": 0.00027654120535444097, "loss": 1.8013, "step": 10346 }, { "epoch": 0.505224609375, "grad_norm": 0.21754327416419983, "learning_rate": 0.0002765061784893233, "loss": 1.7946, "step": 10347 }, { "epoch": 0.5052734375, "grad_norm": 0.29582229256629944, "learning_rate": 0.0002764711515877023, "loss": 1.7917, "step": 10348 }, { "epoch": 0.505322265625, "grad_norm": 0.22375330328941345, "learning_rate": 0.0002764361246504266, "loss": 1.7901, "step": 10349 }, { "epoch": 0.50537109375, "grad_norm": 0.24265851080417633, "learning_rate": 0.00027640109767834535, "loss": 1.8144, "step": 10350 }, { "epoch": 0.505419921875, "grad_norm": 0.27090802788734436, "learning_rate": 0.0002763660706723074, "loss": 1.7891, "step": 10351 }, { "epoch": 0.50546875, "grad_norm": 0.22139889001846313, "learning_rate": 0.00027633104363316164, "loss": 1.7821, "step": 10352 }, { "epoch": 0.505517578125, "grad_norm": 0.25001177191734314, "learning_rate": 0.0002762960165617569, "loss": 1.812, "step": 10353 }, { "epoch": 0.50556640625, "grad_norm": 0.21286235749721527, "learning_rate": 0.00027626098945894226, "loss": 1.8014, "step": 10354 }, { "epoch": 0.505615234375, "grad_norm": 0.21390342712402344, "learning_rate": 0.0002762259623255664, "loss": 1.7885, "step": 10355 }, { "epoch": 0.5056640625, "grad_norm": 0.2442866861820221, "learning_rate": 0.0002761909351624784, "loss": 1.7779, "step": 10356 }, { "epoch": 0.505712890625, "grad_norm": 0.25787317752838135, "learning_rate": 0.0002761559079705272, "loss": 1.7922, "step": 10357 }, { "epoch": 0.50576171875, "grad_norm": 0.2598050534725189, "learning_rate": 0.0002761208807505616, "loss": 1.7896, "step": 10358 }, { "epoch": 0.505810546875, "grad_norm": 0.19191311299800873, "learning_rate": 0.0002760858535034305, "loss": 1.779, "step": 10359 }, { "epoch": 0.505859375, "grad_norm": 0.25948014855384827, "learning_rate": 0.00027605082622998294, "loss": 1.7829, "step": 10360 }, { "epoch": 0.505908203125, "grad_norm": 0.21437415480613708, "learning_rate": 0.00027601579893106774, "loss": 1.782, "step": 10361 }, { "epoch": 0.50595703125, "grad_norm": 0.24517254531383514, "learning_rate": 0.0002759807716075339, "loss": 1.7865, "step": 10362 }, { "epoch": 0.506005859375, "grad_norm": 0.24576473236083984, "learning_rate": 0.00027594574426023015, "loss": 1.7812, "step": 10363 }, { "epoch": 0.5060546875, "grad_norm": 0.2426782250404358, "learning_rate": 0.00027591071689000556, "loss": 1.7772, "step": 10364 }, { "epoch": 0.506103515625, "grad_norm": 0.267014741897583, "learning_rate": 0.000275875689497709, "loss": 1.7986, "step": 10365 }, { "epoch": 0.50615234375, "grad_norm": 0.23749782145023346, "learning_rate": 0.0002758406620841895, "loss": 1.7732, "step": 10366 }, { "epoch": 0.506201171875, "grad_norm": 0.2183157205581665, "learning_rate": 0.0002758056346502958, "loss": 1.783, "step": 10367 }, { "epoch": 0.50625, "grad_norm": 0.24006550014019012, "learning_rate": 0.0002757706071968769, "loss": 1.7712, "step": 10368 }, { "epoch": 0.506298828125, "grad_norm": 0.2749210298061371, "learning_rate": 0.00027573557972478173, "loss": 1.8011, "step": 10369 }, { "epoch": 0.50634765625, "grad_norm": 0.2522885799407959, "learning_rate": 0.0002757005522348592, "loss": 1.7928, "step": 10370 }, { "epoch": 0.506396484375, "grad_norm": 0.21450330317020416, "learning_rate": 0.00027566552472795827, "loss": 1.7817, "step": 10371 }, { "epoch": 0.5064453125, "grad_norm": 0.24041809141635895, "learning_rate": 0.00027563049720492774, "loss": 1.8257, "step": 10372 }, { "epoch": 0.506494140625, "grad_norm": 0.21432258188724518, "learning_rate": 0.00027559546966661664, "loss": 1.7845, "step": 10373 }, { "epoch": 0.50654296875, "grad_norm": 0.20859727263450623, "learning_rate": 0.00027556044211387383, "loss": 1.8005, "step": 10374 }, { "epoch": 0.506591796875, "grad_norm": 0.22745466232299805, "learning_rate": 0.00027552541454754824, "loss": 1.7747, "step": 10375 }, { "epoch": 0.506640625, "grad_norm": 0.2845018804073334, "learning_rate": 0.0002754903869684888, "loss": 1.8179, "step": 10376 }, { "epoch": 0.506689453125, "grad_norm": 0.29828858375549316, "learning_rate": 0.00027545535937754446, "loss": 1.7823, "step": 10377 }, { "epoch": 0.50673828125, "grad_norm": 0.26342183351516724, "learning_rate": 0.0002754203317755642, "loss": 1.777, "step": 10378 }, { "epoch": 0.506787109375, "grad_norm": 0.26265308260917664, "learning_rate": 0.0002753853041633968, "loss": 1.8061, "step": 10379 }, { "epoch": 0.5068359375, "grad_norm": 0.203104168176651, "learning_rate": 0.0002753502765418912, "loss": 1.8044, "step": 10380 }, { "epoch": 0.506884765625, "grad_norm": 0.31997501850128174, "learning_rate": 0.0002753152489118964, "loss": 1.8128, "step": 10381 }, { "epoch": 0.50693359375, "grad_norm": 0.3136776089668274, "learning_rate": 0.0002752802212742613, "loss": 1.7873, "step": 10382 }, { "epoch": 0.506982421875, "grad_norm": 0.2894386649131775, "learning_rate": 0.0002752451936298348, "loss": 1.7808, "step": 10383 }, { "epoch": 0.50703125, "grad_norm": 0.2491052746772766, "learning_rate": 0.0002752101659794658, "loss": 1.7736, "step": 10384 }, { "epoch": 0.507080078125, "grad_norm": 0.24615128338336945, "learning_rate": 0.0002751751383240033, "loss": 1.7701, "step": 10385 }, { "epoch": 0.50712890625, "grad_norm": 0.27565938234329224, "learning_rate": 0.0002751401106642963, "loss": 1.7781, "step": 10386 }, { "epoch": 0.507177734375, "grad_norm": 0.24484942853450775, "learning_rate": 0.0002751050830011934, "loss": 1.7827, "step": 10387 }, { "epoch": 0.5072265625, "grad_norm": 0.2681996524333954, "learning_rate": 0.0002750700553355438, "loss": 1.8106, "step": 10388 }, { "epoch": 0.507275390625, "grad_norm": 0.2653842568397522, "learning_rate": 0.00027503502766819637, "loss": 1.7943, "step": 10389 }, { "epoch": 0.50732421875, "grad_norm": 0.28253141045570374, "learning_rate": 0.000275, "loss": 1.7759, "step": 10390 }, { "epoch": 0.507373046875, "grad_norm": 0.2826705276966095, "learning_rate": 0.0002749649723318037, "loss": 1.8049, "step": 10391 }, { "epoch": 0.507421875, "grad_norm": 0.28574633598327637, "learning_rate": 0.00027492994466445623, "loss": 1.7915, "step": 10392 }, { "epoch": 0.507470703125, "grad_norm": 0.2543141841888428, "learning_rate": 0.0002748949169988067, "loss": 1.7787, "step": 10393 }, { "epoch": 0.50751953125, "grad_norm": 0.3301946222782135, "learning_rate": 0.00027485988933570384, "loss": 1.7806, "step": 10394 }, { "epoch": 0.507568359375, "grad_norm": 0.28533273935317993, "learning_rate": 0.00027482486167599673, "loss": 1.7952, "step": 10395 }, { "epoch": 0.5076171875, "grad_norm": 0.2849986255168915, "learning_rate": 0.00027478983402053417, "loss": 1.7797, "step": 10396 }, { "epoch": 0.507666015625, "grad_norm": 0.29201826453208923, "learning_rate": 0.00027475480637016524, "loss": 1.8078, "step": 10397 }, { "epoch": 0.50771484375, "grad_norm": 0.32125094532966614, "learning_rate": 0.0002747197787257387, "loss": 1.7824, "step": 10398 }, { "epoch": 0.507763671875, "grad_norm": 0.31505706906318665, "learning_rate": 0.00027468475108810363, "loss": 1.8005, "step": 10399 }, { "epoch": 0.5078125, "grad_norm": 0.25443607568740845, "learning_rate": 0.0002746497234581089, "loss": 1.8122, "step": 10400 }, { "epoch": 0.507861328125, "grad_norm": 0.2954367399215698, "learning_rate": 0.00027461469583660325, "loss": 1.793, "step": 10401 }, { "epoch": 0.50791015625, "grad_norm": 0.30943354964256287, "learning_rate": 0.0002745796682244359, "loss": 1.7977, "step": 10402 }, { "epoch": 0.507958984375, "grad_norm": 0.3325978219509125, "learning_rate": 0.00027454464062245547, "loss": 1.7801, "step": 10403 }, { "epoch": 0.5080078125, "grad_norm": 0.26135900616645813, "learning_rate": 0.0002745096130315112, "loss": 1.7909, "step": 10404 }, { "epoch": 0.508056640625, "grad_norm": 0.3268338739871979, "learning_rate": 0.00027447458545245174, "loss": 1.7862, "step": 10405 }, { "epoch": 0.50810546875, "grad_norm": 0.29207369685173035, "learning_rate": 0.00027443955788612626, "loss": 1.8008, "step": 10406 }, { "epoch": 0.508154296875, "grad_norm": 0.24291710555553436, "learning_rate": 0.00027440453033338345, "loss": 1.7881, "step": 10407 }, { "epoch": 0.508203125, "grad_norm": 0.33061501383781433, "learning_rate": 0.00027436950279507234, "loss": 1.8185, "step": 10408 }, { "epoch": 0.508251953125, "grad_norm": 0.2188236117362976, "learning_rate": 0.0002743344752720419, "loss": 1.7932, "step": 10409 }, { "epoch": 0.50830078125, "grad_norm": 0.3480973541736603, "learning_rate": 0.0002742994477651408, "loss": 1.801, "step": 10410 }, { "epoch": 0.508349609375, "grad_norm": 0.3164389431476593, "learning_rate": 0.00027426442027521836, "loss": 1.7918, "step": 10411 }, { "epoch": 0.5083984375, "grad_norm": 0.3236790895462036, "learning_rate": 0.0002742293928031231, "loss": 1.7915, "step": 10412 }, { "epoch": 0.508447265625, "grad_norm": 0.28762558102607727, "learning_rate": 0.0002741943653497043, "loss": 1.8262, "step": 10413 }, { "epoch": 0.50849609375, "grad_norm": 0.2836938798427582, "learning_rate": 0.00027415933791581057, "loss": 1.8041, "step": 10414 }, { "epoch": 0.508544921875, "grad_norm": 0.2931956350803375, "learning_rate": 0.00027412431050229097, "loss": 1.7943, "step": 10415 }, { "epoch": 0.50859375, "grad_norm": 0.2904331088066101, "learning_rate": 0.0002740892831099945, "loss": 1.8179, "step": 10416 }, { "epoch": 0.508642578125, "grad_norm": 0.23652733862400055, "learning_rate": 0.0002740542557397699, "loss": 1.7859, "step": 10417 }, { "epoch": 0.50869140625, "grad_norm": 0.33933594822883606, "learning_rate": 0.00027401922839246626, "loss": 1.8027, "step": 10418 }, { "epoch": 0.508740234375, "grad_norm": 0.228530153632164, "learning_rate": 0.0002739842010689323, "loss": 1.7911, "step": 10419 }, { "epoch": 0.5087890625, "grad_norm": 0.2761094570159912, "learning_rate": 0.0002739491737700171, "loss": 1.7778, "step": 10420 }, { "epoch": 0.508837890625, "grad_norm": 0.2661076784133911, "learning_rate": 0.0002739141464965695, "loss": 1.7759, "step": 10421 }, { "epoch": 0.50888671875, "grad_norm": 0.2571067214012146, "learning_rate": 0.0002738791192494385, "loss": 1.7837, "step": 10422 }, { "epoch": 0.508935546875, "grad_norm": 0.2815813720226288, "learning_rate": 0.0002738440920294728, "loss": 1.7914, "step": 10423 }, { "epoch": 0.508984375, "grad_norm": 0.23630686104297638, "learning_rate": 0.0002738090648375216, "loss": 1.7973, "step": 10424 }, { "epoch": 0.509033203125, "grad_norm": 0.26573890447616577, "learning_rate": 0.0002737740376744336, "loss": 1.7704, "step": 10425 }, { "epoch": 0.50908203125, "grad_norm": 0.2365642935037613, "learning_rate": 0.00027373901054105783, "loss": 1.786, "step": 10426 }, { "epoch": 0.509130859375, "grad_norm": 0.26322388648986816, "learning_rate": 0.00027370398343824314, "loss": 1.8097, "step": 10427 }, { "epoch": 0.5091796875, "grad_norm": 0.2971648871898651, "learning_rate": 0.0002736689563668384, "loss": 1.7838, "step": 10428 }, { "epoch": 0.509228515625, "grad_norm": 0.2157181054353714, "learning_rate": 0.00027363392932769267, "loss": 1.769, "step": 10429 }, { "epoch": 0.50927734375, "grad_norm": 0.26076164841651917, "learning_rate": 0.00027359890232165463, "loss": 1.7672, "step": 10430 }, { "epoch": 0.509326171875, "grad_norm": 0.2147962749004364, "learning_rate": 0.0002735638753495734, "loss": 1.7898, "step": 10431 }, { "epoch": 0.509375, "grad_norm": 0.25836560130119324, "learning_rate": 0.0002735288484122978, "loss": 1.7936, "step": 10432 }, { "epoch": 0.509423828125, "grad_norm": 0.22391219437122345, "learning_rate": 0.00027349382151067674, "loss": 1.799, "step": 10433 }, { "epoch": 0.50947265625, "grad_norm": 0.2253541499376297, "learning_rate": 0.0002734587946455592, "loss": 1.8058, "step": 10434 }, { "epoch": 0.509521484375, "grad_norm": 0.2465190887451172, "learning_rate": 0.00027342376781779384, "loss": 1.7945, "step": 10435 }, { "epoch": 0.5095703125, "grad_norm": 0.2345888614654541, "learning_rate": 0.0002733887410282299, "loss": 1.8093, "step": 10436 }, { "epoch": 0.509619140625, "grad_norm": 0.2947506308555603, "learning_rate": 0.00027335371427771596, "loss": 1.7942, "step": 10437 }, { "epoch": 0.50966796875, "grad_norm": 0.2171000987291336, "learning_rate": 0.0002733186875671012, "loss": 1.8167, "step": 10438 }, { "epoch": 0.509716796875, "grad_norm": 0.2730587422847748, "learning_rate": 0.00027328366089723427, "loss": 1.7912, "step": 10439 }, { "epoch": 0.509765625, "grad_norm": 0.2786257863044739, "learning_rate": 0.00027324863426896436, "loss": 1.8118, "step": 10440 }, { "epoch": 0.509814453125, "grad_norm": 0.2798595428466797, "learning_rate": 0.00027321360768314015, "loss": 1.7878, "step": 10441 }, { "epoch": 0.50986328125, "grad_norm": 0.2705850303173065, "learning_rate": 0.0002731785811406105, "loss": 1.8073, "step": 10442 }, { "epoch": 0.509912109375, "grad_norm": 0.22692880034446716, "learning_rate": 0.0002731435546422245, "loss": 1.8107, "step": 10443 }, { "epoch": 0.5099609375, "grad_norm": 0.21031123399734497, "learning_rate": 0.0002731085281888309, "loss": 1.8116, "step": 10444 }, { "epoch": 0.510009765625, "grad_norm": 0.2291458249092102, "learning_rate": 0.00027307350178127867, "loss": 1.7926, "step": 10445 }, { "epoch": 0.51005859375, "grad_norm": 0.22928741574287415, "learning_rate": 0.0002730384754204166, "loss": 1.8064, "step": 10446 }, { "epoch": 0.510107421875, "grad_norm": 0.2241467386484146, "learning_rate": 0.00027300344910709376, "loss": 1.7953, "step": 10447 }, { "epoch": 0.51015625, "grad_norm": 0.261980801820755, "learning_rate": 0.00027296842284215875, "loss": 1.7838, "step": 10448 }, { "epoch": 0.510205078125, "grad_norm": 0.25282856822013855, "learning_rate": 0.0002729333966264609, "loss": 1.8013, "step": 10449 }, { "epoch": 0.51025390625, "grad_norm": 0.25193241238594055, "learning_rate": 0.0002728983704608488, "loss": 1.7876, "step": 10450 }, { "epoch": 0.510302734375, "grad_norm": 0.32717660069465637, "learning_rate": 0.0002728633443461712, "loss": 1.8036, "step": 10451 }, { "epoch": 0.5103515625, "grad_norm": 0.27431267499923706, "learning_rate": 0.00027282831828327725, "loss": 1.778, "step": 10452 }, { "epoch": 0.510400390625, "grad_norm": 0.25425663590431213, "learning_rate": 0.00027279329227301576, "loss": 1.7912, "step": 10453 }, { "epoch": 0.51044921875, "grad_norm": 0.27235621213912964, "learning_rate": 0.0002727582663162356, "loss": 1.7784, "step": 10454 }, { "epoch": 0.510498046875, "grad_norm": 0.3196277916431427, "learning_rate": 0.00027272324041378567, "loss": 1.7662, "step": 10455 }, { "epoch": 0.510546875, "grad_norm": 0.3267510235309601, "learning_rate": 0.0002726882145665149, "loss": 1.7797, "step": 10456 }, { "epoch": 0.510595703125, "grad_norm": 0.2325318455696106, "learning_rate": 0.000272653188775272, "loss": 1.7837, "step": 10457 }, { "epoch": 0.51064453125, "grad_norm": 0.2803974747657776, "learning_rate": 0.000272618163040906, "loss": 1.7773, "step": 10458 }, { "epoch": 0.510693359375, "grad_norm": 0.28587496280670166, "learning_rate": 0.00027258313736426576, "loss": 1.7722, "step": 10459 }, { "epoch": 0.5107421875, "grad_norm": 0.24926777184009552, "learning_rate": 0.00027254811174620003, "loss": 1.7937, "step": 10460 }, { "epoch": 0.510791015625, "grad_norm": 0.254496306180954, "learning_rate": 0.00027251308618755796, "loss": 1.7704, "step": 10461 }, { "epoch": 0.51083984375, "grad_norm": 0.29637426137924194, "learning_rate": 0.0002724780606891881, "loss": 1.7957, "step": 10462 }, { "epoch": 0.510888671875, "grad_norm": 0.22124293446540833, "learning_rate": 0.0002724430352519395, "loss": 1.7877, "step": 10463 }, { "epoch": 0.5109375, "grad_norm": 0.22257855534553528, "learning_rate": 0.00027240800987666093, "loss": 1.8021, "step": 10464 }, { "epoch": 0.510986328125, "grad_norm": 0.25739026069641113, "learning_rate": 0.0002723729845642014, "loss": 1.7884, "step": 10465 }, { "epoch": 0.51103515625, "grad_norm": 0.26502010226249695, "learning_rate": 0.00027233795931540976, "loss": 1.8169, "step": 10466 }, { "epoch": 0.511083984375, "grad_norm": 0.2496601641178131, "learning_rate": 0.00027230293413113473, "loss": 1.7721, "step": 10467 }, { "epoch": 0.5111328125, "grad_norm": 0.24187397956848145, "learning_rate": 0.0002722679090122253, "loss": 1.7978, "step": 10468 }, { "epoch": 0.511181640625, "grad_norm": 0.28356918692588806, "learning_rate": 0.00027223288395953016, "loss": 1.8163, "step": 10469 }, { "epoch": 0.51123046875, "grad_norm": 0.22507265210151672, "learning_rate": 0.0002721978589738985, "loss": 1.7692, "step": 10470 }, { "epoch": 0.511279296875, "grad_norm": 0.30227363109588623, "learning_rate": 0.0002721628340561788, "loss": 1.7708, "step": 10471 }, { "epoch": 0.511328125, "grad_norm": 0.3021829128265381, "learning_rate": 0.0002721278092072202, "loss": 1.7787, "step": 10472 }, { "epoch": 0.511376953125, "grad_norm": 0.22177062928676605, "learning_rate": 0.0002720927844278714, "loss": 1.7901, "step": 10473 }, { "epoch": 0.51142578125, "grad_norm": 0.2618754208087921, "learning_rate": 0.00027205775971898133, "loss": 1.8175, "step": 10474 }, { "epoch": 0.511474609375, "grad_norm": 0.2933461666107178, "learning_rate": 0.00027202273508139887, "loss": 1.7645, "step": 10475 }, { "epoch": 0.5115234375, "grad_norm": 0.23880566656589508, "learning_rate": 0.00027198771051597273, "loss": 1.7608, "step": 10476 }, { "epoch": 0.511572265625, "grad_norm": 0.19447198510169983, "learning_rate": 0.00027195268602355193, "loss": 1.7889, "step": 10477 }, { "epoch": 0.51162109375, "grad_norm": 0.2081865519285202, "learning_rate": 0.0002719176616049851, "loss": 1.7966, "step": 10478 }, { "epoch": 0.511669921875, "grad_norm": 0.23912440240383148, "learning_rate": 0.0002718826372611214, "loss": 1.8174, "step": 10479 }, { "epoch": 0.51171875, "grad_norm": 0.25783810019493103, "learning_rate": 0.0002718476129928093, "loss": 1.8041, "step": 10480 }, { "epoch": 0.511767578125, "grad_norm": 0.25184619426727295, "learning_rate": 0.000271812588800898, "loss": 1.8012, "step": 10481 }, { "epoch": 0.51181640625, "grad_norm": 0.2294999361038208, "learning_rate": 0.000271777564686236, "loss": 1.7811, "step": 10482 }, { "epoch": 0.511865234375, "grad_norm": 0.23428495228290558, "learning_rate": 0.00027174254064967244, "loss": 1.7748, "step": 10483 }, { "epoch": 0.5119140625, "grad_norm": 0.2487233579158783, "learning_rate": 0.000271707516692056, "loss": 1.7796, "step": 10484 }, { "epoch": 0.511962890625, "grad_norm": 0.2644192576408386, "learning_rate": 0.00027167249281423553, "loss": 1.7739, "step": 10485 }, { "epoch": 0.51201171875, "grad_norm": 0.24269482493400574, "learning_rate": 0.00027163746901705986, "loss": 1.7891, "step": 10486 }, { "epoch": 0.512060546875, "grad_norm": 0.23949077725410461, "learning_rate": 0.00027160244530137777, "loss": 1.7868, "step": 10487 }, { "epoch": 0.512109375, "grad_norm": 0.27155840396881104, "learning_rate": 0.0002715674216680383, "loss": 1.802, "step": 10488 }, { "epoch": 0.512158203125, "grad_norm": 0.21079106628894806, "learning_rate": 0.00027153239811789, "loss": 1.7826, "step": 10489 }, { "epoch": 0.51220703125, "grad_norm": 0.2410726249217987, "learning_rate": 0.00027149737465178187, "loss": 1.7844, "step": 10490 }, { "epoch": 0.512255859375, "grad_norm": 0.2436499297618866, "learning_rate": 0.00027146235127056264, "loss": 1.812, "step": 10491 }, { "epoch": 0.5123046875, "grad_norm": 0.21109159290790558, "learning_rate": 0.0002714273279750812, "loss": 1.8021, "step": 10492 }, { "epoch": 0.512353515625, "grad_norm": 0.301862508058548, "learning_rate": 0.0002713923047661864, "loss": 1.7878, "step": 10493 }, { "epoch": 0.51240234375, "grad_norm": 0.28624168038368225, "learning_rate": 0.00027135728164472687, "loss": 1.8066, "step": 10494 }, { "epoch": 0.512451171875, "grad_norm": 0.21978318691253662, "learning_rate": 0.00027132225861155155, "loss": 1.8237, "step": 10495 }, { "epoch": 0.5125, "grad_norm": 0.21781213581562042, "learning_rate": 0.00027128723566750937, "loss": 1.7963, "step": 10496 }, { "epoch": 0.512548828125, "grad_norm": 0.2689298391342163, "learning_rate": 0.0002712522128134489, "loss": 1.7762, "step": 10497 }, { "epoch": 0.51259765625, "grad_norm": 0.25640571117401123, "learning_rate": 0.000271217190050219, "loss": 1.7992, "step": 10498 }, { "epoch": 0.512646484375, "grad_norm": 0.2440406233072281, "learning_rate": 0.0002711821673786687, "loss": 1.7706, "step": 10499 }, { "epoch": 0.5126953125, "grad_norm": 0.24393504858016968, "learning_rate": 0.0002711471447996466, "loss": 1.8064, "step": 10500 }, { "epoch": 0.512744140625, "grad_norm": 0.31702739000320435, "learning_rate": 0.00027111212231400154, "loss": 1.7983, "step": 10501 }, { "epoch": 0.51279296875, "grad_norm": 0.3371526002883911, "learning_rate": 0.00027107709992258235, "loss": 1.7851, "step": 10502 }, { "epoch": 0.512841796875, "grad_norm": 0.30035027861595154, "learning_rate": 0.0002710420776262377, "loss": 1.7972, "step": 10503 }, { "epoch": 0.512890625, "grad_norm": 0.3353639245033264, "learning_rate": 0.0002710070554258165, "loss": 1.795, "step": 10504 }, { "epoch": 0.512939453125, "grad_norm": 0.2681877315044403, "learning_rate": 0.0002709720333221676, "loss": 1.7998, "step": 10505 }, { "epoch": 0.51298828125, "grad_norm": 0.31991732120513916, "learning_rate": 0.00027093701131613966, "loss": 1.7809, "step": 10506 }, { "epoch": 0.513037109375, "grad_norm": 0.3379665017127991, "learning_rate": 0.00027090198940858147, "loss": 1.7976, "step": 10507 }, { "epoch": 0.5130859375, "grad_norm": 0.2627660036087036, "learning_rate": 0.00027086696760034195, "loss": 1.7885, "step": 10508 }, { "epoch": 0.513134765625, "grad_norm": 0.3323020040988922, "learning_rate": 0.00027083194589226975, "loss": 1.7725, "step": 10509 }, { "epoch": 0.51318359375, "grad_norm": 0.2516469359397888, "learning_rate": 0.0002707969242852137, "loss": 1.7872, "step": 10510 }, { "epoch": 0.513232421875, "grad_norm": 0.31978246569633484, "learning_rate": 0.0002707619027800225, "loss": 1.8016, "step": 10511 }, { "epoch": 0.51328125, "grad_norm": 0.25777238607406616, "learning_rate": 0.00027072688137754505, "loss": 1.7789, "step": 10512 }, { "epoch": 0.513330078125, "grad_norm": 0.2623745799064636, "learning_rate": 0.00027069186007863, "loss": 1.7898, "step": 10513 }, { "epoch": 0.51337890625, "grad_norm": 0.24030496180057526, "learning_rate": 0.00027065683888412626, "loss": 1.794, "step": 10514 }, { "epoch": 0.513427734375, "grad_norm": 0.24826788902282715, "learning_rate": 0.0002706218177948825, "loss": 1.7979, "step": 10515 }, { "epoch": 0.5134765625, "grad_norm": 0.2811279296875, "learning_rate": 0.00027058679681174746, "loss": 1.7891, "step": 10516 }, { "epoch": 0.513525390625, "grad_norm": 0.27346065640449524, "learning_rate": 0.0002705517759355701, "loss": 1.8103, "step": 10517 }, { "epoch": 0.51357421875, "grad_norm": 0.24673479795455933, "learning_rate": 0.0002705167551671988, "loss": 1.7804, "step": 10518 }, { "epoch": 0.513623046875, "grad_norm": 0.29948848485946655, "learning_rate": 0.00027048173450748266, "loss": 1.7689, "step": 10519 }, { "epoch": 0.513671875, "grad_norm": 0.23564128577709198, "learning_rate": 0.00027044671395727034, "loss": 1.8005, "step": 10520 }, { "epoch": 0.513720703125, "grad_norm": 0.29112058877944946, "learning_rate": 0.00027041169351741046, "loss": 1.785, "step": 10521 }, { "epoch": 0.51376953125, "grad_norm": 0.30248087644577026, "learning_rate": 0.00027037667318875194, "loss": 1.7843, "step": 10522 }, { "epoch": 0.513818359375, "grad_norm": 0.25525522232055664, "learning_rate": 0.0002703416529721435, "loss": 1.7714, "step": 10523 }, { "epoch": 0.5138671875, "grad_norm": 0.25643423199653625, "learning_rate": 0.00027030663286843376, "loss": 1.796, "step": 10524 }, { "epoch": 0.513916015625, "grad_norm": 0.27331307530403137, "learning_rate": 0.00027027161287847165, "loss": 1.7996, "step": 10525 }, { "epoch": 0.51396484375, "grad_norm": 0.21996533870697021, "learning_rate": 0.0002702365930031057, "loss": 1.7797, "step": 10526 }, { "epoch": 0.514013671875, "grad_norm": 0.2704721689224243, "learning_rate": 0.00027020157324318477, "loss": 1.7973, "step": 10527 }, { "epoch": 0.5140625, "grad_norm": 0.27501028776168823, "learning_rate": 0.00027016655359955754, "loss": 1.7734, "step": 10528 }, { "epoch": 0.514111328125, "grad_norm": 0.3042936325073242, "learning_rate": 0.00027013153407307283, "loss": 1.7661, "step": 10529 }, { "epoch": 0.51416015625, "grad_norm": 0.27757883071899414, "learning_rate": 0.00027009651466457923, "loss": 1.7894, "step": 10530 }, { "epoch": 0.514208984375, "grad_norm": 0.23492290079593658, "learning_rate": 0.00027006149537492564, "loss": 1.7682, "step": 10531 }, { "epoch": 0.5142578125, "grad_norm": 0.32863327860832214, "learning_rate": 0.0002700264762049606, "loss": 1.8052, "step": 10532 }, { "epoch": 0.514306640625, "grad_norm": 0.2847210168838501, "learning_rate": 0.0002699914571555329, "loss": 1.8056, "step": 10533 }, { "epoch": 0.51435546875, "grad_norm": 0.2919783592224121, "learning_rate": 0.0002699564382274913, "loss": 1.8139, "step": 10534 }, { "epoch": 0.514404296875, "grad_norm": 0.22595831751823425, "learning_rate": 0.00026992141942168446, "loss": 1.7789, "step": 10535 }, { "epoch": 0.514453125, "grad_norm": 0.2769888937473297, "learning_rate": 0.0002698864007389611, "loss": 1.8044, "step": 10536 }, { "epoch": 0.514501953125, "grad_norm": 0.2685067653656006, "learning_rate": 0.00026985138218016993, "loss": 1.7942, "step": 10537 }, { "epoch": 0.51455078125, "grad_norm": 0.22006243467330933, "learning_rate": 0.00026981636374615964, "loss": 1.7837, "step": 10538 }, { "epoch": 0.514599609375, "grad_norm": 0.2472934126853943, "learning_rate": 0.00026978134543777894, "loss": 1.8146, "step": 10539 }, { "epoch": 0.5146484375, "grad_norm": 0.29063478112220764, "learning_rate": 0.0002697463272558765, "loss": 1.7808, "step": 10540 }, { "epoch": 0.514697265625, "grad_norm": 0.2360348403453827, "learning_rate": 0.00026971130920130107, "loss": 1.7944, "step": 10541 }, { "epoch": 0.51474609375, "grad_norm": 0.2601255774497986, "learning_rate": 0.0002696762912749013, "loss": 1.8028, "step": 10542 }, { "epoch": 0.514794921875, "grad_norm": 0.21533657610416412, "learning_rate": 0.00026964127347752593, "loss": 1.7926, "step": 10543 }, { "epoch": 0.51484375, "grad_norm": 0.24553652107715607, "learning_rate": 0.00026960625581002353, "loss": 1.7857, "step": 10544 }, { "epoch": 0.514892578125, "grad_norm": 0.24567532539367676, "learning_rate": 0.0002695712382732429, "loss": 1.7978, "step": 10545 }, { "epoch": 0.51494140625, "grad_norm": 0.21928060054779053, "learning_rate": 0.0002695362208680327, "loss": 1.804, "step": 10546 }, { "epoch": 0.514990234375, "grad_norm": 0.22524768114089966, "learning_rate": 0.0002695012035952415, "loss": 1.7892, "step": 10547 }, { "epoch": 0.5150390625, "grad_norm": 0.244704470038414, "learning_rate": 0.00026946618645571805, "loss": 1.7959, "step": 10548 }, { "epoch": 0.515087890625, "grad_norm": 0.23607489466667175, "learning_rate": 0.0002694311694503111, "loss": 1.79, "step": 10549 }, { "epoch": 0.51513671875, "grad_norm": 0.2225109189748764, "learning_rate": 0.0002693961525798692, "loss": 1.7872, "step": 10550 }, { "epoch": 0.515185546875, "grad_norm": 0.2329094409942627, "learning_rate": 0.000269361135845241, "loss": 1.7985, "step": 10551 }, { "epoch": 0.515234375, "grad_norm": 0.24400420486927032, "learning_rate": 0.00026932611924727527, "loss": 1.8072, "step": 10552 }, { "epoch": 0.515283203125, "grad_norm": 0.2206316441297531, "learning_rate": 0.0002692911027868205, "loss": 1.7898, "step": 10553 }, { "epoch": 0.51533203125, "grad_norm": 0.2631509602069855, "learning_rate": 0.00026925608646472557, "loss": 1.7618, "step": 10554 }, { "epoch": 0.515380859375, "grad_norm": 0.2027372419834137, "learning_rate": 0.0002692210702818389, "loss": 1.7925, "step": 10555 }, { "epoch": 0.5154296875, "grad_norm": 0.2770949900150299, "learning_rate": 0.00026918605423900926, "loss": 1.8169, "step": 10556 }, { "epoch": 0.515478515625, "grad_norm": 0.24145103991031647, "learning_rate": 0.0002691510383370853, "loss": 1.7647, "step": 10557 }, { "epoch": 0.51552734375, "grad_norm": 0.22616973519325256, "learning_rate": 0.00026911602257691556, "loss": 1.7874, "step": 10558 }, { "epoch": 0.515576171875, "grad_norm": 0.2688491940498352, "learning_rate": 0.00026908100695934885, "loss": 1.7772, "step": 10559 }, { "epoch": 0.515625, "grad_norm": 0.2412630319595337, "learning_rate": 0.0002690459914852336, "loss": 1.7949, "step": 10560 }, { "epoch": 0.515673828125, "grad_norm": 0.27946847677230835, "learning_rate": 0.00026901097615541857, "loss": 1.7974, "step": 10561 }, { "epoch": 0.51572265625, "grad_norm": 0.24328194558620453, "learning_rate": 0.00026897596097075227, "loss": 1.7807, "step": 10562 }, { "epoch": 0.515771484375, "grad_norm": 0.27015039324760437, "learning_rate": 0.00026894094593208343, "loss": 1.8025, "step": 10563 }, { "epoch": 0.5158203125, "grad_norm": 0.25412964820861816, "learning_rate": 0.0002689059310402606, "loss": 1.7662, "step": 10564 }, { "epoch": 0.515869140625, "grad_norm": 0.24821852147579193, "learning_rate": 0.0002688709162961326, "loss": 1.8045, "step": 10565 }, { "epoch": 0.51591796875, "grad_norm": 0.2767044007778168, "learning_rate": 0.00026883590170054765, "loss": 1.7964, "step": 10566 }, { "epoch": 0.515966796875, "grad_norm": 0.22796796262264252, "learning_rate": 0.0002688008872543547, "loss": 1.7771, "step": 10567 }, { "epoch": 0.516015625, "grad_norm": 0.25439023971557617, "learning_rate": 0.00026876587295840225, "loss": 1.7985, "step": 10568 }, { "epoch": 0.516064453125, "grad_norm": 0.26840639114379883, "learning_rate": 0.0002687308588135388, "loss": 1.7921, "step": 10569 }, { "epoch": 0.51611328125, "grad_norm": 0.22726264595985413, "learning_rate": 0.0002686958448206131, "loss": 1.8064, "step": 10570 }, { "epoch": 0.516162109375, "grad_norm": 0.2367148995399475, "learning_rate": 0.00026866083098047355, "loss": 1.7992, "step": 10571 }, { "epoch": 0.5162109375, "grad_norm": 0.2578705847263336, "learning_rate": 0.000268625817293969, "loss": 1.7833, "step": 10572 }, { "epoch": 0.516259765625, "grad_norm": 0.22427894175052643, "learning_rate": 0.0002685908037619477, "loss": 1.7953, "step": 10573 }, { "epoch": 0.51630859375, "grad_norm": 0.30450719594955444, "learning_rate": 0.00026855579038525856, "loss": 1.7668, "step": 10574 }, { "epoch": 0.516357421875, "grad_norm": 0.20124511420726776, "learning_rate": 0.00026852077716475, "loss": 1.7961, "step": 10575 }, { "epoch": 0.51640625, "grad_norm": 0.3188948333263397, "learning_rate": 0.00026848576410127065, "loss": 1.7779, "step": 10576 }, { "epoch": 0.516455078125, "grad_norm": 0.2928345203399658, "learning_rate": 0.000268450751195669, "loss": 1.7563, "step": 10577 }, { "epoch": 0.51650390625, "grad_norm": 0.29541680216789246, "learning_rate": 0.0002684157384487936, "loss": 1.7894, "step": 10578 }, { "epoch": 0.516552734375, "grad_norm": 0.2438768744468689, "learning_rate": 0.0002683807258614932, "loss": 1.7963, "step": 10579 }, { "epoch": 0.5166015625, "grad_norm": 0.2838546633720398, "learning_rate": 0.000268345713434616, "loss": 1.7738, "step": 10580 }, { "epoch": 0.516650390625, "grad_norm": 0.33434194326400757, "learning_rate": 0.00026831070116901094, "loss": 1.7945, "step": 10581 }, { "epoch": 0.51669921875, "grad_norm": 0.20141667127609253, "learning_rate": 0.0002682756890655264, "loss": 1.791, "step": 10582 }, { "epoch": 0.516748046875, "grad_norm": 0.31139692664146423, "learning_rate": 0.00026824067712501095, "loss": 1.7835, "step": 10583 }, { "epoch": 0.516796875, "grad_norm": 0.3169333338737488, "learning_rate": 0.00026820566534831316, "loss": 1.7901, "step": 10584 }, { "epoch": 0.516845703125, "grad_norm": 0.26749590039253235, "learning_rate": 0.00026817065373628134, "loss": 1.7734, "step": 10585 }, { "epoch": 0.51689453125, "grad_norm": 0.3282674252986908, "learning_rate": 0.00026813564228976447, "loss": 1.7834, "step": 10586 }, { "epoch": 0.516943359375, "grad_norm": 0.2622203230857849, "learning_rate": 0.00026810063100961057, "loss": 1.7584, "step": 10587 }, { "epoch": 0.5169921875, "grad_norm": 0.3011339008808136, "learning_rate": 0.0002680656198966686, "loss": 1.8119, "step": 10588 }, { "epoch": 0.517041015625, "grad_norm": 0.3157654404640198, "learning_rate": 0.00026803060895178683, "loss": 1.8056, "step": 10589 }, { "epoch": 0.51708984375, "grad_norm": 0.2990850806236267, "learning_rate": 0.0002679955981758139, "loss": 1.7688, "step": 10590 }, { "epoch": 0.517138671875, "grad_norm": 0.2629106342792511, "learning_rate": 0.0002679605875695982, "loss": 1.7817, "step": 10591 }, { "epoch": 0.5171875, "grad_norm": 0.4065033197402954, "learning_rate": 0.00026792557713398835, "loss": 1.8152, "step": 10592 }, { "epoch": 0.517236328125, "grad_norm": 0.3329312801361084, "learning_rate": 0.0002678905668698329, "loss": 1.8149, "step": 10593 }, { "epoch": 0.51728515625, "grad_norm": 0.2505604922771454, "learning_rate": 0.0002678555567779801, "loss": 1.7967, "step": 10594 }, { "epoch": 0.517333984375, "grad_norm": 0.33897271752357483, "learning_rate": 0.00026782054685927883, "loss": 1.8029, "step": 10595 }, { "epoch": 0.5173828125, "grad_norm": 0.21729212999343872, "learning_rate": 0.0002677855371145772, "loss": 1.7901, "step": 10596 }, { "epoch": 0.517431640625, "grad_norm": 0.35338953137397766, "learning_rate": 0.000267750527544724, "loss": 1.8155, "step": 10597 }, { "epoch": 0.51748046875, "grad_norm": 0.24491731822490692, "learning_rate": 0.0002677155181505675, "loss": 1.818, "step": 10598 }, { "epoch": 0.517529296875, "grad_norm": 0.36693695187568665, "learning_rate": 0.00026768050893295634, "loss": 1.8016, "step": 10599 }, { "epoch": 0.517578125, "grad_norm": 0.2762893736362457, "learning_rate": 0.00026764549989273886, "loss": 1.7834, "step": 10600 }, { "epoch": 0.517626953125, "grad_norm": 0.26513004302978516, "learning_rate": 0.0002676104910307636, "loss": 1.7793, "step": 10601 }, { "epoch": 0.51767578125, "grad_norm": 0.3144133388996124, "learning_rate": 0.0002675754823478791, "loss": 1.8152, "step": 10602 }, { "epoch": 0.517724609375, "grad_norm": 0.24649377167224884, "learning_rate": 0.0002675404738449337, "loss": 1.8031, "step": 10603 }, { "epoch": 0.5177734375, "grad_norm": 0.23164360225200653, "learning_rate": 0.00026750546552277595, "loss": 1.7714, "step": 10604 }, { "epoch": 0.517822265625, "grad_norm": 0.2840680181980133, "learning_rate": 0.00026747045738225415, "loss": 1.7538, "step": 10605 }, { "epoch": 0.51787109375, "grad_norm": 0.2480975240468979, "learning_rate": 0.000267435449424217, "loss": 1.7862, "step": 10606 }, { "epoch": 0.517919921875, "grad_norm": 0.24168698489665985, "learning_rate": 0.0002674004416495127, "loss": 1.7871, "step": 10607 }, { "epoch": 0.51796875, "grad_norm": 0.28518548607826233, "learning_rate": 0.0002673654340589899, "loss": 1.8102, "step": 10608 }, { "epoch": 0.518017578125, "grad_norm": 0.21673396229743958, "learning_rate": 0.00026733042665349696, "loss": 1.7855, "step": 10609 }, { "epoch": 0.51806640625, "grad_norm": 0.22524847090244293, "learning_rate": 0.0002672954194338822, "loss": 1.7992, "step": 10610 }, { "epoch": 0.518115234375, "grad_norm": 0.2286553978919983, "learning_rate": 0.00026726041240099417, "loss": 1.7826, "step": 10611 }, { "epoch": 0.5181640625, "grad_norm": 0.2522949278354645, "learning_rate": 0.0002672254055556812, "loss": 1.7747, "step": 10612 }, { "epoch": 0.518212890625, "grad_norm": 0.27115947008132935, "learning_rate": 0.00026719039889879187, "loss": 1.8067, "step": 10613 }, { "epoch": 0.51826171875, "grad_norm": 0.2333086133003235, "learning_rate": 0.0002671553924311744, "loss": 1.8062, "step": 10614 }, { "epoch": 0.518310546875, "grad_norm": 0.2624916434288025, "learning_rate": 0.00026712038615367737, "loss": 1.7841, "step": 10615 }, { "epoch": 0.518359375, "grad_norm": 0.2568109929561615, "learning_rate": 0.0002670853800671491, "loss": 1.778, "step": 10616 }, { "epoch": 0.518408203125, "grad_norm": 0.24945543706417084, "learning_rate": 0.00026705037417243806, "loss": 1.7902, "step": 10617 }, { "epoch": 0.51845703125, "grad_norm": 0.22296829521656036, "learning_rate": 0.00026701536847039255, "loss": 1.8128, "step": 10618 }, { "epoch": 0.518505859375, "grad_norm": 0.2863152027130127, "learning_rate": 0.0002669803629618609, "loss": 1.8124, "step": 10619 }, { "epoch": 0.5185546875, "grad_norm": 0.3335237503051758, "learning_rate": 0.0002669453576476917, "loss": 1.7802, "step": 10620 }, { "epoch": 0.518603515625, "grad_norm": 0.24465958774089813, "learning_rate": 0.0002669103525287332, "loss": 1.7969, "step": 10621 }, { "epoch": 0.51865234375, "grad_norm": 0.3133828938007355, "learning_rate": 0.0002668753476058339, "loss": 1.7891, "step": 10622 }, { "epoch": 0.518701171875, "grad_norm": 0.2986678183078766, "learning_rate": 0.0002668403428798419, "loss": 1.7809, "step": 10623 }, { "epoch": 0.51875, "grad_norm": 0.28863510489463806, "learning_rate": 0.00026680533835160585, "loss": 1.7645, "step": 10624 }, { "epoch": 0.518798828125, "grad_norm": 0.2907068431377411, "learning_rate": 0.000266770334021974, "loss": 1.8095, "step": 10625 }, { "epoch": 0.51884765625, "grad_norm": 0.2716098427772522, "learning_rate": 0.0002667353298917947, "loss": 1.7969, "step": 10626 }, { "epoch": 0.518896484375, "grad_norm": 0.3166191875934601, "learning_rate": 0.0002667003259619164, "loss": 1.7875, "step": 10627 }, { "epoch": 0.5189453125, "grad_norm": 0.2265421748161316, "learning_rate": 0.00026666532223318726, "loss": 1.8096, "step": 10628 }, { "epoch": 0.518994140625, "grad_norm": 0.30811989307403564, "learning_rate": 0.00026663031870645577, "loss": 1.7932, "step": 10629 }, { "epoch": 0.51904296875, "grad_norm": 0.24553528428077698, "learning_rate": 0.00026659531538257016, "loss": 1.7979, "step": 10630 }, { "epoch": 0.519091796875, "grad_norm": 0.28173282742500305, "learning_rate": 0.000266560312262379, "loss": 1.7809, "step": 10631 }, { "epoch": 0.519140625, "grad_norm": 0.2598568797111511, "learning_rate": 0.00026652530934673024, "loss": 1.8124, "step": 10632 }, { "epoch": 0.519189453125, "grad_norm": 0.2830774188041687, "learning_rate": 0.0002664903066364726, "loss": 1.7754, "step": 10633 }, { "epoch": 0.51923828125, "grad_norm": 0.2436181902885437, "learning_rate": 0.00026645530413245405, "loss": 1.7756, "step": 10634 }, { "epoch": 0.519287109375, "grad_norm": 0.2998560667037964, "learning_rate": 0.0002664203018355232, "loss": 1.7889, "step": 10635 }, { "epoch": 0.5193359375, "grad_norm": 0.28591442108154297, "learning_rate": 0.0002663852997465283, "loss": 1.7687, "step": 10636 }, { "epoch": 0.519384765625, "grad_norm": 0.2812385559082031, "learning_rate": 0.00026635029786631744, "loss": 1.8037, "step": 10637 }, { "epoch": 0.51943359375, "grad_norm": 0.21197839081287384, "learning_rate": 0.00026631529619573915, "loss": 1.8012, "step": 10638 }, { "epoch": 0.519482421875, "grad_norm": 0.28475767374038696, "learning_rate": 0.00026628029473564147, "loss": 1.8015, "step": 10639 }, { "epoch": 0.51953125, "grad_norm": 0.2704737186431885, "learning_rate": 0.00026624529348687307, "loss": 1.7771, "step": 10640 }, { "epoch": 0.519580078125, "grad_norm": 0.23722591996192932, "learning_rate": 0.0002662102924502818, "loss": 1.8044, "step": 10641 }, { "epoch": 0.51962890625, "grad_norm": 0.2707425057888031, "learning_rate": 0.0002661752916267163, "loss": 1.7841, "step": 10642 }, { "epoch": 0.519677734375, "grad_norm": 0.2553514242172241, "learning_rate": 0.00026614029101702477, "loss": 1.8185, "step": 10643 }, { "epoch": 0.5197265625, "grad_norm": 0.32345202565193176, "learning_rate": 0.00026610529062205516, "loss": 1.7861, "step": 10644 }, { "epoch": 0.519775390625, "grad_norm": 0.2778443396091461, "learning_rate": 0.0002660702904426562, "loss": 1.7967, "step": 10645 }, { "epoch": 0.51982421875, "grad_norm": 0.24985072016716003, "learning_rate": 0.00026603529047967576, "loss": 1.7876, "step": 10646 }, { "epoch": 0.519873046875, "grad_norm": 0.3086126744747162, "learning_rate": 0.00026600029073396236, "loss": 1.7707, "step": 10647 }, { "epoch": 0.519921875, "grad_norm": 0.23049284517765045, "learning_rate": 0.0002659652912063641, "loss": 1.7963, "step": 10648 }, { "epoch": 0.519970703125, "grad_norm": 0.24242034554481506, "learning_rate": 0.00026593029189772935, "loss": 1.7702, "step": 10649 }, { "epoch": 0.52001953125, "grad_norm": 0.30497604608535767, "learning_rate": 0.0002658952928089061, "loss": 1.7923, "step": 10650 }, { "epoch": 0.520068359375, "grad_norm": 0.21332807838916779, "learning_rate": 0.0002658602939407429, "loss": 1.7829, "step": 10651 }, { "epoch": 0.5201171875, "grad_norm": 0.2837532162666321, "learning_rate": 0.0002658252952940878, "loss": 1.8117, "step": 10652 }, { "epoch": 0.520166015625, "grad_norm": 0.2565271854400635, "learning_rate": 0.00026579029686978897, "loss": 1.7827, "step": 10653 }, { "epoch": 0.52021484375, "grad_norm": 0.23916460573673248, "learning_rate": 0.00026575529866869476, "loss": 1.8039, "step": 10654 }, { "epoch": 0.520263671875, "grad_norm": 0.2245122641324997, "learning_rate": 0.0002657203006916532, "loss": 1.7999, "step": 10655 }, { "epoch": 0.5203125, "grad_norm": 0.24556536972522736, "learning_rate": 0.00026568530293951276, "loss": 1.7725, "step": 10656 }, { "epoch": 0.520361328125, "grad_norm": 0.24841700494289398, "learning_rate": 0.00026565030541312137, "loss": 1.817, "step": 10657 }, { "epoch": 0.52041015625, "grad_norm": 0.266247034072876, "learning_rate": 0.0002656153081133274, "loss": 1.7912, "step": 10658 }, { "epoch": 0.520458984375, "grad_norm": 0.23587539792060852, "learning_rate": 0.0002655803110409789, "loss": 1.788, "step": 10659 }, { "epoch": 0.5205078125, "grad_norm": 0.24885651469230652, "learning_rate": 0.00026554531419692424, "loss": 1.7764, "step": 10660 }, { "epoch": 0.520556640625, "grad_norm": 0.22534306347370148, "learning_rate": 0.0002655103175820115, "loss": 1.7879, "step": 10661 }, { "epoch": 0.52060546875, "grad_norm": 0.27144408226013184, "learning_rate": 0.00026547532119708866, "loss": 1.8033, "step": 10662 }, { "epoch": 0.520654296875, "grad_norm": 0.24903425574302673, "learning_rate": 0.00026544032504300426, "loss": 1.7738, "step": 10663 }, { "epoch": 0.520703125, "grad_norm": 0.271980881690979, "learning_rate": 0.0002654053291206061, "loss": 1.8104, "step": 10664 }, { "epoch": 0.520751953125, "grad_norm": 0.3293367028236389, "learning_rate": 0.0002653703334307426, "loss": 1.7907, "step": 10665 }, { "epoch": 0.52080078125, "grad_norm": 0.28547242283821106, "learning_rate": 0.0002653353379742618, "loss": 1.797, "step": 10666 }, { "epoch": 0.520849609375, "grad_norm": 0.2803836464881897, "learning_rate": 0.00026530034275201183, "loss": 1.7678, "step": 10667 }, { "epoch": 0.5208984375, "grad_norm": 0.22208930552005768, "learning_rate": 0.0002652653477648409, "loss": 1.8308, "step": 10668 }, { "epoch": 0.520947265625, "grad_norm": 0.2574160695075989, "learning_rate": 0.00026523035301359696, "loss": 1.7782, "step": 10669 }, { "epoch": 0.52099609375, "grad_norm": 0.33438730239868164, "learning_rate": 0.0002651953584991284, "loss": 1.7903, "step": 10670 }, { "epoch": 0.521044921875, "grad_norm": 0.26381048560142517, "learning_rate": 0.00026516036422228305, "loss": 1.7678, "step": 10671 }, { "epoch": 0.52109375, "grad_norm": 0.22328594326972961, "learning_rate": 0.00026512537018390936, "loss": 1.7979, "step": 10672 }, { "epoch": 0.521142578125, "grad_norm": 0.2942512035369873, "learning_rate": 0.00026509037638485514, "loss": 1.7513, "step": 10673 }, { "epoch": 0.52119140625, "grad_norm": 0.25634604692459106, "learning_rate": 0.0002650553828259686, "loss": 1.7653, "step": 10674 }, { "epoch": 0.521240234375, "grad_norm": 0.23339010775089264, "learning_rate": 0.00026502038950809785, "loss": 1.7813, "step": 10675 }, { "epoch": 0.5212890625, "grad_norm": 0.35167521238327026, "learning_rate": 0.00026498539643209106, "loss": 1.7845, "step": 10676 }, { "epoch": 0.521337890625, "grad_norm": 0.3147641122341156, "learning_rate": 0.0002649504035987962, "loss": 1.8164, "step": 10677 }, { "epoch": 0.52138671875, "grad_norm": 0.2911986708641052, "learning_rate": 0.0002649154110090614, "loss": 1.78, "step": 10678 }, { "epoch": 0.521435546875, "grad_norm": 0.3141106367111206, "learning_rate": 0.0002648804186637347, "loss": 1.809, "step": 10679 }, { "epoch": 0.521484375, "grad_norm": 0.2929680049419403, "learning_rate": 0.00026484542656366405, "loss": 1.8053, "step": 10680 }, { "epoch": 0.521533203125, "grad_norm": 0.31885218620300293, "learning_rate": 0.00026481043470969785, "loss": 1.772, "step": 10681 }, { "epoch": 0.52158203125, "grad_norm": 0.23814183473587036, "learning_rate": 0.0002647754431026838, "loss": 1.7978, "step": 10682 }, { "epoch": 0.521630859375, "grad_norm": 0.24657252430915833, "learning_rate": 0.00026474045174347023, "loss": 1.7986, "step": 10683 }, { "epoch": 0.5216796875, "grad_norm": 0.2793145477771759, "learning_rate": 0.00026470546063290493, "loss": 1.7735, "step": 10684 }, { "epoch": 0.521728515625, "grad_norm": 0.2175055295228958, "learning_rate": 0.0002646704697718361, "loss": 1.8177, "step": 10685 }, { "epoch": 0.52177734375, "grad_norm": 0.30954161286354065, "learning_rate": 0.0002646354791611119, "loss": 1.8044, "step": 10686 }, { "epoch": 0.521826171875, "grad_norm": 0.27356189489364624, "learning_rate": 0.00026460048880157987, "loss": 1.7717, "step": 10687 }, { "epoch": 0.521875, "grad_norm": 0.26973244547843933, "learning_rate": 0.00026456549869408865, "loss": 1.7991, "step": 10688 }, { "epoch": 0.521923828125, "grad_norm": 0.2743048369884491, "learning_rate": 0.00026453050883948575, "loss": 1.768, "step": 10689 }, { "epoch": 0.52197265625, "grad_norm": 0.2656089961528778, "learning_rate": 0.0002644955192386195, "loss": 1.7556, "step": 10690 }, { "epoch": 0.522021484375, "grad_norm": 0.23296920955181122, "learning_rate": 0.00026446052989233764, "loss": 1.8169, "step": 10691 }, { "epoch": 0.5220703125, "grad_norm": 0.29822012782096863, "learning_rate": 0.00026442554080148843, "loss": 1.7651, "step": 10692 }, { "epoch": 0.522119140625, "grad_norm": 0.3473931550979614, "learning_rate": 0.0002643905519669197, "loss": 1.7747, "step": 10693 }, { "epoch": 0.52216796875, "grad_norm": 0.23167851567268372, "learning_rate": 0.00026435556338947946, "loss": 1.7996, "step": 10694 }, { "epoch": 0.522216796875, "grad_norm": 0.3301249146461487, "learning_rate": 0.00026432057507001577, "loss": 1.8181, "step": 10695 }, { "epoch": 0.522265625, "grad_norm": 0.29635921120643616, "learning_rate": 0.00026428558700937645, "loss": 1.7741, "step": 10696 }, { "epoch": 0.522314453125, "grad_norm": 0.2536870539188385, "learning_rate": 0.00026425059920840956, "loss": 1.7683, "step": 10697 }, { "epoch": 0.52236328125, "grad_norm": 0.26446419954299927, "learning_rate": 0.0002642156116679629, "loss": 1.8135, "step": 10698 }, { "epoch": 0.522412109375, "grad_norm": 0.2537066340446472, "learning_rate": 0.0002641806243888848, "loss": 1.7745, "step": 10699 }, { "epoch": 0.5224609375, "grad_norm": 0.24980005621910095, "learning_rate": 0.00026414563737202275, "loss": 1.7828, "step": 10700 }, { "epoch": 0.522509765625, "grad_norm": 0.2243027240037918, "learning_rate": 0.00026411065061822506, "loss": 1.7721, "step": 10701 }, { "epoch": 0.52255859375, "grad_norm": 0.2652481198310852, "learning_rate": 0.0002640756641283394, "loss": 1.7995, "step": 10702 }, { "epoch": 0.522607421875, "grad_norm": 0.23666433990001678, "learning_rate": 0.0002640406779032138, "loss": 1.7856, "step": 10703 }, { "epoch": 0.52265625, "grad_norm": 0.27331864833831787, "learning_rate": 0.00026400569194369623, "loss": 1.7806, "step": 10704 }, { "epoch": 0.522705078125, "grad_norm": 0.26139870285987854, "learning_rate": 0.00026397070625063445, "loss": 1.7865, "step": 10705 }, { "epoch": 0.52275390625, "grad_norm": 0.2632109820842743, "learning_rate": 0.00026393572082487654, "loss": 1.7991, "step": 10706 }, { "epoch": 0.522802734375, "grad_norm": 0.21501588821411133, "learning_rate": 0.00026390073566727023, "loss": 1.7944, "step": 10707 }, { "epoch": 0.5228515625, "grad_norm": 0.21838991343975067, "learning_rate": 0.00026386575077866364, "loss": 1.8053, "step": 10708 }, { "epoch": 0.522900390625, "grad_norm": 0.257364422082901, "learning_rate": 0.0002638307661599044, "loss": 1.7998, "step": 10709 }, { "epoch": 0.52294921875, "grad_norm": 0.22728337347507477, "learning_rate": 0.0002637957818118407, "loss": 1.7931, "step": 10710 }, { "epoch": 0.522998046875, "grad_norm": 0.2754043638706207, "learning_rate": 0.0002637607977353201, "loss": 1.7799, "step": 10711 }, { "epoch": 0.523046875, "grad_norm": 0.2421351671218872, "learning_rate": 0.0002637258139311906, "loss": 1.8054, "step": 10712 }, { "epoch": 0.523095703125, "grad_norm": 0.2533080577850342, "learning_rate": 0.00026369083040030014, "loss": 1.777, "step": 10713 }, { "epoch": 0.52314453125, "grad_norm": 0.2918800413608551, "learning_rate": 0.0002636558471434963, "loss": 1.781, "step": 10714 }, { "epoch": 0.523193359375, "grad_norm": 0.2315996140241623, "learning_rate": 0.00026362086416162735, "loss": 1.7894, "step": 10715 }, { "epoch": 0.5232421875, "grad_norm": 0.26928436756134033, "learning_rate": 0.00026358588145554076, "loss": 1.7704, "step": 10716 }, { "epoch": 0.523291015625, "grad_norm": 0.22655846178531647, "learning_rate": 0.0002635508990260845, "loss": 1.8068, "step": 10717 }, { "epoch": 0.52333984375, "grad_norm": 0.22126804292201996, "learning_rate": 0.0002635159168741064, "loss": 1.7817, "step": 10718 }, { "epoch": 0.523388671875, "grad_norm": 0.32125502824783325, "learning_rate": 0.0002634809350004544, "loss": 1.7931, "step": 10719 }, { "epoch": 0.5234375, "grad_norm": 0.238492950797081, "learning_rate": 0.0002634459534059761, "loss": 1.8033, "step": 10720 }, { "epoch": 0.523486328125, "grad_norm": 0.263751745223999, "learning_rate": 0.00026341097209151937, "loss": 1.7914, "step": 10721 }, { "epoch": 0.52353515625, "grad_norm": 0.2990016043186188, "learning_rate": 0.00026337599105793216, "loss": 1.7968, "step": 10722 }, { "epoch": 0.523583984375, "grad_norm": 0.308391809463501, "learning_rate": 0.000263341010306062, "loss": 1.7857, "step": 10723 }, { "epoch": 0.5236328125, "grad_norm": 0.25123292207717896, "learning_rate": 0.00026330602983675697, "loss": 1.7943, "step": 10724 }, { "epoch": 0.523681640625, "grad_norm": 0.2771945893764496, "learning_rate": 0.0002632710496508645, "loss": 1.7975, "step": 10725 }, { "epoch": 0.52373046875, "grad_norm": 0.33384034037590027, "learning_rate": 0.00026323606974923273, "loss": 1.7953, "step": 10726 }, { "epoch": 0.523779296875, "grad_norm": 0.25363728404045105, "learning_rate": 0.00026320109013270926, "loss": 1.7939, "step": 10727 }, { "epoch": 0.523828125, "grad_norm": 0.26505938172340393, "learning_rate": 0.00026316611080214183, "loss": 1.7805, "step": 10728 }, { "epoch": 0.523876953125, "grad_norm": 0.27613648772239685, "learning_rate": 0.0002631311317583782, "loss": 1.7802, "step": 10729 }, { "epoch": 0.52392578125, "grad_norm": 0.2603253722190857, "learning_rate": 0.000263096153002266, "loss": 1.7906, "step": 10730 }, { "epoch": 0.523974609375, "grad_norm": 0.30707883834838867, "learning_rate": 0.00026306117453465326, "loss": 1.786, "step": 10731 }, { "epoch": 0.5240234375, "grad_norm": 0.2751561403274536, "learning_rate": 0.00026302619635638745, "loss": 1.7855, "step": 10732 }, { "epoch": 0.524072265625, "grad_norm": 0.23981890082359314, "learning_rate": 0.00026299121846831653, "loss": 1.7809, "step": 10733 }, { "epoch": 0.52412109375, "grad_norm": 0.3037843704223633, "learning_rate": 0.0002629562408712879, "loss": 1.8056, "step": 10734 }, { "epoch": 0.524169921875, "grad_norm": 0.21521234512329102, "learning_rate": 0.0002629212635661496, "loss": 1.7776, "step": 10735 }, { "epoch": 0.52421875, "grad_norm": 0.2953055202960968, "learning_rate": 0.0002628862865537491, "loss": 1.7846, "step": 10736 }, { "epoch": 0.524267578125, "grad_norm": 0.23812423646450043, "learning_rate": 0.0002628513098349342, "loss": 1.7821, "step": 10737 }, { "epoch": 0.52431640625, "grad_norm": 0.2784697711467743, "learning_rate": 0.0002628163334105526, "loss": 1.8047, "step": 10738 }, { "epoch": 0.524365234375, "grad_norm": 0.23837119340896606, "learning_rate": 0.00026278135728145184, "loss": 1.7826, "step": 10739 }, { "epoch": 0.5244140625, "grad_norm": 0.25370875000953674, "learning_rate": 0.00026274638144847985, "loss": 1.7944, "step": 10740 }, { "epoch": 0.524462890625, "grad_norm": 0.30281302332878113, "learning_rate": 0.000262711405912484, "loss": 1.784, "step": 10741 }, { "epoch": 0.52451171875, "grad_norm": 0.21169604361057281, "learning_rate": 0.0002626764306743122, "loss": 1.8241, "step": 10742 }, { "epoch": 0.524560546875, "grad_norm": 0.33191439509391785, "learning_rate": 0.0002626414557348119, "loss": 1.8279, "step": 10743 }, { "epoch": 0.524609375, "grad_norm": 0.2123359888792038, "learning_rate": 0.000262606481094831, "loss": 1.7765, "step": 10744 }, { "epoch": 0.524658203125, "grad_norm": 0.2955290675163269, "learning_rate": 0.0002625715067552169, "loss": 1.7719, "step": 10745 }, { "epoch": 0.52470703125, "grad_norm": 0.25538620352745056, "learning_rate": 0.00026253653271681734, "loss": 1.8019, "step": 10746 }, { "epoch": 0.524755859375, "grad_norm": 0.3146819770336151, "learning_rate": 0.00026250155898047996, "loss": 1.8172, "step": 10747 }, { "epoch": 0.5248046875, "grad_norm": 0.33716756105422974, "learning_rate": 0.00026246658554705224, "loss": 1.8094, "step": 10748 }, { "epoch": 0.524853515625, "grad_norm": 0.27292412519454956, "learning_rate": 0.0002624316124173819, "loss": 1.8077, "step": 10749 }, { "epoch": 0.52490234375, "grad_norm": 0.33690816164016724, "learning_rate": 0.0002623966395923166, "loss": 1.7892, "step": 10750 }, { "epoch": 0.524951171875, "grad_norm": 0.23315678536891937, "learning_rate": 0.0002623616670727039, "loss": 1.8038, "step": 10751 }, { "epoch": 0.525, "grad_norm": 0.3443020284175873, "learning_rate": 0.00026232669485939117, "loss": 1.7758, "step": 10752 }, { "epoch": 0.525048828125, "grad_norm": 0.25876837968826294, "learning_rate": 0.00026229172295322636, "loss": 1.8028, "step": 10753 }, { "epoch": 0.52509765625, "grad_norm": 0.34452104568481445, "learning_rate": 0.0002622567513550568, "loss": 1.7937, "step": 10754 }, { "epoch": 0.525146484375, "grad_norm": 0.2257651388645172, "learning_rate": 0.00026222178006573004, "loss": 1.8157, "step": 10755 }, { "epoch": 0.5251953125, "grad_norm": 0.3381045162677765, "learning_rate": 0.00026218680908609383, "loss": 1.8064, "step": 10756 }, { "epoch": 0.525244140625, "grad_norm": 0.24602629244327545, "learning_rate": 0.00026215183841699546, "loss": 1.7977, "step": 10757 }, { "epoch": 0.52529296875, "grad_norm": 0.28129842877388, "learning_rate": 0.0002621168680592827, "loss": 1.7977, "step": 10758 }, { "epoch": 0.525341796875, "grad_norm": 0.21876484155654907, "learning_rate": 0.0002620818980138029, "loss": 1.8199, "step": 10759 }, { "epoch": 0.525390625, "grad_norm": 0.26843565702438354, "learning_rate": 0.00026204692828140383, "loss": 1.7998, "step": 10760 }, { "epoch": 0.525439453125, "grad_norm": 0.2755693197250366, "learning_rate": 0.0002620119588629327, "loss": 1.7668, "step": 10761 }, { "epoch": 0.52548828125, "grad_norm": 0.24880531430244446, "learning_rate": 0.00026197698975923727, "loss": 1.8062, "step": 10762 }, { "epoch": 0.525537109375, "grad_norm": 0.23395085334777832, "learning_rate": 0.00026194202097116494, "loss": 1.7874, "step": 10763 }, { "epoch": 0.5255859375, "grad_norm": 0.24415862560272217, "learning_rate": 0.00026190705249956326, "loss": 1.789, "step": 10764 }, { "epoch": 0.525634765625, "grad_norm": 0.21690739691257477, "learning_rate": 0.0002618720843452796, "loss": 1.7738, "step": 10765 }, { "epoch": 0.52568359375, "grad_norm": 0.23171783983707428, "learning_rate": 0.0002618371165091616, "loss": 1.7974, "step": 10766 }, { "epoch": 0.525732421875, "grad_norm": 0.23246753215789795, "learning_rate": 0.00026180214899205665, "loss": 1.793, "step": 10767 }, { "epoch": 0.52578125, "grad_norm": 0.19507938623428345, "learning_rate": 0.0002617671817948121, "loss": 1.8054, "step": 10768 }, { "epoch": 0.525830078125, "grad_norm": 0.2323579341173172, "learning_rate": 0.0002617322149182757, "loss": 1.7725, "step": 10769 }, { "epoch": 0.52587890625, "grad_norm": 0.25896555185317993, "learning_rate": 0.0002616972483632947, "loss": 1.7728, "step": 10770 }, { "epoch": 0.525927734375, "grad_norm": 0.23976527154445648, "learning_rate": 0.0002616622821307165, "loss": 1.8088, "step": 10771 }, { "epoch": 0.5259765625, "grad_norm": 0.234848290681839, "learning_rate": 0.00026162731622138865, "loss": 1.7917, "step": 10772 }, { "epoch": 0.526025390625, "grad_norm": 0.25080612301826477, "learning_rate": 0.0002615923506361585, "loss": 1.7892, "step": 10773 }, { "epoch": 0.52607421875, "grad_norm": 0.27291539311408997, "learning_rate": 0.0002615573853758735, "loss": 1.7903, "step": 10774 }, { "epoch": 0.526123046875, "grad_norm": 0.2941085398197174, "learning_rate": 0.00026152242044138107, "loss": 1.812, "step": 10775 }, { "epoch": 0.526171875, "grad_norm": 0.22141003608703613, "learning_rate": 0.00026148745583352866, "loss": 1.7729, "step": 10776 }, { "epoch": 0.526220703125, "grad_norm": 0.25097036361694336, "learning_rate": 0.0002614524915531636, "loss": 1.787, "step": 10777 }, { "epoch": 0.52626953125, "grad_norm": 0.32011470198631287, "learning_rate": 0.0002614175276011333, "loss": 1.8242, "step": 10778 }, { "epoch": 0.526318359375, "grad_norm": 0.2517211437225342, "learning_rate": 0.0002613825639782851, "loss": 1.768, "step": 10779 }, { "epoch": 0.5263671875, "grad_norm": 0.27249422669410706, "learning_rate": 0.00026134760068546636, "loss": 1.7918, "step": 10780 }, { "epoch": 0.526416015625, "grad_norm": 0.26674309372901917, "learning_rate": 0.0002613126377235245, "loss": 1.7991, "step": 10781 }, { "epoch": 0.52646484375, "grad_norm": 0.2410152107477188, "learning_rate": 0.00026127767509330697, "loss": 1.7908, "step": 10782 }, { "epoch": 0.526513671875, "grad_norm": 0.25495487451553345, "learning_rate": 0.00026124271279566095, "loss": 1.7903, "step": 10783 }, { "epoch": 0.5265625, "grad_norm": 0.28900083899497986, "learning_rate": 0.00026120775083143376, "loss": 1.7736, "step": 10784 }, { "epoch": 0.526611328125, "grad_norm": 0.301331102848053, "learning_rate": 0.00026117278920147284, "loss": 1.7845, "step": 10785 }, { "epoch": 0.52666015625, "grad_norm": 0.2353171408176422, "learning_rate": 0.0002611378279066256, "loss": 1.751, "step": 10786 }, { "epoch": 0.526708984375, "grad_norm": 0.28022804856300354, "learning_rate": 0.00026110286694773916, "loss": 1.8102, "step": 10787 }, { "epoch": 0.5267578125, "grad_norm": 0.25518614053726196, "learning_rate": 0.00026106790632566087, "loss": 1.7878, "step": 10788 }, { "epoch": 0.526806640625, "grad_norm": 0.3103974163532257, "learning_rate": 0.00026103294604123804, "loss": 1.7892, "step": 10789 }, { "epoch": 0.52685546875, "grad_norm": 0.23936960101127625, "learning_rate": 0.00026099798609531804, "loss": 1.77, "step": 10790 }, { "epoch": 0.526904296875, "grad_norm": 0.2712789475917816, "learning_rate": 0.0002609630264887481, "loss": 1.7893, "step": 10791 }, { "epoch": 0.526953125, "grad_norm": 0.2739861309528351, "learning_rate": 0.0002609280672223755, "loss": 1.8023, "step": 10792 }, { "epoch": 0.527001953125, "grad_norm": 0.2595835328102112, "learning_rate": 0.00026089310829704743, "loss": 1.814, "step": 10793 }, { "epoch": 0.52705078125, "grad_norm": 0.3088703751564026, "learning_rate": 0.0002608581497136113, "loss": 1.7986, "step": 10794 }, { "epoch": 0.527099609375, "grad_norm": 0.34642913937568665, "learning_rate": 0.0002608231914729142, "loss": 1.7706, "step": 10795 }, { "epoch": 0.5271484375, "grad_norm": 0.24642765522003174, "learning_rate": 0.0002607882335758036, "loss": 1.8014, "step": 10796 }, { "epoch": 0.527197265625, "grad_norm": 0.28415989875793457, "learning_rate": 0.00026075327602312635, "loss": 1.7904, "step": 10797 }, { "epoch": 0.52724609375, "grad_norm": 0.31140974164009094, "learning_rate": 0.00026071831881573, "loss": 1.7795, "step": 10798 }, { "epoch": 0.527294921875, "grad_norm": 0.22390858829021454, "learning_rate": 0.0002606833619544617, "loss": 1.7903, "step": 10799 }, { "epoch": 0.52734375, "grad_norm": 0.28464627265930176, "learning_rate": 0.0002606484054401686, "loss": 1.7651, "step": 10800 }, { "epoch": 0.527392578125, "grad_norm": 0.28628724813461304, "learning_rate": 0.0002606134492736979, "loss": 1.8148, "step": 10801 }, { "epoch": 0.52744140625, "grad_norm": 0.22732503712177277, "learning_rate": 0.0002605784934558968, "loss": 1.7701, "step": 10802 }, { "epoch": 0.527490234375, "grad_norm": 0.25572308897972107, "learning_rate": 0.00026054353798761254, "loss": 1.7909, "step": 10803 }, { "epoch": 0.5275390625, "grad_norm": 0.25881215929985046, "learning_rate": 0.00026050858286969224, "loss": 1.7661, "step": 10804 }, { "epoch": 0.527587890625, "grad_norm": 0.22181329131126404, "learning_rate": 0.0002604736281029831, "loss": 1.7641, "step": 10805 }, { "epoch": 0.52763671875, "grad_norm": 0.28035637736320496, "learning_rate": 0.0002604386736883322, "loss": 1.7677, "step": 10806 }, { "epoch": 0.527685546875, "grad_norm": 0.29543647170066833, "learning_rate": 0.0002604037196265868, "loss": 1.7908, "step": 10807 }, { "epoch": 0.527734375, "grad_norm": 0.21375973522663116, "learning_rate": 0.000260368765918594, "loss": 1.7948, "step": 10808 }, { "epoch": 0.527783203125, "grad_norm": 0.27447420358657837, "learning_rate": 0.00026033381256520085, "loss": 1.7798, "step": 10809 }, { "epoch": 0.52783203125, "grad_norm": 0.22853781282901764, "learning_rate": 0.0002602988595672545, "loss": 1.8014, "step": 10810 }, { "epoch": 0.527880859375, "grad_norm": 0.24078691005706787, "learning_rate": 0.0002602639069256022, "loss": 1.7966, "step": 10811 }, { "epoch": 0.5279296875, "grad_norm": 0.2549440264701843, "learning_rate": 0.0002602289546410909, "loss": 1.7613, "step": 10812 }, { "epoch": 0.527978515625, "grad_norm": 0.29296138882637024, "learning_rate": 0.00026019400271456777, "loss": 1.8073, "step": 10813 }, { "epoch": 0.52802734375, "grad_norm": 0.22328664362430573, "learning_rate": 0.0002601590511468798, "loss": 1.7772, "step": 10814 }, { "epoch": 0.528076171875, "grad_norm": 0.22994846105575562, "learning_rate": 0.00026012409993887427, "loss": 1.7891, "step": 10815 }, { "epoch": 0.528125, "grad_norm": 0.292797714471817, "learning_rate": 0.0002600891490913981, "loss": 1.7859, "step": 10816 }, { "epoch": 0.528173828125, "grad_norm": 0.2516554594039917, "learning_rate": 0.00026005419860529835, "loss": 1.7861, "step": 10817 }, { "epoch": 0.52822265625, "grad_norm": 0.25948819518089294, "learning_rate": 0.0002600192484814221, "loss": 1.7791, "step": 10818 }, { "epoch": 0.528271484375, "grad_norm": 0.2665332853794098, "learning_rate": 0.00025998429872061633, "loss": 1.767, "step": 10819 }, { "epoch": 0.5283203125, "grad_norm": 0.24318724870681763, "learning_rate": 0.0002599493493237283, "loss": 1.7929, "step": 10820 }, { "epoch": 0.528369140625, "grad_norm": 0.23777134716510773, "learning_rate": 0.0002599144002916048, "loss": 1.7849, "step": 10821 }, { "epoch": 0.52841796875, "grad_norm": 0.2690814733505249, "learning_rate": 0.000259879451625093, "loss": 1.7875, "step": 10822 }, { "epoch": 0.528466796875, "grad_norm": 0.23485474288463593, "learning_rate": 0.00025984450332503965, "loss": 1.7969, "step": 10823 }, { "epoch": 0.528515625, "grad_norm": 0.235359787940979, "learning_rate": 0.0002598095553922921, "loss": 1.7885, "step": 10824 }, { "epoch": 0.528564453125, "grad_norm": 0.2908155620098114, "learning_rate": 0.0002597746078276972, "loss": 1.796, "step": 10825 }, { "epoch": 0.52861328125, "grad_norm": 0.27821049094200134, "learning_rate": 0.0002597396606321018, "loss": 1.7884, "step": 10826 }, { "epoch": 0.528662109375, "grad_norm": 0.30699384212493896, "learning_rate": 0.00025970471380635303, "loss": 1.7713, "step": 10827 }, { "epoch": 0.5287109375, "grad_norm": 0.29043108224868774, "learning_rate": 0.0002596697673512978, "loss": 1.7616, "step": 10828 }, { "epoch": 0.528759765625, "grad_norm": 0.23985722661018372, "learning_rate": 0.00025963482126778307, "loss": 1.7828, "step": 10829 }, { "epoch": 0.52880859375, "grad_norm": 0.26755157113075256, "learning_rate": 0.00025959987555665583, "loss": 1.7425, "step": 10830 }, { "epoch": 0.528857421875, "grad_norm": 0.2544253468513489, "learning_rate": 0.00025956493021876296, "loss": 1.7943, "step": 10831 }, { "epoch": 0.52890625, "grad_norm": 0.25701817870140076, "learning_rate": 0.00025952998525495134, "loss": 1.7957, "step": 10832 }, { "epoch": 0.528955078125, "grad_norm": 0.28120943903923035, "learning_rate": 0.00025949504066606806, "loss": 1.7984, "step": 10833 }, { "epoch": 0.52900390625, "grad_norm": 0.2701709568500519, "learning_rate": 0.00025946009645295986, "loss": 1.7703, "step": 10834 }, { "epoch": 0.529052734375, "grad_norm": 0.304474800825119, "learning_rate": 0.00025942515261647374, "loss": 1.7914, "step": 10835 }, { "epoch": 0.5291015625, "grad_norm": 0.23183564841747284, "learning_rate": 0.00025939020915745657, "loss": 1.7903, "step": 10836 }, { "epoch": 0.529150390625, "grad_norm": 0.3342808485031128, "learning_rate": 0.0002593552660767552, "loss": 1.8049, "step": 10837 }, { "epoch": 0.52919921875, "grad_norm": 0.2596157193183899, "learning_rate": 0.00025932032337521653, "loss": 1.8327, "step": 10838 }, { "epoch": 0.529248046875, "grad_norm": 0.35892075300216675, "learning_rate": 0.00025928538105368735, "loss": 1.787, "step": 10839 }, { "epoch": 0.529296875, "grad_norm": 0.2743697166442871, "learning_rate": 0.00025925043911301476, "loss": 1.7844, "step": 10840 }, { "epoch": 0.529345703125, "grad_norm": 0.32256773114204407, "learning_rate": 0.00025921549755404525, "loss": 1.784, "step": 10841 }, { "epoch": 0.52939453125, "grad_norm": 0.28648191690444946, "learning_rate": 0.00025918055637762596, "loss": 1.7881, "step": 10842 }, { "epoch": 0.529443359375, "grad_norm": 0.29622170329093933, "learning_rate": 0.0002591456155846035, "loss": 1.7985, "step": 10843 }, { "epoch": 0.5294921875, "grad_norm": 0.24666807055473328, "learning_rate": 0.00025911067517582487, "loss": 1.7614, "step": 10844 }, { "epoch": 0.529541015625, "grad_norm": 0.2796502113342285, "learning_rate": 0.0002590757351521368, "loss": 1.7917, "step": 10845 }, { "epoch": 0.52958984375, "grad_norm": 0.24681763350963593, "learning_rate": 0.00025904079551438596, "loss": 1.7743, "step": 10846 }, { "epoch": 0.529638671875, "grad_norm": 0.2658560574054718, "learning_rate": 0.00025900585626341936, "loss": 1.7915, "step": 10847 }, { "epoch": 0.5296875, "grad_norm": 0.2465251237154007, "learning_rate": 0.00025897091740008366, "loss": 1.8009, "step": 10848 }, { "epoch": 0.529736328125, "grad_norm": 0.2803049385547638, "learning_rate": 0.00025893597892522567, "loss": 1.7838, "step": 10849 }, { "epoch": 0.52978515625, "grad_norm": 0.3009028434753418, "learning_rate": 0.00025890104083969205, "loss": 1.7975, "step": 10850 }, { "epoch": 0.529833984375, "grad_norm": 0.31167566776275635, "learning_rate": 0.00025886610314432973, "loss": 1.7837, "step": 10851 }, { "epoch": 0.5298828125, "grad_norm": 0.26381367444992065, "learning_rate": 0.00025883116583998536, "loss": 1.7728, "step": 10852 }, { "epoch": 0.529931640625, "grad_norm": 0.2747567594051361, "learning_rate": 0.00025879622892750563, "loss": 1.8073, "step": 10853 }, { "epoch": 0.52998046875, "grad_norm": 0.25473079085350037, "learning_rate": 0.00025876129240773744, "loss": 1.8004, "step": 10854 }, { "epoch": 0.530029296875, "grad_norm": 0.28281065821647644, "learning_rate": 0.0002587263562815272, "loss": 1.7891, "step": 10855 }, { "epoch": 0.530078125, "grad_norm": 0.2644596993923187, "learning_rate": 0.0002586914205497219, "loss": 1.8072, "step": 10856 }, { "epoch": 0.530126953125, "grad_norm": 0.2643897831439972, "learning_rate": 0.00025865648521316805, "loss": 1.7587, "step": 10857 }, { "epoch": 0.53017578125, "grad_norm": 0.26881515979766846, "learning_rate": 0.00025862155027271244, "loss": 1.7697, "step": 10858 }, { "epoch": 0.530224609375, "grad_norm": 0.26613083481788635, "learning_rate": 0.00025858661572920167, "loss": 1.8094, "step": 10859 }, { "epoch": 0.5302734375, "grad_norm": 0.2812589704990387, "learning_rate": 0.00025855168158348256, "loss": 1.7922, "step": 10860 }, { "epoch": 0.530322265625, "grad_norm": 0.21457314491271973, "learning_rate": 0.0002585167478364016, "loss": 1.7901, "step": 10861 }, { "epoch": 0.53037109375, "grad_norm": 0.33668258786201477, "learning_rate": 0.0002584818144888055, "loss": 1.7673, "step": 10862 }, { "epoch": 0.530419921875, "grad_norm": 0.29013875126838684, "learning_rate": 0.000258446881541541, "loss": 1.7472, "step": 10863 }, { "epoch": 0.53046875, "grad_norm": 0.22335608303546906, "learning_rate": 0.00025841194899545443, "loss": 1.7724, "step": 10864 }, { "epoch": 0.530517578125, "grad_norm": 0.3397728502750397, "learning_rate": 0.00025837701685139274, "loss": 1.7754, "step": 10865 }, { "epoch": 0.53056640625, "grad_norm": 0.2398567497730255, "learning_rate": 0.00025834208511020236, "loss": 1.7713, "step": 10866 }, { "epoch": 0.530615234375, "grad_norm": 0.24797698855400085, "learning_rate": 0.00025830715377272995, "loss": 1.764, "step": 10867 }, { "epoch": 0.5306640625, "grad_norm": 0.2622312903404236, "learning_rate": 0.00025827222283982204, "loss": 1.7809, "step": 10868 }, { "epoch": 0.530712890625, "grad_norm": 0.21938097476959229, "learning_rate": 0.0002582372923123253, "loss": 1.7598, "step": 10869 }, { "epoch": 0.53076171875, "grad_norm": 0.21646583080291748, "learning_rate": 0.00025820236219108617, "loss": 1.777, "step": 10870 }, { "epoch": 0.530810546875, "grad_norm": 0.22897304594516754, "learning_rate": 0.00025816743247695134, "loss": 1.8162, "step": 10871 }, { "epoch": 0.530859375, "grad_norm": 0.23751626908779144, "learning_rate": 0.0002581325031707674, "loss": 1.7891, "step": 10872 }, { "epoch": 0.530908203125, "grad_norm": 0.2392820119857788, "learning_rate": 0.00025809757427338063, "loss": 1.7937, "step": 10873 }, { "epoch": 0.53095703125, "grad_norm": 0.21496637165546417, "learning_rate": 0.0002580626457856379, "loss": 1.797, "step": 10874 }, { "epoch": 0.531005859375, "grad_norm": 0.262710303068161, "learning_rate": 0.0002580277177083853, "loss": 1.8125, "step": 10875 }, { "epoch": 0.5310546875, "grad_norm": 0.25147536396980286, "learning_rate": 0.00025799279004246983, "loss": 1.785, "step": 10876 }, { "epoch": 0.531103515625, "grad_norm": 0.23586806654930115, "learning_rate": 0.0002579578627887376, "loss": 1.7534, "step": 10877 }, { "epoch": 0.53115234375, "grad_norm": 0.2541486620903015, "learning_rate": 0.0002579229359480354, "loss": 1.7836, "step": 10878 }, { "epoch": 0.531201171875, "grad_norm": 0.2605987787246704, "learning_rate": 0.00025788800952120957, "loss": 1.7955, "step": 10879 }, { "epoch": 0.53125, "grad_norm": 0.24314071238040924, "learning_rate": 0.0002578530835091065, "loss": 1.7906, "step": 10880 }, { "epoch": 0.531298828125, "grad_norm": 0.26273006200790405, "learning_rate": 0.0002578181579125727, "loss": 1.7999, "step": 10881 }, { "epoch": 0.53134765625, "grad_norm": 0.2453802227973938, "learning_rate": 0.00025778323273245465, "loss": 1.7812, "step": 10882 }, { "epoch": 0.531396484375, "grad_norm": 0.2594987452030182, "learning_rate": 0.00025774830796959885, "loss": 1.7971, "step": 10883 }, { "epoch": 0.5314453125, "grad_norm": 0.2331363707780838, "learning_rate": 0.0002577133836248516, "loss": 1.794, "step": 10884 }, { "epoch": 0.531494140625, "grad_norm": 0.272929847240448, "learning_rate": 0.0002576784596990594, "loss": 1.7764, "step": 10885 }, { "epoch": 0.53154296875, "grad_norm": 0.2527105212211609, "learning_rate": 0.0002576435361930687, "loss": 1.7857, "step": 10886 }, { "epoch": 0.531591796875, "grad_norm": 0.2838629186153412, "learning_rate": 0.0002576086131077258, "loss": 1.7906, "step": 10887 }, { "epoch": 0.531640625, "grad_norm": 0.24833261966705322, "learning_rate": 0.0002575736904438772, "loss": 1.7894, "step": 10888 }, { "epoch": 0.531689453125, "grad_norm": 0.25436267256736755, "learning_rate": 0.0002575387682023691, "loss": 1.7914, "step": 10889 }, { "epoch": 0.53173828125, "grad_norm": 0.28320425748825073, "learning_rate": 0.0002575038463840481, "loss": 1.8063, "step": 10890 }, { "epoch": 0.531787109375, "grad_norm": 0.2417030781507492, "learning_rate": 0.0002574689249897603, "loss": 1.7765, "step": 10891 }, { "epoch": 0.5318359375, "grad_norm": 0.35710999369621277, "learning_rate": 0.0002574340040203523, "loss": 1.7958, "step": 10892 }, { "epoch": 0.531884765625, "grad_norm": 0.24608778953552246, "learning_rate": 0.00025739908347667024, "loss": 1.7541, "step": 10893 }, { "epoch": 0.53193359375, "grad_norm": 0.3043576180934906, "learning_rate": 0.00025736416335956063, "loss": 1.7737, "step": 10894 }, { "epoch": 0.531982421875, "grad_norm": 0.2806697487831116, "learning_rate": 0.0002573292436698696, "loss": 1.7855, "step": 10895 }, { "epoch": 0.53203125, "grad_norm": 0.3350776731967926, "learning_rate": 0.0002572943244084436, "loss": 1.7687, "step": 10896 }, { "epoch": 0.532080078125, "grad_norm": 0.24711966514587402, "learning_rate": 0.0002572594055761289, "loss": 1.7763, "step": 10897 }, { "epoch": 0.53212890625, "grad_norm": 0.2929198443889618, "learning_rate": 0.00025722448717377166, "loss": 1.7897, "step": 10898 }, { "epoch": 0.532177734375, "grad_norm": 0.29766786098480225, "learning_rate": 0.0002571895692022184, "loss": 1.7721, "step": 10899 }, { "epoch": 0.5322265625, "grad_norm": 0.29541072249412537, "learning_rate": 0.0002571546516623151, "loss": 1.7782, "step": 10900 }, { "epoch": 0.532275390625, "grad_norm": 0.22748012840747833, "learning_rate": 0.0002571197345549082, "loss": 1.7784, "step": 10901 }, { "epoch": 0.53232421875, "grad_norm": 0.28289228677749634, "learning_rate": 0.00025708481788084387, "loss": 1.7908, "step": 10902 }, { "epoch": 0.532373046875, "grad_norm": 0.27497974038124084, "learning_rate": 0.00025704990164096846, "loss": 1.7863, "step": 10903 }, { "epoch": 0.532421875, "grad_norm": 0.30068832635879517, "learning_rate": 0.0002570149858361281, "loss": 1.804, "step": 10904 }, { "epoch": 0.532470703125, "grad_norm": 0.2650860548019409, "learning_rate": 0.00025698007046716885, "loss": 1.7848, "step": 10905 }, { "epoch": 0.53251953125, "grad_norm": 0.2310185581445694, "learning_rate": 0.00025694515553493727, "loss": 1.8205, "step": 10906 }, { "epoch": 0.532568359375, "grad_norm": 0.2968834936618805, "learning_rate": 0.0002569102410402792, "loss": 1.8053, "step": 10907 }, { "epoch": 0.5326171875, "grad_norm": 0.26623260974884033, "learning_rate": 0.0002568753269840411, "loss": 1.7923, "step": 10908 }, { "epoch": 0.532666015625, "grad_norm": 0.23084084689617157, "learning_rate": 0.00025684041336706886, "loss": 1.774, "step": 10909 }, { "epoch": 0.53271484375, "grad_norm": 0.3268091082572937, "learning_rate": 0.0002568055001902089, "loss": 1.7944, "step": 10910 }, { "epoch": 0.532763671875, "grad_norm": 0.2564425468444824, "learning_rate": 0.00025677058745430723, "loss": 1.776, "step": 10911 }, { "epoch": 0.5328125, "grad_norm": 0.26350274682044983, "learning_rate": 0.00025673567516021003, "loss": 1.7985, "step": 10912 }, { "epoch": 0.532861328125, "grad_norm": 0.28850844502449036, "learning_rate": 0.0002567007633087634, "loss": 1.78, "step": 10913 }, { "epoch": 0.53291015625, "grad_norm": 0.25587838888168335, "learning_rate": 0.00025666585190081345, "loss": 1.7703, "step": 10914 }, { "epoch": 0.532958984375, "grad_norm": 0.23097527027130127, "learning_rate": 0.00025663094093720634, "loss": 1.7877, "step": 10915 }, { "epoch": 0.5330078125, "grad_norm": 0.24719169735908508, "learning_rate": 0.0002565960304187881, "loss": 1.787, "step": 10916 }, { "epoch": 0.533056640625, "grad_norm": 0.23558823764324188, "learning_rate": 0.0002565611203464049, "loss": 1.8106, "step": 10917 }, { "epoch": 0.53310546875, "grad_norm": 0.3096466362476349, "learning_rate": 0.0002565262107209027, "loss": 1.7863, "step": 10918 }, { "epoch": 0.533154296875, "grad_norm": 0.2388150691986084, "learning_rate": 0.00025649130154312773, "loss": 1.7714, "step": 10919 }, { "epoch": 0.533203125, "grad_norm": 0.2597604990005493, "learning_rate": 0.0002564563928139258, "loss": 1.7686, "step": 10920 }, { "epoch": 0.533251953125, "grad_norm": 0.274606317281723, "learning_rate": 0.0002564214845341432, "loss": 1.7812, "step": 10921 }, { "epoch": 0.53330078125, "grad_norm": 0.2275247424840927, "learning_rate": 0.0002563865767046259, "loss": 1.7861, "step": 10922 }, { "epoch": 0.533349609375, "grad_norm": 0.27088356018066406, "learning_rate": 0.00025635166932621965, "loss": 1.812, "step": 10923 }, { "epoch": 0.5333984375, "grad_norm": 0.22911392152309418, "learning_rate": 0.0002563167623997709, "loss": 1.7637, "step": 10924 }, { "epoch": 0.533447265625, "grad_norm": 0.30178752541542053, "learning_rate": 0.00025628185592612525, "loss": 1.8032, "step": 10925 }, { "epoch": 0.53349609375, "grad_norm": 0.2095085084438324, "learning_rate": 0.00025624694990612903, "loss": 1.8099, "step": 10926 }, { "epoch": 0.533544921875, "grad_norm": 0.2887331247329712, "learning_rate": 0.0002562120443406278, "loss": 1.7919, "step": 10927 }, { "epoch": 0.53359375, "grad_norm": 0.2544444501399994, "learning_rate": 0.00025617713923046804, "loss": 1.8063, "step": 10928 }, { "epoch": 0.533642578125, "grad_norm": 0.24633601307868958, "learning_rate": 0.0002561422345764953, "loss": 1.7761, "step": 10929 }, { "epoch": 0.53369140625, "grad_norm": 0.30216124653816223, "learning_rate": 0.00025610733037955563, "loss": 1.7663, "step": 10930 }, { "epoch": 0.533740234375, "grad_norm": 0.23634883761405945, "learning_rate": 0.00025607242664049505, "loss": 1.7676, "step": 10931 }, { "epoch": 0.5337890625, "grad_norm": 0.315355122089386, "learning_rate": 0.00025603752336015933, "loss": 1.7905, "step": 10932 }, { "epoch": 0.533837890625, "grad_norm": 0.270394504070282, "learning_rate": 0.00025600262053939463, "loss": 1.7878, "step": 10933 }, { "epoch": 0.53388671875, "grad_norm": 0.2638017535209656, "learning_rate": 0.0002559677181790465, "loss": 1.8031, "step": 10934 }, { "epoch": 0.533935546875, "grad_norm": 0.28592851758003235, "learning_rate": 0.0002559328162799611, "loss": 1.7772, "step": 10935 }, { "epoch": 0.533984375, "grad_norm": 0.23627649247646332, "learning_rate": 0.0002558979148429842, "loss": 1.7769, "step": 10936 }, { "epoch": 0.534033203125, "grad_norm": 0.31202760338783264, "learning_rate": 0.0002558630138689617, "loss": 1.7915, "step": 10937 }, { "epoch": 0.53408203125, "grad_norm": 0.20508402585983276, "learning_rate": 0.00025582811335873943, "loss": 1.7989, "step": 10938 }, { "epoch": 0.534130859375, "grad_norm": 0.2645478844642639, "learning_rate": 0.00025579321331316323, "loss": 1.8124, "step": 10939 }, { "epoch": 0.5341796875, "grad_norm": 0.23520836234092712, "learning_rate": 0.00025575831373307893, "loss": 1.7923, "step": 10940 }, { "epoch": 0.534228515625, "grad_norm": 0.24271661043167114, "learning_rate": 0.0002557234146193324, "loss": 1.7767, "step": 10941 }, { "epoch": 0.53427734375, "grad_norm": 0.31862929463386536, "learning_rate": 0.0002556885159727694, "loss": 1.7851, "step": 10942 }, { "epoch": 0.534326171875, "grad_norm": 0.23935523629188538, "learning_rate": 0.0002556536177942356, "loss": 1.7945, "step": 10943 }, { "epoch": 0.534375, "grad_norm": 0.30607396364212036, "learning_rate": 0.000255618720084577, "loss": 1.7974, "step": 10944 }, { "epoch": 0.534423828125, "grad_norm": 0.2967706024646759, "learning_rate": 0.00025558382284463925, "loss": 1.8088, "step": 10945 }, { "epoch": 0.53447265625, "grad_norm": 0.285732626914978, "learning_rate": 0.00025554892607526826, "loss": 1.8075, "step": 10946 }, { "epoch": 0.534521484375, "grad_norm": 0.24209485948085785, "learning_rate": 0.00025551402977730967, "loss": 1.767, "step": 10947 }, { "epoch": 0.5345703125, "grad_norm": 0.25660091638565063, "learning_rate": 0.0002554791339516091, "loss": 1.7684, "step": 10948 }, { "epoch": 0.534619140625, "grad_norm": 0.3307865262031555, "learning_rate": 0.0002554442385990125, "loss": 1.7885, "step": 10949 }, { "epoch": 0.53466796875, "grad_norm": 0.20940718054771423, "learning_rate": 0.0002554093437203654, "loss": 1.7779, "step": 10950 }, { "epoch": 0.534716796875, "grad_norm": 0.3356439769268036, "learning_rate": 0.0002553744493165137, "loss": 1.7701, "step": 10951 }, { "epoch": 0.534765625, "grad_norm": 0.25917142629623413, "learning_rate": 0.0002553395553883029, "loss": 1.7976, "step": 10952 }, { "epoch": 0.534814453125, "grad_norm": 0.26822102069854736, "learning_rate": 0.0002553046619365788, "loss": 1.7753, "step": 10953 }, { "epoch": 0.53486328125, "grad_norm": 0.25441673398017883, "learning_rate": 0.00025526976896218703, "loss": 1.7942, "step": 10954 }, { "epoch": 0.534912109375, "grad_norm": 0.23304018378257751, "learning_rate": 0.0002552348764659733, "loss": 1.7947, "step": 10955 }, { "epoch": 0.5349609375, "grad_norm": 0.2898719608783722, "learning_rate": 0.00025519998444878324, "loss": 1.7598, "step": 10956 }, { "epoch": 0.535009765625, "grad_norm": 0.23242227733135223, "learning_rate": 0.00025516509291146234, "loss": 1.7946, "step": 10957 }, { "epoch": 0.53505859375, "grad_norm": 0.27578458189964294, "learning_rate": 0.00025513020185485646, "loss": 1.7999, "step": 10958 }, { "epoch": 0.535107421875, "grad_norm": 0.2163749635219574, "learning_rate": 0.00025509531127981106, "loss": 1.8009, "step": 10959 }, { "epoch": 0.53515625, "grad_norm": 0.2904439866542816, "learning_rate": 0.0002550604211871718, "loss": 1.7885, "step": 10960 }, { "epoch": 0.535205078125, "grad_norm": 0.24544267356395721, "learning_rate": 0.0002550255315777842, "loss": 1.7602, "step": 10961 }, { "epoch": 0.53525390625, "grad_norm": 0.2656324803829193, "learning_rate": 0.000254990642452494, "loss": 1.7819, "step": 10962 }, { "epoch": 0.535302734375, "grad_norm": 0.30302876234054565, "learning_rate": 0.0002549557538121466, "loss": 1.7928, "step": 10963 }, { "epoch": 0.5353515625, "grad_norm": 0.24671144783496857, "learning_rate": 0.0002549208656575876, "loss": 1.7682, "step": 10964 }, { "epoch": 0.535400390625, "grad_norm": 0.32454225420951843, "learning_rate": 0.00025488597798966256, "loss": 1.7942, "step": 10965 }, { "epoch": 0.53544921875, "grad_norm": 0.32229286432266235, "learning_rate": 0.0002548510908092169, "loss": 1.7875, "step": 10966 }, { "epoch": 0.535498046875, "grad_norm": 0.33728694915771484, "learning_rate": 0.0002548162041170964, "loss": 1.8013, "step": 10967 }, { "epoch": 0.535546875, "grad_norm": 0.22547133266925812, "learning_rate": 0.00025478131791414633, "loss": 1.8116, "step": 10968 }, { "epoch": 0.535595703125, "grad_norm": 0.36478686332702637, "learning_rate": 0.0002547464322012123, "loss": 1.7965, "step": 10969 }, { "epoch": 0.53564453125, "grad_norm": 0.2702721357345581, "learning_rate": 0.0002547115469791397, "loss": 1.7821, "step": 10970 }, { "epoch": 0.535693359375, "grad_norm": 0.2939920723438263, "learning_rate": 0.0002546766622487741, "loss": 1.7888, "step": 10971 }, { "epoch": 0.5357421875, "grad_norm": 0.26833340525627136, "learning_rate": 0.00025464177801096094, "loss": 1.7922, "step": 10972 }, { "epoch": 0.535791015625, "grad_norm": 0.3137141466140747, "learning_rate": 0.0002546068942665455, "loss": 1.7811, "step": 10973 }, { "epoch": 0.53583984375, "grad_norm": 0.2741020917892456, "learning_rate": 0.00025457201101637363, "loss": 1.7528, "step": 10974 }, { "epoch": 0.535888671875, "grad_norm": 0.27607980370521545, "learning_rate": 0.0002545371282612902, "loss": 1.795, "step": 10975 }, { "epoch": 0.5359375, "grad_norm": 0.24548383057117462, "learning_rate": 0.0002545022460021411, "loss": 1.7905, "step": 10976 }, { "epoch": 0.535986328125, "grad_norm": 0.26062577962875366, "learning_rate": 0.00025446736423977146, "loss": 1.7739, "step": 10977 }, { "epoch": 0.53603515625, "grad_norm": 0.2289004921913147, "learning_rate": 0.0002544324829750268, "loss": 1.7711, "step": 10978 }, { "epoch": 0.536083984375, "grad_norm": 0.20596536993980408, "learning_rate": 0.0002543976022087523, "loss": 1.7723, "step": 10979 }, { "epoch": 0.5361328125, "grad_norm": 0.22468912601470947, "learning_rate": 0.0002543627219417936, "loss": 1.775, "step": 10980 }, { "epoch": 0.536181640625, "grad_norm": 0.2701500654220581, "learning_rate": 0.0002543278421749959, "loss": 1.7809, "step": 10981 }, { "epoch": 0.53623046875, "grad_norm": 0.22974511981010437, "learning_rate": 0.0002542929629092045, "loss": 1.7722, "step": 10982 }, { "epoch": 0.536279296875, "grad_norm": 0.21088066697120667, "learning_rate": 0.00025425808414526486, "loss": 1.7805, "step": 10983 }, { "epoch": 0.536328125, "grad_norm": 0.2638193368911743, "learning_rate": 0.00025422320588402216, "loss": 1.7833, "step": 10984 }, { "epoch": 0.536376953125, "grad_norm": 0.2538689970970154, "learning_rate": 0.00025418832812632177, "loss": 1.7653, "step": 10985 }, { "epoch": 0.53642578125, "grad_norm": 0.22944806516170502, "learning_rate": 0.0002541534508730089, "loss": 1.8066, "step": 10986 }, { "epoch": 0.536474609375, "grad_norm": 0.23562294244766235, "learning_rate": 0.0002541185741249289, "loss": 1.773, "step": 10987 }, { "epoch": 0.5365234375, "grad_norm": 0.2069801539182663, "learning_rate": 0.0002540836978829272, "loss": 1.7928, "step": 10988 }, { "epoch": 0.536572265625, "grad_norm": 0.24104049801826477, "learning_rate": 0.0002540488221478487, "loss": 1.7822, "step": 10989 }, { "epoch": 0.53662109375, "grad_norm": 0.249186173081398, "learning_rate": 0.0002540139469205389, "loss": 1.7996, "step": 10990 }, { "epoch": 0.536669921875, "grad_norm": 0.2397771179676056, "learning_rate": 0.00025397907220184296, "loss": 1.8046, "step": 10991 }, { "epoch": 0.53671875, "grad_norm": 0.26041191816329956, "learning_rate": 0.0002539441979926061, "loss": 1.8198, "step": 10992 }, { "epoch": 0.536767578125, "grad_norm": 0.2857736349105835, "learning_rate": 0.0002539093242936735, "loss": 1.7871, "step": 10993 }, { "epoch": 0.53681640625, "grad_norm": 0.3102831542491913, "learning_rate": 0.0002538744511058904, "loss": 1.7966, "step": 10994 }, { "epoch": 0.536865234375, "grad_norm": 0.27409598231315613, "learning_rate": 0.00025383957843010184, "loss": 1.7843, "step": 10995 }, { "epoch": 0.5369140625, "grad_norm": 0.22926653921604156, "learning_rate": 0.00025380470626715327, "loss": 1.8035, "step": 10996 }, { "epoch": 0.536962890625, "grad_norm": 0.294288694858551, "learning_rate": 0.0002537698346178896, "loss": 1.7868, "step": 10997 }, { "epoch": 0.53701171875, "grad_norm": 0.23993702232837677, "learning_rate": 0.0002537349634831559, "loss": 1.7699, "step": 10998 }, { "epoch": 0.537060546875, "grad_norm": 0.2517111897468567, "learning_rate": 0.00025370009286379764, "loss": 1.7735, "step": 10999 }, { "epoch": 0.537109375, "grad_norm": 0.2679988443851471, "learning_rate": 0.00025366522276065967, "loss": 1.7654, "step": 11000 }, { "epoch": 0.537158203125, "grad_norm": 0.3258879780769348, "learning_rate": 0.00025363035317458715, "loss": 1.7902, "step": 11001 }, { "epoch": 0.53720703125, "grad_norm": 0.2521243095397949, "learning_rate": 0.0002535954841064252, "loss": 1.7841, "step": 11002 }, { "epoch": 0.537255859375, "grad_norm": 0.27162933349609375, "learning_rate": 0.00025356061555701894, "loss": 1.7916, "step": 11003 }, { "epoch": 0.5373046875, "grad_norm": 0.2636101245880127, "learning_rate": 0.00025352574752721325, "loss": 1.7904, "step": 11004 }, { "epoch": 0.537353515625, "grad_norm": 0.2531655728816986, "learning_rate": 0.0002534908800178535, "loss": 1.7547, "step": 11005 }, { "epoch": 0.53740234375, "grad_norm": 0.21637894213199615, "learning_rate": 0.00025345601302978444, "loss": 1.7901, "step": 11006 }, { "epoch": 0.537451171875, "grad_norm": 0.25702470541000366, "learning_rate": 0.0002534211465638512, "loss": 1.7788, "step": 11007 }, { "epoch": 0.5375, "grad_norm": 0.23627416789531708, "learning_rate": 0.0002533862806208989, "loss": 1.7854, "step": 11008 }, { "epoch": 0.537548828125, "grad_norm": 0.23523961007595062, "learning_rate": 0.00025335141520177234, "loss": 1.784, "step": 11009 }, { "epoch": 0.53759765625, "grad_norm": 0.2395910769701004, "learning_rate": 0.00025331655030731676, "loss": 1.7659, "step": 11010 }, { "epoch": 0.537646484375, "grad_norm": 0.24457649886608124, "learning_rate": 0.00025328168593837685, "loss": 1.7629, "step": 11011 }, { "epoch": 0.5376953125, "grad_norm": 0.25108715891838074, "learning_rate": 0.00025324682209579784, "loss": 1.8019, "step": 11012 }, { "epoch": 0.537744140625, "grad_norm": 0.23304013907909393, "learning_rate": 0.0002532119587804245, "loss": 1.8071, "step": 11013 }, { "epoch": 0.53779296875, "grad_norm": 0.2707344889640808, "learning_rate": 0.00025317709599310197, "loss": 1.796, "step": 11014 }, { "epoch": 0.537841796875, "grad_norm": 0.27392396330833435, "learning_rate": 0.00025314223373467494, "loss": 1.778, "step": 11015 }, { "epoch": 0.537890625, "grad_norm": 0.24182184040546417, "learning_rate": 0.00025310737200598843, "loss": 1.794, "step": 11016 }, { "epoch": 0.537939453125, "grad_norm": 0.25870174169540405, "learning_rate": 0.00025307251080788744, "loss": 1.7887, "step": 11017 }, { "epoch": 0.53798828125, "grad_norm": 0.22061064839363098, "learning_rate": 0.00025303765014121664, "loss": 1.7869, "step": 11018 }, { "epoch": 0.538037109375, "grad_norm": 0.25599735975265503, "learning_rate": 0.0002530027900068212, "loss": 1.7785, "step": 11019 }, { "epoch": 0.5380859375, "grad_norm": 0.2655968964099884, "learning_rate": 0.0002529679304055456, "loss": 1.7866, "step": 11020 }, { "epoch": 0.538134765625, "grad_norm": 0.23279789090156555, "learning_rate": 0.0002529330713382351, "loss": 1.7995, "step": 11021 }, { "epoch": 0.53818359375, "grad_norm": 0.24217644333839417, "learning_rate": 0.00025289821280573423, "loss": 1.7867, "step": 11022 }, { "epoch": 0.538232421875, "grad_norm": 0.2530781328678131, "learning_rate": 0.000252863354808888, "loss": 1.7938, "step": 11023 }, { "epoch": 0.53828125, "grad_norm": 0.23804542422294617, "learning_rate": 0.00025282849734854107, "loss": 1.7771, "step": 11024 }, { "epoch": 0.538330078125, "grad_norm": 0.23279988765716553, "learning_rate": 0.00025279364042553836, "loss": 1.8014, "step": 11025 }, { "epoch": 0.53837890625, "grad_norm": 0.35094591975212097, "learning_rate": 0.00025275878404072466, "loss": 1.7941, "step": 11026 }, { "epoch": 0.538427734375, "grad_norm": 0.2796303629875183, "learning_rate": 0.0002527239281949446, "loss": 1.7718, "step": 11027 }, { "epoch": 0.5384765625, "grad_norm": 0.27756232023239136, "learning_rate": 0.00025268907288904317, "loss": 1.8023, "step": 11028 }, { "epoch": 0.538525390625, "grad_norm": 0.2601303458213806, "learning_rate": 0.00025265421812386483, "loss": 1.77, "step": 11029 }, { "epoch": 0.53857421875, "grad_norm": 0.32059329748153687, "learning_rate": 0.0002526193639002546, "loss": 1.8029, "step": 11030 }, { "epoch": 0.538623046875, "grad_norm": 0.30780208110809326, "learning_rate": 0.00025258451021905703, "loss": 1.7812, "step": 11031 }, { "epoch": 0.538671875, "grad_norm": 0.29399800300598145, "learning_rate": 0.0002525496570811168, "loss": 1.7752, "step": 11032 }, { "epoch": 0.538720703125, "grad_norm": 0.30167436599731445, "learning_rate": 0.00025251480448727873, "loss": 1.7955, "step": 11033 }, { "epoch": 0.53876953125, "grad_norm": 0.3033093214035034, "learning_rate": 0.0002524799524383874, "loss": 1.7943, "step": 11034 }, { "epoch": 0.538818359375, "grad_norm": 0.23017145693302155, "learning_rate": 0.00025244510093528754, "loss": 1.7824, "step": 11035 }, { "epoch": 0.5388671875, "grad_norm": 0.3498494327068329, "learning_rate": 0.0002524102499788238, "loss": 1.7833, "step": 11036 }, { "epoch": 0.538916015625, "grad_norm": 0.30351316928863525, "learning_rate": 0.0002523753995698408, "loss": 1.791, "step": 11037 }, { "epoch": 0.53896484375, "grad_norm": 0.2713892459869385, "learning_rate": 0.00025234054970918305, "loss": 1.7963, "step": 11038 }, { "epoch": 0.539013671875, "grad_norm": 0.2790580689907074, "learning_rate": 0.0002523057003976954, "loss": 1.7871, "step": 11039 }, { "epoch": 0.5390625, "grad_norm": 0.2768227756023407, "learning_rate": 0.00025227085163622234, "loss": 1.7831, "step": 11040 }, { "epoch": 0.539111328125, "grad_norm": 0.2325640767812729, "learning_rate": 0.0002522360034256084, "loss": 1.7837, "step": 11041 }, { "epoch": 0.53916015625, "grad_norm": 0.2732591927051544, "learning_rate": 0.0002522011557666982, "loss": 1.781, "step": 11042 }, { "epoch": 0.539208984375, "grad_norm": 0.2761472761631012, "learning_rate": 0.00025216630866033634, "loss": 1.7994, "step": 11043 }, { "epoch": 0.5392578125, "grad_norm": 0.22619466483592987, "learning_rate": 0.0002521314621073673, "loss": 1.801, "step": 11044 }, { "epoch": 0.539306640625, "grad_norm": 0.24565798044204712, "learning_rate": 0.0002520966161086357, "loss": 1.785, "step": 11045 }, { "epoch": 0.53935546875, "grad_norm": 0.23383870720863342, "learning_rate": 0.000252061770664986, "loss": 1.7745, "step": 11046 }, { "epoch": 0.539404296875, "grad_norm": 0.21076767146587372, "learning_rate": 0.0002520269257772627, "loss": 1.7971, "step": 11047 }, { "epoch": 0.539453125, "grad_norm": 0.27021655440330505, "learning_rate": 0.0002519920814463103, "loss": 1.7889, "step": 11048 }, { "epoch": 0.539501953125, "grad_norm": 0.2705923616886139, "learning_rate": 0.0002519572376729733, "loss": 1.7727, "step": 11049 }, { "epoch": 0.53955078125, "grad_norm": 0.24770891666412354, "learning_rate": 0.00025192239445809614, "loss": 1.7832, "step": 11050 }, { "epoch": 0.539599609375, "grad_norm": 0.22952435910701752, "learning_rate": 0.00025188755180252333, "loss": 1.7863, "step": 11051 }, { "epoch": 0.5396484375, "grad_norm": 0.2797076404094696, "learning_rate": 0.00025185270970709924, "loss": 1.7799, "step": 11052 }, { "epoch": 0.539697265625, "grad_norm": 0.25942227244377136, "learning_rate": 0.0002518178681726683, "loss": 1.8075, "step": 11053 }, { "epoch": 0.53974609375, "grad_norm": 0.22130246460437775, "learning_rate": 0.00025178302720007504, "loss": 1.789, "step": 11054 }, { "epoch": 0.539794921875, "grad_norm": 0.2505801022052765, "learning_rate": 0.0002517481867901636, "loss": 1.7674, "step": 11055 }, { "epoch": 0.53984375, "grad_norm": 0.24370615184307098, "learning_rate": 0.00025171334694377875, "loss": 1.785, "step": 11056 }, { "epoch": 0.539892578125, "grad_norm": 0.31440111994743347, "learning_rate": 0.0002516785076617645, "loss": 1.8014, "step": 11057 }, { "epoch": 0.53994140625, "grad_norm": 0.2529342770576477, "learning_rate": 0.00025164366894496536, "loss": 1.7904, "step": 11058 }, { "epoch": 0.539990234375, "grad_norm": 0.25490596890449524, "learning_rate": 0.00025160883079422574, "loss": 1.7813, "step": 11059 }, { "epoch": 0.5400390625, "grad_norm": 0.2913029193878174, "learning_rate": 0.00025157399321038985, "loss": 1.7844, "step": 11060 }, { "epoch": 0.540087890625, "grad_norm": 0.24246026575565338, "learning_rate": 0.000251539156194302, "loss": 1.7957, "step": 11061 }, { "epoch": 0.54013671875, "grad_norm": 0.24855268001556396, "learning_rate": 0.00025150431974680666, "loss": 1.7506, "step": 11062 }, { "epoch": 0.540185546875, "grad_norm": 0.27220940589904785, "learning_rate": 0.0002514694838687479, "loss": 1.7713, "step": 11063 }, { "epoch": 0.540234375, "grad_norm": 0.2525688707828522, "learning_rate": 0.0002514346485609702, "loss": 1.7921, "step": 11064 }, { "epoch": 0.540283203125, "grad_norm": 0.2182997316122055, "learning_rate": 0.0002513998138243177, "loss": 1.7874, "step": 11065 }, { "epoch": 0.54033203125, "grad_norm": 0.2246568500995636, "learning_rate": 0.0002513649796596347, "loss": 1.8023, "step": 11066 }, { "epoch": 0.540380859375, "grad_norm": 0.25185340642929077, "learning_rate": 0.0002513301460677653, "loss": 1.7816, "step": 11067 }, { "epoch": 0.5404296875, "grad_norm": 0.23160530626773834, "learning_rate": 0.0002512953130495539, "loss": 1.7668, "step": 11068 }, { "epoch": 0.540478515625, "grad_norm": 0.30667614936828613, "learning_rate": 0.0002512604806058446, "loss": 1.789, "step": 11069 }, { "epoch": 0.54052734375, "grad_norm": 0.24816860258579254, "learning_rate": 0.00025122564873748164, "loss": 1.7759, "step": 11070 }, { "epoch": 0.540576171875, "grad_norm": 0.25775057077407837, "learning_rate": 0.0002511908174453092, "loss": 1.7877, "step": 11071 }, { "epoch": 0.540625, "grad_norm": 0.2917340397834778, "learning_rate": 0.00025115598673017145, "loss": 1.7736, "step": 11072 }, { "epoch": 0.540673828125, "grad_norm": 0.2573351562023163, "learning_rate": 0.00025112115659291245, "loss": 1.7918, "step": 11073 }, { "epoch": 0.54072265625, "grad_norm": 0.24033550918102264, "learning_rate": 0.00025108632703437644, "loss": 1.7622, "step": 11074 }, { "epoch": 0.540771484375, "grad_norm": 0.30318912863731384, "learning_rate": 0.0002510514980554075, "loss": 1.7685, "step": 11075 }, { "epoch": 0.5408203125, "grad_norm": 0.3361665606498718, "learning_rate": 0.0002510166696568498, "loss": 1.7561, "step": 11076 }, { "epoch": 0.540869140625, "grad_norm": 0.2782335877418518, "learning_rate": 0.00025098184183954724, "loss": 1.778, "step": 11077 }, { "epoch": 0.54091796875, "grad_norm": 0.4012356698513031, "learning_rate": 0.00025094701460434416, "loss": 1.7929, "step": 11078 }, { "epoch": 0.540966796875, "grad_norm": 0.33853596448898315, "learning_rate": 0.0002509121879520845, "loss": 1.7933, "step": 11079 }, { "epoch": 0.541015625, "grad_norm": 0.28083521127700806, "learning_rate": 0.00025087736188361226, "loss": 1.7805, "step": 11080 }, { "epoch": 0.541064453125, "grad_norm": 0.3667486608028412, "learning_rate": 0.00025084253639977164, "loss": 1.7762, "step": 11081 }, { "epoch": 0.54111328125, "grad_norm": 0.21812660992145538, "learning_rate": 0.00025080771150140643, "loss": 1.7927, "step": 11082 }, { "epoch": 0.541162109375, "grad_norm": 0.3288244307041168, "learning_rate": 0.0002507728871893608, "loss": 1.7748, "step": 11083 }, { "epoch": 0.5412109375, "grad_norm": 0.23878638446331024, "learning_rate": 0.0002507380634644787, "loss": 1.7904, "step": 11084 }, { "epoch": 0.541259765625, "grad_norm": 0.30498069524765015, "learning_rate": 0.00025070324032760424, "loss": 1.803, "step": 11085 }, { "epoch": 0.54130859375, "grad_norm": 0.30018535256385803, "learning_rate": 0.00025066841777958117, "loss": 1.8031, "step": 11086 }, { "epoch": 0.541357421875, "grad_norm": 0.2760199010372162, "learning_rate": 0.0002506335958212535, "loss": 1.7933, "step": 11087 }, { "epoch": 0.54140625, "grad_norm": 0.32129397988319397, "learning_rate": 0.0002505987744534653, "loss": 1.7818, "step": 11088 }, { "epoch": 0.541455078125, "grad_norm": 0.2580602765083313, "learning_rate": 0.00025056395367706037, "loss": 1.7735, "step": 11089 }, { "epoch": 0.54150390625, "grad_norm": 0.24424394965171814, "learning_rate": 0.0002505291334928827, "loss": 1.7953, "step": 11090 }, { "epoch": 0.541552734375, "grad_norm": 0.2081185132265091, "learning_rate": 0.000250494313901776, "loss": 1.7924, "step": 11091 }, { "epoch": 0.5416015625, "grad_norm": 0.24739797413349152, "learning_rate": 0.00025045949490458446, "loss": 1.7719, "step": 11092 }, { "epoch": 0.541650390625, "grad_norm": 0.26537176966667175, "learning_rate": 0.0002504246765021517, "loss": 1.784, "step": 11093 }, { "epoch": 0.54169921875, "grad_norm": 0.25663864612579346, "learning_rate": 0.00025038985869532167, "loss": 1.7941, "step": 11094 }, { "epoch": 0.541748046875, "grad_norm": 0.2137758880853653, "learning_rate": 0.0002503550414849381, "loss": 1.7899, "step": 11095 }, { "epoch": 0.541796875, "grad_norm": 0.2870349586009979, "learning_rate": 0.000250320224871845, "loss": 1.7839, "step": 11096 }, { "epoch": 0.541845703125, "grad_norm": 0.24790918827056885, "learning_rate": 0.00025028540885688606, "loss": 1.8065, "step": 11097 }, { "epoch": 0.54189453125, "grad_norm": 0.24128304421901703, "learning_rate": 0.00025025059344090507, "loss": 1.7822, "step": 11098 }, { "epoch": 0.541943359375, "grad_norm": 0.2681152820587158, "learning_rate": 0.00025021577862474587, "loss": 1.7705, "step": 11099 }, { "epoch": 0.5419921875, "grad_norm": 0.23833739757537842, "learning_rate": 0.0002501809644092521, "loss": 1.7864, "step": 11100 }, { "epoch": 0.542041015625, "grad_norm": 0.24587395787239075, "learning_rate": 0.0002501461507952677, "loss": 1.7737, "step": 11101 }, { "epoch": 0.54208984375, "grad_norm": 0.2684563100337982, "learning_rate": 0.0002501113377836362, "loss": 1.7876, "step": 11102 }, { "epoch": 0.542138671875, "grad_norm": 0.24837028980255127, "learning_rate": 0.0002500765253752015, "loss": 1.801, "step": 11103 }, { "epoch": 0.5421875, "grad_norm": 0.2515249252319336, "learning_rate": 0.0002500417135708072, "loss": 1.7941, "step": 11104 }, { "epoch": 0.542236328125, "grad_norm": 0.2357073277235031, "learning_rate": 0.00025000690237129703, "loss": 1.7594, "step": 11105 }, { "epoch": 0.54228515625, "grad_norm": 0.23051892220973969, "learning_rate": 0.00024997209177751473, "loss": 1.7684, "step": 11106 }, { "epoch": 0.542333984375, "grad_norm": 0.2691432237625122, "learning_rate": 0.0002499372817903038, "loss": 1.7728, "step": 11107 }, { "epoch": 0.5423828125, "grad_norm": 0.22205908596515656, "learning_rate": 0.000249902472410508, "loss": 1.7925, "step": 11108 }, { "epoch": 0.542431640625, "grad_norm": 0.2812891900539398, "learning_rate": 0.00024986766363897095, "loss": 1.7613, "step": 11109 }, { "epoch": 0.54248046875, "grad_norm": 0.2534211277961731, "learning_rate": 0.0002498328554765363, "loss": 1.7897, "step": 11110 }, { "epoch": 0.542529296875, "grad_norm": 0.22938579320907593, "learning_rate": 0.0002497980479240475, "loss": 1.7915, "step": 11111 }, { "epoch": 0.542578125, "grad_norm": 0.23830023407936096, "learning_rate": 0.00024976324098234835, "loss": 1.7892, "step": 11112 }, { "epoch": 0.542626953125, "grad_norm": 0.22744624316692352, "learning_rate": 0.00024972843465228227, "loss": 1.794, "step": 11113 }, { "epoch": 0.54267578125, "grad_norm": 0.23841455578804016, "learning_rate": 0.000249693628934693, "loss": 1.7863, "step": 11114 }, { "epoch": 0.542724609375, "grad_norm": 0.22691349685192108, "learning_rate": 0.00024965882383042395, "loss": 1.755, "step": 11115 }, { "epoch": 0.5427734375, "grad_norm": 0.25188711285591125, "learning_rate": 0.0002496240193403185, "loss": 1.7891, "step": 11116 }, { "epoch": 0.542822265625, "grad_norm": 0.27210643887519836, "learning_rate": 0.0002495892154652205, "loss": 1.7767, "step": 11117 }, { "epoch": 0.54287109375, "grad_norm": 0.2169494479894638, "learning_rate": 0.0002495544122059732, "loss": 1.7931, "step": 11118 }, { "epoch": 0.542919921875, "grad_norm": 0.2481803148984909, "learning_rate": 0.00024951960956342016, "loss": 1.7621, "step": 11119 }, { "epoch": 0.54296875, "grad_norm": 0.23166576027870178, "learning_rate": 0.00024948480753840486, "loss": 1.7736, "step": 11120 }, { "epoch": 0.543017578125, "grad_norm": 0.19938789308071136, "learning_rate": 0.0002494500061317708, "loss": 1.7801, "step": 11121 }, { "epoch": 0.54306640625, "grad_norm": 0.23511333763599396, "learning_rate": 0.00024941520534436126, "loss": 1.7706, "step": 11122 }, { "epoch": 0.543115234375, "grad_norm": 0.21558785438537598, "learning_rate": 0.00024938040517701987, "loss": 1.7728, "step": 11123 }, { "epoch": 0.5431640625, "grad_norm": 0.21285666525363922, "learning_rate": 0.00024934560563058994, "loss": 1.7744, "step": 11124 }, { "epoch": 0.543212890625, "grad_norm": 0.22245226800441742, "learning_rate": 0.0002493108067059148, "loss": 1.7902, "step": 11125 }, { "epoch": 0.54326171875, "grad_norm": 0.26063624024391174, "learning_rate": 0.00024927600840383795, "loss": 1.7941, "step": 11126 }, { "epoch": 0.543310546875, "grad_norm": 0.26791712641716003, "learning_rate": 0.0002492412107252026, "loss": 1.796, "step": 11127 }, { "epoch": 0.543359375, "grad_norm": 0.23045453429222107, "learning_rate": 0.0002492064136708524, "loss": 1.7673, "step": 11128 }, { "epoch": 0.543408203125, "grad_norm": 0.21977661550045013, "learning_rate": 0.0002491716172416303, "loss": 1.7604, "step": 11129 }, { "epoch": 0.54345703125, "grad_norm": 0.2975873053073883, "learning_rate": 0.0002491368214383799, "loss": 1.7935, "step": 11130 }, { "epoch": 0.543505859375, "grad_norm": 0.2495177686214447, "learning_rate": 0.00024910202626194435, "loss": 1.7749, "step": 11131 }, { "epoch": 0.5435546875, "grad_norm": 0.26556307077407837, "learning_rate": 0.0002490672317131671, "loss": 1.8113, "step": 11132 }, { "epoch": 0.543603515625, "grad_norm": 0.3266705870628357, "learning_rate": 0.0002490324377928913, "loss": 1.8208, "step": 11133 }, { "epoch": 0.54365234375, "grad_norm": 0.2867780923843384, "learning_rate": 0.00024899764450196014, "loss": 1.7588, "step": 11134 }, { "epoch": 0.543701171875, "grad_norm": 0.31963276863098145, "learning_rate": 0.00024896285184121706, "loss": 1.7513, "step": 11135 }, { "epoch": 0.54375, "grad_norm": 0.256132036447525, "learning_rate": 0.00024892805981150507, "loss": 1.7899, "step": 11136 }, { "epoch": 0.543798828125, "grad_norm": 0.37510424852371216, "learning_rate": 0.0002488932684136677, "loss": 1.777, "step": 11137 }, { "epoch": 0.54384765625, "grad_norm": 0.3102555274963379, "learning_rate": 0.0002488584776485477, "loss": 1.7897, "step": 11138 }, { "epoch": 0.543896484375, "grad_norm": 0.2373683601617813, "learning_rate": 0.00024882368751698867, "loss": 1.7638, "step": 11139 }, { "epoch": 0.5439453125, "grad_norm": 0.32379207015037537, "learning_rate": 0.0002487888980198336, "loss": 1.7829, "step": 11140 }, { "epoch": 0.543994140625, "grad_norm": 0.2517765462398529, "learning_rate": 0.00024875410915792544, "loss": 1.772, "step": 11141 }, { "epoch": 0.54404296875, "grad_norm": 0.30313071608543396, "learning_rate": 0.00024871932093210777, "loss": 1.791, "step": 11142 }, { "epoch": 0.544091796875, "grad_norm": 0.25979742407798767, "learning_rate": 0.0002486845333432233, "loss": 1.7813, "step": 11143 }, { "epoch": 0.544140625, "grad_norm": 0.2656211256980896, "learning_rate": 0.0002486497463921154, "loss": 1.7878, "step": 11144 }, { "epoch": 0.544189453125, "grad_norm": 0.2569170594215393, "learning_rate": 0.00024861496007962705, "loss": 1.7901, "step": 11145 }, { "epoch": 0.54423828125, "grad_norm": 0.25742873549461365, "learning_rate": 0.0002485801744066013, "loss": 1.8168, "step": 11146 }, { "epoch": 0.544287109375, "grad_norm": 0.26031747460365295, "learning_rate": 0.00024854538937388125, "loss": 1.8104, "step": 11147 }, { "epoch": 0.5443359375, "grad_norm": 0.27951183915138245, "learning_rate": 0.00024851060498231003, "loss": 1.8056, "step": 11148 }, { "epoch": 0.544384765625, "grad_norm": 0.24166858196258545, "learning_rate": 0.00024847582123273054, "loss": 1.7844, "step": 11149 }, { "epoch": 0.54443359375, "grad_norm": 0.256085604429245, "learning_rate": 0.0002484410381259858, "loss": 1.7888, "step": 11150 }, { "epoch": 0.544482421875, "grad_norm": 0.2699926495552063, "learning_rate": 0.0002484062556629189, "loss": 1.7941, "step": 11151 }, { "epoch": 0.54453125, "grad_norm": 0.21433638036251068, "learning_rate": 0.00024837147384437265, "loss": 1.8075, "step": 11152 }, { "epoch": 0.544580078125, "grad_norm": 0.25528836250305176, "learning_rate": 0.00024833669267119025, "loss": 1.7978, "step": 11153 }, { "epoch": 0.54462890625, "grad_norm": 0.24548187851905823, "learning_rate": 0.00024830191214421436, "loss": 1.7897, "step": 11154 }, { "epoch": 0.544677734375, "grad_norm": 0.2612312436103821, "learning_rate": 0.00024826713226428827, "loss": 1.7876, "step": 11155 }, { "epoch": 0.5447265625, "grad_norm": 0.2262350618839264, "learning_rate": 0.00024823235303225456, "loss": 1.7729, "step": 11156 }, { "epoch": 0.544775390625, "grad_norm": 0.2609773874282837, "learning_rate": 0.0002481975744489564, "loss": 1.7784, "step": 11157 }, { "epoch": 0.54482421875, "grad_norm": 0.24417410790920258, "learning_rate": 0.00024816279651523654, "loss": 1.8081, "step": 11158 }, { "epoch": 0.544873046875, "grad_norm": 0.2652101516723633, "learning_rate": 0.00024812801923193773, "loss": 1.7845, "step": 11159 }, { "epoch": 0.544921875, "grad_norm": 0.2063475251197815, "learning_rate": 0.0002480932425999031, "loss": 1.7822, "step": 11160 }, { "epoch": 0.544970703125, "grad_norm": 0.2633485198020935, "learning_rate": 0.0002480584666199753, "loss": 1.7762, "step": 11161 }, { "epoch": 0.54501953125, "grad_norm": 0.22974617779254913, "learning_rate": 0.00024802369129299723, "loss": 1.8083, "step": 11162 }, { "epoch": 0.545068359375, "grad_norm": 0.2826057970523834, "learning_rate": 0.0002479889166198116, "loss": 1.7783, "step": 11163 }, { "epoch": 0.5451171875, "grad_norm": 0.30991774797439575, "learning_rate": 0.00024795414260126136, "loss": 1.7959, "step": 11164 }, { "epoch": 0.545166015625, "grad_norm": 0.22448667883872986, "learning_rate": 0.0002479193692381892, "loss": 1.7497, "step": 11165 }, { "epoch": 0.54521484375, "grad_norm": 0.4050252437591553, "learning_rate": 0.0002478845965314378, "loss": 1.7716, "step": 11166 }, { "epoch": 0.545263671875, "grad_norm": 0.31373515725135803, "learning_rate": 0.0002478498244818501, "loss": 1.78, "step": 11167 }, { "epoch": 0.5453125, "grad_norm": 0.3100581467151642, "learning_rate": 0.0002478150530902686, "loss": 1.7562, "step": 11168 }, { "epoch": 0.545361328125, "grad_norm": 0.31544584035873413, "learning_rate": 0.00024778028235753624, "loss": 1.7726, "step": 11169 }, { "epoch": 0.54541015625, "grad_norm": 0.28029364347457886, "learning_rate": 0.0002477455122844955, "loss": 1.7846, "step": 11170 }, { "epoch": 0.545458984375, "grad_norm": 0.25116977095603943, "learning_rate": 0.00024771074287198924, "loss": 1.7721, "step": 11171 }, { "epoch": 0.5455078125, "grad_norm": 0.24315191805362701, "learning_rate": 0.00024767597412086, "loss": 1.7833, "step": 11172 }, { "epoch": 0.545556640625, "grad_norm": 0.24954594671726227, "learning_rate": 0.00024764120603195056, "loss": 1.7758, "step": 11173 }, { "epoch": 0.54560546875, "grad_norm": 0.26541706919670105, "learning_rate": 0.0002476064386061034, "loss": 1.8087, "step": 11174 }, { "epoch": 0.545654296875, "grad_norm": 0.3042420744895935, "learning_rate": 0.00024757167184416123, "loss": 1.7926, "step": 11175 }, { "epoch": 0.545703125, "grad_norm": 0.27180323004722595, "learning_rate": 0.0002475369057469667, "loss": 1.7938, "step": 11176 }, { "epoch": 0.545751953125, "grad_norm": 0.2672041058540344, "learning_rate": 0.00024750214031536217, "loss": 1.7978, "step": 11177 }, { "epoch": 0.54580078125, "grad_norm": 0.22847755253314972, "learning_rate": 0.0002474673755501905, "loss": 1.7884, "step": 11178 }, { "epoch": 0.545849609375, "grad_norm": 0.21762047708034515, "learning_rate": 0.000247432611452294, "loss": 1.788, "step": 11179 }, { "epoch": 0.5458984375, "grad_norm": 0.24593640863895416, "learning_rate": 0.0002473978480225154, "loss": 1.775, "step": 11180 }, { "epoch": 0.545947265625, "grad_norm": 0.20378918945789337, "learning_rate": 0.00024736308526169696, "loss": 1.7987, "step": 11181 }, { "epoch": 0.54599609375, "grad_norm": 0.318946897983551, "learning_rate": 0.00024732832317068157, "loss": 1.7899, "step": 11182 }, { "epoch": 0.546044921875, "grad_norm": 0.23713022470474243, "learning_rate": 0.0002472935617503114, "loss": 1.7754, "step": 11183 }, { "epoch": 0.54609375, "grad_norm": 0.262216180562973, "learning_rate": 0.000247258801001429, "loss": 1.7652, "step": 11184 }, { "epoch": 0.546142578125, "grad_norm": 0.2678123414516449, "learning_rate": 0.0002472240409248769, "loss": 1.7687, "step": 11185 }, { "epoch": 0.54619140625, "grad_norm": 0.22233352065086365, "learning_rate": 0.0002471892815214974, "loss": 1.7964, "step": 11186 }, { "epoch": 0.546240234375, "grad_norm": 0.2919764816761017, "learning_rate": 0.00024715452279213307, "loss": 1.7858, "step": 11187 }, { "epoch": 0.5462890625, "grad_norm": 0.19906020164489746, "learning_rate": 0.0002471197647376262, "loss": 1.8058, "step": 11188 }, { "epoch": 0.546337890625, "grad_norm": 0.27902111411094666, "learning_rate": 0.00024708500735881936, "loss": 1.7888, "step": 11189 }, { "epoch": 0.54638671875, "grad_norm": 0.2589530348777771, "learning_rate": 0.0002470502506565546, "loss": 1.8104, "step": 11190 }, { "epoch": 0.546435546875, "grad_norm": 0.23246008157730103, "learning_rate": 0.0002470154946316746, "loss": 1.7922, "step": 11191 }, { "epoch": 0.546484375, "grad_norm": 0.2960057556629181, "learning_rate": 0.00024698073928502165, "loss": 1.77, "step": 11192 }, { "epoch": 0.546533203125, "grad_norm": 0.24098043143749237, "learning_rate": 0.00024694598461743775, "loss": 1.785, "step": 11193 }, { "epoch": 0.54658203125, "grad_norm": 0.2856671214103699, "learning_rate": 0.00024691123062976566, "loss": 1.7796, "step": 11194 }, { "epoch": 0.546630859375, "grad_norm": 0.2722865343093872, "learning_rate": 0.00024687647732284736, "loss": 1.7818, "step": 11195 }, { "epoch": 0.5466796875, "grad_norm": 0.2720339000225067, "learning_rate": 0.00024684172469752533, "loss": 1.766, "step": 11196 }, { "epoch": 0.546728515625, "grad_norm": 0.2771535813808441, "learning_rate": 0.0002468069727546415, "loss": 1.7911, "step": 11197 }, { "epoch": 0.54677734375, "grad_norm": 0.27367669343948364, "learning_rate": 0.0002467722214950386, "loss": 1.7709, "step": 11198 }, { "epoch": 0.546826171875, "grad_norm": 0.29840174317359924, "learning_rate": 0.0002467374709195585, "loss": 1.7885, "step": 11199 }, { "epoch": 0.546875, "grad_norm": 0.23652659356594086, "learning_rate": 0.0002467027210290434, "loss": 1.7917, "step": 11200 }, { "epoch": 0.546923828125, "grad_norm": 0.27695131301879883, "learning_rate": 0.0002466679718243357, "loss": 1.7917, "step": 11201 }, { "epoch": 0.54697265625, "grad_norm": 0.2780352234840393, "learning_rate": 0.00024663322330627734, "loss": 1.7379, "step": 11202 }, { "epoch": 0.547021484375, "grad_norm": 0.27425530552864075, "learning_rate": 0.0002465984754757108, "loss": 1.7874, "step": 11203 }, { "epoch": 0.5470703125, "grad_norm": 0.3299682140350342, "learning_rate": 0.00024656372833347783, "loss": 1.7565, "step": 11204 }, { "epoch": 0.547119140625, "grad_norm": 0.28836938738822937, "learning_rate": 0.00024652898188042086, "loss": 1.7856, "step": 11205 }, { "epoch": 0.54716796875, "grad_norm": 0.29654374718666077, "learning_rate": 0.0002464942361173818, "loss": 1.7786, "step": 11206 }, { "epoch": 0.547216796875, "grad_norm": 0.2500511109828949, "learning_rate": 0.00024645949104520296, "loss": 1.7693, "step": 11207 }, { "epoch": 0.547265625, "grad_norm": 0.2574094831943512, "learning_rate": 0.0002464247466647263, "loss": 1.7607, "step": 11208 }, { "epoch": 0.547314453125, "grad_norm": 0.25072479248046875, "learning_rate": 0.00024639000297679365, "loss": 1.7674, "step": 11209 }, { "epoch": 0.54736328125, "grad_norm": 0.30190351605415344, "learning_rate": 0.00024635525998224745, "loss": 1.7779, "step": 11210 }, { "epoch": 0.547412109375, "grad_norm": 0.2946368455886841, "learning_rate": 0.0002463205176819294, "loss": 1.7906, "step": 11211 }, { "epoch": 0.5474609375, "grad_norm": 0.2564232051372528, "learning_rate": 0.00024628577607668176, "loss": 1.7498, "step": 11212 }, { "epoch": 0.547509765625, "grad_norm": 0.24782578647136688, "learning_rate": 0.0002462510351673463, "loss": 1.7814, "step": 11213 }, { "epoch": 0.54755859375, "grad_norm": 0.3238525390625, "learning_rate": 0.00024621629495476526, "loss": 1.7747, "step": 11214 }, { "epoch": 0.547607421875, "grad_norm": 0.3038901388645172, "learning_rate": 0.00024618155543978026, "loss": 1.78, "step": 11215 }, { "epoch": 0.54765625, "grad_norm": 0.24092373251914978, "learning_rate": 0.00024614681662323354, "loss": 1.7664, "step": 11216 }, { "epoch": 0.547705078125, "grad_norm": 0.3113110065460205, "learning_rate": 0.0002461120785059669, "loss": 1.7837, "step": 11217 }, { "epoch": 0.54775390625, "grad_norm": 0.24071620404720306, "learning_rate": 0.0002460773410888222, "loss": 1.7925, "step": 11218 }, { "epoch": 0.547802734375, "grad_norm": 0.23795294761657715, "learning_rate": 0.00024604260437264137, "loss": 1.7941, "step": 11219 }, { "epoch": 0.5478515625, "grad_norm": 0.288452684879303, "learning_rate": 0.0002460078683582662, "loss": 1.8014, "step": 11220 }, { "epoch": 0.547900390625, "grad_norm": 0.23984794318675995, "learning_rate": 0.00024597313304653876, "loss": 1.7797, "step": 11221 }, { "epoch": 0.54794921875, "grad_norm": 0.2693209946155548, "learning_rate": 0.00024593839843830076, "loss": 1.7805, "step": 11222 }, { "epoch": 0.547998046875, "grad_norm": 0.2615242302417755, "learning_rate": 0.000245903664534394, "loss": 1.8116, "step": 11223 }, { "epoch": 0.548046875, "grad_norm": 0.2919301390647888, "learning_rate": 0.00024586893133566034, "loss": 1.7839, "step": 11224 }, { "epoch": 0.548095703125, "grad_norm": 0.2398396134376526, "learning_rate": 0.00024583419884294147, "loss": 1.779, "step": 11225 }, { "epoch": 0.54814453125, "grad_norm": 0.3604001998901367, "learning_rate": 0.0002457994670570792, "loss": 1.7915, "step": 11226 }, { "epoch": 0.548193359375, "grad_norm": 0.2639455497264862, "learning_rate": 0.00024576473597891533, "loss": 1.7669, "step": 11227 }, { "epoch": 0.5482421875, "grad_norm": 0.2589351236820221, "learning_rate": 0.00024573000560929167, "loss": 1.7689, "step": 11228 }, { "epoch": 0.548291015625, "grad_norm": 0.2672584652900696, "learning_rate": 0.0002456952759490497, "loss": 1.7598, "step": 11229 }, { "epoch": 0.54833984375, "grad_norm": 0.2446577250957489, "learning_rate": 0.00024566054699903133, "loss": 1.7817, "step": 11230 }, { "epoch": 0.548388671875, "grad_norm": 0.251257985830307, "learning_rate": 0.0002456258187600781, "loss": 1.7728, "step": 11231 }, { "epoch": 0.5484375, "grad_norm": 0.24959222972393036, "learning_rate": 0.00024559109123303184, "loss": 1.7755, "step": 11232 }, { "epoch": 0.548486328125, "grad_norm": 0.2304873913526535, "learning_rate": 0.00024555636441873414, "loss": 1.7593, "step": 11233 }, { "epoch": 0.54853515625, "grad_norm": 0.24436704814434052, "learning_rate": 0.0002455216383180265, "loss": 1.7945, "step": 11234 }, { "epoch": 0.548583984375, "grad_norm": 0.21494653820991516, "learning_rate": 0.00024548691293175076, "loss": 1.7634, "step": 11235 }, { "epoch": 0.5486328125, "grad_norm": 0.21852806210517883, "learning_rate": 0.0002454521882607482, "loss": 1.7867, "step": 11236 }, { "epoch": 0.548681640625, "grad_norm": 0.20766516029834747, "learning_rate": 0.00024541746430586074, "loss": 1.7922, "step": 11237 }, { "epoch": 0.54873046875, "grad_norm": 0.21084237098693848, "learning_rate": 0.0002453827410679298, "loss": 1.7541, "step": 11238 }, { "epoch": 0.548779296875, "grad_norm": 0.20695297420024872, "learning_rate": 0.0002453480185477969, "loss": 1.8077, "step": 11239 }, { "epoch": 0.548828125, "grad_norm": 0.21699610352516174, "learning_rate": 0.0002453132967463035, "loss": 1.7989, "step": 11240 }, { "epoch": 0.548876953125, "grad_norm": 0.2650805711746216, "learning_rate": 0.0002452785756642914, "loss": 1.7536, "step": 11241 }, { "epoch": 0.54892578125, "grad_norm": 0.27685046195983887, "learning_rate": 0.0002452438553026018, "loss": 1.7916, "step": 11242 }, { "epoch": 0.548974609375, "grad_norm": 0.26713719964027405, "learning_rate": 0.0002452091356620763, "loss": 1.7958, "step": 11243 }, { "epoch": 0.5490234375, "grad_norm": 0.2555120289325714, "learning_rate": 0.0002451744167435563, "loss": 1.7965, "step": 11244 }, { "epoch": 0.549072265625, "grad_norm": 0.20908373594284058, "learning_rate": 0.0002451396985478832, "loss": 1.8008, "step": 11245 }, { "epoch": 0.54912109375, "grad_norm": 0.24861061573028564, "learning_rate": 0.0002451049810758986, "loss": 1.8213, "step": 11246 }, { "epoch": 0.549169921875, "grad_norm": 0.22631801664829254, "learning_rate": 0.0002450702643284437, "loss": 1.7849, "step": 11247 }, { "epoch": 0.54921875, "grad_norm": 0.21904897689819336, "learning_rate": 0.0002450355483063601, "loss": 1.7837, "step": 11248 }, { "epoch": 0.549267578125, "grad_norm": 0.2716086804866791, "learning_rate": 0.000245000833010489, "loss": 1.7901, "step": 11249 }, { "epoch": 0.54931640625, "grad_norm": 0.2437204271554947, "learning_rate": 0.00024496611844167183, "loss": 1.7894, "step": 11250 }, { "epoch": 0.549365234375, "grad_norm": 0.21290799975395203, "learning_rate": 0.0002449314046007499, "loss": 1.7855, "step": 11251 }, { "epoch": 0.5494140625, "grad_norm": 0.28561487793922424, "learning_rate": 0.0002448966914885645, "loss": 1.7948, "step": 11252 }, { "epoch": 0.549462890625, "grad_norm": 0.2509996294975281, "learning_rate": 0.0002448619791059571, "loss": 1.7929, "step": 11253 }, { "epoch": 0.54951171875, "grad_norm": 0.23024463653564453, "learning_rate": 0.0002448272674537687, "loss": 1.7747, "step": 11254 }, { "epoch": 0.549560546875, "grad_norm": 0.2658689618110657, "learning_rate": 0.0002447925565328408, "loss": 1.7807, "step": 11255 }, { "epoch": 0.549609375, "grad_norm": 0.2508593797683716, "learning_rate": 0.00024475784634401447, "loss": 1.7777, "step": 11256 }, { "epoch": 0.549658203125, "grad_norm": 0.25605911016464233, "learning_rate": 0.0002447231368881311, "loss": 1.7424, "step": 11257 }, { "epoch": 0.54970703125, "grad_norm": 0.26610639691352844, "learning_rate": 0.00024468842816603194, "loss": 1.7762, "step": 11258 }, { "epoch": 0.549755859375, "grad_norm": 0.27067098021507263, "learning_rate": 0.00024465372017855785, "loss": 1.8022, "step": 11259 }, { "epoch": 0.5498046875, "grad_norm": 0.28619813919067383, "learning_rate": 0.0002446190129265504, "loss": 1.757, "step": 11260 }, { "epoch": 0.549853515625, "grad_norm": 0.2290281504392624, "learning_rate": 0.00024458430641085047, "loss": 1.7842, "step": 11261 }, { "epoch": 0.54990234375, "grad_norm": 0.27936437726020813, "learning_rate": 0.0002445496006322994, "loss": 1.772, "step": 11262 }, { "epoch": 0.549951171875, "grad_norm": 0.2272481918334961, "learning_rate": 0.0002445148955917381, "loss": 1.7705, "step": 11263 }, { "epoch": 0.55, "grad_norm": 0.2506495714187622, "learning_rate": 0.0002444801912900079, "loss": 1.772, "step": 11264 }, { "epoch": 0.550048828125, "grad_norm": 0.3269716203212738, "learning_rate": 0.00024444548772794975, "loss": 1.7758, "step": 11265 }, { "epoch": 0.55009765625, "grad_norm": 0.23366796970367432, "learning_rate": 0.0002444107849064048, "loss": 1.797, "step": 11266 }, { "epoch": 0.550146484375, "grad_norm": 0.2839733064174652, "learning_rate": 0.000244376082826214, "loss": 1.7774, "step": 11267 }, { "epoch": 0.5501953125, "grad_norm": 0.26786571741104126, "learning_rate": 0.0002443413814882184, "loss": 1.7603, "step": 11268 }, { "epoch": 0.550244140625, "grad_norm": 0.22028298676013947, "learning_rate": 0.0002443066808932592, "loss": 1.7837, "step": 11269 }, { "epoch": 0.55029296875, "grad_norm": 0.2506009638309479, "learning_rate": 0.00024427198104217713, "loss": 1.7937, "step": 11270 }, { "epoch": 0.550341796875, "grad_norm": 0.20882737636566162, "learning_rate": 0.00024423728193581336, "loss": 1.7822, "step": 11271 }, { "epoch": 0.550390625, "grad_norm": 0.2792503833770752, "learning_rate": 0.00024420258357500863, "loss": 1.7613, "step": 11272 }, { "epoch": 0.550439453125, "grad_norm": 0.25653690099716187, "learning_rate": 0.0002441678859606042, "loss": 1.7657, "step": 11273 }, { "epoch": 0.55048828125, "grad_norm": 0.23448772728443146, "learning_rate": 0.00024413318909344067, "loss": 1.8144, "step": 11274 }, { "epoch": 0.550537109375, "grad_norm": 0.2437364161014557, "learning_rate": 0.00024409849297435922, "loss": 1.8002, "step": 11275 }, { "epoch": 0.5505859375, "grad_norm": 0.2179231345653534, "learning_rate": 0.00024406379760420055, "loss": 1.7886, "step": 11276 }, { "epoch": 0.550634765625, "grad_norm": 0.22902624309062958, "learning_rate": 0.00024402910298380564, "loss": 1.771, "step": 11277 }, { "epoch": 0.55068359375, "grad_norm": 0.21333947777748108, "learning_rate": 0.00024399440911401532, "loss": 1.7769, "step": 11278 }, { "epoch": 0.550732421875, "grad_norm": 0.21652968227863312, "learning_rate": 0.0002439597159956703, "loss": 1.7772, "step": 11279 }, { "epoch": 0.55078125, "grad_norm": 0.21496231853961945, "learning_rate": 0.00024392502362961167, "loss": 1.7835, "step": 11280 }, { "epoch": 0.550830078125, "grad_norm": 0.2282000631093979, "learning_rate": 0.00024389033201667988, "loss": 1.8093, "step": 11281 }, { "epoch": 0.55087890625, "grad_norm": 0.22311919927597046, "learning_rate": 0.00024385564115771602, "loss": 1.7836, "step": 11282 }, { "epoch": 0.550927734375, "grad_norm": 0.20149074494838715, "learning_rate": 0.00024382095105356062, "loss": 1.8243, "step": 11283 }, { "epoch": 0.5509765625, "grad_norm": 0.23959781229496002, "learning_rate": 0.00024378626170505459, "loss": 1.7752, "step": 11284 }, { "epoch": 0.551025390625, "grad_norm": 0.2600245773792267, "learning_rate": 0.00024375157311303859, "loss": 1.7549, "step": 11285 }, { "epoch": 0.55107421875, "grad_norm": 0.2403825968503952, "learning_rate": 0.00024371688527835322, "loss": 1.7783, "step": 11286 }, { "epoch": 0.551123046875, "grad_norm": 0.23415507376194, "learning_rate": 0.0002436821982018394, "loss": 1.7707, "step": 11287 }, { "epoch": 0.551171875, "grad_norm": 0.27879175543785095, "learning_rate": 0.00024364751188433754, "loss": 1.7943, "step": 11288 }, { "epoch": 0.551220703125, "grad_norm": 0.2363780289888382, "learning_rate": 0.0002436128263266885, "loss": 1.8116, "step": 11289 }, { "epoch": 0.55126953125, "grad_norm": 0.28256362676620483, "learning_rate": 0.00024357814152973278, "loss": 1.7763, "step": 11290 }, { "epoch": 0.551318359375, "grad_norm": 0.26251956820487976, "learning_rate": 0.0002435434574943111, "loss": 1.7687, "step": 11291 }, { "epoch": 0.5513671875, "grad_norm": 0.26219746470451355, "learning_rate": 0.000243508774221264, "loss": 1.7646, "step": 11292 }, { "epoch": 0.551416015625, "grad_norm": 0.25534290075302124, "learning_rate": 0.00024347409171143202, "loss": 1.783, "step": 11293 }, { "epoch": 0.55146484375, "grad_norm": 0.29607707262039185, "learning_rate": 0.00024343940996565577, "loss": 1.7791, "step": 11294 }, { "epoch": 0.551513671875, "grad_norm": 0.2521096467971802, "learning_rate": 0.00024340472898477572, "loss": 1.7749, "step": 11295 }, { "epoch": 0.5515625, "grad_norm": 0.22596311569213867, "learning_rate": 0.00024337004876963253, "loss": 1.7798, "step": 11296 }, { "epoch": 0.551611328125, "grad_norm": 0.2584350109100342, "learning_rate": 0.00024333536932106658, "loss": 1.8008, "step": 11297 }, { "epoch": 0.55166015625, "grad_norm": 0.2502930760383606, "learning_rate": 0.00024330069063991844, "loss": 1.7684, "step": 11298 }, { "epoch": 0.551708984375, "grad_norm": 0.2790209949016571, "learning_rate": 0.00024326601272702843, "loss": 1.7865, "step": 11299 }, { "epoch": 0.5517578125, "grad_norm": 0.22424980998039246, "learning_rate": 0.00024323133558323723, "loss": 1.7807, "step": 11300 }, { "epoch": 0.551806640625, "grad_norm": 0.2983819544315338, "learning_rate": 0.000243196659209385, "loss": 1.7905, "step": 11301 }, { "epoch": 0.55185546875, "grad_norm": 0.2926940321922302, "learning_rate": 0.00024316198360631238, "loss": 1.781, "step": 11302 }, { "epoch": 0.551904296875, "grad_norm": 0.26196032762527466, "learning_rate": 0.00024312730877485962, "loss": 1.7904, "step": 11303 }, { "epoch": 0.551953125, "grad_norm": 0.2991834878921509, "learning_rate": 0.00024309263471586711, "loss": 1.7817, "step": 11304 }, { "epoch": 0.552001953125, "grad_norm": 0.23828303813934326, "learning_rate": 0.00024305796143017527, "loss": 1.7761, "step": 11305 }, { "epoch": 0.55205078125, "grad_norm": 0.28701701760292053, "learning_rate": 0.0002430232889186244, "loss": 1.7822, "step": 11306 }, { "epoch": 0.552099609375, "grad_norm": 0.22751371562480927, "learning_rate": 0.0002429886171820549, "loss": 1.7742, "step": 11307 }, { "epoch": 0.5521484375, "grad_norm": 0.2515164017677307, "learning_rate": 0.00024295394622130678, "loss": 1.7931, "step": 11308 }, { "epoch": 0.552197265625, "grad_norm": 0.23339258134365082, "learning_rate": 0.0002429192760372207, "loss": 1.7651, "step": 11309 }, { "epoch": 0.55224609375, "grad_norm": 0.2103181630373001, "learning_rate": 0.00024288460663063666, "loss": 1.8041, "step": 11310 }, { "epoch": 0.552294921875, "grad_norm": 0.2160784751176834, "learning_rate": 0.00024284993800239503, "loss": 1.8191, "step": 11311 }, { "epoch": 0.55234375, "grad_norm": 0.1995142549276352, "learning_rate": 0.00024281527015333588, "loss": 1.7888, "step": 11312 }, { "epoch": 0.552392578125, "grad_norm": 0.18177446722984314, "learning_rate": 0.0002427806030842996, "loss": 1.7705, "step": 11313 }, { "epoch": 0.55244140625, "grad_norm": 0.21844938397407532, "learning_rate": 0.0002427459367961263, "loss": 1.7876, "step": 11314 }, { "epoch": 0.552490234375, "grad_norm": 0.2635771930217743, "learning_rate": 0.00024271127128965602, "loss": 1.7951, "step": 11315 }, { "epoch": 0.5525390625, "grad_norm": 0.20854949951171875, "learning_rate": 0.00024267660656572914, "loss": 1.7556, "step": 11316 }, { "epoch": 0.552587890625, "grad_norm": 0.22904489934444427, "learning_rate": 0.00024264194262518563, "loss": 1.7897, "step": 11317 }, { "epoch": 0.55263671875, "grad_norm": 0.24942104518413544, "learning_rate": 0.00024260727946886563, "loss": 1.7658, "step": 11318 }, { "epoch": 0.552685546875, "grad_norm": 0.30095499753952026, "learning_rate": 0.00024257261709760924, "loss": 1.7584, "step": 11319 }, { "epoch": 0.552734375, "grad_norm": 0.24975748360157013, "learning_rate": 0.0002425379555122565, "loss": 1.782, "step": 11320 }, { "epoch": 0.552783203125, "grad_norm": 0.26608017086982727, "learning_rate": 0.0002425032947136475, "loss": 1.7931, "step": 11321 }, { "epoch": 0.55283203125, "grad_norm": 0.3277839422225952, "learning_rate": 0.00024246863470262232, "loss": 1.7839, "step": 11322 }, { "epoch": 0.552880859375, "grad_norm": 0.19855010509490967, "learning_rate": 0.00024243397548002083, "loss": 1.7583, "step": 11323 }, { "epoch": 0.5529296875, "grad_norm": 0.31050845980644226, "learning_rate": 0.00024239931704668317, "loss": 1.7923, "step": 11324 }, { "epoch": 0.552978515625, "grad_norm": 0.27825409173965454, "learning_rate": 0.0002423646594034492, "loss": 1.8129, "step": 11325 }, { "epoch": 0.55302734375, "grad_norm": 0.24997133016586304, "learning_rate": 0.000242330002551159, "loss": 1.8174, "step": 11326 }, { "epoch": 0.553076171875, "grad_norm": 0.2667170763015747, "learning_rate": 0.00024229534649065237, "loss": 1.7706, "step": 11327 }, { "epoch": 0.553125, "grad_norm": 0.27739518880844116, "learning_rate": 0.00024226069122276934, "loss": 1.8094, "step": 11328 }, { "epoch": 0.553173828125, "grad_norm": 0.2828289866447449, "learning_rate": 0.0002422260367483497, "loss": 1.773, "step": 11329 }, { "epoch": 0.55322265625, "grad_norm": 0.23376329243183136, "learning_rate": 0.00024219138306823346, "loss": 1.7623, "step": 11330 }, { "epoch": 0.553271484375, "grad_norm": 0.3054274618625641, "learning_rate": 0.00024215673018326034, "loss": 1.7734, "step": 11331 }, { "epoch": 0.5533203125, "grad_norm": 0.2523569166660309, "learning_rate": 0.00024212207809427028, "loss": 1.7936, "step": 11332 }, { "epoch": 0.553369140625, "grad_norm": 0.2957148551940918, "learning_rate": 0.00024208742680210311, "loss": 1.7726, "step": 11333 }, { "epoch": 0.55341796875, "grad_norm": 0.29615217447280884, "learning_rate": 0.00024205277630759853, "loss": 1.7814, "step": 11334 }, { "epoch": 0.553466796875, "grad_norm": 0.24097509682178497, "learning_rate": 0.00024201812661159644, "loss": 1.7749, "step": 11335 }, { "epoch": 0.553515625, "grad_norm": 0.2458125799894333, "learning_rate": 0.00024198347771493651, "loss": 1.782, "step": 11336 }, { "epoch": 0.553564453125, "grad_norm": 0.27039265632629395, "learning_rate": 0.00024194882961845853, "loss": 1.7801, "step": 11337 }, { "epoch": 0.55361328125, "grad_norm": 0.2929896414279938, "learning_rate": 0.0002419141823230022, "loss": 1.7699, "step": 11338 }, { "epoch": 0.553662109375, "grad_norm": 0.2803058326244354, "learning_rate": 0.00024187953582940726, "loss": 1.7782, "step": 11339 }, { "epoch": 0.5537109375, "grad_norm": 0.23051245510578156, "learning_rate": 0.00024184489013851342, "loss": 1.7521, "step": 11340 }, { "epoch": 0.553759765625, "grad_norm": 0.27078670263290405, "learning_rate": 0.00024181024525116025, "loss": 1.7844, "step": 11341 }, { "epoch": 0.55380859375, "grad_norm": 0.20694933831691742, "learning_rate": 0.00024177560116818753, "loss": 1.7751, "step": 11342 }, { "epoch": 0.553857421875, "grad_norm": 0.27014362812042236, "learning_rate": 0.00024174095789043464, "loss": 1.785, "step": 11343 }, { "epoch": 0.55390625, "grad_norm": 0.20919515192508698, "learning_rate": 0.0002417063154187415, "loss": 1.7994, "step": 11344 }, { "epoch": 0.553955078125, "grad_norm": 0.22731401026248932, "learning_rate": 0.0002416716737539475, "loss": 1.7982, "step": 11345 }, { "epoch": 0.55400390625, "grad_norm": 0.19216510653495789, "learning_rate": 0.00024163703289689233, "loss": 1.7909, "step": 11346 }, { "epoch": 0.554052734375, "grad_norm": 0.23416127264499664, "learning_rate": 0.0002416023928484154, "loss": 1.7804, "step": 11347 }, { "epoch": 0.5541015625, "grad_norm": 0.23118947446346283, "learning_rate": 0.00024156775360935636, "loss": 1.7822, "step": 11348 }, { "epoch": 0.554150390625, "grad_norm": 0.25209805369377136, "learning_rate": 0.0002415331151805546, "loss": 1.7698, "step": 11349 }, { "epoch": 0.55419921875, "grad_norm": 0.2087727040052414, "learning_rate": 0.0002414984775628497, "loss": 1.783, "step": 11350 }, { "epoch": 0.554248046875, "grad_norm": 0.2698673903942108, "learning_rate": 0.00024146384075708117, "loss": 1.7984, "step": 11351 }, { "epoch": 0.554296875, "grad_norm": 0.29460254311561584, "learning_rate": 0.00024142920476408832, "loss": 1.787, "step": 11352 }, { "epoch": 0.554345703125, "grad_norm": 0.26392585039138794, "learning_rate": 0.00024139456958471072, "loss": 1.775, "step": 11353 }, { "epoch": 0.55439453125, "grad_norm": 0.25798022747039795, "learning_rate": 0.00024135993521978767, "loss": 1.7941, "step": 11354 }, { "epoch": 0.554443359375, "grad_norm": 0.21501033008098602, "learning_rate": 0.0002413253016701587, "loss": 1.776, "step": 11355 }, { "epoch": 0.5544921875, "grad_norm": 0.24925708770751953, "learning_rate": 0.00024129066893666308, "loss": 1.7851, "step": 11356 }, { "epoch": 0.554541015625, "grad_norm": 0.23938368260860443, "learning_rate": 0.00024125603702014015, "loss": 1.774, "step": 11357 }, { "epoch": 0.55458984375, "grad_norm": 0.23432280123233795, "learning_rate": 0.00024122140592142927, "loss": 1.7847, "step": 11358 }, { "epoch": 0.554638671875, "grad_norm": 0.24646537005901337, "learning_rate": 0.00024118677564136977, "loss": 1.7716, "step": 11359 }, { "epoch": 0.5546875, "grad_norm": 0.25707584619522095, "learning_rate": 0.00024115214618080096, "loss": 1.7789, "step": 11360 }, { "epoch": 0.554736328125, "grad_norm": 0.21811649203300476, "learning_rate": 0.000241117517540562, "loss": 1.7671, "step": 11361 }, { "epoch": 0.55478515625, "grad_norm": 0.2353392392396927, "learning_rate": 0.00024108288972149228, "loss": 1.7852, "step": 11362 }, { "epoch": 0.554833984375, "grad_norm": 0.2274186611175537, "learning_rate": 0.00024104826272443086, "loss": 1.7667, "step": 11363 }, { "epoch": 0.5548828125, "grad_norm": 0.23596590757369995, "learning_rate": 0.00024101363655021723, "loss": 1.8034, "step": 11364 }, { "epoch": 0.554931640625, "grad_norm": 0.24839691817760468, "learning_rate": 0.00024097901119969034, "loss": 1.7591, "step": 11365 }, { "epoch": 0.55498046875, "grad_norm": 0.214142307639122, "learning_rate": 0.0002409443866736895, "loss": 1.8051, "step": 11366 }, { "epoch": 0.555029296875, "grad_norm": 0.23333032429218292, "learning_rate": 0.00024090976297305378, "loss": 1.7637, "step": 11367 }, { "epoch": 0.555078125, "grad_norm": 0.230083629488945, "learning_rate": 0.00024087514009862233, "loss": 1.7937, "step": 11368 }, { "epoch": 0.555126953125, "grad_norm": 0.26959022879600525, "learning_rate": 0.0002408405180512343, "loss": 1.7839, "step": 11369 }, { "epoch": 0.55517578125, "grad_norm": 0.26430273056030273, "learning_rate": 0.00024080589683172872, "loss": 1.7436, "step": 11370 }, { "epoch": 0.555224609375, "grad_norm": 0.23905085027217865, "learning_rate": 0.00024077127644094476, "loss": 1.7853, "step": 11371 }, { "epoch": 0.5552734375, "grad_norm": 0.27245768904685974, "learning_rate": 0.0002407366568797213, "loss": 1.7802, "step": 11372 }, { "epoch": 0.555322265625, "grad_norm": 0.24574221670627594, "learning_rate": 0.00024070203814889762, "loss": 1.7969, "step": 11373 }, { "epoch": 0.55537109375, "grad_norm": 0.231795996427536, "learning_rate": 0.00024066742024931247, "loss": 1.7653, "step": 11374 }, { "epoch": 0.555419921875, "grad_norm": 0.23032747209072113, "learning_rate": 0.00024063280318180515, "loss": 1.7899, "step": 11375 }, { "epoch": 0.55546875, "grad_norm": 0.22781223058700562, "learning_rate": 0.00024059818694721437, "loss": 1.7778, "step": 11376 }, { "epoch": 0.555517578125, "grad_norm": 0.22710056602954865, "learning_rate": 0.00024056357154637908, "loss": 1.7912, "step": 11377 }, { "epoch": 0.55556640625, "grad_norm": 0.26543840765953064, "learning_rate": 0.00024052895698013844, "loss": 1.7662, "step": 11378 }, { "epoch": 0.555615234375, "grad_norm": 0.21871879696846008, "learning_rate": 0.00024049434324933106, "loss": 1.7765, "step": 11379 }, { "epoch": 0.5556640625, "grad_norm": 0.2586654722690582, "learning_rate": 0.00024045973035479617, "loss": 1.7674, "step": 11380 }, { "epoch": 0.555712890625, "grad_norm": 0.27682730555534363, "learning_rate": 0.00024042511829737233, "loss": 1.7816, "step": 11381 }, { "epoch": 0.55576171875, "grad_norm": 0.24049127101898193, "learning_rate": 0.00024039050707789863, "loss": 1.7598, "step": 11382 }, { "epoch": 0.555810546875, "grad_norm": 0.2828759253025055, "learning_rate": 0.00024035589669721365, "loss": 1.7855, "step": 11383 }, { "epoch": 0.555859375, "grad_norm": 0.37932702898979187, "learning_rate": 0.00024032128715615648, "loss": 1.7853, "step": 11384 }, { "epoch": 0.555908203125, "grad_norm": 0.2802739441394806, "learning_rate": 0.0002402866784555658, "loss": 1.8184, "step": 11385 }, { "epoch": 0.55595703125, "grad_norm": 0.35514113306999207, "learning_rate": 0.00024025207059628025, "loss": 1.7763, "step": 11386 }, { "epoch": 0.556005859375, "grad_norm": 0.2864339053630829, "learning_rate": 0.00024021746357913876, "loss": 1.7647, "step": 11387 }, { "epoch": 0.5560546875, "grad_norm": 0.26160019636154175, "learning_rate": 0.00024018285740497986, "loss": 1.768, "step": 11388 }, { "epoch": 0.556103515625, "grad_norm": 0.29468420147895813, "learning_rate": 0.00024014825207464252, "loss": 1.7972, "step": 11389 }, { "epoch": 0.55615234375, "grad_norm": 0.2496839016675949, "learning_rate": 0.0002401136475889652, "loss": 1.7727, "step": 11390 }, { "epoch": 0.556201171875, "grad_norm": 0.3003726899623871, "learning_rate": 0.00024007904394878672, "loss": 1.8049, "step": 11391 }, { "epoch": 0.55625, "grad_norm": 0.2085151970386505, "learning_rate": 0.0002400444411549456, "loss": 1.7571, "step": 11392 }, { "epoch": 0.556298828125, "grad_norm": 0.23815956711769104, "learning_rate": 0.00024000983920828062, "loss": 1.7832, "step": 11393 }, { "epoch": 0.55634765625, "grad_norm": 0.2510593831539154, "learning_rate": 0.00023997523810963028, "loss": 1.7796, "step": 11394 }, { "epoch": 0.556396484375, "grad_norm": 0.2321648746728897, "learning_rate": 0.0002399406378598331, "loss": 1.7771, "step": 11395 }, { "epoch": 0.5564453125, "grad_norm": 0.2567684054374695, "learning_rate": 0.00023990603845972785, "loss": 1.7711, "step": 11396 }, { "epoch": 0.556494140625, "grad_norm": 0.20723271369934082, "learning_rate": 0.00023987143991015286, "loss": 1.7711, "step": 11397 }, { "epoch": 0.55654296875, "grad_norm": 0.24411641061306, "learning_rate": 0.0002398368422119468, "loss": 1.7651, "step": 11398 }, { "epoch": 0.556591796875, "grad_norm": 0.2334018498659134, "learning_rate": 0.00023980224536594802, "loss": 1.7793, "step": 11399 }, { "epoch": 0.556640625, "grad_norm": 0.2512112557888031, "learning_rate": 0.00023976764937299523, "loss": 1.805, "step": 11400 }, { "epoch": 0.556689453125, "grad_norm": 0.2603048086166382, "learning_rate": 0.0002397330542339267, "loss": 1.7569, "step": 11401 }, { "epoch": 0.55673828125, "grad_norm": 0.2805606424808502, "learning_rate": 0.00023969845994958094, "loss": 1.7829, "step": 11402 }, { "epoch": 0.556787109375, "grad_norm": 0.31812191009521484, "learning_rate": 0.00023966386652079643, "loss": 1.7988, "step": 11403 }, { "epoch": 0.5568359375, "grad_norm": 0.23549239337444305, "learning_rate": 0.0002396292739484114, "loss": 1.7963, "step": 11404 }, { "epoch": 0.556884765625, "grad_norm": 0.2876114845275879, "learning_rate": 0.00023959468223326436, "loss": 1.8023, "step": 11405 }, { "epoch": 0.55693359375, "grad_norm": 0.2709876596927643, "learning_rate": 0.0002395600913761936, "loss": 1.7537, "step": 11406 }, { "epoch": 0.556982421875, "grad_norm": 0.24741271138191223, "learning_rate": 0.00023952550137803765, "loss": 1.7765, "step": 11407 }, { "epoch": 0.55703125, "grad_norm": 0.26025596261024475, "learning_rate": 0.00023949091223963447, "loss": 1.7815, "step": 11408 }, { "epoch": 0.557080078125, "grad_norm": 0.2730824947357178, "learning_rate": 0.0002394563239618227, "loss": 1.7934, "step": 11409 }, { "epoch": 0.55712890625, "grad_norm": 0.25934135913848877, "learning_rate": 0.00023942173654544047, "loss": 1.795, "step": 11410 }, { "epoch": 0.557177734375, "grad_norm": 0.27141591906547546, "learning_rate": 0.00023938714999132595, "loss": 1.8009, "step": 11411 }, { "epoch": 0.5572265625, "grad_norm": 0.3074842691421509, "learning_rate": 0.00023935256430031754, "loss": 1.7794, "step": 11412 }, { "epoch": 0.557275390625, "grad_norm": 0.2439720779657364, "learning_rate": 0.00023931797947325328, "loss": 1.8175, "step": 11413 }, { "epoch": 0.55732421875, "grad_norm": 0.27966076135635376, "learning_rate": 0.00023928339551097162, "loss": 1.82, "step": 11414 }, { "epoch": 0.557373046875, "grad_norm": 0.24894152581691742, "learning_rate": 0.0002392488124143104, "loss": 1.7716, "step": 11415 }, { "epoch": 0.557421875, "grad_norm": 0.24940982460975647, "learning_rate": 0.00023921423018410804, "loss": 1.7724, "step": 11416 }, { "epoch": 0.557470703125, "grad_norm": 0.2627817392349243, "learning_rate": 0.00023917964882120248, "loss": 1.7919, "step": 11417 }, { "epoch": 0.55751953125, "grad_norm": 0.2098558247089386, "learning_rate": 0.000239145068326432, "loss": 1.7876, "step": 11418 }, { "epoch": 0.557568359375, "grad_norm": 0.2651488184928894, "learning_rate": 0.00023911048870063457, "loss": 1.7628, "step": 11419 }, { "epoch": 0.5576171875, "grad_norm": 0.24062807857990265, "learning_rate": 0.00023907590994464822, "loss": 1.7942, "step": 11420 }, { "epoch": 0.557666015625, "grad_norm": 0.2603066563606262, "learning_rate": 0.00023904133205931112, "loss": 1.7751, "step": 11421 }, { "epoch": 0.55771484375, "grad_norm": 0.288474440574646, "learning_rate": 0.0002390067550454611, "loss": 1.7786, "step": 11422 }, { "epoch": 0.557763671875, "grad_norm": 0.32594963908195496, "learning_rate": 0.00023897217890393645, "loss": 1.7764, "step": 11423 }, { "epoch": 0.5578125, "grad_norm": 0.25531721115112305, "learning_rate": 0.00023893760363557487, "loss": 1.7855, "step": 11424 }, { "epoch": 0.557861328125, "grad_norm": 0.250696063041687, "learning_rate": 0.00023890302924121454, "loss": 1.7671, "step": 11425 }, { "epoch": 0.55791015625, "grad_norm": 0.24976986646652222, "learning_rate": 0.0002388684557216933, "loss": 1.7822, "step": 11426 }, { "epoch": 0.557958984375, "grad_norm": 0.22782965004444122, "learning_rate": 0.00023883388307784892, "loss": 1.7818, "step": 11427 }, { "epoch": 0.5580078125, "grad_norm": 0.2777788043022156, "learning_rate": 0.00023879931131051956, "loss": 1.7875, "step": 11428 }, { "epoch": 0.558056640625, "grad_norm": 0.22221751511096954, "learning_rate": 0.00023876474042054292, "loss": 1.7624, "step": 11429 }, { "epoch": 0.55810546875, "grad_norm": 0.23708608746528625, "learning_rate": 0.00023873017040875694, "loss": 1.7807, "step": 11430 }, { "epoch": 0.558154296875, "grad_norm": 0.2185986191034317, "learning_rate": 0.00023869560127599937, "loss": 1.7822, "step": 11431 }, { "epoch": 0.558203125, "grad_norm": 0.24335028231143951, "learning_rate": 0.00023866103302310816, "loss": 1.7604, "step": 11432 }, { "epoch": 0.558251953125, "grad_norm": 0.2523519992828369, "learning_rate": 0.0002386264656509209, "loss": 1.76, "step": 11433 }, { "epoch": 0.55830078125, "grad_norm": 0.20904693007469177, "learning_rate": 0.00023859189916027558, "loss": 1.781, "step": 11434 }, { "epoch": 0.558349609375, "grad_norm": 0.2579854726791382, "learning_rate": 0.00023855733355200982, "loss": 1.7686, "step": 11435 }, { "epoch": 0.5583984375, "grad_norm": 0.2250244915485382, "learning_rate": 0.0002385227688269613, "loss": 1.7582, "step": 11436 }, { "epoch": 0.558447265625, "grad_norm": 0.3107696771621704, "learning_rate": 0.00023848820498596779, "loss": 1.7933, "step": 11437 }, { "epoch": 0.55849609375, "grad_norm": 0.24100133776664734, "learning_rate": 0.00023845364202986692, "loss": 1.7757, "step": 11438 }, { "epoch": 0.558544921875, "grad_norm": 0.27082303166389465, "learning_rate": 0.0002384190799594965, "loss": 1.751, "step": 11439 }, { "epoch": 0.55859375, "grad_norm": 0.24229012429714203, "learning_rate": 0.00023838451877569394, "loss": 1.7565, "step": 11440 }, { "epoch": 0.558642578125, "grad_norm": 0.24530738592147827, "learning_rate": 0.00023834995847929713, "loss": 1.7813, "step": 11441 }, { "epoch": 0.55869140625, "grad_norm": 0.24978715181350708, "learning_rate": 0.0002383153990711434, "loss": 1.7784, "step": 11442 }, { "epoch": 0.558740234375, "grad_norm": 0.2136981189250946, "learning_rate": 0.00023828084055207055, "loss": 1.7824, "step": 11443 }, { "epoch": 0.5587890625, "grad_norm": 0.23968231678009033, "learning_rate": 0.000238246282922916, "loss": 1.8015, "step": 11444 }, { "epoch": 0.558837890625, "grad_norm": 0.2027081698179245, "learning_rate": 0.00023821172618451725, "loss": 1.7917, "step": 11445 }, { "epoch": 0.55888671875, "grad_norm": 0.2580619156360626, "learning_rate": 0.00023817717033771196, "loss": 1.78, "step": 11446 }, { "epoch": 0.558935546875, "grad_norm": 0.19506531953811646, "learning_rate": 0.0002381426153833374, "loss": 1.8005, "step": 11447 }, { "epoch": 0.558984375, "grad_norm": 0.24828781187534332, "learning_rate": 0.0002381080613222313, "loss": 1.788, "step": 11448 }, { "epoch": 0.559033203125, "grad_norm": 0.2220609188079834, "learning_rate": 0.0002380735081552309, "loss": 1.772, "step": 11449 }, { "epoch": 0.55908203125, "grad_norm": 0.2981542944908142, "learning_rate": 0.00023803895588317376, "loss": 1.7727, "step": 11450 }, { "epoch": 0.559130859375, "grad_norm": 0.30075201392173767, "learning_rate": 0.00023800440450689707, "loss": 1.7774, "step": 11451 }, { "epoch": 0.5591796875, "grad_norm": 0.27173107862472534, "learning_rate": 0.00023796985402723853, "loss": 1.7764, "step": 11452 }, { "epoch": 0.559228515625, "grad_norm": 0.230641707777977, "learning_rate": 0.0002379353044450353, "loss": 1.7597, "step": 11453 }, { "epoch": 0.55927734375, "grad_norm": 0.28655076026916504, "learning_rate": 0.00023790075576112464, "loss": 1.7677, "step": 11454 }, { "epoch": 0.559326171875, "grad_norm": 0.23589397966861725, "learning_rate": 0.00023786620797634412, "loss": 1.7742, "step": 11455 }, { "epoch": 0.559375, "grad_norm": 0.24762418866157532, "learning_rate": 0.0002378316610915307, "loss": 1.792, "step": 11456 }, { "epoch": 0.559423828125, "grad_norm": 0.3179270327091217, "learning_rate": 0.000237797115107522, "loss": 1.8172, "step": 11457 }, { "epoch": 0.55947265625, "grad_norm": 0.2595844864845276, "learning_rate": 0.00023776257002515496, "loss": 1.7713, "step": 11458 }, { "epoch": 0.559521484375, "grad_norm": 0.28613945841789246, "learning_rate": 0.0002377280258452671, "loss": 1.7935, "step": 11459 }, { "epoch": 0.5595703125, "grad_norm": 0.33336836099624634, "learning_rate": 0.00023769348256869543, "loss": 1.7896, "step": 11460 }, { "epoch": 0.559619140625, "grad_norm": 0.2674769461154938, "learning_rate": 0.0002376589401962771, "loss": 1.7787, "step": 11461 }, { "epoch": 0.55966796875, "grad_norm": 0.25230979919433594, "learning_rate": 0.00023762439872884944, "loss": 1.8055, "step": 11462 }, { "epoch": 0.559716796875, "grad_norm": 0.268746554851532, "learning_rate": 0.00023758985816724944, "loss": 1.796, "step": 11463 }, { "epoch": 0.559765625, "grad_norm": 0.25147783756256104, "learning_rate": 0.0002375553185123143, "loss": 1.7946, "step": 11464 }, { "epoch": 0.559814453125, "grad_norm": 0.21550236642360687, "learning_rate": 0.00023752077976488107, "loss": 1.7912, "step": 11465 }, { "epoch": 0.55986328125, "grad_norm": 0.2573370039463043, "learning_rate": 0.00023748624192578694, "loss": 1.7927, "step": 11466 }, { "epoch": 0.559912109375, "grad_norm": 0.20861563086509705, "learning_rate": 0.00023745170499586883, "loss": 1.8009, "step": 11467 }, { "epoch": 0.5599609375, "grad_norm": 0.2179557979106903, "learning_rate": 0.00023741716897596383, "loss": 1.7964, "step": 11468 }, { "epoch": 0.560009765625, "grad_norm": 0.2660892903804779, "learning_rate": 0.000237382633866909, "loss": 1.766, "step": 11469 }, { "epoch": 0.56005859375, "grad_norm": 0.2605856657028198, "learning_rate": 0.00023734809966954109, "loss": 1.7551, "step": 11470 }, { "epoch": 0.560107421875, "grad_norm": 0.22054943442344666, "learning_rate": 0.00023731356638469737, "loss": 1.7833, "step": 11471 }, { "epoch": 0.56015625, "grad_norm": 0.3160383105278015, "learning_rate": 0.00023727903401321454, "loss": 1.8006, "step": 11472 }, { "epoch": 0.560205078125, "grad_norm": 0.22509685158729553, "learning_rate": 0.0002372445025559297, "loss": 1.7809, "step": 11473 }, { "epoch": 0.56025390625, "grad_norm": 0.2812139689922333, "learning_rate": 0.0002372099720136796, "loss": 1.7907, "step": 11474 }, { "epoch": 0.560302734375, "grad_norm": 0.2878464460372925, "learning_rate": 0.00023717544238730133, "loss": 1.7642, "step": 11475 }, { "epoch": 0.5603515625, "grad_norm": 0.24504438042640686, "learning_rate": 0.00023714091367763146, "loss": 1.8011, "step": 11476 }, { "epoch": 0.560400390625, "grad_norm": 0.2519926428794861, "learning_rate": 0.00023710638588550704, "loss": 1.7748, "step": 11477 }, { "epoch": 0.56044921875, "grad_norm": 0.255876362323761, "learning_rate": 0.00023707185901176487, "loss": 1.7643, "step": 11478 }, { "epoch": 0.560498046875, "grad_norm": 0.26750558614730835, "learning_rate": 0.0002370373330572415, "loss": 1.7483, "step": 11479 }, { "epoch": 0.560546875, "grad_norm": 0.24882446229457855, "learning_rate": 0.000237002808022774, "loss": 1.7678, "step": 11480 }, { "epoch": 0.560595703125, "grad_norm": 0.28071320056915283, "learning_rate": 0.00023696828390919883, "loss": 1.8047, "step": 11481 }, { "epoch": 0.56064453125, "grad_norm": 0.30315297842025757, "learning_rate": 0.000236933760717353, "loss": 1.7728, "step": 11482 }, { "epoch": 0.560693359375, "grad_norm": 0.2785033583641052, "learning_rate": 0.000236899238448073, "loss": 1.744, "step": 11483 }, { "epoch": 0.5607421875, "grad_norm": 0.29237207770347595, "learning_rate": 0.0002368647171021956, "loss": 1.7503, "step": 11484 }, { "epoch": 0.560791015625, "grad_norm": 0.29642754793167114, "learning_rate": 0.0002368301966805574, "loss": 1.7941, "step": 11485 }, { "epoch": 0.56083984375, "grad_norm": 0.3183002471923828, "learning_rate": 0.00023679567718399504, "loss": 1.7761, "step": 11486 }, { "epoch": 0.560888671875, "grad_norm": 0.249136283993721, "learning_rate": 0.00023676115861334513, "loss": 1.7693, "step": 11487 }, { "epoch": 0.5609375, "grad_norm": 0.33921536803245544, "learning_rate": 0.0002367266409694442, "loss": 1.7736, "step": 11488 }, { "epoch": 0.560986328125, "grad_norm": 0.21478845179080963, "learning_rate": 0.00023669212425312896, "loss": 1.7938, "step": 11489 }, { "epoch": 0.56103515625, "grad_norm": 0.2963981032371521, "learning_rate": 0.00023665760846523583, "loss": 1.7715, "step": 11490 }, { "epoch": 0.561083984375, "grad_norm": 0.2266303300857544, "learning_rate": 0.00023662309360660138, "loss": 1.7764, "step": 11491 }, { "epoch": 0.5611328125, "grad_norm": 0.27129116654396057, "learning_rate": 0.00023658857967806207, "loss": 1.7709, "step": 11492 }, { "epoch": 0.561181640625, "grad_norm": 0.2801748514175415, "learning_rate": 0.00023655406668045444, "loss": 1.782, "step": 11493 }, { "epoch": 0.56123046875, "grad_norm": 0.2608756422996521, "learning_rate": 0.0002365195546146149, "loss": 1.7655, "step": 11494 }, { "epoch": 0.561279296875, "grad_norm": 0.248932883143425, "learning_rate": 0.00023648504348137978, "loss": 1.7904, "step": 11495 }, { "epoch": 0.561328125, "grad_norm": 0.24463492631912231, "learning_rate": 0.00023645053328158567, "loss": 1.7524, "step": 11496 }, { "epoch": 0.561376953125, "grad_norm": 0.26000669598579407, "learning_rate": 0.0002364160240160687, "loss": 1.7654, "step": 11497 }, { "epoch": 0.56142578125, "grad_norm": 0.23781515657901764, "learning_rate": 0.00023638151568566558, "loss": 1.8042, "step": 11498 }, { "epoch": 0.561474609375, "grad_norm": 0.2530454993247986, "learning_rate": 0.0002363470082912123, "loss": 1.77, "step": 11499 }, { "epoch": 0.5615234375, "grad_norm": 0.19862070679664612, "learning_rate": 0.00023631250183354542, "loss": 1.7862, "step": 11500 }, { "epoch": 0.561572265625, "grad_norm": 0.23758752644062042, "learning_rate": 0.00023627799631350106, "loss": 1.7845, "step": 11501 }, { "epoch": 0.56162109375, "grad_norm": 0.18720516562461853, "learning_rate": 0.00023624349173191568, "loss": 1.7866, "step": 11502 }, { "epoch": 0.561669921875, "grad_norm": 0.27758094668388367, "learning_rate": 0.00023620898808962538, "loss": 1.7994, "step": 11503 }, { "epoch": 0.56171875, "grad_norm": 0.23431412875652313, "learning_rate": 0.00023617448538746632, "loss": 1.7702, "step": 11504 }, { "epoch": 0.561767578125, "grad_norm": 0.2355472296476364, "learning_rate": 0.00023613998362627492, "loss": 1.7577, "step": 11505 }, { "epoch": 0.56181640625, "grad_norm": 0.24708791077136993, "learning_rate": 0.00023610548280688715, "loss": 1.7793, "step": 11506 }, { "epoch": 0.561865234375, "grad_norm": 0.22030188143253326, "learning_rate": 0.00023607098293013934, "loss": 1.7764, "step": 11507 }, { "epoch": 0.5619140625, "grad_norm": 0.2830919027328491, "learning_rate": 0.00023603648399686745, "loss": 1.7723, "step": 11508 }, { "epoch": 0.561962890625, "grad_norm": 0.23750828206539154, "learning_rate": 0.00023600198600790773, "loss": 1.7608, "step": 11509 }, { "epoch": 0.56201171875, "grad_norm": 0.25665023922920227, "learning_rate": 0.00023596748896409609, "loss": 1.7696, "step": 11510 }, { "epoch": 0.562060546875, "grad_norm": 0.20616425573825836, "learning_rate": 0.00023593299286626891, "loss": 1.7788, "step": 11511 }, { "epoch": 0.562109375, "grad_norm": 0.2834511399269104, "learning_rate": 0.00023589849771526195, "loss": 1.7689, "step": 11512 }, { "epoch": 0.562158203125, "grad_norm": 0.2232811003923416, "learning_rate": 0.00023586400351191124, "loss": 1.8001, "step": 11513 }, { "epoch": 0.56220703125, "grad_norm": 0.27597761154174805, "learning_rate": 0.00023582951025705295, "loss": 1.7577, "step": 11514 }, { "epoch": 0.562255859375, "grad_norm": 0.2414289265871048, "learning_rate": 0.00023579501795152285, "loss": 1.7911, "step": 11515 }, { "epoch": 0.5623046875, "grad_norm": 0.28992804884910583, "learning_rate": 0.00023576052659615704, "loss": 1.7663, "step": 11516 }, { "epoch": 0.562353515625, "grad_norm": 0.35868361592292786, "learning_rate": 0.00023572603619179138, "loss": 1.7706, "step": 11517 }, { "epoch": 0.56240234375, "grad_norm": 0.22416876256465912, "learning_rate": 0.0002356915467392618, "loss": 1.7967, "step": 11518 }, { "epoch": 0.562451171875, "grad_norm": 0.3262398838996887, "learning_rate": 0.00023565705823940425, "loss": 1.7929, "step": 11519 }, { "epoch": 0.5625, "grad_norm": 0.2447422742843628, "learning_rate": 0.00023562257069305437, "loss": 1.8, "step": 11520 }, { "epoch": 0.562548828125, "grad_norm": 0.28027018904685974, "learning_rate": 0.0002355880841010482, "loss": 1.7888, "step": 11521 }, { "epoch": 0.56259765625, "grad_norm": 0.26490864157676697, "learning_rate": 0.00023555359846422138, "loss": 1.7997, "step": 11522 }, { "epoch": 0.562646484375, "grad_norm": 0.26652461290359497, "learning_rate": 0.0002355191137834099, "loss": 1.777, "step": 11523 }, { "epoch": 0.5626953125, "grad_norm": 0.3456554710865021, "learning_rate": 0.0002354846300594493, "loss": 1.7749, "step": 11524 }, { "epoch": 0.562744140625, "grad_norm": 0.2596210241317749, "learning_rate": 0.00023545014729317554, "loss": 1.7693, "step": 11525 }, { "epoch": 0.56279296875, "grad_norm": 0.29736316204071045, "learning_rate": 0.00023541566548542414, "loss": 1.7897, "step": 11526 }, { "epoch": 0.562841796875, "grad_norm": 0.3096063435077667, "learning_rate": 0.000235381184637031, "loss": 1.7759, "step": 11527 }, { "epoch": 0.562890625, "grad_norm": 0.2957518696784973, "learning_rate": 0.00023534670474883169, "loss": 1.7866, "step": 11528 }, { "epoch": 0.562939453125, "grad_norm": 0.30383744835853577, "learning_rate": 0.00023531222582166172, "loss": 1.786, "step": 11529 }, { "epoch": 0.56298828125, "grad_norm": 0.29914602637290955, "learning_rate": 0.0002352777478563569, "loss": 1.7893, "step": 11530 }, { "epoch": 0.563037109375, "grad_norm": 0.27716264128685, "learning_rate": 0.0002352432708537528, "loss": 1.7916, "step": 11531 }, { "epoch": 0.5630859375, "grad_norm": 0.2619040608406067, "learning_rate": 0.00023520879481468492, "loss": 1.7665, "step": 11532 }, { "epoch": 0.563134765625, "grad_norm": 0.3032156825065613, "learning_rate": 0.00023517431973998888, "loss": 1.7725, "step": 11533 }, { "epoch": 0.56318359375, "grad_norm": 0.21869507431983948, "learning_rate": 0.0002351398456305003, "loss": 1.779, "step": 11534 }, { "epoch": 0.563232421875, "grad_norm": 0.2675493061542511, "learning_rate": 0.00023510537248705443, "loss": 1.7869, "step": 11535 }, { "epoch": 0.56328125, "grad_norm": 0.21456781029701233, "learning_rate": 0.000235070900310487, "loss": 1.7768, "step": 11536 }, { "epoch": 0.563330078125, "grad_norm": 0.25055062770843506, "learning_rate": 0.00023503642910163342, "loss": 1.7507, "step": 11537 }, { "epoch": 0.56337890625, "grad_norm": 0.22410744428634644, "learning_rate": 0.00023500195886132897, "loss": 1.791, "step": 11538 }, { "epoch": 0.563427734375, "grad_norm": 0.2577510178089142, "learning_rate": 0.00023496748959040925, "loss": 1.7512, "step": 11539 }, { "epoch": 0.5634765625, "grad_norm": 0.21672064065933228, "learning_rate": 0.00023493302128970955, "loss": 1.7798, "step": 11540 }, { "epoch": 0.563525390625, "grad_norm": 0.21925053000450134, "learning_rate": 0.00023489855396006533, "loss": 1.7898, "step": 11541 }, { "epoch": 0.56357421875, "grad_norm": 0.2646840214729309, "learning_rate": 0.00023486408760231176, "loss": 1.7582, "step": 11542 }, { "epoch": 0.563623046875, "grad_norm": 0.22368106245994568, "learning_rate": 0.00023482962221728438, "loss": 1.7992, "step": 11543 }, { "epoch": 0.563671875, "grad_norm": 0.21306385099887848, "learning_rate": 0.00023479515780581834, "loss": 1.8027, "step": 11544 }, { "epoch": 0.563720703125, "grad_norm": 0.21068991720676422, "learning_rate": 0.00023476069436874892, "loss": 1.7863, "step": 11545 }, { "epoch": 0.56376953125, "grad_norm": 0.2226533591747284, "learning_rate": 0.00023472623190691144, "loss": 1.7831, "step": 11546 }, { "epoch": 0.563818359375, "grad_norm": 0.24329273402690887, "learning_rate": 0.00023469177042114103, "loss": 1.7888, "step": 11547 }, { "epoch": 0.5638671875, "grad_norm": 0.19703401625156403, "learning_rate": 0.00023465730991227308, "loss": 1.745, "step": 11548 }, { "epoch": 0.563916015625, "grad_norm": 0.25891372561454773, "learning_rate": 0.00023462285038114251, "loss": 1.7919, "step": 11549 }, { "epoch": 0.56396484375, "grad_norm": 0.27951452136039734, "learning_rate": 0.00023458839182858464, "loss": 1.8145, "step": 11550 }, { "epoch": 0.564013671875, "grad_norm": 0.2276812493801117, "learning_rate": 0.00023455393425543452, "loss": 1.7836, "step": 11551 }, { "epoch": 0.5640625, "grad_norm": 0.24027208983898163, "learning_rate": 0.0002345194776625274, "loss": 1.8005, "step": 11552 }, { "epoch": 0.564111328125, "grad_norm": 0.2692461907863617, "learning_rate": 0.00023448502205069817, "loss": 1.7777, "step": 11553 }, { "epoch": 0.56416015625, "grad_norm": 0.23243017494678497, "learning_rate": 0.00023445056742078197, "loss": 1.765, "step": 11554 }, { "epoch": 0.564208984375, "grad_norm": 0.23657658696174622, "learning_rate": 0.0002344161137736139, "loss": 1.7726, "step": 11555 }, { "epoch": 0.5642578125, "grad_norm": 0.3069109618663788, "learning_rate": 0.00023438166111002884, "loss": 1.7946, "step": 11556 }, { "epoch": 0.564306640625, "grad_norm": 0.26322048902511597, "learning_rate": 0.00023434720943086201, "loss": 1.7685, "step": 11557 }, { "epoch": 0.56435546875, "grad_norm": 0.23727412521839142, "learning_rate": 0.00023431275873694808, "loss": 1.7895, "step": 11558 }, { "epoch": 0.564404296875, "grad_norm": 0.2736336588859558, "learning_rate": 0.00023427830902912218, "loss": 1.7695, "step": 11559 }, { "epoch": 0.564453125, "grad_norm": 0.21702586114406586, "learning_rate": 0.00023424386030821914, "loss": 1.7648, "step": 11560 }, { "epoch": 0.564501953125, "grad_norm": 0.25102531909942627, "learning_rate": 0.00023420941257507394, "loss": 1.7835, "step": 11561 }, { "epoch": 0.56455078125, "grad_norm": 0.221318781375885, "learning_rate": 0.00023417496583052138, "loss": 1.7545, "step": 11562 }, { "epoch": 0.564599609375, "grad_norm": 0.18887962400913239, "learning_rate": 0.00023414052007539633, "loss": 1.7907, "step": 11563 }, { "epoch": 0.5646484375, "grad_norm": 0.20616835355758667, "learning_rate": 0.00023410607531053356, "loss": 1.8021, "step": 11564 }, { "epoch": 0.564697265625, "grad_norm": 0.20124036073684692, "learning_rate": 0.00023407163153676792, "loss": 1.7873, "step": 11565 }, { "epoch": 0.56474609375, "grad_norm": 0.24915039539337158, "learning_rate": 0.0002340371887549342, "loss": 1.7978, "step": 11566 }, { "epoch": 0.564794921875, "grad_norm": 0.24413153529167175, "learning_rate": 0.0002340027469658671, "loss": 1.7835, "step": 11567 }, { "epoch": 0.56484375, "grad_norm": 0.20077793300151825, "learning_rate": 0.00023396830617040143, "loss": 1.7893, "step": 11568 }, { "epoch": 0.564892578125, "grad_norm": 0.23435747623443604, "learning_rate": 0.0002339338663693717, "loss": 1.7723, "step": 11569 }, { "epoch": 0.56494140625, "grad_norm": 0.20258158445358276, "learning_rate": 0.00023389942756361283, "loss": 1.7529, "step": 11570 }, { "epoch": 0.564990234375, "grad_norm": 0.21172888576984406, "learning_rate": 0.00023386498975395926, "loss": 1.7939, "step": 11571 }, { "epoch": 0.5650390625, "grad_norm": 0.22093786299228668, "learning_rate": 0.00023383055294124577, "loss": 1.7808, "step": 11572 }, { "epoch": 0.565087890625, "grad_norm": 0.3063838481903076, "learning_rate": 0.00023379611712630687, "loss": 1.778, "step": 11573 }, { "epoch": 0.56513671875, "grad_norm": 0.2972537577152252, "learning_rate": 0.00023376168230997718, "loss": 1.7866, "step": 11574 }, { "epoch": 0.565185546875, "grad_norm": 0.2537241280078888, "learning_rate": 0.00023372724849309124, "loss": 1.7907, "step": 11575 }, { "epoch": 0.565234375, "grad_norm": 0.2892944812774658, "learning_rate": 0.00023369281567648366, "loss": 1.7842, "step": 11576 }, { "epoch": 0.565283203125, "grad_norm": 0.2643462121486664, "learning_rate": 0.00023365838386098882, "loss": 1.7973, "step": 11577 }, { "epoch": 0.56533203125, "grad_norm": 0.2490638941526413, "learning_rate": 0.00023362395304744128, "loss": 1.7926, "step": 11578 }, { "epoch": 0.565380859375, "grad_norm": 0.23872168362140656, "learning_rate": 0.00023358952323667548, "loss": 1.7744, "step": 11579 }, { "epoch": 0.5654296875, "grad_norm": 0.2319694608449936, "learning_rate": 0.00023355509442952584, "loss": 1.7599, "step": 11580 }, { "epoch": 0.565478515625, "grad_norm": 0.3470459580421448, "learning_rate": 0.00023352066662682674, "loss": 1.7793, "step": 11581 }, { "epoch": 0.56552734375, "grad_norm": 0.27294737100601196, "learning_rate": 0.00023348623982941263, "loss": 1.7742, "step": 11582 }, { "epoch": 0.565576171875, "grad_norm": 0.25279203057289124, "learning_rate": 0.0002334518140381179, "loss": 1.7714, "step": 11583 }, { "epoch": 0.565625, "grad_norm": 0.34211239218711853, "learning_rate": 0.0002334173892537768, "loss": 1.8098, "step": 11584 }, { "epoch": 0.565673828125, "grad_norm": 0.26431697607040405, "learning_rate": 0.00023338296547722366, "loss": 1.7502, "step": 11585 }, { "epoch": 0.56572265625, "grad_norm": 0.30187559127807617, "learning_rate": 0.00023334854270929285, "loss": 1.7771, "step": 11586 }, { "epoch": 0.565771484375, "grad_norm": 0.30432748794555664, "learning_rate": 0.00023331412095081854, "loss": 1.7757, "step": 11587 }, { "epoch": 0.5658203125, "grad_norm": 0.2678958773612976, "learning_rate": 0.000233279700202635, "loss": 1.7547, "step": 11588 }, { "epoch": 0.565869140625, "grad_norm": 0.2576768398284912, "learning_rate": 0.00023324528046557642, "loss": 1.7606, "step": 11589 }, { "epoch": 0.56591796875, "grad_norm": 0.23558931052684784, "learning_rate": 0.00023321086174047707, "loss": 1.7678, "step": 11590 }, { "epoch": 0.565966796875, "grad_norm": 0.2714109718799591, "learning_rate": 0.00023317644402817095, "loss": 1.7873, "step": 11591 }, { "epoch": 0.566015625, "grad_norm": 0.24318049848079681, "learning_rate": 0.0002331420273294924, "loss": 1.7888, "step": 11592 }, { "epoch": 0.566064453125, "grad_norm": 0.28653383255004883, "learning_rate": 0.00023310761164527545, "loss": 1.7705, "step": 11593 }, { "epoch": 0.56611328125, "grad_norm": 0.27502182126045227, "learning_rate": 0.0002330731969763541, "loss": 1.7725, "step": 11594 }, { "epoch": 0.566162109375, "grad_norm": 0.25274136662483215, "learning_rate": 0.0002330387833235626, "loss": 1.7654, "step": 11595 }, { "epoch": 0.5662109375, "grad_norm": 0.26947054266929626, "learning_rate": 0.00023300437068773488, "loss": 1.76, "step": 11596 }, { "epoch": 0.566259765625, "grad_norm": 0.2510233521461487, "learning_rate": 0.00023296995906970497, "loss": 1.7849, "step": 11597 }, { "epoch": 0.56630859375, "grad_norm": 0.23049283027648926, "learning_rate": 0.00023293554847030686, "loss": 1.76, "step": 11598 }, { "epoch": 0.566357421875, "grad_norm": 0.22931233048439026, "learning_rate": 0.0002329011388903745, "loss": 1.762, "step": 11599 }, { "epoch": 0.56640625, "grad_norm": 0.2537226974964142, "learning_rate": 0.00023286673033074185, "loss": 1.8172, "step": 11600 }, { "epoch": 0.566455078125, "grad_norm": 0.23884528875350952, "learning_rate": 0.00023283232279224292, "loss": 1.7728, "step": 11601 }, { "epoch": 0.56650390625, "grad_norm": 0.20215986669063568, "learning_rate": 0.00023279791627571145, "loss": 1.7679, "step": 11602 }, { "epoch": 0.566552734375, "grad_norm": 0.2506553828716278, "learning_rate": 0.00023276351078198144, "loss": 1.7559, "step": 11603 }, { "epoch": 0.5666015625, "grad_norm": 0.2308042198419571, "learning_rate": 0.0002327291063118866, "loss": 1.7734, "step": 11604 }, { "epoch": 0.566650390625, "grad_norm": 0.27745285630226135, "learning_rate": 0.00023269470286626094, "loss": 1.7904, "step": 11605 }, { "epoch": 0.56669921875, "grad_norm": 0.26552313566207886, "learning_rate": 0.0002326603004459381, "loss": 1.7687, "step": 11606 }, { "epoch": 0.566748046875, "grad_norm": 0.22174806892871857, "learning_rate": 0.00023262589905175186, "loss": 1.7832, "step": 11607 }, { "epoch": 0.566796875, "grad_norm": 0.23194070160388947, "learning_rate": 0.000232591498684536, "loss": 1.8017, "step": 11608 }, { "epoch": 0.566845703125, "grad_norm": 0.23496174812316895, "learning_rate": 0.00023255709934512427, "loss": 1.7774, "step": 11609 }, { "epoch": 0.56689453125, "grad_norm": 0.22531373798847198, "learning_rate": 0.00023252270103435031, "loss": 1.7668, "step": 11610 }, { "epoch": 0.566943359375, "grad_norm": 0.26147282123565674, "learning_rate": 0.0002324883037530478, "loss": 1.7627, "step": 11611 }, { "epoch": 0.5669921875, "grad_norm": 0.2424400895833969, "learning_rate": 0.00023245390750205048, "loss": 1.7545, "step": 11612 }, { "epoch": 0.567041015625, "grad_norm": 0.2343677431344986, "learning_rate": 0.00023241951228219183, "loss": 1.769, "step": 11613 }, { "epoch": 0.56708984375, "grad_norm": 0.29198700189590454, "learning_rate": 0.00023238511809430557, "loss": 1.8016, "step": 11614 }, { "epoch": 0.567138671875, "grad_norm": 0.21984660625457764, "learning_rate": 0.0002323507249392251, "loss": 1.7751, "step": 11615 }, { "epoch": 0.5671875, "grad_norm": 0.22581522166728973, "learning_rate": 0.00023231633281778415, "loss": 1.7782, "step": 11616 }, { "epoch": 0.567236328125, "grad_norm": 0.2436484843492508, "learning_rate": 0.0002322819417308162, "loss": 1.7672, "step": 11617 }, { "epoch": 0.56728515625, "grad_norm": 0.24901917576789856, "learning_rate": 0.00023224755167915463, "loss": 1.7873, "step": 11618 }, { "epoch": 0.567333984375, "grad_norm": 0.21113376319408417, "learning_rate": 0.00023221316266363302, "loss": 1.7763, "step": 11619 }, { "epoch": 0.5673828125, "grad_norm": 0.2633455991744995, "learning_rate": 0.0002321787746850848, "loss": 1.7794, "step": 11620 }, { "epoch": 0.567431640625, "grad_norm": 0.24489763379096985, "learning_rate": 0.00023214438774434338, "loss": 1.8081, "step": 11621 }, { "epoch": 0.56748046875, "grad_norm": 0.25082358717918396, "learning_rate": 0.00023211000184224212, "loss": 1.7834, "step": 11622 }, { "epoch": 0.567529296875, "grad_norm": 0.24858002364635468, "learning_rate": 0.0002320756169796145, "loss": 1.7775, "step": 11623 }, { "epoch": 0.567578125, "grad_norm": 0.24387720227241516, "learning_rate": 0.00023204123315729365, "loss": 1.7774, "step": 11624 }, { "epoch": 0.567626953125, "grad_norm": 0.2541669011116028, "learning_rate": 0.00023200685037611313, "loss": 1.7892, "step": 11625 }, { "epoch": 0.56767578125, "grad_norm": 0.21331585943698883, "learning_rate": 0.00023197246863690613, "loss": 1.7889, "step": 11626 }, { "epoch": 0.567724609375, "grad_norm": 0.24451230466365814, "learning_rate": 0.00023193808794050593, "loss": 1.7543, "step": 11627 }, { "epoch": 0.5677734375, "grad_norm": 0.21818694472312927, "learning_rate": 0.00023190370828774572, "loss": 1.7546, "step": 11628 }, { "epoch": 0.567822265625, "grad_norm": 0.22079475224018097, "learning_rate": 0.00023186932967945878, "loss": 1.7678, "step": 11629 }, { "epoch": 0.56787109375, "grad_norm": 0.21519263088703156, "learning_rate": 0.0002318349521164783, "loss": 1.7841, "step": 11630 }, { "epoch": 0.567919921875, "grad_norm": 0.24451805651187897, "learning_rate": 0.00023180057559963734, "loss": 1.7866, "step": 11631 }, { "epoch": 0.56796875, "grad_norm": 0.2654584050178528, "learning_rate": 0.0002317662001297693, "loss": 1.7862, "step": 11632 }, { "epoch": 0.568017578125, "grad_norm": 0.26935362815856934, "learning_rate": 0.000231731825707707, "loss": 1.8136, "step": 11633 }, { "epoch": 0.56806640625, "grad_norm": 0.270404577255249, "learning_rate": 0.0002316974523342838, "loss": 1.7745, "step": 11634 }, { "epoch": 0.568115234375, "grad_norm": 0.3497770428657532, "learning_rate": 0.0002316630800103325, "loss": 1.7818, "step": 11635 }, { "epoch": 0.5681640625, "grad_norm": 0.25183817744255066, "learning_rate": 0.00023162870873668634, "loss": 1.7574, "step": 11636 }, { "epoch": 0.568212890625, "grad_norm": 0.30262646079063416, "learning_rate": 0.00023159433851417828, "loss": 1.7976, "step": 11637 }, { "epoch": 0.56826171875, "grad_norm": 0.2998437285423279, "learning_rate": 0.0002315599693436412, "loss": 1.7999, "step": 11638 }, { "epoch": 0.568310546875, "grad_norm": 0.22922055423259735, "learning_rate": 0.00023152560122590825, "loss": 1.7798, "step": 11639 }, { "epoch": 0.568359375, "grad_norm": 0.2772531807422638, "learning_rate": 0.00023149123416181224, "loss": 1.7625, "step": 11640 }, { "epoch": 0.568408203125, "grad_norm": 0.2592029869556427, "learning_rate": 0.0002314568681521862, "loss": 1.7709, "step": 11641 }, { "epoch": 0.56845703125, "grad_norm": 0.24925534427165985, "learning_rate": 0.00023142250319786283, "loss": 1.7902, "step": 11642 }, { "epoch": 0.568505859375, "grad_norm": 0.2914668917655945, "learning_rate": 0.00023138813929967522, "loss": 1.7726, "step": 11643 }, { "epoch": 0.5685546875, "grad_norm": 0.25110098719596863, "learning_rate": 0.00023135377645845597, "loss": 1.8188, "step": 11644 }, { "epoch": 0.568603515625, "grad_norm": 0.27651527523994446, "learning_rate": 0.00023131941467503813, "loss": 1.7737, "step": 11645 }, { "epoch": 0.56865234375, "grad_norm": 0.24869312345981598, "learning_rate": 0.00023128505395025433, "loss": 1.7987, "step": 11646 }, { "epoch": 0.568701171875, "grad_norm": 0.25686362385749817, "learning_rate": 0.00023125069428493733, "loss": 1.7542, "step": 11647 }, { "epoch": 0.56875, "grad_norm": 0.2512875199317932, "learning_rate": 0.00023121633567991997, "loss": 1.7637, "step": 11648 }, { "epoch": 0.568798828125, "grad_norm": 0.30302953720092773, "learning_rate": 0.00023118197813603482, "loss": 1.7755, "step": 11649 }, { "epoch": 0.56884765625, "grad_norm": 0.21911904215812683, "learning_rate": 0.00023114762165411462, "loss": 1.7881, "step": 11650 }, { "epoch": 0.568896484375, "grad_norm": 0.2697167992591858, "learning_rate": 0.00023111326623499208, "loss": 1.7421, "step": 11651 }, { "epoch": 0.5689453125, "grad_norm": 0.22308725118637085, "learning_rate": 0.00023107891187949982, "loss": 1.7638, "step": 11652 }, { "epoch": 0.568994140625, "grad_norm": 0.29721447825431824, "learning_rate": 0.00023104455858847034, "loss": 1.7723, "step": 11653 }, { "epoch": 0.56904296875, "grad_norm": 0.28001832962036133, "learning_rate": 0.00023101020636273633, "loss": 1.7893, "step": 11654 }, { "epoch": 0.569091796875, "grad_norm": 0.26224857568740845, "learning_rate": 0.00023097585520313042, "loss": 1.7928, "step": 11655 }, { "epoch": 0.569140625, "grad_norm": 0.31066784262657166, "learning_rate": 0.00023094150511048485, "loss": 1.7708, "step": 11656 }, { "epoch": 0.569189453125, "grad_norm": 0.23195940256118774, "learning_rate": 0.0002309071560856324, "loss": 1.783, "step": 11657 }, { "epoch": 0.56923828125, "grad_norm": 0.2562269866466522, "learning_rate": 0.00023087280812940538, "loss": 1.7937, "step": 11658 }, { "epoch": 0.569287109375, "grad_norm": 0.265860915184021, "learning_rate": 0.0002308384612426364, "loss": 1.7863, "step": 11659 }, { "epoch": 0.5693359375, "grad_norm": 0.22934556007385254, "learning_rate": 0.0002308041154261577, "loss": 1.7743, "step": 11660 }, { "epoch": 0.569384765625, "grad_norm": 0.27848365902900696, "learning_rate": 0.00023076977068080186, "loss": 1.7843, "step": 11661 }, { "epoch": 0.56943359375, "grad_norm": 0.2996591627597809, "learning_rate": 0.00023073542700740112, "loss": 1.7793, "step": 11662 }, { "epoch": 0.569482421875, "grad_norm": 0.19599217176437378, "learning_rate": 0.00023070108440678783, "loss": 1.7758, "step": 11663 }, { "epoch": 0.56953125, "grad_norm": 0.26748305559158325, "learning_rate": 0.00023066674287979444, "loss": 1.7672, "step": 11664 }, { "epoch": 0.569580078125, "grad_norm": 0.2589167058467865, "learning_rate": 0.00023063240242725302, "loss": 1.7653, "step": 11665 }, { "epoch": 0.56962890625, "grad_norm": 0.22060629725456238, "learning_rate": 0.0002305980630499961, "loss": 1.7648, "step": 11666 }, { "epoch": 0.569677734375, "grad_norm": 0.30907371640205383, "learning_rate": 0.00023056372474885566, "loss": 1.7732, "step": 11667 }, { "epoch": 0.5697265625, "grad_norm": 0.25637438893318176, "learning_rate": 0.00023052938752466417, "loss": 1.7721, "step": 11668 }, { "epoch": 0.569775390625, "grad_norm": 0.27062857151031494, "learning_rate": 0.00023049505137825362, "loss": 1.7799, "step": 11669 }, { "epoch": 0.56982421875, "grad_norm": 0.25206926465034485, "learning_rate": 0.00023046071631045633, "loss": 1.7685, "step": 11670 }, { "epoch": 0.569873046875, "grad_norm": 0.30670076608657837, "learning_rate": 0.0002304263823221044, "loss": 1.7796, "step": 11671 }, { "epoch": 0.569921875, "grad_norm": 0.28473755717277527, "learning_rate": 0.00023039204941402975, "loss": 1.793, "step": 11672 }, { "epoch": 0.569970703125, "grad_norm": 0.26695212721824646, "learning_rate": 0.00023035771758706476, "loss": 1.7925, "step": 11673 }, { "epoch": 0.57001953125, "grad_norm": 0.29306426644325256, "learning_rate": 0.00023032338684204123, "loss": 1.7752, "step": 11674 }, { "epoch": 0.570068359375, "grad_norm": 0.25202247500419617, "learning_rate": 0.00023028905717979142, "loss": 1.7809, "step": 11675 }, { "epoch": 0.5701171875, "grad_norm": 0.2484148144721985, "learning_rate": 0.0002302547286011471, "loss": 1.7977, "step": 11676 }, { "epoch": 0.570166015625, "grad_norm": 0.23735691606998444, "learning_rate": 0.00023022040110694052, "loss": 1.7981, "step": 11677 }, { "epoch": 0.57021484375, "grad_norm": 0.22625501453876495, "learning_rate": 0.0002301860746980034, "loss": 1.7859, "step": 11678 }, { "epoch": 0.570263671875, "grad_norm": 0.22788359224796295, "learning_rate": 0.0002301517493751678, "loss": 1.7804, "step": 11679 }, { "epoch": 0.5703125, "grad_norm": 0.20899763703346252, "learning_rate": 0.0002301174251392657, "loss": 1.7839, "step": 11680 }, { "epoch": 0.570361328125, "grad_norm": 0.22446481883525848, "learning_rate": 0.00023008310199112864, "loss": 1.7658, "step": 11681 }, { "epoch": 0.57041015625, "grad_norm": 0.2747457027435303, "learning_rate": 0.00023004877993158885, "loss": 1.7859, "step": 11682 }, { "epoch": 0.570458984375, "grad_norm": 0.20718739926815033, "learning_rate": 0.00023001445896147788, "loss": 1.7896, "step": 11683 }, { "epoch": 0.5705078125, "grad_norm": 0.2505806088447571, "learning_rate": 0.00022998013908162774, "loss": 1.7887, "step": 11684 }, { "epoch": 0.570556640625, "grad_norm": 0.20636390149593353, "learning_rate": 0.00022994582029287004, "loss": 1.7823, "step": 11685 }, { "epoch": 0.57060546875, "grad_norm": 0.26700133085250854, "learning_rate": 0.0002299115025960366, "loss": 1.7755, "step": 11686 }, { "epoch": 0.570654296875, "grad_norm": 0.2645685076713562, "learning_rate": 0.00022987718599195905, "loss": 1.7752, "step": 11687 }, { "epoch": 0.570703125, "grad_norm": 0.26111313700675964, "learning_rate": 0.00022984287048146928, "loss": 1.7794, "step": 11688 }, { "epoch": 0.570751953125, "grad_norm": 0.30513688921928406, "learning_rate": 0.0002298085560653988, "loss": 1.7952, "step": 11689 }, { "epoch": 0.57080078125, "grad_norm": 0.2343727946281433, "learning_rate": 0.0002297742427445792, "loss": 1.7153, "step": 11690 }, { "epoch": 0.570849609375, "grad_norm": 0.22379353642463684, "learning_rate": 0.0002297399305198422, "loss": 1.7569, "step": 11691 }, { "epoch": 0.5708984375, "grad_norm": 0.2502244710922241, "learning_rate": 0.00022970561939201928, "loss": 1.8001, "step": 11692 }, { "epoch": 0.570947265625, "grad_norm": 0.26966026425361633, "learning_rate": 0.00022967130936194213, "loss": 1.7833, "step": 11693 }, { "epoch": 0.57099609375, "grad_norm": 0.22841085493564606, "learning_rate": 0.00022963700043044218, "loss": 1.7454, "step": 11694 }, { "epoch": 0.571044921875, "grad_norm": 0.23631948232650757, "learning_rate": 0.000229602692598351, "loss": 1.7789, "step": 11695 }, { "epoch": 0.57109375, "grad_norm": 0.27809804677963257, "learning_rate": 0.00022956838586650008, "loss": 1.8042, "step": 11696 }, { "epoch": 0.571142578125, "grad_norm": 0.1953914612531662, "learning_rate": 0.00022953408023572075, "loss": 1.7687, "step": 11697 }, { "epoch": 0.57119140625, "grad_norm": 0.24802695214748383, "learning_rate": 0.00022949977570684455, "loss": 1.7848, "step": 11698 }, { "epoch": 0.571240234375, "grad_norm": 0.21392026543617249, "learning_rate": 0.00022946547228070276, "loss": 1.768, "step": 11699 }, { "epoch": 0.5712890625, "grad_norm": 0.2703556716442108, "learning_rate": 0.00022943116995812696, "loss": 1.7802, "step": 11700 }, { "epoch": 0.571337890625, "grad_norm": 0.21077603101730347, "learning_rate": 0.00022939686873994826, "loss": 1.7415, "step": 11701 }, { "epoch": 0.57138671875, "grad_norm": 0.2958014905452728, "learning_rate": 0.00022936256862699818, "loss": 1.7746, "step": 11702 }, { "epoch": 0.571435546875, "grad_norm": 0.24618004262447357, "learning_rate": 0.00022932826962010779, "loss": 1.7603, "step": 11703 }, { "epoch": 0.571484375, "grad_norm": 0.2844912111759186, "learning_rate": 0.00022929397172010858, "loss": 1.79, "step": 11704 }, { "epoch": 0.571533203125, "grad_norm": 0.22701424360275269, "learning_rate": 0.00022925967492783166, "loss": 1.7751, "step": 11705 }, { "epoch": 0.57158203125, "grad_norm": 0.2641882300376892, "learning_rate": 0.00022922537924410823, "loss": 1.7927, "step": 11706 }, { "epoch": 0.571630859375, "grad_norm": 0.2650392949581146, "learning_rate": 0.00022919108466976958, "loss": 1.7913, "step": 11707 }, { "epoch": 0.5716796875, "grad_norm": 0.24368059635162354, "learning_rate": 0.0002291567912056467, "loss": 1.7824, "step": 11708 }, { "epoch": 0.571728515625, "grad_norm": 0.2639058530330658, "learning_rate": 0.0002291224988525709, "loss": 1.7837, "step": 11709 }, { "epoch": 0.57177734375, "grad_norm": 0.2509463131427765, "learning_rate": 0.00022908820761137312, "loss": 1.758, "step": 11710 }, { "epoch": 0.571826171875, "grad_norm": 0.2573181688785553, "learning_rate": 0.00022905391748288462, "loss": 1.7867, "step": 11711 }, { "epoch": 0.571875, "grad_norm": 0.24470511078834534, "learning_rate": 0.00022901962846793623, "loss": 1.7792, "step": 11712 }, { "epoch": 0.571923828125, "grad_norm": 0.21076452732086182, "learning_rate": 0.00022898534056735914, "loss": 1.7767, "step": 11713 }, { "epoch": 0.57197265625, "grad_norm": 0.22885780036449432, "learning_rate": 0.0002289510537819844, "loss": 1.7549, "step": 11714 }, { "epoch": 0.572021484375, "grad_norm": 0.2132328450679779, "learning_rate": 0.00022891676811264268, "loss": 1.7916, "step": 11715 }, { "epoch": 0.5720703125, "grad_norm": 0.23799650371074677, "learning_rate": 0.0002288824835601652, "loss": 1.7888, "step": 11716 }, { "epoch": 0.572119140625, "grad_norm": 0.22000697255134583, "learning_rate": 0.0002288482001253827, "loss": 1.7878, "step": 11717 }, { "epoch": 0.57216796875, "grad_norm": 0.2816336750984192, "learning_rate": 0.00022881391780912625, "loss": 1.7612, "step": 11718 }, { "epoch": 0.572216796875, "grad_norm": 0.19397155940532684, "learning_rate": 0.0002287796366122265, "loss": 1.7507, "step": 11719 }, { "epoch": 0.572265625, "grad_norm": 0.28451839089393616, "learning_rate": 0.00022874535653551447, "loss": 1.7728, "step": 11720 }, { "epoch": 0.572314453125, "grad_norm": 0.2524626851081848, "learning_rate": 0.00022871107757982097, "loss": 1.7662, "step": 11721 }, { "epoch": 0.57236328125, "grad_norm": 0.24988135695457458, "learning_rate": 0.00022867679974597656, "loss": 1.797, "step": 11722 }, { "epoch": 0.572412109375, "grad_norm": 0.2583902180194855, "learning_rate": 0.0002286425230348122, "loss": 1.7609, "step": 11723 }, { "epoch": 0.5724609375, "grad_norm": 0.2231185883283615, "learning_rate": 0.0002286082474471584, "loss": 1.7785, "step": 11724 }, { "epoch": 0.572509765625, "grad_norm": 0.29431676864624023, "learning_rate": 0.00022857397298384614, "loss": 1.7672, "step": 11725 }, { "epoch": 0.57255859375, "grad_norm": 0.2541133165359497, "learning_rate": 0.0002285396996457058, "loss": 1.7817, "step": 11726 }, { "epoch": 0.572607421875, "grad_norm": 0.23571300506591797, "learning_rate": 0.00022850542743356833, "loss": 1.7875, "step": 11727 }, { "epoch": 0.57265625, "grad_norm": 0.28852829337120056, "learning_rate": 0.00022847115634826404, "loss": 1.7886, "step": 11728 }, { "epoch": 0.572705078125, "grad_norm": 0.30364930629730225, "learning_rate": 0.00022843688639062376, "loss": 1.7911, "step": 11729 }, { "epoch": 0.57275390625, "grad_norm": 0.21015891432762146, "learning_rate": 0.00022840261756147795, "loss": 1.7743, "step": 11730 }, { "epoch": 0.572802734375, "grad_norm": 0.28028759360313416, "learning_rate": 0.00022836834986165707, "loss": 1.8016, "step": 11731 }, { "epoch": 0.5728515625, "grad_norm": 0.34295031428337097, "learning_rate": 0.00022833408329199178, "loss": 1.7615, "step": 11732 }, { "epoch": 0.572900390625, "grad_norm": 0.23139747977256775, "learning_rate": 0.0002282998178533124, "loss": 1.7675, "step": 11733 }, { "epoch": 0.57294921875, "grad_norm": 0.3313319981098175, "learning_rate": 0.00022826555354644947, "loss": 1.7698, "step": 11734 }, { "epoch": 0.572998046875, "grad_norm": 0.29129061102867126, "learning_rate": 0.00022823129037223333, "loss": 1.76, "step": 11735 }, { "epoch": 0.573046875, "grad_norm": 0.23060578107833862, "learning_rate": 0.00022819702833149454, "loss": 1.7323, "step": 11736 }, { "epoch": 0.573095703125, "grad_norm": 0.24925215542316437, "learning_rate": 0.00022816276742506332, "loss": 1.7704, "step": 11737 }, { "epoch": 0.57314453125, "grad_norm": 0.24131529033184052, "learning_rate": 0.00022812850765377014, "loss": 1.7788, "step": 11738 }, { "epoch": 0.573193359375, "grad_norm": 0.2632664144039154, "learning_rate": 0.00022809424901844516, "loss": 1.7783, "step": 11739 }, { "epoch": 0.5732421875, "grad_norm": 0.22818993031978607, "learning_rate": 0.00022805999151991873, "loss": 1.7585, "step": 11740 }, { "epoch": 0.573291015625, "grad_norm": 0.24236468970775604, "learning_rate": 0.00022802573515902114, "loss": 1.7753, "step": 11741 }, { "epoch": 0.57333984375, "grad_norm": 0.20336417853832245, "learning_rate": 0.00022799147993658253, "loss": 1.7939, "step": 11742 }, { "epoch": 0.573388671875, "grad_norm": 0.25456708669662476, "learning_rate": 0.00022795722585343333, "loss": 1.7628, "step": 11743 }, { "epoch": 0.5734375, "grad_norm": 0.20726491510868073, "learning_rate": 0.00022792297291040337, "loss": 1.7657, "step": 11744 }, { "epoch": 0.573486328125, "grad_norm": 0.22179333865642548, "learning_rate": 0.00022788872110832316, "loss": 1.7313, "step": 11745 }, { "epoch": 0.57353515625, "grad_norm": 0.27377092838287354, "learning_rate": 0.0002278544704480225, "loss": 1.7536, "step": 11746 }, { "epoch": 0.573583984375, "grad_norm": 0.21353311836719513, "learning_rate": 0.00022782022093033167, "loss": 1.7618, "step": 11747 }, { "epoch": 0.5736328125, "grad_norm": 0.2228369414806366, "learning_rate": 0.00022778597255608074, "loss": 1.7958, "step": 11748 }, { "epoch": 0.573681640625, "grad_norm": 0.22440579533576965, "learning_rate": 0.0002277517253260996, "loss": 1.7573, "step": 11749 }, { "epoch": 0.57373046875, "grad_norm": 0.2230260670185089, "learning_rate": 0.00022771747924121843, "loss": 1.8011, "step": 11750 }, { "epoch": 0.573779296875, "grad_norm": 0.25904837250709534, "learning_rate": 0.0002276832343022671, "loss": 1.774, "step": 11751 }, { "epoch": 0.573828125, "grad_norm": 0.2289923131465912, "learning_rate": 0.00022764899051007565, "loss": 1.7801, "step": 11752 }, { "epoch": 0.573876953125, "grad_norm": 0.2644418776035309, "learning_rate": 0.00022761474786547387, "loss": 1.7651, "step": 11753 }, { "epoch": 0.57392578125, "grad_norm": 0.2580769658088684, "learning_rate": 0.00022758050636929183, "loss": 1.7738, "step": 11754 }, { "epoch": 0.573974609375, "grad_norm": 0.23147259652614594, "learning_rate": 0.00022754626602235933, "loss": 1.7959, "step": 11755 }, { "epoch": 0.5740234375, "grad_norm": 0.24562840163707733, "learning_rate": 0.00022751202682550614, "loss": 1.7577, "step": 11756 }, { "epoch": 0.574072265625, "grad_norm": 0.2422007918357849, "learning_rate": 0.00022747778877956216, "loss": 1.7706, "step": 11757 }, { "epoch": 0.57412109375, "grad_norm": 0.2505413293838501, "learning_rate": 0.0002274435518853571, "loss": 1.7564, "step": 11758 }, { "epoch": 0.574169921875, "grad_norm": 0.237707257270813, "learning_rate": 0.0002274093161437208, "loss": 1.7888, "step": 11759 }, { "epoch": 0.57421875, "grad_norm": 0.2778937816619873, "learning_rate": 0.00022737508155548292, "loss": 1.7795, "step": 11760 }, { "epoch": 0.574267578125, "grad_norm": 0.2425573468208313, "learning_rate": 0.00022734084812147325, "loss": 1.7686, "step": 11761 }, { "epoch": 0.57431640625, "grad_norm": 0.29782018065452576, "learning_rate": 0.00022730661584252138, "loss": 1.7648, "step": 11762 }, { "epoch": 0.574365234375, "grad_norm": 0.22515332698822021, "learning_rate": 0.00022727238471945704, "loss": 1.7869, "step": 11763 }, { "epoch": 0.5744140625, "grad_norm": 0.2883457541465759, "learning_rate": 0.00022723815475310976, "loss": 1.7621, "step": 11764 }, { "epoch": 0.574462890625, "grad_norm": 0.22796082496643066, "learning_rate": 0.00022720392594430917, "loss": 1.7985, "step": 11765 }, { "epoch": 0.57451171875, "grad_norm": 0.2539088726043701, "learning_rate": 0.0002271696982938849, "loss": 1.776, "step": 11766 }, { "epoch": 0.574560546875, "grad_norm": 0.24712218344211578, "learning_rate": 0.0002271354718026663, "loss": 1.7546, "step": 11767 }, { "epoch": 0.574609375, "grad_norm": 0.23824883997440338, "learning_rate": 0.00022710124647148312, "loss": 1.7562, "step": 11768 }, { "epoch": 0.574658203125, "grad_norm": 0.31164100766181946, "learning_rate": 0.00022706702230116456, "loss": 1.798, "step": 11769 }, { "epoch": 0.57470703125, "grad_norm": 0.1946907788515091, "learning_rate": 0.0002270327992925404, "loss": 1.7686, "step": 11770 }, { "epoch": 0.574755859375, "grad_norm": 0.3176317811012268, "learning_rate": 0.00022699857744643972, "loss": 1.798, "step": 11771 }, { "epoch": 0.5748046875, "grad_norm": 0.2644718885421753, "learning_rate": 0.0002269643567636922, "loss": 1.7617, "step": 11772 }, { "epoch": 0.574853515625, "grad_norm": 0.2910451889038086, "learning_rate": 0.00022693013724512707, "loss": 1.7624, "step": 11773 }, { "epoch": 0.57490234375, "grad_norm": 0.29583272337913513, "learning_rate": 0.00022689591889157363, "loss": 1.779, "step": 11774 }, { "epoch": 0.574951171875, "grad_norm": 0.2530452609062195, "learning_rate": 0.00022686170170386133, "loss": 1.7804, "step": 11775 }, { "epoch": 0.575, "grad_norm": 0.32369518280029297, "learning_rate": 0.00022682748568281924, "loss": 1.785, "step": 11776 }, { "epoch": 0.575048828125, "grad_norm": 0.2145405411720276, "learning_rate": 0.00022679327082927682, "loss": 1.7627, "step": 11777 }, { "epoch": 0.57509765625, "grad_norm": 0.3434008061885834, "learning_rate": 0.00022675905714406315, "loss": 1.7762, "step": 11778 }, { "epoch": 0.575146484375, "grad_norm": 0.20076477527618408, "learning_rate": 0.00022672484462800757, "loss": 1.7519, "step": 11779 }, { "epoch": 0.5751953125, "grad_norm": 0.28517863154411316, "learning_rate": 0.0002266906332819391, "loss": 1.803, "step": 11780 }, { "epoch": 0.575244140625, "grad_norm": 0.22618840634822845, "learning_rate": 0.00022665642310668694, "loss": 1.7849, "step": 11781 }, { "epoch": 0.57529296875, "grad_norm": 0.2967688739299774, "learning_rate": 0.00022662221410308032, "loss": 1.7561, "step": 11782 }, { "epoch": 0.575341796875, "grad_norm": 0.20670461654663086, "learning_rate": 0.00022658800627194803, "loss": 1.7955, "step": 11783 }, { "epoch": 0.575390625, "grad_norm": 0.2591811716556549, "learning_rate": 0.00022655379961411944, "loss": 1.765, "step": 11784 }, { "epoch": 0.575439453125, "grad_norm": 0.20934174954891205, "learning_rate": 0.00022651959413042333, "loss": 1.7571, "step": 11785 }, { "epoch": 0.57548828125, "grad_norm": 0.24813005328178406, "learning_rate": 0.00022648538982168888, "loss": 1.7873, "step": 11786 }, { "epoch": 0.575537109375, "grad_norm": 0.20338258147239685, "learning_rate": 0.00022645118668874494, "loss": 1.7807, "step": 11787 }, { "epoch": 0.5755859375, "grad_norm": 0.23713427782058716, "learning_rate": 0.0002264169847324205, "loss": 1.7485, "step": 11788 }, { "epoch": 0.575634765625, "grad_norm": 0.2181568592786789, "learning_rate": 0.00022638278395354457, "loss": 1.7772, "step": 11789 }, { "epoch": 0.57568359375, "grad_norm": 0.2829798758029938, "learning_rate": 0.0002263485843529458, "loss": 1.7508, "step": 11790 }, { "epoch": 0.575732421875, "grad_norm": 0.22246497869491577, "learning_rate": 0.00022631438593145325, "loss": 1.7733, "step": 11791 }, { "epoch": 0.57578125, "grad_norm": 0.23250128328800201, "learning_rate": 0.00022628018868989559, "loss": 1.7432, "step": 11792 }, { "epoch": 0.575830078125, "grad_norm": 0.219936802983284, "learning_rate": 0.00022624599262910177, "loss": 1.7919, "step": 11793 }, { "epoch": 0.57587890625, "grad_norm": 0.22181619703769684, "learning_rate": 0.0002262117977499004, "loss": 1.7767, "step": 11794 }, { "epoch": 0.575927734375, "grad_norm": 0.20162835717201233, "learning_rate": 0.00022617760405312044, "loss": 1.7983, "step": 11795 }, { "epoch": 0.5759765625, "grad_norm": 0.2054627388715744, "learning_rate": 0.00022614341153959035, "loss": 1.7719, "step": 11796 }, { "epoch": 0.576025390625, "grad_norm": 0.2058229297399521, "learning_rate": 0.00022610922021013902, "loss": 1.7702, "step": 11797 }, { "epoch": 0.57607421875, "grad_norm": 0.22379817068576813, "learning_rate": 0.00022607503006559504, "loss": 1.7794, "step": 11798 }, { "epoch": 0.576123046875, "grad_norm": 0.258120059967041, "learning_rate": 0.00022604084110678686, "loss": 1.7745, "step": 11799 }, { "epoch": 0.576171875, "grad_norm": 0.25690096616744995, "learning_rate": 0.0002260066533345434, "loss": 1.7719, "step": 11800 }, { "epoch": 0.576220703125, "grad_norm": 0.30038267374038696, "learning_rate": 0.00022597246674969292, "loss": 1.7747, "step": 11801 }, { "epoch": 0.57626953125, "grad_norm": 0.20197942852973938, "learning_rate": 0.0002259382813530642, "loss": 1.76, "step": 11802 }, { "epoch": 0.576318359375, "grad_norm": 0.26107752323150635, "learning_rate": 0.00022590409714548556, "loss": 1.7694, "step": 11803 }, { "epoch": 0.5763671875, "grad_norm": 0.25343772768974304, "learning_rate": 0.00022586991412778564, "loss": 1.7902, "step": 11804 }, { "epoch": 0.576416015625, "grad_norm": 0.2700420022010803, "learning_rate": 0.0002258357323007928, "loss": 1.7927, "step": 11805 }, { "epoch": 0.57646484375, "grad_norm": 0.24064700305461884, "learning_rate": 0.00022580155166533546, "loss": 1.7811, "step": 11806 }, { "epoch": 0.576513671875, "grad_norm": 0.21182113885879517, "learning_rate": 0.00022576737222224208, "loss": 1.7761, "step": 11807 }, { "epoch": 0.5765625, "grad_norm": 0.26369938254356384, "learning_rate": 0.00022573319397234094, "loss": 1.7623, "step": 11808 }, { "epoch": 0.576611328125, "grad_norm": 0.22676412761211395, "learning_rate": 0.0002256990169164605, "loss": 1.7369, "step": 11809 }, { "epoch": 0.57666015625, "grad_norm": 0.2554503083229065, "learning_rate": 0.0002256648410554289, "loss": 1.7839, "step": 11810 }, { "epoch": 0.576708984375, "grad_norm": 0.2104123830795288, "learning_rate": 0.00022563066639007457, "loss": 1.7906, "step": 11811 }, { "epoch": 0.5767578125, "grad_norm": 0.2764149010181427, "learning_rate": 0.00022559649292122565, "loss": 1.7574, "step": 11812 }, { "epoch": 0.576806640625, "grad_norm": 0.23669929802417755, "learning_rate": 0.00022556232064971048, "loss": 1.8301, "step": 11813 }, { "epoch": 0.57685546875, "grad_norm": 0.25635769963264465, "learning_rate": 0.00022552814957635715, "loss": 1.7997, "step": 11814 }, { "epoch": 0.576904296875, "grad_norm": 0.2651548385620117, "learning_rate": 0.00022549397970199388, "loss": 1.7759, "step": 11815 }, { "epoch": 0.576953125, "grad_norm": 0.2254633903503418, "learning_rate": 0.00022545981102744877, "loss": 1.7905, "step": 11816 }, { "epoch": 0.577001953125, "grad_norm": 0.24549363553524017, "learning_rate": 0.00022542564355354993, "loss": 1.777, "step": 11817 }, { "epoch": 0.57705078125, "grad_norm": 0.21916094422340393, "learning_rate": 0.00022539147728112553, "loss": 1.7829, "step": 11818 }, { "epoch": 0.577099609375, "grad_norm": 0.23963910341262817, "learning_rate": 0.00022535731221100342, "loss": 1.7781, "step": 11819 }, { "epoch": 0.5771484375, "grad_norm": 0.23883910477161407, "learning_rate": 0.0002253231483440118, "loss": 1.7756, "step": 11820 }, { "epoch": 0.577197265625, "grad_norm": 0.2661629617214203, "learning_rate": 0.0002252889856809785, "loss": 1.7716, "step": 11821 }, { "epoch": 0.57724609375, "grad_norm": 0.23210729658603668, "learning_rate": 0.0002252548242227317, "loss": 1.7766, "step": 11822 }, { "epoch": 0.577294921875, "grad_norm": 0.2380048930644989, "learning_rate": 0.00022522066397009916, "loss": 1.7615, "step": 11823 }, { "epoch": 0.57734375, "grad_norm": 0.19983148574829102, "learning_rate": 0.00022518650492390875, "loss": 1.7746, "step": 11824 }, { "epoch": 0.577392578125, "grad_norm": 0.2636387050151825, "learning_rate": 0.00022515234708498844, "loss": 1.7935, "step": 11825 }, { "epoch": 0.57744140625, "grad_norm": 0.24581432342529297, "learning_rate": 0.0002251181904541661, "loss": 1.7732, "step": 11826 }, { "epoch": 0.577490234375, "grad_norm": 0.22077861428260803, "learning_rate": 0.00022508403503226947, "loss": 1.796, "step": 11827 }, { "epoch": 0.5775390625, "grad_norm": 0.21756352484226227, "learning_rate": 0.0002250498808201263, "loss": 1.7974, "step": 11828 }, { "epoch": 0.577587890625, "grad_norm": 0.25665199756622314, "learning_rate": 0.0002250157278185645, "loss": 1.7608, "step": 11829 }, { "epoch": 0.57763671875, "grad_norm": 0.23321761190891266, "learning_rate": 0.0002249815760284115, "loss": 1.757, "step": 11830 }, { "epoch": 0.577685546875, "grad_norm": 0.2339830994606018, "learning_rate": 0.0002249474254504954, "loss": 1.785, "step": 11831 }, { "epoch": 0.577734375, "grad_norm": 0.2471202313899994, "learning_rate": 0.00022491327608564356, "loss": 1.7683, "step": 11832 }, { "epoch": 0.577783203125, "grad_norm": 0.24319210648536682, "learning_rate": 0.00022487912793468375, "loss": 1.7555, "step": 11833 }, { "epoch": 0.57783203125, "grad_norm": 0.24765945971012115, "learning_rate": 0.00022484498099844348, "loss": 1.7641, "step": 11834 }, { "epoch": 0.577880859375, "grad_norm": 0.24691274762153625, "learning_rate": 0.0002248108352777505, "loss": 1.7954, "step": 11835 }, { "epoch": 0.5779296875, "grad_norm": 0.21293187141418457, "learning_rate": 0.0002247766907734321, "loss": 1.757, "step": 11836 }, { "epoch": 0.577978515625, "grad_norm": 0.2585211992263794, "learning_rate": 0.00022474254748631602, "loss": 1.7671, "step": 11837 }, { "epoch": 0.57802734375, "grad_norm": 0.25083455443382263, "learning_rate": 0.00022470840541722977, "loss": 1.793, "step": 11838 }, { "epoch": 0.578076171875, "grad_norm": 0.22180454432964325, "learning_rate": 0.00022467426456700064, "loss": 1.7618, "step": 11839 }, { "epoch": 0.578125, "grad_norm": 0.23353353142738342, "learning_rate": 0.00022464012493645608, "loss": 1.7808, "step": 11840 }, { "epoch": 0.578173828125, "grad_norm": 0.22427517175674438, "learning_rate": 0.0002246059865264236, "loss": 1.7647, "step": 11841 }, { "epoch": 0.57822265625, "grad_norm": 0.21777218580245972, "learning_rate": 0.0002245718493377305, "loss": 1.7262, "step": 11842 }, { "epoch": 0.578271484375, "grad_norm": 0.22000263631343842, "learning_rate": 0.0002245377133712042, "loss": 1.793, "step": 11843 }, { "epoch": 0.5783203125, "grad_norm": 0.194330096244812, "learning_rate": 0.0002245035786276719, "loss": 1.7759, "step": 11844 }, { "epoch": 0.578369140625, "grad_norm": 0.23091070353984833, "learning_rate": 0.00022446944510796097, "loss": 1.7535, "step": 11845 }, { "epoch": 0.57841796875, "grad_norm": 0.22518271207809448, "learning_rate": 0.00022443531281289865, "loss": 1.7509, "step": 11846 }, { "epoch": 0.578466796875, "grad_norm": 0.26945266127586365, "learning_rate": 0.00022440118174331215, "loss": 1.7785, "step": 11847 }, { "epoch": 0.578515625, "grad_norm": 0.2752166986465454, "learning_rate": 0.0002243670519000286, "loss": 1.7921, "step": 11848 }, { "epoch": 0.578564453125, "grad_norm": 0.22716271877288818, "learning_rate": 0.0002243329232838753, "loss": 1.7884, "step": 11849 }, { "epoch": 0.57861328125, "grad_norm": 0.2635689973831177, "learning_rate": 0.00022429879589567925, "loss": 1.7821, "step": 11850 }, { "epoch": 0.578662109375, "grad_norm": 0.24782083928585052, "learning_rate": 0.00022426466973626764, "loss": 1.7758, "step": 11851 }, { "epoch": 0.5787109375, "grad_norm": 0.2384311705827713, "learning_rate": 0.00022423054480646748, "loss": 1.7564, "step": 11852 }, { "epoch": 0.578759765625, "grad_norm": 0.2638762295246124, "learning_rate": 0.0002241964211071059, "loss": 1.7904, "step": 11853 }, { "epoch": 0.57880859375, "grad_norm": 0.2157703936100006, "learning_rate": 0.0002241622986390099, "loss": 1.7879, "step": 11854 }, { "epoch": 0.578857421875, "grad_norm": 0.3086513876914978, "learning_rate": 0.00022412817740300633, "loss": 1.751, "step": 11855 }, { "epoch": 0.57890625, "grad_norm": 0.2829948961734772, "learning_rate": 0.0002240940573999224, "loss": 1.7861, "step": 11856 }, { "epoch": 0.578955078125, "grad_norm": 0.26960495114326477, "learning_rate": 0.00022405993863058483, "loss": 1.7859, "step": 11857 }, { "epoch": 0.57900390625, "grad_norm": 0.24254591763019562, "learning_rate": 0.00022402582109582047, "loss": 1.784, "step": 11858 }, { "epoch": 0.579052734375, "grad_norm": 0.3261070251464844, "learning_rate": 0.00022399170479645643, "loss": 1.7518, "step": 11859 }, { "epoch": 0.5791015625, "grad_norm": 0.23927223682403564, "learning_rate": 0.0002239575897333193, "loss": 1.7874, "step": 11860 }, { "epoch": 0.579150390625, "grad_norm": 0.2595193088054657, "learning_rate": 0.000223923475907236, "loss": 1.7668, "step": 11861 }, { "epoch": 0.57919921875, "grad_norm": 0.24525606632232666, "learning_rate": 0.00022388936331903332, "loss": 1.7574, "step": 11862 }, { "epoch": 0.579248046875, "grad_norm": 0.2887837886810303, "learning_rate": 0.00022385525196953805, "loss": 1.7664, "step": 11863 }, { "epoch": 0.579296875, "grad_norm": 0.3138633668422699, "learning_rate": 0.00022382114185957674, "loss": 1.7644, "step": 11864 }, { "epoch": 0.579345703125, "grad_norm": 0.2080194056034088, "learning_rate": 0.0002237870329899762, "loss": 1.7892, "step": 11865 }, { "epoch": 0.57939453125, "grad_norm": 0.29572662711143494, "learning_rate": 0.00022375292536156311, "loss": 1.7864, "step": 11866 }, { "epoch": 0.579443359375, "grad_norm": 0.22753995656967163, "learning_rate": 0.000223718818975164, "loss": 1.7367, "step": 11867 }, { "epoch": 0.5794921875, "grad_norm": 0.28071704506874084, "learning_rate": 0.0002236847138316055, "loss": 1.7865, "step": 11868 }, { "epoch": 0.579541015625, "grad_norm": 0.31086015701293945, "learning_rate": 0.00022365060993171415, "loss": 1.7837, "step": 11869 }, { "epoch": 0.57958984375, "grad_norm": 0.27173036336898804, "learning_rate": 0.0002236165072763166, "loss": 1.7974, "step": 11870 }, { "epoch": 0.579638671875, "grad_norm": 0.24943380057811737, "learning_rate": 0.00022358240586623925, "loss": 1.7992, "step": 11871 }, { "epoch": 0.5796875, "grad_norm": 0.2480631172657013, "learning_rate": 0.0002235483057023086, "loss": 1.7804, "step": 11872 }, { "epoch": 0.579736328125, "grad_norm": 0.2859450876712799, "learning_rate": 0.00022351420678535112, "loss": 1.7864, "step": 11873 }, { "epoch": 0.57978515625, "grad_norm": 0.2517814040184021, "learning_rate": 0.00022348010911619315, "loss": 1.788, "step": 11874 }, { "epoch": 0.579833984375, "grad_norm": 0.2871347665786743, "learning_rate": 0.00022344601269566117, "loss": 1.772, "step": 11875 }, { "epoch": 0.5798828125, "grad_norm": 0.2508523464202881, "learning_rate": 0.00022341191752458145, "loss": 1.7948, "step": 11876 }, { "epoch": 0.579931640625, "grad_norm": 0.26440614461898804, "learning_rate": 0.00022337782360378044, "loss": 1.7884, "step": 11877 }, { "epoch": 0.57998046875, "grad_norm": 0.2679636776447296, "learning_rate": 0.0002233437309340843, "loss": 1.763, "step": 11878 }, { "epoch": 0.580029296875, "grad_norm": 0.2743584215641022, "learning_rate": 0.00022330963951631928, "loss": 1.7588, "step": 11879 }, { "epoch": 0.580078125, "grad_norm": 0.24371720850467682, "learning_rate": 0.00022327554935131172, "loss": 1.7311, "step": 11880 }, { "epoch": 0.580126953125, "grad_norm": 0.23317857086658478, "learning_rate": 0.0002232414604398878, "loss": 1.7843, "step": 11881 }, { "epoch": 0.58017578125, "grad_norm": 0.2277122586965561, "learning_rate": 0.00022320737278287367, "loss": 1.774, "step": 11882 }, { "epoch": 0.580224609375, "grad_norm": 0.27085360884666443, "learning_rate": 0.00022317328638109542, "loss": 1.7737, "step": 11883 }, { "epoch": 0.5802734375, "grad_norm": 0.23900362849235535, "learning_rate": 0.00022313920123537924, "loss": 1.7935, "step": 11884 }, { "epoch": 0.580322265625, "grad_norm": 0.21931342780590057, "learning_rate": 0.00022310511734655115, "loss": 1.7786, "step": 11885 }, { "epoch": 0.58037109375, "grad_norm": 0.2469407618045807, "learning_rate": 0.00022307103471543726, "loss": 1.7859, "step": 11886 }, { "epoch": 0.580419921875, "grad_norm": 0.2220131754875183, "learning_rate": 0.0002230369533428636, "loss": 1.782, "step": 11887 }, { "epoch": 0.58046875, "grad_norm": 0.25772926211357117, "learning_rate": 0.00022300287322965612, "loss": 1.8061, "step": 11888 }, { "epoch": 0.580517578125, "grad_norm": 0.2148260623216629, "learning_rate": 0.00022296879437664076, "loss": 1.7601, "step": 11889 }, { "epoch": 0.58056640625, "grad_norm": 0.21950951218605042, "learning_rate": 0.00022293471678464338, "loss": 1.764, "step": 11890 }, { "epoch": 0.580615234375, "grad_norm": 0.23019395768642426, "learning_rate": 0.00022290064045449017, "loss": 1.7885, "step": 11891 }, { "epoch": 0.5806640625, "grad_norm": 0.22849814593791962, "learning_rate": 0.00022286656538700656, "loss": 1.7625, "step": 11892 }, { "epoch": 0.580712890625, "grad_norm": 0.2238187938928604, "learning_rate": 0.00022283249158301882, "loss": 1.7623, "step": 11893 }, { "epoch": 0.58076171875, "grad_norm": 0.2531300485134125, "learning_rate": 0.00022279841904335242, "loss": 1.7813, "step": 11894 }, { "epoch": 0.580810546875, "grad_norm": 0.2693864107131958, "learning_rate": 0.00022276434776883337, "loss": 1.7566, "step": 11895 }, { "epoch": 0.580859375, "grad_norm": 0.2510056793689728, "learning_rate": 0.00022273027776028727, "loss": 1.7878, "step": 11896 }, { "epoch": 0.580908203125, "grad_norm": 0.24157489836215973, "learning_rate": 0.00022269620901853987, "loss": 1.7755, "step": 11897 }, { "epoch": 0.58095703125, "grad_norm": 0.2562022805213928, "learning_rate": 0.00022266214154441694, "loss": 1.7477, "step": 11898 }, { "epoch": 0.581005859375, "grad_norm": 0.25165870785713196, "learning_rate": 0.0002226280753387439, "loss": 1.7871, "step": 11899 }, { "epoch": 0.5810546875, "grad_norm": 0.30669569969177246, "learning_rate": 0.0002225940104023467, "loss": 1.7562, "step": 11900 }, { "epoch": 0.581103515625, "grad_norm": 0.31665563583374023, "learning_rate": 0.00022255994673605063, "loss": 1.794, "step": 11901 }, { "epoch": 0.58115234375, "grad_norm": 0.25985246896743774, "learning_rate": 0.0002225258843406815, "loss": 1.7532, "step": 11902 }, { "epoch": 0.581201171875, "grad_norm": 0.2293296903371811, "learning_rate": 0.00022249182321706457, "loss": 1.7811, "step": 11903 }, { "epoch": 0.58125, "grad_norm": 0.2769845724105835, "learning_rate": 0.00022245776336602563, "loss": 1.7894, "step": 11904 }, { "epoch": 0.581298828125, "grad_norm": 0.25168728828430176, "learning_rate": 0.00022242370478838992, "loss": 1.778, "step": 11905 }, { "epoch": 0.58134765625, "grad_norm": 0.23458845913410187, "learning_rate": 0.00022238964748498303, "loss": 1.7682, "step": 11906 }, { "epoch": 0.581396484375, "grad_norm": 0.2597223222255707, "learning_rate": 0.00022235559145663032, "loss": 1.7738, "step": 11907 }, { "epoch": 0.5814453125, "grad_norm": 0.2584303915500641, "learning_rate": 0.00022232153670415704, "loss": 1.7932, "step": 11908 }, { "epoch": 0.581494140625, "grad_norm": 0.23248538374900818, "learning_rate": 0.00022228748322838872, "loss": 1.7937, "step": 11909 }, { "epoch": 0.58154296875, "grad_norm": 0.27434635162353516, "learning_rate": 0.0002222534310301505, "loss": 1.758, "step": 11910 }, { "epoch": 0.581591796875, "grad_norm": 0.26151522994041443, "learning_rate": 0.00022221938011026787, "loss": 1.7678, "step": 11911 }, { "epoch": 0.581640625, "grad_norm": 0.2603873610496521, "learning_rate": 0.00022218533046956584, "loss": 1.7566, "step": 11912 }, { "epoch": 0.581689453125, "grad_norm": 0.3301132917404175, "learning_rate": 0.00022215128210886992, "loss": 1.7475, "step": 11913 }, { "epoch": 0.58173828125, "grad_norm": 0.2506447434425354, "learning_rate": 0.000222117235029005, "loss": 1.7622, "step": 11914 }, { "epoch": 0.581787109375, "grad_norm": 0.2818654775619507, "learning_rate": 0.00022208318923079646, "loss": 1.7411, "step": 11915 }, { "epoch": 0.5818359375, "grad_norm": 0.2547188997268677, "learning_rate": 0.00022204914471506932, "loss": 1.7388, "step": 11916 }, { "epoch": 0.581884765625, "grad_norm": 0.2777438461780548, "learning_rate": 0.00022201510148264864, "loss": 1.7864, "step": 11917 }, { "epoch": 0.58193359375, "grad_norm": 0.23200830817222595, "learning_rate": 0.00022198105953435965, "loss": 1.784, "step": 11918 }, { "epoch": 0.581982421875, "grad_norm": 0.29750537872314453, "learning_rate": 0.00022194701887102714, "loss": 1.7919, "step": 11919 }, { "epoch": 0.58203125, "grad_norm": 0.2628246545791626, "learning_rate": 0.0002219129794934764, "loss": 1.7958, "step": 11920 }, { "epoch": 0.582080078125, "grad_norm": 0.26481252908706665, "learning_rate": 0.0002218789414025321, "loss": 1.7765, "step": 11921 }, { "epoch": 0.58212890625, "grad_norm": 0.2644799053668976, "learning_rate": 0.00022184490459901946, "loss": 1.7701, "step": 11922 }, { "epoch": 0.582177734375, "grad_norm": 0.22559460997581482, "learning_rate": 0.0002218108690837633, "loss": 1.7833, "step": 11923 }, { "epoch": 0.5822265625, "grad_norm": 0.25416299700737, "learning_rate": 0.00022177683485758825, "loss": 1.8008, "step": 11924 }, { "epoch": 0.582275390625, "grad_norm": 0.2247617095708847, "learning_rate": 0.00022174280192131958, "loss": 1.7676, "step": 11925 }, { "epoch": 0.58232421875, "grad_norm": 0.2595248222351074, "learning_rate": 0.00022170877027578173, "loss": 1.7741, "step": 11926 }, { "epoch": 0.582373046875, "grad_norm": 0.25879958271980286, "learning_rate": 0.00022167473992179977, "loss": 1.8063, "step": 11927 }, { "epoch": 0.582421875, "grad_norm": 0.27176955342292786, "learning_rate": 0.0002216407108601982, "loss": 1.7669, "step": 11928 }, { "epoch": 0.582470703125, "grad_norm": 0.3166109025478363, "learning_rate": 0.000221606683091802, "loss": 1.7796, "step": 11929 }, { "epoch": 0.58251953125, "grad_norm": 0.2313435673713684, "learning_rate": 0.0002215726566174356, "loss": 1.7715, "step": 11930 }, { "epoch": 0.582568359375, "grad_norm": 0.2637203633785248, "learning_rate": 0.00022153863143792397, "loss": 1.7843, "step": 11931 }, { "epoch": 0.5826171875, "grad_norm": 0.23077785968780518, "learning_rate": 0.00022150460755409145, "loss": 1.7568, "step": 11932 }, { "epoch": 0.582666015625, "grad_norm": 0.24851928651332855, "learning_rate": 0.0002214705849667627, "loss": 1.7333, "step": 11933 }, { "epoch": 0.58271484375, "grad_norm": 0.216430202126503, "learning_rate": 0.0002214365636767624, "loss": 1.7978, "step": 11934 }, { "epoch": 0.582763671875, "grad_norm": 0.24010221660137177, "learning_rate": 0.00022140254368491492, "loss": 1.7931, "step": 11935 }, { "epoch": 0.5828125, "grad_norm": 0.24776026606559753, "learning_rate": 0.00022136852499204495, "loss": 1.774, "step": 11936 }, { "epoch": 0.582861328125, "grad_norm": 0.23705598711967468, "learning_rate": 0.00022133450759897683, "loss": 1.8054, "step": 11937 }, { "epoch": 0.58291015625, "grad_norm": 0.26197919249534607, "learning_rate": 0.00022130049150653508, "loss": 1.7663, "step": 11938 }, { "epoch": 0.582958984375, "grad_norm": 0.2755076289176941, "learning_rate": 0.00022126647671554396, "loss": 1.7814, "step": 11939 }, { "epoch": 0.5830078125, "grad_norm": 0.25826266407966614, "learning_rate": 0.00022123246322682804, "loss": 1.7827, "step": 11940 }, { "epoch": 0.583056640625, "grad_norm": 0.2771281599998474, "learning_rate": 0.00022119845104121155, "loss": 1.7839, "step": 11941 }, { "epoch": 0.58310546875, "grad_norm": 0.22744295001029968, "learning_rate": 0.00022116444015951876, "loss": 1.7497, "step": 11942 }, { "epoch": 0.583154296875, "grad_norm": 0.2863579988479614, "learning_rate": 0.0002211304305825741, "loss": 1.7936, "step": 11943 }, { "epoch": 0.583203125, "grad_norm": 0.24739480018615723, "learning_rate": 0.00022109642231120163, "loss": 1.7907, "step": 11944 }, { "epoch": 0.583251953125, "grad_norm": 0.2817528247833252, "learning_rate": 0.0002210624153462258, "loss": 1.7749, "step": 11945 }, { "epoch": 0.58330078125, "grad_norm": 0.26685282588005066, "learning_rate": 0.00022102840968847055, "loss": 1.7637, "step": 11946 }, { "epoch": 0.583349609375, "grad_norm": 0.24330739676952362, "learning_rate": 0.00022099440533876024, "loss": 1.7696, "step": 11947 }, { "epoch": 0.5833984375, "grad_norm": 0.3227158784866333, "learning_rate": 0.0002209604022979188, "loss": 1.7971, "step": 11948 }, { "epoch": 0.583447265625, "grad_norm": 0.3285180330276489, "learning_rate": 0.00022092640056677054, "loss": 1.7972, "step": 11949 }, { "epoch": 0.58349609375, "grad_norm": 0.2724788784980774, "learning_rate": 0.00022089240014613943, "loss": 1.774, "step": 11950 }, { "epoch": 0.583544921875, "grad_norm": 0.3399959206581116, "learning_rate": 0.00022085840103684934, "loss": 1.7707, "step": 11951 }, { "epoch": 0.58359375, "grad_norm": 0.2398703396320343, "learning_rate": 0.00022082440323972446, "loss": 1.7812, "step": 11952 }, { "epoch": 0.583642578125, "grad_norm": 0.3606792986392975, "learning_rate": 0.00022079040675558863, "loss": 1.7822, "step": 11953 }, { "epoch": 0.58369140625, "grad_norm": 0.3069958984851837, "learning_rate": 0.000220756411585266, "loss": 1.7609, "step": 11954 }, { "epoch": 0.583740234375, "grad_norm": 0.2754146456718445, "learning_rate": 0.00022072241772958014, "loss": 1.7658, "step": 11955 }, { "epoch": 0.5837890625, "grad_norm": 0.27665725350379944, "learning_rate": 0.0002206884251893552, "loss": 1.7502, "step": 11956 }, { "epoch": 0.583837890625, "grad_norm": 0.2766484320163727, "learning_rate": 0.00022065443396541496, "loss": 1.7626, "step": 11957 }, { "epoch": 0.58388671875, "grad_norm": 0.25170019268989563, "learning_rate": 0.000220620444058583, "loss": 1.7523, "step": 11958 }, { "epoch": 0.583935546875, "grad_norm": 0.30496954917907715, "learning_rate": 0.00022058645546968342, "loss": 1.7703, "step": 11959 }, { "epoch": 0.583984375, "grad_norm": 0.23452357947826385, "learning_rate": 0.00022055246819953966, "loss": 1.7586, "step": 11960 }, { "epoch": 0.584033203125, "grad_norm": 0.2905212938785553, "learning_rate": 0.00022051848224897576, "loss": 1.8011, "step": 11961 }, { "epoch": 0.58408203125, "grad_norm": 0.2377081662416458, "learning_rate": 0.00022048449761881506, "loss": 1.7649, "step": 11962 }, { "epoch": 0.584130859375, "grad_norm": 0.25926029682159424, "learning_rate": 0.00022045051430988138, "loss": 1.7869, "step": 11963 }, { "epoch": 0.5841796875, "grad_norm": 0.24471572041511536, "learning_rate": 0.0002204165323229983, "loss": 1.7777, "step": 11964 }, { "epoch": 0.584228515625, "grad_norm": 0.23177725076675415, "learning_rate": 0.00022038255165898952, "loss": 1.7396, "step": 11965 }, { "epoch": 0.58427734375, "grad_norm": 0.22753553092479706, "learning_rate": 0.00022034857231867844, "loss": 1.782, "step": 11966 }, { "epoch": 0.584326171875, "grad_norm": 0.22720147669315338, "learning_rate": 0.00022031459430288858, "loss": 1.7939, "step": 11967 }, { "epoch": 0.584375, "grad_norm": 0.2549671530723572, "learning_rate": 0.0002202806176124435, "loss": 1.7717, "step": 11968 }, { "epoch": 0.584423828125, "grad_norm": 0.21272799372673035, "learning_rate": 0.00022024664224816655, "loss": 1.7797, "step": 11969 }, { "epoch": 0.58447265625, "grad_norm": 0.24274861812591553, "learning_rate": 0.00022021266821088125, "loss": 1.7919, "step": 11970 }, { "epoch": 0.584521484375, "grad_norm": 0.2279130220413208, "learning_rate": 0.0002201786955014109, "loss": 1.7533, "step": 11971 }, { "epoch": 0.5845703125, "grad_norm": 0.22855272889137268, "learning_rate": 0.000220144724120579, "loss": 1.7929, "step": 11972 }, { "epoch": 0.584619140625, "grad_norm": 0.2249828428030014, "learning_rate": 0.00022011075406920866, "loss": 1.7923, "step": 11973 }, { "epoch": 0.58466796875, "grad_norm": 0.21197301149368286, "learning_rate": 0.00022007678534812343, "loss": 1.7625, "step": 11974 }, { "epoch": 0.584716796875, "grad_norm": 0.20099756121635437, "learning_rate": 0.0002200428179581464, "loss": 1.758, "step": 11975 }, { "epoch": 0.584765625, "grad_norm": 0.2322804480791092, "learning_rate": 0.00022000885190010078, "loss": 1.7802, "step": 11976 }, { "epoch": 0.584814453125, "grad_norm": 0.2199660688638687, "learning_rate": 0.00021997488717480986, "loss": 1.7978, "step": 11977 }, { "epoch": 0.58486328125, "grad_norm": 0.21924002468585968, "learning_rate": 0.00021994092378309666, "loss": 1.8, "step": 11978 }, { "epoch": 0.584912109375, "grad_norm": 0.22734180092811584, "learning_rate": 0.0002199069617257845, "loss": 1.7627, "step": 11979 }, { "epoch": 0.5849609375, "grad_norm": 0.21181440353393555, "learning_rate": 0.00021987300100369633, "loss": 1.779, "step": 11980 }, { "epoch": 0.585009765625, "grad_norm": 0.2133144587278366, "learning_rate": 0.00021983904161765533, "loss": 1.7749, "step": 11981 }, { "epoch": 0.58505859375, "grad_norm": 0.2507500648498535, "learning_rate": 0.0002198050835684845, "loss": 1.7663, "step": 11982 }, { "epoch": 0.585107421875, "grad_norm": 0.21320027112960815, "learning_rate": 0.00021977112685700673, "loss": 1.7797, "step": 11983 }, { "epoch": 0.58515625, "grad_norm": 0.2444348931312561, "learning_rate": 0.0002197371714840451, "loss": 1.7578, "step": 11984 }, { "epoch": 0.585205078125, "grad_norm": 0.22619156539440155, "learning_rate": 0.0002197032174504225, "loss": 1.7743, "step": 11985 }, { "epoch": 0.58525390625, "grad_norm": 0.21754997968673706, "learning_rate": 0.00021966926475696185, "loss": 1.7821, "step": 11986 }, { "epoch": 0.585302734375, "grad_norm": 0.1904059797525406, "learning_rate": 0.000219635313404486, "loss": 1.7917, "step": 11987 }, { "epoch": 0.5853515625, "grad_norm": 0.21763740479946136, "learning_rate": 0.00021960136339381787, "loss": 1.754, "step": 11988 }, { "epoch": 0.585400390625, "grad_norm": 0.22168108820915222, "learning_rate": 0.00021956741472578013, "loss": 1.7708, "step": 11989 }, { "epoch": 0.58544921875, "grad_norm": 0.25602689385414124, "learning_rate": 0.00021953346740119574, "loss": 1.7831, "step": 11990 }, { "epoch": 0.585498046875, "grad_norm": 0.24973078072071075, "learning_rate": 0.0002194995214208873, "loss": 1.7657, "step": 11991 }, { "epoch": 0.585546875, "grad_norm": 0.2617741525173187, "learning_rate": 0.00021946557678567747, "loss": 1.7607, "step": 11992 }, { "epoch": 0.585595703125, "grad_norm": 0.2603042721748352, "learning_rate": 0.00021943163349638905, "loss": 1.7422, "step": 11993 }, { "epoch": 0.58564453125, "grad_norm": 0.2527323067188263, "learning_rate": 0.0002193976915538446, "loss": 1.7645, "step": 11994 }, { "epoch": 0.585693359375, "grad_norm": 0.2413627654314041, "learning_rate": 0.00021936375095886685, "loss": 1.7757, "step": 11995 }, { "epoch": 0.5857421875, "grad_norm": 0.24695070087909698, "learning_rate": 0.00021932981171227816, "loss": 1.7768, "step": 11996 }, { "epoch": 0.585791015625, "grad_norm": 0.19945451617240906, "learning_rate": 0.00021929587381490134, "loss": 1.7723, "step": 11997 }, { "epoch": 0.58583984375, "grad_norm": 0.23963963985443115, "learning_rate": 0.00021926193726755868, "loss": 1.7873, "step": 11998 }, { "epoch": 0.585888671875, "grad_norm": 0.18910279870033264, "learning_rate": 0.0002192280020710728, "loss": 1.7588, "step": 11999 }, { "epoch": 0.5859375, "grad_norm": 0.21589243412017822, "learning_rate": 0.00021919406822626614, "loss": 1.7811, "step": 12000 }, { "epoch": 0.585986328125, "grad_norm": 0.20326606929302216, "learning_rate": 0.00021916013573396092, "loss": 1.7736, "step": 12001 }, { "epoch": 0.58603515625, "grad_norm": 0.23645450174808502, "learning_rate": 0.00021912620459497984, "loss": 1.7791, "step": 12002 }, { "epoch": 0.586083984375, "grad_norm": 0.2718321979045868, "learning_rate": 0.00021909227481014493, "loss": 1.7596, "step": 12003 }, { "epoch": 0.5861328125, "grad_norm": 0.22718782722949982, "learning_rate": 0.00021905834638027876, "loss": 1.774, "step": 12004 }, { "epoch": 0.586181640625, "grad_norm": 0.3135451674461365, "learning_rate": 0.00021902441930620348, "loss": 1.7603, "step": 12005 }, { "epoch": 0.58623046875, "grad_norm": 0.2836179733276367, "learning_rate": 0.0002189904935887414, "loss": 1.7491, "step": 12006 }, { "epoch": 0.586279296875, "grad_norm": 0.24272306263446808, "learning_rate": 0.0002189565692287146, "loss": 1.7695, "step": 12007 }, { "epoch": 0.586328125, "grad_norm": 0.3297591507434845, "learning_rate": 0.00021892264622694552, "loss": 1.794, "step": 12008 }, { "epoch": 0.586376953125, "grad_norm": 0.27164921164512634, "learning_rate": 0.00021888872458425613, "loss": 1.7558, "step": 12009 }, { "epoch": 0.58642578125, "grad_norm": 0.27436935901641846, "learning_rate": 0.00021885480430146843, "loss": 1.8002, "step": 12010 }, { "epoch": 0.586474609375, "grad_norm": 0.2560249865055084, "learning_rate": 0.00021882088537940482, "loss": 1.7645, "step": 12011 }, { "epoch": 0.5865234375, "grad_norm": 0.2837395966053009, "learning_rate": 0.00021878696781888707, "loss": 1.8029, "step": 12012 }, { "epoch": 0.586572265625, "grad_norm": 0.283006876707077, "learning_rate": 0.00021875305162073744, "loss": 1.7496, "step": 12013 }, { "epoch": 0.58662109375, "grad_norm": 0.23948746919631958, "learning_rate": 0.00021871913678577764, "loss": 1.7501, "step": 12014 }, { "epoch": 0.586669921875, "grad_norm": 0.2748660445213318, "learning_rate": 0.00021868522331482994, "loss": 1.7734, "step": 12015 }, { "epoch": 0.58671875, "grad_norm": 0.2424885332584381, "learning_rate": 0.00021865131120871601, "loss": 1.7781, "step": 12016 }, { "epoch": 0.586767578125, "grad_norm": 0.2946905791759491, "learning_rate": 0.00021861740046825778, "loss": 1.7818, "step": 12017 }, { "epoch": 0.58681640625, "grad_norm": 0.2310447245836258, "learning_rate": 0.00021858349109427723, "loss": 1.7898, "step": 12018 }, { "epoch": 0.586865234375, "grad_norm": 0.23127040266990662, "learning_rate": 0.00021854958308759599, "loss": 1.7656, "step": 12019 }, { "epoch": 0.5869140625, "grad_norm": 0.2380293756723404, "learning_rate": 0.00021851567644903607, "loss": 1.7553, "step": 12020 }, { "epoch": 0.586962890625, "grad_norm": 0.26752135157585144, "learning_rate": 0.00021848177117941904, "loss": 1.7767, "step": 12021 }, { "epoch": 0.58701171875, "grad_norm": 0.25128939747810364, "learning_rate": 0.00021844786727956672, "loss": 1.7736, "step": 12022 }, { "epoch": 0.587060546875, "grad_norm": 0.2125806361436844, "learning_rate": 0.00021841396475030074, "loss": 1.7762, "step": 12023 }, { "epoch": 0.587109375, "grad_norm": 0.2633594870567322, "learning_rate": 0.00021838006359244284, "loss": 1.7794, "step": 12024 }, { "epoch": 0.587158203125, "grad_norm": 0.23336222767829895, "learning_rate": 0.0002183461638068146, "loss": 1.7981, "step": 12025 }, { "epoch": 0.58720703125, "grad_norm": 0.22278012335300446, "learning_rate": 0.00021831226539423753, "loss": 1.75, "step": 12026 }, { "epoch": 0.587255859375, "grad_norm": 0.22267396748065948, "learning_rate": 0.00021827836835553333, "loss": 1.7498, "step": 12027 }, { "epoch": 0.5873046875, "grad_norm": 0.30499932169914246, "learning_rate": 0.0002182444726915233, "loss": 1.7638, "step": 12028 }, { "epoch": 0.587353515625, "grad_norm": 0.284332811832428, "learning_rate": 0.00021821057840302926, "loss": 1.7678, "step": 12029 }, { "epoch": 0.58740234375, "grad_norm": 0.2729993760585785, "learning_rate": 0.00021817668549087228, "loss": 1.7722, "step": 12030 }, { "epoch": 0.587451171875, "grad_norm": 0.34236767888069153, "learning_rate": 0.0002181427939558741, "loss": 1.7626, "step": 12031 }, { "epoch": 0.5875, "grad_norm": 0.2522682845592499, "learning_rate": 0.00021810890379885594, "loss": 1.7935, "step": 12032 }, { "epoch": 0.587548828125, "grad_norm": 0.27375465631484985, "learning_rate": 0.00021807501502063925, "loss": 1.7455, "step": 12033 }, { "epoch": 0.58759765625, "grad_norm": 0.291509211063385, "learning_rate": 0.00021804112762204537, "loss": 1.7837, "step": 12034 }, { "epoch": 0.587646484375, "grad_norm": 0.260495126247406, "learning_rate": 0.00021800724160389535, "loss": 1.7824, "step": 12035 }, { "epoch": 0.5876953125, "grad_norm": 0.2257155328989029, "learning_rate": 0.00021797335696701077, "loss": 1.7932, "step": 12036 }, { "epoch": 0.587744140625, "grad_norm": 0.2067989706993103, "learning_rate": 0.00021793947371221258, "loss": 1.7472, "step": 12037 }, { "epoch": 0.58779296875, "grad_norm": 0.2597755491733551, "learning_rate": 0.0002179055918403221, "loss": 1.7638, "step": 12038 }, { "epoch": 0.587841796875, "grad_norm": 0.22603020071983337, "learning_rate": 0.0002178717113521605, "loss": 1.7726, "step": 12039 }, { "epoch": 0.587890625, "grad_norm": 0.2404223531484604, "learning_rate": 0.0002178378322485489, "loss": 1.7469, "step": 12040 }, { "epoch": 0.587939453125, "grad_norm": 0.2337069809436798, "learning_rate": 0.00021780395453030837, "loss": 1.7974, "step": 12041 }, { "epoch": 0.58798828125, "grad_norm": 0.3069846034049988, "learning_rate": 0.0002177700781982599, "loss": 1.7904, "step": 12042 }, { "epoch": 0.588037109375, "grad_norm": 0.221166729927063, "learning_rate": 0.00021773620325322464, "loss": 1.7397, "step": 12043 }, { "epoch": 0.5880859375, "grad_norm": 0.29274263978004456, "learning_rate": 0.00021770232969602331, "loss": 1.7843, "step": 12044 }, { "epoch": 0.588134765625, "grad_norm": 0.26232442259788513, "learning_rate": 0.00021766845752747727, "loss": 1.7804, "step": 12045 }, { "epoch": 0.58818359375, "grad_norm": 0.2164953351020813, "learning_rate": 0.00021763458674840707, "loss": 1.7668, "step": 12046 }, { "epoch": 0.588232421875, "grad_norm": 0.28031280636787415, "learning_rate": 0.00021760071735963388, "loss": 1.7746, "step": 12047 }, { "epoch": 0.58828125, "grad_norm": 0.2708834111690521, "learning_rate": 0.0002175668493619783, "loss": 1.7853, "step": 12048 }, { "epoch": 0.588330078125, "grad_norm": 0.19739660620689392, "learning_rate": 0.00021753298275626132, "loss": 1.7496, "step": 12049 }, { "epoch": 0.58837890625, "grad_norm": 0.2718265652656555, "learning_rate": 0.00021749911754330377, "loss": 1.7765, "step": 12050 }, { "epoch": 0.588427734375, "grad_norm": 0.2344825118780136, "learning_rate": 0.0002174652537239261, "loss": 1.7887, "step": 12051 }, { "epoch": 0.5884765625, "grad_norm": 0.2005787342786789, "learning_rate": 0.0002174313912989494, "loss": 1.7486, "step": 12052 }, { "epoch": 0.588525390625, "grad_norm": 0.25308099389076233, "learning_rate": 0.00021739753026919407, "loss": 1.7623, "step": 12053 }, { "epoch": 0.58857421875, "grad_norm": 0.17975902557373047, "learning_rate": 0.0002173636706354809, "loss": 1.773, "step": 12054 }, { "epoch": 0.588623046875, "grad_norm": 0.23735778033733368, "learning_rate": 0.00021732981239863048, "loss": 1.7611, "step": 12055 }, { "epoch": 0.588671875, "grad_norm": 0.19488826394081116, "learning_rate": 0.0002172959555594634, "loss": 1.7738, "step": 12056 }, { "epoch": 0.588720703125, "grad_norm": 0.23695658147335052, "learning_rate": 0.00021726210011880015, "loss": 1.7536, "step": 12057 }, { "epoch": 0.58876953125, "grad_norm": 0.24661380052566528, "learning_rate": 0.0002172282460774614, "loss": 1.7553, "step": 12058 }, { "epoch": 0.588818359375, "grad_norm": 0.2091044783592224, "learning_rate": 0.00021719439343626746, "loss": 1.7687, "step": 12059 }, { "epoch": 0.5888671875, "grad_norm": 0.2462921291589737, "learning_rate": 0.0002171605421960388, "loss": 1.787, "step": 12060 }, { "epoch": 0.588916015625, "grad_norm": 0.23311690986156464, "learning_rate": 0.00021712669235759597, "loss": 1.7858, "step": 12061 }, { "epoch": 0.58896484375, "grad_norm": 0.2369278520345688, "learning_rate": 0.0002170928439217591, "loss": 1.7802, "step": 12062 }, { "epoch": 0.589013671875, "grad_norm": 0.27876728773117065, "learning_rate": 0.00021705899688934877, "loss": 1.7866, "step": 12063 }, { "epoch": 0.5890625, "grad_norm": 0.296485036611557, "learning_rate": 0.0002170251512611851, "loss": 1.7799, "step": 12064 }, { "epoch": 0.589111328125, "grad_norm": 0.2635905146598816, "learning_rate": 0.00021699130703808862, "loss": 1.7821, "step": 12065 }, { "epoch": 0.58916015625, "grad_norm": 0.26641130447387695, "learning_rate": 0.0002169574642208793, "loss": 1.7776, "step": 12066 }, { "epoch": 0.589208984375, "grad_norm": 0.21791864931583405, "learning_rate": 0.00021692362281037754, "loss": 1.7833, "step": 12067 }, { "epoch": 0.5892578125, "grad_norm": 0.28033071756362915, "learning_rate": 0.00021688978280740346, "loss": 1.7672, "step": 12068 }, { "epoch": 0.589306640625, "grad_norm": 0.21615341305732727, "learning_rate": 0.0002168559442127771, "loss": 1.7446, "step": 12069 }, { "epoch": 0.58935546875, "grad_norm": 0.25654637813568115, "learning_rate": 0.00021682210702731868, "loss": 1.7877, "step": 12070 }, { "epoch": 0.589404296875, "grad_norm": 0.34760332107543945, "learning_rate": 0.0002167882712518482, "loss": 1.7618, "step": 12071 }, { "epoch": 0.589453125, "grad_norm": 0.275812029838562, "learning_rate": 0.00021675443688718582, "loss": 1.783, "step": 12072 }, { "epoch": 0.589501953125, "grad_norm": 0.258914053440094, "learning_rate": 0.00021672060393415133, "loss": 1.7729, "step": 12073 }, { "epoch": 0.58955078125, "grad_norm": 0.3151358664035797, "learning_rate": 0.00021668677239356494, "loss": 1.7749, "step": 12074 }, { "epoch": 0.589599609375, "grad_norm": 0.24255824089050293, "learning_rate": 0.00021665294226624643, "loss": 1.7796, "step": 12075 }, { "epoch": 0.5896484375, "grad_norm": 0.2559942305088043, "learning_rate": 0.0002166191135530157, "loss": 1.7624, "step": 12076 }, { "epoch": 0.589697265625, "grad_norm": 0.2832132875919342, "learning_rate": 0.0002165852862546927, "loss": 1.7644, "step": 12077 }, { "epoch": 0.58974609375, "grad_norm": 0.25941166281700134, "learning_rate": 0.00021655146037209715, "loss": 1.7956, "step": 12078 }, { "epoch": 0.589794921875, "grad_norm": 0.23551584780216217, "learning_rate": 0.00021651763590604906, "loss": 1.7664, "step": 12079 }, { "epoch": 0.58984375, "grad_norm": 0.26323261857032776, "learning_rate": 0.0002164838128573679, "loss": 1.7562, "step": 12080 }, { "epoch": 0.589892578125, "grad_norm": 0.21652443706989288, "learning_rate": 0.00021644999122687365, "loss": 1.7943, "step": 12081 }, { "epoch": 0.58994140625, "grad_norm": 0.2531631290912628, "learning_rate": 0.0002164161710153858, "loss": 1.7861, "step": 12082 }, { "epoch": 0.589990234375, "grad_norm": 0.25053560733795166, "learning_rate": 0.00021638235222372422, "loss": 1.7863, "step": 12083 }, { "epoch": 0.5900390625, "grad_norm": 0.24226713180541992, "learning_rate": 0.00021634853485270834, "loss": 1.7517, "step": 12084 }, { "epoch": 0.590087890625, "grad_norm": 0.2269822359085083, "learning_rate": 0.0002163147189031579, "loss": 1.7752, "step": 12085 }, { "epoch": 0.59013671875, "grad_norm": 0.23384937644004822, "learning_rate": 0.00021628090437589233, "loss": 1.7618, "step": 12086 }, { "epoch": 0.590185546875, "grad_norm": 0.24881552159786224, "learning_rate": 0.00021624709127173125, "loss": 1.762, "step": 12087 }, { "epoch": 0.590234375, "grad_norm": 0.23591281473636627, "learning_rate": 0.00021621327959149417, "loss": 1.7633, "step": 12088 }, { "epoch": 0.590283203125, "grad_norm": 0.23699213564395905, "learning_rate": 0.00021617946933600042, "loss": 1.7777, "step": 12089 }, { "epoch": 0.59033203125, "grad_norm": 0.2773854732513428, "learning_rate": 0.00021614566050606954, "loss": 1.7948, "step": 12090 }, { "epoch": 0.590380859375, "grad_norm": 0.2846786081790924, "learning_rate": 0.00021611185310252073, "loss": 1.7946, "step": 12091 }, { "epoch": 0.5904296875, "grad_norm": 0.29732057452201843, "learning_rate": 0.00021607804712617362, "loss": 1.7747, "step": 12092 }, { "epoch": 0.590478515625, "grad_norm": 0.24229270219802856, "learning_rate": 0.00021604424257784733, "loss": 1.7744, "step": 12093 }, { "epoch": 0.59052734375, "grad_norm": 0.30713626742362976, "learning_rate": 0.00021601043945836113, "loss": 1.7663, "step": 12094 }, { "epoch": 0.590576171875, "grad_norm": 0.22867223620414734, "learning_rate": 0.00021597663776853443, "loss": 1.7922, "step": 12095 }, { "epoch": 0.590625, "grad_norm": 0.287074476480484, "learning_rate": 0.00021594283750918624, "loss": 1.7585, "step": 12096 }, { "epoch": 0.590673828125, "grad_norm": 0.24229776859283447, "learning_rate": 0.00021590903868113587, "loss": 1.7711, "step": 12097 }, { "epoch": 0.59072265625, "grad_norm": 0.23984837532043457, "learning_rate": 0.00021587524128520235, "loss": 1.7772, "step": 12098 }, { "epoch": 0.590771484375, "grad_norm": 0.21629314124584198, "learning_rate": 0.00021584144532220496, "loss": 1.7836, "step": 12099 }, { "epoch": 0.5908203125, "grad_norm": 0.23877088725566864, "learning_rate": 0.00021580765079296267, "loss": 1.7642, "step": 12100 }, { "epoch": 0.590869140625, "grad_norm": 0.21060581505298615, "learning_rate": 0.00021577385769829444, "loss": 1.7889, "step": 12101 }, { "epoch": 0.59091796875, "grad_norm": 0.26908987760543823, "learning_rate": 0.00021574006603901935, "loss": 1.758, "step": 12102 }, { "epoch": 0.590966796875, "grad_norm": 0.24787193536758423, "learning_rate": 0.00021570627581595642, "loss": 1.7707, "step": 12103 }, { "epoch": 0.591015625, "grad_norm": 0.23539923131465912, "learning_rate": 0.00021567248702992453, "loss": 1.755, "step": 12104 }, { "epoch": 0.591064453125, "grad_norm": 0.26649555563926697, "learning_rate": 0.00021563869968174254, "loss": 1.7675, "step": 12105 }, { "epoch": 0.59111328125, "grad_norm": 0.21349047124385834, "learning_rate": 0.00021560491377222934, "loss": 1.7716, "step": 12106 }, { "epoch": 0.591162109375, "grad_norm": 0.257559210062027, "learning_rate": 0.00021557112930220386, "loss": 1.7814, "step": 12107 }, { "epoch": 0.5912109375, "grad_norm": 0.23679344356060028, "learning_rate": 0.00021553734627248477, "loss": 1.7823, "step": 12108 }, { "epoch": 0.591259765625, "grad_norm": 0.2122325599193573, "learning_rate": 0.00021550356468389087, "loss": 1.7784, "step": 12109 }, { "epoch": 0.59130859375, "grad_norm": 0.2511180639266968, "learning_rate": 0.00021546978453724087, "loss": 1.7686, "step": 12110 }, { "epoch": 0.591357421875, "grad_norm": 0.22114725410938263, "learning_rate": 0.00021543600583335344, "loss": 1.7764, "step": 12111 }, { "epoch": 0.59140625, "grad_norm": 0.26813793182373047, "learning_rate": 0.00021540222857304724, "loss": 1.792, "step": 12112 }, { "epoch": 0.591455078125, "grad_norm": 0.2634042203426361, "learning_rate": 0.00021536845275714096, "loss": 1.7811, "step": 12113 }, { "epoch": 0.59150390625, "grad_norm": 0.2339235246181488, "learning_rate": 0.00021533467838645315, "loss": 1.7919, "step": 12114 }, { "epoch": 0.591552734375, "grad_norm": 0.29069599509239197, "learning_rate": 0.0002153009054618023, "loss": 1.7485, "step": 12115 }, { "epoch": 0.5916015625, "grad_norm": 0.2096702754497528, "learning_rate": 0.000215267133984007, "loss": 1.7662, "step": 12116 }, { "epoch": 0.591650390625, "grad_norm": 0.256778746843338, "learning_rate": 0.0002152333639538857, "loss": 1.7683, "step": 12117 }, { "epoch": 0.59169921875, "grad_norm": 0.2994065284729004, "learning_rate": 0.0002151995953722568, "loss": 1.7673, "step": 12118 }, { "epoch": 0.591748046875, "grad_norm": 0.23447397351264954, "learning_rate": 0.0002151658282399388, "loss": 1.801, "step": 12119 }, { "epoch": 0.591796875, "grad_norm": 0.2552155554294586, "learning_rate": 0.00021513206255774998, "loss": 1.7799, "step": 12120 }, { "epoch": 0.591845703125, "grad_norm": 0.2619011402130127, "learning_rate": 0.00021509829832650873, "loss": 1.7728, "step": 12121 }, { "epoch": 0.59189453125, "grad_norm": 0.2525261640548706, "learning_rate": 0.00021506453554703337, "loss": 1.7851, "step": 12122 }, { "epoch": 0.591943359375, "grad_norm": 0.21532194316387177, "learning_rate": 0.0002150307742201421, "loss": 1.776, "step": 12123 }, { "epoch": 0.5919921875, "grad_norm": 0.2447706013917923, "learning_rate": 0.0002149970143466532, "loss": 1.7608, "step": 12124 }, { "epoch": 0.592041015625, "grad_norm": 0.22164127230644226, "learning_rate": 0.00021496325592738492, "loss": 1.7839, "step": 12125 }, { "epoch": 0.59208984375, "grad_norm": 0.29768696427345276, "learning_rate": 0.0002149294989631553, "loss": 1.7727, "step": 12126 }, { "epoch": 0.592138671875, "grad_norm": 0.2316167801618576, "learning_rate": 0.00021489574345478262, "loss": 1.7521, "step": 12127 }, { "epoch": 0.5921875, "grad_norm": 0.23211722075939178, "learning_rate": 0.00021486198940308483, "loss": 1.7665, "step": 12128 }, { "epoch": 0.592236328125, "grad_norm": 0.2251432240009308, "learning_rate": 0.00021482823680888004, "loss": 1.7635, "step": 12129 }, { "epoch": 0.59228515625, "grad_norm": 0.21914787590503693, "learning_rate": 0.00021479448567298626, "loss": 1.7515, "step": 12130 }, { "epoch": 0.592333984375, "grad_norm": 0.23317013680934906, "learning_rate": 0.00021476073599622147, "loss": 1.761, "step": 12131 }, { "epoch": 0.5923828125, "grad_norm": 0.2535351812839508, "learning_rate": 0.0002147269877794037, "loss": 1.7906, "step": 12132 }, { "epoch": 0.592431640625, "grad_norm": 0.2247433066368103, "learning_rate": 0.00021469324102335076, "loss": 1.7779, "step": 12133 }, { "epoch": 0.59248046875, "grad_norm": 0.25499454140663147, "learning_rate": 0.00021465949572888065, "loss": 1.7797, "step": 12134 }, { "epoch": 0.592529296875, "grad_norm": 0.2990192770957947, "learning_rate": 0.000214625751896811, "loss": 1.7639, "step": 12135 }, { "epoch": 0.592578125, "grad_norm": 0.27937135100364685, "learning_rate": 0.00021459200952795988, "loss": 1.7753, "step": 12136 }, { "epoch": 0.592626953125, "grad_norm": 0.2877383530139923, "learning_rate": 0.00021455826862314486, "loss": 1.765, "step": 12137 }, { "epoch": 0.59267578125, "grad_norm": 0.2526237666606903, "learning_rate": 0.00021452452918318383, "loss": 1.7764, "step": 12138 }, { "epoch": 0.592724609375, "grad_norm": 0.2869631350040436, "learning_rate": 0.00021449079120889432, "loss": 1.7724, "step": 12139 }, { "epoch": 0.5927734375, "grad_norm": 0.20979629456996918, "learning_rate": 0.00021445705470109417, "loss": 1.7862, "step": 12140 }, { "epoch": 0.592822265625, "grad_norm": 0.2867053747177124, "learning_rate": 0.0002144233196606009, "loss": 1.7811, "step": 12141 }, { "epoch": 0.59287109375, "grad_norm": 0.23732753098011017, "learning_rate": 0.00021438958608823217, "loss": 1.7793, "step": 12142 }, { "epoch": 0.592919921875, "grad_norm": 0.24803753197193146, "learning_rate": 0.00021435585398480562, "loss": 1.8113, "step": 12143 }, { "epoch": 0.59296875, "grad_norm": 0.27046719193458557, "learning_rate": 0.0002143221233511385, "loss": 1.7587, "step": 12144 }, { "epoch": 0.593017578125, "grad_norm": 0.2687658965587616, "learning_rate": 0.00021428839418804858, "loss": 1.7762, "step": 12145 }, { "epoch": 0.59306640625, "grad_norm": 0.307984322309494, "learning_rate": 0.0002142546664963531, "loss": 1.7939, "step": 12146 }, { "epoch": 0.593115234375, "grad_norm": 0.28312140703201294, "learning_rate": 0.00021422094027686968, "loss": 1.7853, "step": 12147 }, { "epoch": 0.5931640625, "grad_norm": 0.3330404460430145, "learning_rate": 0.00021418721553041552, "loss": 1.7793, "step": 12148 }, { "epoch": 0.593212890625, "grad_norm": 0.2725485861301422, "learning_rate": 0.0002141534922578081, "loss": 1.7635, "step": 12149 }, { "epoch": 0.59326171875, "grad_norm": 0.2619970440864563, "learning_rate": 0.00021411977045986464, "loss": 1.7612, "step": 12150 }, { "epoch": 0.593310546875, "grad_norm": 0.27892524003982544, "learning_rate": 0.0002140860501374025, "loss": 1.772, "step": 12151 }, { "epoch": 0.593359375, "grad_norm": 0.23657429218292236, "learning_rate": 0.0002140523312912389, "loss": 1.7488, "step": 12152 }, { "epoch": 0.593408203125, "grad_norm": 0.24772486090660095, "learning_rate": 0.0002140186139221909, "loss": 1.7604, "step": 12153 }, { "epoch": 0.59345703125, "grad_norm": 0.2511005401611328, "learning_rate": 0.0002139848980310759, "loss": 1.7729, "step": 12154 }, { "epoch": 0.593505859375, "grad_norm": 0.2152465581893921, "learning_rate": 0.00021395118361871085, "loss": 1.7744, "step": 12155 }, { "epoch": 0.5935546875, "grad_norm": 0.25371965765953064, "learning_rate": 0.00021391747068591295, "loss": 1.7733, "step": 12156 }, { "epoch": 0.593603515625, "grad_norm": 0.22290726006031036, "learning_rate": 0.00021388375923349918, "loss": 1.7855, "step": 12157 }, { "epoch": 0.59365234375, "grad_norm": 0.22430166602134705, "learning_rate": 0.00021385004926228663, "loss": 1.7827, "step": 12158 }, { "epoch": 0.593701171875, "grad_norm": 0.2295631617307663, "learning_rate": 0.0002138163407730923, "loss": 1.7791, "step": 12159 }, { "epoch": 0.59375, "grad_norm": 0.22853030264377594, "learning_rate": 0.000213782633766733, "loss": 1.7774, "step": 12160 }, { "epoch": 0.593798828125, "grad_norm": 0.21357165277004242, "learning_rate": 0.00021374892824402587, "loss": 1.774, "step": 12161 }, { "epoch": 0.59384765625, "grad_norm": 0.2067769467830658, "learning_rate": 0.00021371522420578754, "loss": 1.7477, "step": 12162 }, { "epoch": 0.593896484375, "grad_norm": 0.2326560765504837, "learning_rate": 0.00021368152165283512, "loss": 1.7785, "step": 12163 }, { "epoch": 0.5939453125, "grad_norm": 0.21005713939666748, "learning_rate": 0.0002136478205859852, "loss": 1.7601, "step": 12164 }, { "epoch": 0.593994140625, "grad_norm": 0.24440617859363556, "learning_rate": 0.0002136141210060547, "loss": 1.7746, "step": 12165 }, { "epoch": 0.59404296875, "grad_norm": 0.19306187331676483, "learning_rate": 0.0002135804229138602, "loss": 1.7436, "step": 12166 }, { "epoch": 0.594091796875, "grad_norm": 0.2503749430179596, "learning_rate": 0.0002135467263102186, "loss": 1.7727, "step": 12167 }, { "epoch": 0.594140625, "grad_norm": 0.2396804541349411, "learning_rate": 0.00021351303119594646, "loss": 1.7962, "step": 12168 }, { "epoch": 0.594189453125, "grad_norm": 0.210529163479805, "learning_rate": 0.00021347933757186034, "loss": 1.7688, "step": 12169 }, { "epoch": 0.59423828125, "grad_norm": 0.2264488935470581, "learning_rate": 0.00021344564543877698, "loss": 1.7751, "step": 12170 }, { "epoch": 0.594287109375, "grad_norm": 0.21084024012088776, "learning_rate": 0.0002134119547975127, "loss": 1.778, "step": 12171 }, { "epoch": 0.5943359375, "grad_norm": 0.29567262530326843, "learning_rate": 0.00021337826564888436, "loss": 1.7576, "step": 12172 }, { "epoch": 0.594384765625, "grad_norm": 0.2599306106567383, "learning_rate": 0.0002133445779937081, "loss": 1.7636, "step": 12173 }, { "epoch": 0.59443359375, "grad_norm": 0.2278432548046112, "learning_rate": 0.00021331089183280062, "loss": 1.7587, "step": 12174 }, { "epoch": 0.594482421875, "grad_norm": 0.300382524728775, "learning_rate": 0.00021327720716697818, "loss": 1.7523, "step": 12175 }, { "epoch": 0.59453125, "grad_norm": 0.2477748990058899, "learning_rate": 0.0002132435239970573, "loss": 1.7845, "step": 12176 }, { "epoch": 0.594580078125, "grad_norm": 0.2537762224674225, "learning_rate": 0.0002132098423238542, "loss": 1.7577, "step": 12177 }, { "epoch": 0.59462890625, "grad_norm": 0.24839818477630615, "learning_rate": 0.00021317616214818513, "loss": 1.7502, "step": 12178 }, { "epoch": 0.594677734375, "grad_norm": 0.23866331577301025, "learning_rate": 0.00021314248347086652, "loss": 1.7682, "step": 12179 }, { "epoch": 0.5947265625, "grad_norm": 0.2097034454345703, "learning_rate": 0.00021310880629271445, "loss": 1.7684, "step": 12180 }, { "epoch": 0.594775390625, "grad_norm": 0.254108726978302, "learning_rate": 0.00021307513061454532, "loss": 1.767, "step": 12181 }, { "epoch": 0.59482421875, "grad_norm": 0.281200110912323, "learning_rate": 0.000213041456437175, "loss": 1.7722, "step": 12182 }, { "epoch": 0.594873046875, "grad_norm": 0.21141977608203888, "learning_rate": 0.00021300778376141983, "loss": 1.7679, "step": 12183 }, { "epoch": 0.594921875, "grad_norm": 0.3328951597213745, "learning_rate": 0.0002129741125880959, "loss": 1.7563, "step": 12184 }, { "epoch": 0.594970703125, "grad_norm": 0.24748864769935608, "learning_rate": 0.000212940442918019, "loss": 1.759, "step": 12185 }, { "epoch": 0.59501953125, "grad_norm": 0.25762009620666504, "learning_rate": 0.00021290677475200548, "loss": 1.756, "step": 12186 }, { "epoch": 0.595068359375, "grad_norm": 0.2741256058216095, "learning_rate": 0.000212873108090871, "loss": 1.7555, "step": 12187 }, { "epoch": 0.5951171875, "grad_norm": 0.28995636105537415, "learning_rate": 0.00021283944293543184, "loss": 1.7856, "step": 12188 }, { "epoch": 0.595166015625, "grad_norm": 0.26771441102027893, "learning_rate": 0.00021280577928650362, "loss": 1.7587, "step": 12189 }, { "epoch": 0.59521484375, "grad_norm": 0.2799322009086609, "learning_rate": 0.00021277211714490236, "loss": 1.7657, "step": 12190 }, { "epoch": 0.595263671875, "grad_norm": 0.253060519695282, "learning_rate": 0.00021273845651144374, "loss": 1.7657, "step": 12191 }, { "epoch": 0.5953125, "grad_norm": 0.27786538004875183, "learning_rate": 0.00021270479738694375, "loss": 1.7738, "step": 12192 }, { "epoch": 0.595361328125, "grad_norm": 0.25171446800231934, "learning_rate": 0.00021267113977221802, "loss": 1.7687, "step": 12193 }, { "epoch": 0.59541015625, "grad_norm": 0.24786430597305298, "learning_rate": 0.00021263748366808223, "loss": 1.8256, "step": 12194 }, { "epoch": 0.595458984375, "grad_norm": 0.22734017670154572, "learning_rate": 0.0002126038290753522, "loss": 1.7714, "step": 12195 }, { "epoch": 0.5955078125, "grad_norm": 0.24453504383563995, "learning_rate": 0.0002125701759948434, "loss": 1.7431, "step": 12196 }, { "epoch": 0.595556640625, "grad_norm": 0.21946796774864197, "learning_rate": 0.00021253652442737166, "loss": 1.7711, "step": 12197 }, { "epoch": 0.59560546875, "grad_norm": 0.23488299548625946, "learning_rate": 0.00021250287437375232, "loss": 1.7849, "step": 12198 }, { "epoch": 0.595654296875, "grad_norm": 0.24620585143566132, "learning_rate": 0.00021246922583480115, "loss": 1.7713, "step": 12199 }, { "epoch": 0.595703125, "grad_norm": 0.24422042071819305, "learning_rate": 0.0002124355788113334, "loss": 1.7761, "step": 12200 }, { "epoch": 0.595751953125, "grad_norm": 0.23433154821395874, "learning_rate": 0.00021240193330416475, "loss": 1.7744, "step": 12201 }, { "epoch": 0.59580078125, "grad_norm": 0.22429777681827545, "learning_rate": 0.00021236828931411056, "loss": 1.7801, "step": 12202 }, { "epoch": 0.595849609375, "grad_norm": 0.2478160560131073, "learning_rate": 0.00021233464684198612, "loss": 1.766, "step": 12203 }, { "epoch": 0.5958984375, "grad_norm": 0.20073100924491882, "learning_rate": 0.00021230100588860696, "loss": 1.7488, "step": 12204 }, { "epoch": 0.595947265625, "grad_norm": 0.2360234409570694, "learning_rate": 0.00021226736645478812, "loss": 1.7727, "step": 12205 }, { "epoch": 0.59599609375, "grad_norm": 0.1941664218902588, "learning_rate": 0.0002122337285413452, "loss": 1.7535, "step": 12206 }, { "epoch": 0.596044921875, "grad_norm": 0.23692554235458374, "learning_rate": 0.00021220009214909324, "loss": 1.7604, "step": 12207 }, { "epoch": 0.59609375, "grad_norm": 0.20508912205696106, "learning_rate": 0.00021216645727884753, "loss": 1.7853, "step": 12208 }, { "epoch": 0.596142578125, "grad_norm": 0.2376742959022522, "learning_rate": 0.00021213282393142314, "loss": 1.7825, "step": 12209 }, { "epoch": 0.59619140625, "grad_norm": 0.2152964472770691, "learning_rate": 0.00021209919210763534, "loss": 1.7878, "step": 12210 }, { "epoch": 0.596240234375, "grad_norm": 0.24582262337207794, "learning_rate": 0.00021206556180829918, "loss": 1.7786, "step": 12211 }, { "epoch": 0.5962890625, "grad_norm": 0.24855437874794006, "learning_rate": 0.00021203193303422958, "loss": 1.7831, "step": 12212 }, { "epoch": 0.596337890625, "grad_norm": 0.2006782591342926, "learning_rate": 0.00021199830578624179, "loss": 1.7869, "step": 12213 }, { "epoch": 0.59638671875, "grad_norm": 0.24434545636177063, "learning_rate": 0.00021196468006515057, "loss": 1.7842, "step": 12214 }, { "epoch": 0.596435546875, "grad_norm": 0.25135093927383423, "learning_rate": 0.00021193105587177108, "loss": 1.7593, "step": 12215 }, { "epoch": 0.596484375, "grad_norm": 0.2608675956726074, "learning_rate": 0.00021189743320691796, "loss": 1.7646, "step": 12216 }, { "epoch": 0.596533203125, "grad_norm": 0.20888544619083405, "learning_rate": 0.00021186381207140642, "loss": 1.7804, "step": 12217 }, { "epoch": 0.59658203125, "grad_norm": 0.28686222434043884, "learning_rate": 0.00021183019246605105, "loss": 1.7868, "step": 12218 }, { "epoch": 0.596630859375, "grad_norm": 0.2757958471775055, "learning_rate": 0.00021179657439166665, "loss": 1.7734, "step": 12219 }, { "epoch": 0.5966796875, "grad_norm": 0.23012962937355042, "learning_rate": 0.00021176295784906818, "loss": 1.7675, "step": 12220 }, { "epoch": 0.596728515625, "grad_norm": 0.2664319574832916, "learning_rate": 0.00021172934283907009, "loss": 1.7826, "step": 12221 }, { "epoch": 0.59677734375, "grad_norm": 0.21513523161411285, "learning_rate": 0.00021169572936248726, "loss": 1.7591, "step": 12222 }, { "epoch": 0.596826171875, "grad_norm": 0.2657579481601715, "learning_rate": 0.00021166211742013424, "loss": 1.7888, "step": 12223 }, { "epoch": 0.596875, "grad_norm": 0.21928946673870087, "learning_rate": 0.00021162850701282582, "loss": 1.7542, "step": 12224 }, { "epoch": 0.596923828125, "grad_norm": 0.2497182935476303, "learning_rate": 0.00021159489814137633, "loss": 1.777, "step": 12225 }, { "epoch": 0.59697265625, "grad_norm": 0.23867164552211761, "learning_rate": 0.00021156129080660048, "loss": 1.7727, "step": 12226 }, { "epoch": 0.597021484375, "grad_norm": 0.2434527426958084, "learning_rate": 0.00021152768500931274, "loss": 1.7635, "step": 12227 }, { "epoch": 0.5970703125, "grad_norm": 0.23647437989711761, "learning_rate": 0.00021149408075032744, "loss": 1.7798, "step": 12228 }, { "epoch": 0.597119140625, "grad_norm": 0.2679004371166229, "learning_rate": 0.00021146047803045925, "loss": 1.7688, "step": 12229 }, { "epoch": 0.59716796875, "grad_norm": 0.2214994728565216, "learning_rate": 0.0002114268768505223, "loss": 1.7641, "step": 12230 }, { "epoch": 0.597216796875, "grad_norm": 0.2896694242954254, "learning_rate": 0.0002113932772113311, "loss": 1.7838, "step": 12231 }, { "epoch": 0.597265625, "grad_norm": 0.36591118574142456, "learning_rate": 0.00021135967911369992, "loss": 1.7914, "step": 12232 }, { "epoch": 0.597314453125, "grad_norm": 0.26308509707450867, "learning_rate": 0.0002113260825584431, "loss": 1.7852, "step": 12233 }, { "epoch": 0.59736328125, "grad_norm": 0.3121616244316101, "learning_rate": 0.00021129248754637477, "loss": 1.7595, "step": 12234 }, { "epoch": 0.597412109375, "grad_norm": 0.2999546527862549, "learning_rate": 0.00021125889407830922, "loss": 1.7622, "step": 12235 }, { "epoch": 0.5974609375, "grad_norm": 0.2185298651456833, "learning_rate": 0.00021122530215506057, "loss": 1.7493, "step": 12236 }, { "epoch": 0.597509765625, "grad_norm": 0.27293843030929565, "learning_rate": 0.00021119171177744295, "loss": 1.7715, "step": 12237 }, { "epoch": 0.59755859375, "grad_norm": 0.21631240844726562, "learning_rate": 0.00021115812294627051, "loss": 1.77, "step": 12238 }, { "epoch": 0.597607421875, "grad_norm": 0.29393941164016724, "learning_rate": 0.00021112453566235712, "loss": 1.7741, "step": 12239 }, { "epoch": 0.59765625, "grad_norm": 0.2608848512172699, "learning_rate": 0.00021109094992651707, "loss": 1.794, "step": 12240 }, { "epoch": 0.597705078125, "grad_norm": 0.32123708724975586, "learning_rate": 0.00021105736573956408, "loss": 1.8099, "step": 12241 }, { "epoch": 0.59775390625, "grad_norm": 0.29693925380706787, "learning_rate": 0.0002110237831023123, "loss": 1.7854, "step": 12242 }, { "epoch": 0.597802734375, "grad_norm": 0.2215283215045929, "learning_rate": 0.0002109902020155755, "loss": 1.7763, "step": 12243 }, { "epoch": 0.5978515625, "grad_norm": 0.2647745907306671, "learning_rate": 0.0002109566224801675, "loss": 1.7785, "step": 12244 }, { "epoch": 0.597900390625, "grad_norm": 0.216582790017128, "learning_rate": 0.00021092304449690235, "loss": 1.7759, "step": 12245 }, { "epoch": 0.59794921875, "grad_norm": 0.2871783971786499, "learning_rate": 0.00021088946806659353, "loss": 1.7643, "step": 12246 }, { "epoch": 0.597998046875, "grad_norm": 0.20256289839744568, "learning_rate": 0.0002108558931900551, "loss": 1.7846, "step": 12247 }, { "epoch": 0.598046875, "grad_norm": 0.2574317753314972, "learning_rate": 0.0002108223198681005, "loss": 1.7674, "step": 12248 }, { "epoch": 0.598095703125, "grad_norm": 0.21034710109233856, "learning_rate": 0.0002107887481015437, "loss": 1.7361, "step": 12249 }, { "epoch": 0.59814453125, "grad_norm": 0.2500321865081787, "learning_rate": 0.00021075517789119803, "loss": 1.7536, "step": 12250 }, { "epoch": 0.598193359375, "grad_norm": 0.24541229009628296, "learning_rate": 0.00021072160923787735, "loss": 1.7781, "step": 12251 }, { "epoch": 0.5982421875, "grad_norm": 0.25593745708465576, "learning_rate": 0.00021068804214239507, "loss": 1.7556, "step": 12252 }, { "epoch": 0.598291015625, "grad_norm": 0.24723279476165771, "learning_rate": 0.0002106544766055647, "loss": 1.7522, "step": 12253 }, { "epoch": 0.59833984375, "grad_norm": 0.2857297956943512, "learning_rate": 0.00021062091262819989, "loss": 1.755, "step": 12254 }, { "epoch": 0.598388671875, "grad_norm": 0.2855015695095062, "learning_rate": 0.00021058735021111386, "loss": 1.7719, "step": 12255 }, { "epoch": 0.5984375, "grad_norm": 0.2056790292263031, "learning_rate": 0.00021055378935512026, "loss": 1.7939, "step": 12256 }, { "epoch": 0.598486328125, "grad_norm": 0.2819485068321228, "learning_rate": 0.00021052023006103227, "loss": 1.7694, "step": 12257 }, { "epoch": 0.59853515625, "grad_norm": 0.23330774903297424, "learning_rate": 0.0002104866723296634, "loss": 1.7887, "step": 12258 }, { "epoch": 0.598583984375, "grad_norm": 0.22040094435214996, "learning_rate": 0.00021045311616182673, "loss": 1.7785, "step": 12259 }, { "epoch": 0.5986328125, "grad_norm": 0.23539108037948608, "learning_rate": 0.00021041956155833574, "loss": 1.7467, "step": 12260 }, { "epoch": 0.598681640625, "grad_norm": 0.2222210317850113, "learning_rate": 0.00021038600852000357, "loss": 1.7914, "step": 12261 }, { "epoch": 0.59873046875, "grad_norm": 0.23220936954021454, "learning_rate": 0.00021035245704764327, "loss": 1.8022, "step": 12262 }, { "epoch": 0.598779296875, "grad_norm": 0.22108162939548492, "learning_rate": 0.00021031890714206825, "loss": 1.7362, "step": 12263 }, { "epoch": 0.598828125, "grad_norm": 0.19851985573768616, "learning_rate": 0.00021028535880409133, "loss": 1.7691, "step": 12264 }, { "epoch": 0.598876953125, "grad_norm": 0.22957244515419006, "learning_rate": 0.00021025181203452591, "loss": 1.7824, "step": 12265 }, { "epoch": 0.59892578125, "grad_norm": 0.1993449330329895, "learning_rate": 0.0002102182668341847, "loss": 1.7654, "step": 12266 }, { "epoch": 0.598974609375, "grad_norm": 0.2131151705980301, "learning_rate": 0.00021018472320388093, "loss": 1.7632, "step": 12267 }, { "epoch": 0.5990234375, "grad_norm": 0.19562940299510956, "learning_rate": 0.0002101511811444274, "loss": 1.7613, "step": 12268 }, { "epoch": 0.599072265625, "grad_norm": 0.22666510939598083, "learning_rate": 0.00021011764065663712, "loss": 1.765, "step": 12269 }, { "epoch": 0.59912109375, "grad_norm": 0.23622751235961914, "learning_rate": 0.00021008410174132302, "loss": 1.7865, "step": 12270 }, { "epoch": 0.599169921875, "grad_norm": 0.21681472659111023, "learning_rate": 0.00021005056439929776, "loss": 1.7593, "step": 12271 }, { "epoch": 0.59921875, "grad_norm": 0.24858370423316956, "learning_rate": 0.00021001702863137435, "loss": 1.7778, "step": 12272 }, { "epoch": 0.599267578125, "grad_norm": 0.27138757705688477, "learning_rate": 0.0002099834944383653, "loss": 1.7733, "step": 12273 }, { "epoch": 0.59931640625, "grad_norm": 0.26550155878067017, "learning_rate": 0.00020994996182108363, "loss": 1.7819, "step": 12274 }, { "epoch": 0.599365234375, "grad_norm": 0.236598402261734, "learning_rate": 0.00020991643078034183, "loss": 1.7563, "step": 12275 }, { "epoch": 0.5994140625, "grad_norm": 0.27075496315956116, "learning_rate": 0.00020988290131695265, "loss": 1.7619, "step": 12276 }, { "epoch": 0.599462890625, "grad_norm": 0.23636861145496368, "learning_rate": 0.00020984937343172873, "loss": 1.7638, "step": 12277 }, { "epoch": 0.59951171875, "grad_norm": 0.2852316200733185, "learning_rate": 0.00020981584712548247, "loss": 1.7935, "step": 12278 }, { "epoch": 0.599560546875, "grad_norm": 0.22875240445137024, "learning_rate": 0.0002097823223990266, "loss": 1.7809, "step": 12279 }, { "epoch": 0.599609375, "grad_norm": 0.2750106453895569, "learning_rate": 0.00020974879925317346, "loss": 1.7902, "step": 12280 }, { "epoch": 0.599658203125, "grad_norm": 0.2372676581144333, "learning_rate": 0.00020971527768873571, "loss": 1.7746, "step": 12281 }, { "epoch": 0.59970703125, "grad_norm": 0.2506546974182129, "learning_rate": 0.00020968175770652553, "loss": 1.7666, "step": 12282 }, { "epoch": 0.599755859375, "grad_norm": 0.21703238785266876, "learning_rate": 0.00020964823930735556, "loss": 1.7838, "step": 12283 }, { "epoch": 0.5998046875, "grad_norm": 0.24637402594089508, "learning_rate": 0.00020961472249203788, "loss": 1.7769, "step": 12284 }, { "epoch": 0.599853515625, "grad_norm": 0.23550215363502502, "learning_rate": 0.00020958120726138508, "loss": 1.797, "step": 12285 }, { "epoch": 0.59990234375, "grad_norm": 0.2286653220653534, "learning_rate": 0.00020954769361620918, "loss": 1.7645, "step": 12286 }, { "epoch": 0.599951171875, "grad_norm": 0.23135589063167572, "learning_rate": 0.00020951418155732248, "loss": 1.7924, "step": 12287 }, { "epoch": 0.6, "grad_norm": 0.21573016047477722, "learning_rate": 0.0002094806710855373, "loss": 1.755, "step": 12288 }, { "epoch": 0.600048828125, "grad_norm": 0.21684764325618744, "learning_rate": 0.0002094471622016656, "loss": 1.7626, "step": 12289 }, { "epoch": 0.60009765625, "grad_norm": 0.26424458622932434, "learning_rate": 0.00020941365490651965, "loss": 1.8095, "step": 12290 }, { "epoch": 0.600146484375, "grad_norm": 0.22665347158908844, "learning_rate": 0.00020938014920091142, "loss": 1.7716, "step": 12291 }, { "epoch": 0.6001953125, "grad_norm": 0.2482175976037979, "learning_rate": 0.00020934664508565304, "loss": 1.7724, "step": 12292 }, { "epoch": 0.600244140625, "grad_norm": 0.22809715569019318, "learning_rate": 0.00020931314256155643, "loss": 1.7889, "step": 12293 }, { "epoch": 0.60029296875, "grad_norm": 0.266008198261261, "learning_rate": 0.00020927964162943357, "loss": 1.7863, "step": 12294 }, { "epoch": 0.600341796875, "grad_norm": 0.2231818437576294, "learning_rate": 0.00020924614229009646, "loss": 1.7896, "step": 12295 }, { "epoch": 0.600390625, "grad_norm": 0.22893524169921875, "learning_rate": 0.00020921264454435684, "loss": 1.7733, "step": 12296 }, { "epoch": 0.600439453125, "grad_norm": 0.2354292869567871, "learning_rate": 0.00020917914839302672, "loss": 1.761, "step": 12297 }, { "epoch": 0.60048828125, "grad_norm": 0.22838136553764343, "learning_rate": 0.0002091456538369177, "loss": 1.7823, "step": 12298 }, { "epoch": 0.600537109375, "grad_norm": 0.23071929812431335, "learning_rate": 0.00020911216087684176, "loss": 1.7781, "step": 12299 }, { "epoch": 0.6005859375, "grad_norm": 0.24498429894447327, "learning_rate": 0.00020907866951361042, "loss": 1.7625, "step": 12300 }, { "epoch": 0.600634765625, "grad_norm": 0.22657433152198792, "learning_rate": 0.00020904517974803567, "loss": 1.766, "step": 12301 }, { "epoch": 0.60068359375, "grad_norm": 0.2871914207935333, "learning_rate": 0.00020901169158092886, "loss": 1.7879, "step": 12302 }, { "epoch": 0.600732421875, "grad_norm": 0.31916573643684387, "learning_rate": 0.00020897820501310173, "loss": 1.7595, "step": 12303 }, { "epoch": 0.60078125, "grad_norm": 0.25080621242523193, "learning_rate": 0.00020894472004536586, "loss": 1.7592, "step": 12304 }, { "epoch": 0.600830078125, "grad_norm": 0.3079030215740204, "learning_rate": 0.00020891123667853272, "loss": 1.76, "step": 12305 }, { "epoch": 0.60087890625, "grad_norm": 0.26097023487091064, "learning_rate": 0.00020887775491341393, "loss": 1.7886, "step": 12306 }, { "epoch": 0.600927734375, "grad_norm": 0.2626073658466339, "learning_rate": 0.00020884427475082075, "loss": 1.7612, "step": 12307 }, { "epoch": 0.6009765625, "grad_norm": 0.2496766746044159, "learning_rate": 0.00020881079619156486, "loss": 1.7848, "step": 12308 }, { "epoch": 0.601025390625, "grad_norm": 0.27565622329711914, "learning_rate": 0.00020877731923645736, "loss": 1.7795, "step": 12309 }, { "epoch": 0.60107421875, "grad_norm": 0.27432993054389954, "learning_rate": 0.0002087438438863099, "loss": 1.7723, "step": 12310 }, { "epoch": 0.601123046875, "grad_norm": 0.23809044063091278, "learning_rate": 0.00020871037014193357, "loss": 1.7614, "step": 12311 }, { "epoch": 0.601171875, "grad_norm": 0.26825428009033203, "learning_rate": 0.0002086768980041396, "loss": 1.7853, "step": 12312 }, { "epoch": 0.601220703125, "grad_norm": 0.26155638694763184, "learning_rate": 0.00020864342747373932, "loss": 1.7667, "step": 12313 }, { "epoch": 0.60126953125, "grad_norm": 0.2820681929588318, "learning_rate": 0.00020860995855154387, "loss": 1.7738, "step": 12314 }, { "epoch": 0.601318359375, "grad_norm": 0.24579942226409912, "learning_rate": 0.00020857649123836442, "loss": 1.761, "step": 12315 }, { "epoch": 0.6013671875, "grad_norm": 0.3379468619823456, "learning_rate": 0.00020854302553501204, "loss": 1.7884, "step": 12316 }, { "epoch": 0.601416015625, "grad_norm": 0.21346525847911835, "learning_rate": 0.00020850956144229793, "loss": 1.7576, "step": 12317 }, { "epoch": 0.60146484375, "grad_norm": 0.30701184272766113, "learning_rate": 0.00020847609896103286, "loss": 1.7393, "step": 12318 }, { "epoch": 0.601513671875, "grad_norm": 0.20346488058567047, "learning_rate": 0.00020844263809202812, "loss": 1.7536, "step": 12319 }, { "epoch": 0.6015625, "grad_norm": 0.31953907012939453, "learning_rate": 0.00020840917883609456, "loss": 1.755, "step": 12320 }, { "epoch": 0.601611328125, "grad_norm": 0.2366883009672165, "learning_rate": 0.00020837572119404286, "loss": 1.7416, "step": 12321 }, { "epoch": 0.60166015625, "grad_norm": 0.3017655909061432, "learning_rate": 0.00020834226516668424, "loss": 1.7736, "step": 12322 }, { "epoch": 0.601708984375, "grad_norm": 0.19939135015010834, "learning_rate": 0.00020830881075482928, "loss": 1.7621, "step": 12323 }, { "epoch": 0.6017578125, "grad_norm": 0.2609228789806366, "learning_rate": 0.00020827535795928893, "loss": 1.7696, "step": 12324 }, { "epoch": 0.601806640625, "grad_norm": 0.24499835073947906, "learning_rate": 0.0002082419067808738, "loss": 1.7736, "step": 12325 }, { "epoch": 0.60185546875, "grad_norm": 0.2746361792087555, "learning_rate": 0.00020820845722039477, "loss": 1.7785, "step": 12326 }, { "epoch": 0.601904296875, "grad_norm": 0.2421962022781372, "learning_rate": 0.0002081750092786624, "loss": 1.783, "step": 12327 }, { "epoch": 0.601953125, "grad_norm": 0.2454439103603363, "learning_rate": 0.00020814156295648746, "loss": 1.7862, "step": 12328 }, { "epoch": 0.602001953125, "grad_norm": 0.3077840805053711, "learning_rate": 0.0002081081182546804, "loss": 1.7796, "step": 12329 }, { "epoch": 0.60205078125, "grad_norm": 0.27878129482269287, "learning_rate": 0.00020807467517405172, "loss": 1.7554, "step": 12330 }, { "epoch": 0.602099609375, "grad_norm": 0.22285549342632294, "learning_rate": 0.0002080412337154122, "loss": 1.7675, "step": 12331 }, { "epoch": 0.6021484375, "grad_norm": 0.3328329920768738, "learning_rate": 0.0002080077938795721, "loss": 1.7508, "step": 12332 }, { "epoch": 0.602197265625, "grad_norm": 0.22015029191970825, "learning_rate": 0.00020797435566734197, "loss": 1.7379, "step": 12333 }, { "epoch": 0.60224609375, "grad_norm": 0.28831592202186584, "learning_rate": 0.00020794091907953217, "loss": 1.7413, "step": 12334 }, { "epoch": 0.602294921875, "grad_norm": 0.2090214639902115, "learning_rate": 0.00020790748411695308, "loss": 1.7723, "step": 12335 }, { "epoch": 0.60234375, "grad_norm": 0.2753020226955414, "learning_rate": 0.00020787405078041504, "loss": 1.7847, "step": 12336 }, { "epoch": 0.602392578125, "grad_norm": 0.22767868638038635, "learning_rate": 0.0002078406190707282, "loss": 1.7679, "step": 12337 }, { "epoch": 0.60244140625, "grad_norm": 0.26406484842300415, "learning_rate": 0.00020780718898870303, "loss": 1.7589, "step": 12338 }, { "epoch": 0.602490234375, "grad_norm": 0.247236967086792, "learning_rate": 0.00020777376053514958, "loss": 1.7862, "step": 12339 }, { "epoch": 0.6025390625, "grad_norm": 0.256912499666214, "learning_rate": 0.0002077403337108781, "loss": 1.7764, "step": 12340 }, { "epoch": 0.602587890625, "grad_norm": 0.29154860973358154, "learning_rate": 0.00020770690851669855, "loss": 1.7519, "step": 12341 }, { "epoch": 0.60263671875, "grad_norm": 0.2565051317214966, "learning_rate": 0.00020767348495342119, "loss": 1.7652, "step": 12342 }, { "epoch": 0.602685546875, "grad_norm": 0.2806755006313324, "learning_rate": 0.000207640063021856, "loss": 1.7557, "step": 12343 }, { "epoch": 0.602734375, "grad_norm": 0.23444807529449463, "learning_rate": 0.00020760664272281303, "loss": 1.7508, "step": 12344 }, { "epoch": 0.602783203125, "grad_norm": 0.2499619424343109, "learning_rate": 0.0002075732240571022, "loss": 1.7708, "step": 12345 }, { "epoch": 0.60283203125, "grad_norm": 0.2510562539100647, "learning_rate": 0.0002075398070255335, "loss": 1.7607, "step": 12346 }, { "epoch": 0.602880859375, "grad_norm": 0.20702052116394043, "learning_rate": 0.0002075063916289167, "loss": 1.7803, "step": 12347 }, { "epoch": 0.6029296875, "grad_norm": 0.2234422266483307, "learning_rate": 0.00020747297786806175, "loss": 1.7801, "step": 12348 }, { "epoch": 0.602978515625, "grad_norm": 0.22547318041324615, "learning_rate": 0.00020743956574377848, "loss": 1.769, "step": 12349 }, { "epoch": 0.60302734375, "grad_norm": 0.259117066860199, "learning_rate": 0.00020740615525687657, "loss": 1.7904, "step": 12350 }, { "epoch": 0.603076171875, "grad_norm": 0.26744693517684937, "learning_rate": 0.0002073727464081658, "loss": 1.7603, "step": 12351 }, { "epoch": 0.603125, "grad_norm": 0.2163683921098709, "learning_rate": 0.00020733933919845577, "loss": 1.7469, "step": 12352 }, { "epoch": 0.603173828125, "grad_norm": 0.2786138355731964, "learning_rate": 0.00020730593362855632, "loss": 1.7591, "step": 12353 }, { "epoch": 0.60322265625, "grad_norm": 0.290287047624588, "learning_rate": 0.00020727252969927694, "loss": 1.7726, "step": 12354 }, { "epoch": 0.603271484375, "grad_norm": 0.2865011394023895, "learning_rate": 0.0002072391274114272, "loss": 1.7679, "step": 12355 }, { "epoch": 0.6033203125, "grad_norm": 0.23675143718719482, "learning_rate": 0.00020720572676581668, "loss": 1.7932, "step": 12356 }, { "epoch": 0.603369140625, "grad_norm": 0.2615421712398529, "learning_rate": 0.00020717232776325473, "loss": 1.761, "step": 12357 }, { "epoch": 0.60341796875, "grad_norm": 0.22322924435138702, "learning_rate": 0.000207138930404551, "loss": 1.7874, "step": 12358 }, { "epoch": 0.603466796875, "grad_norm": 0.22999215126037598, "learning_rate": 0.0002071055346905148, "loss": 1.7615, "step": 12359 }, { "epoch": 0.603515625, "grad_norm": 0.21555881202220917, "learning_rate": 0.0002070721406219555, "loss": 1.7863, "step": 12360 }, { "epoch": 0.603564453125, "grad_norm": 0.2494351714849472, "learning_rate": 0.00020703874819968243, "loss": 1.7532, "step": 12361 }, { "epoch": 0.60361328125, "grad_norm": 0.22840605676174164, "learning_rate": 0.00020700535742450494, "loss": 1.7727, "step": 12362 }, { "epoch": 0.603662109375, "grad_norm": 0.2555084228515625, "learning_rate": 0.00020697196829723218, "loss": 1.7889, "step": 12363 }, { "epoch": 0.6037109375, "grad_norm": 0.2203066200017929, "learning_rate": 0.00020693858081867345, "loss": 1.7818, "step": 12364 }, { "epoch": 0.603759765625, "grad_norm": 0.3006170988082886, "learning_rate": 0.0002069051949896379, "loss": 1.7832, "step": 12365 }, { "epoch": 0.60380859375, "grad_norm": 0.20873259007930756, "learning_rate": 0.00020687181081093463, "loss": 1.7663, "step": 12366 }, { "epoch": 0.603857421875, "grad_norm": 0.26375311613082886, "learning_rate": 0.00020683842828337278, "loss": 1.7592, "step": 12367 }, { "epoch": 0.60390625, "grad_norm": 0.2393151968717575, "learning_rate": 0.0002068050474077614, "loss": 1.7742, "step": 12368 }, { "epoch": 0.603955078125, "grad_norm": 0.2517048120498657, "learning_rate": 0.0002067716681849095, "loss": 1.7653, "step": 12369 }, { "epoch": 0.60400390625, "grad_norm": 0.25242480635643005, "learning_rate": 0.00020673829061562606, "loss": 1.7696, "step": 12370 }, { "epoch": 0.604052734375, "grad_norm": 0.2462947964668274, "learning_rate": 0.0002067049147007199, "loss": 1.77, "step": 12371 }, { "epoch": 0.6041015625, "grad_norm": 0.20672406256198883, "learning_rate": 0.00020667154044100002, "loss": 1.7875, "step": 12372 }, { "epoch": 0.604150390625, "grad_norm": 0.23288409411907196, "learning_rate": 0.00020663816783727523, "loss": 1.7574, "step": 12373 }, { "epoch": 0.60419921875, "grad_norm": 0.17868337035179138, "learning_rate": 0.0002066047968903544, "loss": 1.7809, "step": 12374 }, { "epoch": 0.604248046875, "grad_norm": 0.24392937123775482, "learning_rate": 0.00020657142760104624, "loss": 1.7831, "step": 12375 }, { "epoch": 0.604296875, "grad_norm": 0.21227122843265533, "learning_rate": 0.00020653805997015951, "loss": 1.7893, "step": 12376 }, { "epoch": 0.604345703125, "grad_norm": 0.1943565309047699, "learning_rate": 0.0002065046939985029, "loss": 1.7927, "step": 12377 }, { "epoch": 0.60439453125, "grad_norm": 0.19582433998584747, "learning_rate": 0.00020647132968688514, "loss": 1.7811, "step": 12378 }, { "epoch": 0.604443359375, "grad_norm": 0.2213718146085739, "learning_rate": 0.00020643796703611467, "loss": 1.7862, "step": 12379 }, { "epoch": 0.6044921875, "grad_norm": 0.20866189897060394, "learning_rate": 0.00020640460604700017, "loss": 1.7641, "step": 12380 }, { "epoch": 0.604541015625, "grad_norm": 0.20570267736911774, "learning_rate": 0.00020637124672035018, "loss": 1.7914, "step": 12381 }, { "epoch": 0.60458984375, "grad_norm": 0.23911604285240173, "learning_rate": 0.0002063378890569731, "loss": 1.7708, "step": 12382 }, { "epoch": 0.604638671875, "grad_norm": 0.23542508482933044, "learning_rate": 0.0002063045330576775, "loss": 1.7747, "step": 12383 }, { "epoch": 0.6046875, "grad_norm": 0.2222011387348175, "learning_rate": 0.00020627117872327173, "loss": 1.7609, "step": 12384 }, { "epoch": 0.604736328125, "grad_norm": 0.24862396717071533, "learning_rate": 0.00020623782605456415, "loss": 1.7768, "step": 12385 }, { "epoch": 0.60478515625, "grad_norm": 0.2890821397304535, "learning_rate": 0.00020620447505236307, "loss": 1.7568, "step": 12386 }, { "epoch": 0.604833984375, "grad_norm": 0.21096763014793396, "learning_rate": 0.00020617112571747688, "loss": 1.766, "step": 12387 }, { "epoch": 0.6048828125, "grad_norm": 0.2548447251319885, "learning_rate": 0.00020613777805071365, "loss": 1.7716, "step": 12388 }, { "epoch": 0.604931640625, "grad_norm": 0.2810945510864258, "learning_rate": 0.00020610443205288176, "loss": 1.8, "step": 12389 }, { "epoch": 0.60498046875, "grad_norm": 0.27876850962638855, "learning_rate": 0.00020607108772478926, "loss": 1.7706, "step": 12390 }, { "epoch": 0.605029296875, "grad_norm": 0.2317655235528946, "learning_rate": 0.00020603774506724437, "loss": 1.7742, "step": 12391 }, { "epoch": 0.605078125, "grad_norm": 0.2822425961494446, "learning_rate": 0.00020600440408105513, "loss": 1.7501, "step": 12392 }, { "epoch": 0.605126953125, "grad_norm": 0.24822551012039185, "learning_rate": 0.0002059710647670296, "loss": 1.7786, "step": 12393 }, { "epoch": 0.60517578125, "grad_norm": 0.21822234988212585, "learning_rate": 0.00020593772712597575, "loss": 1.7491, "step": 12394 }, { "epoch": 0.605224609375, "grad_norm": 0.25776997208595276, "learning_rate": 0.00020590439115870157, "loss": 1.7775, "step": 12395 }, { "epoch": 0.6052734375, "grad_norm": 0.20677019655704498, "learning_rate": 0.00020587105686601493, "loss": 1.8036, "step": 12396 }, { "epoch": 0.605322265625, "grad_norm": 0.22201190888881683, "learning_rate": 0.00020583772424872387, "loss": 1.7654, "step": 12397 }, { "epoch": 0.60537109375, "grad_norm": 0.19982200860977173, "learning_rate": 0.00020580439330763605, "loss": 1.7667, "step": 12398 }, { "epoch": 0.605419921875, "grad_norm": 0.20294004678726196, "learning_rate": 0.00020577106404355937, "loss": 1.7725, "step": 12399 }, { "epoch": 0.60546875, "grad_norm": 0.2611319422721863, "learning_rate": 0.00020573773645730158, "loss": 1.7815, "step": 12400 }, { "epoch": 0.605517578125, "grad_norm": 0.23115618526935577, "learning_rate": 0.00020570441054967037, "loss": 1.7651, "step": 12401 }, { "epoch": 0.60556640625, "grad_norm": 0.24415114521980286, "learning_rate": 0.00020567108632147348, "loss": 1.8031, "step": 12402 }, { "epoch": 0.605615234375, "grad_norm": 0.24711143970489502, "learning_rate": 0.00020563776377351844, "loss": 1.7675, "step": 12403 }, { "epoch": 0.6056640625, "grad_norm": 0.19958558678627014, "learning_rate": 0.00020560444290661302, "loss": 1.7732, "step": 12404 }, { "epoch": 0.605712890625, "grad_norm": 0.2788539528846741, "learning_rate": 0.00020557112372156456, "loss": 1.8007, "step": 12405 }, { "epoch": 0.60576171875, "grad_norm": 0.2935912013053894, "learning_rate": 0.00020553780621918085, "loss": 1.7631, "step": 12406 }, { "epoch": 0.605810546875, "grad_norm": 0.2620902359485626, "learning_rate": 0.00020550449040026907, "loss": 1.7537, "step": 12407 }, { "epoch": 0.605859375, "grad_norm": 0.2204219400882721, "learning_rate": 0.0002054711762656369, "loss": 1.782, "step": 12408 }, { "epoch": 0.605908203125, "grad_norm": 0.34460580348968506, "learning_rate": 0.00020543786381609154, "loss": 1.77, "step": 12409 }, { "epoch": 0.60595703125, "grad_norm": 0.279707133769989, "learning_rate": 0.00020540455305244045, "loss": 1.7783, "step": 12410 }, { "epoch": 0.606005859375, "grad_norm": 0.22602395713329315, "learning_rate": 0.00020537124397549094, "loss": 1.7715, "step": 12411 }, { "epoch": 0.6060546875, "grad_norm": 0.29730361700057983, "learning_rate": 0.00020533793658605032, "loss": 1.756, "step": 12412 }, { "epoch": 0.606103515625, "grad_norm": 0.21235983073711395, "learning_rate": 0.00020530463088492575, "loss": 1.7691, "step": 12413 }, { "epoch": 0.60615234375, "grad_norm": 0.24229733645915985, "learning_rate": 0.00020527132687292443, "loss": 1.7521, "step": 12414 }, { "epoch": 0.606201171875, "grad_norm": 0.22333140671253204, "learning_rate": 0.00020523802455085349, "loss": 1.7862, "step": 12415 }, { "epoch": 0.60625, "grad_norm": 0.25012171268463135, "learning_rate": 0.0002052047239195201, "loss": 1.7403, "step": 12416 }, { "epoch": 0.606298828125, "grad_norm": 0.2540988326072693, "learning_rate": 0.00020517142497973134, "loss": 1.7627, "step": 12417 }, { "epoch": 0.60634765625, "grad_norm": 0.23093806207180023, "learning_rate": 0.00020513812773229417, "loss": 1.7927, "step": 12418 }, { "epoch": 0.606396484375, "grad_norm": 0.2639654576778412, "learning_rate": 0.00020510483217801558, "loss": 1.7883, "step": 12419 }, { "epoch": 0.6064453125, "grad_norm": 0.22604341804981232, "learning_rate": 0.0002050715383177026, "loss": 1.7806, "step": 12420 }, { "epoch": 0.606494140625, "grad_norm": 0.25233525037765503, "learning_rate": 0.000205038246152162, "loss": 1.7792, "step": 12421 }, { "epoch": 0.60654296875, "grad_norm": 0.2433248907327652, "learning_rate": 0.00020500495568220073, "loss": 1.7794, "step": 12422 }, { "epoch": 0.606591796875, "grad_norm": 0.24321019649505615, "learning_rate": 0.00020497166690862557, "loss": 1.7569, "step": 12423 }, { "epoch": 0.606640625, "grad_norm": 0.2746041715145111, "learning_rate": 0.00020493837983224334, "loss": 1.75, "step": 12424 }, { "epoch": 0.606689453125, "grad_norm": 0.2655218839645386, "learning_rate": 0.00020490509445386074, "loss": 1.7797, "step": 12425 }, { "epoch": 0.60673828125, "grad_norm": 0.2955166697502136, "learning_rate": 0.0002048718107742845, "loss": 1.7774, "step": 12426 }, { "epoch": 0.606787109375, "grad_norm": 0.2663581669330597, "learning_rate": 0.00020483852879432137, "loss": 1.7786, "step": 12427 }, { "epoch": 0.6068359375, "grad_norm": 0.2923806309700012, "learning_rate": 0.00020480524851477773, "loss": 1.7512, "step": 12428 }, { "epoch": 0.606884765625, "grad_norm": 0.3269783854484558, "learning_rate": 0.00020477196993646035, "loss": 1.7911, "step": 12429 }, { "epoch": 0.60693359375, "grad_norm": 0.28859007358551025, "learning_rate": 0.00020473869306017562, "loss": 1.7623, "step": 12430 }, { "epoch": 0.606982421875, "grad_norm": 0.28950417041778564, "learning_rate": 0.0002047054178867302, "loss": 1.7491, "step": 12431 }, { "epoch": 0.60703125, "grad_norm": 0.23816226422786713, "learning_rate": 0.00020467214441693038, "loss": 1.7508, "step": 12432 }, { "epoch": 0.607080078125, "grad_norm": 0.3239651918411255, "learning_rate": 0.0002046388726515827, "loss": 1.7789, "step": 12433 }, { "epoch": 0.60712890625, "grad_norm": 0.22701725363731384, "learning_rate": 0.00020460560259149342, "loss": 1.7643, "step": 12434 }, { "epoch": 0.607177734375, "grad_norm": 0.3059033751487732, "learning_rate": 0.000204572334237469, "loss": 1.7713, "step": 12435 }, { "epoch": 0.6072265625, "grad_norm": 0.25343966484069824, "learning_rate": 0.00020453906759031553, "loss": 1.764, "step": 12436 }, { "epoch": 0.607275390625, "grad_norm": 0.29498130083084106, "learning_rate": 0.00020450580265083946, "loss": 1.7495, "step": 12437 }, { "epoch": 0.60732421875, "grad_norm": 0.2572307586669922, "learning_rate": 0.0002044725394198469, "loss": 1.77, "step": 12438 }, { "epoch": 0.607373046875, "grad_norm": 0.3266981542110443, "learning_rate": 0.00020443927789814392, "loss": 1.7599, "step": 12439 }, { "epoch": 0.607421875, "grad_norm": 0.23352833092212677, "learning_rate": 0.0002044060180865368, "loss": 1.7796, "step": 12440 }, { "epoch": 0.607470703125, "grad_norm": 0.3129470944404602, "learning_rate": 0.00020437275998583143, "loss": 1.7554, "step": 12441 }, { "epoch": 0.60751953125, "grad_norm": 0.3007442057132721, "learning_rate": 0.00020433950359683412, "loss": 1.7699, "step": 12442 }, { "epoch": 0.607568359375, "grad_norm": 0.2567848861217499, "learning_rate": 0.0002043062489203506, "loss": 1.7789, "step": 12443 }, { "epoch": 0.6076171875, "grad_norm": 0.2957272231578827, "learning_rate": 0.000204272995957187, "loss": 1.7798, "step": 12444 }, { "epoch": 0.607666015625, "grad_norm": 0.21637167036533356, "learning_rate": 0.0002042397447081491, "loss": 1.7871, "step": 12445 }, { "epoch": 0.60771484375, "grad_norm": 0.31194302439689636, "learning_rate": 0.00020420649517404293, "loss": 1.785, "step": 12446 }, { "epoch": 0.607763671875, "grad_norm": 0.2306896448135376, "learning_rate": 0.00020417324735567423, "loss": 1.7618, "step": 12447 }, { "epoch": 0.6078125, "grad_norm": 0.2862534523010254, "learning_rate": 0.00020414000125384862, "loss": 1.7621, "step": 12448 }, { "epoch": 0.607861328125, "grad_norm": 0.2325669825077057, "learning_rate": 0.0002041067568693722, "loss": 1.7686, "step": 12449 }, { "epoch": 0.60791015625, "grad_norm": 0.2889689803123474, "learning_rate": 0.00020407351420305032, "loss": 1.7858, "step": 12450 }, { "epoch": 0.607958984375, "grad_norm": 0.23885776102542877, "learning_rate": 0.00020404027325568891, "loss": 1.7709, "step": 12451 }, { "epoch": 0.6080078125, "grad_norm": 0.23070406913757324, "learning_rate": 0.00020400703402809339, "loss": 1.7766, "step": 12452 }, { "epoch": 0.608056640625, "grad_norm": 0.2267468422651291, "learning_rate": 0.00020397379652106957, "loss": 1.7655, "step": 12453 }, { "epoch": 0.60810546875, "grad_norm": 0.2584117650985718, "learning_rate": 0.00020394056073542283, "loss": 1.7573, "step": 12454 }, { "epoch": 0.608154296875, "grad_norm": 0.2745421230792999, "learning_rate": 0.0002039073266719586, "loss": 1.8109, "step": 12455 }, { "epoch": 0.608203125, "grad_norm": 0.21182820200920105, "learning_rate": 0.00020387409433148245, "loss": 1.7556, "step": 12456 }, { "epoch": 0.608251953125, "grad_norm": 0.26510414481163025, "learning_rate": 0.00020384086371479977, "loss": 1.7589, "step": 12457 }, { "epoch": 0.60830078125, "grad_norm": 0.2664206027984619, "learning_rate": 0.0002038076348227159, "loss": 1.7707, "step": 12458 }, { "epoch": 0.608349609375, "grad_norm": 0.23838889598846436, "learning_rate": 0.00020377440765603622, "loss": 1.7877, "step": 12459 }, { "epoch": 0.6083984375, "grad_norm": 0.28615614771842957, "learning_rate": 0.00020374118221556603, "loss": 1.7335, "step": 12460 }, { "epoch": 0.608447265625, "grad_norm": 0.21151334047317505, "learning_rate": 0.0002037079585021105, "loss": 1.7758, "step": 12461 }, { "epoch": 0.60849609375, "grad_norm": 0.2602817118167877, "learning_rate": 0.00020367473651647489, "loss": 1.7628, "step": 12462 }, { "epoch": 0.608544921875, "grad_norm": 0.20750728249549866, "learning_rate": 0.00020364151625946436, "loss": 1.7606, "step": 12463 }, { "epoch": 0.60859375, "grad_norm": 0.27574023604393005, "learning_rate": 0.000203608297731884, "loss": 1.762, "step": 12464 }, { "epoch": 0.608642578125, "grad_norm": 0.22545666992664337, "learning_rate": 0.0002035750809345389, "loss": 1.7701, "step": 12465 }, { "epoch": 0.60869140625, "grad_norm": 0.2686530351638794, "learning_rate": 0.00020354186586823403, "loss": 1.7899, "step": 12466 }, { "epoch": 0.608740234375, "grad_norm": 0.21037939190864563, "learning_rate": 0.00020350865253377455, "loss": 1.7501, "step": 12467 }, { "epoch": 0.6087890625, "grad_norm": 0.27445030212402344, "learning_rate": 0.00020347544093196517, "loss": 1.7654, "step": 12468 }, { "epoch": 0.608837890625, "grad_norm": 0.22951021790504456, "learning_rate": 0.00020344223106361108, "loss": 1.7544, "step": 12469 }, { "epoch": 0.60888671875, "grad_norm": 0.2471674382686615, "learning_rate": 0.00020340902292951697, "loss": 1.7689, "step": 12470 }, { "epoch": 0.608935546875, "grad_norm": 0.2638282775878906, "learning_rate": 0.0002033758165304877, "loss": 1.7782, "step": 12471 }, { "epoch": 0.608984375, "grad_norm": 0.2474975734949112, "learning_rate": 0.00020334261186732812, "loss": 1.7753, "step": 12472 }, { "epoch": 0.609033203125, "grad_norm": 0.24742399156093597, "learning_rate": 0.00020330940894084276, "loss": 1.7668, "step": 12473 }, { "epoch": 0.60908203125, "grad_norm": 0.23942798376083374, "learning_rate": 0.00020327620775183663, "loss": 1.7511, "step": 12474 }, { "epoch": 0.609130859375, "grad_norm": 0.29437729716300964, "learning_rate": 0.0002032430083011141, "loss": 1.7829, "step": 12475 }, { "epoch": 0.6091796875, "grad_norm": 0.22615578770637512, "learning_rate": 0.00020320981058948002, "loss": 1.7585, "step": 12476 }, { "epoch": 0.609228515625, "grad_norm": 0.28518518805503845, "learning_rate": 0.00020317661461773874, "loss": 1.7371, "step": 12477 }, { "epoch": 0.60927734375, "grad_norm": 0.21588289737701416, "learning_rate": 0.00020314342038669502, "loss": 1.7593, "step": 12478 }, { "epoch": 0.609326171875, "grad_norm": 0.25683704018592834, "learning_rate": 0.00020311022789715321, "loss": 1.7605, "step": 12479 }, { "epoch": 0.609375, "grad_norm": 0.20034821331501007, "learning_rate": 0.0002030770371499177, "loss": 1.7545, "step": 12480 }, { "epoch": 0.609423828125, "grad_norm": 0.27155807614326477, "learning_rate": 0.00020304384814579307, "loss": 1.7808, "step": 12481 }, { "epoch": 0.60947265625, "grad_norm": 0.19807974994182587, "learning_rate": 0.0002030106608855835, "loss": 1.7661, "step": 12482 }, { "epoch": 0.609521484375, "grad_norm": 0.2609575390815735, "learning_rate": 0.00020297747537009354, "loss": 1.7659, "step": 12483 }, { "epoch": 0.6095703125, "grad_norm": 0.19220991432666779, "learning_rate": 0.00020294429160012717, "loss": 1.7707, "step": 12484 }, { "epoch": 0.609619140625, "grad_norm": 0.24973931908607483, "learning_rate": 0.0002029111095764889, "loss": 1.7853, "step": 12485 }, { "epoch": 0.60966796875, "grad_norm": 0.2171476185321808, "learning_rate": 0.0002028779292999827, "loss": 1.775, "step": 12486 }, { "epoch": 0.609716796875, "grad_norm": 0.2872629463672638, "learning_rate": 0.00020284475077141296, "loss": 1.737, "step": 12487 }, { "epoch": 0.609765625, "grad_norm": 0.2666846215724945, "learning_rate": 0.00020281157399158363, "loss": 1.7482, "step": 12488 }, { "epoch": 0.609814453125, "grad_norm": 0.24346928298473358, "learning_rate": 0.00020277839896129868, "loss": 1.7622, "step": 12489 }, { "epoch": 0.60986328125, "grad_norm": 0.26143065094947815, "learning_rate": 0.00020274522568136232, "loss": 1.7888, "step": 12490 }, { "epoch": 0.609912109375, "grad_norm": 0.288568377494812, "learning_rate": 0.00020271205415257844, "loss": 1.7512, "step": 12491 }, { "epoch": 0.6099609375, "grad_norm": 0.26803719997406006, "learning_rate": 0.00020267888437575104, "loss": 1.7662, "step": 12492 }, { "epoch": 0.610009765625, "grad_norm": 0.2596552073955536, "learning_rate": 0.00020264571635168394, "loss": 1.7645, "step": 12493 }, { "epoch": 0.61005859375, "grad_norm": 0.2655732035636902, "learning_rate": 0.00020261255008118112, "loss": 1.7729, "step": 12494 }, { "epoch": 0.610107421875, "grad_norm": 0.22144940495491028, "learning_rate": 0.00020257938556504619, "loss": 1.764, "step": 12495 }, { "epoch": 0.61015625, "grad_norm": 0.2434731274843216, "learning_rate": 0.00020254622280408313, "loss": 1.7643, "step": 12496 }, { "epoch": 0.610205078125, "grad_norm": 0.23353102803230286, "learning_rate": 0.00020251306179909558, "loss": 1.777, "step": 12497 }, { "epoch": 0.61025390625, "grad_norm": 0.2225208431482315, "learning_rate": 0.00020247990255088712, "loss": 1.7706, "step": 12498 }, { "epoch": 0.610302734375, "grad_norm": 0.21875669062137604, "learning_rate": 0.00020244674506026157, "loss": 1.785, "step": 12499 }, { "epoch": 0.6103515625, "grad_norm": 0.24522189795970917, "learning_rate": 0.00020241358932802234, "loss": 1.7633, "step": 12500 }, { "epoch": 0.610400390625, "grad_norm": 0.22085466980934143, "learning_rate": 0.00020238043535497312, "loss": 1.7789, "step": 12501 }, { "epoch": 0.61044921875, "grad_norm": 0.23632286489009857, "learning_rate": 0.0002023472831419174, "loss": 1.7572, "step": 12502 }, { "epoch": 0.610498046875, "grad_norm": 0.23794947564601898, "learning_rate": 0.0002023141326896587, "loss": 1.7814, "step": 12503 }, { "epoch": 0.610546875, "grad_norm": 0.21557694673538208, "learning_rate": 0.0002022809839990003, "loss": 1.7744, "step": 12504 }, { "epoch": 0.610595703125, "grad_norm": 0.26293322443962097, "learning_rate": 0.0002022478370707458, "loss": 1.7732, "step": 12505 }, { "epoch": 0.61064453125, "grad_norm": 0.2445235550403595, "learning_rate": 0.00020221469190569836, "loss": 1.7789, "step": 12506 }, { "epoch": 0.610693359375, "grad_norm": 0.2643339931964874, "learning_rate": 0.00020218154850466124, "loss": 1.7695, "step": 12507 }, { "epoch": 0.6107421875, "grad_norm": 0.2694852948188782, "learning_rate": 0.0002021484068684379, "loss": 1.7659, "step": 12508 }, { "epoch": 0.610791015625, "grad_norm": 0.2233901023864746, "learning_rate": 0.0002021152669978314, "loss": 1.7768, "step": 12509 }, { "epoch": 0.61083984375, "grad_norm": 0.2590900957584381, "learning_rate": 0.00020208212889364496, "loss": 1.7657, "step": 12510 }, { "epoch": 0.610888671875, "grad_norm": 0.22046463191509247, "learning_rate": 0.00020204899255668168, "loss": 1.7756, "step": 12511 }, { "epoch": 0.6109375, "grad_norm": 0.2971838116645813, "learning_rate": 0.0002020158579877448, "loss": 1.7445, "step": 12512 }, { "epoch": 0.610986328125, "grad_norm": 0.24558061361312866, "learning_rate": 0.00020198272518763717, "loss": 1.7739, "step": 12513 }, { "epoch": 0.61103515625, "grad_norm": 0.23425592482089996, "learning_rate": 0.0002019495941571618, "loss": 1.7788, "step": 12514 }, { "epoch": 0.611083984375, "grad_norm": 0.29549041390419006, "learning_rate": 0.00020191646489712179, "loss": 1.7273, "step": 12515 }, { "epoch": 0.6111328125, "grad_norm": 0.23588842153549194, "learning_rate": 0.00020188333740831988, "loss": 1.7505, "step": 12516 }, { "epoch": 0.611181640625, "grad_norm": 0.2320224493741989, "learning_rate": 0.00020185021169155909, "loss": 1.752, "step": 12517 }, { "epoch": 0.61123046875, "grad_norm": 0.273396760225296, "learning_rate": 0.00020181708774764213, "loss": 1.7891, "step": 12518 }, { "epoch": 0.611279296875, "grad_norm": 0.1939869374036789, "learning_rate": 0.00020178396557737194, "loss": 1.7741, "step": 12519 }, { "epoch": 0.611328125, "grad_norm": 0.26337653398513794, "learning_rate": 0.00020175084518155107, "loss": 1.7776, "step": 12520 }, { "epoch": 0.611376953125, "grad_norm": 0.2348279356956482, "learning_rate": 0.00020171772656098235, "loss": 1.7813, "step": 12521 }, { "epoch": 0.61142578125, "grad_norm": 0.2536207139492035, "learning_rate": 0.00020168460971646846, "loss": 1.7435, "step": 12522 }, { "epoch": 0.611474609375, "grad_norm": 0.23058675229549408, "learning_rate": 0.00020165149464881188, "loss": 1.7693, "step": 12523 }, { "epoch": 0.6115234375, "grad_norm": 0.24116596579551697, "learning_rate": 0.00020161838135881527, "loss": 1.793, "step": 12524 }, { "epoch": 0.611572265625, "grad_norm": 0.22631913423538208, "learning_rate": 0.0002015852698472811, "loss": 1.7598, "step": 12525 }, { "epoch": 0.61162109375, "grad_norm": 0.24386200308799744, "learning_rate": 0.00020155216011501199, "loss": 1.7857, "step": 12526 }, { "epoch": 0.611669921875, "grad_norm": 0.23097720742225647, "learning_rate": 0.0002015190521628102, "loss": 1.7789, "step": 12527 }, { "epoch": 0.61171875, "grad_norm": 0.23046940565109253, "learning_rate": 0.0002014859459914783, "loss": 1.7816, "step": 12528 }, { "epoch": 0.611767578125, "grad_norm": 0.24587836861610413, "learning_rate": 0.00020145284160181842, "loss": 1.7532, "step": 12529 }, { "epoch": 0.61181640625, "grad_norm": 0.2779623568058014, "learning_rate": 0.00020141973899463316, "loss": 1.7587, "step": 12530 }, { "epoch": 0.611865234375, "grad_norm": 0.2187386453151703, "learning_rate": 0.0002013866381707246, "loss": 1.7605, "step": 12531 }, { "epoch": 0.6119140625, "grad_norm": 0.24882368743419647, "learning_rate": 0.000201353539130895, "loss": 1.7685, "step": 12532 }, { "epoch": 0.611962890625, "grad_norm": 0.3306308090686798, "learning_rate": 0.00020132044187594656, "loss": 1.7943, "step": 12533 }, { "epoch": 0.61201171875, "grad_norm": 0.2454017698764801, "learning_rate": 0.00020128734640668128, "loss": 1.7501, "step": 12534 }, { "epoch": 0.612060546875, "grad_norm": 0.30816206336021423, "learning_rate": 0.0002012542527239016, "loss": 1.7711, "step": 12535 }, { "epoch": 0.612109375, "grad_norm": 0.2785518169403076, "learning_rate": 0.00020122116082840912, "loss": 1.7703, "step": 12536 }, { "epoch": 0.612158203125, "grad_norm": 0.28952065110206604, "learning_rate": 0.0002011880707210062, "loss": 1.7882, "step": 12537 }, { "epoch": 0.61220703125, "grad_norm": 0.3179168701171875, "learning_rate": 0.00020115498240249474, "loss": 1.7863, "step": 12538 }, { "epoch": 0.612255859375, "grad_norm": 0.2735767066478729, "learning_rate": 0.00020112189587367653, "loss": 1.7753, "step": 12539 }, { "epoch": 0.6123046875, "grad_norm": 0.2617149353027344, "learning_rate": 0.00020108881113535356, "loss": 1.7613, "step": 12540 }, { "epoch": 0.612353515625, "grad_norm": 0.22823520004749298, "learning_rate": 0.00020105572818832757, "loss": 1.7848, "step": 12541 }, { "epoch": 0.61240234375, "grad_norm": 0.27739399671554565, "learning_rate": 0.00020102264703340052, "loss": 1.7603, "step": 12542 }, { "epoch": 0.612451171875, "grad_norm": 0.23446720838546753, "learning_rate": 0.00020098956767137388, "loss": 1.7761, "step": 12543 }, { "epoch": 0.6125, "grad_norm": 0.25586894154548645, "learning_rate": 0.00020095649010304966, "loss": 1.7412, "step": 12544 }, { "epoch": 0.612548828125, "grad_norm": 0.26816338300704956, "learning_rate": 0.00020092341432922933, "loss": 1.7917, "step": 12545 }, { "epoch": 0.61259765625, "grad_norm": 0.23485760390758514, "learning_rate": 0.0002008903403507147, "loss": 1.7733, "step": 12546 }, { "epoch": 0.612646484375, "grad_norm": 0.2520756423473358, "learning_rate": 0.00020085726816830712, "loss": 1.7454, "step": 12547 }, { "epoch": 0.6126953125, "grad_norm": 0.26137179136276245, "learning_rate": 0.00020082419778280814, "loss": 1.7535, "step": 12548 }, { "epoch": 0.612744140625, "grad_norm": 0.26588767766952515, "learning_rate": 0.0002007911291950194, "loss": 1.7728, "step": 12549 }, { "epoch": 0.61279296875, "grad_norm": 0.27878740429878235, "learning_rate": 0.00020075806240574224, "loss": 1.75, "step": 12550 }, { "epoch": 0.612841796875, "grad_norm": 0.23237527906894684, "learning_rate": 0.0002007249974157781, "loss": 1.784, "step": 12551 }, { "epoch": 0.612890625, "grad_norm": 0.2882082164287567, "learning_rate": 0.00020069193422592825, "loss": 1.7666, "step": 12552 }, { "epoch": 0.612939453125, "grad_norm": 0.2590721845626831, "learning_rate": 0.00020065887283699418, "loss": 1.7916, "step": 12553 }, { "epoch": 0.61298828125, "grad_norm": 0.2605624496936798, "learning_rate": 0.00020062581324977697, "loss": 1.7546, "step": 12554 }, { "epoch": 0.613037109375, "grad_norm": 0.3129740357398987, "learning_rate": 0.00020059275546507798, "loss": 1.7693, "step": 12555 }, { "epoch": 0.6130859375, "grad_norm": 0.32363972067832947, "learning_rate": 0.00020055969948369834, "loss": 1.7715, "step": 12556 }, { "epoch": 0.613134765625, "grad_norm": 0.24913322925567627, "learning_rate": 0.00020052664530643911, "loss": 1.7545, "step": 12557 }, { "epoch": 0.61318359375, "grad_norm": 0.32119420170783997, "learning_rate": 0.00020049359293410158, "loss": 1.759, "step": 12558 }, { "epoch": 0.613232421875, "grad_norm": 0.2750900983810425, "learning_rate": 0.00020046054236748658, "loss": 1.7722, "step": 12559 }, { "epoch": 0.61328125, "grad_norm": 0.3398222327232361, "learning_rate": 0.00020042749360739526, "loss": 1.745, "step": 12560 }, { "epoch": 0.613330078125, "grad_norm": 0.2589671313762665, "learning_rate": 0.00020039444665462852, "loss": 1.7607, "step": 12561 }, { "epoch": 0.61337890625, "grad_norm": 0.34180811047554016, "learning_rate": 0.00020036140150998734, "loss": 1.7651, "step": 12562 }, { "epoch": 0.613427734375, "grad_norm": 0.2688341438770294, "learning_rate": 0.00020032835817427263, "loss": 1.7461, "step": 12563 }, { "epoch": 0.6134765625, "grad_norm": 0.32850223779678345, "learning_rate": 0.00020029531664828498, "loss": 1.7683, "step": 12564 }, { "epoch": 0.613525390625, "grad_norm": 0.2587418854236603, "learning_rate": 0.0002002622769328255, "loss": 1.7745, "step": 12565 }, { "epoch": 0.61357421875, "grad_norm": 0.29360559582710266, "learning_rate": 0.00020022923902869466, "loss": 1.7444, "step": 12566 }, { "epoch": 0.613623046875, "grad_norm": 0.24653391540050507, "learning_rate": 0.0002001962029366934, "loss": 1.7763, "step": 12567 }, { "epoch": 0.613671875, "grad_norm": 0.2604391872882843, "learning_rate": 0.00020016316865762213, "loss": 1.7538, "step": 12568 }, { "epoch": 0.613720703125, "grad_norm": 0.22025343775749207, "learning_rate": 0.0002001301361922817, "loss": 1.7464, "step": 12569 }, { "epoch": 0.61376953125, "grad_norm": 0.22323499619960785, "learning_rate": 0.00020009710554147247, "loss": 1.762, "step": 12570 }, { "epoch": 0.613818359375, "grad_norm": 0.22895057499408722, "learning_rate": 0.00020006407670599514, "loss": 1.7435, "step": 12571 }, { "epoch": 0.6138671875, "grad_norm": 0.2325063943862915, "learning_rate": 0.00020003104968665015, "loss": 1.7805, "step": 12572 }, { "epoch": 0.613916015625, "grad_norm": 0.22439375519752502, "learning_rate": 0.00019999802448423783, "loss": 1.7758, "step": 12573 }, { "epoch": 0.61396484375, "grad_norm": 0.21151591837406158, "learning_rate": 0.00019996500109955868, "loss": 1.7408, "step": 12574 }, { "epoch": 0.614013671875, "grad_norm": 0.2249910831451416, "learning_rate": 0.0001999319795334129, "loss": 1.7835, "step": 12575 }, { "epoch": 0.6140625, "grad_norm": 0.21279776096343994, "learning_rate": 0.00019989895978660106, "loss": 1.7687, "step": 12576 }, { "epoch": 0.614111328125, "grad_norm": 0.23802299797534943, "learning_rate": 0.00019986594185992314, "loss": 1.7926, "step": 12577 }, { "epoch": 0.61416015625, "grad_norm": 0.2489277869462967, "learning_rate": 0.0001998329257541796, "loss": 1.7604, "step": 12578 }, { "epoch": 0.614208984375, "grad_norm": 0.21707145869731903, "learning_rate": 0.00019979991147017035, "loss": 1.7663, "step": 12579 }, { "epoch": 0.6142578125, "grad_norm": 0.23734994232654572, "learning_rate": 0.00019976689900869576, "loss": 1.7761, "step": 12580 }, { "epoch": 0.614306640625, "grad_norm": 0.2483769655227661, "learning_rate": 0.0001997338883705559, "loss": 1.8122, "step": 12581 }, { "epoch": 0.61435546875, "grad_norm": 0.20402592420578003, "learning_rate": 0.00019970087955655053, "loss": 1.7335, "step": 12582 }, { "epoch": 0.614404296875, "grad_norm": 0.24651774764060974, "learning_rate": 0.00019966787256748, "loss": 1.7664, "step": 12583 }, { "epoch": 0.614453125, "grad_norm": 0.2181820273399353, "learning_rate": 0.00019963486740414395, "loss": 1.7764, "step": 12584 }, { "epoch": 0.614501953125, "grad_norm": 0.27131155133247375, "learning_rate": 0.00019960186406734254, "loss": 1.7559, "step": 12585 }, { "epoch": 0.61455078125, "grad_norm": 0.26859280467033386, "learning_rate": 0.00019956886255787548, "loss": 1.7741, "step": 12586 }, { "epoch": 0.614599609375, "grad_norm": 0.27488675713539124, "learning_rate": 0.00019953586287654269, "loss": 1.7728, "step": 12587 }, { "epoch": 0.6146484375, "grad_norm": 0.28912457823753357, "learning_rate": 0.00019950286502414384, "loss": 1.7594, "step": 12588 }, { "epoch": 0.614697265625, "grad_norm": 0.2661849558353424, "learning_rate": 0.00019946986900147874, "loss": 1.7543, "step": 12589 }, { "epoch": 0.61474609375, "grad_norm": 0.26180049777030945, "learning_rate": 0.00019943687480934708, "loss": 1.7623, "step": 12590 }, { "epoch": 0.614794921875, "grad_norm": 0.29350629448890686, "learning_rate": 0.00019940388244854836, "loss": 1.7803, "step": 12591 }, { "epoch": 0.61484375, "grad_norm": 0.2597517967224121, "learning_rate": 0.0001993708919198824, "loss": 1.7682, "step": 12592 }, { "epoch": 0.614892578125, "grad_norm": 0.2293930947780609, "learning_rate": 0.00019933790322414853, "loss": 1.7972, "step": 12593 }, { "epoch": 0.61494140625, "grad_norm": 0.30350780487060547, "learning_rate": 0.00019930491636214643, "loss": 1.7931, "step": 12594 }, { "epoch": 0.614990234375, "grad_norm": 0.22816641628742218, "learning_rate": 0.00019927193133467542, "loss": 1.7735, "step": 12595 }, { "epoch": 0.6150390625, "grad_norm": 0.2688407003879547, "learning_rate": 0.00019923894814253513, "loss": 1.7795, "step": 12596 }, { "epoch": 0.615087890625, "grad_norm": 0.24078890681266785, "learning_rate": 0.00019920596678652468, "loss": 1.7899, "step": 12597 }, { "epoch": 0.61513671875, "grad_norm": 0.24178734421730042, "learning_rate": 0.00019917298726744353, "loss": 1.808, "step": 12598 }, { "epoch": 0.615185546875, "grad_norm": 0.23287595808506012, "learning_rate": 0.00019914000958609103, "loss": 1.7912, "step": 12599 }, { "epoch": 0.615234375, "grad_norm": 0.24554093182086945, "learning_rate": 0.00019910703374326628, "loss": 1.7694, "step": 12600 }, { "epoch": 0.615283203125, "grad_norm": 0.25583070516586304, "learning_rate": 0.00019907405973976856, "loss": 1.7725, "step": 12601 }, { "epoch": 0.61533203125, "grad_norm": 0.21271857619285583, "learning_rate": 0.00019904108757639693, "loss": 1.7927, "step": 12602 }, { "epoch": 0.615380859375, "grad_norm": 0.23382702469825745, "learning_rate": 0.0001990081172539507, "loss": 1.8009, "step": 12603 }, { "epoch": 0.6154296875, "grad_norm": 0.1892673671245575, "learning_rate": 0.00019897514877322865, "loss": 1.7662, "step": 12604 }, { "epoch": 0.615478515625, "grad_norm": 0.209882453083992, "learning_rate": 0.00019894218213503006, "loss": 1.7851, "step": 12605 }, { "epoch": 0.61552734375, "grad_norm": 0.20037314295768738, "learning_rate": 0.00019890921734015376, "loss": 1.7656, "step": 12606 }, { "epoch": 0.615576171875, "grad_norm": 0.20852594077587128, "learning_rate": 0.00019887625438939866, "loss": 1.7687, "step": 12607 }, { "epoch": 0.615625, "grad_norm": 0.19355790317058563, "learning_rate": 0.00019884329328356376, "loss": 1.7302, "step": 12608 }, { "epoch": 0.615673828125, "grad_norm": 0.22205501794815063, "learning_rate": 0.0001988103340234478, "loss": 1.7681, "step": 12609 }, { "epoch": 0.61572265625, "grad_norm": 0.22255386412143707, "learning_rate": 0.0001987773766098496, "loss": 1.7455, "step": 12610 }, { "epoch": 0.615771484375, "grad_norm": 0.25456562638282776, "learning_rate": 0.00019874442104356787, "loss": 1.7468, "step": 12611 }, { "epoch": 0.6158203125, "grad_norm": 0.21128860116004944, "learning_rate": 0.00019871146732540146, "loss": 1.7524, "step": 12612 }, { "epoch": 0.615869140625, "grad_norm": 0.22578579187393188, "learning_rate": 0.00019867851545614886, "loss": 1.775, "step": 12613 }, { "epoch": 0.61591796875, "grad_norm": 0.1967761069536209, "learning_rate": 0.00019864556543660878, "loss": 1.7696, "step": 12614 }, { "epoch": 0.615966796875, "grad_norm": 0.19994497299194336, "learning_rate": 0.0001986126172675798, "loss": 1.7688, "step": 12615 }, { "epoch": 0.616015625, "grad_norm": 0.20824843645095825, "learning_rate": 0.00019857967094986034, "loss": 1.7588, "step": 12616 }, { "epoch": 0.616064453125, "grad_norm": 0.18977101147174835, "learning_rate": 0.00019854672648424897, "loss": 1.7746, "step": 12617 }, { "epoch": 0.61611328125, "grad_norm": 0.2316828966140747, "learning_rate": 0.0001985137838715441, "loss": 1.7837, "step": 12618 }, { "epoch": 0.616162109375, "grad_norm": 0.24573977291584015, "learning_rate": 0.00019848084311254423, "loss": 1.7686, "step": 12619 }, { "epoch": 0.6162109375, "grad_norm": 0.26888540387153625, "learning_rate": 0.00019844790420804742, "loss": 1.7704, "step": 12620 }, { "epoch": 0.616259765625, "grad_norm": 0.24490392208099365, "learning_rate": 0.00019841496715885227, "loss": 1.7838, "step": 12621 }, { "epoch": 0.61630859375, "grad_norm": 0.28278741240501404, "learning_rate": 0.00019838203196575693, "loss": 1.7945, "step": 12622 }, { "epoch": 0.616357421875, "grad_norm": 0.2757646441459656, "learning_rate": 0.00019834909862955952, "loss": 1.7744, "step": 12623 }, { "epoch": 0.61640625, "grad_norm": 0.25809112191200256, "learning_rate": 0.00019831616715105832, "loss": 1.7587, "step": 12624 }, { "epoch": 0.616455078125, "grad_norm": 0.23942391574382782, "learning_rate": 0.00019828323753105144, "loss": 1.769, "step": 12625 }, { "epoch": 0.61650390625, "grad_norm": 0.24610647559165955, "learning_rate": 0.00019825030977033693, "loss": 1.7741, "step": 12626 }, { "epoch": 0.616552734375, "grad_norm": 0.22259913384914398, "learning_rate": 0.00019821738386971283, "loss": 1.7415, "step": 12627 }, { "epoch": 0.6166015625, "grad_norm": 0.23554706573486328, "learning_rate": 0.00019818445982997706, "loss": 1.7776, "step": 12628 }, { "epoch": 0.616650390625, "grad_norm": 0.25008130073547363, "learning_rate": 0.00019815153765192763, "loss": 1.7675, "step": 12629 }, { "epoch": 0.61669921875, "grad_norm": 0.25404736399650574, "learning_rate": 0.0001981186173363625, "loss": 1.7933, "step": 12630 }, { "epoch": 0.616748046875, "grad_norm": 0.2499132752418518, "learning_rate": 0.0001980856988840794, "loss": 1.7726, "step": 12631 }, { "epoch": 0.616796875, "grad_norm": 0.28441861271858215, "learning_rate": 0.0001980527822958762, "loss": 1.742, "step": 12632 }, { "epoch": 0.616845703125, "grad_norm": 0.2539278566837311, "learning_rate": 0.00019801986757255057, "loss": 1.7565, "step": 12633 }, { "epoch": 0.61689453125, "grad_norm": 0.2586963176727295, "learning_rate": 0.00019798695471490036, "loss": 1.7726, "step": 12634 }, { "epoch": 0.616943359375, "grad_norm": 0.2368362545967102, "learning_rate": 0.0001979540437237231, "loss": 1.7648, "step": 12635 }, { "epoch": 0.6169921875, "grad_norm": 0.24047712981700897, "learning_rate": 0.00019792113459981658, "loss": 1.7751, "step": 12636 }, { "epoch": 0.617041015625, "grad_norm": 0.19982129335403442, "learning_rate": 0.0001978882273439782, "loss": 1.7763, "step": 12637 }, { "epoch": 0.61708984375, "grad_norm": 0.2540612518787384, "learning_rate": 0.00019785532195700566, "loss": 1.7674, "step": 12638 }, { "epoch": 0.617138671875, "grad_norm": 0.2256309539079666, "learning_rate": 0.0001978224184396964, "loss": 1.7527, "step": 12639 }, { "epoch": 0.6171875, "grad_norm": 0.17549973726272583, "learning_rate": 0.00019778951679284775, "loss": 1.7614, "step": 12640 }, { "epoch": 0.617236328125, "grad_norm": 0.24041634798049927, "learning_rate": 0.00019775661701725722, "loss": 1.7481, "step": 12641 }, { "epoch": 0.61728515625, "grad_norm": 0.21613195538520813, "learning_rate": 0.00019772371911372212, "loss": 1.7806, "step": 12642 }, { "epoch": 0.617333984375, "grad_norm": 0.21014678478240967, "learning_rate": 0.00019769082308303975, "loss": 1.7627, "step": 12643 }, { "epoch": 0.6173828125, "grad_norm": 0.23928342759609222, "learning_rate": 0.0001976579289260074, "loss": 1.792, "step": 12644 }, { "epoch": 0.617431640625, "grad_norm": 0.21764160692691803, "learning_rate": 0.00019762503664342234, "loss": 1.7625, "step": 12645 }, { "epoch": 0.61748046875, "grad_norm": 0.23600730299949646, "learning_rate": 0.00019759214623608164, "loss": 1.7745, "step": 12646 }, { "epoch": 0.617529296875, "grad_norm": 0.23263029754161835, "learning_rate": 0.00019755925770478244, "loss": 1.739, "step": 12647 }, { "epoch": 0.617578125, "grad_norm": 0.19748157262802124, "learning_rate": 0.00019752637105032196, "loss": 1.7647, "step": 12648 }, { "epoch": 0.617626953125, "grad_norm": 0.248871311545372, "learning_rate": 0.00019749348627349695, "loss": 1.7855, "step": 12649 }, { "epoch": 0.61767578125, "grad_norm": 0.19280454516410828, "learning_rate": 0.00019746060337510462, "loss": 1.7526, "step": 12650 }, { "epoch": 0.617724609375, "grad_norm": 0.28540605306625366, "learning_rate": 0.00019742772235594193, "loss": 1.766, "step": 12651 }, { "epoch": 0.6177734375, "grad_norm": 0.22253525257110596, "learning_rate": 0.00019739484321680567, "loss": 1.7884, "step": 12652 }, { "epoch": 0.617822265625, "grad_norm": 0.26190999150276184, "learning_rate": 0.0001973619659584927, "loss": 1.75, "step": 12653 }, { "epoch": 0.61787109375, "grad_norm": 0.22743567824363708, "learning_rate": 0.00019732909058179987, "loss": 1.7836, "step": 12654 }, { "epoch": 0.617919921875, "grad_norm": 0.27304932475090027, "learning_rate": 0.00019729621708752394, "loss": 1.7547, "step": 12655 }, { "epoch": 0.61796875, "grad_norm": 0.19850710034370422, "learning_rate": 0.00019726334547646169, "loss": 1.7502, "step": 12656 }, { "epoch": 0.618017578125, "grad_norm": 0.24266068637371063, "learning_rate": 0.0001972304757494096, "loss": 1.7588, "step": 12657 }, { "epoch": 0.61806640625, "grad_norm": 0.19833090901374817, "learning_rate": 0.00019719760790716452, "loss": 1.7661, "step": 12658 }, { "epoch": 0.618115234375, "grad_norm": 0.23739485442638397, "learning_rate": 0.00019716474195052286, "loss": 1.7467, "step": 12659 }, { "epoch": 0.6181640625, "grad_norm": 0.26991787552833557, "learning_rate": 0.00019713187788028119, "loss": 1.7774, "step": 12660 }, { "epoch": 0.618212890625, "grad_norm": 0.19289682805538177, "learning_rate": 0.00019709901569723605, "loss": 1.7604, "step": 12661 }, { "epoch": 0.61826171875, "grad_norm": 0.2921232283115387, "learning_rate": 0.00019706615540218382, "loss": 1.7708, "step": 12662 }, { "epoch": 0.618310546875, "grad_norm": 0.2333492785692215, "learning_rate": 0.00019703329699592093, "loss": 1.7661, "step": 12663 }, { "epoch": 0.618359375, "grad_norm": 0.21837306022644043, "learning_rate": 0.0001970004404792437, "loss": 1.7557, "step": 12664 }, { "epoch": 0.618408203125, "grad_norm": 0.25267013907432556, "learning_rate": 0.00019696758585294855, "loss": 1.7694, "step": 12665 }, { "epoch": 0.61845703125, "grad_norm": 0.23719915747642517, "learning_rate": 0.00019693473311783156, "loss": 1.7786, "step": 12666 }, { "epoch": 0.618505859375, "grad_norm": 0.22359004616737366, "learning_rate": 0.00019690188227468907, "loss": 1.7486, "step": 12667 }, { "epoch": 0.6185546875, "grad_norm": 0.2222931832075119, "learning_rate": 0.00019686903332431723, "loss": 1.7589, "step": 12668 }, { "epoch": 0.618603515625, "grad_norm": 0.24246959388256073, "learning_rate": 0.00019683618626751209, "loss": 1.7334, "step": 12669 }, { "epoch": 0.61865234375, "grad_norm": 0.18556618690490723, "learning_rate": 0.00019680334110506976, "loss": 1.7838, "step": 12670 }, { "epoch": 0.618701171875, "grad_norm": 0.22637653350830078, "learning_rate": 0.00019677049783778632, "loss": 1.7627, "step": 12671 }, { "epoch": 0.61875, "grad_norm": 0.2507722079753876, "learning_rate": 0.0001967376564664577, "loss": 1.7788, "step": 12672 }, { "epoch": 0.618798828125, "grad_norm": 0.2165704369544983, "learning_rate": 0.00019670481699187988, "loss": 1.7582, "step": 12673 }, { "epoch": 0.61884765625, "grad_norm": 0.20409157872200012, "learning_rate": 0.00019667197941484872, "loss": 1.7695, "step": 12674 }, { "epoch": 0.618896484375, "grad_norm": 0.2566186189651489, "learning_rate": 0.00019663914373616, "loss": 1.7878, "step": 12675 }, { "epoch": 0.6189453125, "grad_norm": 0.20624126493930817, "learning_rate": 0.00019660630995660966, "loss": 1.7907, "step": 12676 }, { "epoch": 0.618994140625, "grad_norm": 0.2316577136516571, "learning_rate": 0.00019657347807699332, "loss": 1.7571, "step": 12677 }, { "epoch": 0.61904296875, "grad_norm": 0.260759174823761, "learning_rate": 0.0001965406480981068, "loss": 1.7877, "step": 12678 }, { "epoch": 0.619091796875, "grad_norm": 0.1972658634185791, "learning_rate": 0.00019650782002074562, "loss": 1.8061, "step": 12679 }, { "epoch": 0.619140625, "grad_norm": 0.2571122944355011, "learning_rate": 0.00019647499384570555, "loss": 1.7722, "step": 12680 }, { "epoch": 0.619189453125, "grad_norm": 0.20418435335159302, "learning_rate": 0.00019644216957378214, "loss": 1.7764, "step": 12681 }, { "epoch": 0.61923828125, "grad_norm": 0.2170892059803009, "learning_rate": 0.00019640934720577076, "loss": 1.7836, "step": 12682 }, { "epoch": 0.619287109375, "grad_norm": 0.21122387051582336, "learning_rate": 0.00019637652674246709, "loss": 1.763, "step": 12683 }, { "epoch": 0.6193359375, "grad_norm": 0.24889177083969116, "learning_rate": 0.00019634370818466635, "loss": 1.764, "step": 12684 }, { "epoch": 0.619384765625, "grad_norm": 0.2146836668252945, "learning_rate": 0.0001963108915331641, "loss": 1.7509, "step": 12685 }, { "epoch": 0.61943359375, "grad_norm": 0.2210390269756317, "learning_rate": 0.0001962780767887556, "loss": 1.7736, "step": 12686 }, { "epoch": 0.619482421875, "grad_norm": 0.23546013236045837, "learning_rate": 0.00019624526395223618, "loss": 1.7532, "step": 12687 }, { "epoch": 0.61953125, "grad_norm": 0.25806543231010437, "learning_rate": 0.000196212453024401, "loss": 1.79, "step": 12688 }, { "epoch": 0.619580078125, "grad_norm": 0.22202447056770325, "learning_rate": 0.00019617964400604537, "loss": 1.7643, "step": 12689 }, { "epoch": 0.61962890625, "grad_norm": 0.19803181290626526, "learning_rate": 0.00019614683689796436, "loss": 1.7921, "step": 12690 }, { "epoch": 0.619677734375, "grad_norm": 0.2272113561630249, "learning_rate": 0.0001961140317009531, "loss": 1.7866, "step": 12691 }, { "epoch": 0.6197265625, "grad_norm": 0.25193434953689575, "learning_rate": 0.00019608122841580672, "loss": 1.7701, "step": 12692 }, { "epoch": 0.619775390625, "grad_norm": 0.2324574887752533, "learning_rate": 0.0001960484270433201, "loss": 1.7952, "step": 12693 }, { "epoch": 0.61982421875, "grad_norm": 0.2263520061969757, "learning_rate": 0.00019601562758428832, "loss": 1.7643, "step": 12694 }, { "epoch": 0.619873046875, "grad_norm": 0.29672542214393616, "learning_rate": 0.00019598283003950624, "loss": 1.7613, "step": 12695 }, { "epoch": 0.619921875, "grad_norm": 0.27778175473213196, "learning_rate": 0.00019595003440976878, "loss": 1.7757, "step": 12696 }, { "epoch": 0.619970703125, "grad_norm": 0.1968763917684555, "learning_rate": 0.00019591724069587075, "loss": 1.7553, "step": 12697 }, { "epoch": 0.62001953125, "grad_norm": 0.28992754220962524, "learning_rate": 0.0001958844488986069, "loss": 1.7712, "step": 12698 }, { "epoch": 0.620068359375, "grad_norm": 0.20550554990768433, "learning_rate": 0.00019585165901877207, "loss": 1.7645, "step": 12699 }, { "epoch": 0.6201171875, "grad_norm": 0.3043519854545593, "learning_rate": 0.00019581887105716073, "loss": 1.7303, "step": 12700 }, { "epoch": 0.620166015625, "grad_norm": 0.2663830816745758, "learning_rate": 0.00019578608501456774, "loss": 1.767, "step": 12701 }, { "epoch": 0.62021484375, "grad_norm": 0.25733482837677, "learning_rate": 0.00019575330089178757, "loss": 1.7749, "step": 12702 }, { "epoch": 0.620263671875, "grad_norm": 0.22342337667942047, "learning_rate": 0.00019572051868961494, "loss": 1.7618, "step": 12703 }, { "epoch": 0.6203125, "grad_norm": 0.23561610281467438, "learning_rate": 0.00019568773840884408, "loss": 1.7574, "step": 12704 }, { "epoch": 0.620361328125, "grad_norm": 0.27556943893432617, "learning_rate": 0.00019565496005026972, "loss": 1.7785, "step": 12705 }, { "epoch": 0.62041015625, "grad_norm": 0.19850601255893707, "learning_rate": 0.00019562218361468604, "loss": 1.7801, "step": 12706 }, { "epoch": 0.620458984375, "grad_norm": 0.2928226590156555, "learning_rate": 0.00019558940910288763, "loss": 1.7498, "step": 12707 }, { "epoch": 0.6205078125, "grad_norm": 0.21301786601543427, "learning_rate": 0.00019555663651566867, "loss": 1.777, "step": 12708 }, { "epoch": 0.620556640625, "grad_norm": 0.3096786439418793, "learning_rate": 0.0001955238658538233, "loss": 1.7647, "step": 12709 }, { "epoch": 0.62060546875, "grad_norm": 0.2668454647064209, "learning_rate": 0.0001954910971181461, "loss": 1.7557, "step": 12710 }, { "epoch": 0.620654296875, "grad_norm": 0.23983760178089142, "learning_rate": 0.00019545833030943082, "loss": 1.7546, "step": 12711 }, { "epoch": 0.620703125, "grad_norm": 0.26140356063842773, "learning_rate": 0.000195425565428472, "loss": 1.771, "step": 12712 }, { "epoch": 0.620751953125, "grad_norm": 0.2484748363494873, "learning_rate": 0.0001953928024760634, "loss": 1.8075, "step": 12713 }, { "epoch": 0.62080078125, "grad_norm": 0.2474798560142517, "learning_rate": 0.0001953600414529993, "loss": 1.748, "step": 12714 }, { "epoch": 0.620849609375, "grad_norm": 0.23891477286815643, "learning_rate": 0.00019532728236007358, "loss": 1.7457, "step": 12715 }, { "epoch": 0.6208984375, "grad_norm": 0.27953287959098816, "learning_rate": 0.00019529452519808006, "loss": 1.761, "step": 12716 }, { "epoch": 0.620947265625, "grad_norm": 0.22119787335395813, "learning_rate": 0.00019526176996781286, "loss": 1.7703, "step": 12717 }, { "epoch": 0.62099609375, "grad_norm": 0.2880809009075165, "learning_rate": 0.00019522901667006566, "loss": 1.7776, "step": 12718 }, { "epoch": 0.621044921875, "grad_norm": 0.21906405687332153, "learning_rate": 0.00019519626530563238, "loss": 1.7889, "step": 12719 }, { "epoch": 0.62109375, "grad_norm": 0.22949138283729553, "learning_rate": 0.00019516351587530663, "loss": 1.7757, "step": 12720 }, { "epoch": 0.621142578125, "grad_norm": 0.26074621081352234, "learning_rate": 0.0001951307683798823, "loss": 1.7416, "step": 12721 }, { "epoch": 0.62119140625, "grad_norm": 0.21494373679161072, "learning_rate": 0.00019509802282015293, "loss": 1.7754, "step": 12722 }, { "epoch": 0.621240234375, "grad_norm": 0.2838301360607147, "learning_rate": 0.0001950652791969122, "loss": 1.7859, "step": 12723 }, { "epoch": 0.6212890625, "grad_norm": 0.28683745861053467, "learning_rate": 0.00019503253751095367, "loss": 1.7213, "step": 12724 }, { "epoch": 0.621337890625, "grad_norm": 0.276813268661499, "learning_rate": 0.00019499979776307075, "loss": 1.7576, "step": 12725 }, { "epoch": 0.62138671875, "grad_norm": 0.2597324252128601, "learning_rate": 0.00019496705995405712, "loss": 1.7274, "step": 12726 }, { "epoch": 0.621435546875, "grad_norm": 0.2684575915336609, "learning_rate": 0.00019493432408470597, "loss": 1.7494, "step": 12727 }, { "epoch": 0.621484375, "grad_norm": 0.26109611988067627, "learning_rate": 0.0001949015901558109, "loss": 1.7681, "step": 12728 }, { "epoch": 0.621533203125, "grad_norm": 0.2848702371120453, "learning_rate": 0.000194868858168165, "loss": 1.749, "step": 12729 }, { "epoch": 0.62158203125, "grad_norm": 0.22002293169498444, "learning_rate": 0.00019483612812256192, "loss": 1.7292, "step": 12730 }, { "epoch": 0.621630859375, "grad_norm": 0.27671071887016296, "learning_rate": 0.00019480340001979445, "loss": 1.767, "step": 12731 }, { "epoch": 0.6216796875, "grad_norm": 0.2497180700302124, "learning_rate": 0.00019477067386065617, "loss": 1.7686, "step": 12732 }, { "epoch": 0.621728515625, "grad_norm": 0.22672192752361298, "learning_rate": 0.00019473794964594004, "loss": 1.7756, "step": 12733 }, { "epoch": 0.62177734375, "grad_norm": 0.3013558089733124, "learning_rate": 0.0001947052273764391, "loss": 1.7533, "step": 12734 }, { "epoch": 0.621826171875, "grad_norm": 0.19487860798835754, "learning_rate": 0.0001946725070529466, "loss": 1.7853, "step": 12735 }, { "epoch": 0.621875, "grad_norm": 0.23977316915988922, "learning_rate": 0.00019463978867625525, "loss": 1.7835, "step": 12736 }, { "epoch": 0.621923828125, "grad_norm": 0.23764413595199585, "learning_rate": 0.0001946070722471584, "loss": 1.7824, "step": 12737 }, { "epoch": 0.62197265625, "grad_norm": 0.2340308278799057, "learning_rate": 0.00019457435776644856, "loss": 1.7702, "step": 12738 }, { "epoch": 0.622021484375, "grad_norm": 0.29726821184158325, "learning_rate": 0.00019454164523491891, "loss": 1.7416, "step": 12739 }, { "epoch": 0.6220703125, "grad_norm": 0.22703486680984497, "learning_rate": 0.00019450893465336206, "loss": 1.7603, "step": 12740 }, { "epoch": 0.622119140625, "grad_norm": 0.2712631821632385, "learning_rate": 0.00019447622602257082, "loss": 1.782, "step": 12741 }, { "epoch": 0.62216796875, "grad_norm": 0.25189217925071716, "learning_rate": 0.00019444351934333798, "loss": 1.7665, "step": 12742 }, { "epoch": 0.622216796875, "grad_norm": 0.20673848688602448, "learning_rate": 0.0001944108146164561, "loss": 1.749, "step": 12743 }, { "epoch": 0.622265625, "grad_norm": 0.22265273332595825, "learning_rate": 0.00019437811184271796, "loss": 1.7703, "step": 12744 }, { "epoch": 0.622314453125, "grad_norm": 0.21782532334327698, "learning_rate": 0.00019434541102291596, "loss": 1.7499, "step": 12745 }, { "epoch": 0.62236328125, "grad_norm": 0.1973072588443756, "learning_rate": 0.00019431271215784285, "loss": 1.7772, "step": 12746 }, { "epoch": 0.622412109375, "grad_norm": 0.25900113582611084, "learning_rate": 0.00019428001524829082, "loss": 1.7744, "step": 12747 }, { "epoch": 0.6224609375, "grad_norm": 0.2714344263076782, "learning_rate": 0.0001942473202950526, "loss": 1.7802, "step": 12748 }, { "epoch": 0.622509765625, "grad_norm": 0.21379487216472626, "learning_rate": 0.00019421462729892047, "loss": 1.7681, "step": 12749 }, { "epoch": 0.62255859375, "grad_norm": 0.2986062467098236, "learning_rate": 0.00019418193626068658, "loss": 1.766, "step": 12750 }, { "epoch": 0.622607421875, "grad_norm": 0.20739562809467316, "learning_rate": 0.00019414924718114357, "loss": 1.7464, "step": 12751 }, { "epoch": 0.62265625, "grad_norm": 0.26989611983299255, "learning_rate": 0.00019411656006108335, "loss": 1.7758, "step": 12752 }, { "epoch": 0.622705078125, "grad_norm": 0.292274534702301, "learning_rate": 0.00019408387490129841, "loss": 1.7745, "step": 12753 }, { "epoch": 0.62275390625, "grad_norm": 0.2384527623653412, "learning_rate": 0.00019405119170258072, "loss": 1.7421, "step": 12754 }, { "epoch": 0.622802734375, "grad_norm": 0.2908434271812439, "learning_rate": 0.00019401851046572244, "loss": 1.7604, "step": 12755 }, { "epoch": 0.6228515625, "grad_norm": 0.2107495665550232, "learning_rate": 0.00019398583119151558, "loss": 1.7673, "step": 12756 }, { "epoch": 0.622900390625, "grad_norm": 0.2655731439590454, "learning_rate": 0.0001939531538807523, "loss": 1.7626, "step": 12757 }, { "epoch": 0.62294921875, "grad_norm": 0.2552792727947235, "learning_rate": 0.00019392047853422439, "loss": 1.7819, "step": 12758 }, { "epoch": 0.622998046875, "grad_norm": 0.2789907455444336, "learning_rate": 0.00019388780515272374, "loss": 1.7531, "step": 12759 }, { "epoch": 0.623046875, "grad_norm": 0.23027458786964417, "learning_rate": 0.00019385513373704246, "loss": 1.7587, "step": 12760 }, { "epoch": 0.623095703125, "grad_norm": 0.3012426197528839, "learning_rate": 0.00019382246428797202, "loss": 1.7478, "step": 12761 }, { "epoch": 0.62314453125, "grad_norm": 0.2065131813287735, "learning_rate": 0.0001937897968063045, "loss": 1.7601, "step": 12762 }, { "epoch": 0.623193359375, "grad_norm": 0.3667469918727875, "learning_rate": 0.0001937571312928314, "loss": 1.7513, "step": 12763 }, { "epoch": 0.6232421875, "grad_norm": 0.20896689593791962, "learning_rate": 0.00019372446774834458, "loss": 1.7684, "step": 12764 }, { "epoch": 0.623291015625, "grad_norm": 0.3571235239505768, "learning_rate": 0.0001936918061736355, "loss": 1.7755, "step": 12765 }, { "epoch": 0.62333984375, "grad_norm": 0.23998793959617615, "learning_rate": 0.0001936591465694959, "loss": 1.7727, "step": 12766 }, { "epoch": 0.623388671875, "grad_norm": 0.30074140429496765, "learning_rate": 0.00019362648893671725, "loss": 1.7681, "step": 12767 }, { "epoch": 0.6234375, "grad_norm": 0.22817720472812653, "learning_rate": 0.00019359383327609094, "loss": 1.774, "step": 12768 }, { "epoch": 0.623486328125, "grad_norm": 0.33952850103378296, "learning_rate": 0.00019356117958840857, "loss": 1.7516, "step": 12769 }, { "epoch": 0.62353515625, "grad_norm": 0.2114352434873581, "learning_rate": 0.00019352852787446129, "loss": 1.7947, "step": 12770 }, { "epoch": 0.623583984375, "grad_norm": 0.30964237451553345, "learning_rate": 0.00019349587813504072, "loss": 1.7733, "step": 12771 }, { "epoch": 0.6236328125, "grad_norm": 0.20604325830936432, "learning_rate": 0.00019346323037093793, "loss": 1.7605, "step": 12772 }, { "epoch": 0.623681640625, "grad_norm": 0.24169126152992249, "learning_rate": 0.00019343058458294439, "loss": 1.7747, "step": 12773 }, { "epoch": 0.62373046875, "grad_norm": 0.21575777232646942, "learning_rate": 0.00019339794077185115, "loss": 1.7543, "step": 12774 }, { "epoch": 0.623779296875, "grad_norm": 0.24183396995067596, "learning_rate": 0.00019336529893844922, "loss": 1.7489, "step": 12775 }, { "epoch": 0.623828125, "grad_norm": 0.21047435700893402, "learning_rate": 0.00019333265908353, "loss": 1.7564, "step": 12776 }, { "epoch": 0.623876953125, "grad_norm": 0.2528034746646881, "learning_rate": 0.00019330002120788425, "loss": 1.7795, "step": 12777 }, { "epoch": 0.62392578125, "grad_norm": 0.24422088265419006, "learning_rate": 0.0001932673853123033, "loss": 1.7768, "step": 12778 }, { "epoch": 0.623974609375, "grad_norm": 0.2167179137468338, "learning_rate": 0.00019323475139757778, "loss": 1.783, "step": 12779 }, { "epoch": 0.6240234375, "grad_norm": 0.27407675981521606, "learning_rate": 0.00019320211946449882, "loss": 1.7827, "step": 12780 }, { "epoch": 0.624072265625, "grad_norm": 0.27441129088401794, "learning_rate": 0.00019316948951385722, "loss": 1.7835, "step": 12781 }, { "epoch": 0.62412109375, "grad_norm": 0.22319351136684418, "learning_rate": 0.00019313686154644378, "loss": 1.7609, "step": 12782 }, { "epoch": 0.624169921875, "grad_norm": 0.2508923411369324, "learning_rate": 0.00019310423556304928, "loss": 1.7857, "step": 12783 }, { "epoch": 0.62421875, "grad_norm": 0.26485857367515564, "learning_rate": 0.00019307161156446433, "loss": 1.7652, "step": 12784 }, { "epoch": 0.624267578125, "grad_norm": 0.23117150366306305, "learning_rate": 0.00019303898955147985, "loss": 1.7743, "step": 12785 }, { "epoch": 0.62431640625, "grad_norm": 0.2502642571926117, "learning_rate": 0.00019300636952488616, "loss": 1.7775, "step": 12786 }, { "epoch": 0.624365234375, "grad_norm": 0.24267688393592834, "learning_rate": 0.0001929737514854741, "loss": 1.7853, "step": 12787 }, { "epoch": 0.6244140625, "grad_norm": 0.2655557096004486, "learning_rate": 0.00019294113543403396, "loss": 1.7762, "step": 12788 }, { "epoch": 0.624462890625, "grad_norm": 0.21319580078125, "learning_rate": 0.0001929085213713564, "loss": 1.7668, "step": 12789 }, { "epoch": 0.62451171875, "grad_norm": 0.23750559985637665, "learning_rate": 0.00019287590929823172, "loss": 1.7445, "step": 12790 }, { "epoch": 0.624560546875, "grad_norm": 0.2498442530632019, "learning_rate": 0.0001928432992154504, "loss": 1.7633, "step": 12791 }, { "epoch": 0.624609375, "grad_norm": 0.22482003271579742, "learning_rate": 0.0001928106911238028, "loss": 1.7622, "step": 12792 }, { "epoch": 0.624658203125, "grad_norm": 0.23141011595726013, "learning_rate": 0.000192778085024079, "loss": 1.7645, "step": 12793 }, { "epoch": 0.62470703125, "grad_norm": 0.2892123758792877, "learning_rate": 0.00019274548091706946, "loss": 1.7697, "step": 12794 }, { "epoch": 0.624755859375, "grad_norm": 0.20617693662643433, "learning_rate": 0.00019271287880356415, "loss": 1.735, "step": 12795 }, { "epoch": 0.6248046875, "grad_norm": 0.2599526345729828, "learning_rate": 0.0001926802786843535, "loss": 1.7741, "step": 12796 }, { "epoch": 0.624853515625, "grad_norm": 0.21740718185901642, "learning_rate": 0.00019264768056022723, "loss": 1.7584, "step": 12797 }, { "epoch": 0.62490234375, "grad_norm": 0.23812122642993927, "learning_rate": 0.00019261508443197577, "loss": 1.7455, "step": 12798 }, { "epoch": 0.624951171875, "grad_norm": 0.225615993142128, "learning_rate": 0.0001925824903003889, "loss": 1.7537, "step": 12799 }, { "epoch": 0.625, "grad_norm": 0.24110423028469086, "learning_rate": 0.0001925498981662565, "loss": 1.7581, "step": 12800 }, { "epoch": 0.625048828125, "grad_norm": 0.23609532415866852, "learning_rate": 0.00019251730803036864, "loss": 1.755, "step": 12801 }, { "epoch": 0.62509765625, "grad_norm": 0.21517804265022278, "learning_rate": 0.000192484719893515, "loss": 1.7682, "step": 12802 }, { "epoch": 0.625146484375, "grad_norm": 0.25032883882522583, "learning_rate": 0.0001924521337564855, "loss": 1.7675, "step": 12803 }, { "epoch": 0.6251953125, "grad_norm": 0.25314581394195557, "learning_rate": 0.00019241954962006985, "loss": 1.7682, "step": 12804 }, { "epoch": 0.625244140625, "grad_norm": 0.2107168585062027, "learning_rate": 0.00019238696748505784, "loss": 1.7608, "step": 12805 }, { "epoch": 0.62529296875, "grad_norm": 0.24326980113983154, "learning_rate": 0.00019235438735223893, "loss": 1.7438, "step": 12806 }, { "epoch": 0.625341796875, "grad_norm": 0.22271861135959625, "learning_rate": 0.00019232180922240294, "loss": 1.7613, "step": 12807 }, { "epoch": 0.625390625, "grad_norm": 0.24597173929214478, "learning_rate": 0.00019228923309633927, "loss": 1.7598, "step": 12808 }, { "epoch": 0.625439453125, "grad_norm": 0.2074389010667801, "learning_rate": 0.00019225665897483745, "loss": 1.7832, "step": 12809 }, { "epoch": 0.62548828125, "grad_norm": 0.20488710701465607, "learning_rate": 0.00019222408685868708, "loss": 1.7523, "step": 12810 }, { "epoch": 0.625537109375, "grad_norm": 0.2708546221256256, "learning_rate": 0.00019219151674867735, "loss": 1.7493, "step": 12811 }, { "epoch": 0.6255859375, "grad_norm": 0.31051990389823914, "learning_rate": 0.00019215894864559787, "loss": 1.7628, "step": 12812 }, { "epoch": 0.625634765625, "grad_norm": 0.22287532687187195, "learning_rate": 0.00019212638255023762, "loss": 1.7676, "step": 12813 }, { "epoch": 0.62568359375, "grad_norm": 0.25806736946105957, "learning_rate": 0.00019209381846338624, "loss": 1.7792, "step": 12814 }, { "epoch": 0.625732421875, "grad_norm": 0.23699381947517395, "learning_rate": 0.00019206125638583272, "loss": 1.7842, "step": 12815 }, { "epoch": 0.62578125, "grad_norm": 0.23199684917926788, "learning_rate": 0.00019202869631836634, "loss": 1.769, "step": 12816 }, { "epoch": 0.625830078125, "grad_norm": 0.26263949275016785, "learning_rate": 0.00019199613826177616, "loss": 1.7561, "step": 12817 }, { "epoch": 0.62587890625, "grad_norm": 0.24267761409282684, "learning_rate": 0.0001919635822168511, "loss": 1.7726, "step": 12818 }, { "epoch": 0.625927734375, "grad_norm": 0.23335996270179749, "learning_rate": 0.0001919310281843805, "loss": 1.7529, "step": 12819 }, { "epoch": 0.6259765625, "grad_norm": 0.24948270618915558, "learning_rate": 0.0001918984761651531, "loss": 1.769, "step": 12820 }, { "epoch": 0.626025390625, "grad_norm": 0.2442871630191803, "learning_rate": 0.00019186592615995796, "loss": 1.7451, "step": 12821 }, { "epoch": 0.62607421875, "grad_norm": 0.23986469209194183, "learning_rate": 0.0001918333781695838, "loss": 1.7608, "step": 12822 }, { "epoch": 0.626123046875, "grad_norm": 0.22422367334365845, "learning_rate": 0.0001918008321948196, "loss": 1.7647, "step": 12823 }, { "epoch": 0.626171875, "grad_norm": 0.2357105314731598, "learning_rate": 0.00019176828823645398, "loss": 1.7527, "step": 12824 }, { "epoch": 0.626220703125, "grad_norm": 0.24440179765224457, "learning_rate": 0.00019173574629527586, "loss": 1.7708, "step": 12825 }, { "epoch": 0.62626953125, "grad_norm": 0.24992600083351135, "learning_rate": 0.00019170320637207383, "loss": 1.7612, "step": 12826 }, { "epoch": 0.626318359375, "grad_norm": 0.2334330677986145, "learning_rate": 0.00019167066846763643, "loss": 1.752, "step": 12827 }, { "epoch": 0.6263671875, "grad_norm": 0.2825215458869934, "learning_rate": 0.00019163813258275242, "loss": 1.7445, "step": 12828 }, { "epoch": 0.626416015625, "grad_norm": 0.25217950344085693, "learning_rate": 0.0001916055987182101, "loss": 1.7603, "step": 12829 }, { "epoch": 0.62646484375, "grad_norm": 0.2993603050708771, "learning_rate": 0.00019157306687479826, "loss": 1.7579, "step": 12830 }, { "epoch": 0.626513671875, "grad_norm": 0.2255907654762268, "learning_rate": 0.00019154053705330504, "loss": 1.7457, "step": 12831 }, { "epoch": 0.6265625, "grad_norm": 0.28871849179267883, "learning_rate": 0.00019150800925451906, "loss": 1.7394, "step": 12832 }, { "epoch": 0.626611328125, "grad_norm": 0.23405015468597412, "learning_rate": 0.00019147548347922855, "loss": 1.7557, "step": 12833 }, { "epoch": 0.62666015625, "grad_norm": 0.2821712791919708, "learning_rate": 0.00019144295972822167, "loss": 1.796, "step": 12834 }, { "epoch": 0.626708984375, "grad_norm": 0.27544593811035156, "learning_rate": 0.0001914104380022869, "loss": 1.7618, "step": 12835 }, { "epoch": 0.6267578125, "grad_norm": 0.24574290215969086, "learning_rate": 0.00019137791830221225, "loss": 1.7744, "step": 12836 }, { "epoch": 0.626806640625, "grad_norm": 0.2660798132419586, "learning_rate": 0.00019134540062878602, "loss": 1.7515, "step": 12837 }, { "epoch": 0.62685546875, "grad_norm": 0.23495794832706451, "learning_rate": 0.0001913128849827961, "loss": 1.7996, "step": 12838 }, { "epoch": 0.626904296875, "grad_norm": 0.2696899473667145, "learning_rate": 0.00019128037136503074, "loss": 1.7667, "step": 12839 }, { "epoch": 0.626953125, "grad_norm": 0.2419763058423996, "learning_rate": 0.00019124785977627778, "loss": 1.7718, "step": 12840 }, { "epoch": 0.627001953125, "grad_norm": 0.2663937509059906, "learning_rate": 0.00019121535021732528, "loss": 1.7583, "step": 12841 }, { "epoch": 0.62705078125, "grad_norm": 0.23500850796699524, "learning_rate": 0.00019118284268896115, "loss": 1.7675, "step": 12842 }, { "epoch": 0.627099609375, "grad_norm": 0.2543582320213318, "learning_rate": 0.00019115033719197295, "loss": 1.7732, "step": 12843 }, { "epoch": 0.6271484375, "grad_norm": 0.2673613727092743, "learning_rate": 0.00019111783372714885, "loss": 1.7844, "step": 12844 }, { "epoch": 0.627197265625, "grad_norm": 0.2376621961593628, "learning_rate": 0.00019108533229527636, "loss": 1.7693, "step": 12845 }, { "epoch": 0.62724609375, "grad_norm": 0.2643154561519623, "learning_rate": 0.00019105283289714336, "loss": 1.7596, "step": 12846 }, { "epoch": 0.627294921875, "grad_norm": 0.2260269969701767, "learning_rate": 0.00019102033553353726, "loss": 1.7711, "step": 12847 }, { "epoch": 0.62734375, "grad_norm": 0.24059812724590302, "learning_rate": 0.00019098784020524595, "loss": 1.7598, "step": 12848 }, { "epoch": 0.627392578125, "grad_norm": 0.20995678007602692, "learning_rate": 0.00019095534691305672, "loss": 1.7723, "step": 12849 }, { "epoch": 0.62744140625, "grad_norm": 0.23224005103111267, "learning_rate": 0.00019092285565775719, "loss": 1.7994, "step": 12850 }, { "epoch": 0.627490234375, "grad_norm": 0.21193327009677887, "learning_rate": 0.00019089036644013492, "loss": 1.7513, "step": 12851 }, { "epoch": 0.6275390625, "grad_norm": 0.24349573254585266, "learning_rate": 0.00019085787926097702, "loss": 1.7729, "step": 12852 }, { "epoch": 0.627587890625, "grad_norm": 0.20249687135219574, "learning_rate": 0.00019082539412107108, "loss": 1.761, "step": 12853 }, { "epoch": 0.62763671875, "grad_norm": 0.22958606481552124, "learning_rate": 0.00019079291102120434, "loss": 1.7818, "step": 12854 }, { "epoch": 0.627685546875, "grad_norm": 0.2441253662109375, "learning_rate": 0.0001907604299621641, "loss": 1.7744, "step": 12855 }, { "epoch": 0.627734375, "grad_norm": 0.22548793256282806, "learning_rate": 0.0001907279509447374, "loss": 1.7697, "step": 12856 }, { "epoch": 0.627783203125, "grad_norm": 0.2450380027294159, "learning_rate": 0.00019069547396971165, "loss": 1.7559, "step": 12857 }, { "epoch": 0.62783203125, "grad_norm": 0.22343270480632782, "learning_rate": 0.00019066299903787372, "loss": 1.7669, "step": 12858 }, { "epoch": 0.627880859375, "grad_norm": 0.2108328938484192, "learning_rate": 0.0001906305261500108, "loss": 1.7606, "step": 12859 }, { "epoch": 0.6279296875, "grad_norm": 0.21916019916534424, "learning_rate": 0.00019059805530690992, "loss": 1.7422, "step": 12860 }, { "epoch": 0.627978515625, "grad_norm": 0.1972733736038208, "learning_rate": 0.00019056558650935783, "loss": 1.7716, "step": 12861 }, { "epoch": 0.62802734375, "grad_norm": 0.22225527465343475, "learning_rate": 0.00019053311975814174, "loss": 1.7775, "step": 12862 }, { "epoch": 0.628076171875, "grad_norm": 0.23632441461086273, "learning_rate": 0.00019050065505404824, "loss": 1.7548, "step": 12863 }, { "epoch": 0.628125, "grad_norm": 0.21355362236499786, "learning_rate": 0.00019046819239786433, "loss": 1.7436, "step": 12864 }, { "epoch": 0.628173828125, "grad_norm": 0.24318242073059082, "learning_rate": 0.0001904357317903766, "loss": 1.7771, "step": 12865 }, { "epoch": 0.62822265625, "grad_norm": 0.22597934305667877, "learning_rate": 0.00019040327323237194, "loss": 1.7755, "step": 12866 }, { "epoch": 0.628271484375, "grad_norm": 0.24519570171833038, "learning_rate": 0.0001903708167246369, "loss": 1.7631, "step": 12867 }, { "epoch": 0.6283203125, "grad_norm": 0.2718362808227539, "learning_rate": 0.00019033836226795808, "loss": 1.7806, "step": 12868 }, { "epoch": 0.628369140625, "grad_norm": 0.23915672302246094, "learning_rate": 0.00019030590986312212, "loss": 1.7915, "step": 12869 }, { "epoch": 0.62841796875, "grad_norm": 0.29455819725990295, "learning_rate": 0.00019027345951091535, "loss": 1.772, "step": 12870 }, { "epoch": 0.628466796875, "grad_norm": 0.2693130671977997, "learning_rate": 0.0001902410112121245, "loss": 1.756, "step": 12871 }, { "epoch": 0.628515625, "grad_norm": 0.26800987124443054, "learning_rate": 0.00019020856496753576, "loss": 1.7839, "step": 12872 }, { "epoch": 0.628564453125, "grad_norm": 0.2781972885131836, "learning_rate": 0.0001901761207779356, "loss": 1.7905, "step": 12873 }, { "epoch": 0.62861328125, "grad_norm": 0.22800426185131073, "learning_rate": 0.00019014367864411024, "loss": 1.7362, "step": 12874 }, { "epoch": 0.628662109375, "grad_norm": 0.31644630432128906, "learning_rate": 0.0001901112385668461, "loss": 1.7613, "step": 12875 }, { "epoch": 0.6287109375, "grad_norm": 0.2595898509025574, "learning_rate": 0.00019007880054692922, "loss": 1.7401, "step": 12876 }, { "epoch": 0.628759765625, "grad_norm": 0.2558329403400421, "learning_rate": 0.00019004636458514585, "loss": 1.7803, "step": 12877 }, { "epoch": 0.62880859375, "grad_norm": 0.2818831503391266, "learning_rate": 0.00019001393068228213, "loss": 1.7621, "step": 12878 }, { "epoch": 0.628857421875, "grad_norm": 0.2502208650112152, "learning_rate": 0.00018998149883912402, "loss": 1.7619, "step": 12879 }, { "epoch": 0.62890625, "grad_norm": 0.23515364527702332, "learning_rate": 0.0001899490690564576, "loss": 1.7444, "step": 12880 }, { "epoch": 0.628955078125, "grad_norm": 0.26284223794937134, "learning_rate": 0.00018991664133506875, "loss": 1.7471, "step": 12881 }, { "epoch": 0.62900390625, "grad_norm": 0.192531019449234, "learning_rate": 0.0001898842156757436, "loss": 1.7472, "step": 12882 }, { "epoch": 0.629052734375, "grad_norm": 0.29252392053604126, "learning_rate": 0.00018985179207926773, "loss": 1.7521, "step": 12883 }, { "epoch": 0.6291015625, "grad_norm": 0.2549192011356354, "learning_rate": 0.00018981937054642718, "loss": 1.7829, "step": 12884 }, { "epoch": 0.629150390625, "grad_norm": 0.20775532722473145, "learning_rate": 0.00018978695107800758, "loss": 1.7759, "step": 12885 }, { "epoch": 0.62919921875, "grad_norm": 0.23549416661262512, "learning_rate": 0.00018975453367479472, "loss": 1.7646, "step": 12886 }, { "epoch": 0.629248046875, "grad_norm": 0.2331274151802063, "learning_rate": 0.0001897221183375742, "loss": 1.7703, "step": 12887 }, { "epoch": 0.629296875, "grad_norm": 0.24367403984069824, "learning_rate": 0.00018968970506713162, "loss": 1.7639, "step": 12888 }, { "epoch": 0.629345703125, "grad_norm": 0.1951894909143448, "learning_rate": 0.00018965729386425262, "loss": 1.7624, "step": 12889 }, { "epoch": 0.62939453125, "grad_norm": 0.26516008377075195, "learning_rate": 0.00018962488472972256, "loss": 1.7602, "step": 12890 }, { "epoch": 0.629443359375, "grad_norm": 0.1935683786869049, "learning_rate": 0.00018959247766432718, "loss": 1.7775, "step": 12891 }, { "epoch": 0.6294921875, "grad_norm": 0.28315773606300354, "learning_rate": 0.00018956007266885162, "loss": 1.754, "step": 12892 }, { "epoch": 0.629541015625, "grad_norm": 0.23540809750556946, "learning_rate": 0.0001895276697440813, "loss": 1.7539, "step": 12893 }, { "epoch": 0.62958984375, "grad_norm": 0.24016433954238892, "learning_rate": 0.00018949526889080166, "loss": 1.7956, "step": 12894 }, { "epoch": 0.629638671875, "grad_norm": 0.23396500945091248, "learning_rate": 0.00018946287010979785, "loss": 1.7687, "step": 12895 }, { "epoch": 0.6296875, "grad_norm": 0.222173810005188, "learning_rate": 0.00018943047340185505, "loss": 1.7456, "step": 12896 }, { "epoch": 0.629736328125, "grad_norm": 0.22115269303321838, "learning_rate": 0.00018939807876775856, "loss": 1.77, "step": 12897 }, { "epoch": 0.62978515625, "grad_norm": 0.21988046169281006, "learning_rate": 0.00018936568620829335, "loss": 1.7886, "step": 12898 }, { "epoch": 0.629833984375, "grad_norm": 0.2137858122587204, "learning_rate": 0.00018933329572424446, "loss": 1.7595, "step": 12899 }, { "epoch": 0.6298828125, "grad_norm": 0.21933990716934204, "learning_rate": 0.00018930090731639716, "loss": 1.7736, "step": 12900 }, { "epoch": 0.629931640625, "grad_norm": 0.19622555375099182, "learning_rate": 0.0001892685209855361, "loss": 1.7659, "step": 12901 }, { "epoch": 0.62998046875, "grad_norm": 0.2035641074180603, "learning_rate": 0.00018923613673244626, "loss": 1.7485, "step": 12902 }, { "epoch": 0.630029296875, "grad_norm": 0.23316416144371033, "learning_rate": 0.00018920375455791266, "loss": 1.7653, "step": 12903 }, { "epoch": 0.630078125, "grad_norm": 0.19502460956573486, "learning_rate": 0.00018917137446271991, "loss": 1.7766, "step": 12904 }, { "epoch": 0.630126953125, "grad_norm": 0.23001787066459656, "learning_rate": 0.00018913899644765287, "loss": 1.7653, "step": 12905 }, { "epoch": 0.63017578125, "grad_norm": 0.2210087776184082, "learning_rate": 0.00018910662051349625, "loss": 1.7712, "step": 12906 }, { "epoch": 0.630224609375, "grad_norm": 0.21869491040706635, "learning_rate": 0.00018907424666103468, "loss": 1.7824, "step": 12907 }, { "epoch": 0.6302734375, "grad_norm": 0.25400564074516296, "learning_rate": 0.0001890418748910528, "loss": 1.761, "step": 12908 }, { "epoch": 0.630322265625, "grad_norm": 0.2257653921842575, "learning_rate": 0.00018900950520433512, "loss": 1.7641, "step": 12909 }, { "epoch": 0.63037109375, "grad_norm": 0.1996171921491623, "learning_rate": 0.00018897713760166618, "loss": 1.7665, "step": 12910 }, { "epoch": 0.630419921875, "grad_norm": 0.20143498480319977, "learning_rate": 0.00018894477208383043, "loss": 1.7726, "step": 12911 }, { "epoch": 0.63046875, "grad_norm": 0.23114566504955292, "learning_rate": 0.00018891240865161223, "loss": 1.7459, "step": 12912 }, { "epoch": 0.630517578125, "grad_norm": 0.20827853679656982, "learning_rate": 0.00018888004730579594, "loss": 1.7437, "step": 12913 }, { "epoch": 0.63056640625, "grad_norm": 0.25528866052627563, "learning_rate": 0.00018884768804716594, "loss": 1.7762, "step": 12914 }, { "epoch": 0.630615234375, "grad_norm": 0.24864789843559265, "learning_rate": 0.00018881533087650642, "loss": 1.777, "step": 12915 }, { "epoch": 0.6306640625, "grad_norm": 0.22929011285305023, "learning_rate": 0.00018878297579460158, "loss": 1.7601, "step": 12916 }, { "epoch": 0.630712890625, "grad_norm": 0.23643840849399567, "learning_rate": 0.00018875062280223565, "loss": 1.7704, "step": 12917 }, { "epoch": 0.63076171875, "grad_norm": 0.24423015117645264, "learning_rate": 0.0001887182719001927, "loss": 1.7461, "step": 12918 }, { "epoch": 0.630810546875, "grad_norm": 0.2401924580335617, "learning_rate": 0.00018868592308925663, "loss": 1.7802, "step": 12919 }, { "epoch": 0.630859375, "grad_norm": 0.2191183716058731, "learning_rate": 0.00018865357637021157, "loss": 1.7802, "step": 12920 }, { "epoch": 0.630908203125, "grad_norm": 0.26215577125549316, "learning_rate": 0.0001886212317438415, "loss": 1.7744, "step": 12921 }, { "epoch": 0.63095703125, "grad_norm": 0.23913700878620148, "learning_rate": 0.00018858888921093032, "loss": 1.7581, "step": 12922 }, { "epoch": 0.631005859375, "grad_norm": 0.26744168996810913, "learning_rate": 0.00018855654877226175, "loss": 1.7751, "step": 12923 }, { "epoch": 0.6310546875, "grad_norm": 0.2175983339548111, "learning_rate": 0.00018852421042861968, "loss": 1.7514, "step": 12924 }, { "epoch": 0.631103515625, "grad_norm": 0.2964774966239929, "learning_rate": 0.00018849187418078784, "loss": 1.7592, "step": 12925 }, { "epoch": 0.63115234375, "grad_norm": 0.24268780648708344, "learning_rate": 0.00018845954002955, "loss": 1.769, "step": 12926 }, { "epoch": 0.631201171875, "grad_norm": 0.2818390727043152, "learning_rate": 0.00018842720797568964, "loss": 1.7685, "step": 12927 }, { "epoch": 0.63125, "grad_norm": 0.24413836002349854, "learning_rate": 0.0001883948780199905, "loss": 1.7591, "step": 12928 }, { "epoch": 0.631298828125, "grad_norm": 0.26463037729263306, "learning_rate": 0.00018836255016323612, "loss": 1.7618, "step": 12929 }, { "epoch": 0.63134765625, "grad_norm": 0.264041006565094, "learning_rate": 0.00018833022440620985, "loss": 1.7753, "step": 12930 }, { "epoch": 0.631396484375, "grad_norm": 0.24626129865646362, "learning_rate": 0.0001882979007496952, "loss": 1.7693, "step": 12931 }, { "epoch": 0.6314453125, "grad_norm": 0.2608022093772888, "learning_rate": 0.00018826557919447568, "loss": 1.7499, "step": 12932 }, { "epoch": 0.631494140625, "grad_norm": 0.22170254588127136, "learning_rate": 0.00018823325974133448, "loss": 1.7482, "step": 12933 }, { "epoch": 0.63154296875, "grad_norm": 0.23694762587547302, "learning_rate": 0.0001882009423910549, "loss": 1.7374, "step": 12934 }, { "epoch": 0.631591796875, "grad_norm": 0.24430572986602783, "learning_rate": 0.0001881686271444203, "loss": 1.7663, "step": 12935 }, { "epoch": 0.631640625, "grad_norm": 0.2596279978752136, "learning_rate": 0.0001881363140022137, "loss": 1.7577, "step": 12936 }, { "epoch": 0.631689453125, "grad_norm": 0.2612156867980957, "learning_rate": 0.0001881040029652184, "loss": 1.7537, "step": 12937 }, { "epoch": 0.63173828125, "grad_norm": 0.2844061255455017, "learning_rate": 0.0001880716940342173, "loss": 1.7589, "step": 12938 }, { "epoch": 0.631787109375, "grad_norm": 0.27103734016418457, "learning_rate": 0.00018803938720999365, "loss": 1.7627, "step": 12939 }, { "epoch": 0.6318359375, "grad_norm": 0.2834377884864807, "learning_rate": 0.00018800708249333022, "loss": 1.7666, "step": 12940 }, { "epoch": 0.631884765625, "grad_norm": 0.22972184419631958, "learning_rate": 0.00018797477988501005, "loss": 1.7853, "step": 12941 }, { "epoch": 0.63193359375, "grad_norm": 0.27347517013549805, "learning_rate": 0.00018794247938581603, "loss": 1.7589, "step": 12942 }, { "epoch": 0.631982421875, "grad_norm": 0.28228050470352173, "learning_rate": 0.0001879101809965309, "loss": 1.7697, "step": 12943 }, { "epoch": 0.63203125, "grad_norm": 0.239313006401062, "learning_rate": 0.00018787788471793759, "loss": 1.7827, "step": 12944 }, { "epoch": 0.632080078125, "grad_norm": 0.2586964964866638, "learning_rate": 0.00018784559055081868, "loss": 1.7428, "step": 12945 }, { "epoch": 0.63212890625, "grad_norm": 0.24506349861621857, "learning_rate": 0.00018781329849595695, "loss": 1.7458, "step": 12946 }, { "epoch": 0.632177734375, "grad_norm": 0.25443917512893677, "learning_rate": 0.00018778100855413492, "loss": 1.7613, "step": 12947 }, { "epoch": 0.6322265625, "grad_norm": 0.20948931574821472, "learning_rate": 0.00018774872072613525, "loss": 1.764, "step": 12948 }, { "epoch": 0.632275390625, "grad_norm": 0.2598874568939209, "learning_rate": 0.00018771643501274045, "loss": 1.7578, "step": 12949 }, { "epoch": 0.63232421875, "grad_norm": 0.2082304060459137, "learning_rate": 0.00018768415141473294, "loss": 1.7728, "step": 12950 }, { "epoch": 0.632373046875, "grad_norm": 0.23921392858028412, "learning_rate": 0.0001876518699328952, "loss": 1.7644, "step": 12951 }, { "epoch": 0.632421875, "grad_norm": 0.21451473236083984, "learning_rate": 0.0001876195905680095, "loss": 1.7785, "step": 12952 }, { "epoch": 0.632470703125, "grad_norm": 0.2708272337913513, "learning_rate": 0.00018758731332085832, "loss": 1.7739, "step": 12953 }, { "epoch": 0.63251953125, "grad_norm": 0.2581273913383484, "learning_rate": 0.0001875550381922237, "loss": 1.773, "step": 12954 }, { "epoch": 0.632568359375, "grad_norm": 0.26357337832450867, "learning_rate": 0.00018752276518288813, "loss": 1.7607, "step": 12955 }, { "epoch": 0.6326171875, "grad_norm": 0.24267902970314026, "learning_rate": 0.00018749049429363346, "loss": 1.7844, "step": 12956 }, { "epoch": 0.632666015625, "grad_norm": 0.2666645050048828, "learning_rate": 0.0001874582255252421, "loss": 1.7785, "step": 12957 }, { "epoch": 0.63271484375, "grad_norm": 0.2539660930633545, "learning_rate": 0.00018742595887849596, "loss": 1.7821, "step": 12958 }, { "epoch": 0.632763671875, "grad_norm": 0.24901466071605682, "learning_rate": 0.00018739369435417702, "loss": 1.7642, "step": 12959 }, { "epoch": 0.6328125, "grad_norm": 0.24988341331481934, "learning_rate": 0.00018736143195306732, "loss": 1.7544, "step": 12960 }, { "epoch": 0.632861328125, "grad_norm": 0.25512000918388367, "learning_rate": 0.00018732917167594865, "loss": 1.7551, "step": 12961 }, { "epoch": 0.63291015625, "grad_norm": 0.215090811252594, "learning_rate": 0.00018729691352360305, "loss": 1.7428, "step": 12962 }, { "epoch": 0.632958984375, "grad_norm": 0.2521126866340637, "learning_rate": 0.0001872646574968121, "loss": 1.7388, "step": 12963 }, { "epoch": 0.6330078125, "grad_norm": 0.24217641353607178, "learning_rate": 0.00018723240359635774, "loss": 1.7595, "step": 12964 }, { "epoch": 0.633056640625, "grad_norm": 0.21402452886104584, "learning_rate": 0.00018720015182302152, "loss": 1.7692, "step": 12965 }, { "epoch": 0.63310546875, "grad_norm": 0.255217581987381, "learning_rate": 0.00018716790217758523, "loss": 1.7649, "step": 12966 }, { "epoch": 0.633154296875, "grad_norm": 0.21128492057323456, "learning_rate": 0.00018713565466083032, "loss": 1.7734, "step": 12967 }, { "epoch": 0.633203125, "grad_norm": 0.20786309242248535, "learning_rate": 0.00018710340927353847, "loss": 1.7599, "step": 12968 }, { "epoch": 0.633251953125, "grad_norm": 0.23382200300693512, "learning_rate": 0.0001870711660164911, "loss": 1.7691, "step": 12969 }, { "epoch": 0.63330078125, "grad_norm": 0.21403993666172028, "learning_rate": 0.00018703892489046965, "loss": 1.7664, "step": 12970 }, { "epoch": 0.633349609375, "grad_norm": 0.2475074678659439, "learning_rate": 0.00018700668589625551, "loss": 1.7602, "step": 12971 }, { "epoch": 0.6333984375, "grad_norm": 0.23547810316085815, "learning_rate": 0.00018697444903463008, "loss": 1.7679, "step": 12972 }, { "epoch": 0.633447265625, "grad_norm": 0.2090238332748413, "learning_rate": 0.00018694221430637454, "loss": 1.7862, "step": 12973 }, { "epoch": 0.63349609375, "grad_norm": 0.22453641891479492, "learning_rate": 0.0001869099817122702, "loss": 1.7408, "step": 12974 }, { "epoch": 0.633544921875, "grad_norm": 0.24282768368721008, "learning_rate": 0.00018687775125309826, "loss": 1.7868, "step": 12975 }, { "epoch": 0.63359375, "grad_norm": 0.23696352541446686, "learning_rate": 0.0001868455229296398, "loss": 1.7593, "step": 12976 }, { "epoch": 0.633642578125, "grad_norm": 0.24347712099552155, "learning_rate": 0.00018681329674267584, "loss": 1.7608, "step": 12977 }, { "epoch": 0.63369140625, "grad_norm": 0.20689570903778076, "learning_rate": 0.0001867810726929876, "loss": 1.7693, "step": 12978 }, { "epoch": 0.633740234375, "grad_norm": 0.23575438559055328, "learning_rate": 0.00018674885078135583, "loss": 1.7584, "step": 12979 }, { "epoch": 0.6337890625, "grad_norm": 0.19456961750984192, "learning_rate": 0.0001867166310085616, "loss": 1.7692, "step": 12980 }, { "epoch": 0.633837890625, "grad_norm": 0.2009662538766861, "learning_rate": 0.0001866844133753857, "loss": 1.7724, "step": 12981 }, { "epoch": 0.63388671875, "grad_norm": 0.18009528517723083, "learning_rate": 0.00018665219788260913, "loss": 1.7645, "step": 12982 }, { "epoch": 0.633935546875, "grad_norm": 0.2079070508480072, "learning_rate": 0.00018661998453101242, "loss": 1.7623, "step": 12983 }, { "epoch": 0.633984375, "grad_norm": 0.22670893371105194, "learning_rate": 0.00018658777332137644, "loss": 1.7577, "step": 12984 }, { "epoch": 0.634033203125, "grad_norm": 0.20815254747867584, "learning_rate": 0.00018655556425448185, "loss": 1.7573, "step": 12985 }, { "epoch": 0.63408203125, "grad_norm": 0.23581461608409882, "learning_rate": 0.00018652335733110903, "loss": 1.7449, "step": 12986 }, { "epoch": 0.634130859375, "grad_norm": 0.21264806389808655, "learning_rate": 0.00018649115255203885, "loss": 1.7729, "step": 12987 }, { "epoch": 0.6341796875, "grad_norm": 0.25682657957077026, "learning_rate": 0.00018645894991805168, "loss": 1.7653, "step": 12988 }, { "epoch": 0.634228515625, "grad_norm": 0.2556450664997101, "learning_rate": 0.00018642674942992803, "loss": 1.7822, "step": 12989 }, { "epoch": 0.63427734375, "grad_norm": 0.22916017472743988, "learning_rate": 0.00018639455108844817, "loss": 1.7627, "step": 12990 }, { "epoch": 0.634326171875, "grad_norm": 0.21122823655605316, "learning_rate": 0.00018636235489439263, "loss": 1.76, "step": 12991 }, { "epoch": 0.634375, "grad_norm": 0.2472270429134369, "learning_rate": 0.00018633016084854153, "loss": 1.7563, "step": 12992 }, { "epoch": 0.634423828125, "grad_norm": 0.23803168535232544, "learning_rate": 0.0001862979689516754, "loss": 1.763, "step": 12993 }, { "epoch": 0.63447265625, "grad_norm": 0.26782646775245667, "learning_rate": 0.00018626577920457415, "loss": 1.7623, "step": 12994 }, { "epoch": 0.634521484375, "grad_norm": 0.22928278148174286, "learning_rate": 0.000186233591608018, "loss": 1.7552, "step": 12995 }, { "epoch": 0.6345703125, "grad_norm": 0.24231475591659546, "learning_rate": 0.00018620140616278715, "loss": 1.7843, "step": 12996 }, { "epoch": 0.634619140625, "grad_norm": 0.21194319427013397, "learning_rate": 0.00018616922286966142, "loss": 1.7696, "step": 12997 }, { "epoch": 0.63466796875, "grad_norm": 0.22126714885234833, "learning_rate": 0.0001861370417294211, "loss": 1.7856, "step": 12998 }, { "epoch": 0.634716796875, "grad_norm": 0.21240904927253723, "learning_rate": 0.00018610486274284584, "loss": 1.7716, "step": 12999 }, { "epoch": 0.634765625, "grad_norm": 0.24168382585048676, "learning_rate": 0.00018607268591071575, "loss": 1.753, "step": 13000 }, { "epoch": 0.634814453125, "grad_norm": 0.2209339141845703, "learning_rate": 0.00018604051123381055, "loss": 1.77, "step": 13001 }, { "epoch": 0.63486328125, "grad_norm": 0.2060949206352234, "learning_rate": 0.00018600833871290995, "loss": 1.7852, "step": 13002 }, { "epoch": 0.634912109375, "grad_norm": 0.226776123046875, "learning_rate": 0.00018597616834879389, "loss": 1.7852, "step": 13003 }, { "epoch": 0.6349609375, "grad_norm": 0.2002253383398056, "learning_rate": 0.00018594400014224172, "loss": 1.7882, "step": 13004 }, { "epoch": 0.635009765625, "grad_norm": 0.261476069688797, "learning_rate": 0.0001859118340940334, "loss": 1.7423, "step": 13005 }, { "epoch": 0.63505859375, "grad_norm": 0.29571962356567383, "learning_rate": 0.00018587967020494827, "loss": 1.7463, "step": 13006 }, { "epoch": 0.635107421875, "grad_norm": 0.19834722578525543, "learning_rate": 0.00018584750847576605, "loss": 1.7576, "step": 13007 }, { "epoch": 0.63515625, "grad_norm": 0.3272201120853424, "learning_rate": 0.00018581534890726594, "loss": 1.7341, "step": 13008 }, { "epoch": 0.635205078125, "grad_norm": 0.21982167661190033, "learning_rate": 0.0001857831915002276, "loss": 1.7503, "step": 13009 }, { "epoch": 0.63525390625, "grad_norm": 0.31110692024230957, "learning_rate": 0.00018575103625543033, "loss": 1.7747, "step": 13010 }, { "epoch": 0.635302734375, "grad_norm": 0.23310577869415283, "learning_rate": 0.00018571888317365327, "loss": 1.7456, "step": 13011 }, { "epoch": 0.6353515625, "grad_norm": 0.3144375681877136, "learning_rate": 0.0001856867322556759, "loss": 1.7799, "step": 13012 }, { "epoch": 0.635400390625, "grad_norm": 0.2530553936958313, "learning_rate": 0.0001856545835022772, "loss": 1.7555, "step": 13013 }, { "epoch": 0.63544921875, "grad_norm": 0.28700193762779236, "learning_rate": 0.0001856224369142366, "loss": 1.7735, "step": 13014 }, { "epoch": 0.635498046875, "grad_norm": 0.25844651460647583, "learning_rate": 0.00018559029249233294, "loss": 1.7787, "step": 13015 }, { "epoch": 0.635546875, "grad_norm": 0.2605162262916565, "learning_rate": 0.00018555815023734546, "loss": 1.746, "step": 13016 }, { "epoch": 0.635595703125, "grad_norm": 0.2661563754081726, "learning_rate": 0.00018552601015005293, "loss": 1.7604, "step": 13017 }, { "epoch": 0.63564453125, "grad_norm": 0.2315644472837448, "learning_rate": 0.0001854938722312346, "loss": 1.7725, "step": 13018 }, { "epoch": 0.635693359375, "grad_norm": 0.26051807403564453, "learning_rate": 0.0001854617364816691, "loss": 1.7849, "step": 13019 }, { "epoch": 0.6357421875, "grad_norm": 0.2462824285030365, "learning_rate": 0.00018542960290213528, "loss": 1.7899, "step": 13020 }, { "epoch": 0.635791015625, "grad_norm": 0.23967677354812622, "learning_rate": 0.00018539747149341212, "loss": 1.7608, "step": 13021 }, { "epoch": 0.63583984375, "grad_norm": 0.23314924538135529, "learning_rate": 0.00018536534225627817, "loss": 1.7525, "step": 13022 }, { "epoch": 0.635888671875, "grad_norm": 0.26104262471199036, "learning_rate": 0.0001853332151915122, "loss": 1.7613, "step": 13023 }, { "epoch": 0.6359375, "grad_norm": 0.18428173661231995, "learning_rate": 0.00018530109029989267, "loss": 1.7896, "step": 13024 }, { "epoch": 0.635986328125, "grad_norm": 0.22410915791988373, "learning_rate": 0.00018526896758219846, "loss": 1.7332, "step": 13025 }, { "epoch": 0.63603515625, "grad_norm": 0.20292632281780243, "learning_rate": 0.00018523684703920774, "loss": 1.7665, "step": 13026 }, { "epoch": 0.636083984375, "grad_norm": 0.27118757367134094, "learning_rate": 0.0001852047286716993, "loss": 1.7769, "step": 13027 }, { "epoch": 0.6361328125, "grad_norm": 0.1931816041469574, "learning_rate": 0.00018517261248045136, "loss": 1.7774, "step": 13028 }, { "epoch": 0.636181640625, "grad_norm": 0.31617167592048645, "learning_rate": 0.00018514049846624225, "loss": 1.7707, "step": 13029 }, { "epoch": 0.63623046875, "grad_norm": 0.1962537169456482, "learning_rate": 0.0001851083866298504, "loss": 1.7544, "step": 13030 }, { "epoch": 0.636279296875, "grad_norm": 0.26484933495521545, "learning_rate": 0.000185076276972054, "loss": 1.7741, "step": 13031 }, { "epoch": 0.636328125, "grad_norm": 0.25170859694480896, "learning_rate": 0.00018504416949363127, "loss": 1.7648, "step": 13032 }, { "epoch": 0.636376953125, "grad_norm": 0.24241764843463898, "learning_rate": 0.00018501206419536037, "loss": 1.734, "step": 13033 }, { "epoch": 0.63642578125, "grad_norm": 0.24625557661056519, "learning_rate": 0.0001849799610780194, "loss": 1.7707, "step": 13034 }, { "epoch": 0.636474609375, "grad_norm": 0.2957073450088501, "learning_rate": 0.00018494786014238646, "loss": 1.7884, "step": 13035 }, { "epoch": 0.6365234375, "grad_norm": 0.20837202668190002, "learning_rate": 0.00018491576138923938, "loss": 1.7641, "step": 13036 }, { "epoch": 0.636572265625, "grad_norm": 0.25282642245292664, "learning_rate": 0.00018488366481935625, "loss": 1.7939, "step": 13037 }, { "epoch": 0.63662109375, "grad_norm": 0.2232581079006195, "learning_rate": 0.0001848515704335148, "loss": 1.7616, "step": 13038 }, { "epoch": 0.636669921875, "grad_norm": 0.2512585520744324, "learning_rate": 0.00018481947823249312, "loss": 1.769, "step": 13039 }, { "epoch": 0.63671875, "grad_norm": 0.26069769263267517, "learning_rate": 0.00018478738821706869, "loss": 1.7446, "step": 13040 }, { "epoch": 0.636767578125, "grad_norm": 0.26286402344703674, "learning_rate": 0.00018475530038801946, "loss": 1.7539, "step": 13041 }, { "epoch": 0.63681640625, "grad_norm": 0.22557102143764496, "learning_rate": 0.00018472321474612296, "loss": 1.7684, "step": 13042 }, { "epoch": 0.636865234375, "grad_norm": 0.23636101186275482, "learning_rate": 0.00018469113129215698, "loss": 1.7597, "step": 13043 }, { "epoch": 0.6369140625, "grad_norm": 0.22143951058387756, "learning_rate": 0.000184659050026899, "loss": 1.7657, "step": 13044 }, { "epoch": 0.636962890625, "grad_norm": 0.23717591166496277, "learning_rate": 0.0001846269709511264, "loss": 1.7567, "step": 13045 }, { "epoch": 0.63701171875, "grad_norm": 0.20032817125320435, "learning_rate": 0.00018459489406561687, "loss": 1.7492, "step": 13046 }, { "epoch": 0.637060546875, "grad_norm": 0.246286541223526, "learning_rate": 0.0001845628193711476, "loss": 1.7659, "step": 13047 }, { "epoch": 0.637109375, "grad_norm": 0.20308594405651093, "learning_rate": 0.0001845307468684962, "loss": 1.7511, "step": 13048 }, { "epoch": 0.637158203125, "grad_norm": 0.23368513584136963, "learning_rate": 0.0001844986765584397, "loss": 1.7652, "step": 13049 }, { "epoch": 0.63720703125, "grad_norm": 0.21990184485912323, "learning_rate": 0.00018446660844175555, "loss": 1.7533, "step": 13050 }, { "epoch": 0.637255859375, "grad_norm": 0.24042533338069916, "learning_rate": 0.00018443454251922082, "loss": 1.7601, "step": 13051 }, { "epoch": 0.6373046875, "grad_norm": 0.2177094966173172, "learning_rate": 0.00018440247879161282, "loss": 1.7687, "step": 13052 }, { "epoch": 0.637353515625, "grad_norm": 0.26325955986976624, "learning_rate": 0.00018437041725970855, "loss": 1.7671, "step": 13053 }, { "epoch": 0.63740234375, "grad_norm": 0.22136101126670837, "learning_rate": 0.00018433835792428484, "loss": 1.7624, "step": 13054 }, { "epoch": 0.637451171875, "grad_norm": 0.2768506109714508, "learning_rate": 0.00018430630078611904, "loss": 1.7452, "step": 13055 }, { "epoch": 0.6375, "grad_norm": 0.227437362074852, "learning_rate": 0.00018427424584598778, "loss": 1.7788, "step": 13056 }, { "epoch": 0.637548828125, "grad_norm": 0.35094743967056274, "learning_rate": 0.00018424219310466818, "loss": 1.7736, "step": 13057 }, { "epoch": 0.63759765625, "grad_norm": 0.21105168759822845, "learning_rate": 0.00018421014256293682, "loss": 1.7933, "step": 13058 }, { "epoch": 0.637646484375, "grad_norm": 0.32033178210258484, "learning_rate": 0.00018417809422157066, "loss": 1.7699, "step": 13059 }, { "epoch": 0.6376953125, "grad_norm": 0.26892417669296265, "learning_rate": 0.00018414604808134644, "loss": 1.7439, "step": 13060 }, { "epoch": 0.637744140625, "grad_norm": 0.34230202436447144, "learning_rate": 0.00018411400414304057, "loss": 1.7563, "step": 13061 }, { "epoch": 0.63779296875, "grad_norm": 0.2107839435338974, "learning_rate": 0.00018408196240742998, "loss": 1.7926, "step": 13062 }, { "epoch": 0.637841796875, "grad_norm": 0.35263508558273315, "learning_rate": 0.00018404992287529093, "loss": 1.7654, "step": 13063 }, { "epoch": 0.637890625, "grad_norm": 0.2190527319908142, "learning_rate": 0.0001840178855474001, "loss": 1.7772, "step": 13064 }, { "epoch": 0.637939453125, "grad_norm": 0.3478848934173584, "learning_rate": 0.00018398585042453392, "loss": 1.7498, "step": 13065 }, { "epoch": 0.63798828125, "grad_norm": 0.22119174897670746, "learning_rate": 0.00018395381750746886, "loss": 1.749, "step": 13066 }, { "epoch": 0.638037109375, "grad_norm": 0.29103100299835205, "learning_rate": 0.00018392178679698106, "loss": 1.7766, "step": 13067 }, { "epoch": 0.6380859375, "grad_norm": 0.24965974688529968, "learning_rate": 0.00018388975829384703, "loss": 1.7575, "step": 13068 }, { "epoch": 0.638134765625, "grad_norm": 0.24801398813724518, "learning_rate": 0.0001838577319988429, "loss": 1.7687, "step": 13069 }, { "epoch": 0.63818359375, "grad_norm": 0.23004615306854248, "learning_rate": 0.0001838257079127448, "loss": 1.7666, "step": 13070 }, { "epoch": 0.638232421875, "grad_norm": 0.22540779411792755, "learning_rate": 0.00018379368603632895, "loss": 1.7528, "step": 13071 }, { "epoch": 0.63828125, "grad_norm": 0.21994954347610474, "learning_rate": 0.00018376166637037135, "loss": 1.746, "step": 13072 }, { "epoch": 0.638330078125, "grad_norm": 0.2573997676372528, "learning_rate": 0.0001837296489156481, "loss": 1.7589, "step": 13073 }, { "epoch": 0.63837890625, "grad_norm": 0.25696155428886414, "learning_rate": 0.00018369763367293506, "loss": 1.7545, "step": 13074 }, { "epoch": 0.638427734375, "grad_norm": 0.21143700182437897, "learning_rate": 0.00018366562064300835, "loss": 1.7765, "step": 13075 }, { "epoch": 0.6384765625, "grad_norm": 0.22110962867736816, "learning_rate": 0.00018363360982664358, "loss": 1.7973, "step": 13076 }, { "epoch": 0.638525390625, "grad_norm": 0.21660073101520538, "learning_rate": 0.00018360160122461678, "loss": 1.7355, "step": 13077 }, { "epoch": 0.63857421875, "grad_norm": 0.20437103509902954, "learning_rate": 0.00018356959483770358, "loss": 1.7482, "step": 13078 }, { "epoch": 0.638623046875, "grad_norm": 0.22333313524723053, "learning_rate": 0.00018353759066667963, "loss": 1.768, "step": 13079 }, { "epoch": 0.638671875, "grad_norm": 0.2267937809228897, "learning_rate": 0.00018350558871232077, "loss": 1.7706, "step": 13080 }, { "epoch": 0.638720703125, "grad_norm": 0.21342839300632477, "learning_rate": 0.00018347358897540233, "loss": 1.7865, "step": 13081 }, { "epoch": 0.63876953125, "grad_norm": 0.22480936348438263, "learning_rate": 0.00018344159145670014, "loss": 1.7941, "step": 13082 }, { "epoch": 0.638818359375, "grad_norm": 0.23130469024181366, "learning_rate": 0.00018340959615698938, "loss": 1.7718, "step": 13083 }, { "epoch": 0.6388671875, "grad_norm": 0.20762087404727936, "learning_rate": 0.00018337760307704582, "loss": 1.765, "step": 13084 }, { "epoch": 0.638916015625, "grad_norm": 0.24247851967811584, "learning_rate": 0.00018334561221764446, "loss": 1.7493, "step": 13085 }, { "epoch": 0.63896484375, "grad_norm": 0.23542222380638123, "learning_rate": 0.000183313623579561, "loss": 1.758, "step": 13086 }, { "epoch": 0.639013671875, "grad_norm": 0.24556906521320343, "learning_rate": 0.0001832816371635705, "loss": 1.7609, "step": 13087 }, { "epoch": 0.6390625, "grad_norm": 0.24615797400474548, "learning_rate": 0.0001832496529704481, "loss": 1.7686, "step": 13088 }, { "epoch": 0.639111328125, "grad_norm": 0.2204940915107727, "learning_rate": 0.0001832176710009692, "loss": 1.7629, "step": 13089 }, { "epoch": 0.63916015625, "grad_norm": 0.24793462455272675, "learning_rate": 0.00018318569125590868, "loss": 1.7756, "step": 13090 }, { "epoch": 0.639208984375, "grad_norm": 0.21626201272010803, "learning_rate": 0.00018315371373604173, "loss": 1.7749, "step": 13091 }, { "epoch": 0.6392578125, "grad_norm": 0.23738883435726166, "learning_rate": 0.0001831217384421433, "loss": 1.7625, "step": 13092 }, { "epoch": 0.639306640625, "grad_norm": 0.2231060415506363, "learning_rate": 0.0001830897653749884, "loss": 1.7792, "step": 13093 }, { "epoch": 0.63935546875, "grad_norm": 0.22608928382396698, "learning_rate": 0.00018305779453535193, "loss": 1.7856, "step": 13094 }, { "epoch": 0.639404296875, "grad_norm": 0.2034834921360016, "learning_rate": 0.0001830258259240085, "loss": 1.7621, "step": 13095 }, { "epoch": 0.639453125, "grad_norm": 0.2188282310962677, "learning_rate": 0.0001829938595417332, "loss": 1.7567, "step": 13096 }, { "epoch": 0.639501953125, "grad_norm": 0.19782382249832153, "learning_rate": 0.00018296189538930058, "loss": 1.7626, "step": 13097 }, { "epoch": 0.63955078125, "grad_norm": 0.27781692147254944, "learning_rate": 0.00018292993346748543, "loss": 1.7554, "step": 13098 }, { "epoch": 0.639599609375, "grad_norm": 0.22977109253406525, "learning_rate": 0.0001828979737770622, "loss": 1.7446, "step": 13099 }, { "epoch": 0.6396484375, "grad_norm": 0.19264426827430725, "learning_rate": 0.00018286601631880566, "loss": 1.7614, "step": 13100 }, { "epoch": 0.639697265625, "grad_norm": 0.20831052958965302, "learning_rate": 0.00018283406109349015, "loss": 1.7714, "step": 13101 }, { "epoch": 0.63974609375, "grad_norm": 0.19010353088378906, "learning_rate": 0.00018280210810189034, "loss": 1.7189, "step": 13102 }, { "epoch": 0.639794921875, "grad_norm": 0.21038177609443665, "learning_rate": 0.00018277015734478047, "loss": 1.7713, "step": 13103 }, { "epoch": 0.63984375, "grad_norm": 0.18516214191913605, "learning_rate": 0.0001827382088229349, "loss": 1.764, "step": 13104 }, { "epoch": 0.639892578125, "grad_norm": 0.2198263555765152, "learning_rate": 0.00018270626253712798, "loss": 1.773, "step": 13105 }, { "epoch": 0.63994140625, "grad_norm": 0.20500601828098297, "learning_rate": 0.00018267431848813386, "loss": 1.7732, "step": 13106 }, { "epoch": 0.639990234375, "grad_norm": 0.21195261180400848, "learning_rate": 0.00018264237667672695, "loss": 1.7931, "step": 13107 }, { "epoch": 0.6400390625, "grad_norm": 0.20723897218704224, "learning_rate": 0.00018261043710368115, "loss": 1.7744, "step": 13108 }, { "epoch": 0.640087890625, "grad_norm": 0.24308893084526062, "learning_rate": 0.00018257849976977065, "loss": 1.7804, "step": 13109 }, { "epoch": 0.64013671875, "grad_norm": 0.21027691662311554, "learning_rate": 0.00018254656467576942, "loss": 1.7815, "step": 13110 }, { "epoch": 0.640185546875, "grad_norm": 0.23159289360046387, "learning_rate": 0.00018251463182245153, "loss": 1.7764, "step": 13111 }, { "epoch": 0.640234375, "grad_norm": 0.22181293368339539, "learning_rate": 0.00018248270121059085, "loss": 1.7326, "step": 13112 }, { "epoch": 0.640283203125, "grad_norm": 0.24016864597797394, "learning_rate": 0.0001824507728409611, "loss": 1.7374, "step": 13113 }, { "epoch": 0.64033203125, "grad_norm": 0.21728645265102386, "learning_rate": 0.0001824188467143364, "loss": 1.7529, "step": 13114 }, { "epoch": 0.640380859375, "grad_norm": 0.3239201307296753, "learning_rate": 0.00018238692283149016, "loss": 1.7856, "step": 13115 }, { "epoch": 0.6404296875, "grad_norm": 0.25917568802833557, "learning_rate": 0.00018235500119319643, "loss": 1.7583, "step": 13116 }, { "epoch": 0.640478515625, "grad_norm": 0.27208638191223145, "learning_rate": 0.0001823230818002285, "loss": 1.7591, "step": 13117 }, { "epoch": 0.64052734375, "grad_norm": 0.25462114810943604, "learning_rate": 0.00018229116465336026, "loss": 1.7548, "step": 13118 }, { "epoch": 0.640576171875, "grad_norm": 0.22373715043067932, "learning_rate": 0.00018225924975336517, "loss": 1.7413, "step": 13119 }, { "epoch": 0.640625, "grad_norm": 0.28708216547966003, "learning_rate": 0.0001822273371010165, "loss": 1.7568, "step": 13120 }, { "epoch": 0.640673828125, "grad_norm": 0.2336302250623703, "learning_rate": 0.000182195426697088, "loss": 1.7584, "step": 13121 }, { "epoch": 0.64072265625, "grad_norm": 0.3062640130519867, "learning_rate": 0.0001821635185423528, "loss": 1.7858, "step": 13122 }, { "epoch": 0.640771484375, "grad_norm": 0.2552526891231537, "learning_rate": 0.0001821316126375844, "loss": 1.7277, "step": 13123 }, { "epoch": 0.6408203125, "grad_norm": 0.3204278349876404, "learning_rate": 0.00018209970898355583, "loss": 1.7481, "step": 13124 }, { "epoch": 0.640869140625, "grad_norm": 0.26725542545318604, "learning_rate": 0.0001820678075810406, "loss": 1.7712, "step": 13125 }, { "epoch": 0.64091796875, "grad_norm": 0.3411102294921875, "learning_rate": 0.00018203590843081157, "loss": 1.7792, "step": 13126 }, { "epoch": 0.640966796875, "grad_norm": 0.22816573083400726, "learning_rate": 0.00018200401153364217, "loss": 1.7877, "step": 13127 }, { "epoch": 0.641015625, "grad_norm": 0.41883233189582825, "learning_rate": 0.00018197211689030513, "loss": 1.7768, "step": 13128 }, { "epoch": 0.641064453125, "grad_norm": 0.231300488114357, "learning_rate": 0.0001819402245015736, "loss": 1.7673, "step": 13129 }, { "epoch": 0.64111328125, "grad_norm": 0.3629192113876343, "learning_rate": 0.00018190833436822052, "loss": 1.7617, "step": 13130 }, { "epoch": 0.641162109375, "grad_norm": 0.2295815348625183, "learning_rate": 0.0001818764464910187, "loss": 1.7369, "step": 13131 }, { "epoch": 0.6412109375, "grad_norm": 0.31374412775039673, "learning_rate": 0.0001818445608707411, "loss": 1.7743, "step": 13132 }, { "epoch": 0.641259765625, "grad_norm": 0.280205100774765, "learning_rate": 0.00018181267750816028, "loss": 1.7524, "step": 13133 }, { "epoch": 0.64130859375, "grad_norm": 0.2945569157600403, "learning_rate": 0.00018178079640404916, "loss": 1.765, "step": 13134 }, { "epoch": 0.641357421875, "grad_norm": 0.28650563955307007, "learning_rate": 0.00018174891755918027, "loss": 1.7685, "step": 13135 }, { "epoch": 0.64140625, "grad_norm": 0.26681339740753174, "learning_rate": 0.00018171704097432638, "loss": 1.7679, "step": 13136 }, { "epoch": 0.641455078125, "grad_norm": 0.3071293532848358, "learning_rate": 0.00018168516665025992, "loss": 1.7718, "step": 13137 }, { "epoch": 0.64150390625, "grad_norm": 0.2566455900669098, "learning_rate": 0.00018165329458775338, "loss": 1.7893, "step": 13138 }, { "epoch": 0.641552734375, "grad_norm": 0.26476988196372986, "learning_rate": 0.00018162142478757933, "loss": 1.7521, "step": 13139 }, { "epoch": 0.6416015625, "grad_norm": 0.21874764561653137, "learning_rate": 0.0001815895572505099, "loss": 1.7665, "step": 13140 }, { "epoch": 0.641650390625, "grad_norm": 0.2740796208381653, "learning_rate": 0.0001815576919773177, "loss": 1.7466, "step": 13141 }, { "epoch": 0.64169921875, "grad_norm": 0.22161820530891418, "learning_rate": 0.0001815258289687749, "loss": 1.7697, "step": 13142 }, { "epoch": 0.641748046875, "grad_norm": 0.26314640045166016, "learning_rate": 0.00018149396822565372, "loss": 1.7733, "step": 13143 }, { "epoch": 0.641796875, "grad_norm": 0.1976163387298584, "learning_rate": 0.0001814621097487263, "loss": 1.7274, "step": 13144 }, { "epoch": 0.641845703125, "grad_norm": 0.27093982696533203, "learning_rate": 0.00018143025353876486, "loss": 1.7629, "step": 13145 }, { "epoch": 0.64189453125, "grad_norm": 0.20779922604560852, "learning_rate": 0.00018139839959654142, "loss": 1.7637, "step": 13146 }, { "epoch": 0.641943359375, "grad_norm": 0.2801046371459961, "learning_rate": 0.00018136654792282788, "loss": 1.7517, "step": 13147 }, { "epoch": 0.6419921875, "grad_norm": 0.19400621950626373, "learning_rate": 0.0001813346985183964, "loss": 1.7704, "step": 13148 }, { "epoch": 0.642041015625, "grad_norm": 0.2812210023403168, "learning_rate": 0.0001813028513840186, "loss": 1.7525, "step": 13149 }, { "epoch": 0.64208984375, "grad_norm": 0.21030855178833008, "learning_rate": 0.0001812710065204666, "loss": 1.7543, "step": 13150 }, { "epoch": 0.642138671875, "grad_norm": 0.2560277581214905, "learning_rate": 0.0001812391639285119, "loss": 1.7672, "step": 13151 }, { "epoch": 0.6421875, "grad_norm": 0.23867706954479218, "learning_rate": 0.00018120732360892662, "loss": 1.7407, "step": 13152 }, { "epoch": 0.642236328125, "grad_norm": 0.2054104059934616, "learning_rate": 0.00018117548556248204, "loss": 1.7713, "step": 13153 }, { "epoch": 0.64228515625, "grad_norm": 0.22720967233181, "learning_rate": 0.00018114364978995002, "loss": 1.7609, "step": 13154 }, { "epoch": 0.642333984375, "grad_norm": 0.2534187138080597, "learning_rate": 0.000181111816292102, "loss": 1.7893, "step": 13155 }, { "epoch": 0.6423828125, "grad_norm": 0.2799994945526123, "learning_rate": 0.0001810799850697095, "loss": 1.7674, "step": 13156 }, { "epoch": 0.642431640625, "grad_norm": 0.21712416410446167, "learning_rate": 0.00018104815612354408, "loss": 1.8035, "step": 13157 }, { "epoch": 0.64248046875, "grad_norm": 0.2289334535598755, "learning_rate": 0.00018101632945437707, "loss": 1.7546, "step": 13158 }, { "epoch": 0.642529296875, "grad_norm": 0.23862922191619873, "learning_rate": 0.0001809845050629798, "loss": 1.7728, "step": 13159 }, { "epoch": 0.642578125, "grad_norm": 0.1955699324607849, "learning_rate": 0.00018095268295012358, "loss": 1.7842, "step": 13160 }, { "epoch": 0.642626953125, "grad_norm": 0.22718048095703125, "learning_rate": 0.00018092086311657967, "loss": 1.7793, "step": 13161 }, { "epoch": 0.64267578125, "grad_norm": 0.2354206144809723, "learning_rate": 0.0001808890455631193, "loss": 1.7652, "step": 13162 }, { "epoch": 0.642724609375, "grad_norm": 0.215969517827034, "learning_rate": 0.00018085723029051336, "loss": 1.7792, "step": 13163 }, { "epoch": 0.6427734375, "grad_norm": 0.22745957970619202, "learning_rate": 0.00018082541729953327, "loss": 1.7669, "step": 13164 }, { "epoch": 0.642822265625, "grad_norm": 0.21745088696479797, "learning_rate": 0.00018079360659094962, "loss": 1.7614, "step": 13165 }, { "epoch": 0.64287109375, "grad_norm": 0.21123206615447998, "learning_rate": 0.0001807617981655338, "loss": 1.7806, "step": 13166 }, { "epoch": 0.642919921875, "grad_norm": 0.18182094395160675, "learning_rate": 0.00018072999202405632, "loss": 1.7432, "step": 13167 }, { "epoch": 0.64296875, "grad_norm": 0.20019501447677612, "learning_rate": 0.00018069818816728844, "loss": 1.7836, "step": 13168 }, { "epoch": 0.643017578125, "grad_norm": 0.2022821456193924, "learning_rate": 0.00018066638659600055, "loss": 1.7518, "step": 13169 }, { "epoch": 0.64306640625, "grad_norm": 0.2458772510290146, "learning_rate": 0.00018063458731096372, "loss": 1.7711, "step": 13170 }, { "epoch": 0.643115234375, "grad_norm": 0.2529013454914093, "learning_rate": 0.00018060279031294842, "loss": 1.7725, "step": 13171 }, { "epoch": 0.6431640625, "grad_norm": 0.2070503681898117, "learning_rate": 0.00018057099560272528, "loss": 1.7653, "step": 13172 }, { "epoch": 0.643212890625, "grad_norm": 0.27232825756073, "learning_rate": 0.00018053920318106492, "loss": 1.7625, "step": 13173 }, { "epoch": 0.64326171875, "grad_norm": 0.21497108042240143, "learning_rate": 0.00018050741304873792, "loss": 1.7971, "step": 13174 }, { "epoch": 0.643310546875, "grad_norm": 0.2599546015262604, "learning_rate": 0.0001804756252065146, "loss": 1.7745, "step": 13175 }, { "epoch": 0.643359375, "grad_norm": 0.240493506193161, "learning_rate": 0.00018044383965516549, "loss": 1.7376, "step": 13176 }, { "epoch": 0.643408203125, "grad_norm": 0.24725380539894104, "learning_rate": 0.00018041205639546089, "loss": 1.7831, "step": 13177 }, { "epoch": 0.64345703125, "grad_norm": 0.24537599086761475, "learning_rate": 0.00018038027542817121, "loss": 1.7286, "step": 13178 }, { "epoch": 0.643505859375, "grad_norm": 0.26023954153060913, "learning_rate": 0.00018034849675406634, "loss": 1.7711, "step": 13179 }, { "epoch": 0.6435546875, "grad_norm": 0.24103042483329773, "learning_rate": 0.00018031672037391693, "loss": 1.7558, "step": 13180 }, { "epoch": 0.643603515625, "grad_norm": 0.22240251302719116, "learning_rate": 0.00018028494628849267, "loss": 1.7902, "step": 13181 }, { "epoch": 0.64365234375, "grad_norm": 0.23346282541751862, "learning_rate": 0.000180253174498564, "loss": 1.7551, "step": 13182 }, { "epoch": 0.643701171875, "grad_norm": 0.21096135675907135, "learning_rate": 0.0001802214050049006, "loss": 1.7605, "step": 13183 }, { "epoch": 0.64375, "grad_norm": 0.24106836318969727, "learning_rate": 0.00018018963780827275, "loss": 1.7544, "step": 13184 }, { "epoch": 0.643798828125, "grad_norm": 0.22644124925136566, "learning_rate": 0.00018015787290945, "loss": 1.7725, "step": 13185 }, { "epoch": 0.64384765625, "grad_norm": 0.2187567502260208, "learning_rate": 0.00018012611030920268, "loss": 1.7638, "step": 13186 }, { "epoch": 0.643896484375, "grad_norm": 0.23522540926933289, "learning_rate": 0.0001800943500083001, "loss": 1.7597, "step": 13187 }, { "epoch": 0.6439453125, "grad_norm": 0.2390047311782837, "learning_rate": 0.00018006259200751225, "loss": 1.7492, "step": 13188 }, { "epoch": 0.643994140625, "grad_norm": 0.28128480911254883, "learning_rate": 0.0001800308363076087, "loss": 1.7635, "step": 13189 }, { "epoch": 0.64404296875, "grad_norm": 0.2209480255842209, "learning_rate": 0.00017999908290935917, "loss": 1.7352, "step": 13190 }, { "epoch": 0.644091796875, "grad_norm": 0.29383957386016846, "learning_rate": 0.0001799673318135332, "loss": 1.7419, "step": 13191 }, { "epoch": 0.644140625, "grad_norm": 0.21518075466156006, "learning_rate": 0.00017993558302090024, "loss": 1.7392, "step": 13192 }, { "epoch": 0.644189453125, "grad_norm": 0.22352498769760132, "learning_rate": 0.0001799038365322298, "loss": 1.7383, "step": 13193 }, { "epoch": 0.64423828125, "grad_norm": 0.23297615349292755, "learning_rate": 0.00017987209234829131, "loss": 1.7504, "step": 13194 }, { "epoch": 0.644287109375, "grad_norm": 0.24196957051753998, "learning_rate": 0.00017984035046985408, "loss": 1.7723, "step": 13195 }, { "epoch": 0.6443359375, "grad_norm": 0.21155910193920135, "learning_rate": 0.00017980861089768752, "loss": 1.7397, "step": 13196 }, { "epoch": 0.644384765625, "grad_norm": 0.24667038023471832, "learning_rate": 0.00017977687363256056, "loss": 1.7698, "step": 13197 }, { "epoch": 0.64443359375, "grad_norm": 0.18450315296649933, "learning_rate": 0.0001797451386752428, "loss": 1.7568, "step": 13198 }, { "epoch": 0.644482421875, "grad_norm": 0.2345537543296814, "learning_rate": 0.00017971340602650288, "loss": 1.7868, "step": 13199 }, { "epoch": 0.64453125, "grad_norm": 0.24621446430683136, "learning_rate": 0.0001796816756871103, "loss": 1.7393, "step": 13200 }, { "epoch": 0.644580078125, "grad_norm": 0.22703541815280914, "learning_rate": 0.0001796499476578337, "loss": 1.7804, "step": 13201 }, { "epoch": 0.64462890625, "grad_norm": 0.21957020461559296, "learning_rate": 0.00017961822193944245, "loss": 1.7704, "step": 13202 }, { "epoch": 0.644677734375, "grad_norm": 0.22759050130844116, "learning_rate": 0.00017958649853270497, "loss": 1.7832, "step": 13203 }, { "epoch": 0.6447265625, "grad_norm": 0.23663754761219025, "learning_rate": 0.0001795547774383906, "loss": 1.7457, "step": 13204 }, { "epoch": 0.644775390625, "grad_norm": 0.2542460560798645, "learning_rate": 0.00017952305865726775, "loss": 1.7737, "step": 13205 }, { "epoch": 0.64482421875, "grad_norm": 0.27464839816093445, "learning_rate": 0.00017949134219010532, "loss": 1.7446, "step": 13206 }, { "epoch": 0.644873046875, "grad_norm": 0.25838398933410645, "learning_rate": 0.0001794596280376719, "loss": 1.7824, "step": 13207 }, { "epoch": 0.644921875, "grad_norm": 0.23411694169044495, "learning_rate": 0.00017942791620073617, "loss": 1.7829, "step": 13208 }, { "epoch": 0.644970703125, "grad_norm": 0.20106537640094757, "learning_rate": 0.00017939620668006669, "loss": 1.7812, "step": 13209 }, { "epoch": 0.64501953125, "grad_norm": 0.244529128074646, "learning_rate": 0.00017936449947643197, "loss": 1.7651, "step": 13210 }, { "epoch": 0.645068359375, "grad_norm": 0.19004760682582855, "learning_rate": 0.0001793327945906003, "loss": 1.7987, "step": 13211 }, { "epoch": 0.6451171875, "grad_norm": 0.23832201957702637, "learning_rate": 0.00017930109202334043, "loss": 1.7572, "step": 13212 }, { "epoch": 0.645166015625, "grad_norm": 0.27209728956222534, "learning_rate": 0.00017926939177542029, "loss": 1.782, "step": 13213 }, { "epoch": 0.64521484375, "grad_norm": 0.20924067497253418, "learning_rate": 0.00017923769384760852, "loss": 1.7719, "step": 13214 }, { "epoch": 0.645263671875, "grad_norm": 0.24617572128772736, "learning_rate": 0.00017920599824067297, "loss": 1.7445, "step": 13215 }, { "epoch": 0.6453125, "grad_norm": 0.2595502436161041, "learning_rate": 0.00017917430495538217, "loss": 1.7385, "step": 13216 }, { "epoch": 0.645361328125, "grad_norm": 0.2083347886800766, "learning_rate": 0.00017914261399250398, "loss": 1.7592, "step": 13217 }, { "epoch": 0.64541015625, "grad_norm": 0.23413565754890442, "learning_rate": 0.00017911092535280665, "loss": 1.7414, "step": 13218 }, { "epoch": 0.645458984375, "grad_norm": 0.2060001641511917, "learning_rate": 0.00017907923903705793, "loss": 1.7345, "step": 13219 }, { "epoch": 0.6455078125, "grad_norm": 0.2347673773765564, "learning_rate": 0.0001790475550460261, "loss": 1.7556, "step": 13220 }, { "epoch": 0.645556640625, "grad_norm": 0.215752512216568, "learning_rate": 0.00017901587338047876, "loss": 1.7702, "step": 13221 }, { "epoch": 0.64560546875, "grad_norm": 0.21972328424453735, "learning_rate": 0.00017898419404118387, "loss": 1.7642, "step": 13222 }, { "epoch": 0.645654296875, "grad_norm": 0.21249274909496307, "learning_rate": 0.00017895251702890912, "loss": 1.7756, "step": 13223 }, { "epoch": 0.645703125, "grad_norm": 0.25559771060943604, "learning_rate": 0.00017892084234442235, "loss": 1.7705, "step": 13224 }, { "epoch": 0.645751953125, "grad_norm": 0.2134031355381012, "learning_rate": 0.00017888916998849108, "loss": 1.7457, "step": 13225 }, { "epoch": 0.64580078125, "grad_norm": 0.22590407729148865, "learning_rate": 0.00017885749996188305, "loss": 1.7537, "step": 13226 }, { "epoch": 0.645849609375, "grad_norm": 0.22392572462558746, "learning_rate": 0.0001788258322653657, "loss": 1.7623, "step": 13227 }, { "epoch": 0.6458984375, "grad_norm": 0.2058042734861374, "learning_rate": 0.00017879416689970662, "loss": 1.7528, "step": 13228 }, { "epoch": 0.645947265625, "grad_norm": 0.24730341136455536, "learning_rate": 0.00017876250386567318, "loss": 1.7556, "step": 13229 }, { "epoch": 0.64599609375, "grad_norm": 0.2267860621213913, "learning_rate": 0.0001787308431640329, "loss": 1.7674, "step": 13230 }, { "epoch": 0.646044921875, "grad_norm": 0.18944628536701202, "learning_rate": 0.00017869918479555274, "loss": 1.7702, "step": 13231 }, { "epoch": 0.64609375, "grad_norm": 0.2566264867782593, "learning_rate": 0.0001786675287610004, "loss": 1.7513, "step": 13232 }, { "epoch": 0.646142578125, "grad_norm": 0.194589301943779, "learning_rate": 0.0001786358750611427, "loss": 1.7831, "step": 13233 }, { "epoch": 0.64619140625, "grad_norm": 0.21871620416641235, "learning_rate": 0.00017860422369674717, "loss": 1.7563, "step": 13234 }, { "epoch": 0.646240234375, "grad_norm": 0.2234342247247696, "learning_rate": 0.00017857257466858057, "loss": 1.7653, "step": 13235 }, { "epoch": 0.6462890625, "grad_norm": 0.23144322633743286, "learning_rate": 0.00017854092797741023, "loss": 1.7643, "step": 13236 }, { "epoch": 0.646337890625, "grad_norm": 0.23498167097568512, "learning_rate": 0.00017850928362400298, "loss": 1.762, "step": 13237 }, { "epoch": 0.64638671875, "grad_norm": 0.20426301658153534, "learning_rate": 0.0001784776416091257, "loss": 1.7388, "step": 13238 }, { "epoch": 0.646435546875, "grad_norm": 0.24598471820354462, "learning_rate": 0.00017844600193354533, "loss": 1.7643, "step": 13239 }, { "epoch": 0.646484375, "grad_norm": 0.2137278914451599, "learning_rate": 0.00017841436459802866, "loss": 1.7762, "step": 13240 }, { "epoch": 0.646533203125, "grad_norm": 0.24875426292419434, "learning_rate": 0.00017838272960334252, "loss": 1.7526, "step": 13241 }, { "epoch": 0.64658203125, "grad_norm": 0.23123963177204132, "learning_rate": 0.00017835109695025348, "loss": 1.7719, "step": 13242 }, { "epoch": 0.646630859375, "grad_norm": 0.2123870998620987, "learning_rate": 0.00017831946663952826, "loss": 1.7601, "step": 13243 }, { "epoch": 0.6466796875, "grad_norm": 0.22263042628765106, "learning_rate": 0.00017828783867193348, "loss": 1.7763, "step": 13244 }, { "epoch": 0.646728515625, "grad_norm": 0.2165566086769104, "learning_rate": 0.00017825621304823558, "loss": 1.7391, "step": 13245 }, { "epoch": 0.64677734375, "grad_norm": 0.2155434936285019, "learning_rate": 0.0001782245897692012, "loss": 1.7345, "step": 13246 }, { "epoch": 0.646826171875, "grad_norm": 0.23841525614261627, "learning_rate": 0.00017819296883559644, "loss": 1.8078, "step": 13247 }, { "epoch": 0.646875, "grad_norm": 0.20830786228179932, "learning_rate": 0.00017816135024818801, "loss": 1.7602, "step": 13248 }, { "epoch": 0.646923828125, "grad_norm": 0.2464294284582138, "learning_rate": 0.00017812973400774197, "loss": 1.7606, "step": 13249 }, { "epoch": 0.64697265625, "grad_norm": 0.21045444905757904, "learning_rate": 0.00017809812011502474, "loss": 1.7457, "step": 13250 }, { "epoch": 0.647021484375, "grad_norm": 0.23069584369659424, "learning_rate": 0.0001780665085708023, "loss": 1.7676, "step": 13251 }, { "epoch": 0.6470703125, "grad_norm": 0.25190475583076477, "learning_rate": 0.000178034899375841, "loss": 1.7657, "step": 13252 }, { "epoch": 0.647119140625, "grad_norm": 0.1990361213684082, "learning_rate": 0.00017800329253090668, "loss": 1.7633, "step": 13253 }, { "epoch": 0.64716796875, "grad_norm": 0.2565827965736389, "learning_rate": 0.0001779716880367657, "loss": 1.7563, "step": 13254 }, { "epoch": 0.647216796875, "grad_norm": 0.227804496884346, "learning_rate": 0.00017794008589418371, "loss": 1.7801, "step": 13255 }, { "epoch": 0.647265625, "grad_norm": 0.2542424499988556, "learning_rate": 0.00017790848610392667, "loss": 1.7614, "step": 13256 }, { "epoch": 0.647314453125, "grad_norm": 0.20540450513362885, "learning_rate": 0.00017787688866676056, "loss": 1.7758, "step": 13257 }, { "epoch": 0.64736328125, "grad_norm": 0.24159200489521027, "learning_rate": 0.00017784529358345102, "loss": 1.7695, "step": 13258 }, { "epoch": 0.647412109375, "grad_norm": 0.1799468994140625, "learning_rate": 0.00017781370085476385, "loss": 1.7536, "step": 13259 }, { "epoch": 0.6474609375, "grad_norm": 0.22404895722866058, "learning_rate": 0.0001777821104814647, "loss": 1.7741, "step": 13260 }, { "epoch": 0.647509765625, "grad_norm": 0.19758588075637817, "learning_rate": 0.00017775052246431921, "loss": 1.7843, "step": 13261 }, { "epoch": 0.64755859375, "grad_norm": 0.2182161509990692, "learning_rate": 0.000177718936804093, "loss": 1.757, "step": 13262 }, { "epoch": 0.647607421875, "grad_norm": 0.20206709206104279, "learning_rate": 0.00017768735350155146, "loss": 1.7788, "step": 13263 }, { "epoch": 0.64765625, "grad_norm": 0.24855153262615204, "learning_rate": 0.00017765577255746018, "loss": 1.7663, "step": 13264 }, { "epoch": 0.647705078125, "grad_norm": 0.23347069323062897, "learning_rate": 0.00017762419397258428, "loss": 1.746, "step": 13265 }, { "epoch": 0.64775390625, "grad_norm": 0.20171815156936646, "learning_rate": 0.00017759261774768942, "loss": 1.7496, "step": 13266 }, { "epoch": 0.647802734375, "grad_norm": 0.20897045731544495, "learning_rate": 0.00017756104388354062, "loss": 1.7732, "step": 13267 }, { "epoch": 0.6478515625, "grad_norm": 0.21510018408298492, "learning_rate": 0.00017752947238090333, "loss": 1.7479, "step": 13268 }, { "epoch": 0.647900390625, "grad_norm": 0.21664342284202576, "learning_rate": 0.00017749790324054238, "loss": 1.7873, "step": 13269 }, { "epoch": 0.64794921875, "grad_norm": 0.2013530433177948, "learning_rate": 0.0001774663364632233, "loss": 1.7431, "step": 13270 }, { "epoch": 0.647998046875, "grad_norm": 0.2176719754934311, "learning_rate": 0.0001774347720497108, "loss": 1.7591, "step": 13271 }, { "epoch": 0.648046875, "grad_norm": 0.22504153847694397, "learning_rate": 0.00017740321000076997, "loss": 1.7799, "step": 13272 }, { "epoch": 0.648095703125, "grad_norm": 0.22330038249492645, "learning_rate": 0.00017737165031716585, "loss": 1.7576, "step": 13273 }, { "epoch": 0.64814453125, "grad_norm": 0.2528603672981262, "learning_rate": 0.00017734009299966313, "loss": 1.7644, "step": 13274 }, { "epoch": 0.648193359375, "grad_norm": 0.2833464443683624, "learning_rate": 0.00017730853804902674, "loss": 1.7854, "step": 13275 }, { "epoch": 0.6482421875, "grad_norm": 0.2251949906349182, "learning_rate": 0.0001772769854660214, "loss": 1.7566, "step": 13276 }, { "epoch": 0.648291015625, "grad_norm": 0.22564980387687683, "learning_rate": 0.00017724543525141184, "loss": 1.7597, "step": 13277 }, { "epoch": 0.64833984375, "grad_norm": 0.24276013672351837, "learning_rate": 0.00017721388740596266, "loss": 1.7721, "step": 13278 }, { "epoch": 0.648388671875, "grad_norm": 0.30407434701919556, "learning_rate": 0.00017718234193043854, "loss": 1.7753, "step": 13279 }, { "epoch": 0.6484375, "grad_norm": 0.20466573536396027, "learning_rate": 0.00017715079882560391, "loss": 1.7689, "step": 13280 }, { "epoch": 0.648486328125, "grad_norm": 0.31436261534690857, "learning_rate": 0.0001771192580922233, "loss": 1.7608, "step": 13281 }, { "epoch": 0.64853515625, "grad_norm": 0.23608510196208954, "learning_rate": 0.00017708771973106108, "loss": 1.7365, "step": 13282 }, { "epoch": 0.648583984375, "grad_norm": 0.3006272614002228, "learning_rate": 0.00017705618374288168, "loss": 1.7321, "step": 13283 }, { "epoch": 0.6486328125, "grad_norm": 0.291840136051178, "learning_rate": 0.00017702465012844938, "loss": 1.7649, "step": 13284 }, { "epoch": 0.648681640625, "grad_norm": 0.3057449162006378, "learning_rate": 0.00017699311888852827, "loss": 1.7826, "step": 13285 }, { "epoch": 0.64873046875, "grad_norm": 0.2201014906167984, "learning_rate": 0.00017696159002388278, "loss": 1.7569, "step": 13286 }, { "epoch": 0.648779296875, "grad_norm": 0.285508394241333, "learning_rate": 0.0001769300635352768, "loss": 1.7694, "step": 13287 }, { "epoch": 0.648828125, "grad_norm": 0.24949267506599426, "learning_rate": 0.00017689853942347468, "loss": 1.7845, "step": 13288 }, { "epoch": 0.648876953125, "grad_norm": 0.3178609311580658, "learning_rate": 0.0001768670176892402, "loss": 1.7941, "step": 13289 }, { "epoch": 0.64892578125, "grad_norm": 0.23225510120391846, "learning_rate": 0.00017683549833333735, "loss": 1.7436, "step": 13290 }, { "epoch": 0.648974609375, "grad_norm": 0.2995646893978119, "learning_rate": 0.0001768039813565301, "loss": 1.7746, "step": 13291 }, { "epoch": 0.6490234375, "grad_norm": 0.223585307598114, "learning_rate": 0.0001767724667595822, "loss": 1.7601, "step": 13292 }, { "epoch": 0.649072265625, "grad_norm": 0.2963421642780304, "learning_rate": 0.0001767409545432575, "loss": 1.7688, "step": 13293 }, { "epoch": 0.64912109375, "grad_norm": 0.2405901700258255, "learning_rate": 0.00017670944470831972, "loss": 1.7831, "step": 13294 }, { "epoch": 0.649169921875, "grad_norm": 0.3080733120441437, "learning_rate": 0.00017667793725553255, "loss": 1.7881, "step": 13295 }, { "epoch": 0.64921875, "grad_norm": 0.31561005115509033, "learning_rate": 0.00017664643218565952, "loss": 1.7341, "step": 13296 }, { "epoch": 0.649267578125, "grad_norm": 0.2618204355239868, "learning_rate": 0.00017661492949946417, "loss": 1.7545, "step": 13297 }, { "epoch": 0.64931640625, "grad_norm": 0.2824840247631073, "learning_rate": 0.00017658342919771015, "loss": 1.7464, "step": 13298 }, { "epoch": 0.649365234375, "grad_norm": 0.25999361276626587, "learning_rate": 0.0001765519312811607, "loss": 1.7916, "step": 13299 }, { "epoch": 0.6494140625, "grad_norm": 0.24459315836429596, "learning_rate": 0.00017652043575057936, "loss": 1.7318, "step": 13300 }, { "epoch": 0.649462890625, "grad_norm": 0.2188483476638794, "learning_rate": 0.00017648894260672933, "loss": 1.7617, "step": 13301 }, { "epoch": 0.64951171875, "grad_norm": 0.24142661690711975, "learning_rate": 0.000176457451850374, "loss": 1.7588, "step": 13302 }, { "epoch": 0.649560546875, "grad_norm": 0.23652192950248718, "learning_rate": 0.0001764259634822764, "loss": 1.7542, "step": 13303 }, { "epoch": 0.649609375, "grad_norm": 0.24582837522029877, "learning_rate": 0.00017639447750319987, "loss": 1.7977, "step": 13304 }, { "epoch": 0.649658203125, "grad_norm": 0.27966827154159546, "learning_rate": 0.00017636299391390732, "loss": 1.7487, "step": 13305 }, { "epoch": 0.64970703125, "grad_norm": 0.22730116546154022, "learning_rate": 0.00017633151271516183, "loss": 1.7717, "step": 13306 }, { "epoch": 0.649755859375, "grad_norm": 0.26355671882629395, "learning_rate": 0.00017630003390772648, "loss": 1.7332, "step": 13307 }, { "epoch": 0.6498046875, "grad_norm": 0.21991045773029327, "learning_rate": 0.00017626855749236403, "loss": 1.7444, "step": 13308 }, { "epoch": 0.649853515625, "grad_norm": 0.21069832146167755, "learning_rate": 0.00017623708346983747, "loss": 1.765, "step": 13309 }, { "epoch": 0.64990234375, "grad_norm": 0.2256467193365097, "learning_rate": 0.0001762056118409095, "loss": 1.7522, "step": 13310 }, { "epoch": 0.649951171875, "grad_norm": 0.19764406979084015, "learning_rate": 0.0001761741426063429, "loss": 1.7597, "step": 13311 }, { "epoch": 0.65, "grad_norm": 0.23482806980609894, "learning_rate": 0.00017614267576690035, "loss": 1.738, "step": 13312 }, { "epoch": 0.650048828125, "grad_norm": 0.2060970962047577, "learning_rate": 0.00017611121132334447, "loss": 1.7683, "step": 13313 }, { "epoch": 0.65009765625, "grad_norm": 0.2421201914548874, "learning_rate": 0.00017607974927643782, "loss": 1.7591, "step": 13314 }, { "epoch": 0.650146484375, "grad_norm": 0.20107203722000122, "learning_rate": 0.00017604828962694292, "loss": 1.7891, "step": 13315 }, { "epoch": 0.6501953125, "grad_norm": 0.2222226858139038, "learning_rate": 0.00017601683237562226, "loss": 1.7513, "step": 13316 }, { "epoch": 0.650244140625, "grad_norm": 0.2141885757446289, "learning_rate": 0.00017598537752323812, "loss": 1.7603, "step": 13317 }, { "epoch": 0.65029296875, "grad_norm": 0.21439455449581146, "learning_rate": 0.00017595392507055297, "loss": 1.7571, "step": 13318 }, { "epoch": 0.650341796875, "grad_norm": 0.20199182629585266, "learning_rate": 0.00017592247501832897, "loss": 1.756, "step": 13319 }, { "epoch": 0.650390625, "grad_norm": 0.2473469227552414, "learning_rate": 0.00017589102736732842, "loss": 1.7634, "step": 13320 }, { "epoch": 0.650439453125, "grad_norm": 0.2262004166841507, "learning_rate": 0.00017585958211831348, "loss": 1.7574, "step": 13321 }, { "epoch": 0.65048828125, "grad_norm": 0.20752392709255219, "learning_rate": 0.0001758281392720461, "loss": 1.749, "step": 13322 }, { "epoch": 0.650537109375, "grad_norm": 0.22780796885490417, "learning_rate": 0.0001757966988292886, "loss": 1.7472, "step": 13323 }, { "epoch": 0.6505859375, "grad_norm": 0.19870801270008087, "learning_rate": 0.00017576526079080267, "loss": 1.7721, "step": 13324 }, { "epoch": 0.650634765625, "grad_norm": 0.2178386002779007, "learning_rate": 0.00017573382515735043, "loss": 1.7695, "step": 13325 }, { "epoch": 0.65068359375, "grad_norm": 0.21491451561450958, "learning_rate": 0.00017570239192969366, "loss": 1.7848, "step": 13326 }, { "epoch": 0.650732421875, "grad_norm": 0.1856342852115631, "learning_rate": 0.00017567096110859422, "loss": 1.7679, "step": 13327 }, { "epoch": 0.65078125, "grad_norm": 0.21276751160621643, "learning_rate": 0.0001756395326948138, "loss": 1.7543, "step": 13328 }, { "epoch": 0.650830078125, "grad_norm": 0.19816827774047852, "learning_rate": 0.00017560810668911414, "loss": 1.7585, "step": 13329 }, { "epoch": 0.65087890625, "grad_norm": 0.21608567237854004, "learning_rate": 0.00017557668309225687, "loss": 1.7787, "step": 13330 }, { "epoch": 0.650927734375, "grad_norm": 0.24161295592784882, "learning_rate": 0.00017554526190500359, "loss": 1.7635, "step": 13331 }, { "epoch": 0.6509765625, "grad_norm": 0.26285022497177124, "learning_rate": 0.00017551384312811574, "loss": 1.7511, "step": 13332 }, { "epoch": 0.651025390625, "grad_norm": 0.2559151351451874, "learning_rate": 0.00017548242676235486, "loss": 1.7655, "step": 13333 }, { "epoch": 0.65107421875, "grad_norm": 0.21391740441322327, "learning_rate": 0.00017545101280848235, "loss": 1.7679, "step": 13334 }, { "epoch": 0.651123046875, "grad_norm": 0.2494644820690155, "learning_rate": 0.0001754196012672595, "loss": 1.7812, "step": 13335 }, { "epoch": 0.651171875, "grad_norm": 0.21884530782699585, "learning_rate": 0.00017538819213944756, "loss": 1.7766, "step": 13336 }, { "epoch": 0.651220703125, "grad_norm": 0.2606426775455475, "learning_rate": 0.00017535678542580785, "loss": 1.7718, "step": 13337 }, { "epoch": 0.65126953125, "grad_norm": 0.20851574838161469, "learning_rate": 0.0001753253811271015, "loss": 1.7631, "step": 13338 }, { "epoch": 0.651318359375, "grad_norm": 0.2409721314907074, "learning_rate": 0.00017529397924408968, "loss": 1.7609, "step": 13339 }, { "epoch": 0.6513671875, "grad_norm": 0.23448903858661652, "learning_rate": 0.00017526257977753325, "loss": 1.7566, "step": 13340 }, { "epoch": 0.651416015625, "grad_norm": 0.2477540224790573, "learning_rate": 0.00017523118272819345, "loss": 1.7746, "step": 13341 }, { "epoch": 0.65146484375, "grad_norm": 0.23830071091651917, "learning_rate": 0.00017519978809683095, "loss": 1.7685, "step": 13342 }, { "epoch": 0.651513671875, "grad_norm": 0.24908281862735748, "learning_rate": 0.00017516839588420691, "loss": 1.7583, "step": 13343 }, { "epoch": 0.6515625, "grad_norm": 0.25963887572288513, "learning_rate": 0.00017513700609108197, "loss": 1.7563, "step": 13344 }, { "epoch": 0.651611328125, "grad_norm": 0.23105160892009735, "learning_rate": 0.00017510561871821685, "loss": 1.7499, "step": 13345 }, { "epoch": 0.65166015625, "grad_norm": 0.26152846217155457, "learning_rate": 0.0001750742337663724, "loss": 1.7619, "step": 13346 }, { "epoch": 0.651708984375, "grad_norm": 0.2243603616952896, "learning_rate": 0.00017504285123630904, "loss": 1.7826, "step": 13347 }, { "epoch": 0.6517578125, "grad_norm": 0.2155798375606537, "learning_rate": 0.00017501147112878758, "loss": 1.7563, "step": 13348 }, { "epoch": 0.651806640625, "grad_norm": 0.2155366837978363, "learning_rate": 0.00017498009344456842, "loss": 1.7419, "step": 13349 }, { "epoch": 0.65185546875, "grad_norm": 0.22124646604061127, "learning_rate": 0.00017494871818441205, "loss": 1.7642, "step": 13350 }, { "epoch": 0.651904296875, "grad_norm": 0.21279780566692352, "learning_rate": 0.00017491734534907892, "loss": 1.7775, "step": 13351 }, { "epoch": 0.651953125, "grad_norm": 0.2121705263853073, "learning_rate": 0.00017488597493932928, "loss": 1.787, "step": 13352 }, { "epoch": 0.652001953125, "grad_norm": 0.20854860544204712, "learning_rate": 0.00017485460695592349, "loss": 1.7313, "step": 13353 }, { "epoch": 0.65205078125, "grad_norm": 0.22469548881053925, "learning_rate": 0.00017482324139962176, "loss": 1.7783, "step": 13354 }, { "epoch": 0.652099609375, "grad_norm": 0.215377539396286, "learning_rate": 0.0001747918782711843, "loss": 1.7478, "step": 13355 }, { "epoch": 0.6521484375, "grad_norm": 0.23404626548290253, "learning_rate": 0.00017476051757137106, "loss": 1.7535, "step": 13356 }, { "epoch": 0.652197265625, "grad_norm": 0.28814584016799927, "learning_rate": 0.00017472915930094232, "loss": 1.7863, "step": 13357 }, { "epoch": 0.65224609375, "grad_norm": 0.20425866544246674, "learning_rate": 0.00017469780346065784, "loss": 1.7682, "step": 13358 }, { "epoch": 0.652294921875, "grad_norm": 0.25179746747016907, "learning_rate": 0.00017466645005127782, "loss": 1.787, "step": 13359 }, { "epoch": 0.65234375, "grad_norm": 0.24546225368976593, "learning_rate": 0.00017463509907356185, "loss": 1.7703, "step": 13360 }, { "epoch": 0.652392578125, "grad_norm": 0.22971369326114655, "learning_rate": 0.00017460375052827005, "loss": 1.7653, "step": 13361 }, { "epoch": 0.65244140625, "grad_norm": 0.21853341162204742, "learning_rate": 0.0001745724044161618, "loss": 1.7999, "step": 13362 }, { "epoch": 0.652490234375, "grad_norm": 0.24808500707149506, "learning_rate": 0.00017454106073799724, "loss": 1.7631, "step": 13363 }, { "epoch": 0.6525390625, "grad_norm": 0.20099030435085297, "learning_rate": 0.00017450971949453564, "loss": 1.7402, "step": 13364 }, { "epoch": 0.652587890625, "grad_norm": 0.2504113018512726, "learning_rate": 0.00017447838068653676, "loss": 1.7358, "step": 13365 }, { "epoch": 0.65263671875, "grad_norm": 0.22684785723686218, "learning_rate": 0.00017444704431476005, "loss": 1.7913, "step": 13366 }, { "epoch": 0.652685546875, "grad_norm": 0.2589458227157593, "learning_rate": 0.00017441571037996503, "loss": 1.7694, "step": 13367 }, { "epoch": 0.652734375, "grad_norm": 0.2767685055732727, "learning_rate": 0.00017438437888291108, "loss": 1.7357, "step": 13368 }, { "epoch": 0.652783203125, "grad_norm": 0.23509620130062103, "learning_rate": 0.00017435304982435753, "loss": 1.7544, "step": 13369 }, { "epoch": 0.65283203125, "grad_norm": 0.2847837209701538, "learning_rate": 0.0001743217232050637, "loss": 1.7676, "step": 13370 }, { "epoch": 0.652880859375, "grad_norm": 0.2550676763057709, "learning_rate": 0.00017429039902578877, "loss": 1.7703, "step": 13371 }, { "epoch": 0.6529296875, "grad_norm": 0.25424724817276, "learning_rate": 0.00017425907728729192, "loss": 1.7569, "step": 13372 }, { "epoch": 0.652978515625, "grad_norm": 0.250482976436615, "learning_rate": 0.00017422775799033236, "loss": 1.7356, "step": 13373 }, { "epoch": 0.65302734375, "grad_norm": 0.2111019492149353, "learning_rate": 0.0001741964411356689, "loss": 1.7631, "step": 13374 }, { "epoch": 0.653076171875, "grad_norm": 0.30814114212989807, "learning_rate": 0.00017416512672406082, "loss": 1.7382, "step": 13375 }, { "epoch": 0.653125, "grad_norm": 0.2120734602212906, "learning_rate": 0.00017413381475626672, "loss": 1.746, "step": 13376 }, { "epoch": 0.653173828125, "grad_norm": 0.23524928092956543, "learning_rate": 0.00017410250523304586, "loss": 1.7515, "step": 13377 }, { "epoch": 0.65322265625, "grad_norm": 0.2434091418981552, "learning_rate": 0.00017407119815515665, "loss": 1.772, "step": 13378 }, { "epoch": 0.653271484375, "grad_norm": 0.1958337426185608, "learning_rate": 0.0001740398935233582, "loss": 1.7607, "step": 13379 }, { "epoch": 0.6533203125, "grad_norm": 0.2303868681192398, "learning_rate": 0.00017400859133840895, "loss": 1.7656, "step": 13380 }, { "epoch": 0.653369140625, "grad_norm": 0.20278961956501007, "learning_rate": 0.00017397729160106767, "loss": 1.7668, "step": 13381 }, { "epoch": 0.65341796875, "grad_norm": 0.20523875951766968, "learning_rate": 0.00017394599431209284, "loss": 1.7487, "step": 13382 }, { "epoch": 0.653466796875, "grad_norm": 0.2695981562137604, "learning_rate": 0.0001739146994722431, "loss": 1.7508, "step": 13383 }, { "epoch": 0.653515625, "grad_norm": 0.2190300077199936, "learning_rate": 0.00017388340708227674, "loss": 1.7579, "step": 13384 }, { "epoch": 0.653564453125, "grad_norm": 0.2655024826526642, "learning_rate": 0.0001738521171429523, "loss": 1.7625, "step": 13385 }, { "epoch": 0.65361328125, "grad_norm": 0.24238714575767517, "learning_rate": 0.00017382082965502804, "loss": 1.7853, "step": 13386 }, { "epoch": 0.653662109375, "grad_norm": 0.19749973714351654, "learning_rate": 0.00017378954461926227, "loss": 1.7583, "step": 13387 }, { "epoch": 0.6537109375, "grad_norm": 0.22340814769268036, "learning_rate": 0.0001737582620364132, "loss": 1.7719, "step": 13388 }, { "epoch": 0.653759765625, "grad_norm": 0.24366165697574615, "learning_rate": 0.00017372698190723908, "loss": 1.7656, "step": 13389 }, { "epoch": 0.65380859375, "grad_norm": 0.2325330525636673, "learning_rate": 0.0001736957042324977, "loss": 1.751, "step": 13390 }, { "epoch": 0.653857421875, "grad_norm": 0.2093604952096939, "learning_rate": 0.0001736644290129475, "loss": 1.7622, "step": 13391 }, { "epoch": 0.65390625, "grad_norm": 0.24453283846378326, "learning_rate": 0.00017363315624934614, "loss": 1.746, "step": 13392 }, { "epoch": 0.653955078125, "grad_norm": 0.21848808228969574, "learning_rate": 0.00017360188594245179, "loss": 1.7512, "step": 13393 }, { "epoch": 0.65400390625, "grad_norm": 0.24590085446834564, "learning_rate": 0.0001735706180930221, "loss": 1.7635, "step": 13394 }, { "epoch": 0.654052734375, "grad_norm": 0.25153249502182007, "learning_rate": 0.00017353935270181508, "loss": 1.7625, "step": 13395 }, { "epoch": 0.6541015625, "grad_norm": 0.26410067081451416, "learning_rate": 0.0001735080897695882, "loss": 1.7545, "step": 13396 }, { "epoch": 0.654150390625, "grad_norm": 0.2674253582954407, "learning_rate": 0.00017347682929709952, "loss": 1.7714, "step": 13397 }, { "epoch": 0.65419921875, "grad_norm": 0.22331427037715912, "learning_rate": 0.0001734455712851063, "loss": 1.7517, "step": 13398 }, { "epoch": 0.654248046875, "grad_norm": 0.25352734327316284, "learning_rate": 0.00017341431573436627, "loss": 1.7697, "step": 13399 }, { "epoch": 0.654296875, "grad_norm": 0.24314706027507782, "learning_rate": 0.0001733830626456369, "loss": 1.7435, "step": 13400 }, { "epoch": 0.654345703125, "grad_norm": 0.22729593515396118, "learning_rate": 0.00017335181201967565, "loss": 1.7598, "step": 13401 }, { "epoch": 0.65439453125, "grad_norm": 0.2061229795217514, "learning_rate": 0.00017332056385723993, "loss": 1.7718, "step": 13402 }, { "epoch": 0.654443359375, "grad_norm": 0.21371528506278992, "learning_rate": 0.000173289318159087, "loss": 1.75, "step": 13403 }, { "epoch": 0.6544921875, "grad_norm": 0.1932263970375061, "learning_rate": 0.00017325807492597417, "loss": 1.763, "step": 13404 }, { "epoch": 0.654541015625, "grad_norm": 0.21237069368362427, "learning_rate": 0.00017322683415865864, "loss": 1.7571, "step": 13405 }, { "epoch": 0.65458984375, "grad_norm": 0.19321215152740479, "learning_rate": 0.0001731955958578975, "loss": 1.7662, "step": 13406 }, { "epoch": 0.654638671875, "grad_norm": 0.21819083392620087, "learning_rate": 0.000173164360024448, "loss": 1.7554, "step": 13407 }, { "epoch": 0.6546875, "grad_norm": 0.21501699090003967, "learning_rate": 0.00017313312665906693, "loss": 1.7646, "step": 13408 }, { "epoch": 0.654736328125, "grad_norm": 0.2087414413690567, "learning_rate": 0.00017310189576251147, "loss": 1.7652, "step": 13409 }, { "epoch": 0.65478515625, "grad_norm": 0.2009798288345337, "learning_rate": 0.00017307066733553828, "loss": 1.7416, "step": 13410 }, { "epoch": 0.654833984375, "grad_norm": 0.21672531962394714, "learning_rate": 0.00017303944137890453, "loss": 1.7505, "step": 13411 }, { "epoch": 0.6548828125, "grad_norm": 0.18983593583106995, "learning_rate": 0.0001730082178933667, "loss": 1.7871, "step": 13412 }, { "epoch": 0.654931640625, "grad_norm": 0.19100111722946167, "learning_rate": 0.0001729769968796818, "loss": 1.7342, "step": 13413 }, { "epoch": 0.65498046875, "grad_norm": 0.2298450618982315, "learning_rate": 0.0001729457783386062, "loss": 1.7349, "step": 13414 }, { "epoch": 0.655029296875, "grad_norm": 0.23185376822948456, "learning_rate": 0.00017291456227089676, "loss": 1.7667, "step": 13415 }, { "epoch": 0.655078125, "grad_norm": 0.20867575705051422, "learning_rate": 0.00017288334867730982, "loss": 1.7626, "step": 13416 }, { "epoch": 0.655126953125, "grad_norm": 0.25181740522384644, "learning_rate": 0.000172852137558602, "loss": 1.7609, "step": 13417 }, { "epoch": 0.65517578125, "grad_norm": 0.21355856955051422, "learning_rate": 0.0001728209289155297, "loss": 1.7459, "step": 13418 }, { "epoch": 0.655224609375, "grad_norm": 0.22716552019119263, "learning_rate": 0.00017278972274884926, "loss": 1.7599, "step": 13419 }, { "epoch": 0.6552734375, "grad_norm": 0.23972366750240326, "learning_rate": 0.000172758519059317, "loss": 1.7678, "step": 13420 }, { "epoch": 0.655322265625, "grad_norm": 0.21259453892707825, "learning_rate": 0.00017272731784768918, "loss": 1.7732, "step": 13421 }, { "epoch": 0.65537109375, "grad_norm": 0.20848405361175537, "learning_rate": 0.000172696119114722, "loss": 1.7369, "step": 13422 }, { "epoch": 0.655419921875, "grad_norm": 0.1928834319114685, "learning_rate": 0.00017266492286117165, "loss": 1.7706, "step": 13423 }, { "epoch": 0.65546875, "grad_norm": 0.20702415704727173, "learning_rate": 0.0001726337290877939, "loss": 1.7299, "step": 13424 }, { "epoch": 0.655517578125, "grad_norm": 0.2276616245508194, "learning_rate": 0.00017260253779534515, "loss": 1.7558, "step": 13425 }, { "epoch": 0.65556640625, "grad_norm": 0.19394296407699585, "learning_rate": 0.00017257134898458098, "loss": 1.759, "step": 13426 }, { "epoch": 0.655615234375, "grad_norm": 0.21526388823986053, "learning_rate": 0.00017254016265625766, "loss": 1.7582, "step": 13427 }, { "epoch": 0.6556640625, "grad_norm": 0.18967241048812866, "learning_rate": 0.00017250897881113063, "loss": 1.7536, "step": 13428 }, { "epoch": 0.655712890625, "grad_norm": 0.20225752890110016, "learning_rate": 0.000172477797449956, "loss": 1.7743, "step": 13429 }, { "epoch": 0.65576171875, "grad_norm": 0.18930941820144653, "learning_rate": 0.00017244661857348915, "loss": 1.7595, "step": 13430 }, { "epoch": 0.655810546875, "grad_norm": 0.2066512107849121, "learning_rate": 0.00017241544218248606, "loss": 1.7533, "step": 13431 }, { "epoch": 0.655859375, "grad_norm": 0.23650863766670227, "learning_rate": 0.00017238426827770206, "loss": 1.7592, "step": 13432 }, { "epoch": 0.655908203125, "grad_norm": 0.23277415335178375, "learning_rate": 0.00017235309685989275, "loss": 1.7898, "step": 13433 }, { "epoch": 0.65595703125, "grad_norm": 0.20490762591362, "learning_rate": 0.0001723219279298136, "loss": 1.7637, "step": 13434 }, { "epoch": 0.656005859375, "grad_norm": 0.2529604732990265, "learning_rate": 0.00017229076148822005, "loss": 1.7621, "step": 13435 }, { "epoch": 0.6560546875, "grad_norm": 0.20971199870109558, "learning_rate": 0.0001722595975358674, "loss": 1.7436, "step": 13436 }, { "epoch": 0.656103515625, "grad_norm": 0.24611923098564148, "learning_rate": 0.0001722284360735109, "loss": 1.7808, "step": 13437 }, { "epoch": 0.65615234375, "grad_norm": 0.4778752028942108, "learning_rate": 0.00017219727710190587, "loss": 1.7641, "step": 13438 }, { "epoch": 0.656201171875, "grad_norm": 0.22370608150959015, "learning_rate": 0.00017216612062180748, "loss": 1.7835, "step": 13439 }, { "epoch": 0.65625, "grad_norm": 0.267514169216156, "learning_rate": 0.0001721349666339706, "loss": 1.7545, "step": 13440 }, { "epoch": 0.656298828125, "grad_norm": 0.24048031866550446, "learning_rate": 0.00017210381513915062, "loss": 1.755, "step": 13441 }, { "epoch": 0.65634765625, "grad_norm": 0.2054688185453415, "learning_rate": 0.00017207266613810217, "loss": 1.7683, "step": 13442 }, { "epoch": 0.656396484375, "grad_norm": 0.19325676560401917, "learning_rate": 0.00017204151963158055, "loss": 1.73, "step": 13443 }, { "epoch": 0.6564453125, "grad_norm": 0.2334774285554886, "learning_rate": 0.00017201037562034023, "loss": 1.7647, "step": 13444 }, { "epoch": 0.656494140625, "grad_norm": 0.20249265432357788, "learning_rate": 0.00017197923410513635, "loss": 1.7399, "step": 13445 }, { "epoch": 0.65654296875, "grad_norm": 0.2193009853363037, "learning_rate": 0.00017194809508672335, "loss": 1.7268, "step": 13446 }, { "epoch": 0.656591796875, "grad_norm": 0.17370174825191498, "learning_rate": 0.0001719169585658562, "loss": 1.7512, "step": 13447 }, { "epoch": 0.656640625, "grad_norm": 0.22946006059646606, "learning_rate": 0.00017188582454328932, "loss": 1.7653, "step": 13448 }, { "epoch": 0.656689453125, "grad_norm": 0.24427178502082825, "learning_rate": 0.0001718546930197773, "loss": 1.7329, "step": 13449 }, { "epoch": 0.65673828125, "grad_norm": 0.20789775252342224, "learning_rate": 0.00017182356399607468, "loss": 1.7596, "step": 13450 }, { "epoch": 0.656787109375, "grad_norm": 0.2933916449546814, "learning_rate": 0.00017179243747293585, "loss": 1.7548, "step": 13451 }, { "epoch": 0.6568359375, "grad_norm": 0.24038437008857727, "learning_rate": 0.00017176131345111523, "loss": 1.7408, "step": 13452 }, { "epoch": 0.656884765625, "grad_norm": 0.23682382702827454, "learning_rate": 0.00017173019193136713, "loss": 1.758, "step": 13453 }, { "epoch": 0.65693359375, "grad_norm": 0.2907555103302002, "learning_rate": 0.0001716990729144458, "loss": 1.7638, "step": 13454 }, { "epoch": 0.656982421875, "grad_norm": 0.22608131170272827, "learning_rate": 0.00017166795640110544, "loss": 1.7614, "step": 13455 }, { "epoch": 0.65703125, "grad_norm": 0.2313416749238968, "learning_rate": 0.00017163684239210013, "loss": 1.7536, "step": 13456 }, { "epoch": 0.657080078125, "grad_norm": 0.2484212964773178, "learning_rate": 0.0001716057308881841, "loss": 1.7534, "step": 13457 }, { "epoch": 0.65712890625, "grad_norm": 0.22502447664737701, "learning_rate": 0.00017157462189011104, "loss": 1.7596, "step": 13458 }, { "epoch": 0.657177734375, "grad_norm": 0.22983895242214203, "learning_rate": 0.00017154351539863533, "loss": 1.7286, "step": 13459 }, { "epoch": 0.6572265625, "grad_norm": 0.2120872139930725, "learning_rate": 0.00017151241141451044, "loss": 1.7668, "step": 13460 }, { "epoch": 0.657275390625, "grad_norm": 0.24016185104846954, "learning_rate": 0.00017148130993849056, "loss": 1.7483, "step": 13461 }, { "epoch": 0.65732421875, "grad_norm": 0.20489878952503204, "learning_rate": 0.00017145021097132916, "loss": 1.7371, "step": 13462 }, { "epoch": 0.657373046875, "grad_norm": 0.23278717696666718, "learning_rate": 0.00017141911451378023, "loss": 1.7688, "step": 13463 }, { "epoch": 0.657421875, "grad_norm": 0.20658905804157257, "learning_rate": 0.00017138802056659714, "loss": 1.7653, "step": 13464 }, { "epoch": 0.657470703125, "grad_norm": 0.2463936060667038, "learning_rate": 0.0001713569291305338, "loss": 1.7548, "step": 13465 }, { "epoch": 0.65751953125, "grad_norm": 0.21332204341888428, "learning_rate": 0.0001713258402063434, "loss": 1.7457, "step": 13466 }, { "epoch": 0.657568359375, "grad_norm": 0.23099559545516968, "learning_rate": 0.00017129475379477956, "loss": 1.7702, "step": 13467 }, { "epoch": 0.6576171875, "grad_norm": 0.28146442770957947, "learning_rate": 0.0001712636698965957, "loss": 1.7607, "step": 13468 }, { "epoch": 0.657666015625, "grad_norm": 0.2007402628660202, "learning_rate": 0.00017123258851254515, "loss": 1.7646, "step": 13469 }, { "epoch": 0.65771484375, "grad_norm": 0.29630953073501587, "learning_rate": 0.00017120150964338116, "loss": 1.7328, "step": 13470 }, { "epoch": 0.657763671875, "grad_norm": 0.26249295473098755, "learning_rate": 0.00017117043328985699, "loss": 1.7622, "step": 13471 }, { "epoch": 0.6578125, "grad_norm": 0.22364464402198792, "learning_rate": 0.0001711393594527258, "loss": 1.74, "step": 13472 }, { "epoch": 0.657861328125, "grad_norm": 0.23564092814922333, "learning_rate": 0.00017110828813274073, "loss": 1.7883, "step": 13473 }, { "epoch": 0.65791015625, "grad_norm": 0.23297570645809174, "learning_rate": 0.00017107721933065463, "loss": 1.7629, "step": 13474 }, { "epoch": 0.657958984375, "grad_norm": 0.23278814554214478, "learning_rate": 0.00017104615304722073, "loss": 1.7557, "step": 13475 }, { "epoch": 0.6580078125, "grad_norm": 0.2579677999019623, "learning_rate": 0.00017101508928319168, "loss": 1.7601, "step": 13476 }, { "epoch": 0.658056640625, "grad_norm": 0.21046310663223267, "learning_rate": 0.00017098402803932065, "loss": 1.7649, "step": 13477 }, { "epoch": 0.65810546875, "grad_norm": 0.25839799642562866, "learning_rate": 0.00017095296931636013, "loss": 1.7272, "step": 13478 }, { "epoch": 0.658154296875, "grad_norm": 0.21911685168743134, "learning_rate": 0.00017092191311506315, "loss": 1.7398, "step": 13479 }, { "epoch": 0.658203125, "grad_norm": 0.2296428680419922, "learning_rate": 0.00017089085943618198, "loss": 1.7472, "step": 13480 }, { "epoch": 0.658251953125, "grad_norm": 0.22421006858348846, "learning_rate": 0.00017085980828046972, "loss": 1.7761, "step": 13481 }, { "epoch": 0.65830078125, "grad_norm": 0.23303820192813873, "learning_rate": 0.00017082875964867855, "loss": 1.7517, "step": 13482 }, { "epoch": 0.658349609375, "grad_norm": 0.20314384996891022, "learning_rate": 0.0001707977135415611, "loss": 1.7463, "step": 13483 }, { "epoch": 0.6583984375, "grad_norm": 0.2121877372264862, "learning_rate": 0.00017076666995986974, "loss": 1.7336, "step": 13484 }, { "epoch": 0.658447265625, "grad_norm": 0.22153620421886444, "learning_rate": 0.0001707356289043569, "loss": 1.7609, "step": 13485 }, { "epoch": 0.65849609375, "grad_norm": 0.2028992772102356, "learning_rate": 0.00017070459037577484, "loss": 1.7713, "step": 13486 }, { "epoch": 0.658544921875, "grad_norm": 0.27480170130729675, "learning_rate": 0.00017067355437487585, "loss": 1.7815, "step": 13487 }, { "epoch": 0.65859375, "grad_norm": 0.2338757961988449, "learning_rate": 0.00017064252090241212, "loss": 1.7572, "step": 13488 }, { "epoch": 0.658642578125, "grad_norm": 0.20152990520000458, "learning_rate": 0.00017061148995913567, "loss": 1.7743, "step": 13489 }, { "epoch": 0.65869140625, "grad_norm": 0.2549814283847809, "learning_rate": 0.00017058046154579866, "loss": 1.7683, "step": 13490 }, { "epoch": 0.658740234375, "grad_norm": 0.22000306844711304, "learning_rate": 0.0001705494356631531, "loss": 1.7821, "step": 13491 }, { "epoch": 0.6587890625, "grad_norm": 0.21234539151191711, "learning_rate": 0.00017051841231195076, "loss": 1.7604, "step": 13492 }, { "epoch": 0.658837890625, "grad_norm": 0.2335890829563141, "learning_rate": 0.00017048739149294379, "loss": 1.8059, "step": 13493 }, { "epoch": 0.65888671875, "grad_norm": 0.215203195810318, "learning_rate": 0.00017045637320688367, "loss": 1.7383, "step": 13494 }, { "epoch": 0.658935546875, "grad_norm": 0.24413353204727173, "learning_rate": 0.0001704253574545225, "loss": 1.7436, "step": 13495 }, { "epoch": 0.658984375, "grad_norm": 0.23233065009117126, "learning_rate": 0.00017039434423661166, "loss": 1.76, "step": 13496 }, { "epoch": 0.659033203125, "grad_norm": 0.24356743693351746, "learning_rate": 0.0001703633335539031, "loss": 1.7479, "step": 13497 }, { "epoch": 0.65908203125, "grad_norm": 0.23178930580615997, "learning_rate": 0.0001703323254071481, "loss": 1.7376, "step": 13498 }, { "epoch": 0.659130859375, "grad_norm": 0.2569558024406433, "learning_rate": 0.00017030131979709832, "loss": 1.7846, "step": 13499 }, { "epoch": 0.6591796875, "grad_norm": 0.25634878873825073, "learning_rate": 0.00017027031672450514, "loss": 1.753, "step": 13500 }, { "epoch": 0.659228515625, "grad_norm": 0.20760303735733032, "learning_rate": 0.00017023931619011997, "loss": 1.7424, "step": 13501 }, { "epoch": 0.65927734375, "grad_norm": 0.23933015763759613, "learning_rate": 0.00017020831819469413, "loss": 1.758, "step": 13502 }, { "epoch": 0.659326171875, "grad_norm": 0.19287915527820587, "learning_rate": 0.00017017732273897891, "loss": 1.7445, "step": 13503 }, { "epoch": 0.659375, "grad_norm": 0.25421616435050964, "learning_rate": 0.00017014632982372547, "loss": 1.7538, "step": 13504 }, { "epoch": 0.659423828125, "grad_norm": 0.23366940021514893, "learning_rate": 0.000170115339449685, "loss": 1.7622, "step": 13505 }, { "epoch": 0.65947265625, "grad_norm": 0.22473469376564026, "learning_rate": 0.00017008435161760852, "loss": 1.7565, "step": 13506 }, { "epoch": 0.659521484375, "grad_norm": 0.24711495637893677, "learning_rate": 0.00017005336632824715, "loss": 1.763, "step": 13507 }, { "epoch": 0.6595703125, "grad_norm": 0.20953409373760223, "learning_rate": 0.0001700223835823516, "loss": 1.7491, "step": 13508 }, { "epoch": 0.659619140625, "grad_norm": 0.2509883642196655, "learning_rate": 0.00016999140338067305, "loss": 1.7648, "step": 13509 }, { "epoch": 0.65966796875, "grad_norm": 0.22237123548984528, "learning_rate": 0.00016996042572396208, "loss": 1.7347, "step": 13510 }, { "epoch": 0.659716796875, "grad_norm": 0.25700563192367554, "learning_rate": 0.00016992945061296971, "loss": 1.7485, "step": 13511 }, { "epoch": 0.659765625, "grad_norm": 0.22583413124084473, "learning_rate": 0.00016989847804844637, "loss": 1.7676, "step": 13512 }, { "epoch": 0.659814453125, "grad_norm": 0.22261323034763336, "learning_rate": 0.00016986750803114307, "loss": 1.7866, "step": 13513 }, { "epoch": 0.65986328125, "grad_norm": 0.22525477409362793, "learning_rate": 0.00016983654056180992, "loss": 1.7325, "step": 13514 }, { "epoch": 0.659912109375, "grad_norm": 0.22111468017101288, "learning_rate": 0.0001698055756411979, "loss": 1.7323, "step": 13515 }, { "epoch": 0.6599609375, "grad_norm": 0.23199419677257538, "learning_rate": 0.00016977461327005722, "loss": 1.7485, "step": 13516 }, { "epoch": 0.660009765625, "grad_norm": 0.2184438407421112, "learning_rate": 0.00016974365344913828, "loss": 1.7558, "step": 13517 }, { "epoch": 0.66005859375, "grad_norm": 0.2440384179353714, "learning_rate": 0.0001697126961791915, "loss": 1.7408, "step": 13518 }, { "epoch": 0.660107421875, "grad_norm": 0.23481900990009308, "learning_rate": 0.00016968174146096706, "loss": 1.7767, "step": 13519 }, { "epoch": 0.66015625, "grad_norm": 0.2568110227584839, "learning_rate": 0.00016965078929521526, "loss": 1.7449, "step": 13520 }, { "epoch": 0.660205078125, "grad_norm": 0.2288263738155365, "learning_rate": 0.0001696198396826862, "loss": 1.7654, "step": 13521 }, { "epoch": 0.66025390625, "grad_norm": 0.2385341376066208, "learning_rate": 0.00016958889262413002, "loss": 1.742, "step": 13522 }, { "epoch": 0.660302734375, "grad_norm": 0.20153914391994476, "learning_rate": 0.0001695579481202967, "loss": 1.767, "step": 13523 }, { "epoch": 0.6603515625, "grad_norm": 0.2519298493862152, "learning_rate": 0.0001695270061719362, "loss": 1.7495, "step": 13524 }, { "epoch": 0.660400390625, "grad_norm": 0.1869187355041504, "learning_rate": 0.00016949606677979856, "loss": 1.7666, "step": 13525 }, { "epoch": 0.66044921875, "grad_norm": 0.23936425149440765, "learning_rate": 0.0001694651299446333, "loss": 1.7551, "step": 13526 }, { "epoch": 0.660498046875, "grad_norm": 0.20389486849308014, "learning_rate": 0.00016943419566719058, "loss": 1.739, "step": 13527 }, { "epoch": 0.660546875, "grad_norm": 0.23066650331020355, "learning_rate": 0.0001694032639482198, "loss": 1.7574, "step": 13528 }, { "epoch": 0.660595703125, "grad_norm": 0.21323615312576294, "learning_rate": 0.0001693723347884709, "loss": 1.7728, "step": 13529 }, { "epoch": 0.66064453125, "grad_norm": 0.22557573020458221, "learning_rate": 0.00016934140818869315, "loss": 1.7293, "step": 13530 }, { "epoch": 0.660693359375, "grad_norm": 0.23109170794487, "learning_rate": 0.00016931048414963641, "loss": 1.776, "step": 13531 }, { "epoch": 0.6607421875, "grad_norm": 0.3598984181880951, "learning_rate": 0.00016927956267204992, "loss": 1.7677, "step": 13532 }, { "epoch": 0.660791015625, "grad_norm": 0.21461856365203857, "learning_rate": 0.0001692486437566832, "loss": 1.7662, "step": 13533 }, { "epoch": 0.66083984375, "grad_norm": 0.23645465075969696, "learning_rate": 0.00016921772740428553, "loss": 1.7625, "step": 13534 }, { "epoch": 0.660888671875, "grad_norm": 0.21704626083374023, "learning_rate": 0.00016918681361560622, "loss": 1.7419, "step": 13535 }, { "epoch": 0.6609375, "grad_norm": 0.23141126334667206, "learning_rate": 0.0001691559023913945, "loss": 1.7628, "step": 13536 }, { "epoch": 0.660986328125, "grad_norm": 0.22453747689723969, "learning_rate": 0.0001691249937323995, "loss": 1.7637, "step": 13537 }, { "epoch": 0.66103515625, "grad_norm": 0.2292078584432602, "learning_rate": 0.0001690940876393704, "loss": 1.7578, "step": 13538 }, { "epoch": 0.661083984375, "grad_norm": 0.22395892441272736, "learning_rate": 0.0001690631841130561, "loss": 1.7514, "step": 13539 }, { "epoch": 0.6611328125, "grad_norm": 0.24801571667194366, "learning_rate": 0.00016903228315420565, "loss": 1.7725, "step": 13540 }, { "epoch": 0.661181640625, "grad_norm": 0.2150566130876541, "learning_rate": 0.000169001384763568, "loss": 1.7439, "step": 13541 }, { "epoch": 0.66123046875, "grad_norm": 0.2840680778026581, "learning_rate": 0.00016897048894189191, "loss": 1.7607, "step": 13542 }, { "epoch": 0.661279296875, "grad_norm": 0.20069944858551025, "learning_rate": 0.00016893959568992618, "loss": 1.7449, "step": 13543 }, { "epoch": 0.661328125, "grad_norm": 0.22238865494728088, "learning_rate": 0.00016890870500841962, "loss": 1.7689, "step": 13544 }, { "epoch": 0.661376953125, "grad_norm": 0.20407520234584808, "learning_rate": 0.00016887781689812087, "loss": 1.7599, "step": 13545 }, { "epoch": 0.66142578125, "grad_norm": 0.18955551087856293, "learning_rate": 0.0001688469313597783, "loss": 1.7407, "step": 13546 }, { "epoch": 0.661474609375, "grad_norm": 0.21741165220737457, "learning_rate": 0.00016881604839414082, "loss": 1.745, "step": 13547 }, { "epoch": 0.6615234375, "grad_norm": 0.21619492769241333, "learning_rate": 0.00016878516800195658, "loss": 1.7537, "step": 13548 }, { "epoch": 0.661572265625, "grad_norm": 0.25070473551750183, "learning_rate": 0.00016875429018397425, "loss": 1.7521, "step": 13549 }, { "epoch": 0.66162109375, "grad_norm": 0.3346691429615021, "learning_rate": 0.00016872341494094195, "loss": 1.7316, "step": 13550 }, { "epoch": 0.661669921875, "grad_norm": 0.20884209871292114, "learning_rate": 0.00016869254227360814, "loss": 1.7504, "step": 13551 }, { "epoch": 0.66171875, "grad_norm": 0.25061866641044617, "learning_rate": 0.00016866167218272093, "loss": 1.7493, "step": 13552 }, { "epoch": 0.661767578125, "grad_norm": 0.2277701199054718, "learning_rate": 0.00016863080466902853, "loss": 1.7744, "step": 13553 }, { "epoch": 0.66181640625, "grad_norm": 0.24673524498939514, "learning_rate": 0.00016859993973327903, "loss": 1.7823, "step": 13554 }, { "epoch": 0.661865234375, "grad_norm": 0.27812978625297546, "learning_rate": 0.00016856907737622048, "loss": 1.7916, "step": 13555 }, { "epoch": 0.6619140625, "grad_norm": 0.2663078308105469, "learning_rate": 0.00016853821759860085, "loss": 1.7701, "step": 13556 }, { "epoch": 0.661962890625, "grad_norm": 0.23583415150642395, "learning_rate": 0.00016850736040116803, "loss": 1.7394, "step": 13557 }, { "epoch": 0.66201171875, "grad_norm": 0.23573370277881622, "learning_rate": 0.00016847650578466993, "loss": 1.7435, "step": 13558 }, { "epoch": 0.662060546875, "grad_norm": 0.190225288271904, "learning_rate": 0.00016844565374985425, "loss": 1.7825, "step": 13559 }, { "epoch": 0.662109375, "grad_norm": 0.21907220780849457, "learning_rate": 0.00016841480429746875, "loss": 1.7609, "step": 13560 }, { "epoch": 0.662158203125, "grad_norm": 0.20500974357128143, "learning_rate": 0.00016838395742826116, "loss": 1.7798, "step": 13561 }, { "epoch": 0.66220703125, "grad_norm": 0.22039946913719177, "learning_rate": 0.00016835311314297897, "loss": 1.7701, "step": 13562 }, { "epoch": 0.662255859375, "grad_norm": 0.19135764241218567, "learning_rate": 0.0001683222714423697, "loss": 1.7722, "step": 13563 }, { "epoch": 0.6623046875, "grad_norm": 0.21742765605449677, "learning_rate": 0.00016829143232718096, "loss": 1.7587, "step": 13564 }, { "epoch": 0.662353515625, "grad_norm": 0.20025400817394257, "learning_rate": 0.00016826059579816018, "loss": 1.7601, "step": 13565 }, { "epoch": 0.66240234375, "grad_norm": 0.23558230698108673, "learning_rate": 0.00016822976185605453, "loss": 1.7777, "step": 13566 }, { "epoch": 0.662451171875, "grad_norm": 0.2255455106496811, "learning_rate": 0.00016819893050161133, "loss": 1.7283, "step": 13567 }, { "epoch": 0.6625, "grad_norm": 0.23097115755081177, "learning_rate": 0.0001681681017355779, "loss": 1.7318, "step": 13568 }, { "epoch": 0.662548828125, "grad_norm": 0.21209682524204254, "learning_rate": 0.00016813727555870135, "loss": 1.7858, "step": 13569 }, { "epoch": 0.66259765625, "grad_norm": 0.2669881284236908, "learning_rate": 0.00016810645197172874, "loss": 1.78, "step": 13570 }, { "epoch": 0.662646484375, "grad_norm": 0.1883721798658371, "learning_rate": 0.00016807563097540718, "loss": 1.7688, "step": 13571 }, { "epoch": 0.6626953125, "grad_norm": 0.2530866265296936, "learning_rate": 0.0001680448125704836, "loss": 1.7606, "step": 13572 }, { "epoch": 0.662744140625, "grad_norm": 0.21615274250507355, "learning_rate": 0.0001680139967577049, "loss": 1.7764, "step": 13573 }, { "epoch": 0.66279296875, "grad_norm": 0.23901429772377014, "learning_rate": 0.00016798318353781794, "loss": 1.7681, "step": 13574 }, { "epoch": 0.662841796875, "grad_norm": 0.22381746768951416, "learning_rate": 0.00016795237291156954, "loss": 1.7791, "step": 13575 }, { "epoch": 0.662890625, "grad_norm": 0.24029473960399628, "learning_rate": 0.00016792156487970635, "loss": 1.7535, "step": 13576 }, { "epoch": 0.662939453125, "grad_norm": 0.2055678367614746, "learning_rate": 0.00016789075944297505, "loss": 1.7556, "step": 13577 }, { "epoch": 0.66298828125, "grad_norm": 0.2497011423110962, "learning_rate": 0.00016785995660212227, "loss": 1.7546, "step": 13578 }, { "epoch": 0.663037109375, "grad_norm": 0.22095008194446564, "learning_rate": 0.0001678291563578945, "loss": 1.7127, "step": 13579 }, { "epoch": 0.6630859375, "grad_norm": 0.2708148956298828, "learning_rate": 0.0001677983587110382, "loss": 1.7544, "step": 13580 }, { "epoch": 0.663134765625, "grad_norm": 0.22408249974250793, "learning_rate": 0.0001677675636622999, "loss": 1.7789, "step": 13581 }, { "epoch": 0.66318359375, "grad_norm": 0.26632463932037354, "learning_rate": 0.00016773677121242574, "loss": 1.7668, "step": 13582 }, { "epoch": 0.663232421875, "grad_norm": 0.20112183690071106, "learning_rate": 0.00016770598136216217, "loss": 1.7668, "step": 13583 }, { "epoch": 0.66328125, "grad_norm": 0.26808813214302063, "learning_rate": 0.0001676751941122554, "loss": 1.7441, "step": 13584 }, { "epoch": 0.663330078125, "grad_norm": 0.21027988195419312, "learning_rate": 0.00016764440946345145, "loss": 1.7531, "step": 13585 }, { "epoch": 0.66337890625, "grad_norm": 0.22968535125255585, "learning_rate": 0.00016761362741649644, "loss": 1.7397, "step": 13586 }, { "epoch": 0.663427734375, "grad_norm": 0.25825825333595276, "learning_rate": 0.0001675828479721365, "loss": 1.7629, "step": 13587 }, { "epoch": 0.6634765625, "grad_norm": 0.21907879412174225, "learning_rate": 0.0001675520711311175, "loss": 1.7652, "step": 13588 }, { "epoch": 0.663525390625, "grad_norm": 0.26226717233657837, "learning_rate": 0.0001675212968941854, "loss": 1.7596, "step": 13589 }, { "epoch": 0.66357421875, "grad_norm": 0.2054055631160736, "learning_rate": 0.000167490525262086, "loss": 1.7684, "step": 13590 }, { "epoch": 0.663623046875, "grad_norm": 0.24831148982048035, "learning_rate": 0.00016745975623556512, "loss": 1.7576, "step": 13591 }, { "epoch": 0.663671875, "grad_norm": 0.2066970318555832, "learning_rate": 0.00016742898981536843, "loss": 1.7468, "step": 13592 }, { "epoch": 0.663720703125, "grad_norm": 0.254884272813797, "learning_rate": 0.00016739822600224158, "loss": 1.7471, "step": 13593 }, { "epoch": 0.66376953125, "grad_norm": 0.2524746060371399, "learning_rate": 0.0001673674647969302, "loss": 1.7803, "step": 13594 }, { "epoch": 0.663818359375, "grad_norm": 0.21062105894088745, "learning_rate": 0.00016733670620017977, "loss": 1.7894, "step": 13595 }, { "epoch": 0.6638671875, "grad_norm": 0.2414965033531189, "learning_rate": 0.00016730595021273572, "loss": 1.759, "step": 13596 }, { "epoch": 0.663916015625, "grad_norm": 0.22008711099624634, "learning_rate": 0.00016727519683534353, "loss": 1.7611, "step": 13597 }, { "epoch": 0.66396484375, "grad_norm": 0.2025826871395111, "learning_rate": 0.0001672444460687485, "loss": 1.7679, "step": 13598 }, { "epoch": 0.664013671875, "grad_norm": 0.19531132280826569, "learning_rate": 0.00016721369791369584, "loss": 1.7773, "step": 13599 }, { "epoch": 0.6640625, "grad_norm": 0.20046669244766235, "learning_rate": 0.00016718295237093094, "loss": 1.7425, "step": 13600 }, { "epoch": 0.664111328125, "grad_norm": 0.18941977620124817, "learning_rate": 0.00016715220944119863, "loss": 1.7544, "step": 13601 }, { "epoch": 0.66416015625, "grad_norm": 0.22384874522686005, "learning_rate": 0.00016712146912524432, "loss": 1.7893, "step": 13602 }, { "epoch": 0.664208984375, "grad_norm": 0.19936048984527588, "learning_rate": 0.00016709073142381282, "loss": 1.789, "step": 13603 }, { "epoch": 0.6642578125, "grad_norm": 0.21466711163520813, "learning_rate": 0.00016705999633764908, "loss": 1.7703, "step": 13604 }, { "epoch": 0.664306640625, "grad_norm": 0.2061040848493576, "learning_rate": 0.00016702926386749813, "loss": 1.7571, "step": 13605 }, { "epoch": 0.66435546875, "grad_norm": 0.21379798650741577, "learning_rate": 0.00016699853401410465, "loss": 1.7568, "step": 13606 }, { "epoch": 0.664404296875, "grad_norm": 0.23359034955501556, "learning_rate": 0.00016696780677821342, "loss": 1.7432, "step": 13607 }, { "epoch": 0.664453125, "grad_norm": 0.24647222459316254, "learning_rate": 0.0001669370821605693, "loss": 1.7733, "step": 13608 }, { "epoch": 0.664501953125, "grad_norm": 0.19406600296497345, "learning_rate": 0.00016690636016191673, "loss": 1.743, "step": 13609 }, { "epoch": 0.66455078125, "grad_norm": 0.21583837270736694, "learning_rate": 0.00016687564078300042, "loss": 1.7724, "step": 13610 }, { "epoch": 0.664599609375, "grad_norm": 0.23427359759807587, "learning_rate": 0.00016684492402456474, "loss": 1.7597, "step": 13611 }, { "epoch": 0.6646484375, "grad_norm": 0.1886959820985794, "learning_rate": 0.0001668142098873543, "loss": 1.778, "step": 13612 }, { "epoch": 0.664697265625, "grad_norm": 0.20667724311351776, "learning_rate": 0.00016678349837211337, "loss": 1.7093, "step": 13613 }, { "epoch": 0.66474609375, "grad_norm": 0.18385301530361176, "learning_rate": 0.00016675278947958627, "loss": 1.7582, "step": 13614 }, { "epoch": 0.664794921875, "grad_norm": 0.2322179675102234, "learning_rate": 0.00016672208321051731, "loss": 1.7707, "step": 13615 }, { "epoch": 0.66484375, "grad_norm": 0.20823770761489868, "learning_rate": 0.00016669137956565075, "loss": 1.7311, "step": 13616 }, { "epoch": 0.664892578125, "grad_norm": 0.22790881991386414, "learning_rate": 0.00016666067854573043, "loss": 1.7403, "step": 13617 }, { "epoch": 0.66494140625, "grad_norm": 0.2321767956018448, "learning_rate": 0.0001666299801515008, "loss": 1.8016, "step": 13618 }, { "epoch": 0.664990234375, "grad_norm": 0.20309323072433472, "learning_rate": 0.00016659928438370546, "loss": 1.7665, "step": 13619 }, { "epoch": 0.6650390625, "grad_norm": 0.2231050580739975, "learning_rate": 0.00016656859124308872, "loss": 1.7621, "step": 13620 }, { "epoch": 0.665087890625, "grad_norm": 0.2629448473453522, "learning_rate": 0.00016653790073039416, "loss": 1.758, "step": 13621 }, { "epoch": 0.66513671875, "grad_norm": 0.2589092254638672, "learning_rate": 0.00016650721284636584, "loss": 1.7589, "step": 13622 }, { "epoch": 0.665185546875, "grad_norm": 0.21099236607551575, "learning_rate": 0.00016647652759174732, "loss": 1.7702, "step": 13623 }, { "epoch": 0.665234375, "grad_norm": 0.2529977858066559, "learning_rate": 0.00016644584496728232, "loss": 1.7544, "step": 13624 }, { "epoch": 0.665283203125, "grad_norm": 0.23855066299438477, "learning_rate": 0.00016641516497371446, "loss": 1.7582, "step": 13625 }, { "epoch": 0.66533203125, "grad_norm": 0.23387950658798218, "learning_rate": 0.00016638448761178734, "loss": 1.7568, "step": 13626 }, { "epoch": 0.665380859375, "grad_norm": 0.2461162507534027, "learning_rate": 0.00016635381288224442, "loss": 1.7471, "step": 13627 }, { "epoch": 0.6654296875, "grad_norm": 0.22956916689872742, "learning_rate": 0.00016632314078582916, "loss": 1.7485, "step": 13628 }, { "epoch": 0.665478515625, "grad_norm": 0.30430352687835693, "learning_rate": 0.00016629247132328486, "loss": 1.7488, "step": 13629 }, { "epoch": 0.66552734375, "grad_norm": 0.2827359139919281, "learning_rate": 0.00016626180449535487, "loss": 1.767, "step": 13630 }, { "epoch": 0.665576171875, "grad_norm": 0.2110675722360611, "learning_rate": 0.00016623114030278236, "loss": 1.7597, "step": 13631 }, { "epoch": 0.665625, "grad_norm": 0.27290603518486023, "learning_rate": 0.0001662004787463106, "loss": 1.7677, "step": 13632 }, { "epoch": 0.665673828125, "grad_norm": 0.21065759658813477, "learning_rate": 0.00016616981982668262, "loss": 1.7424, "step": 13633 }, { "epoch": 0.66572265625, "grad_norm": 0.2553914189338684, "learning_rate": 0.0001661391635446416, "loss": 1.7532, "step": 13634 }, { "epoch": 0.665771484375, "grad_norm": 0.23015382885932922, "learning_rate": 0.00016610850990093022, "loss": 1.8178, "step": 13635 }, { "epoch": 0.6658203125, "grad_norm": 0.23464255034923553, "learning_rate": 0.00016607785889629172, "loss": 1.7536, "step": 13636 }, { "epoch": 0.665869140625, "grad_norm": 0.2384198009967804, "learning_rate": 0.0001660472105314687, "loss": 1.7627, "step": 13637 }, { "epoch": 0.66591796875, "grad_norm": 0.209281787276268, "learning_rate": 0.00016601656480720424, "loss": 1.7609, "step": 13638 }, { "epoch": 0.665966796875, "grad_norm": 0.24629147350788116, "learning_rate": 0.00016598592172424072, "loss": 1.7499, "step": 13639 }, { "epoch": 0.666015625, "grad_norm": 0.2328837513923645, "learning_rate": 0.0001659552812833211, "loss": 1.7327, "step": 13640 }, { "epoch": 0.666064453125, "grad_norm": 0.23490749299526215, "learning_rate": 0.0001659246434851877, "loss": 1.7825, "step": 13641 }, { "epoch": 0.66611328125, "grad_norm": 0.3000348210334778, "learning_rate": 0.00016589400833058333, "loss": 1.7393, "step": 13642 }, { "epoch": 0.666162109375, "grad_norm": 0.2134193480014801, "learning_rate": 0.0001658633758202503, "loss": 1.7682, "step": 13643 }, { "epoch": 0.6662109375, "grad_norm": 0.2770293951034546, "learning_rate": 0.00016583274595493097, "loss": 1.759, "step": 13644 }, { "epoch": 0.666259765625, "grad_norm": 0.2137283980846405, "learning_rate": 0.0001658021187353678, "loss": 1.7471, "step": 13645 }, { "epoch": 0.66630859375, "grad_norm": 0.2347373515367508, "learning_rate": 0.00016577149416230298, "loss": 1.7661, "step": 13646 }, { "epoch": 0.666357421875, "grad_norm": 0.24572445452213287, "learning_rate": 0.00016574087223647873, "loss": 1.7562, "step": 13647 }, { "epoch": 0.66640625, "grad_norm": 0.23358513414859772, "learning_rate": 0.00016571025295863728, "loss": 1.7526, "step": 13648 }, { "epoch": 0.666455078125, "grad_norm": 0.25349536538124084, "learning_rate": 0.00016567963632952063, "loss": 1.7412, "step": 13649 }, { "epoch": 0.66650390625, "grad_norm": 0.2261439859867096, "learning_rate": 0.0001656490223498709, "loss": 1.7552, "step": 13650 }, { "epoch": 0.666552734375, "grad_norm": 0.24034294486045837, "learning_rate": 0.00016561841102042977, "loss": 1.7358, "step": 13651 }, { "epoch": 0.6666015625, "grad_norm": 0.22865040600299835, "learning_rate": 0.00016558780234193955, "loss": 1.7445, "step": 13652 }, { "epoch": 0.666650390625, "grad_norm": 0.23405753076076508, "learning_rate": 0.0001655571963151416, "loss": 1.7649, "step": 13653 }, { "epoch": 0.66669921875, "grad_norm": 0.23586730659008026, "learning_rate": 0.00016552659294077812, "loss": 1.743, "step": 13654 }, { "epoch": 0.666748046875, "grad_norm": 0.20147284865379333, "learning_rate": 0.0001654959922195905, "loss": 1.7836, "step": 13655 }, { "epoch": 0.666796875, "grad_norm": 0.25386324524879456, "learning_rate": 0.0001654653941523206, "loss": 1.7359, "step": 13656 }, { "epoch": 0.666845703125, "grad_norm": 0.20354118943214417, "learning_rate": 0.00016543479873970968, "loss": 1.7384, "step": 13657 }, { "epoch": 0.66689453125, "grad_norm": 0.2627514600753784, "learning_rate": 0.0001654042059824996, "loss": 1.7332, "step": 13658 }, { "epoch": 0.666943359375, "grad_norm": 0.24616169929504395, "learning_rate": 0.00016537361588143162, "loss": 1.7564, "step": 13659 }, { "epoch": 0.6669921875, "grad_norm": 0.2039877325296402, "learning_rate": 0.0001653430284372471, "loss": 1.7628, "step": 13660 }, { "epoch": 0.667041015625, "grad_norm": 0.23575907945632935, "learning_rate": 0.00016531244365068737, "loss": 1.7454, "step": 13661 }, { "epoch": 0.66708984375, "grad_norm": 0.22696706652641296, "learning_rate": 0.0001652818615224937, "loss": 1.7372, "step": 13662 }, { "epoch": 0.667138671875, "grad_norm": 0.2540645897388458, "learning_rate": 0.0001652512820534073, "loss": 1.7529, "step": 13663 }, { "epoch": 0.6671875, "grad_norm": 0.22789378464221954, "learning_rate": 0.00016522070524416918, "loss": 1.7638, "step": 13664 }, { "epoch": 0.667236328125, "grad_norm": 0.2721952497959137, "learning_rate": 0.0001651901310955205, "loss": 1.7876, "step": 13665 }, { "epoch": 0.66728515625, "grad_norm": 0.2384050041437149, "learning_rate": 0.00016515955960820223, "loss": 1.7659, "step": 13666 }, { "epoch": 0.667333984375, "grad_norm": 0.2413889616727829, "learning_rate": 0.00016512899078295523, "loss": 1.7647, "step": 13667 }, { "epoch": 0.6673828125, "grad_norm": 0.2553560733795166, "learning_rate": 0.00016509842462052055, "loss": 1.7757, "step": 13668 }, { "epoch": 0.667431640625, "grad_norm": 0.23778627812862396, "learning_rate": 0.00016506786112163862, "loss": 1.7515, "step": 13669 }, { "epoch": 0.66748046875, "grad_norm": 0.23677851259708405, "learning_rate": 0.00016503730028705055, "loss": 1.7667, "step": 13670 }, { "epoch": 0.667529296875, "grad_norm": 0.23902693390846252, "learning_rate": 0.00016500674211749675, "loss": 1.7549, "step": 13671 }, { "epoch": 0.667578125, "grad_norm": 0.26392924785614014, "learning_rate": 0.00016497618661371808, "loss": 1.7529, "step": 13672 }, { "epoch": 0.667626953125, "grad_norm": 0.20836763083934784, "learning_rate": 0.0001649456337764547, "loss": 1.7432, "step": 13673 }, { "epoch": 0.66767578125, "grad_norm": 0.25659212470054626, "learning_rate": 0.00016491508360644752, "loss": 1.7565, "step": 13674 }, { "epoch": 0.667724609375, "grad_norm": 0.2795547842979431, "learning_rate": 0.00016488453610443663, "loss": 1.759, "step": 13675 }, { "epoch": 0.6677734375, "grad_norm": 0.22637483477592468, "learning_rate": 0.00016485399127116247, "loss": 1.7679, "step": 13676 }, { "epoch": 0.667822265625, "grad_norm": 0.2824772596359253, "learning_rate": 0.00016482344910736534, "loss": 1.7679, "step": 13677 }, { "epoch": 0.66787109375, "grad_norm": 0.213218554854393, "learning_rate": 0.0001647929096137854, "loss": 1.751, "step": 13678 }, { "epoch": 0.667919921875, "grad_norm": 0.28563034534454346, "learning_rate": 0.00016476237279116284, "loss": 1.7639, "step": 13679 }, { "epoch": 0.66796875, "grad_norm": 0.20999032258987427, "learning_rate": 0.00016473183864023779, "loss": 1.7553, "step": 13680 }, { "epoch": 0.668017578125, "grad_norm": 0.29780012369155884, "learning_rate": 0.0001647013071617502, "loss": 1.7499, "step": 13681 }, { "epoch": 0.66806640625, "grad_norm": 0.26283177733421326, "learning_rate": 0.00016467077835644, "loss": 1.7609, "step": 13682 }, { "epoch": 0.668115234375, "grad_norm": 0.2638436257839203, "learning_rate": 0.00016464025222504713, "loss": 1.7357, "step": 13683 }, { "epoch": 0.6681640625, "grad_norm": 0.24965131282806396, "learning_rate": 0.00016460972876831154, "loss": 1.7606, "step": 13684 }, { "epoch": 0.668212890625, "grad_norm": 0.25367191433906555, "learning_rate": 0.00016457920798697263, "loss": 1.7893, "step": 13685 }, { "epoch": 0.66826171875, "grad_norm": 0.23737873136997223, "learning_rate": 0.00016454868988177054, "loss": 1.7744, "step": 13686 }, { "epoch": 0.668310546875, "grad_norm": 0.23770183324813843, "learning_rate": 0.00016451817445344453, "loss": 1.7389, "step": 13687 }, { "epoch": 0.668359375, "grad_norm": 0.19936978816986084, "learning_rate": 0.00016448766170273446, "loss": 1.7317, "step": 13688 }, { "epoch": 0.668408203125, "grad_norm": 0.2203092724084854, "learning_rate": 0.00016445715163037955, "loss": 1.7399, "step": 13689 }, { "epoch": 0.66845703125, "grad_norm": 0.2131940871477127, "learning_rate": 0.0001644266442371195, "loss": 1.7419, "step": 13690 }, { "epoch": 0.668505859375, "grad_norm": 0.23315292596817017, "learning_rate": 0.0001643961395236934, "loss": 1.7452, "step": 13691 }, { "epoch": 0.6685546875, "grad_norm": 0.20749418437480927, "learning_rate": 0.00016436563749084093, "loss": 1.7657, "step": 13692 }, { "epoch": 0.668603515625, "grad_norm": 0.2605833411216736, "learning_rate": 0.00016433513813930104, "loss": 1.7338, "step": 13693 }, { "epoch": 0.66865234375, "grad_norm": 0.1964385211467743, "learning_rate": 0.00016430464146981294, "loss": 1.7498, "step": 13694 }, { "epoch": 0.668701171875, "grad_norm": 0.3139520287513733, "learning_rate": 0.00016427414748311581, "loss": 1.7339, "step": 13695 }, { "epoch": 0.66875, "grad_norm": 0.21587412059307098, "learning_rate": 0.00016424365617994865, "loss": 1.7735, "step": 13696 }, { "epoch": 0.668798828125, "grad_norm": 0.23373806476593018, "learning_rate": 0.00016421316756105048, "loss": 1.7506, "step": 13697 }, { "epoch": 0.66884765625, "grad_norm": 0.23133882880210876, "learning_rate": 0.0001641826816271602, "loss": 1.7725, "step": 13698 }, { "epoch": 0.668896484375, "grad_norm": 0.21382997930049896, "learning_rate": 0.00016415219837901667, "loss": 1.7619, "step": 13699 }, { "epoch": 0.6689453125, "grad_norm": 0.2753791809082031, "learning_rate": 0.00016412171781735873, "loss": 1.77, "step": 13700 }, { "epoch": 0.668994140625, "grad_norm": 0.19843338429927826, "learning_rate": 0.0001640912399429249, "loss": 1.746, "step": 13701 }, { "epoch": 0.66904296875, "grad_norm": 0.27617618441581726, "learning_rate": 0.0001640607647564541, "loss": 1.7681, "step": 13702 }, { "epoch": 0.669091796875, "grad_norm": 0.2502332329750061, "learning_rate": 0.00016403029225868466, "loss": 1.7579, "step": 13703 }, { "epoch": 0.669140625, "grad_norm": 0.24886095523834229, "learning_rate": 0.00016399982245035542, "loss": 1.7668, "step": 13704 }, { "epoch": 0.669189453125, "grad_norm": 0.20972654223442078, "learning_rate": 0.00016396935533220444, "loss": 1.7446, "step": 13705 }, { "epoch": 0.66923828125, "grad_norm": 0.2621817886829376, "learning_rate": 0.00016393889090497054, "loss": 1.7675, "step": 13706 }, { "epoch": 0.669287109375, "grad_norm": 0.23432549834251404, "learning_rate": 0.00016390842916939164, "loss": 1.7474, "step": 13707 }, { "epoch": 0.6693359375, "grad_norm": 0.2382749319076538, "learning_rate": 0.0001638779701262064, "loss": 1.7313, "step": 13708 }, { "epoch": 0.669384765625, "grad_norm": 0.23993709683418274, "learning_rate": 0.00016384751377615276, "loss": 1.7605, "step": 13709 }, { "epoch": 0.66943359375, "grad_norm": 0.22586776316165924, "learning_rate": 0.0001638170601199689, "loss": 1.7556, "step": 13710 }, { "epoch": 0.669482421875, "grad_norm": 0.20790301263332367, "learning_rate": 0.0001637866091583929, "loss": 1.7624, "step": 13711 }, { "epoch": 0.66953125, "grad_norm": 0.24346227943897247, "learning_rate": 0.00016375616089216283, "loss": 1.7434, "step": 13712 }, { "epoch": 0.669580078125, "grad_norm": 0.22955238819122314, "learning_rate": 0.00016372571532201653, "loss": 1.7612, "step": 13713 }, { "epoch": 0.66962890625, "grad_norm": 0.20643873512744904, "learning_rate": 0.0001636952724486919, "loss": 1.7329, "step": 13714 }, { "epoch": 0.669677734375, "grad_norm": 0.22995978593826294, "learning_rate": 0.0001636648322729268, "loss": 1.7595, "step": 13715 }, { "epoch": 0.6697265625, "grad_norm": 0.2190212607383728, "learning_rate": 0.00016363439479545892, "loss": 1.7445, "step": 13716 }, { "epoch": 0.669775390625, "grad_norm": 0.19604019820690155, "learning_rate": 0.00016360396001702596, "loss": 1.7705, "step": 13717 }, { "epoch": 0.66982421875, "grad_norm": 0.22216400504112244, "learning_rate": 0.0001635735279383656, "loss": 1.7456, "step": 13718 }, { "epoch": 0.669873046875, "grad_norm": 0.22576284408569336, "learning_rate": 0.00016354309856021515, "loss": 1.7687, "step": 13719 }, { "epoch": 0.669921875, "grad_norm": 0.2176370918750763, "learning_rate": 0.00016351267188331243, "loss": 1.7395, "step": 13720 }, { "epoch": 0.669970703125, "grad_norm": 0.20548422634601593, "learning_rate": 0.00016348224790839445, "loss": 1.7705, "step": 13721 }, { "epoch": 0.67001953125, "grad_norm": 0.23503616452217102, "learning_rate": 0.00016345182663619898, "loss": 1.733, "step": 13722 }, { "epoch": 0.670068359375, "grad_norm": 0.1987636238336563, "learning_rate": 0.00016342140806746298, "loss": 1.7595, "step": 13723 }, { "epoch": 0.6701171875, "grad_norm": 0.23989006876945496, "learning_rate": 0.00016339099220292393, "loss": 1.7852, "step": 13724 }, { "epoch": 0.670166015625, "grad_norm": 0.2344467043876648, "learning_rate": 0.0001633605790433187, "loss": 1.7544, "step": 13725 }, { "epoch": 0.67021484375, "grad_norm": 0.24048937857151031, "learning_rate": 0.00016333016858938466, "loss": 1.7753, "step": 13726 }, { "epoch": 0.670263671875, "grad_norm": 0.21085235476493835, "learning_rate": 0.00016329976084185867, "loss": 1.7522, "step": 13727 }, { "epoch": 0.6703125, "grad_norm": 0.25409364700317383, "learning_rate": 0.00016326935580147767, "loss": 1.7694, "step": 13728 }, { "epoch": 0.670361328125, "grad_norm": 0.24121738970279694, "learning_rate": 0.00016323895346897861, "loss": 1.7523, "step": 13729 }, { "epoch": 0.67041015625, "grad_norm": 0.23845818638801575, "learning_rate": 0.00016320855384509828, "loss": 1.7401, "step": 13730 }, { "epoch": 0.670458984375, "grad_norm": 0.21940413117408752, "learning_rate": 0.00016317815693057348, "loss": 1.7551, "step": 13731 }, { "epoch": 0.6705078125, "grad_norm": 0.21527217328548431, "learning_rate": 0.00016314776272614091, "loss": 1.7616, "step": 13732 }, { "epoch": 0.670556640625, "grad_norm": 0.2270849347114563, "learning_rate": 0.00016311737123253712, "loss": 1.7603, "step": 13733 }, { "epoch": 0.67060546875, "grad_norm": 0.2479562610387802, "learning_rate": 0.00016308698245049886, "loss": 1.7549, "step": 13734 }, { "epoch": 0.670654296875, "grad_norm": 0.2268296629190445, "learning_rate": 0.00016305659638076228, "loss": 1.7706, "step": 13735 }, { "epoch": 0.670703125, "grad_norm": 0.2853209674358368, "learning_rate": 0.0001630262130240642, "loss": 1.786, "step": 13736 }, { "epoch": 0.670751953125, "grad_norm": 0.21937881410121918, "learning_rate": 0.00016299583238114064, "loss": 1.7614, "step": 13737 }, { "epoch": 0.67080078125, "grad_norm": 0.2787794768810272, "learning_rate": 0.00016296545445272824, "loss": 1.8107, "step": 13738 }, { "epoch": 0.670849609375, "grad_norm": 0.207695871591568, "learning_rate": 0.00016293507923956287, "loss": 1.7614, "step": 13739 }, { "epoch": 0.6708984375, "grad_norm": 0.31177952885627747, "learning_rate": 0.00016290470674238105, "loss": 1.7369, "step": 13740 }, { "epoch": 0.670947265625, "grad_norm": 0.21944934129714966, "learning_rate": 0.00016287433696191857, "loss": 1.7678, "step": 13741 }, { "epoch": 0.67099609375, "grad_norm": 0.28239405155181885, "learning_rate": 0.0001628439698989118, "loss": 1.7476, "step": 13742 }, { "epoch": 0.671044921875, "grad_norm": 0.2646435797214508, "learning_rate": 0.0001628136055540964, "loss": 1.7505, "step": 13743 }, { "epoch": 0.67109375, "grad_norm": 0.23868663609027863, "learning_rate": 0.00016278324392820845, "loss": 1.7724, "step": 13744 }, { "epoch": 0.671142578125, "grad_norm": 0.23773619532585144, "learning_rate": 0.0001627528850219837, "loss": 1.7527, "step": 13745 }, { "epoch": 0.67119140625, "grad_norm": 0.24401843547821045, "learning_rate": 0.000162722528836158, "loss": 1.7737, "step": 13746 }, { "epoch": 0.671240234375, "grad_norm": 0.22178837656974792, "learning_rate": 0.00016269217537146698, "loss": 1.7717, "step": 13747 }, { "epoch": 0.6712890625, "grad_norm": 0.2545543611049652, "learning_rate": 0.00016266182462864633, "loss": 1.7481, "step": 13748 }, { "epoch": 0.671337890625, "grad_norm": 0.20163370668888092, "learning_rate": 0.0001626314766084316, "loss": 1.7633, "step": 13749 }, { "epoch": 0.67138671875, "grad_norm": 0.2784641981124878, "learning_rate": 0.0001626011313115583, "loss": 1.7518, "step": 13750 }, { "epoch": 0.671435546875, "grad_norm": 0.204350546002388, "learning_rate": 0.00016257078873876192, "loss": 1.7576, "step": 13751 }, { "epoch": 0.671484375, "grad_norm": 0.2578820288181305, "learning_rate": 0.00016254044889077787, "loss": 1.7482, "step": 13752 }, { "epoch": 0.671533203125, "grad_norm": 0.20930808782577515, "learning_rate": 0.0001625101117683412, "loss": 1.7702, "step": 13753 }, { "epoch": 0.67158203125, "grad_norm": 0.25924235582351685, "learning_rate": 0.00016247977737218755, "loss": 1.7617, "step": 13754 }, { "epoch": 0.671630859375, "grad_norm": 0.21417222917079926, "learning_rate": 0.0001624494457030517, "loss": 1.7551, "step": 13755 }, { "epoch": 0.6716796875, "grad_norm": 0.2492237687110901, "learning_rate": 0.0001624191167616691, "loss": 1.7461, "step": 13756 }, { "epoch": 0.671728515625, "grad_norm": 0.22225810587406158, "learning_rate": 0.00016238879054877455, "loss": 1.7547, "step": 13757 }, { "epoch": 0.67177734375, "grad_norm": 0.2625472843647003, "learning_rate": 0.0001623584670651032, "loss": 1.7416, "step": 13758 }, { "epoch": 0.671826171875, "grad_norm": 0.18561767041683197, "learning_rate": 0.00016232814631138992, "loss": 1.7495, "step": 13759 }, { "epoch": 0.671875, "grad_norm": 0.2919287085533142, "learning_rate": 0.00016229782828836947, "loss": 1.7649, "step": 13760 }, { "epoch": 0.671923828125, "grad_norm": 0.17759808897972107, "learning_rate": 0.00016226751299677671, "loss": 1.7747, "step": 13761 }, { "epoch": 0.67197265625, "grad_norm": 0.24963165819644928, "learning_rate": 0.00016223720043734633, "loss": 1.7428, "step": 13762 }, { "epoch": 0.672021484375, "grad_norm": 0.20767442882061005, "learning_rate": 0.00016220689061081302, "loss": 1.7744, "step": 13763 }, { "epoch": 0.6720703125, "grad_norm": 0.228493332862854, "learning_rate": 0.0001621765835179113, "loss": 1.7599, "step": 13764 }, { "epoch": 0.672119140625, "grad_norm": 0.20147237181663513, "learning_rate": 0.00016214627915937574, "loss": 1.7917, "step": 13765 }, { "epoch": 0.67216796875, "grad_norm": 0.24162514507770538, "learning_rate": 0.00016211597753594076, "loss": 1.7517, "step": 13766 }, { "epoch": 0.672216796875, "grad_norm": 0.21513448655605316, "learning_rate": 0.00016208567864834075, "loss": 1.7616, "step": 13767 }, { "epoch": 0.672265625, "grad_norm": 0.22206242382526398, "learning_rate": 0.00016205538249731014, "loss": 1.7768, "step": 13768 }, { "epoch": 0.672314453125, "grad_norm": 0.20972426235675812, "learning_rate": 0.0001620250890835829, "loss": 1.7609, "step": 13769 }, { "epoch": 0.67236328125, "grad_norm": 0.2005716860294342, "learning_rate": 0.00016199479840789356, "loss": 1.733, "step": 13770 }, { "epoch": 0.672412109375, "grad_norm": 0.21600860357284546, "learning_rate": 0.00016196451047097588, "loss": 1.7544, "step": 13771 }, { "epoch": 0.6724609375, "grad_norm": 0.19131037592887878, "learning_rate": 0.00016193422527356426, "loss": 1.7452, "step": 13772 }, { "epoch": 0.672509765625, "grad_norm": 0.2077118307352066, "learning_rate": 0.0001619039428163924, "loss": 1.7899, "step": 13773 }, { "epoch": 0.67255859375, "grad_norm": 0.20314130187034607, "learning_rate": 0.00016187366310019448, "loss": 1.7675, "step": 13774 }, { "epoch": 0.672607421875, "grad_norm": 0.20554114878177643, "learning_rate": 0.00016184338612570406, "loss": 1.7512, "step": 13775 }, { "epoch": 0.67265625, "grad_norm": 0.17627818882465363, "learning_rate": 0.00016181311189365527, "loss": 1.7677, "step": 13776 }, { "epoch": 0.672705078125, "grad_norm": 0.2167440950870514, "learning_rate": 0.00016178284040478154, "loss": 1.7563, "step": 13777 }, { "epoch": 0.67275390625, "grad_norm": 0.20091994106769562, "learning_rate": 0.00016175257165981663, "loss": 1.7656, "step": 13778 }, { "epoch": 0.672802734375, "grad_norm": 0.21381592750549316, "learning_rate": 0.00016172230565949413, "loss": 1.7705, "step": 13779 }, { "epoch": 0.6728515625, "grad_norm": 0.21261078119277954, "learning_rate": 0.0001616920424045476, "loss": 1.761, "step": 13780 }, { "epoch": 0.672900390625, "grad_norm": 0.2158832848072052, "learning_rate": 0.0001616617818957104, "loss": 1.7386, "step": 13781 }, { "epoch": 0.67294921875, "grad_norm": 0.2310057133436203, "learning_rate": 0.000161631524133716, "loss": 1.7288, "step": 13782 }, { "epoch": 0.672998046875, "grad_norm": 0.1771828830242157, "learning_rate": 0.0001616012691192977, "loss": 1.7412, "step": 13783 }, { "epoch": 0.673046875, "grad_norm": 0.23812083899974823, "learning_rate": 0.00016157101685318871, "loss": 1.7483, "step": 13784 }, { "epoch": 0.673095703125, "grad_norm": 0.18052628636360168, "learning_rate": 0.00016154076733612227, "loss": 1.7401, "step": 13785 }, { "epoch": 0.67314453125, "grad_norm": 0.24411462247371674, "learning_rate": 0.00016151052056883158, "loss": 1.7374, "step": 13786 }, { "epoch": 0.673193359375, "grad_norm": 0.20529082417488098, "learning_rate": 0.00016148027655204944, "loss": 1.7369, "step": 13787 }, { "epoch": 0.6732421875, "grad_norm": 0.2302236109972, "learning_rate": 0.00016145003528650914, "loss": 1.7666, "step": 13788 }, { "epoch": 0.673291015625, "grad_norm": 0.2262401580810547, "learning_rate": 0.0001614197967729433, "loss": 1.7593, "step": 13789 }, { "epoch": 0.67333984375, "grad_norm": 0.19883058965206146, "learning_rate": 0.00016138956101208513, "loss": 1.765, "step": 13790 }, { "epoch": 0.673388671875, "grad_norm": 0.23496019840240479, "learning_rate": 0.00016135932800466702, "loss": 1.77, "step": 13791 }, { "epoch": 0.6734375, "grad_norm": 0.19710838794708252, "learning_rate": 0.00016132909775142207, "loss": 1.758, "step": 13792 }, { "epoch": 0.673486328125, "grad_norm": 0.2020508199930191, "learning_rate": 0.0001612988702530827, "loss": 1.7548, "step": 13793 }, { "epoch": 0.67353515625, "grad_norm": 0.2474236935377121, "learning_rate": 0.0001612686455103815, "loss": 1.752, "step": 13794 }, { "epoch": 0.673583984375, "grad_norm": 0.2307559847831726, "learning_rate": 0.0001612384235240511, "loss": 1.7615, "step": 13795 }, { "epoch": 0.6736328125, "grad_norm": 0.22529029846191406, "learning_rate": 0.00016120820429482385, "loss": 1.7776, "step": 13796 }, { "epoch": 0.673681640625, "grad_norm": 0.20151014626026154, "learning_rate": 0.00016117798782343224, "loss": 1.7648, "step": 13797 }, { "epoch": 0.67373046875, "grad_norm": 0.19849154353141785, "learning_rate": 0.00016114777411060853, "loss": 1.7409, "step": 13798 }, { "epoch": 0.673779296875, "grad_norm": 0.20849913358688354, "learning_rate": 0.00016111756315708494, "loss": 1.7316, "step": 13799 }, { "epoch": 0.673828125, "grad_norm": 0.2655599117279053, "learning_rate": 0.00016108735496359373, "loss": 1.759, "step": 13800 }, { "epoch": 0.673876953125, "grad_norm": 0.21990512311458588, "learning_rate": 0.00016105714953086703, "loss": 1.7721, "step": 13801 }, { "epoch": 0.67392578125, "grad_norm": 0.2677272856235504, "learning_rate": 0.0001610269468596368, "loss": 1.7652, "step": 13802 }, { "epoch": 0.673974609375, "grad_norm": 0.2565470039844513, "learning_rate": 0.00016099674695063508, "loss": 1.7434, "step": 13803 }, { "epoch": 0.6740234375, "grad_norm": 0.24015289545059204, "learning_rate": 0.00016096654980459388, "loss": 1.7437, "step": 13804 }, { "epoch": 0.674072265625, "grad_norm": 0.2652212977409363, "learning_rate": 0.00016093635542224478, "loss": 1.753, "step": 13805 }, { "epoch": 0.67412109375, "grad_norm": 0.20751096308231354, "learning_rate": 0.00016090616380431993, "loss": 1.7499, "step": 13806 }, { "epoch": 0.674169921875, "grad_norm": 0.292233943939209, "learning_rate": 0.00016087597495155072, "loss": 1.7522, "step": 13807 }, { "epoch": 0.67421875, "grad_norm": 0.21086525917053223, "learning_rate": 0.00016084578886466905, "loss": 1.7823, "step": 13808 }, { "epoch": 0.674267578125, "grad_norm": 0.2264125794172287, "learning_rate": 0.00016081560554440628, "loss": 1.7859, "step": 13809 }, { "epoch": 0.67431640625, "grad_norm": 0.2565629482269287, "learning_rate": 0.00016078542499149419, "loss": 1.7613, "step": 13810 }, { "epoch": 0.674365234375, "grad_norm": 0.2102144956588745, "learning_rate": 0.000160755247206664, "loss": 1.7575, "step": 13811 }, { "epoch": 0.6744140625, "grad_norm": 0.24897176027297974, "learning_rate": 0.00016072507219064714, "loss": 1.7596, "step": 13812 }, { "epoch": 0.674462890625, "grad_norm": 0.2595190107822418, "learning_rate": 0.00016069489994417503, "loss": 1.7599, "step": 13813 }, { "epoch": 0.67451171875, "grad_norm": 0.3236025869846344, "learning_rate": 0.0001606647304679788, "loss": 1.7716, "step": 13814 }, { "epoch": 0.674560546875, "grad_norm": 0.21184195578098297, "learning_rate": 0.0001606345637627897, "loss": 1.7515, "step": 13815 }, { "epoch": 0.674609375, "grad_norm": 0.2851148545742035, "learning_rate": 0.00016060439982933884, "loss": 1.7425, "step": 13816 }, { "epoch": 0.674658203125, "grad_norm": 0.22827133536338806, "learning_rate": 0.00016057423866835724, "loss": 1.7713, "step": 13817 }, { "epoch": 0.67470703125, "grad_norm": 0.2909943759441376, "learning_rate": 0.00016054408028057593, "loss": 1.7722, "step": 13818 }, { "epoch": 0.674755859375, "grad_norm": 0.2417847067117691, "learning_rate": 0.00016051392466672577, "loss": 1.7338, "step": 13819 }, { "epoch": 0.6748046875, "grad_norm": 0.2723807692527771, "learning_rate": 0.0001604837718275376, "loss": 1.7569, "step": 13820 }, { "epoch": 0.674853515625, "grad_norm": 0.26006031036376953, "learning_rate": 0.00016045362176374223, "loss": 1.7619, "step": 13821 }, { "epoch": 0.67490234375, "grad_norm": 0.22288036346435547, "learning_rate": 0.00016042347447607036, "loss": 1.7616, "step": 13822 }, { "epoch": 0.674951171875, "grad_norm": 0.24731726944446564, "learning_rate": 0.00016039332996525262, "loss": 1.7737, "step": 13823 }, { "epoch": 0.675, "grad_norm": 0.2820434272289276, "learning_rate": 0.00016036318823201974, "loss": 1.7641, "step": 13824 }, { "epoch": 0.675048828125, "grad_norm": 0.2568295896053314, "learning_rate": 0.00016033304927710187, "loss": 1.7376, "step": 13825 }, { "epoch": 0.67509765625, "grad_norm": 0.2919273376464844, "learning_rate": 0.00016030291310122988, "loss": 1.7427, "step": 13826 }, { "epoch": 0.675146484375, "grad_norm": 0.20410948991775513, "learning_rate": 0.00016027277970513383, "loss": 1.7347, "step": 13827 }, { "epoch": 0.6751953125, "grad_norm": 0.27715954184532166, "learning_rate": 0.00016024264908954413, "loss": 1.7394, "step": 13828 }, { "epoch": 0.675244140625, "grad_norm": 0.25455549359321594, "learning_rate": 0.00016021252125519105, "loss": 1.7734, "step": 13829 }, { "epoch": 0.67529296875, "grad_norm": 0.2587890923023224, "learning_rate": 0.00016018239620280473, "loss": 1.7335, "step": 13830 }, { "epoch": 0.675341796875, "grad_norm": 0.22962504625320435, "learning_rate": 0.00016015227393311525, "loss": 1.7573, "step": 13831 }, { "epoch": 0.675390625, "grad_norm": 0.2574337422847748, "learning_rate": 0.00016012215444685275, "loss": 1.7492, "step": 13832 }, { "epoch": 0.675439453125, "grad_norm": 0.21597716212272644, "learning_rate": 0.00016009203774474708, "loss": 1.7524, "step": 13833 }, { "epoch": 0.67548828125, "grad_norm": 0.24052777886390686, "learning_rate": 0.00016006192382752815, "loss": 1.7686, "step": 13834 }, { "epoch": 0.675537109375, "grad_norm": 0.22134338319301605, "learning_rate": 0.00016003181269592587, "loss": 1.747, "step": 13835 }, { "epoch": 0.6755859375, "grad_norm": 0.24237313866615295, "learning_rate": 0.00016000170435067002, "loss": 1.7742, "step": 13836 }, { "epoch": 0.675634765625, "grad_norm": 0.23036623001098633, "learning_rate": 0.0001599715987924902, "loss": 1.7534, "step": 13837 }, { "epoch": 0.67568359375, "grad_norm": 0.24793842434883118, "learning_rate": 0.0001599414960221161, "loss": 1.7791, "step": 13838 }, { "epoch": 0.675732421875, "grad_norm": 0.19970569014549255, "learning_rate": 0.00015991139604027734, "loss": 1.7573, "step": 13839 }, { "epoch": 0.67578125, "grad_norm": 0.23181387782096863, "learning_rate": 0.00015988129884770327, "loss": 1.756, "step": 13840 }, { "epoch": 0.675830078125, "grad_norm": 0.20018549263477325, "learning_rate": 0.00015985120444512345, "loss": 1.7718, "step": 13841 }, { "epoch": 0.67587890625, "grad_norm": 0.1888689547777176, "learning_rate": 0.00015982111283326718, "loss": 1.7731, "step": 13842 }, { "epoch": 0.675927734375, "grad_norm": 0.1972707360982895, "learning_rate": 0.00015979102401286383, "loss": 1.7449, "step": 13843 }, { "epoch": 0.6759765625, "grad_norm": 0.20931385457515717, "learning_rate": 0.0001597609379846426, "loss": 1.7317, "step": 13844 }, { "epoch": 0.676025390625, "grad_norm": 0.19485151767730713, "learning_rate": 0.00015973085474933258, "loss": 1.73, "step": 13845 }, { "epoch": 0.67607421875, "grad_norm": 0.23291511833667755, "learning_rate": 0.00015970077430766286, "loss": 1.7423, "step": 13846 }, { "epoch": 0.676123046875, "grad_norm": 0.22301426529884338, "learning_rate": 0.00015967069666036256, "loss": 1.7504, "step": 13847 }, { "epoch": 0.676171875, "grad_norm": 0.21874551475048065, "learning_rate": 0.00015964062180816053, "loss": 1.7503, "step": 13848 }, { "epoch": 0.676220703125, "grad_norm": 0.20300258696079254, "learning_rate": 0.0001596105497517857, "loss": 1.7759, "step": 13849 }, { "epoch": 0.67626953125, "grad_norm": 0.2509702444076538, "learning_rate": 0.00015958048049196695, "loss": 1.7554, "step": 13850 }, { "epoch": 0.676318359375, "grad_norm": 0.22337155044078827, "learning_rate": 0.00015955041402943295, "loss": 1.7351, "step": 13851 }, { "epoch": 0.6763671875, "grad_norm": 0.24423453211784363, "learning_rate": 0.00015952035036491242, "loss": 1.77, "step": 13852 }, { "epoch": 0.676416015625, "grad_norm": 0.24323518574237823, "learning_rate": 0.00015949028949913395, "loss": 1.7366, "step": 13853 }, { "epoch": 0.67646484375, "grad_norm": 0.22668762505054474, "learning_rate": 0.00015946023143282613, "loss": 1.7501, "step": 13854 }, { "epoch": 0.676513671875, "grad_norm": 0.22722128033638, "learning_rate": 0.00015943017616671745, "loss": 1.7666, "step": 13855 }, { "epoch": 0.6765625, "grad_norm": 0.24113339185714722, "learning_rate": 0.0001594001237015363, "loss": 1.7538, "step": 13856 }, { "epoch": 0.676611328125, "grad_norm": 0.21130594611167908, "learning_rate": 0.00015937007403801098, "loss": 1.7787, "step": 13857 }, { "epoch": 0.67666015625, "grad_norm": 0.23064905405044556, "learning_rate": 0.00015934002717686986, "loss": 1.7468, "step": 13858 }, { "epoch": 0.676708984375, "grad_norm": 0.21889455616474152, "learning_rate": 0.00015930998311884106, "loss": 1.7547, "step": 13859 }, { "epoch": 0.6767578125, "grad_norm": 0.24510346353054047, "learning_rate": 0.00015927994186465278, "loss": 1.7745, "step": 13860 }, { "epoch": 0.676806640625, "grad_norm": 0.19736941158771515, "learning_rate": 0.00015924990341503316, "loss": 1.7742, "step": 13861 }, { "epoch": 0.67685546875, "grad_norm": 0.21000942587852478, "learning_rate": 0.00015921986777071, "loss": 1.7502, "step": 13862 }, { "epoch": 0.676904296875, "grad_norm": 0.1974382847547531, "learning_rate": 0.00015918983493241147, "loss": 1.7614, "step": 13863 }, { "epoch": 0.676953125, "grad_norm": 0.22433112561702728, "learning_rate": 0.00015915980490086525, "loss": 1.7526, "step": 13864 }, { "epoch": 0.677001953125, "grad_norm": 0.2531175911426544, "learning_rate": 0.00015912977767679925, "loss": 1.7681, "step": 13865 }, { "epoch": 0.67705078125, "grad_norm": 0.19955477118492126, "learning_rate": 0.00015909975326094117, "loss": 1.7455, "step": 13866 }, { "epoch": 0.677099609375, "grad_norm": 0.2180325835943222, "learning_rate": 0.00015906973165401867, "loss": 1.7752, "step": 13867 }, { "epoch": 0.6771484375, "grad_norm": 0.23151303827762604, "learning_rate": 0.0001590397128567594, "loss": 1.7281, "step": 13868 }, { "epoch": 0.677197265625, "grad_norm": 0.22379599511623383, "learning_rate": 0.0001590096968698908, "loss": 1.7589, "step": 13869 }, { "epoch": 0.67724609375, "grad_norm": 0.21194010972976685, "learning_rate": 0.0001589796836941404, "loss": 1.7568, "step": 13870 }, { "epoch": 0.677294921875, "grad_norm": 0.2641364634037018, "learning_rate": 0.0001589496733302356, "loss": 1.7603, "step": 13871 }, { "epoch": 0.67734375, "grad_norm": 0.2205910086631775, "learning_rate": 0.00015891966577890365, "loss": 1.7536, "step": 13872 }, { "epoch": 0.677392578125, "grad_norm": 0.2288450449705124, "learning_rate": 0.0001588896610408719, "loss": 1.7321, "step": 13873 }, { "epoch": 0.67744140625, "grad_norm": 0.22019429504871368, "learning_rate": 0.0001588596591168675, "loss": 1.7717, "step": 13874 }, { "epoch": 0.677490234375, "grad_norm": 0.20392216742038727, "learning_rate": 0.00015882966000761755, "loss": 1.735, "step": 13875 }, { "epoch": 0.6775390625, "grad_norm": 0.2248189002275467, "learning_rate": 0.00015879966371384912, "loss": 1.756, "step": 13876 }, { "epoch": 0.677587890625, "grad_norm": 0.1905621737241745, "learning_rate": 0.0001587696702362893, "loss": 1.7854, "step": 13877 }, { "epoch": 0.67763671875, "grad_norm": 0.2395351678133011, "learning_rate": 0.0001587396795756647, "loss": 1.7797, "step": 13878 }, { "epoch": 0.677685546875, "grad_norm": 0.197648823261261, "learning_rate": 0.00015870969173270255, "loss": 1.7496, "step": 13879 }, { "epoch": 0.677734375, "grad_norm": 0.2234606146812439, "learning_rate": 0.00015867970670812925, "loss": 1.756, "step": 13880 }, { "epoch": 0.677783203125, "grad_norm": 0.19294396042823792, "learning_rate": 0.0001586497245026719, "loss": 1.7569, "step": 13881 }, { "epoch": 0.67783203125, "grad_norm": 0.21015723049640656, "learning_rate": 0.00015861974511705682, "loss": 1.7367, "step": 13882 }, { "epoch": 0.677880859375, "grad_norm": 0.21540014445781708, "learning_rate": 0.00015858976855201085, "loss": 1.7831, "step": 13883 }, { "epoch": 0.6779296875, "grad_norm": 0.1986658126115799, "learning_rate": 0.00015855979480826026, "loss": 1.743, "step": 13884 }, { "epoch": 0.677978515625, "grad_norm": 0.24114400148391724, "learning_rate": 0.00015852982388653162, "loss": 1.7621, "step": 13885 }, { "epoch": 0.67802734375, "grad_norm": 0.23860298097133636, "learning_rate": 0.00015849985578755127, "loss": 1.7308, "step": 13886 }, { "epoch": 0.678076171875, "grad_norm": 0.24998973309993744, "learning_rate": 0.0001584698905120455, "loss": 1.7522, "step": 13887 }, { "epoch": 0.678125, "grad_norm": 0.19124208390712738, "learning_rate": 0.00015843992806074053, "loss": 1.7556, "step": 13888 }, { "epoch": 0.678173828125, "grad_norm": 0.24729256331920624, "learning_rate": 0.00015840996843436258, "loss": 1.7501, "step": 13889 }, { "epoch": 0.67822265625, "grad_norm": 0.2592417001724243, "learning_rate": 0.00015838001163363775, "loss": 1.7489, "step": 13890 }, { "epoch": 0.678271484375, "grad_norm": 0.2247825562953949, "learning_rate": 0.00015835005765929195, "loss": 1.7611, "step": 13891 }, { "epoch": 0.6783203125, "grad_norm": 0.19296108186244965, "learning_rate": 0.00015832010651205126, "loss": 1.7726, "step": 13892 }, { "epoch": 0.678369140625, "grad_norm": 0.22919750213623047, "learning_rate": 0.00015829015819264147, "loss": 1.7369, "step": 13893 }, { "epoch": 0.67841796875, "grad_norm": 0.21549899876117706, "learning_rate": 0.00015826021270178852, "loss": 1.7673, "step": 13894 }, { "epoch": 0.678466796875, "grad_norm": 0.2007250189781189, "learning_rate": 0.0001582302700402182, "loss": 1.7567, "step": 13895 }, { "epoch": 0.678515625, "grad_norm": 0.2241358757019043, "learning_rate": 0.00015820033020865593, "loss": 1.7449, "step": 13896 }, { "epoch": 0.678564453125, "grad_norm": 0.21479304134845734, "learning_rate": 0.0001581703932078276, "loss": 1.7469, "step": 13897 }, { "epoch": 0.67861328125, "grad_norm": 0.22224928438663483, "learning_rate": 0.00015814045903845853, "loss": 1.7586, "step": 13898 }, { "epoch": 0.678662109375, "grad_norm": 0.22156329452991486, "learning_rate": 0.00015811052770127448, "loss": 1.7342, "step": 13899 }, { "epoch": 0.6787109375, "grad_norm": 0.24373246729373932, "learning_rate": 0.0001580805991970005, "loss": 1.7691, "step": 13900 }, { "epoch": 0.678759765625, "grad_norm": 0.22238609194755554, "learning_rate": 0.00015805067352636233, "loss": 1.7511, "step": 13901 }, { "epoch": 0.67880859375, "grad_norm": 0.2578531801700592, "learning_rate": 0.00015802075069008486, "loss": 1.7577, "step": 13902 }, { "epoch": 0.678857421875, "grad_norm": 0.19292037189006805, "learning_rate": 0.0001579908306888937, "loss": 1.732, "step": 13903 }, { "epoch": 0.67890625, "grad_norm": 0.26929718255996704, "learning_rate": 0.00015796091352351363, "loss": 1.7584, "step": 13904 }, { "epoch": 0.678955078125, "grad_norm": 0.23805156350135803, "learning_rate": 0.00015793099919466985, "loss": 1.7605, "step": 13905 }, { "epoch": 0.67900390625, "grad_norm": 0.2735961675643921, "learning_rate": 0.00015790108770308736, "loss": 1.762, "step": 13906 }, { "epoch": 0.679052734375, "grad_norm": 0.19212502241134644, "learning_rate": 0.00015787117904949105, "loss": 1.7414, "step": 13907 }, { "epoch": 0.6791015625, "grad_norm": 0.28002360463142395, "learning_rate": 0.0001578412732346059, "loss": 1.7522, "step": 13908 }, { "epoch": 0.679150390625, "grad_norm": 0.20370551943778992, "learning_rate": 0.00015781137025915653, "loss": 1.7396, "step": 13909 }, { "epoch": 0.67919921875, "grad_norm": 0.273445725440979, "learning_rate": 0.0001577814701238678, "loss": 1.7688, "step": 13910 }, { "epoch": 0.679248046875, "grad_norm": 0.21015244722366333, "learning_rate": 0.0001577515728294644, "loss": 1.7585, "step": 13911 }, { "epoch": 0.679296875, "grad_norm": 0.26715946197509766, "learning_rate": 0.0001577216783766706, "loss": 1.7499, "step": 13912 }, { "epoch": 0.679345703125, "grad_norm": 0.21030375361442566, "learning_rate": 0.0001576917867662114, "loss": 1.7541, "step": 13913 }, { "epoch": 0.67939453125, "grad_norm": 0.24456387758255005, "learning_rate": 0.0001576618979988108, "loss": 1.7514, "step": 13914 }, { "epoch": 0.679443359375, "grad_norm": 0.2110103964805603, "learning_rate": 0.00015763201207519352, "loss": 1.7609, "step": 13915 }, { "epoch": 0.6794921875, "grad_norm": 0.23016457259655, "learning_rate": 0.00015760212899608355, "loss": 1.7766, "step": 13916 }, { "epoch": 0.679541015625, "grad_norm": 0.2820949852466583, "learning_rate": 0.0001575722487622055, "loss": 1.7447, "step": 13917 }, { "epoch": 0.67958984375, "grad_norm": 0.22189055383205414, "learning_rate": 0.00015754237137428312, "loss": 1.7529, "step": 13918 }, { "epoch": 0.679638671875, "grad_norm": 0.2921696901321411, "learning_rate": 0.00015751249683304091, "loss": 1.7649, "step": 13919 }, { "epoch": 0.6796875, "grad_norm": 0.2685236930847168, "learning_rate": 0.00015748262513920269, "loss": 1.7381, "step": 13920 }, { "epoch": 0.679736328125, "grad_norm": 0.24093608558177948, "learning_rate": 0.00015745275629349241, "loss": 1.7576, "step": 13921 }, { "epoch": 0.67978515625, "grad_norm": 0.22845639288425446, "learning_rate": 0.0001574228902966341, "loss": 1.736, "step": 13922 }, { "epoch": 0.679833984375, "grad_norm": 0.23896290361881256, "learning_rate": 0.00015739302714935143, "loss": 1.7508, "step": 13923 }, { "epoch": 0.6798828125, "grad_norm": 0.21333101391792297, "learning_rate": 0.00015736316685236823, "loss": 1.7671, "step": 13924 }, { "epoch": 0.679931640625, "grad_norm": 0.23658882081508636, "learning_rate": 0.00015733330940640822, "loss": 1.7752, "step": 13925 }, { "epoch": 0.67998046875, "grad_norm": 0.2692720592021942, "learning_rate": 0.00015730345481219498, "loss": 1.7569, "step": 13926 }, { "epoch": 0.680029296875, "grad_norm": 0.18622636795043945, "learning_rate": 0.00015727360307045207, "loss": 1.7504, "step": 13927 }, { "epoch": 0.680078125, "grad_norm": 0.24934928119182587, "learning_rate": 0.00015724375418190296, "loss": 1.7496, "step": 13928 }, { "epoch": 0.680126953125, "grad_norm": 0.21420717239379883, "learning_rate": 0.00015721390814727118, "loss": 1.7776, "step": 13929 }, { "epoch": 0.68017578125, "grad_norm": 0.24775546789169312, "learning_rate": 0.00015718406496727978, "loss": 1.7625, "step": 13930 }, { "epoch": 0.680224609375, "grad_norm": 0.1972123682498932, "learning_rate": 0.00015715422464265236, "loss": 1.7394, "step": 13931 }, { "epoch": 0.6802734375, "grad_norm": 0.24737395346164703, "learning_rate": 0.00015712438717411186, "loss": 1.7738, "step": 13932 }, { "epoch": 0.680322265625, "grad_norm": 0.19348512589931488, "learning_rate": 0.0001570945525623817, "loss": 1.7367, "step": 13933 }, { "epoch": 0.68037109375, "grad_norm": 0.23454409837722778, "learning_rate": 0.00015706472080818457, "loss": 1.7603, "step": 13934 }, { "epoch": 0.680419921875, "grad_norm": 0.2516987919807434, "learning_rate": 0.00015703489191224386, "loss": 1.7378, "step": 13935 }, { "epoch": 0.68046875, "grad_norm": 0.22239696979522705, "learning_rate": 0.0001570050658752823, "loss": 1.7819, "step": 13936 }, { "epoch": 0.680517578125, "grad_norm": 0.3181464374065399, "learning_rate": 0.00015697524269802268, "loss": 1.7309, "step": 13937 }, { "epoch": 0.68056640625, "grad_norm": 0.20822151005268097, "learning_rate": 0.0001569454223811879, "loss": 1.7371, "step": 13938 }, { "epoch": 0.680615234375, "grad_norm": 0.23226295411586761, "learning_rate": 0.00015691560492550065, "loss": 1.7521, "step": 13939 }, { "epoch": 0.6806640625, "grad_norm": 0.23098091781139374, "learning_rate": 0.0001568857903316836, "loss": 1.7365, "step": 13940 }, { "epoch": 0.680712890625, "grad_norm": 0.22562247514724731, "learning_rate": 0.00015685597860045936, "loss": 1.7677, "step": 13941 }, { "epoch": 0.68076171875, "grad_norm": 0.21860083937644958, "learning_rate": 0.00015682616973255033, "loss": 1.7574, "step": 13942 }, { "epoch": 0.680810546875, "grad_norm": 0.2181418091058731, "learning_rate": 0.000156796363728679, "loss": 1.7746, "step": 13943 }, { "epoch": 0.680859375, "grad_norm": 0.23437534272670746, "learning_rate": 0.0001567665605895678, "loss": 1.7539, "step": 13944 }, { "epoch": 0.680908203125, "grad_norm": 0.20738863945007324, "learning_rate": 0.00015673676031593902, "loss": 1.7738, "step": 13945 }, { "epoch": 0.68095703125, "grad_norm": 0.24801498651504517, "learning_rate": 0.00015670696290851478, "loss": 1.7364, "step": 13946 }, { "epoch": 0.681005859375, "grad_norm": 0.245738685131073, "learning_rate": 0.00015667716836801745, "loss": 1.7511, "step": 13947 }, { "epoch": 0.6810546875, "grad_norm": 0.2373170405626297, "learning_rate": 0.00015664737669516884, "loss": 1.7581, "step": 13948 }, { "epoch": 0.681103515625, "grad_norm": 0.245553120970726, "learning_rate": 0.0001566175878906913, "loss": 1.7741, "step": 13949 }, { "epoch": 0.68115234375, "grad_norm": 0.19465941190719604, "learning_rate": 0.00015658780195530647, "loss": 1.751, "step": 13950 }, { "epoch": 0.681201171875, "grad_norm": 0.20994630455970764, "learning_rate": 0.0001565580188897366, "loss": 1.7657, "step": 13951 }, { "epoch": 0.68125, "grad_norm": 0.19949764013290405, "learning_rate": 0.00015652823869470307, "loss": 1.7478, "step": 13952 }, { "epoch": 0.681298828125, "grad_norm": 0.22384418547153473, "learning_rate": 0.000156498461370928, "loss": 1.739, "step": 13953 }, { "epoch": 0.68134765625, "grad_norm": 0.23343424499034882, "learning_rate": 0.0001564686869191329, "loss": 1.7736, "step": 13954 }, { "epoch": 0.681396484375, "grad_norm": 0.2893257737159729, "learning_rate": 0.00015643891534003937, "loss": 1.761, "step": 13955 }, { "epoch": 0.6814453125, "grad_norm": 0.21410292387008667, "learning_rate": 0.000156409146634369, "loss": 1.7464, "step": 13956 }, { "epoch": 0.681494140625, "grad_norm": 0.26389193534851074, "learning_rate": 0.0001563793808028432, "loss": 1.7746, "step": 13957 }, { "epoch": 0.68154296875, "grad_norm": 0.2139684110879898, "learning_rate": 0.00015634961784618347, "loss": 1.7455, "step": 13958 }, { "epoch": 0.681591796875, "grad_norm": 0.24425934255123138, "learning_rate": 0.00015631985776511098, "loss": 1.738, "step": 13959 }, { "epoch": 0.681640625, "grad_norm": 0.22444050014019012, "learning_rate": 0.00015629010056034714, "loss": 1.7401, "step": 13960 }, { "epoch": 0.681689453125, "grad_norm": 0.21224097907543182, "learning_rate": 0.00015626034623261308, "loss": 1.7544, "step": 13961 }, { "epoch": 0.68173828125, "grad_norm": 0.22816245257854462, "learning_rate": 0.0001562305947826299, "loss": 1.7672, "step": 13962 }, { "epoch": 0.681787109375, "grad_norm": 0.21377207338809967, "learning_rate": 0.00015620084621111878, "loss": 1.7475, "step": 13963 }, { "epoch": 0.6818359375, "grad_norm": 0.22291699051856995, "learning_rate": 0.00015617110051880046, "loss": 1.7601, "step": 13964 }, { "epoch": 0.681884765625, "grad_norm": 0.24629881978034973, "learning_rate": 0.00015614135770639613, "loss": 1.7454, "step": 13965 }, { "epoch": 0.68193359375, "grad_norm": 0.23268449306488037, "learning_rate": 0.00015611161777462628, "loss": 1.7569, "step": 13966 }, { "epoch": 0.681982421875, "grad_norm": 0.27941015362739563, "learning_rate": 0.0001560818807242121, "loss": 1.7468, "step": 13967 }, { "epoch": 0.68203125, "grad_norm": 0.24557963013648987, "learning_rate": 0.00015605214655587386, "loss": 1.7477, "step": 13968 }, { "epoch": 0.682080078125, "grad_norm": 0.23531299829483032, "learning_rate": 0.00015602241527033263, "loss": 1.7661, "step": 13969 }, { "epoch": 0.68212890625, "grad_norm": 0.2427663952112198, "learning_rate": 0.00015599268686830863, "loss": 1.7483, "step": 13970 }, { "epoch": 0.682177734375, "grad_norm": 0.2286076545715332, "learning_rate": 0.0001559629613505225, "loss": 1.7629, "step": 13971 }, { "epoch": 0.6822265625, "grad_norm": 0.21780115365982056, "learning_rate": 0.00015593323871769465, "loss": 1.7728, "step": 13972 }, { "epoch": 0.682275390625, "grad_norm": 0.2579369843006134, "learning_rate": 0.0001559035189705454, "loss": 1.7505, "step": 13973 }, { "epoch": 0.68232421875, "grad_norm": 0.19158200919628143, "learning_rate": 0.000155873802109795, "loss": 1.7307, "step": 13974 }, { "epoch": 0.682373046875, "grad_norm": 0.22805027663707733, "learning_rate": 0.00015584408813616374, "loss": 1.7625, "step": 13975 }, { "epoch": 0.682421875, "grad_norm": 0.1922028809785843, "learning_rate": 0.00015581437705037177, "loss": 1.7779, "step": 13976 }, { "epoch": 0.682470703125, "grad_norm": 0.22988606989383698, "learning_rate": 0.00015578466885313912, "loss": 1.7529, "step": 13977 }, { "epoch": 0.68251953125, "grad_norm": 0.2272430807352066, "learning_rate": 0.00015575496354518575, "loss": 1.7445, "step": 13978 }, { "epoch": 0.682568359375, "grad_norm": 0.21517838537693024, "learning_rate": 0.00015572526112723174, "loss": 1.7515, "step": 13979 }, { "epoch": 0.6826171875, "grad_norm": 0.23544466495513916, "learning_rate": 0.0001556955615999967, "loss": 1.7461, "step": 13980 }, { "epoch": 0.682666015625, "grad_norm": 0.22649742662906647, "learning_rate": 0.00015566586496420075, "loss": 1.7744, "step": 13981 }, { "epoch": 0.68271484375, "grad_norm": 0.2385229468345642, "learning_rate": 0.00015563617122056324, "loss": 1.7498, "step": 13982 }, { "epoch": 0.682763671875, "grad_norm": 0.26407766342163086, "learning_rate": 0.00015560648036980418, "loss": 1.7601, "step": 13983 }, { "epoch": 0.6828125, "grad_norm": 0.19080808758735657, "learning_rate": 0.00015557679241264284, "loss": 1.7584, "step": 13984 }, { "epoch": 0.682861328125, "grad_norm": 0.25397297739982605, "learning_rate": 0.00015554710734979905, "loss": 1.7445, "step": 13985 }, { "epoch": 0.68291015625, "grad_norm": 0.22597625851631165, "learning_rate": 0.00015551742518199192, "loss": 1.7598, "step": 13986 }, { "epoch": 0.682958984375, "grad_norm": 0.2623647451400757, "learning_rate": 0.00015548774590994112, "loss": 1.7443, "step": 13987 }, { "epoch": 0.6830078125, "grad_norm": 0.212709441781044, "learning_rate": 0.00015545806953436576, "loss": 1.7503, "step": 13988 }, { "epoch": 0.683056640625, "grad_norm": 0.2477998435497284, "learning_rate": 0.00015542839605598512, "loss": 1.7662, "step": 13989 }, { "epoch": 0.68310546875, "grad_norm": 0.25807321071624756, "learning_rate": 0.00015539872547551831, "loss": 1.7562, "step": 13990 }, { "epoch": 0.683154296875, "grad_norm": 0.21800373494625092, "learning_rate": 0.0001553690577936845, "loss": 1.7811, "step": 13991 }, { "epoch": 0.683203125, "grad_norm": 0.2535363733768463, "learning_rate": 0.00015533939301120276, "loss": 1.7791, "step": 13992 }, { "epoch": 0.683251953125, "grad_norm": 0.25140950083732605, "learning_rate": 0.00015530973112879188, "loss": 1.7606, "step": 13993 }, { "epoch": 0.68330078125, "grad_norm": 0.23640500009059906, "learning_rate": 0.00015528007214717085, "loss": 1.7471, "step": 13994 }, { "epoch": 0.683349609375, "grad_norm": 0.2514232099056244, "learning_rate": 0.00015525041606705852, "loss": 1.7701, "step": 13995 }, { "epoch": 0.6833984375, "grad_norm": 0.22210414707660675, "learning_rate": 0.0001552207628891734, "loss": 1.7361, "step": 13996 }, { "epoch": 0.683447265625, "grad_norm": 0.27603474259376526, "learning_rate": 0.00015519111261423453, "loss": 1.7402, "step": 13997 }, { "epoch": 0.68349609375, "grad_norm": 0.19456809759140015, "learning_rate": 0.0001551614652429601, "loss": 1.7175, "step": 13998 }, { "epoch": 0.683544921875, "grad_norm": 0.2658531665802002, "learning_rate": 0.00015513182077606902, "loss": 1.7651, "step": 13999 }, { "epoch": 0.68359375, "grad_norm": 0.21443021297454834, "learning_rate": 0.00015510217921427938, "loss": 1.7514, "step": 14000 }, { "epoch": 0.683642578125, "grad_norm": 0.2613143026828766, "learning_rate": 0.00015507254055830994, "loss": 1.7165, "step": 14001 }, { "epoch": 0.68369140625, "grad_norm": 0.20303648710250854, "learning_rate": 0.0001550429048088786, "loss": 1.7577, "step": 14002 }, { "epoch": 0.683740234375, "grad_norm": 0.28850069642066956, "learning_rate": 0.00015501327196670406, "loss": 1.7344, "step": 14003 }, { "epoch": 0.6837890625, "grad_norm": 0.22337213158607483, "learning_rate": 0.0001549836420325042, "loss": 1.7463, "step": 14004 }, { "epoch": 0.683837890625, "grad_norm": 0.2655450999736786, "learning_rate": 0.00015495401500699714, "loss": 1.7355, "step": 14005 }, { "epoch": 0.68388671875, "grad_norm": 0.2291691154241562, "learning_rate": 0.000154924390890901, "loss": 1.7458, "step": 14006 }, { "epoch": 0.683935546875, "grad_norm": 0.264070987701416, "learning_rate": 0.00015489476968493372, "loss": 1.7641, "step": 14007 }, { "epoch": 0.683984375, "grad_norm": 0.23200909793376923, "learning_rate": 0.0001548651513898132, "loss": 1.7687, "step": 14008 }, { "epoch": 0.684033203125, "grad_norm": 0.24396829307079315, "learning_rate": 0.0001548355360062572, "loss": 1.7613, "step": 14009 }, { "epoch": 0.68408203125, "grad_norm": 0.2187400758266449, "learning_rate": 0.00015480592353498356, "loss": 1.744, "step": 14010 }, { "epoch": 0.684130859375, "grad_norm": 0.2627669870853424, "learning_rate": 0.00015477631397670994, "loss": 1.7405, "step": 14011 }, { "epoch": 0.6841796875, "grad_norm": 0.24608252942562103, "learning_rate": 0.0001547467073321539, "loss": 1.7658, "step": 14012 }, { "epoch": 0.684228515625, "grad_norm": 0.25578615069389343, "learning_rate": 0.0001547171036020331, "loss": 1.7678, "step": 14013 }, { "epoch": 0.68427734375, "grad_norm": 0.23847246170043945, "learning_rate": 0.0001546875027870648, "loss": 1.7274, "step": 14014 }, { "epoch": 0.684326171875, "grad_norm": 0.25823885202407837, "learning_rate": 0.00015465790488796672, "loss": 1.7326, "step": 14015 }, { "epoch": 0.684375, "grad_norm": 0.2545561194419861, "learning_rate": 0.0001546283099054558, "loss": 1.7464, "step": 14016 }, { "epoch": 0.684423828125, "grad_norm": 0.24870583415031433, "learning_rate": 0.00015459871784024963, "loss": 1.7664, "step": 14017 }, { "epoch": 0.68447265625, "grad_norm": 0.235427126288414, "learning_rate": 0.00015456912869306512, "loss": 1.7534, "step": 14018 }, { "epoch": 0.684521484375, "grad_norm": 0.224961057305336, "learning_rate": 0.00015453954246461967, "loss": 1.7485, "step": 14019 }, { "epoch": 0.6845703125, "grad_norm": 0.23990502953529358, "learning_rate": 0.00015450995915563005, "loss": 1.7405, "step": 14020 }, { "epoch": 0.684619140625, "grad_norm": 0.24012552201747894, "learning_rate": 0.00015448037876681354, "loss": 1.753, "step": 14021 }, { "epoch": 0.68466796875, "grad_norm": 0.21236898005008698, "learning_rate": 0.00015445080129888677, "loss": 1.7428, "step": 14022 }, { "epoch": 0.684716796875, "grad_norm": 0.2616620361804962, "learning_rate": 0.0001544212267525667, "loss": 1.7631, "step": 14023 }, { "epoch": 0.684765625, "grad_norm": 0.19315335154533386, "learning_rate": 0.00015439165512857005, "loss": 1.7418, "step": 14024 }, { "epoch": 0.684814453125, "grad_norm": 0.2548242211341858, "learning_rate": 0.00015436208642761353, "loss": 1.749, "step": 14025 }, { "epoch": 0.68486328125, "grad_norm": 0.21077144145965576, "learning_rate": 0.00015433252065041379, "loss": 1.7624, "step": 14026 }, { "epoch": 0.684912109375, "grad_norm": 0.25129660964012146, "learning_rate": 0.00015430295779768731, "loss": 1.7572, "step": 14027 }, { "epoch": 0.6849609375, "grad_norm": 0.21076743304729462, "learning_rate": 0.00015427339787015072, "loss": 1.7622, "step": 14028 }, { "epoch": 0.685009765625, "grad_norm": 0.20641420781612396, "learning_rate": 0.0001542438408685203, "loss": 1.7463, "step": 14029 }, { "epoch": 0.68505859375, "grad_norm": 0.23760320246219635, "learning_rate": 0.0001542142867935123, "loss": 1.7609, "step": 14030 }, { "epoch": 0.685107421875, "grad_norm": 0.199886292219162, "learning_rate": 0.00015418473564584323, "loss": 1.7404, "step": 14031 }, { "epoch": 0.68515625, "grad_norm": 0.23175716400146484, "learning_rate": 0.00015415518742622903, "loss": 1.7678, "step": 14032 }, { "epoch": 0.685205078125, "grad_norm": 0.22204330563545227, "learning_rate": 0.00015412564213538603, "loss": 1.7746, "step": 14033 }, { "epoch": 0.68525390625, "grad_norm": 0.23555028438568115, "learning_rate": 0.0001540960997740301, "loss": 1.7537, "step": 14034 }, { "epoch": 0.685302734375, "grad_norm": 0.2287023365497589, "learning_rate": 0.00015406656034287752, "loss": 1.7521, "step": 14035 }, { "epoch": 0.6853515625, "grad_norm": 0.22529006004333496, "learning_rate": 0.00015403702384264378, "loss": 1.7583, "step": 14036 }, { "epoch": 0.685400390625, "grad_norm": 0.19742469489574432, "learning_rate": 0.00015400749027404513, "loss": 1.7412, "step": 14037 }, { "epoch": 0.68544921875, "grad_norm": 0.21311111748218536, "learning_rate": 0.0001539779596377971, "loss": 1.7637, "step": 14038 }, { "epoch": 0.685498046875, "grad_norm": 0.21622900664806366, "learning_rate": 0.00015394843193461543, "loss": 1.748, "step": 14039 }, { "epoch": 0.685546875, "grad_norm": 0.20063643157482147, "learning_rate": 0.0001539189071652158, "loss": 1.7581, "step": 14040 }, { "epoch": 0.685595703125, "grad_norm": 0.2311221808195114, "learning_rate": 0.00015388938533031365, "loss": 1.7355, "step": 14041 }, { "epoch": 0.68564453125, "grad_norm": 0.19139297306537628, "learning_rate": 0.0001538598664306246, "loss": 1.7599, "step": 14042 }, { "epoch": 0.685693359375, "grad_norm": 0.22070932388305664, "learning_rate": 0.000153830350466864, "loss": 1.7508, "step": 14043 }, { "epoch": 0.6857421875, "grad_norm": 0.18434619903564453, "learning_rate": 0.0001538008374397472, "loss": 1.7658, "step": 14044 }, { "epoch": 0.685791015625, "grad_norm": 0.22768482565879822, "learning_rate": 0.0001537713273499895, "loss": 1.7702, "step": 14045 }, { "epoch": 0.68583984375, "grad_norm": 0.20457114279270172, "learning_rate": 0.00015374182019830607, "loss": 1.7553, "step": 14046 }, { "epoch": 0.685888671875, "grad_norm": 0.239017516374588, "learning_rate": 0.00015371231598541207, "loss": 1.7777, "step": 14047 }, { "epoch": 0.6859375, "grad_norm": 0.19569018483161926, "learning_rate": 0.0001536828147120224, "loss": 1.7366, "step": 14048 }, { "epoch": 0.685986328125, "grad_norm": 0.2189849317073822, "learning_rate": 0.00015365331637885238, "loss": 1.7431, "step": 14049 }, { "epoch": 0.68603515625, "grad_norm": 0.2227030098438263, "learning_rate": 0.00015362382098661653, "loss": 1.7407, "step": 14050 }, { "epoch": 0.686083984375, "grad_norm": 0.23881173133850098, "learning_rate": 0.00015359432853603007, "loss": 1.7542, "step": 14051 }, { "epoch": 0.6861328125, "grad_norm": 0.20672182738780975, "learning_rate": 0.00015356483902780743, "loss": 1.7341, "step": 14052 }, { "epoch": 0.686181640625, "grad_norm": 0.28462398052215576, "learning_rate": 0.00015353535246266366, "loss": 1.7555, "step": 14053 }, { "epoch": 0.68623046875, "grad_norm": 0.2950030565261841, "learning_rate": 0.00015350586884131307, "loss": 1.7682, "step": 14054 }, { "epoch": 0.686279296875, "grad_norm": 0.19150033593177795, "learning_rate": 0.0001534763881644704, "loss": 1.7716, "step": 14055 }, { "epoch": 0.686328125, "grad_norm": 0.23728777468204498, "learning_rate": 0.00015344691043285005, "loss": 1.7444, "step": 14056 }, { "epoch": 0.686376953125, "grad_norm": 0.2433493435382843, "learning_rate": 0.00015341743564716655, "loss": 1.7572, "step": 14057 }, { "epoch": 0.68642578125, "grad_norm": 0.20992128551006317, "learning_rate": 0.0001533879638081341, "loss": 1.7593, "step": 14058 }, { "epoch": 0.686474609375, "grad_norm": 0.26319628953933716, "learning_rate": 0.00015335849491646708, "loss": 1.7844, "step": 14059 }, { "epoch": 0.6865234375, "grad_norm": 0.2270873337984085, "learning_rate": 0.0001533290289728797, "loss": 1.7772, "step": 14060 }, { "epoch": 0.686572265625, "grad_norm": 0.27258631587028503, "learning_rate": 0.00015329956597808598, "loss": 1.7824, "step": 14061 }, { "epoch": 0.68662109375, "grad_norm": 0.2381182312965393, "learning_rate": 0.0001532701059328001, "loss": 1.7672, "step": 14062 }, { "epoch": 0.686669921875, "grad_norm": 0.20445255935192108, "learning_rate": 0.00015324064883773597, "loss": 1.7511, "step": 14063 }, { "epoch": 0.68671875, "grad_norm": 0.2605317234992981, "learning_rate": 0.00015321119469360756, "loss": 1.7379, "step": 14064 }, { "epoch": 0.686767578125, "grad_norm": 0.2110133320093155, "learning_rate": 0.00015318174350112878, "loss": 1.7502, "step": 14065 }, { "epoch": 0.68681640625, "grad_norm": 0.2545999586582184, "learning_rate": 0.00015315229526101316, "loss": 1.7541, "step": 14066 }, { "epoch": 0.686865234375, "grad_norm": 0.2167186141014099, "learning_rate": 0.0001531228499739747, "loss": 1.7766, "step": 14067 }, { "epoch": 0.6869140625, "grad_norm": 0.23548215627670288, "learning_rate": 0.00015309340764072668, "loss": 1.7621, "step": 14068 }, { "epoch": 0.686962890625, "grad_norm": 0.20871037244796753, "learning_rate": 0.00015306396826198306, "loss": 1.7226, "step": 14069 }, { "epoch": 0.68701171875, "grad_norm": 0.1951487958431244, "learning_rate": 0.000153034531838457, "loss": 1.7474, "step": 14070 }, { "epoch": 0.687060546875, "grad_norm": 0.19750143587589264, "learning_rate": 0.00015300509837086219, "loss": 1.7711, "step": 14071 }, { "epoch": 0.687109375, "grad_norm": 0.2089388519525528, "learning_rate": 0.00015297566785991173, "loss": 1.7558, "step": 14072 }, { "epoch": 0.687158203125, "grad_norm": 0.24112705886363983, "learning_rate": 0.000152946240306319, "loss": 1.7628, "step": 14073 }, { "epoch": 0.68720703125, "grad_norm": 0.19496160745620728, "learning_rate": 0.00015291681571079718, "loss": 1.7636, "step": 14074 }, { "epoch": 0.687255859375, "grad_norm": 0.2251327782869339, "learning_rate": 0.00015288739407405948, "loss": 1.753, "step": 14075 }, { "epoch": 0.6873046875, "grad_norm": 0.18421447277069092, "learning_rate": 0.00015285797539681885, "loss": 1.769, "step": 14076 }, { "epoch": 0.687353515625, "grad_norm": 0.22209887206554413, "learning_rate": 0.00015282855967978828, "loss": 1.7669, "step": 14077 }, { "epoch": 0.68740234375, "grad_norm": 0.22341029345989227, "learning_rate": 0.00015279914692368075, "loss": 1.7486, "step": 14078 }, { "epoch": 0.687451171875, "grad_norm": 0.19860370457172394, "learning_rate": 0.00015276973712920905, "loss": 1.7492, "step": 14079 }, { "epoch": 0.6875, "grad_norm": 0.23263326287269592, "learning_rate": 0.000152740330297086, "loss": 1.7567, "step": 14080 }, { "epoch": 0.687548828125, "grad_norm": 0.19546052813529968, "learning_rate": 0.00015271092642802427, "loss": 1.7525, "step": 14081 }, { "epoch": 0.68759765625, "grad_norm": 0.24708932638168335, "learning_rate": 0.00015268152552273646, "loss": 1.7358, "step": 14082 }, { "epoch": 0.687646484375, "grad_norm": 0.22869600355625153, "learning_rate": 0.00015265212758193514, "loss": 1.7517, "step": 14083 }, { "epoch": 0.6876953125, "grad_norm": 0.21974483132362366, "learning_rate": 0.0001526227326063328, "loss": 1.759, "step": 14084 }, { "epoch": 0.687744140625, "grad_norm": 0.20090293884277344, "learning_rate": 0.00015259334059664192, "loss": 1.7509, "step": 14085 }, { "epoch": 0.68779296875, "grad_norm": 0.24087601900100708, "learning_rate": 0.00015256395155357466, "loss": 1.7477, "step": 14086 }, { "epoch": 0.687841796875, "grad_norm": 0.20021119713783264, "learning_rate": 0.00015253456547784355, "loss": 1.7657, "step": 14087 }, { "epoch": 0.687890625, "grad_norm": 0.2576856315135956, "learning_rate": 0.00015250518237016052, "loss": 1.7657, "step": 14088 }, { "epoch": 0.687939453125, "grad_norm": 0.21902278065681458, "learning_rate": 0.00015247580223123785, "loss": 1.7633, "step": 14089 }, { "epoch": 0.68798828125, "grad_norm": 0.24260349571704865, "learning_rate": 0.0001524464250617875, "loss": 1.7515, "step": 14090 }, { "epoch": 0.688037109375, "grad_norm": 0.2259005755186081, "learning_rate": 0.0001524170508625215, "loss": 1.7509, "step": 14091 }, { "epoch": 0.6880859375, "grad_norm": 0.24785882234573364, "learning_rate": 0.0001523876796341518, "loss": 1.7483, "step": 14092 }, { "epoch": 0.688134765625, "grad_norm": 0.20682305097579956, "learning_rate": 0.00015235831137739013, "loss": 1.7239, "step": 14093 }, { "epoch": 0.68818359375, "grad_norm": 0.20128238201141357, "learning_rate": 0.0001523289460929484, "loss": 1.7407, "step": 14094 }, { "epoch": 0.688232421875, "grad_norm": 0.22504009306430817, "learning_rate": 0.00015229958378153814, "loss": 1.7413, "step": 14095 }, { "epoch": 0.68828125, "grad_norm": 0.18329626321792603, "learning_rate": 0.00015227022444387107, "loss": 1.7523, "step": 14096 }, { "epoch": 0.688330078125, "grad_norm": 0.20377518236637115, "learning_rate": 0.00015224086808065873, "loss": 1.7648, "step": 14097 }, { "epoch": 0.68837890625, "grad_norm": 0.22511808574199677, "learning_rate": 0.00015221151469261253, "loss": 1.7294, "step": 14098 }, { "epoch": 0.688427734375, "grad_norm": 0.19951549172401428, "learning_rate": 0.00015218216428044393, "loss": 1.7422, "step": 14099 }, { "epoch": 0.6884765625, "grad_norm": 0.21044401824474335, "learning_rate": 0.00015215281684486425, "loss": 1.7402, "step": 14100 }, { "epoch": 0.688525390625, "grad_norm": 0.20570816099643707, "learning_rate": 0.00015212347238658478, "loss": 1.7515, "step": 14101 }, { "epoch": 0.68857421875, "grad_norm": 0.2376866489648819, "learning_rate": 0.00015209413090631663, "loss": 1.7787, "step": 14102 }, { "epoch": 0.688623046875, "grad_norm": 0.2151578813791275, "learning_rate": 0.000152064792404771, "loss": 1.7723, "step": 14103 }, { "epoch": 0.688671875, "grad_norm": 0.23604530096054077, "learning_rate": 0.00015203545688265887, "loss": 1.7383, "step": 14104 }, { "epoch": 0.688720703125, "grad_norm": 0.21707525849342346, "learning_rate": 0.00015200612434069133, "loss": 1.7223, "step": 14105 }, { "epoch": 0.68876953125, "grad_norm": 0.22555148601531982, "learning_rate": 0.0001519767947795791, "loss": 1.7461, "step": 14106 }, { "epoch": 0.688818359375, "grad_norm": 0.22996310889720917, "learning_rate": 0.000151947468200033, "loss": 1.759, "step": 14107 }, { "epoch": 0.6888671875, "grad_norm": 0.21447165310382843, "learning_rate": 0.000151918144602764, "loss": 1.7453, "step": 14108 }, { "epoch": 0.688916015625, "grad_norm": 0.19494205713272095, "learning_rate": 0.00015188882398848257, "loss": 1.7891, "step": 14109 }, { "epoch": 0.68896484375, "grad_norm": 0.22644665837287903, "learning_rate": 0.00015185950635789941, "loss": 1.7508, "step": 14110 }, { "epoch": 0.689013671875, "grad_norm": 0.19440753757953644, "learning_rate": 0.00015183019171172504, "loss": 1.7679, "step": 14111 }, { "epoch": 0.6890625, "grad_norm": 0.21704912185668945, "learning_rate": 0.00015180088005066996, "loss": 1.754, "step": 14112 }, { "epoch": 0.689111328125, "grad_norm": 0.1785733848810196, "learning_rate": 0.00015177157137544447, "loss": 1.7575, "step": 14113 }, { "epoch": 0.68916015625, "grad_norm": 0.24344514310359955, "learning_rate": 0.00015174226568675898, "loss": 1.7386, "step": 14114 }, { "epoch": 0.689208984375, "grad_norm": 0.19214655458927155, "learning_rate": 0.00015171296298532373, "loss": 1.7412, "step": 14115 }, { "epoch": 0.6892578125, "grad_norm": 0.25808578729629517, "learning_rate": 0.00015168366327184885, "loss": 1.7398, "step": 14116 }, { "epoch": 0.689306640625, "grad_norm": 0.22117377817630768, "learning_rate": 0.0001516543665470444, "loss": 1.7457, "step": 14117 }, { "epoch": 0.68935546875, "grad_norm": 0.28508254885673523, "learning_rate": 0.00015162507281162057, "loss": 1.7761, "step": 14118 }, { "epoch": 0.689404296875, "grad_norm": 0.22445711493492126, "learning_rate": 0.00015159578206628717, "loss": 1.7556, "step": 14119 }, { "epoch": 0.689453125, "grad_norm": 0.21331235766410828, "learning_rate": 0.00015156649431175413, "loss": 1.7761, "step": 14120 }, { "epoch": 0.689501953125, "grad_norm": 0.25302115082740784, "learning_rate": 0.0001515372095487313, "loss": 1.7431, "step": 14121 }, { "epoch": 0.68955078125, "grad_norm": 0.22870850563049316, "learning_rate": 0.00015150792777792843, "loss": 1.7782, "step": 14122 }, { "epoch": 0.689599609375, "grad_norm": 0.2600022256374359, "learning_rate": 0.00015147864900005498, "loss": 1.7596, "step": 14123 }, { "epoch": 0.6896484375, "grad_norm": 0.2719847857952118, "learning_rate": 0.0001514493732158209, "loss": 1.752, "step": 14124 }, { "epoch": 0.689697265625, "grad_norm": 0.23991532623767853, "learning_rate": 0.0001514201004259354, "loss": 1.7847, "step": 14125 }, { "epoch": 0.68974609375, "grad_norm": 0.26015451550483704, "learning_rate": 0.00015139083063110808, "loss": 1.7474, "step": 14126 }, { "epoch": 0.689794921875, "grad_norm": 0.255975604057312, "learning_rate": 0.00015136156383204827, "loss": 1.7602, "step": 14127 }, { "epoch": 0.68984375, "grad_norm": 0.25598645210266113, "learning_rate": 0.0001513323000294653, "loss": 1.7566, "step": 14128 }, { "epoch": 0.689892578125, "grad_norm": 0.2351517379283905, "learning_rate": 0.0001513030392240684, "loss": 1.7372, "step": 14129 }, { "epoch": 0.68994140625, "grad_norm": 0.266189306974411, "learning_rate": 0.0001512737814165667, "loss": 1.7532, "step": 14130 }, { "epoch": 0.689990234375, "grad_norm": 0.18566927313804626, "learning_rate": 0.0001512445266076693, "loss": 1.744, "step": 14131 }, { "epoch": 0.6900390625, "grad_norm": 0.25462964177131653, "learning_rate": 0.00015121527479808528, "loss": 1.7577, "step": 14132 }, { "epoch": 0.690087890625, "grad_norm": 0.20767365396022797, "learning_rate": 0.00015118602598852347, "loss": 1.7456, "step": 14133 }, { "epoch": 0.69013671875, "grad_norm": 0.26491057872772217, "learning_rate": 0.00015115678017969282, "loss": 1.7563, "step": 14134 }, { "epoch": 0.690185546875, "grad_norm": 0.25856611132621765, "learning_rate": 0.00015112753737230207, "loss": 1.7499, "step": 14135 }, { "epoch": 0.690234375, "grad_norm": 0.24240145087242126, "learning_rate": 0.00015109829756706, "loss": 1.764, "step": 14136 }, { "epoch": 0.690283203125, "grad_norm": 0.199027881026268, "learning_rate": 0.0001510690607646752, "loss": 1.7496, "step": 14137 }, { "epoch": 0.69033203125, "grad_norm": 0.2503545582294464, "learning_rate": 0.0001510398269658564, "loss": 1.7548, "step": 14138 }, { "epoch": 0.690380859375, "grad_norm": 0.209695965051651, "learning_rate": 0.00015101059617131176, "loss": 1.762, "step": 14139 }, { "epoch": 0.6904296875, "grad_norm": 0.23432046175003052, "learning_rate": 0.00015098136838175014, "loss": 1.7458, "step": 14140 }, { "epoch": 0.690478515625, "grad_norm": 0.24261851608753204, "learning_rate": 0.0001509521435978795, "loss": 1.7882, "step": 14141 }, { "epoch": 0.69052734375, "grad_norm": 0.20580096542835236, "learning_rate": 0.0001509229218204085, "loss": 1.7553, "step": 14142 }, { "epoch": 0.690576171875, "grad_norm": 0.24966001510620117, "learning_rate": 0.00015089370305004503, "loss": 1.7718, "step": 14143 }, { "epoch": 0.690625, "grad_norm": 0.23426012694835663, "learning_rate": 0.0001508644872874975, "loss": 1.7511, "step": 14144 }, { "epoch": 0.690673828125, "grad_norm": 0.220937579870224, "learning_rate": 0.0001508352745334738, "loss": 1.7387, "step": 14145 }, { "epoch": 0.69072265625, "grad_norm": 0.2125309854745865, "learning_rate": 0.00015080606478868193, "loss": 1.7468, "step": 14146 }, { "epoch": 0.690771484375, "grad_norm": 0.19949980080127716, "learning_rate": 0.0001507768580538299, "loss": 1.7386, "step": 14147 }, { "epoch": 0.6908203125, "grad_norm": 0.25435203313827515, "learning_rate": 0.0001507476543296255, "loss": 1.7634, "step": 14148 }, { "epoch": 0.690869140625, "grad_norm": 0.2101428061723709, "learning_rate": 0.0001507184536167765, "loss": 1.776, "step": 14149 }, { "epoch": 0.69091796875, "grad_norm": 0.23774448037147522, "learning_rate": 0.00015068925591599065, "loss": 1.7437, "step": 14150 }, { "epoch": 0.690966796875, "grad_norm": 0.22174130380153656, "learning_rate": 0.00015066006122797554, "loss": 1.7385, "step": 14151 }, { "epoch": 0.691015625, "grad_norm": 0.23236976563930511, "learning_rate": 0.00015063086955343873, "loss": 1.7558, "step": 14152 }, { "epoch": 0.691064453125, "grad_norm": 0.21686741709709167, "learning_rate": 0.00015060168089308775, "loss": 1.7488, "step": 14153 }, { "epoch": 0.69111328125, "grad_norm": 0.20585575699806213, "learning_rate": 0.00015057249524762996, "loss": 1.7312, "step": 14154 }, { "epoch": 0.691162109375, "grad_norm": 0.2480933666229248, "learning_rate": 0.0001505433126177727, "loss": 1.7729, "step": 14155 }, { "epoch": 0.6912109375, "grad_norm": 0.21437981724739075, "learning_rate": 0.00015051413300422333, "loss": 1.7441, "step": 14156 }, { "epoch": 0.691259765625, "grad_norm": 0.21319109201431274, "learning_rate": 0.00015048495640768877, "loss": 1.7714, "step": 14157 }, { "epoch": 0.69130859375, "grad_norm": 0.2231198251247406, "learning_rate": 0.00015045578282887656, "loss": 1.7602, "step": 14158 }, { "epoch": 0.691357421875, "grad_norm": 0.21616455912590027, "learning_rate": 0.00015042661226849328, "loss": 1.7721, "step": 14159 }, { "epoch": 0.69140625, "grad_norm": 0.25936004519462585, "learning_rate": 0.00015039744472724635, "loss": 1.7404, "step": 14160 }, { "epoch": 0.691455078125, "grad_norm": 0.2464645951986313, "learning_rate": 0.00015036828020584226, "loss": 1.7717, "step": 14161 }, { "epoch": 0.69150390625, "grad_norm": 0.21005785465240479, "learning_rate": 0.00015033911870498818, "loss": 1.7524, "step": 14162 }, { "epoch": 0.691552734375, "grad_norm": 0.21717950701713562, "learning_rate": 0.00015030996022539055, "loss": 1.7668, "step": 14163 }, { "epoch": 0.6916015625, "grad_norm": 0.21545511484146118, "learning_rate": 0.00015028080476775637, "loss": 1.7708, "step": 14164 }, { "epoch": 0.691650390625, "grad_norm": 0.20552216470241547, "learning_rate": 0.000150251652332792, "loss": 1.7483, "step": 14165 }, { "epoch": 0.69169921875, "grad_norm": 0.22285521030426025, "learning_rate": 0.00015022250292120407, "loss": 1.7658, "step": 14166 }, { "epoch": 0.691748046875, "grad_norm": 0.18902508914470673, "learning_rate": 0.00015019335653369904, "loss": 1.7452, "step": 14167 }, { "epoch": 0.691796875, "grad_norm": 0.23066677153110504, "learning_rate": 0.00015016421317098327, "loss": 1.7725, "step": 14168 }, { "epoch": 0.691845703125, "grad_norm": 0.20124690234661102, "learning_rate": 0.0001501350728337631, "loss": 1.7213, "step": 14169 }, { "epoch": 0.69189453125, "grad_norm": 0.21313364803791046, "learning_rate": 0.0001501059355227447, "loss": 1.7437, "step": 14170 }, { "epoch": 0.691943359375, "grad_norm": 0.22083033621311188, "learning_rate": 0.00015007680123863435, "loss": 1.7633, "step": 14171 }, { "epoch": 0.6919921875, "grad_norm": 0.21177317202091217, "learning_rate": 0.00015004766998213815, "loss": 1.7436, "step": 14172 }, { "epoch": 0.692041015625, "grad_norm": 0.2033698558807373, "learning_rate": 0.0001500185417539619, "loss": 1.7429, "step": 14173 }, { "epoch": 0.69208984375, "grad_norm": 0.22135598957538605, "learning_rate": 0.00014998941655481183, "loss": 1.7557, "step": 14174 }, { "epoch": 0.692138671875, "grad_norm": 0.21051910519599915, "learning_rate": 0.00014996029438539354, "loss": 1.7495, "step": 14175 }, { "epoch": 0.6921875, "grad_norm": 0.21290023624897003, "learning_rate": 0.00014993117524641315, "loss": 1.7611, "step": 14176 }, { "epoch": 0.692236328125, "grad_norm": 0.20576170086860657, "learning_rate": 0.000149902059138576, "loss": 1.7451, "step": 14177 }, { "epoch": 0.69228515625, "grad_norm": 0.20885494351387024, "learning_rate": 0.00014987294606258816, "loss": 1.766, "step": 14178 }, { "epoch": 0.692333984375, "grad_norm": 0.2141111046075821, "learning_rate": 0.0001498438360191548, "loss": 1.7798, "step": 14179 }, { "epoch": 0.6923828125, "grad_norm": 0.21863795816898346, "learning_rate": 0.0001498147290089818, "loss": 1.737, "step": 14180 }, { "epoch": 0.692431640625, "grad_norm": 0.2302342653274536, "learning_rate": 0.0001497856250327743, "loss": 1.7651, "step": 14181 }, { "epoch": 0.69248046875, "grad_norm": 0.25745657086372375, "learning_rate": 0.00014975652409123785, "loss": 1.7285, "step": 14182 }, { "epoch": 0.692529296875, "grad_norm": 0.22088095545768738, "learning_rate": 0.00014972742618507762, "loss": 1.7453, "step": 14183 }, { "epoch": 0.692578125, "grad_norm": 0.2510890066623688, "learning_rate": 0.00014969833131499885, "loss": 1.7423, "step": 14184 }, { "epoch": 0.692626953125, "grad_norm": 0.21414770185947418, "learning_rate": 0.0001496692394817067, "loss": 1.7668, "step": 14185 }, { "epoch": 0.69267578125, "grad_norm": 0.23286527395248413, "learning_rate": 0.00014964015068590623, "loss": 1.7469, "step": 14186 }, { "epoch": 0.692724609375, "grad_norm": 0.23108191788196564, "learning_rate": 0.0001496110649283024, "loss": 1.742, "step": 14187 }, { "epoch": 0.6927734375, "grad_norm": 0.24149499833583832, "learning_rate": 0.00014958198220960011, "loss": 1.7542, "step": 14188 }, { "epoch": 0.692822265625, "grad_norm": 0.21740218997001648, "learning_rate": 0.00014955290253050433, "loss": 1.7453, "step": 14189 }, { "epoch": 0.69287109375, "grad_norm": 0.2631298899650574, "learning_rate": 0.00014952382589171974, "loss": 1.7433, "step": 14190 }, { "epoch": 0.692919921875, "grad_norm": 0.1896974742412567, "learning_rate": 0.00014949475229395092, "loss": 1.7623, "step": 14191 }, { "epoch": 0.69296875, "grad_norm": 0.22891081869602203, "learning_rate": 0.00014946568173790275, "loss": 1.747, "step": 14192 }, { "epoch": 0.693017578125, "grad_norm": 0.22913695871829987, "learning_rate": 0.0001494366142242795, "loss": 1.7463, "step": 14193 }, { "epoch": 0.69306640625, "grad_norm": 0.2341773509979248, "learning_rate": 0.00014940754975378595, "loss": 1.7529, "step": 14194 }, { "epoch": 0.693115234375, "grad_norm": 0.21939891576766968, "learning_rate": 0.00014937848832712614, "loss": 1.7501, "step": 14195 }, { "epoch": 0.6931640625, "grad_norm": 0.20176124572753906, "learning_rate": 0.00014934942994500477, "loss": 1.7442, "step": 14196 }, { "epoch": 0.693212890625, "grad_norm": 0.20400793850421906, "learning_rate": 0.0001493203746081258, "loss": 1.7179, "step": 14197 }, { "epoch": 0.69326171875, "grad_norm": 0.2124052345752716, "learning_rate": 0.00014929132231719357, "loss": 1.7614, "step": 14198 }, { "epoch": 0.693310546875, "grad_norm": 0.20838764309883118, "learning_rate": 0.00014926227307291207, "loss": 1.7622, "step": 14199 }, { "epoch": 0.693359375, "grad_norm": 0.20728753507137299, "learning_rate": 0.00014923322687598545, "loss": 1.7598, "step": 14200 }, { "epoch": 0.693408203125, "grad_norm": 0.21391011774539948, "learning_rate": 0.0001492041837271176, "loss": 1.7624, "step": 14201 }, { "epoch": 0.69345703125, "grad_norm": 0.21282455325126648, "learning_rate": 0.0001491751436270124, "loss": 1.7285, "step": 14202 }, { "epoch": 0.693505859375, "grad_norm": 0.20741815865039825, "learning_rate": 0.00014914610657637371, "loss": 1.7296, "step": 14203 }, { "epoch": 0.6935546875, "grad_norm": 0.2280244529247284, "learning_rate": 0.00014911707257590524, "loss": 1.7445, "step": 14204 }, { "epoch": 0.693603515625, "grad_norm": 0.18623366951942444, "learning_rate": 0.00014908804162631065, "loss": 1.7533, "step": 14205 }, { "epoch": 0.69365234375, "grad_norm": 0.2637949585914612, "learning_rate": 0.0001490590137282936, "loss": 1.7447, "step": 14206 }, { "epoch": 0.693701171875, "grad_norm": 0.1981491595506668, "learning_rate": 0.00014902998888255737, "loss": 1.7568, "step": 14207 }, { "epoch": 0.69375, "grad_norm": 0.22942988574504852, "learning_rate": 0.00014900096708980571, "loss": 1.7829, "step": 14208 }, { "epoch": 0.693798828125, "grad_norm": 0.22677768766880035, "learning_rate": 0.00014897194835074168, "loss": 1.7586, "step": 14209 }, { "epoch": 0.69384765625, "grad_norm": 0.20550523698329926, "learning_rate": 0.00014894293266606889, "loss": 1.7648, "step": 14210 }, { "epoch": 0.693896484375, "grad_norm": 0.27179571986198425, "learning_rate": 0.00014891392003649027, "loss": 1.7399, "step": 14211 }, { "epoch": 0.6939453125, "grad_norm": 0.20014150440692902, "learning_rate": 0.00014888491046270925, "loss": 1.753, "step": 14212 }, { "epoch": 0.693994140625, "grad_norm": 0.2511496841907501, "learning_rate": 0.00014885590394542856, "loss": 1.7373, "step": 14213 }, { "epoch": 0.69404296875, "grad_norm": 0.2580462694168091, "learning_rate": 0.00014882690048535158, "loss": 1.712, "step": 14214 }, { "epoch": 0.694091796875, "grad_norm": 0.18966035544872284, "learning_rate": 0.00014879790008318089, "loss": 1.7512, "step": 14215 }, { "epoch": 0.694140625, "grad_norm": 0.2717084586620331, "learning_rate": 0.00014876890273961952, "loss": 1.7553, "step": 14216 }, { "epoch": 0.694189453125, "grad_norm": 0.20751477777957916, "learning_rate": 0.00014873990845537023, "loss": 1.7506, "step": 14217 }, { "epoch": 0.69423828125, "grad_norm": 0.2685500979423523, "learning_rate": 0.00014871091723113567, "loss": 1.7426, "step": 14218 }, { "epoch": 0.694287109375, "grad_norm": 0.24116134643554688, "learning_rate": 0.00014868192906761845, "loss": 1.7458, "step": 14219 }, { "epoch": 0.6943359375, "grad_norm": 0.2261624038219452, "learning_rate": 0.00014865294396552127, "loss": 1.7652, "step": 14220 }, { "epoch": 0.694384765625, "grad_norm": 0.2714863419532776, "learning_rate": 0.0001486239619255464, "loss": 1.7741, "step": 14221 }, { "epoch": 0.69443359375, "grad_norm": 0.24258141219615936, "learning_rate": 0.00014859498294839636, "loss": 1.7753, "step": 14222 }, { "epoch": 0.694482421875, "grad_norm": 0.2194356471300125, "learning_rate": 0.00014856600703477347, "loss": 1.7493, "step": 14223 }, { "epoch": 0.69453125, "grad_norm": 0.27195626497268677, "learning_rate": 0.00014853703418538005, "loss": 1.7769, "step": 14224 }, { "epoch": 0.694580078125, "grad_norm": 0.18013280630111694, "learning_rate": 0.00014850806440091808, "loss": 1.7338, "step": 14225 }, { "epoch": 0.69462890625, "grad_norm": 0.28398579359054565, "learning_rate": 0.00014847909768208993, "loss": 1.7192, "step": 14226 }, { "epoch": 0.694677734375, "grad_norm": 0.19561892747879028, "learning_rate": 0.00014845013402959732, "loss": 1.7587, "step": 14227 }, { "epoch": 0.6947265625, "grad_norm": 0.2857668995857239, "learning_rate": 0.00014842117344414255, "loss": 1.7692, "step": 14228 }, { "epoch": 0.694775390625, "grad_norm": 0.21199816465377808, "learning_rate": 0.00014839221592642714, "loss": 1.7354, "step": 14229 }, { "epoch": 0.69482421875, "grad_norm": 0.2444876730442047, "learning_rate": 0.00014836326147715334, "loss": 1.7417, "step": 14230 }, { "epoch": 0.694873046875, "grad_norm": 0.21781842410564423, "learning_rate": 0.00014833431009702246, "loss": 1.7369, "step": 14231 }, { "epoch": 0.694921875, "grad_norm": 0.2396879941225052, "learning_rate": 0.00014830536178673642, "loss": 1.7612, "step": 14232 }, { "epoch": 0.694970703125, "grad_norm": 0.2058224231004715, "learning_rate": 0.00014827641654699666, "loss": 1.7371, "step": 14233 }, { "epoch": 0.69501953125, "grad_norm": 0.2279793918132782, "learning_rate": 0.00014824747437850477, "loss": 1.7624, "step": 14234 }, { "epoch": 0.695068359375, "grad_norm": 0.24712689220905304, "learning_rate": 0.0001482185352819622, "loss": 1.7406, "step": 14235 }, { "epoch": 0.6951171875, "grad_norm": 0.19448520243167877, "learning_rate": 0.00014818959925807024, "loss": 1.7723, "step": 14236 }, { "epoch": 0.695166015625, "grad_norm": 0.2244778573513031, "learning_rate": 0.00014816066630753027, "loss": 1.7428, "step": 14237 }, { "epoch": 0.69521484375, "grad_norm": 0.19995588064193726, "learning_rate": 0.00014813173643104345, "loss": 1.7225, "step": 14238 }, { "epoch": 0.695263671875, "grad_norm": 0.2262740135192871, "learning_rate": 0.00014810280962931086, "loss": 1.7507, "step": 14239 }, { "epoch": 0.6953125, "grad_norm": 0.22772632539272308, "learning_rate": 0.00014807388590303374, "loss": 1.7437, "step": 14240 }, { "epoch": 0.695361328125, "grad_norm": 0.22334162890911102, "learning_rate": 0.00014804496525291282, "loss": 1.7346, "step": 14241 }, { "epoch": 0.69541015625, "grad_norm": 0.23779384791851044, "learning_rate": 0.0001480160476796493, "loss": 1.7405, "step": 14242 }, { "epoch": 0.695458984375, "grad_norm": 0.20824742317199707, "learning_rate": 0.00014798713318394375, "loss": 1.7462, "step": 14243 }, { "epoch": 0.6955078125, "grad_norm": 0.23807479441165924, "learning_rate": 0.0001479582217664972, "loss": 1.7649, "step": 14244 }, { "epoch": 0.695556640625, "grad_norm": 0.20181316137313843, "learning_rate": 0.0001479293134280101, "loss": 1.7401, "step": 14245 }, { "epoch": 0.69560546875, "grad_norm": 0.2398545891046524, "learning_rate": 0.00014790040816918326, "loss": 1.7362, "step": 14246 }, { "epoch": 0.695654296875, "grad_norm": 0.19243277609348297, "learning_rate": 0.00014787150599071704, "loss": 1.7666, "step": 14247 }, { "epoch": 0.695703125, "grad_norm": 0.23952780663967133, "learning_rate": 0.00014784260689331213, "loss": 1.7706, "step": 14248 }, { "epoch": 0.695751953125, "grad_norm": 0.20764172077178955, "learning_rate": 0.00014781371087766877, "loss": 1.7587, "step": 14249 }, { "epoch": 0.69580078125, "grad_norm": 0.21110229194164276, "learning_rate": 0.0001477848179444872, "loss": 1.7465, "step": 14250 }, { "epoch": 0.695849609375, "grad_norm": 0.1924627274274826, "learning_rate": 0.00014775592809446788, "loss": 1.7534, "step": 14251 }, { "epoch": 0.6958984375, "grad_norm": 0.22568540275096893, "learning_rate": 0.00014772704132831084, "loss": 1.7617, "step": 14252 }, { "epoch": 0.695947265625, "grad_norm": 0.19851137697696686, "learning_rate": 0.0001476981576467162, "loss": 1.747, "step": 14253 }, { "epoch": 0.69599609375, "grad_norm": 0.25151708722114563, "learning_rate": 0.00014766927705038398, "loss": 1.745, "step": 14254 }, { "epoch": 0.696044921875, "grad_norm": 0.20752646028995514, "learning_rate": 0.0001476403995400141, "loss": 1.7546, "step": 14255 }, { "epoch": 0.69609375, "grad_norm": 0.22815251350402832, "learning_rate": 0.00014761152511630658, "loss": 1.7535, "step": 14256 }, { "epoch": 0.696142578125, "grad_norm": 0.22756609320640564, "learning_rate": 0.00014758265377996088, "loss": 1.7516, "step": 14257 }, { "epoch": 0.69619140625, "grad_norm": 0.22612856328487396, "learning_rate": 0.00014755378553167714, "loss": 1.764, "step": 14258 }, { "epoch": 0.696240234375, "grad_norm": 0.2631654441356659, "learning_rate": 0.00014752492037215463, "loss": 1.7735, "step": 14259 }, { "epoch": 0.6962890625, "grad_norm": 0.21258698403835297, "learning_rate": 0.00014749605830209324, "loss": 1.7722, "step": 14260 }, { "epoch": 0.696337890625, "grad_norm": 0.2847956120967865, "learning_rate": 0.00014746719932219216, "loss": 1.7649, "step": 14261 }, { "epoch": 0.69638671875, "grad_norm": 0.20157547295093536, "learning_rate": 0.0001474383434331511, "loss": 1.765, "step": 14262 }, { "epoch": 0.696435546875, "grad_norm": 0.2835155427455902, "learning_rate": 0.00014740949063566915, "loss": 1.7452, "step": 14263 }, { "epoch": 0.696484375, "grad_norm": 0.20923221111297607, "learning_rate": 0.0001473806409304458, "loss": 1.7368, "step": 14264 }, { "epoch": 0.696533203125, "grad_norm": 0.25419679284095764, "learning_rate": 0.00014735179431818012, "loss": 1.7509, "step": 14265 }, { "epoch": 0.69658203125, "grad_norm": 0.24349938333034515, "learning_rate": 0.00014732295079957123, "loss": 1.7838, "step": 14266 }, { "epoch": 0.696630859375, "grad_norm": 0.3136366903781891, "learning_rate": 0.0001472941103753182, "loss": 1.7741, "step": 14267 }, { "epoch": 0.6966796875, "grad_norm": 0.2083485871553421, "learning_rate": 0.00014726527304612002, "loss": 1.743, "step": 14268 }, { "epoch": 0.696728515625, "grad_norm": 0.3275829553604126, "learning_rate": 0.00014723643881267557, "loss": 1.7548, "step": 14269 }, { "epoch": 0.69677734375, "grad_norm": 0.23902088403701782, "learning_rate": 0.00014720760767568365, "loss": 1.7323, "step": 14270 }, { "epoch": 0.696826171875, "grad_norm": 0.2887932360172272, "learning_rate": 0.00014717877963584303, "loss": 1.7819, "step": 14271 }, { "epoch": 0.696875, "grad_norm": 0.2593466639518738, "learning_rate": 0.00014714995469385233, "loss": 1.7724, "step": 14272 }, { "epoch": 0.696923828125, "grad_norm": 0.2146255522966385, "learning_rate": 0.00014712113285041028, "loss": 1.7649, "step": 14273 }, { "epoch": 0.69697265625, "grad_norm": 0.2760764956474304, "learning_rate": 0.00014709231410621536, "loss": 1.7308, "step": 14274 }, { "epoch": 0.697021484375, "grad_norm": 0.21577630937099457, "learning_rate": 0.0001470634984619658, "loss": 1.7205, "step": 14275 }, { "epoch": 0.6970703125, "grad_norm": 0.23405827581882477, "learning_rate": 0.00014703468591836027, "loss": 1.7283, "step": 14276 }, { "epoch": 0.697119140625, "grad_norm": 0.22149018943309784, "learning_rate": 0.0001470058764760968, "loss": 1.7657, "step": 14277 }, { "epoch": 0.69716796875, "grad_norm": 0.2069971263408661, "learning_rate": 0.0001469770701358739, "loss": 1.7441, "step": 14278 }, { "epoch": 0.697216796875, "grad_norm": 0.266215443611145, "learning_rate": 0.00014694826689838942, "loss": 1.7324, "step": 14279 }, { "epoch": 0.697265625, "grad_norm": 0.18488462269306183, "learning_rate": 0.00014691946676434172, "loss": 1.7508, "step": 14280 }, { "epoch": 0.697314453125, "grad_norm": 0.2294013947248459, "learning_rate": 0.0001468906697344285, "loss": 1.751, "step": 14281 }, { "epoch": 0.69736328125, "grad_norm": 0.2118714302778244, "learning_rate": 0.0001468618758093479, "loss": 1.728, "step": 14282 }, { "epoch": 0.697412109375, "grad_norm": 0.21095244586467743, "learning_rate": 0.0001468330849897977, "loss": 1.764, "step": 14283 }, { "epoch": 0.6974609375, "grad_norm": 0.22704681754112244, "learning_rate": 0.0001468042972764756, "loss": 1.7432, "step": 14284 }, { "epoch": 0.697509765625, "grad_norm": 0.2209356129169464, "learning_rate": 0.00014677551267007935, "loss": 1.756, "step": 14285 }, { "epoch": 0.69755859375, "grad_norm": 0.25524502992630005, "learning_rate": 0.00014674673117130659, "loss": 1.7346, "step": 14286 }, { "epoch": 0.697607421875, "grad_norm": 0.21861876547336578, "learning_rate": 0.00014671795278085482, "loss": 1.7669, "step": 14287 }, { "epoch": 0.69765625, "grad_norm": 0.20940445363521576, "learning_rate": 0.00014668917749942152, "loss": 1.7479, "step": 14288 }, { "epoch": 0.697705078125, "grad_norm": 0.24203114211559296, "learning_rate": 0.00014666040532770408, "loss": 1.7566, "step": 14289 }, { "epoch": 0.69775390625, "grad_norm": 0.18907371163368225, "learning_rate": 0.0001466316362663999, "loss": 1.7475, "step": 14290 }, { "epoch": 0.697802734375, "grad_norm": 0.21205751597881317, "learning_rate": 0.00014660287031620603, "loss": 1.7849, "step": 14291 }, { "epoch": 0.6978515625, "grad_norm": 0.19437773525714874, "learning_rate": 0.00014657410747781987, "loss": 1.7413, "step": 14292 }, { "epoch": 0.697900390625, "grad_norm": 0.2042141854763031, "learning_rate": 0.00014654534775193824, "loss": 1.7455, "step": 14293 }, { "epoch": 0.69794921875, "grad_norm": 0.1998637318611145, "learning_rate": 0.00014651659113925847, "loss": 1.7573, "step": 14294 }, { "epoch": 0.697998046875, "grad_norm": 0.20551463961601257, "learning_rate": 0.00014648783764047719, "loss": 1.7434, "step": 14295 }, { "epoch": 0.698046875, "grad_norm": 0.20266187191009521, "learning_rate": 0.00014645908725629154, "loss": 1.7426, "step": 14296 }, { "epoch": 0.698095703125, "grad_norm": 0.20850858092308044, "learning_rate": 0.00014643033998739806, "loss": 1.7501, "step": 14297 }, { "epoch": 0.69814453125, "grad_norm": 0.21729005873203278, "learning_rate": 0.00014640159583449372, "loss": 1.7516, "step": 14298 }, { "epoch": 0.698193359375, "grad_norm": 0.19129429757595062, "learning_rate": 0.00014637285479827494, "loss": 1.7676, "step": 14299 }, { "epoch": 0.6982421875, "grad_norm": 0.20008401572704315, "learning_rate": 0.00014634411687943836, "loss": 1.7107, "step": 14300 }, { "epoch": 0.698291015625, "grad_norm": 0.23362602293491364, "learning_rate": 0.00014631538207868046, "loss": 1.751, "step": 14301 }, { "epoch": 0.69833984375, "grad_norm": 0.1902385801076889, "learning_rate": 0.00014628665039669768, "loss": 1.7231, "step": 14302 }, { "epoch": 0.698388671875, "grad_norm": 0.26790180802345276, "learning_rate": 0.00014625792183418633, "loss": 1.7483, "step": 14303 }, { "epoch": 0.6984375, "grad_norm": 0.17362479865550995, "learning_rate": 0.00014622919639184267, "loss": 1.7488, "step": 14304 }, { "epoch": 0.698486328125, "grad_norm": 0.2375021129846573, "learning_rate": 0.00014620047407036288, "loss": 1.7252, "step": 14305 }, { "epoch": 0.69853515625, "grad_norm": 0.20109908282756805, "learning_rate": 0.0001461717548704431, "loss": 1.7597, "step": 14306 }, { "epoch": 0.698583984375, "grad_norm": 0.22088409960269928, "learning_rate": 0.0001461430387927793, "loss": 1.7289, "step": 14307 }, { "epoch": 0.6986328125, "grad_norm": 0.18239834904670715, "learning_rate": 0.0001461143258380676, "loss": 1.7485, "step": 14308 }, { "epoch": 0.698681640625, "grad_norm": 0.21350862085819244, "learning_rate": 0.00014608561600700358, "loss": 1.7515, "step": 14309 }, { "epoch": 0.69873046875, "grad_norm": 0.2097359597682953, "learning_rate": 0.00014605690930028336, "loss": 1.7572, "step": 14310 }, { "epoch": 0.698779296875, "grad_norm": 0.2022765576839447, "learning_rate": 0.00014602820571860242, "loss": 1.7796, "step": 14311 }, { "epoch": 0.698828125, "grad_norm": 0.2196948528289795, "learning_rate": 0.00014599950526265665, "loss": 1.7682, "step": 14312 }, { "epoch": 0.698876953125, "grad_norm": 0.23129305243492126, "learning_rate": 0.00014597080793314138, "loss": 1.7352, "step": 14313 }, { "epoch": 0.69892578125, "grad_norm": 0.19592627882957458, "learning_rate": 0.0001459421137307524, "loss": 1.745, "step": 14314 }, { "epoch": 0.698974609375, "grad_norm": 0.22631524503231049, "learning_rate": 0.0001459134226561849, "loss": 1.7331, "step": 14315 }, { "epoch": 0.6990234375, "grad_norm": 0.207552969455719, "learning_rate": 0.00014588473471013427, "loss": 1.7669, "step": 14316 }, { "epoch": 0.699072265625, "grad_norm": 0.2154960036277771, "learning_rate": 0.00014585604989329588, "loss": 1.7621, "step": 14317 }, { "epoch": 0.69912109375, "grad_norm": 0.18118369579315186, "learning_rate": 0.00014582736820636482, "loss": 1.7501, "step": 14318 }, { "epoch": 0.699169921875, "grad_norm": 0.20107993483543396, "learning_rate": 0.00014579868965003629, "loss": 1.7584, "step": 14319 }, { "epoch": 0.69921875, "grad_norm": 0.19587460160255432, "learning_rate": 0.0001457700142250053, "loss": 1.7467, "step": 14320 }, { "epoch": 0.699267578125, "grad_norm": 0.21054594218730927, "learning_rate": 0.00014574134193196686, "loss": 1.7591, "step": 14321 }, { "epoch": 0.69931640625, "grad_norm": 0.1853746920824051, "learning_rate": 0.00014571267277161586, "loss": 1.7148, "step": 14322 }, { "epoch": 0.699365234375, "grad_norm": 0.19138436019420624, "learning_rate": 0.0001456840067446471, "loss": 1.7721, "step": 14323 }, { "epoch": 0.6994140625, "grad_norm": 0.1794327199459076, "learning_rate": 0.00014565534385175527, "loss": 1.7509, "step": 14324 }, { "epoch": 0.699462890625, "grad_norm": 0.2186182737350464, "learning_rate": 0.00014562668409363516, "loss": 1.7601, "step": 14325 }, { "epoch": 0.69951171875, "grad_norm": 0.23902766406536102, "learning_rate": 0.00014559802747098137, "loss": 1.7658, "step": 14326 }, { "epoch": 0.699560546875, "grad_norm": 0.2384859025478363, "learning_rate": 0.00014556937398448817, "loss": 1.7478, "step": 14327 }, { "epoch": 0.699609375, "grad_norm": 0.20083796977996826, "learning_rate": 0.00014554072363485033, "loss": 1.7519, "step": 14328 }, { "epoch": 0.699658203125, "grad_norm": 0.2740035653114319, "learning_rate": 0.0001455120764227619, "loss": 1.7505, "step": 14329 }, { "epoch": 0.69970703125, "grad_norm": 0.30900681018829346, "learning_rate": 0.00014548343234891748, "loss": 1.763, "step": 14330 }, { "epoch": 0.699755859375, "grad_norm": 0.27142325043678284, "learning_rate": 0.000145454791414011, "loss": 1.7631, "step": 14331 }, { "epoch": 0.6998046875, "grad_norm": 0.26917704939842224, "learning_rate": 0.00014542615361873686, "loss": 1.7274, "step": 14332 }, { "epoch": 0.699853515625, "grad_norm": 0.29352521896362305, "learning_rate": 0.00014539751896378893, "loss": 1.7584, "step": 14333 }, { "epoch": 0.69990234375, "grad_norm": 0.2109507918357849, "learning_rate": 0.0001453688874498612, "loss": 1.7599, "step": 14334 }, { "epoch": 0.699951171875, "grad_norm": 0.2677037715911865, "learning_rate": 0.00014534025907764764, "loss": 1.7754, "step": 14335 }, { "epoch": 0.7, "grad_norm": 0.21596291661262512, "learning_rate": 0.00014531163384784212, "loss": 1.7723, "step": 14336 }, { "epoch": 0.700048828125, "grad_norm": 0.23018383979797363, "learning_rate": 0.0001452830117611383, "loss": 1.7204, "step": 14337 }, { "epoch": 0.70009765625, "grad_norm": 0.21102699637413025, "learning_rate": 0.00014525439281822995, "loss": 1.7641, "step": 14338 }, { "epoch": 0.700146484375, "grad_norm": 0.23221156001091003, "learning_rate": 0.00014522577701981055, "loss": 1.7446, "step": 14339 }, { "epoch": 0.7001953125, "grad_norm": 0.22705791890621185, "learning_rate": 0.00014519716436657378, "loss": 1.7638, "step": 14340 }, { "epoch": 0.700244140625, "grad_norm": 0.21106219291687012, "learning_rate": 0.00014516855485921298, "loss": 1.763, "step": 14341 }, { "epoch": 0.70029296875, "grad_norm": 0.26206356287002563, "learning_rate": 0.00014513994849842154, "loss": 1.7616, "step": 14342 }, { "epoch": 0.700341796875, "grad_norm": 0.21081219613552094, "learning_rate": 0.0001451113452848928, "loss": 1.7342, "step": 14343 }, { "epoch": 0.700390625, "grad_norm": 0.24661773443222046, "learning_rate": 0.00014508274521931997, "loss": 1.7712, "step": 14344 }, { "epoch": 0.700439453125, "grad_norm": 0.21516048908233643, "learning_rate": 0.00014505414830239614, "loss": 1.7652, "step": 14345 }, { "epoch": 0.70048828125, "grad_norm": 0.2604864537715912, "learning_rate": 0.00014502555453481457, "loss": 1.744, "step": 14346 }, { "epoch": 0.700537109375, "grad_norm": 0.2596631646156311, "learning_rate": 0.00014499696391726786, "loss": 1.7495, "step": 14347 }, { "epoch": 0.7005859375, "grad_norm": 0.22906938195228577, "learning_rate": 0.0001449683764504494, "loss": 1.7585, "step": 14348 }, { "epoch": 0.700634765625, "grad_norm": 0.2391805499792099, "learning_rate": 0.00014493979213505171, "loss": 1.7562, "step": 14349 }, { "epoch": 0.70068359375, "grad_norm": 0.19735531508922577, "learning_rate": 0.00014491121097176763, "loss": 1.7631, "step": 14350 }, { "epoch": 0.700732421875, "grad_norm": 0.21032559871673584, "learning_rate": 0.0001448826329612899, "loss": 1.7497, "step": 14351 }, { "epoch": 0.70078125, "grad_norm": 0.19153992831707, "learning_rate": 0.00014485405810431108, "loss": 1.7307, "step": 14352 }, { "epoch": 0.700830078125, "grad_norm": 0.21027062833309174, "learning_rate": 0.00014482548640152367, "loss": 1.737, "step": 14353 }, { "epoch": 0.70087890625, "grad_norm": 0.24607934057712555, "learning_rate": 0.0001447969178536202, "loss": 1.7508, "step": 14354 }, { "epoch": 0.700927734375, "grad_norm": 0.2043391913175583, "learning_rate": 0.00014476835246129307, "loss": 1.7727, "step": 14355 }, { "epoch": 0.7009765625, "grad_norm": 0.21710741519927979, "learning_rate": 0.00014473979022523454, "loss": 1.7665, "step": 14356 }, { "epoch": 0.701025390625, "grad_norm": 0.20971810817718506, "learning_rate": 0.00014471123114613682, "loss": 1.7558, "step": 14357 }, { "epoch": 0.70107421875, "grad_norm": 0.2652362287044525, "learning_rate": 0.00014468267522469209, "loss": 1.7861, "step": 14358 }, { "epoch": 0.701123046875, "grad_norm": 0.24280399084091187, "learning_rate": 0.00014465412246159244, "loss": 1.7237, "step": 14359 }, { "epoch": 0.701171875, "grad_norm": 0.2496628612279892, "learning_rate": 0.0001446255728575298, "loss": 1.7505, "step": 14360 }, { "epoch": 0.701220703125, "grad_norm": 0.2122601866722107, "learning_rate": 0.00014459702641319623, "loss": 1.7338, "step": 14361 }, { "epoch": 0.70126953125, "grad_norm": 0.29059046506881714, "learning_rate": 0.0001445684831292834, "loss": 1.7727, "step": 14362 }, { "epoch": 0.701318359375, "grad_norm": 0.24790586531162262, "learning_rate": 0.0001445399430064832, "loss": 1.7453, "step": 14363 }, { "epoch": 0.7013671875, "grad_norm": 0.24792467057704926, "learning_rate": 0.00014451140604548733, "loss": 1.7468, "step": 14364 }, { "epoch": 0.701416015625, "grad_norm": 0.22436179220676422, "learning_rate": 0.00014448287224698731, "loss": 1.7528, "step": 14365 }, { "epoch": 0.70146484375, "grad_norm": 0.240177720785141, "learning_rate": 0.00014445434161167487, "loss": 1.7266, "step": 14366 }, { "epoch": 0.701513671875, "grad_norm": 0.22316835820674896, "learning_rate": 0.00014442581414024123, "loss": 1.7593, "step": 14367 }, { "epoch": 0.7015625, "grad_norm": 0.2056206613779068, "learning_rate": 0.0001443972898333779, "loss": 1.7642, "step": 14368 }, { "epoch": 0.701611328125, "grad_norm": 0.22104933857917786, "learning_rate": 0.00014436876869177616, "loss": 1.7482, "step": 14369 }, { "epoch": 0.70166015625, "grad_norm": 0.19936540722846985, "learning_rate": 0.00014434025071612724, "loss": 1.7236, "step": 14370 }, { "epoch": 0.701708984375, "grad_norm": 0.2187422215938568, "learning_rate": 0.00014431173590712237, "loss": 1.7444, "step": 14371 }, { "epoch": 0.7017578125, "grad_norm": 0.19821485877037048, "learning_rate": 0.00014428322426545255, "loss": 1.7497, "step": 14372 }, { "epoch": 0.701806640625, "grad_norm": 0.22870288789272308, "learning_rate": 0.00014425471579180876, "loss": 1.7869, "step": 14373 }, { "epoch": 0.70185546875, "grad_norm": 0.2158551812171936, "learning_rate": 0.00014422621048688206, "loss": 1.773, "step": 14374 }, { "epoch": 0.701904296875, "grad_norm": 0.19998101890087128, "learning_rate": 0.00014419770835136315, "loss": 1.7396, "step": 14375 }, { "epoch": 0.701953125, "grad_norm": 0.21988476812839508, "learning_rate": 0.0001441692093859429, "loss": 1.7669, "step": 14376 }, { "epoch": 0.702001953125, "grad_norm": 0.18042829632759094, "learning_rate": 0.00014414071359131192, "loss": 1.7509, "step": 14377 }, { "epoch": 0.70205078125, "grad_norm": 0.19362910091876984, "learning_rate": 0.0001441122209681609, "loss": 1.7661, "step": 14378 }, { "epoch": 0.702099609375, "grad_norm": 0.19122625887393951, "learning_rate": 0.00014408373151718042, "loss": 1.7404, "step": 14379 }, { "epoch": 0.7021484375, "grad_norm": 0.23354335129261017, "learning_rate": 0.00014405524523906078, "loss": 1.7319, "step": 14380 }, { "epoch": 0.702197265625, "grad_norm": 0.18373508751392365, "learning_rate": 0.0001440267621344925, "loss": 1.7715, "step": 14381 }, { "epoch": 0.70224609375, "grad_norm": 0.2063829004764557, "learning_rate": 0.0001439982822041659, "loss": 1.741, "step": 14382 }, { "epoch": 0.702294921875, "grad_norm": 0.19427989423274994, "learning_rate": 0.00014396980544877124, "loss": 1.7614, "step": 14383 }, { "epoch": 0.70234375, "grad_norm": 0.20266743004322052, "learning_rate": 0.00014394133186899856, "loss": 1.7437, "step": 14384 }, { "epoch": 0.702392578125, "grad_norm": 0.21825553476810455, "learning_rate": 0.00014391286146553796, "loss": 1.7589, "step": 14385 }, { "epoch": 0.70244140625, "grad_norm": 0.2300042361021042, "learning_rate": 0.00014388439423907947, "loss": 1.7745, "step": 14386 }, { "epoch": 0.702490234375, "grad_norm": 0.16384126245975494, "learning_rate": 0.00014385593019031304, "loss": 1.7317, "step": 14387 }, { "epoch": 0.7025390625, "grad_norm": 0.2229653149843216, "learning_rate": 0.0001438274693199285, "loss": 1.7275, "step": 14388 }, { "epoch": 0.702587890625, "grad_norm": 0.20709313452243805, "learning_rate": 0.00014379901162861562, "loss": 1.7403, "step": 14389 }, { "epoch": 0.70263671875, "grad_norm": 0.2078065127134323, "learning_rate": 0.0001437705571170641, "loss": 1.7428, "step": 14390 }, { "epoch": 0.702685546875, "grad_norm": 0.22860021889209747, "learning_rate": 0.00014374210578596355, "loss": 1.7352, "step": 14391 }, { "epoch": 0.702734375, "grad_norm": 0.194871723651886, "learning_rate": 0.00014371365763600353, "loss": 1.7189, "step": 14392 }, { "epoch": 0.702783203125, "grad_norm": 0.2330675721168518, "learning_rate": 0.0001436852126678735, "loss": 1.7427, "step": 14393 }, { "epoch": 0.70283203125, "grad_norm": 0.21982234716415405, "learning_rate": 0.00014365677088226284, "loss": 1.7485, "step": 14394 }, { "epoch": 0.702880859375, "grad_norm": 0.2340109944343567, "learning_rate": 0.00014362833227986083, "loss": 1.7404, "step": 14395 }, { "epoch": 0.7029296875, "grad_norm": 0.20411302149295807, "learning_rate": 0.00014359989686135675, "loss": 1.7359, "step": 14396 }, { "epoch": 0.702978515625, "grad_norm": 0.22543835639953613, "learning_rate": 0.0001435714646274397, "loss": 1.7695, "step": 14397 }, { "epoch": 0.70302734375, "grad_norm": 0.21245843172073364, "learning_rate": 0.00014354303557879882, "loss": 1.767, "step": 14398 }, { "epoch": 0.703076171875, "grad_norm": 0.24872517585754395, "learning_rate": 0.00014351460971612313, "loss": 1.7555, "step": 14399 }, { "epoch": 0.703125, "grad_norm": 0.21075665950775146, "learning_rate": 0.00014348618704010147, "loss": 1.7549, "step": 14400 }, { "epoch": 0.703173828125, "grad_norm": 0.22992955148220062, "learning_rate": 0.00014345776755142275, "loss": 1.7614, "step": 14401 }, { "epoch": 0.70322265625, "grad_norm": 0.20605304837226868, "learning_rate": 0.0001434293512507756, "loss": 1.7668, "step": 14402 }, { "epoch": 0.703271484375, "grad_norm": 0.20100441575050354, "learning_rate": 0.00014340093813884904, "loss": 1.7611, "step": 14403 }, { "epoch": 0.7033203125, "grad_norm": 0.24470585584640503, "learning_rate": 0.00014337252821633128, "loss": 1.7584, "step": 14404 }, { "epoch": 0.703369140625, "grad_norm": 0.21286937594413757, "learning_rate": 0.00014334412148391112, "loss": 1.7545, "step": 14405 }, { "epoch": 0.70341796875, "grad_norm": 0.24111716449260712, "learning_rate": 0.00014331571794227692, "loss": 1.7537, "step": 14406 }, { "epoch": 0.703466796875, "grad_norm": 0.23932386934757233, "learning_rate": 0.0001432873175921171, "loss": 1.7433, "step": 14407 }, { "epoch": 0.703515625, "grad_norm": 0.20828713476657867, "learning_rate": 0.00014325892043411998, "loss": 1.7422, "step": 14408 }, { "epoch": 0.703564453125, "grad_norm": 0.2308097630739212, "learning_rate": 0.00014323052646897372, "loss": 1.7432, "step": 14409 }, { "epoch": 0.70361328125, "grad_norm": 0.21363383531570435, "learning_rate": 0.00014320213569736655, "loss": 1.7721, "step": 14410 }, { "epoch": 0.703662109375, "grad_norm": 0.23229826986789703, "learning_rate": 0.00014317374811998644, "loss": 1.7461, "step": 14411 }, { "epoch": 0.7037109375, "grad_norm": 0.23156039416790009, "learning_rate": 0.0001431453637375215, "loss": 1.7507, "step": 14412 }, { "epoch": 0.703759765625, "grad_norm": 0.20975510776042938, "learning_rate": 0.00014311698255065964, "loss": 1.7532, "step": 14413 }, { "epoch": 0.70380859375, "grad_norm": 0.2106269896030426, "learning_rate": 0.00014308860456008863, "loss": 1.7423, "step": 14414 }, { "epoch": 0.703857421875, "grad_norm": 0.22522585093975067, "learning_rate": 0.00014306022976649625, "loss": 1.7508, "step": 14415 }, { "epoch": 0.70390625, "grad_norm": 0.21926039457321167, "learning_rate": 0.0001430318581705702, "loss": 1.7468, "step": 14416 }, { "epoch": 0.703955078125, "grad_norm": 0.22211477160453796, "learning_rate": 0.0001430034897729982, "loss": 1.7477, "step": 14417 }, { "epoch": 0.70400390625, "grad_norm": 0.19520576298236847, "learning_rate": 0.0001429751245744675, "loss": 1.7429, "step": 14418 }, { "epoch": 0.704052734375, "grad_norm": 0.2187022864818573, "learning_rate": 0.00014294676257566586, "loss": 1.7714, "step": 14419 }, { "epoch": 0.7041015625, "grad_norm": 0.21178404986858368, "learning_rate": 0.00014291840377728037, "loss": 1.7658, "step": 14420 }, { "epoch": 0.704150390625, "grad_norm": 0.20537234842777252, "learning_rate": 0.00014289004817999866, "loss": 1.7464, "step": 14421 }, { "epoch": 0.70419921875, "grad_norm": 0.23317298293113708, "learning_rate": 0.00014286169578450758, "loss": 1.7333, "step": 14422 }, { "epoch": 0.704248046875, "grad_norm": 0.20956777036190033, "learning_rate": 0.0001428333465914946, "loss": 1.7552, "step": 14423 }, { "epoch": 0.704296875, "grad_norm": 0.2298991084098816, "learning_rate": 0.0001428050006016466, "loss": 1.7777, "step": 14424 }, { "epoch": 0.704345703125, "grad_norm": 0.2280956208705902, "learning_rate": 0.00014277665781565064, "loss": 1.7348, "step": 14425 }, { "epoch": 0.70439453125, "grad_norm": 0.2443852424621582, "learning_rate": 0.0001427483182341936, "loss": 1.7328, "step": 14426 }, { "epoch": 0.704443359375, "grad_norm": 0.22919413447380066, "learning_rate": 0.00014271998185796226, "loss": 1.7608, "step": 14427 }, { "epoch": 0.7044921875, "grad_norm": 0.20914770662784576, "learning_rate": 0.00014269164868764347, "loss": 1.7579, "step": 14428 }, { "epoch": 0.704541015625, "grad_norm": 0.23769651353359222, "learning_rate": 0.00014266331872392387, "loss": 1.7412, "step": 14429 }, { "epoch": 0.70458984375, "grad_norm": 0.22089941799640656, "learning_rate": 0.00014263499196749005, "loss": 1.7596, "step": 14430 }, { "epoch": 0.704638671875, "grad_norm": 0.2429371178150177, "learning_rate": 0.00014260666841902855, "loss": 1.7556, "step": 14431 }, { "epoch": 0.7046875, "grad_norm": 0.2085471749305725, "learning_rate": 0.00014257834807922582, "loss": 1.7654, "step": 14432 }, { "epoch": 0.704736328125, "grad_norm": 0.244627445936203, "learning_rate": 0.00014255003094876826, "loss": 1.7309, "step": 14433 }, { "epoch": 0.70478515625, "grad_norm": 0.2423064112663269, "learning_rate": 0.00014252171702834195, "loss": 1.7537, "step": 14434 }, { "epoch": 0.704833984375, "grad_norm": 0.2410588413476944, "learning_rate": 0.0001424934063186334, "loss": 1.7628, "step": 14435 }, { "epoch": 0.7048828125, "grad_norm": 0.2953113913536072, "learning_rate": 0.00014246509882032848, "loss": 1.7737, "step": 14436 }, { "epoch": 0.704931640625, "grad_norm": 0.1738114058971405, "learning_rate": 0.00014243679453411354, "loss": 1.7447, "step": 14437 }, { "epoch": 0.70498046875, "grad_norm": 0.2716725766658783, "learning_rate": 0.0001424084934606742, "loss": 1.7425, "step": 14438 }, { "epoch": 0.705029296875, "grad_norm": 0.1955289989709854, "learning_rate": 0.0001423801956006967, "loss": 1.7599, "step": 14439 }, { "epoch": 0.705078125, "grad_norm": 0.24512730538845062, "learning_rate": 0.00014235190095486654, "loss": 1.7609, "step": 14440 }, { "epoch": 0.705126953125, "grad_norm": 0.21578310430049896, "learning_rate": 0.0001423236095238698, "loss": 1.757, "step": 14441 }, { "epoch": 0.70517578125, "grad_norm": 0.2590654194355011, "learning_rate": 0.00014229532130839194, "loss": 1.7474, "step": 14442 }, { "epoch": 0.705224609375, "grad_norm": 0.21426081657409668, "learning_rate": 0.00014226703630911854, "loss": 1.7614, "step": 14443 }, { "epoch": 0.7052734375, "grad_norm": 0.25945714116096497, "learning_rate": 0.00014223875452673513, "loss": 1.7519, "step": 14444 }, { "epoch": 0.705322265625, "grad_norm": 0.22294281423091888, "learning_rate": 0.00014221047596192722, "loss": 1.7595, "step": 14445 }, { "epoch": 0.70537109375, "grad_norm": 0.21732370555400848, "learning_rate": 0.00014218220061538008, "loss": 1.7604, "step": 14446 }, { "epoch": 0.705419921875, "grad_norm": 0.2217358946800232, "learning_rate": 0.00014215392848777896, "loss": 1.7617, "step": 14447 }, { "epoch": 0.70546875, "grad_norm": 0.24151493608951569, "learning_rate": 0.00014212565957980917, "loss": 1.7356, "step": 14448 }, { "epoch": 0.705517578125, "grad_norm": 0.23120099306106567, "learning_rate": 0.00014209739389215575, "loss": 1.7365, "step": 14449 }, { "epoch": 0.70556640625, "grad_norm": 0.23634235560894012, "learning_rate": 0.00014206913142550377, "loss": 1.7373, "step": 14450 }, { "epoch": 0.705615234375, "grad_norm": 0.258171945810318, "learning_rate": 0.00014204087218053823, "loss": 1.7501, "step": 14451 }, { "epoch": 0.7056640625, "grad_norm": 0.20634546875953674, "learning_rate": 0.00014201261615794388, "loss": 1.7526, "step": 14452 }, { "epoch": 0.705712890625, "grad_norm": 0.24876481294631958, "learning_rate": 0.00014198436335840576, "loss": 1.7373, "step": 14453 }, { "epoch": 0.70576171875, "grad_norm": 0.20664550364017487, "learning_rate": 0.00014195611378260825, "loss": 1.7449, "step": 14454 }, { "epoch": 0.705810546875, "grad_norm": 0.24971437454223633, "learning_rate": 0.00014192786743123638, "loss": 1.755, "step": 14455 }, { "epoch": 0.705859375, "grad_norm": 0.23281671106815338, "learning_rate": 0.00014189962430497443, "loss": 1.7539, "step": 14456 }, { "epoch": 0.705908203125, "grad_norm": 0.2411680668592453, "learning_rate": 0.00014187138440450715, "loss": 1.7461, "step": 14457 }, { "epoch": 0.70595703125, "grad_norm": 0.24302005767822266, "learning_rate": 0.00014184314773051866, "loss": 1.771, "step": 14458 }, { "epoch": 0.706005859375, "grad_norm": 0.19022588431835175, "learning_rate": 0.00014181491428369363, "loss": 1.7681, "step": 14459 }, { "epoch": 0.7060546875, "grad_norm": 0.2331012785434723, "learning_rate": 0.0001417866840647161, "loss": 1.7595, "step": 14460 }, { "epoch": 0.706103515625, "grad_norm": 0.20788012444972992, "learning_rate": 0.00014175845707427027, "loss": 1.7569, "step": 14461 }, { "epoch": 0.70615234375, "grad_norm": 0.25772884488105774, "learning_rate": 0.0001417302333130403, "loss": 1.7367, "step": 14462 }, { "epoch": 0.706201171875, "grad_norm": 0.24062812328338623, "learning_rate": 0.00014170201278171016, "loss": 1.7362, "step": 14463 }, { "epoch": 0.70625, "grad_norm": 0.23970401287078857, "learning_rate": 0.00014167379548096388, "loss": 1.7603, "step": 14464 }, { "epoch": 0.706298828125, "grad_norm": 0.20469611883163452, "learning_rate": 0.0001416455814114852, "loss": 1.7623, "step": 14465 }, { "epoch": 0.70634765625, "grad_norm": 0.21715790033340454, "learning_rate": 0.00014161737057395807, "loss": 1.7298, "step": 14466 }, { "epoch": 0.706396484375, "grad_norm": 0.19994066655635834, "learning_rate": 0.00014158916296906616, "loss": 1.756, "step": 14467 }, { "epoch": 0.7064453125, "grad_norm": 0.2252056747674942, "learning_rate": 0.0001415609585974929, "loss": 1.7704, "step": 14468 }, { "epoch": 0.706494140625, "grad_norm": 0.19078144431114197, "learning_rate": 0.00014153275745992222, "loss": 1.7388, "step": 14469 }, { "epoch": 0.70654296875, "grad_norm": 0.21614046394824982, "learning_rate": 0.00014150455955703716, "loss": 1.7445, "step": 14470 }, { "epoch": 0.706591796875, "grad_norm": 0.20345839858055115, "learning_rate": 0.0001414763648895216, "loss": 1.7466, "step": 14471 }, { "epoch": 0.706640625, "grad_norm": 0.21537698805332184, "learning_rate": 0.00014144817345805837, "loss": 1.7622, "step": 14472 }, { "epoch": 0.706689453125, "grad_norm": 0.24314184486865997, "learning_rate": 0.00014141998526333115, "loss": 1.7497, "step": 14473 }, { "epoch": 0.70673828125, "grad_norm": 0.21651655435562134, "learning_rate": 0.00014139180030602272, "loss": 1.7397, "step": 14474 }, { "epoch": 0.706787109375, "grad_norm": 0.2772996425628662, "learning_rate": 0.00014136361858681654, "loss": 1.7588, "step": 14475 }, { "epoch": 0.7068359375, "grad_norm": 0.20441456139087677, "learning_rate": 0.0001413354401063953, "loss": 1.7369, "step": 14476 }, { "epoch": 0.706884765625, "grad_norm": 0.23560914397239685, "learning_rate": 0.0001413072648654421, "loss": 1.7519, "step": 14477 }, { "epoch": 0.70693359375, "grad_norm": 0.22336363792419434, "learning_rate": 0.00014127909286463974, "loss": 1.7458, "step": 14478 }, { "epoch": 0.706982421875, "grad_norm": 0.218794584274292, "learning_rate": 0.00014125092410467098, "loss": 1.7291, "step": 14479 }, { "epoch": 0.70703125, "grad_norm": 0.20323869585990906, "learning_rate": 0.00014122275858621854, "loss": 1.7532, "step": 14480 }, { "epoch": 0.707080078125, "grad_norm": 0.2094438672065735, "learning_rate": 0.00014119459630996498, "loss": 1.7725, "step": 14481 }, { "epoch": 0.70712890625, "grad_norm": 0.19606870412826538, "learning_rate": 0.00014116643727659292, "loss": 1.753, "step": 14482 }, { "epoch": 0.707177734375, "grad_norm": 0.20704644918441772, "learning_rate": 0.00014113828148678477, "loss": 1.7758, "step": 14483 }, { "epoch": 0.7072265625, "grad_norm": 0.19858261942863464, "learning_rate": 0.0001411101289412229, "loss": 1.7714, "step": 14484 }, { "epoch": 0.707275390625, "grad_norm": 0.21357588469982147, "learning_rate": 0.00014108197964058966, "loss": 1.7599, "step": 14485 }, { "epoch": 0.70732421875, "grad_norm": 0.18876726925373077, "learning_rate": 0.00014105383358556714, "loss": 1.7634, "step": 14486 }, { "epoch": 0.707373046875, "grad_norm": 0.183257058262825, "learning_rate": 0.00014102569077683772, "loss": 1.7451, "step": 14487 }, { "epoch": 0.707421875, "grad_norm": 0.19020722806453705, "learning_rate": 0.00014099755121508312, "loss": 1.8036, "step": 14488 }, { "epoch": 0.707470703125, "grad_norm": 0.21501651406288147, "learning_rate": 0.0001409694149009857, "loss": 1.7645, "step": 14489 }, { "epoch": 0.70751953125, "grad_norm": 0.22008588910102844, "learning_rate": 0.00014094128183522706, "loss": 1.7265, "step": 14490 }, { "epoch": 0.707568359375, "grad_norm": 0.20398947596549988, "learning_rate": 0.00014091315201848932, "loss": 1.7453, "step": 14491 }, { "epoch": 0.7076171875, "grad_norm": 0.2625548839569092, "learning_rate": 0.00014088502545145392, "loss": 1.7575, "step": 14492 }, { "epoch": 0.707666015625, "grad_norm": 0.2150447517633438, "learning_rate": 0.0001408569021348027, "loss": 1.7537, "step": 14493 }, { "epoch": 0.70771484375, "grad_norm": 0.21842683851718903, "learning_rate": 0.00014082878206921726, "loss": 1.7173, "step": 14494 }, { "epoch": 0.707763671875, "grad_norm": 0.219468891620636, "learning_rate": 0.00014080066525537908, "loss": 1.7455, "step": 14495 }, { "epoch": 0.7078125, "grad_norm": 0.21191473305225372, "learning_rate": 0.0001407725516939696, "loss": 1.7375, "step": 14496 }, { "epoch": 0.707861328125, "grad_norm": 0.23712949454784393, "learning_rate": 0.00014074444138567009, "loss": 1.743, "step": 14497 }, { "epoch": 0.70791015625, "grad_norm": 0.21030773222446442, "learning_rate": 0.00014071633433116194, "loss": 1.7384, "step": 14498 }, { "epoch": 0.707958984375, "grad_norm": 0.23290850222110748, "learning_rate": 0.00014068823053112634, "loss": 1.7658, "step": 14499 }, { "epoch": 0.7080078125, "grad_norm": 0.22443890571594238, "learning_rate": 0.00014066012998624437, "loss": 1.7416, "step": 14500 }, { "epoch": 0.708056640625, "grad_norm": 0.224365234375, "learning_rate": 0.00014063203269719713, "loss": 1.7473, "step": 14501 }, { "epoch": 0.70810546875, "grad_norm": 0.2955969274044037, "learning_rate": 0.00014060393866466539, "loss": 1.7416, "step": 14502 }, { "epoch": 0.708154296875, "grad_norm": 0.20812906324863434, "learning_rate": 0.00014057584788933032, "loss": 1.7606, "step": 14503 }, { "epoch": 0.708203125, "grad_norm": 0.23758918046951294, "learning_rate": 0.0001405477603718724, "loss": 1.7677, "step": 14504 }, { "epoch": 0.708251953125, "grad_norm": 0.2104630172252655, "learning_rate": 0.0001405196761129727, "loss": 1.7531, "step": 14505 }, { "epoch": 0.70830078125, "grad_norm": 0.187977597117424, "learning_rate": 0.00014049159511331154, "loss": 1.751, "step": 14506 }, { "epoch": 0.708349609375, "grad_norm": 0.22860532999038696, "learning_rate": 0.00014046351737356978, "loss": 1.7669, "step": 14507 }, { "epoch": 0.7083984375, "grad_norm": 0.21136049926280975, "learning_rate": 0.0001404354428944276, "loss": 1.7493, "step": 14508 }, { "epoch": 0.708447265625, "grad_norm": 0.23431704938411713, "learning_rate": 0.00014040737167656574, "loss": 1.7385, "step": 14509 }, { "epoch": 0.70849609375, "grad_norm": 0.2060132771730423, "learning_rate": 0.00014037930372066425, "loss": 1.7637, "step": 14510 }, { "epoch": 0.708544921875, "grad_norm": 0.23107832670211792, "learning_rate": 0.00014035123902740354, "loss": 1.7816, "step": 14511 }, { "epoch": 0.70859375, "grad_norm": 0.2680635452270508, "learning_rate": 0.00014032317759746368, "loss": 1.7725, "step": 14512 }, { "epoch": 0.708642578125, "grad_norm": 0.21414223313331604, "learning_rate": 0.00014029511943152483, "loss": 1.7657, "step": 14513 }, { "epoch": 0.70869140625, "grad_norm": 0.24529831111431122, "learning_rate": 0.000140267064530267, "loss": 1.7532, "step": 14514 }, { "epoch": 0.708740234375, "grad_norm": 0.21372167766094208, "learning_rate": 0.00014023901289437007, "loss": 1.7487, "step": 14515 }, { "epoch": 0.7087890625, "grad_norm": 0.2801934480667114, "learning_rate": 0.00014021096452451393, "loss": 1.7723, "step": 14516 }, { "epoch": 0.708837890625, "grad_norm": 0.21652375161647797, "learning_rate": 0.00014018291942137845, "loss": 1.7466, "step": 14517 }, { "epoch": 0.70888671875, "grad_norm": 0.28714054822921753, "learning_rate": 0.00014015487758564307, "loss": 1.7465, "step": 14518 }, { "epoch": 0.708935546875, "grad_norm": 0.23351122438907623, "learning_rate": 0.00014012683901798776, "loss": 1.7703, "step": 14519 }, { "epoch": 0.708984375, "grad_norm": 0.2634582817554474, "learning_rate": 0.00014009880371909162, "loss": 1.7384, "step": 14520 }, { "epoch": 0.709033203125, "grad_norm": 0.22199268639087677, "learning_rate": 0.00014007077168963456, "loss": 1.7226, "step": 14521 }, { "epoch": 0.70908203125, "grad_norm": 0.24323385953903198, "learning_rate": 0.0001400427429302956, "loss": 1.7258, "step": 14522 }, { "epoch": 0.709130859375, "grad_norm": 0.2010377198457718, "learning_rate": 0.00014001471744175435, "loss": 1.7484, "step": 14523 }, { "epoch": 0.7091796875, "grad_norm": 0.2472739964723587, "learning_rate": 0.00013998669522468965, "loss": 1.742, "step": 14524 }, { "epoch": 0.709228515625, "grad_norm": 0.2264685034751892, "learning_rate": 0.0001399586762797811, "loss": 1.7537, "step": 14525 }, { "epoch": 0.70927734375, "grad_norm": 0.2529104948043823, "learning_rate": 0.00013993066060770737, "loss": 1.7424, "step": 14526 }, { "epoch": 0.709326171875, "grad_norm": 0.2068510204553604, "learning_rate": 0.00013990264820914762, "loss": 1.7672, "step": 14527 }, { "epoch": 0.709375, "grad_norm": 0.25961652398109436, "learning_rate": 0.00013987463908478074, "loss": 1.7486, "step": 14528 }, { "epoch": 0.709423828125, "grad_norm": 0.2122366726398468, "learning_rate": 0.00013984663323528555, "loss": 1.763, "step": 14529 }, { "epoch": 0.70947265625, "grad_norm": 0.2416047900915146, "learning_rate": 0.00013981863066134077, "loss": 1.7384, "step": 14530 }, { "epoch": 0.709521484375, "grad_norm": 0.21760086715221405, "learning_rate": 0.00013979063136362508, "loss": 1.7511, "step": 14531 }, { "epoch": 0.7095703125, "grad_norm": 0.24389271438121796, "learning_rate": 0.00013976263534281702, "loss": 1.7869, "step": 14532 }, { "epoch": 0.709619140625, "grad_norm": 0.23470406234264374, "learning_rate": 0.0001397346425995952, "loss": 1.7631, "step": 14533 }, { "epoch": 0.70966796875, "grad_norm": 0.2537503242492676, "learning_rate": 0.00013970665313463795, "loss": 1.7532, "step": 14534 }, { "epoch": 0.709716796875, "grad_norm": 0.2887537479400635, "learning_rate": 0.0001396786669486238, "loss": 1.757, "step": 14535 }, { "epoch": 0.709765625, "grad_norm": 0.25216245651245117, "learning_rate": 0.00013965068404223063, "loss": 1.7369, "step": 14536 }, { "epoch": 0.709814453125, "grad_norm": 0.2766900360584259, "learning_rate": 0.00013962270441613707, "loss": 1.7473, "step": 14537 }, { "epoch": 0.70986328125, "grad_norm": 0.19891129434108734, "learning_rate": 0.00013959472807102086, "loss": 1.7498, "step": 14538 }, { "epoch": 0.709912109375, "grad_norm": 0.2940838932991028, "learning_rate": 0.0001395667550075604, "loss": 1.7569, "step": 14539 }, { "epoch": 0.7099609375, "grad_norm": 0.2194024920463562, "learning_rate": 0.00013953878522643322, "loss": 1.767, "step": 14540 }, { "epoch": 0.710009765625, "grad_norm": 0.2816281318664551, "learning_rate": 0.00013951081872831762, "loss": 1.7349, "step": 14541 }, { "epoch": 0.71005859375, "grad_norm": 0.2573615312576294, "learning_rate": 0.00013948285551389098, "loss": 1.7462, "step": 14542 }, { "epoch": 0.710107421875, "grad_norm": 0.24641132354736328, "learning_rate": 0.0001394548955838314, "loss": 1.7376, "step": 14543 }, { "epoch": 0.71015625, "grad_norm": 0.27173495292663574, "learning_rate": 0.00013942693893881619, "loss": 1.7305, "step": 14544 }, { "epoch": 0.710205078125, "grad_norm": 0.2618001699447632, "learning_rate": 0.00013939898557952307, "loss": 1.745, "step": 14545 }, { "epoch": 0.71025390625, "grad_norm": 0.24140483140945435, "learning_rate": 0.0001393710355066295, "loss": 1.7518, "step": 14546 }, { "epoch": 0.710302734375, "grad_norm": 0.263374924659729, "learning_rate": 0.0001393430887208128, "loss": 1.7289, "step": 14547 }, { "epoch": 0.7103515625, "grad_norm": 0.2744775414466858, "learning_rate": 0.00013931514522275035, "loss": 1.733, "step": 14548 }, { "epoch": 0.710400390625, "grad_norm": 0.2726001441478729, "learning_rate": 0.00013928720501311934, "loss": 1.7653, "step": 14549 }, { "epoch": 0.71044921875, "grad_norm": 0.2492046058177948, "learning_rate": 0.00013925926809259698, "loss": 1.7653, "step": 14550 }, { "epoch": 0.710498046875, "grad_norm": 0.2499043345451355, "learning_rate": 0.00013923133446186038, "loss": 1.7549, "step": 14551 }, { "epoch": 0.710546875, "grad_norm": 0.2507273852825165, "learning_rate": 0.0001392034041215863, "loss": 1.7531, "step": 14552 }, { "epoch": 0.710595703125, "grad_norm": 0.2398299276828766, "learning_rate": 0.00013917547707245197, "loss": 1.7386, "step": 14553 }, { "epoch": 0.71064453125, "grad_norm": 0.26761093735694885, "learning_rate": 0.00013914755331513395, "loss": 1.7587, "step": 14554 }, { "epoch": 0.710693359375, "grad_norm": 0.2416890412569046, "learning_rate": 0.00013911963285030926, "loss": 1.7409, "step": 14555 }, { "epoch": 0.7107421875, "grad_norm": 0.2878614068031311, "learning_rate": 0.00013909171567865424, "loss": 1.7517, "step": 14556 }, { "epoch": 0.710791015625, "grad_norm": 0.24280314147472382, "learning_rate": 0.00013906380180084588, "loss": 1.7559, "step": 14557 }, { "epoch": 0.71083984375, "grad_norm": 0.2739293873310089, "learning_rate": 0.00013903589121756033, "loss": 1.7518, "step": 14558 }, { "epoch": 0.710888671875, "grad_norm": 0.2311198115348816, "learning_rate": 0.00013900798392947435, "loss": 1.7377, "step": 14559 }, { "epoch": 0.7109375, "grad_norm": 0.24415786564350128, "learning_rate": 0.00013898007993726403, "loss": 1.7632, "step": 14560 }, { "epoch": 0.710986328125, "grad_norm": 0.2709764242172241, "learning_rate": 0.00013895217924160576, "loss": 1.75, "step": 14561 }, { "epoch": 0.71103515625, "grad_norm": 0.21242569386959076, "learning_rate": 0.00013892428184317574, "loss": 1.7288, "step": 14562 }, { "epoch": 0.711083984375, "grad_norm": 0.23573417961597443, "learning_rate": 0.00013889638774265006, "loss": 1.7597, "step": 14563 }, { "epoch": 0.7111328125, "grad_norm": 0.20117226243019104, "learning_rate": 0.00013886849694070479, "loss": 1.767, "step": 14564 }, { "epoch": 0.711181640625, "grad_norm": 0.2287127673625946, "learning_rate": 0.00013884060943801586, "loss": 1.7299, "step": 14565 }, { "epoch": 0.71123046875, "grad_norm": 0.19426533579826355, "learning_rate": 0.0001388127252352591, "loss": 1.7298, "step": 14566 }, { "epoch": 0.711279296875, "grad_norm": 0.22980913519859314, "learning_rate": 0.0001387848443331104, "loss": 1.7309, "step": 14567 }, { "epoch": 0.711328125, "grad_norm": 0.21908247470855713, "learning_rate": 0.00013875696673224541, "loss": 1.7541, "step": 14568 }, { "epoch": 0.711376953125, "grad_norm": 0.2055961936712265, "learning_rate": 0.0001387290924333399, "loss": 1.7285, "step": 14569 }, { "epoch": 0.71142578125, "grad_norm": 0.20905204117298126, "learning_rate": 0.0001387012214370691, "loss": 1.7656, "step": 14570 }, { "epoch": 0.711474609375, "grad_norm": 0.21819859743118286, "learning_rate": 0.0001386733537441089, "loss": 1.7638, "step": 14571 }, { "epoch": 0.7115234375, "grad_norm": 0.1775718778371811, "learning_rate": 0.00013864548935513433, "loss": 1.7454, "step": 14572 }, { "epoch": 0.711572265625, "grad_norm": 0.22195053100585938, "learning_rate": 0.00013861762827082103, "loss": 1.756, "step": 14573 }, { "epoch": 0.71162109375, "grad_norm": 0.18440227210521698, "learning_rate": 0.0001385897704918439, "loss": 1.7348, "step": 14574 }, { "epoch": 0.711669921875, "grad_norm": 0.22943590581417084, "learning_rate": 0.00013856191601887847, "loss": 1.7482, "step": 14575 }, { "epoch": 0.71171875, "grad_norm": 0.22338847815990448, "learning_rate": 0.0001385340648525995, "loss": 1.7441, "step": 14576 }, { "epoch": 0.711767578125, "grad_norm": 0.23933887481689453, "learning_rate": 0.0001385062169936821, "loss": 1.7261, "step": 14577 }, { "epoch": 0.71181640625, "grad_norm": 0.21322502195835114, "learning_rate": 0.00013847837244280122, "loss": 1.7343, "step": 14578 }, { "epoch": 0.711865234375, "grad_norm": 0.22515320777893066, "learning_rate": 0.00013845053120063166, "loss": 1.7582, "step": 14579 }, { "epoch": 0.7119140625, "grad_norm": 0.23621785640716553, "learning_rate": 0.00013842269326784818, "loss": 1.7276, "step": 14580 }, { "epoch": 0.711962890625, "grad_norm": 0.22040334343910217, "learning_rate": 0.00013839485864512548, "loss": 1.7603, "step": 14581 }, { "epoch": 0.71201171875, "grad_norm": 0.22342072427272797, "learning_rate": 0.00013836702733313806, "loss": 1.7493, "step": 14582 }, { "epoch": 0.712060546875, "grad_norm": 0.2221185714006424, "learning_rate": 0.00013833919933256054, "loss": 1.7436, "step": 14583 }, { "epoch": 0.712109375, "grad_norm": 0.211217999458313, "learning_rate": 0.0001383113746440673, "loss": 1.7532, "step": 14584 }, { "epoch": 0.712158203125, "grad_norm": 0.20720107853412628, "learning_rate": 0.00013828355326833284, "loss": 1.7421, "step": 14585 }, { "epoch": 0.71220703125, "grad_norm": 0.20546288788318634, "learning_rate": 0.00013825573520603106, "loss": 1.726, "step": 14586 }, { "epoch": 0.712255859375, "grad_norm": 0.20407022535800934, "learning_rate": 0.00013822792045783665, "loss": 1.7503, "step": 14587 }, { "epoch": 0.7123046875, "grad_norm": 0.18804365396499634, "learning_rate": 0.00013820010902442325, "loss": 1.7447, "step": 14588 }, { "epoch": 0.712353515625, "grad_norm": 0.2086261510848999, "learning_rate": 0.00013817230090646526, "loss": 1.7599, "step": 14589 }, { "epoch": 0.71240234375, "grad_norm": 0.16911926865577698, "learning_rate": 0.00013814449610463636, "loss": 1.74, "step": 14590 }, { "epoch": 0.712451171875, "grad_norm": 0.21061262488365173, "learning_rate": 0.0001381166946196107, "loss": 1.7427, "step": 14591 }, { "epoch": 0.7125, "grad_norm": 0.2163679450750351, "learning_rate": 0.00013808889645206175, "loss": 1.7512, "step": 14592 }, { "epoch": 0.712548828125, "grad_norm": 0.23120075464248657, "learning_rate": 0.00013806110160266354, "loss": 1.746, "step": 14593 }, { "epoch": 0.71259765625, "grad_norm": 0.25041845440864563, "learning_rate": 0.0001380333100720895, "loss": 1.7379, "step": 14594 }, { "epoch": 0.712646484375, "grad_norm": 0.21018901467323303, "learning_rate": 0.00013800552186101316, "loss": 1.7557, "step": 14595 }, { "epoch": 0.7126953125, "grad_norm": 0.21385341882705688, "learning_rate": 0.00013797773697010813, "loss": 1.7605, "step": 14596 }, { "epoch": 0.712744140625, "grad_norm": 0.23006463050842285, "learning_rate": 0.00013794995540004773, "loss": 1.7283, "step": 14597 }, { "epoch": 0.71279296875, "grad_norm": 0.25498834252357483, "learning_rate": 0.0001379221771515052, "loss": 1.7627, "step": 14598 }, { "epoch": 0.712841796875, "grad_norm": 0.21428252756595612, "learning_rate": 0.00013789440222515388, "loss": 1.7584, "step": 14599 }, { "epoch": 0.712890625, "grad_norm": 0.26141974329948425, "learning_rate": 0.0001378666306216669, "loss": 1.7387, "step": 14600 }, { "epoch": 0.712939453125, "grad_norm": 0.22879916429519653, "learning_rate": 0.00013783886234171723, "loss": 1.7365, "step": 14601 }, { "epoch": 0.71298828125, "grad_norm": 0.2511509358882904, "learning_rate": 0.00013781109738597794, "loss": 1.7308, "step": 14602 }, { "epoch": 0.713037109375, "grad_norm": 0.23322561383247375, "learning_rate": 0.00013778333575512197, "loss": 1.7359, "step": 14603 }, { "epoch": 0.7130859375, "grad_norm": 0.2582744359970093, "learning_rate": 0.00013775557744982202, "loss": 1.7421, "step": 14604 }, { "epoch": 0.713134765625, "grad_norm": 0.22344990074634552, "learning_rate": 0.00013772782247075104, "loss": 1.756, "step": 14605 }, { "epoch": 0.71318359375, "grad_norm": 0.25779733061790466, "learning_rate": 0.0001377000708185814, "loss": 1.7851, "step": 14606 }, { "epoch": 0.713232421875, "grad_norm": 0.26389262080192566, "learning_rate": 0.00013767232249398603, "loss": 1.739, "step": 14607 }, { "epoch": 0.71328125, "grad_norm": 0.27664726972579956, "learning_rate": 0.00013764457749763708, "loss": 1.7646, "step": 14608 }, { "epoch": 0.713330078125, "grad_norm": 0.25112318992614746, "learning_rate": 0.00013761683583020728, "loss": 1.7548, "step": 14609 }, { "epoch": 0.71337890625, "grad_norm": 0.26447200775146484, "learning_rate": 0.00013758909749236878, "loss": 1.7433, "step": 14610 }, { "epoch": 0.713427734375, "grad_norm": 0.24528975784778595, "learning_rate": 0.00013756136248479388, "loss": 1.7392, "step": 14611 }, { "epoch": 0.7134765625, "grad_norm": 0.22400863468647003, "learning_rate": 0.00013753363080815477, "loss": 1.7524, "step": 14612 }, { "epoch": 0.713525390625, "grad_norm": 0.2551407217979431, "learning_rate": 0.0001375059024631236, "loss": 1.7364, "step": 14613 }, { "epoch": 0.71357421875, "grad_norm": 0.1814095675945282, "learning_rate": 0.00013747817745037234, "loss": 1.7249, "step": 14614 }, { "epoch": 0.713623046875, "grad_norm": 0.24269413948059082, "learning_rate": 0.0001374504557705729, "loss": 1.7407, "step": 14615 }, { "epoch": 0.713671875, "grad_norm": 0.21934230625629425, "learning_rate": 0.00013742273742439718, "loss": 1.7499, "step": 14616 }, { "epoch": 0.713720703125, "grad_norm": 0.2293410748243332, "learning_rate": 0.00013739502241251695, "loss": 1.7598, "step": 14617 }, { "epoch": 0.71376953125, "grad_norm": 0.26390132308006287, "learning_rate": 0.0001373673107356039, "loss": 1.7459, "step": 14618 }, { "epoch": 0.713818359375, "grad_norm": 0.21253880858421326, "learning_rate": 0.00013733960239432967, "loss": 1.7435, "step": 14619 }, { "epoch": 0.7138671875, "grad_norm": 0.23494547605514526, "learning_rate": 0.0001373118973893657, "loss": 1.7143, "step": 14620 }, { "epoch": 0.713916015625, "grad_norm": 0.19882583618164062, "learning_rate": 0.00013728419572138362, "loss": 1.7869, "step": 14621 }, { "epoch": 0.71396484375, "grad_norm": 0.24181890487670898, "learning_rate": 0.0001372564973910546, "loss": 1.7577, "step": 14622 }, { "epoch": 0.714013671875, "grad_norm": 0.2149985432624817, "learning_rate": 0.00013722880239905013, "loss": 1.7455, "step": 14623 }, { "epoch": 0.7140625, "grad_norm": 0.23135609924793243, "learning_rate": 0.0001372011107460412, "loss": 1.7207, "step": 14624 }, { "epoch": 0.714111328125, "grad_norm": 0.23128975927829742, "learning_rate": 0.00013717342243269924, "loss": 1.7548, "step": 14625 }, { "epoch": 0.71416015625, "grad_norm": 0.21312524378299713, "learning_rate": 0.00013714573745969488, "loss": 1.7401, "step": 14626 }, { "epoch": 0.714208984375, "grad_norm": 0.21799899637699127, "learning_rate": 0.00013711805582769954, "loss": 1.7282, "step": 14627 }, { "epoch": 0.7142578125, "grad_norm": 0.20658446848392487, "learning_rate": 0.00013709037753738376, "loss": 1.751, "step": 14628 }, { "epoch": 0.714306640625, "grad_norm": 0.22360031306743622, "learning_rate": 0.00013706270258941853, "loss": 1.7372, "step": 14629 }, { "epoch": 0.71435546875, "grad_norm": 0.22793158888816833, "learning_rate": 0.00013703503098447446, "loss": 1.7255, "step": 14630 }, { "epoch": 0.714404296875, "grad_norm": 0.2301306128501892, "learning_rate": 0.00013700736272322228, "loss": 1.7507, "step": 14631 }, { "epoch": 0.714453125, "grad_norm": 0.21324078738689423, "learning_rate": 0.00013697969780633254, "loss": 1.7602, "step": 14632 }, { "epoch": 0.714501953125, "grad_norm": 0.23572267591953278, "learning_rate": 0.0001369520362344757, "loss": 1.7533, "step": 14633 }, { "epoch": 0.71455078125, "grad_norm": 0.19482481479644775, "learning_rate": 0.0001369243780083222, "loss": 1.7336, "step": 14634 }, { "epoch": 0.714599609375, "grad_norm": 0.21773453056812286, "learning_rate": 0.0001368967231285423, "loss": 1.7689, "step": 14635 }, { "epoch": 0.7146484375, "grad_norm": 0.2151648849248886, "learning_rate": 0.00013686907159580626, "loss": 1.7545, "step": 14636 }, { "epoch": 0.714697265625, "grad_norm": 0.22123853862285614, "learning_rate": 0.00013684142341078423, "loss": 1.7389, "step": 14637 }, { "epoch": 0.71474609375, "grad_norm": 0.19845108687877655, "learning_rate": 0.00013681377857414635, "loss": 1.7412, "step": 14638 }, { "epoch": 0.714794921875, "grad_norm": 0.22568385303020477, "learning_rate": 0.00013678613708656252, "loss": 1.7391, "step": 14639 }, { "epoch": 0.71484375, "grad_norm": 0.2116059511899948, "learning_rate": 0.00013675849894870267, "loss": 1.7403, "step": 14640 }, { "epoch": 0.714892578125, "grad_norm": 0.23051442205905914, "learning_rate": 0.0001367308641612367, "loss": 1.753, "step": 14641 }, { "epoch": 0.71494140625, "grad_norm": 0.20744140446186066, "learning_rate": 0.0001367032327248343, "loss": 1.7346, "step": 14642 }, { "epoch": 0.714990234375, "grad_norm": 0.21882151067256927, "learning_rate": 0.0001366756046401652, "loss": 1.7508, "step": 14643 }, { "epoch": 0.7150390625, "grad_norm": 0.2560811936855316, "learning_rate": 0.000136647979907899, "loss": 1.7489, "step": 14644 }, { "epoch": 0.715087890625, "grad_norm": 0.19724948704242706, "learning_rate": 0.0001366203585287051, "loss": 1.7522, "step": 14645 }, { "epoch": 0.71513671875, "grad_norm": 0.25355422496795654, "learning_rate": 0.00013659274050325295, "loss": 1.7715, "step": 14646 }, { "epoch": 0.715185546875, "grad_norm": 0.24579276144504547, "learning_rate": 0.00013656512583221198, "loss": 1.7282, "step": 14647 }, { "epoch": 0.715234375, "grad_norm": 0.22109642624855042, "learning_rate": 0.00013653751451625135, "loss": 1.7724, "step": 14648 }, { "epoch": 0.715283203125, "grad_norm": 0.25014322996139526, "learning_rate": 0.00013650990655604035, "loss": 1.7495, "step": 14649 }, { "epoch": 0.71533203125, "grad_norm": 0.23969468474388123, "learning_rate": 0.00013648230195224798, "loss": 1.7335, "step": 14650 }, { "epoch": 0.715380859375, "grad_norm": 0.19946594536304474, "learning_rate": 0.00013645470070554333, "loss": 1.7471, "step": 14651 }, { "epoch": 0.7154296875, "grad_norm": 0.21716392040252686, "learning_rate": 0.00013642710281659533, "loss": 1.7363, "step": 14652 }, { "epoch": 0.715478515625, "grad_norm": 0.21119874715805054, "learning_rate": 0.00013639950828607276, "loss": 1.7758, "step": 14653 }, { "epoch": 0.71552734375, "grad_norm": 0.20797032117843628, "learning_rate": 0.0001363719171146445, "loss": 1.7628, "step": 14654 }, { "epoch": 0.715576171875, "grad_norm": 0.22039459645748138, "learning_rate": 0.0001363443293029792, "loss": 1.7553, "step": 14655 }, { "epoch": 0.715625, "grad_norm": 0.2094486951828003, "learning_rate": 0.00013631674485174548, "loss": 1.769, "step": 14656 }, { "epoch": 0.715673828125, "grad_norm": 0.25252291560173035, "learning_rate": 0.00013628916376161182, "loss": 1.7261, "step": 14657 }, { "epoch": 0.71572265625, "grad_norm": 0.23983485996723175, "learning_rate": 0.0001362615860332468, "loss": 1.7417, "step": 14658 }, { "epoch": 0.715771484375, "grad_norm": 0.226181760430336, "learning_rate": 0.00013623401166731864, "loss": 1.7433, "step": 14659 }, { "epoch": 0.7158203125, "grad_norm": 0.21645382046699524, "learning_rate": 0.00013620644066449566, "loss": 1.7529, "step": 14660 }, { "epoch": 0.715869140625, "grad_norm": 0.241008922457695, "learning_rate": 0.0001361788730254462, "loss": 1.7476, "step": 14661 }, { "epoch": 0.71591796875, "grad_norm": 0.1949795037508011, "learning_rate": 0.00013615130875083826, "loss": 1.7718, "step": 14662 }, { "epoch": 0.715966796875, "grad_norm": 0.25895386934280396, "learning_rate": 0.00013612374784133978, "loss": 1.7475, "step": 14663 }, { "epoch": 0.716015625, "grad_norm": 0.18846333026885986, "learning_rate": 0.00013609619029761905, "loss": 1.7223, "step": 14664 }, { "epoch": 0.716064453125, "grad_norm": 0.22943758964538574, "learning_rate": 0.0001360686361203436, "loss": 1.7586, "step": 14665 }, { "epoch": 0.71611328125, "grad_norm": 0.19991813600063324, "learning_rate": 0.00013604108531018138, "loss": 1.7467, "step": 14666 }, { "epoch": 0.716162109375, "grad_norm": 0.21486155688762665, "learning_rate": 0.0001360135378678001, "loss": 1.7213, "step": 14667 }, { "epoch": 0.7162109375, "grad_norm": 0.22157736122608185, "learning_rate": 0.0001359859937938674, "loss": 1.7595, "step": 14668 }, { "epoch": 0.716259765625, "grad_norm": 0.2225077748298645, "learning_rate": 0.0001359584530890508, "loss": 1.7309, "step": 14669 }, { "epoch": 0.71630859375, "grad_norm": 0.22886301577091217, "learning_rate": 0.00013593091575401784, "loss": 1.7428, "step": 14670 }, { "epoch": 0.716357421875, "grad_norm": 0.2237207144498825, "learning_rate": 0.00013590338178943583, "loss": 1.7517, "step": 14671 }, { "epoch": 0.71640625, "grad_norm": 0.24574685096740723, "learning_rate": 0.00013587585119597211, "loss": 1.74, "step": 14672 }, { "epoch": 0.716455078125, "grad_norm": 0.23121239244937897, "learning_rate": 0.00013584832397429392, "loss": 1.7386, "step": 14673 }, { "epoch": 0.71650390625, "grad_norm": 0.26481086015701294, "learning_rate": 0.00013582080012506837, "loss": 1.7426, "step": 14674 }, { "epoch": 0.716552734375, "grad_norm": 0.26222938299179077, "learning_rate": 0.0001357932796489626, "loss": 1.7698, "step": 14675 }, { "epoch": 0.7166015625, "grad_norm": 0.25318244099617004, "learning_rate": 0.00013576576254664346, "loss": 1.7342, "step": 14676 }, { "epoch": 0.716650390625, "grad_norm": 0.25611820816993713, "learning_rate": 0.00013573824881877797, "loss": 1.7326, "step": 14677 }, { "epoch": 0.71669921875, "grad_norm": 0.2204977124929428, "learning_rate": 0.000135710738466033, "loss": 1.7191, "step": 14678 }, { "epoch": 0.716748046875, "grad_norm": 0.26601776480674744, "learning_rate": 0.000135683231489075, "loss": 1.7589, "step": 14679 }, { "epoch": 0.716796875, "grad_norm": 0.22448855638504028, "learning_rate": 0.00013565572788857097, "loss": 1.7648, "step": 14680 }, { "epoch": 0.716845703125, "grad_norm": 0.23827451467514038, "learning_rate": 0.00013562822766518715, "loss": 1.7674, "step": 14681 }, { "epoch": 0.71689453125, "grad_norm": 0.2323189228773117, "learning_rate": 0.0001356007308195904, "loss": 1.7714, "step": 14682 }, { "epoch": 0.716943359375, "grad_norm": 0.23258492350578308, "learning_rate": 0.00013557323735244675, "loss": 1.7173, "step": 14683 }, { "epoch": 0.7169921875, "grad_norm": 0.22166365385055542, "learning_rate": 0.00013554574726442283, "loss": 1.7579, "step": 14684 }, { "epoch": 0.717041015625, "grad_norm": 0.24835984408855438, "learning_rate": 0.00013551826055618473, "loss": 1.7226, "step": 14685 }, { "epoch": 0.71708984375, "grad_norm": 0.1873917132616043, "learning_rate": 0.0001354907772283986, "loss": 1.7402, "step": 14686 }, { "epoch": 0.717138671875, "grad_norm": 0.2584708034992218, "learning_rate": 0.00013546329728173062, "loss": 1.7468, "step": 14687 }, { "epoch": 0.7171875, "grad_norm": 0.20999997854232788, "learning_rate": 0.0001354358207168467, "loss": 1.7548, "step": 14688 }, { "epoch": 0.717236328125, "grad_norm": 0.25730952620506287, "learning_rate": 0.00013540834753441282, "loss": 1.7344, "step": 14689 }, { "epoch": 0.71728515625, "grad_norm": 0.2184804528951645, "learning_rate": 0.00013538087773509472, "loss": 1.7491, "step": 14690 }, { "epoch": 0.717333984375, "grad_norm": 0.25626498460769653, "learning_rate": 0.00013535341131955828, "loss": 1.7631, "step": 14691 }, { "epoch": 0.7173828125, "grad_norm": 0.20675590634346008, "learning_rate": 0.0001353259482884691, "loss": 1.7426, "step": 14692 }, { "epoch": 0.717431640625, "grad_norm": 0.2516041398048401, "learning_rate": 0.00013529848864249274, "loss": 1.7501, "step": 14693 }, { "epoch": 0.71748046875, "grad_norm": 0.2591190040111542, "learning_rate": 0.00013527103238229486, "loss": 1.7355, "step": 14694 }, { "epoch": 0.717529296875, "grad_norm": 0.19969742000102997, "learning_rate": 0.00013524357950854058, "loss": 1.739, "step": 14695 }, { "epoch": 0.717578125, "grad_norm": 0.20995457470417023, "learning_rate": 0.00013521613002189558, "loss": 1.7349, "step": 14696 }, { "epoch": 0.717626953125, "grad_norm": 0.20078997313976288, "learning_rate": 0.00013518868392302486, "loss": 1.7352, "step": 14697 }, { "epoch": 0.71767578125, "grad_norm": 0.20031514763832092, "learning_rate": 0.00013516124121259383, "loss": 1.751, "step": 14698 }, { "epoch": 0.717724609375, "grad_norm": 0.2514345049858093, "learning_rate": 0.00013513380189126732, "loss": 1.7382, "step": 14699 }, { "epoch": 0.7177734375, "grad_norm": 0.19685325026512146, "learning_rate": 0.00013510636595971066, "loss": 1.7298, "step": 14700 }, { "epoch": 0.717822265625, "grad_norm": 0.2608952820301056, "learning_rate": 0.0001350789334185884, "loss": 1.7078, "step": 14701 }, { "epoch": 0.71787109375, "grad_norm": 0.19966629147529602, "learning_rate": 0.0001350515042685658, "loss": 1.7225, "step": 14702 }, { "epoch": 0.717919921875, "grad_norm": 0.22831828892230988, "learning_rate": 0.00013502407851030736, "loss": 1.7456, "step": 14703 }, { "epoch": 0.71796875, "grad_norm": 0.22374065220355988, "learning_rate": 0.0001349966561444778, "loss": 1.7622, "step": 14704 }, { "epoch": 0.718017578125, "grad_norm": 0.23262569308280945, "learning_rate": 0.00013496923717174175, "loss": 1.7503, "step": 14705 }, { "epoch": 0.71806640625, "grad_norm": 0.2307075560092926, "learning_rate": 0.00013494182159276376, "loss": 1.7546, "step": 14706 }, { "epoch": 0.718115234375, "grad_norm": 0.2169235795736313, "learning_rate": 0.00013491440940820827, "loss": 1.7714, "step": 14707 }, { "epoch": 0.7181640625, "grad_norm": 0.19242845475673676, "learning_rate": 0.00013488700061873958, "loss": 1.7542, "step": 14708 }, { "epoch": 0.718212890625, "grad_norm": 0.21908363699913025, "learning_rate": 0.000134859595225022, "loss": 1.7596, "step": 14709 }, { "epoch": 0.71826171875, "grad_norm": 0.18231020867824554, "learning_rate": 0.00013483219322771973, "loss": 1.7643, "step": 14710 }, { "epoch": 0.718310546875, "grad_norm": 0.2151142656803131, "learning_rate": 0.00013480479462749684, "loss": 1.7363, "step": 14711 }, { "epoch": 0.718359375, "grad_norm": 0.1863875836133957, "learning_rate": 0.00013477739942501746, "loss": 1.7411, "step": 14712 }, { "epoch": 0.718408203125, "grad_norm": 0.19193178415298462, "learning_rate": 0.0001347500076209453, "loss": 1.7379, "step": 14713 }, { "epoch": 0.71845703125, "grad_norm": 0.21094022691249847, "learning_rate": 0.0001347226192159446, "loss": 1.7329, "step": 14714 }, { "epoch": 0.718505859375, "grad_norm": 0.18799038231372833, "learning_rate": 0.0001346952342106787, "loss": 1.7432, "step": 14715 }, { "epoch": 0.7185546875, "grad_norm": 0.20672735571861267, "learning_rate": 0.0001346678526058117, "loss": 1.752, "step": 14716 }, { "epoch": 0.718603515625, "grad_norm": 0.1943899542093277, "learning_rate": 0.00013464047440200688, "loss": 1.7561, "step": 14717 }, { "epoch": 0.71865234375, "grad_norm": 0.24598170816898346, "learning_rate": 0.00013461309959992806, "loss": 1.7251, "step": 14718 }, { "epoch": 0.718701171875, "grad_norm": 0.2179369032382965, "learning_rate": 0.0001345857282002384, "loss": 1.7538, "step": 14719 }, { "epoch": 0.71875, "grad_norm": 0.20306624472141266, "learning_rate": 0.0001345583602036016, "loss": 1.7572, "step": 14720 }, { "epoch": 0.718798828125, "grad_norm": 0.2271541953086853, "learning_rate": 0.0001345309956106807, "loss": 1.7478, "step": 14721 }, { "epoch": 0.71884765625, "grad_norm": 0.19713734090328217, "learning_rate": 0.00013450363442213894, "loss": 1.7396, "step": 14722 }, { "epoch": 0.718896484375, "grad_norm": 0.21006062626838684, "learning_rate": 0.0001344762766386395, "loss": 1.7297, "step": 14723 }, { "epoch": 0.7189453125, "grad_norm": 0.18499259650707245, "learning_rate": 0.0001344489222608454, "loss": 1.7505, "step": 14724 }, { "epoch": 0.718994140625, "grad_norm": 0.23433621227741241, "learning_rate": 0.00013442157128941957, "loss": 1.7635, "step": 14725 }, { "epoch": 0.71904296875, "grad_norm": 0.2783252000808716, "learning_rate": 0.00013439422372502492, "loss": 1.7674, "step": 14726 }, { "epoch": 0.719091796875, "grad_norm": 0.19983740150928497, "learning_rate": 0.00013436687956832417, "loss": 1.7583, "step": 14727 }, { "epoch": 0.719140625, "grad_norm": 0.263075590133667, "learning_rate": 0.0001343395388199802, "loss": 1.7371, "step": 14728 }, { "epoch": 0.719189453125, "grad_norm": 0.21914033591747284, "learning_rate": 0.0001343122014806553, "loss": 1.7601, "step": 14729 }, { "epoch": 0.71923828125, "grad_norm": 0.21269819140434265, "learning_rate": 0.00013428486755101245, "loss": 1.7417, "step": 14730 }, { "epoch": 0.719287109375, "grad_norm": 0.2124449610710144, "learning_rate": 0.00013425753703171373, "loss": 1.7431, "step": 14731 }, { "epoch": 0.7193359375, "grad_norm": 0.1758618801832199, "learning_rate": 0.00013423020992342177, "loss": 1.7352, "step": 14732 }, { "epoch": 0.719384765625, "grad_norm": 0.23967672884464264, "learning_rate": 0.00013420288622679867, "loss": 1.7579, "step": 14733 }, { "epoch": 0.71943359375, "grad_norm": 0.17277051508426666, "learning_rate": 0.00013417556594250686, "loss": 1.7582, "step": 14734 }, { "epoch": 0.719482421875, "grad_norm": 0.2390502244234085, "learning_rate": 0.0001341482490712082, "loss": 1.745, "step": 14735 }, { "epoch": 0.71953125, "grad_norm": 0.27717381715774536, "learning_rate": 0.000134120935613565, "loss": 1.7482, "step": 14736 }, { "epoch": 0.719580078125, "grad_norm": 0.21681253612041473, "learning_rate": 0.0001340936255702391, "loss": 1.732, "step": 14737 }, { "epoch": 0.71962890625, "grad_norm": 0.28660014271736145, "learning_rate": 0.00013406631894189233, "loss": 1.7128, "step": 14738 }, { "epoch": 0.719677734375, "grad_norm": 0.22928179800510406, "learning_rate": 0.00013403901572918656, "loss": 1.7409, "step": 14739 }, { "epoch": 0.7197265625, "grad_norm": 0.2813586890697479, "learning_rate": 0.0001340117159327835, "loss": 1.7535, "step": 14740 }, { "epoch": 0.719775390625, "grad_norm": 0.2074573040008545, "learning_rate": 0.00013398441955334475, "loss": 1.7629, "step": 14741 }, { "epoch": 0.71982421875, "grad_norm": 0.29672616720199585, "learning_rate": 0.0001339571265915319, "loss": 1.7545, "step": 14742 }, { "epoch": 0.719873046875, "grad_norm": 0.2176319807767868, "learning_rate": 0.00013392983704800637, "loss": 1.7275, "step": 14743 }, { "epoch": 0.719921875, "grad_norm": 0.23896779119968414, "learning_rate": 0.00013390255092342963, "loss": 1.762, "step": 14744 }, { "epoch": 0.719970703125, "grad_norm": 0.29726308584213257, "learning_rate": 0.00013387526821846292, "loss": 1.7437, "step": 14745 }, { "epoch": 0.72001953125, "grad_norm": 0.21356524527072906, "learning_rate": 0.00013384798893376748, "loss": 1.7644, "step": 14746 }, { "epoch": 0.720068359375, "grad_norm": 0.2924870550632477, "learning_rate": 0.00013382071307000432, "loss": 1.7694, "step": 14747 }, { "epoch": 0.7201171875, "grad_norm": 0.20029181241989136, "learning_rate": 0.00013379344062783478, "loss": 1.7405, "step": 14748 }, { "epoch": 0.720166015625, "grad_norm": 0.25321972370147705, "learning_rate": 0.00013376617160791942, "loss": 1.7635, "step": 14749 }, { "epoch": 0.72021484375, "grad_norm": 0.22018630802631378, "learning_rate": 0.00013373890601091955, "loss": 1.7488, "step": 14750 }, { "epoch": 0.720263671875, "grad_norm": 0.24385090172290802, "learning_rate": 0.00013371164383749567, "loss": 1.7269, "step": 14751 }, { "epoch": 0.7203125, "grad_norm": 0.21285051107406616, "learning_rate": 0.00013368438508830872, "loss": 1.7658, "step": 14752 }, { "epoch": 0.720361328125, "grad_norm": 0.24041861295700073, "learning_rate": 0.00013365712976401916, "loss": 1.768, "step": 14753 }, { "epoch": 0.72041015625, "grad_norm": 0.21453280746936798, "learning_rate": 0.00013362987786528761, "loss": 1.7315, "step": 14754 }, { "epoch": 0.720458984375, "grad_norm": 0.20772142708301544, "learning_rate": 0.00013360262939277458, "loss": 1.7595, "step": 14755 }, { "epoch": 0.7205078125, "grad_norm": 0.24565573036670685, "learning_rate": 0.00013357538434714038, "loss": 1.7389, "step": 14756 }, { "epoch": 0.720556640625, "grad_norm": 0.19498072564601898, "learning_rate": 0.00013354814272904537, "loss": 1.7353, "step": 14757 }, { "epoch": 0.72060546875, "grad_norm": 0.25758564472198486, "learning_rate": 0.00013352090453914978, "loss": 1.7553, "step": 14758 }, { "epoch": 0.720654296875, "grad_norm": 0.18146634101867676, "learning_rate": 0.00013349366977811369, "loss": 1.7334, "step": 14759 }, { "epoch": 0.720703125, "grad_norm": 0.24332599341869354, "learning_rate": 0.00013346643844659722, "loss": 1.7346, "step": 14760 }, { "epoch": 0.720751953125, "grad_norm": 0.20642252266407013, "learning_rate": 0.00013343921054526035, "loss": 1.7534, "step": 14761 }, { "epoch": 0.72080078125, "grad_norm": 0.2104908674955368, "learning_rate": 0.000133411986074763, "loss": 1.7338, "step": 14762 }, { "epoch": 0.720849609375, "grad_norm": 0.22438670694828033, "learning_rate": 0.00013338476503576475, "loss": 1.7355, "step": 14763 }, { "epoch": 0.7208984375, "grad_norm": 0.18713562190532684, "learning_rate": 0.00013335754742892564, "loss": 1.7333, "step": 14764 }, { "epoch": 0.720947265625, "grad_norm": 0.2302030622959137, "learning_rate": 0.000133330333254905, "loss": 1.7263, "step": 14765 }, { "epoch": 0.72099609375, "grad_norm": 0.2129971981048584, "learning_rate": 0.00013330312251436278, "loss": 1.7124, "step": 14766 }, { "epoch": 0.721044921875, "grad_norm": 0.20698955655097961, "learning_rate": 0.000133275915207958, "loss": 1.7378, "step": 14767 }, { "epoch": 0.72109375, "grad_norm": 0.24938862025737762, "learning_rate": 0.00013324871133635045, "loss": 1.7301, "step": 14768 }, { "epoch": 0.721142578125, "grad_norm": 0.17740315198898315, "learning_rate": 0.00013322151090019906, "loss": 1.7403, "step": 14769 }, { "epoch": 0.72119140625, "grad_norm": 0.2571585476398468, "learning_rate": 0.00013319431390016348, "loss": 1.7606, "step": 14770 }, { "epoch": 0.721240234375, "grad_norm": 0.18449732661247253, "learning_rate": 0.00013316712033690253, "loss": 1.7361, "step": 14771 }, { "epoch": 0.7212890625, "grad_norm": 0.27527105808258057, "learning_rate": 0.00013313993021107538, "loss": 1.7528, "step": 14772 }, { "epoch": 0.721337890625, "grad_norm": 0.20500428974628448, "learning_rate": 0.00013311274352334097, "loss": 1.7553, "step": 14773 }, { "epoch": 0.72138671875, "grad_norm": 0.3327484130859375, "learning_rate": 0.0001330855602743582, "loss": 1.7304, "step": 14774 }, { "epoch": 0.721435546875, "grad_norm": 0.20382799208164215, "learning_rate": 0.00013305838046478592, "loss": 1.7518, "step": 14775 }, { "epoch": 0.721484375, "grad_norm": 0.24020084738731384, "learning_rate": 0.0001330312040952828, "loss": 1.7567, "step": 14776 }, { "epoch": 0.721533203125, "grad_norm": 0.24541737139225006, "learning_rate": 0.0001330040311665075, "loss": 1.7661, "step": 14777 }, { "epoch": 0.72158203125, "grad_norm": 0.21971935033798218, "learning_rate": 0.0001329768616791186, "loss": 1.7357, "step": 14778 }, { "epoch": 0.721630859375, "grad_norm": 0.2430555373430252, "learning_rate": 0.00013294969563377456, "loss": 1.7524, "step": 14779 }, { "epoch": 0.7216796875, "grad_norm": 0.19163917005062103, "learning_rate": 0.00013292253303113382, "loss": 1.7446, "step": 14780 }, { "epoch": 0.721728515625, "grad_norm": 0.22705933451652527, "learning_rate": 0.00013289537387185452, "loss": 1.7396, "step": 14781 }, { "epoch": 0.72177734375, "grad_norm": 0.17596815526485443, "learning_rate": 0.00013286821815659513, "loss": 1.7332, "step": 14782 }, { "epoch": 0.721826171875, "grad_norm": 0.21132999658584595, "learning_rate": 0.00013284106588601352, "loss": 1.7426, "step": 14783 }, { "epoch": 0.721875, "grad_norm": 0.2031668722629547, "learning_rate": 0.00013281391706076803, "loss": 1.7585, "step": 14784 }, { "epoch": 0.721923828125, "grad_norm": 0.19149458408355713, "learning_rate": 0.00013278677168151637, "loss": 1.7104, "step": 14785 }, { "epoch": 0.72197265625, "grad_norm": 0.19857196509838104, "learning_rate": 0.00013275962974891665, "loss": 1.709, "step": 14786 }, { "epoch": 0.722021484375, "grad_norm": 0.20172233879566193, "learning_rate": 0.00013273249126362653, "loss": 1.7309, "step": 14787 }, { "epoch": 0.7220703125, "grad_norm": 0.23442979156970978, "learning_rate": 0.00013270535622630374, "loss": 1.7757, "step": 14788 }, { "epoch": 0.722119140625, "grad_norm": 0.2086840122938156, "learning_rate": 0.00013267822463760605, "loss": 1.7482, "step": 14789 }, { "epoch": 0.72216796875, "grad_norm": 0.17962387204170227, "learning_rate": 0.00013265109649819085, "loss": 1.753, "step": 14790 }, { "epoch": 0.722216796875, "grad_norm": 0.20650093257427216, "learning_rate": 0.00013262397180871573, "loss": 1.7243, "step": 14791 }, { "epoch": 0.722265625, "grad_norm": 0.18210628628730774, "learning_rate": 0.000132596850569838, "loss": 1.7685, "step": 14792 }, { "epoch": 0.722314453125, "grad_norm": 0.22933821380138397, "learning_rate": 0.000132569732782215, "loss": 1.782, "step": 14793 }, { "epoch": 0.72236328125, "grad_norm": 0.20536580681800842, "learning_rate": 0.000132542618446504, "loss": 1.7577, "step": 14794 }, { "epoch": 0.722412109375, "grad_norm": 0.20716612040996552, "learning_rate": 0.00013251550756336207, "loss": 1.7564, "step": 14795 }, { "epoch": 0.7224609375, "grad_norm": 0.20442265272140503, "learning_rate": 0.0001324884001334463, "loss": 1.7801, "step": 14796 }, { "epoch": 0.722509765625, "grad_norm": 0.2163412719964981, "learning_rate": 0.0001324612961574136, "loss": 1.7357, "step": 14797 }, { "epoch": 0.72255859375, "grad_norm": 0.28271982073783875, "learning_rate": 0.000132434195635921, "loss": 1.7556, "step": 14798 }, { "epoch": 0.722607421875, "grad_norm": 0.21832896769046783, "learning_rate": 0.00013240709856962509, "loss": 1.7654, "step": 14799 }, { "epoch": 0.72265625, "grad_norm": 0.2369491010904312, "learning_rate": 0.00013238000495918278, "loss": 1.7544, "step": 14800 }, { "epoch": 0.722705078125, "grad_norm": 0.23858919739723206, "learning_rate": 0.00013235291480525054, "loss": 1.7587, "step": 14801 }, { "epoch": 0.72275390625, "grad_norm": 0.1970434933900833, "learning_rate": 0.00013232582810848514, "loss": 1.7591, "step": 14802 }, { "epoch": 0.722802734375, "grad_norm": 0.24456657469272614, "learning_rate": 0.00013229874486954274, "loss": 1.7676, "step": 14803 }, { "epoch": 0.7228515625, "grad_norm": 0.20121710002422333, "learning_rate": 0.00013227166508908013, "loss": 1.7534, "step": 14804 }, { "epoch": 0.722900390625, "grad_norm": 0.2617485821247101, "learning_rate": 0.00013224458876775325, "loss": 1.7484, "step": 14805 }, { "epoch": 0.72294921875, "grad_norm": 0.1939314752817154, "learning_rate": 0.00013221751590621845, "loss": 1.7101, "step": 14806 }, { "epoch": 0.722998046875, "grad_norm": 0.26819974184036255, "learning_rate": 0.00013219044650513188, "loss": 1.7482, "step": 14807 }, { "epoch": 0.723046875, "grad_norm": 0.24970069527626038, "learning_rate": 0.0001321633805651496, "loss": 1.765, "step": 14808 }, { "epoch": 0.723095703125, "grad_norm": 0.2716580629348755, "learning_rate": 0.00013213631808692747, "loss": 1.7753, "step": 14809 }, { "epoch": 0.72314453125, "grad_norm": 0.23159098625183105, "learning_rate": 0.0001321092590711215, "loss": 1.7156, "step": 14810 }, { "epoch": 0.723193359375, "grad_norm": 0.25373849272727966, "learning_rate": 0.0001320822035183874, "loss": 1.7342, "step": 14811 }, { "epoch": 0.7232421875, "grad_norm": 0.24526861310005188, "learning_rate": 0.000132055151429381, "loss": 1.7629, "step": 14812 }, { "epoch": 0.723291015625, "grad_norm": 0.22306713461875916, "learning_rate": 0.00013202810280475775, "loss": 1.7715, "step": 14813 }, { "epoch": 0.72333984375, "grad_norm": 0.2500664293766022, "learning_rate": 0.00013200105764517338, "loss": 1.7717, "step": 14814 }, { "epoch": 0.723388671875, "grad_norm": 0.22126547992229462, "learning_rate": 0.00013197401595128316, "loss": 1.7495, "step": 14815 }, { "epoch": 0.7234375, "grad_norm": 0.28694066405296326, "learning_rate": 0.00013194697772374268, "loss": 1.7478, "step": 14816 }, { "epoch": 0.723486328125, "grad_norm": 0.21241925656795502, "learning_rate": 0.00013191994296320698, "loss": 1.739, "step": 14817 }, { "epoch": 0.72353515625, "grad_norm": 0.29753777384757996, "learning_rate": 0.00013189291167033158, "loss": 1.7303, "step": 14818 }, { "epoch": 0.723583984375, "grad_norm": 0.20085279643535614, "learning_rate": 0.00013186588384577128, "loss": 1.7306, "step": 14819 }, { "epoch": 0.7236328125, "grad_norm": 0.2618711292743683, "learning_rate": 0.0001318388594901814, "loss": 1.7614, "step": 14820 }, { "epoch": 0.723681640625, "grad_norm": 0.19421029090881348, "learning_rate": 0.0001318118386042167, "loss": 1.7358, "step": 14821 }, { "epoch": 0.72373046875, "grad_norm": 0.2557908594608307, "learning_rate": 0.00013178482118853218, "loss": 1.7642, "step": 14822 }, { "epoch": 0.723779296875, "grad_norm": 0.22733046114444733, "learning_rate": 0.00013175780724378257, "loss": 1.7273, "step": 14823 }, { "epoch": 0.723828125, "grad_norm": 0.27094724774360657, "learning_rate": 0.00013173079677062253, "loss": 1.7385, "step": 14824 }, { "epoch": 0.723876953125, "grad_norm": 0.2407318651676178, "learning_rate": 0.00013170378976970677, "loss": 1.734, "step": 14825 }, { "epoch": 0.72392578125, "grad_norm": 0.23601357638835907, "learning_rate": 0.00013167678624168976, "loss": 1.7535, "step": 14826 }, { "epoch": 0.723974609375, "grad_norm": 0.22519488632678986, "learning_rate": 0.00013164978618722598, "loss": 1.7356, "step": 14827 }, { "epoch": 0.7240234375, "grad_norm": 0.22048816084861755, "learning_rate": 0.00013162278960696984, "loss": 1.7462, "step": 14828 }, { "epoch": 0.724072265625, "grad_norm": 0.22996143996715546, "learning_rate": 0.00013159579650157555, "loss": 1.7384, "step": 14829 }, { "epoch": 0.72412109375, "grad_norm": 0.24155566096305847, "learning_rate": 0.00013156880687169742, "loss": 1.7556, "step": 14830 }, { "epoch": 0.724169921875, "grad_norm": 0.23482732474803925, "learning_rate": 0.0001315418207179893, "loss": 1.7498, "step": 14831 }, { "epoch": 0.72421875, "grad_norm": 0.2390628308057785, "learning_rate": 0.00013151483804110564, "loss": 1.7268, "step": 14832 }, { "epoch": 0.724267578125, "grad_norm": 0.2734534740447998, "learning_rate": 0.00013148785884169994, "loss": 1.7533, "step": 14833 }, { "epoch": 0.72431640625, "grad_norm": 0.23535463213920593, "learning_rate": 0.00013146088312042647, "loss": 1.7648, "step": 14834 }, { "epoch": 0.724365234375, "grad_norm": 0.2705346941947937, "learning_rate": 0.00013143391087793861, "loss": 1.7455, "step": 14835 }, { "epoch": 0.7244140625, "grad_norm": 0.18693436682224274, "learning_rate": 0.00013140694211489045, "loss": 1.7351, "step": 14836 }, { "epoch": 0.724462890625, "grad_norm": 0.21001926064491272, "learning_rate": 0.0001313799768319353, "loss": 1.742, "step": 14837 }, { "epoch": 0.72451171875, "grad_norm": 0.20886564254760742, "learning_rate": 0.00013135301502972688, "loss": 1.7391, "step": 14838 }, { "epoch": 0.724560546875, "grad_norm": 0.20087455213069916, "learning_rate": 0.00013132605670891848, "loss": 1.74, "step": 14839 }, { "epoch": 0.724609375, "grad_norm": 0.21628445386886597, "learning_rate": 0.00013129910187016354, "loss": 1.7462, "step": 14840 }, { "epoch": 0.724658203125, "grad_norm": 0.18249250948429108, "learning_rate": 0.0001312721505141153, "loss": 1.7498, "step": 14841 }, { "epoch": 0.72470703125, "grad_norm": 0.2079029381275177, "learning_rate": 0.00013124520264142696, "loss": 1.7454, "step": 14842 }, { "epoch": 0.724755859375, "grad_norm": 0.18631532788276672, "learning_rate": 0.00013121825825275162, "loss": 1.725, "step": 14843 }, { "epoch": 0.7248046875, "grad_norm": 0.24173040688037872, "learning_rate": 0.00013119131734874235, "loss": 1.7412, "step": 14844 }, { "epoch": 0.724853515625, "grad_norm": 0.1893642395734787, "learning_rate": 0.00013116437993005198, "loss": 1.7779, "step": 14845 }, { "epoch": 0.72490234375, "grad_norm": 0.2031894326210022, "learning_rate": 0.00013113744599733352, "loss": 1.743, "step": 14846 }, { "epoch": 0.724951171875, "grad_norm": 0.20225253701210022, "learning_rate": 0.00013111051555123949, "loss": 1.74, "step": 14847 }, { "epoch": 0.725, "grad_norm": 0.2154790163040161, "learning_rate": 0.00013108358859242287, "loss": 1.7248, "step": 14848 }, { "epoch": 0.725048828125, "grad_norm": 0.19677022099494934, "learning_rate": 0.00013105666512153592, "loss": 1.7503, "step": 14849 }, { "epoch": 0.72509765625, "grad_norm": 0.20723210275173187, "learning_rate": 0.0001310297451392315, "loss": 1.7522, "step": 14850 }, { "epoch": 0.725146484375, "grad_norm": 0.17566350102424622, "learning_rate": 0.00013100282864616172, "loss": 1.7399, "step": 14851 }, { "epoch": 0.7251953125, "grad_norm": 0.1910714954137802, "learning_rate": 0.00013097591564297924, "loss": 1.7547, "step": 14852 }, { "epoch": 0.725244140625, "grad_norm": 0.2034464329481125, "learning_rate": 0.000130949006130336, "loss": 1.72, "step": 14853 }, { "epoch": 0.72529296875, "grad_norm": 0.18172448873519897, "learning_rate": 0.00013092210010888448, "loss": 1.7358, "step": 14854 }, { "epoch": 0.725341796875, "grad_norm": 0.20762023329734802, "learning_rate": 0.00013089519757927652, "loss": 1.7724, "step": 14855 }, { "epoch": 0.725390625, "grad_norm": 0.19677546620368958, "learning_rate": 0.00013086829854216424, "loss": 1.7495, "step": 14856 }, { "epoch": 0.725439453125, "grad_norm": 0.24558648467063904, "learning_rate": 0.00013084140299819948, "loss": 1.7266, "step": 14857 }, { "epoch": 0.72548828125, "grad_norm": 0.19248703122138977, "learning_rate": 0.0001308145109480342, "loss": 1.7585, "step": 14858 }, { "epoch": 0.725537109375, "grad_norm": 0.2554747462272644, "learning_rate": 0.00013078762239232005, "loss": 1.7119, "step": 14859 }, { "epoch": 0.7255859375, "grad_norm": 0.2057848423719406, "learning_rate": 0.0001307607373317088, "loss": 1.7427, "step": 14860 }, { "epoch": 0.725634765625, "grad_norm": 0.2151692807674408, "learning_rate": 0.00013073385576685187, "loss": 1.7821, "step": 14861 }, { "epoch": 0.72568359375, "grad_norm": 0.2439451515674591, "learning_rate": 0.0001307069776984009, "loss": 1.7576, "step": 14862 }, { "epoch": 0.725732421875, "grad_norm": 0.21748791635036469, "learning_rate": 0.00013068010312700722, "loss": 1.7672, "step": 14863 }, { "epoch": 0.72578125, "grad_norm": 0.2558785676956177, "learning_rate": 0.00013065323205332222, "loss": 1.7439, "step": 14864 }, { "epoch": 0.725830078125, "grad_norm": 0.23601239919662476, "learning_rate": 0.00013062636447799705, "loss": 1.7575, "step": 14865 }, { "epoch": 0.72587890625, "grad_norm": 0.188508540391922, "learning_rate": 0.00013059950040168305, "loss": 1.7214, "step": 14866 }, { "epoch": 0.725927734375, "grad_norm": 0.24359866976737976, "learning_rate": 0.000130572639825031, "loss": 1.7751, "step": 14867 }, { "epoch": 0.7259765625, "grad_norm": 0.1760488897562027, "learning_rate": 0.00013054578274869223, "loss": 1.7534, "step": 14868 }, { "epoch": 0.726025390625, "grad_norm": 0.21282336115837097, "learning_rate": 0.0001305189291733173, "loss": 1.739, "step": 14869 }, { "epoch": 0.72607421875, "grad_norm": 0.19918224215507507, "learning_rate": 0.00013049207909955735, "loss": 1.748, "step": 14870 }, { "epoch": 0.726123046875, "grad_norm": 0.21449680626392365, "learning_rate": 0.00013046523252806285, "loss": 1.7303, "step": 14871 }, { "epoch": 0.726171875, "grad_norm": 0.23034384846687317, "learning_rate": 0.0001304383894594846, "loss": 1.7257, "step": 14872 }, { "epoch": 0.726220703125, "grad_norm": 0.1939050853252411, "learning_rate": 0.00013041154989447312, "loss": 1.7572, "step": 14873 }, { "epoch": 0.72626953125, "grad_norm": 0.2450093924999237, "learning_rate": 0.0001303847138336789, "loss": 1.7649, "step": 14874 }, { "epoch": 0.726318359375, "grad_norm": 0.2260316163301468, "learning_rate": 0.00013035788127775227, "loss": 1.7837, "step": 14875 }, { "epoch": 0.7263671875, "grad_norm": 0.24198253452777863, "learning_rate": 0.00013033105222734365, "loss": 1.7488, "step": 14876 }, { "epoch": 0.726416015625, "grad_norm": 0.22850985825061798, "learning_rate": 0.00013030422668310315, "loss": 1.7623, "step": 14877 }, { "epoch": 0.72646484375, "grad_norm": 0.21295222640037537, "learning_rate": 0.000130277404645681, "loss": 1.7363, "step": 14878 }, { "epoch": 0.726513671875, "grad_norm": 0.2078402191400528, "learning_rate": 0.0001302505861157272, "loss": 1.7221, "step": 14879 }, { "epoch": 0.7265625, "grad_norm": 0.2277650088071823, "learning_rate": 0.00013022377109389179, "loss": 1.7394, "step": 14880 }, { "epoch": 0.726611328125, "grad_norm": 0.24543726444244385, "learning_rate": 0.00013019695958082453, "loss": 1.7425, "step": 14881 }, { "epoch": 0.72666015625, "grad_norm": 0.21950510144233704, "learning_rate": 0.0001301701515771753, "loss": 1.7604, "step": 14882 }, { "epoch": 0.726708984375, "grad_norm": 0.21808138489723206, "learning_rate": 0.00013014334708359382, "loss": 1.7249, "step": 14883 }, { "epoch": 0.7267578125, "grad_norm": 0.22404222190380096, "learning_rate": 0.0001301165461007297, "loss": 1.7649, "step": 14884 }, { "epoch": 0.726806640625, "grad_norm": 0.2046983689069748, "learning_rate": 0.0001300897486292325, "loss": 1.7494, "step": 14885 }, { "epoch": 0.72685546875, "grad_norm": 0.22647999227046967, "learning_rate": 0.00013006295466975171, "loss": 1.7747, "step": 14886 }, { "epoch": 0.726904296875, "grad_norm": 0.21100768446922302, "learning_rate": 0.00013003616422293648, "loss": 1.7524, "step": 14887 }, { "epoch": 0.726953125, "grad_norm": 0.2066110372543335, "learning_rate": 0.00013000937728943645, "loss": 1.7553, "step": 14888 }, { "epoch": 0.727001953125, "grad_norm": 0.2169870138168335, "learning_rate": 0.00012998259386990058, "loss": 1.7277, "step": 14889 }, { "epoch": 0.72705078125, "grad_norm": 0.22685779631137848, "learning_rate": 0.00012995581396497806, "loss": 1.7596, "step": 14890 }, { "epoch": 0.727099609375, "grad_norm": 0.2126341462135315, "learning_rate": 0.00012992903757531787, "loss": 1.7441, "step": 14891 }, { "epoch": 0.7271484375, "grad_norm": 0.21410813927650452, "learning_rate": 0.00012990226470156909, "loss": 1.7558, "step": 14892 }, { "epoch": 0.727197265625, "grad_norm": 0.2303977757692337, "learning_rate": 0.00012987549534438042, "loss": 1.7468, "step": 14893 }, { "epoch": 0.72724609375, "grad_norm": 0.19948385655879974, "learning_rate": 0.00012984872950440074, "loss": 1.7559, "step": 14894 }, { "epoch": 0.727294921875, "grad_norm": 0.21136924624443054, "learning_rate": 0.00012982196718227867, "loss": 1.7491, "step": 14895 }, { "epoch": 0.72734375, "grad_norm": 0.2161148339509964, "learning_rate": 0.00012979520837866294, "loss": 1.7378, "step": 14896 }, { "epoch": 0.727392578125, "grad_norm": 0.22472897171974182, "learning_rate": 0.00012976845309420194, "loss": 1.7508, "step": 14897 }, { "epoch": 0.72744140625, "grad_norm": 0.20445884764194489, "learning_rate": 0.00012974170132954417, "loss": 1.7514, "step": 14898 }, { "epoch": 0.727490234375, "grad_norm": 0.2231004536151886, "learning_rate": 0.00012971495308533796, "loss": 1.7212, "step": 14899 }, { "epoch": 0.7275390625, "grad_norm": 0.1838916689157486, "learning_rate": 0.00012968820836223156, "loss": 1.7416, "step": 14900 }, { "epoch": 0.727587890625, "grad_norm": 0.21335865557193756, "learning_rate": 0.0001296614671608732, "loss": 1.7595, "step": 14901 }, { "epoch": 0.72763671875, "grad_norm": 0.19787177443504333, "learning_rate": 0.00012963472948191097, "loss": 1.7316, "step": 14902 }, { "epoch": 0.727685546875, "grad_norm": 0.22075308859348297, "learning_rate": 0.00012960799532599283, "loss": 1.7263, "step": 14903 }, { "epoch": 0.727734375, "grad_norm": 0.20414048433303833, "learning_rate": 0.00012958126469376675, "loss": 1.7558, "step": 14904 }, { "epoch": 0.727783203125, "grad_norm": 0.25564995408058167, "learning_rate": 0.00012955453758588058, "loss": 1.7281, "step": 14905 }, { "epoch": 0.72783203125, "grad_norm": 0.1789318025112152, "learning_rate": 0.00012952781400298198, "loss": 1.7411, "step": 14906 }, { "epoch": 0.727880859375, "grad_norm": 0.26929745078086853, "learning_rate": 0.0001295010939457187, "loss": 1.7279, "step": 14907 }, { "epoch": 0.7279296875, "grad_norm": 0.20028531551361084, "learning_rate": 0.00012947437741473827, "loss": 1.7592, "step": 14908 }, { "epoch": 0.727978515625, "grad_norm": 0.2654573619365692, "learning_rate": 0.0001294476644106883, "loss": 1.755, "step": 14909 }, { "epoch": 0.72802734375, "grad_norm": 0.18401505053043365, "learning_rate": 0.00012942095493421603, "loss": 1.7263, "step": 14910 }, { "epoch": 0.728076171875, "grad_norm": 0.23807014524936676, "learning_rate": 0.0001293942489859689, "loss": 1.7423, "step": 14911 }, { "epoch": 0.728125, "grad_norm": 0.2386295348405838, "learning_rate": 0.00012936754656659415, "loss": 1.7497, "step": 14912 }, { "epoch": 0.728173828125, "grad_norm": 0.189935103058815, "learning_rate": 0.00012934084767673884, "loss": 1.7666, "step": 14913 }, { "epoch": 0.72822265625, "grad_norm": 0.2393283247947693, "learning_rate": 0.00012931415231705017, "loss": 1.7256, "step": 14914 }, { "epoch": 0.728271484375, "grad_norm": 0.20182368159294128, "learning_rate": 0.00012928746048817503, "loss": 1.7484, "step": 14915 }, { "epoch": 0.7283203125, "grad_norm": 0.2202838510274887, "learning_rate": 0.00012926077219076038, "loss": 1.7493, "step": 14916 }, { "epoch": 0.728369140625, "grad_norm": 0.21040193736553192, "learning_rate": 0.000129234087425453, "loss": 1.7445, "step": 14917 }, { "epoch": 0.72841796875, "grad_norm": 0.20473600924015045, "learning_rate": 0.00012920740619289957, "loss": 1.7387, "step": 14918 }, { "epoch": 0.728466796875, "grad_norm": 0.2210513800382614, "learning_rate": 0.0001291807284937468, "loss": 1.728, "step": 14919 }, { "epoch": 0.728515625, "grad_norm": 0.23052282631397247, "learning_rate": 0.00012915405432864125, "loss": 1.7453, "step": 14920 }, { "epoch": 0.728564453125, "grad_norm": 0.22208446264266968, "learning_rate": 0.00012912738369822934, "loss": 1.7298, "step": 14921 }, { "epoch": 0.72861328125, "grad_norm": 0.21521367132663727, "learning_rate": 0.00012910071660315753, "loss": 1.7374, "step": 14922 }, { "epoch": 0.728662109375, "grad_norm": 0.21655452251434326, "learning_rate": 0.0001290740530440721, "loss": 1.7246, "step": 14923 }, { "epoch": 0.7287109375, "grad_norm": 0.19014514982700348, "learning_rate": 0.00012904739302161907, "loss": 1.7281, "step": 14924 }, { "epoch": 0.728759765625, "grad_norm": 0.19823597371578217, "learning_rate": 0.0001290207365364449, "loss": 1.7131, "step": 14925 }, { "epoch": 0.72880859375, "grad_norm": 0.1977771818637848, "learning_rate": 0.00012899408358919536, "loss": 1.7354, "step": 14926 }, { "epoch": 0.728857421875, "grad_norm": 0.18815496563911438, "learning_rate": 0.00012896743418051653, "loss": 1.758, "step": 14927 }, { "epoch": 0.72890625, "grad_norm": 0.21757054328918457, "learning_rate": 0.00012894078831105426, "loss": 1.7261, "step": 14928 }, { "epoch": 0.728955078125, "grad_norm": 0.19210577011108398, "learning_rate": 0.0001289141459814543, "loss": 1.7529, "step": 14929 }, { "epoch": 0.72900390625, "grad_norm": 0.23139964044094086, "learning_rate": 0.00012888750719236238, "loss": 1.7141, "step": 14930 }, { "epoch": 0.729052734375, "grad_norm": 0.1869451105594635, "learning_rate": 0.00012886087194442412, "loss": 1.7757, "step": 14931 }, { "epoch": 0.7291015625, "grad_norm": 0.2278963029384613, "learning_rate": 0.00012883424023828506, "loss": 1.7461, "step": 14932 }, { "epoch": 0.729150390625, "grad_norm": 0.22543779015541077, "learning_rate": 0.00012880761207459057, "loss": 1.754, "step": 14933 }, { "epoch": 0.72919921875, "grad_norm": 0.22583986818790436, "learning_rate": 0.00012878098745398606, "loss": 1.7228, "step": 14934 }, { "epoch": 0.729248046875, "grad_norm": 0.24710305035114288, "learning_rate": 0.0001287543663771168, "loss": 1.7339, "step": 14935 }, { "epoch": 0.729296875, "grad_norm": 0.21289359033107758, "learning_rate": 0.00012872774884462797, "loss": 1.7441, "step": 14936 }, { "epoch": 0.729345703125, "grad_norm": 0.21229811012744904, "learning_rate": 0.00012870113485716463, "loss": 1.7677, "step": 14937 }, { "epoch": 0.72939453125, "grad_norm": 0.22981438040733337, "learning_rate": 0.00012867452441537187, "loss": 1.7569, "step": 14938 }, { "epoch": 0.729443359375, "grad_norm": 0.22167368233203888, "learning_rate": 0.00012864791751989463, "loss": 1.7602, "step": 14939 }, { "epoch": 0.7294921875, "grad_norm": 0.2241816520690918, "learning_rate": 0.0001286213141713775, "loss": 1.7351, "step": 14940 }, { "epoch": 0.729541015625, "grad_norm": 0.20157334208488464, "learning_rate": 0.0001285947143704656, "loss": 1.7708, "step": 14941 }, { "epoch": 0.72958984375, "grad_norm": 0.22138743102550507, "learning_rate": 0.0001285681181178033, "loss": 1.7537, "step": 14942 }, { "epoch": 0.729638671875, "grad_norm": 0.21362482011318207, "learning_rate": 0.0001285415254140354, "loss": 1.7405, "step": 14943 }, { "epoch": 0.7296875, "grad_norm": 0.1898592859506607, "learning_rate": 0.00012851493625980616, "loss": 1.763, "step": 14944 }, { "epoch": 0.729736328125, "grad_norm": 0.2063257396221161, "learning_rate": 0.00012848835065576027, "loss": 1.7201, "step": 14945 }, { "epoch": 0.72978515625, "grad_norm": 0.23048505187034607, "learning_rate": 0.00012846176860254182, "loss": 1.7491, "step": 14946 }, { "epoch": 0.729833984375, "grad_norm": 0.20046953856945038, "learning_rate": 0.00012843519010079512, "loss": 1.7573, "step": 14947 }, { "epoch": 0.7298828125, "grad_norm": 0.21834565699100494, "learning_rate": 0.0001284086151511644, "loss": 1.748, "step": 14948 }, { "epoch": 0.729931640625, "grad_norm": 0.2165141999721527, "learning_rate": 0.00012838204375429364, "loss": 1.7308, "step": 14949 }, { "epoch": 0.72998046875, "grad_norm": 0.20308928191661835, "learning_rate": 0.0001283554759108268, "loss": 1.7524, "step": 14950 }, { "epoch": 0.730029296875, "grad_norm": 0.2648589015007019, "learning_rate": 0.00012832891162140787, "loss": 1.7591, "step": 14951 }, { "epoch": 0.730078125, "grad_norm": 0.19359217584133148, "learning_rate": 0.00012830235088668058, "loss": 1.7282, "step": 14952 }, { "epoch": 0.730126953125, "grad_norm": 0.2603549659252167, "learning_rate": 0.00012827579370728866, "loss": 1.7333, "step": 14953 }, { "epoch": 0.73017578125, "grad_norm": 0.18224720656871796, "learning_rate": 0.00012824924008387572, "loss": 1.763, "step": 14954 }, { "epoch": 0.730224609375, "grad_norm": 0.2466810792684555, "learning_rate": 0.00012822269001708554, "loss": 1.7294, "step": 14955 }, { "epoch": 0.7302734375, "grad_norm": 0.22407375276088715, "learning_rate": 0.00012819614350756116, "loss": 1.7631, "step": 14956 }, { "epoch": 0.730322265625, "grad_norm": 0.26837626099586487, "learning_rate": 0.0001281696005559463, "loss": 1.765, "step": 14957 }, { "epoch": 0.73037109375, "grad_norm": 0.23868754506111145, "learning_rate": 0.00012814306116288405, "loss": 1.7593, "step": 14958 }, { "epoch": 0.730419921875, "grad_norm": 0.25213876366615295, "learning_rate": 0.00012811652532901779, "loss": 1.7515, "step": 14959 }, { "epoch": 0.73046875, "grad_norm": 0.22725744545459747, "learning_rate": 0.0001280899930549904, "loss": 1.7271, "step": 14960 }, { "epoch": 0.730517578125, "grad_norm": 0.25383028388023376, "learning_rate": 0.00012806346434144521, "loss": 1.7654, "step": 14961 }, { "epoch": 0.73056640625, "grad_norm": 0.2386954426765442, "learning_rate": 0.0001280369391890249, "loss": 1.7376, "step": 14962 }, { "epoch": 0.730615234375, "grad_norm": 0.25330090522766113, "learning_rate": 0.00012801041759837256, "loss": 1.7525, "step": 14963 }, { "epoch": 0.7306640625, "grad_norm": 0.2281406968832016, "learning_rate": 0.00012798389957013076, "loss": 1.7466, "step": 14964 }, { "epoch": 0.730712890625, "grad_norm": 0.23718707263469696, "learning_rate": 0.00012795738510494226, "loss": 1.7471, "step": 14965 }, { "epoch": 0.73076171875, "grad_norm": 0.23985743522644043, "learning_rate": 0.00012793087420344968, "loss": 1.778, "step": 14966 }, { "epoch": 0.730810546875, "grad_norm": 0.21626046299934387, "learning_rate": 0.00012790436686629554, "loss": 1.7508, "step": 14967 }, { "epoch": 0.730859375, "grad_norm": 0.23909997940063477, "learning_rate": 0.0001278778630941222, "loss": 1.7216, "step": 14968 }, { "epoch": 0.730908203125, "grad_norm": 0.21771562099456787, "learning_rate": 0.00012785136288757205, "loss": 1.75, "step": 14969 }, { "epoch": 0.73095703125, "grad_norm": 0.19475311040878296, "learning_rate": 0.00012782486624728735, "loss": 1.7444, "step": 14970 }, { "epoch": 0.731005859375, "grad_norm": 0.24368631839752197, "learning_rate": 0.00012779837317391024, "loss": 1.7469, "step": 14971 }, { "epoch": 0.7310546875, "grad_norm": 0.23738527297973633, "learning_rate": 0.00012777188366808287, "loss": 1.7345, "step": 14972 }, { "epoch": 0.731103515625, "grad_norm": 0.20968596637248993, "learning_rate": 0.00012774539773044723, "loss": 1.7535, "step": 14973 }, { "epoch": 0.73115234375, "grad_norm": 0.1907137632369995, "learning_rate": 0.000127718915361645, "loss": 1.7828, "step": 14974 }, { "epoch": 0.731201171875, "grad_norm": 0.20302966237068176, "learning_rate": 0.00012769243656231833, "loss": 1.7376, "step": 14975 }, { "epoch": 0.73125, "grad_norm": 0.18835300207138062, "learning_rate": 0.00012766596133310866, "loss": 1.7315, "step": 14976 }, { "epoch": 0.731298828125, "grad_norm": 0.19810928404331207, "learning_rate": 0.000127639489674658, "loss": 1.7537, "step": 14977 }, { "epoch": 0.73134765625, "grad_norm": 0.18978284299373627, "learning_rate": 0.00012761302158760746, "loss": 1.7458, "step": 14978 }, { "epoch": 0.731396484375, "grad_norm": 0.2241039127111435, "learning_rate": 0.00012758655707259894, "loss": 1.7631, "step": 14979 }, { "epoch": 0.7314453125, "grad_norm": 0.17211008071899414, "learning_rate": 0.00012756009613027348, "loss": 1.7477, "step": 14980 }, { "epoch": 0.731494140625, "grad_norm": 0.20987160503864288, "learning_rate": 0.00012753363876127273, "loss": 1.7733, "step": 14981 }, { "epoch": 0.73154296875, "grad_norm": 0.19489778578281403, "learning_rate": 0.0001275071849662376, "loss": 1.734, "step": 14982 }, { "epoch": 0.731591796875, "grad_norm": 0.1728438287973404, "learning_rate": 0.00012748073474580935, "loss": 1.7325, "step": 14983 }, { "epoch": 0.731640625, "grad_norm": 0.23519641160964966, "learning_rate": 0.000127454288100629, "loss": 1.7534, "step": 14984 }, { "epoch": 0.731689453125, "grad_norm": 0.21184977889060974, "learning_rate": 0.00012742784503133752, "loss": 1.7449, "step": 14985 }, { "epoch": 0.73173828125, "grad_norm": 0.20469635725021362, "learning_rate": 0.00012740140553857575, "loss": 1.7554, "step": 14986 }, { "epoch": 0.731787109375, "grad_norm": 0.2457677721977234, "learning_rate": 0.00012737496962298455, "loss": 1.7431, "step": 14987 }, { "epoch": 0.7318359375, "grad_norm": 0.1821483075618744, "learning_rate": 0.00012734853728520453, "loss": 1.7367, "step": 14988 }, { "epoch": 0.731884765625, "grad_norm": 0.1968020498752594, "learning_rate": 0.00012732210852587638, "loss": 1.7487, "step": 14989 }, { "epoch": 0.73193359375, "grad_norm": 0.18087822198867798, "learning_rate": 0.0001272956833456405, "loss": 1.7365, "step": 14990 }, { "epoch": 0.731982421875, "grad_norm": 0.2129160314798355, "learning_rate": 0.00012726926174513748, "loss": 1.7563, "step": 14991 }, { "epoch": 0.73203125, "grad_norm": 0.18942146003246307, "learning_rate": 0.0001272428437250075, "loss": 1.7488, "step": 14992 }, { "epoch": 0.732080078125, "grad_norm": 0.2330494225025177, "learning_rate": 0.000127216429285891, "loss": 1.7443, "step": 14993 }, { "epoch": 0.73212890625, "grad_norm": 0.21128810942173004, "learning_rate": 0.00012719001842842793, "loss": 1.7594, "step": 14994 }, { "epoch": 0.732177734375, "grad_norm": 0.21690119802951813, "learning_rate": 0.00012716361115325863, "loss": 1.7454, "step": 14995 }, { "epoch": 0.7322265625, "grad_norm": 0.21600978076457977, "learning_rate": 0.0001271372074610229, "loss": 1.7521, "step": 14996 }, { "epoch": 0.732275390625, "grad_norm": 0.18959259986877441, "learning_rate": 0.00012711080735236085, "loss": 1.759, "step": 14997 }, { "epoch": 0.73232421875, "grad_norm": 0.2032066434621811, "learning_rate": 0.0001270844108279121, "loss": 1.7438, "step": 14998 }, { "epoch": 0.732373046875, "grad_norm": 0.1959209442138672, "learning_rate": 0.00012705801788831655, "loss": 1.7392, "step": 14999 }, { "epoch": 0.732421875, "grad_norm": 0.2126775085926056, "learning_rate": 0.00012703162853421374, "loss": 1.7529, "step": 15000 }, { "epoch": 0.732470703125, "grad_norm": 0.20460271835327148, "learning_rate": 0.0001270052427662433, "loss": 1.7275, "step": 15001 }, { "epoch": 0.73251953125, "grad_norm": 0.22313937544822693, "learning_rate": 0.00012697886058504474, "loss": 1.7435, "step": 15002 }, { "epoch": 0.732568359375, "grad_norm": 0.26241597533226013, "learning_rate": 0.00012695248199125737, "loss": 1.7465, "step": 15003 }, { "epoch": 0.7326171875, "grad_norm": 0.25147220492362976, "learning_rate": 0.00012692610698552056, "loss": 1.7451, "step": 15004 }, { "epoch": 0.732666015625, "grad_norm": 0.23829807341098785, "learning_rate": 0.00012689973556847351, "loss": 1.7596, "step": 15005 }, { "epoch": 0.73271484375, "grad_norm": 0.23543718457221985, "learning_rate": 0.0001268733677407553, "loss": 1.7332, "step": 15006 }, { "epoch": 0.732763671875, "grad_norm": 0.21333768963813782, "learning_rate": 0.00012684700350300515, "loss": 1.7402, "step": 15007 }, { "epoch": 0.7328125, "grad_norm": 0.19217465817928314, "learning_rate": 0.00012682064285586176, "loss": 1.7277, "step": 15008 }, { "epoch": 0.732861328125, "grad_norm": 0.21724030375480652, "learning_rate": 0.00012679428579996424, "loss": 1.7618, "step": 15009 }, { "epoch": 0.73291015625, "grad_norm": 0.20110970735549927, "learning_rate": 0.00012676793233595113, "loss": 1.7536, "step": 15010 }, { "epoch": 0.732958984375, "grad_norm": 0.22661079466342926, "learning_rate": 0.00012674158246446144, "loss": 1.7509, "step": 15011 }, { "epoch": 0.7330078125, "grad_norm": 0.2089833766222, "learning_rate": 0.00012671523618613346, "loss": 1.7303, "step": 15012 }, { "epoch": 0.733056640625, "grad_norm": 0.23245279490947723, "learning_rate": 0.000126688893501606, "loss": 1.7385, "step": 15013 }, { "epoch": 0.73310546875, "grad_norm": 0.23081374168395996, "learning_rate": 0.00012666255441151728, "loss": 1.7443, "step": 15014 }, { "epoch": 0.733154296875, "grad_norm": 0.188899964094162, "learning_rate": 0.00012663621891650573, "loss": 1.7632, "step": 15015 }, { "epoch": 0.733203125, "grad_norm": 0.20065490901470184, "learning_rate": 0.00012660988701720962, "loss": 1.7035, "step": 15016 }, { "epoch": 0.733251953125, "grad_norm": 0.2174440622329712, "learning_rate": 0.0001265835587142671, "loss": 1.7486, "step": 15017 }, { "epoch": 0.73330078125, "grad_norm": 0.20223429799079895, "learning_rate": 0.00012655723400831627, "loss": 1.7431, "step": 15018 }, { "epoch": 0.733349609375, "grad_norm": 0.20502141118049622, "learning_rate": 0.00012653091289999512, "loss": 1.7503, "step": 15019 }, { "epoch": 0.7333984375, "grad_norm": 0.2279554158449173, "learning_rate": 0.00012650459538994163, "loss": 1.7643, "step": 15020 }, { "epoch": 0.733447265625, "grad_norm": 0.21762681007385254, "learning_rate": 0.00012647828147879352, "loss": 1.7464, "step": 15021 }, { "epoch": 0.73349609375, "grad_norm": 0.19965951144695282, "learning_rate": 0.0001264519711671886, "loss": 1.7555, "step": 15022 }, { "epoch": 0.733544921875, "grad_norm": 0.1739526242017746, "learning_rate": 0.00012642566445576458, "loss": 1.7407, "step": 15023 }, { "epoch": 0.73359375, "grad_norm": 0.1873341202735901, "learning_rate": 0.00012639936134515876, "loss": 1.7343, "step": 15024 }, { "epoch": 0.733642578125, "grad_norm": 0.17731666564941406, "learning_rate": 0.000126373061836009, "loss": 1.7337, "step": 15025 }, { "epoch": 0.73369140625, "grad_norm": 0.18436524271965027, "learning_rate": 0.00012634676592895231, "loss": 1.7503, "step": 15026 }, { "epoch": 0.733740234375, "grad_norm": 0.19817398488521576, "learning_rate": 0.0001263204736246263, "loss": 1.7521, "step": 15027 }, { "epoch": 0.7337890625, "grad_norm": 0.17690446972846985, "learning_rate": 0.00012629418492366795, "loss": 1.7308, "step": 15028 }, { "epoch": 0.733837890625, "grad_norm": 0.19391761720180511, "learning_rate": 0.00012626789982671463, "loss": 1.7398, "step": 15029 }, { "epoch": 0.73388671875, "grad_norm": 0.1843273937702179, "learning_rate": 0.00012624161833440313, "loss": 1.762, "step": 15030 }, { "epoch": 0.733935546875, "grad_norm": 0.18471114337444305, "learning_rate": 0.00012621534044737062, "loss": 1.76, "step": 15031 }, { "epoch": 0.733984375, "grad_norm": 0.1959589421749115, "learning_rate": 0.00012618906616625384, "loss": 1.7602, "step": 15032 }, { "epoch": 0.734033203125, "grad_norm": 0.19589994847774506, "learning_rate": 0.00012616279549168959, "loss": 1.7576, "step": 15033 }, { "epoch": 0.73408203125, "grad_norm": 0.19763997197151184, "learning_rate": 0.0001261365284243145, "loss": 1.7376, "step": 15034 }, { "epoch": 0.734130859375, "grad_norm": 0.20624974370002747, "learning_rate": 0.00012611026496476532, "loss": 1.7338, "step": 15035 }, { "epoch": 0.7341796875, "grad_norm": 0.19306343793869019, "learning_rate": 0.00012608400511367846, "loss": 1.7487, "step": 15036 }, { "epoch": 0.734228515625, "grad_norm": 0.19827155768871307, "learning_rate": 0.00012605774887169036, "loss": 1.7553, "step": 15037 }, { "epoch": 0.73427734375, "grad_norm": 0.23356059193611145, "learning_rate": 0.00012603149623943737, "loss": 1.7394, "step": 15038 }, { "epoch": 0.734326171875, "grad_norm": 0.1865973025560379, "learning_rate": 0.0001260052472175558, "loss": 1.7555, "step": 15039 }, { "epoch": 0.734375, "grad_norm": 0.2220827341079712, "learning_rate": 0.0001259790018066817, "loss": 1.7509, "step": 15040 }, { "epoch": 0.734423828125, "grad_norm": 0.21095095574855804, "learning_rate": 0.00012595276000745132, "loss": 1.7418, "step": 15041 }, { "epoch": 0.73447265625, "grad_norm": 0.21971553564071655, "learning_rate": 0.0001259265218205004, "loss": 1.7409, "step": 15042 }, { "epoch": 0.734521484375, "grad_norm": 0.1794840693473816, "learning_rate": 0.00012590028724646514, "loss": 1.7646, "step": 15043 }, { "epoch": 0.7345703125, "grad_norm": 0.24138134717941284, "learning_rate": 0.00012587405628598102, "loss": 1.7305, "step": 15044 }, { "epoch": 0.734619140625, "grad_norm": 0.21858225762844086, "learning_rate": 0.00012584782893968412, "loss": 1.7518, "step": 15045 }, { "epoch": 0.73466796875, "grad_norm": 0.24041400849819183, "learning_rate": 0.0001258216052082098, "loss": 1.7492, "step": 15046 }, { "epoch": 0.734716796875, "grad_norm": 0.18530772626399994, "learning_rate": 0.00012579538509219378, "loss": 1.7658, "step": 15047 }, { "epoch": 0.734765625, "grad_norm": 0.27446961402893066, "learning_rate": 0.00012576916859227149, "loss": 1.7389, "step": 15048 }, { "epoch": 0.734814453125, "grad_norm": 0.20544523000717163, "learning_rate": 0.00012574295570907829, "loss": 1.729, "step": 15049 }, { "epoch": 0.73486328125, "grad_norm": 0.26179566979408264, "learning_rate": 0.00012571674644324944, "loss": 1.7537, "step": 15050 }, { "epoch": 0.734912109375, "grad_norm": 0.2012227177619934, "learning_rate": 0.00012569054079542016, "loss": 1.7643, "step": 15051 }, { "epoch": 0.7349609375, "grad_norm": 0.26488134264945984, "learning_rate": 0.00012566433876622557, "loss": 1.7396, "step": 15052 }, { "epoch": 0.735009765625, "grad_norm": 0.2137324959039688, "learning_rate": 0.00012563814035630074, "loss": 1.7564, "step": 15053 }, { "epoch": 0.73505859375, "grad_norm": 0.24446862936019897, "learning_rate": 0.00012561194556628058, "loss": 1.7394, "step": 15054 }, { "epoch": 0.735107421875, "grad_norm": 0.219169020652771, "learning_rate": 0.0001255857543967999, "loss": 1.7369, "step": 15055 }, { "epoch": 0.73515625, "grad_norm": 0.22221213579177856, "learning_rate": 0.0001255595668484935, "loss": 1.7315, "step": 15056 }, { "epoch": 0.735205078125, "grad_norm": 0.24710418283939362, "learning_rate": 0.0001255333829219962, "loss": 1.7332, "step": 15057 }, { "epoch": 0.73525390625, "grad_norm": 0.20796151459217072, "learning_rate": 0.00012550720261794222, "loss": 1.7319, "step": 15058 }, { "epoch": 0.735302734375, "grad_norm": 0.29782676696777344, "learning_rate": 0.00012548102593696648, "loss": 1.7577, "step": 15059 }, { "epoch": 0.7353515625, "grad_norm": 0.2055656611919403, "learning_rate": 0.00012545485287970297, "loss": 1.7376, "step": 15060 }, { "epoch": 0.735400390625, "grad_norm": 0.3349021077156067, "learning_rate": 0.0001254286834467865, "loss": 1.7321, "step": 15061 }, { "epoch": 0.73544921875, "grad_norm": 0.252930223941803, "learning_rate": 0.00012540251763885085, "loss": 1.754, "step": 15062 }, { "epoch": 0.735498046875, "grad_norm": 0.28600558638572693, "learning_rate": 0.00012537635545653054, "loss": 1.7322, "step": 15063 }, { "epoch": 0.735546875, "grad_norm": 0.22896753251552582, "learning_rate": 0.0001253501969004593, "loss": 1.7132, "step": 15064 }, { "epoch": 0.735595703125, "grad_norm": 0.292256236076355, "learning_rate": 0.00012532404197127138, "loss": 1.7444, "step": 15065 }, { "epoch": 0.73564453125, "grad_norm": 0.2261192947626114, "learning_rate": 0.00012529789066960054, "loss": 1.7851, "step": 15066 }, { "epoch": 0.735693359375, "grad_norm": 0.2428968846797943, "learning_rate": 0.00012527174299608058, "loss": 1.718, "step": 15067 }, { "epoch": 0.7357421875, "grad_norm": 0.2383478432893753, "learning_rate": 0.00012524559895134518, "loss": 1.7665, "step": 15068 }, { "epoch": 0.735791015625, "grad_norm": 0.2552802562713623, "learning_rate": 0.000125219458536028, "loss": 1.7458, "step": 15069 }, { "epoch": 0.73583984375, "grad_norm": 0.22315748035907745, "learning_rate": 0.0001251933217507626, "loss": 1.7233, "step": 15070 }, { "epoch": 0.735888671875, "grad_norm": 0.2187163233757019, "learning_rate": 0.00012516718859618238, "loss": 1.7543, "step": 15071 }, { "epoch": 0.7359375, "grad_norm": 0.22384023666381836, "learning_rate": 0.00012514105907292074, "loss": 1.7533, "step": 15072 }, { "epoch": 0.735986328125, "grad_norm": 0.22308239340782166, "learning_rate": 0.000125114933181611, "loss": 1.7559, "step": 15073 }, { "epoch": 0.73603515625, "grad_norm": 0.23408755660057068, "learning_rate": 0.0001250888109228861, "loss": 1.7472, "step": 15074 }, { "epoch": 0.736083984375, "grad_norm": 0.20762914419174194, "learning_rate": 0.0001250626922973795, "loss": 1.7605, "step": 15075 }, { "epoch": 0.7361328125, "grad_norm": 0.24490752816200256, "learning_rate": 0.00012503657730572382, "loss": 1.7208, "step": 15076 }, { "epoch": 0.736181640625, "grad_norm": 0.20941467583179474, "learning_rate": 0.0001250104659485523, "loss": 1.7256, "step": 15077 }, { "epoch": 0.73623046875, "grad_norm": 0.21143293380737305, "learning_rate": 0.00012498435822649756, "loss": 1.7567, "step": 15078 }, { "epoch": 0.736279296875, "grad_norm": 0.21475353837013245, "learning_rate": 0.0001249582541401925, "loss": 1.7413, "step": 15079 }, { "epoch": 0.736328125, "grad_norm": 0.22914928197860718, "learning_rate": 0.0001249321536902696, "loss": 1.7452, "step": 15080 }, { "epoch": 0.736376953125, "grad_norm": 0.19795742630958557, "learning_rate": 0.0001249060568773616, "loss": 1.747, "step": 15081 }, { "epoch": 0.73642578125, "grad_norm": 0.2257189154624939, "learning_rate": 0.0001248799637021009, "loss": 1.7636, "step": 15082 }, { "epoch": 0.736474609375, "grad_norm": 0.17986376583576202, "learning_rate": 0.00012485387416511984, "loss": 1.7412, "step": 15083 }, { "epoch": 0.7365234375, "grad_norm": 0.23735442757606506, "learning_rate": 0.00012482778826705077, "loss": 1.7497, "step": 15084 }, { "epoch": 0.736572265625, "grad_norm": 0.19534264504909515, "learning_rate": 0.0001248017060085259, "loss": 1.7107, "step": 15085 }, { "epoch": 0.73662109375, "grad_norm": 0.2147541046142578, "learning_rate": 0.00012477562739017736, "loss": 1.724, "step": 15086 }, { "epoch": 0.736669921875, "grad_norm": 0.18944454193115234, "learning_rate": 0.00012474955241263715, "loss": 1.753, "step": 15087 }, { "epoch": 0.73671875, "grad_norm": 0.20526708662509918, "learning_rate": 0.00012472348107653728, "loss": 1.7222, "step": 15088 }, { "epoch": 0.736767578125, "grad_norm": 0.22672942280769348, "learning_rate": 0.00012469741338250957, "loss": 1.7465, "step": 15089 }, { "epoch": 0.73681640625, "grad_norm": 0.18105483055114746, "learning_rate": 0.0001246713493311858, "loss": 1.7382, "step": 15090 }, { "epoch": 0.736865234375, "grad_norm": 0.24417459964752197, "learning_rate": 0.0001246452889231977, "loss": 1.7438, "step": 15091 }, { "epoch": 0.7369140625, "grad_norm": 0.18882006406784058, "learning_rate": 0.0001246192321591767, "loss": 1.7321, "step": 15092 }, { "epoch": 0.736962890625, "grad_norm": 0.2250267118215561, "learning_rate": 0.00012459317903975458, "loss": 1.7265, "step": 15093 }, { "epoch": 0.73701171875, "grad_norm": 0.24117180705070496, "learning_rate": 0.0001245671295655624, "loss": 1.7407, "step": 15094 }, { "epoch": 0.737060546875, "grad_norm": 0.21331240236759186, "learning_rate": 0.0001245410837372319, "loss": 1.7512, "step": 15095 }, { "epoch": 0.737109375, "grad_norm": 0.24001964926719666, "learning_rate": 0.00012451504155539393, "loss": 1.7265, "step": 15096 }, { "epoch": 0.737158203125, "grad_norm": 0.18509845435619354, "learning_rate": 0.00012448900302068, "loss": 1.7428, "step": 15097 }, { "epoch": 0.73720703125, "grad_norm": 0.24522943794727325, "learning_rate": 0.00012446296813372083, "loss": 1.7436, "step": 15098 }, { "epoch": 0.737255859375, "grad_norm": 0.18140868842601776, "learning_rate": 0.0001244369368951477, "loss": 1.7273, "step": 15099 }, { "epoch": 0.7373046875, "grad_norm": 0.22379200160503387, "learning_rate": 0.0001244109093055913, "loss": 1.7447, "step": 15100 }, { "epoch": 0.737353515625, "grad_norm": 0.19623300433158875, "learning_rate": 0.00012438488536568255, "loss": 1.7477, "step": 15101 }, { "epoch": 0.73740234375, "grad_norm": 0.25413498282432556, "learning_rate": 0.00012435886507605206, "loss": 1.7423, "step": 15102 }, { "epoch": 0.737451171875, "grad_norm": 0.22152197360992432, "learning_rate": 0.0001243328484373305, "loss": 1.7546, "step": 15103 }, { "epoch": 0.7375, "grad_norm": 0.22029277682304382, "learning_rate": 0.0001243068354501484, "loss": 1.7518, "step": 15104 }, { "epoch": 0.737548828125, "grad_norm": 0.21563848853111267, "learning_rate": 0.00012428082611513623, "loss": 1.7624, "step": 15105 }, { "epoch": 0.73759765625, "grad_norm": 0.2216986119747162, "learning_rate": 0.0001242548204329243, "loss": 1.7545, "step": 15106 }, { "epoch": 0.737646484375, "grad_norm": 0.19766949117183685, "learning_rate": 0.000124228818404143, "loss": 1.7667, "step": 15107 }, { "epoch": 0.7376953125, "grad_norm": 0.2007133811712265, "learning_rate": 0.00012420282002942223, "loss": 1.7545, "step": 15108 }, { "epoch": 0.737744140625, "grad_norm": 0.21795502305030823, "learning_rate": 0.00012417682530939243, "loss": 1.7729, "step": 15109 }, { "epoch": 0.73779296875, "grad_norm": 0.19770771265029907, "learning_rate": 0.0001241508342446833, "loss": 1.7563, "step": 15110 }, { "epoch": 0.737841796875, "grad_norm": 0.2133553922176361, "learning_rate": 0.00012412484683592504, "loss": 1.7416, "step": 15111 }, { "epoch": 0.737890625, "grad_norm": 0.22220584750175476, "learning_rate": 0.0001240988630837472, "loss": 1.7484, "step": 15112 }, { "epoch": 0.737939453125, "grad_norm": 0.2220803052186966, "learning_rate": 0.00012407288298877978, "loss": 1.746, "step": 15113 }, { "epoch": 0.73798828125, "grad_norm": 0.23351460695266724, "learning_rate": 0.00012404690655165213, "loss": 1.7402, "step": 15114 }, { "epoch": 0.738037109375, "grad_norm": 0.22567588090896606, "learning_rate": 0.00012402093377299414, "loss": 1.7378, "step": 15115 }, { "epoch": 0.7380859375, "grad_norm": 0.21551401913166046, "learning_rate": 0.00012399496465343503, "loss": 1.7294, "step": 15116 }, { "epoch": 0.738134765625, "grad_norm": 0.2030254304409027, "learning_rate": 0.00012396899919360428, "loss": 1.7462, "step": 15117 }, { "epoch": 0.73818359375, "grad_norm": 0.2269272357225418, "learning_rate": 0.00012394303739413114, "loss": 1.7494, "step": 15118 }, { "epoch": 0.738232421875, "grad_norm": 0.1930573582649231, "learning_rate": 0.00012391707925564488, "loss": 1.7354, "step": 15119 }, { "epoch": 0.73828125, "grad_norm": 0.2405630648136139, "learning_rate": 0.00012389112477877458, "loss": 1.7332, "step": 15120 }, { "epoch": 0.738330078125, "grad_norm": 0.21274268627166748, "learning_rate": 0.00012386517396414926, "loss": 1.7498, "step": 15121 }, { "epoch": 0.73837890625, "grad_norm": 0.23949934542179108, "learning_rate": 0.0001238392268123979, "loss": 1.7408, "step": 15122 }, { "epoch": 0.738427734375, "grad_norm": 0.1963912695646286, "learning_rate": 0.00012381328332414929, "loss": 1.724, "step": 15123 }, { "epoch": 0.7384765625, "grad_norm": 0.24133320152759552, "learning_rate": 0.0001237873435000322, "loss": 1.7601, "step": 15124 }, { "epoch": 0.738525390625, "grad_norm": 0.22524870932102203, "learning_rate": 0.00012376140734067536, "loss": 1.7774, "step": 15125 }, { "epoch": 0.73857421875, "grad_norm": 0.22095932066440582, "learning_rate": 0.0001237354748467073, "loss": 1.7427, "step": 15126 }, { "epoch": 0.738623046875, "grad_norm": 0.2027807980775833, "learning_rate": 0.0001237095460187566, "loss": 1.7521, "step": 15127 }, { "epoch": 0.738671875, "grad_norm": 0.1955127865076065, "learning_rate": 0.00012368362085745147, "loss": 1.7611, "step": 15128 }, { "epoch": 0.738720703125, "grad_norm": 0.1810222566127777, "learning_rate": 0.0001236576993634205, "loss": 1.7417, "step": 15129 }, { "epoch": 0.73876953125, "grad_norm": 0.1992330104112625, "learning_rate": 0.00012363178153729164, "loss": 1.7667, "step": 15130 }, { "epoch": 0.738818359375, "grad_norm": 0.19341939687728882, "learning_rate": 0.00012360586737969326, "loss": 1.7598, "step": 15131 }, { "epoch": 0.7388671875, "grad_norm": 0.18372894823551178, "learning_rate": 0.0001235799568912533, "loss": 1.7483, "step": 15132 }, { "epoch": 0.738916015625, "grad_norm": 0.19134700298309326, "learning_rate": 0.0001235540500725997, "loss": 1.772, "step": 15133 }, { "epoch": 0.73896484375, "grad_norm": 0.18100863695144653, "learning_rate": 0.00012352814692436035, "loss": 1.7621, "step": 15134 }, { "epoch": 0.739013671875, "grad_norm": 0.19107948243618011, "learning_rate": 0.00012350224744716304, "loss": 1.7267, "step": 15135 }, { "epoch": 0.7390625, "grad_norm": 0.2288815826177597, "learning_rate": 0.00012347635164163554, "loss": 1.7535, "step": 15136 }, { "epoch": 0.739111328125, "grad_norm": 0.21492339670658112, "learning_rate": 0.00012345045950840533, "loss": 1.7514, "step": 15137 }, { "epoch": 0.73916015625, "grad_norm": 0.2210037112236023, "learning_rate": 0.00012342457104810002, "loss": 1.7179, "step": 15138 }, { "epoch": 0.739208984375, "grad_norm": 0.21330828964710236, "learning_rate": 0.000123398686261347, "loss": 1.7268, "step": 15139 }, { "epoch": 0.7392578125, "grad_norm": 0.2230183631181717, "learning_rate": 0.00012337280514877362, "loss": 1.7206, "step": 15140 }, { "epoch": 0.739306640625, "grad_norm": 0.2315918505191803, "learning_rate": 0.00012334692771100712, "loss": 1.7178, "step": 15141 }, { "epoch": 0.73935546875, "grad_norm": 0.22698532044887543, "learning_rate": 0.00012332105394867465, "loss": 1.7503, "step": 15142 }, { "epoch": 0.739404296875, "grad_norm": 0.215286523103714, "learning_rate": 0.0001232951838624033, "loss": 1.7394, "step": 15143 }, { "epoch": 0.739453125, "grad_norm": 0.23044201731681824, "learning_rate": 0.00012326931745282004, "loss": 1.7483, "step": 15144 }, { "epoch": 0.739501953125, "grad_norm": 0.2589070498943329, "learning_rate": 0.00012324345472055178, "loss": 1.7298, "step": 15145 }, { "epoch": 0.73955078125, "grad_norm": 0.21113605797290802, "learning_rate": 0.00012321759566622534, "loss": 1.7199, "step": 15146 }, { "epoch": 0.739599609375, "grad_norm": 0.2342015951871872, "learning_rate": 0.00012319174029046744, "loss": 1.7424, "step": 15147 }, { "epoch": 0.7396484375, "grad_norm": 0.23888856172561646, "learning_rate": 0.00012316588859390457, "loss": 1.728, "step": 15148 }, { "epoch": 0.739697265625, "grad_norm": 0.23069991171360016, "learning_rate": 0.00012314004057716347, "loss": 1.7337, "step": 15149 }, { "epoch": 0.73974609375, "grad_norm": 0.2452702671289444, "learning_rate": 0.00012311419624087046, "loss": 1.7498, "step": 15150 }, { "epoch": 0.739794921875, "grad_norm": 0.2014397829771042, "learning_rate": 0.0001230883555856519, "loss": 1.7494, "step": 15151 }, { "epoch": 0.73984375, "grad_norm": 0.24406561255455017, "learning_rate": 0.00012306251861213412, "loss": 1.7119, "step": 15152 }, { "epoch": 0.739892578125, "grad_norm": 0.2179674506187439, "learning_rate": 0.00012303668532094323, "loss": 1.7439, "step": 15153 }, { "epoch": 0.73994140625, "grad_norm": 0.22541993856430054, "learning_rate": 0.0001230108557127054, "loss": 1.7649, "step": 15154 }, { "epoch": 0.739990234375, "grad_norm": 0.22225390374660492, "learning_rate": 0.00012298502978804653, "loss": 1.7616, "step": 15155 }, { "epoch": 0.7400390625, "grad_norm": 0.24524275958538055, "learning_rate": 0.00012295920754759262, "loss": 1.7498, "step": 15156 }, { "epoch": 0.740087890625, "grad_norm": 0.20264093577861786, "learning_rate": 0.00012293338899196948, "loss": 1.7309, "step": 15157 }, { "epoch": 0.74013671875, "grad_norm": 0.2479439079761505, "learning_rate": 0.00012290757412180281, "loss": 1.7651, "step": 15158 }, { "epoch": 0.740185546875, "grad_norm": 0.20327623188495636, "learning_rate": 0.00012288176293771827, "loss": 1.7317, "step": 15159 }, { "epoch": 0.740234375, "grad_norm": 0.25272154808044434, "learning_rate": 0.00012285595544034143, "loss": 1.7456, "step": 15160 }, { "epoch": 0.740283203125, "grad_norm": 0.2397030144929886, "learning_rate": 0.00012283015163029774, "loss": 1.7395, "step": 15161 }, { "epoch": 0.74033203125, "grad_norm": 0.23132939636707306, "learning_rate": 0.00012280435150821255, "loss": 1.7762, "step": 15162 }, { "epoch": 0.740380859375, "grad_norm": 0.25479909777641296, "learning_rate": 0.0001227785550747112, "loss": 1.7642, "step": 15163 }, { "epoch": 0.7404296875, "grad_norm": 0.22357048094272614, "learning_rate": 0.00012275276233041885, "loss": 1.7431, "step": 15164 }, { "epoch": 0.740478515625, "grad_norm": 0.2217922955751419, "learning_rate": 0.00012272697327596067, "loss": 1.7153, "step": 15165 }, { "epoch": 0.74052734375, "grad_norm": 0.20113012194633484, "learning_rate": 0.00012270118791196162, "loss": 1.769, "step": 15166 }, { "epoch": 0.740576171875, "grad_norm": 0.23615866899490356, "learning_rate": 0.00012267540623904657, "loss": 1.7384, "step": 15167 }, { "epoch": 0.740625, "grad_norm": 0.22188915312290192, "learning_rate": 0.00012264962825784046, "loss": 1.7501, "step": 15168 }, { "epoch": 0.740673828125, "grad_norm": 0.19923818111419678, "learning_rate": 0.000122623853968968, "loss": 1.7639, "step": 15169 }, { "epoch": 0.74072265625, "grad_norm": 0.23713575303554535, "learning_rate": 0.00012259808337305384, "loss": 1.7495, "step": 15170 }, { "epoch": 0.740771484375, "grad_norm": 0.20549152791500092, "learning_rate": 0.0001225723164707226, "loss": 1.7455, "step": 15171 }, { "epoch": 0.7408203125, "grad_norm": 0.26200518012046814, "learning_rate": 0.0001225465532625987, "loss": 1.7634, "step": 15172 }, { "epoch": 0.740869140625, "grad_norm": 0.23241187632083893, "learning_rate": 0.00012252079374930658, "loss": 1.7095, "step": 15173 }, { "epoch": 0.74091796875, "grad_norm": 0.22150254249572754, "learning_rate": 0.0001224950379314705, "loss": 1.7591, "step": 15174 }, { "epoch": 0.740966796875, "grad_norm": 0.25360220670700073, "learning_rate": 0.00012246928580971473, "loss": 1.7457, "step": 15175 }, { "epoch": 0.741015625, "grad_norm": 0.2301284521818161, "learning_rate": 0.00012244353738466335, "loss": 1.735, "step": 15176 }, { "epoch": 0.741064453125, "grad_norm": 0.23354096710681915, "learning_rate": 0.0001224177926569404, "loss": 1.7505, "step": 15177 }, { "epoch": 0.74111328125, "grad_norm": 0.22518694400787354, "learning_rate": 0.00012239205162716983, "loss": 1.7402, "step": 15178 }, { "epoch": 0.741162109375, "grad_norm": 0.18893247842788696, "learning_rate": 0.00012236631429597548, "loss": 1.7107, "step": 15179 }, { "epoch": 0.7412109375, "grad_norm": 0.23070663213729858, "learning_rate": 0.00012234058066398114, "loss": 1.7387, "step": 15180 }, { "epoch": 0.741259765625, "grad_norm": 0.19481536746025085, "learning_rate": 0.00012231485073181049, "loss": 1.7577, "step": 15181 }, { "epoch": 0.74130859375, "grad_norm": 0.2220422625541687, "learning_rate": 0.00012228912450008706, "loss": 1.7256, "step": 15182 }, { "epoch": 0.741357421875, "grad_norm": 0.19400516152381897, "learning_rate": 0.00012226340196943441, "loss": 1.7509, "step": 15183 }, { "epoch": 0.74140625, "grad_norm": 0.23427137732505798, "learning_rate": 0.000122237683140476, "loss": 1.753, "step": 15184 }, { "epoch": 0.741455078125, "grad_norm": 0.20250120759010315, "learning_rate": 0.00012221196801383497, "loss": 1.7251, "step": 15185 }, { "epoch": 0.74150390625, "grad_norm": 0.23580721020698547, "learning_rate": 0.00012218625659013465, "loss": 1.748, "step": 15186 }, { "epoch": 0.741552734375, "grad_norm": 0.1978321671485901, "learning_rate": 0.00012216054886999818, "loss": 1.725, "step": 15187 }, { "epoch": 0.7416015625, "grad_norm": 0.20656587183475494, "learning_rate": 0.0001221348448540486, "loss": 1.7074, "step": 15188 }, { "epoch": 0.741650390625, "grad_norm": 0.22677700221538544, "learning_rate": 0.00012210914454290886, "loss": 1.7431, "step": 15189 }, { "epoch": 0.74169921875, "grad_norm": 0.24433086812496185, "learning_rate": 0.0001220834479372018, "loss": 1.7408, "step": 15190 }, { "epoch": 0.741748046875, "grad_norm": 0.2067285180091858, "learning_rate": 0.00012205775503755028, "loss": 1.7767, "step": 15191 }, { "epoch": 0.741796875, "grad_norm": 0.24222531914710999, "learning_rate": 0.00012203206584457692, "loss": 1.7585, "step": 15192 }, { "epoch": 0.741845703125, "grad_norm": 0.2476406842470169, "learning_rate": 0.00012200638035890436, "loss": 1.7336, "step": 15193 }, { "epoch": 0.74189453125, "grad_norm": 0.21336862444877625, "learning_rate": 0.00012198069858115508, "loss": 1.7634, "step": 15194 }, { "epoch": 0.741943359375, "grad_norm": 0.23436470329761505, "learning_rate": 0.00012195502051195147, "loss": 1.7351, "step": 15195 }, { "epoch": 0.7419921875, "grad_norm": 0.20943747460842133, "learning_rate": 0.0001219293461519159, "loss": 1.7391, "step": 15196 }, { "epoch": 0.742041015625, "grad_norm": 0.2327640950679779, "learning_rate": 0.00012190367550167065, "loss": 1.751, "step": 15197 }, { "epoch": 0.74208984375, "grad_norm": 0.19971677660942078, "learning_rate": 0.00012187800856183775, "loss": 1.7577, "step": 15198 }, { "epoch": 0.742138671875, "grad_norm": 0.21318651735782623, "learning_rate": 0.0001218523453330394, "loss": 1.7301, "step": 15199 }, { "epoch": 0.7421875, "grad_norm": 0.20745126903057098, "learning_rate": 0.00012182668581589752, "loss": 1.7283, "step": 15200 }, { "epoch": 0.742236328125, "grad_norm": 0.20984894037246704, "learning_rate": 0.00012180103001103384, "loss": 1.7449, "step": 15201 }, { "epoch": 0.74228515625, "grad_norm": 0.20866701006889343, "learning_rate": 0.0001217753779190704, "loss": 1.7484, "step": 15202 }, { "epoch": 0.742333984375, "grad_norm": 0.21099750697612762, "learning_rate": 0.00012174972954062866, "loss": 1.7286, "step": 15203 }, { "epoch": 0.7423828125, "grad_norm": 0.20395483076572418, "learning_rate": 0.00012172408487633051, "loss": 1.7425, "step": 15204 }, { "epoch": 0.742431640625, "grad_norm": 0.19856925308704376, "learning_rate": 0.00012169844392679719, "loss": 1.7425, "step": 15205 }, { "epoch": 0.74248046875, "grad_norm": 0.22036148607730865, "learning_rate": 0.00012167280669265026, "loss": 1.7349, "step": 15206 }, { "epoch": 0.742529296875, "grad_norm": 0.2319817990064621, "learning_rate": 0.00012164717317451102, "loss": 1.7342, "step": 15207 }, { "epoch": 0.742578125, "grad_norm": 0.20080314576625824, "learning_rate": 0.00012162154337300076, "loss": 1.7258, "step": 15208 }, { "epoch": 0.742626953125, "grad_norm": 0.18623250722885132, "learning_rate": 0.0001215959172887406, "loss": 1.7473, "step": 15209 }, { "epoch": 0.74267578125, "grad_norm": 0.20477263629436493, "learning_rate": 0.00012157029492235167, "loss": 1.7637, "step": 15210 }, { "epoch": 0.742724609375, "grad_norm": 0.18406157195568085, "learning_rate": 0.00012154467627445487, "loss": 1.7486, "step": 15211 }, { "epoch": 0.7427734375, "grad_norm": 0.18923115730285645, "learning_rate": 0.00012151906134567114, "loss": 1.7599, "step": 15212 }, { "epoch": 0.742822265625, "grad_norm": 0.2215563803911209, "learning_rate": 0.00012149345013662125, "loss": 1.758, "step": 15213 }, { "epoch": 0.74287109375, "grad_norm": 0.18675976991653442, "learning_rate": 0.00012146784264792594, "loss": 1.7357, "step": 15214 }, { "epoch": 0.742919921875, "grad_norm": 0.22639009356498718, "learning_rate": 0.00012144223888020581, "loss": 1.7146, "step": 15215 }, { "epoch": 0.74296875, "grad_norm": 0.20777086913585663, "learning_rate": 0.00012141663883408138, "loss": 1.7516, "step": 15216 }, { "epoch": 0.743017578125, "grad_norm": 0.21506521105766296, "learning_rate": 0.00012139104251017309, "loss": 1.7409, "step": 15217 }, { "epoch": 0.74306640625, "grad_norm": 0.21766459941864014, "learning_rate": 0.00012136544990910135, "loss": 1.7401, "step": 15218 }, { "epoch": 0.743115234375, "grad_norm": 0.19170111417770386, "learning_rate": 0.00012133986103148625, "loss": 1.7497, "step": 15219 }, { "epoch": 0.7431640625, "grad_norm": 0.19806373119354248, "learning_rate": 0.00012131427587794818, "loss": 1.7609, "step": 15220 }, { "epoch": 0.743212890625, "grad_norm": 0.2247355580329895, "learning_rate": 0.00012128869444910699, "loss": 1.7428, "step": 15221 }, { "epoch": 0.74326171875, "grad_norm": 0.21665018796920776, "learning_rate": 0.00012126311674558292, "loss": 1.7722, "step": 15222 }, { "epoch": 0.743310546875, "grad_norm": 0.23651176691055298, "learning_rate": 0.00012123754276799557, "loss": 1.7596, "step": 15223 }, { "epoch": 0.743359375, "grad_norm": 0.24463513493537903, "learning_rate": 0.00012121197251696506, "loss": 1.73, "step": 15224 }, { "epoch": 0.743408203125, "grad_norm": 0.29192933440208435, "learning_rate": 0.00012118640599311088, "loss": 1.7453, "step": 15225 }, { "epoch": 0.74345703125, "grad_norm": 0.2149181365966797, "learning_rate": 0.0001211608431970527, "loss": 1.7461, "step": 15226 }, { "epoch": 0.743505859375, "grad_norm": 0.26615017652511597, "learning_rate": 0.00012113528412941007, "loss": 1.727, "step": 15227 }, { "epoch": 0.7435546875, "grad_norm": 0.2170993536710739, "learning_rate": 0.00012110972879080247, "loss": 1.751, "step": 15228 }, { "epoch": 0.743603515625, "grad_norm": 0.2206333875656128, "learning_rate": 0.00012108417718184921, "loss": 1.7354, "step": 15229 }, { "epoch": 0.74365234375, "grad_norm": 0.22881685197353363, "learning_rate": 0.00012105862930316957, "loss": 1.7289, "step": 15230 }, { "epoch": 0.743701171875, "grad_norm": 0.20355689525604248, "learning_rate": 0.00012103308515538275, "loss": 1.7376, "step": 15231 }, { "epoch": 0.74375, "grad_norm": 0.2030429095029831, "learning_rate": 0.00012100754473910779, "loss": 1.7422, "step": 15232 }, { "epoch": 0.743798828125, "grad_norm": 0.1900477111339569, "learning_rate": 0.0001209820080549637, "loss": 1.7386, "step": 15233 }, { "epoch": 0.74384765625, "grad_norm": 0.20787166059017181, "learning_rate": 0.00012095647510356944, "loss": 1.7681, "step": 15234 }, { "epoch": 0.743896484375, "grad_norm": 0.22930610179901123, "learning_rate": 0.00012093094588554365, "loss": 1.7535, "step": 15235 }, { "epoch": 0.7439453125, "grad_norm": 0.19179721176624298, "learning_rate": 0.00012090542040150527, "loss": 1.7361, "step": 15236 }, { "epoch": 0.743994140625, "grad_norm": 0.23291176557540894, "learning_rate": 0.0001208798986520727, "loss": 1.7679, "step": 15237 }, { "epoch": 0.74404296875, "grad_norm": 0.20189641416072845, "learning_rate": 0.00012085438063786477, "loss": 1.7312, "step": 15238 }, { "epoch": 0.744091796875, "grad_norm": 0.2014986127614975, "learning_rate": 0.00012082886635949958, "loss": 1.7455, "step": 15239 }, { "epoch": 0.744140625, "grad_norm": 0.2138751596212387, "learning_rate": 0.00012080335581759585, "loss": 1.7344, "step": 15240 }, { "epoch": 0.744189453125, "grad_norm": 0.19734875857830048, "learning_rate": 0.00012077784901277152, "loss": 1.7574, "step": 15241 }, { "epoch": 0.74423828125, "grad_norm": 0.21411171555519104, "learning_rate": 0.00012075234594564505, "loss": 1.7379, "step": 15242 }, { "epoch": 0.744287109375, "grad_norm": 0.19432884454727173, "learning_rate": 0.00012072684661683433, "loss": 1.7512, "step": 15243 }, { "epoch": 0.7443359375, "grad_norm": 0.20001529157161713, "learning_rate": 0.00012070135102695744, "loss": 1.7549, "step": 15244 }, { "epoch": 0.744384765625, "grad_norm": 0.22868070006370544, "learning_rate": 0.00012067585917663224, "loss": 1.7508, "step": 15245 }, { "epoch": 0.74443359375, "grad_norm": 0.18194811046123505, "learning_rate": 0.00012065037106647661, "loss": 1.708, "step": 15246 }, { "epoch": 0.744482421875, "grad_norm": 0.2232113927602768, "learning_rate": 0.00012062488669710824, "loss": 1.7573, "step": 15247 }, { "epoch": 0.74453125, "grad_norm": 0.19532877206802368, "learning_rate": 0.00012059940606914474, "loss": 1.7718, "step": 15248 }, { "epoch": 0.744580078125, "grad_norm": 0.21728850901126862, "learning_rate": 0.0001205739291832037, "loss": 1.7312, "step": 15249 }, { "epoch": 0.74462890625, "grad_norm": 0.2014794945716858, "learning_rate": 0.00012054845603990259, "loss": 1.7562, "step": 15250 }, { "epoch": 0.744677734375, "grad_norm": 0.2056998610496521, "learning_rate": 0.00012052298663985862, "loss": 1.7302, "step": 15251 }, { "epoch": 0.7447265625, "grad_norm": 0.19380804896354675, "learning_rate": 0.0001204975209836893, "loss": 1.7563, "step": 15252 }, { "epoch": 0.744775390625, "grad_norm": 0.2085753232240677, "learning_rate": 0.00012047205907201156, "loss": 1.7474, "step": 15253 }, { "epoch": 0.74482421875, "grad_norm": 0.220007985830307, "learning_rate": 0.00012044660090544273, "loss": 1.7047, "step": 15254 }, { "epoch": 0.744873046875, "grad_norm": 0.2117929309606552, "learning_rate": 0.00012042114648459956, "loss": 1.7198, "step": 15255 }, { "epoch": 0.744921875, "grad_norm": 0.2318292260169983, "learning_rate": 0.00012039569581009926, "loss": 1.7418, "step": 15256 }, { "epoch": 0.744970703125, "grad_norm": 0.17907124757766724, "learning_rate": 0.00012037024888255832, "loss": 1.7374, "step": 15257 }, { "epoch": 0.74501953125, "grad_norm": 0.22316746413707733, "learning_rate": 0.00012034480570259378, "loss": 1.749, "step": 15258 }, { "epoch": 0.745068359375, "grad_norm": 0.17354975640773773, "learning_rate": 0.00012031936627082205, "loss": 1.7563, "step": 15259 }, { "epoch": 0.7451171875, "grad_norm": 0.24439817667007446, "learning_rate": 0.00012029393058785972, "loss": 1.7541, "step": 15260 }, { "epoch": 0.745166015625, "grad_norm": 0.20835226774215698, "learning_rate": 0.0001202684986543233, "loss": 1.7419, "step": 15261 }, { "epoch": 0.74521484375, "grad_norm": 0.22604194283485413, "learning_rate": 0.00012024307047082912, "loss": 1.7318, "step": 15262 }, { "epoch": 0.745263671875, "grad_norm": 0.19527176022529602, "learning_rate": 0.00012021764603799345, "loss": 1.7361, "step": 15263 }, { "epoch": 0.7453125, "grad_norm": 0.22949859499931335, "learning_rate": 0.00012019222535643248, "loss": 1.76, "step": 15264 }, { "epoch": 0.745361328125, "grad_norm": 0.19337421655654907, "learning_rate": 0.0001201668084267623, "loss": 1.7408, "step": 15265 }, { "epoch": 0.74541015625, "grad_norm": 0.2247641682624817, "learning_rate": 0.0001201413952495989, "loss": 1.7306, "step": 15266 }, { "epoch": 0.745458984375, "grad_norm": 0.219275563955307, "learning_rate": 0.00012011598582555823, "loss": 1.7687, "step": 15267 }, { "epoch": 0.7455078125, "grad_norm": 0.19218403100967407, "learning_rate": 0.00012009058015525614, "loss": 1.7311, "step": 15268 }, { "epoch": 0.745556640625, "grad_norm": 0.22373554110527039, "learning_rate": 0.00012006517823930813, "loss": 1.7245, "step": 15269 }, { "epoch": 0.74560546875, "grad_norm": 0.23342010378837585, "learning_rate": 0.00012003978007833015, "loss": 1.7452, "step": 15270 }, { "epoch": 0.745654296875, "grad_norm": 0.21480606496334076, "learning_rate": 0.00012001438567293743, "loss": 1.7182, "step": 15271 }, { "epoch": 0.745703125, "grad_norm": 0.2307017594575882, "learning_rate": 0.00011998899502374576, "loss": 1.7321, "step": 15272 }, { "epoch": 0.745751953125, "grad_norm": 0.2116432934999466, "learning_rate": 0.00011996360813137017, "loss": 1.7644, "step": 15273 }, { "epoch": 0.74580078125, "grad_norm": 0.19607892632484436, "learning_rate": 0.00011993822499642625, "loss": 1.756, "step": 15274 }, { "epoch": 0.745849609375, "grad_norm": 0.20515085756778717, "learning_rate": 0.00011991284561952896, "loss": 1.7412, "step": 15275 }, { "epoch": 0.7458984375, "grad_norm": 0.19768640398979187, "learning_rate": 0.00011988747000129342, "loss": 1.7749, "step": 15276 }, { "epoch": 0.745947265625, "grad_norm": 0.2148384302854538, "learning_rate": 0.00011986209814233468, "loss": 1.7498, "step": 15277 }, { "epoch": 0.74599609375, "grad_norm": 0.20962773263454437, "learning_rate": 0.00011983673004326761, "loss": 1.7426, "step": 15278 }, { "epoch": 0.746044921875, "grad_norm": 0.20652949810028076, "learning_rate": 0.00011981136570470706, "loss": 1.7385, "step": 15279 }, { "epoch": 0.74609375, "grad_norm": 0.2408583015203476, "learning_rate": 0.00011978600512726775, "loss": 1.7621, "step": 15280 }, { "epoch": 0.746142578125, "grad_norm": 0.22628547251224518, "learning_rate": 0.00011976064831156428, "loss": 1.7266, "step": 15281 }, { "epoch": 0.74619140625, "grad_norm": 0.24596761167049408, "learning_rate": 0.00011973529525821122, "loss": 1.7402, "step": 15282 }, { "epoch": 0.746240234375, "grad_norm": 0.24216142296791077, "learning_rate": 0.00011970994596782305, "loss": 1.7201, "step": 15283 }, { "epoch": 0.7462890625, "grad_norm": 0.26196524500846863, "learning_rate": 0.00011968460044101414, "loss": 1.7428, "step": 15284 }, { "epoch": 0.746337890625, "grad_norm": 0.25884371995925903, "learning_rate": 0.00011965925867839858, "loss": 1.7187, "step": 15285 }, { "epoch": 0.74638671875, "grad_norm": 0.25473007559776306, "learning_rate": 0.00011963392068059082, "loss": 1.7494, "step": 15286 }, { "epoch": 0.746435546875, "grad_norm": 0.2509990930557251, "learning_rate": 0.00011960858644820469, "loss": 1.7374, "step": 15287 }, { "epoch": 0.746484375, "grad_norm": 0.261750191450119, "learning_rate": 0.00011958325598185443, "loss": 1.7447, "step": 15288 }, { "epoch": 0.746533203125, "grad_norm": 0.23179443180561066, "learning_rate": 0.00011955792928215369, "loss": 1.7664, "step": 15289 }, { "epoch": 0.74658203125, "grad_norm": 0.24228554964065552, "learning_rate": 0.00011953260634971655, "loss": 1.7425, "step": 15290 }, { "epoch": 0.746630859375, "grad_norm": 0.24732837080955505, "learning_rate": 0.00011950728718515645, "loss": 1.7397, "step": 15291 }, { "epoch": 0.7466796875, "grad_norm": 0.24460679292678833, "learning_rate": 0.00011948197178908733, "loss": 1.7446, "step": 15292 }, { "epoch": 0.746728515625, "grad_norm": 0.2180139422416687, "learning_rate": 0.0001194566601621225, "loss": 1.7736, "step": 15293 }, { "epoch": 0.74677734375, "grad_norm": 0.19696614146232605, "learning_rate": 0.00011943135230487542, "loss": 1.7484, "step": 15294 }, { "epoch": 0.746826171875, "grad_norm": 0.2415657788515091, "learning_rate": 0.00011940604821795955, "loss": 1.7237, "step": 15295 }, { "epoch": 0.746875, "grad_norm": 0.18532557785511017, "learning_rate": 0.00011938074790198807, "loss": 1.7434, "step": 15296 }, { "epoch": 0.746923828125, "grad_norm": 0.2598644196987152, "learning_rate": 0.00011935545135757423, "loss": 1.7595, "step": 15297 }, { "epoch": 0.74697265625, "grad_norm": 0.1909903883934021, "learning_rate": 0.00011933015858533102, "loss": 1.7285, "step": 15298 }, { "epoch": 0.747021484375, "grad_norm": 0.2215617150068283, "learning_rate": 0.00011930486958587153, "loss": 1.7224, "step": 15299 }, { "epoch": 0.7470703125, "grad_norm": 0.2190828174352646, "learning_rate": 0.00011927958435980857, "loss": 1.76, "step": 15300 }, { "epoch": 0.747119140625, "grad_norm": 0.2442711442708969, "learning_rate": 0.00011925430290775502, "loss": 1.7468, "step": 15301 }, { "epoch": 0.74716796875, "grad_norm": 0.21767738461494446, "learning_rate": 0.00011922902523032361, "loss": 1.7462, "step": 15302 }, { "epoch": 0.747216796875, "grad_norm": 0.2566450834274292, "learning_rate": 0.0001192037513281268, "loss": 1.738, "step": 15303 }, { "epoch": 0.747265625, "grad_norm": 0.21661897003650665, "learning_rate": 0.00011917848120177736, "loss": 1.735, "step": 15304 }, { "epoch": 0.747314453125, "grad_norm": 0.2598717510700226, "learning_rate": 0.00011915321485188749, "loss": 1.7465, "step": 15305 }, { "epoch": 0.74736328125, "grad_norm": 0.21356044709682465, "learning_rate": 0.00011912795227906978, "loss": 1.7407, "step": 15306 }, { "epoch": 0.747412109375, "grad_norm": 0.22038699686527252, "learning_rate": 0.00011910269348393625, "loss": 1.7319, "step": 15307 }, { "epoch": 0.7474609375, "grad_norm": 0.22940585017204285, "learning_rate": 0.00011907743846709934, "loss": 1.748, "step": 15308 }, { "epoch": 0.747509765625, "grad_norm": 0.2115560919046402, "learning_rate": 0.00011905218722917089, "loss": 1.74, "step": 15309 }, { "epoch": 0.74755859375, "grad_norm": 0.2538403272628784, "learning_rate": 0.00011902693977076298, "loss": 1.7568, "step": 15310 }, { "epoch": 0.747607421875, "grad_norm": 0.2058243751525879, "learning_rate": 0.00011900169609248748, "loss": 1.7365, "step": 15311 }, { "epoch": 0.74765625, "grad_norm": 0.2510313093662262, "learning_rate": 0.0001189764561949562, "loss": 1.7581, "step": 15312 }, { "epoch": 0.747705078125, "grad_norm": 0.2111852616071701, "learning_rate": 0.00011895122007878089, "loss": 1.7253, "step": 15313 }, { "epoch": 0.74775390625, "grad_norm": 0.255928635597229, "learning_rate": 0.0001189259877445731, "loss": 1.7452, "step": 15314 }, { "epoch": 0.747802734375, "grad_norm": 0.1850699484348297, "learning_rate": 0.00011890075919294438, "loss": 1.6972, "step": 15315 }, { "epoch": 0.7478515625, "grad_norm": 0.24323628842830658, "learning_rate": 0.00011887553442450618, "loss": 1.7165, "step": 15316 }, { "epoch": 0.747900390625, "grad_norm": 0.20487748086452484, "learning_rate": 0.00011885031343986982, "loss": 1.7903, "step": 15317 }, { "epoch": 0.74794921875, "grad_norm": 0.2920989692211151, "learning_rate": 0.00011882509623964665, "loss": 1.7409, "step": 15318 }, { "epoch": 0.747998046875, "grad_norm": 0.18955136835575104, "learning_rate": 0.00011879988282444758, "loss": 1.7616, "step": 15319 }, { "epoch": 0.748046875, "grad_norm": 0.2667074203491211, "learning_rate": 0.000118774673194884, "loss": 1.7419, "step": 15320 }, { "epoch": 0.748095703125, "grad_norm": 0.21385881304740906, "learning_rate": 0.00011874946735156658, "loss": 1.715, "step": 15321 }, { "epoch": 0.74814453125, "grad_norm": 0.23528186976909637, "learning_rate": 0.0001187242652951065, "loss": 1.722, "step": 15322 }, { "epoch": 0.748193359375, "grad_norm": 0.23643295466899872, "learning_rate": 0.00011869906702611424, "loss": 1.7299, "step": 15323 }, { "epoch": 0.7482421875, "grad_norm": 0.24152252078056335, "learning_rate": 0.00011867387254520083, "loss": 1.7477, "step": 15324 }, { "epoch": 0.748291015625, "grad_norm": 0.2777336537837982, "learning_rate": 0.00011864868185297653, "loss": 1.7366, "step": 15325 }, { "epoch": 0.74833984375, "grad_norm": 0.19790349900722504, "learning_rate": 0.00011862349495005221, "loss": 1.7346, "step": 15326 }, { "epoch": 0.748388671875, "grad_norm": 0.22087033092975616, "learning_rate": 0.00011859831183703804, "loss": 1.7221, "step": 15327 }, { "epoch": 0.7484375, "grad_norm": 0.2272063046693802, "learning_rate": 0.00011857313251454443, "loss": 1.7416, "step": 15328 }, { "epoch": 0.748486328125, "grad_norm": 0.21024447679519653, "learning_rate": 0.00011854795698318163, "loss": 1.7422, "step": 15329 }, { "epoch": 0.74853515625, "grad_norm": 0.21298544108867645, "learning_rate": 0.00011852278524355977, "loss": 1.7513, "step": 15330 }, { "epoch": 0.748583984375, "grad_norm": 0.21862691640853882, "learning_rate": 0.00011849761729628894, "loss": 1.7279, "step": 15331 }, { "epoch": 0.7486328125, "grad_norm": 0.2255273312330246, "learning_rate": 0.00011847245314197907, "loss": 1.725, "step": 15332 }, { "epoch": 0.748681640625, "grad_norm": 0.21288062632083893, "learning_rate": 0.00011844729278124009, "loss": 1.7383, "step": 15333 }, { "epoch": 0.74873046875, "grad_norm": 0.22568407654762268, "learning_rate": 0.0001184221362146818, "loss": 1.7244, "step": 15334 }, { "epoch": 0.748779296875, "grad_norm": 0.18552111089229584, "learning_rate": 0.00011839698344291368, "loss": 1.7685, "step": 15335 }, { "epoch": 0.748828125, "grad_norm": 0.2236400842666626, "learning_rate": 0.00011837183446654562, "loss": 1.775, "step": 15336 }, { "epoch": 0.748876953125, "grad_norm": 0.18607182800769806, "learning_rate": 0.00011834668928618681, "loss": 1.7566, "step": 15337 }, { "epoch": 0.74892578125, "grad_norm": 0.21714119613170624, "learning_rate": 0.00011832154790244704, "loss": 1.7649, "step": 15338 }, { "epoch": 0.748974609375, "grad_norm": 0.17325270175933838, "learning_rate": 0.00011829641031593527, "loss": 1.7062, "step": 15339 }, { "epoch": 0.7490234375, "grad_norm": 0.2136612832546234, "learning_rate": 0.00011827127652726102, "loss": 1.7721, "step": 15340 }, { "epoch": 0.749072265625, "grad_norm": 0.19927220046520233, "learning_rate": 0.00011824614653703316, "loss": 1.7365, "step": 15341 }, { "epoch": 0.74912109375, "grad_norm": 0.19299745559692383, "learning_rate": 0.00011822102034586101, "loss": 1.7512, "step": 15342 }, { "epoch": 0.749169921875, "grad_norm": 0.1932908445596695, "learning_rate": 0.00011819589795435333, "loss": 1.7642, "step": 15343 }, { "epoch": 0.74921875, "grad_norm": 0.19164788722991943, "learning_rate": 0.00011817077936311903, "loss": 1.7475, "step": 15344 }, { "epoch": 0.749267578125, "grad_norm": 0.22453302145004272, "learning_rate": 0.0001181456645727669, "loss": 1.7643, "step": 15345 }, { "epoch": 0.74931640625, "grad_norm": 0.18726810812950134, "learning_rate": 0.00011812055358390556, "loss": 1.7616, "step": 15346 }, { "epoch": 0.749365234375, "grad_norm": 0.21942201256752014, "learning_rate": 0.00011809544639714368, "loss": 1.7454, "step": 15347 }, { "epoch": 0.7494140625, "grad_norm": 0.18046733736991882, "learning_rate": 0.0001180703430130897, "loss": 1.7389, "step": 15348 }, { "epoch": 0.749462890625, "grad_norm": 0.22067022323608398, "learning_rate": 0.00011804524343235206, "loss": 1.7452, "step": 15349 }, { "epoch": 0.74951171875, "grad_norm": 0.17611612379550934, "learning_rate": 0.00011802014765553903, "loss": 1.7479, "step": 15350 }, { "epoch": 0.749560546875, "grad_norm": 0.21651074290275574, "learning_rate": 0.00011799505568325881, "loss": 1.7463, "step": 15351 }, { "epoch": 0.749609375, "grad_norm": 0.20503094792366028, "learning_rate": 0.00011796996751611965, "loss": 1.7256, "step": 15352 }, { "epoch": 0.749658203125, "grad_norm": 0.19236049056053162, "learning_rate": 0.00011794488315472933, "loss": 1.7298, "step": 15353 }, { "epoch": 0.74970703125, "grad_norm": 0.17611254751682281, "learning_rate": 0.00011791980259969609, "loss": 1.7441, "step": 15354 }, { "epoch": 0.749755859375, "grad_norm": 0.24409842491149902, "learning_rate": 0.00011789472585162748, "loss": 1.7405, "step": 15355 }, { "epoch": 0.7498046875, "grad_norm": 0.1992914378643036, "learning_rate": 0.00011786965291113157, "loss": 1.7222, "step": 15356 }, { "epoch": 0.749853515625, "grad_norm": 0.22704686224460602, "learning_rate": 0.00011784458377881571, "loss": 1.7154, "step": 15357 }, { "epoch": 0.74990234375, "grad_norm": 0.2137853056192398, "learning_rate": 0.00011781951845528774, "loss": 1.7362, "step": 15358 }, { "epoch": 0.749951171875, "grad_norm": 0.21681444346904755, "learning_rate": 0.00011779445694115488, "loss": 1.7255, "step": 15359 }, { "epoch": 0.75, "grad_norm": 0.2331029623746872, "learning_rate": 0.00011776939923702482, "loss": 1.749, "step": 15360 }, { "epoch": 0.750048828125, "grad_norm": 0.2014942616224289, "learning_rate": 0.00011774434534350461, "loss": 1.7555, "step": 15361 }, { "epoch": 0.75009765625, "grad_norm": 0.2111717015504837, "learning_rate": 0.00011771929526120152, "loss": 1.7645, "step": 15362 }, { "epoch": 0.750146484375, "grad_norm": 0.20462289452552795, "learning_rate": 0.00011769424899072265, "loss": 1.7418, "step": 15363 }, { "epoch": 0.7501953125, "grad_norm": 0.2236337661743164, "learning_rate": 0.00011766920653267506, "loss": 1.736, "step": 15364 }, { "epoch": 0.750244140625, "grad_norm": 0.18744415044784546, "learning_rate": 0.00011764416788766563, "loss": 1.7247, "step": 15365 }, { "epoch": 0.75029296875, "grad_norm": 0.24150070548057556, "learning_rate": 0.00011761913305630123, "loss": 1.7461, "step": 15366 }, { "epoch": 0.750341796875, "grad_norm": 0.19542625546455383, "learning_rate": 0.00011759410203918857, "loss": 1.7616, "step": 15367 }, { "epoch": 0.750390625, "grad_norm": 0.21318091452121735, "learning_rate": 0.00011756907483693436, "loss": 1.7353, "step": 15368 }, { "epoch": 0.750439453125, "grad_norm": 0.19486522674560547, "learning_rate": 0.00011754405145014498, "loss": 1.7293, "step": 15369 }, { "epoch": 0.75048828125, "grad_norm": 0.21462473273277283, "learning_rate": 0.00011751903187942714, "loss": 1.7328, "step": 15370 }, { "epoch": 0.750537109375, "grad_norm": 0.18900738656520844, "learning_rate": 0.00011749401612538697, "loss": 1.7432, "step": 15371 }, { "epoch": 0.7505859375, "grad_norm": 0.20960400998592377, "learning_rate": 0.00011746900418863097, "loss": 1.7249, "step": 15372 }, { "epoch": 0.750634765625, "grad_norm": 0.20570850372314453, "learning_rate": 0.00011744399606976506, "loss": 1.7317, "step": 15373 }, { "epoch": 0.75068359375, "grad_norm": 0.18531280755996704, "learning_rate": 0.00011741899176939566, "loss": 1.7176, "step": 15374 }, { "epoch": 0.750732421875, "grad_norm": 0.266284316778183, "learning_rate": 0.00011739399128812842, "loss": 1.7505, "step": 15375 }, { "epoch": 0.75078125, "grad_norm": 0.18727371096611023, "learning_rate": 0.00011736899462656957, "loss": 1.7175, "step": 15376 }, { "epoch": 0.750830078125, "grad_norm": 0.24335768818855286, "learning_rate": 0.00011734400178532473, "loss": 1.7614, "step": 15377 }, { "epoch": 0.75087890625, "grad_norm": 0.20401416718959808, "learning_rate": 0.00011731901276499963, "loss": 1.7619, "step": 15378 }, { "epoch": 0.750927734375, "grad_norm": 0.2387763112783432, "learning_rate": 0.00011729402756619997, "loss": 1.7311, "step": 15379 }, { "epoch": 0.7509765625, "grad_norm": 0.1952403485774994, "learning_rate": 0.00011726904618953122, "loss": 1.736, "step": 15380 }, { "epoch": 0.751025390625, "grad_norm": 0.22311335802078247, "learning_rate": 0.00011724406863559889, "loss": 1.7462, "step": 15381 }, { "epoch": 0.75107421875, "grad_norm": 0.20489798486232758, "learning_rate": 0.00011721909490500828, "loss": 1.7536, "step": 15382 }, { "epoch": 0.751123046875, "grad_norm": 0.21832890808582306, "learning_rate": 0.00011719412499836464, "loss": 1.7379, "step": 15383 }, { "epoch": 0.751171875, "grad_norm": 0.2072429358959198, "learning_rate": 0.0001171691589162732, "loss": 1.757, "step": 15384 }, { "epoch": 0.751220703125, "grad_norm": 0.20137935876846313, "learning_rate": 0.00011714419665933896, "loss": 1.7384, "step": 15385 }, { "epoch": 0.75126953125, "grad_norm": 0.20633898675441742, "learning_rate": 0.00011711923822816704, "loss": 1.7515, "step": 15386 }, { "epoch": 0.751318359375, "grad_norm": 0.19540078938007355, "learning_rate": 0.00011709428362336205, "loss": 1.7306, "step": 15387 }, { "epoch": 0.7513671875, "grad_norm": 0.21445302665233612, "learning_rate": 0.0001170693328455291, "loss": 1.7284, "step": 15388 }, { "epoch": 0.751416015625, "grad_norm": 0.19895966351032257, "learning_rate": 0.0001170443858952726, "loss": 1.7338, "step": 15389 }, { "epoch": 0.75146484375, "grad_norm": 0.20116782188415527, "learning_rate": 0.0001170194427731975, "loss": 1.7327, "step": 15390 }, { "epoch": 0.751513671875, "grad_norm": 0.18217509984970093, "learning_rate": 0.00011699450347990794, "loss": 1.7556, "step": 15391 }, { "epoch": 0.7515625, "grad_norm": 0.18042004108428955, "learning_rate": 0.00011696956801600867, "loss": 1.7585, "step": 15392 }, { "epoch": 0.751611328125, "grad_norm": 0.18040813505649567, "learning_rate": 0.00011694463638210382, "loss": 1.7274, "step": 15393 }, { "epoch": 0.75166015625, "grad_norm": 0.18151159584522247, "learning_rate": 0.00011691970857879767, "loss": 1.726, "step": 15394 }, { "epoch": 0.751708984375, "grad_norm": 0.17983126640319824, "learning_rate": 0.0001168947846066944, "loss": 1.7435, "step": 15395 }, { "epoch": 0.7517578125, "grad_norm": 0.20280784368515015, "learning_rate": 0.00011686986446639805, "loss": 1.7514, "step": 15396 }, { "epoch": 0.751806640625, "grad_norm": 0.21172913908958435, "learning_rate": 0.00011684494815851257, "loss": 1.7435, "step": 15397 }, { "epoch": 0.75185546875, "grad_norm": 0.1981533169746399, "learning_rate": 0.00011682003568364183, "loss": 1.7314, "step": 15398 }, { "epoch": 0.751904296875, "grad_norm": 0.19561925530433655, "learning_rate": 0.00011679512704238961, "loss": 1.7402, "step": 15399 }, { "epoch": 0.751953125, "grad_norm": 0.19431273639202118, "learning_rate": 0.00011677022223535957, "loss": 1.7536, "step": 15400 }, { "epoch": 0.752001953125, "grad_norm": 0.19784148037433624, "learning_rate": 0.00011674532126315534, "loss": 1.7408, "step": 15401 }, { "epoch": 0.75205078125, "grad_norm": 0.19970107078552246, "learning_rate": 0.00011672042412638037, "loss": 1.7653, "step": 15402 }, { "epoch": 0.752099609375, "grad_norm": 0.21628490090370178, "learning_rate": 0.00011669553082563808, "loss": 1.7318, "step": 15403 }, { "epoch": 0.7521484375, "grad_norm": 0.19858945906162262, "learning_rate": 0.00011667064136153181, "loss": 1.7644, "step": 15404 }, { "epoch": 0.752197265625, "grad_norm": 0.2326803356409073, "learning_rate": 0.00011664575573466471, "loss": 1.7395, "step": 15405 }, { "epoch": 0.75224609375, "grad_norm": 0.22210077941417694, "learning_rate": 0.00011662087394564002, "loss": 1.7247, "step": 15406 }, { "epoch": 0.752294921875, "grad_norm": 0.21712912619113922, "learning_rate": 0.00011659599599506056, "loss": 1.7501, "step": 15407 }, { "epoch": 0.75234375, "grad_norm": 0.20096300542354584, "learning_rate": 0.00011657112188352955, "loss": 1.7127, "step": 15408 }, { "epoch": 0.752392578125, "grad_norm": 0.2899881899356842, "learning_rate": 0.00011654625161164953, "loss": 1.7309, "step": 15409 }, { "epoch": 0.75244140625, "grad_norm": 0.191039577126503, "learning_rate": 0.00011652138518002353, "loss": 1.7471, "step": 15410 }, { "epoch": 0.752490234375, "grad_norm": 0.2687528133392334, "learning_rate": 0.000116496522589254, "loss": 1.731, "step": 15411 }, { "epoch": 0.7525390625, "grad_norm": 0.20385785400867462, "learning_rate": 0.0001164716638399436, "loss": 1.7157, "step": 15412 }, { "epoch": 0.752587890625, "grad_norm": 0.22333137691020966, "learning_rate": 0.00011644680893269481, "loss": 1.737, "step": 15413 }, { "epoch": 0.75263671875, "grad_norm": 0.19371594488620758, "learning_rate": 0.00011642195786810997, "loss": 1.7476, "step": 15414 }, { "epoch": 0.752685546875, "grad_norm": 0.23048929870128632, "learning_rate": 0.00011639711064679139, "loss": 1.7675, "step": 15415 }, { "epoch": 0.752734375, "grad_norm": 0.2341497838497162, "learning_rate": 0.00011637226726934124, "loss": 1.7356, "step": 15416 }, { "epoch": 0.752783203125, "grad_norm": 0.227524995803833, "learning_rate": 0.00011634742773636165, "loss": 1.7449, "step": 15417 }, { "epoch": 0.75283203125, "grad_norm": 0.22634512186050415, "learning_rate": 0.00011632259204845458, "loss": 1.7405, "step": 15418 }, { "epoch": 0.752880859375, "grad_norm": 0.1904686987400055, "learning_rate": 0.00011629776020622198, "loss": 1.7357, "step": 15419 }, { "epoch": 0.7529296875, "grad_norm": 0.23639273643493652, "learning_rate": 0.00011627293221026567, "loss": 1.744, "step": 15420 }, { "epoch": 0.752978515625, "grad_norm": 0.20709751546382904, "learning_rate": 0.00011624810806118735, "loss": 1.7473, "step": 15421 }, { "epoch": 0.75302734375, "grad_norm": 0.21897749602794647, "learning_rate": 0.00011622328775958872, "loss": 1.719, "step": 15422 }, { "epoch": 0.753076171875, "grad_norm": 0.21997573971748352, "learning_rate": 0.00011619847130607122, "loss": 1.7451, "step": 15423 }, { "epoch": 0.753125, "grad_norm": 0.20543326437473297, "learning_rate": 0.00011617365870123638, "loss": 1.7362, "step": 15424 }, { "epoch": 0.753173828125, "grad_norm": 0.22853729128837585, "learning_rate": 0.00011614884994568549, "loss": 1.7554, "step": 15425 }, { "epoch": 0.75322265625, "grad_norm": 0.17712566256523132, "learning_rate": 0.00011612404504001993, "loss": 1.7422, "step": 15426 }, { "epoch": 0.753271484375, "grad_norm": 0.2669623792171478, "learning_rate": 0.00011609924398484071, "loss": 1.7466, "step": 15427 }, { "epoch": 0.7533203125, "grad_norm": 0.19484016299247742, "learning_rate": 0.00011607444678074897, "loss": 1.7581, "step": 15428 }, { "epoch": 0.753369140625, "grad_norm": 0.24406549334526062, "learning_rate": 0.00011604965342834568, "loss": 1.7381, "step": 15429 }, { "epoch": 0.75341796875, "grad_norm": 0.1930113136768341, "learning_rate": 0.00011602486392823177, "loss": 1.758, "step": 15430 }, { "epoch": 0.753466796875, "grad_norm": 0.24570468068122864, "learning_rate": 0.00011600007828100797, "loss": 1.772, "step": 15431 }, { "epoch": 0.753515625, "grad_norm": 0.24876943230628967, "learning_rate": 0.00011597529648727503, "loss": 1.7501, "step": 15432 }, { "epoch": 0.753564453125, "grad_norm": 0.2134777009487152, "learning_rate": 0.00011595051854763352, "loss": 1.7413, "step": 15433 }, { "epoch": 0.75361328125, "grad_norm": 0.26077693700790405, "learning_rate": 0.00011592574446268399, "loss": 1.7349, "step": 15434 }, { "epoch": 0.753662109375, "grad_norm": 0.197441965341568, "learning_rate": 0.00011590097423302682, "loss": 1.7485, "step": 15435 }, { "epoch": 0.7537109375, "grad_norm": 0.21981237828731537, "learning_rate": 0.00011587620785926237, "loss": 1.7553, "step": 15436 }, { "epoch": 0.753759765625, "grad_norm": 0.20505738258361816, "learning_rate": 0.00011585144534199086, "loss": 1.7173, "step": 15437 }, { "epoch": 0.75380859375, "grad_norm": 0.20373676717281342, "learning_rate": 0.00011582668668181246, "loss": 1.7359, "step": 15438 }, { "epoch": 0.753857421875, "grad_norm": 0.23447272181510925, "learning_rate": 0.00011580193187932715, "loss": 1.7315, "step": 15439 }, { "epoch": 0.75390625, "grad_norm": 0.1922904998064041, "learning_rate": 0.00011577718093513496, "loss": 1.7436, "step": 15440 }, { "epoch": 0.753955078125, "grad_norm": 0.21018438041210175, "learning_rate": 0.00011575243384983567, "loss": 1.7043, "step": 15441 }, { "epoch": 0.75400390625, "grad_norm": 0.254252552986145, "learning_rate": 0.00011572769062402909, "loss": 1.752, "step": 15442 }, { "epoch": 0.754052734375, "grad_norm": 0.19781599938869476, "learning_rate": 0.00011570295125831487, "loss": 1.7522, "step": 15443 }, { "epoch": 0.7541015625, "grad_norm": 0.22196003794670105, "learning_rate": 0.00011567821575329263, "loss": 1.7541, "step": 15444 }, { "epoch": 0.754150390625, "grad_norm": 0.18732434511184692, "learning_rate": 0.00011565348410956188, "loss": 1.7397, "step": 15445 }, { "epoch": 0.75419921875, "grad_norm": 0.23382030427455902, "learning_rate": 0.00011562875632772193, "loss": 1.7751, "step": 15446 }, { "epoch": 0.754248046875, "grad_norm": 0.21436291933059692, "learning_rate": 0.00011560403240837207, "loss": 1.7716, "step": 15447 }, { "epoch": 0.754296875, "grad_norm": 0.18598970770835876, "learning_rate": 0.00011557931235211155, "loss": 1.7573, "step": 15448 }, { "epoch": 0.754345703125, "grad_norm": 0.21707221865653992, "learning_rate": 0.00011555459615953946, "loss": 1.7473, "step": 15449 }, { "epoch": 0.75439453125, "grad_norm": 0.18609224259853363, "learning_rate": 0.00011552988383125486, "loss": 1.7223, "step": 15450 }, { "epoch": 0.754443359375, "grad_norm": 0.2071874439716339, "learning_rate": 0.00011550517536785663, "loss": 1.7362, "step": 15451 }, { "epoch": 0.7544921875, "grad_norm": 0.18189561367034912, "learning_rate": 0.0001154804707699436, "loss": 1.7229, "step": 15452 }, { "epoch": 0.754541015625, "grad_norm": 0.21150241792201996, "learning_rate": 0.00011545577003811453, "loss": 1.7318, "step": 15453 }, { "epoch": 0.75458984375, "grad_norm": 0.18124189972877502, "learning_rate": 0.00011543107317296806, "loss": 1.7314, "step": 15454 }, { "epoch": 0.754638671875, "grad_norm": 0.21180976927280426, "learning_rate": 0.00011540638017510272, "loss": 1.7561, "step": 15455 }, { "epoch": 0.7546875, "grad_norm": 0.19728291034698486, "learning_rate": 0.00011538169104511695, "loss": 1.709, "step": 15456 }, { "epoch": 0.754736328125, "grad_norm": 0.19738100469112396, "learning_rate": 0.00011535700578360916, "loss": 1.7493, "step": 15457 }, { "epoch": 0.75478515625, "grad_norm": 0.2039182186126709, "learning_rate": 0.00011533232439117757, "loss": 1.7314, "step": 15458 }, { "epoch": 0.754833984375, "grad_norm": 0.20472851395606995, "learning_rate": 0.0001153076468684204, "loss": 1.7207, "step": 15459 }, { "epoch": 0.7548828125, "grad_norm": 0.20701228082180023, "learning_rate": 0.00011528297321593568, "loss": 1.7218, "step": 15460 }, { "epoch": 0.754931640625, "grad_norm": 0.21280963718891144, "learning_rate": 0.00011525830343432149, "loss": 1.727, "step": 15461 }, { "epoch": 0.75498046875, "grad_norm": 0.19297541677951813, "learning_rate": 0.00011523363752417553, "loss": 1.7519, "step": 15462 }, { "epoch": 0.755029296875, "grad_norm": 0.23676417768001556, "learning_rate": 0.00011520897548609584, "loss": 1.7567, "step": 15463 }, { "epoch": 0.755078125, "grad_norm": 0.1804332435131073, "learning_rate": 0.00011518431732067989, "loss": 1.7692, "step": 15464 }, { "epoch": 0.755126953125, "grad_norm": 0.23055388033390045, "learning_rate": 0.00011515966302852553, "loss": 1.7164, "step": 15465 }, { "epoch": 0.75517578125, "grad_norm": 0.17355237901210785, "learning_rate": 0.00011513501261023007, "loss": 1.7599, "step": 15466 }, { "epoch": 0.755224609375, "grad_norm": 0.2286115139722824, "learning_rate": 0.00011511036606639103, "loss": 1.7185, "step": 15467 }, { "epoch": 0.7552734375, "grad_norm": 0.19530390202999115, "learning_rate": 0.0001150857233976057, "loss": 1.7664, "step": 15468 }, { "epoch": 0.755322265625, "grad_norm": 0.24914298951625824, "learning_rate": 0.00011506108460447135, "loss": 1.7313, "step": 15469 }, { "epoch": 0.75537109375, "grad_norm": 0.19012433290481567, "learning_rate": 0.0001150364496875851, "loss": 1.7359, "step": 15470 }, { "epoch": 0.755419921875, "grad_norm": 0.24428939819335938, "learning_rate": 0.00011501181864754403, "loss": 1.7235, "step": 15471 }, { "epoch": 0.75546875, "grad_norm": 0.17875872552394867, "learning_rate": 0.00011498719148494505, "loss": 1.7397, "step": 15472 }, { "epoch": 0.755517578125, "grad_norm": 0.2114241123199463, "learning_rate": 0.00011496256820038505, "loss": 1.7388, "step": 15473 }, { "epoch": 0.75556640625, "grad_norm": 0.20315788686275482, "learning_rate": 0.00011493794879446076, "loss": 1.7635, "step": 15474 }, { "epoch": 0.755615234375, "grad_norm": 0.17729482054710388, "learning_rate": 0.00011491333326776888, "loss": 1.7382, "step": 15475 }, { "epoch": 0.7556640625, "grad_norm": 0.19418621063232422, "learning_rate": 0.00011488872162090599, "loss": 1.7551, "step": 15476 }, { "epoch": 0.755712890625, "grad_norm": 0.17971859872341156, "learning_rate": 0.00011486411385446855, "loss": 1.7344, "step": 15477 }, { "epoch": 0.75576171875, "grad_norm": 0.19849339127540588, "learning_rate": 0.00011483950996905299, "loss": 1.7216, "step": 15478 }, { "epoch": 0.755810546875, "grad_norm": 0.2304503321647644, "learning_rate": 0.00011481490996525563, "loss": 1.7483, "step": 15479 }, { "epoch": 0.755859375, "grad_norm": 0.20941083133220673, "learning_rate": 0.00011479031384367248, "loss": 1.7532, "step": 15480 }, { "epoch": 0.755908203125, "grad_norm": 0.21554715931415558, "learning_rate": 0.00011476572160489993, "loss": 1.7381, "step": 15481 }, { "epoch": 0.75595703125, "grad_norm": 0.20599474012851715, "learning_rate": 0.00011474113324953371, "loss": 1.7453, "step": 15482 }, { "epoch": 0.756005859375, "grad_norm": 0.1933327615261078, "learning_rate": 0.00011471654877816999, "loss": 1.7544, "step": 15483 }, { "epoch": 0.7560546875, "grad_norm": 0.20565694570541382, "learning_rate": 0.00011469196819140437, "loss": 1.7263, "step": 15484 }, { "epoch": 0.756103515625, "grad_norm": 0.2201058268547058, "learning_rate": 0.00011466739148983283, "loss": 1.7455, "step": 15485 }, { "epoch": 0.75615234375, "grad_norm": 0.20352856814861298, "learning_rate": 0.00011464281867405078, "loss": 1.7131, "step": 15486 }, { "epoch": 0.756201171875, "grad_norm": 0.19380736351013184, "learning_rate": 0.00011461824974465386, "loss": 1.7541, "step": 15487 }, { "epoch": 0.75625, "grad_norm": 0.21127045154571533, "learning_rate": 0.0001145936847022375, "loss": 1.7558, "step": 15488 }, { "epoch": 0.756298828125, "grad_norm": 0.2125881165266037, "learning_rate": 0.00011456912354739706, "loss": 1.7332, "step": 15489 }, { "epoch": 0.75634765625, "grad_norm": 0.22824037075042725, "learning_rate": 0.00011454456628072779, "loss": 1.7585, "step": 15490 }, { "epoch": 0.756396484375, "grad_norm": 0.22086083889007568, "learning_rate": 0.0001145200129028249, "loss": 1.7247, "step": 15491 }, { "epoch": 0.7564453125, "grad_norm": 0.23287338018417358, "learning_rate": 0.0001144954634142834, "loss": 1.7181, "step": 15492 }, { "epoch": 0.756494140625, "grad_norm": 0.23099902272224426, "learning_rate": 0.0001144709178156983, "loss": 1.7382, "step": 15493 }, { "epoch": 0.75654296875, "grad_norm": 0.21290802955627441, "learning_rate": 0.00011444637610766446, "loss": 1.7377, "step": 15494 }, { "epoch": 0.756591796875, "grad_norm": 0.19484901428222656, "learning_rate": 0.00011442183829077675, "loss": 1.7335, "step": 15495 }, { "epoch": 0.756640625, "grad_norm": 0.19884146749973297, "learning_rate": 0.00011439730436562967, "loss": 1.7701, "step": 15496 }, { "epoch": 0.756689453125, "grad_norm": 0.19939781725406647, "learning_rate": 0.0001143727743328181, "loss": 1.732, "step": 15497 }, { "epoch": 0.75673828125, "grad_norm": 0.17815615236759186, "learning_rate": 0.00011434824819293622, "loss": 1.7554, "step": 15498 }, { "epoch": 0.756787109375, "grad_norm": 0.21341444551944733, "learning_rate": 0.00011432372594657879, "loss": 1.7474, "step": 15499 }, { "epoch": 0.7568359375, "grad_norm": 0.1841687709093094, "learning_rate": 0.00011429920759433977, "loss": 1.7366, "step": 15500 }, { "epoch": 0.756884765625, "grad_norm": 0.2103486806154251, "learning_rate": 0.00011427469313681372, "loss": 1.7505, "step": 15501 }, { "epoch": 0.75693359375, "grad_norm": 0.2172895073890686, "learning_rate": 0.00011425018257459445, "loss": 1.7094, "step": 15502 }, { "epoch": 0.756982421875, "grad_norm": 0.1906469762325287, "learning_rate": 0.00011422567590827631, "loss": 1.7212, "step": 15503 }, { "epoch": 0.75703125, "grad_norm": 0.2036033570766449, "learning_rate": 0.000114201173138453, "loss": 1.7313, "step": 15504 }, { "epoch": 0.757080078125, "grad_norm": 0.18684938549995422, "learning_rate": 0.00011417667426571843, "loss": 1.7484, "step": 15505 }, { "epoch": 0.75712890625, "grad_norm": 0.23203115165233612, "learning_rate": 0.00011415217929066638, "loss": 1.7196, "step": 15506 }, { "epoch": 0.757177734375, "grad_norm": 0.21316897869110107, "learning_rate": 0.00011412768821389051, "loss": 1.7619, "step": 15507 }, { "epoch": 0.7572265625, "grad_norm": 0.23110991716384888, "learning_rate": 0.00011410320103598433, "loss": 1.7331, "step": 15508 }, { "epoch": 0.757275390625, "grad_norm": 0.2243393510580063, "learning_rate": 0.00011407871775754134, "loss": 1.7567, "step": 15509 }, { "epoch": 0.75732421875, "grad_norm": 0.20047500729560852, "learning_rate": 0.00011405423837915492, "loss": 1.7396, "step": 15510 }, { "epoch": 0.757373046875, "grad_norm": 0.251220166683197, "learning_rate": 0.00011402976290141838, "loss": 1.7243, "step": 15511 }, { "epoch": 0.757421875, "grad_norm": 0.18546853959560394, "learning_rate": 0.00011400529132492473, "loss": 1.7465, "step": 15512 }, { "epoch": 0.757470703125, "grad_norm": 0.223367378115654, "learning_rate": 0.00011398082365026732, "loss": 1.7386, "step": 15513 }, { "epoch": 0.75751953125, "grad_norm": 0.18475885689258575, "learning_rate": 0.0001139563598780389, "loss": 1.7245, "step": 15514 }, { "epoch": 0.757568359375, "grad_norm": 0.20575188100337982, "learning_rate": 0.0001139319000088326, "loss": 1.7474, "step": 15515 }, { "epoch": 0.7576171875, "grad_norm": 0.21040001511573792, "learning_rate": 0.00011390744404324097, "loss": 1.7471, "step": 15516 }, { "epoch": 0.757666015625, "grad_norm": 0.2194540649652481, "learning_rate": 0.00011388299198185697, "loss": 1.7368, "step": 15517 }, { "epoch": 0.75771484375, "grad_norm": 0.20664770901203156, "learning_rate": 0.00011385854382527299, "loss": 1.7544, "step": 15518 }, { "epoch": 0.757763671875, "grad_norm": 0.2299392968416214, "learning_rate": 0.00011383409957408179, "loss": 1.7467, "step": 15519 }, { "epoch": 0.7578125, "grad_norm": 0.22686152160167694, "learning_rate": 0.00011380965922887558, "loss": 1.7381, "step": 15520 }, { "epoch": 0.757861328125, "grad_norm": 0.2200826108455658, "learning_rate": 0.00011378522279024679, "loss": 1.7797, "step": 15521 }, { "epoch": 0.75791015625, "grad_norm": 0.26672980189323425, "learning_rate": 0.00011376079025878766, "loss": 1.7384, "step": 15522 }, { "epoch": 0.757958984375, "grad_norm": 0.19457115232944489, "learning_rate": 0.00011373636163509028, "loss": 1.7519, "step": 15523 }, { "epoch": 0.7580078125, "grad_norm": 0.3033190965652466, "learning_rate": 0.00011371193691974676, "loss": 1.7702, "step": 15524 }, { "epoch": 0.758056640625, "grad_norm": 0.2316833734512329, "learning_rate": 0.00011368751611334902, "loss": 1.7703, "step": 15525 }, { "epoch": 0.75810546875, "grad_norm": 0.226785346865654, "learning_rate": 0.00011366309921648893, "loss": 1.7357, "step": 15526 }, { "epoch": 0.758154296875, "grad_norm": 0.2618348300457001, "learning_rate": 0.00011363868622975825, "loss": 1.7461, "step": 15527 }, { "epoch": 0.758203125, "grad_norm": 0.20984874665737152, "learning_rate": 0.00011361427715374864, "loss": 1.7291, "step": 15528 }, { "epoch": 0.758251953125, "grad_norm": 0.26731276512145996, "learning_rate": 0.00011358987198905174, "loss": 1.7582, "step": 15529 }, { "epoch": 0.75830078125, "grad_norm": 0.18928472697734833, "learning_rate": 0.00011356547073625884, "loss": 1.7006, "step": 15530 }, { "epoch": 0.758349609375, "grad_norm": 0.2340380996465683, "learning_rate": 0.00011354107339596159, "loss": 1.7144, "step": 15531 }, { "epoch": 0.7583984375, "grad_norm": 0.20436573028564453, "learning_rate": 0.00011351667996875101, "loss": 1.7465, "step": 15532 }, { "epoch": 0.758447265625, "grad_norm": 0.2225678712129593, "learning_rate": 0.00011349229045521854, "loss": 1.7363, "step": 15533 }, { "epoch": 0.75849609375, "grad_norm": 0.21131457388401031, "learning_rate": 0.00011346790485595505, "loss": 1.762, "step": 15534 }, { "epoch": 0.758544921875, "grad_norm": 0.218245267868042, "learning_rate": 0.00011344352317155178, "loss": 1.7232, "step": 15535 }, { "epoch": 0.75859375, "grad_norm": 0.209030881524086, "learning_rate": 0.00011341914540259938, "loss": 1.7182, "step": 15536 }, { "epoch": 0.758642578125, "grad_norm": 0.21365222334861755, "learning_rate": 0.00011339477154968896, "loss": 1.7433, "step": 15537 }, { "epoch": 0.75869140625, "grad_norm": 0.20666442811489105, "learning_rate": 0.00011337040161341099, "loss": 1.7259, "step": 15538 }, { "epoch": 0.758740234375, "grad_norm": 0.20883667469024658, "learning_rate": 0.00011334603559435622, "loss": 1.7415, "step": 15539 }, { "epoch": 0.7587890625, "grad_norm": 0.22970344126224518, "learning_rate": 0.00011332167349311511, "loss": 1.7429, "step": 15540 }, { "epoch": 0.758837890625, "grad_norm": 0.23388129472732544, "learning_rate": 0.00011329731531027818, "loss": 1.7546, "step": 15541 }, { "epoch": 0.75888671875, "grad_norm": 0.22132468223571777, "learning_rate": 0.00011327296104643568, "loss": 1.751, "step": 15542 }, { "epoch": 0.758935546875, "grad_norm": 0.2284456193447113, "learning_rate": 0.00011324861070217791, "loss": 1.7249, "step": 15543 }, { "epoch": 0.758984375, "grad_norm": 0.21029742062091827, "learning_rate": 0.00011322426427809504, "loss": 1.7558, "step": 15544 }, { "epoch": 0.759033203125, "grad_norm": 0.20238737761974335, "learning_rate": 0.00011319992177477715, "loss": 1.7454, "step": 15545 }, { "epoch": 0.75908203125, "grad_norm": 0.22849391400814056, "learning_rate": 0.000113175583192814, "loss": 1.7277, "step": 15546 }, { "epoch": 0.759130859375, "grad_norm": 0.257733017206192, "learning_rate": 0.00011315124853279572, "loss": 1.7644, "step": 15547 }, { "epoch": 0.7591796875, "grad_norm": 0.2256270945072174, "learning_rate": 0.00011312691779531184, "loss": 1.7512, "step": 15548 }, { "epoch": 0.759228515625, "grad_norm": 0.23190054297447205, "learning_rate": 0.00011310259098095232, "loss": 1.7563, "step": 15549 }, { "epoch": 0.75927734375, "grad_norm": 0.228483647108078, "learning_rate": 0.00011307826809030639, "loss": 1.7113, "step": 15550 }, { "epoch": 0.759326171875, "grad_norm": 0.18910831212997437, "learning_rate": 0.00011305394912396389, "loss": 1.7486, "step": 15551 }, { "epoch": 0.759375, "grad_norm": 0.2187427431344986, "learning_rate": 0.00011302963408251388, "loss": 1.7423, "step": 15552 }, { "epoch": 0.759423828125, "grad_norm": 0.2169777899980545, "learning_rate": 0.00011300532296654594, "loss": 1.7449, "step": 15553 }, { "epoch": 0.75947265625, "grad_norm": 0.19806888699531555, "learning_rate": 0.00011298101577664909, "loss": 1.7378, "step": 15554 }, { "epoch": 0.759521484375, "grad_norm": 0.22670206427574158, "learning_rate": 0.0001129567125134125, "loss": 1.7469, "step": 15555 }, { "epoch": 0.7595703125, "grad_norm": 0.18402314186096191, "learning_rate": 0.00011293241317742517, "loss": 1.7395, "step": 15556 }, { "epoch": 0.759619140625, "grad_norm": 0.21782802045345306, "learning_rate": 0.00011290811776927599, "loss": 1.7404, "step": 15557 }, { "epoch": 0.75966796875, "grad_norm": 0.1848878562450409, "learning_rate": 0.00011288382628955381, "loss": 1.7487, "step": 15558 }, { "epoch": 0.759716796875, "grad_norm": 0.22874173521995544, "learning_rate": 0.00011285953873884733, "loss": 1.7681, "step": 15559 }, { "epoch": 0.759765625, "grad_norm": 0.1950862556695938, "learning_rate": 0.00011283525511774518, "loss": 1.7386, "step": 15560 }, { "epoch": 0.759814453125, "grad_norm": 0.20999501645565033, "learning_rate": 0.00011281097542683595, "loss": 1.7487, "step": 15561 }, { "epoch": 0.75986328125, "grad_norm": 0.21629555523395538, "learning_rate": 0.00011278669966670798, "loss": 1.7356, "step": 15562 }, { "epoch": 0.759912109375, "grad_norm": 0.21091380715370178, "learning_rate": 0.00011276242783794976, "loss": 1.7477, "step": 15563 }, { "epoch": 0.7599609375, "grad_norm": 0.20870491862297058, "learning_rate": 0.00011273815994114932, "loss": 1.7435, "step": 15564 }, { "epoch": 0.760009765625, "grad_norm": 0.18172235786914825, "learning_rate": 0.00011271389597689506, "loss": 1.7371, "step": 15565 }, { "epoch": 0.76005859375, "grad_norm": 0.22632986307144165, "learning_rate": 0.00011268963594577476, "loss": 1.7336, "step": 15566 }, { "epoch": 0.760107421875, "grad_norm": 0.1883706897497177, "learning_rate": 0.00011266537984837668, "loss": 1.7301, "step": 15567 }, { "epoch": 0.76015625, "grad_norm": 0.2048703134059906, "learning_rate": 0.00011264112768528838, "loss": 1.7563, "step": 15568 }, { "epoch": 0.760205078125, "grad_norm": 0.18614792823791504, "learning_rate": 0.00011261687945709795, "loss": 1.7308, "step": 15569 }, { "epoch": 0.76025390625, "grad_norm": 0.20680908858776093, "learning_rate": 0.00011259263516439282, "loss": 1.755, "step": 15570 }, { "epoch": 0.760302734375, "grad_norm": 0.1858890950679779, "learning_rate": 0.00011256839480776067, "loss": 1.7391, "step": 15571 }, { "epoch": 0.7603515625, "grad_norm": 0.20453563332557678, "learning_rate": 0.00011254415838778897, "loss": 1.7464, "step": 15572 }, { "epoch": 0.760400390625, "grad_norm": 0.19281892478466034, "learning_rate": 0.00011251992590506507, "loss": 1.763, "step": 15573 }, { "epoch": 0.76044921875, "grad_norm": 0.20675891637802124, "learning_rate": 0.00011249569736017635, "loss": 1.7285, "step": 15574 }, { "epoch": 0.760498046875, "grad_norm": 0.20431366562843323, "learning_rate": 0.00011247147275370992, "loss": 1.7585, "step": 15575 }, { "epoch": 0.760546875, "grad_norm": 0.18798856437206268, "learning_rate": 0.00011244725208625293, "loss": 1.7019, "step": 15576 }, { "epoch": 0.760595703125, "grad_norm": 0.19910171627998352, "learning_rate": 0.00011242303535839239, "loss": 1.7249, "step": 15577 }, { "epoch": 0.76064453125, "grad_norm": 0.19584406912326813, "learning_rate": 0.00011239882257071521, "loss": 1.7583, "step": 15578 }, { "epoch": 0.760693359375, "grad_norm": 0.2146121859550476, "learning_rate": 0.00011237461372380822, "loss": 1.7361, "step": 15579 }, { "epoch": 0.7607421875, "grad_norm": 0.1960442066192627, "learning_rate": 0.00011235040881825805, "loss": 1.7488, "step": 15580 }, { "epoch": 0.760791015625, "grad_norm": 0.21410131454467773, "learning_rate": 0.00011232620785465147, "loss": 1.75, "step": 15581 }, { "epoch": 0.76083984375, "grad_norm": 0.17084497213363647, "learning_rate": 0.00011230201083357482, "loss": 1.7148, "step": 15582 }, { "epoch": 0.760888671875, "grad_norm": 0.20141883194446564, "learning_rate": 0.00011227781775561479, "loss": 1.7259, "step": 15583 }, { "epoch": 0.7609375, "grad_norm": 0.21097497642040253, "learning_rate": 0.00011225362862135747, "loss": 1.7442, "step": 15584 }, { "epoch": 0.760986328125, "grad_norm": 0.18467998504638672, "learning_rate": 0.0001122294434313893, "loss": 1.7331, "step": 15585 }, { "epoch": 0.76103515625, "grad_norm": 0.23077958822250366, "learning_rate": 0.00011220526218629621, "loss": 1.7444, "step": 15586 }, { "epoch": 0.761083984375, "grad_norm": 0.20681235194206238, "learning_rate": 0.00011218108488666451, "loss": 1.7362, "step": 15587 }, { "epoch": 0.7611328125, "grad_norm": 0.23271110653877258, "learning_rate": 0.00011215691153307998, "loss": 1.7396, "step": 15588 }, { "epoch": 0.761181640625, "grad_norm": 0.22735272347927094, "learning_rate": 0.00011213274212612851, "loss": 1.7581, "step": 15589 }, { "epoch": 0.76123046875, "grad_norm": 0.2067994773387909, "learning_rate": 0.00011210857666639587, "loss": 1.7298, "step": 15590 }, { "epoch": 0.761279296875, "grad_norm": 0.1975868046283722, "learning_rate": 0.00011208441515446776, "loss": 1.7255, "step": 15591 }, { "epoch": 0.761328125, "grad_norm": 0.22353878617286682, "learning_rate": 0.00011206025759092971, "loss": 1.7526, "step": 15592 }, { "epoch": 0.761376953125, "grad_norm": 0.20767942070960999, "learning_rate": 0.00011203610397636724, "loss": 1.7454, "step": 15593 }, { "epoch": 0.76142578125, "grad_norm": 0.2292434424161911, "learning_rate": 0.00011201195431136571, "loss": 1.7546, "step": 15594 }, { "epoch": 0.761474609375, "grad_norm": 0.2627504765987396, "learning_rate": 0.00011198780859651042, "loss": 1.7491, "step": 15595 }, { "epoch": 0.7615234375, "grad_norm": 0.20617224276065826, "learning_rate": 0.00011196366683238654, "loss": 1.7293, "step": 15596 }, { "epoch": 0.761572265625, "grad_norm": 0.2649543881416321, "learning_rate": 0.00011193952901957924, "loss": 1.7586, "step": 15597 }, { "epoch": 0.76162109375, "grad_norm": 0.20402778685092926, "learning_rate": 0.00011191539515867332, "loss": 1.7567, "step": 15598 }, { "epoch": 0.761669921875, "grad_norm": 0.2507740557193756, "learning_rate": 0.00011189126525025397, "loss": 1.7297, "step": 15599 }, { "epoch": 0.76171875, "grad_norm": 0.21380747854709625, "learning_rate": 0.00011186713929490569, "loss": 1.7549, "step": 15600 }, { "epoch": 0.761767578125, "grad_norm": 0.2268209010362625, "learning_rate": 0.00011184301729321351, "loss": 1.7392, "step": 15601 }, { "epoch": 0.76181640625, "grad_norm": 0.19450543820858002, "learning_rate": 0.00011181889924576175, "loss": 1.7345, "step": 15602 }, { "epoch": 0.761865234375, "grad_norm": 0.217793270945549, "learning_rate": 0.00011179478515313518, "loss": 1.7495, "step": 15603 }, { "epoch": 0.7619140625, "grad_norm": 0.210358127951622, "learning_rate": 0.00011177067501591803, "loss": 1.7473, "step": 15604 }, { "epoch": 0.761962890625, "grad_norm": 0.2140682190656662, "learning_rate": 0.00011174656883469473, "loss": 1.7404, "step": 15605 }, { "epoch": 0.76201171875, "grad_norm": 0.23361366987228394, "learning_rate": 0.00011172246661004951, "loss": 1.7555, "step": 15606 }, { "epoch": 0.762060546875, "grad_norm": 0.21714971959590912, "learning_rate": 0.00011169836834256644, "loss": 1.7397, "step": 15607 }, { "epoch": 0.762109375, "grad_norm": 0.2233019769191742, "learning_rate": 0.00011167427403282965, "loss": 1.7358, "step": 15608 }, { "epoch": 0.762158203125, "grad_norm": 0.17800025641918182, "learning_rate": 0.00011165018368142303, "loss": 1.7402, "step": 15609 }, { "epoch": 0.76220703125, "grad_norm": 0.23622801899909973, "learning_rate": 0.00011162609728893044, "loss": 1.7617, "step": 15610 }, { "epoch": 0.762255859375, "grad_norm": 0.20736059546470642, "learning_rate": 0.00011160201485593563, "loss": 1.7488, "step": 15611 }, { "epoch": 0.7623046875, "grad_norm": 0.20685674250125885, "learning_rate": 0.00011157793638302227, "loss": 1.7439, "step": 15612 }, { "epoch": 0.762353515625, "grad_norm": 0.20167142152786255, "learning_rate": 0.00011155386187077398, "loss": 1.7776, "step": 15613 }, { "epoch": 0.76240234375, "grad_norm": 0.21220435202121735, "learning_rate": 0.00011152979131977403, "loss": 1.7285, "step": 15614 }, { "epoch": 0.762451171875, "grad_norm": 0.17668424546718597, "learning_rate": 0.00011150572473060604, "loss": 1.7402, "step": 15615 }, { "epoch": 0.7625, "grad_norm": 0.22399364411830902, "learning_rate": 0.00011148166210385303, "loss": 1.7345, "step": 15616 }, { "epoch": 0.762548828125, "grad_norm": 0.2079162746667862, "learning_rate": 0.00011145760344009844, "loss": 1.7335, "step": 15617 }, { "epoch": 0.76259765625, "grad_norm": 0.21725080907344818, "learning_rate": 0.00011143354873992508, "loss": 1.7342, "step": 15618 }, { "epoch": 0.762646484375, "grad_norm": 0.19050857424736023, "learning_rate": 0.00011140949800391623, "loss": 1.7334, "step": 15619 }, { "epoch": 0.7626953125, "grad_norm": 0.19610610604286194, "learning_rate": 0.00011138545123265449, "loss": 1.7481, "step": 15620 }, { "epoch": 0.762744140625, "grad_norm": 0.21780557930469513, "learning_rate": 0.00011136140842672291, "loss": 1.7566, "step": 15621 }, { "epoch": 0.76279296875, "grad_norm": 0.18140682578086853, "learning_rate": 0.00011133736958670398, "loss": 1.7318, "step": 15622 }, { "epoch": 0.762841796875, "grad_norm": 0.2288236767053604, "learning_rate": 0.00011131333471318039, "loss": 1.7554, "step": 15623 }, { "epoch": 0.762890625, "grad_norm": 0.1765262931585312, "learning_rate": 0.00011128930380673464, "loss": 1.7204, "step": 15624 }, { "epoch": 0.762939453125, "grad_norm": 0.21925769746303558, "learning_rate": 0.00011126527686794915, "loss": 1.7415, "step": 15625 }, { "epoch": 0.76298828125, "grad_norm": 0.19366717338562012, "learning_rate": 0.0001112412538974062, "loss": 1.739, "step": 15626 }, { "epoch": 0.763037109375, "grad_norm": 0.20224401354789734, "learning_rate": 0.00011121723489568805, "loss": 1.7523, "step": 15627 }, { "epoch": 0.7630859375, "grad_norm": 0.21292012929916382, "learning_rate": 0.00011119321986337678, "loss": 1.7379, "step": 15628 }, { "epoch": 0.763134765625, "grad_norm": 0.19611166417598724, "learning_rate": 0.00011116920880105448, "loss": 1.7385, "step": 15629 }, { "epoch": 0.76318359375, "grad_norm": 0.20421640574932098, "learning_rate": 0.00011114520170930291, "loss": 1.7338, "step": 15630 }, { "epoch": 0.763232421875, "grad_norm": 0.22492879629135132, "learning_rate": 0.00011112119858870412, "loss": 1.7552, "step": 15631 }, { "epoch": 0.76328125, "grad_norm": 0.18461664021015167, "learning_rate": 0.00011109719943983964, "loss": 1.7219, "step": 15632 }, { "epoch": 0.763330078125, "grad_norm": 0.20175188779830933, "learning_rate": 0.00011107320426329134, "loss": 1.7289, "step": 15633 }, { "epoch": 0.76337890625, "grad_norm": 0.19896961748600006, "learning_rate": 0.00011104921305964047, "loss": 1.7008, "step": 15634 }, { "epoch": 0.763427734375, "grad_norm": 0.20069821178913116, "learning_rate": 0.0001110252258294688, "loss": 1.7686, "step": 15635 }, { "epoch": 0.7634765625, "grad_norm": 0.20541289448738098, "learning_rate": 0.00011100124257335739, "loss": 1.7122, "step": 15636 }, { "epoch": 0.763525390625, "grad_norm": 0.18202047049999237, "learning_rate": 0.00011097726329188773, "loss": 1.7392, "step": 15637 }, { "epoch": 0.76357421875, "grad_norm": 0.20619522035121918, "learning_rate": 0.00011095328798564083, "loss": 1.7531, "step": 15638 }, { "epoch": 0.763623046875, "grad_norm": 0.19387006759643555, "learning_rate": 0.00011092931665519777, "loss": 1.7299, "step": 15639 }, { "epoch": 0.763671875, "grad_norm": 0.19750723242759705, "learning_rate": 0.00011090534930113955, "loss": 1.7398, "step": 15640 }, { "epoch": 0.763720703125, "grad_norm": 0.17818699777126312, "learning_rate": 0.00011088138592404698, "loss": 1.762, "step": 15641 }, { "epoch": 0.76376953125, "grad_norm": 0.1918543577194214, "learning_rate": 0.00011085742652450093, "loss": 1.7457, "step": 15642 }, { "epoch": 0.763818359375, "grad_norm": 0.18553753197193146, "learning_rate": 0.00011083347110308197, "loss": 1.7067, "step": 15643 }, { "epoch": 0.7638671875, "grad_norm": 0.18166914582252502, "learning_rate": 0.00011080951966037075, "loss": 1.7176, "step": 15644 }, { "epoch": 0.763916015625, "grad_norm": 0.2039860635995865, "learning_rate": 0.00011078557219694773, "loss": 1.7571, "step": 15645 }, { "epoch": 0.76396484375, "grad_norm": 0.1833166778087616, "learning_rate": 0.00011076162871339331, "loss": 1.7587, "step": 15646 }, { "epoch": 0.764013671875, "grad_norm": 0.18882127106189728, "learning_rate": 0.00011073768921028782, "loss": 1.7236, "step": 15647 }, { "epoch": 0.7640625, "grad_norm": 0.1967046558856964, "learning_rate": 0.00011071375368821127, "loss": 1.746, "step": 15648 }, { "epoch": 0.764111328125, "grad_norm": 0.18515437841415405, "learning_rate": 0.00011068982214774402, "loss": 1.7546, "step": 15649 }, { "epoch": 0.76416015625, "grad_norm": 0.2009866088628769, "learning_rate": 0.00011066589458946579, "loss": 1.7361, "step": 15650 }, { "epoch": 0.764208984375, "grad_norm": 0.19237063825130463, "learning_rate": 0.00011064197101395675, "loss": 1.7261, "step": 15651 }, { "epoch": 0.7642578125, "grad_norm": 0.2037006914615631, "learning_rate": 0.00011061805142179649, "loss": 1.7124, "step": 15652 }, { "epoch": 0.764306640625, "grad_norm": 0.21919775009155273, "learning_rate": 0.00011059413581356492, "loss": 1.7427, "step": 15653 }, { "epoch": 0.76435546875, "grad_norm": 0.20814752578735352, "learning_rate": 0.0001105702241898415, "loss": 1.7284, "step": 15654 }, { "epoch": 0.764404296875, "grad_norm": 0.21554705500602722, "learning_rate": 0.0001105463165512058, "loss": 1.7486, "step": 15655 }, { "epoch": 0.764453125, "grad_norm": 0.20318938791751862, "learning_rate": 0.00011052241289823724, "loss": 1.7485, "step": 15656 }, { "epoch": 0.764501953125, "grad_norm": 0.2052008956670761, "learning_rate": 0.00011049851323151512, "loss": 1.7318, "step": 15657 }, { "epoch": 0.76455078125, "grad_norm": 0.20340245962142944, "learning_rate": 0.00011047461755161873, "loss": 1.7408, "step": 15658 }, { "epoch": 0.764599609375, "grad_norm": 0.19887994229793549, "learning_rate": 0.00011045072585912713, "loss": 1.7438, "step": 15659 }, { "epoch": 0.7646484375, "grad_norm": 0.22878842055797577, "learning_rate": 0.0001104268381546194, "loss": 1.7734, "step": 15660 }, { "epoch": 0.764697265625, "grad_norm": 0.18972404301166534, "learning_rate": 0.00011040295443867446, "loss": 1.7488, "step": 15661 }, { "epoch": 0.76474609375, "grad_norm": 0.2499096840620041, "learning_rate": 0.00011037907471187114, "loss": 1.7501, "step": 15662 }, { "epoch": 0.764794921875, "grad_norm": 0.19711126387119293, "learning_rate": 0.00011035519897478821, "loss": 1.7505, "step": 15663 }, { "epoch": 0.76484375, "grad_norm": 0.2009989321231842, "learning_rate": 0.00011033132722800433, "loss": 1.735, "step": 15664 }, { "epoch": 0.764892578125, "grad_norm": 0.19960401952266693, "learning_rate": 0.000110307459472098, "loss": 1.7472, "step": 15665 }, { "epoch": 0.76494140625, "grad_norm": 0.2232591062784195, "learning_rate": 0.00011028359570764774, "loss": 1.7222, "step": 15666 }, { "epoch": 0.764990234375, "grad_norm": 0.20444099605083466, "learning_rate": 0.00011025973593523189, "loss": 1.7497, "step": 15667 }, { "epoch": 0.7650390625, "grad_norm": 0.2079327404499054, "learning_rate": 0.00011023588015542857, "loss": 1.7412, "step": 15668 }, { "epoch": 0.765087890625, "grad_norm": 0.2268010824918747, "learning_rate": 0.0001102120283688162, "loss": 1.729, "step": 15669 }, { "epoch": 0.76513671875, "grad_norm": 0.20495019853115082, "learning_rate": 0.0001101881805759726, "loss": 1.7456, "step": 15670 }, { "epoch": 0.765185546875, "grad_norm": 0.22056278586387634, "learning_rate": 0.00011016433677747598, "loss": 1.7379, "step": 15671 }, { "epoch": 0.765234375, "grad_norm": 0.18975408375263214, "learning_rate": 0.00011014049697390399, "loss": 1.7561, "step": 15672 }, { "epoch": 0.765283203125, "grad_norm": 0.24139541387557983, "learning_rate": 0.00011011666116583454, "loss": 1.7509, "step": 15673 }, { "epoch": 0.76533203125, "grad_norm": 0.2047036737203598, "learning_rate": 0.00011009282935384523, "loss": 1.7712, "step": 15674 }, { "epoch": 0.765380859375, "grad_norm": 0.21206501126289368, "learning_rate": 0.00011006900153851371, "loss": 1.7463, "step": 15675 }, { "epoch": 0.7654296875, "grad_norm": 0.20710401237010956, "learning_rate": 0.00011004517772041744, "loss": 1.7637, "step": 15676 }, { "epoch": 0.765478515625, "grad_norm": 0.19506151974201202, "learning_rate": 0.00011002135790013381, "loss": 1.7233, "step": 15677 }, { "epoch": 0.76552734375, "grad_norm": 0.19126853346824646, "learning_rate": 0.00010999754207824011, "loss": 1.7601, "step": 15678 }, { "epoch": 0.765576171875, "grad_norm": 0.2041281908750534, "learning_rate": 0.00010997373025531357, "loss": 1.747, "step": 15679 }, { "epoch": 0.765625, "grad_norm": 0.2080727368593216, "learning_rate": 0.00010994992243193123, "loss": 1.7366, "step": 15680 }, { "epoch": 0.765673828125, "grad_norm": 0.19438304007053375, "learning_rate": 0.00010992611860867012, "loss": 1.7413, "step": 15681 }, { "epoch": 0.76572265625, "grad_norm": 0.20347478985786438, "learning_rate": 0.00010990231878610714, "loss": 1.7309, "step": 15682 }, { "epoch": 0.765771484375, "grad_norm": 0.1962890774011612, "learning_rate": 0.00010987852296481912, "loss": 1.7161, "step": 15683 }, { "epoch": 0.7658203125, "grad_norm": 0.20600320398807526, "learning_rate": 0.00010985473114538272, "loss": 1.7052, "step": 15684 }, { "epoch": 0.765869140625, "grad_norm": 0.19243168830871582, "learning_rate": 0.00010983094332837463, "loss": 1.7232, "step": 15685 }, { "epoch": 0.76591796875, "grad_norm": 0.19544798135757446, "learning_rate": 0.00010980715951437129, "loss": 1.7062, "step": 15686 }, { "epoch": 0.765966796875, "grad_norm": 0.19305101037025452, "learning_rate": 0.00010978337970394922, "loss": 1.7513, "step": 15687 }, { "epoch": 0.766015625, "grad_norm": 0.19369591772556305, "learning_rate": 0.00010975960389768464, "loss": 1.7294, "step": 15688 }, { "epoch": 0.766064453125, "grad_norm": 0.19691728055477142, "learning_rate": 0.00010973583209615379, "loss": 1.7703, "step": 15689 }, { "epoch": 0.76611328125, "grad_norm": 0.21428674459457397, "learning_rate": 0.00010971206429993283, "loss": 1.737, "step": 15690 }, { "epoch": 0.766162109375, "grad_norm": 0.1904270350933075, "learning_rate": 0.00010968830050959777, "loss": 1.7315, "step": 15691 }, { "epoch": 0.7662109375, "grad_norm": 0.21259687840938568, "learning_rate": 0.00010966454072572458, "loss": 1.7382, "step": 15692 }, { "epoch": 0.766259765625, "grad_norm": 0.20086319744586945, "learning_rate": 0.00010964078494888908, "loss": 1.7432, "step": 15693 }, { "epoch": 0.76630859375, "grad_norm": 0.19646750390529633, "learning_rate": 0.00010961703317966699, "loss": 1.7634, "step": 15694 }, { "epoch": 0.766357421875, "grad_norm": 0.23516319692134857, "learning_rate": 0.00010959328541863395, "loss": 1.7271, "step": 15695 }, { "epoch": 0.76640625, "grad_norm": 0.22244228422641754, "learning_rate": 0.00010956954166636552, "loss": 1.7768, "step": 15696 }, { "epoch": 0.766455078125, "grad_norm": 0.21020136773586273, "learning_rate": 0.00010954580192343718, "loss": 1.7117, "step": 15697 }, { "epoch": 0.76650390625, "grad_norm": 0.2131785899400711, "learning_rate": 0.00010952206619042425, "loss": 1.7406, "step": 15698 }, { "epoch": 0.766552734375, "grad_norm": 0.18556946516036987, "learning_rate": 0.00010949833446790197, "loss": 1.7352, "step": 15699 }, { "epoch": 0.7666015625, "grad_norm": 0.22064705193042755, "learning_rate": 0.00010947460675644555, "loss": 1.7333, "step": 15700 }, { "epoch": 0.766650390625, "grad_norm": 0.19838358461856842, "learning_rate": 0.00010945088305663, "loss": 1.7495, "step": 15701 }, { "epoch": 0.76669921875, "grad_norm": 0.2157597541809082, "learning_rate": 0.0001094271633690303, "loss": 1.7723, "step": 15702 }, { "epoch": 0.766748046875, "grad_norm": 0.21227723360061646, "learning_rate": 0.00010940344769422132, "loss": 1.7418, "step": 15703 }, { "epoch": 0.766796875, "grad_norm": 0.2147406041622162, "learning_rate": 0.00010937973603277781, "loss": 1.7541, "step": 15704 }, { "epoch": 0.766845703125, "grad_norm": 0.24189314246177673, "learning_rate": 0.00010935602838527448, "loss": 1.7676, "step": 15705 }, { "epoch": 0.76689453125, "grad_norm": 0.20656578242778778, "learning_rate": 0.00010933232475228592, "loss": 1.7264, "step": 15706 }, { "epoch": 0.766943359375, "grad_norm": 0.2566450238227844, "learning_rate": 0.00010930862513438651, "loss": 1.7149, "step": 15707 }, { "epoch": 0.7669921875, "grad_norm": 0.20401012897491455, "learning_rate": 0.00010928492953215069, "loss": 1.7259, "step": 15708 }, { "epoch": 0.767041015625, "grad_norm": 0.22820134460926056, "learning_rate": 0.00010926123794615273, "loss": 1.7396, "step": 15709 }, { "epoch": 0.76708984375, "grad_norm": 0.20567159354686737, "learning_rate": 0.0001092375503769668, "loss": 1.7403, "step": 15710 }, { "epoch": 0.767138671875, "grad_norm": 0.23180091381072998, "learning_rate": 0.00010921386682516705, "loss": 1.7386, "step": 15711 }, { "epoch": 0.7671875, "grad_norm": 0.21890826523303986, "learning_rate": 0.0001091901872913274, "loss": 1.7364, "step": 15712 }, { "epoch": 0.767236328125, "grad_norm": 0.2445361465215683, "learning_rate": 0.00010916651177602178, "loss": 1.7237, "step": 15713 }, { "epoch": 0.76728515625, "grad_norm": 0.21704091131687164, "learning_rate": 0.00010914284027982399, "loss": 1.703, "step": 15714 }, { "epoch": 0.767333984375, "grad_norm": 0.21811096370220184, "learning_rate": 0.00010911917280330771, "loss": 1.7453, "step": 15715 }, { "epoch": 0.7673828125, "grad_norm": 0.20689469575881958, "learning_rate": 0.00010909550934704653, "loss": 1.7373, "step": 15716 }, { "epoch": 0.767431640625, "grad_norm": 0.2269361913204193, "learning_rate": 0.00010907184991161397, "loss": 1.7547, "step": 15717 }, { "epoch": 0.76748046875, "grad_norm": 0.1726401001214981, "learning_rate": 0.00010904819449758343, "loss": 1.7296, "step": 15718 }, { "epoch": 0.767529296875, "grad_norm": 0.23716199398040771, "learning_rate": 0.00010902454310552825, "loss": 1.7231, "step": 15719 }, { "epoch": 0.767578125, "grad_norm": 0.17575667798519135, "learning_rate": 0.00010900089573602159, "loss": 1.7292, "step": 15720 }, { "epoch": 0.767626953125, "grad_norm": 0.2399999052286148, "learning_rate": 0.00010897725238963657, "loss": 1.7567, "step": 15721 }, { "epoch": 0.76767578125, "grad_norm": 0.20800437033176422, "learning_rate": 0.00010895361306694631, "loss": 1.7419, "step": 15722 }, { "epoch": 0.767724609375, "grad_norm": 0.20908036828041077, "learning_rate": 0.0001089299777685235, "loss": 1.7316, "step": 15723 }, { "epoch": 0.7677734375, "grad_norm": 0.1961502730846405, "learning_rate": 0.00010890634649494122, "loss": 1.7282, "step": 15724 }, { "epoch": 0.767822265625, "grad_norm": 0.2245483249425888, "learning_rate": 0.00010888271924677201, "loss": 1.7367, "step": 15725 }, { "epoch": 0.76787109375, "grad_norm": 0.18800342082977295, "learning_rate": 0.00010885909602458854, "loss": 1.7108, "step": 15726 }, { "epoch": 0.767919921875, "grad_norm": 0.21489252150058746, "learning_rate": 0.00010883547682896336, "loss": 1.7432, "step": 15727 }, { "epoch": 0.76796875, "grad_norm": 0.2085273563861847, "learning_rate": 0.00010881186166046891, "loss": 1.7315, "step": 15728 }, { "epoch": 0.768017578125, "grad_norm": 0.19437409937381744, "learning_rate": 0.00010878825051967753, "loss": 1.751, "step": 15729 }, { "epoch": 0.76806640625, "grad_norm": 0.20150747895240784, "learning_rate": 0.00010876464340716139, "loss": 1.7708, "step": 15730 }, { "epoch": 0.768115234375, "grad_norm": 0.19158601760864258, "learning_rate": 0.0001087410403234927, "loss": 1.746, "step": 15731 }, { "epoch": 0.7681640625, "grad_norm": 0.1970009207725525, "learning_rate": 0.00010871744126924343, "loss": 1.7326, "step": 15732 }, { "epoch": 0.768212890625, "grad_norm": 0.19088158011436462, "learning_rate": 0.00010869384624498558, "loss": 1.7206, "step": 15733 }, { "epoch": 0.76826171875, "grad_norm": 0.19873706996440887, "learning_rate": 0.00010867025525129098, "loss": 1.7301, "step": 15734 }, { "epoch": 0.768310546875, "grad_norm": 0.19155800342559814, "learning_rate": 0.00010864666828873138, "loss": 1.741, "step": 15735 }, { "epoch": 0.768359375, "grad_norm": 0.19857700169086456, "learning_rate": 0.0001086230853578784, "loss": 1.7501, "step": 15736 }, { "epoch": 0.768408203125, "grad_norm": 0.19749176502227783, "learning_rate": 0.00010859950645930361, "loss": 1.757, "step": 15737 }, { "epoch": 0.76845703125, "grad_norm": 0.19544094800949097, "learning_rate": 0.00010857593159357848, "loss": 1.733, "step": 15738 }, { "epoch": 0.768505859375, "grad_norm": 0.18511511385440826, "learning_rate": 0.00010855236076127436, "loss": 1.7409, "step": 15739 }, { "epoch": 0.7685546875, "grad_norm": 0.2128717303276062, "learning_rate": 0.00010852879396296254, "loss": 1.7148, "step": 15740 }, { "epoch": 0.768603515625, "grad_norm": 0.2018902599811554, "learning_rate": 0.00010850523119921405, "loss": 1.7592, "step": 15741 }, { "epoch": 0.76865234375, "grad_norm": 0.2006378471851349, "learning_rate": 0.00010848167247060014, "loss": 1.7373, "step": 15742 }, { "epoch": 0.768701171875, "grad_norm": 0.21615825593471527, "learning_rate": 0.00010845811777769153, "loss": 1.7339, "step": 15743 }, { "epoch": 0.76875, "grad_norm": 0.2006320208311081, "learning_rate": 0.0001084345671210594, "loss": 1.7365, "step": 15744 }, { "epoch": 0.768798828125, "grad_norm": 0.2215346246957779, "learning_rate": 0.00010841102050127426, "loss": 1.745, "step": 15745 }, { "epoch": 0.76884765625, "grad_norm": 0.2349177896976471, "learning_rate": 0.00010838747791890689, "loss": 1.745, "step": 15746 }, { "epoch": 0.768896484375, "grad_norm": 0.2133651077747345, "learning_rate": 0.00010836393937452785, "loss": 1.7422, "step": 15747 }, { "epoch": 0.7689453125, "grad_norm": 0.1996876299381256, "learning_rate": 0.00010834040486870761, "loss": 1.7485, "step": 15748 }, { "epoch": 0.768994140625, "grad_norm": 0.22482746839523315, "learning_rate": 0.00010831687440201657, "loss": 1.7297, "step": 15749 }, { "epoch": 0.76904296875, "grad_norm": 0.19178548455238342, "learning_rate": 0.00010829334797502496, "loss": 1.7311, "step": 15750 }, { "epoch": 0.769091796875, "grad_norm": 0.2269558161497116, "learning_rate": 0.000108269825588303, "loss": 1.7646, "step": 15751 }, { "epoch": 0.769140625, "grad_norm": 0.1918371170759201, "learning_rate": 0.00010824630724242076, "loss": 1.7324, "step": 15752 }, { "epoch": 0.769189453125, "grad_norm": 0.25333917140960693, "learning_rate": 0.00010822279293794826, "loss": 1.7445, "step": 15753 }, { "epoch": 0.76923828125, "grad_norm": 0.21820682287216187, "learning_rate": 0.00010819928267545535, "loss": 1.7263, "step": 15754 }, { "epoch": 0.769287109375, "grad_norm": 0.2607594430446625, "learning_rate": 0.00010817577645551181, "loss": 1.7288, "step": 15755 }, { "epoch": 0.7693359375, "grad_norm": 0.23004253208637238, "learning_rate": 0.00010815227427868743, "loss": 1.7317, "step": 15756 }, { "epoch": 0.769384765625, "grad_norm": 0.2161465883255005, "learning_rate": 0.00010812877614555163, "loss": 1.7249, "step": 15757 }, { "epoch": 0.76943359375, "grad_norm": 0.2561553120613098, "learning_rate": 0.00010810528205667409, "loss": 1.7207, "step": 15758 }, { "epoch": 0.769482421875, "grad_norm": 0.21996194124221802, "learning_rate": 0.000108081792012624, "loss": 1.7322, "step": 15759 }, { "epoch": 0.76953125, "grad_norm": 0.24139490723609924, "learning_rate": 0.00010805830601397093, "loss": 1.7495, "step": 15760 }, { "epoch": 0.769580078125, "grad_norm": 0.21767234802246094, "learning_rate": 0.00010803482406128376, "loss": 1.7624, "step": 15761 }, { "epoch": 0.76962890625, "grad_norm": 0.2454310953617096, "learning_rate": 0.00010801134615513195, "loss": 1.7387, "step": 15762 }, { "epoch": 0.769677734375, "grad_norm": 0.20981375873088837, "learning_rate": 0.00010798787229608415, "loss": 1.7492, "step": 15763 }, { "epoch": 0.7697265625, "grad_norm": 0.23694705963134766, "learning_rate": 0.0001079644024847096, "loss": 1.7323, "step": 15764 }, { "epoch": 0.769775390625, "grad_norm": 0.2244122475385666, "learning_rate": 0.0001079409367215769, "loss": 1.7705, "step": 15765 }, { "epoch": 0.76982421875, "grad_norm": 0.20232240855693817, "learning_rate": 0.00010791747500725477, "loss": 1.7177, "step": 15766 }, { "epoch": 0.769873046875, "grad_norm": 0.22373920679092407, "learning_rate": 0.00010789401734231187, "loss": 1.7239, "step": 15767 }, { "epoch": 0.769921875, "grad_norm": 0.17644988000392914, "learning_rate": 0.00010787056372731673, "loss": 1.7507, "step": 15768 }, { "epoch": 0.769970703125, "grad_norm": 0.22656609117984772, "learning_rate": 0.00010784711416283777, "loss": 1.7514, "step": 15769 }, { "epoch": 0.77001953125, "grad_norm": 0.1916971653699875, "learning_rate": 0.00010782366864944323, "loss": 1.7543, "step": 15770 }, { "epoch": 0.770068359375, "grad_norm": 0.21930064260959625, "learning_rate": 0.00010780022718770144, "loss": 1.732, "step": 15771 }, { "epoch": 0.7701171875, "grad_norm": 0.19159209728240967, "learning_rate": 0.00010777678977818051, "loss": 1.7292, "step": 15772 }, { "epoch": 0.770166015625, "grad_norm": 0.2257976233959198, "learning_rate": 0.00010775335642144829, "loss": 1.7351, "step": 15773 }, { "epoch": 0.77021484375, "grad_norm": 0.18859131634235382, "learning_rate": 0.000107729927118073, "loss": 1.7132, "step": 15774 }, { "epoch": 0.770263671875, "grad_norm": 0.20465420186519623, "learning_rate": 0.00010770650186862216, "loss": 1.7469, "step": 15775 }, { "epoch": 0.7703125, "grad_norm": 0.19483159482479095, "learning_rate": 0.00010768308067366379, "loss": 1.744, "step": 15776 }, { "epoch": 0.770361328125, "grad_norm": 0.20026452839374542, "learning_rate": 0.00010765966353376521, "loss": 1.7433, "step": 15777 }, { "epoch": 0.77041015625, "grad_norm": 0.20141959190368652, "learning_rate": 0.00010763625044949432, "loss": 1.7287, "step": 15778 }, { "epoch": 0.770458984375, "grad_norm": 0.20588594675064087, "learning_rate": 0.00010761284142141819, "loss": 1.7281, "step": 15779 }, { "epoch": 0.7705078125, "grad_norm": 0.20035532116889954, "learning_rate": 0.00010758943645010447, "loss": 1.7468, "step": 15780 }, { "epoch": 0.770556640625, "grad_norm": 0.20475177466869354, "learning_rate": 0.00010756603553612018, "loss": 1.7244, "step": 15781 }, { "epoch": 0.77060546875, "grad_norm": 0.21141012012958527, "learning_rate": 0.00010754263868003253, "loss": 1.7407, "step": 15782 }, { "epoch": 0.770654296875, "grad_norm": 0.18915680050849915, "learning_rate": 0.00010751924588240855, "loss": 1.7617, "step": 15783 }, { "epoch": 0.770703125, "grad_norm": 0.2200254499912262, "learning_rate": 0.00010749585714381523, "loss": 1.7107, "step": 15784 }, { "epoch": 0.770751953125, "grad_norm": 0.18859298527240753, "learning_rate": 0.00010747247246481939, "loss": 1.7771, "step": 15785 }, { "epoch": 0.77080078125, "grad_norm": 0.21480894088745117, "learning_rate": 0.00010744909184598772, "loss": 1.7251, "step": 15786 }, { "epoch": 0.770849609375, "grad_norm": 0.18341846764087677, "learning_rate": 0.00010742571528788697, "loss": 1.7382, "step": 15787 }, { "epoch": 0.7708984375, "grad_norm": 0.1912951022386551, "learning_rate": 0.00010740234279108363, "loss": 1.767, "step": 15788 }, { "epoch": 0.770947265625, "grad_norm": 0.18591414391994476, "learning_rate": 0.00010737897435614415, "loss": 1.7192, "step": 15789 }, { "epoch": 0.77099609375, "grad_norm": 0.20208579301834106, "learning_rate": 0.00010735560998363495, "loss": 1.7327, "step": 15790 }, { "epoch": 0.771044921875, "grad_norm": 0.17257888615131378, "learning_rate": 0.00010733224967412211, "loss": 1.7204, "step": 15791 }, { "epoch": 0.77109375, "grad_norm": 0.20898815989494324, "learning_rate": 0.00010730889342817202, "loss": 1.7393, "step": 15792 }, { "epoch": 0.771142578125, "grad_norm": 0.17505080997943878, "learning_rate": 0.00010728554124635049, "loss": 1.7184, "step": 15793 }, { "epoch": 0.77119140625, "grad_norm": 0.2025507688522339, "learning_rate": 0.00010726219312922372, "loss": 1.7527, "step": 15794 }, { "epoch": 0.771240234375, "grad_norm": 0.19081030786037445, "learning_rate": 0.00010723884907735735, "loss": 1.731, "step": 15795 }, { "epoch": 0.7712890625, "grad_norm": 0.19480469822883606, "learning_rate": 0.00010721550909131738, "loss": 1.7436, "step": 15796 }, { "epoch": 0.771337890625, "grad_norm": 0.1900685876607895, "learning_rate": 0.00010719217317166924, "loss": 1.7415, "step": 15797 }, { "epoch": 0.77138671875, "grad_norm": 0.19352445006370544, "learning_rate": 0.00010716884131897869, "loss": 1.7665, "step": 15798 }, { "epoch": 0.771435546875, "grad_norm": 0.22390571236610413, "learning_rate": 0.00010714551353381105, "loss": 1.7313, "step": 15799 }, { "epoch": 0.771484375, "grad_norm": 0.23120611906051636, "learning_rate": 0.00010712218981673175, "loss": 1.7633, "step": 15800 }, { "epoch": 0.771533203125, "grad_norm": 0.21339339017868042, "learning_rate": 0.00010709887016830605, "loss": 1.7203, "step": 15801 }, { "epoch": 0.77158203125, "grad_norm": 0.21740137040615082, "learning_rate": 0.0001070755545890991, "loss": 1.7332, "step": 15802 }, { "epoch": 0.771630859375, "grad_norm": 0.22459164261817932, "learning_rate": 0.00010705224307967603, "loss": 1.7639, "step": 15803 }, { "epoch": 0.7716796875, "grad_norm": 0.2290268987417221, "learning_rate": 0.00010702893564060176, "loss": 1.7392, "step": 15804 }, { "epoch": 0.771728515625, "grad_norm": 0.20788393914699554, "learning_rate": 0.00010700563227244116, "loss": 1.7242, "step": 15805 }, { "epoch": 0.77177734375, "grad_norm": 0.24149397015571594, "learning_rate": 0.00010698233297575913, "loss": 1.7525, "step": 15806 }, { "epoch": 0.771826171875, "grad_norm": 0.20056582987308502, "learning_rate": 0.00010695903775112009, "loss": 1.7464, "step": 15807 }, { "epoch": 0.771875, "grad_norm": 0.21948860585689545, "learning_rate": 0.00010693574659908889, "loss": 1.7244, "step": 15808 }, { "epoch": 0.771923828125, "grad_norm": 0.1932355761528015, "learning_rate": 0.00010691245952022979, "loss": 1.7396, "step": 15809 }, { "epoch": 0.77197265625, "grad_norm": 0.19988082349300385, "learning_rate": 0.00010688917651510741, "loss": 1.7367, "step": 15810 }, { "epoch": 0.772021484375, "grad_norm": 0.21282358467578888, "learning_rate": 0.00010686589758428573, "loss": 1.73, "step": 15811 }, { "epoch": 0.7720703125, "grad_norm": 0.2190641164779663, "learning_rate": 0.00010684262272832926, "loss": 1.7069, "step": 15812 }, { "epoch": 0.772119140625, "grad_norm": 0.21127350628376007, "learning_rate": 0.00010681935194780179, "loss": 1.7315, "step": 15813 }, { "epoch": 0.77216796875, "grad_norm": 0.23509159684181213, "learning_rate": 0.00010679608524326756, "loss": 1.7024, "step": 15814 }, { "epoch": 0.772216796875, "grad_norm": 0.1954825073480606, "learning_rate": 0.00010677282261529028, "loss": 1.7502, "step": 15815 }, { "epoch": 0.772265625, "grad_norm": 0.19494353234767914, "learning_rate": 0.00010674956406443378, "loss": 1.7229, "step": 15816 }, { "epoch": 0.772314453125, "grad_norm": 0.21531276404857635, "learning_rate": 0.00010672630959126181, "loss": 1.7222, "step": 15817 }, { "epoch": 0.77236328125, "grad_norm": 0.1807437539100647, "learning_rate": 0.00010670305919633788, "loss": 1.7433, "step": 15818 }, { "epoch": 0.772412109375, "grad_norm": 0.23137842118740082, "learning_rate": 0.00010667981288022552, "loss": 1.7445, "step": 15819 }, { "epoch": 0.7724609375, "grad_norm": 0.19146138429641724, "learning_rate": 0.00010665657064348813, "loss": 1.723, "step": 15820 }, { "epoch": 0.772509765625, "grad_norm": 0.21959033608436584, "learning_rate": 0.000106633332486689, "loss": 1.7391, "step": 15821 }, { "epoch": 0.77255859375, "grad_norm": 0.19371618330478668, "learning_rate": 0.0001066100984103913, "loss": 1.7354, "step": 15822 }, { "epoch": 0.772607421875, "grad_norm": 0.22380433976650238, "learning_rate": 0.00010658686841515816, "loss": 1.7593, "step": 15823 }, { "epoch": 0.77265625, "grad_norm": 0.18923534452915192, "learning_rate": 0.00010656364250155264, "loss": 1.7362, "step": 15824 }, { "epoch": 0.772705078125, "grad_norm": 0.211842343211174, "learning_rate": 0.00010654042067013743, "loss": 1.7337, "step": 15825 }, { "epoch": 0.77275390625, "grad_norm": 0.21799734234809875, "learning_rate": 0.0001065172029214756, "loss": 1.7452, "step": 15826 }, { "epoch": 0.772802734375, "grad_norm": 0.22212812304496765, "learning_rate": 0.0001064939892561296, "loss": 1.7328, "step": 15827 }, { "epoch": 0.7728515625, "grad_norm": 0.20338301360607147, "learning_rate": 0.00010647077967466224, "loss": 1.7189, "step": 15828 }, { "epoch": 0.772900390625, "grad_norm": 0.2089158296585083, "learning_rate": 0.00010644757417763581, "loss": 1.7361, "step": 15829 }, { "epoch": 0.77294921875, "grad_norm": 0.20600475370883942, "learning_rate": 0.00010642437276561298, "loss": 1.7529, "step": 15830 }, { "epoch": 0.772998046875, "grad_norm": 0.22099171578884125, "learning_rate": 0.00010640117543915582, "loss": 1.7193, "step": 15831 }, { "epoch": 0.773046875, "grad_norm": 0.20082609355449677, "learning_rate": 0.00010637798219882664, "loss": 1.7498, "step": 15832 }, { "epoch": 0.773095703125, "grad_norm": 0.24113772809505463, "learning_rate": 0.0001063547930451875, "loss": 1.7289, "step": 15833 }, { "epoch": 0.77314453125, "grad_norm": 0.20039179921150208, "learning_rate": 0.00010633160797880048, "loss": 1.7538, "step": 15834 }, { "epoch": 0.773193359375, "grad_norm": 0.23313024640083313, "learning_rate": 0.00010630842700022744, "loss": 1.7243, "step": 15835 }, { "epoch": 0.7732421875, "grad_norm": 0.20459245145320892, "learning_rate": 0.00010628525011003018, "loss": 1.7484, "step": 15836 }, { "epoch": 0.773291015625, "grad_norm": 0.17731931805610657, "learning_rate": 0.00010626207730877044, "loss": 1.762, "step": 15837 }, { "epoch": 0.77333984375, "grad_norm": 0.2108851969242096, "learning_rate": 0.0001062389085970098, "loss": 1.766, "step": 15838 }, { "epoch": 0.773388671875, "grad_norm": 0.18531933426856995, "learning_rate": 0.00010621574397530981, "loss": 1.7706, "step": 15839 }, { "epoch": 0.7734375, "grad_norm": 0.18637166917324066, "learning_rate": 0.00010619258344423194, "loss": 1.7522, "step": 15840 }, { "epoch": 0.773486328125, "grad_norm": 0.18240317702293396, "learning_rate": 0.00010616942700433729, "loss": 1.7517, "step": 15841 }, { "epoch": 0.77353515625, "grad_norm": 0.18347470462322235, "learning_rate": 0.00010614627465618734, "loss": 1.7241, "step": 15842 }, { "epoch": 0.773583984375, "grad_norm": 0.20127956569194794, "learning_rate": 0.00010612312640034294, "loss": 1.7498, "step": 15843 }, { "epoch": 0.7736328125, "grad_norm": 0.18867434561252594, "learning_rate": 0.00010609998223736539, "loss": 1.7632, "step": 15844 }, { "epoch": 0.773681640625, "grad_norm": 0.20475615561008453, "learning_rate": 0.00010607684216781532, "loss": 1.7489, "step": 15845 }, { "epoch": 0.77373046875, "grad_norm": 0.18324552476406097, "learning_rate": 0.00010605370619225384, "loss": 1.7456, "step": 15846 }, { "epoch": 0.773779296875, "grad_norm": 0.22970634698867798, "learning_rate": 0.00010603057431124139, "loss": 1.7597, "step": 15847 }, { "epoch": 0.773828125, "grad_norm": 0.17632371187210083, "learning_rate": 0.0001060074465253388, "loss": 1.7495, "step": 15848 }, { "epoch": 0.773876953125, "grad_norm": 0.23718929290771484, "learning_rate": 0.0001059843228351065, "loss": 1.7611, "step": 15849 }, { "epoch": 0.77392578125, "grad_norm": 0.17690254747867584, "learning_rate": 0.0001059612032411049, "loss": 1.7495, "step": 15850 }, { "epoch": 0.773974609375, "grad_norm": 0.20254188776016235, "learning_rate": 0.00010593808774389435, "loss": 1.7432, "step": 15851 }, { "epoch": 0.7740234375, "grad_norm": 0.21369148790836334, "learning_rate": 0.0001059149763440351, "loss": 1.738, "step": 15852 }, { "epoch": 0.774072265625, "grad_norm": 0.17008987069129944, "learning_rate": 0.0001058918690420872, "loss": 1.7248, "step": 15853 }, { "epoch": 0.77412109375, "grad_norm": 0.253293514251709, "learning_rate": 0.00010586876583861073, "loss": 1.7615, "step": 15854 }, { "epoch": 0.774169921875, "grad_norm": 0.18352390825748444, "learning_rate": 0.00010584566673416557, "loss": 1.7453, "step": 15855 }, { "epoch": 0.77421875, "grad_norm": 0.21672599017620087, "learning_rate": 0.00010582257172931159, "loss": 1.7607, "step": 15856 }, { "epoch": 0.774267578125, "grad_norm": 0.18466457724571228, "learning_rate": 0.00010579948082460853, "loss": 1.7354, "step": 15857 }, { "epoch": 0.77431640625, "grad_norm": 0.2092222422361374, "learning_rate": 0.000105776394020616, "loss": 1.7204, "step": 15858 }, { "epoch": 0.774365234375, "grad_norm": 0.19306506216526031, "learning_rate": 0.00010575331131789341, "loss": 1.7127, "step": 15859 }, { "epoch": 0.7744140625, "grad_norm": 0.21556349098682404, "learning_rate": 0.00010573023271700043, "loss": 1.724, "step": 15860 }, { "epoch": 0.774462890625, "grad_norm": 0.2115621268749237, "learning_rate": 0.00010570715821849613, "loss": 1.751, "step": 15861 }, { "epoch": 0.77451171875, "grad_norm": 0.19125130772590637, "learning_rate": 0.00010568408782293996, "loss": 1.745, "step": 15862 }, { "epoch": 0.774560546875, "grad_norm": 0.20459450781345367, "learning_rate": 0.00010566102153089082, "loss": 1.7597, "step": 15863 }, { "epoch": 0.774609375, "grad_norm": 0.19888152182102203, "learning_rate": 0.000105637959342908, "loss": 1.724, "step": 15864 }, { "epoch": 0.774658203125, "grad_norm": 0.20113377273082733, "learning_rate": 0.00010561490125955026, "loss": 1.7628, "step": 15865 }, { "epoch": 0.77470703125, "grad_norm": 0.2121063470840454, "learning_rate": 0.00010559184728137645, "loss": 1.7403, "step": 15866 }, { "epoch": 0.774755859375, "grad_norm": 0.2018764317035675, "learning_rate": 0.00010556879740894531, "loss": 1.7281, "step": 15867 }, { "epoch": 0.7748046875, "grad_norm": 0.2143777757883072, "learning_rate": 0.00010554575164281551, "loss": 1.7316, "step": 15868 }, { "epoch": 0.774853515625, "grad_norm": 0.20607583224773407, "learning_rate": 0.00010552270998354554, "loss": 1.7385, "step": 15869 }, { "epoch": 0.77490234375, "grad_norm": 0.19598083198070526, "learning_rate": 0.00010549967243169386, "loss": 1.7113, "step": 15870 }, { "epoch": 0.774951171875, "grad_norm": 0.20919714868068695, "learning_rate": 0.00010547663898781878, "loss": 1.7392, "step": 15871 }, { "epoch": 0.775, "grad_norm": 0.2076198160648346, "learning_rate": 0.00010545360965247857, "loss": 1.7342, "step": 15872 }, { "epoch": 0.775048828125, "grad_norm": 0.2274690419435501, "learning_rate": 0.00010543058442623134, "loss": 1.7402, "step": 15873 }, { "epoch": 0.77509765625, "grad_norm": 0.20416854321956635, "learning_rate": 0.00010540756330963516, "loss": 1.7495, "step": 15874 }, { "epoch": 0.775146484375, "grad_norm": 0.23328621685504913, "learning_rate": 0.00010538454630324784, "loss": 1.7286, "step": 15875 }, { "epoch": 0.7751953125, "grad_norm": 0.19772469997406006, "learning_rate": 0.00010536153340762742, "loss": 1.7699, "step": 15876 }, { "epoch": 0.775244140625, "grad_norm": 0.22387313842773438, "learning_rate": 0.0001053385246233314, "loss": 1.7338, "step": 15877 }, { "epoch": 0.77529296875, "grad_norm": 0.20522397756576538, "learning_rate": 0.00010531551995091768, "loss": 1.7562, "step": 15878 }, { "epoch": 0.775341796875, "grad_norm": 0.2068837732076645, "learning_rate": 0.0001052925193909435, "loss": 1.7452, "step": 15879 }, { "epoch": 0.775390625, "grad_norm": 0.19844527542591095, "learning_rate": 0.00010526952294396661, "loss": 1.7368, "step": 15880 }, { "epoch": 0.775439453125, "grad_norm": 0.22532621026039124, "learning_rate": 0.00010524653061054403, "loss": 1.7319, "step": 15881 }, { "epoch": 0.77548828125, "grad_norm": 0.1992526352405548, "learning_rate": 0.00010522354239123333, "loss": 1.7333, "step": 15882 }, { "epoch": 0.775537109375, "grad_norm": 0.23010502755641937, "learning_rate": 0.00010520055828659138, "loss": 1.7449, "step": 15883 }, { "epoch": 0.7755859375, "grad_norm": 0.18513402342796326, "learning_rate": 0.00010517757829717531, "loss": 1.7207, "step": 15884 }, { "epoch": 0.775634765625, "grad_norm": 0.20192593336105347, "learning_rate": 0.00010515460242354211, "loss": 1.7486, "step": 15885 }, { "epoch": 0.77568359375, "grad_norm": 0.1809847503900528, "learning_rate": 0.00010513163066624852, "loss": 1.7503, "step": 15886 }, { "epoch": 0.775732421875, "grad_norm": 0.21854671835899353, "learning_rate": 0.00010510866302585137, "loss": 1.7837, "step": 15887 }, { "epoch": 0.77578125, "grad_norm": 0.18200691044330597, "learning_rate": 0.00010508569950290725, "loss": 1.7195, "step": 15888 }, { "epoch": 0.775830078125, "grad_norm": 0.19301298260688782, "learning_rate": 0.0001050627400979727, "loss": 1.7195, "step": 15889 }, { "epoch": 0.77587890625, "grad_norm": 0.19900964200496674, "learning_rate": 0.00010503978481160425, "loss": 1.7664, "step": 15890 }, { "epoch": 0.775927734375, "grad_norm": 0.2135148048400879, "learning_rate": 0.00010501683364435801, "loss": 1.7572, "step": 15891 }, { "epoch": 0.7759765625, "grad_norm": 0.19296588003635406, "learning_rate": 0.00010499388659679049, "loss": 1.7227, "step": 15892 }, { "epoch": 0.776025390625, "grad_norm": 0.20854543149471283, "learning_rate": 0.00010497094366945762, "loss": 1.7583, "step": 15893 }, { "epoch": 0.77607421875, "grad_norm": 0.2218504101037979, "learning_rate": 0.00010494800486291564, "loss": 1.7275, "step": 15894 }, { "epoch": 0.776123046875, "grad_norm": 0.2105279266834259, "learning_rate": 0.00010492507017772026, "loss": 1.7259, "step": 15895 }, { "epoch": 0.776171875, "grad_norm": 0.2001587152481079, "learning_rate": 0.00010490213961442757, "loss": 1.7445, "step": 15896 }, { "epoch": 0.776220703125, "grad_norm": 0.18684563040733337, "learning_rate": 0.00010487921317359303, "loss": 1.7153, "step": 15897 }, { "epoch": 0.77626953125, "grad_norm": 0.2365417182445526, "learning_rate": 0.00010485629085577259, "loss": 1.7421, "step": 15898 }, { "epoch": 0.776318359375, "grad_norm": 0.18771964311599731, "learning_rate": 0.00010483337266152155, "loss": 1.7109, "step": 15899 }, { "epoch": 0.7763671875, "grad_norm": 0.23378252983093262, "learning_rate": 0.00010481045859139543, "loss": 1.7331, "step": 15900 }, { "epoch": 0.776416015625, "grad_norm": 0.20996545255184174, "learning_rate": 0.00010478754864594958, "loss": 1.7404, "step": 15901 }, { "epoch": 0.77646484375, "grad_norm": 0.2315225452184677, "learning_rate": 0.00010476464282573921, "loss": 1.7627, "step": 15902 }, { "epoch": 0.776513671875, "grad_norm": 0.2417890429496765, "learning_rate": 0.00010474174113131956, "loss": 1.7756, "step": 15903 }, { "epoch": 0.7765625, "grad_norm": 0.22578351199626923, "learning_rate": 0.00010471884356324554, "loss": 1.7391, "step": 15904 }, { "epoch": 0.776611328125, "grad_norm": 0.24627068638801575, "learning_rate": 0.00010469595012207215, "loss": 1.7287, "step": 15905 }, { "epoch": 0.77666015625, "grad_norm": 0.21064819395542145, "learning_rate": 0.00010467306080835425, "loss": 1.7354, "step": 15906 }, { "epoch": 0.776708984375, "grad_norm": 0.21475905179977417, "learning_rate": 0.00010465017562264653, "loss": 1.7625, "step": 15907 }, { "epoch": 0.7767578125, "grad_norm": 0.2097315788269043, "learning_rate": 0.00010462729456550378, "loss": 1.7252, "step": 15908 }, { "epoch": 0.776806640625, "grad_norm": 0.21544136106967926, "learning_rate": 0.00010460441763748024, "loss": 1.7391, "step": 15909 }, { "epoch": 0.77685546875, "grad_norm": 0.2136451005935669, "learning_rate": 0.00010458154483913071, "loss": 1.7542, "step": 15910 }, { "epoch": 0.776904296875, "grad_norm": 0.2060265988111496, "learning_rate": 0.00010455867617100917, "loss": 1.732, "step": 15911 }, { "epoch": 0.776953125, "grad_norm": 0.2733600437641144, "learning_rate": 0.00010453581163367021, "loss": 1.7416, "step": 15912 }, { "epoch": 0.777001953125, "grad_norm": 0.20328488945960999, "learning_rate": 0.00010451295122766767, "loss": 1.713, "step": 15913 }, { "epoch": 0.77705078125, "grad_norm": 0.26709744334220886, "learning_rate": 0.00010449009495355585, "loss": 1.7458, "step": 15914 }, { "epoch": 0.777099609375, "grad_norm": 0.24243876338005066, "learning_rate": 0.00010446724281188844, "loss": 1.7168, "step": 15915 }, { "epoch": 0.7771484375, "grad_norm": 0.2377704232931137, "learning_rate": 0.00010444439480321953, "loss": 1.7378, "step": 15916 }, { "epoch": 0.777197265625, "grad_norm": 0.2620834410190582, "learning_rate": 0.00010442155092810268, "loss": 1.7622, "step": 15917 }, { "epoch": 0.77724609375, "grad_norm": 0.21761111915111542, "learning_rate": 0.00010439871118709156, "loss": 1.7157, "step": 15918 }, { "epoch": 0.777294921875, "grad_norm": 0.2337900847196579, "learning_rate": 0.00010437587558073974, "loss": 1.7356, "step": 15919 }, { "epoch": 0.77734375, "grad_norm": 0.24002987146377563, "learning_rate": 0.00010435304410960069, "loss": 1.7447, "step": 15920 }, { "epoch": 0.777392578125, "grad_norm": 0.21364808082580566, "learning_rate": 0.00010433021677422769, "loss": 1.7354, "step": 15921 }, { "epoch": 0.77744140625, "grad_norm": 0.23055478930473328, "learning_rate": 0.00010430739357517399, "loss": 1.738, "step": 15922 }, { "epoch": 0.777490234375, "grad_norm": 0.22154688835144043, "learning_rate": 0.00010428457451299275, "loss": 1.7173, "step": 15923 }, { "epoch": 0.7775390625, "grad_norm": 0.22170941531658173, "learning_rate": 0.00010426175958823696, "loss": 1.7491, "step": 15924 }, { "epoch": 0.777587890625, "grad_norm": 0.22967839241027832, "learning_rate": 0.00010423894880145967, "loss": 1.7598, "step": 15925 }, { "epoch": 0.77763671875, "grad_norm": 0.23327936232089996, "learning_rate": 0.00010421614215321365, "loss": 1.7393, "step": 15926 }, { "epoch": 0.777685546875, "grad_norm": 0.22441579401493073, "learning_rate": 0.00010419333964405153, "loss": 1.7371, "step": 15927 }, { "epoch": 0.777734375, "grad_norm": 0.21520014107227325, "learning_rate": 0.00010417054127452616, "loss": 1.7135, "step": 15928 }, { "epoch": 0.777783203125, "grad_norm": 0.2596590220928192, "learning_rate": 0.00010414774704518986, "loss": 1.7217, "step": 15929 }, { "epoch": 0.77783203125, "grad_norm": 0.2020244002342224, "learning_rate": 0.00010412495695659529, "loss": 1.7163, "step": 15930 }, { "epoch": 0.777880859375, "grad_norm": 0.21672315895557404, "learning_rate": 0.00010410217100929453, "loss": 1.7375, "step": 15931 }, { "epoch": 0.7779296875, "grad_norm": 0.2069716453552246, "learning_rate": 0.00010407938920384009, "loss": 1.7485, "step": 15932 }, { "epoch": 0.777978515625, "grad_norm": 0.20502439141273499, "learning_rate": 0.00010405661154078388, "loss": 1.7312, "step": 15933 }, { "epoch": 0.77802734375, "grad_norm": 0.22947119176387787, "learning_rate": 0.00010403383802067805, "loss": 1.7493, "step": 15934 }, { "epoch": 0.778076171875, "grad_norm": 0.19660289585590363, "learning_rate": 0.0001040110686440745, "loss": 1.7379, "step": 15935 }, { "epoch": 0.778125, "grad_norm": 0.22200219333171844, "learning_rate": 0.00010398830341152507, "loss": 1.7329, "step": 15936 }, { "epoch": 0.778173828125, "grad_norm": 0.19949598610401154, "learning_rate": 0.00010396554232358152, "loss": 1.7416, "step": 15937 }, { "epoch": 0.77822265625, "grad_norm": 0.2271718978881836, "learning_rate": 0.00010394278538079542, "loss": 1.7705, "step": 15938 }, { "epoch": 0.778271484375, "grad_norm": 0.2472742199897766, "learning_rate": 0.00010392003258371838, "loss": 1.7601, "step": 15939 }, { "epoch": 0.7783203125, "grad_norm": 0.2006935477256775, "learning_rate": 0.00010389728393290177, "loss": 1.7362, "step": 15940 }, { "epoch": 0.778369140625, "grad_norm": 0.23218001425266266, "learning_rate": 0.00010387453942889697, "loss": 1.7348, "step": 15941 }, { "epoch": 0.77841796875, "grad_norm": 0.22226794064044952, "learning_rate": 0.00010385179907225517, "loss": 1.7235, "step": 15942 }, { "epoch": 0.778466796875, "grad_norm": 0.22320644557476044, "learning_rate": 0.00010382906286352752, "loss": 1.736, "step": 15943 }, { "epoch": 0.778515625, "grad_norm": 0.23078039288520813, "learning_rate": 0.00010380633080326505, "loss": 1.7213, "step": 15944 }, { "epoch": 0.778564453125, "grad_norm": 0.204011470079422, "learning_rate": 0.00010378360289201869, "loss": 1.7328, "step": 15945 }, { "epoch": 0.77861328125, "grad_norm": 0.22720107436180115, "learning_rate": 0.00010376087913033932, "loss": 1.7379, "step": 15946 }, { "epoch": 0.778662109375, "grad_norm": 0.22467464208602905, "learning_rate": 0.00010373815951877749, "loss": 1.7427, "step": 15947 }, { "epoch": 0.7787109375, "grad_norm": 0.23855896294116974, "learning_rate": 0.00010371544405788411, "loss": 1.7232, "step": 15948 }, { "epoch": 0.778759765625, "grad_norm": 0.2010868936777115, "learning_rate": 0.00010369273274820948, "loss": 1.7236, "step": 15949 }, { "epoch": 0.77880859375, "grad_norm": 0.22137348353862762, "learning_rate": 0.00010367002559030406, "loss": 1.7296, "step": 15950 }, { "epoch": 0.778857421875, "grad_norm": 0.20827758312225342, "learning_rate": 0.00010364732258471826, "loss": 1.7272, "step": 15951 }, { "epoch": 0.77890625, "grad_norm": 0.20608367025852203, "learning_rate": 0.00010362462373200224, "loss": 1.7655, "step": 15952 }, { "epoch": 0.778955078125, "grad_norm": 0.2351643443107605, "learning_rate": 0.00010360192903270615, "loss": 1.7381, "step": 15953 }, { "epoch": 0.77900390625, "grad_norm": 0.19117626547813416, "learning_rate": 0.00010357923848738, "loss": 1.7094, "step": 15954 }, { "epoch": 0.779052734375, "grad_norm": 0.23230157792568207, "learning_rate": 0.00010355655209657372, "loss": 1.7578, "step": 15955 }, { "epoch": 0.7791015625, "grad_norm": 0.19555072486400604, "learning_rate": 0.00010353386986083716, "loss": 1.7591, "step": 15956 }, { "epoch": 0.779150390625, "grad_norm": 0.2071177065372467, "learning_rate": 0.00010351119178072, "loss": 1.7329, "step": 15957 }, { "epoch": 0.77919921875, "grad_norm": 0.2116207629442215, "learning_rate": 0.00010348851785677188, "loss": 1.7393, "step": 15958 }, { "epoch": 0.779248046875, "grad_norm": 0.22254174947738647, "learning_rate": 0.00010346584808954235, "loss": 1.7202, "step": 15959 }, { "epoch": 0.779296875, "grad_norm": 0.24686919152736664, "learning_rate": 0.00010344318247958078, "loss": 1.7493, "step": 15960 }, { "epoch": 0.779345703125, "grad_norm": 0.2169325351715088, "learning_rate": 0.00010342052102743648, "loss": 1.7665, "step": 15961 }, { "epoch": 0.77939453125, "grad_norm": 0.23413068056106567, "learning_rate": 0.00010339786373365876, "loss": 1.7239, "step": 15962 }, { "epoch": 0.779443359375, "grad_norm": 0.18966948986053467, "learning_rate": 0.00010337521059879661, "loss": 1.7136, "step": 15963 }, { "epoch": 0.7794921875, "grad_norm": 0.21804772317409515, "learning_rate": 0.00010335256162339915, "loss": 1.744, "step": 15964 }, { "epoch": 0.779541015625, "grad_norm": 0.18890923261642456, "learning_rate": 0.00010332991680801526, "loss": 1.7326, "step": 15965 }, { "epoch": 0.77958984375, "grad_norm": 0.2213725745677948, "learning_rate": 0.0001033072761531938, "loss": 1.7136, "step": 15966 }, { "epoch": 0.779638671875, "grad_norm": 0.20430110394954681, "learning_rate": 0.0001032846396594834, "loss": 1.7637, "step": 15967 }, { "epoch": 0.7796875, "grad_norm": 0.22054003179073334, "learning_rate": 0.00010326200732743272, "loss": 1.7594, "step": 15968 }, { "epoch": 0.779736328125, "grad_norm": 0.2044108808040619, "learning_rate": 0.00010323937915759022, "loss": 1.7691, "step": 15969 }, { "epoch": 0.77978515625, "grad_norm": 0.2260400503873825, "learning_rate": 0.00010321675515050438, "loss": 1.7495, "step": 15970 }, { "epoch": 0.779833984375, "grad_norm": 0.1989382654428482, "learning_rate": 0.0001031941353067235, "loss": 1.7597, "step": 15971 }, { "epoch": 0.7798828125, "grad_norm": 0.20722906291484833, "learning_rate": 0.00010317151962679575, "loss": 1.71, "step": 15972 }, { "epoch": 0.779931640625, "grad_norm": 0.23206965625286102, "learning_rate": 0.0001031489081112693, "loss": 1.7505, "step": 15973 }, { "epoch": 0.77998046875, "grad_norm": 0.20972256362438202, "learning_rate": 0.0001031263007606921, "loss": 1.707, "step": 15974 }, { "epoch": 0.780029296875, "grad_norm": 0.23905150592327118, "learning_rate": 0.0001031036975756121, "loss": 1.7201, "step": 15975 }, { "epoch": 0.780078125, "grad_norm": 0.21018405258655548, "learning_rate": 0.00010308109855657707, "loss": 1.7349, "step": 15976 }, { "epoch": 0.780126953125, "grad_norm": 0.2101045399904251, "learning_rate": 0.00010305850370413477, "loss": 1.7471, "step": 15977 }, { "epoch": 0.78017578125, "grad_norm": 0.21280086040496826, "learning_rate": 0.00010303591301883274, "loss": 1.7563, "step": 15978 }, { "epoch": 0.780224609375, "grad_norm": 0.1880335658788681, "learning_rate": 0.00010301332650121853, "loss": 1.7333, "step": 15979 }, { "epoch": 0.7802734375, "grad_norm": 0.2304251492023468, "learning_rate": 0.00010299074415183952, "loss": 1.7422, "step": 15980 }, { "epoch": 0.780322265625, "grad_norm": 0.19268439710140228, "learning_rate": 0.00010296816597124301, "loss": 1.7547, "step": 15981 }, { "epoch": 0.78037109375, "grad_norm": 0.24133850634098053, "learning_rate": 0.0001029455919599762, "loss": 1.7283, "step": 15982 }, { "epoch": 0.780419921875, "grad_norm": 0.21347711980342865, "learning_rate": 0.00010292302211858629, "loss": 1.7283, "step": 15983 }, { "epoch": 0.78046875, "grad_norm": 0.20028868317604065, "learning_rate": 0.00010290045644762002, "loss": 1.7399, "step": 15984 }, { "epoch": 0.780517578125, "grad_norm": 0.2464362233877182, "learning_rate": 0.00010287789494762459, "loss": 1.7421, "step": 15985 }, { "epoch": 0.78056640625, "grad_norm": 0.17130929231643677, "learning_rate": 0.00010285533761914659, "loss": 1.7488, "step": 15986 }, { "epoch": 0.780615234375, "grad_norm": 0.22223228216171265, "learning_rate": 0.00010283278446273279, "loss": 1.7159, "step": 15987 }, { "epoch": 0.7806640625, "grad_norm": 0.17232754826545715, "learning_rate": 0.00010281023547892979, "loss": 1.7142, "step": 15988 }, { "epoch": 0.780712890625, "grad_norm": 0.2247391641139984, "learning_rate": 0.00010278769066828404, "loss": 1.7221, "step": 15989 }, { "epoch": 0.78076171875, "grad_norm": 0.19195009768009186, "learning_rate": 0.00010276515003134199, "loss": 1.7352, "step": 15990 }, { "epoch": 0.780810546875, "grad_norm": 0.21235400438308716, "learning_rate": 0.0001027426135686499, "loss": 1.7059, "step": 15991 }, { "epoch": 0.780859375, "grad_norm": 0.2139524221420288, "learning_rate": 0.00010272008128075394, "loss": 1.7405, "step": 15992 }, { "epoch": 0.780908203125, "grad_norm": 0.20946182310581207, "learning_rate": 0.0001026975531682002, "loss": 1.7429, "step": 15993 }, { "epoch": 0.78095703125, "grad_norm": 0.23407313227653503, "learning_rate": 0.00010267502923153474, "loss": 1.7251, "step": 15994 }, { "epoch": 0.781005859375, "grad_norm": 0.21104392409324646, "learning_rate": 0.00010265250947130334, "loss": 1.7362, "step": 15995 }, { "epoch": 0.7810546875, "grad_norm": 0.23869125545024872, "learning_rate": 0.00010262999388805188, "loss": 1.7768, "step": 15996 }, { "epoch": 0.781103515625, "grad_norm": 0.23039160668849945, "learning_rate": 0.00010260748248232596, "loss": 1.7376, "step": 15997 }, { "epoch": 0.78115234375, "grad_norm": 0.21326681971549988, "learning_rate": 0.00010258497525467123, "loss": 1.7239, "step": 15998 }, { "epoch": 0.781201171875, "grad_norm": 0.28352996706962585, "learning_rate": 0.00010256247220563311, "loss": 1.7481, "step": 15999 }, { "epoch": 0.78125, "grad_norm": 0.21020503342151642, "learning_rate": 0.00010253997333575705, "loss": 1.7186, "step": 16000 }, { "epoch": 0.781298828125, "grad_norm": 0.24262648820877075, "learning_rate": 0.00010251747864558833, "loss": 1.7306, "step": 16001 }, { "epoch": 0.78134765625, "grad_norm": 0.25145113468170166, "learning_rate": 0.00010249498813567194, "loss": 1.7449, "step": 16002 }, { "epoch": 0.781396484375, "grad_norm": 0.1966390758752823, "learning_rate": 0.00010247250180655324, "loss": 1.7717, "step": 16003 }, { "epoch": 0.7814453125, "grad_norm": 0.2738315761089325, "learning_rate": 0.00010245001965877698, "loss": 1.7273, "step": 16004 }, { "epoch": 0.781494140625, "grad_norm": 0.19559872150421143, "learning_rate": 0.0001024275416928882, "loss": 1.7281, "step": 16005 }, { "epoch": 0.78154296875, "grad_norm": 0.23082588613033295, "learning_rate": 0.00010240506790943154, "loss": 1.7146, "step": 16006 }, { "epoch": 0.781591796875, "grad_norm": 0.22252823412418365, "learning_rate": 0.00010238259830895172, "loss": 1.7232, "step": 16007 }, { "epoch": 0.781640625, "grad_norm": 0.22889360785484314, "learning_rate": 0.00010236013289199331, "loss": 1.7576, "step": 16008 }, { "epoch": 0.781689453125, "grad_norm": 0.20886924862861633, "learning_rate": 0.0001023376716591008, "loss": 1.7249, "step": 16009 }, { "epoch": 0.78173828125, "grad_norm": 0.2338382750749588, "learning_rate": 0.0001023152146108185, "loss": 1.7139, "step": 16010 }, { "epoch": 0.781787109375, "grad_norm": 0.17519409954547882, "learning_rate": 0.0001022927617476907, "loss": 1.734, "step": 16011 }, { "epoch": 0.7818359375, "grad_norm": 0.22902940213680267, "learning_rate": 0.0001022703130702616, "loss": 1.7165, "step": 16012 }, { "epoch": 0.781884765625, "grad_norm": 0.18589691817760468, "learning_rate": 0.00010224786857907523, "loss": 1.7547, "step": 16013 }, { "epoch": 0.78193359375, "grad_norm": 0.21667569875717163, "learning_rate": 0.00010222542827467555, "loss": 1.7369, "step": 16014 }, { "epoch": 0.781982421875, "grad_norm": 0.22409026324748993, "learning_rate": 0.00010220299215760642, "loss": 1.7292, "step": 16015 }, { "epoch": 0.78203125, "grad_norm": 0.19279663264751434, "learning_rate": 0.00010218056022841157, "loss": 1.7314, "step": 16016 }, { "epoch": 0.782080078125, "grad_norm": 0.2053004801273346, "learning_rate": 0.00010215813248763478, "loss": 1.739, "step": 16017 }, { "epoch": 0.78212890625, "grad_norm": 0.22221355140209198, "learning_rate": 0.00010213570893581937, "loss": 1.7415, "step": 16018 }, { "epoch": 0.782177734375, "grad_norm": 0.18449902534484863, "learning_rate": 0.00010211328957350907, "loss": 1.7292, "step": 16019 }, { "epoch": 0.7822265625, "grad_norm": 0.2276257425546646, "learning_rate": 0.00010209087440124697, "loss": 1.7521, "step": 16020 }, { "epoch": 0.782275390625, "grad_norm": 0.16600067913532257, "learning_rate": 0.00010206846341957654, "loss": 1.7374, "step": 16021 }, { "epoch": 0.78232421875, "grad_norm": 0.2268042117357254, "learning_rate": 0.00010204605662904074, "loss": 1.7488, "step": 16022 }, { "epoch": 0.782373046875, "grad_norm": 0.18672236800193787, "learning_rate": 0.00010202365403018279, "loss": 1.746, "step": 16023 }, { "epoch": 0.782421875, "grad_norm": 0.21273848414421082, "learning_rate": 0.00010200125562354546, "loss": 1.7298, "step": 16024 }, { "epoch": 0.782470703125, "grad_norm": 0.20997712016105652, "learning_rate": 0.00010197886140967182, "loss": 1.7684, "step": 16025 }, { "epoch": 0.78251953125, "grad_norm": 0.22148174047470093, "learning_rate": 0.00010195647138910438, "loss": 1.7344, "step": 16026 }, { "epoch": 0.782568359375, "grad_norm": 0.20844464004039764, "learning_rate": 0.00010193408556238592, "loss": 1.7287, "step": 16027 }, { "epoch": 0.7826171875, "grad_norm": 0.22887514531612396, "learning_rate": 0.0001019117039300589, "loss": 1.7101, "step": 16028 }, { "epoch": 0.782666015625, "grad_norm": 0.20246219635009766, "learning_rate": 0.0001018893264926658, "loss": 1.7329, "step": 16029 }, { "epoch": 0.78271484375, "grad_norm": 0.18939699232578278, "learning_rate": 0.00010186695325074894, "loss": 1.7347, "step": 16030 }, { "epoch": 0.782763671875, "grad_norm": 0.20527099072933197, "learning_rate": 0.0001018445842048506, "loss": 1.753, "step": 16031 }, { "epoch": 0.7828125, "grad_norm": 0.2091841995716095, "learning_rate": 0.00010182221935551284, "loss": 1.7323, "step": 16032 }, { "epoch": 0.782861328125, "grad_norm": 0.18467245995998383, "learning_rate": 0.00010179985870327779, "loss": 1.7599, "step": 16033 }, { "epoch": 0.78291015625, "grad_norm": 0.22692027688026428, "learning_rate": 0.0001017775022486872, "loss": 1.7423, "step": 16034 }, { "epoch": 0.782958984375, "grad_norm": 0.18769772350788116, "learning_rate": 0.00010175514999228314, "loss": 1.7592, "step": 16035 }, { "epoch": 0.7830078125, "grad_norm": 0.18684391677379608, "learning_rate": 0.00010173280193460704, "loss": 1.7149, "step": 16036 }, { "epoch": 0.783056640625, "grad_norm": 0.17755264043807983, "learning_rate": 0.00010171045807620083, "loss": 1.7185, "step": 16037 }, { "epoch": 0.78310546875, "grad_norm": 0.1859411597251892, "learning_rate": 0.00010168811841760576, "loss": 1.7201, "step": 16038 }, { "epoch": 0.783154296875, "grad_norm": 0.20669056475162506, "learning_rate": 0.00010166578295936355, "loss": 1.7276, "step": 16039 }, { "epoch": 0.783203125, "grad_norm": 0.19172221422195435, "learning_rate": 0.00010164345170201516, "loss": 1.7493, "step": 16040 }, { "epoch": 0.783251953125, "grad_norm": 0.2231704443693161, "learning_rate": 0.00010162112464610215, "loss": 1.7279, "step": 16041 }, { "epoch": 0.78330078125, "grad_norm": 0.22399011254310608, "learning_rate": 0.00010159880179216539, "loss": 1.7297, "step": 16042 }, { "epoch": 0.783349609375, "grad_norm": 0.2053135633468628, "learning_rate": 0.00010157648314074596, "loss": 1.7438, "step": 16043 }, { "epoch": 0.7833984375, "grad_norm": 0.1945188194513321, "learning_rate": 0.00010155416869238482, "loss": 1.7513, "step": 16044 }, { "epoch": 0.783447265625, "grad_norm": 0.1979161500930786, "learning_rate": 0.00010153185844762275, "loss": 1.723, "step": 16045 }, { "epoch": 0.78349609375, "grad_norm": 0.19669345021247864, "learning_rate": 0.00010150955240700047, "loss": 1.7613, "step": 16046 }, { "epoch": 0.783544921875, "grad_norm": 0.22319000959396362, "learning_rate": 0.00010148725057105853, "loss": 1.7676, "step": 16047 }, { "epoch": 0.78359375, "grad_norm": 0.1554364562034607, "learning_rate": 0.0001014649529403375, "loss": 1.7311, "step": 16048 }, { "epoch": 0.783642578125, "grad_norm": 0.21202310919761658, "learning_rate": 0.00010144265951537772, "loss": 1.7109, "step": 16049 }, { "epoch": 0.78369140625, "grad_norm": 0.20248958468437195, "learning_rate": 0.00010142037029671954, "loss": 1.7361, "step": 16050 }, { "epoch": 0.783740234375, "grad_norm": 0.2572457790374756, "learning_rate": 0.00010139808528490321, "loss": 1.74, "step": 16051 }, { "epoch": 0.7837890625, "grad_norm": 0.20714905858039856, "learning_rate": 0.00010137580448046861, "loss": 1.7229, "step": 16052 }, { "epoch": 0.783837890625, "grad_norm": 0.22462815046310425, "learning_rate": 0.00010135352788395599, "loss": 1.7464, "step": 16053 }, { "epoch": 0.78388671875, "grad_norm": 0.21205157041549683, "learning_rate": 0.00010133125549590504, "loss": 1.7325, "step": 16054 }, { "epoch": 0.783935546875, "grad_norm": 0.2434646338224411, "learning_rate": 0.00010130898731685575, "loss": 1.7506, "step": 16055 }, { "epoch": 0.783984375, "grad_norm": 0.19961783289909363, "learning_rate": 0.00010128672334734756, "loss": 1.7475, "step": 16056 }, { "epoch": 0.784033203125, "grad_norm": 0.2621971070766449, "learning_rate": 0.00010126446358792033, "loss": 1.7488, "step": 16057 }, { "epoch": 0.78408203125, "grad_norm": 0.22922255098819733, "learning_rate": 0.00010124220803911326, "loss": 1.7419, "step": 16058 }, { "epoch": 0.784130859375, "grad_norm": 0.19842609763145447, "learning_rate": 0.000101219956701466, "loss": 1.7627, "step": 16059 }, { "epoch": 0.7841796875, "grad_norm": 0.20999735593795776, "learning_rate": 0.00010119770957551765, "loss": 1.7383, "step": 16060 }, { "epoch": 0.784228515625, "grad_norm": 0.20633333921432495, "learning_rate": 0.00010117546666180744, "loss": 1.7228, "step": 16061 }, { "epoch": 0.78427734375, "grad_norm": 0.1775844544172287, "learning_rate": 0.00010115322796087442, "loss": 1.7351, "step": 16062 }, { "epoch": 0.784326171875, "grad_norm": 0.2187204509973526, "learning_rate": 0.0001011309934732576, "loss": 1.7498, "step": 16063 }, { "epoch": 0.784375, "grad_norm": 0.17562882602214813, "learning_rate": 0.00010110876319949584, "loss": 1.7307, "step": 16064 }, { "epoch": 0.784423828125, "grad_norm": 0.22398951649665833, "learning_rate": 0.00010108653714012791, "loss": 1.7593, "step": 16065 }, { "epoch": 0.78447265625, "grad_norm": 0.18393175303936005, "learning_rate": 0.00010106431529569249, "loss": 1.7115, "step": 16066 }, { "epoch": 0.784521484375, "grad_norm": 0.21201741695404053, "learning_rate": 0.00010104209766672817, "loss": 1.7421, "step": 16067 }, { "epoch": 0.7845703125, "grad_norm": 0.1902483105659485, "learning_rate": 0.00010101988425377326, "loss": 1.7485, "step": 16068 }, { "epoch": 0.784619140625, "grad_norm": 0.2156866043806076, "learning_rate": 0.00010099767505736634, "loss": 1.7374, "step": 16069 }, { "epoch": 0.78466796875, "grad_norm": 0.21902914345264435, "learning_rate": 0.00010097547007804545, "loss": 1.7327, "step": 16070 }, { "epoch": 0.784716796875, "grad_norm": 0.21505044400691986, "learning_rate": 0.00010095326931634895, "loss": 1.7383, "step": 16071 }, { "epoch": 0.784765625, "grad_norm": 0.22101901471614838, "learning_rate": 0.00010093107277281468, "loss": 1.7394, "step": 16072 }, { "epoch": 0.784814453125, "grad_norm": 0.2143956571817398, "learning_rate": 0.00010090888044798083, "loss": 1.7403, "step": 16073 }, { "epoch": 0.78486328125, "grad_norm": 0.22838616371154785, "learning_rate": 0.00010088669234238501, "loss": 1.7176, "step": 16074 }, { "epoch": 0.784912109375, "grad_norm": 0.1980375498533249, "learning_rate": 0.0001008645084565652, "loss": 1.7142, "step": 16075 }, { "epoch": 0.7849609375, "grad_norm": 0.20489414036273956, "learning_rate": 0.00010084232879105887, "loss": 1.7015, "step": 16076 }, { "epoch": 0.785009765625, "grad_norm": 0.1806100606918335, "learning_rate": 0.0001008201533464036, "loss": 1.7375, "step": 16077 }, { "epoch": 0.78505859375, "grad_norm": 0.17953144013881683, "learning_rate": 0.00010079798212313688, "loss": 1.7541, "step": 16078 }, { "epoch": 0.785107421875, "grad_norm": 0.21506048738956451, "learning_rate": 0.000100775815121796, "loss": 1.7291, "step": 16079 }, { "epoch": 0.78515625, "grad_norm": 0.17767265439033508, "learning_rate": 0.0001007536523429182, "loss": 1.7436, "step": 16080 }, { "epoch": 0.785205078125, "grad_norm": 0.21037092804908752, "learning_rate": 0.00010073149378704066, "loss": 1.7473, "step": 16081 }, { "epoch": 0.78525390625, "grad_norm": 0.18252184987068176, "learning_rate": 0.00010070933945470031, "loss": 1.7639, "step": 16082 }, { "epoch": 0.785302734375, "grad_norm": 0.22379718720912933, "learning_rate": 0.0001006871893464342, "loss": 1.7256, "step": 16083 }, { "epoch": 0.7853515625, "grad_norm": 0.22675684094429016, "learning_rate": 0.0001006650434627791, "loss": 1.7295, "step": 16084 }, { "epoch": 0.785400390625, "grad_norm": 0.19532155990600586, "learning_rate": 0.00010064290180427174, "loss": 1.7115, "step": 16085 }, { "epoch": 0.78544921875, "grad_norm": 0.2616465091705322, "learning_rate": 0.00010062076437144866, "loss": 1.7579, "step": 16086 }, { "epoch": 0.785498046875, "grad_norm": 0.20084704458713531, "learning_rate": 0.00010059863116484653, "loss": 1.7218, "step": 16087 }, { "epoch": 0.785546875, "grad_norm": 0.2324833869934082, "learning_rate": 0.00010057650218500158, "loss": 1.7314, "step": 16088 }, { "epoch": 0.785595703125, "grad_norm": 0.19856783747673035, "learning_rate": 0.00010055437743245038, "loss": 1.7551, "step": 16089 }, { "epoch": 0.78564453125, "grad_norm": 0.2505822777748108, "learning_rate": 0.00010053225690772882, "loss": 1.7272, "step": 16090 }, { "epoch": 0.785693359375, "grad_norm": 0.21730783581733704, "learning_rate": 0.00010051014061137335, "loss": 1.7281, "step": 16091 }, { "epoch": 0.7857421875, "grad_norm": 0.23619161546230316, "learning_rate": 0.00010048802854391973, "loss": 1.7342, "step": 16092 }, { "epoch": 0.785791015625, "grad_norm": 0.22960889339447021, "learning_rate": 0.00010046592070590393, "loss": 1.725, "step": 16093 }, { "epoch": 0.78583984375, "grad_norm": 0.23655280470848083, "learning_rate": 0.00010044381709786176, "loss": 1.7486, "step": 16094 }, { "epoch": 0.785888671875, "grad_norm": 0.22838793694972992, "learning_rate": 0.00010042171772032894, "loss": 1.7605, "step": 16095 }, { "epoch": 0.7859375, "grad_norm": 0.2500551640987396, "learning_rate": 0.00010039962257384103, "loss": 1.732, "step": 16096 }, { "epoch": 0.785986328125, "grad_norm": 0.20954878628253937, "learning_rate": 0.00010037753165893353, "loss": 1.7437, "step": 16097 }, { "epoch": 0.78603515625, "grad_norm": 0.21572257578372955, "learning_rate": 0.00010035544497614184, "loss": 1.7075, "step": 16098 }, { "epoch": 0.786083984375, "grad_norm": 0.23359419405460358, "learning_rate": 0.00010033336252600126, "loss": 1.7076, "step": 16099 }, { "epoch": 0.7861328125, "grad_norm": 0.22791531682014465, "learning_rate": 0.00010031128430904698, "loss": 1.7483, "step": 16100 }, { "epoch": 0.786181640625, "grad_norm": 0.24101226031780243, "learning_rate": 0.00010028921032581411, "loss": 1.7476, "step": 16101 }, { "epoch": 0.78623046875, "grad_norm": 0.20155014097690582, "learning_rate": 0.00010026714057683748, "loss": 1.7514, "step": 16102 }, { "epoch": 0.786279296875, "grad_norm": 0.23281289637088776, "learning_rate": 0.00010024507506265218, "loss": 1.7131, "step": 16103 }, { "epoch": 0.786328125, "grad_norm": 0.18025308847427368, "learning_rate": 0.00010022301378379278, "loss": 1.7147, "step": 16104 }, { "epoch": 0.786376953125, "grad_norm": 0.2231946438550949, "learning_rate": 0.00010020095674079416, "loss": 1.7655, "step": 16105 }, { "epoch": 0.78642578125, "grad_norm": 0.18648365139961243, "learning_rate": 0.00010017890393419067, "loss": 1.7335, "step": 16106 }, { "epoch": 0.786474609375, "grad_norm": 0.19931812584400177, "learning_rate": 0.00010015685536451702, "loss": 1.7254, "step": 16107 }, { "epoch": 0.7865234375, "grad_norm": 0.20645128190517426, "learning_rate": 0.00010013481103230729, "loss": 1.7312, "step": 16108 }, { "epoch": 0.786572265625, "grad_norm": 0.19528837502002716, "learning_rate": 0.00010011277093809607, "loss": 1.7176, "step": 16109 }, { "epoch": 0.78662109375, "grad_norm": 0.19257177412509918, "learning_rate": 0.00010009073508241728, "loss": 1.7126, "step": 16110 }, { "epoch": 0.786669921875, "grad_norm": 0.1938593089580536, "learning_rate": 0.00010006870346580504, "loss": 1.7497, "step": 16111 }, { "epoch": 0.78671875, "grad_norm": 0.1938421130180359, "learning_rate": 0.0001000466760887933, "loss": 1.7285, "step": 16112 }, { "epoch": 0.786767578125, "grad_norm": 0.1873917430639267, "learning_rate": 0.00010002465295191593, "loss": 1.7502, "step": 16113 }, { "epoch": 0.78681640625, "grad_norm": 0.18426986038684845, "learning_rate": 0.00010000263405570666, "loss": 1.7544, "step": 16114 }, { "epoch": 0.786865234375, "grad_norm": 0.18882159888744354, "learning_rate": 9.998061940069916e-05, "loss": 1.7172, "step": 16115 }, { "epoch": 0.7869140625, "grad_norm": 0.18073530495166779, "learning_rate": 9.995860898742694e-05, "loss": 1.7624, "step": 16116 }, { "epoch": 0.786962890625, "grad_norm": 0.21087013185024261, "learning_rate": 9.993660281642349e-05, "loss": 1.7481, "step": 16117 }, { "epoch": 0.78701171875, "grad_norm": 0.19843022525310516, "learning_rate": 9.991460088822208e-05, "loss": 1.7223, "step": 16118 }, { "epoch": 0.787060546875, "grad_norm": 0.18129070103168488, "learning_rate": 9.989260320335605e-05, "loss": 1.7628, "step": 16119 }, { "epoch": 0.787109375, "grad_norm": 0.19219762086868286, "learning_rate": 9.987060976235835e-05, "loss": 1.7256, "step": 16120 }, { "epoch": 0.787158203125, "grad_norm": 0.19101427495479584, "learning_rate": 9.984862056576224e-05, "loss": 1.7277, "step": 16121 }, { "epoch": 0.78720703125, "grad_norm": 0.1891576051712036, "learning_rate": 9.982663561410038e-05, "loss": 1.7445, "step": 16122 }, { "epoch": 0.787255859375, "grad_norm": 0.19410021603107452, "learning_rate": 9.98046549079059e-05, "loss": 1.7418, "step": 16123 }, { "epoch": 0.7873046875, "grad_norm": 0.20694659650325775, "learning_rate": 9.978267844771122e-05, "loss": 1.7283, "step": 16124 }, { "epoch": 0.787353515625, "grad_norm": 0.17964638769626617, "learning_rate": 9.976070623404924e-05, "loss": 1.731, "step": 16125 }, { "epoch": 0.78740234375, "grad_norm": 0.20202967524528503, "learning_rate": 9.97387382674522e-05, "loss": 1.731, "step": 16126 }, { "epoch": 0.787451171875, "grad_norm": 0.19029556214809418, "learning_rate": 9.971677454845268e-05, "loss": 1.7443, "step": 16127 }, { "epoch": 0.7875, "grad_norm": 0.21254776418209076, "learning_rate": 9.969481507758298e-05, "loss": 1.7424, "step": 16128 }, { "epoch": 0.787548828125, "grad_norm": 0.19240917265415192, "learning_rate": 9.967285985537525e-05, "loss": 1.7244, "step": 16129 }, { "epoch": 0.78759765625, "grad_norm": 0.2185835838317871, "learning_rate": 9.965090888236161e-05, "loss": 1.7285, "step": 16130 }, { "epoch": 0.787646484375, "grad_norm": 0.21588803827762604, "learning_rate": 9.962896215907408e-05, "loss": 1.7229, "step": 16131 }, { "epoch": 0.7876953125, "grad_norm": 0.20805883407592773, "learning_rate": 9.960701968604455e-05, "loss": 1.7289, "step": 16132 }, { "epoch": 0.787744140625, "grad_norm": 0.2059025913476944, "learning_rate": 9.95850814638048e-05, "loss": 1.7552, "step": 16133 }, { "epoch": 0.78779296875, "grad_norm": 0.2041051983833313, "learning_rate": 9.956314749288651e-05, "loss": 1.7386, "step": 16134 }, { "epoch": 0.787841796875, "grad_norm": 0.23615317046642303, "learning_rate": 9.954121777382137e-05, "loss": 1.7485, "step": 16135 }, { "epoch": 0.787890625, "grad_norm": 0.179252028465271, "learning_rate": 9.951929230714064e-05, "loss": 1.7164, "step": 16136 }, { "epoch": 0.787939453125, "grad_norm": 0.20169132947921753, "learning_rate": 9.949737109337597e-05, "loss": 1.7323, "step": 16137 }, { "epoch": 0.78798828125, "grad_norm": 0.20450840890407562, "learning_rate": 9.94754541330584e-05, "loss": 1.7316, "step": 16138 }, { "epoch": 0.788037109375, "grad_norm": 0.1773328334093094, "learning_rate": 9.945354142671933e-05, "loss": 1.7129, "step": 16139 }, { "epoch": 0.7880859375, "grad_norm": 0.22952339053153992, "learning_rate": 9.943163297488957e-05, "loss": 1.7484, "step": 16140 }, { "epoch": 0.788134765625, "grad_norm": 0.19285650551319122, "learning_rate": 9.940972877810036e-05, "loss": 1.7369, "step": 16141 }, { "epoch": 0.78818359375, "grad_norm": 0.22062446177005768, "learning_rate": 9.938782883688235e-05, "loss": 1.7534, "step": 16142 }, { "epoch": 0.788232421875, "grad_norm": 0.18492205440998077, "learning_rate": 9.93659331517665e-05, "loss": 1.7356, "step": 16143 }, { "epoch": 0.78828125, "grad_norm": 0.21278390288352966, "learning_rate": 9.93440417232833e-05, "loss": 1.7361, "step": 16144 }, { "epoch": 0.788330078125, "grad_norm": 0.19028626382350922, "learning_rate": 9.932215455196339e-05, "loss": 1.7231, "step": 16145 }, { "epoch": 0.78837890625, "grad_norm": 0.2228592485189438, "learning_rate": 9.930027163833716e-05, "loss": 1.7385, "step": 16146 }, { "epoch": 0.788427734375, "grad_norm": 0.221259206533432, "learning_rate": 9.927839298293503e-05, "loss": 1.7488, "step": 16147 }, { "epoch": 0.7884765625, "grad_norm": 0.22413060069084167, "learning_rate": 9.925651858628721e-05, "loss": 1.7374, "step": 16148 }, { "epoch": 0.788525390625, "grad_norm": 0.2748167812824249, "learning_rate": 9.923464844892385e-05, "loss": 1.7469, "step": 16149 }, { "epoch": 0.78857421875, "grad_norm": 0.21961143612861633, "learning_rate": 9.921278257137501e-05, "loss": 1.7545, "step": 16150 }, { "epoch": 0.788623046875, "grad_norm": 0.2672104835510254, "learning_rate": 9.919092095417068e-05, "loss": 1.7375, "step": 16151 }, { "epoch": 0.788671875, "grad_norm": 0.24824635684490204, "learning_rate": 9.916906359784048e-05, "loss": 1.7191, "step": 16152 }, { "epoch": 0.788720703125, "grad_norm": 0.23394139111042023, "learning_rate": 9.914721050291442e-05, "loss": 1.7339, "step": 16153 }, { "epoch": 0.78876953125, "grad_norm": 0.2746376693248749, "learning_rate": 9.912536166992185e-05, "loss": 1.7407, "step": 16154 }, { "epoch": 0.788818359375, "grad_norm": 0.22010327875614166, "learning_rate": 9.910351709939258e-05, "loss": 1.7413, "step": 16155 }, { "epoch": 0.7888671875, "grad_norm": 0.24704110622406006, "learning_rate": 9.908167679185574e-05, "loss": 1.7434, "step": 16156 }, { "epoch": 0.788916015625, "grad_norm": 0.2135961651802063, "learning_rate": 9.905984074784094e-05, "loss": 1.7519, "step": 16157 }, { "epoch": 0.78896484375, "grad_norm": 0.21265137195587158, "learning_rate": 9.903800896787711e-05, "loss": 1.7281, "step": 16158 }, { "epoch": 0.789013671875, "grad_norm": 0.20949797332286835, "learning_rate": 9.901618145249363e-05, "loss": 1.705, "step": 16159 }, { "epoch": 0.7890625, "grad_norm": 0.2036302089691162, "learning_rate": 9.899435820221932e-05, "loss": 1.7461, "step": 16160 }, { "epoch": 0.789111328125, "grad_norm": 0.2190365046262741, "learning_rate": 9.897253921758314e-05, "loss": 1.7273, "step": 16161 }, { "epoch": 0.78916015625, "grad_norm": 0.20739303529262543, "learning_rate": 9.895072449911388e-05, "loss": 1.7181, "step": 16162 }, { "epoch": 0.789208984375, "grad_norm": 0.26278769969940186, "learning_rate": 9.892891404734027e-05, "loss": 1.7548, "step": 16163 }, { "epoch": 0.7892578125, "grad_norm": 0.18183162808418274, "learning_rate": 9.890710786279086e-05, "loss": 1.7447, "step": 16164 }, { "epoch": 0.789306640625, "grad_norm": 0.24172456562519073, "learning_rate": 9.888530594599415e-05, "loss": 1.7268, "step": 16165 }, { "epoch": 0.78935546875, "grad_norm": 0.18205249309539795, "learning_rate": 9.886350829747859e-05, "loss": 1.7482, "step": 16166 }, { "epoch": 0.789404296875, "grad_norm": 0.25591713190078735, "learning_rate": 9.884171491777236e-05, "loss": 1.7099, "step": 16167 }, { "epoch": 0.789453125, "grad_norm": 0.20033124089241028, "learning_rate": 9.881992580740373e-05, "loss": 1.7444, "step": 16168 }, { "epoch": 0.789501953125, "grad_norm": 0.2413547933101654, "learning_rate": 9.879814096690078e-05, "loss": 1.753, "step": 16169 }, { "epoch": 0.78955078125, "grad_norm": 0.1958892047405243, "learning_rate": 9.87763603967913e-05, "loss": 1.7379, "step": 16170 }, { "epoch": 0.789599609375, "grad_norm": 0.2225836217403412, "learning_rate": 9.875458409760347e-05, "loss": 1.7516, "step": 16171 }, { "epoch": 0.7896484375, "grad_norm": 0.1931510716676712, "learning_rate": 9.873281206986474e-05, "loss": 1.7366, "step": 16172 }, { "epoch": 0.789697265625, "grad_norm": 0.20459012687206268, "learning_rate": 9.871104431410306e-05, "loss": 1.7208, "step": 16173 }, { "epoch": 0.78974609375, "grad_norm": 0.18845844268798828, "learning_rate": 9.868928083084572e-05, "loss": 1.7493, "step": 16174 }, { "epoch": 0.789794921875, "grad_norm": 0.20561645925045013, "learning_rate": 9.866752162062042e-05, "loss": 1.7347, "step": 16175 }, { "epoch": 0.78984375, "grad_norm": 0.19254371523857117, "learning_rate": 9.864576668395427e-05, "loss": 1.7268, "step": 16176 }, { "epoch": 0.789892578125, "grad_norm": 0.2333666980266571, "learning_rate": 9.862401602137477e-05, "loss": 1.7461, "step": 16177 }, { "epoch": 0.78994140625, "grad_norm": 0.20750777423381805, "learning_rate": 9.86022696334089e-05, "loss": 1.7272, "step": 16178 }, { "epoch": 0.789990234375, "grad_norm": 0.2214708775281906, "learning_rate": 9.858052752058373e-05, "loss": 1.771, "step": 16179 }, { "epoch": 0.7900390625, "grad_norm": 0.20198573172092438, "learning_rate": 9.855878968342621e-05, "loss": 1.7371, "step": 16180 }, { "epoch": 0.790087890625, "grad_norm": 0.1877024918794632, "learning_rate": 9.85370561224632e-05, "loss": 1.7526, "step": 16181 }, { "epoch": 0.79013671875, "grad_norm": 0.1923818290233612, "learning_rate": 9.851532683822135e-05, "loss": 1.7476, "step": 16182 }, { "epoch": 0.790185546875, "grad_norm": 0.19352950155735016, "learning_rate": 9.849360183122738e-05, "loss": 1.7378, "step": 16183 }, { "epoch": 0.790234375, "grad_norm": 0.19425556063652039, "learning_rate": 9.847188110200775e-05, "loss": 1.6977, "step": 16184 }, { "epoch": 0.790283203125, "grad_norm": 0.2138962745666504, "learning_rate": 9.845016465108893e-05, "loss": 1.7474, "step": 16185 }, { "epoch": 0.79033203125, "grad_norm": 0.2009938359260559, "learning_rate": 9.84284524789972e-05, "loss": 1.7291, "step": 16186 }, { "epoch": 0.790380859375, "grad_norm": 0.21195891499519348, "learning_rate": 9.840674458625883e-05, "loss": 1.7238, "step": 16187 }, { "epoch": 0.7904296875, "grad_norm": 0.17746469378471375, "learning_rate": 9.838504097339974e-05, "loss": 1.7387, "step": 16188 }, { "epoch": 0.790478515625, "grad_norm": 0.2050853669643402, "learning_rate": 9.836334164094624e-05, "loss": 1.7237, "step": 16189 }, { "epoch": 0.79052734375, "grad_norm": 0.18154288828372955, "learning_rate": 9.83416465894239e-05, "loss": 1.7441, "step": 16190 }, { "epoch": 0.790576171875, "grad_norm": 0.2204589545726776, "learning_rate": 9.831995581935882e-05, "loss": 1.7485, "step": 16191 }, { "epoch": 0.790625, "grad_norm": 0.20342324674129486, "learning_rate": 9.829826933127642e-05, "loss": 1.7843, "step": 16192 }, { "epoch": 0.790673828125, "grad_norm": 0.2281622588634491, "learning_rate": 9.82765871257026e-05, "loss": 1.7325, "step": 16193 }, { "epoch": 0.79072265625, "grad_norm": 0.18414345383644104, "learning_rate": 9.825490920316254e-05, "loss": 1.7482, "step": 16194 }, { "epoch": 0.790771484375, "grad_norm": 0.20336753129959106, "learning_rate": 9.823323556418179e-05, "loss": 1.7048, "step": 16195 }, { "epoch": 0.7908203125, "grad_norm": 0.18537360429763794, "learning_rate": 9.821156620928557e-05, "loss": 1.7213, "step": 16196 }, { "epoch": 0.790869140625, "grad_norm": 0.22230535745620728, "learning_rate": 9.818990113899908e-05, "loss": 1.7291, "step": 16197 }, { "epoch": 0.79091796875, "grad_norm": 0.1761665642261505, "learning_rate": 9.816824035384737e-05, "loss": 1.7309, "step": 16198 }, { "epoch": 0.790966796875, "grad_norm": 0.2023794949054718, "learning_rate": 9.814658385435543e-05, "loss": 1.7288, "step": 16199 }, { "epoch": 0.791015625, "grad_norm": 0.16901035606861115, "learning_rate": 9.81249316410481e-05, "loss": 1.7123, "step": 16200 }, { "epoch": 0.791064453125, "grad_norm": 0.18621844053268433, "learning_rate": 9.810328371445019e-05, "loss": 1.7298, "step": 16201 }, { "epoch": 0.79111328125, "grad_norm": 0.1731046885251999, "learning_rate": 9.808164007508627e-05, "loss": 1.7437, "step": 16202 }, { "epoch": 0.791162109375, "grad_norm": 0.19872093200683594, "learning_rate": 9.806000072348097e-05, "loss": 1.737, "step": 16203 }, { "epoch": 0.7912109375, "grad_norm": 0.1731557697057724, "learning_rate": 9.803836566015873e-05, "loss": 1.7251, "step": 16204 }, { "epoch": 0.791259765625, "grad_norm": 0.20020700991153717, "learning_rate": 9.80167348856438e-05, "loss": 1.748, "step": 16205 }, { "epoch": 0.79130859375, "grad_norm": 0.1652749925851822, "learning_rate": 9.799510840046054e-05, "loss": 1.7433, "step": 16206 }, { "epoch": 0.791357421875, "grad_norm": 0.20116156339645386, "learning_rate": 9.797348620513307e-05, "loss": 1.7249, "step": 16207 }, { "epoch": 0.79140625, "grad_norm": 0.17440655827522278, "learning_rate": 9.795186830018524e-05, "loss": 1.7416, "step": 16208 }, { "epoch": 0.791455078125, "grad_norm": 0.22042357921600342, "learning_rate": 9.793025468614128e-05, "loss": 1.7473, "step": 16209 }, { "epoch": 0.79150390625, "grad_norm": 0.18222947418689728, "learning_rate": 9.790864536352479e-05, "loss": 1.7208, "step": 16210 }, { "epoch": 0.791552734375, "grad_norm": 0.20690441131591797, "learning_rate": 9.788704033285955e-05, "loss": 1.7427, "step": 16211 }, { "epoch": 0.7916015625, "grad_norm": 0.20884846150875092, "learning_rate": 9.786543959466918e-05, "loss": 1.7155, "step": 16212 }, { "epoch": 0.791650390625, "grad_norm": 0.1636194884777069, "learning_rate": 9.784384314947716e-05, "loss": 1.732, "step": 16213 }, { "epoch": 0.79169921875, "grad_norm": 0.1925991326570511, "learning_rate": 9.782225099780697e-05, "loss": 1.7572, "step": 16214 }, { "epoch": 0.791748046875, "grad_norm": 0.1657593846321106, "learning_rate": 9.780066314018185e-05, "loss": 1.7235, "step": 16215 }, { "epoch": 0.791796875, "grad_norm": 0.21938742697238922, "learning_rate": 9.777907957712501e-05, "loss": 1.7509, "step": 16216 }, { "epoch": 0.791845703125, "grad_norm": 0.18595001101493835, "learning_rate": 9.775750030915956e-05, "loss": 1.7263, "step": 16217 }, { "epoch": 0.79189453125, "grad_norm": 0.2282344251871109, "learning_rate": 9.773592533680848e-05, "loss": 1.7269, "step": 16218 }, { "epoch": 0.791943359375, "grad_norm": 0.2255033254623413, "learning_rate": 9.771435466059468e-05, "loss": 1.7135, "step": 16219 }, { "epoch": 0.7919921875, "grad_norm": 0.1924857795238495, "learning_rate": 9.769278828104092e-05, "loss": 1.7399, "step": 16220 }, { "epoch": 0.792041015625, "grad_norm": 0.18937109410762787, "learning_rate": 9.767122619866988e-05, "loss": 1.7395, "step": 16221 }, { "epoch": 0.79208984375, "grad_norm": 0.17500938475131989, "learning_rate": 9.764966841400413e-05, "loss": 1.7282, "step": 16222 }, { "epoch": 0.792138671875, "grad_norm": 0.17770150303840637, "learning_rate": 9.762811492756617e-05, "loss": 1.6886, "step": 16223 }, { "epoch": 0.7921875, "grad_norm": 0.18755479156970978, "learning_rate": 9.760656573987831e-05, "loss": 1.7401, "step": 16224 }, { "epoch": 0.792236328125, "grad_norm": 0.16954374313354492, "learning_rate": 9.758502085146287e-05, "loss": 1.7376, "step": 16225 }, { "epoch": 0.79228515625, "grad_norm": 0.19870920479297638, "learning_rate": 9.756348026284197e-05, "loss": 1.7591, "step": 16226 }, { "epoch": 0.792333984375, "grad_norm": 0.1777314990758896, "learning_rate": 9.754194397453774e-05, "loss": 1.7274, "step": 16227 }, { "epoch": 0.7923828125, "grad_norm": 0.21874527633190155, "learning_rate": 9.752041198707202e-05, "loss": 1.7512, "step": 16228 }, { "epoch": 0.792431640625, "grad_norm": 0.21565811336040497, "learning_rate": 9.74988843009667e-05, "loss": 1.7603, "step": 16229 }, { "epoch": 0.79248046875, "grad_norm": 0.2053920179605484, "learning_rate": 9.74773609167435e-05, "loss": 1.7068, "step": 16230 }, { "epoch": 0.792529296875, "grad_norm": 0.23548628389835358, "learning_rate": 9.74558418349241e-05, "loss": 1.7541, "step": 16231 }, { "epoch": 0.792578125, "grad_norm": 0.21238131821155548, "learning_rate": 9.743432705602999e-05, "loss": 1.7523, "step": 16232 }, { "epoch": 0.792626953125, "grad_norm": 0.2062956988811493, "learning_rate": 9.741281658058263e-05, "loss": 1.7203, "step": 16233 }, { "epoch": 0.79267578125, "grad_norm": 0.22700031101703644, "learning_rate": 9.739131040910335e-05, "loss": 1.7463, "step": 16234 }, { "epoch": 0.792724609375, "grad_norm": 0.21878796815872192, "learning_rate": 9.736980854211332e-05, "loss": 1.7448, "step": 16235 }, { "epoch": 0.7927734375, "grad_norm": 0.22949965298175812, "learning_rate": 9.734831098013372e-05, "loss": 1.7299, "step": 16236 }, { "epoch": 0.792822265625, "grad_norm": 0.2006070762872696, "learning_rate": 9.732681772368549e-05, "loss": 1.7502, "step": 16237 }, { "epoch": 0.79287109375, "grad_norm": 0.22744065523147583, "learning_rate": 9.73053287732896e-05, "loss": 1.7222, "step": 16238 }, { "epoch": 0.792919921875, "grad_norm": 0.17747072875499725, "learning_rate": 9.72838441294668e-05, "loss": 1.7481, "step": 16239 }, { "epoch": 0.79296875, "grad_norm": 0.22707051038742065, "learning_rate": 9.726236379273784e-05, "loss": 1.7282, "step": 16240 }, { "epoch": 0.793017578125, "grad_norm": 0.20679135620594025, "learning_rate": 9.724088776362327e-05, "loss": 1.7302, "step": 16241 }, { "epoch": 0.79306640625, "grad_norm": 0.18389151990413666, "learning_rate": 9.721941604264362e-05, "loss": 1.7204, "step": 16242 }, { "epoch": 0.793115234375, "grad_norm": 0.19910790026187897, "learning_rate": 9.719794863031922e-05, "loss": 1.7512, "step": 16243 }, { "epoch": 0.7931640625, "grad_norm": 0.1964099258184433, "learning_rate": 9.717648552717045e-05, "loss": 1.7395, "step": 16244 }, { "epoch": 0.793212890625, "grad_norm": 0.20235982537269592, "learning_rate": 9.715502673371732e-05, "loss": 1.742, "step": 16245 }, { "epoch": 0.79326171875, "grad_norm": 0.1986262947320938, "learning_rate": 9.713357225048008e-05, "loss": 1.7437, "step": 16246 }, { "epoch": 0.793310546875, "grad_norm": 0.21767114102840424, "learning_rate": 9.711212207797856e-05, "loss": 1.7304, "step": 16247 }, { "epoch": 0.793359375, "grad_norm": 0.2184060513973236, "learning_rate": 9.709067621673268e-05, "loss": 1.7585, "step": 16248 }, { "epoch": 0.793408203125, "grad_norm": 0.192129448056221, "learning_rate": 9.706923466726221e-05, "loss": 1.7379, "step": 16249 }, { "epoch": 0.79345703125, "grad_norm": 0.19221355020999908, "learning_rate": 9.704779743008676e-05, "loss": 1.7396, "step": 16250 }, { "epoch": 0.793505859375, "grad_norm": 0.22274257242679596, "learning_rate": 9.702636450572591e-05, "loss": 1.7617, "step": 16251 }, { "epoch": 0.7935546875, "grad_norm": 0.1952250450849533, "learning_rate": 9.700493589469911e-05, "loss": 1.7396, "step": 16252 }, { "epoch": 0.793603515625, "grad_norm": 0.25115349888801575, "learning_rate": 9.69835115975257e-05, "loss": 1.7187, "step": 16253 }, { "epoch": 0.79365234375, "grad_norm": 0.2017742544412613, "learning_rate": 9.696209161472485e-05, "loss": 1.7135, "step": 16254 }, { "epoch": 0.793701171875, "grad_norm": 0.25832098722457886, "learning_rate": 9.694067594681579e-05, "loss": 1.705, "step": 16255 }, { "epoch": 0.79375, "grad_norm": 0.18718671798706055, "learning_rate": 9.69192645943175e-05, "loss": 1.7391, "step": 16256 }, { "epoch": 0.793798828125, "grad_norm": 0.23919114470481873, "learning_rate": 9.68978575577489e-05, "loss": 1.7549, "step": 16257 }, { "epoch": 0.79384765625, "grad_norm": 0.18833929300308228, "learning_rate": 9.687645483762877e-05, "loss": 1.7506, "step": 16258 }, { "epoch": 0.793896484375, "grad_norm": 0.20745037496089935, "learning_rate": 9.68550564344759e-05, "loss": 1.726, "step": 16259 }, { "epoch": 0.7939453125, "grad_norm": 0.20598381757736206, "learning_rate": 9.683366234880883e-05, "loss": 1.7496, "step": 16260 }, { "epoch": 0.793994140625, "grad_norm": 0.21982727944850922, "learning_rate": 9.681227258114614e-05, "loss": 1.7139, "step": 16261 }, { "epoch": 0.79404296875, "grad_norm": 0.1895647943019867, "learning_rate": 9.679088713200617e-05, "loss": 1.755, "step": 16262 }, { "epoch": 0.794091796875, "grad_norm": 0.26112043857574463, "learning_rate": 9.676950600190714e-05, "loss": 1.737, "step": 16263 }, { "epoch": 0.794140625, "grad_norm": 0.18844150006771088, "learning_rate": 9.674812919136747e-05, "loss": 1.7239, "step": 16264 }, { "epoch": 0.794189453125, "grad_norm": 0.20711930096149445, "learning_rate": 9.672675670090492e-05, "loss": 1.7169, "step": 16265 }, { "epoch": 0.79423828125, "grad_norm": 0.2114950269460678, "learning_rate": 9.67053885310378e-05, "loss": 1.7453, "step": 16266 }, { "epoch": 0.794287109375, "grad_norm": 0.2003052532672882, "learning_rate": 9.668402468228375e-05, "loss": 1.7294, "step": 16267 }, { "epoch": 0.7943359375, "grad_norm": 0.19665378332138062, "learning_rate": 9.666266515516062e-05, "loss": 1.7573, "step": 16268 }, { "epoch": 0.794384765625, "grad_norm": 0.19611278176307678, "learning_rate": 9.664130995018608e-05, "loss": 1.7417, "step": 16269 }, { "epoch": 0.79443359375, "grad_norm": 0.19176431000232697, "learning_rate": 9.66199590678777e-05, "loss": 1.7461, "step": 16270 }, { "epoch": 0.794482421875, "grad_norm": 0.20513015985488892, "learning_rate": 9.659861250875289e-05, "loss": 1.7723, "step": 16271 }, { "epoch": 0.79453125, "grad_norm": 0.19847896695137024, "learning_rate": 9.657727027332905e-05, "loss": 1.7391, "step": 16272 }, { "epoch": 0.794580078125, "grad_norm": 0.20996816456317902, "learning_rate": 9.655593236212343e-05, "loss": 1.7191, "step": 16273 }, { "epoch": 0.79462890625, "grad_norm": 0.20434343814849854, "learning_rate": 9.653459877565312e-05, "loss": 1.7396, "step": 16274 }, { "epoch": 0.794677734375, "grad_norm": 0.22748565673828125, "learning_rate": 9.651326951443517e-05, "loss": 1.7583, "step": 16275 }, { "epoch": 0.7947265625, "grad_norm": 0.19059105217456818, "learning_rate": 9.649194457898658e-05, "loss": 1.7368, "step": 16276 }, { "epoch": 0.794775390625, "grad_norm": 0.2664650082588196, "learning_rate": 9.64706239698241e-05, "loss": 1.7289, "step": 16277 }, { "epoch": 0.79482421875, "grad_norm": 0.24492336809635162, "learning_rate": 9.644930768746451e-05, "loss": 1.7224, "step": 16278 }, { "epoch": 0.794873046875, "grad_norm": 0.26013875007629395, "learning_rate": 9.642799573242429e-05, "loss": 1.7321, "step": 16279 }, { "epoch": 0.794921875, "grad_norm": 0.24790087342262268, "learning_rate": 9.640668810522018e-05, "loss": 1.7251, "step": 16280 }, { "epoch": 0.794970703125, "grad_norm": 0.21894316375255585, "learning_rate": 9.638538480636838e-05, "loss": 1.7421, "step": 16281 }, { "epoch": 0.79501953125, "grad_norm": 0.22794334590435028, "learning_rate": 9.636408583638536e-05, "loss": 1.7199, "step": 16282 }, { "epoch": 0.795068359375, "grad_norm": 0.21077823638916016, "learning_rate": 9.63427911957871e-05, "loss": 1.7283, "step": 16283 }, { "epoch": 0.7951171875, "grad_norm": 0.20604440569877625, "learning_rate": 9.632150088508998e-05, "loss": 1.729, "step": 16284 }, { "epoch": 0.795166015625, "grad_norm": 0.2079855501651764, "learning_rate": 9.63002149048097e-05, "loss": 1.7582, "step": 16285 }, { "epoch": 0.79521484375, "grad_norm": 0.21194902062416077, "learning_rate": 9.627893325546241e-05, "loss": 1.7437, "step": 16286 }, { "epoch": 0.795263671875, "grad_norm": 0.1941373348236084, "learning_rate": 9.625765593756367e-05, "loss": 1.7072, "step": 16287 }, { "epoch": 0.7953125, "grad_norm": 0.2041316032409668, "learning_rate": 9.623638295162926e-05, "loss": 1.7496, "step": 16288 }, { "epoch": 0.795361328125, "grad_norm": 0.20229023694992065, "learning_rate": 9.621511429817471e-05, "loss": 1.7136, "step": 16289 }, { "epoch": 0.79541015625, "grad_norm": 0.1978979855775833, "learning_rate": 9.619384997771552e-05, "loss": 1.7659, "step": 16290 }, { "epoch": 0.795458984375, "grad_norm": 0.19073985517024994, "learning_rate": 9.617258999076703e-05, "loss": 1.7275, "step": 16291 }, { "epoch": 0.7955078125, "grad_norm": 0.1865220069885254, "learning_rate": 9.615133433784446e-05, "loss": 1.74, "step": 16292 }, { "epoch": 0.795556640625, "grad_norm": 0.20114944875240326, "learning_rate": 9.613008301946301e-05, "loss": 1.7468, "step": 16293 }, { "epoch": 0.79560546875, "grad_norm": 0.19370004534721375, "learning_rate": 9.610883603613768e-05, "loss": 1.734, "step": 16294 }, { "epoch": 0.795654296875, "grad_norm": 0.1931942254304886, "learning_rate": 9.608759338838345e-05, "loss": 1.7384, "step": 16295 }, { "epoch": 0.795703125, "grad_norm": 0.19771528244018555, "learning_rate": 9.606635507671518e-05, "loss": 1.7353, "step": 16296 }, { "epoch": 0.795751953125, "grad_norm": 0.19238877296447754, "learning_rate": 9.604512110164742e-05, "loss": 1.7402, "step": 16297 }, { "epoch": 0.79580078125, "grad_norm": 0.19929955899715424, "learning_rate": 9.602389146369506e-05, "loss": 1.7444, "step": 16298 }, { "epoch": 0.795849609375, "grad_norm": 0.19564782083034515, "learning_rate": 9.600266616337238e-05, "loss": 1.729, "step": 16299 }, { "epoch": 0.7958984375, "grad_norm": 0.2350172996520996, "learning_rate": 9.598144520119396e-05, "loss": 1.7376, "step": 16300 }, { "epoch": 0.795947265625, "grad_norm": 0.17206481099128723, "learning_rate": 9.596022857767394e-05, "loss": 1.7153, "step": 16301 }, { "epoch": 0.79599609375, "grad_norm": 0.24593496322631836, "learning_rate": 9.593901629332675e-05, "loss": 1.7666, "step": 16302 }, { "epoch": 0.796044921875, "grad_norm": 0.18323028087615967, "learning_rate": 9.591780834866628e-05, "loss": 1.7288, "step": 16303 }, { "epoch": 0.79609375, "grad_norm": 0.2251947969198227, "learning_rate": 9.589660474420661e-05, "loss": 1.7437, "step": 16304 }, { "epoch": 0.796142578125, "grad_norm": 0.2290329933166504, "learning_rate": 9.587540548046161e-05, "loss": 1.7159, "step": 16305 }, { "epoch": 0.79619140625, "grad_norm": 0.21424500644207, "learning_rate": 9.585421055794507e-05, "loss": 1.715, "step": 16306 }, { "epoch": 0.796240234375, "grad_norm": 0.2195882499217987, "learning_rate": 9.583301997717065e-05, "loss": 1.7427, "step": 16307 }, { "epoch": 0.7962890625, "grad_norm": 0.1853068619966507, "learning_rate": 9.581183373865194e-05, "loss": 1.7297, "step": 16308 }, { "epoch": 0.796337890625, "grad_norm": 0.23589874804019928, "learning_rate": 9.579065184290238e-05, "loss": 1.7371, "step": 16309 }, { "epoch": 0.79638671875, "grad_norm": 0.18321913480758667, "learning_rate": 9.576947429043536e-05, "loss": 1.7251, "step": 16310 }, { "epoch": 0.796435546875, "grad_norm": 0.2143521010875702, "learning_rate": 9.574830108176415e-05, "loss": 1.7335, "step": 16311 }, { "epoch": 0.796484375, "grad_norm": 0.20428891479969025, "learning_rate": 9.572713221740189e-05, "loss": 1.7313, "step": 16312 }, { "epoch": 0.796533203125, "grad_norm": 0.1964520812034607, "learning_rate": 9.57059676978615e-05, "loss": 1.7083, "step": 16313 }, { "epoch": 0.79658203125, "grad_norm": 0.23292523622512817, "learning_rate": 9.568480752365617e-05, "loss": 1.7361, "step": 16314 }, { "epoch": 0.796630859375, "grad_norm": 0.17345470190048218, "learning_rate": 9.566365169529844e-05, "loss": 1.7302, "step": 16315 }, { "epoch": 0.7966796875, "grad_norm": 0.24351343512535095, "learning_rate": 9.564250021330134e-05, "loss": 1.7478, "step": 16316 }, { "epoch": 0.796728515625, "grad_norm": 0.18687821924686432, "learning_rate": 9.562135307817718e-05, "loss": 1.7413, "step": 16317 }, { "epoch": 0.79677734375, "grad_norm": 0.23375482857227325, "learning_rate": 9.56002102904388e-05, "loss": 1.7348, "step": 16318 }, { "epoch": 0.796826171875, "grad_norm": 0.24023833870887756, "learning_rate": 9.55790718505983e-05, "loss": 1.7517, "step": 16319 }, { "epoch": 0.796875, "grad_norm": 0.19297786056995392, "learning_rate": 9.555793775916827e-05, "loss": 1.7089, "step": 16320 }, { "epoch": 0.796923828125, "grad_norm": 0.24259212613105774, "learning_rate": 9.553680801666072e-05, "loss": 1.7157, "step": 16321 }, { "epoch": 0.79697265625, "grad_norm": 0.2075357288122177, "learning_rate": 9.551568262358782e-05, "loss": 1.7244, "step": 16322 }, { "epoch": 0.797021484375, "grad_norm": 0.23210105299949646, "learning_rate": 9.549456158046155e-05, "loss": 1.7311, "step": 16323 }, { "epoch": 0.7970703125, "grad_norm": 0.2091519981622696, "learning_rate": 9.547344488779374e-05, "loss": 1.748, "step": 16324 }, { "epoch": 0.797119140625, "grad_norm": 0.22507572174072266, "learning_rate": 9.54523325460963e-05, "loss": 1.7238, "step": 16325 }, { "epoch": 0.79716796875, "grad_norm": 0.22273290157318115, "learning_rate": 9.54312245558808e-05, "loss": 1.7459, "step": 16326 }, { "epoch": 0.797216796875, "grad_norm": 0.21885739266872406, "learning_rate": 9.541012091765883e-05, "loss": 1.7238, "step": 16327 }, { "epoch": 0.797265625, "grad_norm": 0.24159978330135345, "learning_rate": 9.53890216319419e-05, "loss": 1.709, "step": 16328 }, { "epoch": 0.797314453125, "grad_norm": 0.19546067714691162, "learning_rate": 9.536792669924121e-05, "loss": 1.7573, "step": 16329 }, { "epoch": 0.79736328125, "grad_norm": 0.2337043583393097, "learning_rate": 9.53468361200683e-05, "loss": 1.7483, "step": 16330 }, { "epoch": 0.797412109375, "grad_norm": 0.18327155709266663, "learning_rate": 9.532574989493398e-05, "loss": 1.7195, "step": 16331 }, { "epoch": 0.7974609375, "grad_norm": 0.23834195733070374, "learning_rate": 9.53046680243496e-05, "loss": 1.71, "step": 16332 }, { "epoch": 0.797509765625, "grad_norm": 0.2158951312303543, "learning_rate": 9.528359050882584e-05, "loss": 1.7376, "step": 16333 }, { "epoch": 0.79755859375, "grad_norm": 0.24747151136398315, "learning_rate": 9.526251734887375e-05, "loss": 1.7341, "step": 16334 }, { "epoch": 0.797607421875, "grad_norm": 0.19516555964946747, "learning_rate": 9.524144854500384e-05, "loss": 1.7251, "step": 16335 }, { "epoch": 0.79765625, "grad_norm": 0.24326027929782867, "learning_rate": 9.522038409772696e-05, "loss": 1.7086, "step": 16336 }, { "epoch": 0.797705078125, "grad_norm": 0.1821555346250534, "learning_rate": 9.519932400755346e-05, "loss": 1.7224, "step": 16337 }, { "epoch": 0.79775390625, "grad_norm": 0.19594568014144897, "learning_rate": 9.517826827499379e-05, "loss": 1.7284, "step": 16338 }, { "epoch": 0.797802734375, "grad_norm": 0.20554761588573456, "learning_rate": 9.515721690055823e-05, "loss": 1.7598, "step": 16339 }, { "epoch": 0.7978515625, "grad_norm": 0.16969946026802063, "learning_rate": 9.513616988475702e-05, "loss": 1.707, "step": 16340 }, { "epoch": 0.797900390625, "grad_norm": 0.2678753733634949, "learning_rate": 9.511512722810026e-05, "loss": 1.7275, "step": 16341 }, { "epoch": 0.79794921875, "grad_norm": 0.1858757883310318, "learning_rate": 9.509408893109787e-05, "loss": 1.7261, "step": 16342 }, { "epoch": 0.797998046875, "grad_norm": 0.24334898591041565, "learning_rate": 9.507305499425977e-05, "loss": 1.7638, "step": 16343 }, { "epoch": 0.798046875, "grad_norm": 0.23067715764045715, "learning_rate": 9.505202541809577e-05, "loss": 1.724, "step": 16344 }, { "epoch": 0.798095703125, "grad_norm": 0.23510047793388367, "learning_rate": 9.503100020311547e-05, "loss": 1.7336, "step": 16345 }, { "epoch": 0.79814453125, "grad_norm": 0.22408123314380646, "learning_rate": 9.500997934982854e-05, "loss": 1.7513, "step": 16346 }, { "epoch": 0.798193359375, "grad_norm": 0.2018115073442459, "learning_rate": 9.498896285874423e-05, "loss": 1.7202, "step": 16347 }, { "epoch": 0.7982421875, "grad_norm": 0.22581374645233154, "learning_rate": 9.496795073037214e-05, "loss": 1.7227, "step": 16348 }, { "epoch": 0.798291015625, "grad_norm": 0.20107531547546387, "learning_rate": 9.49469429652213e-05, "loss": 1.7287, "step": 16349 }, { "epoch": 0.79833984375, "grad_norm": 0.20290717482566833, "learning_rate": 9.492593956380106e-05, "loss": 1.7322, "step": 16350 }, { "epoch": 0.798388671875, "grad_norm": 0.22451409697532654, "learning_rate": 9.490494052662025e-05, "loss": 1.7333, "step": 16351 }, { "epoch": 0.7984375, "grad_norm": 0.19366022944450378, "learning_rate": 9.488394585418797e-05, "loss": 1.731, "step": 16352 }, { "epoch": 0.798486328125, "grad_norm": 0.205631822347641, "learning_rate": 9.486295554701288e-05, "loss": 1.7541, "step": 16353 }, { "epoch": 0.79853515625, "grad_norm": 0.1936027854681015, "learning_rate": 9.484196960560389e-05, "loss": 1.7476, "step": 16354 }, { "epoch": 0.798583984375, "grad_norm": 0.21072404086589813, "learning_rate": 9.482098803046946e-05, "loss": 1.7601, "step": 16355 }, { "epoch": 0.7986328125, "grad_norm": 0.2036409080028534, "learning_rate": 9.480001082211812e-05, "loss": 1.749, "step": 16356 }, { "epoch": 0.798681640625, "grad_norm": 0.2163359522819519, "learning_rate": 9.47790379810583e-05, "loss": 1.7391, "step": 16357 }, { "epoch": 0.79873046875, "grad_norm": 0.17456693947315216, "learning_rate": 9.475806950779826e-05, "loss": 1.7255, "step": 16358 }, { "epoch": 0.798779296875, "grad_norm": 0.21060116589069366, "learning_rate": 9.473710540284623e-05, "loss": 1.735, "step": 16359 }, { "epoch": 0.798828125, "grad_norm": 0.17911472916603088, "learning_rate": 9.471614566671024e-05, "loss": 1.6937, "step": 16360 }, { "epoch": 0.798876953125, "grad_norm": 0.20331153273582458, "learning_rate": 9.469519029989837e-05, "loss": 1.7548, "step": 16361 }, { "epoch": 0.79892578125, "grad_norm": 0.177165687084198, "learning_rate": 9.46742393029184e-05, "loss": 1.7264, "step": 16362 }, { "epoch": 0.798974609375, "grad_norm": 0.23250159621238708, "learning_rate": 9.465329267627803e-05, "loss": 1.7289, "step": 16363 }, { "epoch": 0.7990234375, "grad_norm": 0.22071880102157593, "learning_rate": 9.463235042048513e-05, "loss": 1.7247, "step": 16364 }, { "epoch": 0.799072265625, "grad_norm": 0.2039611041545868, "learning_rate": 9.4611412536047e-05, "loss": 1.7364, "step": 16365 }, { "epoch": 0.79912109375, "grad_norm": 0.21421776711940765, "learning_rate": 9.459047902347132e-05, "loss": 1.7507, "step": 16366 }, { "epoch": 0.799169921875, "grad_norm": 0.21453195810317993, "learning_rate": 9.456954988326519e-05, "loss": 1.7368, "step": 16367 }, { "epoch": 0.79921875, "grad_norm": 0.2014440894126892, "learning_rate": 9.454862511593612e-05, "loss": 1.7241, "step": 16368 }, { "epoch": 0.799267578125, "grad_norm": 0.16200025379657745, "learning_rate": 9.452770472199095e-05, "loss": 1.72, "step": 16369 }, { "epoch": 0.79931640625, "grad_norm": 0.2217150628566742, "learning_rate": 9.450678870193699e-05, "loss": 1.7632, "step": 16370 }, { "epoch": 0.799365234375, "grad_norm": 0.19401556253433228, "learning_rate": 9.448587705628095e-05, "loss": 1.7403, "step": 16371 }, { "epoch": 0.7994140625, "grad_norm": 0.23936642706394196, "learning_rate": 9.446496978552968e-05, "loss": 1.7232, "step": 16372 }, { "epoch": 0.799462890625, "grad_norm": 0.17378033697605133, "learning_rate": 9.444406689018991e-05, "loss": 1.7263, "step": 16373 }, { "epoch": 0.79951171875, "grad_norm": 0.22298859059810638, "learning_rate": 9.442316837076827e-05, "loss": 1.7469, "step": 16374 }, { "epoch": 0.799560546875, "grad_norm": 0.18414871394634247, "learning_rate": 9.44022742277712e-05, "loss": 1.7493, "step": 16375 }, { "epoch": 0.799609375, "grad_norm": 0.2552633583545685, "learning_rate": 9.438138446170508e-05, "loss": 1.7451, "step": 16376 }, { "epoch": 0.799658203125, "grad_norm": 0.20217232406139374, "learning_rate": 9.436049907307624e-05, "loss": 1.7394, "step": 16377 }, { "epoch": 0.79970703125, "grad_norm": 0.24843965470790863, "learning_rate": 9.433961806239084e-05, "loss": 1.7438, "step": 16378 }, { "epoch": 0.799755859375, "grad_norm": 0.22997592389583588, "learning_rate": 9.43187414301549e-05, "loss": 1.7409, "step": 16379 }, { "epoch": 0.7998046875, "grad_norm": 0.22711703181266785, "learning_rate": 9.429786917687451e-05, "loss": 1.7328, "step": 16380 }, { "epoch": 0.799853515625, "grad_norm": 0.24425190687179565, "learning_rate": 9.42770013030553e-05, "loss": 1.7608, "step": 16381 }, { "epoch": 0.79990234375, "grad_norm": 0.20834703743457794, "learning_rate": 9.425613780920327e-05, "loss": 1.7402, "step": 16382 }, { "epoch": 0.799951171875, "grad_norm": 0.26387113332748413, "learning_rate": 9.423527869582385e-05, "loss": 1.7416, "step": 16383 }, { "epoch": 0.8, "grad_norm": 0.22081531584262848, "learning_rate": 9.421442396342278e-05, "loss": 1.7437, "step": 16384 }, { "epoch": 0.800048828125, "grad_norm": 0.23208118975162506, "learning_rate": 9.419357361250527e-05, "loss": 1.7123, "step": 16385 }, { "epoch": 0.80009765625, "grad_norm": 0.21770095825195312, "learning_rate": 9.417272764357688e-05, "loss": 1.7498, "step": 16386 }, { "epoch": 0.800146484375, "grad_norm": 0.222747340798378, "learning_rate": 9.415188605714267e-05, "loss": 1.7361, "step": 16387 }, { "epoch": 0.8001953125, "grad_norm": 0.20957930386066437, "learning_rate": 9.413104885370777e-05, "loss": 1.7318, "step": 16388 }, { "epoch": 0.800244140625, "grad_norm": 0.21008804440498352, "learning_rate": 9.41102160337772e-05, "loss": 1.7404, "step": 16389 }, { "epoch": 0.80029296875, "grad_norm": 0.20966015756130219, "learning_rate": 9.40893875978559e-05, "loss": 1.7589, "step": 16390 }, { "epoch": 0.800341796875, "grad_norm": 0.21528111398220062, "learning_rate": 9.406856354644862e-05, "loss": 1.736, "step": 16391 }, { "epoch": 0.800390625, "grad_norm": 0.1828315109014511, "learning_rate": 9.404774388006008e-05, "loss": 1.7386, "step": 16392 }, { "epoch": 0.800439453125, "grad_norm": 0.24416552484035492, "learning_rate": 9.402692859919478e-05, "loss": 1.7398, "step": 16393 }, { "epoch": 0.80048828125, "grad_norm": 0.18903499841690063, "learning_rate": 9.400611770435734e-05, "loss": 1.7132, "step": 16394 }, { "epoch": 0.800537109375, "grad_norm": 0.2524186372756958, "learning_rate": 9.3985311196052e-05, "loss": 1.7502, "step": 16395 }, { "epoch": 0.8005859375, "grad_norm": 0.2247905135154724, "learning_rate": 9.396450907478313e-05, "loss": 1.7538, "step": 16396 }, { "epoch": 0.800634765625, "grad_norm": 0.20665200054645538, "learning_rate": 9.394371134105472e-05, "loss": 1.7338, "step": 16397 }, { "epoch": 0.80068359375, "grad_norm": 0.20588751137256622, "learning_rate": 9.392291799537103e-05, "loss": 1.7274, "step": 16398 }, { "epoch": 0.800732421875, "grad_norm": 0.19381354749202728, "learning_rate": 9.39021290382358e-05, "loss": 1.7139, "step": 16399 }, { "epoch": 0.80078125, "grad_norm": 0.19193395972251892, "learning_rate": 9.388134447015306e-05, "loss": 1.7337, "step": 16400 }, { "epoch": 0.800830078125, "grad_norm": 0.23975712060928345, "learning_rate": 9.386056429162631e-05, "loss": 1.7393, "step": 16401 }, { "epoch": 0.80087890625, "grad_norm": 0.16854235529899597, "learning_rate": 9.383978850315947e-05, "loss": 1.747, "step": 16402 }, { "epoch": 0.800927734375, "grad_norm": 0.22057387232780457, "learning_rate": 9.381901710525576e-05, "loss": 1.7264, "step": 16403 }, { "epoch": 0.8009765625, "grad_norm": 0.1864451766014099, "learning_rate": 9.379825009841886e-05, "loss": 1.7151, "step": 16404 }, { "epoch": 0.801025390625, "grad_norm": 0.21553733944892883, "learning_rate": 9.37774874831519e-05, "loss": 1.7404, "step": 16405 }, { "epoch": 0.80107421875, "grad_norm": 0.23492376506328583, "learning_rate": 9.37567292599581e-05, "loss": 1.7146, "step": 16406 }, { "epoch": 0.801123046875, "grad_norm": 0.19275948405265808, "learning_rate": 9.373597542934058e-05, "loss": 1.7253, "step": 16407 }, { "epoch": 0.801171875, "grad_norm": 0.228746697306633, "learning_rate": 9.371522599180232e-05, "loss": 1.7238, "step": 16408 }, { "epoch": 0.801220703125, "grad_norm": 0.21352507174015045, "learning_rate": 9.369448094784622e-05, "loss": 1.7497, "step": 16409 }, { "epoch": 0.80126953125, "grad_norm": 0.19578786194324493, "learning_rate": 9.367374029797499e-05, "loss": 1.764, "step": 16410 }, { "epoch": 0.801318359375, "grad_norm": 0.25761380791664124, "learning_rate": 9.365300404269136e-05, "loss": 1.7262, "step": 16411 }, { "epoch": 0.8013671875, "grad_norm": 0.18676383793354034, "learning_rate": 9.363227218249795e-05, "loss": 1.7457, "step": 16412 }, { "epoch": 0.801416015625, "grad_norm": 0.2284812480211258, "learning_rate": 9.361154471789699e-05, "loss": 1.7354, "step": 16413 }, { "epoch": 0.80146484375, "grad_norm": 0.2086566984653473, "learning_rate": 9.35908216493911e-05, "loss": 1.7172, "step": 16414 }, { "epoch": 0.801513671875, "grad_norm": 0.20619194209575653, "learning_rate": 9.357010297748225e-05, "loss": 1.7277, "step": 16415 }, { "epoch": 0.8015625, "grad_norm": 0.22522711753845215, "learning_rate": 9.354938870267285e-05, "loss": 1.7079, "step": 16416 }, { "epoch": 0.801611328125, "grad_norm": 0.19365008175373077, "learning_rate": 9.352867882546467e-05, "loss": 1.7299, "step": 16417 }, { "epoch": 0.80166015625, "grad_norm": 0.23901088535785675, "learning_rate": 9.350797334635989e-05, "loss": 1.7073, "step": 16418 }, { "epoch": 0.801708984375, "grad_norm": 0.20350416004657745, "learning_rate": 9.348727226586004e-05, "loss": 1.73, "step": 16419 }, { "epoch": 0.8017578125, "grad_norm": 0.2054963856935501, "learning_rate": 9.346657558446711e-05, "loss": 1.7369, "step": 16420 }, { "epoch": 0.801806640625, "grad_norm": 0.20726755261421204, "learning_rate": 9.34458833026825e-05, "loss": 1.7432, "step": 16421 }, { "epoch": 0.80185546875, "grad_norm": 0.19718864560127258, "learning_rate": 9.342519542100775e-05, "loss": 1.7144, "step": 16422 }, { "epoch": 0.801904296875, "grad_norm": 0.18445521593093872, "learning_rate": 9.340451193994426e-05, "loss": 1.7419, "step": 16423 }, { "epoch": 0.801953125, "grad_norm": 0.20734356343746185, "learning_rate": 9.338383285999334e-05, "loss": 1.7431, "step": 16424 }, { "epoch": 0.802001953125, "grad_norm": 0.19531750679016113, "learning_rate": 9.336315818165614e-05, "loss": 1.7155, "step": 16425 }, { "epoch": 0.80205078125, "grad_norm": 0.19761136174201965, "learning_rate": 9.334248790543371e-05, "loss": 1.7455, "step": 16426 }, { "epoch": 0.802099609375, "grad_norm": 0.19141650199890137, "learning_rate": 9.332182203182704e-05, "loss": 1.7609, "step": 16427 }, { "epoch": 0.8021484375, "grad_norm": 0.20033098757266998, "learning_rate": 9.330116056133696e-05, "loss": 1.7335, "step": 16428 }, { "epoch": 0.802197265625, "grad_norm": 0.21024446189403534, "learning_rate": 9.328050349446424e-05, "loss": 1.7422, "step": 16429 }, { "epoch": 0.80224609375, "grad_norm": 0.19891275465488434, "learning_rate": 9.325985083170957e-05, "loss": 1.7381, "step": 16430 }, { "epoch": 0.802294921875, "grad_norm": 0.22720062732696533, "learning_rate": 9.323920257357329e-05, "loss": 1.7449, "step": 16431 }, { "epoch": 0.80234375, "grad_norm": 0.20189757645130157, "learning_rate": 9.321855872055611e-05, "loss": 1.7025, "step": 16432 }, { "epoch": 0.802392578125, "grad_norm": 0.2046567052602768, "learning_rate": 9.319791927315804e-05, "loss": 1.7312, "step": 16433 }, { "epoch": 0.80244140625, "grad_norm": 0.19310857355594635, "learning_rate": 9.31772842318796e-05, "loss": 1.7203, "step": 16434 }, { "epoch": 0.802490234375, "grad_norm": 0.1964392364025116, "learning_rate": 9.315665359722063e-05, "loss": 1.7282, "step": 16435 }, { "epoch": 0.8025390625, "grad_norm": 0.18928685784339905, "learning_rate": 9.313602736968137e-05, "loss": 1.7501, "step": 16436 }, { "epoch": 0.802587890625, "grad_norm": 0.2063368260860443, "learning_rate": 9.311540554976149e-05, "loss": 1.7352, "step": 16437 }, { "epoch": 0.80263671875, "grad_norm": 0.18901720643043518, "learning_rate": 9.309478813796097e-05, "loss": 1.7107, "step": 16438 }, { "epoch": 0.802685546875, "grad_norm": 0.2070494294166565, "learning_rate": 9.307417513477934e-05, "loss": 1.7442, "step": 16439 }, { "epoch": 0.802734375, "grad_norm": 0.2064252495765686, "learning_rate": 9.305356654071627e-05, "loss": 1.7292, "step": 16440 }, { "epoch": 0.802783203125, "grad_norm": 0.21634522080421448, "learning_rate": 9.303296235627117e-05, "loss": 1.7452, "step": 16441 }, { "epoch": 0.80283203125, "grad_norm": 0.23131240904331207, "learning_rate": 9.30123625819434e-05, "loss": 1.722, "step": 16442 }, { "epoch": 0.802880859375, "grad_norm": 0.2124890238046646, "learning_rate": 9.299176721823227e-05, "loss": 1.7253, "step": 16443 }, { "epoch": 0.8029296875, "grad_norm": 0.225021094083786, "learning_rate": 9.297117626563687e-05, "loss": 1.7419, "step": 16444 }, { "epoch": 0.802978515625, "grad_norm": 0.19485905766487122, "learning_rate": 9.295058972465622e-05, "loss": 1.7227, "step": 16445 }, { "epoch": 0.80302734375, "grad_norm": 0.21651828289031982, "learning_rate": 9.293000759578932e-05, "loss": 1.737, "step": 16446 }, { "epoch": 0.803076171875, "grad_norm": 0.17424321174621582, "learning_rate": 9.290942987953496e-05, "loss": 1.7411, "step": 16447 }, { "epoch": 0.803125, "grad_norm": 0.20171859860420227, "learning_rate": 9.28888565763919e-05, "loss": 1.7418, "step": 16448 }, { "epoch": 0.803173828125, "grad_norm": 0.17942725121974945, "learning_rate": 9.286828768685858e-05, "loss": 1.7213, "step": 16449 }, { "epoch": 0.80322265625, "grad_norm": 0.18883195519447327, "learning_rate": 9.284772321143377e-05, "loss": 1.7508, "step": 16450 }, { "epoch": 0.803271484375, "grad_norm": 0.1827327311038971, "learning_rate": 9.282716315061563e-05, "loss": 1.7195, "step": 16451 }, { "epoch": 0.8033203125, "grad_norm": 0.191480815410614, "learning_rate": 9.280660750490262e-05, "loss": 1.7287, "step": 16452 }, { "epoch": 0.803369140625, "grad_norm": 0.18815721571445465, "learning_rate": 9.278605627479276e-05, "loss": 1.7419, "step": 16453 }, { "epoch": 0.80341796875, "grad_norm": 0.18496392667293549, "learning_rate": 9.276550946078429e-05, "loss": 1.7311, "step": 16454 }, { "epoch": 0.803466796875, "grad_norm": 0.1928032636642456, "learning_rate": 9.27449670633751e-05, "loss": 1.7518, "step": 16455 }, { "epoch": 0.803515625, "grad_norm": 0.1769254505634308, "learning_rate": 9.272442908306303e-05, "loss": 1.7426, "step": 16456 }, { "epoch": 0.803564453125, "grad_norm": 0.19810250401496887, "learning_rate": 9.270389552034586e-05, "loss": 1.7202, "step": 16457 }, { "epoch": 0.80361328125, "grad_norm": 0.16819268465042114, "learning_rate": 9.268336637572125e-05, "loss": 1.7442, "step": 16458 }, { "epoch": 0.803662109375, "grad_norm": 0.20183148980140686, "learning_rate": 9.266284164968671e-05, "loss": 1.7497, "step": 16459 }, { "epoch": 0.8037109375, "grad_norm": 0.17865513265132904, "learning_rate": 9.26423213427397e-05, "loss": 1.732, "step": 16460 }, { "epoch": 0.803759765625, "grad_norm": 0.19493524730205536, "learning_rate": 9.262180545537755e-05, "loss": 1.7321, "step": 16461 }, { "epoch": 0.80380859375, "grad_norm": 0.18739262223243713, "learning_rate": 9.260129398809747e-05, "loss": 1.7115, "step": 16462 }, { "epoch": 0.803857421875, "grad_norm": 0.1843206137418747, "learning_rate": 9.258078694139651e-05, "loss": 1.7568, "step": 16463 }, { "epoch": 0.80390625, "grad_norm": 0.20146118104457855, "learning_rate": 9.25602843157718e-05, "loss": 1.7329, "step": 16464 }, { "epoch": 0.803955078125, "grad_norm": 0.18130871653556824, "learning_rate": 9.253978611172011e-05, "loss": 1.7532, "step": 16465 }, { "epoch": 0.80400390625, "grad_norm": 0.1950683295726776, "learning_rate": 9.251929232973835e-05, "loss": 1.729, "step": 16466 }, { "epoch": 0.804052734375, "grad_norm": 0.1878000795841217, "learning_rate": 9.249880297032312e-05, "loss": 1.7389, "step": 16467 }, { "epoch": 0.8041015625, "grad_norm": 0.18514476716518402, "learning_rate": 9.247831803397107e-05, "loss": 1.7422, "step": 16468 }, { "epoch": 0.804150390625, "grad_norm": 0.19675739109516144, "learning_rate": 9.245783752117851e-05, "loss": 1.7381, "step": 16469 }, { "epoch": 0.80419921875, "grad_norm": 0.20540574193000793, "learning_rate": 9.243736143244205e-05, "loss": 1.7224, "step": 16470 }, { "epoch": 0.804248046875, "grad_norm": 0.19275477528572083, "learning_rate": 9.241688976825771e-05, "loss": 1.724, "step": 16471 }, { "epoch": 0.804296875, "grad_norm": 0.2091197818517685, "learning_rate": 9.239642252912174e-05, "loss": 1.749, "step": 16472 }, { "epoch": 0.804345703125, "grad_norm": 0.19234977662563324, "learning_rate": 9.237595971553017e-05, "loss": 1.7211, "step": 16473 }, { "epoch": 0.80439453125, "grad_norm": 0.20070889592170715, "learning_rate": 9.235550132797893e-05, "loss": 1.7322, "step": 16474 }, { "epoch": 0.804443359375, "grad_norm": 0.24546997249126434, "learning_rate": 9.233504736696388e-05, "loss": 1.6993, "step": 16475 }, { "epoch": 0.8044921875, "grad_norm": 0.1956571340560913, "learning_rate": 9.231459783298068e-05, "loss": 1.731, "step": 16476 }, { "epoch": 0.804541015625, "grad_norm": 0.22555211186408997, "learning_rate": 9.229415272652497e-05, "loss": 1.7264, "step": 16477 }, { "epoch": 0.80458984375, "grad_norm": 0.22959034144878387, "learning_rate": 9.227371204809228e-05, "loss": 1.7255, "step": 16478 }, { "epoch": 0.804638671875, "grad_norm": 0.19117969274520874, "learning_rate": 9.225327579817797e-05, "loss": 1.7185, "step": 16479 }, { "epoch": 0.8046875, "grad_norm": 0.2757861018180847, "learning_rate": 9.223284397727734e-05, "loss": 1.7237, "step": 16480 }, { "epoch": 0.804736328125, "grad_norm": 0.22626478970050812, "learning_rate": 9.221241658588556e-05, "loss": 1.748, "step": 16481 }, { "epoch": 0.80478515625, "grad_norm": 0.24933476746082306, "learning_rate": 9.21919936244977e-05, "loss": 1.752, "step": 16482 }, { "epoch": 0.804833984375, "grad_norm": 0.18647490441799164, "learning_rate": 9.217157509360877e-05, "loss": 1.7472, "step": 16483 }, { "epoch": 0.8048828125, "grad_norm": 0.23887541890144348, "learning_rate": 9.215116099371356e-05, "loss": 1.7418, "step": 16484 }, { "epoch": 0.804931640625, "grad_norm": 0.20797380805015564, "learning_rate": 9.21307513253069e-05, "loss": 1.7303, "step": 16485 }, { "epoch": 0.80498046875, "grad_norm": 0.22712303698062897, "learning_rate": 9.211034608888339e-05, "loss": 1.7388, "step": 16486 }, { "epoch": 0.805029296875, "grad_norm": 0.2690981328487396, "learning_rate": 9.208994528493756e-05, "loss": 1.7207, "step": 16487 }, { "epoch": 0.805078125, "grad_norm": 0.2272244095802307, "learning_rate": 9.206954891396394e-05, "loss": 1.7185, "step": 16488 }, { "epoch": 0.805126953125, "grad_norm": 0.2350049763917923, "learning_rate": 9.20491569764567e-05, "loss": 1.7273, "step": 16489 }, { "epoch": 0.80517578125, "grad_norm": 0.2390940636396408, "learning_rate": 9.202876947291009e-05, "loss": 1.7279, "step": 16490 }, { "epoch": 0.805224609375, "grad_norm": 0.22058497369289398, "learning_rate": 9.200838640381828e-05, "loss": 1.7231, "step": 16491 }, { "epoch": 0.8052734375, "grad_norm": 0.22587747871875763, "learning_rate": 9.198800776967523e-05, "loss": 1.7356, "step": 16492 }, { "epoch": 0.805322265625, "grad_norm": 0.22814808785915375, "learning_rate": 9.196763357097484e-05, "loss": 1.7091, "step": 16493 }, { "epoch": 0.80537109375, "grad_norm": 0.21711775660514832, "learning_rate": 9.19472638082109e-05, "loss": 1.6947, "step": 16494 }, { "epoch": 0.805419921875, "grad_norm": 0.25969401001930237, "learning_rate": 9.192689848187709e-05, "loss": 1.7403, "step": 16495 }, { "epoch": 0.80546875, "grad_norm": 0.21124492585659027, "learning_rate": 9.190653759246697e-05, "loss": 1.7356, "step": 16496 }, { "epoch": 0.805517578125, "grad_norm": 0.20840582251548767, "learning_rate": 9.188618114047403e-05, "loss": 1.7043, "step": 16497 }, { "epoch": 0.80556640625, "grad_norm": 0.22931620478630066, "learning_rate": 9.186582912639156e-05, "loss": 1.7333, "step": 16498 }, { "epoch": 0.805615234375, "grad_norm": 0.18807485699653625, "learning_rate": 9.18454815507129e-05, "loss": 1.753, "step": 16499 }, { "epoch": 0.8056640625, "grad_norm": 0.19826345145702362, "learning_rate": 9.182513841393108e-05, "loss": 1.7409, "step": 16500 }, { "epoch": 0.805712890625, "grad_norm": 0.22491338849067688, "learning_rate": 9.180479971653924e-05, "loss": 1.7308, "step": 16501 }, { "epoch": 0.80576171875, "grad_norm": 0.1924811601638794, "learning_rate": 9.178446545903023e-05, "loss": 1.7366, "step": 16502 }, { "epoch": 0.805810546875, "grad_norm": 0.22800511121749878, "learning_rate": 9.17641356418969e-05, "loss": 1.7342, "step": 16503 }, { "epoch": 0.805859375, "grad_norm": 0.18293125927448273, "learning_rate": 9.174381026563195e-05, "loss": 1.7262, "step": 16504 }, { "epoch": 0.805908203125, "grad_norm": 0.222537562251091, "learning_rate": 9.172348933072805e-05, "loss": 1.7237, "step": 16505 }, { "epoch": 0.80595703125, "grad_norm": 0.1869410127401352, "learning_rate": 9.170317283767757e-05, "loss": 1.7542, "step": 16506 }, { "epoch": 0.806005859375, "grad_norm": 0.20493018627166748, "learning_rate": 9.168286078697294e-05, "loss": 1.7376, "step": 16507 }, { "epoch": 0.8060546875, "grad_norm": 0.2041061371564865, "learning_rate": 9.166255317910649e-05, "loss": 1.7327, "step": 16508 }, { "epoch": 0.806103515625, "grad_norm": 0.19062776863574982, "learning_rate": 9.164225001457033e-05, "loss": 1.7516, "step": 16509 }, { "epoch": 0.80615234375, "grad_norm": 0.21062271296977997, "learning_rate": 9.162195129385656e-05, "loss": 1.7459, "step": 16510 }, { "epoch": 0.806201171875, "grad_norm": 0.17436093091964722, "learning_rate": 9.16016570174571e-05, "loss": 1.7157, "step": 16511 }, { "epoch": 0.80625, "grad_norm": 0.24185331165790558, "learning_rate": 9.158136718586386e-05, "loss": 1.7463, "step": 16512 }, { "epoch": 0.806298828125, "grad_norm": 0.16949482262134552, "learning_rate": 9.156108179956852e-05, "loss": 1.7592, "step": 16513 }, { "epoch": 0.80634765625, "grad_norm": 0.20784945785999298, "learning_rate": 9.154080085906272e-05, "loss": 1.7296, "step": 16514 }, { "epoch": 0.806396484375, "grad_norm": 0.18561919033527374, "learning_rate": 9.152052436483803e-05, "loss": 1.7501, "step": 16515 }, { "epoch": 0.8064453125, "grad_norm": 0.17189785838127136, "learning_rate": 9.150025231738583e-05, "loss": 1.7043, "step": 16516 }, { "epoch": 0.806494140625, "grad_norm": 0.20576539635658264, "learning_rate": 9.147998471719743e-05, "loss": 1.726, "step": 16517 }, { "epoch": 0.80654296875, "grad_norm": 0.19571742415428162, "learning_rate": 9.145972156476404e-05, "loss": 1.7562, "step": 16518 }, { "epoch": 0.806591796875, "grad_norm": 0.18195733428001404, "learning_rate": 9.143946286057676e-05, "loss": 1.7188, "step": 16519 }, { "epoch": 0.806640625, "grad_norm": 0.19301988184452057, "learning_rate": 9.141920860512657e-05, "loss": 1.7218, "step": 16520 }, { "epoch": 0.806689453125, "grad_norm": 0.18399366736412048, "learning_rate": 9.139895879890433e-05, "loss": 1.724, "step": 16521 }, { "epoch": 0.80673828125, "grad_norm": 0.18972305953502655, "learning_rate": 9.137871344240085e-05, "loss": 1.7344, "step": 16522 }, { "epoch": 0.806787109375, "grad_norm": 0.1780238002538681, "learning_rate": 9.13584725361068e-05, "loss": 1.7379, "step": 16523 }, { "epoch": 0.8068359375, "grad_norm": 0.19048339128494263, "learning_rate": 9.133823608051261e-05, "loss": 1.739, "step": 16524 }, { "epoch": 0.806884765625, "grad_norm": 0.19254441559314728, "learning_rate": 9.131800407610894e-05, "loss": 1.7536, "step": 16525 }, { "epoch": 0.80693359375, "grad_norm": 0.22509422898292542, "learning_rate": 9.129777652338595e-05, "loss": 1.7634, "step": 16526 }, { "epoch": 0.806982421875, "grad_norm": 0.20090964436531067, "learning_rate": 9.127755342283391e-05, "loss": 1.745, "step": 16527 }, { "epoch": 0.80703125, "grad_norm": 0.24643874168395996, "learning_rate": 9.125733477494297e-05, "loss": 1.7332, "step": 16528 }, { "epoch": 0.807080078125, "grad_norm": 0.24997279047966003, "learning_rate": 9.123712058020314e-05, "loss": 1.7545, "step": 16529 }, { "epoch": 0.80712890625, "grad_norm": 0.20813724398612976, "learning_rate": 9.121691083910435e-05, "loss": 1.7153, "step": 16530 }, { "epoch": 0.807177734375, "grad_norm": 0.28771182894706726, "learning_rate": 9.119670555213637e-05, "loss": 1.7343, "step": 16531 }, { "epoch": 0.8072265625, "grad_norm": 0.2146776020526886, "learning_rate": 9.11765047197889e-05, "loss": 1.7273, "step": 16532 }, { "epoch": 0.807275390625, "grad_norm": 0.20877867937088013, "learning_rate": 9.115630834255148e-05, "loss": 1.716, "step": 16533 }, { "epoch": 0.80732421875, "grad_norm": 0.2521439790725708, "learning_rate": 9.113611642091366e-05, "loss": 1.7464, "step": 16534 }, { "epoch": 0.807373046875, "grad_norm": 0.21402166783809662, "learning_rate": 9.111592895536475e-05, "loss": 1.728, "step": 16535 }, { "epoch": 0.807421875, "grad_norm": 0.18060532212257385, "learning_rate": 9.109574594639409e-05, "loss": 1.7306, "step": 16536 }, { "epoch": 0.807470703125, "grad_norm": 0.2366638481616974, "learning_rate": 9.10755673944907e-05, "loss": 1.721, "step": 16537 }, { "epoch": 0.80751953125, "grad_norm": 0.2112952321767807, "learning_rate": 9.105539330014373e-05, "loss": 1.7593, "step": 16538 }, { "epoch": 0.807568359375, "grad_norm": 0.2022944539785385, "learning_rate": 9.103522366384215e-05, "loss": 1.7476, "step": 16539 }, { "epoch": 0.8076171875, "grad_norm": 0.2507442235946655, "learning_rate": 9.101505848607461e-05, "loss": 1.7185, "step": 16540 }, { "epoch": 0.807666015625, "grad_norm": 0.18644797801971436, "learning_rate": 9.099489776733e-05, "loss": 1.7146, "step": 16541 }, { "epoch": 0.80771484375, "grad_norm": 0.24354034662246704, "learning_rate": 9.097474150809681e-05, "loss": 1.7281, "step": 16542 }, { "epoch": 0.807763671875, "grad_norm": 0.20336361229419708, "learning_rate": 9.095458970886371e-05, "loss": 1.7448, "step": 16543 }, { "epoch": 0.8078125, "grad_norm": 0.1924849897623062, "learning_rate": 9.093444237011887e-05, "loss": 1.7274, "step": 16544 }, { "epoch": 0.807861328125, "grad_norm": 0.2091527134180069, "learning_rate": 9.09142994923508e-05, "loss": 1.725, "step": 16545 }, { "epoch": 0.80791015625, "grad_norm": 0.1951557993888855, "learning_rate": 9.089416107604753e-05, "loss": 1.7079, "step": 16546 }, { "epoch": 0.807958984375, "grad_norm": 0.18600396811962128, "learning_rate": 9.087402712169718e-05, "loss": 1.7283, "step": 16547 }, { "epoch": 0.8080078125, "grad_norm": 0.18767337501049042, "learning_rate": 9.085389762978771e-05, "loss": 1.7533, "step": 16548 }, { "epoch": 0.808056640625, "grad_norm": 0.20554496347904205, "learning_rate": 9.083377260080696e-05, "loss": 1.7514, "step": 16549 }, { "epoch": 0.80810546875, "grad_norm": 0.23364697396755219, "learning_rate": 9.081365203524269e-05, "loss": 1.727, "step": 16550 }, { "epoch": 0.808154296875, "grad_norm": 0.22015441954135895, "learning_rate": 9.079353593358253e-05, "loss": 1.7674, "step": 16551 }, { "epoch": 0.808203125, "grad_norm": 0.20170457661151886, "learning_rate": 9.077342429631405e-05, "loss": 1.7254, "step": 16552 }, { "epoch": 0.808251953125, "grad_norm": 0.21060077846050262, "learning_rate": 9.075331712392459e-05, "loss": 1.7312, "step": 16553 }, { "epoch": 0.80830078125, "grad_norm": 0.19359524548053741, "learning_rate": 9.073321441690152e-05, "loss": 1.7337, "step": 16554 }, { "epoch": 0.808349609375, "grad_norm": 0.1885976493358612, "learning_rate": 9.071311617573208e-05, "loss": 1.7502, "step": 16555 }, { "epoch": 0.8083984375, "grad_norm": 0.21386247873306274, "learning_rate": 9.06930224009033e-05, "loss": 1.724, "step": 16556 }, { "epoch": 0.808447265625, "grad_norm": 0.17622798681259155, "learning_rate": 9.067293309290224e-05, "loss": 1.7413, "step": 16557 }, { "epoch": 0.80849609375, "grad_norm": 0.1984950304031372, "learning_rate": 9.06528482522156e-05, "loss": 1.7581, "step": 16558 }, { "epoch": 0.808544921875, "grad_norm": 0.18217507004737854, "learning_rate": 9.063276787933044e-05, "loss": 1.7339, "step": 16559 }, { "epoch": 0.80859375, "grad_norm": 0.18869945406913757, "learning_rate": 9.061269197473313e-05, "loss": 1.7307, "step": 16560 }, { "epoch": 0.808642578125, "grad_norm": 0.17273515462875366, "learning_rate": 9.05926205389105e-05, "loss": 1.7163, "step": 16561 }, { "epoch": 0.80869140625, "grad_norm": 0.17208459973335266, "learning_rate": 9.057255357234875e-05, "loss": 1.7347, "step": 16562 }, { "epoch": 0.808740234375, "grad_norm": 0.18142180144786835, "learning_rate": 9.055249107553443e-05, "loss": 1.731, "step": 16563 }, { "epoch": 0.8087890625, "grad_norm": 0.2106517106294632, "learning_rate": 9.053243304895361e-05, "loss": 1.7577, "step": 16564 }, { "epoch": 0.808837890625, "grad_norm": 0.17068354785442352, "learning_rate": 9.051237949309249e-05, "loss": 1.7246, "step": 16565 }, { "epoch": 0.80888671875, "grad_norm": 0.21204540133476257, "learning_rate": 9.049233040843705e-05, "loss": 1.7208, "step": 16566 }, { "epoch": 0.808935546875, "grad_norm": 0.17995597422122955, "learning_rate": 9.047228579547325e-05, "loss": 1.7576, "step": 16567 }, { "epoch": 0.808984375, "grad_norm": 0.23270003497600555, "learning_rate": 9.045224565468683e-05, "loss": 1.7194, "step": 16568 }, { "epoch": 0.809033203125, "grad_norm": 0.2261408120393753, "learning_rate": 9.043220998656349e-05, "loss": 1.7376, "step": 16569 }, { "epoch": 0.80908203125, "grad_norm": 0.19344472885131836, "learning_rate": 9.041217879158883e-05, "loss": 1.734, "step": 16570 }, { "epoch": 0.809130859375, "grad_norm": 0.2679902911186218, "learning_rate": 9.039215207024832e-05, "loss": 1.7661, "step": 16571 }, { "epoch": 0.8091796875, "grad_norm": 0.19548656046390533, "learning_rate": 9.037212982302731e-05, "loss": 1.7292, "step": 16572 }, { "epoch": 0.809228515625, "grad_norm": 0.26222488284111023, "learning_rate": 9.035211205041112e-05, "loss": 1.7381, "step": 16573 }, { "epoch": 0.80927734375, "grad_norm": 0.25339487195014954, "learning_rate": 9.03320987528847e-05, "loss": 1.7504, "step": 16574 }, { "epoch": 0.809326171875, "grad_norm": 0.22609061002731323, "learning_rate": 9.031208993093339e-05, "loss": 1.7466, "step": 16575 }, { "epoch": 0.809375, "grad_norm": 0.25554153323173523, "learning_rate": 9.029208558504183e-05, "loss": 1.7278, "step": 16576 }, { "epoch": 0.809423828125, "grad_norm": 0.232152059674263, "learning_rate": 9.027208571569506e-05, "loss": 1.7355, "step": 16577 }, { "epoch": 0.80947265625, "grad_norm": 0.22993279993534088, "learning_rate": 9.025209032337758e-05, "loss": 1.7507, "step": 16578 }, { "epoch": 0.809521484375, "grad_norm": 0.2086019366979599, "learning_rate": 9.023209940857428e-05, "loss": 1.7185, "step": 16579 }, { "epoch": 0.8095703125, "grad_norm": 0.2262033373117447, "learning_rate": 9.021211297176935e-05, "loss": 1.7112, "step": 16580 }, { "epoch": 0.809619140625, "grad_norm": 0.18707354366779327, "learning_rate": 9.019213101344743e-05, "loss": 1.7342, "step": 16581 }, { "epoch": 0.80966796875, "grad_norm": 0.21440695226192474, "learning_rate": 9.017215353409264e-05, "loss": 1.7217, "step": 16582 }, { "epoch": 0.809716796875, "grad_norm": 0.19296292960643768, "learning_rate": 9.015218053418919e-05, "loss": 1.7344, "step": 16583 }, { "epoch": 0.809765625, "grad_norm": 0.2029787003993988, "learning_rate": 9.013221201422114e-05, "loss": 1.7139, "step": 16584 }, { "epoch": 0.809814453125, "grad_norm": 0.1974063217639923, "learning_rate": 9.011224797467246e-05, "loss": 1.7253, "step": 16585 }, { "epoch": 0.80986328125, "grad_norm": 0.19336970150470734, "learning_rate": 9.009228841602701e-05, "loss": 1.7368, "step": 16586 }, { "epoch": 0.809912109375, "grad_norm": 0.21440543234348297, "learning_rate": 9.007233333876847e-05, "loss": 1.7281, "step": 16587 }, { "epoch": 0.8099609375, "grad_norm": 0.20034347474575043, "learning_rate": 9.005238274338051e-05, "loss": 1.7302, "step": 16588 }, { "epoch": 0.810009765625, "grad_norm": 0.22993017733097076, "learning_rate": 9.00324366303467e-05, "loss": 1.7392, "step": 16589 }, { "epoch": 0.81005859375, "grad_norm": 0.19895707070827484, "learning_rate": 9.001249500015029e-05, "loss": 1.725, "step": 16590 }, { "epoch": 0.810107421875, "grad_norm": 0.20129869878292084, "learning_rate": 8.999255785327479e-05, "loss": 1.7342, "step": 16591 }, { "epoch": 0.81015625, "grad_norm": 0.18432994186878204, "learning_rate": 8.997262519020317e-05, "loss": 1.7117, "step": 16592 }, { "epoch": 0.810205078125, "grad_norm": 0.20494970679283142, "learning_rate": 8.995269701141872e-05, "loss": 1.7415, "step": 16593 }, { "epoch": 0.81025390625, "grad_norm": 0.17703992128372192, "learning_rate": 8.993277331740423e-05, "loss": 1.7222, "step": 16594 }, { "epoch": 0.810302734375, "grad_norm": 0.18156304955482483, "learning_rate": 8.991285410864278e-05, "loss": 1.7103, "step": 16595 }, { "epoch": 0.8103515625, "grad_norm": 0.2203257828950882, "learning_rate": 8.989293938561687e-05, "loss": 1.7399, "step": 16596 }, { "epoch": 0.810400390625, "grad_norm": 0.204295352101326, "learning_rate": 8.987302914880944e-05, "loss": 1.7217, "step": 16597 }, { "epoch": 0.81044921875, "grad_norm": 0.2017636001110077, "learning_rate": 8.985312339870278e-05, "loss": 1.746, "step": 16598 }, { "epoch": 0.810498046875, "grad_norm": 0.1800576150417328, "learning_rate": 8.983322213577941e-05, "loss": 1.7403, "step": 16599 }, { "epoch": 0.810546875, "grad_norm": 0.23230457305908203, "learning_rate": 8.981332536052172e-05, "loss": 1.7511, "step": 16600 }, { "epoch": 0.810595703125, "grad_norm": 0.1917482167482376, "learning_rate": 8.979343307341181e-05, "loss": 1.7328, "step": 16601 }, { "epoch": 0.81064453125, "grad_norm": 0.20445315539836884, "learning_rate": 8.977354527493188e-05, "loss": 1.7129, "step": 16602 }, { "epoch": 0.810693359375, "grad_norm": 0.19356635212898254, "learning_rate": 8.97536619655639e-05, "loss": 1.7454, "step": 16603 }, { "epoch": 0.8107421875, "grad_norm": 0.19594311714172363, "learning_rate": 8.97337831457897e-05, "loss": 1.722, "step": 16604 }, { "epoch": 0.810791015625, "grad_norm": 0.19021858274936676, "learning_rate": 8.971390881609115e-05, "loss": 1.7418, "step": 16605 }, { "epoch": 0.81083984375, "grad_norm": 0.20572814345359802, "learning_rate": 8.969403897694985e-05, "loss": 1.7133, "step": 16606 }, { "epoch": 0.810888671875, "grad_norm": 0.2211436778306961, "learning_rate": 8.967417362884745e-05, "loss": 1.7526, "step": 16607 }, { "epoch": 0.8109375, "grad_norm": 0.2520994544029236, "learning_rate": 8.965431277226523e-05, "loss": 1.7544, "step": 16608 }, { "epoch": 0.810986328125, "grad_norm": 0.20749302208423615, "learning_rate": 8.963445640768475e-05, "loss": 1.7229, "step": 16609 }, { "epoch": 0.81103515625, "grad_norm": 0.29095199704170227, "learning_rate": 8.961460453558705e-05, "loss": 1.7354, "step": 16610 }, { "epoch": 0.811083984375, "grad_norm": 0.1900784969329834, "learning_rate": 8.959475715645346e-05, "loss": 1.7248, "step": 16611 }, { "epoch": 0.8111328125, "grad_norm": 0.22748634219169617, "learning_rate": 8.957491427076475e-05, "loss": 1.7143, "step": 16612 }, { "epoch": 0.811181640625, "grad_norm": 0.1830708235502243, "learning_rate": 8.955507587900208e-05, "loss": 1.7387, "step": 16613 }, { "epoch": 0.81123046875, "grad_norm": 0.2428310513496399, "learning_rate": 8.953524198164603e-05, "loss": 1.7086, "step": 16614 }, { "epoch": 0.811279296875, "grad_norm": 0.1732262670993805, "learning_rate": 8.951541257917747e-05, "loss": 1.7401, "step": 16615 }, { "epoch": 0.811328125, "grad_norm": 0.24557438492774963, "learning_rate": 8.94955876720769e-05, "loss": 1.7041, "step": 16616 }, { "epoch": 0.811376953125, "grad_norm": 0.16415643692016602, "learning_rate": 8.947576726082476e-05, "loss": 1.7048, "step": 16617 }, { "epoch": 0.81142578125, "grad_norm": 0.22564207017421722, "learning_rate": 8.945595134590146e-05, "loss": 1.7213, "step": 16618 }, { "epoch": 0.811474609375, "grad_norm": 0.20125772058963776, "learning_rate": 8.943613992778725e-05, "loss": 1.7531, "step": 16619 }, { "epoch": 0.8115234375, "grad_norm": 0.21559660136699677, "learning_rate": 8.941633300696224e-05, "loss": 1.71, "step": 16620 }, { "epoch": 0.811572265625, "grad_norm": 0.19601000845432281, "learning_rate": 8.93965305839065e-05, "loss": 1.7195, "step": 16621 }, { "epoch": 0.81162109375, "grad_norm": 0.21580883860588074, "learning_rate": 8.937673265909998e-05, "loss": 1.7188, "step": 16622 }, { "epoch": 0.811669921875, "grad_norm": 0.18076260387897491, "learning_rate": 8.935693923302252e-05, "loss": 1.7269, "step": 16623 }, { "epoch": 0.81171875, "grad_norm": 0.18526174128055573, "learning_rate": 8.933715030615366e-05, "loss": 1.7081, "step": 16624 }, { "epoch": 0.811767578125, "grad_norm": 0.21061179041862488, "learning_rate": 8.931736587897325e-05, "loss": 1.7313, "step": 16625 }, { "epoch": 0.81181640625, "grad_norm": 0.19276322424411774, "learning_rate": 8.929758595196052e-05, "loss": 1.741, "step": 16626 }, { "epoch": 0.811865234375, "grad_norm": 0.19735823571681976, "learning_rate": 8.927781052559513e-05, "loss": 1.7125, "step": 16627 }, { "epoch": 0.8119140625, "grad_norm": 0.2059759646654129, "learning_rate": 8.925803960035608e-05, "loss": 1.7589, "step": 16628 }, { "epoch": 0.811962890625, "grad_norm": 0.20203036069869995, "learning_rate": 8.923827317672278e-05, "loss": 1.7248, "step": 16629 }, { "epoch": 0.81201171875, "grad_norm": 0.18183860182762146, "learning_rate": 8.921851125517405e-05, "loss": 1.7508, "step": 16630 }, { "epoch": 0.812060546875, "grad_norm": 0.209787979722023, "learning_rate": 8.919875383618908e-05, "loss": 1.7295, "step": 16631 }, { "epoch": 0.812109375, "grad_norm": 0.20127937197685242, "learning_rate": 8.91790009202465e-05, "loss": 1.7249, "step": 16632 }, { "epoch": 0.812158203125, "grad_norm": 0.20039597153663635, "learning_rate": 8.915925250782513e-05, "loss": 1.7293, "step": 16633 }, { "epoch": 0.81220703125, "grad_norm": 0.19375818967819214, "learning_rate": 8.913950859940359e-05, "loss": 1.7194, "step": 16634 }, { "epoch": 0.812255859375, "grad_norm": 0.2074817419052124, "learning_rate": 8.911976919546036e-05, "loss": 1.7183, "step": 16635 }, { "epoch": 0.8123046875, "grad_norm": 0.20172138512134552, "learning_rate": 8.910003429647387e-05, "loss": 1.7617, "step": 16636 }, { "epoch": 0.812353515625, "grad_norm": 0.20480120182037354, "learning_rate": 8.908030390292241e-05, "loss": 1.7179, "step": 16637 }, { "epoch": 0.81240234375, "grad_norm": 0.18073350191116333, "learning_rate": 8.906057801528413e-05, "loss": 1.6946, "step": 16638 }, { "epoch": 0.812451171875, "grad_norm": 0.2362477332353592, "learning_rate": 8.904085663403716e-05, "loss": 1.6917, "step": 16639 }, { "epoch": 0.8125, "grad_norm": 0.19415795803070068, "learning_rate": 8.90211397596594e-05, "loss": 1.7508, "step": 16640 }, { "epoch": 0.812548828125, "grad_norm": 0.21020196378231049, "learning_rate": 8.900142739262879e-05, "loss": 1.7053, "step": 16641 }, { "epoch": 0.81259765625, "grad_norm": 0.22207777202129364, "learning_rate": 8.898171953342291e-05, "loss": 1.7385, "step": 16642 }, { "epoch": 0.812646484375, "grad_norm": 0.18502290546894073, "learning_rate": 8.896201618251959e-05, "loss": 1.7323, "step": 16643 }, { "epoch": 0.8126953125, "grad_norm": 0.2079804390668869, "learning_rate": 8.894231734039617e-05, "loss": 1.7413, "step": 16644 }, { "epoch": 0.812744140625, "grad_norm": 0.20228742063045502, "learning_rate": 8.892262300753029e-05, "loss": 1.7279, "step": 16645 }, { "epoch": 0.81279296875, "grad_norm": 0.19667783379554749, "learning_rate": 8.890293318439901e-05, "loss": 1.7414, "step": 16646 }, { "epoch": 0.812841796875, "grad_norm": 0.17510472238063812, "learning_rate": 8.888324787147978e-05, "loss": 1.7295, "step": 16647 }, { "epoch": 0.812890625, "grad_norm": 0.21443231403827667, "learning_rate": 8.88635670692495e-05, "loss": 1.7433, "step": 16648 }, { "epoch": 0.812939453125, "grad_norm": 0.2020285278558731, "learning_rate": 8.884389077818523e-05, "loss": 1.7316, "step": 16649 }, { "epoch": 0.81298828125, "grad_norm": 0.19632595777511597, "learning_rate": 8.882421899876377e-05, "loss": 1.7254, "step": 16650 }, { "epoch": 0.813037109375, "grad_norm": 0.20954054594039917, "learning_rate": 8.880455173146199e-05, "loss": 1.7388, "step": 16651 }, { "epoch": 0.8130859375, "grad_norm": 0.1964465081691742, "learning_rate": 8.878488897675645e-05, "loss": 1.7205, "step": 16652 }, { "epoch": 0.813134765625, "grad_norm": 0.21788544952869415, "learning_rate": 8.876523073512374e-05, "loss": 1.6992, "step": 16653 }, { "epoch": 0.81318359375, "grad_norm": 0.1901778131723404, "learning_rate": 8.874557700704031e-05, "loss": 1.7259, "step": 16654 }, { "epoch": 0.813232421875, "grad_norm": 0.20775416493415833, "learning_rate": 8.872592779298242e-05, "loss": 1.7292, "step": 16655 }, { "epoch": 0.81328125, "grad_norm": 0.17901940643787384, "learning_rate": 8.870628309342632e-05, "loss": 1.7286, "step": 16656 }, { "epoch": 0.813330078125, "grad_norm": 0.2315925806760788, "learning_rate": 8.86866429088482e-05, "loss": 1.7361, "step": 16657 }, { "epoch": 0.81337890625, "grad_norm": 0.17952705919742584, "learning_rate": 8.866700723972385e-05, "loss": 1.7336, "step": 16658 }, { "epoch": 0.813427734375, "grad_norm": 0.2273687720298767, "learning_rate": 8.86473760865294e-05, "loss": 1.7213, "step": 16659 }, { "epoch": 0.8134765625, "grad_norm": 0.2138269692659378, "learning_rate": 8.862774944974038e-05, "loss": 1.7349, "step": 16660 }, { "epoch": 0.813525390625, "grad_norm": 0.26141291856765747, "learning_rate": 8.86081273298327e-05, "loss": 1.7439, "step": 16661 }, { "epoch": 0.81357421875, "grad_norm": 0.22346673905849457, "learning_rate": 8.858850972728166e-05, "loss": 1.7423, "step": 16662 }, { "epoch": 0.813623046875, "grad_norm": 0.2542325258255005, "learning_rate": 8.856889664256297e-05, "loss": 1.7301, "step": 16663 }, { "epoch": 0.813671875, "grad_norm": 0.2142067551612854, "learning_rate": 8.854928807615176e-05, "loss": 1.7172, "step": 16664 }, { "epoch": 0.813720703125, "grad_norm": 0.21122834086418152, "learning_rate": 8.852968402852343e-05, "loss": 1.7519, "step": 16665 }, { "epoch": 0.81376953125, "grad_norm": 0.23832552134990692, "learning_rate": 8.851008450015299e-05, "loss": 1.7586, "step": 16666 }, { "epoch": 0.813818359375, "grad_norm": 0.17244230210781097, "learning_rate": 8.849048949151546e-05, "loss": 1.7115, "step": 16667 }, { "epoch": 0.8138671875, "grad_norm": 0.21841853857040405, "learning_rate": 8.847089900308575e-05, "loss": 1.7526, "step": 16668 }, { "epoch": 0.813916015625, "grad_norm": 0.19563649594783783, "learning_rate": 8.845131303533868e-05, "loss": 1.7415, "step": 16669 }, { "epoch": 0.81396484375, "grad_norm": 0.22814558446407318, "learning_rate": 8.84317315887489e-05, "loss": 1.7656, "step": 16670 }, { "epoch": 0.814013671875, "grad_norm": 0.21790143847465515, "learning_rate": 8.8412154663791e-05, "loss": 1.7574, "step": 16671 }, { "epoch": 0.8140625, "grad_norm": 0.199219211935997, "learning_rate": 8.83925822609394e-05, "loss": 1.7458, "step": 16672 }, { "epoch": 0.814111328125, "grad_norm": 0.21214435994625092, "learning_rate": 8.837301438066852e-05, "loss": 1.7315, "step": 16673 }, { "epoch": 0.81416015625, "grad_norm": 0.20365090668201447, "learning_rate": 8.835345102345258e-05, "loss": 1.724, "step": 16674 }, { "epoch": 0.814208984375, "grad_norm": 0.20087619125843048, "learning_rate": 8.833389218976573e-05, "loss": 1.754, "step": 16675 }, { "epoch": 0.8142578125, "grad_norm": 0.20346148312091827, "learning_rate": 8.831433788008186e-05, "loss": 1.7018, "step": 16676 }, { "epoch": 0.814306640625, "grad_norm": 0.1862020343542099, "learning_rate": 8.829478809487513e-05, "loss": 1.7193, "step": 16677 }, { "epoch": 0.81435546875, "grad_norm": 0.20384538173675537, "learning_rate": 8.82752428346191e-05, "loss": 1.7175, "step": 16678 }, { "epoch": 0.814404296875, "grad_norm": 0.1874806433916092, "learning_rate": 8.825570209978765e-05, "loss": 1.79, "step": 16679 }, { "epoch": 0.814453125, "grad_norm": 0.2174118161201477, "learning_rate": 8.823616589085421e-05, "loss": 1.7385, "step": 16680 }, { "epoch": 0.814501953125, "grad_norm": 0.1963174194097519, "learning_rate": 8.821663420829243e-05, "loss": 1.699, "step": 16681 }, { "epoch": 0.81455078125, "grad_norm": 0.19465306401252747, "learning_rate": 8.819710705257553e-05, "loss": 1.7196, "step": 16682 }, { "epoch": 0.814599609375, "grad_norm": 0.19896307587623596, "learning_rate": 8.817758442417678e-05, "loss": 1.757, "step": 16683 }, { "epoch": 0.8146484375, "grad_norm": 0.20958474278450012, "learning_rate": 8.815806632356943e-05, "loss": 1.7272, "step": 16684 }, { "epoch": 0.814697265625, "grad_norm": 0.1820516586303711, "learning_rate": 8.81385527512264e-05, "loss": 1.7186, "step": 16685 }, { "epoch": 0.81474609375, "grad_norm": 0.20728595554828644, "learning_rate": 8.811904370762068e-05, "loss": 1.7251, "step": 16686 }, { "epoch": 0.814794921875, "grad_norm": 0.18016988039016724, "learning_rate": 8.809953919322507e-05, "loss": 1.7565, "step": 16687 }, { "epoch": 0.81484375, "grad_norm": 0.21689069271087646, "learning_rate": 8.808003920851229e-05, "loss": 1.7519, "step": 16688 }, { "epoch": 0.814892578125, "grad_norm": 0.18949724733829498, "learning_rate": 8.806054375395493e-05, "loss": 1.7271, "step": 16689 }, { "epoch": 0.81494140625, "grad_norm": 0.21049213409423828, "learning_rate": 8.804105283002548e-05, "loss": 1.7343, "step": 16690 }, { "epoch": 0.814990234375, "grad_norm": 0.22229565680027008, "learning_rate": 8.802156643719636e-05, "loss": 1.7345, "step": 16691 }, { "epoch": 0.8150390625, "grad_norm": 0.17915010452270508, "learning_rate": 8.800208457593969e-05, "loss": 1.7285, "step": 16692 }, { "epoch": 0.815087890625, "grad_norm": 0.23882454633712769, "learning_rate": 8.798260724672782e-05, "loss": 1.7208, "step": 16693 }, { "epoch": 0.81513671875, "grad_norm": 0.17305850982666016, "learning_rate": 8.796313445003265e-05, "loss": 1.7003, "step": 16694 }, { "epoch": 0.815185546875, "grad_norm": 0.2394171804189682, "learning_rate": 8.794366618632626e-05, "loss": 1.7295, "step": 16695 }, { "epoch": 0.815234375, "grad_norm": 0.18460199236869812, "learning_rate": 8.79242024560803e-05, "loss": 1.7817, "step": 16696 }, { "epoch": 0.815283203125, "grad_norm": 0.21531958878040314, "learning_rate": 8.79047432597667e-05, "loss": 1.7263, "step": 16697 }, { "epoch": 0.81533203125, "grad_norm": 0.23641881346702576, "learning_rate": 8.788528859785682e-05, "loss": 1.7054, "step": 16698 }, { "epoch": 0.815380859375, "grad_norm": 0.21739868819713593, "learning_rate": 8.786583847082244e-05, "loss": 1.7107, "step": 16699 }, { "epoch": 0.8154296875, "grad_norm": 0.24831265211105347, "learning_rate": 8.784639287913473e-05, "loss": 1.7422, "step": 16700 }, { "epoch": 0.815478515625, "grad_norm": 0.17147327959537506, "learning_rate": 8.782695182326506e-05, "loss": 1.7572, "step": 16701 }, { "epoch": 0.81552734375, "grad_norm": 0.21385374665260315, "learning_rate": 8.780751530368458e-05, "loss": 1.7274, "step": 16702 }, { "epoch": 0.815576171875, "grad_norm": 0.18995265662670135, "learning_rate": 8.778808332086436e-05, "loss": 1.7288, "step": 16703 }, { "epoch": 0.815625, "grad_norm": 0.20180167257785797, "learning_rate": 8.776865587527536e-05, "loss": 1.7225, "step": 16704 }, { "epoch": 0.815673828125, "grad_norm": 0.2043600082397461, "learning_rate": 8.774923296738837e-05, "loss": 1.7344, "step": 16705 }, { "epoch": 0.81572265625, "grad_norm": 0.20430754125118256, "learning_rate": 8.772981459767417e-05, "loss": 1.7301, "step": 16706 }, { "epoch": 0.815771484375, "grad_norm": 0.18747326731681824, "learning_rate": 8.771040076660343e-05, "loss": 1.7273, "step": 16707 }, { "epoch": 0.8158203125, "grad_norm": 0.20251770317554474, "learning_rate": 8.769099147464649e-05, "loss": 1.7452, "step": 16708 }, { "epoch": 0.815869140625, "grad_norm": 0.17872996628284454, "learning_rate": 8.767158672227394e-05, "loss": 1.7218, "step": 16709 }, { "epoch": 0.81591796875, "grad_norm": 0.2185889333486557, "learning_rate": 8.765218650995591e-05, "loss": 1.7128, "step": 16710 }, { "epoch": 0.815966796875, "grad_norm": 0.1795586496591568, "learning_rate": 8.763279083816273e-05, "loss": 1.7425, "step": 16711 }, { "epoch": 0.816015625, "grad_norm": 0.1653401404619217, "learning_rate": 8.761339970736426e-05, "loss": 1.711, "step": 16712 }, { "epoch": 0.816064453125, "grad_norm": 0.20590026676654816, "learning_rate": 8.759401311803075e-05, "loss": 1.7457, "step": 16713 }, { "epoch": 0.81611328125, "grad_norm": 0.1816648542881012, "learning_rate": 8.75746310706318e-05, "loss": 1.7227, "step": 16714 }, { "epoch": 0.816162109375, "grad_norm": 0.18743537366390228, "learning_rate": 8.755525356563729e-05, "loss": 1.7329, "step": 16715 }, { "epoch": 0.8162109375, "grad_norm": 0.18810608983039856, "learning_rate": 8.753588060351678e-05, "loss": 1.7452, "step": 16716 }, { "epoch": 0.816259765625, "grad_norm": 0.2082296907901764, "learning_rate": 8.75165121847398e-05, "loss": 1.7501, "step": 16717 }, { "epoch": 0.81630859375, "grad_norm": 0.20816613733768463, "learning_rate": 8.749714830977579e-05, "loss": 1.7211, "step": 16718 }, { "epoch": 0.816357421875, "grad_norm": 0.23911476135253906, "learning_rate": 8.7477788979094e-05, "loss": 1.7544, "step": 16719 }, { "epoch": 0.81640625, "grad_norm": 0.20518583059310913, "learning_rate": 8.745843419316367e-05, "loss": 1.7201, "step": 16720 }, { "epoch": 0.816455078125, "grad_norm": 0.19702035188674927, "learning_rate": 8.743908395245384e-05, "loss": 1.7164, "step": 16721 }, { "epoch": 0.81650390625, "grad_norm": 0.19344082474708557, "learning_rate": 8.741973825743351e-05, "loss": 1.7291, "step": 16722 }, { "epoch": 0.816552734375, "grad_norm": 0.18561697006225586, "learning_rate": 8.740039710857151e-05, "loss": 1.7263, "step": 16723 }, { "epoch": 0.8166015625, "grad_norm": 0.18676640093326569, "learning_rate": 8.738106050633662e-05, "loss": 1.7488, "step": 16724 }, { "epoch": 0.816650390625, "grad_norm": 0.1924913078546524, "learning_rate": 8.736172845119744e-05, "loss": 1.7251, "step": 16725 }, { "epoch": 0.81669921875, "grad_norm": 0.2049311101436615, "learning_rate": 8.734240094362253e-05, "loss": 1.7343, "step": 16726 }, { "epoch": 0.816748046875, "grad_norm": 0.17724783718585968, "learning_rate": 8.732307798408035e-05, "loss": 1.7355, "step": 16727 }, { "epoch": 0.816796875, "grad_norm": 0.20564308762550354, "learning_rate": 8.730375957303904e-05, "loss": 1.7434, "step": 16728 }, { "epoch": 0.816845703125, "grad_norm": 0.17123478651046753, "learning_rate": 8.728444571096704e-05, "loss": 1.6851, "step": 16729 }, { "epoch": 0.81689453125, "grad_norm": 0.22884120047092438, "learning_rate": 8.726513639833219e-05, "loss": 1.7457, "step": 16730 }, { "epoch": 0.816943359375, "grad_norm": 0.20366336405277252, "learning_rate": 8.724583163560268e-05, "loss": 1.748, "step": 16731 }, { "epoch": 0.8169921875, "grad_norm": 0.18714118003845215, "learning_rate": 8.722653142324616e-05, "loss": 1.7246, "step": 16732 }, { "epoch": 0.817041015625, "grad_norm": 0.21715053915977478, "learning_rate": 8.720723576173065e-05, "loss": 1.7287, "step": 16733 }, { "epoch": 0.81708984375, "grad_norm": 0.1745864450931549, "learning_rate": 8.718794465152358e-05, "loss": 1.7603, "step": 16734 }, { "epoch": 0.817138671875, "grad_norm": 0.22269262373447418, "learning_rate": 8.716865809309254e-05, "loss": 1.7025, "step": 16735 }, { "epoch": 0.8171875, "grad_norm": 0.19271518290042877, "learning_rate": 8.714937608690499e-05, "loss": 1.7183, "step": 16736 }, { "epoch": 0.817236328125, "grad_norm": 0.22480325400829315, "learning_rate": 8.713009863342825e-05, "loss": 1.7203, "step": 16737 }, { "epoch": 0.81728515625, "grad_norm": 0.2028125524520874, "learning_rate": 8.711082573312947e-05, "loss": 1.7223, "step": 16738 }, { "epoch": 0.817333984375, "grad_norm": 0.22613172233104706, "learning_rate": 8.709155738647578e-05, "loss": 1.6981, "step": 16739 }, { "epoch": 0.8173828125, "grad_norm": 0.1921650469303131, "learning_rate": 8.707229359393418e-05, "loss": 1.7126, "step": 16740 }, { "epoch": 0.817431640625, "grad_norm": 0.22944074869155884, "learning_rate": 8.70530343559715e-05, "loss": 1.7305, "step": 16741 }, { "epoch": 0.81748046875, "grad_norm": 0.2299039512872696, "learning_rate": 8.703377967305456e-05, "loss": 1.7231, "step": 16742 }, { "epoch": 0.817529296875, "grad_norm": 0.2134367674589157, "learning_rate": 8.701452954564996e-05, "loss": 1.7372, "step": 16743 }, { "epoch": 0.817578125, "grad_norm": 0.2506028115749359, "learning_rate": 8.699528397422424e-05, "loss": 1.7555, "step": 16744 }, { "epoch": 0.817626953125, "grad_norm": 0.17062875628471375, "learning_rate": 8.69760429592439e-05, "loss": 1.7262, "step": 16745 }, { "epoch": 0.81767578125, "grad_norm": 0.2335483580827713, "learning_rate": 8.69568065011752e-05, "loss": 1.7178, "step": 16746 }, { "epoch": 0.817724609375, "grad_norm": 0.19612930715084076, "learning_rate": 8.69375746004844e-05, "loss": 1.7419, "step": 16747 }, { "epoch": 0.8177734375, "grad_norm": 0.21136939525604248, "learning_rate": 8.691834725763748e-05, "loss": 1.7272, "step": 16748 }, { "epoch": 0.817822265625, "grad_norm": 0.21373388171195984, "learning_rate": 8.68991244731006e-05, "loss": 1.731, "step": 16749 }, { "epoch": 0.81787109375, "grad_norm": 0.21474798023700714, "learning_rate": 8.687990624733955e-05, "loss": 1.7549, "step": 16750 }, { "epoch": 0.817919921875, "grad_norm": 0.24268028140068054, "learning_rate": 8.686069258082008e-05, "loss": 1.7531, "step": 16751 }, { "epoch": 0.81796875, "grad_norm": 0.23948520421981812, "learning_rate": 8.684148347400786e-05, "loss": 1.7336, "step": 16752 }, { "epoch": 0.818017578125, "grad_norm": 0.23320084810256958, "learning_rate": 8.682227892736847e-05, "loss": 1.72, "step": 16753 }, { "epoch": 0.81806640625, "grad_norm": 0.24647395312786102, "learning_rate": 8.680307894136732e-05, "loss": 1.7215, "step": 16754 }, { "epoch": 0.818115234375, "grad_norm": 0.2042318433523178, "learning_rate": 8.678388351646977e-05, "loss": 1.7422, "step": 16755 }, { "epoch": 0.8181640625, "grad_norm": 0.23770174384117126, "learning_rate": 8.676469265314099e-05, "loss": 1.7235, "step": 16756 }, { "epoch": 0.818212890625, "grad_norm": 0.2012113481760025, "learning_rate": 8.674550635184612e-05, "loss": 1.7262, "step": 16757 }, { "epoch": 0.81826171875, "grad_norm": 0.19702135026454926, "learning_rate": 8.672632461305014e-05, "loss": 1.7443, "step": 16758 }, { "epoch": 0.818310546875, "grad_norm": 0.19709284603595734, "learning_rate": 8.670714743721796e-05, "loss": 1.7283, "step": 16759 }, { "epoch": 0.818359375, "grad_norm": 0.20971643924713135, "learning_rate": 8.66879748248143e-05, "loss": 1.7242, "step": 16760 }, { "epoch": 0.818408203125, "grad_norm": 0.1922774761915207, "learning_rate": 8.666880677630388e-05, "loss": 1.7414, "step": 16761 }, { "epoch": 0.81845703125, "grad_norm": 0.1942562609910965, "learning_rate": 8.664964329215125e-05, "loss": 1.7327, "step": 16762 }, { "epoch": 0.818505859375, "grad_norm": 0.184602290391922, "learning_rate": 8.66304843728208e-05, "loss": 1.7302, "step": 16763 }, { "epoch": 0.8185546875, "grad_norm": 0.18792060017585754, "learning_rate": 8.66113300187769e-05, "loss": 1.7118, "step": 16764 }, { "epoch": 0.818603515625, "grad_norm": 0.17573358118534088, "learning_rate": 8.659218023048379e-05, "loss": 1.7226, "step": 16765 }, { "epoch": 0.81865234375, "grad_norm": 0.19207578897476196, "learning_rate": 8.65730350084056e-05, "loss": 1.7331, "step": 16766 }, { "epoch": 0.818701171875, "grad_norm": 0.1691143810749054, "learning_rate": 8.655389435300621e-05, "loss": 1.7263, "step": 16767 }, { "epoch": 0.81875, "grad_norm": 0.16953103244304657, "learning_rate": 8.653475826474964e-05, "loss": 1.6901, "step": 16768 }, { "epoch": 0.818798828125, "grad_norm": 0.20101973414421082, "learning_rate": 8.651562674409957e-05, "loss": 1.7225, "step": 16769 }, { "epoch": 0.81884765625, "grad_norm": 0.1684264987707138, "learning_rate": 8.649649979151974e-05, "loss": 1.7269, "step": 16770 }, { "epoch": 0.818896484375, "grad_norm": 0.19116781651973724, "learning_rate": 8.647737740747363e-05, "loss": 1.7466, "step": 16771 }, { "epoch": 0.8189453125, "grad_norm": 0.19653551280498505, "learning_rate": 8.645825959242479e-05, "loss": 1.7321, "step": 16772 }, { "epoch": 0.818994140625, "grad_norm": 0.19111447036266327, "learning_rate": 8.64391463468365e-05, "loss": 1.7366, "step": 16773 }, { "epoch": 0.81904296875, "grad_norm": 0.20585334300994873, "learning_rate": 8.642003767117196e-05, "loss": 1.7425, "step": 16774 }, { "epoch": 0.819091796875, "grad_norm": 0.19973814487457275, "learning_rate": 8.64009335658943e-05, "loss": 1.7256, "step": 16775 }, { "epoch": 0.819140625, "grad_norm": 0.18817222118377686, "learning_rate": 8.638183403146657e-05, "loss": 1.7083, "step": 16776 }, { "epoch": 0.819189453125, "grad_norm": 0.21562331914901733, "learning_rate": 8.63627390683516e-05, "loss": 1.7232, "step": 16777 }, { "epoch": 0.81923828125, "grad_norm": 0.17562982439994812, "learning_rate": 8.634364867701222e-05, "loss": 1.7236, "step": 16778 }, { "epoch": 0.819287109375, "grad_norm": 0.22186079621315002, "learning_rate": 8.632456285791107e-05, "loss": 1.7305, "step": 16779 }, { "epoch": 0.8193359375, "grad_norm": 0.19856049120426178, "learning_rate": 8.63054816115107e-05, "loss": 1.7579, "step": 16780 }, { "epoch": 0.819384765625, "grad_norm": 0.20869778096675873, "learning_rate": 8.628640493827361e-05, "loss": 1.7133, "step": 16781 }, { "epoch": 0.81943359375, "grad_norm": 0.2119804471731186, "learning_rate": 8.626733283866209e-05, "loss": 1.7085, "step": 16782 }, { "epoch": 0.819482421875, "grad_norm": 0.18072929978370667, "learning_rate": 8.62482653131384e-05, "loss": 1.7326, "step": 16783 }, { "epoch": 0.81953125, "grad_norm": 0.19455839693546295, "learning_rate": 8.622920236216467e-05, "loss": 1.7395, "step": 16784 }, { "epoch": 0.819580078125, "grad_norm": 0.20529283583164215, "learning_rate": 8.621014398620278e-05, "loss": 1.7326, "step": 16785 }, { "epoch": 0.81962890625, "grad_norm": 0.20058739185333252, "learning_rate": 8.619109018571483e-05, "loss": 1.7037, "step": 16786 }, { "epoch": 0.819677734375, "grad_norm": 0.20227067172527313, "learning_rate": 8.617204096116247e-05, "loss": 1.7364, "step": 16787 }, { "epoch": 0.8197265625, "grad_norm": 0.2024182677268982, "learning_rate": 8.615299631300738e-05, "loss": 1.7257, "step": 16788 }, { "epoch": 0.819775390625, "grad_norm": 0.20102867484092712, "learning_rate": 8.613395624171112e-05, "loss": 1.7093, "step": 16789 }, { "epoch": 0.81982421875, "grad_norm": 0.19497615098953247, "learning_rate": 8.611492074773519e-05, "loss": 1.7081, "step": 16790 }, { "epoch": 0.819873046875, "grad_norm": 0.17718614637851715, "learning_rate": 8.60958898315409e-05, "loss": 1.7195, "step": 16791 }, { "epoch": 0.819921875, "grad_norm": 0.1917216032743454, "learning_rate": 8.607686349358949e-05, "loss": 1.7256, "step": 16792 }, { "epoch": 0.819970703125, "grad_norm": 0.18223044276237488, "learning_rate": 8.605784173434205e-05, "loss": 1.7111, "step": 16793 }, { "epoch": 0.82001953125, "grad_norm": 0.18570677936077118, "learning_rate": 8.603882455425962e-05, "loss": 1.7069, "step": 16794 }, { "epoch": 0.820068359375, "grad_norm": 0.21158215403556824, "learning_rate": 8.601981195380309e-05, "loss": 1.7525, "step": 16795 }, { "epoch": 0.8201171875, "grad_norm": 0.216263547539711, "learning_rate": 8.600080393343327e-05, "loss": 1.7028, "step": 16796 }, { "epoch": 0.820166015625, "grad_norm": 0.17458096146583557, "learning_rate": 8.598180049361076e-05, "loss": 1.7114, "step": 16797 }, { "epoch": 0.82021484375, "grad_norm": 0.18803870677947998, "learning_rate": 8.596280163479621e-05, "loss": 1.7273, "step": 16798 }, { "epoch": 0.820263671875, "grad_norm": 0.18681290745735168, "learning_rate": 8.594380735745e-05, "loss": 1.7327, "step": 16799 }, { "epoch": 0.8203125, "grad_norm": 0.18431895971298218, "learning_rate": 8.592481766203256e-05, "loss": 1.7259, "step": 16800 }, { "epoch": 0.820361328125, "grad_norm": 0.1793018877506256, "learning_rate": 8.590583254900399e-05, "loss": 1.6973, "step": 16801 }, { "epoch": 0.82041015625, "grad_norm": 0.18583476543426514, "learning_rate": 8.588685201882458e-05, "loss": 1.7286, "step": 16802 }, { "epoch": 0.820458984375, "grad_norm": 0.1926766335964203, "learning_rate": 8.586787607195413e-05, "loss": 1.7437, "step": 16803 }, { "epoch": 0.8205078125, "grad_norm": 0.17949053645133972, "learning_rate": 8.584890470885276e-05, "loss": 1.7242, "step": 16804 }, { "epoch": 0.820556640625, "grad_norm": 0.2078527808189392, "learning_rate": 8.582993792998003e-05, "loss": 1.7092, "step": 16805 }, { "epoch": 0.82060546875, "grad_norm": 0.21538704633712769, "learning_rate": 8.581097573579585e-05, "loss": 1.7529, "step": 16806 }, { "epoch": 0.820654296875, "grad_norm": 0.1946837455034256, "learning_rate": 8.579201812675962e-05, "loss": 1.7334, "step": 16807 }, { "epoch": 0.820703125, "grad_norm": 0.21062909066677094, "learning_rate": 8.577306510333083e-05, "loss": 1.7157, "step": 16808 }, { "epoch": 0.820751953125, "grad_norm": 0.22031494975090027, "learning_rate": 8.57541166659688e-05, "loss": 1.7357, "step": 16809 }, { "epoch": 0.82080078125, "grad_norm": 0.1836862713098526, "learning_rate": 8.573517281513283e-05, "loss": 1.7164, "step": 16810 }, { "epoch": 0.820849609375, "grad_norm": 0.20129583775997162, "learning_rate": 8.5716233551282e-05, "loss": 1.7337, "step": 16811 }, { "epoch": 0.8208984375, "grad_norm": 0.20188648998737335, "learning_rate": 8.569729887487529e-05, "loss": 1.7536, "step": 16812 }, { "epoch": 0.820947265625, "grad_norm": 0.19988039135932922, "learning_rate": 8.567836878637167e-05, "loss": 1.6973, "step": 16813 }, { "epoch": 0.82099609375, "grad_norm": 0.1927431970834732, "learning_rate": 8.565944328622985e-05, "loss": 1.7259, "step": 16814 }, { "epoch": 0.821044921875, "grad_norm": 0.19478178024291992, "learning_rate": 8.564052237490853e-05, "loss": 1.7275, "step": 16815 }, { "epoch": 0.82109375, "grad_norm": 0.16545960307121277, "learning_rate": 8.562160605286628e-05, "loss": 1.748, "step": 16816 }, { "epoch": 0.821142578125, "grad_norm": 0.18565905094146729, "learning_rate": 8.56026943205616e-05, "loss": 1.7517, "step": 16817 }, { "epoch": 0.82119140625, "grad_norm": 0.19482029974460602, "learning_rate": 8.55837871784528e-05, "loss": 1.7271, "step": 16818 }, { "epoch": 0.821240234375, "grad_norm": 0.23064737021923065, "learning_rate": 8.5564884626998e-05, "loss": 1.7212, "step": 16819 }, { "epoch": 0.8212890625, "grad_norm": 0.19033752381801605, "learning_rate": 8.554598666665548e-05, "loss": 1.7273, "step": 16820 }, { "epoch": 0.821337890625, "grad_norm": 0.2566113770008087, "learning_rate": 8.55270932978831e-05, "loss": 1.7165, "step": 16821 }, { "epoch": 0.82138671875, "grad_norm": 0.19966787099838257, "learning_rate": 8.550820452113897e-05, "loss": 1.7199, "step": 16822 }, { "epoch": 0.821435546875, "grad_norm": 0.22617077827453613, "learning_rate": 8.548932033688059e-05, "loss": 1.7351, "step": 16823 }, { "epoch": 0.821484375, "grad_norm": 0.22349083423614502, "learning_rate": 8.547044074556589e-05, "loss": 1.7358, "step": 16824 }, { "epoch": 0.821533203125, "grad_norm": 0.1995031088590622, "learning_rate": 8.545156574765228e-05, "loss": 1.7327, "step": 16825 }, { "epoch": 0.82158203125, "grad_norm": 0.19464465975761414, "learning_rate": 8.543269534359727e-05, "loss": 1.725, "step": 16826 }, { "epoch": 0.821630859375, "grad_norm": 0.22506913542747498, "learning_rate": 8.54138295338582e-05, "loss": 1.7086, "step": 16827 }, { "epoch": 0.8216796875, "grad_norm": 0.1882917284965515, "learning_rate": 8.539496831889224e-05, "loss": 1.7223, "step": 16828 }, { "epoch": 0.821728515625, "grad_norm": 0.2193419188261032, "learning_rate": 8.537611169915657e-05, "loss": 1.7434, "step": 16829 }, { "epoch": 0.82177734375, "grad_norm": 0.20638881623744965, "learning_rate": 8.535725967510819e-05, "loss": 1.7363, "step": 16830 }, { "epoch": 0.821826171875, "grad_norm": 0.20995669066905975, "learning_rate": 8.533841224720396e-05, "loss": 1.7416, "step": 16831 }, { "epoch": 0.821875, "grad_norm": 0.2153986543416977, "learning_rate": 8.531956941590069e-05, "loss": 1.7351, "step": 16832 }, { "epoch": 0.821923828125, "grad_norm": 0.23377658426761627, "learning_rate": 8.530073118165502e-05, "loss": 1.7449, "step": 16833 }, { "epoch": 0.82197265625, "grad_norm": 0.20138972997665405, "learning_rate": 8.528189754492361e-05, "loss": 1.7158, "step": 16834 }, { "epoch": 0.822021484375, "grad_norm": 0.2047191858291626, "learning_rate": 8.526306850616272e-05, "loss": 1.7347, "step": 16835 }, { "epoch": 0.8220703125, "grad_norm": 0.21226118505001068, "learning_rate": 8.52442440658289e-05, "loss": 1.7231, "step": 16836 }, { "epoch": 0.822119140625, "grad_norm": 0.21699272096157074, "learning_rate": 8.522542422437817e-05, "loss": 1.7577, "step": 16837 }, { "epoch": 0.82216796875, "grad_norm": 0.20180198550224304, "learning_rate": 8.520660898226685e-05, "loss": 1.7084, "step": 16838 }, { "epoch": 0.822216796875, "grad_norm": 0.1975889354944229, "learning_rate": 8.518779833995074e-05, "loss": 1.7583, "step": 16839 }, { "epoch": 0.822265625, "grad_norm": 0.2186805009841919, "learning_rate": 8.516899229788594e-05, "loss": 1.7149, "step": 16840 }, { "epoch": 0.822314453125, "grad_norm": 0.19816848635673523, "learning_rate": 8.5150190856528e-05, "loss": 1.7472, "step": 16841 }, { "epoch": 0.82236328125, "grad_norm": 0.20064586400985718, "learning_rate": 8.513139401633282e-05, "loss": 1.7331, "step": 16842 }, { "epoch": 0.822412109375, "grad_norm": 0.2095855325460434, "learning_rate": 8.511260177775582e-05, "loss": 1.7295, "step": 16843 }, { "epoch": 0.8224609375, "grad_norm": 0.2048993855714798, "learning_rate": 8.509381414125243e-05, "loss": 1.7289, "step": 16844 }, { "epoch": 0.822509765625, "grad_norm": 0.22236210107803345, "learning_rate": 8.507503110727802e-05, "loss": 1.7556, "step": 16845 }, { "epoch": 0.82255859375, "grad_norm": 0.22182206809520721, "learning_rate": 8.505625267628783e-05, "loss": 1.7237, "step": 16846 }, { "epoch": 0.822607421875, "grad_norm": 0.21752679347991943, "learning_rate": 8.503747884873698e-05, "loss": 1.7625, "step": 16847 }, { "epoch": 0.82265625, "grad_norm": 0.2115432322025299, "learning_rate": 8.501870962508044e-05, "loss": 1.7454, "step": 16848 }, { "epoch": 0.822705078125, "grad_norm": 0.2086665779352188, "learning_rate": 8.499994500577306e-05, "loss": 1.729, "step": 16849 }, { "epoch": 0.82275390625, "grad_norm": 0.23341722786426544, "learning_rate": 8.498118499126973e-05, "loss": 1.7303, "step": 16850 }, { "epoch": 0.822802734375, "grad_norm": 0.16226355731487274, "learning_rate": 8.496242958202496e-05, "loss": 1.732, "step": 16851 }, { "epoch": 0.8228515625, "grad_norm": 0.22808606922626495, "learning_rate": 8.494367877849346e-05, "loss": 1.7019, "step": 16852 }, { "epoch": 0.822900390625, "grad_norm": 0.20440377295017242, "learning_rate": 8.49249325811295e-05, "loss": 1.7487, "step": 16853 }, { "epoch": 0.82294921875, "grad_norm": 0.20422378182411194, "learning_rate": 8.490619099038763e-05, "loss": 1.7533, "step": 16854 }, { "epoch": 0.822998046875, "grad_norm": 0.23260760307312012, "learning_rate": 8.48874540067218e-05, "loss": 1.7382, "step": 16855 }, { "epoch": 0.823046875, "grad_norm": 0.19472351670265198, "learning_rate": 8.486872163058637e-05, "loss": 1.7439, "step": 16856 }, { "epoch": 0.823095703125, "grad_norm": 0.20922532677650452, "learning_rate": 8.484999386243511e-05, "loss": 1.7448, "step": 16857 }, { "epoch": 0.82314453125, "grad_norm": 0.18978816270828247, "learning_rate": 8.483127070272215e-05, "loss": 1.7139, "step": 16858 }, { "epoch": 0.823193359375, "grad_norm": 0.1824633777141571, "learning_rate": 8.481255215190105e-05, "loss": 1.7289, "step": 16859 }, { "epoch": 0.8232421875, "grad_norm": 0.18290971219539642, "learning_rate": 8.479383821042555e-05, "loss": 1.7216, "step": 16860 }, { "epoch": 0.823291015625, "grad_norm": 0.18373842537403107, "learning_rate": 8.477512887874917e-05, "loss": 1.7455, "step": 16861 }, { "epoch": 0.82333984375, "grad_norm": 0.1999458521604538, "learning_rate": 8.47564241573254e-05, "loss": 1.7555, "step": 16862 }, { "epoch": 0.823388671875, "grad_norm": 0.1673600971698761, "learning_rate": 8.473772404660753e-05, "loss": 1.7205, "step": 16863 }, { "epoch": 0.8234375, "grad_norm": 0.20066824555397034, "learning_rate": 8.471902854704874e-05, "loss": 1.7308, "step": 16864 }, { "epoch": 0.823486328125, "grad_norm": 0.16892032325267792, "learning_rate": 8.470033765910219e-05, "loss": 1.7289, "step": 16865 }, { "epoch": 0.82353515625, "grad_norm": 0.19670994579792023, "learning_rate": 8.468165138322082e-05, "loss": 1.7544, "step": 16866 }, { "epoch": 0.823583984375, "grad_norm": 0.20588012039661407, "learning_rate": 8.466296971985755e-05, "loss": 1.7297, "step": 16867 }, { "epoch": 0.8236328125, "grad_norm": 0.19579660892486572, "learning_rate": 8.464429266946516e-05, "loss": 1.7246, "step": 16868 }, { "epoch": 0.823681640625, "grad_norm": 0.2035694122314453, "learning_rate": 8.462562023249618e-05, "loss": 1.7463, "step": 16869 }, { "epoch": 0.82373046875, "grad_norm": 0.20898447930812836, "learning_rate": 8.460695240940333e-05, "loss": 1.7415, "step": 16870 }, { "epoch": 0.823779296875, "grad_norm": 0.19231149554252625, "learning_rate": 8.458828920063886e-05, "loss": 1.703, "step": 16871 }, { "epoch": 0.823828125, "grad_norm": 0.21552349627017975, "learning_rate": 8.456963060665528e-05, "loss": 1.7198, "step": 16872 }, { "epoch": 0.823876953125, "grad_norm": 0.20556093752384186, "learning_rate": 8.455097662790456e-05, "loss": 1.7118, "step": 16873 }, { "epoch": 0.82392578125, "grad_norm": 0.20225432515144348, "learning_rate": 8.453232726483903e-05, "loss": 1.7424, "step": 16874 }, { "epoch": 0.823974609375, "grad_norm": 0.18683534860610962, "learning_rate": 8.45136825179105e-05, "loss": 1.7099, "step": 16875 }, { "epoch": 0.8240234375, "grad_norm": 0.22274266183376312, "learning_rate": 8.4495042387571e-05, "loss": 1.7671, "step": 16876 }, { "epoch": 0.824072265625, "grad_norm": 0.1790142059326172, "learning_rate": 8.447640687427214e-05, "loss": 1.7127, "step": 16877 }, { "epoch": 0.82412109375, "grad_norm": 0.1952400654554367, "learning_rate": 8.445777597846563e-05, "loss": 1.7116, "step": 16878 }, { "epoch": 0.824169921875, "grad_norm": 0.20155075192451477, "learning_rate": 8.443914970060298e-05, "loss": 1.7577, "step": 16879 }, { "epoch": 0.82421875, "grad_norm": 0.17237164080142975, "learning_rate": 8.442052804113567e-05, "loss": 1.7485, "step": 16880 }, { "epoch": 0.824267578125, "grad_norm": 0.20170369744300842, "learning_rate": 8.440191100051494e-05, "loss": 1.7439, "step": 16881 }, { "epoch": 0.82431640625, "grad_norm": 0.192783921957016, "learning_rate": 8.438329857919202e-05, "loss": 1.7151, "step": 16882 }, { "epoch": 0.824365234375, "grad_norm": 0.19925212860107422, "learning_rate": 8.436469077761804e-05, "loss": 1.7245, "step": 16883 }, { "epoch": 0.8244140625, "grad_norm": 0.1752372831106186, "learning_rate": 8.434608759624396e-05, "loss": 1.7186, "step": 16884 }, { "epoch": 0.824462890625, "grad_norm": 0.1967497169971466, "learning_rate": 8.43274890355205e-05, "loss": 1.7373, "step": 16885 }, { "epoch": 0.82451171875, "grad_norm": 0.19256845116615295, "learning_rate": 8.430889509589865e-05, "loss": 1.7174, "step": 16886 }, { "epoch": 0.824560546875, "grad_norm": 0.16724419593811035, "learning_rate": 8.429030577782882e-05, "loss": 1.7149, "step": 16887 }, { "epoch": 0.824609375, "grad_norm": 0.22765269875526428, "learning_rate": 8.427172108176173e-05, "loss": 1.721, "step": 16888 }, { "epoch": 0.824658203125, "grad_norm": 0.1934201866388321, "learning_rate": 8.425314100814763e-05, "loss": 1.7417, "step": 16889 }, { "epoch": 0.82470703125, "grad_norm": 0.21155595779418945, "learning_rate": 8.4234565557437e-05, "loss": 1.7226, "step": 16890 }, { "epoch": 0.824755859375, "grad_norm": 0.21938739717006683, "learning_rate": 8.421599473007982e-05, "loss": 1.7536, "step": 16891 }, { "epoch": 0.8248046875, "grad_norm": 0.19360284507274628, "learning_rate": 8.419742852652636e-05, "loss": 1.7222, "step": 16892 }, { "epoch": 0.824853515625, "grad_norm": 0.1849316954612732, "learning_rate": 8.41788669472265e-05, "loss": 1.7446, "step": 16893 }, { "epoch": 0.82490234375, "grad_norm": 0.2157135158777237, "learning_rate": 8.416030999263008e-05, "loss": 1.7313, "step": 16894 }, { "epoch": 0.824951171875, "grad_norm": 0.18348433077335358, "learning_rate": 8.414175766318688e-05, "loss": 1.7322, "step": 16895 }, { "epoch": 0.825, "grad_norm": 0.20133821666240692, "learning_rate": 8.412320995934653e-05, "loss": 1.7623, "step": 16896 }, { "epoch": 0.825048828125, "grad_norm": 0.20664511620998383, "learning_rate": 8.410466688155851e-05, "loss": 1.74, "step": 16897 }, { "epoch": 0.82509765625, "grad_norm": 0.1761031448841095, "learning_rate": 8.408612843027227e-05, "loss": 1.7245, "step": 16898 }, { "epoch": 0.825146484375, "grad_norm": 0.20573307573795319, "learning_rate": 8.406759460593707e-05, "loss": 1.7029, "step": 16899 }, { "epoch": 0.8251953125, "grad_norm": 0.17428728938102722, "learning_rate": 8.404906540900212e-05, "loss": 1.7083, "step": 16900 }, { "epoch": 0.825244140625, "grad_norm": 0.19499847292900085, "learning_rate": 8.403054083991648e-05, "loss": 1.7186, "step": 16901 }, { "epoch": 0.82529296875, "grad_norm": 0.17782394587993622, "learning_rate": 8.401202089912916e-05, "loss": 1.7381, "step": 16902 }, { "epoch": 0.825341796875, "grad_norm": 0.22389616072177887, "learning_rate": 8.399350558708882e-05, "loss": 1.7313, "step": 16903 }, { "epoch": 0.825390625, "grad_norm": 0.18049325048923492, "learning_rate": 8.397499490424447e-05, "loss": 1.7424, "step": 16904 }, { "epoch": 0.825439453125, "grad_norm": 0.20778433978557587, "learning_rate": 8.395648885104445e-05, "loss": 1.7222, "step": 16905 }, { "epoch": 0.82548828125, "grad_norm": 0.17266413569450378, "learning_rate": 8.393798742793753e-05, "loss": 1.6941, "step": 16906 }, { "epoch": 0.825537109375, "grad_norm": 0.22843894362449646, "learning_rate": 8.391949063537186e-05, "loss": 1.766, "step": 16907 }, { "epoch": 0.8255859375, "grad_norm": 0.16664747893810272, "learning_rate": 8.390099847379596e-05, "loss": 1.7314, "step": 16908 }, { "epoch": 0.825634765625, "grad_norm": 0.19533281028270721, "learning_rate": 8.388251094365785e-05, "loss": 1.7232, "step": 16909 }, { "epoch": 0.82568359375, "grad_norm": 0.17049944400787354, "learning_rate": 8.38640280454056e-05, "loss": 1.7253, "step": 16910 }, { "epoch": 0.825732421875, "grad_norm": 0.19165365397930145, "learning_rate": 8.384554977948719e-05, "loss": 1.7291, "step": 16911 }, { "epoch": 0.82578125, "grad_norm": 0.20347145199775696, "learning_rate": 8.382707614635049e-05, "loss": 1.7092, "step": 16912 }, { "epoch": 0.825830078125, "grad_norm": 0.2322273850440979, "learning_rate": 8.380860714644317e-05, "loss": 1.7313, "step": 16913 }, { "epoch": 0.82587890625, "grad_norm": 0.19640114903450012, "learning_rate": 8.379014278021282e-05, "loss": 1.7173, "step": 16914 }, { "epoch": 0.825927734375, "grad_norm": 0.23724448680877686, "learning_rate": 8.377168304810703e-05, "loss": 1.7425, "step": 16915 }, { "epoch": 0.8259765625, "grad_norm": 0.22332055866718292, "learning_rate": 8.375322795057311e-05, "loss": 1.7325, "step": 16916 }, { "epoch": 0.826025390625, "grad_norm": 0.23930993676185608, "learning_rate": 8.373477748805834e-05, "loss": 1.7305, "step": 16917 }, { "epoch": 0.82607421875, "grad_norm": 0.21113339066505432, "learning_rate": 8.371633166100997e-05, "loss": 1.7507, "step": 16918 }, { "epoch": 0.826123046875, "grad_norm": 0.22075186669826508, "learning_rate": 8.369789046987487e-05, "loss": 1.7464, "step": 16919 }, { "epoch": 0.826171875, "grad_norm": 0.22175370156764984, "learning_rate": 8.36794539151002e-05, "loss": 1.717, "step": 16920 }, { "epoch": 0.826220703125, "grad_norm": 0.22171378135681152, "learning_rate": 8.366102199713255e-05, "loss": 1.7203, "step": 16921 }, { "epoch": 0.82626953125, "grad_norm": 0.2041543871164322, "learning_rate": 8.364259471641886e-05, "loss": 1.708, "step": 16922 }, { "epoch": 0.826318359375, "grad_norm": 0.22476617991924286, "learning_rate": 8.362417207340553e-05, "loss": 1.7391, "step": 16923 }, { "epoch": 0.8263671875, "grad_norm": 0.19834351539611816, "learning_rate": 8.360575406853923e-05, "loss": 1.7399, "step": 16924 }, { "epoch": 0.826416015625, "grad_norm": 0.19897836446762085, "learning_rate": 8.358734070226615e-05, "loss": 1.7234, "step": 16925 }, { "epoch": 0.82646484375, "grad_norm": 0.22330252826213837, "learning_rate": 8.356893197503273e-05, "loss": 1.7331, "step": 16926 }, { "epoch": 0.826513671875, "grad_norm": 0.204402357339859, "learning_rate": 8.355052788728502e-05, "loss": 1.7363, "step": 16927 }, { "epoch": 0.8265625, "grad_norm": 0.21588687598705292, "learning_rate": 8.353212843946905e-05, "loss": 1.7082, "step": 16928 }, { "epoch": 0.826611328125, "grad_norm": 0.21271643042564392, "learning_rate": 8.351373363203079e-05, "loss": 1.7413, "step": 16929 }, { "epoch": 0.82666015625, "grad_norm": 0.2048100233078003, "learning_rate": 8.349534346541599e-05, "loss": 1.764, "step": 16930 }, { "epoch": 0.826708984375, "grad_norm": 0.20611736178398132, "learning_rate": 8.347695794007043e-05, "loss": 1.7105, "step": 16931 }, { "epoch": 0.8267578125, "grad_norm": 0.22880181670188904, "learning_rate": 8.345857705643965e-05, "loss": 1.6946, "step": 16932 }, { "epoch": 0.826806640625, "grad_norm": 0.18036754429340363, "learning_rate": 8.344020081496916e-05, "loss": 1.7238, "step": 16933 }, { "epoch": 0.82685546875, "grad_norm": 0.2105465531349182, "learning_rate": 8.342182921610428e-05, "loss": 1.7039, "step": 16934 }, { "epoch": 0.826904296875, "grad_norm": 0.21161554753780365, "learning_rate": 8.340346226029031e-05, "loss": 1.7203, "step": 16935 }, { "epoch": 0.826953125, "grad_norm": 0.17612066864967346, "learning_rate": 8.338509994797236e-05, "loss": 1.7389, "step": 16936 }, { "epoch": 0.827001953125, "grad_norm": 0.1997154802083969, "learning_rate": 8.33667422795954e-05, "loss": 1.7246, "step": 16937 }, { "epoch": 0.82705078125, "grad_norm": 0.18994276225566864, "learning_rate": 8.334838925560448e-05, "loss": 1.7385, "step": 16938 }, { "epoch": 0.827099609375, "grad_norm": 0.18769077956676483, "learning_rate": 8.333004087644421e-05, "loss": 1.7058, "step": 16939 }, { "epoch": 0.8271484375, "grad_norm": 0.18006350100040436, "learning_rate": 8.331169714255949e-05, "loss": 1.7293, "step": 16940 }, { "epoch": 0.827197265625, "grad_norm": 0.19617262482643127, "learning_rate": 8.329335805439469e-05, "loss": 1.7292, "step": 16941 }, { "epoch": 0.82724609375, "grad_norm": 0.18554578721523285, "learning_rate": 8.327502361239449e-05, "loss": 1.737, "step": 16942 }, { "epoch": 0.827294921875, "grad_norm": 0.20560982823371887, "learning_rate": 8.325669381700304e-05, "loss": 1.7296, "step": 16943 }, { "epoch": 0.82734375, "grad_norm": 0.1798848807811737, "learning_rate": 8.323836866866471e-05, "loss": 1.7588, "step": 16944 }, { "epoch": 0.827392578125, "grad_norm": 0.20498156547546387, "learning_rate": 8.322004816782354e-05, "loss": 1.7195, "step": 16945 }, { "epoch": 0.82744140625, "grad_norm": 0.20573526620864868, "learning_rate": 8.320173231492356e-05, "loss": 1.7185, "step": 16946 }, { "epoch": 0.827490234375, "grad_norm": 0.22384443879127502, "learning_rate": 8.318342111040872e-05, "loss": 1.7506, "step": 16947 }, { "epoch": 0.8275390625, "grad_norm": 0.19154703617095947, "learning_rate": 8.316511455472276e-05, "loss": 1.7269, "step": 16948 }, { "epoch": 0.827587890625, "grad_norm": 0.2201906144618988, "learning_rate": 8.314681264830937e-05, "loss": 1.7391, "step": 16949 }, { "epoch": 0.82763671875, "grad_norm": 0.19510658085346222, "learning_rate": 8.312851539161209e-05, "loss": 1.7149, "step": 16950 }, { "epoch": 0.827685546875, "grad_norm": 0.22765478491783142, "learning_rate": 8.311022278507444e-05, "loss": 1.721, "step": 16951 }, { "epoch": 0.827734375, "grad_norm": 0.19898682832717896, "learning_rate": 8.309193482913971e-05, "loss": 1.7284, "step": 16952 }, { "epoch": 0.827783203125, "grad_norm": 0.26081809401512146, "learning_rate": 8.307365152425104e-05, "loss": 1.731, "step": 16953 }, { "epoch": 0.82783203125, "grad_norm": 0.18884442746639252, "learning_rate": 8.305537287085168e-05, "loss": 1.747, "step": 16954 }, { "epoch": 0.827880859375, "grad_norm": 0.2504102289676666, "learning_rate": 8.303709886938449e-05, "loss": 1.734, "step": 16955 }, { "epoch": 0.8279296875, "grad_norm": 0.20112347602844238, "learning_rate": 8.301882952029253e-05, "loss": 1.7526, "step": 16956 }, { "epoch": 0.827978515625, "grad_norm": 0.2108546942472458, "learning_rate": 8.300056482401838e-05, "loss": 1.7168, "step": 16957 }, { "epoch": 0.82802734375, "grad_norm": 0.20791806280612946, "learning_rate": 8.298230478100485e-05, "loss": 1.7589, "step": 16958 }, { "epoch": 0.828076171875, "grad_norm": 0.1900111734867096, "learning_rate": 8.296404939169436e-05, "loss": 1.7335, "step": 16959 }, { "epoch": 0.828125, "grad_norm": 0.2158309817314148, "learning_rate": 8.294579865652954e-05, "loss": 1.7294, "step": 16960 }, { "epoch": 0.828173828125, "grad_norm": 0.18557529151439667, "learning_rate": 8.292755257595251e-05, "loss": 1.7121, "step": 16961 }, { "epoch": 0.82822265625, "grad_norm": 0.20132564008235931, "learning_rate": 8.290931115040553e-05, "loss": 1.7394, "step": 16962 }, { "epoch": 0.828271484375, "grad_norm": 0.17861732840538025, "learning_rate": 8.289107438033076e-05, "loss": 1.7552, "step": 16963 }, { "epoch": 0.8283203125, "grad_norm": 0.1793593168258667, "learning_rate": 8.287284226617011e-05, "loss": 1.7163, "step": 16964 }, { "epoch": 0.828369140625, "grad_norm": 0.1762089729309082, "learning_rate": 8.28546148083655e-05, "loss": 1.724, "step": 16965 }, { "epoch": 0.82841796875, "grad_norm": 0.1906147003173828, "learning_rate": 8.283639200735867e-05, "loss": 1.711, "step": 16966 }, { "epoch": 0.828466796875, "grad_norm": 0.1843876987695694, "learning_rate": 8.281817386359125e-05, "loss": 1.7209, "step": 16967 }, { "epoch": 0.828515625, "grad_norm": 0.17751847207546234, "learning_rate": 8.279996037750481e-05, "loss": 1.7627, "step": 16968 }, { "epoch": 0.828564453125, "grad_norm": 0.22378703951835632, "learning_rate": 8.278175154954068e-05, "loss": 1.7492, "step": 16969 }, { "epoch": 0.82861328125, "grad_norm": 0.17433249950408936, "learning_rate": 8.276354738014033e-05, "loss": 1.6903, "step": 16970 }, { "epoch": 0.828662109375, "grad_norm": 0.2572706341743469, "learning_rate": 8.274534786974474e-05, "loss": 1.7309, "step": 16971 }, { "epoch": 0.8287109375, "grad_norm": 0.1801525354385376, "learning_rate": 8.272715301879518e-05, "loss": 1.7577, "step": 16972 }, { "epoch": 0.828759765625, "grad_norm": 0.252201110124588, "learning_rate": 8.270896282773247e-05, "loss": 1.7317, "step": 16973 }, { "epoch": 0.82880859375, "grad_norm": 0.1934880018234253, "learning_rate": 8.26907772969976e-05, "loss": 1.7224, "step": 16974 }, { "epoch": 0.828857421875, "grad_norm": 0.2428668886423111, "learning_rate": 8.267259642703117e-05, "loss": 1.704, "step": 16975 }, { "epoch": 0.82890625, "grad_norm": 0.1830534189939499, "learning_rate": 8.265442021827398e-05, "loss": 1.7332, "step": 16976 }, { "epoch": 0.828955078125, "grad_norm": 0.22910292446613312, "learning_rate": 8.263624867116637e-05, "loss": 1.7106, "step": 16977 }, { "epoch": 0.82900390625, "grad_norm": 0.21498408913612366, "learning_rate": 8.261808178614882e-05, "loss": 1.7334, "step": 16978 }, { "epoch": 0.829052734375, "grad_norm": 0.20661561191082, "learning_rate": 8.259991956366164e-05, "loss": 1.7085, "step": 16979 }, { "epoch": 0.8291015625, "grad_norm": 0.20617464184761047, "learning_rate": 8.258176200414495e-05, "loss": 1.7181, "step": 16980 }, { "epoch": 0.829150390625, "grad_norm": 0.17399974167346954, "learning_rate": 8.256360910803887e-05, "loss": 1.729, "step": 16981 }, { "epoch": 0.82919921875, "grad_norm": 0.21539932489395142, "learning_rate": 8.254546087578333e-05, "loss": 1.7369, "step": 16982 }, { "epoch": 0.829248046875, "grad_norm": 0.18708333373069763, "learning_rate": 8.252731730781814e-05, "loss": 1.7279, "step": 16983 }, { "epoch": 0.829296875, "grad_norm": 0.19827447831630707, "learning_rate": 8.250917840458305e-05, "loss": 1.7372, "step": 16984 }, { "epoch": 0.829345703125, "grad_norm": 0.22536827623844147, "learning_rate": 8.249104416651767e-05, "loss": 1.7506, "step": 16985 }, { "epoch": 0.82939453125, "grad_norm": 0.1736147403717041, "learning_rate": 8.247291459406149e-05, "loss": 1.7264, "step": 16986 }, { "epoch": 0.829443359375, "grad_norm": 0.2199755311012268, "learning_rate": 8.245478968765391e-05, "loss": 1.7446, "step": 16987 }, { "epoch": 0.8294921875, "grad_norm": 0.1786433458328247, "learning_rate": 8.243666944773425e-05, "loss": 1.7262, "step": 16988 }, { "epoch": 0.829541015625, "grad_norm": 0.21014012396335602, "learning_rate": 8.24185538747415e-05, "loss": 1.7428, "step": 16989 }, { "epoch": 0.82958984375, "grad_norm": 0.20612826943397522, "learning_rate": 8.240044296911489e-05, "loss": 1.7453, "step": 16990 }, { "epoch": 0.829638671875, "grad_norm": 0.16788050532341003, "learning_rate": 8.238233673129322e-05, "loss": 1.7616, "step": 16991 }, { "epoch": 0.8296875, "grad_norm": 0.1979055255651474, "learning_rate": 8.236423516171546e-05, "loss": 1.7247, "step": 16992 }, { "epoch": 0.829736328125, "grad_norm": 0.17901988327503204, "learning_rate": 8.23461382608201e-05, "loss": 1.7284, "step": 16993 }, { "epoch": 0.82978515625, "grad_norm": 0.20128196477890015, "learning_rate": 8.232804602904596e-05, "loss": 1.721, "step": 16994 }, { "epoch": 0.829833984375, "grad_norm": 0.1794348508119583, "learning_rate": 8.230995846683138e-05, "loss": 1.7485, "step": 16995 }, { "epoch": 0.8298828125, "grad_norm": 0.20867060124874115, "learning_rate": 8.229187557461474e-05, "loss": 1.7403, "step": 16996 }, { "epoch": 0.829931640625, "grad_norm": 0.2175954133272171, "learning_rate": 8.227379735283433e-05, "loss": 1.7478, "step": 16997 }, { "epoch": 0.82998046875, "grad_norm": 0.21097025275230408, "learning_rate": 8.225572380192828e-05, "loss": 1.7347, "step": 16998 }, { "epoch": 0.830029296875, "grad_norm": 0.2349458634853363, "learning_rate": 8.22376549223346e-05, "loss": 1.7446, "step": 16999 }, { "epoch": 0.830078125, "grad_norm": 0.19202165305614471, "learning_rate": 8.221959071449123e-05, "loss": 1.7376, "step": 17000 }, { "epoch": 0.830126953125, "grad_norm": 0.2094990611076355, "learning_rate": 8.220153117883594e-05, "loss": 1.7227, "step": 17001 }, { "epoch": 0.83017578125, "grad_norm": 0.255660742521286, "learning_rate": 8.218347631580645e-05, "loss": 1.7123, "step": 17002 }, { "epoch": 0.830224609375, "grad_norm": 0.1951301246881485, "learning_rate": 8.21654261258403e-05, "loss": 1.7609, "step": 17003 }, { "epoch": 0.8302734375, "grad_norm": 0.2160385251045227, "learning_rate": 8.214738060937499e-05, "loss": 1.7254, "step": 17004 }, { "epoch": 0.830322265625, "grad_norm": 0.23197688162326813, "learning_rate": 8.212933976684783e-05, "loss": 1.6979, "step": 17005 }, { "epoch": 0.83037109375, "grad_norm": 0.17980298399925232, "learning_rate": 8.21113035986961e-05, "loss": 1.7181, "step": 17006 }, { "epoch": 0.830419921875, "grad_norm": 0.24418911337852478, "learning_rate": 8.209327210535686e-05, "loss": 1.7188, "step": 17007 }, { "epoch": 0.83046875, "grad_norm": 0.2151344269514084, "learning_rate": 8.207524528726723e-05, "loss": 1.7342, "step": 17008 }, { "epoch": 0.830517578125, "grad_norm": 0.1995626538991928, "learning_rate": 8.205722314486392e-05, "loss": 1.7309, "step": 17009 }, { "epoch": 0.83056640625, "grad_norm": 0.2278156280517578, "learning_rate": 8.203920567858391e-05, "loss": 1.7336, "step": 17010 }, { "epoch": 0.830615234375, "grad_norm": 0.19766975939273834, "learning_rate": 8.20211928888637e-05, "loss": 1.7282, "step": 17011 }, { "epoch": 0.8306640625, "grad_norm": 0.2347654104232788, "learning_rate": 8.200318477613994e-05, "loss": 1.7294, "step": 17012 }, { "epoch": 0.830712890625, "grad_norm": 0.20451563596725464, "learning_rate": 8.198518134084904e-05, "loss": 1.7183, "step": 17013 }, { "epoch": 0.83076171875, "grad_norm": 0.21147151291370392, "learning_rate": 8.196718258342735e-05, "loss": 1.7577, "step": 17014 }, { "epoch": 0.830810546875, "grad_norm": 0.1945006251335144, "learning_rate": 8.194918850431108e-05, "loss": 1.7226, "step": 17015 }, { "epoch": 0.830859375, "grad_norm": 0.2075461894273758, "learning_rate": 8.193119910393629e-05, "loss": 1.7167, "step": 17016 }, { "epoch": 0.830908203125, "grad_norm": 0.22256556153297424, "learning_rate": 8.191321438273899e-05, "loss": 1.7468, "step": 17017 }, { "epoch": 0.83095703125, "grad_norm": 0.1956544816493988, "learning_rate": 8.18952343411551e-05, "loss": 1.7335, "step": 17018 }, { "epoch": 0.831005859375, "grad_norm": 0.21605798602104187, "learning_rate": 8.187725897962036e-05, "loss": 1.733, "step": 17019 }, { "epoch": 0.8310546875, "grad_norm": 0.20662955939769745, "learning_rate": 8.185928829857035e-05, "loss": 1.7205, "step": 17020 }, { "epoch": 0.831103515625, "grad_norm": 0.2023877054452896, "learning_rate": 8.184132229844071e-05, "loss": 1.7316, "step": 17021 }, { "epoch": 0.83115234375, "grad_norm": 0.18817462027072906, "learning_rate": 8.182336097966675e-05, "loss": 1.728, "step": 17022 }, { "epoch": 0.831201171875, "grad_norm": 0.1962328553199768, "learning_rate": 8.180540434268387e-05, "loss": 1.7401, "step": 17023 }, { "epoch": 0.83125, "grad_norm": 0.2123502939939499, "learning_rate": 8.17874523879272e-05, "loss": 1.7412, "step": 17024 }, { "epoch": 0.831298828125, "grad_norm": 0.17448316514492035, "learning_rate": 8.176950511583189e-05, "loss": 1.7316, "step": 17025 }, { "epoch": 0.83134765625, "grad_norm": 0.201190784573555, "learning_rate": 8.175156252683282e-05, "loss": 1.7306, "step": 17026 }, { "epoch": 0.831396484375, "grad_norm": 0.22093689441680908, "learning_rate": 8.173362462136496e-05, "loss": 1.7331, "step": 17027 }, { "epoch": 0.8314453125, "grad_norm": 0.18333086371421814, "learning_rate": 8.171569139986293e-05, "loss": 1.7272, "step": 17028 }, { "epoch": 0.831494140625, "grad_norm": 0.2198711782693863, "learning_rate": 8.169776286276138e-05, "loss": 1.7124, "step": 17029 }, { "epoch": 0.83154296875, "grad_norm": 0.18815787136554718, "learning_rate": 8.167983901049486e-05, "loss": 1.7353, "step": 17030 }, { "epoch": 0.831591796875, "grad_norm": 0.19689634442329407, "learning_rate": 8.166191984349776e-05, "loss": 1.7515, "step": 17031 }, { "epoch": 0.831640625, "grad_norm": 0.18703047931194305, "learning_rate": 8.164400536220434e-05, "loss": 1.7292, "step": 17032 }, { "epoch": 0.831689453125, "grad_norm": 0.19781620800495148, "learning_rate": 8.162609556704878e-05, "loss": 1.6966, "step": 17033 }, { "epoch": 0.83173828125, "grad_norm": 0.20912127196788788, "learning_rate": 8.160819045846518e-05, "loss": 1.7109, "step": 17034 }, { "epoch": 0.831787109375, "grad_norm": 0.22000481188297272, "learning_rate": 8.159029003688745e-05, "loss": 1.7028, "step": 17035 }, { "epoch": 0.8318359375, "grad_norm": 0.22306014597415924, "learning_rate": 8.15723943027494e-05, "loss": 1.7286, "step": 17036 }, { "epoch": 0.831884765625, "grad_norm": 0.17947471141815186, "learning_rate": 8.155450325648481e-05, "loss": 1.737, "step": 17037 }, { "epoch": 0.83193359375, "grad_norm": 0.21337929368019104, "learning_rate": 8.15366168985272e-05, "loss": 1.7179, "step": 17038 }, { "epoch": 0.831982421875, "grad_norm": 0.17494365572929382, "learning_rate": 8.151873522931013e-05, "loss": 1.7053, "step": 17039 }, { "epoch": 0.83203125, "grad_norm": 0.21764236688613892, "learning_rate": 8.150085824926696e-05, "loss": 1.7275, "step": 17040 }, { "epoch": 0.832080078125, "grad_norm": 0.19149303436279297, "learning_rate": 8.148298595883094e-05, "loss": 1.7188, "step": 17041 }, { "epoch": 0.83212890625, "grad_norm": 0.20541763305664062, "learning_rate": 8.146511835843523e-05, "loss": 1.7253, "step": 17042 }, { "epoch": 0.832177734375, "grad_norm": 0.2275131493806839, "learning_rate": 8.144725544851287e-05, "loss": 1.7013, "step": 17043 }, { "epoch": 0.8322265625, "grad_norm": 0.20911955833435059, "learning_rate": 8.142939722949674e-05, "loss": 1.7363, "step": 17044 }, { "epoch": 0.832275390625, "grad_norm": 0.21099919080734253, "learning_rate": 8.141154370181977e-05, "loss": 1.7134, "step": 17045 }, { "epoch": 0.83232421875, "grad_norm": 0.2468503713607788, "learning_rate": 8.139369486591444e-05, "loss": 1.7447, "step": 17046 }, { "epoch": 0.832373046875, "grad_norm": 0.22965438663959503, "learning_rate": 8.137585072221357e-05, "loss": 1.7391, "step": 17047 }, { "epoch": 0.832421875, "grad_norm": 0.23887673020362854, "learning_rate": 8.135801127114945e-05, "loss": 1.7323, "step": 17048 }, { "epoch": 0.832470703125, "grad_norm": 0.23104199767112732, "learning_rate": 8.13401765131545e-05, "loss": 1.728, "step": 17049 }, { "epoch": 0.83251953125, "grad_norm": 0.2080947309732437, "learning_rate": 8.132234644866096e-05, "loss": 1.7236, "step": 17050 }, { "epoch": 0.832568359375, "grad_norm": 0.2749614715576172, "learning_rate": 8.130452107810094e-05, "loss": 1.7392, "step": 17051 }, { "epoch": 0.8326171875, "grad_norm": 0.19509415328502655, "learning_rate": 8.128670040190647e-05, "loss": 1.7463, "step": 17052 }, { "epoch": 0.832666015625, "grad_norm": 0.2403278648853302, "learning_rate": 8.126888442050948e-05, "loss": 1.7086, "step": 17053 }, { "epoch": 0.83271484375, "grad_norm": 0.20953898131847382, "learning_rate": 8.125107313434168e-05, "loss": 1.7234, "step": 17054 }, { "epoch": 0.832763671875, "grad_norm": 0.24523106217384338, "learning_rate": 8.123326654383479e-05, "loss": 1.7413, "step": 17055 }, { "epoch": 0.8328125, "grad_norm": 0.197571262717247, "learning_rate": 8.121546464942035e-05, "loss": 1.7138, "step": 17056 }, { "epoch": 0.832861328125, "grad_norm": 0.198944553732872, "learning_rate": 8.119766745152981e-05, "loss": 1.7476, "step": 17057 }, { "epoch": 0.83291015625, "grad_norm": 0.19813379645347595, "learning_rate": 8.117987495059451e-05, "loss": 1.7349, "step": 17058 }, { "epoch": 0.832958984375, "grad_norm": 0.22161458432674408, "learning_rate": 8.116208714704565e-05, "loss": 1.7162, "step": 17059 }, { "epoch": 0.8330078125, "grad_norm": 0.1887810230255127, "learning_rate": 8.114430404131432e-05, "loss": 1.739, "step": 17060 }, { "epoch": 0.833056640625, "grad_norm": 0.23811198770999908, "learning_rate": 8.11265256338316e-05, "loss": 1.7143, "step": 17061 }, { "epoch": 0.83310546875, "grad_norm": 0.1770259439945221, "learning_rate": 8.110875192502817e-05, "loss": 1.7249, "step": 17062 }, { "epoch": 0.833154296875, "grad_norm": 0.2181708812713623, "learning_rate": 8.109098291533499e-05, "loss": 1.7069, "step": 17063 }, { "epoch": 0.833203125, "grad_norm": 0.1911035031080246, "learning_rate": 8.107321860518255e-05, "loss": 1.7088, "step": 17064 }, { "epoch": 0.833251953125, "grad_norm": 0.20747578144073486, "learning_rate": 8.105545899500155e-05, "loss": 1.7072, "step": 17065 }, { "epoch": 0.83330078125, "grad_norm": 0.1945456862449646, "learning_rate": 8.103770408522222e-05, "loss": 1.6892, "step": 17066 }, { "epoch": 0.833349609375, "grad_norm": 0.1850874274969101, "learning_rate": 8.101995387627508e-05, "loss": 1.7178, "step": 17067 }, { "epoch": 0.8333984375, "grad_norm": 0.21669548749923706, "learning_rate": 8.100220836859009e-05, "loss": 1.7249, "step": 17068 }, { "epoch": 0.833447265625, "grad_norm": 0.1955285370349884, "learning_rate": 8.09844675625975e-05, "loss": 1.7382, "step": 17069 }, { "epoch": 0.83349609375, "grad_norm": 0.21682609617710114, "learning_rate": 8.096673145872718e-05, "loss": 1.7366, "step": 17070 }, { "epoch": 0.833544921875, "grad_norm": 0.2158200889825821, "learning_rate": 8.094900005740901e-05, "loss": 1.7157, "step": 17071 }, { "epoch": 0.83359375, "grad_norm": 0.20855455100536346, "learning_rate": 8.09312733590727e-05, "loss": 1.7279, "step": 17072 }, { "epoch": 0.833642578125, "grad_norm": 0.2068440169095993, "learning_rate": 8.091355136414792e-05, "loss": 1.7466, "step": 17073 }, { "epoch": 0.83369140625, "grad_norm": 0.18837027251720428, "learning_rate": 8.089583407306415e-05, "loss": 1.742, "step": 17074 }, { "epoch": 0.833740234375, "grad_norm": 0.1956804245710373, "learning_rate": 8.087812148625077e-05, "loss": 1.7073, "step": 17075 }, { "epoch": 0.8337890625, "grad_norm": 0.21255561709403992, "learning_rate": 8.086041360413709e-05, "loss": 1.7412, "step": 17076 }, { "epoch": 0.833837890625, "grad_norm": 0.18207751214504242, "learning_rate": 8.084271042715223e-05, "loss": 1.7233, "step": 17077 }, { "epoch": 0.83388671875, "grad_norm": 0.2141083925962448, "learning_rate": 8.08250119557253e-05, "loss": 1.7347, "step": 17078 }, { "epoch": 0.833935546875, "grad_norm": 0.17067556083202362, "learning_rate": 8.080731819028524e-05, "loss": 1.7471, "step": 17079 }, { "epoch": 0.833984375, "grad_norm": 0.20268040895462036, "learning_rate": 8.078962913126072e-05, "loss": 1.7011, "step": 17080 }, { "epoch": 0.834033203125, "grad_norm": 0.1921287626028061, "learning_rate": 8.077194477908067e-05, "loss": 1.7016, "step": 17081 }, { "epoch": 0.83408203125, "grad_norm": 0.19000960886478424, "learning_rate": 8.075426513417348e-05, "loss": 1.7366, "step": 17082 }, { "epoch": 0.834130859375, "grad_norm": 0.1954164355993271, "learning_rate": 8.073659019696784e-05, "loss": 1.7132, "step": 17083 }, { "epoch": 0.8341796875, "grad_norm": 0.18269221484661102, "learning_rate": 8.071891996789193e-05, "loss": 1.7277, "step": 17084 }, { "epoch": 0.834228515625, "grad_norm": 0.1992829591035843, "learning_rate": 8.070125444737414e-05, "loss": 1.7235, "step": 17085 }, { "epoch": 0.83427734375, "grad_norm": 0.21188920736312866, "learning_rate": 8.068359363584252e-05, "loss": 1.7387, "step": 17086 }, { "epoch": 0.834326171875, "grad_norm": 0.20586585998535156, "learning_rate": 8.06659375337251e-05, "loss": 1.7125, "step": 17087 }, { "epoch": 0.834375, "grad_norm": 0.22510504722595215, "learning_rate": 8.064828614144981e-05, "loss": 1.758, "step": 17088 }, { "epoch": 0.834423828125, "grad_norm": 0.18686620891094208, "learning_rate": 8.063063945944444e-05, "loss": 1.7464, "step": 17089 }, { "epoch": 0.83447265625, "grad_norm": 0.21826809644699097, "learning_rate": 8.06129974881367e-05, "loss": 1.7149, "step": 17090 }, { "epoch": 0.834521484375, "grad_norm": 0.18875186145305634, "learning_rate": 8.059536022795414e-05, "loss": 1.7188, "step": 17091 }, { "epoch": 0.8345703125, "grad_norm": 0.2161749303340912, "learning_rate": 8.057772767932418e-05, "loss": 1.7314, "step": 17092 }, { "epoch": 0.834619140625, "grad_norm": 0.20108474791049957, "learning_rate": 8.056009984267421e-05, "loss": 1.7525, "step": 17093 }, { "epoch": 0.83466796875, "grad_norm": 0.19488836824893951, "learning_rate": 8.054247671843143e-05, "loss": 1.7427, "step": 17094 }, { "epoch": 0.834716796875, "grad_norm": 0.2178536057472229, "learning_rate": 8.052485830702299e-05, "loss": 1.7169, "step": 17095 }, { "epoch": 0.834765625, "grad_norm": 0.18781064450740814, "learning_rate": 8.050724460887575e-05, "loss": 1.7218, "step": 17096 }, { "epoch": 0.834814453125, "grad_norm": 0.22295229136943817, "learning_rate": 8.04896356244168e-05, "loss": 1.7452, "step": 17097 }, { "epoch": 0.83486328125, "grad_norm": 0.18735873699188232, "learning_rate": 8.047203135407268e-05, "loss": 1.748, "step": 17098 }, { "epoch": 0.834912109375, "grad_norm": 0.21520663797855377, "learning_rate": 8.045443179827029e-05, "loss": 1.739, "step": 17099 }, { "epoch": 0.8349609375, "grad_norm": 0.1830063909292221, "learning_rate": 8.043683695743593e-05, "loss": 1.7229, "step": 17100 }, { "epoch": 0.835009765625, "grad_norm": 0.18281883001327515, "learning_rate": 8.041924683199624e-05, "loss": 1.7276, "step": 17101 }, { "epoch": 0.83505859375, "grad_norm": 0.18527182936668396, "learning_rate": 8.040166142237731e-05, "loss": 1.7182, "step": 17102 }, { "epoch": 0.835107421875, "grad_norm": 0.18841604888439178, "learning_rate": 8.038408072900557e-05, "loss": 1.7177, "step": 17103 }, { "epoch": 0.83515625, "grad_norm": 0.18817220628261566, "learning_rate": 8.036650475230692e-05, "loss": 1.7337, "step": 17104 }, { "epoch": 0.835205078125, "grad_norm": 0.1833420842885971, "learning_rate": 8.034893349270742e-05, "loss": 1.7038, "step": 17105 }, { "epoch": 0.83525390625, "grad_norm": 0.19085650146007538, "learning_rate": 8.03313669506329e-05, "loss": 1.7437, "step": 17106 }, { "epoch": 0.835302734375, "grad_norm": 0.18428456783294678, "learning_rate": 8.031380512650907e-05, "loss": 1.7209, "step": 17107 }, { "epoch": 0.8353515625, "grad_norm": 0.18868516385555267, "learning_rate": 8.02962480207616e-05, "loss": 1.7057, "step": 17108 }, { "epoch": 0.835400390625, "grad_norm": 0.21181966364383698, "learning_rate": 8.027869563381596e-05, "loss": 1.7502, "step": 17109 }, { "epoch": 0.83544921875, "grad_norm": 0.1880013346672058, "learning_rate": 8.026114796609758e-05, "loss": 1.7246, "step": 17110 }, { "epoch": 0.835498046875, "grad_norm": 0.1700906753540039, "learning_rate": 8.024360501803174e-05, "loss": 1.7228, "step": 17111 }, { "epoch": 0.835546875, "grad_norm": 0.1781565546989441, "learning_rate": 8.02260667900436e-05, "loss": 1.7257, "step": 17112 }, { "epoch": 0.835595703125, "grad_norm": 0.18756553530693054, "learning_rate": 8.020853328255825e-05, "loss": 1.7448, "step": 17113 }, { "epoch": 0.83564453125, "grad_norm": 0.20732983946800232, "learning_rate": 8.01910044960005e-05, "loss": 1.7358, "step": 17114 }, { "epoch": 0.835693359375, "grad_norm": 0.181076318025589, "learning_rate": 8.017348043079535e-05, "loss": 1.7266, "step": 17115 }, { "epoch": 0.8357421875, "grad_norm": 0.20663252472877502, "learning_rate": 8.015596108736736e-05, "loss": 1.7373, "step": 17116 }, { "epoch": 0.835791015625, "grad_norm": 0.1792069971561432, "learning_rate": 8.013844646614126e-05, "loss": 1.7314, "step": 17117 }, { "epoch": 0.83583984375, "grad_norm": 0.18350955843925476, "learning_rate": 8.012093656754135e-05, "loss": 1.7227, "step": 17118 }, { "epoch": 0.835888671875, "grad_norm": 0.20960666239261627, "learning_rate": 8.010343139199223e-05, "loss": 1.7462, "step": 17119 }, { "epoch": 0.8359375, "grad_norm": 0.18243089318275452, "learning_rate": 8.008593093991797e-05, "loss": 1.7494, "step": 17120 }, { "epoch": 0.835986328125, "grad_norm": 0.18799126148223877, "learning_rate": 8.006843521174276e-05, "loss": 1.7168, "step": 17121 }, { "epoch": 0.83603515625, "grad_norm": 0.17559932172298431, "learning_rate": 8.005094420789062e-05, "loss": 1.7396, "step": 17122 }, { "epoch": 0.836083984375, "grad_norm": 0.19019530713558197, "learning_rate": 8.003345792878548e-05, "loss": 1.7474, "step": 17123 }, { "epoch": 0.8361328125, "grad_norm": 0.18911218643188477, "learning_rate": 8.001597637485111e-05, "loss": 1.7487, "step": 17124 }, { "epoch": 0.836181640625, "grad_norm": 0.17536544799804688, "learning_rate": 7.999849954651121e-05, "loss": 1.7319, "step": 17125 }, { "epoch": 0.83623046875, "grad_norm": 0.184773251414299, "learning_rate": 7.998102744418937e-05, "loss": 1.7354, "step": 17126 }, { "epoch": 0.836279296875, "grad_norm": 0.18706114590168, "learning_rate": 7.996356006830896e-05, "loss": 1.7005, "step": 17127 }, { "epoch": 0.836328125, "grad_norm": 0.20475873351097107, "learning_rate": 7.994609741929337e-05, "loss": 1.7152, "step": 17128 }, { "epoch": 0.836376953125, "grad_norm": 0.18820801377296448, "learning_rate": 7.992863949756588e-05, "loss": 1.7063, "step": 17129 }, { "epoch": 0.83642578125, "grad_norm": 0.17653721570968628, "learning_rate": 7.991118630354944e-05, "loss": 1.7268, "step": 17130 }, { "epoch": 0.836474609375, "grad_norm": 0.18840523064136505, "learning_rate": 7.989373783766717e-05, "loss": 1.7395, "step": 17131 }, { "epoch": 0.8365234375, "grad_norm": 0.18175852298736572, "learning_rate": 7.987629410034189e-05, "loss": 1.7157, "step": 17132 }, { "epoch": 0.836572265625, "grad_norm": 0.19532613456249237, "learning_rate": 7.985885509199644e-05, "loss": 1.7081, "step": 17133 }, { "epoch": 0.83662109375, "grad_norm": 0.18045641481876373, "learning_rate": 7.984142081305332e-05, "loss": 1.7369, "step": 17134 }, { "epoch": 0.836669921875, "grad_norm": 0.20749635994434357, "learning_rate": 7.982399126393524e-05, "loss": 1.7103, "step": 17135 }, { "epoch": 0.83671875, "grad_norm": 0.18413454294204712, "learning_rate": 7.980656644506445e-05, "loss": 1.7565, "step": 17136 }, { "epoch": 0.836767578125, "grad_norm": 0.237308070063591, "learning_rate": 7.978914635686346e-05, "loss": 1.7059, "step": 17137 }, { "epoch": 0.83681640625, "grad_norm": 0.18677745759487152, "learning_rate": 7.977173099975425e-05, "loss": 1.7354, "step": 17138 }, { "epoch": 0.836865234375, "grad_norm": 0.20568633079528809, "learning_rate": 7.975432037415901e-05, "loss": 1.7301, "step": 17139 }, { "epoch": 0.8369140625, "grad_norm": 0.1986490786075592, "learning_rate": 7.973691448049966e-05, "loss": 1.7628, "step": 17140 }, { "epoch": 0.836962890625, "grad_norm": 0.20550774037837982, "learning_rate": 7.971951331919807e-05, "loss": 1.7225, "step": 17141 }, { "epoch": 0.83701171875, "grad_norm": 0.20411914587020874, "learning_rate": 7.970211689067595e-05, "loss": 1.7191, "step": 17142 }, { "epoch": 0.837060546875, "grad_norm": 0.18087662756443024, "learning_rate": 7.968472519535494e-05, "loss": 1.7514, "step": 17143 }, { "epoch": 0.837109375, "grad_norm": 0.1928185373544693, "learning_rate": 7.966733823365652e-05, "loss": 1.7435, "step": 17144 }, { "epoch": 0.837158203125, "grad_norm": 0.19133396446704865, "learning_rate": 7.964995600600217e-05, "loss": 1.7094, "step": 17145 }, { "epoch": 0.83720703125, "grad_norm": 0.19710932672023773, "learning_rate": 7.963257851281294e-05, "loss": 1.7319, "step": 17146 }, { "epoch": 0.837255859375, "grad_norm": 0.18768270313739777, "learning_rate": 7.961520575451023e-05, "loss": 1.7312, "step": 17147 }, { "epoch": 0.8373046875, "grad_norm": 0.18487487733364105, "learning_rate": 7.959783773151489e-05, "loss": 1.7218, "step": 17148 }, { "epoch": 0.837353515625, "grad_norm": 0.21402758359909058, "learning_rate": 7.958047444424804e-05, "loss": 1.7078, "step": 17149 }, { "epoch": 0.83740234375, "grad_norm": 0.1788729727268219, "learning_rate": 7.956311589313028e-05, "loss": 1.763, "step": 17150 }, { "epoch": 0.837451171875, "grad_norm": 0.23362164199352264, "learning_rate": 7.954576207858253e-05, "loss": 1.7409, "step": 17151 }, { "epoch": 0.8375, "grad_norm": 0.1762227565050125, "learning_rate": 7.952841300102513e-05, "loss": 1.7558, "step": 17152 }, { "epoch": 0.837548828125, "grad_norm": 0.27271080017089844, "learning_rate": 7.951106866087883e-05, "loss": 1.7408, "step": 17153 }, { "epoch": 0.83759765625, "grad_norm": 0.17836087942123413, "learning_rate": 7.949372905856376e-05, "loss": 1.7297, "step": 17154 }, { "epoch": 0.837646484375, "grad_norm": 0.22691278159618378, "learning_rate": 7.947639419450023e-05, "loss": 1.7298, "step": 17155 }, { "epoch": 0.8376953125, "grad_norm": 0.1800403594970703, "learning_rate": 7.945906406910838e-05, "loss": 1.758, "step": 17156 }, { "epoch": 0.837744140625, "grad_norm": 0.20799800753593445, "learning_rate": 7.944173868280819e-05, "loss": 1.7398, "step": 17157 }, { "epoch": 0.83779296875, "grad_norm": 0.18687456846237183, "learning_rate": 7.94244180360196e-05, "loss": 1.7176, "step": 17158 }, { "epoch": 0.837841796875, "grad_norm": 0.17939507961273193, "learning_rate": 7.940710212916234e-05, "loss": 1.71, "step": 17159 }, { "epoch": 0.837890625, "grad_norm": 0.2120555192232132, "learning_rate": 7.938979096265608e-05, "loss": 1.7291, "step": 17160 }, { "epoch": 0.837939453125, "grad_norm": 0.19869624078273773, "learning_rate": 7.937248453692045e-05, "loss": 1.7376, "step": 17161 }, { "epoch": 0.83798828125, "grad_norm": 0.17706891894340515, "learning_rate": 7.935518285237476e-05, "loss": 1.7142, "step": 17162 }, { "epoch": 0.838037109375, "grad_norm": 0.19252346456050873, "learning_rate": 7.933788590943848e-05, "loss": 1.7373, "step": 17163 }, { "epoch": 0.8380859375, "grad_norm": 0.17549830675125122, "learning_rate": 7.93205937085306e-05, "loss": 1.7394, "step": 17164 }, { "epoch": 0.838134765625, "grad_norm": 0.18985296785831451, "learning_rate": 7.930330625007045e-05, "loss": 1.7561, "step": 17165 }, { "epoch": 0.83818359375, "grad_norm": 0.1833483874797821, "learning_rate": 7.92860235344768e-05, "loss": 1.6972, "step": 17166 }, { "epoch": 0.838232421875, "grad_norm": 0.18903183937072754, "learning_rate": 7.926874556216873e-05, "loss": 1.7487, "step": 17167 }, { "epoch": 0.83828125, "grad_norm": 0.20700033009052277, "learning_rate": 7.925147233356472e-05, "loss": 1.7457, "step": 17168 }, { "epoch": 0.838330078125, "grad_norm": 0.19246476888656616, "learning_rate": 7.923420384908367e-05, "loss": 1.7384, "step": 17169 }, { "epoch": 0.83837890625, "grad_norm": 0.1806136667728424, "learning_rate": 7.921694010914385e-05, "loss": 1.7303, "step": 17170 }, { "epoch": 0.838427734375, "grad_norm": 0.19354288280010223, "learning_rate": 7.919968111416387e-05, "loss": 1.7284, "step": 17171 }, { "epoch": 0.8384765625, "grad_norm": 0.21153034269809723, "learning_rate": 7.918242686456189e-05, "loss": 1.7194, "step": 17172 }, { "epoch": 0.838525390625, "grad_norm": 0.18285609781742096, "learning_rate": 7.91651773607561e-05, "loss": 1.7362, "step": 17173 }, { "epoch": 0.83857421875, "grad_norm": 0.19800321757793427, "learning_rate": 7.914793260316458e-05, "loss": 1.7462, "step": 17174 }, { "epoch": 0.838623046875, "grad_norm": 0.17291076481342316, "learning_rate": 7.913069259220526e-05, "loss": 1.7037, "step": 17175 }, { "epoch": 0.838671875, "grad_norm": 0.1844538003206253, "learning_rate": 7.911345732829595e-05, "loss": 1.7172, "step": 17176 }, { "epoch": 0.838720703125, "grad_norm": 0.18833856284618378, "learning_rate": 7.909622681185436e-05, "loss": 1.7195, "step": 17177 }, { "epoch": 0.83876953125, "grad_norm": 0.16690441966056824, "learning_rate": 7.907900104329813e-05, "loss": 1.7515, "step": 17178 }, { "epoch": 0.838818359375, "grad_norm": 0.18239101767539978, "learning_rate": 7.906178002304478e-05, "loss": 1.7048, "step": 17179 }, { "epoch": 0.8388671875, "grad_norm": 0.16114458441734314, "learning_rate": 7.904456375151148e-05, "loss": 1.7087, "step": 17180 }, { "epoch": 0.838916015625, "grad_norm": 0.20924554765224457, "learning_rate": 7.90273522291157e-05, "loss": 1.7353, "step": 17181 }, { "epoch": 0.83896484375, "grad_norm": 0.17702890932559967, "learning_rate": 7.901014545627439e-05, "loss": 1.7564, "step": 17182 }, { "epoch": 0.839013671875, "grad_norm": 0.18825121223926544, "learning_rate": 7.899294343340477e-05, "loss": 1.7279, "step": 17183 }, { "epoch": 0.8390625, "grad_norm": 0.21563930809497833, "learning_rate": 7.89757461609235e-05, "loss": 1.746, "step": 17184 }, { "epoch": 0.839111328125, "grad_norm": 0.17413093149662018, "learning_rate": 7.895855363924765e-05, "loss": 1.7381, "step": 17185 }, { "epoch": 0.83916015625, "grad_norm": 0.19181406497955322, "learning_rate": 7.894136586879362e-05, "loss": 1.7045, "step": 17186 }, { "epoch": 0.839208984375, "grad_norm": 0.19896838068962097, "learning_rate": 7.892418284997822e-05, "loss": 1.7466, "step": 17187 }, { "epoch": 0.8392578125, "grad_norm": 0.1724318563938141, "learning_rate": 7.890700458321771e-05, "loss": 1.7132, "step": 17188 }, { "epoch": 0.839306640625, "grad_norm": 0.22026439011096954, "learning_rate": 7.88898310689285e-05, "loss": 1.7228, "step": 17189 }, { "epoch": 0.83935546875, "grad_norm": 0.19552484154701233, "learning_rate": 7.887266230752677e-05, "loss": 1.7256, "step": 17190 }, { "epoch": 0.839404296875, "grad_norm": 0.23615343868732452, "learning_rate": 7.885549829942866e-05, "loss": 1.7295, "step": 17191 }, { "epoch": 0.839453125, "grad_norm": 0.2095504254102707, "learning_rate": 7.883833904505011e-05, "loss": 1.7207, "step": 17192 }, { "epoch": 0.839501953125, "grad_norm": 0.20600026845932007, "learning_rate": 7.8821184544807e-05, "loss": 1.7584, "step": 17193 }, { "epoch": 0.83955078125, "grad_norm": 0.17932790517807007, "learning_rate": 7.880403479911511e-05, "loss": 1.7287, "step": 17194 }, { "epoch": 0.839599609375, "grad_norm": 0.19321765005588531, "learning_rate": 7.878688980839007e-05, "loss": 1.7419, "step": 17195 }, { "epoch": 0.8396484375, "grad_norm": 0.18761739134788513, "learning_rate": 7.876974957304737e-05, "loss": 1.7261, "step": 17196 }, { "epoch": 0.839697265625, "grad_norm": 0.19904163479804993, "learning_rate": 7.875261409350247e-05, "loss": 1.7325, "step": 17197 }, { "epoch": 0.83974609375, "grad_norm": 0.1903233677148819, "learning_rate": 7.873548337017057e-05, "loss": 1.7197, "step": 17198 }, { "epoch": 0.839794921875, "grad_norm": 0.20200195908546448, "learning_rate": 7.871835740346698e-05, "loss": 1.7274, "step": 17199 }, { "epoch": 0.83984375, "grad_norm": 0.17648468911647797, "learning_rate": 7.87012361938066e-05, "loss": 1.7441, "step": 17200 }, { "epoch": 0.839892578125, "grad_norm": 0.21945950388908386, "learning_rate": 7.868411974160458e-05, "loss": 1.7299, "step": 17201 }, { "epoch": 0.83994140625, "grad_norm": 0.2055371254682541, "learning_rate": 7.86670080472755e-05, "loss": 1.765, "step": 17202 }, { "epoch": 0.839990234375, "grad_norm": 0.22198747098445892, "learning_rate": 7.864990111123435e-05, "loss": 1.7274, "step": 17203 }, { "epoch": 0.8400390625, "grad_norm": 0.21117500960826874, "learning_rate": 7.863279893389552e-05, "loss": 1.7338, "step": 17204 }, { "epoch": 0.840087890625, "grad_norm": 0.21826834976673126, "learning_rate": 7.861570151567357e-05, "loss": 1.7368, "step": 17205 }, { "epoch": 0.84013671875, "grad_norm": 0.19660726189613342, "learning_rate": 7.859860885698289e-05, "loss": 1.7349, "step": 17206 }, { "epoch": 0.840185546875, "grad_norm": 0.22107890248298645, "learning_rate": 7.85815209582377e-05, "loss": 1.7012, "step": 17207 }, { "epoch": 0.840234375, "grad_norm": 0.20137973129749298, "learning_rate": 7.856443781985214e-05, "loss": 1.7224, "step": 17208 }, { "epoch": 0.840283203125, "grad_norm": 0.2269793152809143, "learning_rate": 7.854735944224025e-05, "loss": 1.7375, "step": 17209 }, { "epoch": 0.84033203125, "grad_norm": 0.22319665551185608, "learning_rate": 7.853028582581592e-05, "loss": 1.7284, "step": 17210 }, { "epoch": 0.840380859375, "grad_norm": 0.22518470883369446, "learning_rate": 7.851321697099298e-05, "loss": 1.7279, "step": 17211 }, { "epoch": 0.8404296875, "grad_norm": 0.23076164722442627, "learning_rate": 7.849615287818505e-05, "loss": 1.7461, "step": 17212 }, { "epoch": 0.840478515625, "grad_norm": 0.2083640694618225, "learning_rate": 7.84790935478058e-05, "loss": 1.7222, "step": 17213 }, { "epoch": 0.84052734375, "grad_norm": 0.19871877133846283, "learning_rate": 7.84620389802685e-05, "loss": 1.7116, "step": 17214 }, { "epoch": 0.840576171875, "grad_norm": 0.18610073626041412, "learning_rate": 7.844498917598667e-05, "loss": 1.7418, "step": 17215 }, { "epoch": 0.840625, "grad_norm": 0.2217043787240982, "learning_rate": 7.842794413537334e-05, "loss": 1.7324, "step": 17216 }, { "epoch": 0.840673828125, "grad_norm": 0.17533043026924133, "learning_rate": 7.841090385884182e-05, "loss": 1.7269, "step": 17217 }, { "epoch": 0.84072265625, "grad_norm": 0.20972153544425964, "learning_rate": 7.839386834680487e-05, "loss": 1.7222, "step": 17218 }, { "epoch": 0.840771484375, "grad_norm": 0.19829076528549194, "learning_rate": 7.83768375996756e-05, "loss": 1.726, "step": 17219 }, { "epoch": 0.8408203125, "grad_norm": 0.18968379497528076, "learning_rate": 7.835981161786651e-05, "loss": 1.7253, "step": 17220 }, { "epoch": 0.840869140625, "grad_norm": 0.1923225075006485, "learning_rate": 7.834279040179045e-05, "loss": 1.7224, "step": 17221 }, { "epoch": 0.84091796875, "grad_norm": 0.17987391352653503, "learning_rate": 7.832577395185984e-05, "loss": 1.7593, "step": 17222 }, { "epoch": 0.840966796875, "grad_norm": 0.20435579121112823, "learning_rate": 7.830876226848708e-05, "loss": 1.7137, "step": 17223 }, { "epoch": 0.841015625, "grad_norm": 0.19252236187458038, "learning_rate": 7.82917553520845e-05, "loss": 1.7435, "step": 17224 }, { "epoch": 0.841064453125, "grad_norm": 0.21863742172718048, "learning_rate": 7.827475320306428e-05, "loss": 1.7444, "step": 17225 }, { "epoch": 0.84111328125, "grad_norm": 0.1862245798110962, "learning_rate": 7.825775582183845e-05, "loss": 1.7541, "step": 17226 }, { "epoch": 0.841162109375, "grad_norm": 0.2004610300064087, "learning_rate": 7.824076320881895e-05, "loss": 1.7105, "step": 17227 }, { "epoch": 0.8412109375, "grad_norm": 0.21824221312999725, "learning_rate": 7.822377536441765e-05, "loss": 1.7096, "step": 17228 }, { "epoch": 0.841259765625, "grad_norm": 0.18178552389144897, "learning_rate": 7.820679228904626e-05, "loss": 1.7294, "step": 17229 }, { "epoch": 0.84130859375, "grad_norm": 0.20978611707687378, "learning_rate": 7.818981398311628e-05, "loss": 1.7206, "step": 17230 }, { "epoch": 0.841357421875, "grad_norm": 0.18845409154891968, "learning_rate": 7.81728404470394e-05, "loss": 1.7444, "step": 17231 }, { "epoch": 0.84140625, "grad_norm": 0.21817214787006378, "learning_rate": 7.815587168122672e-05, "loss": 1.7208, "step": 17232 }, { "epoch": 0.841455078125, "grad_norm": 0.22010552883148193, "learning_rate": 7.813890768608975e-05, "loss": 1.7199, "step": 17233 }, { "epoch": 0.84150390625, "grad_norm": 0.20087940990924835, "learning_rate": 7.812194846203944e-05, "loss": 1.7415, "step": 17234 }, { "epoch": 0.841552734375, "grad_norm": 0.2226061373949051, "learning_rate": 7.810499400948694e-05, "loss": 1.7064, "step": 17235 }, { "epoch": 0.8416015625, "grad_norm": 0.19975529611110687, "learning_rate": 7.808804432884301e-05, "loss": 1.7209, "step": 17236 }, { "epoch": 0.841650390625, "grad_norm": 0.1975266933441162, "learning_rate": 7.807109942051863e-05, "loss": 1.7108, "step": 17237 }, { "epoch": 0.84169921875, "grad_norm": 0.21121656894683838, "learning_rate": 7.805415928492429e-05, "loss": 1.7606, "step": 17238 }, { "epoch": 0.841748046875, "grad_norm": 0.19875742495059967, "learning_rate": 7.803722392247062e-05, "loss": 1.6932, "step": 17239 }, { "epoch": 0.841796875, "grad_norm": 0.17767909169197083, "learning_rate": 7.80202933335681e-05, "loss": 1.7318, "step": 17240 }, { "epoch": 0.841845703125, "grad_norm": 0.1929442286491394, "learning_rate": 7.800336751862701e-05, "loss": 1.7212, "step": 17241 }, { "epoch": 0.84189453125, "grad_norm": 0.19725918769836426, "learning_rate": 7.798644647805756e-05, "loss": 1.7433, "step": 17242 }, { "epoch": 0.841943359375, "grad_norm": 0.18746253848075867, "learning_rate": 7.796953021226988e-05, "loss": 1.7394, "step": 17243 }, { "epoch": 0.8419921875, "grad_norm": 0.21341882646083832, "learning_rate": 7.795261872167391e-05, "loss": 1.7222, "step": 17244 }, { "epoch": 0.842041015625, "grad_norm": 0.20021240413188934, "learning_rate": 7.793571200667955e-05, "loss": 1.7248, "step": 17245 }, { "epoch": 0.84208984375, "grad_norm": 0.19398823380470276, "learning_rate": 7.791881006769652e-05, "loss": 1.7484, "step": 17246 }, { "epoch": 0.842138671875, "grad_norm": 0.20786619186401367, "learning_rate": 7.790191290513445e-05, "loss": 1.7348, "step": 17247 }, { "epoch": 0.8421875, "grad_norm": 0.166002094745636, "learning_rate": 7.788502051940289e-05, "loss": 1.7265, "step": 17248 }, { "epoch": 0.842236328125, "grad_norm": 0.2134629786014557, "learning_rate": 7.786813291091125e-05, "loss": 1.7257, "step": 17249 }, { "epoch": 0.84228515625, "grad_norm": 0.205793559551239, "learning_rate": 7.785125008006868e-05, "loss": 1.7374, "step": 17250 }, { "epoch": 0.842333984375, "grad_norm": 0.1894647628068924, "learning_rate": 7.783437202728458e-05, "loss": 1.7285, "step": 17251 }, { "epoch": 0.8423828125, "grad_norm": 0.20508810877799988, "learning_rate": 7.781749875296776e-05, "loss": 1.6987, "step": 17252 }, { "epoch": 0.842431640625, "grad_norm": 0.1725630760192871, "learning_rate": 7.780063025752737e-05, "loss": 1.6803, "step": 17253 }, { "epoch": 0.84248046875, "grad_norm": 0.20683728158473969, "learning_rate": 7.778376654137201e-05, "loss": 1.7311, "step": 17254 }, { "epoch": 0.842529296875, "grad_norm": 0.1671970933675766, "learning_rate": 7.776690760491065e-05, "loss": 1.7462, "step": 17255 }, { "epoch": 0.842578125, "grad_norm": 0.17790894210338593, "learning_rate": 7.775005344855166e-05, "loss": 1.7399, "step": 17256 }, { "epoch": 0.842626953125, "grad_norm": 0.1901623159646988, "learning_rate": 7.773320407270362e-05, "loss": 1.723, "step": 17257 }, { "epoch": 0.84267578125, "grad_norm": 0.19486266374588013, "learning_rate": 7.771635947777484e-05, "loss": 1.7344, "step": 17258 }, { "epoch": 0.842724609375, "grad_norm": 0.18987233936786652, "learning_rate": 7.76995196641736e-05, "loss": 1.7262, "step": 17259 }, { "epoch": 0.8427734375, "grad_norm": 0.20350180566310883, "learning_rate": 7.7682684632308e-05, "loss": 1.7171, "step": 17260 }, { "epoch": 0.842822265625, "grad_norm": 0.1878216713666916, "learning_rate": 7.766585438258606e-05, "loss": 1.7102, "step": 17261 }, { "epoch": 0.84287109375, "grad_norm": 0.1652728170156479, "learning_rate": 7.764902891541568e-05, "loss": 1.7312, "step": 17262 }, { "epoch": 0.842919921875, "grad_norm": 0.20453710854053497, "learning_rate": 7.763220823120462e-05, "loss": 1.713, "step": 17263 }, { "epoch": 0.84296875, "grad_norm": 0.17408588528633118, "learning_rate": 7.761539233036058e-05, "loss": 1.718, "step": 17264 }, { "epoch": 0.843017578125, "grad_norm": 0.20363208651542664, "learning_rate": 7.759858121329109e-05, "loss": 1.6979, "step": 17265 }, { "epoch": 0.84306640625, "grad_norm": 0.1837119609117508, "learning_rate": 7.758177488040355e-05, "loss": 1.7278, "step": 17266 }, { "epoch": 0.843115234375, "grad_norm": 0.17813116312026978, "learning_rate": 7.756497333210531e-05, "loss": 1.7381, "step": 17267 }, { "epoch": 0.8431640625, "grad_norm": 0.21626143157482147, "learning_rate": 7.754817656880357e-05, "loss": 1.7368, "step": 17268 }, { "epoch": 0.843212890625, "grad_norm": 0.18610234558582306, "learning_rate": 7.753138459090543e-05, "loss": 1.751, "step": 17269 }, { "epoch": 0.84326171875, "grad_norm": 0.17306959629058838, "learning_rate": 7.751459739881774e-05, "loss": 1.7343, "step": 17270 }, { "epoch": 0.843310546875, "grad_norm": 0.20114454627037048, "learning_rate": 7.749781499294754e-05, "loss": 1.7286, "step": 17271 }, { "epoch": 0.843359375, "grad_norm": 0.17128095030784607, "learning_rate": 7.748103737370142e-05, "loss": 1.7199, "step": 17272 }, { "epoch": 0.843408203125, "grad_norm": 0.19766536355018616, "learning_rate": 7.746426454148604e-05, "loss": 1.7278, "step": 17273 }, { "epoch": 0.84345703125, "grad_norm": 0.1763126701116562, "learning_rate": 7.74474964967079e-05, "loss": 1.7312, "step": 17274 }, { "epoch": 0.843505859375, "grad_norm": 0.1674671769142151, "learning_rate": 7.743073323977343e-05, "loss": 1.7342, "step": 17275 }, { "epoch": 0.8435546875, "grad_norm": 0.19167576730251312, "learning_rate": 7.74139747710888e-05, "loss": 1.7237, "step": 17276 }, { "epoch": 0.843603515625, "grad_norm": 0.1685066968202591, "learning_rate": 7.739722109106029e-05, "loss": 1.7188, "step": 17277 }, { "epoch": 0.84365234375, "grad_norm": 0.21304340660572052, "learning_rate": 7.738047220009385e-05, "loss": 1.7395, "step": 17278 }, { "epoch": 0.843701171875, "grad_norm": 0.18247002363204956, "learning_rate": 7.736372809859545e-05, "loss": 1.7427, "step": 17279 }, { "epoch": 0.84375, "grad_norm": 0.19836752116680145, "learning_rate": 7.734698878697085e-05, "loss": 1.7335, "step": 17280 }, { "epoch": 0.843798828125, "grad_norm": 0.18257693946361542, "learning_rate": 7.73302542656258e-05, "loss": 1.7356, "step": 17281 }, { "epoch": 0.84384765625, "grad_norm": 0.20205935835838318, "learning_rate": 7.731352453496585e-05, "loss": 1.7352, "step": 17282 }, { "epoch": 0.843896484375, "grad_norm": 0.19629183411598206, "learning_rate": 7.729679959539642e-05, "loss": 1.7262, "step": 17283 }, { "epoch": 0.8439453125, "grad_norm": 0.2060038447380066, "learning_rate": 7.728007944732289e-05, "loss": 1.7372, "step": 17284 }, { "epoch": 0.843994140625, "grad_norm": 0.19119583070278168, "learning_rate": 7.726336409115051e-05, "loss": 1.7355, "step": 17285 }, { "epoch": 0.84404296875, "grad_norm": 0.19985421001911163, "learning_rate": 7.724665352728433e-05, "loss": 1.7295, "step": 17286 }, { "epoch": 0.844091796875, "grad_norm": 0.18892769515514374, "learning_rate": 7.722994775612944e-05, "loss": 1.7359, "step": 17287 }, { "epoch": 0.844140625, "grad_norm": 0.18721115589141846, "learning_rate": 7.721324677809059e-05, "loss": 1.7208, "step": 17288 }, { "epoch": 0.844189453125, "grad_norm": 0.18226107954978943, "learning_rate": 7.719655059357263e-05, "loss": 1.7176, "step": 17289 }, { "epoch": 0.84423828125, "grad_norm": 0.1946883499622345, "learning_rate": 7.717985920298014e-05, "loss": 1.7286, "step": 17290 }, { "epoch": 0.844287109375, "grad_norm": 0.19640673696994781, "learning_rate": 7.716317260671772e-05, "loss": 1.7255, "step": 17291 }, { "epoch": 0.8443359375, "grad_norm": 0.22250546514987946, "learning_rate": 7.714649080518975e-05, "loss": 1.7345, "step": 17292 }, { "epoch": 0.844384765625, "grad_norm": 0.17313921451568604, "learning_rate": 7.712981379880052e-05, "loss": 1.7317, "step": 17293 }, { "epoch": 0.84443359375, "grad_norm": 0.23382128775119781, "learning_rate": 7.711314158795424e-05, "loss": 1.719, "step": 17294 }, { "epoch": 0.844482421875, "grad_norm": 0.18623898923397064, "learning_rate": 7.709647417305493e-05, "loss": 1.7092, "step": 17295 }, { "epoch": 0.84453125, "grad_norm": 0.17564833164215088, "learning_rate": 7.707981155450657e-05, "loss": 1.7311, "step": 17296 }, { "epoch": 0.844580078125, "grad_norm": 0.2427954226732254, "learning_rate": 7.706315373271299e-05, "loss": 1.7571, "step": 17297 }, { "epoch": 0.84462890625, "grad_norm": 0.16143594682216644, "learning_rate": 7.704650070807791e-05, "loss": 1.7156, "step": 17298 }, { "epoch": 0.844677734375, "grad_norm": 0.24821516871452332, "learning_rate": 7.702985248100492e-05, "loss": 1.7355, "step": 17299 }, { "epoch": 0.8447265625, "grad_norm": 0.18742266297340393, "learning_rate": 7.701320905189746e-05, "loss": 1.7461, "step": 17300 }, { "epoch": 0.844775390625, "grad_norm": 0.19148164987564087, "learning_rate": 7.699657042115896e-05, "loss": 1.75, "step": 17301 }, { "epoch": 0.84482421875, "grad_norm": 0.19263768196105957, "learning_rate": 7.697993658919267e-05, "loss": 1.7416, "step": 17302 }, { "epoch": 0.844873046875, "grad_norm": 0.17730595171451569, "learning_rate": 7.696330755640168e-05, "loss": 1.7235, "step": 17303 }, { "epoch": 0.844921875, "grad_norm": 0.2163456678390503, "learning_rate": 7.694668332318907e-05, "loss": 1.7345, "step": 17304 }, { "epoch": 0.844970703125, "grad_norm": 0.17875351011753082, "learning_rate": 7.693006388995767e-05, "loss": 1.6957, "step": 17305 }, { "epoch": 0.84501953125, "grad_norm": 0.20199675858020782, "learning_rate": 7.691344925711034e-05, "loss": 1.714, "step": 17306 }, { "epoch": 0.845068359375, "grad_norm": 0.1843039095401764, "learning_rate": 7.689683942504968e-05, "loss": 1.7537, "step": 17307 }, { "epoch": 0.8451171875, "grad_norm": 0.18540671467781067, "learning_rate": 7.688023439417828e-05, "loss": 1.7237, "step": 17308 }, { "epoch": 0.845166015625, "grad_norm": 0.19891870021820068, "learning_rate": 7.686363416489856e-05, "loss": 1.7386, "step": 17309 }, { "epoch": 0.84521484375, "grad_norm": 0.16983410716056824, "learning_rate": 7.684703873761284e-05, "loss": 1.7333, "step": 17310 }, { "epoch": 0.845263671875, "grad_norm": 0.17176209390163422, "learning_rate": 7.683044811272334e-05, "loss": 1.7144, "step": 17311 }, { "epoch": 0.8453125, "grad_norm": 0.17470164597034454, "learning_rate": 7.681386229063212e-05, "loss": 1.7434, "step": 17312 }, { "epoch": 0.845361328125, "grad_norm": 0.17046000063419342, "learning_rate": 7.679728127174117e-05, "loss": 1.7379, "step": 17313 }, { "epoch": 0.84541015625, "grad_norm": 0.1771945208311081, "learning_rate": 7.678070505645237e-05, "loss": 1.7359, "step": 17314 }, { "epoch": 0.845458984375, "grad_norm": 0.18460983037948608, "learning_rate": 7.676413364516741e-05, "loss": 1.7338, "step": 17315 }, { "epoch": 0.8455078125, "grad_norm": 0.19649957120418549, "learning_rate": 7.674756703828795e-05, "loss": 1.7178, "step": 17316 }, { "epoch": 0.845556640625, "grad_norm": 0.186112180352211, "learning_rate": 7.673100523621545e-05, "loss": 1.7489, "step": 17317 }, { "epoch": 0.84560546875, "grad_norm": 0.19770289957523346, "learning_rate": 7.671444823935134e-05, "loss": 1.7531, "step": 17318 }, { "epoch": 0.845654296875, "grad_norm": 0.19819749891757965, "learning_rate": 7.669789604809686e-05, "loss": 1.7262, "step": 17319 }, { "epoch": 0.845703125, "grad_norm": 0.18275193870067596, "learning_rate": 7.668134866285321e-05, "loss": 1.7027, "step": 17320 }, { "epoch": 0.845751953125, "grad_norm": 0.1934559941291809, "learning_rate": 7.666480608402137e-05, "loss": 1.7388, "step": 17321 }, { "epoch": 0.84580078125, "grad_norm": 0.18907080590724945, "learning_rate": 7.664826831200239e-05, "loss": 1.7371, "step": 17322 }, { "epoch": 0.845849609375, "grad_norm": 0.20747923851013184, "learning_rate": 7.663173534719686e-05, "loss": 1.7094, "step": 17323 }, { "epoch": 0.8458984375, "grad_norm": 0.23744571208953857, "learning_rate": 7.661520719000568e-05, "loss": 1.7317, "step": 17324 }, { "epoch": 0.845947265625, "grad_norm": 0.18366236984729767, "learning_rate": 7.659868384082926e-05, "loss": 1.7201, "step": 17325 }, { "epoch": 0.84599609375, "grad_norm": 0.18556295335292816, "learning_rate": 7.658216530006824e-05, "loss": 1.693, "step": 17326 }, { "epoch": 0.846044921875, "grad_norm": 0.21454282104969025, "learning_rate": 7.656565156812276e-05, "loss": 1.7176, "step": 17327 }, { "epoch": 0.84609375, "grad_norm": 0.1821679174900055, "learning_rate": 7.654914264539314e-05, "loss": 1.7329, "step": 17328 }, { "epoch": 0.846142578125, "grad_norm": 0.1911267340183258, "learning_rate": 7.653263853227949e-05, "loss": 1.7325, "step": 17329 }, { "epoch": 0.84619140625, "grad_norm": 0.21136359870433807, "learning_rate": 7.65161392291818e-05, "loss": 1.7452, "step": 17330 }, { "epoch": 0.846240234375, "grad_norm": 0.17498767375946045, "learning_rate": 7.649964473649993e-05, "loss": 1.75, "step": 17331 }, { "epoch": 0.8462890625, "grad_norm": 0.18421708047389984, "learning_rate": 7.648315505463365e-05, "loss": 1.7362, "step": 17332 }, { "epoch": 0.846337890625, "grad_norm": 0.18421578407287598, "learning_rate": 7.646667018398259e-05, "loss": 1.731, "step": 17333 }, { "epoch": 0.84638671875, "grad_norm": 0.17394134402275085, "learning_rate": 7.645019012494628e-05, "loss": 1.7223, "step": 17334 }, { "epoch": 0.846435546875, "grad_norm": 0.1764073520898819, "learning_rate": 7.643371487792413e-05, "loss": 1.7661, "step": 17335 }, { "epoch": 0.846484375, "grad_norm": 0.18317218124866486, "learning_rate": 7.641724444331543e-05, "loss": 1.7427, "step": 17336 }, { "epoch": 0.846533203125, "grad_norm": 0.18221743404865265, "learning_rate": 7.640077882151933e-05, "loss": 1.7385, "step": 17337 }, { "epoch": 0.84658203125, "grad_norm": 0.17369580268859863, "learning_rate": 7.63843180129349e-05, "loss": 1.7349, "step": 17338 }, { "epoch": 0.846630859375, "grad_norm": 0.21099711954593658, "learning_rate": 7.636786201796112e-05, "loss": 1.7327, "step": 17339 }, { "epoch": 0.8466796875, "grad_norm": 0.16678951680660248, "learning_rate": 7.635141083699682e-05, "loss": 1.6971, "step": 17340 }, { "epoch": 0.846728515625, "grad_norm": 0.20847319066524506, "learning_rate": 7.633496447044058e-05, "loss": 1.7103, "step": 17341 }, { "epoch": 0.84677734375, "grad_norm": 0.227696493268013, "learning_rate": 7.631852291869115e-05, "loss": 1.746, "step": 17342 }, { "epoch": 0.846826171875, "grad_norm": 0.19294045865535736, "learning_rate": 7.630208618214687e-05, "loss": 1.7317, "step": 17343 }, { "epoch": 0.846875, "grad_norm": 0.2457706183195114, "learning_rate": 7.628565426120627e-05, "loss": 1.7062, "step": 17344 }, { "epoch": 0.846923828125, "grad_norm": 0.2005961388349533, "learning_rate": 7.626922715626736e-05, "loss": 1.7377, "step": 17345 }, { "epoch": 0.84697265625, "grad_norm": 0.24034510552883148, "learning_rate": 7.62528048677285e-05, "loss": 1.7222, "step": 17346 }, { "epoch": 0.847021484375, "grad_norm": 0.19382326304912567, "learning_rate": 7.623638739598754e-05, "loss": 1.7147, "step": 17347 }, { "epoch": 0.8470703125, "grad_norm": 0.227453351020813, "learning_rate": 7.62199747414424e-05, "loss": 1.7411, "step": 17348 }, { "epoch": 0.847119140625, "grad_norm": 0.20189876854419708, "learning_rate": 7.620356690449087e-05, "loss": 1.7382, "step": 17349 }, { "epoch": 0.84716796875, "grad_norm": 0.22535957396030426, "learning_rate": 7.618716388553063e-05, "loss": 1.7185, "step": 17350 }, { "epoch": 0.847216796875, "grad_norm": 0.20317329466342926, "learning_rate": 7.617076568495917e-05, "loss": 1.7264, "step": 17351 }, { "epoch": 0.847265625, "grad_norm": 0.1881062537431717, "learning_rate": 7.615437230317397e-05, "loss": 1.7326, "step": 17352 }, { "epoch": 0.847314453125, "grad_norm": 0.21405626833438873, "learning_rate": 7.61379837405723e-05, "loss": 1.7233, "step": 17353 }, { "epoch": 0.84736328125, "grad_norm": 0.1984485387802124, "learning_rate": 7.612159999755133e-05, "loss": 1.71, "step": 17354 }, { "epoch": 0.847412109375, "grad_norm": 0.20493370294570923, "learning_rate": 7.61052210745082e-05, "loss": 1.7321, "step": 17355 }, { "epoch": 0.8474609375, "grad_norm": 0.22463180124759674, "learning_rate": 7.608884697183987e-05, "loss": 1.7172, "step": 17356 }, { "epoch": 0.847509765625, "grad_norm": 0.16992433369159698, "learning_rate": 7.607247768994302e-05, "loss": 1.7141, "step": 17357 }, { "epoch": 0.84755859375, "grad_norm": 0.2466733455657959, "learning_rate": 7.60561132292146e-05, "loss": 1.7373, "step": 17358 }, { "epoch": 0.847607421875, "grad_norm": 0.16967423260211945, "learning_rate": 7.6039753590051e-05, "loss": 1.7202, "step": 17359 }, { "epoch": 0.84765625, "grad_norm": 0.2161078006029129, "learning_rate": 7.602339877284892e-05, "loss": 1.7329, "step": 17360 }, { "epoch": 0.847705078125, "grad_norm": 0.1871550977230072, "learning_rate": 7.600704877800454e-05, "loss": 1.7365, "step": 17361 }, { "epoch": 0.84775390625, "grad_norm": 0.2242741733789444, "learning_rate": 7.599070360591429e-05, "loss": 1.7316, "step": 17362 }, { "epoch": 0.847802734375, "grad_norm": 0.19800256192684174, "learning_rate": 7.59743632569741e-05, "loss": 1.7487, "step": 17363 }, { "epoch": 0.8478515625, "grad_norm": 0.2054646760225296, "learning_rate": 7.595802773158022e-05, "loss": 1.7139, "step": 17364 }, { "epoch": 0.847900390625, "grad_norm": 0.1951770782470703, "learning_rate": 7.594169703012844e-05, "loss": 1.736, "step": 17365 }, { "epoch": 0.84794921875, "grad_norm": 0.22532223165035248, "learning_rate": 7.592537115301451e-05, "loss": 1.748, "step": 17366 }, { "epoch": 0.847998046875, "grad_norm": 0.2155442237854004, "learning_rate": 7.590905010063418e-05, "loss": 1.749, "step": 17367 }, { "epoch": 0.848046875, "grad_norm": 0.21432289481163025, "learning_rate": 7.589273387338293e-05, "loss": 1.745, "step": 17368 }, { "epoch": 0.848095703125, "grad_norm": 0.16838112473487854, "learning_rate": 7.587642247165624e-05, "loss": 1.7357, "step": 17369 }, { "epoch": 0.84814453125, "grad_norm": 0.24322089552879333, "learning_rate": 7.586011589584944e-05, "loss": 1.7289, "step": 17370 }, { "epoch": 0.848193359375, "grad_norm": 0.17854022979736328, "learning_rate": 7.584381414635772e-05, "loss": 1.7093, "step": 17371 }, { "epoch": 0.8482421875, "grad_norm": 0.19124852120876312, "learning_rate": 7.582751722357616e-05, "loss": 1.7149, "step": 17372 }, { "epoch": 0.848291015625, "grad_norm": 0.20702311396598816, "learning_rate": 7.581122512789973e-05, "loss": 1.7288, "step": 17373 }, { "epoch": 0.84833984375, "grad_norm": 0.1995590180158615, "learning_rate": 7.579493785972333e-05, "loss": 1.7285, "step": 17374 }, { "epoch": 0.848388671875, "grad_norm": 0.22672493755817413, "learning_rate": 7.577865541944157e-05, "loss": 1.7171, "step": 17375 }, { "epoch": 0.8484375, "grad_norm": 0.2371075302362442, "learning_rate": 7.576237780744924e-05, "loss": 1.7407, "step": 17376 }, { "epoch": 0.848486328125, "grad_norm": 0.19285787642002106, "learning_rate": 7.574610502414065e-05, "loss": 1.6998, "step": 17377 }, { "epoch": 0.84853515625, "grad_norm": 0.22473908960819244, "learning_rate": 7.572983706991038e-05, "loss": 1.7332, "step": 17378 }, { "epoch": 0.848583984375, "grad_norm": 0.18922069668769836, "learning_rate": 7.57135739451525e-05, "loss": 1.7179, "step": 17379 }, { "epoch": 0.8486328125, "grad_norm": 0.1974305957555771, "learning_rate": 7.569731565026136e-05, "loss": 1.7267, "step": 17380 }, { "epoch": 0.848681640625, "grad_norm": 0.20300503075122833, "learning_rate": 7.568106218563085e-05, "loss": 1.7486, "step": 17381 }, { "epoch": 0.84873046875, "grad_norm": 0.17374317348003387, "learning_rate": 7.566481355165494e-05, "loss": 1.735, "step": 17382 }, { "epoch": 0.848779296875, "grad_norm": 0.1938909888267517, "learning_rate": 7.564856974872738e-05, "loss": 1.7214, "step": 17383 }, { "epoch": 0.848828125, "grad_norm": 0.19325952231884003, "learning_rate": 7.563233077724194e-05, "loss": 1.7558, "step": 17384 }, { "epoch": 0.848876953125, "grad_norm": 0.1920120269060135, "learning_rate": 7.561609663759208e-05, "loss": 1.7417, "step": 17385 }, { "epoch": 0.84892578125, "grad_norm": 0.19324085116386414, "learning_rate": 7.559986733017135e-05, "loss": 1.7662, "step": 17386 }, { "epoch": 0.848974609375, "grad_norm": 0.20740634202957153, "learning_rate": 7.5583642855373e-05, "loss": 1.7287, "step": 17387 }, { "epoch": 0.8490234375, "grad_norm": 0.17807014286518097, "learning_rate": 7.556742321359027e-05, "loss": 1.7426, "step": 17388 }, { "epoch": 0.849072265625, "grad_norm": 0.19997096061706543, "learning_rate": 7.555120840521628e-05, "loss": 1.722, "step": 17389 }, { "epoch": 0.84912109375, "grad_norm": 0.20472079515457153, "learning_rate": 7.553499843064405e-05, "loss": 1.7477, "step": 17390 }, { "epoch": 0.849169921875, "grad_norm": 0.18036676943302155, "learning_rate": 7.551879329026626e-05, "loss": 1.7058, "step": 17391 }, { "epoch": 0.84921875, "grad_norm": 0.2258678376674652, "learning_rate": 7.550259298447589e-05, "loss": 1.7306, "step": 17392 }, { "epoch": 0.849267578125, "grad_norm": 0.18979741632938385, "learning_rate": 7.548639751366536e-05, "loss": 1.7425, "step": 17393 }, { "epoch": 0.84931640625, "grad_norm": 0.20965921878814697, "learning_rate": 7.547020687822735e-05, "loss": 1.7312, "step": 17394 }, { "epoch": 0.849365234375, "grad_norm": 0.18831244111061096, "learning_rate": 7.545402107855409e-05, "loss": 1.7235, "step": 17395 }, { "epoch": 0.8494140625, "grad_norm": 0.21453580260276794, "learning_rate": 7.543784011503805e-05, "loss": 1.7139, "step": 17396 }, { "epoch": 0.849462890625, "grad_norm": 0.17709821462631226, "learning_rate": 7.542166398807119e-05, "loss": 1.7272, "step": 17397 }, { "epoch": 0.84951171875, "grad_norm": 0.20504635572433472, "learning_rate": 7.54054926980457e-05, "loss": 1.7367, "step": 17398 }, { "epoch": 0.849560546875, "grad_norm": 0.19922573864459991, "learning_rate": 7.538932624535342e-05, "loss": 1.733, "step": 17399 }, { "epoch": 0.849609375, "grad_norm": 0.19349807500839233, "learning_rate": 7.537316463038619e-05, "loss": 1.7306, "step": 17400 }, { "epoch": 0.849658203125, "grad_norm": 0.1920558214187622, "learning_rate": 7.535700785353568e-05, "loss": 1.7322, "step": 17401 }, { "epoch": 0.84970703125, "grad_norm": 0.20176607370376587, "learning_rate": 7.534085591519349e-05, "loss": 1.734, "step": 17402 }, { "epoch": 0.849755859375, "grad_norm": 0.19706174731254578, "learning_rate": 7.532470881575106e-05, "loss": 1.7392, "step": 17403 }, { "epoch": 0.8498046875, "grad_norm": 0.19313251972198486, "learning_rate": 7.530856655559972e-05, "loss": 1.7313, "step": 17404 }, { "epoch": 0.849853515625, "grad_norm": 0.20153450965881348, "learning_rate": 7.529242913513071e-05, "loss": 1.7148, "step": 17405 }, { "epoch": 0.84990234375, "grad_norm": 0.16987422108650208, "learning_rate": 7.527629655473517e-05, "loss": 1.7263, "step": 17406 }, { "epoch": 0.849951171875, "grad_norm": 0.1729429066181183, "learning_rate": 7.526016881480394e-05, "loss": 1.7367, "step": 17407 }, { "epoch": 0.85, "grad_norm": 0.1783575415611267, "learning_rate": 7.524404591572809e-05, "loss": 1.7069, "step": 17408 }, { "epoch": 0.850048828125, "grad_norm": 0.17372193932533264, "learning_rate": 7.522792785789817e-05, "loss": 1.7205, "step": 17409 }, { "epoch": 0.85009765625, "grad_norm": 0.167855367064476, "learning_rate": 7.5211814641705e-05, "loss": 1.7326, "step": 17410 }, { "epoch": 0.850146484375, "grad_norm": 0.19500961899757385, "learning_rate": 7.519570626753892e-05, "loss": 1.726, "step": 17411 }, { "epoch": 0.8501953125, "grad_norm": 0.17566987872123718, "learning_rate": 7.517960273579052e-05, "loss": 1.7204, "step": 17412 }, { "epoch": 0.850244140625, "grad_norm": 0.19048933684825897, "learning_rate": 7.51635040468499e-05, "loss": 1.7372, "step": 17413 }, { "epoch": 0.85029296875, "grad_norm": 0.178767129778862, "learning_rate": 7.514741020110738e-05, "loss": 1.7171, "step": 17414 }, { "epoch": 0.850341796875, "grad_norm": 0.17206718027591705, "learning_rate": 7.513132119895289e-05, "loss": 1.716, "step": 17415 }, { "epoch": 0.850390625, "grad_norm": 0.1943594217300415, "learning_rate": 7.51152370407764e-05, "loss": 1.7221, "step": 17416 }, { "epoch": 0.850439453125, "grad_norm": 0.16627764701843262, "learning_rate": 7.509915772696775e-05, "loss": 1.7432, "step": 17417 }, { "epoch": 0.85048828125, "grad_norm": 0.18645602464675903, "learning_rate": 7.50830832579166e-05, "loss": 1.7457, "step": 17418 }, { "epoch": 0.850537109375, "grad_norm": 0.20910054445266724, "learning_rate": 7.506701363401253e-05, "loss": 1.7305, "step": 17419 }, { "epoch": 0.8505859375, "grad_norm": 0.18872365355491638, "learning_rate": 7.505094885564501e-05, "loss": 1.7189, "step": 17420 }, { "epoch": 0.850634765625, "grad_norm": 0.21309378743171692, "learning_rate": 7.503488892320336e-05, "loss": 1.7488, "step": 17421 }, { "epoch": 0.85068359375, "grad_norm": 0.18123690783977509, "learning_rate": 7.501883383707688e-05, "loss": 1.7444, "step": 17422 }, { "epoch": 0.850732421875, "grad_norm": 0.20743386447429657, "learning_rate": 7.500278359765458e-05, "loss": 1.7163, "step": 17423 }, { "epoch": 0.85078125, "grad_norm": 0.19964465498924255, "learning_rate": 7.498673820532557e-05, "loss": 1.7153, "step": 17424 }, { "epoch": 0.850830078125, "grad_norm": 0.20175443589687347, "learning_rate": 7.497069766047854e-05, "loss": 1.7197, "step": 17425 }, { "epoch": 0.85087890625, "grad_norm": 0.22199948132038116, "learning_rate": 7.495466196350245e-05, "loss": 1.724, "step": 17426 }, { "epoch": 0.850927734375, "grad_norm": 0.1756700575351715, "learning_rate": 7.493863111478574e-05, "loss": 1.7138, "step": 17427 }, { "epoch": 0.8509765625, "grad_norm": 0.20718970894813538, "learning_rate": 7.492260511471715e-05, "loss": 1.7308, "step": 17428 }, { "epoch": 0.851025390625, "grad_norm": 0.17237405478954315, "learning_rate": 7.490658396368485e-05, "loss": 1.7226, "step": 17429 }, { "epoch": 0.85107421875, "grad_norm": 0.22715511918067932, "learning_rate": 7.489056766207735e-05, "loss": 1.7312, "step": 17430 }, { "epoch": 0.851123046875, "grad_norm": 0.18879617750644684, "learning_rate": 7.48745562102826e-05, "loss": 1.7179, "step": 17431 }, { "epoch": 0.851171875, "grad_norm": 0.23126965761184692, "learning_rate": 7.485854960868885e-05, "loss": 1.7568, "step": 17432 }, { "epoch": 0.851220703125, "grad_norm": 0.2119106501340866, "learning_rate": 7.484254785768391e-05, "loss": 1.7334, "step": 17433 }, { "epoch": 0.85126953125, "grad_norm": 0.172904372215271, "learning_rate": 7.482655095765563e-05, "loss": 1.7221, "step": 17434 }, { "epoch": 0.851318359375, "grad_norm": 0.22437041997909546, "learning_rate": 7.481055890899169e-05, "loss": 1.729, "step": 17435 }, { "epoch": 0.8513671875, "grad_norm": 0.192928746342659, "learning_rate": 7.47945717120797e-05, "loss": 1.7328, "step": 17436 }, { "epoch": 0.851416015625, "grad_norm": 0.21075361967086792, "learning_rate": 7.47785893673071e-05, "loss": 1.7434, "step": 17437 }, { "epoch": 0.85146484375, "grad_norm": 0.18077494204044342, "learning_rate": 7.476261187506125e-05, "loss": 1.704, "step": 17438 }, { "epoch": 0.851513671875, "grad_norm": 0.19211775064468384, "learning_rate": 7.474663923572936e-05, "loss": 1.7347, "step": 17439 }, { "epoch": 0.8515625, "grad_norm": 0.2031562328338623, "learning_rate": 7.473067144969861e-05, "loss": 1.6969, "step": 17440 }, { "epoch": 0.851611328125, "grad_norm": 0.17854580283164978, "learning_rate": 7.471470851735583e-05, "loss": 1.7294, "step": 17441 }, { "epoch": 0.85166015625, "grad_norm": 0.22817599773406982, "learning_rate": 7.469875043908808e-05, "loss": 1.7393, "step": 17442 }, { "epoch": 0.851708984375, "grad_norm": 0.16473793983459473, "learning_rate": 7.468279721528198e-05, "loss": 1.7233, "step": 17443 }, { "epoch": 0.8517578125, "grad_norm": 0.19940125942230225, "learning_rate": 7.466684884632425e-05, "loss": 1.7282, "step": 17444 }, { "epoch": 0.851806640625, "grad_norm": 0.19663597643375397, "learning_rate": 7.465090533260132e-05, "loss": 1.7185, "step": 17445 }, { "epoch": 0.85185546875, "grad_norm": 0.18218229711055756, "learning_rate": 7.463496667449978e-05, "loss": 1.716, "step": 17446 }, { "epoch": 0.851904296875, "grad_norm": 0.19524089992046356, "learning_rate": 7.46190328724057e-05, "loss": 1.7605, "step": 17447 }, { "epoch": 0.851953125, "grad_norm": 0.18326592445373535, "learning_rate": 7.460310392670541e-05, "loss": 1.7197, "step": 17448 }, { "epoch": 0.852001953125, "grad_norm": 0.17000208795070648, "learning_rate": 7.458717983778485e-05, "loss": 1.7285, "step": 17449 }, { "epoch": 0.85205078125, "grad_norm": 0.18377690017223358, "learning_rate": 7.457126060602998e-05, "loss": 1.702, "step": 17450 }, { "epoch": 0.852099609375, "grad_norm": 0.1676035076379776, "learning_rate": 7.455534623182666e-05, "loss": 1.7001, "step": 17451 }, { "epoch": 0.8521484375, "grad_norm": 0.19521594047546387, "learning_rate": 7.453943671556056e-05, "loss": 1.7423, "step": 17452 }, { "epoch": 0.852197265625, "grad_norm": 0.18365752696990967, "learning_rate": 7.452353205761725e-05, "loss": 1.7272, "step": 17453 }, { "epoch": 0.85224609375, "grad_norm": 0.1838628202676773, "learning_rate": 7.450763225838222e-05, "loss": 1.7247, "step": 17454 }, { "epoch": 0.852294921875, "grad_norm": 0.20158173143863678, "learning_rate": 7.449173731824077e-05, "loss": 1.7536, "step": 17455 }, { "epoch": 0.85234375, "grad_norm": 0.18481358885765076, "learning_rate": 7.447584723757814e-05, "loss": 1.7177, "step": 17456 }, { "epoch": 0.852392578125, "grad_norm": 0.18913981318473816, "learning_rate": 7.445996201677948e-05, "loss": 1.7188, "step": 17457 }, { "epoch": 0.85244140625, "grad_norm": 0.1654644012451172, "learning_rate": 7.44440816562298e-05, "loss": 1.7161, "step": 17458 }, { "epoch": 0.852490234375, "grad_norm": 0.24361926317214966, "learning_rate": 7.442820615631381e-05, "loss": 1.7467, "step": 17459 }, { "epoch": 0.8525390625, "grad_norm": 0.17957612872123718, "learning_rate": 7.441233551741651e-05, "loss": 1.7191, "step": 17460 }, { "epoch": 0.852587890625, "grad_norm": 0.21319466829299927, "learning_rate": 7.439646973992229e-05, "loss": 1.7256, "step": 17461 }, { "epoch": 0.85263671875, "grad_norm": 0.18069154024124146, "learning_rate": 7.43806088242159e-05, "loss": 1.7232, "step": 17462 }, { "epoch": 0.852685546875, "grad_norm": 0.20033049583435059, "learning_rate": 7.436475277068152e-05, "loss": 1.7257, "step": 17463 }, { "epoch": 0.852734375, "grad_norm": 0.1947268694639206, "learning_rate": 7.434890157970365e-05, "loss": 1.7171, "step": 17464 }, { "epoch": 0.852783203125, "grad_norm": 0.204610213637352, "learning_rate": 7.433305525166629e-05, "loss": 1.7184, "step": 17465 }, { "epoch": 0.85283203125, "grad_norm": 0.18615663051605225, "learning_rate": 7.431721378695355e-05, "loss": 1.7271, "step": 17466 }, { "epoch": 0.852880859375, "grad_norm": 0.2086789309978485, "learning_rate": 7.430137718594939e-05, "loss": 1.7251, "step": 17467 }, { "epoch": 0.8529296875, "grad_norm": 0.19308742880821228, "learning_rate": 7.428554544903756e-05, "loss": 1.7118, "step": 17468 }, { "epoch": 0.852978515625, "grad_norm": 0.16792544722557068, "learning_rate": 7.426971857660182e-05, "loss": 1.7336, "step": 17469 }, { "epoch": 0.85302734375, "grad_norm": 0.1967795342206955, "learning_rate": 7.42538965690257e-05, "loss": 1.7218, "step": 17470 }, { "epoch": 0.853076171875, "grad_norm": 0.18146257102489471, "learning_rate": 7.423807942669267e-05, "loss": 1.7173, "step": 17471 }, { "epoch": 0.853125, "grad_norm": 0.19230353832244873, "learning_rate": 7.422226714998607e-05, "loss": 1.7305, "step": 17472 }, { "epoch": 0.853173828125, "grad_norm": 0.17517827451229095, "learning_rate": 7.420645973928912e-05, "loss": 1.731, "step": 17473 }, { "epoch": 0.85322265625, "grad_norm": 0.17545455694198608, "learning_rate": 7.4190657194985e-05, "loss": 1.7055, "step": 17474 }, { "epoch": 0.853271484375, "grad_norm": 0.20816318690776825, "learning_rate": 7.417485951745656e-05, "loss": 1.7313, "step": 17475 }, { "epoch": 0.8533203125, "grad_norm": 0.1722179651260376, "learning_rate": 7.41590667070868e-05, "loss": 1.7338, "step": 17476 }, { "epoch": 0.853369140625, "grad_norm": 0.2189646065235138, "learning_rate": 7.414327876425834e-05, "loss": 1.7227, "step": 17477 }, { "epoch": 0.85341796875, "grad_norm": 0.17746715247631073, "learning_rate": 7.412749568935395e-05, "loss": 1.7473, "step": 17478 }, { "epoch": 0.853466796875, "grad_norm": 0.17334389686584473, "learning_rate": 7.411171748275602e-05, "loss": 1.7572, "step": 17479 }, { "epoch": 0.853515625, "grad_norm": 0.19798345863819122, "learning_rate": 7.409594414484709e-05, "loss": 1.7436, "step": 17480 }, { "epoch": 0.853564453125, "grad_norm": 0.19216695427894592, "learning_rate": 7.408017567600924e-05, "loss": 1.7152, "step": 17481 }, { "epoch": 0.85361328125, "grad_norm": 0.16895891726016998, "learning_rate": 7.406441207662487e-05, "loss": 1.7191, "step": 17482 }, { "epoch": 0.853662109375, "grad_norm": 0.1929677128791809, "learning_rate": 7.404865334707586e-05, "loss": 1.7185, "step": 17483 }, { "epoch": 0.8537109375, "grad_norm": 0.17658214271068573, "learning_rate": 7.403289948774417e-05, "loss": 1.7059, "step": 17484 }, { "epoch": 0.853759765625, "grad_norm": 0.19498544931411743, "learning_rate": 7.401715049901163e-05, "loss": 1.723, "step": 17485 }, { "epoch": 0.85380859375, "grad_norm": 0.19612812995910645, "learning_rate": 7.40014063812599e-05, "loss": 1.7274, "step": 17486 }, { "epoch": 0.853857421875, "grad_norm": 0.19162949919700623, "learning_rate": 7.398566713487057e-05, "loss": 1.7361, "step": 17487 }, { "epoch": 0.85390625, "grad_norm": 0.20131775736808777, "learning_rate": 7.396993276022509e-05, "loss": 1.733, "step": 17488 }, { "epoch": 0.853955078125, "grad_norm": 0.16798952221870422, "learning_rate": 7.39542032577048e-05, "loss": 1.742, "step": 17489 }, { "epoch": 0.85400390625, "grad_norm": 0.2260064035654068, "learning_rate": 7.39384786276909e-05, "loss": 1.7066, "step": 17490 }, { "epoch": 0.854052734375, "grad_norm": 0.20814630389213562, "learning_rate": 7.392275887056452e-05, "loss": 1.7464, "step": 17491 }, { "epoch": 0.8541015625, "grad_norm": 0.20171025395393372, "learning_rate": 7.390704398670665e-05, "loss": 1.7138, "step": 17492 }, { "epoch": 0.854150390625, "grad_norm": 0.27269214391708374, "learning_rate": 7.389133397649806e-05, "loss": 1.7202, "step": 17493 }, { "epoch": 0.85419921875, "grad_norm": 0.17354463040828705, "learning_rate": 7.387562884031964e-05, "loss": 1.7292, "step": 17494 }, { "epoch": 0.854248046875, "grad_norm": 0.23957033455371857, "learning_rate": 7.38599285785518e-05, "loss": 1.7044, "step": 17495 }, { "epoch": 0.854296875, "grad_norm": 0.19714701175689697, "learning_rate": 7.384423319157534e-05, "loss": 1.7235, "step": 17496 }, { "epoch": 0.854345703125, "grad_norm": 0.19513726234436035, "learning_rate": 7.382854267977038e-05, "loss": 1.7058, "step": 17497 }, { "epoch": 0.85439453125, "grad_norm": 0.24053972959518433, "learning_rate": 7.38128570435174e-05, "loss": 1.7395, "step": 17498 }, { "epoch": 0.854443359375, "grad_norm": 0.18098516762256622, "learning_rate": 7.379717628319641e-05, "loss": 1.7332, "step": 17499 }, { "epoch": 0.8544921875, "grad_norm": 0.21133798360824585, "learning_rate": 7.378150039918752e-05, "loss": 1.7283, "step": 17500 }, { "epoch": 0.854541015625, "grad_norm": 0.19986693561077118, "learning_rate": 7.37658293918706e-05, "loss": 1.7386, "step": 17501 }, { "epoch": 0.85458984375, "grad_norm": 0.21003291010856628, "learning_rate": 7.375016326162547e-05, "loss": 1.7268, "step": 17502 }, { "epoch": 0.854638671875, "grad_norm": 0.2002452164888382, "learning_rate": 7.373450200883185e-05, "loss": 1.7433, "step": 17503 }, { "epoch": 0.8546875, "grad_norm": 0.1967248171567917, "learning_rate": 7.371884563386925e-05, "loss": 1.7469, "step": 17504 }, { "epoch": 0.854736328125, "grad_norm": 0.2003423273563385, "learning_rate": 7.370319413711713e-05, "loss": 1.7288, "step": 17505 }, { "epoch": 0.85478515625, "grad_norm": 0.19041259586811066, "learning_rate": 7.36875475189548e-05, "loss": 1.7349, "step": 17506 }, { "epoch": 0.854833984375, "grad_norm": 0.20446716248989105, "learning_rate": 7.367190577976152e-05, "loss": 1.7507, "step": 17507 }, { "epoch": 0.8548828125, "grad_norm": 0.20588171482086182, "learning_rate": 7.365626891991639e-05, "loss": 1.726, "step": 17508 }, { "epoch": 0.854931640625, "grad_norm": 0.21996808052062988, "learning_rate": 7.364063693979822e-05, "loss": 1.7238, "step": 17509 }, { "epoch": 0.85498046875, "grad_norm": 0.16883216798305511, "learning_rate": 7.36250098397861e-05, "loss": 1.7132, "step": 17510 }, { "epoch": 0.855029296875, "grad_norm": 0.22522246837615967, "learning_rate": 7.360938762025857e-05, "loss": 1.7198, "step": 17511 }, { "epoch": 0.855078125, "grad_norm": 0.1623651385307312, "learning_rate": 7.359377028159441e-05, "loss": 1.7469, "step": 17512 }, { "epoch": 0.855126953125, "grad_norm": 0.19509965181350708, "learning_rate": 7.357815782417194e-05, "loss": 1.744, "step": 17513 }, { "epoch": 0.85517578125, "grad_norm": 0.19334258139133453, "learning_rate": 7.356255024836973e-05, "loss": 1.7123, "step": 17514 }, { "epoch": 0.855224609375, "grad_norm": 0.2090274542570114, "learning_rate": 7.354694755456583e-05, "loss": 1.724, "step": 17515 }, { "epoch": 0.8552734375, "grad_norm": 0.20149491727352142, "learning_rate": 7.353134974313864e-05, "loss": 1.7385, "step": 17516 }, { "epoch": 0.855322265625, "grad_norm": 0.24860498309135437, "learning_rate": 7.351575681446599e-05, "loss": 1.7067, "step": 17517 }, { "epoch": 0.85537109375, "grad_norm": 0.1899438500404358, "learning_rate": 7.350016876892582e-05, "loss": 1.728, "step": 17518 }, { "epoch": 0.855419921875, "grad_norm": 0.2139921337366104, "learning_rate": 7.348458560689596e-05, "loss": 1.7389, "step": 17519 }, { "epoch": 0.85546875, "grad_norm": 0.18405821919441223, "learning_rate": 7.346900732875403e-05, "loss": 1.7382, "step": 17520 }, { "epoch": 0.855517578125, "grad_norm": 0.21140500903129578, "learning_rate": 7.345343393487766e-05, "loss": 1.7338, "step": 17521 }, { "epoch": 0.85556640625, "grad_norm": 0.20441706478595734, "learning_rate": 7.343786542564421e-05, "loss": 1.7258, "step": 17522 }, { "epoch": 0.855615234375, "grad_norm": 0.21410414576530457, "learning_rate": 7.342230180143104e-05, "loss": 1.7286, "step": 17523 }, { "epoch": 0.8556640625, "grad_norm": 0.19003915786743164, "learning_rate": 7.340674306261535e-05, "loss": 1.7433, "step": 17524 }, { "epoch": 0.855712890625, "grad_norm": 0.19567584991455078, "learning_rate": 7.339118920957417e-05, "loss": 1.7253, "step": 17525 }, { "epoch": 0.85576171875, "grad_norm": 0.18098250031471252, "learning_rate": 7.33756402426845e-05, "loss": 1.7108, "step": 17526 }, { "epoch": 0.855810546875, "grad_norm": 0.1825166940689087, "learning_rate": 7.336009616232317e-05, "loss": 1.7386, "step": 17527 }, { "epoch": 0.855859375, "grad_norm": 0.19591431319713593, "learning_rate": 7.334455696886696e-05, "loss": 1.7451, "step": 17528 }, { "epoch": 0.855908203125, "grad_norm": 0.19670003652572632, "learning_rate": 7.332902266269234e-05, "loss": 1.728, "step": 17529 }, { "epoch": 0.85595703125, "grad_norm": 0.1824454516172409, "learning_rate": 7.331349324417594e-05, "loss": 1.7349, "step": 17530 }, { "epoch": 0.856005859375, "grad_norm": 0.19118793308734894, "learning_rate": 7.329796871369399e-05, "loss": 1.7027, "step": 17531 }, { "epoch": 0.8560546875, "grad_norm": 0.18614646792411804, "learning_rate": 7.328244907162289e-05, "loss": 1.7224, "step": 17532 }, { "epoch": 0.856103515625, "grad_norm": 0.19116592407226562, "learning_rate": 7.326693431833864e-05, "loss": 1.7298, "step": 17533 }, { "epoch": 0.85615234375, "grad_norm": 0.17146313190460205, "learning_rate": 7.325142445421735e-05, "loss": 1.7157, "step": 17534 }, { "epoch": 0.856201171875, "grad_norm": 0.19860707223415375, "learning_rate": 7.323591947963485e-05, "loss": 1.7533, "step": 17535 }, { "epoch": 0.85625, "grad_norm": 0.16285517811775208, "learning_rate": 7.322041939496694e-05, "loss": 1.7305, "step": 17536 }, { "epoch": 0.856298828125, "grad_norm": 0.1749168336391449, "learning_rate": 7.320492420058924e-05, "loss": 1.7116, "step": 17537 }, { "epoch": 0.85634765625, "grad_norm": 0.18915040791034698, "learning_rate": 7.318943389687738e-05, "loss": 1.7236, "step": 17538 }, { "epoch": 0.856396484375, "grad_norm": 0.18497033417224884, "learning_rate": 7.317394848420669e-05, "loss": 1.7236, "step": 17539 }, { "epoch": 0.8564453125, "grad_norm": 0.18925443291664124, "learning_rate": 7.315846796295251e-05, "loss": 1.7506, "step": 17540 }, { "epoch": 0.856494140625, "grad_norm": 0.16490086913108826, "learning_rate": 7.314299233349e-05, "loss": 1.718, "step": 17541 }, { "epoch": 0.85654296875, "grad_norm": 0.19448910653591156, "learning_rate": 7.312752159619425e-05, "loss": 1.7367, "step": 17542 }, { "epoch": 0.856591796875, "grad_norm": 0.18859373033046722, "learning_rate": 7.31120557514402e-05, "loss": 1.7463, "step": 17543 }, { "epoch": 0.856640625, "grad_norm": 0.17740558087825775, "learning_rate": 7.309659479960268e-05, "loss": 1.7459, "step": 17544 }, { "epoch": 0.856689453125, "grad_norm": 0.18955101072788239, "learning_rate": 7.30811387410564e-05, "loss": 1.7208, "step": 17545 }, { "epoch": 0.85673828125, "grad_norm": 0.17355142533779144, "learning_rate": 7.306568757617593e-05, "loss": 1.7354, "step": 17546 }, { "epoch": 0.856787109375, "grad_norm": 0.1757836788892746, "learning_rate": 7.305024130533575e-05, "loss": 1.6949, "step": 17547 }, { "epoch": 0.8568359375, "grad_norm": 0.18096929788589478, "learning_rate": 7.303479992891027e-05, "loss": 1.7152, "step": 17548 }, { "epoch": 0.856884765625, "grad_norm": 0.17842623591423035, "learning_rate": 7.301936344727358e-05, "loss": 1.7366, "step": 17549 }, { "epoch": 0.85693359375, "grad_norm": 0.22322306036949158, "learning_rate": 7.300393186079997e-05, "loss": 1.7322, "step": 17550 }, { "epoch": 0.856982421875, "grad_norm": 0.18367597460746765, "learning_rate": 7.298850516986331e-05, "loss": 1.7303, "step": 17551 }, { "epoch": 0.85703125, "grad_norm": 0.20814229547977448, "learning_rate": 7.297308337483753e-05, "loss": 1.7433, "step": 17552 }, { "epoch": 0.857080078125, "grad_norm": 0.20548053085803986, "learning_rate": 7.295766647609636e-05, "loss": 1.732, "step": 17553 }, { "epoch": 0.85712890625, "grad_norm": 0.20345984399318695, "learning_rate": 7.294225447401348e-05, "loss": 1.7315, "step": 17554 }, { "epoch": 0.857177734375, "grad_norm": 0.20660828053951263, "learning_rate": 7.292684736896239e-05, "loss": 1.7318, "step": 17555 }, { "epoch": 0.8572265625, "grad_norm": 0.17857471108436584, "learning_rate": 7.291144516131651e-05, "loss": 1.7439, "step": 17556 }, { "epoch": 0.857275390625, "grad_norm": 0.2099241316318512, "learning_rate": 7.28960478514491e-05, "loss": 1.7104, "step": 17557 }, { "epoch": 0.85732421875, "grad_norm": 0.1991538107395172, "learning_rate": 7.288065543973335e-05, "loss": 1.7309, "step": 17558 }, { "epoch": 0.857373046875, "grad_norm": 0.2102830708026886, "learning_rate": 7.286526792654227e-05, "loss": 1.7281, "step": 17559 }, { "epoch": 0.857421875, "grad_norm": 0.21048985421657562, "learning_rate": 7.284988531224884e-05, "loss": 1.731, "step": 17560 }, { "epoch": 0.857470703125, "grad_norm": 0.18582770228385925, "learning_rate": 7.283450759722585e-05, "loss": 1.7305, "step": 17561 }, { "epoch": 0.85751953125, "grad_norm": 0.1939193606376648, "learning_rate": 7.281913478184597e-05, "loss": 1.7235, "step": 17562 }, { "epoch": 0.857568359375, "grad_norm": 0.21601472795009613, "learning_rate": 7.28037668664818e-05, "loss": 1.7151, "step": 17563 }, { "epoch": 0.8576171875, "grad_norm": 0.18931178748607635, "learning_rate": 7.278840385150577e-05, "loss": 1.7362, "step": 17564 }, { "epoch": 0.857666015625, "grad_norm": 0.21833553910255432, "learning_rate": 7.277304573729023e-05, "loss": 1.7399, "step": 17565 }, { "epoch": 0.85771484375, "grad_norm": 0.21441198885440826, "learning_rate": 7.275769252420738e-05, "loss": 1.7412, "step": 17566 }, { "epoch": 0.857763671875, "grad_norm": 0.20945118367671967, "learning_rate": 7.274234421262942e-05, "loss": 1.7331, "step": 17567 }, { "epoch": 0.8578125, "grad_norm": 0.20694348216056824, "learning_rate": 7.272700080292816e-05, "loss": 1.74, "step": 17568 }, { "epoch": 0.857861328125, "grad_norm": 0.20625752210617065, "learning_rate": 7.271166229547554e-05, "loss": 1.7226, "step": 17569 }, { "epoch": 0.85791015625, "grad_norm": 0.2038184255361557, "learning_rate": 7.269632869064334e-05, "loss": 1.7379, "step": 17570 }, { "epoch": 0.857958984375, "grad_norm": 0.21384990215301514, "learning_rate": 7.26809999888031e-05, "loss": 1.7083, "step": 17571 }, { "epoch": 0.8580078125, "grad_norm": 0.18505285680294037, "learning_rate": 7.26656761903264e-05, "loss": 1.7258, "step": 17572 }, { "epoch": 0.858056640625, "grad_norm": 0.18385811150074005, "learning_rate": 7.265035729558456e-05, "loss": 1.7552, "step": 17573 }, { "epoch": 0.85810546875, "grad_norm": 0.20921039581298828, "learning_rate": 7.263504330494889e-05, "loss": 1.7059, "step": 17574 }, { "epoch": 0.858154296875, "grad_norm": 0.19916154444217682, "learning_rate": 7.261973421879052e-05, "loss": 1.7185, "step": 17575 }, { "epoch": 0.858203125, "grad_norm": 0.19895298779010773, "learning_rate": 7.260443003748049e-05, "loss": 1.7195, "step": 17576 }, { "epoch": 0.858251953125, "grad_norm": 0.2140757441520691, "learning_rate": 7.25891307613897e-05, "loss": 1.7147, "step": 17577 }, { "epoch": 0.85830078125, "grad_norm": 0.1861199587583542, "learning_rate": 7.257383639088892e-05, "loss": 1.7337, "step": 17578 }, { "epoch": 0.858349609375, "grad_norm": 0.21001718938350677, "learning_rate": 7.255854692634886e-05, "loss": 1.6989, "step": 17579 }, { "epoch": 0.8583984375, "grad_norm": 0.2395028918981552, "learning_rate": 7.254326236814007e-05, "loss": 1.7473, "step": 17580 }, { "epoch": 0.858447265625, "grad_norm": 0.19770489633083344, "learning_rate": 7.252798271663294e-05, "loss": 1.7197, "step": 17581 }, { "epoch": 0.85849609375, "grad_norm": 0.22263027727603912, "learning_rate": 7.251270797219782e-05, "loss": 1.7432, "step": 17582 }, { "epoch": 0.858544921875, "grad_norm": 0.22565780580043793, "learning_rate": 7.249743813520495e-05, "loss": 1.7473, "step": 17583 }, { "epoch": 0.85859375, "grad_norm": 0.19467441737651825, "learning_rate": 7.248217320602429e-05, "loss": 1.7186, "step": 17584 }, { "epoch": 0.858642578125, "grad_norm": 0.19764183461666107, "learning_rate": 7.246691318502592e-05, "loss": 1.7206, "step": 17585 }, { "epoch": 0.85869140625, "grad_norm": 0.20727108418941498, "learning_rate": 7.245165807257953e-05, "loss": 1.7501, "step": 17586 }, { "epoch": 0.858740234375, "grad_norm": 0.1833348572254181, "learning_rate": 7.243640786905503e-05, "loss": 1.742, "step": 17587 }, { "epoch": 0.8587890625, "grad_norm": 0.1843889057636261, "learning_rate": 7.242116257482188e-05, "loss": 1.731, "step": 17588 }, { "epoch": 0.858837890625, "grad_norm": 0.1995699405670166, "learning_rate": 7.240592219024961e-05, "loss": 1.7364, "step": 17589 }, { "epoch": 0.85888671875, "grad_norm": 0.25563669204711914, "learning_rate": 7.239068671570758e-05, "loss": 1.7106, "step": 17590 }, { "epoch": 0.858935546875, "grad_norm": 0.17426900565624237, "learning_rate": 7.237545615156503e-05, "loss": 1.7126, "step": 17591 }, { "epoch": 0.858984375, "grad_norm": 0.25533032417297363, "learning_rate": 7.23602304981911e-05, "loss": 1.7337, "step": 17592 }, { "epoch": 0.859033203125, "grad_norm": 0.20625348389148712, "learning_rate": 7.234500975595476e-05, "loss": 1.7311, "step": 17593 }, { "epoch": 0.85908203125, "grad_norm": 0.2127239853143692, "learning_rate": 7.232979392522491e-05, "loss": 1.7244, "step": 17594 }, { "epoch": 0.859130859375, "grad_norm": 0.21957483887672424, "learning_rate": 7.231458300637037e-05, "loss": 1.763, "step": 17595 }, { "epoch": 0.8591796875, "grad_norm": 0.2024005502462387, "learning_rate": 7.229937699975972e-05, "loss": 1.7154, "step": 17596 }, { "epoch": 0.859228515625, "grad_norm": 0.23063048720359802, "learning_rate": 7.228417590576152e-05, "loss": 1.7222, "step": 17597 }, { "epoch": 0.85927734375, "grad_norm": 0.20241570472717285, "learning_rate": 7.226897972474417e-05, "loss": 1.7118, "step": 17598 }, { "epoch": 0.859326171875, "grad_norm": 0.20646803081035614, "learning_rate": 7.225378845707598e-05, "loss": 1.7055, "step": 17599 }, { "epoch": 0.859375, "grad_norm": 0.18537095189094543, "learning_rate": 7.22386021031251e-05, "loss": 1.7227, "step": 17600 }, { "epoch": 0.859423828125, "grad_norm": 0.22436372935771942, "learning_rate": 7.222342066325964e-05, "loss": 1.7416, "step": 17601 }, { "epoch": 0.85947265625, "grad_norm": 0.18945914506912231, "learning_rate": 7.220824413784741e-05, "loss": 1.742, "step": 17602 }, { "epoch": 0.859521484375, "grad_norm": 0.21626710891723633, "learning_rate": 7.21930725272564e-05, "loss": 1.7317, "step": 17603 }, { "epoch": 0.8595703125, "grad_norm": 0.19188758730888367, "learning_rate": 7.21779058318541e-05, "loss": 1.7151, "step": 17604 }, { "epoch": 0.859619140625, "grad_norm": 0.22227872908115387, "learning_rate": 7.216274405200828e-05, "loss": 1.7151, "step": 17605 }, { "epoch": 0.85966796875, "grad_norm": 0.20683574676513672, "learning_rate": 7.214758718808627e-05, "loss": 1.7383, "step": 17606 }, { "epoch": 0.859716796875, "grad_norm": 0.1836816668510437, "learning_rate": 7.213243524045551e-05, "loss": 1.7197, "step": 17607 }, { "epoch": 0.859765625, "grad_norm": 0.18948139250278473, "learning_rate": 7.211728820948315e-05, "loss": 1.7097, "step": 17608 }, { "epoch": 0.859814453125, "grad_norm": 0.1928671896457672, "learning_rate": 7.210214609553626e-05, "loss": 1.7253, "step": 17609 }, { "epoch": 0.85986328125, "grad_norm": 0.21079722046852112, "learning_rate": 7.20870088989819e-05, "loss": 1.7221, "step": 17610 }, { "epoch": 0.859912109375, "grad_norm": 0.18626463413238525, "learning_rate": 7.207187662018688e-05, "loss": 1.7412, "step": 17611 }, { "epoch": 0.8599609375, "grad_norm": 0.2244148850440979, "learning_rate": 7.205674925951797e-05, "loss": 1.7047, "step": 17612 }, { "epoch": 0.860009765625, "grad_norm": 0.18869484961032867, "learning_rate": 7.204162681734178e-05, "loss": 1.738, "step": 17613 }, { "epoch": 0.86005859375, "grad_norm": 0.22574594616889954, "learning_rate": 7.202650929402482e-05, "loss": 1.7513, "step": 17614 }, { "epoch": 0.860107421875, "grad_norm": 0.2007441371679306, "learning_rate": 7.201139668993348e-05, "loss": 1.7201, "step": 17615 }, { "epoch": 0.86015625, "grad_norm": 0.22003476321697235, "learning_rate": 7.199628900543401e-05, "loss": 1.7232, "step": 17616 }, { "epoch": 0.860205078125, "grad_norm": 0.1806425303220749, "learning_rate": 7.19811862408926e-05, "loss": 1.704, "step": 17617 }, { "epoch": 0.86025390625, "grad_norm": 0.2238454967737198, "learning_rate": 7.196608839667517e-05, "loss": 1.7381, "step": 17618 }, { "epoch": 0.860302734375, "grad_norm": 0.17601606249809265, "learning_rate": 7.19509954731478e-05, "loss": 1.7241, "step": 17619 }, { "epoch": 0.8603515625, "grad_norm": 0.1894073635339737, "learning_rate": 7.19359074706761e-05, "loss": 1.7228, "step": 17620 }, { "epoch": 0.860400390625, "grad_norm": 0.18004155158996582, "learning_rate": 7.192082438962592e-05, "loss": 1.7266, "step": 17621 }, { "epoch": 0.86044921875, "grad_norm": 0.2076016217470169, "learning_rate": 7.19057462303626e-05, "loss": 1.7199, "step": 17622 }, { "epoch": 0.860498046875, "grad_norm": 0.17378002405166626, "learning_rate": 7.189067299325178e-05, "loss": 1.7123, "step": 17623 }, { "epoch": 0.860546875, "grad_norm": 0.22042743861675262, "learning_rate": 7.187560467865857e-05, "loss": 1.6904, "step": 17624 }, { "epoch": 0.860595703125, "grad_norm": 0.17389969527721405, "learning_rate": 7.186054128694838e-05, "loss": 1.7188, "step": 17625 }, { "epoch": 0.86064453125, "grad_norm": 0.18348650634288788, "learning_rate": 7.184548281848613e-05, "loss": 1.722, "step": 17626 }, { "epoch": 0.860693359375, "grad_norm": 0.18227757513523102, "learning_rate": 7.18304292736368e-05, "loss": 1.6939, "step": 17627 }, { "epoch": 0.8607421875, "grad_norm": 0.16463227570056915, "learning_rate": 7.181538065276524e-05, "loss": 1.7216, "step": 17628 }, { "epoch": 0.860791015625, "grad_norm": 0.16882355511188507, "learning_rate": 7.180033695623617e-05, "loss": 1.7402, "step": 17629 }, { "epoch": 0.86083984375, "grad_norm": 0.17950686812400818, "learning_rate": 7.17852981844142e-05, "loss": 1.7296, "step": 17630 }, { "epoch": 0.860888671875, "grad_norm": 0.18659570813179016, "learning_rate": 7.17702643376638e-05, "loss": 1.7046, "step": 17631 }, { "epoch": 0.8609375, "grad_norm": 0.18968400359153748, "learning_rate": 7.175523541634928e-05, "loss": 1.7202, "step": 17632 }, { "epoch": 0.860986328125, "grad_norm": 0.19628369808197021, "learning_rate": 7.174021142083494e-05, "loss": 1.7361, "step": 17633 }, { "epoch": 0.86103515625, "grad_norm": 0.17326612770557404, "learning_rate": 7.172519235148486e-05, "loss": 1.7075, "step": 17634 }, { "epoch": 0.861083984375, "grad_norm": 0.23301084339618683, "learning_rate": 7.171017820866311e-05, "loss": 1.7432, "step": 17635 }, { "epoch": 0.8611328125, "grad_norm": 0.1968957781791687, "learning_rate": 7.16951689927334e-05, "loss": 1.7466, "step": 17636 }, { "epoch": 0.861181640625, "grad_norm": 0.19177478551864624, "learning_rate": 7.168016470405972e-05, "loss": 1.7149, "step": 17637 }, { "epoch": 0.86123046875, "grad_norm": 0.20483586192131042, "learning_rate": 7.16651653430055e-05, "loss": 1.7233, "step": 17638 }, { "epoch": 0.861279296875, "grad_norm": 0.18738068640232086, "learning_rate": 7.16501709099344e-05, "loss": 1.7218, "step": 17639 }, { "epoch": 0.861328125, "grad_norm": 0.2003963440656662, "learning_rate": 7.163518140520973e-05, "loss": 1.7302, "step": 17640 }, { "epoch": 0.861376953125, "grad_norm": 0.1869557499885559, "learning_rate": 7.162019682919492e-05, "loss": 1.7317, "step": 17641 }, { "epoch": 0.86142578125, "grad_norm": 0.22747798264026642, "learning_rate": 7.160521718225297e-05, "loss": 1.7238, "step": 17642 }, { "epoch": 0.861474609375, "grad_norm": 0.18667107820510864, "learning_rate": 7.1590242464747e-05, "loss": 1.6969, "step": 17643 }, { "epoch": 0.8615234375, "grad_norm": 0.20519377291202545, "learning_rate": 7.15752726770399e-05, "loss": 1.7054, "step": 17644 }, { "epoch": 0.861572265625, "grad_norm": 0.20920713245868683, "learning_rate": 7.15603078194945e-05, "loss": 1.7161, "step": 17645 }, { "epoch": 0.86162109375, "grad_norm": 0.15693527460098267, "learning_rate": 7.154534789247349e-05, "loss": 1.7197, "step": 17646 }, { "epoch": 0.861669921875, "grad_norm": 0.22593173384666443, "learning_rate": 7.153039289633943e-05, "loss": 1.7081, "step": 17647 }, { "epoch": 0.86171875, "grad_norm": 0.18532954156398773, "learning_rate": 7.151544283145478e-05, "loss": 1.7593, "step": 17648 }, { "epoch": 0.861767578125, "grad_norm": 0.22041140496730804, "learning_rate": 7.150049769818181e-05, "loss": 1.733, "step": 17649 }, { "epoch": 0.86181640625, "grad_norm": 0.20100924372673035, "learning_rate": 7.14855574968828e-05, "loss": 1.7197, "step": 17650 }, { "epoch": 0.861865234375, "grad_norm": 0.20335465669631958, "learning_rate": 7.147062222791983e-05, "loss": 1.7168, "step": 17651 }, { "epoch": 0.8619140625, "grad_norm": 0.22614511847496033, "learning_rate": 7.145569189165477e-05, "loss": 1.6961, "step": 17652 }, { "epoch": 0.861962890625, "grad_norm": 0.17631889879703522, "learning_rate": 7.144076648844964e-05, "loss": 1.7283, "step": 17653 }, { "epoch": 0.86201171875, "grad_norm": 0.21143081784248352, "learning_rate": 7.142584601866597e-05, "loss": 1.7294, "step": 17654 }, { "epoch": 0.862060546875, "grad_norm": 0.18603526055812836, "learning_rate": 7.141093048266555e-05, "loss": 1.7266, "step": 17655 }, { "epoch": 0.862109375, "grad_norm": 0.18998411297798157, "learning_rate": 7.13960198808097e-05, "loss": 1.7294, "step": 17656 }, { "epoch": 0.862158203125, "grad_norm": 0.18565627932548523, "learning_rate": 7.138111421345999e-05, "loss": 1.7286, "step": 17657 }, { "epoch": 0.86220703125, "grad_norm": 0.21161562204360962, "learning_rate": 7.136621348097749e-05, "loss": 1.7537, "step": 17658 }, { "epoch": 0.862255859375, "grad_norm": 0.16171827912330627, "learning_rate": 7.135131768372346e-05, "loss": 1.7067, "step": 17659 }, { "epoch": 0.8623046875, "grad_norm": 0.18612971901893616, "learning_rate": 7.133642682205883e-05, "loss": 1.7205, "step": 17660 }, { "epoch": 0.862353515625, "grad_norm": 0.18426884710788727, "learning_rate": 7.132154089634452e-05, "loss": 1.7557, "step": 17661 }, { "epoch": 0.86240234375, "grad_norm": 0.16551724076271057, "learning_rate": 7.130665990694129e-05, "loss": 1.7166, "step": 17662 }, { "epoch": 0.862451171875, "grad_norm": 0.17865219712257385, "learning_rate": 7.129178385420979e-05, "loss": 1.7097, "step": 17663 }, { "epoch": 0.8625, "grad_norm": 0.18931621313095093, "learning_rate": 7.127691273851057e-05, "loss": 1.701, "step": 17664 }, { "epoch": 0.862548828125, "grad_norm": 0.1767130047082901, "learning_rate": 7.126204656020405e-05, "loss": 1.7327, "step": 17665 }, { "epoch": 0.86259765625, "grad_norm": 0.18128947913646698, "learning_rate": 7.124718531965051e-05, "loss": 1.7422, "step": 17666 }, { "epoch": 0.862646484375, "grad_norm": 0.1632750928401947, "learning_rate": 7.123232901721019e-05, "loss": 1.7321, "step": 17667 }, { "epoch": 0.8626953125, "grad_norm": 0.1944117546081543, "learning_rate": 7.121747765324297e-05, "loss": 1.7332, "step": 17668 }, { "epoch": 0.862744140625, "grad_norm": 0.17525388300418854, "learning_rate": 7.120263122810897e-05, "loss": 1.688, "step": 17669 }, { "epoch": 0.86279296875, "grad_norm": 0.1663488894701004, "learning_rate": 7.118778974216788e-05, "loss": 1.7103, "step": 17670 }, { "epoch": 0.862841796875, "grad_norm": 0.1884116232395172, "learning_rate": 7.117295319577953e-05, "loss": 1.7319, "step": 17671 }, { "epoch": 0.862890625, "grad_norm": 0.16771167516708374, "learning_rate": 7.115812158930331e-05, "loss": 1.7333, "step": 17672 }, { "epoch": 0.862939453125, "grad_norm": 0.19281704723834991, "learning_rate": 7.114329492309885e-05, "loss": 1.7404, "step": 17673 }, { "epoch": 0.86298828125, "grad_norm": 0.1681196242570877, "learning_rate": 7.112847319752534e-05, "loss": 1.7218, "step": 17674 }, { "epoch": 0.863037109375, "grad_norm": 0.18400554358959198, "learning_rate": 7.111365641294218e-05, "loss": 1.7235, "step": 17675 }, { "epoch": 0.8630859375, "grad_norm": 0.18082869052886963, "learning_rate": 7.109884456970831e-05, "loss": 1.742, "step": 17676 }, { "epoch": 0.863134765625, "grad_norm": 0.17986194789409637, "learning_rate": 7.108403766818273e-05, "loss": 1.7051, "step": 17677 }, { "epoch": 0.86318359375, "grad_norm": 0.18491947650909424, "learning_rate": 7.106923570872434e-05, "loss": 1.745, "step": 17678 }, { "epoch": 0.863232421875, "grad_norm": 0.15843181312084198, "learning_rate": 7.105443869169185e-05, "loss": 1.7181, "step": 17679 }, { "epoch": 0.86328125, "grad_norm": 0.19737322628498077, "learning_rate": 7.103964661744388e-05, "loss": 1.7415, "step": 17680 }, { "epoch": 0.863330078125, "grad_norm": 0.1892387866973877, "learning_rate": 7.102485948633896e-05, "loss": 1.732, "step": 17681 }, { "epoch": 0.86337890625, "grad_norm": 0.170208141207695, "learning_rate": 7.101007729873539e-05, "loss": 1.7039, "step": 17682 }, { "epoch": 0.863427734375, "grad_norm": 0.18644928932189941, "learning_rate": 7.099530005499153e-05, "loss": 1.7202, "step": 17683 }, { "epoch": 0.8634765625, "grad_norm": 0.20044578611850739, "learning_rate": 7.098052775546546e-05, "loss": 1.7546, "step": 17684 }, { "epoch": 0.863525390625, "grad_norm": 0.18391205370426178, "learning_rate": 7.096576040051524e-05, "loss": 1.712, "step": 17685 }, { "epoch": 0.86357421875, "grad_norm": 0.20059995353221893, "learning_rate": 7.095099799049866e-05, "loss": 1.7188, "step": 17686 }, { "epoch": 0.863623046875, "grad_norm": 0.16068615019321442, "learning_rate": 7.093624052577364e-05, "loss": 1.7234, "step": 17687 }, { "epoch": 0.863671875, "grad_norm": 0.22687412798404694, "learning_rate": 7.092148800669771e-05, "loss": 1.7404, "step": 17688 }, { "epoch": 0.863720703125, "grad_norm": 0.18675516545772552, "learning_rate": 7.090674043362856e-05, "loss": 1.7248, "step": 17689 }, { "epoch": 0.86376953125, "grad_norm": 0.23123879730701447, "learning_rate": 7.089199780692341e-05, "loss": 1.7104, "step": 17690 }, { "epoch": 0.863818359375, "grad_norm": 0.2129630297422409, "learning_rate": 7.08772601269398e-05, "loss": 1.7432, "step": 17691 }, { "epoch": 0.8638671875, "grad_norm": 0.22668008506298065, "learning_rate": 7.086252739403465e-05, "loss": 1.7117, "step": 17692 }, { "epoch": 0.863916015625, "grad_norm": 0.21998606622219086, "learning_rate": 7.084779960856527e-05, "loss": 1.7387, "step": 17693 }, { "epoch": 0.86396484375, "grad_norm": 0.21918952465057373, "learning_rate": 7.083307677088842e-05, "loss": 1.7222, "step": 17694 }, { "epoch": 0.864013671875, "grad_norm": 0.20420020818710327, "learning_rate": 7.0818358881361e-05, "loss": 1.7366, "step": 17695 }, { "epoch": 0.8640625, "grad_norm": 0.21306215226650238, "learning_rate": 7.080364594033966e-05, "loss": 1.7186, "step": 17696 }, { "epoch": 0.864111328125, "grad_norm": 0.18800421059131622, "learning_rate": 7.078893794818105e-05, "loss": 1.7155, "step": 17697 }, { "epoch": 0.86416015625, "grad_norm": 0.19302137196063995, "learning_rate": 7.077423490524156e-05, "loss": 1.7292, "step": 17698 }, { "epoch": 0.864208984375, "grad_norm": 0.20807009935379028, "learning_rate": 7.075953681187756e-05, "loss": 1.732, "step": 17699 }, { "epoch": 0.8642578125, "grad_norm": 0.1870962679386139, "learning_rate": 7.074484366844528e-05, "loss": 1.7225, "step": 17700 }, { "epoch": 0.864306640625, "grad_norm": 0.2150651514530182, "learning_rate": 7.073015547530086e-05, "loss": 1.747, "step": 17701 }, { "epoch": 0.86435546875, "grad_norm": 0.2082197368144989, "learning_rate": 7.071547223280015e-05, "loss": 1.7329, "step": 17702 }, { "epoch": 0.864404296875, "grad_norm": 0.22415700554847717, "learning_rate": 7.070079394129915e-05, "loss": 1.7046, "step": 17703 }, { "epoch": 0.864453125, "grad_norm": 0.21618112921714783, "learning_rate": 7.068612060115346e-05, "loss": 1.7352, "step": 17704 }, { "epoch": 0.864501953125, "grad_norm": 0.19464564323425293, "learning_rate": 7.067145221271888e-05, "loss": 1.723, "step": 17705 }, { "epoch": 0.86455078125, "grad_norm": 0.2287190854549408, "learning_rate": 7.065678877635075e-05, "loss": 1.7532, "step": 17706 }, { "epoch": 0.864599609375, "grad_norm": 0.1858421415090561, "learning_rate": 7.064213029240456e-05, "loss": 1.7319, "step": 17707 }, { "epoch": 0.8646484375, "grad_norm": 0.21511463820934296, "learning_rate": 7.062747676123543e-05, "loss": 1.7357, "step": 17708 }, { "epoch": 0.864697265625, "grad_norm": 0.20637021958827972, "learning_rate": 7.061282818319872e-05, "loss": 1.7367, "step": 17709 }, { "epoch": 0.86474609375, "grad_norm": 0.18542571365833282, "learning_rate": 7.059818455864925e-05, "loss": 1.703, "step": 17710 }, { "epoch": 0.864794921875, "grad_norm": 0.23658306896686554, "learning_rate": 7.058354588794198e-05, "loss": 1.7238, "step": 17711 }, { "epoch": 0.86484375, "grad_norm": 0.20517182350158691, "learning_rate": 7.056891217143171e-05, "loss": 1.7128, "step": 17712 }, { "epoch": 0.864892578125, "grad_norm": 0.2395840883255005, "learning_rate": 7.055428340947311e-05, "loss": 1.7337, "step": 17713 }, { "epoch": 0.86494140625, "grad_norm": 0.1961636245250702, "learning_rate": 7.053965960242071e-05, "loss": 1.7283, "step": 17714 }, { "epoch": 0.864990234375, "grad_norm": 0.1958022117614746, "learning_rate": 7.052504075062887e-05, "loss": 1.7416, "step": 17715 }, { "epoch": 0.8650390625, "grad_norm": 0.2567346692085266, "learning_rate": 7.051042685445199e-05, "loss": 1.7143, "step": 17716 }, { "epoch": 0.865087890625, "grad_norm": 0.17682641744613647, "learning_rate": 7.049581791424418e-05, "loss": 1.702, "step": 17717 }, { "epoch": 0.86513671875, "grad_norm": 0.23855407536029816, "learning_rate": 7.048121393035952e-05, "loss": 1.7234, "step": 17718 }, { "epoch": 0.865185546875, "grad_norm": 0.20329341292381287, "learning_rate": 7.046661490315198e-05, "loss": 1.7421, "step": 17719 }, { "epoch": 0.865234375, "grad_norm": 0.16995634138584137, "learning_rate": 7.045202083297529e-05, "loss": 1.7109, "step": 17720 }, { "epoch": 0.865283203125, "grad_norm": 0.22320666909217834, "learning_rate": 7.043743172018327e-05, "loss": 1.7135, "step": 17721 }, { "epoch": 0.86533203125, "grad_norm": 0.18662521243095398, "learning_rate": 7.042284756512939e-05, "loss": 1.7386, "step": 17722 }, { "epoch": 0.865380859375, "grad_norm": 0.1895136684179306, "learning_rate": 7.04082683681672e-05, "loss": 1.747, "step": 17723 }, { "epoch": 0.8654296875, "grad_norm": 0.2160906046628952, "learning_rate": 7.039369412964992e-05, "loss": 1.7469, "step": 17724 }, { "epoch": 0.865478515625, "grad_norm": 0.17449866235256195, "learning_rate": 7.037912484993092e-05, "loss": 1.7423, "step": 17725 }, { "epoch": 0.86552734375, "grad_norm": 0.1865914762020111, "learning_rate": 7.036456052936318e-05, "loss": 1.7295, "step": 17726 }, { "epoch": 0.865576171875, "grad_norm": 0.1968938261270523, "learning_rate": 7.03500011682997e-05, "loss": 1.7441, "step": 17727 }, { "epoch": 0.865625, "grad_norm": 0.18826378881931305, "learning_rate": 7.03354467670934e-05, "loss": 1.7177, "step": 17728 }, { "epoch": 0.865673828125, "grad_norm": 0.19432641565799713, "learning_rate": 7.032089732609696e-05, "loss": 1.7304, "step": 17729 }, { "epoch": 0.86572265625, "grad_norm": 0.21943525969982147, "learning_rate": 7.030635284566301e-05, "loss": 1.7374, "step": 17730 }, { "epoch": 0.865771484375, "grad_norm": 0.17574049532413483, "learning_rate": 7.029181332614402e-05, "loss": 1.7062, "step": 17731 }, { "epoch": 0.8658203125, "grad_norm": 0.1926216185092926, "learning_rate": 7.027727876789241e-05, "loss": 1.7292, "step": 17732 }, { "epoch": 0.865869140625, "grad_norm": 0.2109452337026596, "learning_rate": 7.026274917126044e-05, "loss": 1.7315, "step": 17733 }, { "epoch": 0.86591796875, "grad_norm": 0.18559028208255768, "learning_rate": 7.024822453660019e-05, "loss": 1.7355, "step": 17734 }, { "epoch": 0.865966796875, "grad_norm": 0.2097337394952774, "learning_rate": 7.023370486426379e-05, "loss": 1.7323, "step": 17735 }, { "epoch": 0.866015625, "grad_norm": 0.19224204123020172, "learning_rate": 7.021919015460299e-05, "loss": 1.7484, "step": 17736 }, { "epoch": 0.866064453125, "grad_norm": 0.20631356537342072, "learning_rate": 7.020468040796969e-05, "loss": 1.7159, "step": 17737 }, { "epoch": 0.86611328125, "grad_norm": 0.17553547024726868, "learning_rate": 7.01901756247154e-05, "loss": 1.7207, "step": 17738 }, { "epoch": 0.866162109375, "grad_norm": 0.17590966820716858, "learning_rate": 7.017567580519184e-05, "loss": 1.7281, "step": 17739 }, { "epoch": 0.8662109375, "grad_norm": 0.18689046800136566, "learning_rate": 7.016118094975024e-05, "loss": 1.7202, "step": 17740 }, { "epoch": 0.866259765625, "grad_norm": 0.17747965455055237, "learning_rate": 7.014669105874209e-05, "loss": 1.7511, "step": 17741 }, { "epoch": 0.86630859375, "grad_norm": 0.20292454957962036, "learning_rate": 7.013220613251837e-05, "loss": 1.7103, "step": 17742 }, { "epoch": 0.866357421875, "grad_norm": 0.18413911759853363, "learning_rate": 7.011772617143031e-05, "loss": 1.7143, "step": 17743 }, { "epoch": 0.86640625, "grad_norm": 0.19478704035282135, "learning_rate": 7.010325117582869e-05, "loss": 1.7112, "step": 17744 }, { "epoch": 0.866455078125, "grad_norm": 0.18922831118106842, "learning_rate": 7.008878114606441e-05, "loss": 1.7319, "step": 17745 }, { "epoch": 0.86650390625, "grad_norm": 0.20012196898460388, "learning_rate": 7.007431608248815e-05, "loss": 1.7277, "step": 17746 }, { "epoch": 0.866552734375, "grad_norm": 0.17821146547794342, "learning_rate": 7.005985598545046e-05, "loss": 1.705, "step": 17747 }, { "epoch": 0.8666015625, "grad_norm": 0.19031083583831787, "learning_rate": 7.004540085530184e-05, "loss": 1.717, "step": 17748 }, { "epoch": 0.866650390625, "grad_norm": 0.1842140257358551, "learning_rate": 7.003095069239255e-05, "loss": 1.7058, "step": 17749 }, { "epoch": 0.86669921875, "grad_norm": 0.18504641950130463, "learning_rate": 7.001650549707283e-05, "loss": 1.7079, "step": 17750 }, { "epoch": 0.866748046875, "grad_norm": 0.18892763555049896, "learning_rate": 7.000206526969283e-05, "loss": 1.7003, "step": 17751 }, { "epoch": 0.866796875, "grad_norm": 0.18005713820457458, "learning_rate": 6.998763001060244e-05, "loss": 1.7266, "step": 17752 }, { "epoch": 0.866845703125, "grad_norm": 0.18303723633289337, "learning_rate": 6.997319972015159e-05, "loss": 1.7528, "step": 17753 }, { "epoch": 0.86689453125, "grad_norm": 0.18307745456695557, "learning_rate": 6.995877439868989e-05, "loss": 1.7163, "step": 17754 }, { "epoch": 0.866943359375, "grad_norm": 0.19761091470718384, "learning_rate": 6.994435404656708e-05, "loss": 1.7172, "step": 17755 }, { "epoch": 0.8669921875, "grad_norm": 0.17428742349147797, "learning_rate": 6.99299386641325e-05, "loss": 1.7286, "step": 17756 }, { "epoch": 0.867041015625, "grad_norm": 0.18792782723903656, "learning_rate": 6.991552825173574e-05, "loss": 1.7241, "step": 17757 }, { "epoch": 0.86708984375, "grad_norm": 0.1805383712053299, "learning_rate": 6.99011228097258e-05, "loss": 1.736, "step": 17758 }, { "epoch": 0.867138671875, "grad_norm": 0.18689358234405518, "learning_rate": 6.988672233845202e-05, "loss": 1.7301, "step": 17759 }, { "epoch": 0.8671875, "grad_norm": 0.191310852766037, "learning_rate": 6.987232683826325e-05, "loss": 1.7171, "step": 17760 }, { "epoch": 0.867236328125, "grad_norm": 0.1952594518661499, "learning_rate": 6.985793630950845e-05, "loss": 1.7158, "step": 17761 }, { "epoch": 0.86728515625, "grad_norm": 0.16781897842884064, "learning_rate": 6.984355075253635e-05, "loss": 1.7044, "step": 17762 }, { "epoch": 0.867333984375, "grad_norm": 0.20182640850543976, "learning_rate": 6.982917016769565e-05, "loss": 1.7273, "step": 17763 }, { "epoch": 0.8673828125, "grad_norm": 0.172761008143425, "learning_rate": 6.981479455533485e-05, "loss": 1.7236, "step": 17764 }, { "epoch": 0.867431640625, "grad_norm": 0.1993354707956314, "learning_rate": 6.980042391580231e-05, "loss": 1.7106, "step": 17765 }, { "epoch": 0.86748046875, "grad_norm": 0.19025850296020508, "learning_rate": 6.978605824944636e-05, "loss": 1.7482, "step": 17766 }, { "epoch": 0.867529296875, "grad_norm": 0.18329772353172302, "learning_rate": 6.977169755661518e-05, "loss": 1.7317, "step": 17767 }, { "epoch": 0.867578125, "grad_norm": 0.21638606488704681, "learning_rate": 6.975734183765678e-05, "loss": 1.7148, "step": 17768 }, { "epoch": 0.867626953125, "grad_norm": 0.20634613931179047, "learning_rate": 6.974299109291912e-05, "loss": 1.6934, "step": 17769 }, { "epoch": 0.86767578125, "grad_norm": 0.23201830685138702, "learning_rate": 6.972864532274989e-05, "loss": 1.7322, "step": 17770 }, { "epoch": 0.867724609375, "grad_norm": 0.21277815103530884, "learning_rate": 6.971430452749696e-05, "loss": 1.7101, "step": 17771 }, { "epoch": 0.8677734375, "grad_norm": 0.1917581558227539, "learning_rate": 6.969996870750767e-05, "loss": 1.7389, "step": 17772 }, { "epoch": 0.867822265625, "grad_norm": 0.21429570019245148, "learning_rate": 6.968563786312969e-05, "loss": 1.7284, "step": 17773 }, { "epoch": 0.86787109375, "grad_norm": 0.17085562646389008, "learning_rate": 6.967131199471011e-05, "loss": 1.7277, "step": 17774 }, { "epoch": 0.867919921875, "grad_norm": 0.19499757885932922, "learning_rate": 6.965699110259635e-05, "loss": 1.7596, "step": 17775 }, { "epoch": 0.86796875, "grad_norm": 0.19493035972118378, "learning_rate": 6.96426751871353e-05, "loss": 1.728, "step": 17776 }, { "epoch": 0.868017578125, "grad_norm": 0.17777468264102936, "learning_rate": 6.962836424867406e-05, "loss": 1.7087, "step": 17777 }, { "epoch": 0.86806640625, "grad_norm": 0.18140430748462677, "learning_rate": 6.961405828755939e-05, "loss": 1.7209, "step": 17778 }, { "epoch": 0.868115234375, "grad_norm": 0.16611315310001373, "learning_rate": 6.9599757304138e-05, "loss": 1.7227, "step": 17779 }, { "epoch": 0.8681640625, "grad_norm": 0.2157808244228363, "learning_rate": 6.958546129875651e-05, "loss": 1.7199, "step": 17780 }, { "epoch": 0.868212890625, "grad_norm": 0.17769795656204224, "learning_rate": 6.957117027176141e-05, "loss": 1.7399, "step": 17781 }, { "epoch": 0.86826171875, "grad_norm": 0.19659359753131866, "learning_rate": 6.955688422349901e-05, "loss": 1.7176, "step": 17782 }, { "epoch": 0.868310546875, "grad_norm": 0.20663484930992126, "learning_rate": 6.95426031543156e-05, "loss": 1.7592, "step": 17783 }, { "epoch": 0.868359375, "grad_norm": 0.2157040387392044, "learning_rate": 6.952832706455726e-05, "loss": 1.7509, "step": 17784 }, { "epoch": 0.868408203125, "grad_norm": 0.1956561952829361, "learning_rate": 6.951405595456997e-05, "loss": 1.732, "step": 17785 }, { "epoch": 0.86845703125, "grad_norm": 0.19081135094165802, "learning_rate": 6.949978982469965e-05, "loss": 1.7364, "step": 17786 }, { "epoch": 0.868505859375, "grad_norm": 0.18178121745586395, "learning_rate": 6.948552867529201e-05, "loss": 1.7202, "step": 17787 }, { "epoch": 0.8685546875, "grad_norm": 0.20923925936222076, "learning_rate": 6.947127250669268e-05, "loss": 1.7107, "step": 17788 }, { "epoch": 0.868603515625, "grad_norm": 0.19328813254833221, "learning_rate": 6.945702131924725e-05, "loss": 1.7137, "step": 17789 }, { "epoch": 0.86865234375, "grad_norm": 0.20783616602420807, "learning_rate": 6.944277511330093e-05, "loss": 1.7189, "step": 17790 }, { "epoch": 0.868701171875, "grad_norm": 0.1919604390859604, "learning_rate": 6.94285338891992e-05, "loss": 1.7233, "step": 17791 }, { "epoch": 0.86875, "grad_norm": 0.2019084095954895, "learning_rate": 6.9414297647287e-05, "loss": 1.738, "step": 17792 }, { "epoch": 0.868798828125, "grad_norm": 0.1864495724439621, "learning_rate": 6.940006638790955e-05, "loss": 1.753, "step": 17793 }, { "epoch": 0.86884765625, "grad_norm": 0.20286306738853455, "learning_rate": 6.93858401114116e-05, "loss": 1.711, "step": 17794 }, { "epoch": 0.868896484375, "grad_norm": 0.19142307341098785, "learning_rate": 6.937161881813807e-05, "loss": 1.7451, "step": 17795 }, { "epoch": 0.8689453125, "grad_norm": 0.17035388946533203, "learning_rate": 6.93574025084335e-05, "loss": 1.7208, "step": 17796 }, { "epoch": 0.868994140625, "grad_norm": 0.1995464563369751, "learning_rate": 6.934319118264252e-05, "loss": 1.7067, "step": 17797 }, { "epoch": 0.86904296875, "grad_norm": 0.1941162645816803, "learning_rate": 6.932898484110951e-05, "loss": 1.7456, "step": 17798 }, { "epoch": 0.869091796875, "grad_norm": 0.19546222686767578, "learning_rate": 6.931478348417883e-05, "loss": 1.7325, "step": 17799 }, { "epoch": 0.869140625, "grad_norm": 0.17525990307331085, "learning_rate": 6.930058711219456e-05, "loss": 1.7619, "step": 17800 }, { "epoch": 0.869189453125, "grad_norm": 0.20727211236953735, "learning_rate": 6.928639572550084e-05, "loss": 1.7403, "step": 17801 }, { "epoch": 0.86923828125, "grad_norm": 0.18245720863342285, "learning_rate": 6.927220932444159e-05, "loss": 1.716, "step": 17802 }, { "epoch": 0.869287109375, "grad_norm": 0.19419129192829132, "learning_rate": 6.925802790936065e-05, "loss": 1.7376, "step": 17803 }, { "epoch": 0.8693359375, "grad_norm": 0.20200695097446442, "learning_rate": 6.924385148060167e-05, "loss": 1.7736, "step": 17804 }, { "epoch": 0.869384765625, "grad_norm": 0.1779521405696869, "learning_rate": 6.922968003850825e-05, "loss": 1.7413, "step": 17805 }, { "epoch": 0.86943359375, "grad_norm": 0.22735199332237244, "learning_rate": 6.921551358342384e-05, "loss": 1.7342, "step": 17806 }, { "epoch": 0.869482421875, "grad_norm": 0.19614970684051514, "learning_rate": 6.920135211569183e-05, "loss": 1.7201, "step": 17807 }, { "epoch": 0.86953125, "grad_norm": 0.18746677041053772, "learning_rate": 6.918719563565533e-05, "loss": 1.7326, "step": 17808 }, { "epoch": 0.869580078125, "grad_norm": 0.19411601126194, "learning_rate": 6.917304414365756e-05, "loss": 1.7346, "step": 17809 }, { "epoch": 0.86962890625, "grad_norm": 0.17988936603069305, "learning_rate": 6.915889764004136e-05, "loss": 1.7299, "step": 17810 }, { "epoch": 0.869677734375, "grad_norm": 0.1751905232667923, "learning_rate": 6.914475612514972e-05, "loss": 1.7037, "step": 17811 }, { "epoch": 0.8697265625, "grad_norm": 0.2128361463546753, "learning_rate": 6.913061959932524e-05, "loss": 1.7241, "step": 17812 }, { "epoch": 0.869775390625, "grad_norm": 0.18762491643428802, "learning_rate": 6.91164880629106e-05, "loss": 1.7051, "step": 17813 }, { "epoch": 0.86982421875, "grad_norm": 0.16510619223117828, "learning_rate": 6.910236151624826e-05, "loss": 1.7259, "step": 17814 }, { "epoch": 0.869873046875, "grad_norm": 0.18308144807815552, "learning_rate": 6.908823995968064e-05, "loss": 1.716, "step": 17815 }, { "epoch": 0.869921875, "grad_norm": 0.17569077014923096, "learning_rate": 6.907412339354992e-05, "loss": 1.729, "step": 17816 }, { "epoch": 0.869970703125, "grad_norm": 0.1795748919248581, "learning_rate": 6.906001181819827e-05, "loss": 1.7028, "step": 17817 }, { "epoch": 0.87001953125, "grad_norm": 0.1921849101781845, "learning_rate": 6.904590523396769e-05, "loss": 1.6905, "step": 17818 }, { "epoch": 0.870068359375, "grad_norm": 0.18631699681282043, "learning_rate": 6.903180364120004e-05, "loss": 1.7198, "step": 17819 }, { "epoch": 0.8701171875, "grad_norm": 0.20921404659748077, "learning_rate": 6.90177070402371e-05, "loss": 1.7185, "step": 17820 }, { "epoch": 0.870166015625, "grad_norm": 0.20885612070560455, "learning_rate": 6.900361543142054e-05, "loss": 1.7339, "step": 17821 }, { "epoch": 0.87021484375, "grad_norm": 0.18358540534973145, "learning_rate": 6.898952881509185e-05, "loss": 1.7296, "step": 17822 }, { "epoch": 0.870263671875, "grad_norm": 0.19959904253482819, "learning_rate": 6.897544719159241e-05, "loss": 1.7396, "step": 17823 }, { "epoch": 0.8703125, "grad_norm": 0.17795057594776154, "learning_rate": 6.896137056126355e-05, "loss": 1.7306, "step": 17824 }, { "epoch": 0.870361328125, "grad_norm": 0.18620705604553223, "learning_rate": 6.89472989244464e-05, "loss": 1.7315, "step": 17825 }, { "epoch": 0.87041015625, "grad_norm": 0.19316847622394562, "learning_rate": 6.893323228148201e-05, "loss": 1.7329, "step": 17826 }, { "epoch": 0.870458984375, "grad_norm": 0.1909635066986084, "learning_rate": 6.891917063271127e-05, "loss": 1.7273, "step": 17827 }, { "epoch": 0.8705078125, "grad_norm": 0.20236319303512573, "learning_rate": 6.890511397847505e-05, "loss": 1.7345, "step": 17828 }, { "epoch": 0.870556640625, "grad_norm": 0.18754664063453674, "learning_rate": 6.88910623191139e-05, "loss": 1.7484, "step": 17829 }, { "epoch": 0.87060546875, "grad_norm": 0.1862086057662964, "learning_rate": 6.887701565496848e-05, "loss": 1.7049, "step": 17830 }, { "epoch": 0.870654296875, "grad_norm": 0.20114341378211975, "learning_rate": 6.886297398637917e-05, "loss": 1.723, "step": 17831 }, { "epoch": 0.870703125, "grad_norm": 0.18968553841114044, "learning_rate": 6.884893731368628e-05, "loss": 1.7313, "step": 17832 }, { "epoch": 0.870751953125, "grad_norm": 0.1944444477558136, "learning_rate": 6.883490563723002e-05, "loss": 1.7081, "step": 17833 }, { "epoch": 0.87080078125, "grad_norm": 0.21194103360176086, "learning_rate": 6.882087895735045e-05, "loss": 1.7296, "step": 17834 }, { "epoch": 0.870849609375, "grad_norm": 0.19169938564300537, "learning_rate": 6.880685727438754e-05, "loss": 1.7125, "step": 17835 }, { "epoch": 0.8708984375, "grad_norm": 0.18569496273994446, "learning_rate": 6.879284058868107e-05, "loss": 1.7173, "step": 17836 }, { "epoch": 0.870947265625, "grad_norm": 0.1869591921567917, "learning_rate": 6.87788289005708e-05, "loss": 1.7486, "step": 17837 }, { "epoch": 0.87099609375, "grad_norm": 0.20721040666103363, "learning_rate": 6.876482221039628e-05, "loss": 1.7292, "step": 17838 }, { "epoch": 0.871044921875, "grad_norm": 0.18978960812091827, "learning_rate": 6.875082051849698e-05, "loss": 1.697, "step": 17839 }, { "epoch": 0.87109375, "grad_norm": 0.17458070814609528, "learning_rate": 6.873682382521225e-05, "loss": 1.7156, "step": 17840 }, { "epoch": 0.871142578125, "grad_norm": 0.20379310846328735, "learning_rate": 6.872283213088127e-05, "loss": 1.7117, "step": 17841 }, { "epoch": 0.87119140625, "grad_norm": 0.17419128119945526, "learning_rate": 6.870884543584322e-05, "loss": 1.7101, "step": 17842 }, { "epoch": 0.871240234375, "grad_norm": 0.2028089165687561, "learning_rate": 6.869486374043702e-05, "loss": 1.7186, "step": 17843 }, { "epoch": 0.8712890625, "grad_norm": 0.1770002543926239, "learning_rate": 6.868088704500155e-05, "loss": 1.7692, "step": 17844 }, { "epoch": 0.871337890625, "grad_norm": 0.22425960004329681, "learning_rate": 6.866691534987551e-05, "loss": 1.7218, "step": 17845 }, { "epoch": 0.87138671875, "grad_norm": 0.19696110486984253, "learning_rate": 6.86529486553976e-05, "loss": 1.7169, "step": 17846 }, { "epoch": 0.871435546875, "grad_norm": 0.22793208062648773, "learning_rate": 6.863898696190615e-05, "loss": 1.7508, "step": 17847 }, { "epoch": 0.871484375, "grad_norm": 0.18946810066699982, "learning_rate": 6.862503026973976e-05, "loss": 1.739, "step": 17848 }, { "epoch": 0.871533203125, "grad_norm": 0.24533802270889282, "learning_rate": 6.86110785792365e-05, "loss": 1.7379, "step": 17849 }, { "epoch": 0.87158203125, "grad_norm": 0.20441333949565887, "learning_rate": 6.859713189073457e-05, "loss": 1.7323, "step": 17850 }, { "epoch": 0.871630859375, "grad_norm": 0.23146584630012512, "learning_rate": 6.858319020457196e-05, "loss": 1.7209, "step": 17851 }, { "epoch": 0.8716796875, "grad_norm": 0.19381891191005707, "learning_rate": 6.856925352108657e-05, "loss": 1.7193, "step": 17852 }, { "epoch": 0.871728515625, "grad_norm": 0.23171406984329224, "learning_rate": 6.855532184061617e-05, "loss": 1.7308, "step": 17853 }, { "epoch": 0.87177734375, "grad_norm": 0.183364599943161, "learning_rate": 6.854139516349842e-05, "loss": 1.6912, "step": 17854 }, { "epoch": 0.871826171875, "grad_norm": 0.19621050357818604, "learning_rate": 6.852747349007079e-05, "loss": 1.6866, "step": 17855 }, { "epoch": 0.871875, "grad_norm": 0.21158653497695923, "learning_rate": 6.851355682067073e-05, "loss": 1.7323, "step": 17856 }, { "epoch": 0.871923828125, "grad_norm": 0.17584876716136932, "learning_rate": 6.849964515563553e-05, "loss": 1.7331, "step": 17857 }, { "epoch": 0.87197265625, "grad_norm": 0.19453641772270203, "learning_rate": 6.848573849530232e-05, "loss": 1.7188, "step": 17858 }, { "epoch": 0.872021484375, "grad_norm": 0.17673254013061523, "learning_rate": 6.847183684000815e-05, "loss": 1.7262, "step": 17859 }, { "epoch": 0.8720703125, "grad_norm": 0.175822913646698, "learning_rate": 6.845794019008992e-05, "loss": 1.7416, "step": 17860 }, { "epoch": 0.872119140625, "grad_norm": 0.20799970626831055, "learning_rate": 6.844404854588447e-05, "loss": 1.7293, "step": 17861 }, { "epoch": 0.87216796875, "grad_norm": 0.1750548630952835, "learning_rate": 6.843016190772847e-05, "loss": 1.7292, "step": 17862 }, { "epoch": 0.872216796875, "grad_norm": 0.20432569086551666, "learning_rate": 6.841628027595837e-05, "loss": 1.7356, "step": 17863 }, { "epoch": 0.872265625, "grad_norm": 0.181493878364563, "learning_rate": 6.840240365091076e-05, "loss": 1.7093, "step": 17864 }, { "epoch": 0.872314453125, "grad_norm": 0.18327932059764862, "learning_rate": 6.838853203292184e-05, "loss": 1.7346, "step": 17865 }, { "epoch": 0.87236328125, "grad_norm": 0.22242380678653717, "learning_rate": 6.837466542232787e-05, "loss": 1.7245, "step": 17866 }, { "epoch": 0.872412109375, "grad_norm": 0.18023058772087097, "learning_rate": 6.836080381946486e-05, "loss": 1.7183, "step": 17867 }, { "epoch": 0.8724609375, "grad_norm": 0.17711903154850006, "learning_rate": 6.834694722466879e-05, "loss": 1.7265, "step": 17868 }, { "epoch": 0.872509765625, "grad_norm": 0.2164091318845749, "learning_rate": 6.833309563827545e-05, "loss": 1.7237, "step": 17869 }, { "epoch": 0.87255859375, "grad_norm": 0.20269882678985596, "learning_rate": 6.831924906062062e-05, "loss": 1.715, "step": 17870 }, { "epoch": 0.872607421875, "grad_norm": 0.1847335249185562, "learning_rate": 6.830540749203979e-05, "loss": 1.6987, "step": 17871 }, { "epoch": 0.87265625, "grad_norm": 0.19898365437984467, "learning_rate": 6.829157093286849e-05, "loss": 1.7339, "step": 17872 }, { "epoch": 0.872705078125, "grad_norm": 0.19250664114952087, "learning_rate": 6.8277739383442e-05, "loss": 1.7285, "step": 17873 }, { "epoch": 0.87275390625, "grad_norm": 0.21779276430606842, "learning_rate": 6.826391284409564e-05, "loss": 1.751, "step": 17874 }, { "epoch": 0.872802734375, "grad_norm": 0.1878890097141266, "learning_rate": 6.825009131516439e-05, "loss": 1.7273, "step": 17875 }, { "epoch": 0.8728515625, "grad_norm": 0.19918431341648102, "learning_rate": 6.823627479698328e-05, "loss": 1.7339, "step": 17876 }, { "epoch": 0.872900390625, "grad_norm": 0.2254570871591568, "learning_rate": 6.822246328988718e-05, "loss": 1.7425, "step": 17877 }, { "epoch": 0.87294921875, "grad_norm": 0.17051275074481964, "learning_rate": 6.820865679421086e-05, "loss": 1.7184, "step": 17878 }, { "epoch": 0.872998046875, "grad_norm": 0.2355802059173584, "learning_rate": 6.819485531028877e-05, "loss": 1.7165, "step": 17879 }, { "epoch": 0.873046875, "grad_norm": 0.20741188526153564, "learning_rate": 6.818105883845558e-05, "loss": 1.7164, "step": 17880 }, { "epoch": 0.873095703125, "grad_norm": 0.22383908927440643, "learning_rate": 6.816726737904555e-05, "loss": 1.7181, "step": 17881 }, { "epoch": 0.87314453125, "grad_norm": 0.2155289351940155, "learning_rate": 6.815348093239301e-05, "loss": 1.7238, "step": 17882 }, { "epoch": 0.873193359375, "grad_norm": 0.18633250892162323, "learning_rate": 6.813969949883196e-05, "loss": 1.7113, "step": 17883 }, { "epoch": 0.8732421875, "grad_norm": 0.1825202852487564, "learning_rate": 6.812592307869657e-05, "loss": 1.7222, "step": 17884 }, { "epoch": 0.873291015625, "grad_norm": 0.19425655901432037, "learning_rate": 6.811215167232052e-05, "loss": 1.721, "step": 17885 }, { "epoch": 0.87333984375, "grad_norm": 0.16664059460163116, "learning_rate": 6.809838528003781e-05, "loss": 1.7382, "step": 17886 }, { "epoch": 0.873388671875, "grad_norm": 0.19619044661521912, "learning_rate": 6.80846239021819e-05, "loss": 1.7421, "step": 17887 }, { "epoch": 0.8734375, "grad_norm": 0.1865283101797104, "learning_rate": 6.807086753908636e-05, "loss": 1.736, "step": 17888 }, { "epoch": 0.873486328125, "grad_norm": 0.19172513484954834, "learning_rate": 6.805711619108463e-05, "loss": 1.7176, "step": 17889 }, { "epoch": 0.87353515625, "grad_norm": 0.18303394317626953, "learning_rate": 6.804336985850989e-05, "loss": 1.7169, "step": 17890 }, { "epoch": 0.873583984375, "grad_norm": 0.18800827860832214, "learning_rate": 6.802962854169538e-05, "loss": 1.7433, "step": 17891 }, { "epoch": 0.8736328125, "grad_norm": 0.1936405748128891, "learning_rate": 6.80158922409741e-05, "loss": 1.7251, "step": 17892 }, { "epoch": 0.873681640625, "grad_norm": 0.1900075227022171, "learning_rate": 6.800216095667894e-05, "loss": 1.7314, "step": 17893 }, { "epoch": 0.87373046875, "grad_norm": 0.20262326300144196, "learning_rate": 6.798843468914273e-05, "loss": 1.7328, "step": 17894 }, { "epoch": 0.873779296875, "grad_norm": 0.17300738394260406, "learning_rate": 6.79747134386981e-05, "loss": 1.7345, "step": 17895 }, { "epoch": 0.873828125, "grad_norm": 0.2187032401561737, "learning_rate": 6.796099720567767e-05, "loss": 1.7453, "step": 17896 }, { "epoch": 0.873876953125, "grad_norm": 0.19572703540325165, "learning_rate": 6.794728599041369e-05, "loss": 1.7407, "step": 17897 }, { "epoch": 0.87392578125, "grad_norm": 0.20624729990959167, "learning_rate": 6.79335797932387e-05, "loss": 1.7077, "step": 17898 }, { "epoch": 0.873974609375, "grad_norm": 0.1790080964565277, "learning_rate": 6.791987861448466e-05, "loss": 1.7432, "step": 17899 }, { "epoch": 0.8740234375, "grad_norm": 0.18709400296211243, "learning_rate": 6.790618245448382e-05, "loss": 1.7213, "step": 17900 }, { "epoch": 0.874072265625, "grad_norm": 0.1866280883550644, "learning_rate": 6.789249131356795e-05, "loss": 1.716, "step": 17901 }, { "epoch": 0.87412109375, "grad_norm": 0.1642514318227768, "learning_rate": 6.787880519206899e-05, "loss": 1.7204, "step": 17902 }, { "epoch": 0.874169921875, "grad_norm": 0.17150817811489105, "learning_rate": 6.786512409031859e-05, "loss": 1.7308, "step": 17903 }, { "epoch": 0.87421875, "grad_norm": 0.17104001343250275, "learning_rate": 6.785144800864827e-05, "loss": 1.7208, "step": 17904 }, { "epoch": 0.874267578125, "grad_norm": 0.16251233220100403, "learning_rate": 6.783777694738954e-05, "loss": 1.6992, "step": 17905 }, { "epoch": 0.87431640625, "grad_norm": 0.1845407485961914, "learning_rate": 6.782411090687375e-05, "loss": 1.702, "step": 17906 }, { "epoch": 0.874365234375, "grad_norm": 0.17475837469100952, "learning_rate": 6.781044988743205e-05, "loss": 1.7515, "step": 17907 }, { "epoch": 0.8744140625, "grad_norm": 0.16293780505657196, "learning_rate": 6.779679388939555e-05, "loss": 1.6946, "step": 17908 }, { "epoch": 0.874462890625, "grad_norm": 0.1811080276966095, "learning_rate": 6.778314291309523e-05, "loss": 1.7501, "step": 17909 }, { "epoch": 0.87451171875, "grad_norm": 0.17362460494041443, "learning_rate": 6.776949695886191e-05, "loss": 1.7392, "step": 17910 }, { "epoch": 0.874560546875, "grad_norm": 0.16015289723873138, "learning_rate": 6.775585602702633e-05, "loss": 1.7197, "step": 17911 }, { "epoch": 0.874609375, "grad_norm": 0.18335048854351044, "learning_rate": 6.77422201179191e-05, "loss": 1.7203, "step": 17912 }, { "epoch": 0.874658203125, "grad_norm": 0.1635589301586151, "learning_rate": 6.772858923187061e-05, "loss": 1.7238, "step": 17913 }, { "epoch": 0.87470703125, "grad_norm": 0.1805035024881363, "learning_rate": 6.771496336921134e-05, "loss": 1.7116, "step": 17914 }, { "epoch": 0.874755859375, "grad_norm": 0.1617557555437088, "learning_rate": 6.770134253027141e-05, "loss": 1.7368, "step": 17915 }, { "epoch": 0.8748046875, "grad_norm": 0.18289704620838165, "learning_rate": 6.768772671538103e-05, "loss": 1.7077, "step": 17916 }, { "epoch": 0.874853515625, "grad_norm": 0.17346549034118652, "learning_rate": 6.76741159248701e-05, "loss": 1.7508, "step": 17917 }, { "epoch": 0.87490234375, "grad_norm": 0.18032464385032654, "learning_rate": 6.76605101590686e-05, "loss": 1.7047, "step": 17918 }, { "epoch": 0.874951171875, "grad_norm": 0.16733673214912415, "learning_rate": 6.764690941830613e-05, "loss": 1.7224, "step": 17919 }, { "epoch": 0.875, "grad_norm": 0.1839507818222046, "learning_rate": 6.763331370291248e-05, "loss": 1.7671, "step": 17920 }, { "epoch": 0.875048828125, "grad_norm": 0.1920897662639618, "learning_rate": 6.761972301321702e-05, "loss": 1.7267, "step": 17921 }, { "epoch": 0.87509765625, "grad_norm": 0.19811154901981354, "learning_rate": 6.760613734954919e-05, "loss": 1.7325, "step": 17922 }, { "epoch": 0.875146484375, "grad_norm": 0.2016463428735733, "learning_rate": 6.759255671223825e-05, "loss": 1.7406, "step": 17923 }, { "epoch": 0.8751953125, "grad_norm": 0.22579973936080933, "learning_rate": 6.757898110161332e-05, "loss": 1.7157, "step": 17924 }, { "epoch": 0.875244140625, "grad_norm": 0.21073608100414276, "learning_rate": 6.756541051800341e-05, "loss": 1.731, "step": 17925 }, { "epoch": 0.87529296875, "grad_norm": 0.20107236504554749, "learning_rate": 6.755184496173742e-05, "loss": 1.7307, "step": 17926 }, { "epoch": 0.875341796875, "grad_norm": 0.18439677357673645, "learning_rate": 6.753828443314415e-05, "loss": 1.7204, "step": 17927 }, { "epoch": 0.875390625, "grad_norm": 0.196847602725029, "learning_rate": 6.752472893255224e-05, "loss": 1.7076, "step": 17928 }, { "epoch": 0.875439453125, "grad_norm": 0.2030870020389557, "learning_rate": 6.751117846029019e-05, "loss": 1.7309, "step": 17929 }, { "epoch": 0.87548828125, "grad_norm": 0.18142126500606537, "learning_rate": 6.749763301668647e-05, "loss": 1.7039, "step": 17930 }, { "epoch": 0.875537109375, "grad_norm": 0.1826467365026474, "learning_rate": 6.748409260206925e-05, "loss": 1.6948, "step": 17931 }, { "epoch": 0.8755859375, "grad_norm": 0.20027831196784973, "learning_rate": 6.747055721676683e-05, "loss": 1.7293, "step": 17932 }, { "epoch": 0.875634765625, "grad_norm": 0.17773543298244476, "learning_rate": 6.74570268611071e-05, "loss": 1.7428, "step": 17933 }, { "epoch": 0.87568359375, "grad_norm": 0.20017704367637634, "learning_rate": 6.744350153541815e-05, "loss": 1.7675, "step": 17934 }, { "epoch": 0.875732421875, "grad_norm": 0.1809801310300827, "learning_rate": 6.742998124002761e-05, "loss": 1.709, "step": 17935 }, { "epoch": 0.87578125, "grad_norm": 0.18685784935951233, "learning_rate": 6.741646597526329e-05, "loss": 1.7076, "step": 17936 }, { "epoch": 0.875830078125, "grad_norm": 0.19125083088874817, "learning_rate": 6.740295574145268e-05, "loss": 1.7288, "step": 17937 }, { "epoch": 0.87587890625, "grad_norm": 0.1646651178598404, "learning_rate": 6.73894505389232e-05, "loss": 1.723, "step": 17938 }, { "epoch": 0.875927734375, "grad_norm": 0.20217251777648926, "learning_rate": 6.737595036800219e-05, "loss": 1.7411, "step": 17939 }, { "epoch": 0.8759765625, "grad_norm": 0.17507201433181763, "learning_rate": 6.736245522901682e-05, "loss": 1.731, "step": 17940 }, { "epoch": 0.876025390625, "grad_norm": 0.21204882860183716, "learning_rate": 6.734896512229416e-05, "loss": 1.7277, "step": 17941 }, { "epoch": 0.87607421875, "grad_norm": 0.18803881108760834, "learning_rate": 6.733548004816117e-05, "loss": 1.7518, "step": 17942 }, { "epoch": 0.876123046875, "grad_norm": 0.19195102155208588, "learning_rate": 6.732200000694464e-05, "loss": 1.7399, "step": 17943 }, { "epoch": 0.876171875, "grad_norm": 0.17779026925563812, "learning_rate": 6.73085249989713e-05, "loss": 1.7219, "step": 17944 }, { "epoch": 0.876220703125, "grad_norm": 0.21063818037509918, "learning_rate": 6.72950550245677e-05, "loss": 1.7263, "step": 17945 }, { "epoch": 0.87626953125, "grad_norm": 0.18172000348567963, "learning_rate": 6.728159008406037e-05, "loss": 1.7172, "step": 17946 }, { "epoch": 0.876318359375, "grad_norm": 0.17288881540298462, "learning_rate": 6.726813017777548e-05, "loss": 1.7351, "step": 17947 }, { "epoch": 0.8763671875, "grad_norm": 0.17815296351909637, "learning_rate": 6.725467530603944e-05, "loss": 1.7406, "step": 17948 }, { "epoch": 0.876416015625, "grad_norm": 0.19547812640666962, "learning_rate": 6.724122546917817e-05, "loss": 1.7376, "step": 17949 }, { "epoch": 0.87646484375, "grad_norm": 0.19094814360141754, "learning_rate": 6.722778066751778e-05, "loss": 1.7116, "step": 17950 }, { "epoch": 0.876513671875, "grad_norm": 0.21936701238155365, "learning_rate": 6.721434090138397e-05, "loss": 1.6922, "step": 17951 }, { "epoch": 0.8765625, "grad_norm": 0.1713154911994934, "learning_rate": 6.720090617110264e-05, "loss": 1.7284, "step": 17952 }, { "epoch": 0.876611328125, "grad_norm": 0.20163066685199738, "learning_rate": 6.718747647699918e-05, "loss": 1.7303, "step": 17953 }, { "epoch": 0.87666015625, "grad_norm": 0.18274497985839844, "learning_rate": 6.717405181939928e-05, "loss": 1.7037, "step": 17954 }, { "epoch": 0.876708984375, "grad_norm": 0.17507000267505646, "learning_rate": 6.716063219862817e-05, "loss": 1.7017, "step": 17955 }, { "epoch": 0.8767578125, "grad_norm": 0.1854737550020218, "learning_rate": 6.71472176150111e-05, "loss": 1.7671, "step": 17956 }, { "epoch": 0.876806640625, "grad_norm": 0.21523825824260712, "learning_rate": 6.71338080688732e-05, "loss": 1.7127, "step": 17957 }, { "epoch": 0.87685546875, "grad_norm": 0.1997760683298111, "learning_rate": 6.712040356053946e-05, "loss": 1.7237, "step": 17958 }, { "epoch": 0.876904296875, "grad_norm": 0.17960546910762787, "learning_rate": 6.710700409033474e-05, "loss": 1.7049, "step": 17959 }, { "epoch": 0.876953125, "grad_norm": 0.2185206413269043, "learning_rate": 6.70936096585838e-05, "loss": 1.748, "step": 17960 }, { "epoch": 0.877001953125, "grad_norm": 0.18704581260681152, "learning_rate": 6.708022026561127e-05, "loss": 1.7463, "step": 17961 }, { "epoch": 0.87705078125, "grad_norm": 0.17272818088531494, "learning_rate": 6.706683591174168e-05, "loss": 1.7338, "step": 17962 }, { "epoch": 0.877099609375, "grad_norm": 0.18695445358753204, "learning_rate": 6.705345659729927e-05, "loss": 1.7263, "step": 17963 }, { "epoch": 0.8771484375, "grad_norm": 0.18514710664749146, "learning_rate": 6.704008232260852e-05, "loss": 1.7384, "step": 17964 }, { "epoch": 0.877197265625, "grad_norm": 0.17431895434856415, "learning_rate": 6.702671308799336e-05, "loss": 1.7401, "step": 17965 }, { "epoch": 0.87724609375, "grad_norm": 0.1996472179889679, "learning_rate": 6.701334889377797e-05, "loss": 1.7306, "step": 17966 }, { "epoch": 0.877294921875, "grad_norm": 0.18069307506084442, "learning_rate": 6.699998974028609e-05, "loss": 1.7434, "step": 17967 }, { "epoch": 0.87734375, "grad_norm": 0.1730852574110031, "learning_rate": 6.69866356278416e-05, "loss": 1.704, "step": 17968 }, { "epoch": 0.877392578125, "grad_norm": 0.19324414432048798, "learning_rate": 6.697328655676807e-05, "loss": 1.7475, "step": 17969 }, { "epoch": 0.87744140625, "grad_norm": 0.194650799036026, "learning_rate": 6.695994252738915e-05, "loss": 1.7308, "step": 17970 }, { "epoch": 0.877490234375, "grad_norm": 0.17192275822162628, "learning_rate": 6.694660354002811e-05, "loss": 1.714, "step": 17971 }, { "epoch": 0.8775390625, "grad_norm": 0.21759900450706482, "learning_rate": 6.69332695950083e-05, "loss": 1.7122, "step": 17972 }, { "epoch": 0.877587890625, "grad_norm": 0.17568649351596832, "learning_rate": 6.691994069265284e-05, "loss": 1.7371, "step": 17973 }, { "epoch": 0.87763671875, "grad_norm": 0.19406035542488098, "learning_rate": 6.690661683328478e-05, "loss": 1.734, "step": 17974 }, { "epoch": 0.877685546875, "grad_norm": 0.19971971213817596, "learning_rate": 6.689329801722706e-05, "loss": 1.7506, "step": 17975 }, { "epoch": 0.877734375, "grad_norm": 0.18257196247577667, "learning_rate": 6.687998424480246e-05, "loss": 1.7366, "step": 17976 }, { "epoch": 0.877783203125, "grad_norm": 0.17775024473667145, "learning_rate": 6.68666755163336e-05, "loss": 1.7384, "step": 17977 }, { "epoch": 0.87783203125, "grad_norm": 0.17909517884254456, "learning_rate": 6.685337183214312e-05, "loss": 1.7214, "step": 17978 }, { "epoch": 0.877880859375, "grad_norm": 0.21030710637569427, "learning_rate": 6.68400731925534e-05, "loss": 1.7336, "step": 17979 }, { "epoch": 0.8779296875, "grad_norm": 0.1744312196969986, "learning_rate": 6.682677959788675e-05, "loss": 1.7308, "step": 17980 }, { "epoch": 0.877978515625, "grad_norm": 0.18897373974323273, "learning_rate": 6.681349104846525e-05, "loss": 1.7416, "step": 17981 }, { "epoch": 0.87802734375, "grad_norm": 0.18598131835460663, "learning_rate": 6.680020754461115e-05, "loss": 1.72, "step": 17982 }, { "epoch": 0.878076171875, "grad_norm": 0.19550231099128723, "learning_rate": 6.67869290866462e-05, "loss": 1.7328, "step": 17983 }, { "epoch": 0.878125, "grad_norm": 0.18682323396205902, "learning_rate": 6.677365567489241e-05, "loss": 1.7163, "step": 17984 }, { "epoch": 0.878173828125, "grad_norm": 0.19051513075828552, "learning_rate": 6.676038730967125e-05, "loss": 1.6948, "step": 17985 }, { "epoch": 0.87822265625, "grad_norm": 0.1788247972726822, "learning_rate": 6.674712399130448e-05, "loss": 1.7515, "step": 17986 }, { "epoch": 0.878271484375, "grad_norm": 0.17802752554416656, "learning_rate": 6.673386572011343e-05, "loss": 1.6977, "step": 17987 }, { "epoch": 0.8783203125, "grad_norm": 0.20239609479904175, "learning_rate": 6.67206124964195e-05, "loss": 1.7293, "step": 17988 }, { "epoch": 0.878369140625, "grad_norm": 0.18353642523288727, "learning_rate": 6.670736432054384e-05, "loss": 1.7409, "step": 17989 }, { "epoch": 0.87841796875, "grad_norm": 0.18628549575805664, "learning_rate": 6.669412119280752e-05, "loss": 1.7285, "step": 17990 }, { "epoch": 0.878466796875, "grad_norm": 0.18671664595603943, "learning_rate": 6.668088311353151e-05, "loss": 1.7343, "step": 17991 }, { "epoch": 0.878515625, "grad_norm": 0.17258942127227783, "learning_rate": 6.666765008303671e-05, "loss": 1.7085, "step": 17992 }, { "epoch": 0.878564453125, "grad_norm": 0.1914634257555008, "learning_rate": 6.665442210164376e-05, "loss": 1.6974, "step": 17993 }, { "epoch": 0.87861328125, "grad_norm": 0.18595144152641296, "learning_rate": 6.664119916967325e-05, "loss": 1.7279, "step": 17994 }, { "epoch": 0.878662109375, "grad_norm": 0.2108178287744522, "learning_rate": 6.662798128744572e-05, "loss": 1.736, "step": 17995 }, { "epoch": 0.8787109375, "grad_norm": 0.17325298488140106, "learning_rate": 6.66147684552815e-05, "loss": 1.7394, "step": 17996 }, { "epoch": 0.878759765625, "grad_norm": 0.18511223793029785, "learning_rate": 6.660156067350068e-05, "loss": 1.6965, "step": 17997 }, { "epoch": 0.87880859375, "grad_norm": 0.21487104892730713, "learning_rate": 6.658835794242354e-05, "loss": 1.7355, "step": 17998 }, { "epoch": 0.878857421875, "grad_norm": 0.1818135678768158, "learning_rate": 6.65751602623699e-05, "loss": 1.6986, "step": 17999 }, { "epoch": 0.87890625, "grad_norm": 0.20660485327243805, "learning_rate": 6.65619676336598e-05, "loss": 1.7272, "step": 18000 }, { "epoch": 0.878955078125, "grad_norm": 0.17034675180912018, "learning_rate": 6.654878005661277e-05, "loss": 1.7063, "step": 18001 }, { "epoch": 0.87900390625, "grad_norm": 0.20942389965057373, "learning_rate": 6.653559753154863e-05, "loss": 1.7281, "step": 18002 }, { "epoch": 0.879052734375, "grad_norm": 0.18107767403125763, "learning_rate": 6.652242005878666e-05, "loss": 1.7395, "step": 18003 }, { "epoch": 0.8791015625, "grad_norm": 0.2034059315919876, "learning_rate": 6.650924763864642e-05, "loss": 1.7275, "step": 18004 }, { "epoch": 0.879150390625, "grad_norm": 0.18927700817584991, "learning_rate": 6.649608027144701e-05, "loss": 1.7229, "step": 18005 }, { "epoch": 0.87919921875, "grad_norm": 0.1709078848361969, "learning_rate": 6.64829179575076e-05, "loss": 1.6975, "step": 18006 }, { "epoch": 0.879248046875, "grad_norm": 0.22451330721378326, "learning_rate": 6.64697606971472e-05, "loss": 1.7053, "step": 18007 }, { "epoch": 0.879296875, "grad_norm": 0.16568276286125183, "learning_rate": 6.645660849068472e-05, "loss": 1.7194, "step": 18008 }, { "epoch": 0.879345703125, "grad_norm": 0.2022733837366104, "learning_rate": 6.644346133843885e-05, "loss": 1.7334, "step": 18009 }, { "epoch": 0.87939453125, "grad_norm": 0.20273533463478088, "learning_rate": 6.643031924072823e-05, "loss": 1.7299, "step": 18010 }, { "epoch": 0.879443359375, "grad_norm": 0.2070731669664383, "learning_rate": 6.641718219787138e-05, "loss": 1.7265, "step": 18011 }, { "epoch": 0.8794921875, "grad_norm": 0.19044554233551025, "learning_rate": 6.640405021018673e-05, "loss": 1.728, "step": 18012 }, { "epoch": 0.879541015625, "grad_norm": 0.18143272399902344, "learning_rate": 6.639092327799248e-05, "loss": 1.7308, "step": 18013 }, { "epoch": 0.87958984375, "grad_norm": 0.1837439388036728, "learning_rate": 6.637780140160684e-05, "loss": 1.698, "step": 18014 }, { "epoch": 0.879638671875, "grad_norm": 0.17677713930606842, "learning_rate": 6.63646845813477e-05, "loss": 1.7231, "step": 18015 }, { "epoch": 0.8796875, "grad_norm": 0.18403273820877075, "learning_rate": 6.635157281753314e-05, "loss": 1.7213, "step": 18016 }, { "epoch": 0.879736328125, "grad_norm": 0.2210846245288849, "learning_rate": 6.633846611048077e-05, "loss": 1.7361, "step": 18017 }, { "epoch": 0.87978515625, "grad_norm": 0.18271934986114502, "learning_rate": 6.632536446050838e-05, "loss": 1.7455, "step": 18018 }, { "epoch": 0.879833984375, "grad_norm": 0.19587023556232452, "learning_rate": 6.631226786793336e-05, "loss": 1.7176, "step": 18019 }, { "epoch": 0.8798828125, "grad_norm": 0.20884713530540466, "learning_rate": 6.629917633307323e-05, "loss": 1.7219, "step": 18020 }, { "epoch": 0.879931640625, "grad_norm": 0.1667211502790451, "learning_rate": 6.628608985624523e-05, "loss": 1.7446, "step": 18021 }, { "epoch": 0.87998046875, "grad_norm": 0.19748812913894653, "learning_rate": 6.627300843776651e-05, "loss": 1.7248, "step": 18022 }, { "epoch": 0.880029296875, "grad_norm": 0.20530201494693756, "learning_rate": 6.625993207795413e-05, "loss": 1.7302, "step": 18023 }, { "epoch": 0.880078125, "grad_norm": 0.1725250631570816, "learning_rate": 6.624686077712497e-05, "loss": 1.7335, "step": 18024 }, { "epoch": 0.880126953125, "grad_norm": 0.17751912772655487, "learning_rate": 6.623379453559586e-05, "loss": 1.729, "step": 18025 }, { "epoch": 0.88017578125, "grad_norm": 0.19952046871185303, "learning_rate": 6.622073335368344e-05, "loss": 1.717, "step": 18026 }, { "epoch": 0.880224609375, "grad_norm": 0.1642349511384964, "learning_rate": 6.620767723170433e-05, "loss": 1.7123, "step": 18027 }, { "epoch": 0.8802734375, "grad_norm": 0.21259918808937073, "learning_rate": 6.619462616997488e-05, "loss": 1.7266, "step": 18028 }, { "epoch": 0.880322265625, "grad_norm": 0.1993362307548523, "learning_rate": 6.618158016881141e-05, "loss": 1.7159, "step": 18029 }, { "epoch": 0.88037109375, "grad_norm": 0.19369880855083466, "learning_rate": 6.616853922853013e-05, "loss": 1.7435, "step": 18030 }, { "epoch": 0.880419921875, "grad_norm": 0.2064821720123291, "learning_rate": 6.615550334944705e-05, "loss": 1.7228, "step": 18031 }, { "epoch": 0.88046875, "grad_norm": 0.21233607828617096, "learning_rate": 6.614247253187816e-05, "loss": 1.7358, "step": 18032 }, { "epoch": 0.880517578125, "grad_norm": 0.18687696754932404, "learning_rate": 6.612944677613919e-05, "loss": 1.7132, "step": 18033 }, { "epoch": 0.88056640625, "grad_norm": 0.21327579021453857, "learning_rate": 6.611642608254593e-05, "loss": 1.7144, "step": 18034 }, { "epoch": 0.880615234375, "grad_norm": 0.2022206336259842, "learning_rate": 6.610341045141386e-05, "loss": 1.7176, "step": 18035 }, { "epoch": 0.8806640625, "grad_norm": 0.18980947136878967, "learning_rate": 6.609039988305851e-05, "loss": 1.7307, "step": 18036 }, { "epoch": 0.880712890625, "grad_norm": 0.21717853844165802, "learning_rate": 6.607739437779511e-05, "loss": 1.7207, "step": 18037 }, { "epoch": 0.88076171875, "grad_norm": 0.22272919118404388, "learning_rate": 6.606439393593895e-05, "loss": 1.6833, "step": 18038 }, { "epoch": 0.880810546875, "grad_norm": 0.1809690147638321, "learning_rate": 6.605139855780503e-05, "loss": 1.7202, "step": 18039 }, { "epoch": 0.880859375, "grad_norm": 0.2398211658000946, "learning_rate": 6.603840824370835e-05, "loss": 1.7331, "step": 18040 }, { "epoch": 0.880908203125, "grad_norm": 0.2099655568599701, "learning_rate": 6.60254229939637e-05, "loss": 1.7198, "step": 18041 }, { "epoch": 0.88095703125, "grad_norm": 0.19283311069011688, "learning_rate": 6.601244280888582e-05, "loss": 1.6956, "step": 18042 }, { "epoch": 0.881005859375, "grad_norm": 0.21942970156669617, "learning_rate": 6.59994676887893e-05, "loss": 1.7287, "step": 18043 }, { "epoch": 0.8810546875, "grad_norm": 0.18190069496631622, "learning_rate": 6.59864976339886e-05, "loss": 1.7196, "step": 18044 }, { "epoch": 0.881103515625, "grad_norm": 0.185027077794075, "learning_rate": 6.597353264479802e-05, "loss": 1.7259, "step": 18045 }, { "epoch": 0.88115234375, "grad_norm": 0.20056737959384918, "learning_rate": 6.596057272153182e-05, "loss": 1.7083, "step": 18046 }, { "epoch": 0.881201171875, "grad_norm": 0.20662981271743774, "learning_rate": 6.594761786450406e-05, "loss": 1.7306, "step": 18047 }, { "epoch": 0.88125, "grad_norm": 0.19469766318798065, "learning_rate": 6.593466807402874e-05, "loss": 1.7151, "step": 18048 }, { "epoch": 0.881298828125, "grad_norm": 0.2026575356721878, "learning_rate": 6.592172335041972e-05, "loss": 1.7455, "step": 18049 }, { "epoch": 0.88134765625, "grad_norm": 0.17178891599178314, "learning_rate": 6.590878369399073e-05, "loss": 1.7359, "step": 18050 }, { "epoch": 0.881396484375, "grad_norm": 0.1783125400543213, "learning_rate": 6.589584910505529e-05, "loss": 1.7161, "step": 18051 }, { "epoch": 0.8814453125, "grad_norm": 0.179584801197052, "learning_rate": 6.588291958392702e-05, "loss": 1.6941, "step": 18052 }, { "epoch": 0.881494140625, "grad_norm": 0.19650696218013763, "learning_rate": 6.586999513091909e-05, "loss": 1.7072, "step": 18053 }, { "epoch": 0.88154296875, "grad_norm": 0.17139919102191925, "learning_rate": 6.585707574634492e-05, "loss": 1.7391, "step": 18054 }, { "epoch": 0.881591796875, "grad_norm": 0.20575980842113495, "learning_rate": 6.584416143051751e-05, "loss": 1.7272, "step": 18055 }, { "epoch": 0.881640625, "grad_norm": 0.21165065467357635, "learning_rate": 6.583125218374992e-05, "loss": 1.7447, "step": 18056 }, { "epoch": 0.881689453125, "grad_norm": 0.18323807418346405, "learning_rate": 6.581834800635492e-05, "loss": 1.7355, "step": 18057 }, { "epoch": 0.88173828125, "grad_norm": 0.19648727774620056, "learning_rate": 6.580544889864536e-05, "loss": 1.7247, "step": 18058 }, { "epoch": 0.881787109375, "grad_norm": 0.21121153235435486, "learning_rate": 6.579255486093378e-05, "loss": 1.7209, "step": 18059 }, { "epoch": 0.8818359375, "grad_norm": 0.18082566559314728, "learning_rate": 6.577966589353272e-05, "loss": 1.6745, "step": 18060 }, { "epoch": 0.881884765625, "grad_norm": 0.18536917865276337, "learning_rate": 6.576678199675452e-05, "loss": 1.721, "step": 18061 }, { "epoch": 0.88193359375, "grad_norm": 0.1799064576625824, "learning_rate": 6.575390317091149e-05, "loss": 1.732, "step": 18062 }, { "epoch": 0.881982421875, "grad_norm": 0.18859654664993286, "learning_rate": 6.57410294163157e-05, "loss": 1.734, "step": 18063 }, { "epoch": 0.88203125, "grad_norm": 0.18186704814434052, "learning_rate": 6.572816073327918e-05, "loss": 1.748, "step": 18064 }, { "epoch": 0.882080078125, "grad_norm": 0.1886535882949829, "learning_rate": 6.571529712211382e-05, "loss": 1.7303, "step": 18065 }, { "epoch": 0.88212890625, "grad_norm": 0.15949289500713348, "learning_rate": 6.570243858313139e-05, "loss": 1.7004, "step": 18066 }, { "epoch": 0.882177734375, "grad_norm": 0.2088097333908081, "learning_rate": 6.568958511664348e-05, "loss": 1.7502, "step": 18067 }, { "epoch": 0.8822265625, "grad_norm": 0.15157510340213776, "learning_rate": 6.567673672296167e-05, "loss": 1.744, "step": 18068 }, { "epoch": 0.882275390625, "grad_norm": 0.17888355255126953, "learning_rate": 6.566389340239725e-05, "loss": 1.7367, "step": 18069 }, { "epoch": 0.88232421875, "grad_norm": 0.16490983963012695, "learning_rate": 6.565105515526162e-05, "loss": 1.7077, "step": 18070 }, { "epoch": 0.882373046875, "grad_norm": 0.17305505275726318, "learning_rate": 6.563822198186579e-05, "loss": 1.7401, "step": 18071 }, { "epoch": 0.882421875, "grad_norm": 0.1798306703567505, "learning_rate": 6.562539388252093e-05, "loss": 1.7196, "step": 18072 }, { "epoch": 0.882470703125, "grad_norm": 0.15876105427742004, "learning_rate": 6.561257085753783e-05, "loss": 1.7161, "step": 18073 }, { "epoch": 0.88251953125, "grad_norm": 0.17032264173030853, "learning_rate": 6.559975290722727e-05, "loss": 1.7193, "step": 18074 }, { "epoch": 0.882568359375, "grad_norm": 0.16419579088687897, "learning_rate": 6.558694003189994e-05, "loss": 1.7446, "step": 18075 }, { "epoch": 0.8826171875, "grad_norm": 0.169435054063797, "learning_rate": 6.557413223186635e-05, "loss": 1.7337, "step": 18076 }, { "epoch": 0.882666015625, "grad_norm": 0.1699172407388687, "learning_rate": 6.556132950743694e-05, "loss": 1.7387, "step": 18077 }, { "epoch": 0.88271484375, "grad_norm": 0.16321855783462524, "learning_rate": 6.554853185892194e-05, "loss": 1.7065, "step": 18078 }, { "epoch": 0.882763671875, "grad_norm": 0.2056647539138794, "learning_rate": 6.553573928663158e-05, "loss": 1.7258, "step": 18079 }, { "epoch": 0.8828125, "grad_norm": 0.1646033525466919, "learning_rate": 6.552295179087581e-05, "loss": 1.749, "step": 18080 }, { "epoch": 0.882861328125, "grad_norm": 0.18772895634174347, "learning_rate": 6.551016937196462e-05, "loss": 1.7233, "step": 18081 }, { "epoch": 0.88291015625, "grad_norm": 0.1805143505334854, "learning_rate": 6.549739203020782e-05, "loss": 1.7516, "step": 18082 }, { "epoch": 0.882958984375, "grad_norm": 0.18572968244552612, "learning_rate": 6.548461976591497e-05, "loss": 1.7162, "step": 18083 }, { "epoch": 0.8830078125, "grad_norm": 0.15871554613113403, "learning_rate": 6.547185257939572e-05, "loss": 1.6967, "step": 18084 }, { "epoch": 0.883056640625, "grad_norm": 0.1691572517156601, "learning_rate": 6.545909047095944e-05, "loss": 1.7388, "step": 18085 }, { "epoch": 0.88310546875, "grad_norm": 0.17551833391189575, "learning_rate": 6.544633344091546e-05, "loss": 1.7319, "step": 18086 }, { "epoch": 0.883154296875, "grad_norm": 0.18141913414001465, "learning_rate": 6.543358148957293e-05, "loss": 1.7083, "step": 18087 }, { "epoch": 0.883203125, "grad_norm": 0.19330868124961853, "learning_rate": 6.542083461724095e-05, "loss": 1.6994, "step": 18088 }, { "epoch": 0.883251953125, "grad_norm": 0.19036608934402466, "learning_rate": 6.540809282422841e-05, "loss": 1.7378, "step": 18089 }, { "epoch": 0.88330078125, "grad_norm": 0.20342327654361725, "learning_rate": 6.539535611084407e-05, "loss": 1.7138, "step": 18090 }, { "epoch": 0.883349609375, "grad_norm": 0.18019914627075195, "learning_rate": 6.538262447739672e-05, "loss": 1.7201, "step": 18091 }, { "epoch": 0.8833984375, "grad_norm": 0.1860264539718628, "learning_rate": 6.536989792419484e-05, "loss": 1.6982, "step": 18092 }, { "epoch": 0.883447265625, "grad_norm": 0.20475095510482788, "learning_rate": 6.535717645154691e-05, "loss": 1.7029, "step": 18093 }, { "epoch": 0.88349609375, "grad_norm": 0.1690441370010376, "learning_rate": 6.534446005976125e-05, "loss": 1.7269, "step": 18094 }, { "epoch": 0.883544921875, "grad_norm": 0.22415857017040253, "learning_rate": 6.533174874914603e-05, "loss": 1.736, "step": 18095 }, { "epoch": 0.88359375, "grad_norm": 0.1823878437280655, "learning_rate": 6.531904252000931e-05, "loss": 1.7424, "step": 18096 }, { "epoch": 0.883642578125, "grad_norm": 0.1790696084499359, "learning_rate": 6.530634137265908e-05, "loss": 1.7319, "step": 18097 }, { "epoch": 0.88369140625, "grad_norm": 0.20287248492240906, "learning_rate": 6.52936453074031e-05, "loss": 1.7431, "step": 18098 }, { "epoch": 0.883740234375, "grad_norm": 0.18591132760047913, "learning_rate": 6.52809543245491e-05, "loss": 1.7316, "step": 18099 }, { "epoch": 0.8837890625, "grad_norm": 0.1783635914325714, "learning_rate": 6.526826842440468e-05, "loss": 1.7501, "step": 18100 }, { "epoch": 0.883837890625, "grad_norm": 0.19544751942157745, "learning_rate": 6.525558760727726e-05, "loss": 1.6785, "step": 18101 }, { "epoch": 0.88388671875, "grad_norm": 0.18970870971679688, "learning_rate": 6.52429118734742e-05, "loss": 1.7071, "step": 18102 }, { "epoch": 0.883935546875, "grad_norm": 0.21136832237243652, "learning_rate": 6.523024122330267e-05, "loss": 1.7419, "step": 18103 }, { "epoch": 0.883984375, "grad_norm": 0.1902368664741516, "learning_rate": 6.521757565706976e-05, "loss": 1.7162, "step": 18104 }, { "epoch": 0.884033203125, "grad_norm": 0.2320837825536728, "learning_rate": 6.520491517508248e-05, "loss": 1.7396, "step": 18105 }, { "epoch": 0.88408203125, "grad_norm": 0.17428290843963623, "learning_rate": 6.519225977764758e-05, "loss": 1.7193, "step": 18106 }, { "epoch": 0.884130859375, "grad_norm": 0.2232479602098465, "learning_rate": 6.517960946507188e-05, "loss": 1.7073, "step": 18107 }, { "epoch": 0.8841796875, "grad_norm": 0.1969604790210724, "learning_rate": 6.516696423766189e-05, "loss": 1.7142, "step": 18108 }, { "epoch": 0.884228515625, "grad_norm": 0.1967054158449173, "learning_rate": 6.515432409572407e-05, "loss": 1.7284, "step": 18109 }, { "epoch": 0.88427734375, "grad_norm": 0.2050294429063797, "learning_rate": 6.51416890395648e-05, "loss": 1.7026, "step": 18110 }, { "epoch": 0.884326171875, "grad_norm": 0.18337547779083252, "learning_rate": 6.512905906949033e-05, "loss": 1.7386, "step": 18111 }, { "epoch": 0.884375, "grad_norm": 0.18215064704418182, "learning_rate": 6.51164341858067e-05, "loss": 1.7414, "step": 18112 }, { "epoch": 0.884423828125, "grad_norm": 0.22584404051303864, "learning_rate": 6.510381438881988e-05, "loss": 1.6898, "step": 18113 }, { "epoch": 0.88447265625, "grad_norm": 0.16502507030963898, "learning_rate": 6.509119967883578e-05, "loss": 1.722, "step": 18114 }, { "epoch": 0.884521484375, "grad_norm": 0.1828504055738449, "learning_rate": 6.50785900561601e-05, "loss": 1.74, "step": 18115 }, { "epoch": 0.8845703125, "grad_norm": 0.20152047276496887, "learning_rate": 6.50659855210984e-05, "loss": 1.734, "step": 18116 }, { "epoch": 0.884619140625, "grad_norm": 0.1925877034664154, "learning_rate": 6.505338607395625e-05, "loss": 1.7473, "step": 18117 }, { "epoch": 0.88466796875, "grad_norm": 0.19443584978580475, "learning_rate": 6.504079171503892e-05, "loss": 1.717, "step": 18118 }, { "epoch": 0.884716796875, "grad_norm": 0.1649099588394165, "learning_rate": 6.50282024446517e-05, "loss": 1.7156, "step": 18119 }, { "epoch": 0.884765625, "grad_norm": 0.21861031651496887, "learning_rate": 6.501561826309969e-05, "loss": 1.7314, "step": 18120 }, { "epoch": 0.884814453125, "grad_norm": 0.18482547998428345, "learning_rate": 6.500303917068787e-05, "loss": 1.7214, "step": 18121 }, { "epoch": 0.88486328125, "grad_norm": 0.1701931357383728, "learning_rate": 6.499046516772107e-05, "loss": 1.7142, "step": 18122 }, { "epoch": 0.884912109375, "grad_norm": 0.2026526778936386, "learning_rate": 6.497789625450414e-05, "loss": 1.7415, "step": 18123 }, { "epoch": 0.8849609375, "grad_norm": 0.1760786473751068, "learning_rate": 6.496533243134151e-05, "loss": 1.748, "step": 18124 }, { "epoch": 0.885009765625, "grad_norm": 0.1874309480190277, "learning_rate": 6.495277369853793e-05, "loss": 1.7337, "step": 18125 }, { "epoch": 0.88505859375, "grad_norm": 0.16453109681606293, "learning_rate": 6.49402200563975e-05, "loss": 1.7224, "step": 18126 }, { "epoch": 0.885107421875, "grad_norm": 0.1721518635749817, "learning_rate": 6.49276715052247e-05, "loss": 1.7385, "step": 18127 }, { "epoch": 0.88515625, "grad_norm": 0.16729526221752167, "learning_rate": 6.491512804532349e-05, "loss": 1.7219, "step": 18128 }, { "epoch": 0.885205078125, "grad_norm": 0.1804005354642868, "learning_rate": 6.490258967699797e-05, "loss": 1.7281, "step": 18129 }, { "epoch": 0.88525390625, "grad_norm": 0.18571554124355316, "learning_rate": 6.489005640055192e-05, "loss": 1.7222, "step": 18130 }, { "epoch": 0.885302734375, "grad_norm": 0.1863096058368683, "learning_rate": 6.48775282162892e-05, "loss": 1.7287, "step": 18131 }, { "epoch": 0.8853515625, "grad_norm": 0.20446649193763733, "learning_rate": 6.48650051245134e-05, "loss": 1.7054, "step": 18132 }, { "epoch": 0.885400390625, "grad_norm": 0.21023212373256683, "learning_rate": 6.485248712552799e-05, "loss": 1.7312, "step": 18133 }, { "epoch": 0.88544921875, "grad_norm": 0.19156932830810547, "learning_rate": 6.483997421963641e-05, "loss": 1.743, "step": 18134 }, { "epoch": 0.885498046875, "grad_norm": 0.22080643475055695, "learning_rate": 6.482746640714188e-05, "loss": 1.7148, "step": 18135 }, { "epoch": 0.885546875, "grad_norm": 0.1897190660238266, "learning_rate": 6.481496368834755e-05, "loss": 1.7123, "step": 18136 }, { "epoch": 0.885595703125, "grad_norm": 0.19896534085273743, "learning_rate": 6.480246606355646e-05, "loss": 1.7083, "step": 18137 }, { "epoch": 0.88564453125, "grad_norm": 0.16205823421478271, "learning_rate": 6.478997353307145e-05, "loss": 1.7042, "step": 18138 }, { "epoch": 0.885693359375, "grad_norm": 0.1869042068719864, "learning_rate": 6.477748609719535e-05, "loss": 1.7306, "step": 18139 }, { "epoch": 0.8857421875, "grad_norm": 0.1983247548341751, "learning_rate": 6.47650037562307e-05, "loss": 1.7249, "step": 18140 }, { "epoch": 0.885791015625, "grad_norm": 0.1858665943145752, "learning_rate": 6.475252651048019e-05, "loss": 1.6968, "step": 18141 }, { "epoch": 0.88583984375, "grad_norm": 0.1909674108028412, "learning_rate": 6.474005436024601e-05, "loss": 1.7498, "step": 18142 }, { "epoch": 0.885888671875, "grad_norm": 0.162797749042511, "learning_rate": 6.47275873058306e-05, "loss": 1.7084, "step": 18143 }, { "epoch": 0.8859375, "grad_norm": 0.17841166257858276, "learning_rate": 6.471512534753596e-05, "loss": 1.6897, "step": 18144 }, { "epoch": 0.885986328125, "grad_norm": 0.1748240441083908, "learning_rate": 6.470266848566429e-05, "loss": 1.7097, "step": 18145 }, { "epoch": 0.88603515625, "grad_norm": 0.171674445271492, "learning_rate": 6.469021672051732e-05, "loss": 1.7319, "step": 18146 }, { "epoch": 0.886083984375, "grad_norm": 0.21243928372859955, "learning_rate": 6.467777005239699e-05, "loss": 1.7019, "step": 18147 }, { "epoch": 0.8861328125, "grad_norm": 0.17276690900325775, "learning_rate": 6.466532848160485e-05, "loss": 1.7349, "step": 18148 }, { "epoch": 0.886181640625, "grad_norm": 0.19420623779296875, "learning_rate": 6.465289200844243e-05, "loss": 1.7071, "step": 18149 }, { "epoch": 0.88623046875, "grad_norm": 0.19679105281829834, "learning_rate": 6.464046063321116e-05, "loss": 1.6944, "step": 18150 }, { "epoch": 0.886279296875, "grad_norm": 0.17376621067523956, "learning_rate": 6.462803435621234e-05, "loss": 1.7374, "step": 18151 }, { "epoch": 0.886328125, "grad_norm": 0.193831205368042, "learning_rate": 6.461561317774712e-05, "loss": 1.7269, "step": 18152 }, { "epoch": 0.886376953125, "grad_norm": 0.18121670186519623, "learning_rate": 6.460319709811653e-05, "loss": 1.7063, "step": 18153 }, { "epoch": 0.88642578125, "grad_norm": 0.17142046988010406, "learning_rate": 6.459078611762148e-05, "loss": 1.7547, "step": 18154 }, { "epoch": 0.886474609375, "grad_norm": 0.1957298070192337, "learning_rate": 6.45783802365628e-05, "loss": 1.7324, "step": 18155 }, { "epoch": 0.8865234375, "grad_norm": 0.18534940481185913, "learning_rate": 6.45659794552411e-05, "loss": 1.6951, "step": 18156 }, { "epoch": 0.886572265625, "grad_norm": 0.18546728789806366, "learning_rate": 6.4553583773957e-05, "loss": 1.7282, "step": 18157 }, { "epoch": 0.88662109375, "grad_norm": 0.18879848718643188, "learning_rate": 6.454119319301079e-05, "loss": 1.7497, "step": 18158 }, { "epoch": 0.886669921875, "grad_norm": 0.17664197087287903, "learning_rate": 6.45288077127029e-05, "loss": 1.7306, "step": 18159 }, { "epoch": 0.88671875, "grad_norm": 0.20606376230716705, "learning_rate": 6.45164273333334e-05, "loss": 1.7196, "step": 18160 }, { "epoch": 0.886767578125, "grad_norm": 0.1911628544330597, "learning_rate": 6.450405205520245e-05, "loss": 1.7243, "step": 18161 }, { "epoch": 0.88681640625, "grad_norm": 0.18892014026641846, "learning_rate": 6.449168187860984e-05, "loss": 1.7323, "step": 18162 }, { "epoch": 0.886865234375, "grad_norm": 0.22111311554908752, "learning_rate": 6.447931680385548e-05, "loss": 1.7236, "step": 18163 }, { "epoch": 0.8869140625, "grad_norm": 0.17534810304641724, "learning_rate": 6.446695683123901e-05, "loss": 1.7238, "step": 18164 }, { "epoch": 0.886962890625, "grad_norm": 0.21188907325267792, "learning_rate": 6.445460196105997e-05, "loss": 1.713, "step": 18165 }, { "epoch": 0.88701171875, "grad_norm": 0.21196790039539337, "learning_rate": 6.444225219361781e-05, "loss": 1.7288, "step": 18166 }, { "epoch": 0.887060546875, "grad_norm": 0.16951490938663483, "learning_rate": 6.44299075292118e-05, "loss": 1.7239, "step": 18167 }, { "epoch": 0.887109375, "grad_norm": 0.1866571605205536, "learning_rate": 6.441756796814118e-05, "loss": 1.7436, "step": 18168 }, { "epoch": 0.887158203125, "grad_norm": 0.21685150265693665, "learning_rate": 6.440523351070497e-05, "loss": 1.737, "step": 18169 }, { "epoch": 0.88720703125, "grad_norm": 0.17666025459766388, "learning_rate": 6.439290415720213e-05, "loss": 1.7449, "step": 18170 }, { "epoch": 0.887255859375, "grad_norm": 0.19639676809310913, "learning_rate": 6.438057990793144e-05, "loss": 1.7238, "step": 18171 }, { "epoch": 0.8873046875, "grad_norm": 0.19764313101768494, "learning_rate": 6.436826076319159e-05, "loss": 1.7121, "step": 18172 }, { "epoch": 0.887353515625, "grad_norm": 0.17508137226104736, "learning_rate": 6.435594672328121e-05, "loss": 1.7168, "step": 18173 }, { "epoch": 0.88740234375, "grad_norm": 0.1746588498353958, "learning_rate": 6.434363778849864e-05, "loss": 1.7152, "step": 18174 }, { "epoch": 0.887451171875, "grad_norm": 0.19956842064857483, "learning_rate": 6.43313339591423e-05, "loss": 1.7362, "step": 18175 }, { "epoch": 0.8875, "grad_norm": 0.2033187448978424, "learning_rate": 6.431903523551028e-05, "loss": 1.7088, "step": 18176 }, { "epoch": 0.887548828125, "grad_norm": 0.21238549053668976, "learning_rate": 6.430674161790075e-05, "loss": 1.7147, "step": 18177 }, { "epoch": 0.88759765625, "grad_norm": 0.17747335135936737, "learning_rate": 6.429445310661152e-05, "loss": 1.71, "step": 18178 }, { "epoch": 0.887646484375, "grad_norm": 0.21100729703903198, "learning_rate": 6.428216970194059e-05, "loss": 1.74, "step": 18179 }, { "epoch": 0.8876953125, "grad_norm": 0.18183699250221252, "learning_rate": 6.42698914041855e-05, "loss": 1.7169, "step": 18180 }, { "epoch": 0.887744140625, "grad_norm": 0.20003741979599, "learning_rate": 6.425761821364395e-05, "loss": 1.7378, "step": 18181 }, { "epoch": 0.88779296875, "grad_norm": 0.16938994824886322, "learning_rate": 6.424535013061331e-05, "loss": 1.7221, "step": 18182 }, { "epoch": 0.887841796875, "grad_norm": 0.2559182047843933, "learning_rate": 6.423308715539093e-05, "loss": 1.7252, "step": 18183 }, { "epoch": 0.887890625, "grad_norm": 0.1715300977230072, "learning_rate": 6.422082928827399e-05, "loss": 1.7188, "step": 18184 }, { "epoch": 0.887939453125, "grad_norm": 0.16440925002098083, "learning_rate": 6.42085765295596e-05, "loss": 1.7232, "step": 18185 }, { "epoch": 0.88798828125, "grad_norm": 0.23898375034332275, "learning_rate": 6.419632887954473e-05, "loss": 1.7476, "step": 18186 }, { "epoch": 0.888037109375, "grad_norm": 0.1857219934463501, "learning_rate": 6.418408633852615e-05, "loss": 1.7466, "step": 18187 }, { "epoch": 0.8880859375, "grad_norm": 0.19596783816814423, "learning_rate": 6.417184890680063e-05, "loss": 1.707, "step": 18188 }, { "epoch": 0.888134765625, "grad_norm": 0.19546189904212952, "learning_rate": 6.415961658466471e-05, "loss": 1.7292, "step": 18189 }, { "epoch": 0.88818359375, "grad_norm": 0.19692908227443695, "learning_rate": 6.414738937241489e-05, "loss": 1.7235, "step": 18190 }, { "epoch": 0.888232421875, "grad_norm": 0.19948320090770721, "learning_rate": 6.41351672703475e-05, "loss": 1.7206, "step": 18191 }, { "epoch": 0.88828125, "grad_norm": 0.15780490636825562, "learning_rate": 6.412295027875868e-05, "loss": 1.7501, "step": 18192 }, { "epoch": 0.888330078125, "grad_norm": 0.19386568665504456, "learning_rate": 6.411073839794466e-05, "loss": 1.7263, "step": 18193 }, { "epoch": 0.88837890625, "grad_norm": 0.1700931042432785, "learning_rate": 6.409853162820123e-05, "loss": 1.742, "step": 18194 }, { "epoch": 0.888427734375, "grad_norm": 0.19644062221050262, "learning_rate": 6.40863299698244e-05, "loss": 1.7271, "step": 18195 }, { "epoch": 0.8884765625, "grad_norm": 0.17556190490722656, "learning_rate": 6.407413342310973e-05, "loss": 1.7051, "step": 18196 }, { "epoch": 0.888525390625, "grad_norm": 0.18690375983715057, "learning_rate": 6.4061941988353e-05, "loss": 1.7099, "step": 18197 }, { "epoch": 0.88857421875, "grad_norm": 0.20492517948150635, "learning_rate": 6.404975566584948e-05, "loss": 1.7364, "step": 18198 }, { "epoch": 0.888623046875, "grad_norm": 0.19007275998592377, "learning_rate": 6.403757445589465e-05, "loss": 1.7002, "step": 18199 }, { "epoch": 0.888671875, "grad_norm": 0.16966462135314941, "learning_rate": 6.402539835878367e-05, "loss": 1.7348, "step": 18200 }, { "epoch": 0.888720703125, "grad_norm": 0.200526162981987, "learning_rate": 6.401322737481166e-05, "loss": 1.7192, "step": 18201 }, { "epoch": 0.88876953125, "grad_norm": 0.170922189950943, "learning_rate": 6.40010615042736e-05, "loss": 1.7328, "step": 18202 }, { "epoch": 0.888818359375, "grad_norm": 0.16846412420272827, "learning_rate": 6.39889007474643e-05, "loss": 1.6898, "step": 18203 }, { "epoch": 0.8888671875, "grad_norm": 0.1831415295600891, "learning_rate": 6.397674510467854e-05, "loss": 1.7116, "step": 18204 }, { "epoch": 0.888916015625, "grad_norm": 0.1701013296842575, "learning_rate": 6.39645945762109e-05, "loss": 1.71, "step": 18205 }, { "epoch": 0.88896484375, "grad_norm": 0.18906626105308533, "learning_rate": 6.395244916235583e-05, "loss": 1.7398, "step": 18206 }, { "epoch": 0.889013671875, "grad_norm": 0.18923139572143555, "learning_rate": 6.394030886340778e-05, "loss": 1.7191, "step": 18207 }, { "epoch": 0.8890625, "grad_norm": 0.1786137819290161, "learning_rate": 6.392817367966081e-05, "loss": 1.7126, "step": 18208 }, { "epoch": 0.889111328125, "grad_norm": 0.16146020591259003, "learning_rate": 6.39160436114092e-05, "loss": 1.7127, "step": 18209 }, { "epoch": 0.88916015625, "grad_norm": 0.197652667760849, "learning_rate": 6.39039186589468e-05, "loss": 1.7175, "step": 18210 }, { "epoch": 0.889208984375, "grad_norm": 0.17256946861743927, "learning_rate": 6.389179882256757e-05, "loss": 1.7159, "step": 18211 }, { "epoch": 0.8892578125, "grad_norm": 0.18138504028320312, "learning_rate": 6.387968410256514e-05, "loss": 1.7175, "step": 18212 }, { "epoch": 0.889306640625, "grad_norm": 0.17011359333992004, "learning_rate": 6.386757449923326e-05, "loss": 1.7035, "step": 18213 }, { "epoch": 0.88935546875, "grad_norm": 0.19602316617965698, "learning_rate": 6.385547001286525e-05, "loss": 1.7386, "step": 18214 }, { "epoch": 0.889404296875, "grad_norm": 0.17440128326416016, "learning_rate": 6.384337064375463e-05, "loss": 1.7357, "step": 18215 }, { "epoch": 0.889453125, "grad_norm": 0.18393176794052124, "learning_rate": 6.383127639219452e-05, "loss": 1.7079, "step": 18216 }, { "epoch": 0.889501953125, "grad_norm": 0.1773124486207962, "learning_rate": 6.381918725847809e-05, "loss": 1.7018, "step": 18217 }, { "epoch": 0.88955078125, "grad_norm": 0.16709595918655396, "learning_rate": 6.38071032428983e-05, "loss": 1.7388, "step": 18218 }, { "epoch": 0.889599609375, "grad_norm": 0.1861485242843628, "learning_rate": 6.379502434574806e-05, "loss": 1.7238, "step": 18219 }, { "epoch": 0.8896484375, "grad_norm": 0.18287234008312225, "learning_rate": 6.378295056732007e-05, "loss": 1.7388, "step": 18220 }, { "epoch": 0.889697265625, "grad_norm": 0.1876552253961563, "learning_rate": 6.377088190790697e-05, "loss": 1.7022, "step": 18221 }, { "epoch": 0.88974609375, "grad_norm": 0.17655573785305023, "learning_rate": 6.375881836780123e-05, "loss": 1.7231, "step": 18222 }, { "epoch": 0.889794921875, "grad_norm": 0.18963952362537384, "learning_rate": 6.374675994729527e-05, "loss": 1.7609, "step": 18223 }, { "epoch": 0.88984375, "grad_norm": 0.20261059701442719, "learning_rate": 6.373470664668124e-05, "loss": 1.7229, "step": 18224 }, { "epoch": 0.889892578125, "grad_norm": 0.16935904324054718, "learning_rate": 6.372265846625141e-05, "loss": 1.7345, "step": 18225 }, { "epoch": 0.88994140625, "grad_norm": 0.186310276389122, "learning_rate": 6.371061540629762e-05, "loss": 1.7172, "step": 18226 }, { "epoch": 0.889990234375, "grad_norm": 0.1864636242389679, "learning_rate": 6.369857746711187e-05, "loss": 1.7328, "step": 18227 }, { "epoch": 0.8900390625, "grad_norm": 0.2009792923927307, "learning_rate": 6.368654464898577e-05, "loss": 1.7317, "step": 18228 }, { "epoch": 0.890087890625, "grad_norm": 0.18244925141334534, "learning_rate": 6.367451695221111e-05, "loss": 1.718, "step": 18229 }, { "epoch": 0.89013671875, "grad_norm": 0.21198855340480804, "learning_rate": 6.366249437707926e-05, "loss": 1.7344, "step": 18230 }, { "epoch": 0.890185546875, "grad_norm": 0.18958155810832977, "learning_rate": 6.36504769238817e-05, "loss": 1.745, "step": 18231 }, { "epoch": 0.890234375, "grad_norm": 0.20054885745048523, "learning_rate": 6.363846459290958e-05, "loss": 1.7256, "step": 18232 }, { "epoch": 0.890283203125, "grad_norm": 0.21604453027248383, "learning_rate": 6.362645738445407e-05, "loss": 1.704, "step": 18233 }, { "epoch": 0.89033203125, "grad_norm": 0.20398879051208496, "learning_rate": 6.361445529880622e-05, "loss": 1.727, "step": 18234 }, { "epoch": 0.890380859375, "grad_norm": 0.21941585838794708, "learning_rate": 6.360245833625682e-05, "loss": 1.7403, "step": 18235 }, { "epoch": 0.8904296875, "grad_norm": 0.1890622228384018, "learning_rate": 6.359046649709674e-05, "loss": 1.7411, "step": 18236 }, { "epoch": 0.890478515625, "grad_norm": 0.23285715281963348, "learning_rate": 6.357847978161652e-05, "loss": 1.7357, "step": 18237 }, { "epoch": 0.89052734375, "grad_norm": 0.17803584039211273, "learning_rate": 6.356649819010666e-05, "loss": 1.7267, "step": 18238 }, { "epoch": 0.890576171875, "grad_norm": 0.23218773305416107, "learning_rate": 6.355452172285763e-05, "loss": 1.7289, "step": 18239 }, { "epoch": 0.890625, "grad_norm": 0.17704614996910095, "learning_rate": 6.354255038015962e-05, "loss": 1.748, "step": 18240 }, { "epoch": 0.890673828125, "grad_norm": 0.21956680715084076, "learning_rate": 6.353058416230281e-05, "loss": 1.7355, "step": 18241 }, { "epoch": 0.89072265625, "grad_norm": 0.21122387051582336, "learning_rate": 6.351862306957714e-05, "loss": 1.7267, "step": 18242 }, { "epoch": 0.890771484375, "grad_norm": 0.21552971005439758, "learning_rate": 6.35066671022726e-05, "loss": 1.7286, "step": 18243 }, { "epoch": 0.8908203125, "grad_norm": 0.19926011562347412, "learning_rate": 6.349471626067882e-05, "loss": 1.7287, "step": 18244 }, { "epoch": 0.890869140625, "grad_norm": 0.2215854972600937, "learning_rate": 6.348277054508558e-05, "loss": 1.7182, "step": 18245 }, { "epoch": 0.89091796875, "grad_norm": 0.18701308965682983, "learning_rate": 6.34708299557823e-05, "loss": 1.6886, "step": 18246 }, { "epoch": 0.890966796875, "grad_norm": 0.20505157113075256, "learning_rate": 6.345889449305841e-05, "loss": 1.7322, "step": 18247 }, { "epoch": 0.891015625, "grad_norm": 0.2039642482995987, "learning_rate": 6.344696415720311e-05, "loss": 1.7361, "step": 18248 }, { "epoch": 0.891064453125, "grad_norm": 0.17661152780056, "learning_rate": 6.34350389485057e-05, "loss": 1.7356, "step": 18249 }, { "epoch": 0.89111328125, "grad_norm": 0.20321427285671234, "learning_rate": 6.342311886725501e-05, "loss": 1.6935, "step": 18250 }, { "epoch": 0.891162109375, "grad_norm": 0.2066071629524231, "learning_rate": 6.341120391374003e-05, "loss": 1.7044, "step": 18251 }, { "epoch": 0.8912109375, "grad_norm": 0.18059226870536804, "learning_rate": 6.33992940882495e-05, "loss": 1.7115, "step": 18252 }, { "epoch": 0.891259765625, "grad_norm": 0.2133335918188095, "learning_rate": 6.338738939107207e-05, "loss": 1.7347, "step": 18253 }, { "epoch": 0.89130859375, "grad_norm": 0.1912650614976883, "learning_rate": 6.337548982249629e-05, "loss": 1.7008, "step": 18254 }, { "epoch": 0.891357421875, "grad_norm": 0.22082744538784027, "learning_rate": 6.336359538281051e-05, "loss": 1.7386, "step": 18255 }, { "epoch": 0.89140625, "grad_norm": 0.21871529519557953, "learning_rate": 6.335170607230304e-05, "loss": 1.7204, "step": 18256 }, { "epoch": 0.891455078125, "grad_norm": 0.20781484246253967, "learning_rate": 6.333982189126205e-05, "loss": 1.7104, "step": 18257 }, { "epoch": 0.89150390625, "grad_norm": 0.22451281547546387, "learning_rate": 6.332794283997544e-05, "loss": 1.7425, "step": 18258 }, { "epoch": 0.891552734375, "grad_norm": 0.19285959005355835, "learning_rate": 6.331606891873126e-05, "loss": 1.7272, "step": 18259 }, { "epoch": 0.8916015625, "grad_norm": 0.20228341221809387, "learning_rate": 6.330420012781715e-05, "loss": 1.7064, "step": 18260 }, { "epoch": 0.891650390625, "grad_norm": 0.22247177362442017, "learning_rate": 6.329233646752091e-05, "loss": 1.7333, "step": 18261 }, { "epoch": 0.89169921875, "grad_norm": 0.1871512234210968, "learning_rate": 6.328047793812987e-05, "loss": 1.7176, "step": 18262 }, { "epoch": 0.891748046875, "grad_norm": 0.20007207989692688, "learning_rate": 6.326862453993164e-05, "loss": 1.7344, "step": 18263 }, { "epoch": 0.891796875, "grad_norm": 0.18528617918491364, "learning_rate": 6.325677627321333e-05, "loss": 1.733, "step": 18264 }, { "epoch": 0.891845703125, "grad_norm": 0.2225828319787979, "learning_rate": 6.324493313826222e-05, "loss": 1.7394, "step": 18265 }, { "epoch": 0.89189453125, "grad_norm": 0.191288024187088, "learning_rate": 6.323309513536523e-05, "loss": 1.7156, "step": 18266 }, { "epoch": 0.891943359375, "grad_norm": 0.21524344384670258, "learning_rate": 6.322126226480933e-05, "loss": 1.7462, "step": 18267 }, { "epoch": 0.8919921875, "grad_norm": 0.1908317357301712, "learning_rate": 6.320943452688128e-05, "loss": 1.7009, "step": 18268 }, { "epoch": 0.892041015625, "grad_norm": 0.21280241012573242, "learning_rate": 6.319761192186774e-05, "loss": 1.713, "step": 18269 }, { "epoch": 0.89208984375, "grad_norm": 0.18233172595500946, "learning_rate": 6.318579445005522e-05, "loss": 1.7256, "step": 18270 }, { "epoch": 0.892138671875, "grad_norm": 0.19974657893180847, "learning_rate": 6.317398211173014e-05, "loss": 1.7213, "step": 18271 }, { "epoch": 0.8921875, "grad_norm": 0.19843065738677979, "learning_rate": 6.316217490717879e-05, "loss": 1.7115, "step": 18272 }, { "epoch": 0.892236328125, "grad_norm": 0.16676612198352814, "learning_rate": 6.315037283668731e-05, "loss": 1.7234, "step": 18273 }, { "epoch": 0.89228515625, "grad_norm": 0.17500053346157074, "learning_rate": 6.313857590054174e-05, "loss": 1.7383, "step": 18274 }, { "epoch": 0.892333984375, "grad_norm": 0.18137767910957336, "learning_rate": 6.312678409902805e-05, "loss": 1.7295, "step": 18275 }, { "epoch": 0.8923828125, "grad_norm": 0.16844375431537628, "learning_rate": 6.311499743243189e-05, "loss": 1.7382, "step": 18276 }, { "epoch": 0.892431640625, "grad_norm": 0.17072705924510956, "learning_rate": 6.310321590103905e-05, "loss": 1.7138, "step": 18277 }, { "epoch": 0.89248046875, "grad_norm": 0.16956810653209686, "learning_rate": 6.309143950513495e-05, "loss": 1.7158, "step": 18278 }, { "epoch": 0.892529296875, "grad_norm": 0.1871160864830017, "learning_rate": 6.30796682450051e-05, "loss": 1.7256, "step": 18279 }, { "epoch": 0.892578125, "grad_norm": 0.16776715219020844, "learning_rate": 6.306790212093473e-05, "loss": 1.7121, "step": 18280 }, { "epoch": 0.892626953125, "grad_norm": 0.15724024176597595, "learning_rate": 6.305614113320906e-05, "loss": 1.7246, "step": 18281 }, { "epoch": 0.89267578125, "grad_norm": 0.17302608489990234, "learning_rate": 6.304438528211303e-05, "loss": 1.7018, "step": 18282 }, { "epoch": 0.892724609375, "grad_norm": 0.17590323090553284, "learning_rate": 6.303263456793162e-05, "loss": 1.7324, "step": 18283 }, { "epoch": 0.8927734375, "grad_norm": 0.20188014209270477, "learning_rate": 6.30208889909496e-05, "loss": 1.7123, "step": 18284 }, { "epoch": 0.892822265625, "grad_norm": 0.18237346410751343, "learning_rate": 6.300914855145164e-05, "loss": 1.7168, "step": 18285 }, { "epoch": 0.89287109375, "grad_norm": 0.21167807281017303, "learning_rate": 6.299741324972229e-05, "loss": 1.7473, "step": 18286 }, { "epoch": 0.892919921875, "grad_norm": 0.19001038372516632, "learning_rate": 6.298568308604593e-05, "loss": 1.7293, "step": 18287 }, { "epoch": 0.89296875, "grad_norm": 0.18261504173278809, "learning_rate": 6.297395806070688e-05, "loss": 1.6969, "step": 18288 }, { "epoch": 0.893017578125, "grad_norm": 0.20348906517028809, "learning_rate": 6.29622381739893e-05, "loss": 1.7012, "step": 18289 }, { "epoch": 0.89306640625, "grad_norm": 0.1608370691537857, "learning_rate": 6.295052342617724e-05, "loss": 1.7304, "step": 18290 }, { "epoch": 0.893115234375, "grad_norm": 0.215216726064682, "learning_rate": 6.29388138175546e-05, "loss": 1.7159, "step": 18291 }, { "epoch": 0.8931640625, "grad_norm": 0.19980019330978394, "learning_rate": 6.292710934840513e-05, "loss": 1.7244, "step": 18292 }, { "epoch": 0.893212890625, "grad_norm": 0.1960480511188507, "learning_rate": 6.291541001901261e-05, "loss": 1.7191, "step": 18293 }, { "epoch": 0.89326171875, "grad_norm": 0.18892902135849, "learning_rate": 6.290371582966046e-05, "loss": 1.7231, "step": 18294 }, { "epoch": 0.893310546875, "grad_norm": 0.19271178543567657, "learning_rate": 6.289202678063219e-05, "loss": 1.7141, "step": 18295 }, { "epoch": 0.893359375, "grad_norm": 0.17496506869792938, "learning_rate": 6.288034287221102e-05, "loss": 1.7062, "step": 18296 }, { "epoch": 0.893408203125, "grad_norm": 0.1816774308681488, "learning_rate": 6.286866410468022e-05, "loss": 1.708, "step": 18297 }, { "epoch": 0.89345703125, "grad_norm": 0.17815819382667542, "learning_rate": 6.285699047832267e-05, "loss": 1.6991, "step": 18298 }, { "epoch": 0.893505859375, "grad_norm": 0.1673557162284851, "learning_rate": 6.28453219934215e-05, "loss": 1.6968, "step": 18299 }, { "epoch": 0.8935546875, "grad_norm": 0.19751006364822388, "learning_rate": 6.283365865025932e-05, "loss": 1.7197, "step": 18300 }, { "epoch": 0.893603515625, "grad_norm": 0.17160360515117645, "learning_rate": 6.282200044911891e-05, "loss": 1.732, "step": 18301 }, { "epoch": 0.89365234375, "grad_norm": 0.1901138722896576, "learning_rate": 6.281034739028275e-05, "loss": 1.7162, "step": 18302 }, { "epoch": 0.893701171875, "grad_norm": 0.18317212164402008, "learning_rate": 6.279869947403333e-05, "loss": 1.715, "step": 18303 }, { "epoch": 0.89375, "grad_norm": 0.166327565908432, "learning_rate": 6.278705670065289e-05, "loss": 1.6974, "step": 18304 }, { "epoch": 0.893798828125, "grad_norm": 0.18606878817081451, "learning_rate": 6.277541907042362e-05, "loss": 1.7379, "step": 18305 }, { "epoch": 0.89384765625, "grad_norm": 0.18190966546535492, "learning_rate": 6.276378658362756e-05, "loss": 1.7195, "step": 18306 }, { "epoch": 0.893896484375, "grad_norm": 0.18255043029785156, "learning_rate": 6.275215924054665e-05, "loss": 1.6964, "step": 18307 }, { "epoch": 0.8939453125, "grad_norm": 0.18943306803703308, "learning_rate": 6.274053704146268e-05, "loss": 1.724, "step": 18308 }, { "epoch": 0.893994140625, "grad_norm": 0.17680753767490387, "learning_rate": 6.272891998665736e-05, "loss": 1.7309, "step": 18309 }, { "epoch": 0.89404296875, "grad_norm": 0.17689530551433563, "learning_rate": 6.271730807641214e-05, "loss": 1.7361, "step": 18310 }, { "epoch": 0.894091796875, "grad_norm": 0.17082147300243378, "learning_rate": 6.270570131100854e-05, "loss": 1.7435, "step": 18311 }, { "epoch": 0.894140625, "grad_norm": 0.2021143138408661, "learning_rate": 6.269409969072778e-05, "loss": 1.7267, "step": 18312 }, { "epoch": 0.894189453125, "grad_norm": 0.16869951784610748, "learning_rate": 6.268250321585114e-05, "loss": 1.6981, "step": 18313 }, { "epoch": 0.89423828125, "grad_norm": 0.1783602088689804, "learning_rate": 6.267091188665956e-05, "loss": 1.7087, "step": 18314 }, { "epoch": 0.894287109375, "grad_norm": 0.1914423555135727, "learning_rate": 6.265932570343406e-05, "loss": 1.701, "step": 18315 }, { "epoch": 0.8943359375, "grad_norm": 0.1848371922969818, "learning_rate": 6.264774466645537e-05, "loss": 1.7435, "step": 18316 }, { "epoch": 0.894384765625, "grad_norm": 0.20876680314540863, "learning_rate": 6.263616877600417e-05, "loss": 1.6893, "step": 18317 }, { "epoch": 0.89443359375, "grad_norm": 0.20119637250900269, "learning_rate": 6.262459803236105e-05, "loss": 1.7405, "step": 18318 }, { "epoch": 0.894482421875, "grad_norm": 0.17426040768623352, "learning_rate": 6.261303243580642e-05, "loss": 1.7365, "step": 18319 }, { "epoch": 0.89453125, "grad_norm": 0.21439996361732483, "learning_rate": 6.260147198662058e-05, "loss": 1.7236, "step": 18320 }, { "epoch": 0.894580078125, "grad_norm": 0.17427225410938263, "learning_rate": 6.258991668508371e-05, "loss": 1.7353, "step": 18321 }, { "epoch": 0.89462890625, "grad_norm": 0.19226253032684326, "learning_rate": 6.257836653147585e-05, "loss": 1.7283, "step": 18322 }, { "epoch": 0.894677734375, "grad_norm": 0.20971614122390747, "learning_rate": 6.256682152607691e-05, "loss": 1.7509, "step": 18323 }, { "epoch": 0.8947265625, "grad_norm": 0.19728872179985046, "learning_rate": 6.255528166916674e-05, "loss": 1.7231, "step": 18324 }, { "epoch": 0.894775390625, "grad_norm": 0.18792030215263367, "learning_rate": 6.254374696102499e-05, "loss": 1.7049, "step": 18325 }, { "epoch": 0.89482421875, "grad_norm": 0.1928092986345291, "learning_rate": 6.253221740193123e-05, "loss": 1.7246, "step": 18326 }, { "epoch": 0.894873046875, "grad_norm": 0.19961318373680115, "learning_rate": 6.252069299216486e-05, "loss": 1.7244, "step": 18327 }, { "epoch": 0.894921875, "grad_norm": 0.17351281642913818, "learning_rate": 6.250917373200522e-05, "loss": 1.7183, "step": 18328 }, { "epoch": 0.894970703125, "grad_norm": 0.19422456622123718, "learning_rate": 6.249765962173147e-05, "loss": 1.7192, "step": 18329 }, { "epoch": 0.89501953125, "grad_norm": 0.19103561341762543, "learning_rate": 6.248615066162262e-05, "loss": 1.73, "step": 18330 }, { "epoch": 0.895068359375, "grad_norm": 0.1895809769630432, "learning_rate": 6.247464685195771e-05, "loss": 1.7335, "step": 18331 }, { "epoch": 0.8951171875, "grad_norm": 0.21224956214427948, "learning_rate": 6.24631481930154e-05, "loss": 1.7082, "step": 18332 }, { "epoch": 0.895166015625, "grad_norm": 0.2186012715101242, "learning_rate": 6.245165468507452e-05, "loss": 1.7463, "step": 18333 }, { "epoch": 0.89521484375, "grad_norm": 0.20249901711940765, "learning_rate": 6.244016632841352e-05, "loss": 1.7185, "step": 18334 }, { "epoch": 0.895263671875, "grad_norm": 0.19005009531974792, "learning_rate": 6.242868312331083e-05, "loss": 1.7404, "step": 18335 }, { "epoch": 0.8953125, "grad_norm": 0.21381472051143646, "learning_rate": 6.241720507004479e-05, "loss": 1.735, "step": 18336 }, { "epoch": 0.895361328125, "grad_norm": 0.19568726420402527, "learning_rate": 6.240573216889357e-05, "loss": 1.7491, "step": 18337 }, { "epoch": 0.89541015625, "grad_norm": 0.20470477640628815, "learning_rate": 6.239426442013526e-05, "loss": 1.7096, "step": 18338 }, { "epoch": 0.895458984375, "grad_norm": 0.23893243074417114, "learning_rate": 6.238280182404772e-05, "loss": 1.7094, "step": 18339 }, { "epoch": 0.8955078125, "grad_norm": 0.1576741486787796, "learning_rate": 6.23713443809088e-05, "loss": 1.7281, "step": 18340 }, { "epoch": 0.895556640625, "grad_norm": 0.23551706969738007, "learning_rate": 6.235989209099618e-05, "loss": 1.7181, "step": 18341 }, { "epoch": 0.89560546875, "grad_norm": 0.22149376571178436, "learning_rate": 6.234844495458741e-05, "loss": 1.7387, "step": 18342 }, { "epoch": 0.895654296875, "grad_norm": 0.1789098083972931, "learning_rate": 6.233700297195991e-05, "loss": 1.7296, "step": 18343 }, { "epoch": 0.895703125, "grad_norm": 0.22004766762256622, "learning_rate": 6.232556614339101e-05, "loss": 1.716, "step": 18344 }, { "epoch": 0.895751953125, "grad_norm": 0.20091673731803894, "learning_rate": 6.231413446915784e-05, "loss": 1.7279, "step": 18345 }, { "epoch": 0.89580078125, "grad_norm": 0.19743461906909943, "learning_rate": 6.230270794953754e-05, "loss": 1.7196, "step": 18346 }, { "epoch": 0.895849609375, "grad_norm": 0.19290897250175476, "learning_rate": 6.229128658480698e-05, "loss": 1.6929, "step": 18347 }, { "epoch": 0.8958984375, "grad_norm": 0.19886967539787292, "learning_rate": 6.227987037524297e-05, "loss": 1.756, "step": 18348 }, { "epoch": 0.895947265625, "grad_norm": 0.19253166019916534, "learning_rate": 6.22684593211222e-05, "loss": 1.705, "step": 18349 }, { "epoch": 0.89599609375, "grad_norm": 0.19199924170970917, "learning_rate": 6.225705342272124e-05, "loss": 1.7246, "step": 18350 }, { "epoch": 0.896044921875, "grad_norm": 0.19196441769599915, "learning_rate": 6.224565268031649e-05, "loss": 1.7522, "step": 18351 }, { "epoch": 0.89609375, "grad_norm": 0.2134256511926651, "learning_rate": 6.223425709418425e-05, "loss": 1.7249, "step": 18352 }, { "epoch": 0.896142578125, "grad_norm": 0.18532302975654602, "learning_rate": 6.222286666460075e-05, "loss": 1.7211, "step": 18353 }, { "epoch": 0.89619140625, "grad_norm": 0.17588496208190918, "learning_rate": 6.221148139184203e-05, "loss": 1.7278, "step": 18354 }, { "epoch": 0.896240234375, "grad_norm": 0.2042093724012375, "learning_rate": 6.220010127618398e-05, "loss": 1.7265, "step": 18355 }, { "epoch": 0.8962890625, "grad_norm": 0.18407228589057922, "learning_rate": 6.218872631790246e-05, "loss": 1.7193, "step": 18356 }, { "epoch": 0.896337890625, "grad_norm": 0.20286011695861816, "learning_rate": 6.217735651727311e-05, "loss": 1.7401, "step": 18357 }, { "epoch": 0.89638671875, "grad_norm": 0.19443422555923462, "learning_rate": 6.216599187457152e-05, "loss": 1.7352, "step": 18358 }, { "epoch": 0.896435546875, "grad_norm": 0.1700797826051712, "learning_rate": 6.21546323900731e-05, "loss": 1.7503, "step": 18359 }, { "epoch": 0.896484375, "grad_norm": 0.18710899353027344, "learning_rate": 6.214327806405318e-05, "loss": 1.7327, "step": 18360 }, { "epoch": 0.896533203125, "grad_norm": 0.20078805088996887, "learning_rate": 6.21319288967869e-05, "loss": 1.7539, "step": 18361 }, { "epoch": 0.89658203125, "grad_norm": 0.16919811069965363, "learning_rate": 6.212058488854934e-05, "loss": 1.733, "step": 18362 }, { "epoch": 0.896630859375, "grad_norm": 0.18867021799087524, "learning_rate": 6.210924603961544e-05, "loss": 1.7411, "step": 18363 }, { "epoch": 0.8966796875, "grad_norm": 0.16250433027744293, "learning_rate": 6.209791235025998e-05, "loss": 1.7082, "step": 18364 }, { "epoch": 0.896728515625, "grad_norm": 0.17565789818763733, "learning_rate": 6.208658382075769e-05, "loss": 1.7259, "step": 18365 }, { "epoch": 0.89677734375, "grad_norm": 0.17072419822216034, "learning_rate": 6.207526045138311e-05, "loss": 1.7376, "step": 18366 }, { "epoch": 0.896826171875, "grad_norm": 0.1740790605545044, "learning_rate": 6.206394224241059e-05, "loss": 1.7171, "step": 18367 }, { "epoch": 0.896875, "grad_norm": 0.19744080305099487, "learning_rate": 6.205262919411456e-05, "loss": 1.7447, "step": 18368 }, { "epoch": 0.896923828125, "grad_norm": 0.16424089670181274, "learning_rate": 6.204132130676911e-05, "loss": 1.7311, "step": 18369 }, { "epoch": 0.89697265625, "grad_norm": 0.18115650117397308, "learning_rate": 6.203001858064832e-05, "loss": 1.7226, "step": 18370 }, { "epoch": 0.897021484375, "grad_norm": 0.19435866177082062, "learning_rate": 6.201872101602616e-05, "loss": 1.7431, "step": 18371 }, { "epoch": 0.8970703125, "grad_norm": 0.1779332458972931, "learning_rate": 6.200742861317638e-05, "loss": 1.7014, "step": 18372 }, { "epoch": 0.897119140625, "grad_norm": 0.2031826227903366, "learning_rate": 6.199614137237269e-05, "loss": 1.6986, "step": 18373 }, { "epoch": 0.89716796875, "grad_norm": 0.1910219043493271, "learning_rate": 6.198485929388862e-05, "loss": 1.7434, "step": 18374 }, { "epoch": 0.897216796875, "grad_norm": 0.18514497578144073, "learning_rate": 6.197358237799763e-05, "loss": 1.7411, "step": 18375 }, { "epoch": 0.897265625, "grad_norm": 0.2012154906988144, "learning_rate": 6.196231062497302e-05, "loss": 1.7171, "step": 18376 }, { "epoch": 0.897314453125, "grad_norm": 0.18344999849796295, "learning_rate": 6.195104403508798e-05, "loss": 1.717, "step": 18377 }, { "epoch": 0.89736328125, "grad_norm": 0.1738726645708084, "learning_rate": 6.193978260861554e-05, "loss": 1.7097, "step": 18378 }, { "epoch": 0.897412109375, "grad_norm": 0.19274799525737762, "learning_rate": 6.192852634582862e-05, "loss": 1.708, "step": 18379 }, { "epoch": 0.8974609375, "grad_norm": 0.21220092475414276, "learning_rate": 6.191727524700005e-05, "loss": 1.7407, "step": 18380 }, { "epoch": 0.897509765625, "grad_norm": 0.15755687654018402, "learning_rate": 6.190602931240252e-05, "loss": 1.7326, "step": 18381 }, { "epoch": 0.89755859375, "grad_norm": 0.181696817278862, "learning_rate": 6.189478854230856e-05, "loss": 1.7354, "step": 18382 }, { "epoch": 0.897607421875, "grad_norm": 0.19552306830883026, "learning_rate": 6.18835529369906e-05, "loss": 1.7014, "step": 18383 }, { "epoch": 0.89765625, "grad_norm": 0.16294535994529724, "learning_rate": 6.1872322496721e-05, "loss": 1.7055, "step": 18384 }, { "epoch": 0.897705078125, "grad_norm": 0.17723006010055542, "learning_rate": 6.18610972217718e-05, "loss": 1.7229, "step": 18385 }, { "epoch": 0.89775390625, "grad_norm": 0.18136708438396454, "learning_rate": 6.184987711241525e-05, "loss": 1.7095, "step": 18386 }, { "epoch": 0.897802734375, "grad_norm": 0.17175205051898956, "learning_rate": 6.183866216892306e-05, "loss": 1.7307, "step": 18387 }, { "epoch": 0.8978515625, "grad_norm": 0.17377229034900665, "learning_rate": 6.182745239156723e-05, "loss": 1.7347, "step": 18388 }, { "epoch": 0.897900390625, "grad_norm": 0.16773931682109833, "learning_rate": 6.181624778061932e-05, "loss": 1.7267, "step": 18389 }, { "epoch": 0.89794921875, "grad_norm": 0.1685056984424591, "learning_rate": 6.18050483363509e-05, "loss": 1.7384, "step": 18390 }, { "epoch": 0.897998046875, "grad_norm": 0.18069134652614594, "learning_rate": 6.179385405903342e-05, "loss": 1.6967, "step": 18391 }, { "epoch": 0.898046875, "grad_norm": 0.1696562021970749, "learning_rate": 6.178266494893818e-05, "loss": 1.7316, "step": 18392 }, { "epoch": 0.898095703125, "grad_norm": 0.16520166397094727, "learning_rate": 6.177148100633637e-05, "loss": 1.6939, "step": 18393 }, { "epoch": 0.89814453125, "grad_norm": 0.1711091250181198, "learning_rate": 6.176030223149899e-05, "loss": 1.7047, "step": 18394 }, { "epoch": 0.898193359375, "grad_norm": 0.16899091005325317, "learning_rate": 6.1749128624697e-05, "loss": 1.6989, "step": 18395 }, { "epoch": 0.8982421875, "grad_norm": 0.17906083166599274, "learning_rate": 6.173796018620123e-05, "loss": 1.7192, "step": 18396 }, { "epoch": 0.898291015625, "grad_norm": 0.18359345197677612, "learning_rate": 6.172679691628232e-05, "loss": 1.711, "step": 18397 }, { "epoch": 0.89833984375, "grad_norm": 0.16441085934638977, "learning_rate": 6.171563881521081e-05, "loss": 1.7321, "step": 18398 }, { "epoch": 0.898388671875, "grad_norm": 0.19308951497077942, "learning_rate": 6.170448588325716e-05, "loss": 1.7155, "step": 18399 }, { "epoch": 0.8984375, "grad_norm": 0.17399825155735016, "learning_rate": 6.169333812069169e-05, "loss": 1.7325, "step": 18400 }, { "epoch": 0.898486328125, "grad_norm": 0.19181674718856812, "learning_rate": 6.168219552778448e-05, "loss": 1.753, "step": 18401 }, { "epoch": 0.89853515625, "grad_norm": 0.17987415194511414, "learning_rate": 6.167105810480569e-05, "loss": 1.71, "step": 18402 }, { "epoch": 0.898583984375, "grad_norm": 0.17895714938640594, "learning_rate": 6.165992585202512e-05, "loss": 1.7281, "step": 18403 }, { "epoch": 0.8986328125, "grad_norm": 0.198751762509346, "learning_rate": 6.164879876971272e-05, "loss": 1.7223, "step": 18404 }, { "epoch": 0.898681640625, "grad_norm": 0.16205543279647827, "learning_rate": 6.163767685813804e-05, "loss": 1.7155, "step": 18405 }, { "epoch": 0.89873046875, "grad_norm": 0.16928349435329437, "learning_rate": 6.16265601175707e-05, "loss": 1.749, "step": 18406 }, { "epoch": 0.898779296875, "grad_norm": 0.15388824045658112, "learning_rate": 6.161544854828005e-05, "loss": 1.6902, "step": 18407 }, { "epoch": 0.898828125, "grad_norm": 0.1727152019739151, "learning_rate": 6.160434215053551e-05, "loss": 1.7072, "step": 18408 }, { "epoch": 0.898876953125, "grad_norm": 0.16298748552799225, "learning_rate": 6.159324092460616e-05, "loss": 1.7333, "step": 18409 }, { "epoch": 0.89892578125, "grad_norm": 0.2080477625131607, "learning_rate": 6.158214487076103e-05, "loss": 1.7293, "step": 18410 }, { "epoch": 0.898974609375, "grad_norm": 0.1849224865436554, "learning_rate": 6.15710539892691e-05, "loss": 1.7308, "step": 18411 }, { "epoch": 0.8990234375, "grad_norm": 0.18569466471672058, "learning_rate": 6.155996828039912e-05, "loss": 1.7166, "step": 18412 }, { "epoch": 0.899072265625, "grad_norm": 0.2083674818277359, "learning_rate": 6.154888774441982e-05, "loss": 1.7513, "step": 18413 }, { "epoch": 0.89912109375, "grad_norm": 0.20191597938537598, "learning_rate": 6.153781238159968e-05, "loss": 1.7284, "step": 18414 }, { "epoch": 0.899169921875, "grad_norm": 0.18774431943893433, "learning_rate": 6.152674219220717e-05, "loss": 1.7063, "step": 18415 }, { "epoch": 0.89921875, "grad_norm": 0.1978060007095337, "learning_rate": 6.151567717651057e-05, "loss": 1.7267, "step": 18416 }, { "epoch": 0.899267578125, "grad_norm": 0.19563257694244385, "learning_rate": 6.150461733477802e-05, "loss": 1.7029, "step": 18417 }, { "epoch": 0.89931640625, "grad_norm": 0.1817401647567749, "learning_rate": 6.149356266727763e-05, "loss": 1.7274, "step": 18418 }, { "epoch": 0.899365234375, "grad_norm": 0.18430909514427185, "learning_rate": 6.148251317427723e-05, "loss": 1.706, "step": 18419 }, { "epoch": 0.8994140625, "grad_norm": 0.18245169520378113, "learning_rate": 6.147146885604471e-05, "loss": 1.6947, "step": 18420 }, { "epoch": 0.899462890625, "grad_norm": 0.19639255106449127, "learning_rate": 6.146042971284762e-05, "loss": 1.7119, "step": 18421 }, { "epoch": 0.89951171875, "grad_norm": 0.20507746934890747, "learning_rate": 6.144939574495365e-05, "loss": 1.714, "step": 18422 }, { "epoch": 0.899560546875, "grad_norm": 0.17991618812084198, "learning_rate": 6.143836695263008e-05, "loss": 1.7106, "step": 18423 }, { "epoch": 0.899609375, "grad_norm": 0.2308264523744583, "learning_rate": 6.142734333614431e-05, "loss": 1.7179, "step": 18424 }, { "epoch": 0.899658203125, "grad_norm": 0.1694404035806656, "learning_rate": 6.141632489576342e-05, "loss": 1.7155, "step": 18425 }, { "epoch": 0.89970703125, "grad_norm": 0.2299022525548935, "learning_rate": 6.140531163175451e-05, "loss": 1.7088, "step": 18426 }, { "epoch": 0.899755859375, "grad_norm": 0.23684953153133392, "learning_rate": 6.139430354438445e-05, "loss": 1.7044, "step": 18427 }, { "epoch": 0.8998046875, "grad_norm": 0.20297829806804657, "learning_rate": 6.138330063392007e-05, "loss": 1.72, "step": 18428 }, { "epoch": 0.899853515625, "grad_norm": 0.24556533992290497, "learning_rate": 6.1372302900628e-05, "loss": 1.697, "step": 18429 }, { "epoch": 0.89990234375, "grad_norm": 0.19882456958293915, "learning_rate": 6.136131034477482e-05, "loss": 1.7269, "step": 18430 }, { "epoch": 0.899951171875, "grad_norm": 0.21456459164619446, "learning_rate": 6.135032296662689e-05, "loss": 1.7296, "step": 18431 }, { "epoch": 0.9, "grad_norm": 0.22196635603904724, "learning_rate": 6.133934076645056e-05, "loss": 1.7428, "step": 18432 }, { "epoch": 0.900048828125, "grad_norm": 0.1659580022096634, "learning_rate": 6.132836374451193e-05, "loss": 1.7076, "step": 18433 }, { "epoch": 0.90009765625, "grad_norm": 0.24053610861301422, "learning_rate": 6.131739190107711e-05, "loss": 1.7376, "step": 18434 }, { "epoch": 0.900146484375, "grad_norm": 0.20148512721061707, "learning_rate": 6.130642523641192e-05, "loss": 1.7163, "step": 18435 }, { "epoch": 0.9001953125, "grad_norm": 0.18732380867004395, "learning_rate": 6.129546375078224e-05, "loss": 1.7265, "step": 18436 }, { "epoch": 0.900244140625, "grad_norm": 0.2236081212759018, "learning_rate": 6.128450744445362e-05, "loss": 1.7131, "step": 18437 }, { "epoch": 0.90029296875, "grad_norm": 0.19778801500797272, "learning_rate": 6.127355631769172e-05, "loss": 1.7196, "step": 18438 }, { "epoch": 0.900341796875, "grad_norm": 0.2101156860589981, "learning_rate": 6.126261037076182e-05, "loss": 1.7196, "step": 18439 }, { "epoch": 0.900390625, "grad_norm": 0.2066008448600769, "learning_rate": 6.125166960392935e-05, "loss": 1.7375, "step": 18440 }, { "epoch": 0.900439453125, "grad_norm": 0.20692722499370575, "learning_rate": 6.124073401745934e-05, "loss": 1.7137, "step": 18441 }, { "epoch": 0.90048828125, "grad_norm": 0.19848085939884186, "learning_rate": 6.12298036116169e-05, "loss": 1.7107, "step": 18442 }, { "epoch": 0.900537109375, "grad_norm": 0.20937617123126984, "learning_rate": 6.12188783866669e-05, "loss": 1.7587, "step": 18443 }, { "epoch": 0.9005859375, "grad_norm": 0.1893872618675232, "learning_rate": 6.120795834287415e-05, "loss": 1.7192, "step": 18444 }, { "epoch": 0.900634765625, "grad_norm": 0.1957191824913025, "learning_rate": 6.119704348050326e-05, "loss": 1.769, "step": 18445 }, { "epoch": 0.90068359375, "grad_norm": 0.19566702842712402, "learning_rate": 6.118613379981882e-05, "loss": 1.7291, "step": 18446 }, { "epoch": 0.900732421875, "grad_norm": 0.19140110909938812, "learning_rate": 6.117522930108517e-05, "loss": 1.7356, "step": 18447 }, { "epoch": 0.90078125, "grad_norm": 0.20891425013542175, "learning_rate": 6.116432998456665e-05, "loss": 1.747, "step": 18448 }, { "epoch": 0.900830078125, "grad_norm": 0.19845786690711975, "learning_rate": 6.115343585052737e-05, "loss": 1.7389, "step": 18449 }, { "epoch": 0.90087890625, "grad_norm": 0.19259807467460632, "learning_rate": 6.114254689923135e-05, "loss": 1.7236, "step": 18450 }, { "epoch": 0.900927734375, "grad_norm": 0.20308883488178253, "learning_rate": 6.113166313094257e-05, "loss": 1.7207, "step": 18451 }, { "epoch": 0.9009765625, "grad_norm": 0.22199992835521698, "learning_rate": 6.112078454592476e-05, "loss": 1.7359, "step": 18452 }, { "epoch": 0.901025390625, "grad_norm": 0.1635744273662567, "learning_rate": 6.110991114444151e-05, "loss": 1.7114, "step": 18453 }, { "epoch": 0.90107421875, "grad_norm": 0.20391833782196045, "learning_rate": 6.109904292675645e-05, "loss": 1.6985, "step": 18454 }, { "epoch": 0.901123046875, "grad_norm": 0.21688495576381683, "learning_rate": 6.108817989313287e-05, "loss": 1.7378, "step": 18455 }, { "epoch": 0.901171875, "grad_norm": 0.18393845856189728, "learning_rate": 6.107732204383417e-05, "loss": 1.7127, "step": 18456 }, { "epoch": 0.901220703125, "grad_norm": 0.1989585906267166, "learning_rate": 6.106646937912338e-05, "loss": 1.7544, "step": 18457 }, { "epoch": 0.90126953125, "grad_norm": 0.18735890090465546, "learning_rate": 6.105562189926362e-05, "loss": 1.7243, "step": 18458 }, { "epoch": 0.901318359375, "grad_norm": 0.18409842252731323, "learning_rate": 6.104477960451774e-05, "loss": 1.7144, "step": 18459 }, { "epoch": 0.9013671875, "grad_norm": 0.2110367864370346, "learning_rate": 6.103394249514849e-05, "loss": 1.7398, "step": 18460 }, { "epoch": 0.901416015625, "grad_norm": 0.20505043864250183, "learning_rate": 6.102311057141857e-05, "loss": 1.7404, "step": 18461 }, { "epoch": 0.90146484375, "grad_norm": 0.209311842918396, "learning_rate": 6.1012283833590465e-05, "loss": 1.7314, "step": 18462 }, { "epoch": 0.901513671875, "grad_norm": 0.2342492789030075, "learning_rate": 6.100146228192657e-05, "loss": 1.6952, "step": 18463 }, { "epoch": 0.9015625, "grad_norm": 0.17621055245399475, "learning_rate": 6.0990645916689144e-05, "loss": 1.7083, "step": 18464 }, { "epoch": 0.901611328125, "grad_norm": 0.23409403860569, "learning_rate": 6.097983473814037e-05, "loss": 1.7311, "step": 18465 }, { "epoch": 0.90166015625, "grad_norm": 0.17906567454338074, "learning_rate": 6.096902874654224e-05, "loss": 1.7171, "step": 18466 }, { "epoch": 0.901708984375, "grad_norm": 0.16384685039520264, "learning_rate": 6.095822794215665e-05, "loss": 1.737, "step": 18467 }, { "epoch": 0.9017578125, "grad_norm": 0.18602485954761505, "learning_rate": 6.0947432325245405e-05, "loss": 1.7015, "step": 18468 }, { "epoch": 0.901806640625, "grad_norm": 0.19818004965782166, "learning_rate": 6.093664189607002e-05, "loss": 1.7288, "step": 18469 }, { "epoch": 0.90185546875, "grad_norm": 0.19951914250850677, "learning_rate": 6.092585665489218e-05, "loss": 1.745, "step": 18470 }, { "epoch": 0.901904296875, "grad_norm": 0.18976181745529175, "learning_rate": 6.091507660197311e-05, "loss": 1.7304, "step": 18471 }, { "epoch": 0.901953125, "grad_norm": 0.1994553953409195, "learning_rate": 6.090430173757422e-05, "loss": 1.7106, "step": 18472 }, { "epoch": 0.902001953125, "grad_norm": 0.20296108722686768, "learning_rate": 6.089353206195652e-05, "loss": 1.7151, "step": 18473 }, { "epoch": 0.90205078125, "grad_norm": 0.16879890859127045, "learning_rate": 6.0882767575381155e-05, "loss": 1.7182, "step": 18474 }, { "epoch": 0.902099609375, "grad_norm": 0.17993378639221191, "learning_rate": 6.087200827810886e-05, "loss": 1.7165, "step": 18475 }, { "epoch": 0.9021484375, "grad_norm": 0.1933520883321762, "learning_rate": 6.086125417040054e-05, "loss": 1.7359, "step": 18476 }, { "epoch": 0.902197265625, "grad_norm": 0.17882443964481354, "learning_rate": 6.085050525251671e-05, "loss": 1.7217, "step": 18477 }, { "epoch": 0.90224609375, "grad_norm": 0.1830960214138031, "learning_rate": 6.083976152471798e-05, "loss": 1.7209, "step": 18478 }, { "epoch": 0.902294921875, "grad_norm": 0.18713998794555664, "learning_rate": 6.082902298726464e-05, "loss": 1.7126, "step": 18479 }, { "epoch": 0.90234375, "grad_norm": 0.21059022843837738, "learning_rate": 6.081828964041701e-05, "loss": 1.7187, "step": 18480 }, { "epoch": 0.902392578125, "grad_norm": 0.17628750205039978, "learning_rate": 6.08075614844352e-05, "loss": 1.729, "step": 18481 }, { "epoch": 0.90244140625, "grad_norm": 0.1817895621061325, "learning_rate": 6.07968385195792e-05, "loss": 1.7047, "step": 18482 }, { "epoch": 0.902490234375, "grad_norm": 0.1768413931131363, "learning_rate": 6.078612074610896e-05, "loss": 1.7047, "step": 18483 }, { "epoch": 0.9025390625, "grad_norm": 0.18847712874412537, "learning_rate": 6.077540816428417e-05, "loss": 1.7175, "step": 18484 }, { "epoch": 0.902587890625, "grad_norm": 0.1890711933374405, "learning_rate": 6.076470077436441e-05, "loss": 1.7373, "step": 18485 }, { "epoch": 0.90263671875, "grad_norm": 0.1618199199438095, "learning_rate": 6.0753998576609306e-05, "loss": 1.7184, "step": 18486 }, { "epoch": 0.902685546875, "grad_norm": 0.18057489395141602, "learning_rate": 6.074330157127811e-05, "loss": 1.7094, "step": 18487 }, { "epoch": 0.902734375, "grad_norm": 0.17843353748321533, "learning_rate": 6.0732609758630205e-05, "loss": 1.7418, "step": 18488 }, { "epoch": 0.902783203125, "grad_norm": 0.16675424575805664, "learning_rate": 6.072192313892457e-05, "loss": 1.722, "step": 18489 }, { "epoch": 0.90283203125, "grad_norm": 0.17850011587142944, "learning_rate": 6.071124171242033e-05, "loss": 1.6912, "step": 18490 }, { "epoch": 0.902880859375, "grad_norm": 0.1712804138660431, "learning_rate": 6.070056547937626e-05, "loss": 1.7088, "step": 18491 }, { "epoch": 0.9029296875, "grad_norm": 0.1775740087032318, "learning_rate": 6.0689894440051214e-05, "loss": 1.7277, "step": 18492 }, { "epoch": 0.902978515625, "grad_norm": 0.16675414144992828, "learning_rate": 6.067922859470371e-05, "loss": 1.7308, "step": 18493 }, { "epoch": 0.90302734375, "grad_norm": 0.20580904185771942, "learning_rate": 6.0668567943592284e-05, "loss": 1.727, "step": 18494 }, { "epoch": 0.903076171875, "grad_norm": 0.18431004881858826, "learning_rate": 6.06579124869753e-05, "loss": 1.6774, "step": 18495 }, { "epoch": 0.903125, "grad_norm": 0.19498687982559204, "learning_rate": 6.0647262225111034e-05, "loss": 1.7408, "step": 18496 }, { "epoch": 0.903173828125, "grad_norm": 0.17643480002880096, "learning_rate": 6.0636617158257564e-05, "loss": 1.7144, "step": 18497 }, { "epoch": 0.90322265625, "grad_norm": 0.20296378433704376, "learning_rate": 6.0625977286672876e-05, "loss": 1.7158, "step": 18498 }, { "epoch": 0.903271484375, "grad_norm": 0.17230890691280365, "learning_rate": 6.061534261061488e-05, "loss": 1.7283, "step": 18499 }, { "epoch": 0.9033203125, "grad_norm": 0.16782739758491516, "learning_rate": 6.060471313034128e-05, "loss": 1.7206, "step": 18500 }, { "epoch": 0.903369140625, "grad_norm": 0.18587292730808258, "learning_rate": 6.0594088846109705e-05, "loss": 1.7299, "step": 18501 }, { "epoch": 0.90341796875, "grad_norm": 0.17580145597457886, "learning_rate": 6.058346975817765e-05, "loss": 1.7177, "step": 18502 }, { "epoch": 0.903466796875, "grad_norm": 0.17223626375198364, "learning_rate": 6.0572855866802426e-05, "loss": 1.7131, "step": 18503 }, { "epoch": 0.903515625, "grad_norm": 0.19076606631278992, "learning_rate": 6.0562247172241374e-05, "loss": 1.74, "step": 18504 }, { "epoch": 0.903564453125, "grad_norm": 0.1830274760723114, "learning_rate": 6.0551643674751456e-05, "loss": 1.7171, "step": 18505 }, { "epoch": 0.90361328125, "grad_norm": 0.16579172015190125, "learning_rate": 6.054104537458983e-05, "loss": 1.7199, "step": 18506 }, { "epoch": 0.903662109375, "grad_norm": 0.1624152660369873, "learning_rate": 6.053045227201318e-05, "loss": 1.7151, "step": 18507 }, { "epoch": 0.9037109375, "grad_norm": 0.18082183599472046, "learning_rate": 6.0519864367278397e-05, "loss": 1.7161, "step": 18508 }, { "epoch": 0.903759765625, "grad_norm": 0.1614193320274353, "learning_rate": 6.0509281660641946e-05, "loss": 1.7417, "step": 18509 }, { "epoch": 0.90380859375, "grad_norm": 0.18102765083312988, "learning_rate": 6.049870415236043e-05, "loss": 1.6816, "step": 18510 }, { "epoch": 0.903857421875, "grad_norm": 0.19125518202781677, "learning_rate": 6.048813184269011e-05, "loss": 1.7144, "step": 18511 }, { "epoch": 0.90390625, "grad_norm": 0.18945103883743286, "learning_rate": 6.0477564731887275e-05, "loss": 1.7237, "step": 18512 }, { "epoch": 0.903955078125, "grad_norm": 0.176550954580307, "learning_rate": 6.046700282020797e-05, "loss": 1.7486, "step": 18513 }, { "epoch": 0.90400390625, "grad_norm": 0.19051343202590942, "learning_rate": 6.045644610790825e-05, "loss": 1.7129, "step": 18514 }, { "epoch": 0.904052734375, "grad_norm": 0.20084525644779205, "learning_rate": 6.04458945952439e-05, "loss": 1.709, "step": 18515 }, { "epoch": 0.9041015625, "grad_norm": 0.16544462740421295, "learning_rate": 6.043534828247066e-05, "loss": 1.6836, "step": 18516 }, { "epoch": 0.904150390625, "grad_norm": 0.1875123679637909, "learning_rate": 6.0424807169844137e-05, "loss": 1.7323, "step": 18517 }, { "epoch": 0.90419921875, "grad_norm": 0.16643118858337402, "learning_rate": 6.041427125761982e-05, "loss": 1.7207, "step": 18518 }, { "epoch": 0.904248046875, "grad_norm": 0.18167726695537567, "learning_rate": 6.040374054605299e-05, "loss": 1.6963, "step": 18519 }, { "epoch": 0.904296875, "grad_norm": 0.16481830179691315, "learning_rate": 6.039321503539898e-05, "loss": 1.7191, "step": 18520 }, { "epoch": 0.904345703125, "grad_norm": 0.20739683508872986, "learning_rate": 6.038269472591275e-05, "loss": 1.6911, "step": 18521 }, { "epoch": 0.90439453125, "grad_norm": 0.1733318567276001, "learning_rate": 6.037217961784937e-05, "loss": 1.7094, "step": 18522 }, { "epoch": 0.904443359375, "grad_norm": 0.19629240036010742, "learning_rate": 6.036166971146362e-05, "loss": 1.7077, "step": 18523 }, { "epoch": 0.9044921875, "grad_norm": 0.1941371113061905, "learning_rate": 6.03511650070103e-05, "loss": 1.7414, "step": 18524 }, { "epoch": 0.904541015625, "grad_norm": 0.18481098115444183, "learning_rate": 6.034066550474386e-05, "loss": 1.7292, "step": 18525 }, { "epoch": 0.90458984375, "grad_norm": 0.1906365156173706, "learning_rate": 6.033017120491893e-05, "loss": 1.6969, "step": 18526 }, { "epoch": 0.904638671875, "grad_norm": 0.1854468435049057, "learning_rate": 6.0319682107789735e-05, "loss": 1.7181, "step": 18527 }, { "epoch": 0.9046875, "grad_norm": 0.1787605583667755, "learning_rate": 6.030919821361049e-05, "loss": 1.7549, "step": 18528 }, { "epoch": 0.904736328125, "grad_norm": 0.18829011917114258, "learning_rate": 6.0298719522635345e-05, "loss": 1.7128, "step": 18529 }, { "epoch": 0.90478515625, "grad_norm": 0.20737464725971222, "learning_rate": 6.0288246035118185e-05, "loss": 1.7245, "step": 18530 }, { "epoch": 0.904833984375, "grad_norm": 0.16661521792411804, "learning_rate": 6.0277777751312916e-05, "loss": 1.7223, "step": 18531 }, { "epoch": 0.9048828125, "grad_norm": 0.21517953276634216, "learning_rate": 6.0267314671473175e-05, "loss": 1.712, "step": 18532 }, { "epoch": 0.904931640625, "grad_norm": 0.1741795390844345, "learning_rate": 6.025685679585259e-05, "loss": 1.7038, "step": 18533 }, { "epoch": 0.90498046875, "grad_norm": 0.18535612523555756, "learning_rate": 6.0246404124704606e-05, "loss": 1.7125, "step": 18534 }, { "epoch": 0.905029296875, "grad_norm": 0.1920461803674698, "learning_rate": 6.023595665828255e-05, "loss": 1.7351, "step": 18535 }, { "epoch": 0.905078125, "grad_norm": 0.20054768025875092, "learning_rate": 6.0225514396839665e-05, "loss": 1.7327, "step": 18536 }, { "epoch": 0.905126953125, "grad_norm": 0.17490771412849426, "learning_rate": 6.021507734062891e-05, "loss": 1.7495, "step": 18537 }, { "epoch": 0.90517578125, "grad_norm": 0.18719640374183655, "learning_rate": 6.0204645489903385e-05, "loss": 1.713, "step": 18538 }, { "epoch": 0.905224609375, "grad_norm": 0.2079559862613678, "learning_rate": 6.0194218844915754e-05, "loss": 1.6977, "step": 18539 }, { "epoch": 0.9052734375, "grad_norm": 0.15932676196098328, "learning_rate": 6.0183797405918886e-05, "loss": 1.7112, "step": 18540 }, { "epoch": 0.905322265625, "grad_norm": 0.1905774474143982, "learning_rate": 6.017338117316521e-05, "loss": 1.7043, "step": 18541 }, { "epoch": 0.90537109375, "grad_norm": 0.17430737614631653, "learning_rate": 6.016297014690729e-05, "loss": 1.7268, "step": 18542 }, { "epoch": 0.905419921875, "grad_norm": 0.17011937499046326, "learning_rate": 6.015256432739735e-05, "loss": 1.74, "step": 18543 }, { "epoch": 0.90546875, "grad_norm": 0.19420865178108215, "learning_rate": 6.014216371488762e-05, "loss": 1.7253, "step": 18544 }, { "epoch": 0.905517578125, "grad_norm": 0.17793910205364227, "learning_rate": 6.0131768309630175e-05, "loss": 1.731, "step": 18545 }, { "epoch": 0.90556640625, "grad_norm": 0.193207249045372, "learning_rate": 6.012137811187694e-05, "loss": 1.7316, "step": 18546 }, { "epoch": 0.905615234375, "grad_norm": 0.18496906757354736, "learning_rate": 6.011099312187974e-05, "loss": 1.7014, "step": 18547 }, { "epoch": 0.9056640625, "grad_norm": 0.17323076725006104, "learning_rate": 6.0100613339890274e-05, "loss": 1.7216, "step": 18548 }, { "epoch": 0.905712890625, "grad_norm": 0.18918995559215546, "learning_rate": 6.009023876616008e-05, "loss": 1.7343, "step": 18549 }, { "epoch": 0.90576171875, "grad_norm": 0.18239887058734894, "learning_rate": 6.00798694009406e-05, "loss": 1.7431, "step": 18550 }, { "epoch": 0.905810546875, "grad_norm": 0.17180904746055603, "learning_rate": 6.006950524448317e-05, "loss": 1.7168, "step": 18551 }, { "epoch": 0.905859375, "grad_norm": 0.20478713512420654, "learning_rate": 6.0059146297038966e-05, "loss": 1.7216, "step": 18552 }, { "epoch": 0.905908203125, "grad_norm": 0.18056245148181915, "learning_rate": 6.004879255885898e-05, "loss": 1.7649, "step": 18553 }, { "epoch": 0.90595703125, "grad_norm": 0.18791382014751434, "learning_rate": 6.0038444030194256e-05, "loss": 1.7403, "step": 18554 }, { "epoch": 0.906005859375, "grad_norm": 0.1945670247077942, "learning_rate": 6.0028100711295486e-05, "loss": 1.7583, "step": 18555 }, { "epoch": 0.9060546875, "grad_norm": 0.18992997705936432, "learning_rate": 6.0017762602413446e-05, "loss": 1.7166, "step": 18556 }, { "epoch": 0.906103515625, "grad_norm": 0.1838054209947586, "learning_rate": 6.00074297037986e-05, "loss": 1.7112, "step": 18557 }, { "epoch": 0.90615234375, "grad_norm": 0.17764106392860413, "learning_rate": 5.9997102015701484e-05, "loss": 1.7045, "step": 18558 }, { "epoch": 0.906201171875, "grad_norm": 0.2319062501192093, "learning_rate": 5.998677953837225e-05, "loss": 1.717, "step": 18559 }, { "epoch": 0.90625, "grad_norm": 0.18761661648750305, "learning_rate": 5.9976462272061216e-05, "loss": 1.681, "step": 18560 }, { "epoch": 0.906298828125, "grad_norm": 0.2014342099428177, "learning_rate": 5.9966150217018344e-05, "loss": 1.704, "step": 18561 }, { "epoch": 0.90634765625, "grad_norm": 0.19780667126178741, "learning_rate": 5.995584337349359e-05, "loss": 1.702, "step": 18562 }, { "epoch": 0.906396484375, "grad_norm": 0.19442348182201385, "learning_rate": 5.9945541741736726e-05, "loss": 1.7112, "step": 18563 }, { "epoch": 0.9064453125, "grad_norm": 0.20369981229305267, "learning_rate": 5.993524532199743e-05, "loss": 1.7026, "step": 18564 }, { "epoch": 0.906494140625, "grad_norm": 0.19010604918003082, "learning_rate": 5.992495411452523e-05, "loss": 1.7206, "step": 18565 }, { "epoch": 0.90654296875, "grad_norm": 0.1885276436805725, "learning_rate": 5.991466811956957e-05, "loss": 1.731, "step": 18566 }, { "epoch": 0.906591796875, "grad_norm": 0.18560580909252167, "learning_rate": 5.9904387337379735e-05, "loss": 1.7336, "step": 18567 }, { "epoch": 0.906640625, "grad_norm": 0.16496896743774414, "learning_rate": 5.989411176820489e-05, "loss": 1.7404, "step": 18568 }, { "epoch": 0.906689453125, "grad_norm": 0.2029796987771988, "learning_rate": 5.988384141229404e-05, "loss": 1.7205, "step": 18569 }, { "epoch": 0.90673828125, "grad_norm": 0.17554785311222076, "learning_rate": 5.987357626989614e-05, "loss": 1.7171, "step": 18570 }, { "epoch": 0.906787109375, "grad_norm": 0.1849539577960968, "learning_rate": 5.986331634125991e-05, "loss": 1.7236, "step": 18571 }, { "epoch": 0.9068359375, "grad_norm": 0.18885688483715057, "learning_rate": 5.985306162663408e-05, "loss": 1.7129, "step": 18572 }, { "epoch": 0.906884765625, "grad_norm": 0.1723763793706894, "learning_rate": 5.9842812126267115e-05, "loss": 1.721, "step": 18573 }, { "epoch": 0.90693359375, "grad_norm": 0.196063831448555, "learning_rate": 5.9832567840407486e-05, "loss": 1.7164, "step": 18574 }, { "epoch": 0.906982421875, "grad_norm": 0.16880092024803162, "learning_rate": 5.9822328769303405e-05, "loss": 1.7156, "step": 18575 }, { "epoch": 0.90703125, "grad_norm": 0.16004246473312378, "learning_rate": 5.981209491320311e-05, "loss": 1.7077, "step": 18576 }, { "epoch": 0.907080078125, "grad_norm": 0.18718639016151428, "learning_rate": 5.980186627235454e-05, "loss": 1.6948, "step": 18577 }, { "epoch": 0.90712890625, "grad_norm": 0.15893970429897308, "learning_rate": 5.979164284700565e-05, "loss": 1.7306, "step": 18578 }, { "epoch": 0.907177734375, "grad_norm": 0.18913571536540985, "learning_rate": 5.9781424637404174e-05, "loss": 1.7362, "step": 18579 }, { "epoch": 0.9072265625, "grad_norm": 0.1857011467218399, "learning_rate": 5.977121164379778e-05, "loss": 1.719, "step": 18580 }, { "epoch": 0.907275390625, "grad_norm": 0.1915217787027359, "learning_rate": 5.9761003866433973e-05, "loss": 1.7276, "step": 18581 }, { "epoch": 0.90732421875, "grad_norm": 0.2190561592578888, "learning_rate": 5.975080130556016e-05, "loss": 1.7075, "step": 18582 }, { "epoch": 0.907373046875, "grad_norm": 0.17697283625602722, "learning_rate": 5.974060396142361e-05, "loss": 1.6954, "step": 18583 }, { "epoch": 0.907421875, "grad_norm": 0.17657461762428284, "learning_rate": 5.9730411834271484e-05, "loss": 1.761, "step": 18584 }, { "epoch": 0.907470703125, "grad_norm": 0.18136481940746307, "learning_rate": 5.9720224924350755e-05, "loss": 1.7255, "step": 18585 }, { "epoch": 0.90751953125, "grad_norm": 0.18802814185619354, "learning_rate": 5.971004323190832e-05, "loss": 1.7325, "step": 18586 }, { "epoch": 0.907568359375, "grad_norm": 0.1747857928276062, "learning_rate": 5.969986675719097e-05, "loss": 1.7335, "step": 18587 }, { "epoch": 0.9076171875, "grad_norm": 0.20094935595989227, "learning_rate": 5.968969550044531e-05, "loss": 1.6991, "step": 18588 }, { "epoch": 0.907666015625, "grad_norm": 0.18303297460079193, "learning_rate": 5.967952946191785e-05, "loss": 1.7634, "step": 18589 }, { "epoch": 0.90771484375, "grad_norm": 0.20906755328178406, "learning_rate": 5.966936864185501e-05, "loss": 1.7376, "step": 18590 }, { "epoch": 0.907763671875, "grad_norm": 0.19587570428848267, "learning_rate": 5.9659213040502976e-05, "loss": 1.7167, "step": 18591 }, { "epoch": 0.9078125, "grad_norm": 0.20292751491069794, "learning_rate": 5.964906265810795e-05, "loss": 1.7527, "step": 18592 }, { "epoch": 0.907861328125, "grad_norm": 0.180024191737175, "learning_rate": 5.963891749491586e-05, "loss": 1.7176, "step": 18593 }, { "epoch": 0.90791015625, "grad_norm": 0.2171490639448166, "learning_rate": 5.962877755117268e-05, "loss": 1.7271, "step": 18594 }, { "epoch": 0.907958984375, "grad_norm": 0.1576019823551178, "learning_rate": 5.961864282712407e-05, "loss": 1.7071, "step": 18595 }, { "epoch": 0.9080078125, "grad_norm": 0.1792200803756714, "learning_rate": 5.9608513323015695e-05, "loss": 1.725, "step": 18596 }, { "epoch": 0.908056640625, "grad_norm": 0.17912019789218903, "learning_rate": 5.959838903909302e-05, "loss": 1.7416, "step": 18597 }, { "epoch": 0.90810546875, "grad_norm": 0.19826240837574005, "learning_rate": 5.958826997560147e-05, "loss": 1.7034, "step": 18598 }, { "epoch": 0.908154296875, "grad_norm": 0.17377236485481262, "learning_rate": 5.957815613278625e-05, "loss": 1.7123, "step": 18599 }, { "epoch": 0.908203125, "grad_norm": 0.19119992852210999, "learning_rate": 5.956804751089248e-05, "loss": 1.7548, "step": 18600 }, { "epoch": 0.908251953125, "grad_norm": 0.20665639638900757, "learning_rate": 5.9557944110165134e-05, "loss": 1.7184, "step": 18601 }, { "epoch": 0.90830078125, "grad_norm": 0.17762771248817444, "learning_rate": 5.9547845930849116e-05, "loss": 1.7076, "step": 18602 }, { "epoch": 0.908349609375, "grad_norm": 0.20344112813472748, "learning_rate": 5.953775297318915e-05, "loss": 1.7254, "step": 18603 }, { "epoch": 0.9083984375, "grad_norm": 0.17659233510494232, "learning_rate": 5.9527665237429834e-05, "loss": 1.7169, "step": 18604 }, { "epoch": 0.908447265625, "grad_norm": 0.22297604382038116, "learning_rate": 5.951758272381565e-05, "loss": 1.7155, "step": 18605 }, { "epoch": 0.90849609375, "grad_norm": 0.19426701962947845, "learning_rate": 5.9507505432590994e-05, "loss": 1.715, "step": 18606 }, { "epoch": 0.908544921875, "grad_norm": 0.1913589984178543, "learning_rate": 5.949743336400003e-05, "loss": 1.7376, "step": 18607 }, { "epoch": 0.90859375, "grad_norm": 0.19959092140197754, "learning_rate": 5.948736651828693e-05, "loss": 1.7324, "step": 18608 }, { "epoch": 0.908642578125, "grad_norm": 0.2173137664794922, "learning_rate": 5.947730489569563e-05, "loss": 1.719, "step": 18609 }, { "epoch": 0.90869140625, "grad_norm": 0.184962198138237, "learning_rate": 5.946724849647005e-05, "loss": 1.729, "step": 18610 }, { "epoch": 0.908740234375, "grad_norm": 0.22176694869995117, "learning_rate": 5.945719732085379e-05, "loss": 1.7142, "step": 18611 }, { "epoch": 0.9087890625, "grad_norm": 0.16954754292964935, "learning_rate": 5.9447151369090556e-05, "loss": 1.7309, "step": 18612 }, { "epoch": 0.908837890625, "grad_norm": 0.19648955762386322, "learning_rate": 5.943711064142375e-05, "loss": 1.7595, "step": 18613 }, { "epoch": 0.90888671875, "grad_norm": 0.18713806569576263, "learning_rate": 5.942707513809678e-05, "loss": 1.7046, "step": 18614 }, { "epoch": 0.908935546875, "grad_norm": 0.17276974022388458, "learning_rate": 5.94170448593528e-05, "loss": 1.7184, "step": 18615 }, { "epoch": 0.908984375, "grad_norm": 0.19139689207077026, "learning_rate": 5.940701980543498e-05, "loss": 1.7113, "step": 18616 }, { "epoch": 0.909033203125, "grad_norm": 0.17851704359054565, "learning_rate": 5.9396999976586194e-05, "loss": 1.7324, "step": 18617 }, { "epoch": 0.90908203125, "grad_norm": 0.16740137338638306, "learning_rate": 5.938698537304935e-05, "loss": 1.7134, "step": 18618 }, { "epoch": 0.909130859375, "grad_norm": 0.17262788116931915, "learning_rate": 5.937697599506714e-05, "loss": 1.7234, "step": 18619 }, { "epoch": 0.9091796875, "grad_norm": 0.1889919936656952, "learning_rate": 5.936697184288213e-05, "loss": 1.7018, "step": 18620 }, { "epoch": 0.909228515625, "grad_norm": 0.18676862120628357, "learning_rate": 5.935697291673679e-05, "loss": 1.7149, "step": 18621 }, { "epoch": 0.90927734375, "grad_norm": 0.20791026949882507, "learning_rate": 5.934697921687349e-05, "loss": 1.7282, "step": 18622 }, { "epoch": 0.909326171875, "grad_norm": 0.17924709618091583, "learning_rate": 5.933699074353438e-05, "loss": 1.7309, "step": 18623 }, { "epoch": 0.909375, "grad_norm": 0.21024660766124725, "learning_rate": 5.9327007496961585e-05, "loss": 1.7321, "step": 18624 }, { "epoch": 0.909423828125, "grad_norm": 0.19405022263526917, "learning_rate": 5.931702947739699e-05, "loss": 1.723, "step": 18625 }, { "epoch": 0.90947265625, "grad_norm": 0.20022058486938477, "learning_rate": 5.9307056685082504e-05, "loss": 1.7133, "step": 18626 }, { "epoch": 0.909521484375, "grad_norm": 0.20479385554790497, "learning_rate": 5.929708912025976e-05, "loss": 1.7193, "step": 18627 }, { "epoch": 0.9095703125, "grad_norm": 0.18805071711540222, "learning_rate": 5.928712678317036e-05, "loss": 1.7144, "step": 18628 }, { "epoch": 0.909619140625, "grad_norm": 0.1952916383743286, "learning_rate": 5.927716967405578e-05, "loss": 1.7248, "step": 18629 }, { "epoch": 0.90966796875, "grad_norm": 0.19589844346046448, "learning_rate": 5.9267217793157275e-05, "loss": 1.7043, "step": 18630 }, { "epoch": 0.909716796875, "grad_norm": 0.2048637568950653, "learning_rate": 5.925727114071605e-05, "loss": 1.7089, "step": 18631 }, { "epoch": 0.909765625, "grad_norm": 0.19132770597934723, "learning_rate": 5.9247329716973204e-05, "loss": 1.7536, "step": 18632 }, { "epoch": 0.909814453125, "grad_norm": 0.17472368478775024, "learning_rate": 5.9237393522169666e-05, "loss": 1.7035, "step": 18633 }, { "epoch": 0.90986328125, "grad_norm": 0.21162651479244232, "learning_rate": 5.9227462556546214e-05, "loss": 1.7279, "step": 18634 }, { "epoch": 0.909912109375, "grad_norm": 0.18308372795581818, "learning_rate": 5.921753682034356e-05, "loss": 1.7261, "step": 18635 }, { "epoch": 0.9099609375, "grad_norm": 0.1713726669549942, "learning_rate": 5.9207616313802264e-05, "loss": 1.7215, "step": 18636 }, { "epoch": 0.910009765625, "grad_norm": 0.19005155563354492, "learning_rate": 5.919770103716275e-05, "loss": 1.7274, "step": 18637 }, { "epoch": 0.91005859375, "grad_norm": 0.18180584907531738, "learning_rate": 5.9187790990665336e-05, "loss": 1.7002, "step": 18638 }, { "epoch": 0.910107421875, "grad_norm": 0.18462121486663818, "learning_rate": 5.917788617455016e-05, "loss": 1.6922, "step": 18639 }, { "epoch": 0.91015625, "grad_norm": 0.1620727926492691, "learning_rate": 5.916798658905733e-05, "loss": 1.7221, "step": 18640 }, { "epoch": 0.910205078125, "grad_norm": 0.19527959823608398, "learning_rate": 5.915809223442672e-05, "loss": 1.7139, "step": 18641 }, { "epoch": 0.91025390625, "grad_norm": 0.16311413049697876, "learning_rate": 5.914820311089819e-05, "loss": 1.7277, "step": 18642 }, { "epoch": 0.910302734375, "grad_norm": 0.17822310328483582, "learning_rate": 5.913831921871133e-05, "loss": 1.701, "step": 18643 }, { "epoch": 0.9103515625, "grad_norm": 0.1725311428308487, "learning_rate": 5.912844055810575e-05, "loss": 1.7069, "step": 18644 }, { "epoch": 0.910400390625, "grad_norm": 0.17377515137195587, "learning_rate": 5.9118567129320847e-05, "loss": 1.6993, "step": 18645 }, { "epoch": 0.91044921875, "grad_norm": 0.18819324672222137, "learning_rate": 5.910869893259587e-05, "loss": 1.7324, "step": 18646 }, { "epoch": 0.910498046875, "grad_norm": 0.20423394441604614, "learning_rate": 5.909883596817007e-05, "loss": 1.7185, "step": 18647 }, { "epoch": 0.910546875, "grad_norm": 0.21154148876667023, "learning_rate": 5.908897823628242e-05, "loss": 1.7539, "step": 18648 }, { "epoch": 0.910595703125, "grad_norm": 0.19046448171138763, "learning_rate": 5.907912573717181e-05, "loss": 1.7368, "step": 18649 }, { "epoch": 0.91064453125, "grad_norm": 0.21926045417785645, "learning_rate": 5.906927847107708e-05, "loss": 1.741, "step": 18650 }, { "epoch": 0.910693359375, "grad_norm": 0.19073864817619324, "learning_rate": 5.9059436438236874e-05, "loss": 1.7124, "step": 18651 }, { "epoch": 0.9107421875, "grad_norm": 0.20878329873085022, "learning_rate": 5.904959963888967e-05, "loss": 1.7047, "step": 18652 }, { "epoch": 0.910791015625, "grad_norm": 0.1824326366186142, "learning_rate": 5.903976807327395e-05, "loss": 1.7459, "step": 18653 }, { "epoch": 0.91083984375, "grad_norm": 0.18539868295192719, "learning_rate": 5.902994174162793e-05, "loss": 1.7185, "step": 18654 }, { "epoch": 0.910888671875, "grad_norm": 0.17684821784496307, "learning_rate": 5.902012064418981e-05, "loss": 1.7405, "step": 18655 }, { "epoch": 0.9109375, "grad_norm": 0.19272391498088837, "learning_rate": 5.901030478119756e-05, "loss": 1.7164, "step": 18656 }, { "epoch": 0.910986328125, "grad_norm": 0.1642683893442154, "learning_rate": 5.900049415288909e-05, "loss": 1.709, "step": 18657 }, { "epoch": 0.91103515625, "grad_norm": 0.19251757860183716, "learning_rate": 5.899068875950221e-05, "loss": 1.7088, "step": 18658 }, { "epoch": 0.911083984375, "grad_norm": 0.17982138693332672, "learning_rate": 5.898088860127449e-05, "loss": 1.7205, "step": 18659 }, { "epoch": 0.9111328125, "grad_norm": 0.18069100379943848, "learning_rate": 5.89710936784435e-05, "loss": 1.7345, "step": 18660 }, { "epoch": 0.911181640625, "grad_norm": 0.20094117522239685, "learning_rate": 5.896130399124663e-05, "loss": 1.7108, "step": 18661 }, { "epoch": 0.91123046875, "grad_norm": 0.16443392634391785, "learning_rate": 5.895151953992107e-05, "loss": 1.7354, "step": 18662 }, { "epoch": 0.911279296875, "grad_norm": 0.16135156154632568, "learning_rate": 5.8941740324704064e-05, "loss": 1.716, "step": 18663 }, { "epoch": 0.911328125, "grad_norm": 0.17172493040561676, "learning_rate": 5.893196634583251e-05, "loss": 1.7248, "step": 18664 }, { "epoch": 0.911376953125, "grad_norm": 0.18011058866977692, "learning_rate": 5.892219760354338e-05, "loss": 1.7096, "step": 18665 }, { "epoch": 0.91142578125, "grad_norm": 0.16220532357692719, "learning_rate": 5.891243409807335e-05, "loss": 1.7337, "step": 18666 }, { "epoch": 0.911474609375, "grad_norm": 0.17711889743804932, "learning_rate": 5.890267582965913e-05, "loss": 1.7339, "step": 18667 }, { "epoch": 0.9115234375, "grad_norm": 0.17394691705703735, "learning_rate": 5.889292279853714e-05, "loss": 1.7276, "step": 18668 }, { "epoch": 0.911572265625, "grad_norm": 0.16631408035755157, "learning_rate": 5.8883175004943776e-05, "loss": 1.7126, "step": 18669 }, { "epoch": 0.91162109375, "grad_norm": 0.1717785894870758, "learning_rate": 5.887343244911532e-05, "loss": 1.7172, "step": 18670 }, { "epoch": 0.911669921875, "grad_norm": 0.19398237764835358, "learning_rate": 5.886369513128785e-05, "loss": 1.714, "step": 18671 }, { "epoch": 0.91171875, "grad_norm": 0.17323750257492065, "learning_rate": 5.885396305169736e-05, "loss": 1.7179, "step": 18672 }, { "epoch": 0.911767578125, "grad_norm": 0.16365984082221985, "learning_rate": 5.8844236210579736e-05, "loss": 1.7288, "step": 18673 }, { "epoch": 0.91181640625, "grad_norm": 0.1803804636001587, "learning_rate": 5.88345146081707e-05, "loss": 1.7319, "step": 18674 }, { "epoch": 0.911865234375, "grad_norm": 0.17435680329799652, "learning_rate": 5.882479824470587e-05, "loss": 1.7207, "step": 18675 }, { "epoch": 0.9119140625, "grad_norm": 0.1722751259803772, "learning_rate": 5.8815087120420705e-05, "loss": 1.7056, "step": 18676 }, { "epoch": 0.911962890625, "grad_norm": 0.18216365575790405, "learning_rate": 5.880538123555061e-05, "loss": 1.7345, "step": 18677 }, { "epoch": 0.91201171875, "grad_norm": 0.19739940762519836, "learning_rate": 5.8795680590330785e-05, "loss": 1.7258, "step": 18678 }, { "epoch": 0.912060546875, "grad_norm": 0.18035708367824554, "learning_rate": 5.878598518499639e-05, "loss": 1.7145, "step": 18679 }, { "epoch": 0.912109375, "grad_norm": 0.171481654047966, "learning_rate": 5.8776295019782246e-05, "loss": 1.7099, "step": 18680 }, { "epoch": 0.912158203125, "grad_norm": 0.2062315195798874, "learning_rate": 5.8766610094923386e-05, "loss": 1.7458, "step": 18681 }, { "epoch": 0.91220703125, "grad_norm": 0.18874193727970123, "learning_rate": 5.875693041065441e-05, "loss": 1.7178, "step": 18682 }, { "epoch": 0.912255859375, "grad_norm": 0.19307096302509308, "learning_rate": 5.874725596720998e-05, "loss": 1.7244, "step": 18683 }, { "epoch": 0.9123046875, "grad_norm": 0.17810912430286407, "learning_rate": 5.873758676482449e-05, "loss": 1.7364, "step": 18684 }, { "epoch": 0.912353515625, "grad_norm": 0.1838877946138382, "learning_rate": 5.8727922803732404e-05, "loss": 1.6918, "step": 18685 }, { "epoch": 0.91240234375, "grad_norm": 0.19189660251140594, "learning_rate": 5.871826408416777e-05, "loss": 1.7154, "step": 18686 }, { "epoch": 0.912451171875, "grad_norm": 0.20727315545082092, "learning_rate": 5.870861060636484e-05, "loss": 1.7339, "step": 18687 }, { "epoch": 0.9125, "grad_norm": 0.1756720244884491, "learning_rate": 5.869896237055748e-05, "loss": 1.7218, "step": 18688 }, { "epoch": 0.912548828125, "grad_norm": 0.19581079483032227, "learning_rate": 5.868931937697951e-05, "loss": 1.732, "step": 18689 }, { "epoch": 0.91259765625, "grad_norm": 0.17974147200584412, "learning_rate": 5.8679681625864704e-05, "loss": 1.7023, "step": 18690 }, { "epoch": 0.912646484375, "grad_norm": 0.18316371738910675, "learning_rate": 5.8670049117446586e-05, "loss": 1.7285, "step": 18691 }, { "epoch": 0.9126953125, "grad_norm": 0.18969984352588654, "learning_rate": 5.866042185195862e-05, "loss": 1.713, "step": 18692 }, { "epoch": 0.912744140625, "grad_norm": 0.17928464710712433, "learning_rate": 5.865079982963415e-05, "loss": 1.7221, "step": 18693 }, { "epoch": 0.91279296875, "grad_norm": 0.17449036240577698, "learning_rate": 5.864118305070635e-05, "loss": 1.7383, "step": 18694 }, { "epoch": 0.912841796875, "grad_norm": 0.18720024824142456, "learning_rate": 5.863157151540832e-05, "loss": 1.7194, "step": 18695 }, { "epoch": 0.912890625, "grad_norm": 0.1750451773405075, "learning_rate": 5.862196522397295e-05, "loss": 1.7331, "step": 18696 }, { "epoch": 0.912939453125, "grad_norm": 0.19272656738758087, "learning_rate": 5.861236417663312e-05, "loss": 1.7043, "step": 18697 }, { "epoch": 0.91298828125, "grad_norm": 0.18301521241664886, "learning_rate": 5.8602768373621456e-05, "loss": 1.7089, "step": 18698 }, { "epoch": 0.913037109375, "grad_norm": 0.18685290217399597, "learning_rate": 5.859317781517058e-05, "loss": 1.7069, "step": 18699 }, { "epoch": 0.9130859375, "grad_norm": 0.18405365943908691, "learning_rate": 5.858359250151288e-05, "loss": 1.7057, "step": 18700 }, { "epoch": 0.913134765625, "grad_norm": 0.1845480054616928, "learning_rate": 5.857401243288069e-05, "loss": 1.743, "step": 18701 }, { "epoch": 0.91318359375, "grad_norm": 0.1889806091785431, "learning_rate": 5.856443760950616e-05, "loss": 1.7465, "step": 18702 }, { "epoch": 0.913232421875, "grad_norm": 0.16811621189117432, "learning_rate": 5.855486803162142e-05, "loss": 1.7292, "step": 18703 }, { "epoch": 0.91328125, "grad_norm": 0.1704496592283249, "learning_rate": 5.854530369945833e-05, "loss": 1.7204, "step": 18704 }, { "epoch": 0.913330078125, "grad_norm": 0.17776106297969818, "learning_rate": 5.853574461324868e-05, "loss": 1.7506, "step": 18705 }, { "epoch": 0.91337890625, "grad_norm": 0.1640356481075287, "learning_rate": 5.852619077322417e-05, "loss": 1.7323, "step": 18706 }, { "epoch": 0.913427734375, "grad_norm": 0.17375925183296204, "learning_rate": 5.851664217961634e-05, "loss": 1.713, "step": 18707 }, { "epoch": 0.9134765625, "grad_norm": 0.17920371890068054, "learning_rate": 5.850709883265661e-05, "loss": 1.7385, "step": 18708 }, { "epoch": 0.913525390625, "grad_norm": 0.16730046272277832, "learning_rate": 5.849756073257626e-05, "loss": 1.7151, "step": 18709 }, { "epoch": 0.91357421875, "grad_norm": 0.17848189175128937, "learning_rate": 5.848802787960648e-05, "loss": 1.6998, "step": 18710 }, { "epoch": 0.913623046875, "grad_norm": 0.1835009902715683, "learning_rate": 5.847850027397828e-05, "loss": 1.6898, "step": 18711 }, { "epoch": 0.913671875, "grad_norm": 0.1902068704366684, "learning_rate": 5.8468977915922566e-05, "loss": 1.7356, "step": 18712 }, { "epoch": 0.913720703125, "grad_norm": 0.16785898804664612, "learning_rate": 5.845946080567019e-05, "loss": 1.7157, "step": 18713 }, { "epoch": 0.91376953125, "grad_norm": 0.19284871220588684, "learning_rate": 5.8449948943451665e-05, "loss": 1.7327, "step": 18714 }, { "epoch": 0.913818359375, "grad_norm": 0.1739910989999771, "learning_rate": 5.844044232949767e-05, "loss": 1.7301, "step": 18715 }, { "epoch": 0.9138671875, "grad_norm": 0.2051868736743927, "learning_rate": 5.843094096403846e-05, "loss": 1.7292, "step": 18716 }, { "epoch": 0.913916015625, "grad_norm": 0.2049933522939682, "learning_rate": 5.842144484730446e-05, "loss": 1.7196, "step": 18717 }, { "epoch": 0.91396484375, "grad_norm": 0.19901838898658752, "learning_rate": 5.841195397952567e-05, "loss": 1.7325, "step": 18718 }, { "epoch": 0.914013671875, "grad_norm": 0.17634634673595428, "learning_rate": 5.840246836093226e-05, "loss": 1.7296, "step": 18719 }, { "epoch": 0.9140625, "grad_norm": 0.18463987112045288, "learning_rate": 5.839298799175401e-05, "loss": 1.7185, "step": 18720 }, { "epoch": 0.914111328125, "grad_norm": 0.1781737506389618, "learning_rate": 5.838351287222071e-05, "loss": 1.6963, "step": 18721 }, { "epoch": 0.91416015625, "grad_norm": 0.17945760488510132, "learning_rate": 5.8374043002561995e-05, "loss": 1.7164, "step": 18722 }, { "epoch": 0.914208984375, "grad_norm": 0.17231707274913788, "learning_rate": 5.836457838300741e-05, "loss": 1.7021, "step": 18723 }, { "epoch": 0.9142578125, "grad_norm": 0.16566407680511475, "learning_rate": 5.835511901378628e-05, "loss": 1.6921, "step": 18724 }, { "epoch": 0.914306640625, "grad_norm": 0.19876864552497864, "learning_rate": 5.834566489512789e-05, "loss": 1.7134, "step": 18725 }, { "epoch": 0.91435546875, "grad_norm": 0.1833697110414505, "learning_rate": 5.8336216027261385e-05, "loss": 1.7349, "step": 18726 }, { "epoch": 0.914404296875, "grad_norm": 0.21366438269615173, "learning_rate": 5.832677241041574e-05, "loss": 1.7268, "step": 18727 }, { "epoch": 0.914453125, "grad_norm": 0.16568101942539215, "learning_rate": 5.8317334044819865e-05, "loss": 1.7159, "step": 18728 }, { "epoch": 0.914501953125, "grad_norm": 0.2012602984905243, "learning_rate": 5.830790093070249e-05, "loss": 1.7326, "step": 18729 }, { "epoch": 0.91455078125, "grad_norm": 0.19488699734210968, "learning_rate": 5.829847306829218e-05, "loss": 1.7219, "step": 18730 }, { "epoch": 0.914599609375, "grad_norm": 0.17598943412303925, "learning_rate": 5.82890504578175e-05, "loss": 1.7068, "step": 18731 }, { "epoch": 0.9146484375, "grad_norm": 0.20516063272953033, "learning_rate": 5.8279633099506754e-05, "loss": 1.7143, "step": 18732 }, { "epoch": 0.914697265625, "grad_norm": 0.17767296731472015, "learning_rate": 5.827022099358825e-05, "loss": 1.709, "step": 18733 }, { "epoch": 0.91474609375, "grad_norm": 0.19785192608833313, "learning_rate": 5.8260814140290016e-05, "loss": 1.7156, "step": 18734 }, { "epoch": 0.914794921875, "grad_norm": 0.2050919085741043, "learning_rate": 5.825141253984014e-05, "loss": 1.7325, "step": 18735 }, { "epoch": 0.91484375, "grad_norm": 0.16447241604328156, "learning_rate": 5.8242016192466336e-05, "loss": 1.7166, "step": 18736 }, { "epoch": 0.914892578125, "grad_norm": 0.203188955783844, "learning_rate": 5.8232625098396494e-05, "loss": 1.7321, "step": 18737 }, { "epoch": 0.91494140625, "grad_norm": 0.2020789086818695, "learning_rate": 5.8223239257858095e-05, "loss": 1.7292, "step": 18738 }, { "epoch": 0.914990234375, "grad_norm": 0.19943015277385712, "learning_rate": 5.8213858671078644e-05, "loss": 1.7254, "step": 18739 }, { "epoch": 0.9150390625, "grad_norm": 0.1980915069580078, "learning_rate": 5.8204483338285486e-05, "loss": 1.7073, "step": 18740 }, { "epoch": 0.915087890625, "grad_norm": 0.20888081192970276, "learning_rate": 5.819511325970585e-05, "loss": 1.7121, "step": 18741 }, { "epoch": 0.91513671875, "grad_norm": 0.18390722572803497, "learning_rate": 5.8185748435566844e-05, "loss": 1.7111, "step": 18742 }, { "epoch": 0.915185546875, "grad_norm": 0.2126796841621399, "learning_rate": 5.8176388866095384e-05, "loss": 1.7371, "step": 18743 }, { "epoch": 0.915234375, "grad_norm": 0.2186541110277176, "learning_rate": 5.8167034551518345e-05, "loss": 1.727, "step": 18744 }, { "epoch": 0.915283203125, "grad_norm": 0.16287820041179657, "learning_rate": 5.8157685492062445e-05, "loss": 1.7298, "step": 18745 }, { "epoch": 0.91533203125, "grad_norm": 0.2184925675392151, "learning_rate": 5.814834168795421e-05, "loss": 1.7154, "step": 18746 }, { "epoch": 0.915380859375, "grad_norm": 0.2219237983226776, "learning_rate": 5.813900313942017e-05, "loss": 1.7467, "step": 18747 }, { "epoch": 0.9154296875, "grad_norm": 0.17952057719230652, "learning_rate": 5.8129669846686574e-05, "loss": 1.7385, "step": 18748 }, { "epoch": 0.915478515625, "grad_norm": 0.2125743180513382, "learning_rate": 5.8120341809979686e-05, "loss": 1.7131, "step": 18749 }, { "epoch": 0.91552734375, "grad_norm": 0.2368326336145401, "learning_rate": 5.8111019029525534e-05, "loss": 1.7208, "step": 18750 }, { "epoch": 0.915576171875, "grad_norm": 0.18563306331634521, "learning_rate": 5.810170150555011e-05, "loss": 1.7303, "step": 18751 }, { "epoch": 0.915625, "grad_norm": 0.21008026599884033, "learning_rate": 5.8092389238279167e-05, "loss": 1.7208, "step": 18752 }, { "epoch": 0.915673828125, "grad_norm": 0.22098606824874878, "learning_rate": 5.808308222793848e-05, "loss": 1.743, "step": 18753 }, { "epoch": 0.91572265625, "grad_norm": 0.20772534608840942, "learning_rate": 5.8073780474753535e-05, "loss": 1.7124, "step": 18754 }, { "epoch": 0.915771484375, "grad_norm": 0.1903577297925949, "learning_rate": 5.806448397894982e-05, "loss": 1.7322, "step": 18755 }, { "epoch": 0.9158203125, "grad_norm": 0.20041392743587494, "learning_rate": 5.805519274075258e-05, "loss": 1.7018, "step": 18756 }, { "epoch": 0.915869140625, "grad_norm": 0.1833859235048294, "learning_rate": 5.8045906760387056e-05, "loss": 1.7179, "step": 18757 }, { "epoch": 0.91591796875, "grad_norm": 0.20748330652713776, "learning_rate": 5.8036626038078295e-05, "loss": 1.722, "step": 18758 }, { "epoch": 0.915966796875, "grad_norm": 0.16926755011081696, "learning_rate": 5.802735057405119e-05, "loss": 1.7334, "step": 18759 }, { "epoch": 0.916015625, "grad_norm": 0.1871284544467926, "learning_rate": 5.8018080368530555e-05, "loss": 1.7057, "step": 18760 }, { "epoch": 0.916064453125, "grad_norm": 0.21657906472682953, "learning_rate": 5.8008815421741086e-05, "loss": 1.7136, "step": 18761 }, { "epoch": 0.91611328125, "grad_norm": 0.1683107316493988, "learning_rate": 5.799955573390729e-05, "loss": 1.7155, "step": 18762 }, { "epoch": 0.916162109375, "grad_norm": 0.20054112374782562, "learning_rate": 5.799030130525361e-05, "loss": 1.7223, "step": 18763 }, { "epoch": 0.9162109375, "grad_norm": 0.1692163348197937, "learning_rate": 5.79810521360043e-05, "loss": 1.7379, "step": 18764 }, { "epoch": 0.916259765625, "grad_norm": 0.18253208696842194, "learning_rate": 5.7971808226383556e-05, "loss": 1.7196, "step": 18765 }, { "epoch": 0.91630859375, "grad_norm": 0.1792958527803421, "learning_rate": 5.796256957661538e-05, "loss": 1.7312, "step": 18766 }, { "epoch": 0.916357421875, "grad_norm": 0.1904533952474594, "learning_rate": 5.7953336186923724e-05, "loss": 1.7044, "step": 18767 }, { "epoch": 0.91640625, "grad_norm": 0.163395956158638, "learning_rate": 5.794410805753231e-05, "loss": 1.7354, "step": 18768 }, { "epoch": 0.916455078125, "grad_norm": 0.19414560496807098, "learning_rate": 5.793488518866484e-05, "loss": 1.7167, "step": 18769 }, { "epoch": 0.91650390625, "grad_norm": 0.18508653342723846, "learning_rate": 5.792566758054478e-05, "loss": 1.671, "step": 18770 }, { "epoch": 0.916552734375, "grad_norm": 0.165741965174675, "learning_rate": 5.7916455233395636e-05, "loss": 1.7114, "step": 18771 }, { "epoch": 0.9166015625, "grad_norm": 0.186482235789299, "learning_rate": 5.790724814744054e-05, "loss": 1.7098, "step": 18772 }, { "epoch": 0.916650390625, "grad_norm": 0.1751406490802765, "learning_rate": 5.789804632290273e-05, "loss": 1.7306, "step": 18773 }, { "epoch": 0.91669921875, "grad_norm": 0.1606944054365158, "learning_rate": 5.788884976000516e-05, "loss": 1.7249, "step": 18774 }, { "epoch": 0.916748046875, "grad_norm": 0.21279151737689972, "learning_rate": 5.787965845897076e-05, "loss": 1.7252, "step": 18775 }, { "epoch": 0.916796875, "grad_norm": 0.18016155064105988, "learning_rate": 5.787047242002225e-05, "loss": 1.7185, "step": 18776 }, { "epoch": 0.916845703125, "grad_norm": 0.2064698338508606, "learning_rate": 5.786129164338229e-05, "loss": 1.7296, "step": 18777 }, { "epoch": 0.91689453125, "grad_norm": 0.1897643804550171, "learning_rate": 5.785211612927338e-05, "loss": 1.7149, "step": 18778 }, { "epoch": 0.916943359375, "grad_norm": 0.18278203904628754, "learning_rate": 5.7842945877917914e-05, "loss": 1.6926, "step": 18779 }, { "epoch": 0.9169921875, "grad_norm": 0.20033513009548187, "learning_rate": 5.7833780889538044e-05, "loss": 1.696, "step": 18780 }, { "epoch": 0.917041015625, "grad_norm": 0.1888706535100937, "learning_rate": 5.782462116435605e-05, "loss": 1.7442, "step": 18781 }, { "epoch": 0.91708984375, "grad_norm": 0.16701921820640564, "learning_rate": 5.781546670259377e-05, "loss": 1.7089, "step": 18782 }, { "epoch": 0.917138671875, "grad_norm": 0.17854955792427063, "learning_rate": 5.7806317504473195e-05, "loss": 1.714, "step": 18783 }, { "epoch": 0.9171875, "grad_norm": 0.17434555292129517, "learning_rate": 5.7797173570215955e-05, "loss": 1.7268, "step": 18784 }, { "epoch": 0.917236328125, "grad_norm": 0.16279533505439758, "learning_rate": 5.778803490004374e-05, "loss": 1.7337, "step": 18785 }, { "epoch": 0.91728515625, "grad_norm": 0.16919615864753723, "learning_rate": 5.777890149417799e-05, "loss": 1.7228, "step": 18786 }, { "epoch": 0.917333984375, "grad_norm": 0.16664192080497742, "learning_rate": 5.776977335284011e-05, "loss": 1.7375, "step": 18787 }, { "epoch": 0.9173828125, "grad_norm": 0.16366955637931824, "learning_rate": 5.7760650476251275e-05, "loss": 1.7191, "step": 18788 }, { "epoch": 0.917431640625, "grad_norm": 0.18369321525096893, "learning_rate": 5.775153286463259e-05, "loss": 1.7304, "step": 18789 }, { "epoch": 0.91748046875, "grad_norm": 0.1838279366493225, "learning_rate": 5.7742420518205046e-05, "loss": 1.7395, "step": 18790 }, { "epoch": 0.917529296875, "grad_norm": 0.17717741429805756, "learning_rate": 5.773331343718948e-05, "loss": 1.7201, "step": 18791 }, { "epoch": 0.917578125, "grad_norm": 0.18216460943222046, "learning_rate": 5.7724211621806624e-05, "loss": 1.7114, "step": 18792 }, { "epoch": 0.917626953125, "grad_norm": 0.16009174287319183, "learning_rate": 5.771511507227707e-05, "loss": 1.7129, "step": 18793 }, { "epoch": 0.91767578125, "grad_norm": 0.17748792469501495, "learning_rate": 5.770602378882124e-05, "loss": 1.7161, "step": 18794 }, { "epoch": 0.917724609375, "grad_norm": 0.16393186151981354, "learning_rate": 5.769693777165951e-05, "loss": 1.7321, "step": 18795 }, { "epoch": 0.9177734375, "grad_norm": 0.1818397343158722, "learning_rate": 5.7687857021012085e-05, "loss": 1.7329, "step": 18796 }, { "epoch": 0.917822265625, "grad_norm": 0.18951590359210968, "learning_rate": 5.7678781537099034e-05, "loss": 1.7259, "step": 18797 }, { "epoch": 0.91787109375, "grad_norm": 0.19731523096561432, "learning_rate": 5.766971132014025e-05, "loss": 1.7125, "step": 18798 }, { "epoch": 0.917919921875, "grad_norm": 0.18042264878749847, "learning_rate": 5.766064637035569e-05, "loss": 1.7317, "step": 18799 }, { "epoch": 0.91796875, "grad_norm": 0.17280234396457672, "learning_rate": 5.76515866879649e-05, "loss": 1.7225, "step": 18800 }, { "epoch": 0.918017578125, "grad_norm": 0.1684073656797409, "learning_rate": 5.7642532273187584e-05, "loss": 1.748, "step": 18801 }, { "epoch": 0.91806640625, "grad_norm": 0.1809220314025879, "learning_rate": 5.763348312624309e-05, "loss": 1.744, "step": 18802 }, { "epoch": 0.918115234375, "grad_norm": 0.17118963599205017, "learning_rate": 5.762443924735078e-05, "loss": 1.7433, "step": 18803 }, { "epoch": 0.9181640625, "grad_norm": 0.17794835567474365, "learning_rate": 5.761540063672982e-05, "loss": 1.7028, "step": 18804 }, { "epoch": 0.918212890625, "grad_norm": 0.1832440346479416, "learning_rate": 5.760636729459925e-05, "loss": 1.7256, "step": 18805 }, { "epoch": 0.91826171875, "grad_norm": 0.18177452683448792, "learning_rate": 5.7597339221178034e-05, "loss": 1.7355, "step": 18806 }, { "epoch": 0.918310546875, "grad_norm": 0.17450816929340363, "learning_rate": 5.758831641668498e-05, "loss": 1.6891, "step": 18807 }, { "epoch": 0.918359375, "grad_norm": 0.1901458352804184, "learning_rate": 5.7579298881338724e-05, "loss": 1.72, "step": 18808 }, { "epoch": 0.918408203125, "grad_norm": 0.17725777626037598, "learning_rate": 5.757028661535784e-05, "loss": 1.7221, "step": 18809 }, { "epoch": 0.91845703125, "grad_norm": 0.16594822704792023, "learning_rate": 5.756127961896071e-05, "loss": 1.7165, "step": 18810 }, { "epoch": 0.918505859375, "grad_norm": 0.17149107158184052, "learning_rate": 5.755227789236569e-05, "loss": 1.7214, "step": 18811 }, { "epoch": 0.9185546875, "grad_norm": 0.16443607211112976, "learning_rate": 5.7543281435790906e-05, "loss": 1.7083, "step": 18812 }, { "epoch": 0.918603515625, "grad_norm": 0.17494671046733856, "learning_rate": 5.753429024945442e-05, "loss": 1.7115, "step": 18813 }, { "epoch": 0.91865234375, "grad_norm": 0.16587252914905548, "learning_rate": 5.7525304333574084e-05, "loss": 1.7263, "step": 18814 }, { "epoch": 0.918701171875, "grad_norm": 0.16448768973350525, "learning_rate": 5.751632368836774e-05, "loss": 1.7, "step": 18815 }, { "epoch": 0.91875, "grad_norm": 0.1718723177909851, "learning_rate": 5.7507348314053e-05, "loss": 1.7271, "step": 18816 }, { "epoch": 0.918798828125, "grad_norm": 0.17988911271095276, "learning_rate": 5.7498378210847424e-05, "loss": 1.7459, "step": 18817 }, { "epoch": 0.91884765625, "grad_norm": 0.17206592857837677, "learning_rate": 5.748941337896836e-05, "loss": 1.7036, "step": 18818 }, { "epoch": 0.918896484375, "grad_norm": 0.16274473071098328, "learning_rate": 5.748045381863314e-05, "loss": 1.727, "step": 18819 }, { "epoch": 0.9189453125, "grad_norm": 0.20071478188037872, "learning_rate": 5.747149953005883e-05, "loss": 1.7077, "step": 18820 }, { "epoch": 0.918994140625, "grad_norm": 0.1739257574081421, "learning_rate": 5.746255051346255e-05, "loss": 1.7076, "step": 18821 }, { "epoch": 0.91904296875, "grad_norm": 0.1874539852142334, "learning_rate": 5.745360676906109e-05, "loss": 1.7014, "step": 18822 }, { "epoch": 0.919091796875, "grad_norm": 0.1741909384727478, "learning_rate": 5.744466829707127e-05, "loss": 1.7239, "step": 18823 }, { "epoch": 0.919140625, "grad_norm": 0.1824684590101242, "learning_rate": 5.7435735097709674e-05, "loss": 1.7392, "step": 18824 }, { "epoch": 0.919189453125, "grad_norm": 0.18009540438652039, "learning_rate": 5.742680717119283e-05, "loss": 1.7158, "step": 18825 }, { "epoch": 0.91923828125, "grad_norm": 0.20081965625286102, "learning_rate": 5.741788451773711e-05, "loss": 1.7174, "step": 18826 }, { "epoch": 0.919287109375, "grad_norm": 0.17328225076198578, "learning_rate": 5.740896713755876e-05, "loss": 1.7148, "step": 18827 }, { "epoch": 0.9193359375, "grad_norm": 0.20427687466144562, "learning_rate": 5.740005503087391e-05, "loss": 1.7126, "step": 18828 }, { "epoch": 0.919384765625, "grad_norm": 0.18693119287490845, "learning_rate": 5.739114819789854e-05, "loss": 1.7296, "step": 18829 }, { "epoch": 0.91943359375, "grad_norm": 0.20091751217842102, "learning_rate": 5.738224663884852e-05, "loss": 1.7085, "step": 18830 }, { "epoch": 0.919482421875, "grad_norm": 0.2175808846950531, "learning_rate": 5.737335035393961e-05, "loss": 1.7206, "step": 18831 }, { "epoch": 0.91953125, "grad_norm": 0.1757475733757019, "learning_rate": 5.736445934338734e-05, "loss": 1.725, "step": 18832 }, { "epoch": 0.919580078125, "grad_norm": 0.2203904241323471, "learning_rate": 5.73555736074073e-05, "loss": 1.7088, "step": 18833 }, { "epoch": 0.91962890625, "grad_norm": 0.20872975885868073, "learning_rate": 5.734669314621474e-05, "loss": 1.7355, "step": 18834 }, { "epoch": 0.919677734375, "grad_norm": 0.17851603031158447, "learning_rate": 5.733781796002497e-05, "loss": 1.7343, "step": 18835 }, { "epoch": 0.9197265625, "grad_norm": 0.2009424865245819, "learning_rate": 5.732894804905301e-05, "loss": 1.7143, "step": 18836 }, { "epoch": 0.919775390625, "grad_norm": 0.21171347796916962, "learning_rate": 5.7320083413513916e-05, "loss": 1.7351, "step": 18837 }, { "epoch": 0.91982421875, "grad_norm": 0.18754315376281738, "learning_rate": 5.7311224053622444e-05, "loss": 1.7134, "step": 18838 }, { "epoch": 0.919873046875, "grad_norm": 0.1811935007572174, "learning_rate": 5.730236996959337e-05, "loss": 1.7278, "step": 18839 }, { "epoch": 0.919921875, "grad_norm": 0.2031995952129364, "learning_rate": 5.7293521161641236e-05, "loss": 1.7214, "step": 18840 }, { "epoch": 0.919970703125, "grad_norm": 0.16116514801979065, "learning_rate": 5.728467762998054e-05, "loss": 1.6896, "step": 18841 }, { "epoch": 0.92001953125, "grad_norm": 0.18545134365558624, "learning_rate": 5.727583937482557e-05, "loss": 1.7226, "step": 18842 }, { "epoch": 0.920068359375, "grad_norm": 0.15883581340312958, "learning_rate": 5.726700639639057e-05, "loss": 1.7516, "step": 18843 }, { "epoch": 0.9201171875, "grad_norm": 0.16499443352222443, "learning_rate": 5.72581786948896e-05, "loss": 1.7222, "step": 18844 }, { "epoch": 0.920166015625, "grad_norm": 0.17741753160953522, "learning_rate": 5.7249356270536585e-05, "loss": 1.7306, "step": 18845 }, { "epoch": 0.92021484375, "grad_norm": 0.18105657398700714, "learning_rate": 5.7240539123545355e-05, "loss": 1.7269, "step": 18846 }, { "epoch": 0.920263671875, "grad_norm": 0.17380701005458832, "learning_rate": 5.7231727254129634e-05, "loss": 1.7071, "step": 18847 }, { "epoch": 0.9203125, "grad_norm": 0.1656285524368286, "learning_rate": 5.722292066250291e-05, "loss": 1.6946, "step": 18848 }, { "epoch": 0.920361328125, "grad_norm": 0.175824373960495, "learning_rate": 5.721411934887873e-05, "loss": 1.6793, "step": 18849 }, { "epoch": 0.92041015625, "grad_norm": 0.19770152866840363, "learning_rate": 5.720532331347027e-05, "loss": 1.7427, "step": 18850 }, { "epoch": 0.920458984375, "grad_norm": 0.16342012584209442, "learning_rate": 5.7196532556490815e-05, "loss": 1.6887, "step": 18851 }, { "epoch": 0.9205078125, "grad_norm": 0.18345655500888824, "learning_rate": 5.718774707815334e-05, "loss": 1.7063, "step": 18852 }, { "epoch": 0.920556640625, "grad_norm": 0.1739836484193802, "learning_rate": 5.7178966878670844e-05, "loss": 1.7254, "step": 18853 }, { "epoch": 0.92060546875, "grad_norm": 0.18464972078800201, "learning_rate": 5.7170191958256056e-05, "loss": 1.705, "step": 18854 }, { "epoch": 0.920654296875, "grad_norm": 0.18404796719551086, "learning_rate": 5.716142231712168e-05, "loss": 1.7328, "step": 18855 }, { "epoch": 0.920703125, "grad_norm": 0.18252240121364594, "learning_rate": 5.7152657955480244e-05, "loss": 1.7204, "step": 18856 }, { "epoch": 0.920751953125, "grad_norm": 0.20770461857318878, "learning_rate": 5.714389887354416e-05, "loss": 1.706, "step": 18857 }, { "epoch": 0.92080078125, "grad_norm": 0.16776986420154572, "learning_rate": 5.71351450715257e-05, "loss": 1.7321, "step": 18858 }, { "epoch": 0.920849609375, "grad_norm": 0.17912927269935608, "learning_rate": 5.712639654963703e-05, "loss": 1.7166, "step": 18859 }, { "epoch": 0.9208984375, "grad_norm": 0.18550437688827515, "learning_rate": 5.711765330809017e-05, "loss": 1.7307, "step": 18860 }, { "epoch": 0.920947265625, "grad_norm": 0.16439183056354523, "learning_rate": 5.710891534709705e-05, "loss": 1.7122, "step": 18861 }, { "epoch": 0.92099609375, "grad_norm": 0.19533434510231018, "learning_rate": 5.710018266686941e-05, "loss": 1.708, "step": 18862 }, { "epoch": 0.921044921875, "grad_norm": 0.16397280991077423, "learning_rate": 5.70914552676189e-05, "loss": 1.7313, "step": 18863 }, { "epoch": 0.92109375, "grad_norm": 0.18831539154052734, "learning_rate": 5.708273314955702e-05, "loss": 1.7328, "step": 18864 }, { "epoch": 0.921142578125, "grad_norm": 0.1969527304172516, "learning_rate": 5.70740163128952e-05, "loss": 1.7517, "step": 18865 }, { "epoch": 0.92119140625, "grad_norm": 0.1640554517507553, "learning_rate": 5.706530475784466e-05, "loss": 1.7266, "step": 18866 }, { "epoch": 0.921240234375, "grad_norm": 0.1754608303308487, "learning_rate": 5.705659848461656e-05, "loss": 1.7132, "step": 18867 }, { "epoch": 0.9212890625, "grad_norm": 0.20679736137390137, "learning_rate": 5.704789749342186e-05, "loss": 1.7216, "step": 18868 }, { "epoch": 0.921337890625, "grad_norm": 0.17746500670909882, "learning_rate": 5.703920178447152e-05, "loss": 1.6993, "step": 18869 }, { "epoch": 0.92138671875, "grad_norm": 0.18327899277210236, "learning_rate": 5.703051135797619e-05, "loss": 1.708, "step": 18870 }, { "epoch": 0.921435546875, "grad_norm": 0.16602429747581482, "learning_rate": 5.702182621414656e-05, "loss": 1.7088, "step": 18871 }, { "epoch": 0.921484375, "grad_norm": 0.17832937836647034, "learning_rate": 5.701314635319307e-05, "loss": 1.7232, "step": 18872 }, { "epoch": 0.921533203125, "grad_norm": 0.17169131338596344, "learning_rate": 5.7004471775326106e-05, "loss": 1.7213, "step": 18873 }, { "epoch": 0.92158203125, "grad_norm": 0.15272656083106995, "learning_rate": 5.699580248075591e-05, "loss": 1.7201, "step": 18874 }, { "epoch": 0.921630859375, "grad_norm": 0.17179858684539795, "learning_rate": 5.6987138469692576e-05, "loss": 1.7144, "step": 18875 }, { "epoch": 0.9216796875, "grad_norm": 0.17301179468631744, "learning_rate": 5.69784797423461e-05, "loss": 1.7124, "step": 18876 }, { "epoch": 0.921728515625, "grad_norm": 0.1760352998971939, "learning_rate": 5.696982629892633e-05, "loss": 1.7045, "step": 18877 }, { "epoch": 0.92177734375, "grad_norm": 0.1740346997976303, "learning_rate": 5.696117813964298e-05, "loss": 1.7294, "step": 18878 }, { "epoch": 0.921826171875, "grad_norm": 0.1613505482673645, "learning_rate": 5.6952535264705646e-05, "loss": 1.7282, "step": 18879 }, { "epoch": 0.921875, "grad_norm": 0.18010693788528442, "learning_rate": 5.694389767432377e-05, "loss": 1.7167, "step": 18880 }, { "epoch": 0.921923828125, "grad_norm": 0.18438448011875153, "learning_rate": 5.693526536870676e-05, "loss": 1.7195, "step": 18881 }, { "epoch": 0.92197265625, "grad_norm": 0.17269763350486755, "learning_rate": 5.692663834806378e-05, "loss": 1.7564, "step": 18882 }, { "epoch": 0.922021484375, "grad_norm": 0.18600386381149292, "learning_rate": 5.6918016612603904e-05, "loss": 1.6968, "step": 18883 }, { "epoch": 0.9220703125, "grad_norm": 0.19080433249473572, "learning_rate": 5.690940016253608e-05, "loss": 1.6729, "step": 18884 }, { "epoch": 0.922119140625, "grad_norm": 0.18889851868152618, "learning_rate": 5.69007889980692e-05, "loss": 1.7263, "step": 18885 }, { "epoch": 0.92216796875, "grad_norm": 0.17971469461917877, "learning_rate": 5.68921831194119e-05, "loss": 1.7279, "step": 18886 }, { "epoch": 0.922216796875, "grad_norm": 0.17797935009002686, "learning_rate": 5.6883582526772754e-05, "loss": 1.7288, "step": 18887 }, { "epoch": 0.922265625, "grad_norm": 0.19373497366905212, "learning_rate": 5.6874987220360234e-05, "loss": 1.7361, "step": 18888 }, { "epoch": 0.922314453125, "grad_norm": 0.17900946736335754, "learning_rate": 5.686639720038265e-05, "loss": 1.7282, "step": 18889 }, { "epoch": 0.92236328125, "grad_norm": 0.20595327019691467, "learning_rate": 5.6857812467048176e-05, "loss": 1.7159, "step": 18890 }, { "epoch": 0.922412109375, "grad_norm": 0.16718102991580963, "learning_rate": 5.684923302056486e-05, "loss": 1.7349, "step": 18891 }, { "epoch": 0.9224609375, "grad_norm": 0.20126521587371826, "learning_rate": 5.684065886114062e-05, "loss": 1.7027, "step": 18892 }, { "epoch": 0.922509765625, "grad_norm": 0.1807904988527298, "learning_rate": 5.6832089988983286e-05, "loss": 1.7196, "step": 18893 }, { "epoch": 0.92255859375, "grad_norm": 0.18307560682296753, "learning_rate": 5.6823526404300535e-05, "loss": 1.7025, "step": 18894 }, { "epoch": 0.922607421875, "grad_norm": 0.17776554822921753, "learning_rate": 5.681496810729991e-05, "loss": 1.726, "step": 18895 }, { "epoch": 0.92265625, "grad_norm": 0.20236805081367493, "learning_rate": 5.6806415098188825e-05, "loss": 1.722, "step": 18896 }, { "epoch": 0.922705078125, "grad_norm": 0.1925136148929596, "learning_rate": 5.679786737717455e-05, "loss": 1.7244, "step": 18897 }, { "epoch": 0.92275390625, "grad_norm": 0.17854858934879303, "learning_rate": 5.6789324944464275e-05, "loss": 1.7194, "step": 18898 }, { "epoch": 0.922802734375, "grad_norm": 0.16064512729644775, "learning_rate": 5.678078780026501e-05, "loss": 1.7393, "step": 18899 }, { "epoch": 0.9228515625, "grad_norm": 0.1769985407590866, "learning_rate": 5.677225594478367e-05, "loss": 1.7017, "step": 18900 }, { "epoch": 0.922900390625, "grad_norm": 0.18359626829624176, "learning_rate": 5.676372937822701e-05, "loss": 1.7258, "step": 18901 }, { "epoch": 0.92294921875, "grad_norm": 0.2017776221036911, "learning_rate": 5.675520810080173e-05, "loss": 1.74, "step": 18902 }, { "epoch": 0.922998046875, "grad_norm": 0.1713893711566925, "learning_rate": 5.6746692112714296e-05, "loss": 1.7095, "step": 18903 }, { "epoch": 0.923046875, "grad_norm": 0.2198924422264099, "learning_rate": 5.673818141417111e-05, "loss": 1.7318, "step": 18904 }, { "epoch": 0.923095703125, "grad_norm": 0.167132169008255, "learning_rate": 5.672967600537847e-05, "loss": 1.7273, "step": 18905 }, { "epoch": 0.92314453125, "grad_norm": 0.20758409798145294, "learning_rate": 5.672117588654253e-05, "loss": 1.7087, "step": 18906 }, { "epoch": 0.923193359375, "grad_norm": 0.17735907435417175, "learning_rate": 5.671268105786918e-05, "loss": 1.7549, "step": 18907 }, { "epoch": 0.9232421875, "grad_norm": 0.20262879133224487, "learning_rate": 5.670419151956441e-05, "loss": 1.7248, "step": 18908 }, { "epoch": 0.923291015625, "grad_norm": 0.19071705639362335, "learning_rate": 5.669570727183395e-05, "loss": 1.7202, "step": 18909 }, { "epoch": 0.92333984375, "grad_norm": 0.17567774653434753, "learning_rate": 5.66872283148834e-05, "loss": 1.7132, "step": 18910 }, { "epoch": 0.923388671875, "grad_norm": 0.19392743706703186, "learning_rate": 5.667875464891825e-05, "loss": 1.6915, "step": 18911 }, { "epoch": 0.9234375, "grad_norm": 0.2004377841949463, "learning_rate": 5.6670286274143874e-05, "loss": 1.7303, "step": 18912 }, { "epoch": 0.923486328125, "grad_norm": 0.18866118788719177, "learning_rate": 5.6661823190765504e-05, "loss": 1.7343, "step": 18913 }, { "epoch": 0.92353515625, "grad_norm": 0.18294622004032135, "learning_rate": 5.665336539898829e-05, "loss": 1.7138, "step": 18914 }, { "epoch": 0.923583984375, "grad_norm": 0.1900603175163269, "learning_rate": 5.664491289901717e-05, "loss": 1.7157, "step": 18915 }, { "epoch": 0.9236328125, "grad_norm": 0.17588689923286438, "learning_rate": 5.6636465691057004e-05, "loss": 1.7331, "step": 18916 }, { "epoch": 0.923681640625, "grad_norm": 0.2099446952342987, "learning_rate": 5.662802377531253e-05, "loss": 1.7135, "step": 18917 }, { "epoch": 0.92373046875, "grad_norm": 0.19644948840141296, "learning_rate": 5.661958715198833e-05, "loss": 1.689, "step": 18918 }, { "epoch": 0.923779296875, "grad_norm": 0.20980022847652435, "learning_rate": 5.661115582128889e-05, "loss": 1.696, "step": 18919 }, { "epoch": 0.923828125, "grad_norm": 0.22105400264263153, "learning_rate": 5.6602729783418535e-05, "loss": 1.7116, "step": 18920 }, { "epoch": 0.923876953125, "grad_norm": 0.17524589598178864, "learning_rate": 5.659430903858149e-05, "loss": 1.6975, "step": 18921 }, { "epoch": 0.92392578125, "grad_norm": 0.22154898941516876, "learning_rate": 5.658589358698186e-05, "loss": 1.7028, "step": 18922 }, { "epoch": 0.923974609375, "grad_norm": 0.1563657522201538, "learning_rate": 5.65774834288235e-05, "loss": 1.6933, "step": 18923 }, { "epoch": 0.9240234375, "grad_norm": 0.20053692162036896, "learning_rate": 5.656907856431038e-05, "loss": 1.7258, "step": 18924 }, { "epoch": 0.924072265625, "grad_norm": 0.20857679843902588, "learning_rate": 5.656067899364607e-05, "loss": 1.7105, "step": 18925 }, { "epoch": 0.92412109375, "grad_norm": 0.172746941447258, "learning_rate": 5.6552284717034234e-05, "loss": 1.739, "step": 18926 }, { "epoch": 0.924169921875, "grad_norm": 0.2285148650407791, "learning_rate": 5.6543895734678234e-05, "loss": 1.7138, "step": 18927 }, { "epoch": 0.92421875, "grad_norm": 0.18570469319820404, "learning_rate": 5.653551204678148e-05, "loss": 1.7186, "step": 18928 }, { "epoch": 0.924267578125, "grad_norm": 0.18748758733272552, "learning_rate": 5.652713365354708e-05, "loss": 1.7256, "step": 18929 }, { "epoch": 0.92431640625, "grad_norm": 0.18986797332763672, "learning_rate": 5.6518760555178125e-05, "loss": 1.7206, "step": 18930 }, { "epoch": 0.924365234375, "grad_norm": 0.19033768773078918, "learning_rate": 5.651039275187751e-05, "loss": 1.7128, "step": 18931 }, { "epoch": 0.9244140625, "grad_norm": 0.16690142452716827, "learning_rate": 5.6502030243848084e-05, "loss": 1.7252, "step": 18932 }, { "epoch": 0.924462890625, "grad_norm": 0.19760052859783173, "learning_rate": 5.649367303129247e-05, "loss": 1.7324, "step": 18933 }, { "epoch": 0.92451171875, "grad_norm": 0.19561563432216644, "learning_rate": 5.648532111441326e-05, "loss": 1.6974, "step": 18934 }, { "epoch": 0.924560546875, "grad_norm": 0.16901926696300507, "learning_rate": 5.647697449341282e-05, "loss": 1.7052, "step": 18935 }, { "epoch": 0.924609375, "grad_norm": 0.2058410495519638, "learning_rate": 5.6468633168493495e-05, "loss": 1.7302, "step": 18936 }, { "epoch": 0.924658203125, "grad_norm": 0.1996762603521347, "learning_rate": 5.6460297139857375e-05, "loss": 1.7163, "step": 18937 }, { "epoch": 0.92470703125, "grad_norm": 0.18097491562366486, "learning_rate": 5.645196640770656e-05, "loss": 1.7271, "step": 18938 }, { "epoch": 0.924755859375, "grad_norm": 0.1975492537021637, "learning_rate": 5.64436409722429e-05, "loss": 1.7172, "step": 18939 }, { "epoch": 0.9248046875, "grad_norm": 0.19690220057964325, "learning_rate": 5.643532083366823e-05, "loss": 1.7297, "step": 18940 }, { "epoch": 0.924853515625, "grad_norm": 0.19769811630249023, "learning_rate": 5.6427005992184065e-05, "loss": 1.7263, "step": 18941 }, { "epoch": 0.92490234375, "grad_norm": 0.18354585766792297, "learning_rate": 5.6418696447992085e-05, "loss": 1.7424, "step": 18942 }, { "epoch": 0.924951171875, "grad_norm": 0.19996969401836395, "learning_rate": 5.641039220129357e-05, "loss": 1.7305, "step": 18943 }, { "epoch": 0.925, "grad_norm": 0.1887461245059967, "learning_rate": 5.640209325228984e-05, "loss": 1.7037, "step": 18944 }, { "epoch": 0.925048828125, "grad_norm": 0.1962970793247223, "learning_rate": 5.639379960118196e-05, "loss": 1.7173, "step": 18945 }, { "epoch": 0.92509765625, "grad_norm": 0.2031613439321518, "learning_rate": 5.638551124817101e-05, "loss": 1.72, "step": 18946 }, { "epoch": 0.925146484375, "grad_norm": 0.1821039468050003, "learning_rate": 5.6377228193457795e-05, "loss": 1.7236, "step": 18947 }, { "epoch": 0.9251953125, "grad_norm": 0.2178535759449005, "learning_rate": 5.636895043724314e-05, "loss": 1.7343, "step": 18948 }, { "epoch": 0.925244140625, "grad_norm": 0.18414810299873352, "learning_rate": 5.636067797972759e-05, "loss": 1.7093, "step": 18949 }, { "epoch": 0.92529296875, "grad_norm": 0.19031763076782227, "learning_rate": 5.635241082111166e-05, "loss": 1.7251, "step": 18950 }, { "epoch": 0.925341796875, "grad_norm": 0.1919899880886078, "learning_rate": 5.6344148961595705e-05, "loss": 1.7231, "step": 18951 }, { "epoch": 0.925390625, "grad_norm": 0.1792648583650589, "learning_rate": 5.633589240137999e-05, "loss": 1.7114, "step": 18952 }, { "epoch": 0.925439453125, "grad_norm": 0.1692652851343155, "learning_rate": 5.632764114066457e-05, "loss": 1.7204, "step": 18953 }, { "epoch": 0.92548828125, "grad_norm": 0.22597664594650269, "learning_rate": 5.6319395179649435e-05, "loss": 1.7089, "step": 18954 }, { "epoch": 0.925537109375, "grad_norm": 0.18586045503616333, "learning_rate": 5.631115451853447e-05, "loss": 1.7085, "step": 18955 }, { "epoch": 0.9255859375, "grad_norm": 0.1674470603466034, "learning_rate": 5.6302919157519395e-05, "loss": 1.7114, "step": 18956 }, { "epoch": 0.925634765625, "grad_norm": 0.1943425089120865, "learning_rate": 5.6294689096803714e-05, "loss": 1.7382, "step": 18957 }, { "epoch": 0.92568359375, "grad_norm": 0.18267419934272766, "learning_rate": 5.6286464336587e-05, "loss": 1.7155, "step": 18958 }, { "epoch": 0.925732421875, "grad_norm": 0.18023568391799927, "learning_rate": 5.6278244877068485e-05, "loss": 1.7369, "step": 18959 }, { "epoch": 0.92578125, "grad_norm": 0.19590646028518677, "learning_rate": 5.6270030718447495e-05, "loss": 1.7117, "step": 18960 }, { "epoch": 0.925830078125, "grad_norm": 0.1884944587945938, "learning_rate": 5.626182186092297e-05, "loss": 1.7473, "step": 18961 }, { "epoch": 0.92587890625, "grad_norm": 0.19697205722332, "learning_rate": 5.625361830469397e-05, "loss": 1.709, "step": 18962 }, { "epoch": 0.925927734375, "grad_norm": 0.19950714707374573, "learning_rate": 5.624542004995921e-05, "loss": 1.7224, "step": 18963 }, { "epoch": 0.9259765625, "grad_norm": 0.2200767546892166, "learning_rate": 5.6237227096917515e-05, "loss": 1.7404, "step": 18964 }, { "epoch": 0.926025390625, "grad_norm": 0.17383982241153717, "learning_rate": 5.622903944576735e-05, "loss": 1.68, "step": 18965 }, { "epoch": 0.92607421875, "grad_norm": 0.2254597395658493, "learning_rate": 5.6220857096707146e-05, "loss": 1.7322, "step": 18966 }, { "epoch": 0.926123046875, "grad_norm": 0.19920003414154053, "learning_rate": 5.6212680049935245e-05, "loss": 1.7408, "step": 18967 }, { "epoch": 0.926171875, "grad_norm": 0.1847621500492096, "learning_rate": 5.620450830564983e-05, "loss": 1.7235, "step": 18968 }, { "epoch": 0.926220703125, "grad_norm": 0.17708250880241394, "learning_rate": 5.619634186404892e-05, "loss": 1.7007, "step": 18969 }, { "epoch": 0.92626953125, "grad_norm": 0.1949075609445572, "learning_rate": 5.618818072533045e-05, "loss": 1.7038, "step": 18970 }, { "epoch": 0.926318359375, "grad_norm": 0.16023918986320496, "learning_rate": 5.618002488969223e-05, "loss": 1.6934, "step": 18971 }, { "epoch": 0.9263671875, "grad_norm": 0.18175119161605835, "learning_rate": 5.617187435733188e-05, "loss": 1.7057, "step": 18972 }, { "epoch": 0.926416015625, "grad_norm": 0.1553708016872406, "learning_rate": 5.616372912844698e-05, "loss": 1.7241, "step": 18973 }, { "epoch": 0.92646484375, "grad_norm": 0.16612890362739563, "learning_rate": 5.6155589203234935e-05, "loss": 1.7227, "step": 18974 }, { "epoch": 0.926513671875, "grad_norm": 0.17335174977779388, "learning_rate": 5.614745458189294e-05, "loss": 1.7105, "step": 18975 }, { "epoch": 0.9265625, "grad_norm": 0.17386960983276367, "learning_rate": 5.613932526461827e-05, "loss": 1.7075, "step": 18976 }, { "epoch": 0.926611328125, "grad_norm": 0.17163024842739105, "learning_rate": 5.613120125160783e-05, "loss": 1.7271, "step": 18977 }, { "epoch": 0.92666015625, "grad_norm": 0.1772652566432953, "learning_rate": 5.612308254305861e-05, "loss": 1.7176, "step": 18978 }, { "epoch": 0.926708984375, "grad_norm": 0.17151400446891785, "learning_rate": 5.6114969139167285e-05, "loss": 1.722, "step": 18979 }, { "epoch": 0.9267578125, "grad_norm": 0.1688147783279419, "learning_rate": 5.610686104013058e-05, "loss": 1.7233, "step": 18980 }, { "epoch": 0.926806640625, "grad_norm": 0.19757376611232758, "learning_rate": 5.609875824614493e-05, "loss": 1.7344, "step": 18981 }, { "epoch": 0.92685546875, "grad_norm": 0.17643344402313232, "learning_rate": 5.609066075740675e-05, "loss": 1.7341, "step": 18982 }, { "epoch": 0.926904296875, "grad_norm": 0.19351458549499512, "learning_rate": 5.6082568574112274e-05, "loss": 1.7455, "step": 18983 }, { "epoch": 0.926953125, "grad_norm": 0.16358435153961182, "learning_rate": 5.6074481696457596e-05, "loss": 1.7143, "step": 18984 }, { "epoch": 0.927001953125, "grad_norm": 0.19279679656028748, "learning_rate": 5.606640012463877e-05, "loss": 1.7175, "step": 18985 }, { "epoch": 0.92705078125, "grad_norm": 0.1660301238298416, "learning_rate": 5.6058323858851606e-05, "loss": 1.7096, "step": 18986 }, { "epoch": 0.927099609375, "grad_norm": 0.1732509285211563, "learning_rate": 5.605025289929188e-05, "loss": 1.7172, "step": 18987 }, { "epoch": 0.9271484375, "grad_norm": 0.18885396420955658, "learning_rate": 5.6042187246155175e-05, "loss": 1.7395, "step": 18988 }, { "epoch": 0.927197265625, "grad_norm": 0.1751922369003296, "learning_rate": 5.6034126899636936e-05, "loss": 1.7108, "step": 18989 }, { "epoch": 0.92724609375, "grad_norm": 0.1803571581840515, "learning_rate": 5.602607185993262e-05, "loss": 1.7381, "step": 18990 }, { "epoch": 0.927294921875, "grad_norm": 0.17722474038600922, "learning_rate": 5.60180221272373e-05, "loss": 1.7178, "step": 18991 }, { "epoch": 0.92734375, "grad_norm": 0.17288051545619965, "learning_rate": 5.6009977701746195e-05, "loss": 1.7222, "step": 18992 }, { "epoch": 0.927392578125, "grad_norm": 0.18089504539966583, "learning_rate": 5.600193858365415e-05, "loss": 1.7389, "step": 18993 }, { "epoch": 0.92744140625, "grad_norm": 0.15842728316783905, "learning_rate": 5.599390477315614e-05, "loss": 1.7051, "step": 18994 }, { "epoch": 0.927490234375, "grad_norm": 0.224094957113266, "learning_rate": 5.598587627044673e-05, "loss": 1.7519, "step": 18995 }, { "epoch": 0.9275390625, "grad_norm": 0.15952113270759583, "learning_rate": 5.597785307572061e-05, "loss": 1.701, "step": 18996 }, { "epoch": 0.927587890625, "grad_norm": 0.20998618006706238, "learning_rate": 5.5969835189172134e-05, "loss": 1.7308, "step": 18997 }, { "epoch": 0.92763671875, "grad_norm": 0.1710273027420044, "learning_rate": 5.5961822610995734e-05, "loss": 1.6969, "step": 18998 }, { "epoch": 0.927685546875, "grad_norm": 0.18500854074954987, "learning_rate": 5.5953815341385497e-05, "loss": 1.7037, "step": 18999 }, { "epoch": 0.927734375, "grad_norm": 0.19678141176700592, "learning_rate": 5.594581338053553e-05, "loss": 1.7088, "step": 19000 }, { "epoch": 0.927783203125, "grad_norm": 0.16759595274925232, "learning_rate": 5.5937816728639746e-05, "loss": 1.7179, "step": 19001 }, { "epoch": 0.92783203125, "grad_norm": 0.18900984525680542, "learning_rate": 5.592982538589199e-05, "loss": 1.7393, "step": 19002 }, { "epoch": 0.927880859375, "grad_norm": 0.18387186527252197, "learning_rate": 5.592183935248592e-05, "loss": 1.7177, "step": 19003 }, { "epoch": 0.9279296875, "grad_norm": 0.17456883192062378, "learning_rate": 5.591385862861505e-05, "loss": 1.7102, "step": 19004 }, { "epoch": 0.927978515625, "grad_norm": 0.1724269986152649, "learning_rate": 5.590588321447285e-05, "loss": 1.725, "step": 19005 }, { "epoch": 0.92802734375, "grad_norm": 0.16390438377857208, "learning_rate": 5.589791311025258e-05, "loss": 1.7082, "step": 19006 }, { "epoch": 0.928076171875, "grad_norm": 0.1645166426897049, "learning_rate": 5.5889948316147424e-05, "loss": 1.6944, "step": 19007 }, { "epoch": 0.928125, "grad_norm": 0.2032794952392578, "learning_rate": 5.5881988832350416e-05, "loss": 1.7168, "step": 19008 }, { "epoch": 0.928173828125, "grad_norm": 0.16995562613010406, "learning_rate": 5.587403465905439e-05, "loss": 1.7208, "step": 19009 }, { "epoch": 0.92822265625, "grad_norm": 0.19791555404663086, "learning_rate": 5.5866085796452235e-05, "loss": 1.7265, "step": 19010 }, { "epoch": 0.928271484375, "grad_norm": 0.1809415966272354, "learning_rate": 5.5858142244736477e-05, "loss": 1.7085, "step": 19011 }, { "epoch": 0.9283203125, "grad_norm": 0.18638399243354797, "learning_rate": 5.585020400409977e-05, "loss": 1.7224, "step": 19012 }, { "epoch": 0.928369140625, "grad_norm": 0.17717678844928741, "learning_rate": 5.584227107473437e-05, "loss": 1.7182, "step": 19013 }, { "epoch": 0.92841796875, "grad_norm": 0.1771697700023651, "learning_rate": 5.583434345683265e-05, "loss": 1.6829, "step": 19014 }, { "epoch": 0.928466796875, "grad_norm": 0.16946688294410706, "learning_rate": 5.5826421150586685e-05, "loss": 1.7152, "step": 19015 }, { "epoch": 0.928515625, "grad_norm": 0.17779934406280518, "learning_rate": 5.581850415618847e-05, "loss": 1.713, "step": 19016 }, { "epoch": 0.928564453125, "grad_norm": 0.1695616990327835, "learning_rate": 5.5810592473829894e-05, "loss": 1.7276, "step": 19017 }, { "epoch": 0.92861328125, "grad_norm": 0.19593583047389984, "learning_rate": 5.58026861037027e-05, "loss": 1.7583, "step": 19018 }, { "epoch": 0.928662109375, "grad_norm": 0.1859525889158249, "learning_rate": 5.5794785045998526e-05, "loss": 1.7195, "step": 19019 }, { "epoch": 0.9287109375, "grad_norm": 0.1856563240289688, "learning_rate": 5.578688930090882e-05, "loss": 1.7304, "step": 19020 }, { "epoch": 0.928759765625, "grad_norm": 0.2009582817554474, "learning_rate": 5.577899886862497e-05, "loss": 1.7278, "step": 19021 }, { "epoch": 0.92880859375, "grad_norm": 0.18855509161949158, "learning_rate": 5.5771113749338215e-05, "loss": 1.7087, "step": 19022 }, { "epoch": 0.928857421875, "grad_norm": 0.16429375112056732, "learning_rate": 5.5763233943239646e-05, "loss": 1.7154, "step": 19023 }, { "epoch": 0.92890625, "grad_norm": 0.19368043541908264, "learning_rate": 5.5755359450520256e-05, "loss": 1.7457, "step": 19024 }, { "epoch": 0.928955078125, "grad_norm": 0.1805080622434616, "learning_rate": 5.574749027137083e-05, "loss": 1.7076, "step": 19025 }, { "epoch": 0.92900390625, "grad_norm": 0.1718953400850296, "learning_rate": 5.573962640598217e-05, "loss": 1.7052, "step": 19026 }, { "epoch": 0.929052734375, "grad_norm": 0.19593702256679535, "learning_rate": 5.573176785454475e-05, "loss": 1.7064, "step": 19027 }, { "epoch": 0.9291015625, "grad_norm": 0.18874263763427734, "learning_rate": 5.572391461724916e-05, "loss": 1.7113, "step": 19028 }, { "epoch": 0.929150390625, "grad_norm": 0.21638083457946777, "learning_rate": 5.57160666942856e-05, "loss": 1.7133, "step": 19029 }, { "epoch": 0.92919921875, "grad_norm": 0.1634802222251892, "learning_rate": 5.570822408584441e-05, "loss": 1.7354, "step": 19030 }, { "epoch": 0.929248046875, "grad_norm": 0.17191165685653687, "learning_rate": 5.570038679211551e-05, "loss": 1.7382, "step": 19031 }, { "epoch": 0.929296875, "grad_norm": 0.2000647932291031, "learning_rate": 5.569255481328898e-05, "loss": 1.725, "step": 19032 }, { "epoch": 0.929345703125, "grad_norm": 0.16783824563026428, "learning_rate": 5.5684728149554545e-05, "loss": 1.7158, "step": 19033 }, { "epoch": 0.92939453125, "grad_norm": 0.17775417864322662, "learning_rate": 5.567690680110193e-05, "loss": 1.7231, "step": 19034 }, { "epoch": 0.929443359375, "grad_norm": 0.18682406842708588, "learning_rate": 5.566909076812067e-05, "loss": 1.7379, "step": 19035 }, { "epoch": 0.9294921875, "grad_norm": 0.1743701845407486, "learning_rate": 5.566128005080019e-05, "loss": 1.7331, "step": 19036 }, { "epoch": 0.929541015625, "grad_norm": 0.18309058248996735, "learning_rate": 5.5653474649329834e-05, "loss": 1.7058, "step": 19037 }, { "epoch": 0.92958984375, "grad_norm": 0.1619430035352707, "learning_rate": 5.5645674563898724e-05, "loss": 1.7146, "step": 19038 }, { "epoch": 0.929638671875, "grad_norm": 0.1951819360256195, "learning_rate": 5.563787979469591e-05, "loss": 1.682, "step": 19039 }, { "epoch": 0.9296875, "grad_norm": 0.18125832080841064, "learning_rate": 5.563009034191034e-05, "loss": 1.7263, "step": 19040 }, { "epoch": 0.929736328125, "grad_norm": 0.16593782603740692, "learning_rate": 5.562230620573074e-05, "loss": 1.7089, "step": 19041 }, { "epoch": 0.92978515625, "grad_norm": 0.19403745234012604, "learning_rate": 5.561452738634583e-05, "loss": 1.7051, "step": 19042 }, { "epoch": 0.929833984375, "grad_norm": 0.16708441078662872, "learning_rate": 5.560675388394405e-05, "loss": 1.7265, "step": 19043 }, { "epoch": 0.9298828125, "grad_norm": 0.18835695087909698, "learning_rate": 5.5598985698713914e-05, "loss": 1.7179, "step": 19044 }, { "epoch": 0.929931640625, "grad_norm": 0.21164818108081818, "learning_rate": 5.559122283084356e-05, "loss": 1.7211, "step": 19045 }, { "epoch": 0.92998046875, "grad_norm": 0.17690731585025787, "learning_rate": 5.5583465280521244e-05, "loss": 1.7479, "step": 19046 }, { "epoch": 0.930029296875, "grad_norm": 0.17787598073482513, "learning_rate": 5.557571304793488e-05, "loss": 1.716, "step": 19047 }, { "epoch": 0.930078125, "grad_norm": 0.1760224997997284, "learning_rate": 5.5567966133272436e-05, "loss": 1.72, "step": 19048 }, { "epoch": 0.930126953125, "grad_norm": 0.19307492673397064, "learning_rate": 5.5560224536721623e-05, "loss": 1.7312, "step": 19049 }, { "epoch": 0.93017578125, "grad_norm": 0.16124390065670013, "learning_rate": 5.555248825847004e-05, "loss": 1.7002, "step": 19050 }, { "epoch": 0.930224609375, "grad_norm": 0.18517297506332397, "learning_rate": 5.55447572987052e-05, "loss": 1.7031, "step": 19051 }, { "epoch": 0.9302734375, "grad_norm": 0.17002050578594208, "learning_rate": 5.5537031657614485e-05, "loss": 1.72, "step": 19052 }, { "epoch": 0.930322265625, "grad_norm": 0.17940452694892883, "learning_rate": 5.552931133538513e-05, "loss": 1.7106, "step": 19053 }, { "epoch": 0.93037109375, "grad_norm": 0.17627078294754028, "learning_rate": 5.5521596332204245e-05, "loss": 1.7279, "step": 19054 }, { "epoch": 0.930419921875, "grad_norm": 0.1641618311405182, "learning_rate": 5.551388664825877e-05, "loss": 1.7076, "step": 19055 }, { "epoch": 0.93046875, "grad_norm": 0.19736644625663757, "learning_rate": 5.5506182283735605e-05, "loss": 1.7257, "step": 19056 }, { "epoch": 0.930517578125, "grad_norm": 0.17979411780834198, "learning_rate": 5.549848323882144e-05, "loss": 1.7325, "step": 19057 }, { "epoch": 0.93056640625, "grad_norm": 0.2007094919681549, "learning_rate": 5.5490789513702915e-05, "loss": 1.73, "step": 19058 }, { "epoch": 0.930615234375, "grad_norm": 0.17512376606464386, "learning_rate": 5.54831011085664e-05, "loss": 1.7049, "step": 19059 }, { "epoch": 0.9306640625, "grad_norm": 0.19094976782798767, "learning_rate": 5.547541802359834e-05, "loss": 1.7198, "step": 19060 }, { "epoch": 0.930712890625, "grad_norm": 0.18810218572616577, "learning_rate": 5.546774025898484e-05, "loss": 1.7378, "step": 19061 }, { "epoch": 0.93076171875, "grad_norm": 0.19439731538295746, "learning_rate": 5.5460067814912087e-05, "loss": 1.7614, "step": 19062 }, { "epoch": 0.930810546875, "grad_norm": 0.18441241979599, "learning_rate": 5.54524006915659e-05, "loss": 1.6977, "step": 19063 }, { "epoch": 0.930859375, "grad_norm": 0.19714216887950897, "learning_rate": 5.5444738889132225e-05, "loss": 1.7125, "step": 19064 }, { "epoch": 0.930908203125, "grad_norm": 0.20298361778259277, "learning_rate": 5.5437082407796635e-05, "loss": 1.7025, "step": 19065 }, { "epoch": 0.93095703125, "grad_norm": 0.1826719045639038, "learning_rate": 5.5429431247744784e-05, "loss": 1.7521, "step": 19066 }, { "epoch": 0.931005859375, "grad_norm": 0.20319733023643494, "learning_rate": 5.542178540916208e-05, "loss": 1.7169, "step": 19067 }, { "epoch": 0.9310546875, "grad_norm": 0.19571968913078308, "learning_rate": 5.5414144892233785e-05, "loss": 1.6908, "step": 19068 }, { "epoch": 0.931103515625, "grad_norm": 0.1773713380098343, "learning_rate": 5.540650969714511e-05, "loss": 1.6987, "step": 19069 }, { "epoch": 0.93115234375, "grad_norm": 0.18834826350212097, "learning_rate": 5.539887982408109e-05, "loss": 1.7267, "step": 19070 }, { "epoch": 0.931201171875, "grad_norm": 0.1934843361377716, "learning_rate": 5.5391255273226655e-05, "loss": 1.7226, "step": 19071 }, { "epoch": 0.93125, "grad_norm": 0.18170717358589172, "learning_rate": 5.538363604476655e-05, "loss": 1.7315, "step": 19072 }, { "epoch": 0.931298828125, "grad_norm": 0.1918388456106186, "learning_rate": 5.53760221388855e-05, "loss": 1.7301, "step": 19073 }, { "epoch": 0.93134765625, "grad_norm": 0.181952103972435, "learning_rate": 5.536841355576801e-05, "loss": 1.7463, "step": 19074 }, { "epoch": 0.931396484375, "grad_norm": 0.18814566731452942, "learning_rate": 5.536081029559842e-05, "loss": 1.7479, "step": 19075 }, { "epoch": 0.9314453125, "grad_norm": 0.18689857423305511, "learning_rate": 5.5353212358561085e-05, "loss": 1.7235, "step": 19076 }, { "epoch": 0.931494140625, "grad_norm": 0.17419208586215973, "learning_rate": 5.534561974484007e-05, "loss": 1.7104, "step": 19077 }, { "epoch": 0.93154296875, "grad_norm": 0.19344022870063782, "learning_rate": 5.533803245461947e-05, "loss": 1.7102, "step": 19078 }, { "epoch": 0.931591796875, "grad_norm": 0.1916085183620453, "learning_rate": 5.533045048808308e-05, "loss": 1.7117, "step": 19079 }, { "epoch": 0.931640625, "grad_norm": 0.20830568671226501, "learning_rate": 5.5322873845414773e-05, "loss": 1.7134, "step": 19080 }, { "epoch": 0.931689453125, "grad_norm": 0.20125320553779602, "learning_rate": 5.531530252679804e-05, "loss": 1.6932, "step": 19081 }, { "epoch": 0.93173828125, "grad_norm": 0.19851595163345337, "learning_rate": 5.530773653241648e-05, "loss": 1.7253, "step": 19082 }, { "epoch": 0.931787109375, "grad_norm": 0.17706121504306793, "learning_rate": 5.530017586245343e-05, "loss": 1.7247, "step": 19083 }, { "epoch": 0.9318359375, "grad_norm": 0.20216110348701477, "learning_rate": 5.529262051709211e-05, "loss": 1.7085, "step": 19084 }, { "epoch": 0.931884765625, "grad_norm": 0.17492592334747314, "learning_rate": 5.5285070496515625e-05, "loss": 1.7179, "step": 19085 }, { "epoch": 0.93193359375, "grad_norm": 0.19052192568778992, "learning_rate": 5.527752580090699e-05, "loss": 1.7062, "step": 19086 }, { "epoch": 0.931982421875, "grad_norm": 0.1739547699689865, "learning_rate": 5.5269986430449045e-05, "loss": 1.7174, "step": 19087 }, { "epoch": 0.93203125, "grad_norm": 0.18333540856838226, "learning_rate": 5.526245238532451e-05, "loss": 1.7169, "step": 19088 }, { "epoch": 0.932080078125, "grad_norm": 0.15854902565479279, "learning_rate": 5.5254923665715965e-05, "loss": 1.7112, "step": 19089 }, { "epoch": 0.93212890625, "grad_norm": 0.16918271780014038, "learning_rate": 5.524740027180588e-05, "loss": 1.713, "step": 19090 }, { "epoch": 0.932177734375, "grad_norm": 0.17137393355369568, "learning_rate": 5.5239882203776615e-05, "loss": 1.7173, "step": 19091 }, { "epoch": 0.9322265625, "grad_norm": 0.1643778383731842, "learning_rate": 5.523236946181035e-05, "loss": 1.7282, "step": 19092 }, { "epoch": 0.932275390625, "grad_norm": 0.16725093126296997, "learning_rate": 5.5224862046089166e-05, "loss": 1.7091, "step": 19093 }, { "epoch": 0.93232421875, "grad_norm": 0.15668049454689026, "learning_rate": 5.5217359956795026e-05, "loss": 1.7221, "step": 19094 }, { "epoch": 0.932373046875, "grad_norm": 0.1566595733165741, "learning_rate": 5.520986319410972e-05, "loss": 1.7153, "step": 19095 }, { "epoch": 0.932421875, "grad_norm": 0.20533007383346558, "learning_rate": 5.5202371758214996e-05, "loss": 1.7628, "step": 19096 }, { "epoch": 0.932470703125, "grad_norm": 0.15909861028194427, "learning_rate": 5.5194885649292335e-05, "loss": 1.7236, "step": 19097 }, { "epoch": 0.93251953125, "grad_norm": 0.18166421353816986, "learning_rate": 5.518740486752325e-05, "loss": 1.7466, "step": 19098 }, { "epoch": 0.932568359375, "grad_norm": 0.1791982799768448, "learning_rate": 5.517992941308898e-05, "loss": 1.737, "step": 19099 }, { "epoch": 0.9326171875, "grad_norm": 0.17827698588371277, "learning_rate": 5.5172459286170707e-05, "loss": 1.7387, "step": 19100 }, { "epoch": 0.932666015625, "grad_norm": 0.17991036176681519, "learning_rate": 5.5164994486949505e-05, "loss": 1.7193, "step": 19101 }, { "epoch": 0.93271484375, "grad_norm": 0.17240551114082336, "learning_rate": 5.515753501560628e-05, "loss": 1.7663, "step": 19102 }, { "epoch": 0.932763671875, "grad_norm": 0.1678207814693451, "learning_rate": 5.515008087232181e-05, "loss": 1.7462, "step": 19103 }, { "epoch": 0.9328125, "grad_norm": 0.17936155200004578, "learning_rate": 5.514263205727676e-05, "loss": 1.7252, "step": 19104 }, { "epoch": 0.932861328125, "grad_norm": 0.16843312978744507, "learning_rate": 5.513518857065164e-05, "loss": 1.7251, "step": 19105 }, { "epoch": 0.93291015625, "grad_norm": 0.1837855875492096, "learning_rate": 5.5127750412626883e-05, "loss": 1.726, "step": 19106 }, { "epoch": 0.932958984375, "grad_norm": 0.17359262704849243, "learning_rate": 5.5120317583382715e-05, "loss": 1.7369, "step": 19107 }, { "epoch": 0.9330078125, "grad_norm": 0.1817748248577118, "learning_rate": 5.511289008309932e-05, "loss": 1.7254, "step": 19108 }, { "epoch": 0.933056640625, "grad_norm": 0.1665397733449936, "learning_rate": 5.510546791195668e-05, "loss": 1.7089, "step": 19109 }, { "epoch": 0.93310546875, "grad_norm": 0.16270266473293304, "learning_rate": 5.50980510701347e-05, "loss": 1.7228, "step": 19110 }, { "epoch": 0.933154296875, "grad_norm": 0.16300298273563385, "learning_rate": 5.5090639557813084e-05, "loss": 1.7263, "step": 19111 }, { "epoch": 0.933203125, "grad_norm": 0.18473996222019196, "learning_rate": 5.5083233375171545e-05, "loss": 1.7151, "step": 19112 }, { "epoch": 0.933251953125, "grad_norm": 0.15672439336776733, "learning_rate": 5.507583252238948e-05, "loss": 1.7015, "step": 19113 }, { "epoch": 0.93330078125, "grad_norm": 0.16240201890468597, "learning_rate": 5.5068436999646324e-05, "loss": 1.7159, "step": 19114 }, { "epoch": 0.933349609375, "grad_norm": 0.16571909189224243, "learning_rate": 5.506104680712126e-05, "loss": 1.7242, "step": 19115 }, { "epoch": 0.9333984375, "grad_norm": 0.1750631481409073, "learning_rate": 5.505366194499345e-05, "loss": 1.7057, "step": 19116 }, { "epoch": 0.933447265625, "grad_norm": 0.17542725801467896, "learning_rate": 5.504628241344185e-05, "loss": 1.7131, "step": 19117 }, { "epoch": 0.93349609375, "grad_norm": 0.1600482314825058, "learning_rate": 5.5038908212645305e-05, "loss": 1.7045, "step": 19118 }, { "epoch": 0.933544921875, "grad_norm": 0.15960653126239777, "learning_rate": 5.50315393427825e-05, "loss": 1.7655, "step": 19119 }, { "epoch": 0.93359375, "grad_norm": 0.1780203878879547, "learning_rate": 5.502417580403207e-05, "loss": 1.7199, "step": 19120 }, { "epoch": 0.933642578125, "grad_norm": 0.15823329985141754, "learning_rate": 5.5016817596572486e-05, "loss": 1.7467, "step": 19121 }, { "epoch": 0.93369140625, "grad_norm": 0.17424337565898895, "learning_rate": 5.500946472058206e-05, "loss": 1.7433, "step": 19122 }, { "epoch": 0.933740234375, "grad_norm": 0.15901175141334534, "learning_rate": 5.500211717623899e-05, "loss": 1.7179, "step": 19123 }, { "epoch": 0.9337890625, "grad_norm": 0.15749746561050415, "learning_rate": 5.4994774963721344e-05, "loss": 1.7337, "step": 19124 }, { "epoch": 0.933837890625, "grad_norm": 0.1741018295288086, "learning_rate": 5.498743808320709e-05, "loss": 1.738, "step": 19125 }, { "epoch": 0.93388671875, "grad_norm": 0.17456956207752228, "learning_rate": 5.498010653487403e-05, "loss": 1.7223, "step": 19126 }, { "epoch": 0.933935546875, "grad_norm": 0.1705860197544098, "learning_rate": 5.497278031889985e-05, "loss": 1.7157, "step": 19127 }, { "epoch": 0.933984375, "grad_norm": 0.16782036423683167, "learning_rate": 5.496545943546208e-05, "loss": 1.7055, "step": 19128 }, { "epoch": 0.934033203125, "grad_norm": 0.16511528193950653, "learning_rate": 5.495814388473821e-05, "loss": 1.7108, "step": 19129 }, { "epoch": 0.93408203125, "grad_norm": 0.18176792562007904, "learning_rate": 5.4950833666905515e-05, "loss": 1.736, "step": 19130 }, { "epoch": 0.934130859375, "grad_norm": 0.15621207654476166, "learning_rate": 5.4943528782141106e-05, "loss": 1.7014, "step": 19131 }, { "epoch": 0.9341796875, "grad_norm": 0.18458688259124756, "learning_rate": 5.4936229230622105e-05, "loss": 1.6914, "step": 19132 }, { "epoch": 0.934228515625, "grad_norm": 0.18917974829673767, "learning_rate": 5.492893501252537e-05, "loss": 1.7472, "step": 19133 }, { "epoch": 0.93427734375, "grad_norm": 0.21644793450832367, "learning_rate": 5.49216461280277e-05, "loss": 1.7212, "step": 19134 }, { "epoch": 0.934326171875, "grad_norm": 0.16669519245624542, "learning_rate": 5.4914362577305754e-05, "loss": 1.7158, "step": 19135 }, { "epoch": 0.934375, "grad_norm": 0.18690243363380432, "learning_rate": 5.4907084360536035e-05, "loss": 1.6946, "step": 19136 }, { "epoch": 0.934423828125, "grad_norm": 0.19539880752563477, "learning_rate": 5.489981147789493e-05, "loss": 1.7081, "step": 19137 }, { "epoch": 0.93447265625, "grad_norm": 0.1807488352060318, "learning_rate": 5.4892543929558745e-05, "loss": 1.7037, "step": 19138 }, { "epoch": 0.934521484375, "grad_norm": 0.18616946041584015, "learning_rate": 5.4885281715703575e-05, "loss": 1.7227, "step": 19139 }, { "epoch": 0.9345703125, "grad_norm": 0.17777001857757568, "learning_rate": 5.4878024836505464e-05, "loss": 1.7312, "step": 19140 }, { "epoch": 0.934619140625, "grad_norm": 0.18547067046165466, "learning_rate": 5.487077329214026e-05, "loss": 1.6984, "step": 19141 }, { "epoch": 0.93466796875, "grad_norm": 0.17912597954273224, "learning_rate": 5.486352708278368e-05, "loss": 1.7135, "step": 19142 }, { "epoch": 0.934716796875, "grad_norm": 0.19919252395629883, "learning_rate": 5.48562862086114e-05, "loss": 1.7378, "step": 19143 }, { "epoch": 0.934765625, "grad_norm": 0.17099548876285553, "learning_rate": 5.48490506697989e-05, "loss": 1.711, "step": 19144 }, { "epoch": 0.934814453125, "grad_norm": 0.1901087462902069, "learning_rate": 5.484182046652152e-05, "loss": 1.7076, "step": 19145 }, { "epoch": 0.93486328125, "grad_norm": 0.19077566266059875, "learning_rate": 5.483459559895448e-05, "loss": 1.7145, "step": 19146 }, { "epoch": 0.934912109375, "grad_norm": 0.18115627765655518, "learning_rate": 5.48273760672729e-05, "loss": 1.7283, "step": 19147 }, { "epoch": 0.9349609375, "grad_norm": 0.19845743477344513, "learning_rate": 5.482016187165174e-05, "loss": 1.7022, "step": 19148 }, { "epoch": 0.935009765625, "grad_norm": 0.1818310171365738, "learning_rate": 5.4812953012265854e-05, "loss": 1.7139, "step": 19149 }, { "epoch": 0.93505859375, "grad_norm": 0.17754897475242615, "learning_rate": 5.480574948928999e-05, "loss": 1.755, "step": 19150 }, { "epoch": 0.935107421875, "grad_norm": 0.18174207210540771, "learning_rate": 5.479855130289864e-05, "loss": 1.6961, "step": 19151 }, { "epoch": 0.93515625, "grad_norm": 0.1800980269908905, "learning_rate": 5.4791358453266314e-05, "loss": 1.7484, "step": 19152 }, { "epoch": 0.935205078125, "grad_norm": 0.17302510142326355, "learning_rate": 5.478417094056735e-05, "loss": 1.7286, "step": 19153 }, { "epoch": 0.93525390625, "grad_norm": 0.1912689357995987, "learning_rate": 5.4776988764975886e-05, "loss": 1.7212, "step": 19154 }, { "epoch": 0.935302734375, "grad_norm": 0.18483878672122955, "learning_rate": 5.476981192666605e-05, "loss": 1.7022, "step": 19155 }, { "epoch": 0.9353515625, "grad_norm": 0.1598966121673584, "learning_rate": 5.4762640425811756e-05, "loss": 1.7358, "step": 19156 }, { "epoch": 0.935400390625, "grad_norm": 0.1792423278093338, "learning_rate": 5.475547426258678e-05, "loss": 1.741, "step": 19157 }, { "epoch": 0.93544921875, "grad_norm": 0.18070778250694275, "learning_rate": 5.474831343716485e-05, "loss": 1.7215, "step": 19158 }, { "epoch": 0.935498046875, "grad_norm": 0.16184137761592865, "learning_rate": 5.474115794971949e-05, "loss": 1.7104, "step": 19159 }, { "epoch": 0.935546875, "grad_norm": 0.17809313535690308, "learning_rate": 5.4734007800424126e-05, "loss": 1.7387, "step": 19160 }, { "epoch": 0.935595703125, "grad_norm": 0.18192078173160553, "learning_rate": 5.472686298945204e-05, "loss": 1.7166, "step": 19161 }, { "epoch": 0.93564453125, "grad_norm": 0.16800905764102936, "learning_rate": 5.4719723516976375e-05, "loss": 1.711, "step": 19162 }, { "epoch": 0.935693359375, "grad_norm": 0.19263629615306854, "learning_rate": 5.471258938317022e-05, "loss": 1.7326, "step": 19163 }, { "epoch": 0.9357421875, "grad_norm": 0.17266689240932465, "learning_rate": 5.47054605882064e-05, "loss": 1.7102, "step": 19164 }, { "epoch": 0.935791015625, "grad_norm": 0.19290819764137268, "learning_rate": 5.469833713225774e-05, "loss": 1.7128, "step": 19165 }, { "epoch": 0.93583984375, "grad_norm": 0.17996026575565338, "learning_rate": 5.4691219015496875e-05, "loss": 1.7009, "step": 19166 }, { "epoch": 0.935888671875, "grad_norm": 0.18991826474666595, "learning_rate": 5.468410623809633e-05, "loss": 1.7284, "step": 19167 }, { "epoch": 0.9359375, "grad_norm": 0.20634563267230988, "learning_rate": 5.467699880022844e-05, "loss": 1.7351, "step": 19168 }, { "epoch": 0.935986328125, "grad_norm": 0.20704908668994904, "learning_rate": 5.4669896702065514e-05, "loss": 1.7127, "step": 19169 }, { "epoch": 0.93603515625, "grad_norm": 0.19526036083698273, "learning_rate": 5.466279994377964e-05, "loss": 1.7516, "step": 19170 }, { "epoch": 0.936083984375, "grad_norm": 0.2039603739976883, "learning_rate": 5.4655708525542806e-05, "loss": 1.714, "step": 19171 }, { "epoch": 0.9361328125, "grad_norm": 0.1654931753873825, "learning_rate": 5.464862244752693e-05, "loss": 1.7033, "step": 19172 }, { "epoch": 0.936181640625, "grad_norm": 0.18330001831054688, "learning_rate": 5.4641541709903694e-05, "loss": 1.743, "step": 19173 }, { "epoch": 0.93623046875, "grad_norm": 0.19389908015727997, "learning_rate": 5.463446631284475e-05, "loss": 1.7086, "step": 19174 }, { "epoch": 0.936279296875, "grad_norm": 0.19304978847503662, "learning_rate": 5.4627396256521537e-05, "loss": 1.6972, "step": 19175 }, { "epoch": 0.936328125, "grad_norm": 0.20380902290344238, "learning_rate": 5.462033154110541e-05, "loss": 1.6998, "step": 19176 }, { "epoch": 0.936376953125, "grad_norm": 0.1885487586259842, "learning_rate": 5.461327216676762e-05, "loss": 1.7384, "step": 19177 }, { "epoch": 0.93642578125, "grad_norm": 0.2023526281118393, "learning_rate": 5.460621813367925e-05, "loss": 1.7215, "step": 19178 }, { "epoch": 0.936474609375, "grad_norm": 0.1945219337940216, "learning_rate": 5.459916944201123e-05, "loss": 1.6941, "step": 19179 }, { "epoch": 0.9365234375, "grad_norm": 0.17814095318317413, "learning_rate": 5.45921260919344e-05, "loss": 1.7326, "step": 19180 }, { "epoch": 0.936572265625, "grad_norm": 0.21507905423641205, "learning_rate": 5.4585088083619466e-05, "loss": 1.74, "step": 19181 }, { "epoch": 0.93662109375, "grad_norm": 0.19600605964660645, "learning_rate": 5.4578055417237014e-05, "loss": 1.7042, "step": 19182 }, { "epoch": 0.936669921875, "grad_norm": 0.2039310783147812, "learning_rate": 5.457102809295748e-05, "loss": 1.709, "step": 19183 }, { "epoch": 0.93671875, "grad_norm": 0.19659827649593353, "learning_rate": 5.456400611095115e-05, "loss": 1.7282, "step": 19184 }, { "epoch": 0.936767578125, "grad_norm": 0.17769095301628113, "learning_rate": 5.455698947138825e-05, "loss": 1.715, "step": 19185 }, { "epoch": 0.93681640625, "grad_norm": 0.17196150124073029, "learning_rate": 5.4549978174438776e-05, "loss": 1.7066, "step": 19186 }, { "epoch": 0.936865234375, "grad_norm": 0.18341100215911865, "learning_rate": 5.454297222027275e-05, "loss": 1.7034, "step": 19187 }, { "epoch": 0.9369140625, "grad_norm": 0.16251598298549652, "learning_rate": 5.453597160905985e-05, "loss": 1.7055, "step": 19188 }, { "epoch": 0.936962890625, "grad_norm": 0.17945443093776703, "learning_rate": 5.452897634096981e-05, "loss": 1.6939, "step": 19189 }, { "epoch": 0.93701171875, "grad_norm": 0.19742976129055023, "learning_rate": 5.452198641617216e-05, "loss": 1.709, "step": 19190 }, { "epoch": 0.937060546875, "grad_norm": 0.16325077414512634, "learning_rate": 5.4515001834836315e-05, "loss": 1.6898, "step": 19191 }, { "epoch": 0.937109375, "grad_norm": 0.18516220152378082, "learning_rate": 5.450802259713153e-05, "loss": 1.7242, "step": 19192 }, { "epoch": 0.937158203125, "grad_norm": 0.18391765654087067, "learning_rate": 5.450104870322694e-05, "loss": 1.6979, "step": 19193 }, { "epoch": 0.93720703125, "grad_norm": 0.15852774679660797, "learning_rate": 5.449408015329157e-05, "loss": 1.7319, "step": 19194 }, { "epoch": 0.937255859375, "grad_norm": 0.20193806290626526, "learning_rate": 5.448711694749436e-05, "loss": 1.6964, "step": 19195 }, { "epoch": 0.9373046875, "grad_norm": 0.1885325312614441, "learning_rate": 5.4480159086004e-05, "loss": 1.7114, "step": 19196 }, { "epoch": 0.937353515625, "grad_norm": 0.18206177651882172, "learning_rate": 5.447320656898917e-05, "loss": 1.7364, "step": 19197 }, { "epoch": 0.93740234375, "grad_norm": 0.174879789352417, "learning_rate": 5.446625939661832e-05, "loss": 1.7255, "step": 19198 }, { "epoch": 0.937451171875, "grad_norm": 0.17643427848815918, "learning_rate": 5.4459317569059863e-05, "loss": 1.7533, "step": 19199 }, { "epoch": 0.9375, "grad_norm": 0.1898118257522583, "learning_rate": 5.445238108648204e-05, "loss": 1.7082, "step": 19200 }, { "epoch": 0.937548828125, "grad_norm": 0.17484164237976074, "learning_rate": 5.444544994905294e-05, "loss": 1.731, "step": 19201 }, { "epoch": 0.93759765625, "grad_norm": 0.1662691980600357, "learning_rate": 5.4438524156940524e-05, "loss": 1.7233, "step": 19202 }, { "epoch": 0.937646484375, "grad_norm": 0.20343156158924103, "learning_rate": 5.443160371031272e-05, "loss": 1.7336, "step": 19203 }, { "epoch": 0.9376953125, "grad_norm": 0.1595224142074585, "learning_rate": 5.442468860933717e-05, "loss": 1.7264, "step": 19204 }, { "epoch": 0.937744140625, "grad_norm": 0.2212471067905426, "learning_rate": 5.441777885418151e-05, "loss": 1.7338, "step": 19205 }, { "epoch": 0.93779296875, "grad_norm": 0.1805216670036316, "learning_rate": 5.4410874445013194e-05, "loss": 1.7043, "step": 19206 }, { "epoch": 0.937841796875, "grad_norm": 0.16922344267368317, "learning_rate": 5.440397538199957e-05, "loss": 1.7137, "step": 19207 }, { "epoch": 0.937890625, "grad_norm": 0.22011035680770874, "learning_rate": 5.439708166530778e-05, "loss": 1.7122, "step": 19208 }, { "epoch": 0.937939453125, "grad_norm": 0.20186762511730194, "learning_rate": 5.4390193295104985e-05, "loss": 1.7178, "step": 19209 }, { "epoch": 0.93798828125, "grad_norm": 0.1907031089067459, "learning_rate": 5.4383310271558084e-05, "loss": 1.7256, "step": 19210 }, { "epoch": 0.938037109375, "grad_norm": 0.21617931127548218, "learning_rate": 5.43764325948339e-05, "loss": 1.7371, "step": 19211 }, { "epoch": 0.9380859375, "grad_norm": 0.18927648663520813, "learning_rate": 5.436956026509911e-05, "loss": 1.7325, "step": 19212 }, { "epoch": 0.938134765625, "grad_norm": 0.19275374710559845, "learning_rate": 5.43626932825203e-05, "loss": 1.7202, "step": 19213 }, { "epoch": 0.93818359375, "grad_norm": 0.2049146443605423, "learning_rate": 5.435583164726386e-05, "loss": 1.6999, "step": 19214 }, { "epoch": 0.938232421875, "grad_norm": 0.1907564401626587, "learning_rate": 5.4348975359496106e-05, "loss": 1.7101, "step": 19215 }, { "epoch": 0.93828125, "grad_norm": 0.20048922300338745, "learning_rate": 5.434212441938321e-05, "loss": 1.712, "step": 19216 }, { "epoch": 0.938330078125, "grad_norm": 0.17236338555812836, "learning_rate": 5.433527882709122e-05, "loss": 1.7067, "step": 19217 }, { "epoch": 0.93837890625, "grad_norm": 0.18253545463085175, "learning_rate": 5.4328438582785993e-05, "loss": 1.7068, "step": 19218 }, { "epoch": 0.938427734375, "grad_norm": 0.17819450795650482, "learning_rate": 5.432160368663337e-05, "loss": 1.711, "step": 19219 }, { "epoch": 0.9384765625, "grad_norm": 0.17783983051776886, "learning_rate": 5.431477413879896e-05, "loss": 1.7424, "step": 19220 }, { "epoch": 0.938525390625, "grad_norm": 0.19961757957935333, "learning_rate": 5.430794993944829e-05, "loss": 1.7178, "step": 19221 }, { "epoch": 0.93857421875, "grad_norm": 0.18971814215183258, "learning_rate": 5.4301131088746765e-05, "loss": 1.7232, "step": 19222 }, { "epoch": 0.938623046875, "grad_norm": 0.18119379878044128, "learning_rate": 5.429431758685964e-05, "loss": 1.7302, "step": 19223 }, { "epoch": 0.938671875, "grad_norm": 0.22464430332183838, "learning_rate": 5.428750943395201e-05, "loss": 1.7254, "step": 19224 }, { "epoch": 0.938720703125, "grad_norm": 0.17297881841659546, "learning_rate": 5.4280706630188955e-05, "loss": 1.7495, "step": 19225 }, { "epoch": 0.93876953125, "grad_norm": 0.1982857584953308, "learning_rate": 5.427390917573527e-05, "loss": 1.7394, "step": 19226 }, { "epoch": 0.938818359375, "grad_norm": 0.20271198451519012, "learning_rate": 5.426711707075572e-05, "loss": 1.7063, "step": 19227 }, { "epoch": 0.9388671875, "grad_norm": 0.17797693610191345, "learning_rate": 5.4260330315414925e-05, "loss": 1.7064, "step": 19228 }, { "epoch": 0.938916015625, "grad_norm": 0.17396511137485504, "learning_rate": 5.425354890987739e-05, "loss": 1.7597, "step": 19229 }, { "epoch": 0.93896484375, "grad_norm": 0.2104208916425705, "learning_rate": 5.4246772854307416e-05, "loss": 1.7246, "step": 19230 }, { "epoch": 0.939013671875, "grad_norm": 0.1710568517446518, "learning_rate": 5.424000214886926e-05, "loss": 1.7048, "step": 19231 }, { "epoch": 0.9390625, "grad_norm": 0.18564945459365845, "learning_rate": 5.423323679372701e-05, "loss": 1.7077, "step": 19232 }, { "epoch": 0.939111328125, "grad_norm": 0.1803162842988968, "learning_rate": 5.422647678904463e-05, "loss": 1.7275, "step": 19233 }, { "epoch": 0.93916015625, "grad_norm": 0.1899017095565796, "learning_rate": 5.4219722134985954e-05, "loss": 1.7303, "step": 19234 }, { "epoch": 0.939208984375, "grad_norm": 0.191674143075943, "learning_rate": 5.42129728317147e-05, "loss": 1.7138, "step": 19235 }, { "epoch": 0.9392578125, "grad_norm": 0.17816194891929626, "learning_rate": 5.4206228879394396e-05, "loss": 1.724, "step": 19236 }, { "epoch": 0.939306640625, "grad_norm": 0.18047592043876648, "learning_rate": 5.419949027818856e-05, "loss": 1.7009, "step": 19237 }, { "epoch": 0.93935546875, "grad_norm": 0.21007633209228516, "learning_rate": 5.419275702826042e-05, "loss": 1.7423, "step": 19238 }, { "epoch": 0.939404296875, "grad_norm": 0.1630827635526657, "learning_rate": 5.4186029129773246e-05, "loss": 1.7091, "step": 19239 }, { "epoch": 0.939453125, "grad_norm": 0.2195165604352951, "learning_rate": 5.417930658289003e-05, "loss": 1.7178, "step": 19240 }, { "epoch": 0.939501953125, "grad_norm": 0.19228048622608185, "learning_rate": 5.4172589387773744e-05, "loss": 1.6941, "step": 19241 }, { "epoch": 0.93955078125, "grad_norm": 0.1772821694612503, "learning_rate": 5.4165877544587155e-05, "loss": 1.7005, "step": 19242 }, { "epoch": 0.939599609375, "grad_norm": 0.19489255547523499, "learning_rate": 5.415917105349294e-05, "loss": 1.7265, "step": 19243 }, { "epoch": 0.9396484375, "grad_norm": 0.17462259531021118, "learning_rate": 5.415246991465364e-05, "loss": 1.7176, "step": 19244 }, { "epoch": 0.939697265625, "grad_norm": 0.20676717162132263, "learning_rate": 5.414577412823165e-05, "loss": 1.747, "step": 19245 }, { "epoch": 0.93974609375, "grad_norm": 0.1927424967288971, "learning_rate": 5.413908369438926e-05, "loss": 1.7365, "step": 19246 }, { "epoch": 0.939794921875, "grad_norm": 0.18839959800243378, "learning_rate": 5.413239861328862e-05, "loss": 1.6975, "step": 19247 }, { "epoch": 0.93984375, "grad_norm": 0.18528224527835846, "learning_rate": 5.412571888509173e-05, "loss": 1.7051, "step": 19248 }, { "epoch": 0.939892578125, "grad_norm": 0.20634853839874268, "learning_rate": 5.4119044509960504e-05, "loss": 1.7397, "step": 19249 }, { "epoch": 0.93994140625, "grad_norm": 0.17839157581329346, "learning_rate": 5.411237548805668e-05, "loss": 1.725, "step": 19250 }, { "epoch": 0.939990234375, "grad_norm": 0.19720704853534698, "learning_rate": 5.4105711819541927e-05, "loss": 1.6985, "step": 19251 }, { "epoch": 0.9400390625, "grad_norm": 0.1850074678659439, "learning_rate": 5.409905350457767e-05, "loss": 1.706, "step": 19252 }, { "epoch": 0.940087890625, "grad_norm": 0.19772014021873474, "learning_rate": 5.4092400543325344e-05, "loss": 1.7288, "step": 19253 }, { "epoch": 0.94013671875, "grad_norm": 0.17550140619277954, "learning_rate": 5.408575293594616e-05, "loss": 1.7028, "step": 19254 }, { "epoch": 0.940185546875, "grad_norm": 0.1869385540485382, "learning_rate": 5.4079110682601236e-05, "loss": 1.7298, "step": 19255 }, { "epoch": 0.940234375, "grad_norm": 0.16338738799095154, "learning_rate": 5.4072473783451524e-05, "loss": 1.7256, "step": 19256 }, { "epoch": 0.940283203125, "grad_norm": 0.1926645189523697, "learning_rate": 5.4065842238657945e-05, "loss": 1.7224, "step": 19257 }, { "epoch": 0.94033203125, "grad_norm": 0.1881629228591919, "learning_rate": 5.405921604838113e-05, "loss": 1.6962, "step": 19258 }, { "epoch": 0.940380859375, "grad_norm": 0.16965700685977936, "learning_rate": 5.405259521278176e-05, "loss": 1.7232, "step": 19259 }, { "epoch": 0.9404296875, "grad_norm": 0.18834081292152405, "learning_rate": 5.404597973202024e-05, "loss": 1.6978, "step": 19260 }, { "epoch": 0.940478515625, "grad_norm": 0.17200243473052979, "learning_rate": 5.403936960625691e-05, "loss": 1.7204, "step": 19261 }, { "epoch": 0.94052734375, "grad_norm": 0.17755237221717834, "learning_rate": 5.403276483565198e-05, "loss": 1.7242, "step": 19262 }, { "epoch": 0.940576171875, "grad_norm": 0.18529269099235535, "learning_rate": 5.402616542036551e-05, "loss": 1.7223, "step": 19263 }, { "epoch": 0.940625, "grad_norm": 0.1846979409456253, "learning_rate": 5.401957136055745e-05, "loss": 1.7176, "step": 19264 }, { "epoch": 0.940673828125, "grad_norm": 0.16296321153640747, "learning_rate": 5.401298265638762e-05, "loss": 1.7187, "step": 19265 }, { "epoch": 0.94072265625, "grad_norm": 0.18892763555049896, "learning_rate": 5.400639930801567e-05, "loss": 1.7188, "step": 19266 }, { "epoch": 0.940771484375, "grad_norm": 0.1593358963727951, "learning_rate": 5.3999821315601205e-05, "loss": 1.7118, "step": 19267 }, { "epoch": 0.9408203125, "grad_norm": 0.17591656744480133, "learning_rate": 5.3993248679303606e-05, "loss": 1.676, "step": 19268 }, { "epoch": 0.940869140625, "grad_norm": 0.16386236250400543, "learning_rate": 5.3986681399282215e-05, "loss": 1.7221, "step": 19269 }, { "epoch": 0.94091796875, "grad_norm": 0.19257576763629913, "learning_rate": 5.398011947569609e-05, "loss": 1.718, "step": 19270 }, { "epoch": 0.940966796875, "grad_norm": 0.1616230010986328, "learning_rate": 5.3973562908704404e-05, "loss": 1.7169, "step": 19271 }, { "epoch": 0.941015625, "grad_norm": 0.1966991275548935, "learning_rate": 5.396701169846593e-05, "loss": 1.711, "step": 19272 }, { "epoch": 0.941064453125, "grad_norm": 0.19112528860569, "learning_rate": 5.3960465845139555e-05, "loss": 1.6892, "step": 19273 }, { "epoch": 0.94111328125, "grad_norm": 0.1579350382089615, "learning_rate": 5.395392534888384e-05, "loss": 1.7062, "step": 19274 }, { "epoch": 0.941162109375, "grad_norm": 0.19865132868289948, "learning_rate": 5.3947390209857356e-05, "loss": 1.7398, "step": 19275 }, { "epoch": 0.9412109375, "grad_norm": 0.1797996312379837, "learning_rate": 5.394086042821846e-05, "loss": 1.6971, "step": 19276 }, { "epoch": 0.941259765625, "grad_norm": 0.160263329744339, "learning_rate": 5.39343360041254e-05, "loss": 1.7089, "step": 19277 }, { "epoch": 0.94130859375, "grad_norm": 0.19233901798725128, "learning_rate": 5.392781693773631e-05, "loss": 1.7163, "step": 19278 }, { "epoch": 0.941357421875, "grad_norm": 0.19284074008464813, "learning_rate": 5.392130322920918e-05, "loss": 1.7186, "step": 19279 }, { "epoch": 0.94140625, "grad_norm": 0.16738517582416534, "learning_rate": 5.391479487870188e-05, "loss": 1.7285, "step": 19280 }, { "epoch": 0.941455078125, "grad_norm": 0.21919503808021545, "learning_rate": 5.390829188637215e-05, "loss": 1.7183, "step": 19281 }, { "epoch": 0.94150390625, "grad_norm": 0.17016719281673431, "learning_rate": 5.3901794252377576e-05, "loss": 1.7213, "step": 19282 }, { "epoch": 0.941552734375, "grad_norm": 0.1785038560628891, "learning_rate": 5.3895301976875646e-05, "loss": 1.7173, "step": 19283 }, { "epoch": 0.9416015625, "grad_norm": 0.19256967306137085, "learning_rate": 5.3888815060023715e-05, "loss": 1.7244, "step": 19284 }, { "epoch": 0.941650390625, "grad_norm": 0.16942615807056427, "learning_rate": 5.3882333501979014e-05, "loss": 1.7287, "step": 19285 }, { "epoch": 0.94169921875, "grad_norm": 0.17863231897354126, "learning_rate": 5.3875857302898554e-05, "loss": 1.6972, "step": 19286 }, { "epoch": 0.941748046875, "grad_norm": 0.1810971051454544, "learning_rate": 5.38693864629394e-05, "loss": 1.7059, "step": 19287 }, { "epoch": 0.941796875, "grad_norm": 0.18703223764896393, "learning_rate": 5.386292098225826e-05, "loss": 1.7311, "step": 19288 }, { "epoch": 0.941845703125, "grad_norm": 0.17641140520572662, "learning_rate": 5.385646086101194e-05, "loss": 1.7171, "step": 19289 }, { "epoch": 0.94189453125, "grad_norm": 0.17208799719810486, "learning_rate": 5.385000609935692e-05, "loss": 1.7269, "step": 19290 }, { "epoch": 0.941943359375, "grad_norm": 0.17957788705825806, "learning_rate": 5.384355669744969e-05, "loss": 1.7057, "step": 19291 }, { "epoch": 0.9419921875, "grad_norm": 0.1566953808069229, "learning_rate": 5.383711265544653e-05, "loss": 1.7257, "step": 19292 }, { "epoch": 0.942041015625, "grad_norm": 0.1719968020915985, "learning_rate": 5.383067397350365e-05, "loss": 1.7202, "step": 19293 }, { "epoch": 0.94208984375, "grad_norm": 0.17101794481277466, "learning_rate": 5.3824240651777035e-05, "loss": 1.7197, "step": 19294 }, { "epoch": 0.942138671875, "grad_norm": 0.1710592806339264, "learning_rate": 5.3817812690422665e-05, "loss": 1.7269, "step": 19295 }, { "epoch": 0.9421875, "grad_norm": 0.16433458030223846, "learning_rate": 5.381139008959629e-05, "loss": 1.7026, "step": 19296 }, { "epoch": 0.942236328125, "grad_norm": 0.1721886694431305, "learning_rate": 5.3804972849453565e-05, "loss": 1.7098, "step": 19297 }, { "epoch": 0.94228515625, "grad_norm": 0.1712203472852707, "learning_rate": 5.379856097015004e-05, "loss": 1.7073, "step": 19298 }, { "epoch": 0.942333984375, "grad_norm": 0.19561932981014252, "learning_rate": 5.379215445184111e-05, "loss": 1.7237, "step": 19299 }, { "epoch": 0.9423828125, "grad_norm": 0.17695951461791992, "learning_rate": 5.378575329468203e-05, "loss": 1.7098, "step": 19300 }, { "epoch": 0.942431640625, "grad_norm": 0.16419348120689392, "learning_rate": 5.3779357498827946e-05, "loss": 1.7209, "step": 19301 }, { "epoch": 0.94248046875, "grad_norm": 0.22034692764282227, "learning_rate": 5.377296706443384e-05, "loss": 1.6929, "step": 19302 }, { "epoch": 0.942529296875, "grad_norm": 0.2084118127822876, "learning_rate": 5.3766581991654644e-05, "loss": 1.7214, "step": 19303 }, { "epoch": 0.942578125, "grad_norm": 0.1960430145263672, "learning_rate": 5.3760202280645014e-05, "loss": 1.7262, "step": 19304 }, { "epoch": 0.942626953125, "grad_norm": 0.17989110946655273, "learning_rate": 5.375382793155967e-05, "loss": 1.7295, "step": 19305 }, { "epoch": 0.94267578125, "grad_norm": 0.2061346173286438, "learning_rate": 5.3747458944553035e-05, "loss": 1.7264, "step": 19306 }, { "epoch": 0.942724609375, "grad_norm": 0.21949590742588043, "learning_rate": 5.37410953197795e-05, "loss": 1.7044, "step": 19307 }, { "epoch": 0.9427734375, "grad_norm": 0.1587376743555069, "learning_rate": 5.373473705739326e-05, "loss": 1.7221, "step": 19308 }, { "epoch": 0.942822265625, "grad_norm": 0.1985331028699875, "learning_rate": 5.372838415754844e-05, "loss": 1.7476, "step": 19309 }, { "epoch": 0.94287109375, "grad_norm": 0.20928092300891876, "learning_rate": 5.372203662039902e-05, "loss": 1.7202, "step": 19310 }, { "epoch": 0.942919921875, "grad_norm": 0.1610950231552124, "learning_rate": 5.371569444609879e-05, "loss": 1.7254, "step": 19311 }, { "epoch": 0.94296875, "grad_norm": 0.17462210357189178, "learning_rate": 5.370935763480147e-05, "loss": 1.6815, "step": 19312 }, { "epoch": 0.943017578125, "grad_norm": 0.19526325166225433, "learning_rate": 5.370302618666067e-05, "loss": 1.7232, "step": 19313 }, { "epoch": 0.94306640625, "grad_norm": 0.17306019365787506, "learning_rate": 5.3696700101829824e-05, "loss": 1.7372, "step": 19314 }, { "epoch": 0.943115234375, "grad_norm": 0.2079641968011856, "learning_rate": 5.3690379380462215e-05, "loss": 1.7043, "step": 19315 }, { "epoch": 0.9431640625, "grad_norm": 0.1849195957183838, "learning_rate": 5.3684064022711086e-05, "loss": 1.7003, "step": 19316 }, { "epoch": 0.943212890625, "grad_norm": 0.17179818451404572, "learning_rate": 5.3677754028729464e-05, "loss": 1.7002, "step": 19317 }, { "epoch": 0.94326171875, "grad_norm": 0.16843010485172272, "learning_rate": 5.3671449398670276e-05, "loss": 1.7021, "step": 19318 }, { "epoch": 0.943310546875, "grad_norm": 0.20243006944656372, "learning_rate": 5.366515013268634e-05, "loss": 1.7256, "step": 19319 }, { "epoch": 0.943359375, "grad_norm": 0.17087924480438232, "learning_rate": 5.3658856230930294e-05, "loss": 1.7221, "step": 19320 }, { "epoch": 0.943408203125, "grad_norm": 0.1751481294631958, "learning_rate": 5.3652567693554716e-05, "loss": 1.7212, "step": 19321 }, { "epoch": 0.94345703125, "grad_norm": 0.19185616075992584, "learning_rate": 5.364628452071192e-05, "loss": 1.727, "step": 19322 }, { "epoch": 0.943505859375, "grad_norm": 0.15937930345535278, "learning_rate": 5.3640006712554335e-05, "loss": 1.7246, "step": 19323 }, { "epoch": 0.9435546875, "grad_norm": 0.1888284832239151, "learning_rate": 5.363373426923396e-05, "loss": 1.7175, "step": 19324 }, { "epoch": 0.943603515625, "grad_norm": 0.18603599071502686, "learning_rate": 5.362746719090295e-05, "loss": 1.7132, "step": 19325 }, { "epoch": 0.94365234375, "grad_norm": 0.17718957364559174, "learning_rate": 5.3621205477713044e-05, "loss": 1.698, "step": 19326 }, { "epoch": 0.943701171875, "grad_norm": 0.17332996428012848, "learning_rate": 5.361494912981614e-05, "loss": 1.7214, "step": 19327 }, { "epoch": 0.94375, "grad_norm": 0.20708294212818146, "learning_rate": 5.360869814736378e-05, "loss": 1.7152, "step": 19328 }, { "epoch": 0.943798828125, "grad_norm": 0.18320728838443756, "learning_rate": 5.360245253050746e-05, "loss": 1.7035, "step": 19329 }, { "epoch": 0.94384765625, "grad_norm": 0.18285304307937622, "learning_rate": 5.3596212279398615e-05, "loss": 1.7227, "step": 19330 }, { "epoch": 0.943896484375, "grad_norm": 0.20477482676506042, "learning_rate": 5.3589977394188416e-05, "loss": 1.7181, "step": 19331 }, { "epoch": 0.9439453125, "grad_norm": 0.17855113744735718, "learning_rate": 5.358374787502798e-05, "loss": 1.7463, "step": 19332 }, { "epoch": 0.943994140625, "grad_norm": 0.1752084642648697, "learning_rate": 5.3577523722068316e-05, "loss": 1.7234, "step": 19333 }, { "epoch": 0.94404296875, "grad_norm": 0.18078047037124634, "learning_rate": 5.357130493546025e-05, "loss": 1.7216, "step": 19334 }, { "epoch": 0.944091796875, "grad_norm": 0.165853351354599, "learning_rate": 5.356509151535452e-05, "loss": 1.7087, "step": 19335 }, { "epoch": 0.944140625, "grad_norm": 0.19814912974834442, "learning_rate": 5.355888346190166e-05, "loss": 1.7129, "step": 19336 }, { "epoch": 0.944189453125, "grad_norm": 0.2004590481519699, "learning_rate": 5.3552680775252216e-05, "loss": 1.7157, "step": 19337 }, { "epoch": 0.94423828125, "grad_norm": 0.17778463661670685, "learning_rate": 5.3546483455556416e-05, "loss": 1.7188, "step": 19338 }, { "epoch": 0.944287109375, "grad_norm": 0.17652283608913422, "learning_rate": 5.354029150296453e-05, "loss": 1.7026, "step": 19339 }, { "epoch": 0.9443359375, "grad_norm": 0.19743748009204865, "learning_rate": 5.353410491762656e-05, "loss": 1.692, "step": 19340 }, { "epoch": 0.944384765625, "grad_norm": 0.17141248285770416, "learning_rate": 5.352792369969254e-05, "loss": 1.7289, "step": 19341 }, { "epoch": 0.94443359375, "grad_norm": 0.18435679376125336, "learning_rate": 5.3521747849312165e-05, "loss": 1.7236, "step": 19342 }, { "epoch": 0.944482421875, "grad_norm": 0.22261390089988708, "learning_rate": 5.3515577366635206e-05, "loss": 1.7252, "step": 19343 }, { "epoch": 0.94453125, "grad_norm": 0.17766475677490234, "learning_rate": 5.3509412251811124e-05, "loss": 1.7053, "step": 19344 }, { "epoch": 0.944580078125, "grad_norm": 0.24491475522518158, "learning_rate": 5.350325250498942e-05, "loss": 1.7076, "step": 19345 }, { "epoch": 0.94462890625, "grad_norm": 0.18065744638442993, "learning_rate": 5.3497098126319305e-05, "loss": 1.732, "step": 19346 }, { "epoch": 0.944677734375, "grad_norm": 0.1755867302417755, "learning_rate": 5.3490949115949995e-05, "loss": 1.7008, "step": 19347 }, { "epoch": 0.9447265625, "grad_norm": 0.22149460017681122, "learning_rate": 5.348480547403049e-05, "loss": 1.7028, "step": 19348 }, { "epoch": 0.944775390625, "grad_norm": 0.1591416448354721, "learning_rate": 5.3478667200709685e-05, "loss": 1.7575, "step": 19349 }, { "epoch": 0.94482421875, "grad_norm": 0.18464119732379913, "learning_rate": 5.3472534296136325e-05, "loss": 1.7192, "step": 19350 }, { "epoch": 0.944873046875, "grad_norm": 0.1901557743549347, "learning_rate": 5.3466406760459095e-05, "loss": 1.7159, "step": 19351 }, { "epoch": 0.944921875, "grad_norm": 0.18362629413604736, "learning_rate": 5.346028459382647e-05, "loss": 1.7315, "step": 19352 }, { "epoch": 0.944970703125, "grad_norm": 0.18215543031692505, "learning_rate": 5.345416779638686e-05, "loss": 1.7078, "step": 19353 }, { "epoch": 0.94501953125, "grad_norm": 0.21237635612487793, "learning_rate": 5.344805636828845e-05, "loss": 1.731, "step": 19354 }, { "epoch": 0.945068359375, "grad_norm": 0.16573813557624817, "learning_rate": 5.344195030967939e-05, "loss": 1.732, "step": 19355 }, { "epoch": 0.9451171875, "grad_norm": 0.17011131346225739, "learning_rate": 5.343584962070766e-05, "loss": 1.7162, "step": 19356 }, { "epoch": 0.945166015625, "grad_norm": 0.18126840889453888, "learning_rate": 5.342975430152113e-05, "loss": 1.727, "step": 19357 }, { "epoch": 0.94521484375, "grad_norm": 0.18153010308742523, "learning_rate": 5.3423664352267494e-05, "loss": 1.7247, "step": 19358 }, { "epoch": 0.945263671875, "grad_norm": 0.1625993549823761, "learning_rate": 5.341757977309439e-05, "loss": 1.6993, "step": 19359 }, { "epoch": 0.9453125, "grad_norm": 0.1888958215713501, "learning_rate": 5.341150056414922e-05, "loss": 1.6933, "step": 19360 }, { "epoch": 0.945361328125, "grad_norm": 0.16700001060962677, "learning_rate": 5.34054267255794e-05, "loss": 1.6821, "step": 19361 }, { "epoch": 0.94541015625, "grad_norm": 0.16454827785491943, "learning_rate": 5.3399358257532054e-05, "loss": 1.6994, "step": 19362 }, { "epoch": 0.945458984375, "grad_norm": 0.1833314746618271, "learning_rate": 5.3393295160154315e-05, "loss": 1.6966, "step": 19363 }, { "epoch": 0.9455078125, "grad_norm": 0.18586567044258118, "learning_rate": 5.33872374335931e-05, "loss": 1.7326, "step": 19364 }, { "epoch": 0.945556640625, "grad_norm": 0.17334376275539398, "learning_rate": 5.338118507799523e-05, "loss": 1.7275, "step": 19365 }, { "epoch": 0.94560546875, "grad_norm": 0.17694097757339478, "learning_rate": 5.3375138093507356e-05, "loss": 1.6981, "step": 19366 }, { "epoch": 0.945654296875, "grad_norm": 0.17821311950683594, "learning_rate": 5.336909648027609e-05, "loss": 1.7157, "step": 19367 }, { "epoch": 0.945703125, "grad_norm": 0.17800374329090118, "learning_rate": 5.336306023844783e-05, "loss": 1.7149, "step": 19368 }, { "epoch": 0.945751953125, "grad_norm": 0.1809304654598236, "learning_rate": 5.335702936816886e-05, "loss": 1.683, "step": 19369 }, { "epoch": 0.94580078125, "grad_norm": 0.18256674706935883, "learning_rate": 5.335100386958535e-05, "loss": 1.7316, "step": 19370 }, { "epoch": 0.945849609375, "grad_norm": 0.18113726377487183, "learning_rate": 5.334498374284334e-05, "loss": 1.734, "step": 19371 }, { "epoch": 0.9458984375, "grad_norm": 0.15729153156280518, "learning_rate": 5.333896898808871e-05, "loss": 1.7197, "step": 19372 }, { "epoch": 0.945947265625, "grad_norm": 0.19680069386959076, "learning_rate": 5.333295960546726e-05, "loss": 1.7416, "step": 19373 }, { "epoch": 0.94599609375, "grad_norm": 0.1798873245716095, "learning_rate": 5.332695559512461e-05, "loss": 1.7065, "step": 19374 }, { "epoch": 0.946044921875, "grad_norm": 0.1961226761341095, "learning_rate": 5.33209569572063e-05, "loss": 1.7028, "step": 19375 }, { "epoch": 0.94609375, "grad_norm": 0.18039558827877045, "learning_rate": 5.331496369185767e-05, "loss": 1.6995, "step": 19376 }, { "epoch": 0.946142578125, "grad_norm": 0.19693011045455933, "learning_rate": 5.3308975799224015e-05, "loss": 1.7216, "step": 19377 }, { "epoch": 0.94619140625, "grad_norm": 0.2119114249944687, "learning_rate": 5.3302993279450415e-05, "loss": 1.7168, "step": 19378 }, { "epoch": 0.946240234375, "grad_norm": 0.1901889592409134, "learning_rate": 5.329701613268188e-05, "loss": 1.7186, "step": 19379 }, { "epoch": 0.9462890625, "grad_norm": 0.18826256692409515, "learning_rate": 5.3291044359063306e-05, "loss": 1.7192, "step": 19380 }, { "epoch": 0.946337890625, "grad_norm": 0.1816982924938202, "learning_rate": 5.3285077958739354e-05, "loss": 1.72, "step": 19381 }, { "epoch": 0.94638671875, "grad_norm": 0.19685539603233337, "learning_rate": 5.327911693185468e-05, "loss": 1.7212, "step": 19382 }, { "epoch": 0.946435546875, "grad_norm": 0.19533608853816986, "learning_rate": 5.327316127855375e-05, "loss": 1.7246, "step": 19383 }, { "epoch": 0.946484375, "grad_norm": 0.16501076519489288, "learning_rate": 5.326721099898086e-05, "loss": 1.6936, "step": 19384 }, { "epoch": 0.946533203125, "grad_norm": 0.1774587482213974, "learning_rate": 5.326126609328027e-05, "loss": 1.7419, "step": 19385 }, { "epoch": 0.94658203125, "grad_norm": 0.18944090604782104, "learning_rate": 5.325532656159603e-05, "loss": 1.7197, "step": 19386 }, { "epoch": 0.946630859375, "grad_norm": 0.1650717705488205, "learning_rate": 5.324939240407209e-05, "loss": 1.7066, "step": 19387 }, { "epoch": 0.9466796875, "grad_norm": 0.17627456784248352, "learning_rate": 5.32434636208523e-05, "loss": 1.7364, "step": 19388 }, { "epoch": 0.946728515625, "grad_norm": 0.17104971408843994, "learning_rate": 5.323754021208032e-05, "loss": 1.7217, "step": 19389 }, { "epoch": 0.94677734375, "grad_norm": 0.15649615228176117, "learning_rate": 5.3231622177899715e-05, "loss": 1.7297, "step": 19390 }, { "epoch": 0.946826171875, "grad_norm": 0.17802223563194275, "learning_rate": 5.322570951845392e-05, "loss": 1.7002, "step": 19391 }, { "epoch": 0.946875, "grad_norm": 0.18634946644306183, "learning_rate": 5.321980223388621e-05, "loss": 1.7494, "step": 19392 }, { "epoch": 0.946923828125, "grad_norm": 0.16820189356803894, "learning_rate": 5.321390032433977e-05, "loss": 1.7405, "step": 19393 }, { "epoch": 0.94697265625, "grad_norm": 0.17238648235797882, "learning_rate": 5.320800378995763e-05, "loss": 1.7099, "step": 19394 }, { "epoch": 0.947021484375, "grad_norm": 0.16757068037986755, "learning_rate": 5.3202112630882746e-05, "loss": 1.7111, "step": 19395 }, { "epoch": 0.9470703125, "grad_norm": 0.17162689566612244, "learning_rate": 5.319622684725781e-05, "loss": 1.7343, "step": 19396 }, { "epoch": 0.947119140625, "grad_norm": 0.15764261782169342, "learning_rate": 5.3190346439225523e-05, "loss": 1.7258, "step": 19397 }, { "epoch": 0.94716796875, "grad_norm": 0.17815154790878296, "learning_rate": 5.31844714069284e-05, "loss": 1.7021, "step": 19398 }, { "epoch": 0.947216796875, "grad_norm": 0.18562524020671844, "learning_rate": 5.317860175050879e-05, "loss": 1.712, "step": 19399 }, { "epoch": 0.947265625, "grad_norm": 0.18253138661384583, "learning_rate": 5.3172737470109006e-05, "loss": 1.6987, "step": 19400 }, { "epoch": 0.947314453125, "grad_norm": 0.19108688831329346, "learning_rate": 5.3166878565871115e-05, "loss": 1.7232, "step": 19401 }, { "epoch": 0.94736328125, "grad_norm": 0.17324574291706085, "learning_rate": 5.316102503793718e-05, "loss": 1.7309, "step": 19402 }, { "epoch": 0.947412109375, "grad_norm": 0.17730027437210083, "learning_rate": 5.3155176886449005e-05, "loss": 1.7229, "step": 19403 }, { "epoch": 0.9474609375, "grad_norm": 0.17791330814361572, "learning_rate": 5.3149334111548326e-05, "loss": 1.719, "step": 19404 }, { "epoch": 0.947509765625, "grad_norm": 0.17932821810245514, "learning_rate": 5.3143496713376785e-05, "loss": 1.7369, "step": 19405 }, { "epoch": 0.94755859375, "grad_norm": 0.15983901917934418, "learning_rate": 5.313766469207585e-05, "loss": 1.7077, "step": 19406 }, { "epoch": 0.947607421875, "grad_norm": 0.16833403706550598, "learning_rate": 5.313183804778684e-05, "loss": 1.6822, "step": 19407 }, { "epoch": 0.94765625, "grad_norm": 0.1808045506477356, "learning_rate": 5.312601678065099e-05, "loss": 1.7265, "step": 19408 }, { "epoch": 0.947705078125, "grad_norm": 0.2146819531917572, "learning_rate": 5.312020089080937e-05, "loss": 1.7297, "step": 19409 }, { "epoch": 0.94775390625, "grad_norm": 0.17504215240478516, "learning_rate": 5.3114390378402943e-05, "loss": 1.7035, "step": 19410 }, { "epoch": 0.947802734375, "grad_norm": 0.18253956735134125, "learning_rate": 5.310858524357254e-05, "loss": 1.725, "step": 19411 }, { "epoch": 0.9478515625, "grad_norm": 0.18157432973384857, "learning_rate": 5.3102785486458816e-05, "loss": 1.7374, "step": 19412 }, { "epoch": 0.947900390625, "grad_norm": 0.18858715891838074, "learning_rate": 5.3096991107202386e-05, "loss": 1.7363, "step": 19413 }, { "epoch": 0.94794921875, "grad_norm": 0.1606300324201584, "learning_rate": 5.3091202105943625e-05, "loss": 1.6956, "step": 19414 }, { "epoch": 0.947998046875, "grad_norm": 0.18056926131248474, "learning_rate": 5.308541848282285e-05, "loss": 1.7308, "step": 19415 }, { "epoch": 0.948046875, "grad_norm": 0.19330893456935883, "learning_rate": 5.307964023798027e-05, "loss": 1.7001, "step": 19416 }, { "epoch": 0.948095703125, "grad_norm": 0.1746804267168045, "learning_rate": 5.3073867371555885e-05, "loss": 1.7478, "step": 19417 }, { "epoch": 0.94814453125, "grad_norm": 0.1948101818561554, "learning_rate": 5.306809988368962e-05, "loss": 1.7268, "step": 19418 }, { "epoch": 0.948193359375, "grad_norm": 0.17357738316059113, "learning_rate": 5.306233777452123e-05, "loss": 1.7157, "step": 19419 }, { "epoch": 0.9482421875, "grad_norm": 0.20306238532066345, "learning_rate": 5.305658104419043e-05, "loss": 1.7452, "step": 19420 }, { "epoch": 0.948291015625, "grad_norm": 0.1728808432817459, "learning_rate": 5.305082969283666e-05, "loss": 1.7418, "step": 19421 }, { "epoch": 0.94833984375, "grad_norm": 0.18550196290016174, "learning_rate": 5.3045083720599346e-05, "loss": 1.7057, "step": 19422 }, { "epoch": 0.948388671875, "grad_norm": 0.19170357286930084, "learning_rate": 5.3039343127617775e-05, "loss": 1.7262, "step": 19423 }, { "epoch": 0.9484375, "grad_norm": 0.1613461673259735, "learning_rate": 5.303360791403103e-05, "loss": 1.7094, "step": 19424 }, { "epoch": 0.948486328125, "grad_norm": 0.19954870641231537, "learning_rate": 5.3027878079978105e-05, "loss": 1.7227, "step": 19425 }, { "epoch": 0.94853515625, "grad_norm": 0.18090218305587769, "learning_rate": 5.302215362559791e-05, "loss": 1.7284, "step": 19426 }, { "epoch": 0.948583984375, "grad_norm": 0.1814705729484558, "learning_rate": 5.301643455102914e-05, "loss": 1.7346, "step": 19427 }, { "epoch": 0.9486328125, "grad_norm": 0.19167745113372803, "learning_rate": 5.301072085641043e-05, "loss": 1.7086, "step": 19428 }, { "epoch": 0.948681640625, "grad_norm": 0.17057882249355316, "learning_rate": 5.300501254188024e-05, "loss": 1.7507, "step": 19429 }, { "epoch": 0.94873046875, "grad_norm": 0.16602568328380585, "learning_rate": 5.299930960757691e-05, "loss": 1.7029, "step": 19430 }, { "epoch": 0.948779296875, "grad_norm": 0.18410539627075195, "learning_rate": 5.29936120536387e-05, "loss": 1.726, "step": 19431 }, { "epoch": 0.948828125, "grad_norm": 0.20833854377269745, "learning_rate": 5.298791988020362e-05, "loss": 1.7213, "step": 19432 }, { "epoch": 0.948876953125, "grad_norm": 0.17537130415439606, "learning_rate": 5.298223308740968e-05, "loss": 1.6915, "step": 19433 }, { "epoch": 0.94892578125, "grad_norm": 0.20675985515117645, "learning_rate": 5.297655167539468e-05, "loss": 1.7263, "step": 19434 }, { "epoch": 0.948974609375, "grad_norm": 0.18958981335163116, "learning_rate": 5.2970875644296346e-05, "loss": 1.728, "step": 19435 }, { "epoch": 0.9490234375, "grad_norm": 0.19211570918560028, "learning_rate": 5.296520499425219e-05, "loss": 1.7229, "step": 19436 }, { "epoch": 0.949072265625, "grad_norm": 0.1994934231042862, "learning_rate": 5.295953972539968e-05, "loss": 1.7172, "step": 19437 }, { "epoch": 0.94912109375, "grad_norm": 0.19715629518032074, "learning_rate": 5.295387983787612e-05, "loss": 1.7097, "step": 19438 }, { "epoch": 0.949169921875, "grad_norm": 0.19230903685092926, "learning_rate": 5.2948225331818656e-05, "loss": 1.7552, "step": 19439 }, { "epoch": 0.94921875, "grad_norm": 0.19415879249572754, "learning_rate": 5.294257620736437e-05, "loss": 1.7375, "step": 19440 }, { "epoch": 0.949267578125, "grad_norm": 0.1822342723608017, "learning_rate": 5.293693246465013e-05, "loss": 1.7182, "step": 19441 }, { "epoch": 0.94931640625, "grad_norm": 0.2341160774230957, "learning_rate": 5.293129410381273e-05, "loss": 1.7142, "step": 19442 }, { "epoch": 0.949365234375, "grad_norm": 0.18179260194301605, "learning_rate": 5.292566112498884e-05, "loss": 1.7156, "step": 19443 }, { "epoch": 0.9494140625, "grad_norm": 0.19313287734985352, "learning_rate": 5.2920033528314946e-05, "loss": 1.7055, "step": 19444 }, { "epoch": 0.949462890625, "grad_norm": 0.21646511554718018, "learning_rate": 5.291441131392748e-05, "loss": 1.7292, "step": 19445 }, { "epoch": 0.94951171875, "grad_norm": 0.1685452163219452, "learning_rate": 5.290879448196269e-05, "loss": 1.7118, "step": 19446 }, { "epoch": 0.949560546875, "grad_norm": 0.4056761562824249, "learning_rate": 5.2903183032556645e-05, "loss": 1.7527, "step": 19447 }, { "epoch": 0.949609375, "grad_norm": 0.16675615310668945, "learning_rate": 5.289757696584542e-05, "loss": 1.7214, "step": 19448 }, { "epoch": 0.949658203125, "grad_norm": 0.19292423129081726, "learning_rate": 5.2891976281964856e-05, "loss": 1.7004, "step": 19449 }, { "epoch": 0.94970703125, "grad_norm": 0.1749865710735321, "learning_rate": 5.2886380981050684e-05, "loss": 1.7265, "step": 19450 }, { "epoch": 0.949755859375, "grad_norm": 0.20238423347473145, "learning_rate": 5.28807910632385e-05, "loss": 1.7157, "step": 19451 }, { "epoch": 0.9498046875, "grad_norm": 0.16921213269233704, "learning_rate": 5.2875206528663804e-05, "loss": 1.7439, "step": 19452 }, { "epoch": 0.949853515625, "grad_norm": 0.1701173335313797, "learning_rate": 5.286962737746192e-05, "loss": 1.7223, "step": 19453 }, { "epoch": 0.94990234375, "grad_norm": 0.16654089093208313, "learning_rate": 5.286405360976808e-05, "loss": 1.7243, "step": 19454 }, { "epoch": 0.949951171875, "grad_norm": 0.17604932188987732, "learning_rate": 5.2858485225717375e-05, "loss": 1.6982, "step": 19455 }, { "epoch": 0.95, "grad_norm": 0.17597559094429016, "learning_rate": 5.2852922225444714e-05, "loss": 1.7346, "step": 19456 }, { "epoch": 0.950048828125, "grad_norm": 0.18643918633460999, "learning_rate": 5.2847364609084994e-05, "loss": 1.733, "step": 19457 }, { "epoch": 0.95009765625, "grad_norm": 0.1740085780620575, "learning_rate": 5.284181237677283e-05, "loss": 1.7015, "step": 19458 }, { "epoch": 0.950146484375, "grad_norm": 0.18078140914440155, "learning_rate": 5.283626552864285e-05, "loss": 1.7193, "step": 19459 }, { "epoch": 0.9501953125, "grad_norm": 0.16635239124298096, "learning_rate": 5.283072406482945e-05, "loss": 1.7089, "step": 19460 }, { "epoch": 0.950244140625, "grad_norm": 0.20276327431201935, "learning_rate": 5.282518798546694e-05, "loss": 1.7262, "step": 19461 }, { "epoch": 0.95029296875, "grad_norm": 0.1935426890850067, "learning_rate": 5.281965729068951e-05, "loss": 1.7278, "step": 19462 }, { "epoch": 0.950341796875, "grad_norm": 0.1933811604976654, "learning_rate": 5.2814131980631176e-05, "loss": 1.7257, "step": 19463 }, { "epoch": 0.950390625, "grad_norm": 0.2005709558725357, "learning_rate": 5.280861205542586e-05, "loss": 1.7218, "step": 19464 }, { "epoch": 0.950439453125, "grad_norm": 0.19337253272533417, "learning_rate": 5.280309751520732e-05, "loss": 1.7366, "step": 19465 }, { "epoch": 0.95048828125, "grad_norm": 0.19613967835903168, "learning_rate": 5.2797588360109225e-05, "loss": 1.7304, "step": 19466 }, { "epoch": 0.950537109375, "grad_norm": 0.18575461208820343, "learning_rate": 5.27920845902651e-05, "loss": 1.7094, "step": 19467 }, { "epoch": 0.9505859375, "grad_norm": 0.1806202232837677, "learning_rate": 5.278658620580834e-05, "loss": 1.7105, "step": 19468 }, { "epoch": 0.950634765625, "grad_norm": 0.16960501670837402, "learning_rate": 5.278109320687216e-05, "loss": 1.7034, "step": 19469 }, { "epoch": 0.95068359375, "grad_norm": 0.17197437584400177, "learning_rate": 5.277560559358972e-05, "loss": 1.7343, "step": 19470 }, { "epoch": 0.950732421875, "grad_norm": 0.17642734944820404, "learning_rate": 5.277012336609403e-05, "loss": 1.7268, "step": 19471 }, { "epoch": 0.95078125, "grad_norm": 0.16704238951206207, "learning_rate": 5.276464652451792e-05, "loss": 1.7205, "step": 19472 }, { "epoch": 0.950830078125, "grad_norm": 0.16624781489372253, "learning_rate": 5.275917506899414e-05, "loss": 1.7279, "step": 19473 }, { "epoch": 0.95087890625, "grad_norm": 0.17059844732284546, "learning_rate": 5.275370899965531e-05, "loss": 1.7147, "step": 19474 }, { "epoch": 0.950927734375, "grad_norm": 0.1854737102985382, "learning_rate": 5.2748248316633874e-05, "loss": 1.7119, "step": 19475 }, { "epoch": 0.9509765625, "grad_norm": 0.15841048955917358, "learning_rate": 5.2742793020062226e-05, "loss": 1.7157, "step": 19476 }, { "epoch": 0.951025390625, "grad_norm": 0.17102691531181335, "learning_rate": 5.2737343110072526e-05, "loss": 1.7098, "step": 19477 }, { "epoch": 0.95107421875, "grad_norm": 0.1962209939956665, "learning_rate": 5.2731898586796904e-05, "loss": 1.6966, "step": 19478 }, { "epoch": 0.951123046875, "grad_norm": 0.15952515602111816, "learning_rate": 5.2726459450367254e-05, "loss": 1.7162, "step": 19479 }, { "epoch": 0.951171875, "grad_norm": 0.1951586902141571, "learning_rate": 5.272102570091546e-05, "loss": 1.7377, "step": 19480 }, { "epoch": 0.951220703125, "grad_norm": 0.1752651035785675, "learning_rate": 5.271559733857316e-05, "loss": 1.7333, "step": 19481 }, { "epoch": 0.95126953125, "grad_norm": 0.17296822369098663, "learning_rate": 5.271017436347198e-05, "loss": 1.7022, "step": 19482 }, { "epoch": 0.951318359375, "grad_norm": 0.188217431306839, "learning_rate": 5.2704756775743283e-05, "loss": 1.7239, "step": 19483 }, { "epoch": 0.9513671875, "grad_norm": 0.18337590992450714, "learning_rate": 5.269934457551839e-05, "loss": 1.7229, "step": 19484 }, { "epoch": 0.951416015625, "grad_norm": 0.1592545211315155, "learning_rate": 5.2693937762928466e-05, "loss": 1.7208, "step": 19485 }, { "epoch": 0.95146484375, "grad_norm": 0.18844814598560333, "learning_rate": 5.2688536338104606e-05, "loss": 1.7517, "step": 19486 }, { "epoch": 0.951513671875, "grad_norm": 0.1840697079896927, "learning_rate": 5.268314030117764e-05, "loss": 1.7156, "step": 19487 }, { "epoch": 0.9515625, "grad_norm": 0.1744319349527359, "learning_rate": 5.267774965227838e-05, "loss": 1.714, "step": 19488 }, { "epoch": 0.951611328125, "grad_norm": 0.20778246223926544, "learning_rate": 5.267236439153747e-05, "loss": 1.6945, "step": 19489 }, { "epoch": 0.95166015625, "grad_norm": 0.18256613612174988, "learning_rate": 5.266698451908542e-05, "loss": 1.7021, "step": 19490 }, { "epoch": 0.951708984375, "grad_norm": 0.1874130219221115, "learning_rate": 5.266161003505263e-05, "loss": 1.7113, "step": 19491 }, { "epoch": 0.9517578125, "grad_norm": 0.18339550495147705, "learning_rate": 5.265624093956934e-05, "loss": 1.7292, "step": 19492 }, { "epoch": 0.951806640625, "grad_norm": 0.1829349845647812, "learning_rate": 5.2650877232765655e-05, "loss": 1.7241, "step": 19493 }, { "epoch": 0.95185546875, "grad_norm": 0.17936640977859497, "learning_rate": 5.264551891477162e-05, "loss": 1.7111, "step": 19494 }, { "epoch": 0.951904296875, "grad_norm": 0.18171626329421997, "learning_rate": 5.264016598571708e-05, "loss": 1.7477, "step": 19495 }, { "epoch": 0.951953125, "grad_norm": 0.1803160458803177, "learning_rate": 5.263481844573176e-05, "loss": 1.6997, "step": 19496 }, { "epoch": 0.952001953125, "grad_norm": 0.18132853507995605, "learning_rate": 5.262947629494522e-05, "loss": 1.7049, "step": 19497 }, { "epoch": 0.95205078125, "grad_norm": 0.20546703040599823, "learning_rate": 5.262413953348702e-05, "loss": 1.7051, "step": 19498 }, { "epoch": 0.952099609375, "grad_norm": 0.15625853836536407, "learning_rate": 5.2618808161486425e-05, "loss": 1.7052, "step": 19499 }, { "epoch": 0.9521484375, "grad_norm": 0.21776829659938812, "learning_rate": 5.2613482179072704e-05, "loss": 1.7273, "step": 19500 }, { "epoch": 0.952197265625, "grad_norm": 0.1665053814649582, "learning_rate": 5.2608161586374884e-05, "loss": 1.7314, "step": 19501 }, { "epoch": 0.95224609375, "grad_norm": 0.1712280809879303, "learning_rate": 5.260284638352193e-05, "loss": 1.7192, "step": 19502 }, { "epoch": 0.952294921875, "grad_norm": 0.19056682288646698, "learning_rate": 5.259753657064267e-05, "loss": 1.738, "step": 19503 }, { "epoch": 0.95234375, "grad_norm": 0.1655058115720749, "learning_rate": 5.2592232147865805e-05, "loss": 1.7186, "step": 19504 }, { "epoch": 0.952392578125, "grad_norm": 0.16639912128448486, "learning_rate": 5.258693311531987e-05, "loss": 1.7297, "step": 19505 }, { "epoch": 0.95244140625, "grad_norm": 0.17249135673046112, "learning_rate": 5.258163947313327e-05, "loss": 1.7124, "step": 19506 }, { "epoch": 0.952490234375, "grad_norm": 0.1644812375307083, "learning_rate": 5.2576351221434355e-05, "loss": 1.709, "step": 19507 }, { "epoch": 0.9525390625, "grad_norm": 0.16641923785209656, "learning_rate": 5.257106836035124e-05, "loss": 1.7168, "step": 19508 }, { "epoch": 0.952587890625, "grad_norm": 0.19336912035942078, "learning_rate": 5.2565790890012006e-05, "loss": 1.7028, "step": 19509 }, { "epoch": 0.95263671875, "grad_norm": 0.15299035608768463, "learning_rate": 5.256051881054452e-05, "loss": 1.7486, "step": 19510 }, { "epoch": 0.952685546875, "grad_norm": 0.1746036559343338, "learning_rate": 5.255525212207655e-05, "loss": 1.6992, "step": 19511 }, { "epoch": 0.952734375, "grad_norm": 0.17264465987682343, "learning_rate": 5.254999082473577e-05, "loss": 1.7219, "step": 19512 }, { "epoch": 0.952783203125, "grad_norm": 0.16212409734725952, "learning_rate": 5.254473491864967e-05, "loss": 1.7188, "step": 19513 }, { "epoch": 0.95283203125, "grad_norm": 0.16703297197818756, "learning_rate": 5.253948440394565e-05, "loss": 1.71, "step": 19514 }, { "epoch": 0.952880859375, "grad_norm": 0.1776873767375946, "learning_rate": 5.2534239280750936e-05, "loss": 1.7136, "step": 19515 }, { "epoch": 0.9529296875, "grad_norm": 0.16595931351184845, "learning_rate": 5.252899954919267e-05, "loss": 1.701, "step": 19516 }, { "epoch": 0.952978515625, "grad_norm": 0.1661350578069687, "learning_rate": 5.252376520939782e-05, "loss": 1.726, "step": 19517 }, { "epoch": 0.95302734375, "grad_norm": 0.15403972566127777, "learning_rate": 5.2518536261493274e-05, "loss": 1.7414, "step": 19518 }, { "epoch": 0.953076171875, "grad_norm": 0.16549977660179138, "learning_rate": 5.2513312705605716e-05, "loss": 1.7048, "step": 19519 }, { "epoch": 0.953125, "grad_norm": 0.17058373987674713, "learning_rate": 5.2508094541861786e-05, "loss": 1.7235, "step": 19520 }, { "epoch": 0.953173828125, "grad_norm": 0.17146719992160797, "learning_rate": 5.2502881770387934e-05, "loss": 1.724, "step": 19521 }, { "epoch": 0.95322265625, "grad_norm": 0.18061329424381256, "learning_rate": 5.249767439131049e-05, "loss": 1.7235, "step": 19522 }, { "epoch": 0.953271484375, "grad_norm": 0.16250020265579224, "learning_rate": 5.249247240475566e-05, "loss": 1.7315, "step": 19523 }, { "epoch": 0.9533203125, "grad_norm": 0.1809968650341034, "learning_rate": 5.248727581084952e-05, "loss": 1.7383, "step": 19524 }, { "epoch": 0.953369140625, "grad_norm": 0.1860070824623108, "learning_rate": 5.248208460971803e-05, "loss": 1.7106, "step": 19525 }, { "epoch": 0.95341796875, "grad_norm": 0.16591709852218628, "learning_rate": 5.2476898801486976e-05, "loss": 1.7198, "step": 19526 }, { "epoch": 0.953466796875, "grad_norm": 0.15944527089595795, "learning_rate": 5.247171838628206e-05, "loss": 1.7081, "step": 19527 }, { "epoch": 0.953515625, "grad_norm": 0.19099164009094238, "learning_rate": 5.246654336422883e-05, "loss": 1.7443, "step": 19528 }, { "epoch": 0.953564453125, "grad_norm": 0.17116855084896088, "learning_rate": 5.24613737354527e-05, "loss": 1.7266, "step": 19529 }, { "epoch": 0.95361328125, "grad_norm": 0.1584916114807129, "learning_rate": 5.245620950007897e-05, "loss": 1.7027, "step": 19530 }, { "epoch": 0.953662109375, "grad_norm": 0.1779477447271347, "learning_rate": 5.245105065823276e-05, "loss": 1.7218, "step": 19531 }, { "epoch": 0.9537109375, "grad_norm": 0.16828884184360504, "learning_rate": 5.244589721003917e-05, "loss": 1.7323, "step": 19532 }, { "epoch": 0.953759765625, "grad_norm": 0.15413039922714233, "learning_rate": 5.2440749155623014e-05, "loss": 1.6913, "step": 19533 }, { "epoch": 0.95380859375, "grad_norm": 0.17923212051391602, "learning_rate": 5.2435606495109155e-05, "loss": 1.701, "step": 19534 }, { "epoch": 0.953857421875, "grad_norm": 0.14768998324871063, "learning_rate": 5.243046922862214e-05, "loss": 1.74, "step": 19535 }, { "epoch": 0.95390625, "grad_norm": 0.20432798564434052, "learning_rate": 5.2425337356286556e-05, "loss": 1.6957, "step": 19536 }, { "epoch": 0.953955078125, "grad_norm": 0.16037316620349884, "learning_rate": 5.24202108782267e-05, "loss": 1.7146, "step": 19537 }, { "epoch": 0.95400390625, "grad_norm": 0.16501522064208984, "learning_rate": 5.241508979456687e-05, "loss": 1.7218, "step": 19538 }, { "epoch": 0.954052734375, "grad_norm": 0.17989163100719452, "learning_rate": 5.240997410543114e-05, "loss": 1.7307, "step": 19539 }, { "epoch": 0.9541015625, "grad_norm": 0.16999216377735138, "learning_rate": 5.240486381094354e-05, "loss": 1.7307, "step": 19540 }, { "epoch": 0.954150390625, "grad_norm": 0.17062705755233765, "learning_rate": 5.239975891122787e-05, "loss": 1.7081, "step": 19541 }, { "epoch": 0.95419921875, "grad_norm": 0.16934075951576233, "learning_rate": 5.23946594064079e-05, "loss": 1.7159, "step": 19542 }, { "epoch": 0.954248046875, "grad_norm": 0.2009933590888977, "learning_rate": 5.238956529660719e-05, "loss": 1.706, "step": 19543 }, { "epoch": 0.954296875, "grad_norm": 0.1581937074661255, "learning_rate": 5.238447658194921e-05, "loss": 1.7245, "step": 19544 }, { "epoch": 0.954345703125, "grad_norm": 0.19399204850196838, "learning_rate": 5.237939326255729e-05, "loss": 1.7046, "step": 19545 }, { "epoch": 0.95439453125, "grad_norm": 0.18063591420650482, "learning_rate": 5.2374315338554636e-05, "loss": 1.6975, "step": 19546 }, { "epoch": 0.954443359375, "grad_norm": 0.17100289463996887, "learning_rate": 5.236924281006429e-05, "loss": 1.7091, "step": 19547 }, { "epoch": 0.9544921875, "grad_norm": 0.1836501955986023, "learning_rate": 5.2364175677209205e-05, "loss": 1.7146, "step": 19548 }, { "epoch": 0.954541015625, "grad_norm": 0.18450520932674408, "learning_rate": 5.235911394011219e-05, "loss": 1.7183, "step": 19549 }, { "epoch": 0.95458984375, "grad_norm": 0.21083389222621918, "learning_rate": 5.2354057598895916e-05, "loss": 1.7001, "step": 19550 }, { "epoch": 0.954638671875, "grad_norm": 0.171628937125206, "learning_rate": 5.234900665368291e-05, "loss": 1.7025, "step": 19551 }, { "epoch": 0.9546875, "grad_norm": 0.17952580749988556, "learning_rate": 5.2343961104595646e-05, "loss": 1.7285, "step": 19552 }, { "epoch": 0.954736328125, "grad_norm": 0.21061748266220093, "learning_rate": 5.233892095175631e-05, "loss": 1.7092, "step": 19553 }, { "epoch": 0.95478515625, "grad_norm": 0.16159874200820923, "learning_rate": 5.233388619528715e-05, "loss": 1.7074, "step": 19554 }, { "epoch": 0.954833984375, "grad_norm": 0.16905495524406433, "learning_rate": 5.2328856835310146e-05, "loss": 1.7084, "step": 19555 }, { "epoch": 0.9548828125, "grad_norm": 0.17956094443798065, "learning_rate": 5.232383287194717e-05, "loss": 1.7222, "step": 19556 }, { "epoch": 0.954931640625, "grad_norm": 0.16487114131450653, "learning_rate": 5.231881430532003e-05, "loss": 1.7261, "step": 19557 }, { "epoch": 0.95498046875, "grad_norm": 0.1993648111820221, "learning_rate": 5.231380113555029e-05, "loss": 1.7371, "step": 19558 }, { "epoch": 0.955029296875, "grad_norm": 0.2083011418581009, "learning_rate": 5.2308793362759495e-05, "loss": 1.7392, "step": 19559 }, { "epoch": 0.955078125, "grad_norm": 0.16637007892131805, "learning_rate": 5.2303790987069e-05, "loss": 1.7243, "step": 19560 }, { "epoch": 0.955126953125, "grad_norm": 0.18164397776126862, "learning_rate": 5.229879400860004e-05, "loss": 1.7292, "step": 19561 }, { "epoch": 0.95517578125, "grad_norm": 0.17989031970500946, "learning_rate": 5.2293802427473753e-05, "loss": 1.7212, "step": 19562 }, { "epoch": 0.955224609375, "grad_norm": 0.1926538199186325, "learning_rate": 5.2288816243811045e-05, "loss": 1.7215, "step": 19563 }, { "epoch": 0.9552734375, "grad_norm": 0.1846127063035965, "learning_rate": 5.228383545773284e-05, "loss": 1.7135, "step": 19564 }, { "epoch": 0.955322265625, "grad_norm": 0.16912822425365448, "learning_rate": 5.227886006935977e-05, "loss": 1.7173, "step": 19565 }, { "epoch": 0.95537109375, "grad_norm": 0.1988985389471054, "learning_rate": 5.2273890078812486e-05, "loss": 1.6818, "step": 19566 }, { "epoch": 0.955419921875, "grad_norm": 0.15908277034759521, "learning_rate": 5.226892548621139e-05, "loss": 1.6969, "step": 19567 }, { "epoch": 0.95546875, "grad_norm": 0.17046357691287994, "learning_rate": 5.226396629167684e-05, "loss": 1.7197, "step": 19568 }, { "epoch": 0.955517578125, "grad_norm": 0.18793705105781555, "learning_rate": 5.2259012495328986e-05, "loss": 1.7224, "step": 19569 }, { "epoch": 0.95556640625, "grad_norm": 0.16470707952976227, "learning_rate": 5.225406409728796e-05, "loss": 1.7207, "step": 19570 }, { "epoch": 0.955615234375, "grad_norm": 0.15903325378894806, "learning_rate": 5.224912109767362e-05, "loss": 1.7178, "step": 19571 }, { "epoch": 0.9556640625, "grad_norm": 0.1877879947423935, "learning_rate": 5.224418349660578e-05, "loss": 1.7313, "step": 19572 }, { "epoch": 0.955712890625, "grad_norm": 0.1830729991197586, "learning_rate": 5.2239251294204106e-05, "loss": 1.704, "step": 19573 }, { "epoch": 0.95576171875, "grad_norm": 0.16726191341876984, "learning_rate": 5.2234324490588155e-05, "loss": 1.7091, "step": 19574 }, { "epoch": 0.955810546875, "grad_norm": 0.19099773466587067, "learning_rate": 5.222940308587732e-05, "loss": 1.7047, "step": 19575 }, { "epoch": 0.955859375, "grad_norm": 0.16426017880439758, "learning_rate": 5.222448708019087e-05, "loss": 1.7088, "step": 19576 }, { "epoch": 0.955908203125, "grad_norm": 0.168794646859169, "learning_rate": 5.221957647364795e-05, "loss": 1.6833, "step": 19577 }, { "epoch": 0.95595703125, "grad_norm": 0.17929814755916595, "learning_rate": 5.2214671266367556e-05, "loss": 1.7041, "step": 19578 }, { "epoch": 0.956005859375, "grad_norm": 0.18267585337162018, "learning_rate": 5.220977145846862e-05, "loss": 1.7163, "step": 19579 }, { "epoch": 0.9560546875, "grad_norm": 0.1625693291425705, "learning_rate": 5.220487705006985e-05, "loss": 1.7207, "step": 19580 }, { "epoch": 0.956103515625, "grad_norm": 0.1751021295785904, "learning_rate": 5.2199988041289856e-05, "loss": 1.6948, "step": 19581 }, { "epoch": 0.95615234375, "grad_norm": 0.18824604153633118, "learning_rate": 5.21951044322472e-05, "loss": 1.6977, "step": 19582 }, { "epoch": 0.956201171875, "grad_norm": 0.18081675469875336, "learning_rate": 5.219022622306012e-05, "loss": 1.7251, "step": 19583 }, { "epoch": 0.95625, "grad_norm": 0.20824801921844482, "learning_rate": 5.218535341384697e-05, "loss": 1.7104, "step": 19584 }, { "epoch": 0.956298828125, "grad_norm": 0.192050501704216, "learning_rate": 5.2180486004725734e-05, "loss": 1.7312, "step": 19585 }, { "epoch": 0.95634765625, "grad_norm": 0.16724450886249542, "learning_rate": 5.2175623995814486e-05, "loss": 1.6956, "step": 19586 }, { "epoch": 0.956396484375, "grad_norm": 0.20182405412197113, "learning_rate": 5.2170767387230946e-05, "loss": 1.7252, "step": 19587 }, { "epoch": 0.9564453125, "grad_norm": 0.17686624825000763, "learning_rate": 5.2165916179092945e-05, "loss": 1.7307, "step": 19588 }, { "epoch": 0.956494140625, "grad_norm": 0.17753371596336365, "learning_rate": 5.216107037151793e-05, "loss": 1.7038, "step": 19589 }, { "epoch": 0.95654296875, "grad_norm": 0.1773853600025177, "learning_rate": 5.2156229964623434e-05, "loss": 1.7346, "step": 19590 }, { "epoch": 0.956591796875, "grad_norm": 0.18592669069766998, "learning_rate": 5.2151394958526715e-05, "loss": 1.7126, "step": 19591 }, { "epoch": 0.956640625, "grad_norm": 0.1863066554069519, "learning_rate": 5.214656535334498e-05, "loss": 1.7408, "step": 19592 }, { "epoch": 0.956689453125, "grad_norm": 0.1725461483001709, "learning_rate": 5.214174114919528e-05, "loss": 1.7195, "step": 19593 }, { "epoch": 0.95673828125, "grad_norm": 0.18579696118831635, "learning_rate": 5.213692234619453e-05, "loss": 1.6965, "step": 19594 }, { "epoch": 0.956787109375, "grad_norm": 0.17360694706439972, "learning_rate": 5.213210894445951e-05, "loss": 1.7262, "step": 19595 }, { "epoch": 0.9568359375, "grad_norm": 0.17373701930046082, "learning_rate": 5.2127300944106885e-05, "loss": 1.7202, "step": 19596 }, { "epoch": 0.956884765625, "grad_norm": 0.18443650007247925, "learning_rate": 5.212249834525316e-05, "loss": 1.7208, "step": 19597 }, { "epoch": 0.95693359375, "grad_norm": 0.1540227234363556, "learning_rate": 5.211770114801476e-05, "loss": 1.7311, "step": 19598 }, { "epoch": 0.956982421875, "grad_norm": 0.17952169477939606, "learning_rate": 5.21129093525079e-05, "loss": 1.723, "step": 19599 }, { "epoch": 0.95703125, "grad_norm": 0.1987178772687912, "learning_rate": 5.21081229588488e-05, "loss": 1.7253, "step": 19600 }, { "epoch": 0.957080078125, "grad_norm": 0.18613845109939575, "learning_rate": 5.2103341967153365e-05, "loss": 1.7183, "step": 19601 }, { "epoch": 0.95712890625, "grad_norm": 0.18096469342708588, "learning_rate": 5.209856637753753e-05, "loss": 1.7236, "step": 19602 }, { "epoch": 0.957177734375, "grad_norm": 0.1558893918991089, "learning_rate": 5.2093796190117014e-05, "loss": 1.6983, "step": 19603 }, { "epoch": 0.9572265625, "grad_norm": 0.1765030324459076, "learning_rate": 5.208903140500745e-05, "loss": 1.7107, "step": 19604 }, { "epoch": 0.957275390625, "grad_norm": 0.1711384356021881, "learning_rate": 5.2084272022324253e-05, "loss": 1.7321, "step": 19605 }, { "epoch": 0.95732421875, "grad_norm": 0.16715846955776215, "learning_rate": 5.2079518042182835e-05, "loss": 1.7102, "step": 19606 }, { "epoch": 0.957373046875, "grad_norm": 0.17694436013698578, "learning_rate": 5.207476946469838e-05, "loss": 1.7107, "step": 19607 }, { "epoch": 0.957421875, "grad_norm": 0.18486225605010986, "learning_rate": 5.207002628998601e-05, "loss": 1.7143, "step": 19608 }, { "epoch": 0.957470703125, "grad_norm": 0.16211967170238495, "learning_rate": 5.2065288518160627e-05, "loss": 1.7198, "step": 19609 }, { "epoch": 0.95751953125, "grad_norm": 0.1817048192024231, "learning_rate": 5.206055614933709e-05, "loss": 1.6979, "step": 19610 }, { "epoch": 0.957568359375, "grad_norm": 0.1645270586013794, "learning_rate": 5.2055829183630085e-05, "loss": 1.6854, "step": 19611 }, { "epoch": 0.9576171875, "grad_norm": 0.1806369423866272, "learning_rate": 5.205110762115416e-05, "loss": 1.7384, "step": 19612 }, { "epoch": 0.957666015625, "grad_norm": 0.1646173596382141, "learning_rate": 5.204639146202376e-05, "loss": 1.7054, "step": 19613 }, { "epoch": 0.95771484375, "grad_norm": 0.16661682724952698, "learning_rate": 5.2041680706353194e-05, "loss": 1.7113, "step": 19614 }, { "epoch": 0.957763671875, "grad_norm": 0.15931878983974457, "learning_rate": 5.203697535425661e-05, "loss": 1.716, "step": 19615 }, { "epoch": 0.9578125, "grad_norm": 0.1931285709142685, "learning_rate": 5.2032275405848056e-05, "loss": 1.7194, "step": 19616 }, { "epoch": 0.957861328125, "grad_norm": 0.16172640025615692, "learning_rate": 5.2027580861241425e-05, "loss": 1.7151, "step": 19617 }, { "epoch": 0.95791015625, "grad_norm": 0.1826617419719696, "learning_rate": 5.202289172055054e-05, "loss": 1.702, "step": 19618 }, { "epoch": 0.957958984375, "grad_norm": 0.19150637090206146, "learning_rate": 5.201820798388897e-05, "loss": 1.7362, "step": 19619 }, { "epoch": 0.9580078125, "grad_norm": 0.18279440701007843, "learning_rate": 5.2013529651370314e-05, "loss": 1.7215, "step": 19620 }, { "epoch": 0.958056640625, "grad_norm": 0.17803174257278442, "learning_rate": 5.20088567231079e-05, "loss": 1.7268, "step": 19621 }, { "epoch": 0.95810546875, "grad_norm": 0.180302232503891, "learning_rate": 5.200418919921496e-05, "loss": 1.7126, "step": 19622 }, { "epoch": 0.958154296875, "grad_norm": 0.20091716945171356, "learning_rate": 5.199952707980469e-05, "loss": 1.7391, "step": 19623 }, { "epoch": 0.958203125, "grad_norm": 0.18127456307411194, "learning_rate": 5.199487036499001e-05, "loss": 1.7196, "step": 19624 }, { "epoch": 0.958251953125, "grad_norm": 0.20016352832317352, "learning_rate": 5.199021905488381e-05, "loss": 1.7086, "step": 19625 }, { "epoch": 0.95830078125, "grad_norm": 0.1865532398223877, "learning_rate": 5.198557314959883e-05, "loss": 1.7325, "step": 19626 }, { "epoch": 0.958349609375, "grad_norm": 0.20168258249759674, "learning_rate": 5.198093264924764e-05, "loss": 1.7109, "step": 19627 }, { "epoch": 0.9583984375, "grad_norm": 0.19135530292987823, "learning_rate": 5.197629755394272e-05, "loss": 1.7261, "step": 19628 }, { "epoch": 0.958447265625, "grad_norm": 0.19731266796588898, "learning_rate": 5.197166786379642e-05, "loss": 1.7143, "step": 19629 }, { "epoch": 0.95849609375, "grad_norm": 0.19855035841464996, "learning_rate": 5.196704357892092e-05, "loss": 1.7301, "step": 19630 }, { "epoch": 0.958544921875, "grad_norm": 0.1830548197031021, "learning_rate": 5.1962424699428296e-05, "loss": 1.7352, "step": 19631 }, { "epoch": 0.95859375, "grad_norm": 0.18740350008010864, "learning_rate": 5.195781122543049e-05, "loss": 1.6871, "step": 19632 }, { "epoch": 0.958642578125, "grad_norm": 0.2004057914018631, "learning_rate": 5.195320315703932e-05, "loss": 1.7254, "step": 19633 }, { "epoch": 0.95869140625, "grad_norm": 0.17885857820510864, "learning_rate": 5.194860049436648e-05, "loss": 1.6834, "step": 19634 }, { "epoch": 0.958740234375, "grad_norm": 0.1938142031431198, "learning_rate": 5.19440032375235e-05, "loss": 1.7278, "step": 19635 }, { "epoch": 0.9587890625, "grad_norm": 0.21202562749385834, "learning_rate": 5.193941138662181e-05, "loss": 1.6734, "step": 19636 }, { "epoch": 0.958837890625, "grad_norm": 0.18834203481674194, "learning_rate": 5.193482494177265e-05, "loss": 1.755, "step": 19637 }, { "epoch": 0.95888671875, "grad_norm": 0.17422637343406677, "learning_rate": 5.193024390308726e-05, "loss": 1.7103, "step": 19638 }, { "epoch": 0.958935546875, "grad_norm": 0.21220266819000244, "learning_rate": 5.1925668270676625e-05, "loss": 1.6748, "step": 19639 }, { "epoch": 0.958984375, "grad_norm": 0.17846009135246277, "learning_rate": 5.1921098044651594e-05, "loss": 1.7458, "step": 19640 }, { "epoch": 0.959033203125, "grad_norm": 0.20319689810276031, "learning_rate": 5.1916533225123e-05, "loss": 1.7313, "step": 19641 }, { "epoch": 0.95908203125, "grad_norm": 0.19772037863731384, "learning_rate": 5.1911973812201434e-05, "loss": 1.7097, "step": 19642 }, { "epoch": 0.959130859375, "grad_norm": 0.23319923877716064, "learning_rate": 5.190741980599742e-05, "loss": 1.6973, "step": 19643 }, { "epoch": 0.9591796875, "grad_norm": 0.17321674525737762, "learning_rate": 5.190287120662132e-05, "loss": 1.7145, "step": 19644 }, { "epoch": 0.959228515625, "grad_norm": 0.18922443687915802, "learning_rate": 5.1898328014183364e-05, "loss": 1.7175, "step": 19645 }, { "epoch": 0.95927734375, "grad_norm": 0.1668263077735901, "learning_rate": 5.189379022879367e-05, "loss": 1.7155, "step": 19646 }, { "epoch": 0.959326171875, "grad_norm": 0.1697976142168045, "learning_rate": 5.1889257850562215e-05, "loss": 1.7246, "step": 19647 }, { "epoch": 0.959375, "grad_norm": 0.18151618540287018, "learning_rate": 5.188473087959885e-05, "loss": 1.7266, "step": 19648 }, { "epoch": 0.959423828125, "grad_norm": 0.18257290124893188, "learning_rate": 5.188020931601326e-05, "loss": 1.7119, "step": 19649 }, { "epoch": 0.95947265625, "grad_norm": 0.18062208592891693, "learning_rate": 5.187569315991506e-05, "loss": 1.7077, "step": 19650 }, { "epoch": 0.959521484375, "grad_norm": 0.16287341713905334, "learning_rate": 5.187118241141367e-05, "loss": 1.692, "step": 19651 }, { "epoch": 0.9595703125, "grad_norm": 0.15902946889400482, "learning_rate": 5.1866677070618485e-05, "loss": 1.7004, "step": 19652 }, { "epoch": 0.959619140625, "grad_norm": 0.17842136323451996, "learning_rate": 5.186217713763859e-05, "loss": 1.724, "step": 19653 }, { "epoch": 0.95966796875, "grad_norm": 0.1662401407957077, "learning_rate": 5.185768261258314e-05, "loss": 1.7209, "step": 19654 }, { "epoch": 0.959716796875, "grad_norm": 0.1842823028564453, "learning_rate": 5.1853193495561004e-05, "loss": 1.6971, "step": 19655 }, { "epoch": 0.959765625, "grad_norm": 0.17299184203147888, "learning_rate": 5.1848709786681e-05, "loss": 1.7147, "step": 19656 }, { "epoch": 0.959814453125, "grad_norm": 0.17142455279827118, "learning_rate": 5.18442314860518e-05, "loss": 1.7125, "step": 19657 }, { "epoch": 0.95986328125, "grad_norm": 0.1928454339504242, "learning_rate": 5.183975859378193e-05, "loss": 1.7359, "step": 19658 }, { "epoch": 0.959912109375, "grad_norm": 0.1660342812538147, "learning_rate": 5.1835291109979785e-05, "loss": 1.72, "step": 19659 }, { "epoch": 0.9599609375, "grad_norm": 0.1846991330385208, "learning_rate": 5.1830829034753644e-05, "loss": 1.7024, "step": 19660 }, { "epoch": 0.960009765625, "grad_norm": 0.1869836449623108, "learning_rate": 5.1826372368211665e-05, "loss": 1.7395, "step": 19661 }, { "epoch": 0.96005859375, "grad_norm": 0.17115400731563568, "learning_rate": 5.182192111046185e-05, "loss": 1.7434, "step": 19662 }, { "epoch": 0.960107421875, "grad_norm": 0.1769021600484848, "learning_rate": 5.1817475261612056e-05, "loss": 1.7081, "step": 19663 }, { "epoch": 0.96015625, "grad_norm": 0.20366200804710388, "learning_rate": 5.181303482177009e-05, "loss": 1.7183, "step": 19664 }, { "epoch": 0.960205078125, "grad_norm": 0.1840965449810028, "learning_rate": 5.1808599791043495e-05, "loss": 1.7285, "step": 19665 }, { "epoch": 0.96025390625, "grad_norm": 0.17835737764835358, "learning_rate": 5.180417016953979e-05, "loss": 1.7118, "step": 19666 }, { "epoch": 0.960302734375, "grad_norm": 0.1918899565935135, "learning_rate": 5.1799745957366376e-05, "loss": 1.7294, "step": 19667 }, { "epoch": 0.9603515625, "grad_norm": 0.1685774177312851, "learning_rate": 5.179532715463041e-05, "loss": 1.7024, "step": 19668 }, { "epoch": 0.960400390625, "grad_norm": 0.18570035696029663, "learning_rate": 5.1790913761439e-05, "loss": 1.7147, "step": 19669 }, { "epoch": 0.96044921875, "grad_norm": 0.18774324655532837, "learning_rate": 5.178650577789915e-05, "loss": 1.7005, "step": 19670 }, { "epoch": 0.960498046875, "grad_norm": 0.1926199495792389, "learning_rate": 5.1782103204117635e-05, "loss": 1.7029, "step": 19671 }, { "epoch": 0.960546875, "grad_norm": 0.18227896094322205, "learning_rate": 5.177770604020118e-05, "loss": 1.7189, "step": 19672 }, { "epoch": 0.960595703125, "grad_norm": 0.18177826702594757, "learning_rate": 5.1773314286256376e-05, "loss": 1.719, "step": 19673 }, { "epoch": 0.96064453125, "grad_norm": 0.17709694802761078, "learning_rate": 5.1768927942389625e-05, "loss": 1.7033, "step": 19674 }, { "epoch": 0.960693359375, "grad_norm": 0.18674983084201813, "learning_rate": 5.176454700870725e-05, "loss": 1.6978, "step": 19675 }, { "epoch": 0.9607421875, "grad_norm": 0.158392995595932, "learning_rate": 5.1760171485315415e-05, "loss": 1.712, "step": 19676 }, { "epoch": 0.960791015625, "grad_norm": 0.19457517564296722, "learning_rate": 5.175580137232019e-05, "loss": 1.7014, "step": 19677 }, { "epoch": 0.96083984375, "grad_norm": 0.16463764011859894, "learning_rate": 5.1751436669827453e-05, "loss": 1.7119, "step": 19678 }, { "epoch": 0.960888671875, "grad_norm": 0.17320433259010315, "learning_rate": 5.174707737794301e-05, "loss": 1.7262, "step": 19679 }, { "epoch": 0.9609375, "grad_norm": 0.16878312826156616, "learning_rate": 5.1742723496772516e-05, "loss": 1.7096, "step": 19680 }, { "epoch": 0.960986328125, "grad_norm": 0.1640959233045578, "learning_rate": 5.173837502642147e-05, "loss": 1.7085, "step": 19681 }, { "epoch": 0.96103515625, "grad_norm": 0.1715308278799057, "learning_rate": 5.173403196699528e-05, "loss": 1.6995, "step": 19682 }, { "epoch": 0.961083984375, "grad_norm": 0.17980554699897766, "learning_rate": 5.172969431859918e-05, "loss": 1.7297, "step": 19683 }, { "epoch": 0.9611328125, "grad_norm": 0.1605185568332672, "learning_rate": 5.1725362081338325e-05, "loss": 1.7089, "step": 19684 }, { "epoch": 0.961181640625, "grad_norm": 0.16970273852348328, "learning_rate": 5.17210352553177e-05, "loss": 1.7363, "step": 19685 }, { "epoch": 0.96123046875, "grad_norm": 0.17124754190444946, "learning_rate": 5.171671384064215e-05, "loss": 1.7342, "step": 19686 }, { "epoch": 0.961279296875, "grad_norm": 0.1621280163526535, "learning_rate": 5.1712397837416435e-05, "loss": 1.7096, "step": 19687 }, { "epoch": 0.961328125, "grad_norm": 0.19005867838859558, "learning_rate": 5.1708087245745154e-05, "loss": 1.7075, "step": 19688 }, { "epoch": 0.961376953125, "grad_norm": 0.15624982118606567, "learning_rate": 5.1703782065732774e-05, "loss": 1.7171, "step": 19689 }, { "epoch": 0.96142578125, "grad_norm": 0.19859996438026428, "learning_rate": 5.169948229748362e-05, "loss": 1.729, "step": 19690 }, { "epoch": 0.961474609375, "grad_norm": 0.17848682403564453, "learning_rate": 5.1695187941101896e-05, "loss": 1.6997, "step": 19691 }, { "epoch": 0.9615234375, "grad_norm": 0.19647367298603058, "learning_rate": 5.16908989966917e-05, "loss": 1.6815, "step": 19692 }, { "epoch": 0.961572265625, "grad_norm": 0.18623881042003632, "learning_rate": 5.168661546435699e-05, "loss": 1.7194, "step": 19693 }, { "epoch": 0.96162109375, "grad_norm": 0.17919033765792847, "learning_rate": 5.1682337344201555e-05, "loss": 1.7229, "step": 19694 }, { "epoch": 0.961669921875, "grad_norm": 0.19080884754657745, "learning_rate": 5.167806463632907e-05, "loss": 1.6869, "step": 19695 }, { "epoch": 0.96171875, "grad_norm": 0.1704850047826767, "learning_rate": 5.167379734084313e-05, "loss": 1.7067, "step": 19696 }, { "epoch": 0.961767578125, "grad_norm": 0.1838814914226532, "learning_rate": 5.1669535457847085e-05, "loss": 1.6957, "step": 19697 }, { "epoch": 0.96181640625, "grad_norm": 0.18920038640499115, "learning_rate": 5.166527898744432e-05, "loss": 1.6993, "step": 19698 }, { "epoch": 0.961865234375, "grad_norm": 0.19834692776203156, "learning_rate": 5.166102792973792e-05, "loss": 1.7103, "step": 19699 }, { "epoch": 0.9619140625, "grad_norm": 0.20165465772151947, "learning_rate": 5.165678228483092e-05, "loss": 1.7405, "step": 19700 }, { "epoch": 0.961962890625, "grad_norm": 0.16620884835720062, "learning_rate": 5.165254205282626e-05, "loss": 1.7055, "step": 19701 }, { "epoch": 0.96201171875, "grad_norm": 0.1643991470336914, "learning_rate": 5.164830723382667e-05, "loss": 1.7099, "step": 19702 }, { "epoch": 0.962060546875, "grad_norm": 0.18918867409229279, "learning_rate": 5.16440778279348e-05, "loss": 1.6984, "step": 19703 }, { "epoch": 0.962109375, "grad_norm": 0.1653657853603363, "learning_rate": 5.163985383525312e-05, "loss": 1.6803, "step": 19704 }, { "epoch": 0.962158203125, "grad_norm": 0.15710557997226715, "learning_rate": 5.163563525588407e-05, "loss": 1.7291, "step": 19705 }, { "epoch": 0.96220703125, "grad_norm": 0.1813742220401764, "learning_rate": 5.163142208992983e-05, "loss": 1.7192, "step": 19706 }, { "epoch": 0.962255859375, "grad_norm": 0.18037045001983643, "learning_rate": 5.162721433749252e-05, "loss": 1.6833, "step": 19707 }, { "epoch": 0.9623046875, "grad_norm": 0.15908795595169067, "learning_rate": 5.1623011998674125e-05, "loss": 1.723, "step": 19708 }, { "epoch": 0.962353515625, "grad_norm": 0.2124801129102707, "learning_rate": 5.1618815073576516e-05, "loss": 1.7055, "step": 19709 }, { "epoch": 0.96240234375, "grad_norm": 0.18171784281730652, "learning_rate": 5.161462356230136e-05, "loss": 1.7095, "step": 19710 }, { "epoch": 0.962451171875, "grad_norm": 0.19214919209480286, "learning_rate": 5.1610437464950274e-05, "loss": 1.7017, "step": 19711 }, { "epoch": 0.9625, "grad_norm": 0.18955114483833313, "learning_rate": 5.160625678162472e-05, "loss": 1.7352, "step": 19712 }, { "epoch": 0.962548828125, "grad_norm": 0.18917132914066315, "learning_rate": 5.160208151242599e-05, "loss": 1.7163, "step": 19713 }, { "epoch": 0.96259765625, "grad_norm": 0.17085619270801544, "learning_rate": 5.1597911657455304e-05, "loss": 1.6973, "step": 19714 }, { "epoch": 0.962646484375, "grad_norm": 0.17427025735378265, "learning_rate": 5.15937472168137e-05, "loss": 1.7288, "step": 19715 }, { "epoch": 0.9626953125, "grad_norm": 0.18582622706890106, "learning_rate": 5.1589588190602124e-05, "loss": 1.7374, "step": 19716 }, { "epoch": 0.962744140625, "grad_norm": 0.17363084852695465, "learning_rate": 5.158543457892134e-05, "loss": 1.757, "step": 19717 }, { "epoch": 0.96279296875, "grad_norm": 0.18578237295150757, "learning_rate": 5.158128638187206e-05, "loss": 1.73, "step": 19718 }, { "epoch": 0.962841796875, "grad_norm": 0.1811852753162384, "learning_rate": 5.1577143599554784e-05, "loss": 1.7232, "step": 19719 }, { "epoch": 0.962890625, "grad_norm": 0.18544235825538635, "learning_rate": 5.157300623206995e-05, "loss": 1.7129, "step": 19720 }, { "epoch": 0.962939453125, "grad_norm": 0.1690768301486969, "learning_rate": 5.156887427951779e-05, "loss": 1.7262, "step": 19721 }, { "epoch": 0.96298828125, "grad_norm": 0.1803729087114334, "learning_rate": 5.1564747741998476e-05, "loss": 1.7287, "step": 19722 }, { "epoch": 0.963037109375, "grad_norm": 0.17641186714172363, "learning_rate": 5.156062661961201e-05, "loss": 1.7484, "step": 19723 }, { "epoch": 0.9630859375, "grad_norm": 0.17158553004264832, "learning_rate": 5.155651091245825e-05, "loss": 1.7178, "step": 19724 }, { "epoch": 0.963134765625, "grad_norm": 0.18212297558784485, "learning_rate": 5.155240062063697e-05, "loss": 1.7061, "step": 19725 }, { "epoch": 0.96318359375, "grad_norm": 0.17756956815719604, "learning_rate": 5.154829574424777e-05, "loss": 1.7146, "step": 19726 }, { "epoch": 0.963232421875, "grad_norm": 0.175641730427742, "learning_rate": 5.154419628339015e-05, "loss": 1.7193, "step": 19727 }, { "epoch": 0.96328125, "grad_norm": 0.182330921292305, "learning_rate": 5.1540102238163435e-05, "loss": 1.7332, "step": 19728 }, { "epoch": 0.963330078125, "grad_norm": 0.18828876316547394, "learning_rate": 5.1536013608666896e-05, "loss": 1.7129, "step": 19729 }, { "epoch": 0.96337890625, "grad_norm": 0.1994941532611847, "learning_rate": 5.153193039499959e-05, "loss": 1.7183, "step": 19730 }, { "epoch": 0.963427734375, "grad_norm": 0.19665759801864624, "learning_rate": 5.152785259726048e-05, "loss": 1.7146, "step": 19731 }, { "epoch": 0.9634765625, "grad_norm": 0.1832432746887207, "learning_rate": 5.1523780215548406e-05, "loss": 1.6979, "step": 19732 }, { "epoch": 0.963525390625, "grad_norm": 0.16044981777668, "learning_rate": 5.1519713249962035e-05, "loss": 1.7218, "step": 19733 }, { "epoch": 0.96357421875, "grad_norm": 0.18198956549167633, "learning_rate": 5.151565170059997e-05, "loss": 1.7157, "step": 19734 }, { "epoch": 0.963623046875, "grad_norm": 0.16232101619243622, "learning_rate": 5.151159556756063e-05, "loss": 1.7221, "step": 19735 }, { "epoch": 0.963671875, "grad_norm": 0.18782402575016022, "learning_rate": 5.150754485094232e-05, "loss": 1.7143, "step": 19736 }, { "epoch": 0.963720703125, "grad_norm": 0.1633298099040985, "learning_rate": 5.150349955084323e-05, "loss": 1.7182, "step": 19737 }, { "epoch": 0.96376953125, "grad_norm": 0.16790704429149628, "learning_rate": 5.149945966736136e-05, "loss": 1.7169, "step": 19738 }, { "epoch": 0.963818359375, "grad_norm": 0.18183587491512299, "learning_rate": 5.1495425200594664e-05, "loss": 1.7039, "step": 19739 }, { "epoch": 0.9638671875, "grad_norm": 0.16579587757587433, "learning_rate": 5.149139615064087e-05, "loss": 1.7222, "step": 19740 }, { "epoch": 0.963916015625, "grad_norm": 0.16263526678085327, "learning_rate": 5.14873725175977e-05, "loss": 1.7002, "step": 19741 }, { "epoch": 0.96396484375, "grad_norm": 0.18404166400432587, "learning_rate": 5.1483354301562564e-05, "loss": 1.7063, "step": 19742 }, { "epoch": 0.964013671875, "grad_norm": 0.16023169457912445, "learning_rate": 5.1479341502632975e-05, "loss": 1.723, "step": 19743 }, { "epoch": 0.9640625, "grad_norm": 0.18702609837055206, "learning_rate": 5.147533412090607e-05, "loss": 1.7195, "step": 19744 }, { "epoch": 0.964111328125, "grad_norm": 0.17585936188697815, "learning_rate": 5.147133215647905e-05, "loss": 1.7212, "step": 19745 }, { "epoch": 0.96416015625, "grad_norm": 0.17246854305267334, "learning_rate": 5.146733560944883e-05, "loss": 1.7389, "step": 19746 }, { "epoch": 0.964208984375, "grad_norm": 0.18938550353050232, "learning_rate": 5.146334447991237e-05, "loss": 1.7122, "step": 19747 }, { "epoch": 0.9642578125, "grad_norm": 0.18677112460136414, "learning_rate": 5.145935876796633e-05, "loss": 1.7192, "step": 19748 }, { "epoch": 0.964306640625, "grad_norm": 0.17068015038967133, "learning_rate": 5.145537847370729e-05, "loss": 1.7106, "step": 19749 }, { "epoch": 0.96435546875, "grad_norm": 0.17938818037509918, "learning_rate": 5.145140359723178e-05, "loss": 1.7153, "step": 19750 }, { "epoch": 0.964404296875, "grad_norm": 0.22168676555156708, "learning_rate": 5.144743413863608e-05, "loss": 1.7228, "step": 19751 }, { "epoch": 0.964453125, "grad_norm": 0.17574435472488403, "learning_rate": 5.1443470098016424e-05, "loss": 1.7364, "step": 19752 }, { "epoch": 0.964501953125, "grad_norm": 0.17036858201026917, "learning_rate": 5.143951147546889e-05, "loss": 1.7119, "step": 19753 }, { "epoch": 0.96455078125, "grad_norm": 0.20114298164844513, "learning_rate": 5.1435558271089396e-05, "loss": 1.7105, "step": 19754 }, { "epoch": 0.964599609375, "grad_norm": 0.18092305958271027, "learning_rate": 5.143161048497373e-05, "loss": 1.7226, "step": 19755 }, { "epoch": 0.9646484375, "grad_norm": 0.17193830013275146, "learning_rate": 5.1427668117217604e-05, "loss": 1.6885, "step": 19756 }, { "epoch": 0.964697265625, "grad_norm": 0.1959213763475418, "learning_rate": 5.1423731167916596e-05, "loss": 1.7042, "step": 19757 }, { "epoch": 0.96474609375, "grad_norm": 0.1765255182981491, "learning_rate": 5.1419799637166036e-05, "loss": 1.7185, "step": 19758 }, { "epoch": 0.964794921875, "grad_norm": 0.1984596848487854, "learning_rate": 5.141587352506129e-05, "loss": 1.7166, "step": 19759 }, { "epoch": 0.96484375, "grad_norm": 0.16723839938640594, "learning_rate": 5.141195283169745e-05, "loss": 1.6797, "step": 19760 }, { "epoch": 0.964892578125, "grad_norm": 0.18065592646598816, "learning_rate": 5.1408037557169587e-05, "loss": 1.7006, "step": 19761 }, { "epoch": 0.96494140625, "grad_norm": 0.1917867809534073, "learning_rate": 5.140412770157253e-05, "loss": 1.7391, "step": 19762 }, { "epoch": 0.964990234375, "grad_norm": 0.15801095962524414, "learning_rate": 5.1400223265001094e-05, "loss": 1.7158, "step": 19763 }, { "epoch": 0.9650390625, "grad_norm": 0.17517204582691193, "learning_rate": 5.1396324247549865e-05, "loss": 1.7152, "step": 19764 }, { "epoch": 0.965087890625, "grad_norm": 0.16765475273132324, "learning_rate": 5.139243064931339e-05, "loss": 1.7167, "step": 19765 }, { "epoch": 0.96513671875, "grad_norm": 0.16117948293685913, "learning_rate": 5.138854247038599e-05, "loss": 1.7019, "step": 19766 }, { "epoch": 0.965185546875, "grad_norm": 0.17125271260738373, "learning_rate": 5.1384659710861915e-05, "loss": 1.7203, "step": 19767 }, { "epoch": 0.965234375, "grad_norm": 0.1613466590642929, "learning_rate": 5.1380782370835265e-05, "loss": 1.7253, "step": 19768 }, { "epoch": 0.965283203125, "grad_norm": 0.185184046626091, "learning_rate": 5.137691045039999e-05, "loss": 1.7035, "step": 19769 }, { "epoch": 0.96533203125, "grad_norm": 0.16697601974010468, "learning_rate": 5.137304394964998e-05, "loss": 1.6942, "step": 19770 }, { "epoch": 0.965380859375, "grad_norm": 0.1944078654050827, "learning_rate": 5.136918286867888e-05, "loss": 1.7215, "step": 19771 }, { "epoch": 0.9654296875, "grad_norm": 0.19416630268096924, "learning_rate": 5.1365327207580286e-05, "loss": 1.7253, "step": 19772 }, { "epoch": 0.965478515625, "grad_norm": 0.1944686472415924, "learning_rate": 5.136147696644769e-05, "loss": 1.7108, "step": 19773 }, { "epoch": 0.96552734375, "grad_norm": 0.18963491916656494, "learning_rate": 5.1357632145374346e-05, "loss": 1.7269, "step": 19774 }, { "epoch": 0.965576171875, "grad_norm": 0.17832599580287933, "learning_rate": 5.1353792744453484e-05, "loss": 1.7098, "step": 19775 }, { "epoch": 0.965625, "grad_norm": 0.18989227712154388, "learning_rate": 5.134995876377809e-05, "loss": 1.7043, "step": 19776 }, { "epoch": 0.965673828125, "grad_norm": 0.18920393288135529, "learning_rate": 5.134613020344116e-05, "loss": 1.7406, "step": 19777 }, { "epoch": 0.96572265625, "grad_norm": 0.20139342546463013, "learning_rate": 5.13423070635354e-05, "loss": 1.7314, "step": 19778 }, { "epoch": 0.965771484375, "grad_norm": 0.1764364093542099, "learning_rate": 5.133848934415358e-05, "loss": 1.715, "step": 19779 }, { "epoch": 0.9658203125, "grad_norm": 0.1947682648897171, "learning_rate": 5.13346770453881e-05, "loss": 1.6854, "step": 19780 }, { "epoch": 0.965869140625, "grad_norm": 0.18550746142864227, "learning_rate": 5.1330870167331445e-05, "loss": 1.7255, "step": 19781 }, { "epoch": 0.96591796875, "grad_norm": 0.19091199338436127, "learning_rate": 5.1327068710075835e-05, "loss": 1.6976, "step": 19782 }, { "epoch": 0.965966796875, "grad_norm": 0.19104142487049103, "learning_rate": 5.132327267371341e-05, "loss": 1.734, "step": 19783 }, { "epoch": 0.966015625, "grad_norm": 0.19173376262187958, "learning_rate": 5.1319482058336175e-05, "loss": 1.7306, "step": 19784 }, { "epoch": 0.966064453125, "grad_norm": 0.18843944370746613, "learning_rate": 5.1315696864036005e-05, "loss": 1.7145, "step": 19785 }, { "epoch": 0.96611328125, "grad_norm": 0.18639224767684937, "learning_rate": 5.131191709090462e-05, "loss": 1.7219, "step": 19786 }, { "epoch": 0.966162109375, "grad_norm": 0.17853984236717224, "learning_rate": 5.130814273903362e-05, "loss": 1.7181, "step": 19787 }, { "epoch": 0.9662109375, "grad_norm": 0.18073244392871857, "learning_rate": 5.130437380851449e-05, "loss": 1.7135, "step": 19788 }, { "epoch": 0.966259765625, "grad_norm": 0.1800093948841095, "learning_rate": 5.130061029943859e-05, "loss": 1.7252, "step": 19789 }, { "epoch": 0.96630859375, "grad_norm": 0.15545229613780975, "learning_rate": 5.129685221189713e-05, "loss": 1.7031, "step": 19790 }, { "epoch": 0.966357421875, "grad_norm": 0.18379324674606323, "learning_rate": 5.1293099545981164e-05, "loss": 1.6986, "step": 19791 }, { "epoch": 0.96640625, "grad_norm": 0.1787470430135727, "learning_rate": 5.128935230178165e-05, "loss": 1.7162, "step": 19792 }, { "epoch": 0.966455078125, "grad_norm": 0.1640629768371582, "learning_rate": 5.128561047938942e-05, "loss": 1.7111, "step": 19793 }, { "epoch": 0.96650390625, "grad_norm": 0.20144902169704437, "learning_rate": 5.128187407889514e-05, "loss": 1.7145, "step": 19794 }, { "epoch": 0.966552734375, "grad_norm": 0.1675378382205963, "learning_rate": 5.12781431003894e-05, "loss": 1.7306, "step": 19795 }, { "epoch": 0.9666015625, "grad_norm": 0.20521101355552673, "learning_rate": 5.127441754396256e-05, "loss": 1.7024, "step": 19796 }, { "epoch": 0.966650390625, "grad_norm": 0.16233858466148376, "learning_rate": 5.1270697409704994e-05, "loss": 1.7208, "step": 19797 }, { "epoch": 0.96669921875, "grad_norm": 0.1645967960357666, "learning_rate": 5.1266982697706785e-05, "loss": 1.6969, "step": 19798 }, { "epoch": 0.966748046875, "grad_norm": 0.2036702036857605, "learning_rate": 5.1263273408058e-05, "loss": 1.7352, "step": 19799 }, { "epoch": 0.966796875, "grad_norm": 0.20435214042663574, "learning_rate": 5.1259569540848536e-05, "loss": 1.7389, "step": 19800 }, { "epoch": 0.966845703125, "grad_norm": 0.16609910130500793, "learning_rate": 5.1255871096168156e-05, "loss": 1.7153, "step": 19801 }, { "epoch": 0.96689453125, "grad_norm": 0.2084902822971344, "learning_rate": 5.1252178074106496e-05, "loss": 1.717, "step": 19802 }, { "epoch": 0.966943359375, "grad_norm": 0.16476163268089294, "learning_rate": 5.124849047475307e-05, "loss": 1.7052, "step": 19803 }, { "epoch": 0.9669921875, "grad_norm": 0.1663900464773178, "learning_rate": 5.124480829819721e-05, "loss": 1.7073, "step": 19804 }, { "epoch": 0.967041015625, "grad_norm": 0.20314635336399078, "learning_rate": 5.124113154452821e-05, "loss": 1.7381, "step": 19805 }, { "epoch": 0.96708984375, "grad_norm": 0.1834680438041687, "learning_rate": 5.123746021383513e-05, "loss": 1.7174, "step": 19806 }, { "epoch": 0.967138671875, "grad_norm": 0.1755964308977127, "learning_rate": 5.1233794306207e-05, "loss": 1.731, "step": 19807 }, { "epoch": 0.9671875, "grad_norm": 0.17693835496902466, "learning_rate": 5.123013382173261e-05, "loss": 1.6804, "step": 19808 }, { "epoch": 0.967236328125, "grad_norm": 0.19801142811775208, "learning_rate": 5.122647876050071e-05, "loss": 1.7295, "step": 19809 }, { "epoch": 0.96728515625, "grad_norm": 0.17889177799224854, "learning_rate": 5.122282912259987e-05, "loss": 1.7341, "step": 19810 }, { "epoch": 0.967333984375, "grad_norm": 0.19825738668441772, "learning_rate": 5.121918490811856e-05, "loss": 1.7118, "step": 19811 }, { "epoch": 0.9673828125, "grad_norm": 0.20021018385887146, "learning_rate": 5.1215546117145074e-05, "loss": 1.7164, "step": 19812 }, { "epoch": 0.967431640625, "grad_norm": 0.16905201971530914, "learning_rate": 5.1211912749767635e-05, "loss": 1.7024, "step": 19813 }, { "epoch": 0.96748046875, "grad_norm": 0.19018958508968353, "learning_rate": 5.120828480607425e-05, "loss": 1.7001, "step": 19814 }, { "epoch": 0.967529296875, "grad_norm": 0.19981208443641663, "learning_rate": 5.1204662286152894e-05, "loss": 1.7064, "step": 19815 }, { "epoch": 0.967578125, "grad_norm": 0.18461114168167114, "learning_rate": 5.120104519009135e-05, "loss": 1.716, "step": 19816 }, { "epoch": 0.967626953125, "grad_norm": 0.21608547866344452, "learning_rate": 5.1197433517977245e-05, "loss": 1.7287, "step": 19817 }, { "epoch": 0.96767578125, "grad_norm": 0.15723970532417297, "learning_rate": 5.1193827269898156e-05, "loss": 1.7027, "step": 19818 }, { "epoch": 0.967724609375, "grad_norm": 0.2402680218219757, "learning_rate": 5.1190226445941475e-05, "loss": 1.7453, "step": 19819 }, { "epoch": 0.9677734375, "grad_norm": 0.20007206499576569, "learning_rate": 5.118663104619445e-05, "loss": 1.7303, "step": 19820 }, { "epoch": 0.967822265625, "grad_norm": 0.1783142238855362, "learning_rate": 5.1183041070744246e-05, "loss": 1.7099, "step": 19821 }, { "epoch": 0.96787109375, "grad_norm": 0.23048989474773407, "learning_rate": 5.117945651967784e-05, "loss": 1.7068, "step": 19822 }, { "epoch": 0.967919921875, "grad_norm": 0.1797751486301422, "learning_rate": 5.117587739308215e-05, "loss": 1.7188, "step": 19823 }, { "epoch": 0.96796875, "grad_norm": 0.17869238555431366, "learning_rate": 5.1172303691043854e-05, "loss": 1.7274, "step": 19824 }, { "epoch": 0.968017578125, "grad_norm": 0.19914443790912628, "learning_rate": 5.1168735413649616e-05, "loss": 1.7233, "step": 19825 }, { "epoch": 0.96806640625, "grad_norm": 0.20099639892578125, "learning_rate": 5.1165172560985895e-05, "loss": 1.7142, "step": 19826 }, { "epoch": 0.968115234375, "grad_norm": 0.19569960236549377, "learning_rate": 5.116161513313907e-05, "loss": 1.721, "step": 19827 }, { "epoch": 0.9681640625, "grad_norm": 0.16421294212341309, "learning_rate": 5.115806313019529e-05, "loss": 1.7078, "step": 19828 }, { "epoch": 0.968212890625, "grad_norm": 0.18895134329795837, "learning_rate": 5.1154516552240704e-05, "loss": 1.737, "step": 19829 }, { "epoch": 0.96826171875, "grad_norm": 0.18674542009830475, "learning_rate": 5.1150975399361225e-05, "loss": 1.7341, "step": 19830 }, { "epoch": 0.968310546875, "grad_norm": 0.17043647170066833, "learning_rate": 5.114743967164272e-05, "loss": 1.6989, "step": 19831 }, { "epoch": 0.968359375, "grad_norm": 0.1607346087694168, "learning_rate": 5.114390936917086e-05, "loss": 1.6922, "step": 19832 }, { "epoch": 0.968408203125, "grad_norm": 0.1980375200510025, "learning_rate": 5.114038449203119e-05, "loss": 1.6954, "step": 19833 }, { "epoch": 0.96845703125, "grad_norm": 0.1805868297815323, "learning_rate": 5.113686504030914e-05, "loss": 1.7337, "step": 19834 }, { "epoch": 0.968505859375, "grad_norm": 0.15989983081817627, "learning_rate": 5.113335101409003e-05, "loss": 1.7502, "step": 19835 }, { "epoch": 0.9685546875, "grad_norm": 0.1710604727268219, "learning_rate": 5.1129842413459e-05, "loss": 1.7138, "step": 19836 }, { "epoch": 0.968603515625, "grad_norm": 0.17283031344413757, "learning_rate": 5.11263392385011e-05, "loss": 1.7269, "step": 19837 }, { "epoch": 0.96865234375, "grad_norm": 0.16700425744056702, "learning_rate": 5.112284148930123e-05, "loss": 1.7048, "step": 19838 }, { "epoch": 0.968701171875, "grad_norm": 0.15792681276798248, "learning_rate": 5.111934916594415e-05, "loss": 1.6886, "step": 19839 }, { "epoch": 0.96875, "grad_norm": 0.1647024005651474, "learning_rate": 5.11158622685145e-05, "loss": 1.7212, "step": 19840 }, { "epoch": 0.968798828125, "grad_norm": 0.18476688861846924, "learning_rate": 5.1112380797096825e-05, "loss": 1.725, "step": 19841 }, { "epoch": 0.96884765625, "grad_norm": 0.16857396066188812, "learning_rate": 5.110890475177542e-05, "loss": 1.7048, "step": 19842 }, { "epoch": 0.968896484375, "grad_norm": 0.1642446517944336, "learning_rate": 5.110543413263464e-05, "loss": 1.7045, "step": 19843 }, { "epoch": 0.9689453125, "grad_norm": 0.17369188368320465, "learning_rate": 5.1101968939758476e-05, "loss": 1.727, "step": 19844 }, { "epoch": 0.968994140625, "grad_norm": 0.17390702664852142, "learning_rate": 5.109850917323103e-05, "loss": 1.7354, "step": 19845 }, { "epoch": 0.96904296875, "grad_norm": 0.18176807463169098, "learning_rate": 5.109505483313606e-05, "loss": 1.7144, "step": 19846 }, { "epoch": 0.969091796875, "grad_norm": 0.18403051793575287, "learning_rate": 5.1091605919557336e-05, "loss": 1.7314, "step": 19847 }, { "epoch": 0.969140625, "grad_norm": 0.16347120702266693, "learning_rate": 5.1088162432578424e-05, "loss": 1.7052, "step": 19848 }, { "epoch": 0.969189453125, "grad_norm": 0.16591675579547882, "learning_rate": 5.108472437228279e-05, "loss": 1.6951, "step": 19849 }, { "epoch": 0.96923828125, "grad_norm": 0.19328290224075317, "learning_rate": 5.108129173875375e-05, "loss": 1.7316, "step": 19850 }, { "epoch": 0.969287109375, "grad_norm": 0.1694650501012802, "learning_rate": 5.1077864532074496e-05, "loss": 1.7285, "step": 19851 }, { "epoch": 0.9693359375, "grad_norm": 0.20689399540424347, "learning_rate": 5.107444275232807e-05, "loss": 1.7396, "step": 19852 }, { "epoch": 0.969384765625, "grad_norm": 0.15776759386062622, "learning_rate": 5.107102639959746e-05, "loss": 1.745, "step": 19853 }, { "epoch": 0.96943359375, "grad_norm": 0.21624569594860077, "learning_rate": 5.106761547396542e-05, "loss": 1.7156, "step": 19854 }, { "epoch": 0.969482421875, "grad_norm": 0.19313137233257294, "learning_rate": 5.106420997551461e-05, "loss": 1.709, "step": 19855 }, { "epoch": 0.96953125, "grad_norm": 0.17359168827533722, "learning_rate": 5.1060809904327586e-05, "loss": 1.7123, "step": 19856 }, { "epoch": 0.969580078125, "grad_norm": 0.21370546519756317, "learning_rate": 5.105741526048675e-05, "loss": 1.7256, "step": 19857 }, { "epoch": 0.96962890625, "grad_norm": 0.1795377880334854, "learning_rate": 5.105402604407436e-05, "loss": 1.7138, "step": 19858 }, { "epoch": 0.969677734375, "grad_norm": 0.18324141204357147, "learning_rate": 5.105064225517259e-05, "loss": 1.7122, "step": 19859 }, { "epoch": 0.9697265625, "grad_norm": 0.19246609508991241, "learning_rate": 5.104726389386338e-05, "loss": 1.7062, "step": 19860 }, { "epoch": 0.969775390625, "grad_norm": 0.190134197473526, "learning_rate": 5.104389096022868e-05, "loss": 1.7227, "step": 19861 }, { "epoch": 0.96982421875, "grad_norm": 0.17339029908180237, "learning_rate": 5.104052345435019e-05, "loss": 1.7086, "step": 19862 }, { "epoch": 0.969873046875, "grad_norm": 0.1774396300315857, "learning_rate": 5.1037161376309565e-05, "loss": 1.714, "step": 19863 }, { "epoch": 0.969921875, "grad_norm": 0.1918959766626358, "learning_rate": 5.103380472618824e-05, "loss": 1.718, "step": 19864 }, { "epoch": 0.969970703125, "grad_norm": 0.19459331035614014, "learning_rate": 5.10304535040676e-05, "loss": 1.719, "step": 19865 }, { "epoch": 0.97001953125, "grad_norm": 0.16450341045856476, "learning_rate": 5.1027107710028855e-05, "loss": 1.7338, "step": 19866 }, { "epoch": 0.970068359375, "grad_norm": 0.19988301396369934, "learning_rate": 5.1023767344153106e-05, "loss": 1.7258, "step": 19867 }, { "epoch": 0.9701171875, "grad_norm": 0.1961880475282669, "learning_rate": 5.1020432406521284e-05, "loss": 1.7018, "step": 19868 }, { "epoch": 0.970166015625, "grad_norm": 0.18177054822444916, "learning_rate": 5.1017102897214224e-05, "loss": 1.7242, "step": 19869 }, { "epoch": 0.97021484375, "grad_norm": 0.19323456287384033, "learning_rate": 5.101377881631264e-05, "loss": 1.716, "step": 19870 }, { "epoch": 0.970263671875, "grad_norm": 0.17457804083824158, "learning_rate": 5.101046016389704e-05, "loss": 1.7105, "step": 19871 }, { "epoch": 0.9703125, "grad_norm": 0.19958102703094482, "learning_rate": 5.100714694004791e-05, "loss": 1.7314, "step": 19872 }, { "epoch": 0.970361328125, "grad_norm": 0.17981471121311188, "learning_rate": 5.100383914484554e-05, "loss": 1.7275, "step": 19873 }, { "epoch": 0.97041015625, "grad_norm": 0.204581618309021, "learning_rate": 5.1000536778370064e-05, "loss": 1.7632, "step": 19874 }, { "epoch": 0.970458984375, "grad_norm": 0.177188441157341, "learning_rate": 5.0997239840701566e-05, "loss": 1.7048, "step": 19875 }, { "epoch": 0.9705078125, "grad_norm": 0.1804732233285904, "learning_rate": 5.099394833191988e-05, "loss": 1.7037, "step": 19876 }, { "epoch": 0.970556640625, "grad_norm": 0.17404970526695251, "learning_rate": 5.099066225210485e-05, "loss": 1.7243, "step": 19877 }, { "epoch": 0.97060546875, "grad_norm": 0.1773812174797058, "learning_rate": 5.098738160133608e-05, "loss": 1.7301, "step": 19878 }, { "epoch": 0.970654296875, "grad_norm": 0.18268811702728271, "learning_rate": 5.0984106379693085e-05, "loss": 1.6938, "step": 19879 }, { "epoch": 0.970703125, "grad_norm": 0.18307650089263916, "learning_rate": 5.0980836587255244e-05, "loss": 1.7457, "step": 19880 }, { "epoch": 0.970751953125, "grad_norm": 0.19874757528305054, "learning_rate": 5.097757222410181e-05, "loss": 1.7178, "step": 19881 }, { "epoch": 0.97080078125, "grad_norm": 0.1585792452096939, "learning_rate": 5.097431329031188e-05, "loss": 1.7209, "step": 19882 }, { "epoch": 0.970849609375, "grad_norm": 0.17245188355445862, "learning_rate": 5.097105978596447e-05, "loss": 1.717, "step": 19883 }, { "epoch": 0.9708984375, "grad_norm": 0.1722298115491867, "learning_rate": 5.0967811711138403e-05, "loss": 1.7129, "step": 19884 }, { "epoch": 0.970947265625, "grad_norm": 0.17403686046600342, "learning_rate": 5.096456906591239e-05, "loss": 1.7243, "step": 19885 }, { "epoch": 0.97099609375, "grad_norm": 0.18511465191841125, "learning_rate": 5.096133185036504e-05, "loss": 1.7309, "step": 19886 }, { "epoch": 0.971044921875, "grad_norm": 0.15511846542358398, "learning_rate": 5.0958100064574786e-05, "loss": 1.7241, "step": 19887 }, { "epoch": 0.97109375, "grad_norm": 0.1699327677488327, "learning_rate": 5.0954873708619986e-05, "loss": 1.7218, "step": 19888 }, { "epoch": 0.971142578125, "grad_norm": 0.17863714694976807, "learning_rate": 5.0951652782578826e-05, "loss": 1.6934, "step": 19889 }, { "epoch": 0.97119140625, "grad_norm": 0.15414991974830627, "learning_rate": 5.0948437286529355e-05, "loss": 1.7129, "step": 19890 }, { "epoch": 0.971240234375, "grad_norm": 0.18007254600524902, "learning_rate": 5.09452272205495e-05, "loss": 1.73, "step": 19891 }, { "epoch": 0.9712890625, "grad_norm": 0.21632501482963562, "learning_rate": 5.094202258471708e-05, "loss": 1.7118, "step": 19892 }, { "epoch": 0.971337890625, "grad_norm": 0.16035468876361847, "learning_rate": 5.0938823379109745e-05, "loss": 1.7195, "step": 19893 }, { "epoch": 0.97138671875, "grad_norm": 0.16307508945465088, "learning_rate": 5.0935629603805024e-05, "loss": 1.7258, "step": 19894 }, { "epoch": 0.971435546875, "grad_norm": 0.1943732500076294, "learning_rate": 5.093244125888035e-05, "loss": 1.6912, "step": 19895 }, { "epoch": 0.971484375, "grad_norm": 0.16609559953212738, "learning_rate": 5.092925834441295e-05, "loss": 1.6965, "step": 19896 }, { "epoch": 0.971533203125, "grad_norm": 0.1703895479440689, "learning_rate": 5.0926080860480014e-05, "loss": 1.7217, "step": 19897 }, { "epoch": 0.97158203125, "grad_norm": 0.18415188789367676, "learning_rate": 5.09229088071585e-05, "loss": 1.7289, "step": 19898 }, { "epoch": 0.971630859375, "grad_norm": 0.16437137126922607, "learning_rate": 5.091974218452534e-05, "loss": 1.7148, "step": 19899 }, { "epoch": 0.9716796875, "grad_norm": 0.15464593470096588, "learning_rate": 5.091658099265724e-05, "loss": 1.7087, "step": 19900 }, { "epoch": 0.971728515625, "grad_norm": 0.19425371289253235, "learning_rate": 5.0913425231630844e-05, "loss": 1.7014, "step": 19901 }, { "epoch": 0.97177734375, "grad_norm": 0.15979157388210297, "learning_rate": 5.091027490152259e-05, "loss": 1.7088, "step": 19902 }, { "epoch": 0.971826171875, "grad_norm": 0.16883233189582825, "learning_rate": 5.090713000240886e-05, "loss": 1.7138, "step": 19903 }, { "epoch": 0.971875, "grad_norm": 0.16967402398586273, "learning_rate": 5.0903990534365885e-05, "loss": 1.7269, "step": 19904 }, { "epoch": 0.971923828125, "grad_norm": 0.17703509330749512, "learning_rate": 5.0900856497469726e-05, "loss": 1.6994, "step": 19905 }, { "epoch": 0.97197265625, "grad_norm": 0.167903870344162, "learning_rate": 5.089772789179635e-05, "loss": 1.7075, "step": 19906 }, { "epoch": 0.972021484375, "grad_norm": 0.17462660372257233, "learning_rate": 5.0894604717421575e-05, "loss": 1.7169, "step": 19907 }, { "epoch": 0.9720703125, "grad_norm": 0.18794167041778564, "learning_rate": 5.0891486974421106e-05, "loss": 1.7146, "step": 19908 }, { "epoch": 0.972119140625, "grad_norm": 0.16342422366142273, "learning_rate": 5.08883746628705e-05, "loss": 1.7189, "step": 19909 }, { "epoch": 0.97216796875, "grad_norm": 0.1741134375333786, "learning_rate": 5.088526778284518e-05, "loss": 1.728, "step": 19910 }, { "epoch": 0.972216796875, "grad_norm": 0.1933853030204773, "learning_rate": 5.088216633442047e-05, "loss": 1.7058, "step": 19911 }, { "epoch": 0.972265625, "grad_norm": 0.16538095474243164, "learning_rate": 5.087907031767148e-05, "loss": 1.7145, "step": 19912 }, { "epoch": 0.972314453125, "grad_norm": 0.1700088232755661, "learning_rate": 5.087597973267328e-05, "loss": 1.716, "step": 19913 }, { "epoch": 0.97236328125, "grad_norm": 0.17329633235931396, "learning_rate": 5.087289457950077e-05, "loss": 1.7181, "step": 19914 }, { "epoch": 0.972412109375, "grad_norm": 0.18234574794769287, "learning_rate": 5.086981485822875e-05, "loss": 1.7041, "step": 19915 }, { "epoch": 0.9724609375, "grad_norm": 0.17522281408309937, "learning_rate": 5.086674056893181e-05, "loss": 1.7043, "step": 19916 }, { "epoch": 0.972509765625, "grad_norm": 0.1710648089647293, "learning_rate": 5.086367171168449e-05, "loss": 1.6974, "step": 19917 }, { "epoch": 0.97255859375, "grad_norm": 0.1719582974910736, "learning_rate": 5.086060828656114e-05, "loss": 1.7298, "step": 19918 }, { "epoch": 0.972607421875, "grad_norm": 0.193008154630661, "learning_rate": 5.0857550293636035e-05, "loss": 1.7135, "step": 19919 }, { "epoch": 0.97265625, "grad_norm": 0.1651025116443634, "learning_rate": 5.085449773298326e-05, "loss": 1.7222, "step": 19920 }, { "epoch": 0.972705078125, "grad_norm": 0.156376913189888, "learning_rate": 5.08514506046768e-05, "loss": 1.7171, "step": 19921 }, { "epoch": 0.97275390625, "grad_norm": 0.173716738820076, "learning_rate": 5.084840890879054e-05, "loss": 1.7154, "step": 19922 }, { "epoch": 0.972802734375, "grad_norm": 0.155419260263443, "learning_rate": 5.084537264539816e-05, "loss": 1.7101, "step": 19923 }, { "epoch": 0.9728515625, "grad_norm": 0.1713407039642334, "learning_rate": 5.084234181457324e-05, "loss": 1.6929, "step": 19924 }, { "epoch": 0.972900390625, "grad_norm": 0.156722292304039, "learning_rate": 5.0839316416389264e-05, "loss": 1.7112, "step": 19925 }, { "epoch": 0.97294921875, "grad_norm": 0.17183320224285126, "learning_rate": 5.083629645091957e-05, "loss": 1.7254, "step": 19926 }, { "epoch": 0.972998046875, "grad_norm": 0.16810691356658936, "learning_rate": 5.0833281918237296e-05, "loss": 1.7397, "step": 19927 }, { "epoch": 0.973046875, "grad_norm": 0.1896941363811493, "learning_rate": 5.083027281841553e-05, "loss": 1.7127, "step": 19928 }, { "epoch": 0.973095703125, "grad_norm": 0.1664097011089325, "learning_rate": 5.082726915152719e-05, "loss": 1.7165, "step": 19929 }, { "epoch": 0.97314453125, "grad_norm": 0.1922796368598938, "learning_rate": 5.0824270917645095e-05, "loss": 1.7009, "step": 19930 }, { "epoch": 0.973193359375, "grad_norm": 0.20279958844184875, "learning_rate": 5.0821278116841914e-05, "loss": 1.7218, "step": 19931 }, { "epoch": 0.9732421875, "grad_norm": 0.17243528366088867, "learning_rate": 5.081829074919012e-05, "loss": 1.6759, "step": 19932 }, { "epoch": 0.973291015625, "grad_norm": 0.18588195741176605, "learning_rate": 5.081530881476219e-05, "loss": 1.6976, "step": 19933 }, { "epoch": 0.97333984375, "grad_norm": 0.19742797315120697, "learning_rate": 5.081233231363034e-05, "loss": 1.6716, "step": 19934 }, { "epoch": 0.973388671875, "grad_norm": 0.19067472219467163, "learning_rate": 5.080936124586673e-05, "loss": 1.7419, "step": 19935 }, { "epoch": 0.9734375, "grad_norm": 0.19511014223098755, "learning_rate": 5.080639561154336e-05, "loss": 1.7259, "step": 19936 }, { "epoch": 0.973486328125, "grad_norm": 0.2063290774822235, "learning_rate": 5.080343541073211e-05, "loss": 1.7303, "step": 19937 }, { "epoch": 0.97353515625, "grad_norm": 0.21757429838180542, "learning_rate": 5.080048064350475e-05, "loss": 1.6982, "step": 19938 }, { "epoch": 0.973583984375, "grad_norm": 0.17403526604175568, "learning_rate": 5.079753130993283e-05, "loss": 1.7021, "step": 19939 }, { "epoch": 0.9736328125, "grad_norm": 0.20505072176456451, "learning_rate": 5.079458741008786e-05, "loss": 1.714, "step": 19940 }, { "epoch": 0.973681640625, "grad_norm": 0.178058460354805, "learning_rate": 5.079164894404119e-05, "loss": 1.7039, "step": 19941 }, { "epoch": 0.97373046875, "grad_norm": 0.1770646572113037, "learning_rate": 5.0788715911864034e-05, "loss": 1.7163, "step": 19942 }, { "epoch": 0.973779296875, "grad_norm": 0.20241382718086243, "learning_rate": 5.078578831362748e-05, "loss": 1.7036, "step": 19943 }, { "epoch": 0.973828125, "grad_norm": 0.18635699152946472, "learning_rate": 5.078286614940248e-05, "loss": 1.719, "step": 19944 }, { "epoch": 0.973876953125, "grad_norm": 0.22193950414657593, "learning_rate": 5.077994941925987e-05, "loss": 1.6982, "step": 19945 }, { "epoch": 0.97392578125, "grad_norm": 0.16412238776683807, "learning_rate": 5.0777038123270286e-05, "loss": 1.7341, "step": 19946 }, { "epoch": 0.973974609375, "grad_norm": 0.2184685468673706, "learning_rate": 5.077413226150435e-05, "loss": 1.6988, "step": 19947 }, { "epoch": 0.9740234375, "grad_norm": 0.1984361857175827, "learning_rate": 5.077123183403245e-05, "loss": 1.7221, "step": 19948 }, { "epoch": 0.974072265625, "grad_norm": 0.20553554594516754, "learning_rate": 5.076833684092489e-05, "loss": 1.7212, "step": 19949 }, { "epoch": 0.97412109375, "grad_norm": 0.1833529770374298, "learning_rate": 5.076544728225185e-05, "loss": 1.745, "step": 19950 }, { "epoch": 0.974169921875, "grad_norm": 0.20574216544628143, "learning_rate": 5.076256315808332e-05, "loss": 1.6824, "step": 19951 }, { "epoch": 0.97421875, "grad_norm": 0.2293785810470581, "learning_rate": 5.075968446848924e-05, "loss": 1.7318, "step": 19952 }, { "epoch": 0.974267578125, "grad_norm": 0.15726548433303833, "learning_rate": 5.0756811213539356e-05, "loss": 1.7202, "step": 19953 }, { "epoch": 0.97431640625, "grad_norm": 0.19109590351581573, "learning_rate": 5.075394339330329e-05, "loss": 1.7169, "step": 19954 }, { "epoch": 0.974365234375, "grad_norm": 0.20183244347572327, "learning_rate": 5.0751081007850576e-05, "loss": 1.7108, "step": 19955 }, { "epoch": 0.9744140625, "grad_norm": 0.1601494550704956, "learning_rate": 5.074822405725056e-05, "loss": 1.6901, "step": 19956 }, { "epoch": 0.974462890625, "grad_norm": 0.20523343980312347, "learning_rate": 5.0745372541572525e-05, "loss": 1.7293, "step": 19957 }, { "epoch": 0.97451171875, "grad_norm": 0.17764994502067566, "learning_rate": 5.074252646088553e-05, "loss": 1.6929, "step": 19958 }, { "epoch": 0.974560546875, "grad_norm": 0.17784449458122253, "learning_rate": 5.0739685815258584e-05, "loss": 1.7083, "step": 19959 }, { "epoch": 0.974609375, "grad_norm": 0.2173750251531601, "learning_rate": 5.0736850604760506e-05, "loss": 1.6998, "step": 19960 }, { "epoch": 0.974658203125, "grad_norm": 0.1826135367155075, "learning_rate": 5.073402082946002e-05, "loss": 1.7158, "step": 19961 }, { "epoch": 0.97470703125, "grad_norm": 0.19414161145687103, "learning_rate": 5.073119648942573e-05, "loss": 1.7186, "step": 19962 }, { "epoch": 0.974755859375, "grad_norm": 0.17804282903671265, "learning_rate": 5.0728377584726065e-05, "loss": 1.7075, "step": 19963 }, { "epoch": 0.9748046875, "grad_norm": 0.1750907003879547, "learning_rate": 5.072556411542935e-05, "loss": 1.7315, "step": 19964 }, { "epoch": 0.974853515625, "grad_norm": 0.19183926284313202, "learning_rate": 5.072275608160376e-05, "loss": 1.7269, "step": 19965 }, { "epoch": 0.97490234375, "grad_norm": 0.17720867693424225, "learning_rate": 5.071995348331737e-05, "loss": 1.7211, "step": 19966 }, { "epoch": 0.974951171875, "grad_norm": 0.18700268864631653, "learning_rate": 5.0717156320638084e-05, "loss": 1.7175, "step": 19967 }, { "epoch": 0.975, "grad_norm": 0.16852912306785583, "learning_rate": 5.0714364593633715e-05, "loss": 1.7365, "step": 19968 }, { "epoch": 0.975048828125, "grad_norm": 0.18481723964214325, "learning_rate": 5.071157830237188e-05, "loss": 1.7413, "step": 19969 }, { "epoch": 0.97509765625, "grad_norm": 0.178096204996109, "learning_rate": 5.070879744692017e-05, "loss": 1.6986, "step": 19970 }, { "epoch": 0.975146484375, "grad_norm": 0.1682254672050476, "learning_rate": 5.070602202734593e-05, "loss": 1.7112, "step": 19971 }, { "epoch": 0.9751953125, "grad_norm": 0.15492767095565796, "learning_rate": 5.0703252043716456e-05, "loss": 1.7036, "step": 19972 }, { "epoch": 0.975244140625, "grad_norm": 0.1906287670135498, "learning_rate": 5.070048749609886e-05, "loss": 1.7127, "step": 19973 }, { "epoch": 0.97529296875, "grad_norm": 0.16885364055633545, "learning_rate": 5.069772838456014e-05, "loss": 1.7188, "step": 19974 }, { "epoch": 0.975341796875, "grad_norm": 0.15967653691768646, "learning_rate": 5.069497470916718e-05, "loss": 1.7193, "step": 19975 }, { "epoch": 0.975390625, "grad_norm": 0.19331510365009308, "learning_rate": 5.06922264699867e-05, "loss": 1.7438, "step": 19976 }, { "epoch": 0.975439453125, "grad_norm": 0.18593105673789978, "learning_rate": 5.068948366708533e-05, "loss": 1.7438, "step": 19977 }, { "epoch": 0.97548828125, "grad_norm": 0.1846170872449875, "learning_rate": 5.068674630052955e-05, "loss": 1.6978, "step": 19978 }, { "epoch": 0.975537109375, "grad_norm": 0.17764700949192047, "learning_rate": 5.0684014370385664e-05, "loss": 1.7148, "step": 19979 }, { "epoch": 0.9755859375, "grad_norm": 0.17306284606456757, "learning_rate": 5.0681287876719904e-05, "loss": 1.7088, "step": 19980 }, { "epoch": 0.975634765625, "grad_norm": 0.2009502500295639, "learning_rate": 5.0678566819598356e-05, "loss": 1.7543, "step": 19981 }, { "epoch": 0.97568359375, "grad_norm": 0.1824766993522644, "learning_rate": 5.067585119908694e-05, "loss": 1.7328, "step": 19982 }, { "epoch": 0.975732421875, "grad_norm": 0.17648620903491974, "learning_rate": 5.067314101525151e-05, "loss": 1.7398, "step": 19983 }, { "epoch": 0.97578125, "grad_norm": 0.17374952137470245, "learning_rate": 5.067043626815774e-05, "loss": 1.7108, "step": 19984 }, { "epoch": 0.975830078125, "grad_norm": 0.19711169600486755, "learning_rate": 5.0667736957871153e-05, "loss": 1.7334, "step": 19985 }, { "epoch": 0.97587890625, "grad_norm": 0.17449545860290527, "learning_rate": 5.066504308445719e-05, "loss": 1.7355, "step": 19986 }, { "epoch": 0.975927734375, "grad_norm": 0.18398405611515045, "learning_rate": 5.0662354647981125e-05, "loss": 1.7076, "step": 19987 }, { "epoch": 0.9759765625, "grad_norm": 0.17434953153133392, "learning_rate": 5.065967164850816e-05, "loss": 1.7284, "step": 19988 }, { "epoch": 0.976025390625, "grad_norm": 0.18265345692634583, "learning_rate": 5.065699408610326e-05, "loss": 1.7249, "step": 19989 }, { "epoch": 0.97607421875, "grad_norm": 0.1936682015657425, "learning_rate": 5.0654321960831334e-05, "loss": 1.7246, "step": 19990 }, { "epoch": 0.976123046875, "grad_norm": 0.17061947286128998, "learning_rate": 5.065165527275718e-05, "loss": 1.7045, "step": 19991 }, { "epoch": 0.976171875, "grad_norm": 0.18242378532886505, "learning_rate": 5.064899402194538e-05, "loss": 1.7432, "step": 19992 }, { "epoch": 0.976220703125, "grad_norm": 0.18468475341796875, "learning_rate": 5.064633820846048e-05, "loss": 1.718, "step": 19993 }, { "epoch": 0.97626953125, "grad_norm": 0.1683613657951355, "learning_rate": 5.064368783236678e-05, "loss": 1.717, "step": 19994 }, { "epoch": 0.976318359375, "grad_norm": 0.1600581705570221, "learning_rate": 5.064104289372857e-05, "loss": 1.7289, "step": 19995 }, { "epoch": 0.9763671875, "grad_norm": 0.1765521913766861, "learning_rate": 5.063840339260993e-05, "loss": 1.7209, "step": 19996 }, { "epoch": 0.976416015625, "grad_norm": 0.18513597548007965, "learning_rate": 5.063576932907484e-05, "loss": 1.7067, "step": 19997 }, { "epoch": 0.97646484375, "grad_norm": 0.15752184391021729, "learning_rate": 5.0633140703187126e-05, "loss": 1.7148, "step": 19998 }, { "epoch": 0.976513671875, "grad_norm": 0.193820983171463, "learning_rate": 5.063051751501051e-05, "loss": 1.7286, "step": 19999 }, { "epoch": 0.9765625, "grad_norm": 0.17869745194911957, "learning_rate": 5.0627899764608565e-05, "loss": 1.7255, "step": 20000 }, { "epoch": 0.976611328125, "grad_norm": 0.1680459976196289, "learning_rate": 5.0625287452044695e-05, "loss": 1.7234, "step": 20001 }, { "epoch": 0.97666015625, "grad_norm": 0.21477560698986053, "learning_rate": 5.062268057738228e-05, "loss": 1.6983, "step": 20002 }, { "epoch": 0.976708984375, "grad_norm": 0.16190354526042938, "learning_rate": 5.062007914068445e-05, "loss": 1.717, "step": 20003 }, { "epoch": 0.9767578125, "grad_norm": 0.18940143287181854, "learning_rate": 5.061748314201427e-05, "loss": 1.7067, "step": 20004 }, { "epoch": 0.976806640625, "grad_norm": 0.17365799844264984, "learning_rate": 5.061489258143465e-05, "loss": 1.7072, "step": 20005 }, { "epoch": 0.97685546875, "grad_norm": 0.1873703896999359, "learning_rate": 5.061230745900837e-05, "loss": 1.702, "step": 20006 }, { "epoch": 0.976904296875, "grad_norm": 0.18756930530071259, "learning_rate": 5.06097277747981e-05, "loss": 1.7293, "step": 20007 }, { "epoch": 0.976953125, "grad_norm": 0.19002608954906464, "learning_rate": 5.060715352886633e-05, "loss": 1.7384, "step": 20008 }, { "epoch": 0.977001953125, "grad_norm": 0.18149125576019287, "learning_rate": 5.0604584721275486e-05, "loss": 1.7116, "step": 20009 }, { "epoch": 0.97705078125, "grad_norm": 0.17464298009872437, "learning_rate": 5.0602021352087823e-05, "loss": 1.7212, "step": 20010 }, { "epoch": 0.977099609375, "grad_norm": 0.204761803150177, "learning_rate": 5.059946342136544e-05, "loss": 1.7195, "step": 20011 }, { "epoch": 0.9771484375, "grad_norm": 0.1788269430398941, "learning_rate": 5.059691092917033e-05, "loss": 1.7046, "step": 20012 }, { "epoch": 0.977197265625, "grad_norm": 0.18995442986488342, "learning_rate": 5.0594363875564375e-05, "loss": 1.7262, "step": 20013 }, { "epoch": 0.97724609375, "grad_norm": 0.17659921944141388, "learning_rate": 5.059182226060931e-05, "loss": 1.7115, "step": 20014 }, { "epoch": 0.977294921875, "grad_norm": 0.17474137246608734, "learning_rate": 5.058928608436671e-05, "loss": 1.7225, "step": 20015 }, { "epoch": 0.97734375, "grad_norm": 0.16363996267318726, "learning_rate": 5.058675534689805e-05, "loss": 1.7375, "step": 20016 }, { "epoch": 0.977392578125, "grad_norm": 0.17589512467384338, "learning_rate": 5.0584230048264674e-05, "loss": 1.7354, "step": 20017 }, { "epoch": 0.97744140625, "grad_norm": 0.17594538629055023, "learning_rate": 5.058171018852777e-05, "loss": 1.7255, "step": 20018 }, { "epoch": 0.977490234375, "grad_norm": 0.17169636487960815, "learning_rate": 5.057919576774841e-05, "loss": 1.7107, "step": 20019 }, { "epoch": 0.9775390625, "grad_norm": 0.19657352566719055, "learning_rate": 5.057668678598756e-05, "loss": 1.7215, "step": 20020 }, { "epoch": 0.977587890625, "grad_norm": 0.15661244094371796, "learning_rate": 5.057418324330597e-05, "loss": 1.7072, "step": 20021 }, { "epoch": 0.97763671875, "grad_norm": 0.20977303385734558, "learning_rate": 5.0571685139764395e-05, "loss": 1.7202, "step": 20022 }, { "epoch": 0.977685546875, "grad_norm": 0.17112024128437042, "learning_rate": 5.0569192475423285e-05, "loss": 1.7421, "step": 20023 }, { "epoch": 0.977734375, "grad_norm": 0.17368492484092712, "learning_rate": 5.056670525034314e-05, "loss": 1.7169, "step": 20024 }, { "epoch": 0.977783203125, "grad_norm": 0.17554998397827148, "learning_rate": 5.056422346458419e-05, "loss": 1.7199, "step": 20025 }, { "epoch": 0.97783203125, "grad_norm": 0.16010798513889313, "learning_rate": 5.056174711820658e-05, "loss": 1.7145, "step": 20026 }, { "epoch": 0.977880859375, "grad_norm": 0.1673174500465393, "learning_rate": 5.055927621127035e-05, "loss": 1.7319, "step": 20027 }, { "epoch": 0.9779296875, "grad_norm": 0.1832393854856491, "learning_rate": 5.055681074383539e-05, "loss": 1.7056, "step": 20028 }, { "epoch": 0.977978515625, "grad_norm": 0.1738172024488449, "learning_rate": 5.055435071596144e-05, "loss": 1.7189, "step": 20029 }, { "epoch": 0.97802734375, "grad_norm": 0.1748424619436264, "learning_rate": 5.055189612770808e-05, "loss": 1.7058, "step": 20030 }, { "epoch": 0.978076171875, "grad_norm": 0.19484823942184448, "learning_rate": 5.054944697913486e-05, "loss": 1.7518, "step": 20031 }, { "epoch": 0.978125, "grad_norm": 0.17560264468193054, "learning_rate": 5.0547003270301126e-05, "loss": 1.7227, "step": 20032 }, { "epoch": 0.978173828125, "grad_norm": 0.18409495055675507, "learning_rate": 5.054456500126607e-05, "loss": 1.7089, "step": 20033 }, { "epoch": 0.97822265625, "grad_norm": 0.19665829837322235, "learning_rate": 5.054213217208884e-05, "loss": 1.7254, "step": 20034 }, { "epoch": 0.978271484375, "grad_norm": 0.17398913204669952, "learning_rate": 5.053970478282831e-05, "loss": 1.7318, "step": 20035 }, { "epoch": 0.9783203125, "grad_norm": 0.1795104593038559, "learning_rate": 5.053728283354341e-05, "loss": 1.6893, "step": 20036 }, { "epoch": 0.978369140625, "grad_norm": 0.18742065131664276, "learning_rate": 5.053486632429279e-05, "loss": 1.7014, "step": 20037 }, { "epoch": 0.97841796875, "grad_norm": 0.167989119887352, "learning_rate": 5.053245525513498e-05, "loss": 1.7169, "step": 20038 }, { "epoch": 0.978466796875, "grad_norm": 0.1621403992176056, "learning_rate": 5.053004962612848e-05, "loss": 1.7283, "step": 20039 }, { "epoch": 0.978515625, "grad_norm": 0.18671053647994995, "learning_rate": 5.052764943733157e-05, "loss": 1.7192, "step": 20040 }, { "epoch": 0.978564453125, "grad_norm": 0.15706251561641693, "learning_rate": 5.05252546888024e-05, "loss": 1.7352, "step": 20041 }, { "epoch": 0.97861328125, "grad_norm": 0.1816587895154953, "learning_rate": 5.052286538059903e-05, "loss": 1.692, "step": 20042 }, { "epoch": 0.978662109375, "grad_norm": 0.16404005885124207, "learning_rate": 5.052048151277936e-05, "loss": 1.7238, "step": 20043 }, { "epoch": 0.9787109375, "grad_norm": 0.18229681253433228, "learning_rate": 5.051810308540117e-05, "loss": 1.7085, "step": 20044 }, { "epoch": 0.978759765625, "grad_norm": 0.15676288306713104, "learning_rate": 5.05157300985221e-05, "loss": 1.6972, "step": 20045 }, { "epoch": 0.97880859375, "grad_norm": 0.19392603635787964, "learning_rate": 5.0513362552199656e-05, "loss": 1.7197, "step": 20046 }, { "epoch": 0.978857421875, "grad_norm": 0.20212475955486298, "learning_rate": 5.051100044649122e-05, "loss": 1.7094, "step": 20047 }, { "epoch": 0.97890625, "grad_norm": 0.15854337811470032, "learning_rate": 5.0508643781454054e-05, "loss": 1.7412, "step": 20048 }, { "epoch": 0.978955078125, "grad_norm": 0.18135513365268707, "learning_rate": 5.050629255714523e-05, "loss": 1.6962, "step": 20049 }, { "epoch": 0.97900390625, "grad_norm": 0.19122183322906494, "learning_rate": 5.050394677362178e-05, "loss": 1.7159, "step": 20050 }, { "epoch": 0.979052734375, "grad_norm": 0.17889250814914703, "learning_rate": 5.0501606430940556e-05, "loss": 1.7302, "step": 20051 }, { "epoch": 0.9791015625, "grad_norm": 0.20106607675552368, "learning_rate": 5.049927152915823e-05, "loss": 1.7335, "step": 20052 }, { "epoch": 0.979150390625, "grad_norm": 0.21250371634960175, "learning_rate": 5.049694206833143e-05, "loss": 1.7265, "step": 20053 }, { "epoch": 0.97919921875, "grad_norm": 0.2134973257780075, "learning_rate": 5.049461804851661e-05, "loss": 1.7252, "step": 20054 }, { "epoch": 0.979248046875, "grad_norm": 0.2050783485174179, "learning_rate": 5.049229946977009e-05, "loss": 1.7091, "step": 20055 }, { "epoch": 0.979296875, "grad_norm": 0.1907326877117157, "learning_rate": 5.048998633214807e-05, "loss": 1.7213, "step": 20056 }, { "epoch": 0.979345703125, "grad_norm": 0.18353088200092316, "learning_rate": 5.048767863570655e-05, "loss": 1.7028, "step": 20057 }, { "epoch": 0.97939453125, "grad_norm": 0.19490794837474823, "learning_rate": 5.0485376380501576e-05, "loss": 1.7109, "step": 20058 }, { "epoch": 0.979443359375, "grad_norm": 0.1693468540906906, "learning_rate": 5.0483079566588836e-05, "loss": 1.6853, "step": 20059 }, { "epoch": 0.9794921875, "grad_norm": 0.18667569756507874, "learning_rate": 5.0480788194024044e-05, "loss": 1.7056, "step": 20060 }, { "epoch": 0.979541015625, "grad_norm": 0.1946214884519577, "learning_rate": 5.047850226286274e-05, "loss": 1.7155, "step": 20061 }, { "epoch": 0.97958984375, "grad_norm": 0.15912601351737976, "learning_rate": 5.047622177316029e-05, "loss": 1.6925, "step": 20062 }, { "epoch": 0.979638671875, "grad_norm": 0.19855515658855438, "learning_rate": 5.0473946724972e-05, "loss": 1.7167, "step": 20063 }, { "epoch": 0.9796875, "grad_norm": 0.16847127676010132, "learning_rate": 5.0471677118353004e-05, "loss": 1.7209, "step": 20064 }, { "epoch": 0.979736328125, "grad_norm": 0.168428435921669, "learning_rate": 5.046941295335829e-05, "loss": 1.7216, "step": 20065 }, { "epoch": 0.97978515625, "grad_norm": 0.2252252995967865, "learning_rate": 5.046715423004273e-05, "loss": 1.7168, "step": 20066 }, { "epoch": 0.979833984375, "grad_norm": 0.16399255394935608, "learning_rate": 5.046490094846108e-05, "loss": 1.724, "step": 20067 }, { "epoch": 0.9798828125, "grad_norm": 0.2023688405752182, "learning_rate": 5.0462653108667956e-05, "loss": 1.7183, "step": 20068 }, { "epoch": 0.979931640625, "grad_norm": 0.18122807145118713, "learning_rate": 5.0460410710717803e-05, "loss": 1.7152, "step": 20069 }, { "epoch": 0.97998046875, "grad_norm": 0.15826642513275146, "learning_rate": 5.045817375466501e-05, "loss": 1.7067, "step": 20070 }, { "epoch": 0.980029296875, "grad_norm": 0.1804591566324234, "learning_rate": 5.045594224056376e-05, "loss": 1.6888, "step": 20071 }, { "epoch": 0.980078125, "grad_norm": 0.17948058247566223, "learning_rate": 5.045371616846815e-05, "loss": 1.707, "step": 20072 }, { "epoch": 0.980126953125, "grad_norm": 0.17093735933303833, "learning_rate": 5.045149553843213e-05, "loss": 1.7202, "step": 20073 }, { "epoch": 0.98017578125, "grad_norm": 0.16607865691184998, "learning_rate": 5.04492803505095e-05, "loss": 1.7153, "step": 20074 }, { "epoch": 0.980224609375, "grad_norm": 0.18709976971149445, "learning_rate": 5.0447070604753984e-05, "loss": 1.7275, "step": 20075 }, { "epoch": 0.9802734375, "grad_norm": 0.17638735473155975, "learning_rate": 5.0444866301219094e-05, "loss": 1.7464, "step": 20076 }, { "epoch": 0.980322265625, "grad_norm": 0.17598757147789001, "learning_rate": 5.0442667439958305e-05, "loss": 1.7328, "step": 20077 }, { "epoch": 0.98037109375, "grad_norm": 0.16338671743869781, "learning_rate": 5.044047402102485e-05, "loss": 1.7316, "step": 20078 }, { "epoch": 0.980419921875, "grad_norm": 0.16802777349948883, "learning_rate": 5.043828604447193e-05, "loss": 1.7305, "step": 20079 }, { "epoch": 0.98046875, "grad_norm": 0.17860302329063416, "learning_rate": 5.043610351035255e-05, "loss": 1.6851, "step": 20080 }, { "epoch": 0.980517578125, "grad_norm": 0.16372732818126678, "learning_rate": 5.0433926418719634e-05, "loss": 1.7087, "step": 20081 }, { "epoch": 0.98056640625, "grad_norm": 0.16730478405952454, "learning_rate": 5.04317547696259e-05, "loss": 1.7046, "step": 20082 }, { "epoch": 0.980615234375, "grad_norm": 0.17241671681404114, "learning_rate": 5.0429588563124034e-05, "loss": 1.7051, "step": 20083 }, { "epoch": 0.9806640625, "grad_norm": 0.18053989112377167, "learning_rate": 5.0427427799266494e-05, "loss": 1.7279, "step": 20084 }, { "epoch": 0.980712890625, "grad_norm": 0.16200602054595947, "learning_rate": 5.042527247810565e-05, "loss": 1.7063, "step": 20085 }, { "epoch": 0.98076171875, "grad_norm": 0.17827416956424713, "learning_rate": 5.0423122599693745e-05, "loss": 1.6967, "step": 20086 }, { "epoch": 0.980810546875, "grad_norm": 0.1611417829990387, "learning_rate": 5.042097816408291e-05, "loss": 1.7038, "step": 20087 }, { "epoch": 0.980859375, "grad_norm": 0.17139185965061188, "learning_rate": 5.04188391713251e-05, "loss": 1.705, "step": 20088 }, { "epoch": 0.980908203125, "grad_norm": 0.17295241355895996, "learning_rate": 5.041670562147211e-05, "loss": 1.6869, "step": 20089 }, { "epoch": 0.98095703125, "grad_norm": 0.16652515530586243, "learning_rate": 5.0414577514575735e-05, "loss": 1.7334, "step": 20090 }, { "epoch": 0.981005859375, "grad_norm": 0.1579989790916443, "learning_rate": 5.041245485068747e-05, "loss": 1.6971, "step": 20091 }, { "epoch": 0.9810546875, "grad_norm": 0.16862903535366058, "learning_rate": 5.041033762985879e-05, "loss": 1.7321, "step": 20092 }, { "epoch": 0.981103515625, "grad_norm": 0.1710192859172821, "learning_rate": 5.040822585214104e-05, "loss": 1.6962, "step": 20093 }, { "epoch": 0.98115234375, "grad_norm": 0.15994809567928314, "learning_rate": 5.040611951758534e-05, "loss": 1.7389, "step": 20094 }, { "epoch": 0.981201171875, "grad_norm": 0.1823238879442215, "learning_rate": 5.04040186262428e-05, "loss": 1.697, "step": 20095 }, { "epoch": 0.98125, "grad_norm": 0.18344078958034515, "learning_rate": 5.04019231781643e-05, "loss": 1.7176, "step": 20096 }, { "epoch": 0.981298828125, "grad_norm": 0.15899409353733063, "learning_rate": 5.039983317340062e-05, "loss": 1.7387, "step": 20097 }, { "epoch": 0.98134765625, "grad_norm": 0.17519131302833557, "learning_rate": 5.0397748612002424e-05, "loss": 1.7126, "step": 20098 }, { "epoch": 0.981396484375, "grad_norm": 0.171137735247612, "learning_rate": 5.039566949402024e-05, "loss": 1.7297, "step": 20099 }, { "epoch": 0.9814453125, "grad_norm": 0.17623431980609894, "learning_rate": 5.0393595819504435e-05, "loss": 1.7373, "step": 20100 }, { "epoch": 0.981494140625, "grad_norm": 0.19186566770076752, "learning_rate": 5.03915275885053e-05, "loss": 1.7246, "step": 20101 }, { "epoch": 0.98154296875, "grad_norm": 0.17180617153644562, "learning_rate": 5.0389464801072964e-05, "loss": 1.7174, "step": 20102 }, { "epoch": 0.981591796875, "grad_norm": 0.16268707811832428, "learning_rate": 5.038740745725735e-05, "loss": 1.709, "step": 20103 }, { "epoch": 0.981640625, "grad_norm": 0.17353226244449615, "learning_rate": 5.038535555710841e-05, "loss": 1.7311, "step": 20104 }, { "epoch": 0.981689453125, "grad_norm": 0.17764005064964294, "learning_rate": 5.0383309100675816e-05, "loss": 1.7221, "step": 20105 }, { "epoch": 0.98173828125, "grad_norm": 0.18271121382713318, "learning_rate": 5.0381268088009194e-05, "loss": 1.7029, "step": 20106 }, { "epoch": 0.981787109375, "grad_norm": 0.17296390235424042, "learning_rate": 5.037923251915797e-05, "loss": 1.6874, "step": 20107 }, { "epoch": 0.9818359375, "grad_norm": 0.16007184982299805, "learning_rate": 5.037720239417153e-05, "loss": 1.7189, "step": 20108 }, { "epoch": 0.981884765625, "grad_norm": 0.18293702602386475, "learning_rate": 5.037517771309902e-05, "loss": 1.7096, "step": 20109 }, { "epoch": 0.98193359375, "grad_norm": 0.18698132038116455, "learning_rate": 5.0373158475989554e-05, "loss": 1.7209, "step": 20110 }, { "epoch": 0.981982421875, "grad_norm": 0.1616678535938263, "learning_rate": 5.0371144682892074e-05, "loss": 1.7183, "step": 20111 }, { "epoch": 0.98203125, "grad_norm": 0.18732434511184692, "learning_rate": 5.0369136333855357e-05, "loss": 1.6836, "step": 20112 }, { "epoch": 0.982080078125, "grad_norm": 0.17750869691371918, "learning_rate": 5.036713342892807e-05, "loss": 1.7311, "step": 20113 }, { "epoch": 0.98212890625, "grad_norm": 0.16605079174041748, "learning_rate": 5.036513596815878e-05, "loss": 1.7311, "step": 20114 }, { "epoch": 0.982177734375, "grad_norm": 0.19601045548915863, "learning_rate": 5.03631439515959e-05, "loss": 1.7237, "step": 20115 }, { "epoch": 0.9822265625, "grad_norm": 0.17780272662639618, "learning_rate": 5.03611573792877e-05, "loss": 1.7246, "step": 20116 }, { "epoch": 0.982275390625, "grad_norm": 0.1793123036623001, "learning_rate": 5.035917625128233e-05, "loss": 1.7395, "step": 20117 }, { "epoch": 0.98232421875, "grad_norm": 0.16947530210018158, "learning_rate": 5.035720056762778e-05, "loss": 1.7153, "step": 20118 }, { "epoch": 0.982373046875, "grad_norm": 0.19721056520938873, "learning_rate": 5.035523032837196e-05, "loss": 1.7025, "step": 20119 }, { "epoch": 0.982421875, "grad_norm": 0.1720884144306183, "learning_rate": 5.03532655335626e-05, "loss": 1.7277, "step": 20120 }, { "epoch": 0.982470703125, "grad_norm": 0.1920393705368042, "learning_rate": 5.0351306183247335e-05, "loss": 1.7037, "step": 20121 }, { "epoch": 0.98251953125, "grad_norm": 0.17610444128513336, "learning_rate": 5.034935227747365e-05, "loss": 1.7013, "step": 20122 }, { "epoch": 0.982568359375, "grad_norm": 0.1970602422952652, "learning_rate": 5.0347403816288884e-05, "loss": 1.7419, "step": 20123 }, { "epoch": 0.9826171875, "grad_norm": 0.18326304852962494, "learning_rate": 5.0345460799740284e-05, "loss": 1.7371, "step": 20124 }, { "epoch": 0.982666015625, "grad_norm": 0.17139889299869537, "learning_rate": 5.03435232278749e-05, "loss": 1.7177, "step": 20125 }, { "epoch": 0.98271484375, "grad_norm": 0.18297965824604034, "learning_rate": 5.0341591100739744e-05, "loss": 1.7217, "step": 20126 }, { "epoch": 0.982763671875, "grad_norm": 0.19347167015075684, "learning_rate": 5.0339664418381614e-05, "loss": 1.7288, "step": 20127 }, { "epoch": 0.9828125, "grad_norm": 0.18103444576263428, "learning_rate": 5.033774318084722e-05, "loss": 1.739, "step": 20128 }, { "epoch": 0.982861328125, "grad_norm": 0.20771145820617676, "learning_rate": 5.033582738818309e-05, "loss": 1.7168, "step": 20129 }, { "epoch": 0.98291015625, "grad_norm": 0.18171654641628265, "learning_rate": 5.0333917040435675e-05, "loss": 1.7357, "step": 20130 }, { "epoch": 0.982958984375, "grad_norm": 0.18054696917533875, "learning_rate": 5.033201213765129e-05, "loss": 1.7105, "step": 20131 }, { "epoch": 0.9830078125, "grad_norm": 0.1789781004190445, "learning_rate": 5.033011267987607e-05, "loss": 1.733, "step": 20132 }, { "epoch": 0.983056640625, "grad_norm": 0.18683023750782013, "learning_rate": 5.032821866715609e-05, "loss": 1.7152, "step": 20133 }, { "epoch": 0.98310546875, "grad_norm": 0.16112063825130463, "learning_rate": 5.032633009953722e-05, "loss": 1.7185, "step": 20134 }, { "epoch": 0.983154296875, "grad_norm": 0.16673274338245392, "learning_rate": 5.032444697706524e-05, "loss": 1.7294, "step": 20135 }, { "epoch": 0.983203125, "grad_norm": 0.19077271223068237, "learning_rate": 5.032256929978578e-05, "loss": 1.7137, "step": 20136 }, { "epoch": 0.983251953125, "grad_norm": 0.17868895828723907, "learning_rate": 5.032069706774438e-05, "loss": 1.7162, "step": 20137 }, { "epoch": 0.98330078125, "grad_norm": 0.20729443430900574, "learning_rate": 5.0318830280986374e-05, "loss": 1.7317, "step": 20138 }, { "epoch": 0.983349609375, "grad_norm": 0.1896732896566391, "learning_rate": 5.031696893955701e-05, "loss": 1.7391, "step": 20139 }, { "epoch": 0.9833984375, "grad_norm": 0.17384135723114014, "learning_rate": 5.0315113043501455e-05, "loss": 1.6895, "step": 20140 }, { "epoch": 0.983447265625, "grad_norm": 0.18387411534786224, "learning_rate": 5.031326259286461e-05, "loss": 1.7223, "step": 20141 }, { "epoch": 0.98349609375, "grad_norm": 0.17074260115623474, "learning_rate": 5.031141758769137e-05, "loss": 1.6898, "step": 20142 }, { "epoch": 0.983544921875, "grad_norm": 0.16188205778598785, "learning_rate": 5.0309578028026426e-05, "loss": 1.725, "step": 20143 }, { "epoch": 0.98359375, "grad_norm": 0.17082911729812622, "learning_rate": 5.030774391391439e-05, "loss": 1.7499, "step": 20144 }, { "epoch": 0.983642578125, "grad_norm": 0.18746931850910187, "learning_rate": 5.030591524539967e-05, "loss": 1.7138, "step": 20145 }, { "epoch": 0.98369140625, "grad_norm": 0.15401074290275574, "learning_rate": 5.030409202252664e-05, "loss": 1.7137, "step": 20146 }, { "epoch": 0.983740234375, "grad_norm": 0.15926161408424377, "learning_rate": 5.0302274245339444e-05, "loss": 1.702, "step": 20147 }, { "epoch": 0.9837890625, "grad_norm": 0.15542981028556824, "learning_rate": 5.030046191388216e-05, "loss": 1.7219, "step": 20148 }, { "epoch": 0.983837890625, "grad_norm": 0.18768130242824554, "learning_rate": 5.029865502819869e-05, "loss": 1.7454, "step": 20149 }, { "epoch": 0.98388671875, "grad_norm": 0.165265291929245, "learning_rate": 5.029685358833286e-05, "loss": 1.7045, "step": 20150 }, { "epoch": 0.983935546875, "grad_norm": 0.16364723443984985, "learning_rate": 5.029505759432829e-05, "loss": 1.7438, "step": 20151 }, { "epoch": 0.983984375, "grad_norm": 0.17801958322525024, "learning_rate": 5.029326704622853e-05, "loss": 1.7382, "step": 20152 }, { "epoch": 0.984033203125, "grad_norm": 0.17139603197574615, "learning_rate": 5.029148194407698e-05, "loss": 1.7174, "step": 20153 }, { "epoch": 0.98408203125, "grad_norm": 0.1692235767841339, "learning_rate": 5.028970228791689e-05, "loss": 1.738, "step": 20154 }, { "epoch": 0.984130859375, "grad_norm": 0.1698538362979889, "learning_rate": 5.0287928077791384e-05, "loss": 1.7245, "step": 20155 }, { "epoch": 0.9841796875, "grad_norm": 0.18025939166545868, "learning_rate": 5.028615931374347e-05, "loss": 1.7035, "step": 20156 }, { "epoch": 0.984228515625, "grad_norm": 0.1629084050655365, "learning_rate": 5.028439599581603e-05, "loss": 1.7425, "step": 20157 }, { "epoch": 0.98427734375, "grad_norm": 0.18036995828151703, "learning_rate": 5.02826381240518e-05, "loss": 1.6799, "step": 20158 }, { "epoch": 0.984326171875, "grad_norm": 0.17802534997463226, "learning_rate": 5.028088569849334e-05, "loss": 1.693, "step": 20159 }, { "epoch": 0.984375, "grad_norm": 0.17825423181056976, "learning_rate": 5.027913871918317e-05, "loss": 1.7362, "step": 20160 }, { "epoch": 0.984423828125, "grad_norm": 0.1690993309020996, "learning_rate": 5.027739718616363e-05, "loss": 1.7134, "step": 20161 }, { "epoch": 0.98447265625, "grad_norm": 0.18168006837368011, "learning_rate": 5.0275661099476895e-05, "loss": 1.6812, "step": 20162 }, { "epoch": 0.984521484375, "grad_norm": 0.17096631228923798, "learning_rate": 5.0273930459165036e-05, "loss": 1.7176, "step": 20163 }, { "epoch": 0.9845703125, "grad_norm": 0.18116922676563263, "learning_rate": 5.027220526527004e-05, "loss": 1.7217, "step": 20164 }, { "epoch": 0.984619140625, "grad_norm": 0.17297540605068207, "learning_rate": 5.0270485517833685e-05, "loss": 1.7081, "step": 20165 }, { "epoch": 0.98466796875, "grad_norm": 0.18104995787143707, "learning_rate": 5.0268771216897656e-05, "loss": 1.697, "step": 20166 }, { "epoch": 0.984716796875, "grad_norm": 0.17630304396152496, "learning_rate": 5.0267062362503486e-05, "loss": 1.714, "step": 20167 }, { "epoch": 0.984765625, "grad_norm": 0.18368279933929443, "learning_rate": 5.026535895469264e-05, "loss": 1.6879, "step": 20168 }, { "epoch": 0.984814453125, "grad_norm": 0.1805930882692337, "learning_rate": 5.026366099350636e-05, "loss": 1.6993, "step": 20169 }, { "epoch": 0.98486328125, "grad_norm": 0.17874933779239655, "learning_rate": 5.02619684789858e-05, "loss": 1.7335, "step": 20170 }, { "epoch": 0.984912109375, "grad_norm": 0.17820917069911957, "learning_rate": 5.0260281411172e-05, "loss": 1.7393, "step": 20171 }, { "epoch": 0.9849609375, "grad_norm": 0.1760483682155609, "learning_rate": 5.025859979010583e-05, "loss": 1.7297, "step": 20172 }, { "epoch": 0.985009765625, "grad_norm": 0.15872454643249512, "learning_rate": 5.025692361582805e-05, "loss": 1.7518, "step": 20173 }, { "epoch": 0.98505859375, "grad_norm": 0.1908136010169983, "learning_rate": 5.025525288837926e-05, "loss": 1.7155, "step": 20174 }, { "epoch": 0.985107421875, "grad_norm": 0.1723785549402237, "learning_rate": 5.025358760780001e-05, "loss": 1.6882, "step": 20175 }, { "epoch": 0.98515625, "grad_norm": 0.17243313789367676, "learning_rate": 5.0251927774130596e-05, "loss": 1.7226, "step": 20176 }, { "epoch": 0.985205078125, "grad_norm": 0.1939619928598404, "learning_rate": 5.02502733874113e-05, "loss": 1.7049, "step": 20177 }, { "epoch": 0.98525390625, "grad_norm": 0.16279077529907227, "learning_rate": 5.024862444768217e-05, "loss": 1.6985, "step": 20178 }, { "epoch": 0.985302734375, "grad_norm": 0.17945390939712524, "learning_rate": 5.024698095498319e-05, "loss": 1.7173, "step": 20179 }, { "epoch": 0.9853515625, "grad_norm": 0.16268275678157806, "learning_rate": 5.0245342909354196e-05, "loss": 1.7247, "step": 20180 }, { "epoch": 0.985400390625, "grad_norm": 0.1866128146648407, "learning_rate": 5.024371031083487e-05, "loss": 1.7236, "step": 20181 }, { "epoch": 0.98544921875, "grad_norm": 0.1603821963071823, "learning_rate": 5.0242083159464806e-05, "loss": 1.7182, "step": 20182 }, { "epoch": 0.985498046875, "grad_norm": 0.16840820014476776, "learning_rate": 5.0240461455283436e-05, "loss": 1.7247, "step": 20183 }, { "epoch": 0.985546875, "grad_norm": 0.17765364050865173, "learning_rate": 5.023884519833004e-05, "loss": 1.7141, "step": 20184 }, { "epoch": 0.985595703125, "grad_norm": 0.1492106169462204, "learning_rate": 5.02372343886438e-05, "loss": 1.7146, "step": 20185 }, { "epoch": 0.98564453125, "grad_norm": 0.17701862752437592, "learning_rate": 5.0235629026263744e-05, "loss": 1.699, "step": 20186 }, { "epoch": 0.985693359375, "grad_norm": 0.16541758179664612, "learning_rate": 5.0234029111228816e-05, "loss": 1.7206, "step": 20187 }, { "epoch": 0.9857421875, "grad_norm": 0.18835946917533875, "learning_rate": 5.023243464357775e-05, "loss": 1.7502, "step": 20188 }, { "epoch": 0.985791015625, "grad_norm": 0.16727064549922943, "learning_rate": 5.023084562334923e-05, "loss": 1.7182, "step": 20189 }, { "epoch": 0.98583984375, "grad_norm": 0.1823078989982605, "learning_rate": 5.022926205058174e-05, "loss": 1.717, "step": 20190 }, { "epoch": 0.985888671875, "grad_norm": 0.17176726460456848, "learning_rate": 5.0227683925313646e-05, "loss": 1.7098, "step": 20191 }, { "epoch": 0.9859375, "grad_norm": 0.1720377653837204, "learning_rate": 5.022611124758323e-05, "loss": 1.7274, "step": 20192 }, { "epoch": 0.985986328125, "grad_norm": 0.1833937168121338, "learning_rate": 5.022454401742856e-05, "loss": 1.7099, "step": 20193 }, { "epoch": 0.98603515625, "grad_norm": 0.1665884554386139, "learning_rate": 5.0222982234887685e-05, "loss": 1.7169, "step": 20194 }, { "epoch": 0.986083984375, "grad_norm": 0.18031850457191467, "learning_rate": 5.0221425899998394e-05, "loss": 1.7272, "step": 20195 }, { "epoch": 0.9861328125, "grad_norm": 0.1772412210702896, "learning_rate": 5.021987501279843e-05, "loss": 1.7405, "step": 20196 }, { "epoch": 0.986181640625, "grad_norm": 0.17363782227039337, "learning_rate": 5.021832957332539e-05, "loss": 1.6937, "step": 20197 }, { "epoch": 0.98623046875, "grad_norm": 0.18163278698921204, "learning_rate": 5.021678958161673e-05, "loss": 1.7345, "step": 20198 }, { "epoch": 0.986279296875, "grad_norm": 0.15934734046459198, "learning_rate": 5.021525503770977e-05, "loss": 1.7199, "step": 20199 }, { "epoch": 0.986328125, "grad_norm": 0.16240449249744415, "learning_rate": 5.0213725941641674e-05, "loss": 1.731, "step": 20200 }, { "epoch": 0.986376953125, "grad_norm": 0.18653322756290436, "learning_rate": 5.0212202293449515e-05, "loss": 1.7019, "step": 20201 }, { "epoch": 0.98642578125, "grad_norm": 0.1722019463777542, "learning_rate": 5.021068409317024e-05, "loss": 1.7175, "step": 20202 }, { "epoch": 0.986474609375, "grad_norm": 0.2170557826757431, "learning_rate": 5.020917134084062e-05, "loss": 1.6957, "step": 20203 }, { "epoch": 0.9865234375, "grad_norm": 0.1745189130306244, "learning_rate": 5.020766403649735e-05, "loss": 1.7322, "step": 20204 }, { "epoch": 0.986572265625, "grad_norm": 0.1913844645023346, "learning_rate": 5.0206162180176915e-05, "loss": 1.7109, "step": 20205 }, { "epoch": 0.98662109375, "grad_norm": 0.20750892162322998, "learning_rate": 5.0204665771915736e-05, "loss": 1.7276, "step": 20206 }, { "epoch": 0.986669921875, "grad_norm": 0.1671033501625061, "learning_rate": 5.020317481175011e-05, "loss": 1.7259, "step": 20207 }, { "epoch": 0.98671875, "grad_norm": 0.2140306830406189, "learning_rate": 5.0201689299716104e-05, "loss": 1.7196, "step": 20208 }, { "epoch": 0.986767578125, "grad_norm": 0.18191881477832794, "learning_rate": 5.020020923584979e-05, "loss": 1.692, "step": 20209 }, { "epoch": 0.98681640625, "grad_norm": 0.17136067152023315, "learning_rate": 5.0198734620186984e-05, "loss": 1.7235, "step": 20210 }, { "epoch": 0.986865234375, "grad_norm": 0.20149242877960205, "learning_rate": 5.019726545276346e-05, "loss": 1.7088, "step": 20211 }, { "epoch": 0.9869140625, "grad_norm": 0.20384685695171356, "learning_rate": 5.019580173361483e-05, "loss": 1.7447, "step": 20212 }, { "epoch": 0.986962890625, "grad_norm": 0.16833163797855377, "learning_rate": 5.0194343462776524e-05, "loss": 1.7072, "step": 20213 }, { "epoch": 0.98701171875, "grad_norm": 0.2261127233505249, "learning_rate": 5.019289064028391e-05, "loss": 1.7422, "step": 20214 }, { "epoch": 0.987060546875, "grad_norm": 0.18824288249015808, "learning_rate": 5.019144326617223e-05, "loss": 1.7058, "step": 20215 }, { "epoch": 0.987109375, "grad_norm": 0.17985351383686066, "learning_rate": 5.019000134047651e-05, "loss": 1.7, "step": 20216 }, { "epoch": 0.987158203125, "grad_norm": 0.1917368620634079, "learning_rate": 5.01885648632317e-05, "loss": 1.7212, "step": 20217 }, { "epoch": 0.98720703125, "grad_norm": 0.17570850253105164, "learning_rate": 5.018713383447266e-05, "loss": 1.7217, "step": 20218 }, { "epoch": 0.987255859375, "grad_norm": 0.18538615107536316, "learning_rate": 5.0185708254234023e-05, "loss": 1.7128, "step": 20219 }, { "epoch": 0.9873046875, "grad_norm": 0.1785171627998352, "learning_rate": 5.018428812255038e-05, "loss": 1.696, "step": 20220 }, { "epoch": 0.987353515625, "grad_norm": 0.1790570169687271, "learning_rate": 5.018287343945613e-05, "loss": 1.7281, "step": 20221 }, { "epoch": 0.98740234375, "grad_norm": 0.18034963309764862, "learning_rate": 5.018146420498554e-05, "loss": 1.7019, "step": 20222 }, { "epoch": 0.987451171875, "grad_norm": 0.17448276281356812, "learning_rate": 5.01800604191728e-05, "loss": 1.7127, "step": 20223 }, { "epoch": 0.9875, "grad_norm": 0.18926897644996643, "learning_rate": 5.017866208205191e-05, "loss": 1.7187, "step": 20224 }, { "epoch": 0.987548828125, "grad_norm": 0.16418185830116272, "learning_rate": 5.0177269193656757e-05, "loss": 1.7251, "step": 20225 }, { "epoch": 0.98759765625, "grad_norm": 0.16232454776763916, "learning_rate": 5.01758817540211e-05, "loss": 1.712, "step": 20226 }, { "epoch": 0.987646484375, "grad_norm": 0.1710779219865799, "learning_rate": 5.0174499763178586e-05, "loss": 1.6978, "step": 20227 }, { "epoch": 0.9876953125, "grad_norm": 0.15407009422779083, "learning_rate": 5.0173123221162686e-05, "loss": 1.7259, "step": 20228 }, { "epoch": 0.987744140625, "grad_norm": 0.16630148887634277, "learning_rate": 5.017175212800677e-05, "loss": 1.7188, "step": 20229 }, { "epoch": 0.98779296875, "grad_norm": 0.18948596715927124, "learning_rate": 5.017038648374408e-05, "loss": 1.7229, "step": 20230 }, { "epoch": 0.987841796875, "grad_norm": 0.18305493891239166, "learning_rate": 5.016902628840768e-05, "loss": 1.7185, "step": 20231 }, { "epoch": 0.987890625, "grad_norm": 0.20184952020645142, "learning_rate": 5.016767154203056e-05, "loss": 1.7352, "step": 20232 }, { "epoch": 0.987939453125, "grad_norm": 0.17279398441314697, "learning_rate": 5.016632224464556e-05, "loss": 1.6977, "step": 20233 }, { "epoch": 0.98798828125, "grad_norm": 0.20558230578899384, "learning_rate": 5.0164978396285345e-05, "loss": 1.6768, "step": 20234 }, { "epoch": 0.988037109375, "grad_norm": 0.18914677202701569, "learning_rate": 5.0163639996982535e-05, "loss": 1.7223, "step": 20235 }, { "epoch": 0.9880859375, "grad_norm": 0.194166898727417, "learning_rate": 5.0162307046769526e-05, "loss": 1.7268, "step": 20236 }, { "epoch": 0.988134765625, "grad_norm": 0.17219333350658417, "learning_rate": 5.016097954567865e-05, "loss": 1.7187, "step": 20237 }, { "epoch": 0.98818359375, "grad_norm": 0.17621494829654694, "learning_rate": 5.0159657493742074e-05, "loss": 1.7035, "step": 20238 }, { "epoch": 0.988232421875, "grad_norm": 0.17866814136505127, "learning_rate": 5.0158340890991796e-05, "loss": 1.7121, "step": 20239 }, { "epoch": 0.98828125, "grad_norm": 0.16800372302532196, "learning_rate": 5.01570297374598e-05, "loss": 1.6948, "step": 20240 }, { "epoch": 0.988330078125, "grad_norm": 0.16438846290111542, "learning_rate": 5.015572403317781e-05, "loss": 1.7326, "step": 20241 }, { "epoch": 0.98837890625, "grad_norm": 0.16364668309688568, "learning_rate": 5.015442377817749e-05, "loss": 1.7337, "step": 20242 }, { "epoch": 0.988427734375, "grad_norm": 0.16754387319087982, "learning_rate": 5.015312897249035e-05, "loss": 1.7069, "step": 20243 }, { "epoch": 0.9884765625, "grad_norm": 0.18955601751804352, "learning_rate": 5.0151839616147775e-05, "loss": 1.7391, "step": 20244 }, { "epoch": 0.988525390625, "grad_norm": 0.1668349653482437, "learning_rate": 5.015055570918101e-05, "loss": 1.7183, "step": 20245 }, { "epoch": 0.98857421875, "grad_norm": 0.16621948778629303, "learning_rate": 5.014927725162114e-05, "loss": 1.7083, "step": 20246 }, { "epoch": 0.988623046875, "grad_norm": 0.1716442108154297, "learning_rate": 5.01480042434992e-05, "loss": 1.735, "step": 20247 }, { "epoch": 0.988671875, "grad_norm": 0.17258131504058838, "learning_rate": 5.014673668484604e-05, "loss": 1.7014, "step": 20248 }, { "epoch": 0.988720703125, "grad_norm": 0.15245281159877777, "learning_rate": 5.0145474575692324e-05, "loss": 1.7379, "step": 20249 }, { "epoch": 0.98876953125, "grad_norm": 0.18457002937793732, "learning_rate": 5.0144217916068714e-05, "loss": 1.7457, "step": 20250 }, { "epoch": 0.988818359375, "grad_norm": 0.17048633098602295, "learning_rate": 5.0142966706005605e-05, "loss": 1.7108, "step": 20251 }, { "epoch": 0.9888671875, "grad_norm": 0.183852881193161, "learning_rate": 5.014172094553335e-05, "loss": 1.7352, "step": 20252 }, { "epoch": 0.988916015625, "grad_norm": 0.17306511104106903, "learning_rate": 5.014048063468215e-05, "loss": 1.7362, "step": 20253 }, { "epoch": 0.98896484375, "grad_norm": 0.18956054747104645, "learning_rate": 5.0139245773482026e-05, "loss": 1.7149, "step": 20254 }, { "epoch": 0.989013671875, "grad_norm": 0.17331530153751373, "learning_rate": 5.0138016361962966e-05, "loss": 1.7246, "step": 20255 }, { "epoch": 0.9890625, "grad_norm": 0.18295617401599884, "learning_rate": 5.013679240015472e-05, "loss": 1.7107, "step": 20256 }, { "epoch": 0.989111328125, "grad_norm": 0.16916720569133759, "learning_rate": 5.013557388808694e-05, "loss": 1.7156, "step": 20257 }, { "epoch": 0.98916015625, "grad_norm": 0.16986258327960968, "learning_rate": 5.013436082578921e-05, "loss": 1.7521, "step": 20258 }, { "epoch": 0.989208984375, "grad_norm": 0.1628047674894333, "learning_rate": 5.013315321329088e-05, "loss": 1.7229, "step": 20259 }, { "epoch": 0.9892578125, "grad_norm": 0.18464601039886475, "learning_rate": 5.013195105062124e-05, "loss": 1.7307, "step": 20260 }, { "epoch": 0.989306640625, "grad_norm": 0.17628657817840576, "learning_rate": 5.013075433780945e-05, "loss": 1.6842, "step": 20261 }, { "epoch": 0.98935546875, "grad_norm": 0.16948793828487396, "learning_rate": 5.012956307488447e-05, "loss": 1.7032, "step": 20262 }, { "epoch": 0.989404296875, "grad_norm": 0.1539289355278015, "learning_rate": 5.012837726187519e-05, "loss": 1.7281, "step": 20263 }, { "epoch": 0.989453125, "grad_norm": 0.17945386469364166, "learning_rate": 5.0127196898810315e-05, "loss": 1.7516, "step": 20264 }, { "epoch": 0.989501953125, "grad_norm": 0.16939601302146912, "learning_rate": 5.012602198571853e-05, "loss": 1.7114, "step": 20265 }, { "epoch": 0.98955078125, "grad_norm": 0.1953752338886261, "learning_rate": 5.012485252262823e-05, "loss": 1.7421, "step": 20266 }, { "epoch": 0.989599609375, "grad_norm": 0.16091205179691315, "learning_rate": 5.012368850956782e-05, "loss": 1.7322, "step": 20267 }, { "epoch": 0.9896484375, "grad_norm": 0.1667259931564331, "learning_rate": 5.012252994656546e-05, "loss": 1.7306, "step": 20268 }, { "epoch": 0.989697265625, "grad_norm": 0.20227766036987305, "learning_rate": 5.012137683364928e-05, "loss": 1.7068, "step": 20269 }, { "epoch": 0.98974609375, "grad_norm": 0.17664775252342224, "learning_rate": 5.012022917084716e-05, "loss": 1.7399, "step": 20270 }, { "epoch": 0.989794921875, "grad_norm": 0.1868121325969696, "learning_rate": 5.011908695818698e-05, "loss": 1.7123, "step": 20271 }, { "epoch": 0.98984375, "grad_norm": 0.18133603036403656, "learning_rate": 5.01179501956964e-05, "loss": 1.7142, "step": 20272 }, { "epoch": 0.989892578125, "grad_norm": 0.17805543541908264, "learning_rate": 5.0116818883402946e-05, "loss": 1.6987, "step": 20273 }, { "epoch": 0.98994140625, "grad_norm": 0.16571465134620667, "learning_rate": 5.0115693021334076e-05, "loss": 1.7114, "step": 20274 }, { "epoch": 0.989990234375, "grad_norm": 0.1780179888010025, "learning_rate": 5.0114572609517034e-05, "loss": 1.7038, "step": 20275 }, { "epoch": 0.9900390625, "grad_norm": 0.18043430149555206, "learning_rate": 5.011345764797901e-05, "loss": 1.7046, "step": 20276 }, { "epoch": 0.990087890625, "grad_norm": 0.17529277503490448, "learning_rate": 5.0112348136746994e-05, "loss": 1.6926, "step": 20277 }, { "epoch": 0.99013671875, "grad_norm": 0.16603787243366241, "learning_rate": 5.0111244075847913e-05, "loss": 1.7349, "step": 20278 }, { "epoch": 0.990185546875, "grad_norm": 0.179668590426445, "learning_rate": 5.011014546530849e-05, "loss": 1.716, "step": 20279 }, { "epoch": 0.990234375, "grad_norm": 0.19040325284004211, "learning_rate": 5.0109052305155376e-05, "loss": 1.7246, "step": 20280 }, { "epoch": 0.990283203125, "grad_norm": 0.18136471509933472, "learning_rate": 5.010796459541507e-05, "loss": 1.7361, "step": 20281 }, { "epoch": 0.99033203125, "grad_norm": 0.19463491439819336, "learning_rate": 5.010688233611389e-05, "loss": 1.7396, "step": 20282 }, { "epoch": 0.990380859375, "grad_norm": 0.17662779986858368, "learning_rate": 5.010580552727812e-05, "loss": 1.6958, "step": 20283 }, { "epoch": 0.9904296875, "grad_norm": 0.17435647547245026, "learning_rate": 5.010473416893381e-05, "loss": 1.72, "step": 20284 }, { "epoch": 0.990478515625, "grad_norm": 0.21112653613090515, "learning_rate": 5.010366826110695e-05, "loss": 1.7279, "step": 20285 }, { "epoch": 0.99052734375, "grad_norm": 0.16715280711650848, "learning_rate": 5.0102607803823374e-05, "loss": 1.7276, "step": 20286 }, { "epoch": 0.990576171875, "grad_norm": 0.20989440381526947, "learning_rate": 5.0101552797108775e-05, "loss": 1.7317, "step": 20287 }, { "epoch": 0.990625, "grad_norm": 0.18797901272773743, "learning_rate": 5.010050324098874e-05, "loss": 1.7038, "step": 20288 }, { "epoch": 0.990673828125, "grad_norm": 0.1688162088394165, "learning_rate": 5.0099459135488685e-05, "loss": 1.7143, "step": 20289 }, { "epoch": 0.99072265625, "grad_norm": 0.1947789192199707, "learning_rate": 5.009842048063391e-05, "loss": 1.7028, "step": 20290 }, { "epoch": 0.990771484375, "grad_norm": 0.1786980926990509, "learning_rate": 5.00973872764496e-05, "loss": 1.7308, "step": 20291 }, { "epoch": 0.9908203125, "grad_norm": 0.1944187432527542, "learning_rate": 5.009635952296081e-05, "loss": 1.7024, "step": 20292 }, { "epoch": 0.990869140625, "grad_norm": 0.18866223096847534, "learning_rate": 5.0095337220192424e-05, "loss": 1.7269, "step": 20293 }, { "epoch": 0.99091796875, "grad_norm": 0.17963053286075592, "learning_rate": 5.0094320368169214e-05, "loss": 1.7072, "step": 20294 }, { "epoch": 0.990966796875, "grad_norm": 0.1801489144563675, "learning_rate": 5.0093308966915864e-05, "loss": 1.7637, "step": 20295 }, { "epoch": 0.991015625, "grad_norm": 0.18549244105815887, "learning_rate": 5.009230301645684e-05, "loss": 1.7349, "step": 20296 }, { "epoch": 0.991064453125, "grad_norm": 0.17906592786312103, "learning_rate": 5.009130251681655e-05, "loss": 1.7193, "step": 20297 }, { "epoch": 0.99111328125, "grad_norm": 0.19264283776283264, "learning_rate": 5.0090307468019215e-05, "loss": 1.7271, "step": 20298 }, { "epoch": 0.991162109375, "grad_norm": 0.17230384051799774, "learning_rate": 5.008931787008899e-05, "loss": 1.707, "step": 20299 }, { "epoch": 0.9912109375, "grad_norm": 0.18480463325977325, "learning_rate": 5.008833372304983e-05, "loss": 1.7188, "step": 20300 }, { "epoch": 0.991259765625, "grad_norm": 0.1756400167942047, "learning_rate": 5.008735502692558e-05, "loss": 1.7232, "step": 20301 }, { "epoch": 0.99130859375, "grad_norm": 0.17109809815883636, "learning_rate": 5.008638178173999e-05, "loss": 1.7282, "step": 20302 }, { "epoch": 0.991357421875, "grad_norm": 0.17778173089027405, "learning_rate": 5.008541398751663e-05, "loss": 1.7138, "step": 20303 }, { "epoch": 0.99140625, "grad_norm": 0.16796451807022095, "learning_rate": 5.008445164427895e-05, "loss": 1.6985, "step": 20304 }, { "epoch": 0.991455078125, "grad_norm": 0.18489976227283478, "learning_rate": 5.0083494752050304e-05, "loss": 1.713, "step": 20305 }, { "epoch": 0.99150390625, "grad_norm": 0.18082879483699799, "learning_rate": 5.008254331085385e-05, "loss": 1.7095, "step": 20306 }, { "epoch": 0.991552734375, "grad_norm": 0.1623985916376114, "learning_rate": 5.008159732071263e-05, "loss": 1.7159, "step": 20307 }, { "epoch": 0.9916015625, "grad_norm": 0.1811671257019043, "learning_rate": 5.008065678164962e-05, "loss": 1.7051, "step": 20308 }, { "epoch": 0.991650390625, "grad_norm": 0.1852104514837265, "learning_rate": 5.007972169368758e-05, "loss": 1.701, "step": 20309 }, { "epoch": 0.99169921875, "grad_norm": 0.1951219141483307, "learning_rate": 5.0078792056849196e-05, "loss": 1.7122, "step": 20310 }, { "epoch": 0.991748046875, "grad_norm": 0.17673923075199127, "learning_rate": 5.0077867871156974e-05, "loss": 1.7216, "step": 20311 }, { "epoch": 0.991796875, "grad_norm": 0.16849762201309204, "learning_rate": 5.007694913663334e-05, "loss": 1.7232, "step": 20312 }, { "epoch": 0.991845703125, "grad_norm": 0.1968660205602646, "learning_rate": 5.007603585330053e-05, "loss": 1.7082, "step": 20313 }, { "epoch": 0.99189453125, "grad_norm": 0.1774509847164154, "learning_rate": 5.0075128021180676e-05, "loss": 1.7215, "step": 20314 }, { "epoch": 0.991943359375, "grad_norm": 0.1685331016778946, "learning_rate": 5.007422564029584e-05, "loss": 1.7468, "step": 20315 }, { "epoch": 0.9919921875, "grad_norm": 0.1717253178358078, "learning_rate": 5.0073328710667816e-05, "loss": 1.7219, "step": 20316 }, { "epoch": 0.992041015625, "grad_norm": 0.16259445250034332, "learning_rate": 5.0072437232318396e-05, "loss": 1.702, "step": 20317 }, { "epoch": 0.99208984375, "grad_norm": 0.1561025232076645, "learning_rate": 5.007155120526915e-05, "loss": 1.7268, "step": 20318 }, { "epoch": 0.992138671875, "grad_norm": 0.17756180465221405, "learning_rate": 5.0070670629541564e-05, "loss": 1.7033, "step": 20319 }, { "epoch": 0.9921875, "grad_norm": 0.1670379340648651, "learning_rate": 5.0069795505157e-05, "loss": 1.7318, "step": 20320 }, { "epoch": 0.992236328125, "grad_norm": 0.1935696303844452, "learning_rate": 5.006892583213663e-05, "loss": 1.6876, "step": 20321 }, { "epoch": 0.99228515625, "grad_norm": 0.18659386038780212, "learning_rate": 5.006806161050158e-05, "loss": 1.698, "step": 20322 }, { "epoch": 0.992333984375, "grad_norm": 0.16459856927394867, "learning_rate": 5.0067202840272716e-05, "loss": 1.7213, "step": 20323 }, { "epoch": 0.9923828125, "grad_norm": 0.1810399889945984, "learning_rate": 5.0066349521470954e-05, "loss": 1.6866, "step": 20324 }, { "epoch": 0.992431640625, "grad_norm": 0.17209242284297943, "learning_rate": 5.006550165411688e-05, "loss": 1.7355, "step": 20325 }, { "epoch": 0.99248046875, "grad_norm": 0.15436133742332458, "learning_rate": 5.00646592382311e-05, "loss": 1.704, "step": 20326 }, { "epoch": 0.992529296875, "grad_norm": 0.19022047519683838, "learning_rate": 5.0063822273834016e-05, "loss": 1.6998, "step": 20327 }, { "epoch": 0.992578125, "grad_norm": 0.19611112773418427, "learning_rate": 5.006299076094589e-05, "loss": 1.7329, "step": 20328 }, { "epoch": 0.992626953125, "grad_norm": 0.16994531452655792, "learning_rate": 5.006216469958693e-05, "loss": 1.7169, "step": 20329 }, { "epoch": 0.99267578125, "grad_norm": 0.1882762759923935, "learning_rate": 5.00613440897771e-05, "loss": 1.7128, "step": 20330 }, { "epoch": 0.992724609375, "grad_norm": 0.17193423211574554, "learning_rate": 5.006052893153631e-05, "loss": 1.7181, "step": 20331 }, { "epoch": 0.9927734375, "grad_norm": 0.20278140902519226, "learning_rate": 5.005971922488434e-05, "loss": 1.7309, "step": 20332 }, { "epoch": 0.992822265625, "grad_norm": 0.17395535111427307, "learning_rate": 5.005891496984077e-05, "loss": 1.7161, "step": 20333 }, { "epoch": 0.99287109375, "grad_norm": 0.17469988763332367, "learning_rate": 5.005811616642511e-05, "loss": 1.7305, "step": 20334 }, { "epoch": 0.992919921875, "grad_norm": 0.16842499375343323, "learning_rate": 5.005732281465673e-05, "loss": 1.6947, "step": 20335 }, { "epoch": 0.99296875, "grad_norm": 0.17447471618652344, "learning_rate": 5.005653491455485e-05, "loss": 1.7258, "step": 20336 }, { "epoch": 0.993017578125, "grad_norm": 0.16126304864883423, "learning_rate": 5.005575246613854e-05, "loss": 1.6982, "step": 20337 }, { "epoch": 0.99306640625, "grad_norm": 0.1871057003736496, "learning_rate": 5.005497546942682e-05, "loss": 1.6968, "step": 20338 }, { "epoch": 0.993115234375, "grad_norm": 0.19437631964683533, "learning_rate": 5.005420392443847e-05, "loss": 1.7003, "step": 20339 }, { "epoch": 0.9931640625, "grad_norm": 0.1617085337638855, "learning_rate": 5.00534378311922e-05, "loss": 1.7287, "step": 20340 }, { "epoch": 0.993212890625, "grad_norm": 0.1897381991147995, "learning_rate": 5.00526771897066e-05, "loss": 1.7102, "step": 20341 }, { "epoch": 0.99326171875, "grad_norm": 0.18751312792301178, "learning_rate": 5.005192200000008e-05, "loss": 1.7499, "step": 20342 }, { "epoch": 0.993310546875, "grad_norm": 0.1637164205312729, "learning_rate": 5.005117226209095e-05, "loss": 1.7291, "step": 20343 }, { "epoch": 0.993359375, "grad_norm": 0.20648381114006042, "learning_rate": 5.00504279759974e-05, "loss": 1.7241, "step": 20344 }, { "epoch": 0.993408203125, "grad_norm": 0.17871659994125366, "learning_rate": 5.0049689141737424e-05, "loss": 1.6993, "step": 20345 }, { "epoch": 0.99345703125, "grad_norm": 0.20082731544971466, "learning_rate": 5.004895575932897e-05, "loss": 1.7235, "step": 20346 }, { "epoch": 0.993505859375, "grad_norm": 0.1909726858139038, "learning_rate": 5.00482278287898e-05, "loss": 1.7213, "step": 20347 }, { "epoch": 0.9935546875, "grad_norm": 0.188546821475029, "learning_rate": 5.0047505350137554e-05, "loss": 1.7148, "step": 20348 }, { "epoch": 0.993603515625, "grad_norm": 0.17814502120018005, "learning_rate": 5.004678832338975e-05, "loss": 1.7234, "step": 20349 }, { "epoch": 0.99365234375, "grad_norm": 0.20342442393302917, "learning_rate": 5.004607674856374e-05, "loss": 1.683, "step": 20350 }, { "epoch": 0.993701171875, "grad_norm": 0.19986310601234436, "learning_rate": 5.0045370625676775e-05, "loss": 1.7115, "step": 20351 }, { "epoch": 0.99375, "grad_norm": 0.20516908168792725, "learning_rate": 5.0044669954746e-05, "loss": 1.7067, "step": 20352 }, { "epoch": 0.993798828125, "grad_norm": 0.17865876853466034, "learning_rate": 5.004397473578837e-05, "loss": 1.7148, "step": 20353 }, { "epoch": 0.99384765625, "grad_norm": 0.18799179792404175, "learning_rate": 5.0043284968820725e-05, "loss": 1.7073, "step": 20354 }, { "epoch": 0.993896484375, "grad_norm": 0.20503345131874084, "learning_rate": 5.004260065385983e-05, "loss": 1.7119, "step": 20355 }, { "epoch": 0.9939453125, "grad_norm": 0.17492930591106415, "learning_rate": 5.0041921790922215e-05, "loss": 1.7122, "step": 20356 }, { "epoch": 0.993994140625, "grad_norm": 0.19728846848011017, "learning_rate": 5.004124838002434e-05, "loss": 1.7156, "step": 20357 }, { "epoch": 0.99404296875, "grad_norm": 0.20013806223869324, "learning_rate": 5.004058042118258e-05, "loss": 1.725, "step": 20358 }, { "epoch": 0.994091796875, "grad_norm": 0.18482878804206848, "learning_rate": 5.0039917914413065e-05, "loss": 1.737, "step": 20359 }, { "epoch": 0.994140625, "grad_norm": 0.1818455457687378, "learning_rate": 5.003926085973185e-05, "loss": 1.7277, "step": 20360 }, { "epoch": 0.994189453125, "grad_norm": 0.15558183193206787, "learning_rate": 5.003860925715491e-05, "loss": 1.729, "step": 20361 }, { "epoch": 0.99423828125, "grad_norm": 0.17398203909397125, "learning_rate": 5.003796310669799e-05, "loss": 1.7013, "step": 20362 }, { "epoch": 0.994287109375, "grad_norm": 0.17997683584690094, "learning_rate": 5.0037322408376766e-05, "loss": 1.7114, "step": 20363 }, { "epoch": 0.9943359375, "grad_norm": 0.17034102976322174, "learning_rate": 5.0036687162206775e-05, "loss": 1.7387, "step": 20364 }, { "epoch": 0.994384765625, "grad_norm": 0.17186301946640015, "learning_rate": 5.00360573682034e-05, "loss": 1.705, "step": 20365 }, { "epoch": 0.99443359375, "grad_norm": 0.17073971033096313, "learning_rate": 5.003543302638191e-05, "loss": 1.7145, "step": 20366 }, { "epoch": 0.994482421875, "grad_norm": 0.1690240204334259, "learning_rate": 5.0034814136757436e-05, "loss": 1.7198, "step": 20367 }, { "epoch": 0.99453125, "grad_norm": 0.20877988636493683, "learning_rate": 5.003420069934497e-05, "loss": 1.6997, "step": 20368 }, { "epoch": 0.994580078125, "grad_norm": 0.15490029752254486, "learning_rate": 5.0033592714159403e-05, "loss": 1.7076, "step": 20369 }, { "epoch": 0.99462890625, "grad_norm": 0.17714405059814453, "learning_rate": 5.0032990181215434e-05, "loss": 1.7426, "step": 20370 }, { "epoch": 0.994677734375, "grad_norm": 0.17641788721084595, "learning_rate": 5.003239310052769e-05, "loss": 1.7199, "step": 20371 }, { "epoch": 0.9947265625, "grad_norm": 0.16273757815361023, "learning_rate": 5.003180147211066e-05, "loss": 1.7451, "step": 20372 }, { "epoch": 0.994775390625, "grad_norm": 0.16546444594860077, "learning_rate": 5.0031215295978625e-05, "loss": 1.7147, "step": 20373 }, { "epoch": 0.99482421875, "grad_norm": 0.1893787980079651, "learning_rate": 5.003063457214583e-05, "loss": 1.7292, "step": 20374 }, { "epoch": 0.994873046875, "grad_norm": 0.19260969758033752, "learning_rate": 5.003005930062636e-05, "loss": 1.6988, "step": 20375 }, { "epoch": 0.994921875, "grad_norm": 0.17441818118095398, "learning_rate": 5.002948948143413e-05, "loss": 1.6835, "step": 20376 }, { "epoch": 0.994970703125, "grad_norm": 0.18927516043186188, "learning_rate": 5.002892511458298e-05, "loss": 1.7391, "step": 20377 }, { "epoch": 0.99501953125, "grad_norm": 0.17848125100135803, "learning_rate": 5.002836620008654e-05, "loss": 1.7267, "step": 20378 }, { "epoch": 0.995068359375, "grad_norm": 0.18005990982055664, "learning_rate": 5.0027812737958416e-05, "loss": 1.7135, "step": 20379 }, { "epoch": 0.9951171875, "grad_norm": 0.17004595696926117, "learning_rate": 5.002726472821198e-05, "loss": 1.7211, "step": 20380 }, { "epoch": 0.995166015625, "grad_norm": 0.2135671079158783, "learning_rate": 5.002672217086051e-05, "loss": 1.7113, "step": 20381 }, { "epoch": 0.99521484375, "grad_norm": 0.19239498674869537, "learning_rate": 5.002618506591719e-05, "loss": 1.7122, "step": 20382 }, { "epoch": 0.995263671875, "grad_norm": 0.1761694699525833, "learning_rate": 5.002565341339501e-05, "loss": 1.7139, "step": 20383 }, { "epoch": 0.9953125, "grad_norm": 0.2180684506893158, "learning_rate": 5.0025127213306854e-05, "loss": 1.7226, "step": 20384 }, { "epoch": 0.995361328125, "grad_norm": 0.1775873452425003, "learning_rate": 5.00246064656655e-05, "loss": 1.6783, "step": 20385 }, { "epoch": 0.99541015625, "grad_norm": 0.18846847116947174, "learning_rate": 5.0024091170483524e-05, "loss": 1.7067, "step": 20386 }, { "epoch": 0.995458984375, "grad_norm": 0.18013995885849, "learning_rate": 5.0023581327773475e-05, "loss": 1.7183, "step": 20387 }, { "epoch": 0.9955078125, "grad_norm": 0.18589144945144653, "learning_rate": 5.002307693754764e-05, "loss": 1.7304, "step": 20388 }, { "epoch": 0.995556640625, "grad_norm": 0.17464445531368256, "learning_rate": 5.0022577999818306e-05, "loss": 1.7195, "step": 20389 }, { "epoch": 0.99560546875, "grad_norm": 0.19573083519935608, "learning_rate": 5.002208451459754e-05, "loss": 1.7238, "step": 20390 }, { "epoch": 0.995654296875, "grad_norm": 0.19970189034938812, "learning_rate": 5.002159648189729e-05, "loss": 1.7032, "step": 20391 }, { "epoch": 0.995703125, "grad_norm": 0.16562478244304657, "learning_rate": 5.002111390172941e-05, "loss": 1.7091, "step": 20392 }, { "epoch": 0.995751953125, "grad_norm": 0.19090212881565094, "learning_rate": 5.002063677410555e-05, "loss": 1.7136, "step": 20393 }, { "epoch": 0.99580078125, "grad_norm": 0.18619850277900696, "learning_rate": 5.002016509903735e-05, "loss": 1.7172, "step": 20394 }, { "epoch": 0.995849609375, "grad_norm": 0.19143374264240265, "learning_rate": 5.0019698876536175e-05, "loss": 1.703, "step": 20395 }, { "epoch": 0.9958984375, "grad_norm": 0.21559523046016693, "learning_rate": 5.001923810661333e-05, "loss": 1.7072, "step": 20396 }, { "epoch": 0.995947265625, "grad_norm": 0.19049274921417236, "learning_rate": 5.001878278928001e-05, "loss": 1.7011, "step": 20397 }, { "epoch": 0.99599609375, "grad_norm": 0.19314931333065033, "learning_rate": 5.001833292454723e-05, "loss": 1.7079, "step": 20398 }, { "epoch": 0.996044921875, "grad_norm": 0.23248343169689178, "learning_rate": 5.00178885124259e-05, "loss": 1.729, "step": 20399 }, { "epoch": 0.99609375, "grad_norm": 0.1758887618780136, "learning_rate": 5.00174495529268e-05, "loss": 1.6856, "step": 20400 }, { "epoch": 0.996142578125, "grad_norm": 0.2071230560541153, "learning_rate": 5.001701604606057e-05, "loss": 1.7265, "step": 20401 }, { "epoch": 0.99619140625, "grad_norm": 0.19966569542884827, "learning_rate": 5.001658799183768e-05, "loss": 1.7253, "step": 20402 }, { "epoch": 0.996240234375, "grad_norm": 0.1780015081167221, "learning_rate": 5.001616539026854e-05, "loss": 1.7475, "step": 20403 }, { "epoch": 0.9962890625, "grad_norm": 0.19048403203487396, "learning_rate": 5.0015748241363374e-05, "loss": 1.7078, "step": 20404 }, { "epoch": 0.996337890625, "grad_norm": 0.18304304778575897, "learning_rate": 5.001533654513231e-05, "loss": 1.7124, "step": 20405 }, { "epoch": 0.99638671875, "grad_norm": 0.1772465705871582, "learning_rate": 5.001493030158531e-05, "loss": 1.7208, "step": 20406 }, { "epoch": 0.996435546875, "grad_norm": 0.1979982554912567, "learning_rate": 5.001452951073223e-05, "loss": 1.7303, "step": 20407 }, { "epoch": 0.996484375, "grad_norm": 0.1564207822084427, "learning_rate": 5.001413417258279e-05, "loss": 1.7091, "step": 20408 }, { "epoch": 0.996533203125, "grad_norm": 0.23126472532749176, "learning_rate": 5.001374428714655e-05, "loss": 1.7133, "step": 20409 }, { "epoch": 0.99658203125, "grad_norm": 0.18050582706928253, "learning_rate": 5.0013359854432965e-05, "loss": 1.7083, "step": 20410 }, { "epoch": 0.996630859375, "grad_norm": 0.18944242596626282, "learning_rate": 5.001298087445137e-05, "loss": 1.7019, "step": 20411 }, { "epoch": 0.9966796875, "grad_norm": 0.20146240293979645, "learning_rate": 5.001260734721091e-05, "loss": 1.6955, "step": 20412 }, { "epoch": 0.996728515625, "grad_norm": 0.195143923163414, "learning_rate": 5.00122392727207e-05, "loss": 1.7092, "step": 20413 }, { "epoch": 0.99677734375, "grad_norm": 0.18397483229637146, "learning_rate": 5.001187665098958e-05, "loss": 1.7143, "step": 20414 }, { "epoch": 0.996826171875, "grad_norm": 0.1954929083585739, "learning_rate": 5.0011519482026426e-05, "loss": 1.6877, "step": 20415 }, { "epoch": 0.996875, "grad_norm": 0.17994047701358795, "learning_rate": 5.001116776583982e-05, "loss": 1.7473, "step": 20416 }, { "epoch": 0.996923828125, "grad_norm": 0.20306557416915894, "learning_rate": 5.001082150243832e-05, "loss": 1.719, "step": 20417 }, { "epoch": 0.99697265625, "grad_norm": 0.16985858976840973, "learning_rate": 5.001048069183033e-05, "loss": 1.7048, "step": 20418 }, { "epoch": 0.997021484375, "grad_norm": 0.20281511545181274, "learning_rate": 5.001014533402409e-05, "loss": 1.7184, "step": 20419 }, { "epoch": 0.9970703125, "grad_norm": 0.2013767957687378, "learning_rate": 5.0009815429027735e-05, "loss": 1.7187, "step": 20420 }, { "epoch": 0.997119140625, "grad_norm": 0.17650751769542694, "learning_rate": 5.0009490976849266e-05, "loss": 1.7024, "step": 20421 }, { "epoch": 0.99716796875, "grad_norm": 0.2115846574306488, "learning_rate": 5.0009171977496536e-05, "loss": 1.7251, "step": 20422 }, { "epoch": 0.997216796875, "grad_norm": 0.18653273582458496, "learning_rate": 5.0008858430977255e-05, "loss": 1.7355, "step": 20423 }, { "epoch": 0.997265625, "grad_norm": 0.21041952073574066, "learning_rate": 5.000855033729906e-05, "loss": 1.7018, "step": 20424 }, { "epoch": 0.997314453125, "grad_norm": 0.2012995481491089, "learning_rate": 5.0008247696469414e-05, "loss": 1.7312, "step": 20425 }, { "epoch": 0.99736328125, "grad_norm": 0.1963556408882141, "learning_rate": 5.000795050849564e-05, "loss": 1.7163, "step": 20426 }, { "epoch": 0.997412109375, "grad_norm": 0.20294328033924103, "learning_rate": 5.0007658773384914e-05, "loss": 1.7338, "step": 20427 }, { "epoch": 0.9974609375, "grad_norm": 0.2114742398262024, "learning_rate": 5.000737249114438e-05, "loss": 1.725, "step": 20428 }, { "epoch": 0.997509765625, "grad_norm": 0.15983456373214722, "learning_rate": 5.000709166178089e-05, "loss": 1.7276, "step": 20429 }, { "epoch": 0.99755859375, "grad_norm": 0.183927521109581, "learning_rate": 5.000681628530132e-05, "loss": 1.7096, "step": 20430 }, { "epoch": 0.997607421875, "grad_norm": 0.19163142144680023, "learning_rate": 5.000654636171229e-05, "loss": 1.705, "step": 20431 }, { "epoch": 0.99765625, "grad_norm": 0.182555690407753, "learning_rate": 5.0006281891020395e-05, "loss": 1.6971, "step": 20432 }, { "epoch": 0.997705078125, "grad_norm": 0.2067059576511383, "learning_rate": 5.0006022873232e-05, "loss": 1.7052, "step": 20433 }, { "epoch": 0.99775390625, "grad_norm": 0.1758645921945572, "learning_rate": 5.00057693083534e-05, "loss": 1.7271, "step": 20434 }, { "epoch": 0.997802734375, "grad_norm": 0.19872163236141205, "learning_rate": 5.000552119639073e-05, "loss": 1.7192, "step": 20435 }, { "epoch": 0.9978515625, "grad_norm": 0.20527388155460358, "learning_rate": 5.0005278537350045e-05, "loss": 1.7092, "step": 20436 }, { "epoch": 0.997900390625, "grad_norm": 0.18802741169929504, "learning_rate": 5.000504133123717e-05, "loss": 1.7027, "step": 20437 }, { "epoch": 0.99794921875, "grad_norm": 0.1816863715648651, "learning_rate": 5.000480957805787e-05, "loss": 1.7174, "step": 20438 }, { "epoch": 0.997998046875, "grad_norm": 0.20540139079093933, "learning_rate": 5.000458327781777e-05, "loss": 1.7266, "step": 20439 }, { "epoch": 0.998046875, "grad_norm": 0.16939231753349304, "learning_rate": 5.000436243052237e-05, "loss": 1.7146, "step": 20440 }, { "epoch": 0.998095703125, "grad_norm": 0.2028639018535614, "learning_rate": 5.0004147036176994e-05, "loss": 1.7214, "step": 20441 }, { "epoch": 0.99814453125, "grad_norm": 0.1941128671169281, "learning_rate": 5.0003937094786895e-05, "loss": 1.7191, "step": 20442 }, { "epoch": 0.998193359375, "grad_norm": 0.15863992273807526, "learning_rate": 5.00037326063571e-05, "loss": 1.7108, "step": 20443 }, { "epoch": 0.9982421875, "grad_norm": 0.20537641644477844, "learning_rate": 5.0003533570892644e-05, "loss": 1.7367, "step": 20444 }, { "epoch": 0.998291015625, "grad_norm": 0.2171962410211563, "learning_rate": 5.0003339988398283e-05, "loss": 1.6918, "step": 20445 }, { "epoch": 0.99833984375, "grad_norm": 0.15963761508464813, "learning_rate": 5.000315185887877e-05, "loss": 1.6908, "step": 20446 }, { "epoch": 0.998388671875, "grad_norm": 0.1885351538658142, "learning_rate": 5.00029691823386e-05, "loss": 1.683, "step": 20447 }, { "epoch": 0.9984375, "grad_norm": 0.22049978375434875, "learning_rate": 5.0002791958782245e-05, "loss": 1.7032, "step": 20448 }, { "epoch": 0.998486328125, "grad_norm": 0.1720593273639679, "learning_rate": 5.0002620188213997e-05, "loss": 1.6883, "step": 20449 }, { "epoch": 0.99853515625, "grad_norm": 0.19303138554096222, "learning_rate": 5.000245387063801e-05, "loss": 1.7292, "step": 20450 }, { "epoch": 0.998583984375, "grad_norm": 0.17038358747959137, "learning_rate": 5.000229300605831e-05, "loss": 1.7013, "step": 20451 }, { "epoch": 0.9986328125, "grad_norm": 0.16052761673927307, "learning_rate": 5.00021375944788e-05, "loss": 1.7156, "step": 20452 }, { "epoch": 0.998681640625, "grad_norm": 0.17365865409374237, "learning_rate": 5.000198763590325e-05, "loss": 1.7096, "step": 20453 }, { "epoch": 0.99873046875, "grad_norm": 0.19276465475559235, "learning_rate": 5.000184313033528e-05, "loss": 1.7161, "step": 20454 }, { "epoch": 0.998779296875, "grad_norm": 0.16817092895507812, "learning_rate": 5.000170407777839e-05, "loss": 1.7049, "step": 20455 }, { "epoch": 0.998828125, "grad_norm": 0.2216482013463974, "learning_rate": 5.000157047823599e-05, "loss": 1.7271, "step": 20456 }, { "epoch": 0.998876953125, "grad_norm": 0.18321508169174194, "learning_rate": 5.000144233171129e-05, "loss": 1.6959, "step": 20457 }, { "epoch": 0.99892578125, "grad_norm": 0.17624156177043915, "learning_rate": 5.0001319638207386e-05, "loss": 1.7228, "step": 20458 }, { "epoch": 0.998974609375, "grad_norm": 0.17125001549720764, "learning_rate": 5.0001202397727265e-05, "loss": 1.7075, "step": 20459 }, { "epoch": 0.9990234375, "grad_norm": 0.16478082537651062, "learning_rate": 5.000109061027377e-05, "loss": 1.7311, "step": 20460 }, { "epoch": 0.999072265625, "grad_norm": 0.18896082043647766, "learning_rate": 5.000098427584961e-05, "loss": 1.7318, "step": 20461 }, { "epoch": 0.99912109375, "grad_norm": 0.16594763100147247, "learning_rate": 5.0000883394457366e-05, "loss": 1.7181, "step": 20462 }, { "epoch": 0.999169921875, "grad_norm": 0.16924746334552765, "learning_rate": 5.000078796609945e-05, "loss": 1.6971, "step": 20463 }, { "epoch": 0.99921875, "grad_norm": 0.16758742928504944, "learning_rate": 5.000069799077821e-05, "loss": 1.6945, "step": 20464 }, { "epoch": 0.999267578125, "grad_norm": 0.18916431069374084, "learning_rate": 5.000061346849583e-05, "loss": 1.7033, "step": 20465 }, { "epoch": 0.99931640625, "grad_norm": 0.1740533858537674, "learning_rate": 5.000053439925432e-05, "loss": 1.7196, "step": 20466 }, { "epoch": 0.999365234375, "grad_norm": 0.18011803925037384, "learning_rate": 5.000046078305564e-05, "loss": 1.7194, "step": 20467 }, { "epoch": 0.9994140625, "grad_norm": 0.1704941838979721, "learning_rate": 5.0000392619901565e-05, "loss": 1.7159, "step": 20468 }, { "epoch": 0.999462890625, "grad_norm": 0.18184469640254974, "learning_rate": 5.000032990979371e-05, "loss": 1.7213, "step": 20469 }, { "epoch": 0.99951171875, "grad_norm": 0.16508732736110687, "learning_rate": 5.0000272652733645e-05, "loss": 1.7039, "step": 20470 }, { "epoch": 0.999560546875, "grad_norm": 0.18341322243213654, "learning_rate": 5.0000220848722735e-05, "loss": 1.7156, "step": 20471 }, { "epoch": 0.999609375, "grad_norm": 0.19077393412590027, "learning_rate": 5.0000174497762217e-05, "loss": 1.6875, "step": 20472 }, { "epoch": 0.999658203125, "grad_norm": 0.18747974932193756, "learning_rate": 5.000013359985326e-05, "loss": 1.7041, "step": 20473 }, { "epoch": 0.99970703125, "grad_norm": 0.18025146424770355, "learning_rate": 5.000009815499681e-05, "loss": 1.719, "step": 20474 }, { "epoch": 0.999755859375, "grad_norm": 0.19249343872070312, "learning_rate": 5.000006816319375e-05, "loss": 1.6863, "step": 20475 }, { "epoch": 0.9998046875, "grad_norm": 0.1828012466430664, "learning_rate": 5.0000043624444784e-05, "loss": 1.7202, "step": 20476 }, { "epoch": 0.999853515625, "grad_norm": 0.19179128110408783, "learning_rate": 5.000002453875054e-05, "loss": 1.7089, "step": 20477 }, { "epoch": 0.99990234375, "grad_norm": 0.1586974859237671, "learning_rate": 5.000001090611147e-05, "loss": 1.7276, "step": 20478 }, { "epoch": 0.999951171875, "grad_norm": 0.18802161514759064, "learning_rate": 5.000000272652788e-05, "loss": 1.7029, "step": 20479 }, { "epoch": 1.0, "grad_norm": 0.18075965344905853, "learning_rate": 5e-05, "loss": 1.7294, "step": 20480 } ], "logging_steps": 1.0, "max_steps": 20480, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.253889217580866e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }