{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3993, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00025043826696719256, "grad_norm": 1.5703125, "learning_rate": 5.0000000000000004e-08, "loss": 1.8145, "step": 1 }, { "epoch": 0.0005008765339343851, "grad_norm": 1.53125, "learning_rate": 1.0000000000000001e-07, "loss": 1.8751, "step": 2 }, { "epoch": 0.0007513148009015778, "grad_norm": 1.71875, "learning_rate": 1.5000000000000002e-07, "loss": 1.7677, "step": 3 }, { "epoch": 0.0010017530678687703, "grad_norm": 1.515625, "learning_rate": 2.0000000000000002e-07, "loss": 1.9468, "step": 4 }, { "epoch": 0.001252191334835963, "grad_norm": 1.890625, "learning_rate": 2.5000000000000004e-07, "loss": 1.9857, "step": 5 }, { "epoch": 0.0015026296018031556, "grad_norm": 1.609375, "learning_rate": 3.0000000000000004e-07, "loss": 1.7221, "step": 6 }, { "epoch": 0.001753067868770348, "grad_norm": 1.546875, "learning_rate": 3.5000000000000004e-07, "loss": 1.8989, "step": 7 }, { "epoch": 0.0020035061357375405, "grad_norm": 1.28125, "learning_rate": 4.0000000000000003e-07, "loss": 1.6351, "step": 8 }, { "epoch": 0.002253944402704733, "grad_norm": 1.5390625, "learning_rate": 4.5000000000000003e-07, "loss": 1.7285, "step": 9 }, { "epoch": 0.002504382669671926, "grad_norm": 1.671875, "learning_rate": 5.000000000000001e-07, "loss": 2.0455, "step": 10 }, { "epoch": 0.0027548209366391185, "grad_norm": 1.7734375, "learning_rate": 5.5e-07, "loss": 1.923, "step": 11 }, { "epoch": 0.003005259203606311, "grad_norm": 1.4375, "learning_rate": 6.000000000000001e-07, "loss": 1.9359, "step": 12 }, { "epoch": 0.0032556974705735034, "grad_norm": 1.6875, "learning_rate": 6.5e-07, "loss": 1.9896, "step": 13 }, { "epoch": 0.003506135737540696, "grad_norm": 1.15625, "learning_rate": 7.000000000000001e-07, "loss": 1.3711, "step": 14 }, { "epoch": 0.003756574004507889, "grad_norm": 2.0625, "learning_rate": 7.5e-07, "loss": 1.9006, "step": 15 }, { "epoch": 0.004007012271475081, "grad_norm": 1.5234375, "learning_rate": 8.000000000000001e-07, "loss": 1.7795, "step": 16 }, { "epoch": 0.004257450538442274, "grad_norm": 1.7578125, "learning_rate": 8.500000000000001e-07, "loss": 1.7868, "step": 17 }, { "epoch": 0.004507888805409466, "grad_norm": 1.9609375, "learning_rate": 9.000000000000001e-07, "loss": 1.783, "step": 18 }, { "epoch": 0.004758327072376659, "grad_norm": 1.890625, "learning_rate": 9.500000000000001e-07, "loss": 1.9311, "step": 19 }, { "epoch": 0.005008765339343852, "grad_norm": 2.109375, "learning_rate": 1.0000000000000002e-06, "loss": 1.9994, "step": 20 }, { "epoch": 0.005259203606311044, "grad_norm": 1.390625, "learning_rate": 1.0500000000000001e-06, "loss": 1.7499, "step": 21 }, { "epoch": 0.005509641873278237, "grad_norm": 1.5, "learning_rate": 1.1e-06, "loss": 1.902, "step": 22 }, { "epoch": 0.00576008014024543, "grad_norm": 1.7265625, "learning_rate": 1.1500000000000002e-06, "loss": 1.8108, "step": 23 }, { "epoch": 0.006010518407212622, "grad_norm": 1.5703125, "learning_rate": 1.2000000000000002e-06, "loss": 1.9501, "step": 24 }, { "epoch": 0.006260956674179815, "grad_norm": 1.8125, "learning_rate": 1.25e-06, "loss": 1.969, "step": 25 }, { "epoch": 0.006511394941147007, "grad_norm": 2.0, "learning_rate": 1.3e-06, "loss": 1.837, "step": 26 }, { "epoch": 0.0067618332081141996, "grad_norm": 1.6875, "learning_rate": 1.3500000000000002e-06, "loss": 1.8221, "step": 27 }, { "epoch": 0.007012271475081392, "grad_norm": 1.8671875, "learning_rate": 1.4000000000000001e-06, "loss": 1.8018, "step": 28 }, { "epoch": 0.007262709742048585, "grad_norm": 1.734375, "learning_rate": 1.45e-06, "loss": 1.6929, "step": 29 }, { "epoch": 0.007513148009015778, "grad_norm": 1.59375, "learning_rate": 1.5e-06, "loss": 1.8181, "step": 30 }, { "epoch": 0.00776358627598297, "grad_norm": 1.9296875, "learning_rate": 1.5500000000000002e-06, "loss": 1.7328, "step": 31 }, { "epoch": 0.008014024542950162, "grad_norm": 1.78125, "learning_rate": 1.6000000000000001e-06, "loss": 1.787, "step": 32 }, { "epoch": 0.008264462809917356, "grad_norm": 1.7734375, "learning_rate": 1.6500000000000003e-06, "loss": 1.7629, "step": 33 }, { "epoch": 0.008514901076884547, "grad_norm": 1.78125, "learning_rate": 1.7000000000000002e-06, "loss": 2.0424, "step": 34 }, { "epoch": 0.008765339343851741, "grad_norm": 1.9375, "learning_rate": 1.75e-06, "loss": 1.7945, "step": 35 }, { "epoch": 0.009015777610818933, "grad_norm": 2.078125, "learning_rate": 1.8000000000000001e-06, "loss": 2.0177, "step": 36 }, { "epoch": 0.009266215877786126, "grad_norm": 1.921875, "learning_rate": 1.85e-06, "loss": 1.8945, "step": 37 }, { "epoch": 0.009516654144753318, "grad_norm": 0.984375, "learning_rate": 1.9000000000000002e-06, "loss": 1.5008, "step": 38 }, { "epoch": 0.009767092411720512, "grad_norm": 2.078125, "learning_rate": 1.9500000000000004e-06, "loss": 1.7258, "step": 39 }, { "epoch": 0.010017530678687703, "grad_norm": 1.8984375, "learning_rate": 2.0000000000000003e-06, "loss": 1.8667, "step": 40 }, { "epoch": 0.010267968945654895, "grad_norm": 1.53125, "learning_rate": 2.05e-06, "loss": 1.9328, "step": 41 }, { "epoch": 0.010518407212622089, "grad_norm": 1.6484375, "learning_rate": 2.1000000000000002e-06, "loss": 1.8158, "step": 42 }, { "epoch": 0.01076884547958928, "grad_norm": 1.953125, "learning_rate": 2.15e-06, "loss": 1.9001, "step": 43 }, { "epoch": 0.011019283746556474, "grad_norm": 1.578125, "learning_rate": 2.2e-06, "loss": 1.9305, "step": 44 }, { "epoch": 0.011269722013523666, "grad_norm": 2.203125, "learning_rate": 2.25e-06, "loss": 1.9281, "step": 45 }, { "epoch": 0.01152016028049086, "grad_norm": 1.625, "learning_rate": 2.3000000000000004e-06, "loss": 1.888, "step": 46 }, { "epoch": 0.011770598547458051, "grad_norm": 1.7421875, "learning_rate": 2.35e-06, "loss": 1.8359, "step": 47 }, { "epoch": 0.012021036814425245, "grad_norm": 2.0625, "learning_rate": 2.4000000000000003e-06, "loss": 1.9236, "step": 48 }, { "epoch": 0.012271475081392437, "grad_norm": 2.078125, "learning_rate": 2.4500000000000003e-06, "loss": 1.8478, "step": 49 }, { "epoch": 0.01252191334835963, "grad_norm": 1.53125, "learning_rate": 2.5e-06, "loss": 1.7622, "step": 50 }, { "epoch": 0.012772351615326822, "grad_norm": 1.9375, "learning_rate": 2.55e-06, "loss": 1.9523, "step": 51 }, { "epoch": 0.013022789882294014, "grad_norm": 1.8515625, "learning_rate": 2.6e-06, "loss": 1.8347, "step": 52 }, { "epoch": 0.013273228149261207, "grad_norm": 1.9375, "learning_rate": 2.6500000000000005e-06, "loss": 1.9159, "step": 53 }, { "epoch": 0.013523666416228399, "grad_norm": 1.9375, "learning_rate": 2.7000000000000004e-06, "loss": 1.7682, "step": 54 }, { "epoch": 0.013774104683195593, "grad_norm": 1.8359375, "learning_rate": 2.7500000000000004e-06, "loss": 1.8509, "step": 55 }, { "epoch": 0.014024542950162784, "grad_norm": 1.78125, "learning_rate": 2.8000000000000003e-06, "loss": 1.8378, "step": 56 }, { "epoch": 0.014274981217129978, "grad_norm": 1.96875, "learning_rate": 2.85e-06, "loss": 1.7925, "step": 57 }, { "epoch": 0.01452541948409717, "grad_norm": 1.7265625, "learning_rate": 2.9e-06, "loss": 1.9217, "step": 58 }, { "epoch": 0.014775857751064363, "grad_norm": 1.859375, "learning_rate": 2.95e-06, "loss": 1.7453, "step": 59 }, { "epoch": 0.015026296018031555, "grad_norm": 2.046875, "learning_rate": 3e-06, "loss": 2.0146, "step": 60 }, { "epoch": 0.015276734284998747, "grad_norm": 1.9296875, "learning_rate": 3.05e-06, "loss": 1.921, "step": 61 }, { "epoch": 0.01552717255196594, "grad_norm": 1.875, "learning_rate": 3.1000000000000004e-06, "loss": 1.8797, "step": 62 }, { "epoch": 0.015777610818933134, "grad_norm": 1.5859375, "learning_rate": 3.1500000000000003e-06, "loss": 1.7579, "step": 63 }, { "epoch": 0.016028049085900324, "grad_norm": 2.09375, "learning_rate": 3.2000000000000003e-06, "loss": 1.8454, "step": 64 }, { "epoch": 0.016278487352867518, "grad_norm": 1.8203125, "learning_rate": 3.2500000000000002e-06, "loss": 1.762, "step": 65 }, { "epoch": 0.01652892561983471, "grad_norm": 1.84375, "learning_rate": 3.3000000000000006e-06, "loss": 1.7937, "step": 66 }, { "epoch": 0.016779363886801905, "grad_norm": 1.5859375, "learning_rate": 3.3500000000000005e-06, "loss": 1.932, "step": 67 }, { "epoch": 0.017029802153769095, "grad_norm": 1.671875, "learning_rate": 3.4000000000000005e-06, "loss": 1.7995, "step": 68 }, { "epoch": 0.01728024042073629, "grad_norm": 2.125, "learning_rate": 3.45e-06, "loss": 1.8313, "step": 69 }, { "epoch": 0.017530678687703482, "grad_norm": 1.828125, "learning_rate": 3.5e-06, "loss": 1.8019, "step": 70 }, { "epoch": 0.017781116954670672, "grad_norm": 2.078125, "learning_rate": 3.5500000000000003e-06, "loss": 1.9192, "step": 71 }, { "epoch": 0.018031555221637866, "grad_norm": 1.828125, "learning_rate": 3.6000000000000003e-06, "loss": 1.7964, "step": 72 }, { "epoch": 0.01828199348860506, "grad_norm": 1.8359375, "learning_rate": 3.65e-06, "loss": 1.809, "step": 73 }, { "epoch": 0.018532431755572253, "grad_norm": 1.5390625, "learning_rate": 3.7e-06, "loss": 1.7507, "step": 74 }, { "epoch": 0.018782870022539443, "grad_norm": 1.640625, "learning_rate": 3.7500000000000005e-06, "loss": 1.9452, "step": 75 }, { "epoch": 0.019033308289506636, "grad_norm": 1.8671875, "learning_rate": 3.8000000000000005e-06, "loss": 1.8276, "step": 76 }, { "epoch": 0.01928374655647383, "grad_norm": 1.5546875, "learning_rate": 3.85e-06, "loss": 1.9724, "step": 77 }, { "epoch": 0.019534184823441023, "grad_norm": 1.5703125, "learning_rate": 3.900000000000001e-06, "loss": 1.5604, "step": 78 }, { "epoch": 0.019784623090408213, "grad_norm": 1.5703125, "learning_rate": 3.95e-06, "loss": 1.6545, "step": 79 }, { "epoch": 0.020035061357375407, "grad_norm": 1.84375, "learning_rate": 4.000000000000001e-06, "loss": 1.8511, "step": 80 }, { "epoch": 0.0202854996243426, "grad_norm": 1.46875, "learning_rate": 4.05e-06, "loss": 1.7939, "step": 81 }, { "epoch": 0.02053593789130979, "grad_norm": 1.5234375, "learning_rate": 4.1e-06, "loss": 1.8085, "step": 82 }, { "epoch": 0.020786376158276984, "grad_norm": 1.703125, "learning_rate": 4.15e-06, "loss": 1.7744, "step": 83 }, { "epoch": 0.021036814425244178, "grad_norm": 1.71875, "learning_rate": 4.2000000000000004e-06, "loss": 1.9585, "step": 84 }, { "epoch": 0.02128725269221137, "grad_norm": 1.5390625, "learning_rate": 4.25e-06, "loss": 1.7507, "step": 85 }, { "epoch": 0.02153769095917856, "grad_norm": 1.4765625, "learning_rate": 4.3e-06, "loss": 1.7741, "step": 86 }, { "epoch": 0.021788129226145755, "grad_norm": 1.4609375, "learning_rate": 4.350000000000001e-06, "loss": 1.6627, "step": 87 }, { "epoch": 0.02203856749311295, "grad_norm": 1.4921875, "learning_rate": 4.4e-06, "loss": 1.7315, "step": 88 }, { "epoch": 0.022289005760080142, "grad_norm": 1.375, "learning_rate": 4.450000000000001e-06, "loss": 1.7506, "step": 89 }, { "epoch": 0.022539444027047332, "grad_norm": 1.7265625, "learning_rate": 4.5e-06, "loss": 1.8315, "step": 90 }, { "epoch": 0.022789882294014525, "grad_norm": 1.4140625, "learning_rate": 4.5500000000000005e-06, "loss": 1.7876, "step": 91 }, { "epoch": 0.02304032056098172, "grad_norm": 1.28125, "learning_rate": 4.600000000000001e-06, "loss": 1.5095, "step": 92 }, { "epoch": 0.02329075882794891, "grad_norm": 1.421875, "learning_rate": 4.65e-06, "loss": 1.8747, "step": 93 }, { "epoch": 0.023541197094916103, "grad_norm": 1.25, "learning_rate": 4.7e-06, "loss": 1.7023, "step": 94 }, { "epoch": 0.023791635361883296, "grad_norm": 1.4140625, "learning_rate": 4.75e-06, "loss": 1.7698, "step": 95 }, { "epoch": 0.02404207362885049, "grad_norm": 1.2734375, "learning_rate": 4.800000000000001e-06, "loss": 1.6483, "step": 96 }, { "epoch": 0.02429251189581768, "grad_norm": 1.125, "learning_rate": 4.85e-06, "loss": 1.5098, "step": 97 }, { "epoch": 0.024542950162784873, "grad_norm": 1.3984375, "learning_rate": 4.9000000000000005e-06, "loss": 1.7155, "step": 98 }, { "epoch": 0.024793388429752067, "grad_norm": 1.3125, "learning_rate": 4.95e-06, "loss": 1.7809, "step": 99 }, { "epoch": 0.02504382669671926, "grad_norm": 1.3671875, "learning_rate": 5e-06, "loss": 1.7401, "step": 100 }, { "epoch": 0.02529426496368645, "grad_norm": 1.2421875, "learning_rate": 5.050000000000001e-06, "loss": 1.6305, "step": 101 }, { "epoch": 0.025544703230653644, "grad_norm": 1.234375, "learning_rate": 5.1e-06, "loss": 1.6541, "step": 102 }, { "epoch": 0.025795141497620837, "grad_norm": 1.3828125, "learning_rate": 5.150000000000001e-06, "loss": 1.6865, "step": 103 }, { "epoch": 0.026045579764588028, "grad_norm": 1.2578125, "learning_rate": 5.2e-06, "loss": 1.6703, "step": 104 }, { "epoch": 0.02629601803155522, "grad_norm": 1.171875, "learning_rate": 5.2500000000000006e-06, "loss": 1.6192, "step": 105 }, { "epoch": 0.026546456298522415, "grad_norm": 1.265625, "learning_rate": 5.300000000000001e-06, "loss": 1.7179, "step": 106 }, { "epoch": 0.026796894565489608, "grad_norm": 1.2421875, "learning_rate": 5.3500000000000004e-06, "loss": 1.6462, "step": 107 }, { "epoch": 0.027047332832456798, "grad_norm": 1.1875, "learning_rate": 5.400000000000001e-06, "loss": 1.8463, "step": 108 }, { "epoch": 0.027297771099423992, "grad_norm": 1.1796875, "learning_rate": 5.450000000000001e-06, "loss": 1.5304, "step": 109 }, { "epoch": 0.027548209366391185, "grad_norm": 1.296875, "learning_rate": 5.500000000000001e-06, "loss": 1.5724, "step": 110 }, { "epoch": 0.02779864763335838, "grad_norm": 1.2890625, "learning_rate": 5.550000000000001e-06, "loss": 1.6848, "step": 111 }, { "epoch": 0.02804908590032557, "grad_norm": 1.25, "learning_rate": 5.600000000000001e-06, "loss": 1.6917, "step": 112 }, { "epoch": 0.028299524167292762, "grad_norm": 1.3671875, "learning_rate": 5.65e-06, "loss": 1.5809, "step": 113 }, { "epoch": 0.028549962434259956, "grad_norm": 1.09375, "learning_rate": 5.7e-06, "loss": 1.6147, "step": 114 }, { "epoch": 0.028800400701227146, "grad_norm": 1.2578125, "learning_rate": 5.75e-06, "loss": 1.5169, "step": 115 }, { "epoch": 0.02905083896819434, "grad_norm": 1.203125, "learning_rate": 5.8e-06, "loss": 1.7645, "step": 116 }, { "epoch": 0.029301277235161533, "grad_norm": 1.28125, "learning_rate": 5.85e-06, "loss": 1.6446, "step": 117 }, { "epoch": 0.029551715502128727, "grad_norm": 1.1796875, "learning_rate": 5.9e-06, "loss": 1.6311, "step": 118 }, { "epoch": 0.029802153769095917, "grad_norm": 1.2265625, "learning_rate": 5.950000000000001e-06, "loss": 1.7778, "step": 119 }, { "epoch": 0.03005259203606311, "grad_norm": 1.265625, "learning_rate": 6e-06, "loss": 1.5512, "step": 120 }, { "epoch": 0.030303030303030304, "grad_norm": 1.203125, "learning_rate": 6.0500000000000005e-06, "loss": 1.4958, "step": 121 }, { "epoch": 0.030553468569997494, "grad_norm": 1.3125, "learning_rate": 6.1e-06, "loss": 1.6435, "step": 122 }, { "epoch": 0.030803906836964687, "grad_norm": 1.171875, "learning_rate": 6.15e-06, "loss": 1.6262, "step": 123 }, { "epoch": 0.03105434510393188, "grad_norm": 1.375, "learning_rate": 6.200000000000001e-06, "loss": 1.5896, "step": 124 }, { "epoch": 0.03130478337089907, "grad_norm": 0.6484375, "learning_rate": 6.25e-06, "loss": 0.9112, "step": 125 }, { "epoch": 0.03155522163786627, "grad_norm": 1.0546875, "learning_rate": 6.300000000000001e-06, "loss": 1.7731, "step": 126 }, { "epoch": 0.03180565990483346, "grad_norm": 0.421875, "learning_rate": 6.35e-06, "loss": 0.8778, "step": 127 }, { "epoch": 0.03205609817180065, "grad_norm": 1.1953125, "learning_rate": 6.4000000000000006e-06, "loss": 1.5578, "step": 128 }, { "epoch": 0.032306536438767845, "grad_norm": 1.09375, "learning_rate": 6.450000000000001e-06, "loss": 1.7515, "step": 129 }, { "epoch": 0.032556974705735035, "grad_norm": 1.171875, "learning_rate": 6.5000000000000004e-06, "loss": 1.5158, "step": 130 }, { "epoch": 0.03280741297270223, "grad_norm": 1.0625, "learning_rate": 6.550000000000001e-06, "loss": 1.5125, "step": 131 }, { "epoch": 0.03305785123966942, "grad_norm": 1.15625, "learning_rate": 6.600000000000001e-06, "loss": 1.5563, "step": 132 }, { "epoch": 0.03330828950663661, "grad_norm": 1.2421875, "learning_rate": 6.650000000000001e-06, "loss": 1.6616, "step": 133 }, { "epoch": 0.03355872777360381, "grad_norm": 1.2421875, "learning_rate": 6.700000000000001e-06, "loss": 1.6769, "step": 134 }, { "epoch": 0.033809166040571, "grad_norm": 1.203125, "learning_rate": 6.750000000000001e-06, "loss": 1.5558, "step": 135 }, { "epoch": 0.03405960430753819, "grad_norm": 1.140625, "learning_rate": 6.800000000000001e-06, "loss": 1.5713, "step": 136 }, { "epoch": 0.03431004257450539, "grad_norm": 1.203125, "learning_rate": 6.850000000000001e-06, "loss": 1.5107, "step": 137 }, { "epoch": 0.03456048084147258, "grad_norm": 1.2109375, "learning_rate": 6.9e-06, "loss": 1.6884, "step": 138 }, { "epoch": 0.03481091910843977, "grad_norm": 1.046875, "learning_rate": 6.95e-06, "loss": 1.688, "step": 139 }, { "epoch": 0.035061357375406964, "grad_norm": 1.140625, "learning_rate": 7e-06, "loss": 1.5946, "step": 140 }, { "epoch": 0.035311795642374154, "grad_norm": 1.1875, "learning_rate": 7.05e-06, "loss": 1.4187, "step": 141 }, { "epoch": 0.035562233909341344, "grad_norm": 1.1171875, "learning_rate": 7.100000000000001e-06, "loss": 1.5699, "step": 142 }, { "epoch": 0.03581267217630854, "grad_norm": 1.1328125, "learning_rate": 7.15e-06, "loss": 1.5534, "step": 143 }, { "epoch": 0.03606311044327573, "grad_norm": 1.0625, "learning_rate": 7.2000000000000005e-06, "loss": 1.4986, "step": 144 }, { "epoch": 0.03631354871024293, "grad_norm": 1.046875, "learning_rate": 7.25e-06, "loss": 1.5416, "step": 145 }, { "epoch": 0.03656398697721012, "grad_norm": 0.96875, "learning_rate": 7.3e-06, "loss": 1.7254, "step": 146 }, { "epoch": 0.03681442524417731, "grad_norm": 0.9609375, "learning_rate": 7.350000000000001e-06, "loss": 1.7563, "step": 147 }, { "epoch": 0.037064863511144505, "grad_norm": 0.8359375, "learning_rate": 7.4e-06, "loss": 1.4539, "step": 148 }, { "epoch": 0.037315301778111695, "grad_norm": 1.15625, "learning_rate": 7.450000000000001e-06, "loss": 1.526, "step": 149 }, { "epoch": 0.037565740045078885, "grad_norm": 0.94921875, "learning_rate": 7.500000000000001e-06, "loss": 1.5868, "step": 150 }, { "epoch": 0.03781617831204608, "grad_norm": 0.8984375, "learning_rate": 7.5500000000000006e-06, "loss": 1.4252, "step": 151 }, { "epoch": 0.03806661657901327, "grad_norm": 0.88671875, "learning_rate": 7.600000000000001e-06, "loss": 1.4188, "step": 152 }, { "epoch": 0.03831705484598046, "grad_norm": 0.8984375, "learning_rate": 7.650000000000001e-06, "loss": 1.533, "step": 153 }, { "epoch": 0.03856749311294766, "grad_norm": 1.0, "learning_rate": 7.7e-06, "loss": 1.4067, "step": 154 }, { "epoch": 0.03881793137991485, "grad_norm": 0.85546875, "learning_rate": 7.75e-06, "loss": 1.4289, "step": 155 }, { "epoch": 0.03906836964688205, "grad_norm": 1.0390625, "learning_rate": 7.800000000000002e-06, "loss": 1.3756, "step": 156 }, { "epoch": 0.03931880791384924, "grad_norm": 0.9609375, "learning_rate": 7.850000000000001e-06, "loss": 1.407, "step": 157 }, { "epoch": 0.03956924618081643, "grad_norm": 0.9296875, "learning_rate": 7.9e-06, "loss": 1.474, "step": 158 }, { "epoch": 0.039819684447783624, "grad_norm": 0.8515625, "learning_rate": 7.950000000000002e-06, "loss": 1.5144, "step": 159 }, { "epoch": 0.040070122714750814, "grad_norm": 0.87890625, "learning_rate": 8.000000000000001e-06, "loss": 1.5064, "step": 160 }, { "epoch": 0.040320560981718004, "grad_norm": 0.76171875, "learning_rate": 8.050000000000001e-06, "loss": 1.532, "step": 161 }, { "epoch": 0.0405709992486852, "grad_norm": 0.87890625, "learning_rate": 8.1e-06, "loss": 1.5442, "step": 162 }, { "epoch": 0.04082143751565239, "grad_norm": 0.81640625, "learning_rate": 8.15e-06, "loss": 1.6388, "step": 163 }, { "epoch": 0.04107187578261958, "grad_norm": 0.84765625, "learning_rate": 8.2e-06, "loss": 1.4584, "step": 164 }, { "epoch": 0.04132231404958678, "grad_norm": 0.8515625, "learning_rate": 8.25e-06, "loss": 1.4607, "step": 165 }, { "epoch": 0.04157275231655397, "grad_norm": 0.7578125, "learning_rate": 8.3e-06, "loss": 1.4149, "step": 166 }, { "epoch": 0.041823190583521165, "grad_norm": 0.7578125, "learning_rate": 8.35e-06, "loss": 1.573, "step": 167 }, { "epoch": 0.042073628850488355, "grad_norm": 0.921875, "learning_rate": 8.400000000000001e-06, "loss": 1.5272, "step": 168 }, { "epoch": 0.042324067117455545, "grad_norm": 0.859375, "learning_rate": 8.45e-06, "loss": 1.4747, "step": 169 }, { "epoch": 0.04257450538442274, "grad_norm": 0.828125, "learning_rate": 8.5e-06, "loss": 1.4367, "step": 170 }, { "epoch": 0.04282494365138993, "grad_norm": 0.78515625, "learning_rate": 8.550000000000001e-06, "loss": 1.3967, "step": 171 }, { "epoch": 0.04307538191835712, "grad_norm": 0.81640625, "learning_rate": 8.6e-06, "loss": 1.5226, "step": 172 }, { "epoch": 0.04332582018532432, "grad_norm": 0.69140625, "learning_rate": 8.65e-06, "loss": 1.3254, "step": 173 }, { "epoch": 0.04357625845229151, "grad_norm": 0.875, "learning_rate": 8.700000000000001e-06, "loss": 1.372, "step": 174 }, { "epoch": 0.0438266967192587, "grad_norm": 0.76171875, "learning_rate": 8.750000000000001e-06, "loss": 1.3902, "step": 175 }, { "epoch": 0.0440771349862259, "grad_norm": 0.69921875, "learning_rate": 8.8e-06, "loss": 1.4893, "step": 176 }, { "epoch": 0.04432757325319309, "grad_norm": 0.71875, "learning_rate": 8.85e-06, "loss": 1.3944, "step": 177 }, { "epoch": 0.044578011520160284, "grad_norm": 0.82421875, "learning_rate": 8.900000000000001e-06, "loss": 1.3208, "step": 178 }, { "epoch": 0.044828449787127474, "grad_norm": 0.85546875, "learning_rate": 8.95e-06, "loss": 1.3755, "step": 179 }, { "epoch": 0.045078888054094664, "grad_norm": 0.55078125, "learning_rate": 9e-06, "loss": 1.1355, "step": 180 }, { "epoch": 0.04532932632106186, "grad_norm": 0.765625, "learning_rate": 9.050000000000001e-06, "loss": 1.3624, "step": 181 }, { "epoch": 0.04557976458802905, "grad_norm": 0.71875, "learning_rate": 9.100000000000001e-06, "loss": 1.2984, "step": 182 }, { "epoch": 0.04583020285499624, "grad_norm": 0.75390625, "learning_rate": 9.15e-06, "loss": 1.3938, "step": 183 }, { "epoch": 0.04608064112196344, "grad_norm": 0.63671875, "learning_rate": 9.200000000000002e-06, "loss": 1.3085, "step": 184 }, { "epoch": 0.04633107938893063, "grad_norm": 0.546875, "learning_rate": 9.250000000000001e-06, "loss": 1.385, "step": 185 }, { "epoch": 0.04658151765589782, "grad_norm": 0.6796875, "learning_rate": 9.3e-06, "loss": 1.4657, "step": 186 }, { "epoch": 0.046831955922865015, "grad_norm": 0.65234375, "learning_rate": 9.350000000000002e-06, "loss": 1.5467, "step": 187 }, { "epoch": 0.047082394189832205, "grad_norm": 0.75390625, "learning_rate": 9.4e-06, "loss": 1.4414, "step": 188 }, { "epoch": 0.0473328324567994, "grad_norm": 0.71875, "learning_rate": 9.450000000000001e-06, "loss": 1.3864, "step": 189 }, { "epoch": 0.04758327072376659, "grad_norm": 0.61328125, "learning_rate": 9.5e-06, "loss": 1.3145, "step": 190 }, { "epoch": 0.04783370899073378, "grad_norm": 0.625, "learning_rate": 9.55e-06, "loss": 1.4618, "step": 191 }, { "epoch": 0.04808414725770098, "grad_norm": 0.58203125, "learning_rate": 9.600000000000001e-06, "loss": 1.3178, "step": 192 }, { "epoch": 0.04833458552466817, "grad_norm": 0.69921875, "learning_rate": 9.65e-06, "loss": 1.3651, "step": 193 }, { "epoch": 0.04858502379163536, "grad_norm": 0.546875, "learning_rate": 9.7e-06, "loss": 1.1574, "step": 194 }, { "epoch": 0.048835462058602556, "grad_norm": 0.6171875, "learning_rate": 9.75e-06, "loss": 1.3105, "step": 195 }, { "epoch": 0.049085900325569747, "grad_norm": 0.55859375, "learning_rate": 9.800000000000001e-06, "loss": 1.4656, "step": 196 }, { "epoch": 0.04933633859253694, "grad_norm": 0.625, "learning_rate": 9.85e-06, "loss": 1.3669, "step": 197 }, { "epoch": 0.049586776859504134, "grad_norm": 0.6015625, "learning_rate": 9.9e-06, "loss": 1.3679, "step": 198 }, { "epoch": 0.049837215126471324, "grad_norm": 0.59765625, "learning_rate": 9.950000000000001e-06, "loss": 1.3402, "step": 199 }, { "epoch": 0.05008765339343852, "grad_norm": 0.59765625, "learning_rate": 1e-05, "loss": 1.3659, "step": 200 }, { "epoch": 0.05033809166040571, "grad_norm": 0.59375, "learning_rate": 1.005e-05, "loss": 1.33, "step": 201 }, { "epoch": 0.0505885299273729, "grad_norm": 0.51171875, "learning_rate": 1.0100000000000002e-05, "loss": 1.1927, "step": 202 }, { "epoch": 0.0508389681943401, "grad_norm": 0.67578125, "learning_rate": 1.015e-05, "loss": 1.4818, "step": 203 }, { "epoch": 0.05108940646130729, "grad_norm": 0.62890625, "learning_rate": 1.02e-05, "loss": 1.3583, "step": 204 }, { "epoch": 0.05133984472827448, "grad_norm": 0.51953125, "learning_rate": 1.025e-05, "loss": 1.4428, "step": 205 }, { "epoch": 0.051590282995241675, "grad_norm": 0.5625, "learning_rate": 1.0300000000000001e-05, "loss": 1.4832, "step": 206 }, { "epoch": 0.051840721262208865, "grad_norm": 0.5546875, "learning_rate": 1.0350000000000001e-05, "loss": 1.2921, "step": 207 }, { "epoch": 0.052091159529176055, "grad_norm": 0.58984375, "learning_rate": 1.04e-05, "loss": 1.427, "step": 208 }, { "epoch": 0.05234159779614325, "grad_norm": 0.578125, "learning_rate": 1.045e-05, "loss": 1.4233, "step": 209 }, { "epoch": 0.05259203606311044, "grad_norm": 0.52734375, "learning_rate": 1.0500000000000001e-05, "loss": 1.4086, "step": 210 }, { "epoch": 0.05284247433007764, "grad_norm": 0.5546875, "learning_rate": 1.055e-05, "loss": 1.3368, "step": 211 }, { "epoch": 0.05309291259704483, "grad_norm": 0.5078125, "learning_rate": 1.0600000000000002e-05, "loss": 1.602, "step": 212 }, { "epoch": 0.05334335086401202, "grad_norm": 0.50390625, "learning_rate": 1.065e-05, "loss": 1.4175, "step": 213 }, { "epoch": 0.053593789130979216, "grad_norm": 0.5078125, "learning_rate": 1.0700000000000001e-05, "loss": 1.2525, "step": 214 }, { "epoch": 0.053844227397946406, "grad_norm": 0.470703125, "learning_rate": 1.075e-05, "loss": 1.3943, "step": 215 }, { "epoch": 0.054094665664913597, "grad_norm": 0.48046875, "learning_rate": 1.0800000000000002e-05, "loss": 1.3555, "step": 216 }, { "epoch": 0.054345103931880794, "grad_norm": 0.55859375, "learning_rate": 1.0850000000000001e-05, "loss": 1.3575, "step": 217 }, { "epoch": 0.054595542198847984, "grad_norm": 0.435546875, "learning_rate": 1.0900000000000002e-05, "loss": 1.1768, "step": 218 }, { "epoch": 0.054845980465815174, "grad_norm": 0.4765625, "learning_rate": 1.095e-05, "loss": 1.275, "step": 219 }, { "epoch": 0.05509641873278237, "grad_norm": 0.443359375, "learning_rate": 1.1000000000000001e-05, "loss": 1.4359, "step": 220 }, { "epoch": 0.05534685699974956, "grad_norm": 0.466796875, "learning_rate": 1.1050000000000001e-05, "loss": 1.3244, "step": 221 }, { "epoch": 0.05559729526671676, "grad_norm": 0.486328125, "learning_rate": 1.1100000000000002e-05, "loss": 1.2596, "step": 222 }, { "epoch": 0.05584773353368395, "grad_norm": 0.43359375, "learning_rate": 1.1150000000000002e-05, "loss": 1.4499, "step": 223 }, { "epoch": 0.05609817180065114, "grad_norm": 0.453125, "learning_rate": 1.1200000000000001e-05, "loss": 1.1507, "step": 224 }, { "epoch": 0.056348610067618335, "grad_norm": 0.4609375, "learning_rate": 1.125e-05, "loss": 1.3409, "step": 225 }, { "epoch": 0.056599048334585525, "grad_norm": 0.421875, "learning_rate": 1.13e-05, "loss": 1.3152, "step": 226 }, { "epoch": 0.056849486601552715, "grad_norm": 0.447265625, "learning_rate": 1.1350000000000001e-05, "loss": 1.2763, "step": 227 }, { "epoch": 0.05709992486851991, "grad_norm": 0.35546875, "learning_rate": 1.14e-05, "loss": 1.2917, "step": 228 }, { "epoch": 0.0573503631354871, "grad_norm": 0.3984375, "learning_rate": 1.145e-05, "loss": 1.2777, "step": 229 }, { "epoch": 0.05760080140245429, "grad_norm": 0.51171875, "learning_rate": 1.15e-05, "loss": 1.3355, "step": 230 }, { "epoch": 0.05785123966942149, "grad_norm": 0.390625, "learning_rate": 1.1550000000000001e-05, "loss": 1.3133, "step": 231 }, { "epoch": 0.05810167793638868, "grad_norm": 0.443359375, "learning_rate": 1.16e-05, "loss": 1.163, "step": 232 }, { "epoch": 0.05835211620335587, "grad_norm": 0.427734375, "learning_rate": 1.1650000000000002e-05, "loss": 1.304, "step": 233 }, { "epoch": 0.058602554470323066, "grad_norm": 0.404296875, "learning_rate": 1.17e-05, "loss": 1.2214, "step": 234 }, { "epoch": 0.058852992737290256, "grad_norm": 0.486328125, "learning_rate": 1.1750000000000001e-05, "loss": 1.3482, "step": 235 }, { "epoch": 0.05910343100425745, "grad_norm": 0.453125, "learning_rate": 1.18e-05, "loss": 1.2335, "step": 236 }, { "epoch": 0.059353869271224644, "grad_norm": 0.349609375, "learning_rate": 1.1850000000000002e-05, "loss": 1.3853, "step": 237 }, { "epoch": 0.059604307538191834, "grad_norm": 0.4375, "learning_rate": 1.1900000000000001e-05, "loss": 1.2233, "step": 238 }, { "epoch": 0.05985474580515903, "grad_norm": 0.41796875, "learning_rate": 1.195e-05, "loss": 1.2743, "step": 239 }, { "epoch": 0.06010518407212622, "grad_norm": 0.3828125, "learning_rate": 1.2e-05, "loss": 1.2653, "step": 240 }, { "epoch": 0.06035562233909341, "grad_norm": 0.396484375, "learning_rate": 1.2050000000000002e-05, "loss": 1.3065, "step": 241 }, { "epoch": 0.06060606060606061, "grad_norm": 0.482421875, "learning_rate": 1.2100000000000001e-05, "loss": 1.1937, "step": 242 }, { "epoch": 0.0608564988730278, "grad_norm": 0.30859375, "learning_rate": 1.2150000000000002e-05, "loss": 1.0384, "step": 243 }, { "epoch": 0.06110693713999499, "grad_norm": 0.419921875, "learning_rate": 1.22e-05, "loss": 1.2509, "step": 244 }, { "epoch": 0.061357375406962185, "grad_norm": 0.427734375, "learning_rate": 1.2250000000000001e-05, "loss": 1.2619, "step": 245 }, { "epoch": 0.061607813673929375, "grad_norm": 0.39453125, "learning_rate": 1.23e-05, "loss": 1.389, "step": 246 }, { "epoch": 0.06185825194089657, "grad_norm": 0.435546875, "learning_rate": 1.2350000000000002e-05, "loss": 1.3045, "step": 247 }, { "epoch": 0.06210869020786376, "grad_norm": 0.3828125, "learning_rate": 1.2400000000000002e-05, "loss": 1.1582, "step": 248 }, { "epoch": 0.06235912847483095, "grad_norm": 0.39453125, "learning_rate": 1.2450000000000003e-05, "loss": 1.1206, "step": 249 }, { "epoch": 0.06260956674179814, "grad_norm": 0.38671875, "learning_rate": 1.25e-05, "loss": 1.1294, "step": 250 }, { "epoch": 0.06286000500876535, "grad_norm": 0.33984375, "learning_rate": 1.255e-05, "loss": 1.2963, "step": 251 }, { "epoch": 0.06311044327573254, "grad_norm": 0.388671875, "learning_rate": 1.2600000000000001e-05, "loss": 1.2269, "step": 252 }, { "epoch": 0.06336088154269973, "grad_norm": 0.37890625, "learning_rate": 1.2650000000000001e-05, "loss": 1.2146, "step": 253 }, { "epoch": 0.06361131980966692, "grad_norm": 0.341796875, "learning_rate": 1.27e-05, "loss": 1.265, "step": 254 }, { "epoch": 0.0638617580766341, "grad_norm": 0.36328125, "learning_rate": 1.275e-05, "loss": 1.2159, "step": 255 }, { "epoch": 0.0641121963436013, "grad_norm": 0.328125, "learning_rate": 1.2800000000000001e-05, "loss": 1.2121, "step": 256 }, { "epoch": 0.0643626346105685, "grad_norm": 0.34375, "learning_rate": 1.285e-05, "loss": 1.319, "step": 257 }, { "epoch": 0.06461307287753569, "grad_norm": 0.310546875, "learning_rate": 1.2900000000000002e-05, "loss": 1.34, "step": 258 }, { "epoch": 0.06486351114450288, "grad_norm": 0.318359375, "learning_rate": 1.295e-05, "loss": 1.3784, "step": 259 }, { "epoch": 0.06511394941147007, "grad_norm": 0.357421875, "learning_rate": 1.3000000000000001e-05, "loss": 1.3495, "step": 260 }, { "epoch": 0.06536438767843726, "grad_norm": 0.34375, "learning_rate": 1.305e-05, "loss": 1.1819, "step": 261 }, { "epoch": 0.06561482594540446, "grad_norm": 0.359375, "learning_rate": 1.3100000000000002e-05, "loss": 1.2401, "step": 262 }, { "epoch": 0.06586526421237165, "grad_norm": 0.333984375, "learning_rate": 1.3150000000000001e-05, "loss": 1.233, "step": 263 }, { "epoch": 0.06611570247933884, "grad_norm": 0.3359375, "learning_rate": 1.3200000000000002e-05, "loss": 1.2532, "step": 264 }, { "epoch": 0.06636614074630603, "grad_norm": 0.3515625, "learning_rate": 1.325e-05, "loss": 1.2647, "step": 265 }, { "epoch": 0.06661657901327322, "grad_norm": 0.33203125, "learning_rate": 1.3300000000000001e-05, "loss": 1.2861, "step": 266 }, { "epoch": 0.06686701728024042, "grad_norm": 0.349609375, "learning_rate": 1.3350000000000001e-05, "loss": 1.2765, "step": 267 }, { "epoch": 0.06711745554720762, "grad_norm": 0.32421875, "learning_rate": 1.3400000000000002e-05, "loss": 1.1401, "step": 268 }, { "epoch": 0.06736789381417481, "grad_norm": 0.33984375, "learning_rate": 1.3450000000000002e-05, "loss": 1.2825, "step": 269 }, { "epoch": 0.067618332081142, "grad_norm": 0.353515625, "learning_rate": 1.3500000000000001e-05, "loss": 1.3494, "step": 270 }, { "epoch": 0.06786877034810919, "grad_norm": 0.388671875, "learning_rate": 1.355e-05, "loss": 1.2624, "step": 271 }, { "epoch": 0.06811920861507638, "grad_norm": 0.298828125, "learning_rate": 1.3600000000000002e-05, "loss": 1.2121, "step": 272 }, { "epoch": 0.06836964688204357, "grad_norm": 0.3203125, "learning_rate": 1.3650000000000001e-05, "loss": 1.0952, "step": 273 }, { "epoch": 0.06862008514901077, "grad_norm": 0.3515625, "learning_rate": 1.3700000000000003e-05, "loss": 1.2283, "step": 274 }, { "epoch": 0.06887052341597796, "grad_norm": 0.3203125, "learning_rate": 1.375e-05, "loss": 1.2618, "step": 275 }, { "epoch": 0.06912096168294515, "grad_norm": 0.310546875, "learning_rate": 1.38e-05, "loss": 1.1591, "step": 276 }, { "epoch": 0.06937139994991234, "grad_norm": 0.34375, "learning_rate": 1.3850000000000001e-05, "loss": 1.212, "step": 277 }, { "epoch": 0.06962183821687953, "grad_norm": 0.2890625, "learning_rate": 1.39e-05, "loss": 1.1612, "step": 278 }, { "epoch": 0.06987227648384674, "grad_norm": 0.32421875, "learning_rate": 1.3950000000000002e-05, "loss": 1.5406, "step": 279 }, { "epoch": 0.07012271475081393, "grad_norm": 0.3203125, "learning_rate": 1.4e-05, "loss": 1.3426, "step": 280 }, { "epoch": 0.07037315301778112, "grad_norm": 0.341796875, "learning_rate": 1.4050000000000001e-05, "loss": 1.0761, "step": 281 }, { "epoch": 0.07062359128474831, "grad_norm": 0.326171875, "learning_rate": 1.41e-05, "loss": 1.1788, "step": 282 }, { "epoch": 0.0708740295517155, "grad_norm": 0.28125, "learning_rate": 1.4150000000000002e-05, "loss": 1.24, "step": 283 }, { "epoch": 0.07112446781868269, "grad_norm": 0.314453125, "learning_rate": 1.4200000000000001e-05, "loss": 1.1792, "step": 284 }, { "epoch": 0.07137490608564989, "grad_norm": 0.302734375, "learning_rate": 1.425e-05, "loss": 1.0511, "step": 285 }, { "epoch": 0.07162534435261708, "grad_norm": 0.36328125, "learning_rate": 1.43e-05, "loss": 1.313, "step": 286 }, { "epoch": 0.07187578261958427, "grad_norm": 0.310546875, "learning_rate": 1.4350000000000002e-05, "loss": 1.1684, "step": 287 }, { "epoch": 0.07212622088655146, "grad_norm": 0.330078125, "learning_rate": 1.4400000000000001e-05, "loss": 1.3686, "step": 288 }, { "epoch": 0.07237665915351865, "grad_norm": 0.259765625, "learning_rate": 1.4450000000000002e-05, "loss": 1.2378, "step": 289 }, { "epoch": 0.07262709742048586, "grad_norm": 0.296875, "learning_rate": 1.45e-05, "loss": 1.1923, "step": 290 }, { "epoch": 0.07287753568745305, "grad_norm": 0.32421875, "learning_rate": 1.4550000000000001e-05, "loss": 1.2063, "step": 291 }, { "epoch": 0.07312797395442024, "grad_norm": 0.31640625, "learning_rate": 1.46e-05, "loss": 1.297, "step": 292 }, { "epoch": 0.07337841222138743, "grad_norm": 0.28515625, "learning_rate": 1.4650000000000002e-05, "loss": 1.3142, "step": 293 }, { "epoch": 0.07362885048835462, "grad_norm": 0.306640625, "learning_rate": 1.4700000000000002e-05, "loss": 1.0765, "step": 294 }, { "epoch": 0.0738792887553218, "grad_norm": 0.322265625, "learning_rate": 1.4750000000000003e-05, "loss": 1.268, "step": 295 }, { "epoch": 0.07412972702228901, "grad_norm": 0.314453125, "learning_rate": 1.48e-05, "loss": 1.1618, "step": 296 }, { "epoch": 0.0743801652892562, "grad_norm": 0.310546875, "learning_rate": 1.4850000000000002e-05, "loss": 1.2755, "step": 297 }, { "epoch": 0.07463060355622339, "grad_norm": 0.32421875, "learning_rate": 1.4900000000000001e-05, "loss": 1.1014, "step": 298 }, { "epoch": 0.07488104182319058, "grad_norm": 0.296875, "learning_rate": 1.4950000000000003e-05, "loss": 1.2675, "step": 299 }, { "epoch": 0.07513148009015777, "grad_norm": 0.29296875, "learning_rate": 1.5000000000000002e-05, "loss": 1.1908, "step": 300 }, { "epoch": 0.07538191835712497, "grad_norm": 0.30859375, "learning_rate": 1.505e-05, "loss": 1.3468, "step": 301 }, { "epoch": 0.07563235662409216, "grad_norm": 0.255859375, "learning_rate": 1.5100000000000001e-05, "loss": 1.1874, "step": 302 }, { "epoch": 0.07588279489105935, "grad_norm": 0.287109375, "learning_rate": 1.515e-05, "loss": 1.1282, "step": 303 }, { "epoch": 0.07613323315802654, "grad_norm": 0.2470703125, "learning_rate": 1.5200000000000002e-05, "loss": 1.1449, "step": 304 }, { "epoch": 0.07638367142499373, "grad_norm": 0.283203125, "learning_rate": 1.525e-05, "loss": 1.1478, "step": 305 }, { "epoch": 0.07663410969196092, "grad_norm": 0.291015625, "learning_rate": 1.5300000000000003e-05, "loss": 1.1775, "step": 306 }, { "epoch": 0.07688454795892813, "grad_norm": 0.296875, "learning_rate": 1.535e-05, "loss": 1.1218, "step": 307 }, { "epoch": 0.07713498622589532, "grad_norm": 0.29296875, "learning_rate": 1.54e-05, "loss": 1.169, "step": 308 }, { "epoch": 0.07738542449286251, "grad_norm": 0.2890625, "learning_rate": 1.545e-05, "loss": 1.2762, "step": 309 }, { "epoch": 0.0776358627598297, "grad_norm": 0.306640625, "learning_rate": 1.55e-05, "loss": 1.1612, "step": 310 }, { "epoch": 0.07788630102679689, "grad_norm": 0.29296875, "learning_rate": 1.5550000000000002e-05, "loss": 1.0171, "step": 311 }, { "epoch": 0.0781367392937641, "grad_norm": 0.330078125, "learning_rate": 1.5600000000000003e-05, "loss": 1.0806, "step": 312 }, { "epoch": 0.07838717756073128, "grad_norm": 0.28515625, "learning_rate": 1.565e-05, "loss": 1.2184, "step": 313 }, { "epoch": 0.07863761582769847, "grad_norm": 0.29296875, "learning_rate": 1.5700000000000002e-05, "loss": 1.2608, "step": 314 }, { "epoch": 0.07888805409466566, "grad_norm": 0.2890625, "learning_rate": 1.575e-05, "loss": 1.314, "step": 315 }, { "epoch": 0.07913849236163285, "grad_norm": 0.314453125, "learning_rate": 1.58e-05, "loss": 1.1296, "step": 316 }, { "epoch": 0.07938893062860004, "grad_norm": 0.26953125, "learning_rate": 1.5850000000000002e-05, "loss": 1.1529, "step": 317 }, { "epoch": 0.07963936889556725, "grad_norm": 0.2490234375, "learning_rate": 1.5900000000000004e-05, "loss": 1.3941, "step": 318 }, { "epoch": 0.07988980716253444, "grad_norm": 0.306640625, "learning_rate": 1.595e-05, "loss": 1.299, "step": 319 }, { "epoch": 0.08014024542950163, "grad_norm": 0.306640625, "learning_rate": 1.6000000000000003e-05, "loss": 1.2125, "step": 320 }, { "epoch": 0.08039068369646882, "grad_norm": 0.298828125, "learning_rate": 1.605e-05, "loss": 1.1539, "step": 321 }, { "epoch": 0.08064112196343601, "grad_norm": 0.30859375, "learning_rate": 1.6100000000000002e-05, "loss": 1.1055, "step": 322 }, { "epoch": 0.08089156023040321, "grad_norm": 0.291015625, "learning_rate": 1.6150000000000003e-05, "loss": 1.3407, "step": 323 }, { "epoch": 0.0811419984973704, "grad_norm": 1.5234375, "learning_rate": 1.62e-05, "loss": 1.1419, "step": 324 }, { "epoch": 0.08139243676433759, "grad_norm": 0.27734375, "learning_rate": 1.6250000000000002e-05, "loss": 1.0906, "step": 325 }, { "epoch": 0.08164287503130478, "grad_norm": 0.28515625, "learning_rate": 1.63e-05, "loss": 1.2274, "step": 326 }, { "epoch": 0.08189331329827197, "grad_norm": 0.29296875, "learning_rate": 1.635e-05, "loss": 1.2654, "step": 327 }, { "epoch": 0.08214375156523916, "grad_norm": 0.2578125, "learning_rate": 1.64e-05, "loss": 1.2445, "step": 328 }, { "epoch": 0.08239418983220637, "grad_norm": 0.27734375, "learning_rate": 1.645e-05, "loss": 1.1601, "step": 329 }, { "epoch": 0.08264462809917356, "grad_norm": 0.310546875, "learning_rate": 1.65e-05, "loss": 1.1888, "step": 330 }, { "epoch": 0.08289506636614075, "grad_norm": 0.26953125, "learning_rate": 1.6550000000000002e-05, "loss": 1.1914, "step": 331 }, { "epoch": 0.08314550463310794, "grad_norm": 0.265625, "learning_rate": 1.66e-05, "loss": 1.1837, "step": 332 }, { "epoch": 0.08339594290007513, "grad_norm": 0.29296875, "learning_rate": 1.665e-05, "loss": 1.3109, "step": 333 }, { "epoch": 0.08364638116704233, "grad_norm": 0.291015625, "learning_rate": 1.67e-05, "loss": 1.1398, "step": 334 }, { "epoch": 0.08389681943400952, "grad_norm": 0.26953125, "learning_rate": 1.675e-05, "loss": 1.2746, "step": 335 }, { "epoch": 0.08414725770097671, "grad_norm": 0.2412109375, "learning_rate": 1.6800000000000002e-05, "loss": 1.0232, "step": 336 }, { "epoch": 0.0843976959679439, "grad_norm": 0.2890625, "learning_rate": 1.6850000000000003e-05, "loss": 1.2223, "step": 337 }, { "epoch": 0.08464813423491109, "grad_norm": 0.28515625, "learning_rate": 1.69e-05, "loss": 1.0253, "step": 338 }, { "epoch": 0.08489857250187828, "grad_norm": 0.279296875, "learning_rate": 1.6950000000000002e-05, "loss": 1.042, "step": 339 }, { "epoch": 0.08514901076884548, "grad_norm": 0.28125, "learning_rate": 1.7e-05, "loss": 1.1847, "step": 340 }, { "epoch": 0.08539944903581267, "grad_norm": 0.255859375, "learning_rate": 1.705e-05, "loss": 1.0018, "step": 341 }, { "epoch": 0.08564988730277986, "grad_norm": 0.287109375, "learning_rate": 1.7100000000000002e-05, "loss": 1.1065, "step": 342 }, { "epoch": 0.08590032556974705, "grad_norm": 0.2578125, "learning_rate": 1.7150000000000004e-05, "loss": 1.3019, "step": 343 }, { "epoch": 0.08615076383671424, "grad_norm": 0.251953125, "learning_rate": 1.72e-05, "loss": 1.1407, "step": 344 }, { "epoch": 0.08640120210368145, "grad_norm": 0.255859375, "learning_rate": 1.7250000000000003e-05, "loss": 1.0926, "step": 345 }, { "epoch": 0.08665164037064864, "grad_norm": 0.263671875, "learning_rate": 1.73e-05, "loss": 1.1833, "step": 346 }, { "epoch": 0.08690207863761583, "grad_norm": 0.275390625, "learning_rate": 1.735e-05, "loss": 1.1476, "step": 347 }, { "epoch": 0.08715251690458302, "grad_norm": 0.255859375, "learning_rate": 1.7400000000000003e-05, "loss": 1.1914, "step": 348 }, { "epoch": 0.08740295517155021, "grad_norm": 0.2412109375, "learning_rate": 1.7450000000000004e-05, "loss": 1.0701, "step": 349 }, { "epoch": 0.0876533934385174, "grad_norm": 0.330078125, "learning_rate": 1.7500000000000002e-05, "loss": 1.1362, "step": 350 }, { "epoch": 0.0879038317054846, "grad_norm": 0.251953125, "learning_rate": 1.755e-05, "loss": 1.1544, "step": 351 }, { "epoch": 0.0881542699724518, "grad_norm": 0.294921875, "learning_rate": 1.76e-05, "loss": 1.1275, "step": 352 }, { "epoch": 0.08840470823941898, "grad_norm": 0.263671875, "learning_rate": 1.7650000000000002e-05, "loss": 1.1286, "step": 353 }, { "epoch": 0.08865514650638617, "grad_norm": 0.26171875, "learning_rate": 1.77e-05, "loss": 1.1947, "step": 354 }, { "epoch": 0.08890558477335336, "grad_norm": 0.263671875, "learning_rate": 1.775e-05, "loss": 1.1807, "step": 355 }, { "epoch": 0.08915602304032057, "grad_norm": 0.28515625, "learning_rate": 1.7800000000000002e-05, "loss": 1.0152, "step": 356 }, { "epoch": 0.08940646130728776, "grad_norm": 0.302734375, "learning_rate": 1.785e-05, "loss": 1.14, "step": 357 }, { "epoch": 0.08965689957425495, "grad_norm": 0.30859375, "learning_rate": 1.79e-05, "loss": 1.2637, "step": 358 }, { "epoch": 0.08990733784122214, "grad_norm": 0.271484375, "learning_rate": 1.795e-05, "loss": 1.0773, "step": 359 }, { "epoch": 0.09015777610818933, "grad_norm": 0.275390625, "learning_rate": 1.8e-05, "loss": 1.1536, "step": 360 }, { "epoch": 0.09040821437515652, "grad_norm": 0.263671875, "learning_rate": 1.805e-05, "loss": 0.977, "step": 361 }, { "epoch": 0.09065865264212372, "grad_norm": 0.283203125, "learning_rate": 1.8100000000000003e-05, "loss": 0.9669, "step": 362 }, { "epoch": 0.09090909090909091, "grad_norm": 0.271484375, "learning_rate": 1.815e-05, "loss": 1.1432, "step": 363 }, { "epoch": 0.0911595291760581, "grad_norm": 0.326171875, "learning_rate": 1.8200000000000002e-05, "loss": 1.2392, "step": 364 }, { "epoch": 0.09140996744302529, "grad_norm": 0.27734375, "learning_rate": 1.825e-05, "loss": 1.2296, "step": 365 }, { "epoch": 0.09166040570999248, "grad_norm": 0.26171875, "learning_rate": 1.83e-05, "loss": 1.2551, "step": 366 }, { "epoch": 0.09191084397695969, "grad_norm": 0.29296875, "learning_rate": 1.8350000000000002e-05, "loss": 0.8862, "step": 367 }, { "epoch": 0.09216128224392688, "grad_norm": 0.279296875, "learning_rate": 1.8400000000000003e-05, "loss": 1.2057, "step": 368 }, { "epoch": 0.09241172051089407, "grad_norm": 0.283203125, "learning_rate": 1.845e-05, "loss": 1.0378, "step": 369 }, { "epoch": 0.09266215877786126, "grad_norm": 0.2890625, "learning_rate": 1.8500000000000002e-05, "loss": 1.0924, "step": 370 }, { "epoch": 0.09291259704482845, "grad_norm": 0.2373046875, "learning_rate": 1.855e-05, "loss": 1.0761, "step": 371 }, { "epoch": 0.09316303531179564, "grad_norm": 0.279296875, "learning_rate": 1.86e-05, "loss": 1.2226, "step": 372 }, { "epoch": 0.09341347357876284, "grad_norm": 0.267578125, "learning_rate": 1.8650000000000003e-05, "loss": 1.1988, "step": 373 }, { "epoch": 0.09366391184573003, "grad_norm": 0.279296875, "learning_rate": 1.8700000000000004e-05, "loss": 1.1499, "step": 374 }, { "epoch": 0.09391435011269722, "grad_norm": 0.26953125, "learning_rate": 1.8750000000000002e-05, "loss": 1.1108, "step": 375 }, { "epoch": 0.09416478837966441, "grad_norm": 0.291015625, "learning_rate": 1.88e-05, "loss": 1.2425, "step": 376 }, { "epoch": 0.0944152266466316, "grad_norm": 0.26171875, "learning_rate": 1.885e-05, "loss": 1.0279, "step": 377 }, { "epoch": 0.0946656649135988, "grad_norm": 0.259765625, "learning_rate": 1.8900000000000002e-05, "loss": 1.1091, "step": 378 }, { "epoch": 0.094916103180566, "grad_norm": 0.283203125, "learning_rate": 1.8950000000000003e-05, "loss": 1.1083, "step": 379 }, { "epoch": 0.09516654144753318, "grad_norm": 0.279296875, "learning_rate": 1.9e-05, "loss": 1.1652, "step": 380 }, { "epoch": 0.09541697971450037, "grad_norm": 0.26171875, "learning_rate": 1.9050000000000002e-05, "loss": 1.004, "step": 381 }, { "epoch": 0.09566741798146756, "grad_norm": 0.283203125, "learning_rate": 1.91e-05, "loss": 0.8989, "step": 382 }, { "epoch": 0.09591785624843475, "grad_norm": 0.294921875, "learning_rate": 1.915e-05, "loss": 1.1089, "step": 383 }, { "epoch": 0.09616829451540196, "grad_norm": 0.267578125, "learning_rate": 1.9200000000000003e-05, "loss": 1.0637, "step": 384 }, { "epoch": 0.09641873278236915, "grad_norm": 0.30078125, "learning_rate": 1.925e-05, "loss": 1.0835, "step": 385 }, { "epoch": 0.09666917104933634, "grad_norm": 0.267578125, "learning_rate": 1.93e-05, "loss": 1.0866, "step": 386 }, { "epoch": 0.09691960931630353, "grad_norm": 0.328125, "learning_rate": 1.9350000000000003e-05, "loss": 1.2532, "step": 387 }, { "epoch": 0.09717004758327072, "grad_norm": 0.291015625, "learning_rate": 1.94e-05, "loss": 1.0902, "step": 388 }, { "epoch": 0.09742048585023792, "grad_norm": 0.310546875, "learning_rate": 1.9450000000000002e-05, "loss": 1.1377, "step": 389 }, { "epoch": 0.09767092411720511, "grad_norm": 0.2470703125, "learning_rate": 1.95e-05, "loss": 1.0391, "step": 390 }, { "epoch": 0.0979213623841723, "grad_norm": 0.298828125, "learning_rate": 1.955e-05, "loss": 1.1113, "step": 391 }, { "epoch": 0.09817180065113949, "grad_norm": 0.30859375, "learning_rate": 1.9600000000000002e-05, "loss": 1.1305, "step": 392 }, { "epoch": 0.09842223891810668, "grad_norm": 0.25390625, "learning_rate": 1.9650000000000003e-05, "loss": 1.1978, "step": 393 }, { "epoch": 0.09867267718507387, "grad_norm": 0.267578125, "learning_rate": 1.97e-05, "loss": 1.1215, "step": 394 }, { "epoch": 0.09892311545204108, "grad_norm": 0.3203125, "learning_rate": 1.9750000000000002e-05, "loss": 1.116, "step": 395 }, { "epoch": 0.09917355371900827, "grad_norm": 0.26953125, "learning_rate": 1.98e-05, "loss": 1.088, "step": 396 }, { "epoch": 0.09942399198597546, "grad_norm": 0.251953125, "learning_rate": 1.985e-05, "loss": 1.1486, "step": 397 }, { "epoch": 0.09967443025294265, "grad_norm": 0.271484375, "learning_rate": 1.9900000000000003e-05, "loss": 1.0245, "step": 398 }, { "epoch": 0.09992486851990984, "grad_norm": 0.302734375, "learning_rate": 1.9950000000000004e-05, "loss": 1.159, "step": 399 }, { "epoch": 0.10017530678687704, "grad_norm": 0.27734375, "learning_rate": 2e-05, "loss": 1.0002, "step": 400 }, { "epoch": 0.10042574505384423, "grad_norm": 0.263671875, "learning_rate": 1.9994433620929587e-05, "loss": 1.2235, "step": 401 }, { "epoch": 0.10067618332081142, "grad_norm": 0.30078125, "learning_rate": 1.998886724185917e-05, "loss": 1.0381, "step": 402 }, { "epoch": 0.10092662158777861, "grad_norm": 0.29296875, "learning_rate": 1.998330086278876e-05, "loss": 1.135, "step": 403 }, { "epoch": 0.1011770598547458, "grad_norm": 0.296875, "learning_rate": 1.9977734483718344e-05, "loss": 1.1246, "step": 404 }, { "epoch": 0.10142749812171299, "grad_norm": 0.3125, "learning_rate": 1.997216810464793e-05, "loss": 1.0769, "step": 405 }, { "epoch": 0.1016779363886802, "grad_norm": 0.28515625, "learning_rate": 1.9966601725577512e-05, "loss": 1.0503, "step": 406 }, { "epoch": 0.10192837465564739, "grad_norm": 0.287109375, "learning_rate": 1.9961035346507098e-05, "loss": 1.0016, "step": 407 }, { "epoch": 0.10217881292261458, "grad_norm": 0.28125, "learning_rate": 1.9955468967436683e-05, "loss": 1.1766, "step": 408 }, { "epoch": 0.10242925118958177, "grad_norm": 0.294921875, "learning_rate": 1.994990258836627e-05, "loss": 1.2507, "step": 409 }, { "epoch": 0.10267968945654896, "grad_norm": 0.25390625, "learning_rate": 1.9944336209295854e-05, "loss": 1.0092, "step": 410 }, { "epoch": 0.10293012772351616, "grad_norm": 0.458984375, "learning_rate": 1.993876983022544e-05, "loss": 1.0471, "step": 411 }, { "epoch": 0.10318056599048335, "grad_norm": 0.306640625, "learning_rate": 1.9933203451155026e-05, "loss": 1.0948, "step": 412 }, { "epoch": 0.10343100425745054, "grad_norm": 0.28125, "learning_rate": 1.992763707208461e-05, "loss": 1.0587, "step": 413 }, { "epoch": 0.10368144252441773, "grad_norm": 0.294921875, "learning_rate": 1.9922070693014193e-05, "loss": 1.1121, "step": 414 }, { "epoch": 0.10393188079138492, "grad_norm": 0.296875, "learning_rate": 1.9916504313943782e-05, "loss": 1.1454, "step": 415 }, { "epoch": 0.10418231905835211, "grad_norm": 0.3046875, "learning_rate": 1.9910937934873368e-05, "loss": 1.0643, "step": 416 }, { "epoch": 0.10443275732531931, "grad_norm": 0.298828125, "learning_rate": 1.9905371555802954e-05, "loss": 0.9754, "step": 417 }, { "epoch": 0.1046831955922865, "grad_norm": 0.296875, "learning_rate": 1.9899805176732536e-05, "loss": 1.0767, "step": 418 }, { "epoch": 0.1049336338592537, "grad_norm": 0.3359375, "learning_rate": 1.989423879766212e-05, "loss": 0.947, "step": 419 }, { "epoch": 0.10518407212622088, "grad_norm": 0.275390625, "learning_rate": 1.9888672418591707e-05, "loss": 0.9716, "step": 420 }, { "epoch": 0.10543451039318807, "grad_norm": 0.322265625, "learning_rate": 1.9883106039521293e-05, "loss": 1.1109, "step": 421 }, { "epoch": 0.10568494866015528, "grad_norm": 0.291015625, "learning_rate": 1.987753966045088e-05, "loss": 1.0845, "step": 422 }, { "epoch": 0.10593538692712247, "grad_norm": 0.32421875, "learning_rate": 1.9871973281380464e-05, "loss": 1.1917, "step": 423 }, { "epoch": 0.10618582519408966, "grad_norm": 0.244140625, "learning_rate": 1.986640690231005e-05, "loss": 0.7137, "step": 424 }, { "epoch": 0.10643626346105685, "grad_norm": 0.296875, "learning_rate": 1.9860840523239635e-05, "loss": 1.0346, "step": 425 }, { "epoch": 0.10668670172802404, "grad_norm": 0.291015625, "learning_rate": 1.9855274144169217e-05, "loss": 1.3177, "step": 426 }, { "epoch": 0.10693713999499123, "grad_norm": 0.3125, "learning_rate": 1.9849707765098803e-05, "loss": 0.8904, "step": 427 }, { "epoch": 0.10718757826195843, "grad_norm": 0.328125, "learning_rate": 1.9844141386028392e-05, "loss": 0.9826, "step": 428 }, { "epoch": 0.10743801652892562, "grad_norm": 0.291015625, "learning_rate": 1.9838575006957978e-05, "loss": 1.0833, "step": 429 }, { "epoch": 0.10768845479589281, "grad_norm": 0.302734375, "learning_rate": 1.983300862788756e-05, "loss": 1.0393, "step": 430 }, { "epoch": 0.10793889306286, "grad_norm": 0.359375, "learning_rate": 1.9827442248817145e-05, "loss": 1.0097, "step": 431 }, { "epoch": 0.10818933132982719, "grad_norm": 0.34765625, "learning_rate": 1.982187586974673e-05, "loss": 1.0794, "step": 432 }, { "epoch": 0.1084397695967944, "grad_norm": 0.361328125, "learning_rate": 1.9816309490676317e-05, "loss": 1.0996, "step": 433 }, { "epoch": 0.10869020786376159, "grad_norm": 0.337890625, "learning_rate": 1.9810743111605902e-05, "loss": 0.9159, "step": 434 }, { "epoch": 0.10894064613072878, "grad_norm": 0.32421875, "learning_rate": 1.9805176732535488e-05, "loss": 1.0912, "step": 435 }, { "epoch": 0.10919108439769597, "grad_norm": 0.35546875, "learning_rate": 1.9799610353465073e-05, "loss": 0.9204, "step": 436 }, { "epoch": 0.10944152266466316, "grad_norm": 0.373046875, "learning_rate": 1.979404397439466e-05, "loss": 1.0878, "step": 437 }, { "epoch": 0.10969196093163035, "grad_norm": 0.35546875, "learning_rate": 1.978847759532424e-05, "loss": 0.9889, "step": 438 }, { "epoch": 0.10994239919859755, "grad_norm": 0.5234375, "learning_rate": 1.9782911216253827e-05, "loss": 1.1244, "step": 439 }, { "epoch": 0.11019283746556474, "grad_norm": 0.3828125, "learning_rate": 1.9777344837183416e-05, "loss": 1.0136, "step": 440 }, { "epoch": 0.11044327573253193, "grad_norm": 0.375, "learning_rate": 1.9771778458113e-05, "loss": 1.0161, "step": 441 }, { "epoch": 0.11069371399949912, "grad_norm": 0.51953125, "learning_rate": 1.9766212079042584e-05, "loss": 1.0623, "step": 442 }, { "epoch": 0.11094415226646631, "grad_norm": 0.34375, "learning_rate": 1.976064569997217e-05, "loss": 1.0936, "step": 443 }, { "epoch": 0.11119459053343352, "grad_norm": 0.353515625, "learning_rate": 1.9755079320901755e-05, "loss": 1.135, "step": 444 }, { "epoch": 0.1114450288004007, "grad_norm": 0.349609375, "learning_rate": 1.974951294183134e-05, "loss": 1.0762, "step": 445 }, { "epoch": 0.1116954670673679, "grad_norm": 0.359375, "learning_rate": 1.9743946562760926e-05, "loss": 1.2092, "step": 446 }, { "epoch": 0.11194590533433509, "grad_norm": 0.359375, "learning_rate": 1.9738380183690512e-05, "loss": 1.0083, "step": 447 }, { "epoch": 0.11219634360130228, "grad_norm": 0.75, "learning_rate": 1.9732813804620097e-05, "loss": 1.0854, "step": 448 }, { "epoch": 0.11244678186826947, "grad_norm": 0.4453125, "learning_rate": 1.9727247425549683e-05, "loss": 1.1572, "step": 449 }, { "epoch": 0.11269722013523667, "grad_norm": 0.369140625, "learning_rate": 1.9721681046479265e-05, "loss": 1.0968, "step": 450 }, { "epoch": 0.11294765840220386, "grad_norm": 0.369140625, "learning_rate": 1.971611466740885e-05, "loss": 1.0227, "step": 451 }, { "epoch": 0.11319809666917105, "grad_norm": 0.310546875, "learning_rate": 1.9710548288338437e-05, "loss": 1.0214, "step": 452 }, { "epoch": 0.11344853493613824, "grad_norm": 0.349609375, "learning_rate": 1.9704981909268022e-05, "loss": 1.1454, "step": 453 }, { "epoch": 0.11369897320310543, "grad_norm": 0.326171875, "learning_rate": 1.9699415530197608e-05, "loss": 1.2042, "step": 454 }, { "epoch": 0.11394941147007262, "grad_norm": 0.375, "learning_rate": 1.9693849151127193e-05, "loss": 1.0874, "step": 455 }, { "epoch": 0.11419984973703982, "grad_norm": 0.291015625, "learning_rate": 1.968828277205678e-05, "loss": 1.0979, "step": 456 }, { "epoch": 0.11445028800400701, "grad_norm": 0.2890625, "learning_rate": 1.9682716392986365e-05, "loss": 1.1072, "step": 457 }, { "epoch": 0.1147007262709742, "grad_norm": 0.30078125, "learning_rate": 1.967715001391595e-05, "loss": 1.0576, "step": 458 }, { "epoch": 0.1149511645379414, "grad_norm": 0.28515625, "learning_rate": 1.9671583634845536e-05, "loss": 1.1233, "step": 459 }, { "epoch": 0.11520160280490858, "grad_norm": 0.314453125, "learning_rate": 1.966601725577512e-05, "loss": 1.09, "step": 460 }, { "epoch": 0.11545204107187579, "grad_norm": 0.32421875, "learning_rate": 1.9660450876704704e-05, "loss": 0.9255, "step": 461 }, { "epoch": 0.11570247933884298, "grad_norm": 0.314453125, "learning_rate": 1.965488449763429e-05, "loss": 0.9055, "step": 462 }, { "epoch": 0.11595291760581017, "grad_norm": 0.283203125, "learning_rate": 1.9649318118563875e-05, "loss": 0.9437, "step": 463 }, { "epoch": 0.11620335587277736, "grad_norm": 0.34375, "learning_rate": 1.964375173949346e-05, "loss": 0.876, "step": 464 }, { "epoch": 0.11645379413974455, "grad_norm": 0.3203125, "learning_rate": 1.9638185360423046e-05, "loss": 1.0207, "step": 465 }, { "epoch": 0.11670423240671174, "grad_norm": 0.29296875, "learning_rate": 1.963261898135263e-05, "loss": 1.144, "step": 466 }, { "epoch": 0.11695467067367894, "grad_norm": 0.3125, "learning_rate": 1.9627052602282217e-05, "loss": 1.0025, "step": 467 }, { "epoch": 0.11720510894064613, "grad_norm": 0.3046875, "learning_rate": 1.9621486223211803e-05, "loss": 1.128, "step": 468 }, { "epoch": 0.11745554720761332, "grad_norm": 0.322265625, "learning_rate": 1.9615919844141385e-05, "loss": 1.0245, "step": 469 }, { "epoch": 0.11770598547458051, "grad_norm": 0.296875, "learning_rate": 1.9610353465070974e-05, "loss": 0.9247, "step": 470 }, { "epoch": 0.1179564237415477, "grad_norm": 0.37109375, "learning_rate": 1.960478708600056e-05, "loss": 1.2011, "step": 471 }, { "epoch": 0.1182068620085149, "grad_norm": 0.38671875, "learning_rate": 1.9599220706930145e-05, "loss": 0.8678, "step": 472 }, { "epoch": 0.1184573002754821, "grad_norm": 0.310546875, "learning_rate": 1.9593654327859728e-05, "loss": 0.8983, "step": 473 }, { "epoch": 0.11870773854244929, "grad_norm": 0.30078125, "learning_rate": 1.9588087948789313e-05, "loss": 0.9698, "step": 474 }, { "epoch": 0.11895817680941648, "grad_norm": 0.287109375, "learning_rate": 1.95825215697189e-05, "loss": 0.9817, "step": 475 }, { "epoch": 0.11920861507638367, "grad_norm": 0.3359375, "learning_rate": 1.9576955190648484e-05, "loss": 0.9804, "step": 476 }, { "epoch": 0.11945905334335086, "grad_norm": 0.33203125, "learning_rate": 1.957138881157807e-05, "loss": 1.0696, "step": 477 }, { "epoch": 0.11970949161031806, "grad_norm": 0.3046875, "learning_rate": 1.9565822432507656e-05, "loss": 1.2428, "step": 478 }, { "epoch": 0.11995992987728525, "grad_norm": 0.302734375, "learning_rate": 1.956025605343724e-05, "loss": 1.0046, "step": 479 }, { "epoch": 0.12021036814425244, "grad_norm": 0.275390625, "learning_rate": 1.9554689674366827e-05, "loss": 1.0905, "step": 480 }, { "epoch": 0.12046080641121963, "grad_norm": 0.30859375, "learning_rate": 1.954912329529641e-05, "loss": 0.9786, "step": 481 }, { "epoch": 0.12071124467818682, "grad_norm": 0.333984375, "learning_rate": 1.9543556916225995e-05, "loss": 1.1945, "step": 482 }, { "epoch": 0.12096168294515403, "grad_norm": 0.33984375, "learning_rate": 1.9537990537155584e-05, "loss": 1.009, "step": 483 }, { "epoch": 0.12121212121212122, "grad_norm": 0.3203125, "learning_rate": 1.953242415808517e-05, "loss": 1.1949, "step": 484 }, { "epoch": 0.1214625594790884, "grad_norm": 0.283203125, "learning_rate": 1.952685777901475e-05, "loss": 0.9794, "step": 485 }, { "epoch": 0.1217129977460556, "grad_norm": 0.3125, "learning_rate": 1.9521291399944337e-05, "loss": 0.9214, "step": 486 }, { "epoch": 0.12196343601302279, "grad_norm": 0.3203125, "learning_rate": 1.9515725020873923e-05, "loss": 1.0397, "step": 487 }, { "epoch": 0.12221387427998998, "grad_norm": 0.3125, "learning_rate": 1.951015864180351e-05, "loss": 0.9602, "step": 488 }, { "epoch": 0.12246431254695718, "grad_norm": 0.361328125, "learning_rate": 1.9504592262733094e-05, "loss": 1.0122, "step": 489 }, { "epoch": 0.12271475081392437, "grad_norm": 0.310546875, "learning_rate": 1.949902588366268e-05, "loss": 1.099, "step": 490 }, { "epoch": 0.12296518908089156, "grad_norm": 0.326171875, "learning_rate": 1.9493459504592265e-05, "loss": 0.9456, "step": 491 }, { "epoch": 0.12321562734785875, "grad_norm": 0.310546875, "learning_rate": 1.948789312552185e-05, "loss": 0.9652, "step": 492 }, { "epoch": 0.12346606561482594, "grad_norm": 0.296875, "learning_rate": 1.9482326746451433e-05, "loss": 1.053, "step": 493 }, { "epoch": 0.12371650388179314, "grad_norm": 0.326171875, "learning_rate": 1.947676036738102e-05, "loss": 0.9166, "step": 494 }, { "epoch": 0.12396694214876033, "grad_norm": 0.296875, "learning_rate": 1.9471193988310608e-05, "loss": 1.0186, "step": 495 }, { "epoch": 0.12421738041572752, "grad_norm": 0.34375, "learning_rate": 1.9465627609240193e-05, "loss": 0.8838, "step": 496 }, { "epoch": 0.12446781868269471, "grad_norm": 0.322265625, "learning_rate": 1.9460061230169775e-05, "loss": 0.9656, "step": 497 }, { "epoch": 0.1247182569496619, "grad_norm": 0.35546875, "learning_rate": 1.945449485109936e-05, "loss": 1.1317, "step": 498 }, { "epoch": 0.1249686952166291, "grad_norm": 0.28125, "learning_rate": 1.9448928472028947e-05, "loss": 0.9657, "step": 499 }, { "epoch": 0.12521913348359628, "grad_norm": 0.359375, "learning_rate": 1.9443362092958532e-05, "loss": 0.9795, "step": 500 }, { "epoch": 0.12546957175056347, "grad_norm": 0.330078125, "learning_rate": 1.9437795713888118e-05, "loss": 0.8711, "step": 501 }, { "epoch": 0.1257200100175307, "grad_norm": 0.306640625, "learning_rate": 1.9432229334817704e-05, "loss": 0.7678, "step": 502 }, { "epoch": 0.12597044828449788, "grad_norm": 0.29296875, "learning_rate": 1.942666295574729e-05, "loss": 1.0552, "step": 503 }, { "epoch": 0.12622088655146507, "grad_norm": 0.31640625, "learning_rate": 1.942109657667687e-05, "loss": 1.055, "step": 504 }, { "epoch": 0.12647132481843226, "grad_norm": 0.33984375, "learning_rate": 1.9415530197606457e-05, "loss": 1.0599, "step": 505 }, { "epoch": 0.12672176308539945, "grad_norm": 0.306640625, "learning_rate": 1.9409963818536043e-05, "loss": 1.136, "step": 506 }, { "epoch": 0.12697220135236664, "grad_norm": 0.341796875, "learning_rate": 1.9404397439465628e-05, "loss": 1.0732, "step": 507 }, { "epoch": 0.12722263961933383, "grad_norm": 0.400390625, "learning_rate": 1.9398831060395214e-05, "loss": 1.0602, "step": 508 }, { "epoch": 0.12747307788630102, "grad_norm": 0.32421875, "learning_rate": 1.93932646813248e-05, "loss": 0.9825, "step": 509 }, { "epoch": 0.1277235161532682, "grad_norm": 0.359375, "learning_rate": 1.9387698302254385e-05, "loss": 0.9899, "step": 510 }, { "epoch": 0.1279739544202354, "grad_norm": 0.328125, "learning_rate": 1.938213192318397e-05, "loss": 1.0287, "step": 511 }, { "epoch": 0.1282243926872026, "grad_norm": 0.34765625, "learning_rate": 1.9376565544113556e-05, "loss": 1.0829, "step": 512 }, { "epoch": 0.1284748309541698, "grad_norm": 0.3046875, "learning_rate": 1.9370999165043142e-05, "loss": 1.1531, "step": 513 }, { "epoch": 0.128725269221137, "grad_norm": 0.3203125, "learning_rate": 1.9365432785972727e-05, "loss": 1.0358, "step": 514 }, { "epoch": 0.1289757074881042, "grad_norm": 0.294921875, "learning_rate": 1.9359866406902313e-05, "loss": 0.9529, "step": 515 }, { "epoch": 0.12922614575507138, "grad_norm": 0.359375, "learning_rate": 1.9354300027831895e-05, "loss": 1.0311, "step": 516 }, { "epoch": 0.12947658402203857, "grad_norm": 0.3203125, "learning_rate": 1.934873364876148e-05, "loss": 0.9975, "step": 517 }, { "epoch": 0.12972702228900576, "grad_norm": 0.32421875, "learning_rate": 1.9343167269691067e-05, "loss": 1.0439, "step": 518 }, { "epoch": 0.12997746055597295, "grad_norm": 0.28515625, "learning_rate": 1.9337600890620652e-05, "loss": 1.1623, "step": 519 }, { "epoch": 0.13022789882294014, "grad_norm": 0.29296875, "learning_rate": 1.9332034511550238e-05, "loss": 0.9208, "step": 520 }, { "epoch": 0.13047833708990733, "grad_norm": 0.318359375, "learning_rate": 1.9326468132479823e-05, "loss": 1.1408, "step": 521 }, { "epoch": 0.13072877535687452, "grad_norm": 0.3046875, "learning_rate": 1.932090175340941e-05, "loss": 0.9739, "step": 522 }, { "epoch": 0.1309792136238417, "grad_norm": 0.2890625, "learning_rate": 1.9315335374338995e-05, "loss": 0.9601, "step": 523 }, { "epoch": 0.13122965189080893, "grad_norm": 0.326171875, "learning_rate": 1.9309768995268577e-05, "loss": 1.1541, "step": 524 }, { "epoch": 0.13148009015777612, "grad_norm": 0.32421875, "learning_rate": 1.9304202616198166e-05, "loss": 1.1307, "step": 525 }, { "epoch": 0.1317305284247433, "grad_norm": 0.31640625, "learning_rate": 1.929863623712775e-05, "loss": 0.9998, "step": 526 }, { "epoch": 0.1319809666917105, "grad_norm": 0.306640625, "learning_rate": 1.9293069858057337e-05, "loss": 1.1322, "step": 527 }, { "epoch": 0.1322314049586777, "grad_norm": 0.318359375, "learning_rate": 1.928750347898692e-05, "loss": 1.2062, "step": 528 }, { "epoch": 0.13248184322564488, "grad_norm": 0.314453125, "learning_rate": 1.9281937099916505e-05, "loss": 1.1107, "step": 529 }, { "epoch": 0.13273228149261207, "grad_norm": 0.330078125, "learning_rate": 1.927637072084609e-05, "loss": 1.1122, "step": 530 }, { "epoch": 0.13298271975957926, "grad_norm": 0.318359375, "learning_rate": 1.9270804341775676e-05, "loss": 0.975, "step": 531 }, { "epoch": 0.13323315802654645, "grad_norm": 0.326171875, "learning_rate": 1.9265237962705262e-05, "loss": 0.9778, "step": 532 }, { "epoch": 0.13348359629351364, "grad_norm": 0.298828125, "learning_rate": 1.9259671583634847e-05, "loss": 0.8548, "step": 533 }, { "epoch": 0.13373403456048083, "grad_norm": 0.306640625, "learning_rate": 1.9254105204564433e-05, "loss": 1.1151, "step": 534 }, { "epoch": 0.13398447282744805, "grad_norm": 0.328125, "learning_rate": 1.924853882549402e-05, "loss": 0.9979, "step": 535 }, { "epoch": 0.13423491109441524, "grad_norm": 0.30078125, "learning_rate": 1.92429724464236e-05, "loss": 0.908, "step": 536 }, { "epoch": 0.13448534936138243, "grad_norm": 0.361328125, "learning_rate": 1.923740606735319e-05, "loss": 1.0455, "step": 537 }, { "epoch": 0.13473578762834962, "grad_norm": 0.32421875, "learning_rate": 1.9231839688282775e-05, "loss": 1.0524, "step": 538 }, { "epoch": 0.1349862258953168, "grad_norm": 0.3203125, "learning_rate": 1.922627330921236e-05, "loss": 0.9407, "step": 539 }, { "epoch": 0.135236664162284, "grad_norm": 0.314453125, "learning_rate": 1.9220706930141943e-05, "loss": 0.9562, "step": 540 }, { "epoch": 0.1354871024292512, "grad_norm": 0.296875, "learning_rate": 1.921514055107153e-05, "loss": 1.0192, "step": 541 }, { "epoch": 0.13573754069621838, "grad_norm": 0.28125, "learning_rate": 1.9209574172001114e-05, "loss": 0.9695, "step": 542 }, { "epoch": 0.13598797896318557, "grad_norm": 0.318359375, "learning_rate": 1.92040077929307e-05, "loss": 0.8243, "step": 543 }, { "epoch": 0.13623841723015276, "grad_norm": 0.314453125, "learning_rate": 1.9198441413860286e-05, "loss": 1.0889, "step": 544 }, { "epoch": 0.13648885549711995, "grad_norm": 0.2373046875, "learning_rate": 1.919287503478987e-05, "loss": 0.868, "step": 545 }, { "epoch": 0.13673929376408714, "grad_norm": 0.38671875, "learning_rate": 1.9187308655719457e-05, "loss": 0.8877, "step": 546 }, { "epoch": 0.13698973203105436, "grad_norm": 0.291015625, "learning_rate": 1.9181742276649043e-05, "loss": 0.8765, "step": 547 }, { "epoch": 0.13724017029802155, "grad_norm": 0.287109375, "learning_rate": 1.9176175897578625e-05, "loss": 0.9657, "step": 548 }, { "epoch": 0.13749060856498874, "grad_norm": 0.31640625, "learning_rate": 1.917060951850821e-05, "loss": 0.8961, "step": 549 }, { "epoch": 0.13774104683195593, "grad_norm": 0.30859375, "learning_rate": 1.91650431394378e-05, "loss": 1.0533, "step": 550 }, { "epoch": 0.13799148509892312, "grad_norm": 0.365234375, "learning_rate": 1.9159476760367385e-05, "loss": 1.0523, "step": 551 }, { "epoch": 0.1382419233658903, "grad_norm": 0.30859375, "learning_rate": 1.9153910381296967e-05, "loss": 1.0722, "step": 552 }, { "epoch": 0.1384923616328575, "grad_norm": 0.349609375, "learning_rate": 1.9148344002226553e-05, "loss": 1.1141, "step": 553 }, { "epoch": 0.1387427998998247, "grad_norm": 0.29296875, "learning_rate": 1.914277762315614e-05, "loss": 0.9674, "step": 554 }, { "epoch": 0.13899323816679188, "grad_norm": 0.34765625, "learning_rate": 1.9137211244085724e-05, "loss": 0.9732, "step": 555 }, { "epoch": 0.13924367643375907, "grad_norm": 0.318359375, "learning_rate": 1.913164486501531e-05, "loss": 0.9502, "step": 556 }, { "epoch": 0.13949411470072626, "grad_norm": 0.298828125, "learning_rate": 1.9126078485944895e-05, "loss": 0.9708, "step": 557 }, { "epoch": 0.13974455296769348, "grad_norm": 0.298828125, "learning_rate": 1.912051210687448e-05, "loss": 0.9662, "step": 558 }, { "epoch": 0.13999499123466067, "grad_norm": 0.3203125, "learning_rate": 1.9114945727804063e-05, "loss": 1.0114, "step": 559 }, { "epoch": 0.14024542950162786, "grad_norm": 0.365234375, "learning_rate": 1.910937934873365e-05, "loss": 1.059, "step": 560 }, { "epoch": 0.14049586776859505, "grad_norm": 0.341796875, "learning_rate": 1.9103812969663234e-05, "loss": 1.0901, "step": 561 }, { "epoch": 0.14074630603556224, "grad_norm": 0.341796875, "learning_rate": 1.909824659059282e-05, "loss": 0.7892, "step": 562 }, { "epoch": 0.14099674430252943, "grad_norm": 0.298828125, "learning_rate": 1.9092680211522406e-05, "loss": 0.97, "step": 563 }, { "epoch": 0.14124718256949662, "grad_norm": 0.34765625, "learning_rate": 1.908711383245199e-05, "loss": 0.8628, "step": 564 }, { "epoch": 0.1414976208364638, "grad_norm": 0.32421875, "learning_rate": 1.9081547453381577e-05, "loss": 0.9606, "step": 565 }, { "epoch": 0.141748059103431, "grad_norm": 0.2734375, "learning_rate": 1.9075981074311162e-05, "loss": 0.9101, "step": 566 }, { "epoch": 0.14199849737039819, "grad_norm": 0.35546875, "learning_rate": 1.9070414695240748e-05, "loss": 1.017, "step": 567 }, { "epoch": 0.14224893563736538, "grad_norm": 0.296875, "learning_rate": 1.9064848316170334e-05, "loss": 1.0355, "step": 568 }, { "epoch": 0.1424993739043326, "grad_norm": 0.302734375, "learning_rate": 1.905928193709992e-05, "loss": 0.993, "step": 569 }, { "epoch": 0.14274981217129978, "grad_norm": 0.330078125, "learning_rate": 1.9053715558029505e-05, "loss": 1.0265, "step": 570 }, { "epoch": 0.14300025043826697, "grad_norm": 0.3046875, "learning_rate": 1.9048149178959087e-05, "loss": 0.8275, "step": 571 }, { "epoch": 0.14325068870523416, "grad_norm": 0.294921875, "learning_rate": 1.9042582799888673e-05, "loss": 1.042, "step": 572 }, { "epoch": 0.14350112697220135, "grad_norm": 0.330078125, "learning_rate": 1.9037016420818258e-05, "loss": 1.0658, "step": 573 }, { "epoch": 0.14375156523916854, "grad_norm": 0.310546875, "learning_rate": 1.9031450041747844e-05, "loss": 1.0435, "step": 574 }, { "epoch": 0.14400200350613573, "grad_norm": 0.330078125, "learning_rate": 1.902588366267743e-05, "loss": 0.8103, "step": 575 }, { "epoch": 0.14425244177310292, "grad_norm": 0.2734375, "learning_rate": 1.9020317283607015e-05, "loss": 0.9316, "step": 576 }, { "epoch": 0.14450288004007011, "grad_norm": 0.326171875, "learning_rate": 1.90147509045366e-05, "loss": 0.9568, "step": 577 }, { "epoch": 0.1447533183070373, "grad_norm": 0.333984375, "learning_rate": 1.9009184525466186e-05, "loss": 0.9129, "step": 578 }, { "epoch": 0.1450037565740045, "grad_norm": 0.30078125, "learning_rate": 1.9003618146395772e-05, "loss": 1.1375, "step": 579 }, { "epoch": 0.1452541948409717, "grad_norm": 0.33203125, "learning_rate": 1.8998051767325358e-05, "loss": 0.9963, "step": 580 }, { "epoch": 0.1455046331079389, "grad_norm": 0.298828125, "learning_rate": 1.8992485388254943e-05, "loss": 0.9514, "step": 581 }, { "epoch": 0.1457550713749061, "grad_norm": 0.2890625, "learning_rate": 1.898691900918453e-05, "loss": 1.0617, "step": 582 }, { "epoch": 0.14600550964187328, "grad_norm": 0.32421875, "learning_rate": 1.898135263011411e-05, "loss": 0.9628, "step": 583 }, { "epoch": 0.14625594790884047, "grad_norm": 0.330078125, "learning_rate": 1.8975786251043697e-05, "loss": 0.9319, "step": 584 }, { "epoch": 0.14650638617580766, "grad_norm": 0.3359375, "learning_rate": 1.8970219871973282e-05, "loss": 1.0055, "step": 585 }, { "epoch": 0.14675682444277485, "grad_norm": 0.283203125, "learning_rate": 1.8964653492902868e-05, "loss": 0.9832, "step": 586 }, { "epoch": 0.14700726270974204, "grad_norm": 0.33203125, "learning_rate": 1.8959087113832453e-05, "loss": 0.9937, "step": 587 }, { "epoch": 0.14725770097670923, "grad_norm": 0.30859375, "learning_rate": 1.895352073476204e-05, "loss": 0.8791, "step": 588 }, { "epoch": 0.14750813924367642, "grad_norm": 0.326171875, "learning_rate": 1.8947954355691625e-05, "loss": 1.1753, "step": 589 }, { "epoch": 0.1477585775106436, "grad_norm": 0.31640625, "learning_rate": 1.894238797662121e-05, "loss": 1.0086, "step": 590 }, { "epoch": 0.14800901577761083, "grad_norm": 0.380859375, "learning_rate": 1.8936821597550792e-05, "loss": 0.9034, "step": 591 }, { "epoch": 0.14825945404457802, "grad_norm": 0.314453125, "learning_rate": 1.893125521848038e-05, "loss": 0.979, "step": 592 }, { "epoch": 0.1485098923115452, "grad_norm": 0.3203125, "learning_rate": 1.8925688839409967e-05, "loss": 1.1301, "step": 593 }, { "epoch": 0.1487603305785124, "grad_norm": 0.333984375, "learning_rate": 1.8920122460339553e-05, "loss": 1.1375, "step": 594 }, { "epoch": 0.1490107688454796, "grad_norm": 0.34375, "learning_rate": 1.8914556081269135e-05, "loss": 1.146, "step": 595 }, { "epoch": 0.14926120711244678, "grad_norm": 0.33203125, "learning_rate": 1.890898970219872e-05, "loss": 1.083, "step": 596 }, { "epoch": 0.14951164537941397, "grad_norm": 0.33203125, "learning_rate": 1.8903423323128306e-05, "loss": 1.0546, "step": 597 }, { "epoch": 0.14976208364638116, "grad_norm": 0.31640625, "learning_rate": 1.8897856944057892e-05, "loss": 0.9863, "step": 598 }, { "epoch": 0.15001252191334835, "grad_norm": 0.3515625, "learning_rate": 1.8892290564987477e-05, "loss": 0.9428, "step": 599 }, { "epoch": 0.15026296018031554, "grad_norm": 0.404296875, "learning_rate": 1.8886724185917063e-05, "loss": 0.9555, "step": 600 }, { "epoch": 0.15051339844728273, "grad_norm": 0.314453125, "learning_rate": 1.888115780684665e-05, "loss": 1.1377, "step": 601 }, { "epoch": 0.15076383671424995, "grad_norm": 0.306640625, "learning_rate": 1.8875591427776234e-05, "loss": 0.8994, "step": 602 }, { "epoch": 0.15101427498121714, "grad_norm": 0.28515625, "learning_rate": 1.8870025048705816e-05, "loss": 0.9407, "step": 603 }, { "epoch": 0.15126471324818433, "grad_norm": 0.314453125, "learning_rate": 1.8864458669635402e-05, "loss": 0.9972, "step": 604 }, { "epoch": 0.15151515151515152, "grad_norm": 0.3671875, "learning_rate": 1.885889229056499e-05, "loss": 1.0774, "step": 605 }, { "epoch": 0.1517655897821187, "grad_norm": 0.326171875, "learning_rate": 1.8853325911494573e-05, "loss": 1.0286, "step": 606 }, { "epoch": 0.1520160280490859, "grad_norm": 0.34765625, "learning_rate": 1.884775953242416e-05, "loss": 0.8341, "step": 607 }, { "epoch": 0.1522664663160531, "grad_norm": 0.326171875, "learning_rate": 1.8842193153353744e-05, "loss": 0.9592, "step": 608 }, { "epoch": 0.15251690458302028, "grad_norm": 0.30859375, "learning_rate": 1.883662677428333e-05, "loss": 0.9345, "step": 609 }, { "epoch": 0.15276734284998747, "grad_norm": 0.314453125, "learning_rate": 1.8831060395212916e-05, "loss": 0.8952, "step": 610 }, { "epoch": 0.15301778111695466, "grad_norm": 0.37890625, "learning_rate": 1.88254940161425e-05, "loss": 0.9004, "step": 611 }, { "epoch": 0.15326821938392185, "grad_norm": 0.291015625, "learning_rate": 1.8819927637072087e-05, "loss": 0.9277, "step": 612 }, { "epoch": 0.15351865765088907, "grad_norm": 0.349609375, "learning_rate": 1.8814361258001673e-05, "loss": 1.1025, "step": 613 }, { "epoch": 0.15376909591785626, "grad_norm": 0.3359375, "learning_rate": 1.8808794878931255e-05, "loss": 0.9834, "step": 614 }, { "epoch": 0.15401953418482345, "grad_norm": 0.34765625, "learning_rate": 1.880322849986084e-05, "loss": 0.9141, "step": 615 }, { "epoch": 0.15426997245179064, "grad_norm": 0.298828125, "learning_rate": 1.8797662120790426e-05, "loss": 0.8444, "step": 616 }, { "epoch": 0.15452041071875783, "grad_norm": 0.3046875, "learning_rate": 1.8792095741720015e-05, "loss": 0.9082, "step": 617 }, { "epoch": 0.15477084898572502, "grad_norm": 0.33203125, "learning_rate": 1.8786529362649597e-05, "loss": 0.9998, "step": 618 }, { "epoch": 0.1550212872526922, "grad_norm": 0.2890625, "learning_rate": 1.8780962983579183e-05, "loss": 0.9476, "step": 619 }, { "epoch": 0.1552717255196594, "grad_norm": 0.353515625, "learning_rate": 1.877539660450877e-05, "loss": 0.9665, "step": 620 }, { "epoch": 0.1555221637866266, "grad_norm": 0.35546875, "learning_rate": 1.8769830225438354e-05, "loss": 0.957, "step": 621 }, { "epoch": 0.15577260205359378, "grad_norm": 0.3203125, "learning_rate": 1.876426384636794e-05, "loss": 1.0145, "step": 622 }, { "epoch": 0.15602304032056097, "grad_norm": 0.310546875, "learning_rate": 1.8758697467297525e-05, "loss": 1.0169, "step": 623 }, { "epoch": 0.1562734785875282, "grad_norm": 0.31640625, "learning_rate": 1.875313108822711e-05, "loss": 1.0512, "step": 624 }, { "epoch": 0.15652391685449538, "grad_norm": 0.330078125, "learning_rate": 1.8747564709156697e-05, "loss": 0.974, "step": 625 }, { "epoch": 0.15677435512146257, "grad_norm": 0.30078125, "learning_rate": 1.874199833008628e-05, "loss": 1.0814, "step": 626 }, { "epoch": 0.15702479338842976, "grad_norm": 0.32421875, "learning_rate": 1.8736431951015864e-05, "loss": 1.0332, "step": 627 }, { "epoch": 0.15727523165539695, "grad_norm": 0.359375, "learning_rate": 1.873086557194545e-05, "loss": 0.8012, "step": 628 }, { "epoch": 0.15752566992236414, "grad_norm": 0.341796875, "learning_rate": 1.8725299192875036e-05, "loss": 0.9591, "step": 629 }, { "epoch": 0.15777610818933133, "grad_norm": 0.365234375, "learning_rate": 1.871973281380462e-05, "loss": 0.929, "step": 630 }, { "epoch": 0.15802654645629852, "grad_norm": 0.30859375, "learning_rate": 1.8714166434734207e-05, "loss": 1.0396, "step": 631 }, { "epoch": 0.1582769847232657, "grad_norm": 0.30859375, "learning_rate": 1.8708600055663792e-05, "loss": 1.0788, "step": 632 }, { "epoch": 0.1585274229902329, "grad_norm": 0.31640625, "learning_rate": 1.8703033676593378e-05, "loss": 0.977, "step": 633 }, { "epoch": 0.1587778612572001, "grad_norm": 0.32421875, "learning_rate": 1.8697467297522964e-05, "loss": 0.9211, "step": 634 }, { "epoch": 0.1590282995241673, "grad_norm": 0.3515625, "learning_rate": 1.869190091845255e-05, "loss": 0.9731, "step": 635 }, { "epoch": 0.1592787377911345, "grad_norm": 0.3125, "learning_rate": 1.8686334539382135e-05, "loss": 0.8794, "step": 636 }, { "epoch": 0.15952917605810168, "grad_norm": 0.33203125, "learning_rate": 1.868076816031172e-05, "loss": 1.1433, "step": 637 }, { "epoch": 0.15977961432506887, "grad_norm": 0.3125, "learning_rate": 1.8675201781241303e-05, "loss": 0.9864, "step": 638 }, { "epoch": 0.16003005259203607, "grad_norm": 0.306640625, "learning_rate": 1.8669635402170888e-05, "loss": 1.1676, "step": 639 }, { "epoch": 0.16028049085900326, "grad_norm": 0.287109375, "learning_rate": 1.8664069023100474e-05, "loss": 0.9139, "step": 640 }, { "epoch": 0.16053092912597045, "grad_norm": 0.3203125, "learning_rate": 1.865850264403006e-05, "loss": 1.0718, "step": 641 }, { "epoch": 0.16078136739293764, "grad_norm": 0.359375, "learning_rate": 1.8652936264959645e-05, "loss": 0.9877, "step": 642 }, { "epoch": 0.16103180565990483, "grad_norm": 0.310546875, "learning_rate": 1.864736988588923e-05, "loss": 0.9919, "step": 643 }, { "epoch": 0.16128224392687202, "grad_norm": 0.27734375, "learning_rate": 1.8641803506818816e-05, "loss": 0.8225, "step": 644 }, { "epoch": 0.1615326821938392, "grad_norm": 0.33203125, "learning_rate": 1.8636237127748402e-05, "loss": 0.8013, "step": 645 }, { "epoch": 0.16178312046080642, "grad_norm": 0.28515625, "learning_rate": 1.8630670748677984e-05, "loss": 0.973, "step": 646 }, { "epoch": 0.1620335587277736, "grad_norm": 0.30078125, "learning_rate": 1.8625104369607573e-05, "loss": 1.0147, "step": 647 }, { "epoch": 0.1622839969947408, "grad_norm": 0.283203125, "learning_rate": 1.861953799053716e-05, "loss": 1.2079, "step": 648 }, { "epoch": 0.162534435261708, "grad_norm": 0.349609375, "learning_rate": 1.8613971611466744e-05, "loss": 1.0932, "step": 649 }, { "epoch": 0.16278487352867518, "grad_norm": 0.3046875, "learning_rate": 1.8608405232396327e-05, "loss": 0.8818, "step": 650 }, { "epoch": 0.16303531179564237, "grad_norm": 0.30078125, "learning_rate": 1.8602838853325912e-05, "loss": 1.0436, "step": 651 }, { "epoch": 0.16328575006260956, "grad_norm": 0.3359375, "learning_rate": 1.8597272474255498e-05, "loss": 0.9283, "step": 652 }, { "epoch": 0.16353618832957675, "grad_norm": 0.28515625, "learning_rate": 1.8591706095185083e-05, "loss": 1.1737, "step": 653 }, { "epoch": 0.16378662659654394, "grad_norm": 0.30078125, "learning_rate": 1.858613971611467e-05, "loss": 1.0672, "step": 654 }, { "epoch": 0.16403706486351113, "grad_norm": 0.36328125, "learning_rate": 1.8580573337044255e-05, "loss": 1.0716, "step": 655 }, { "epoch": 0.16428750313047832, "grad_norm": 0.369140625, "learning_rate": 1.857500695797384e-05, "loss": 0.9204, "step": 656 }, { "epoch": 0.16453794139744554, "grad_norm": 0.30078125, "learning_rate": 1.8569440578903423e-05, "loss": 1.041, "step": 657 }, { "epoch": 0.16478837966441273, "grad_norm": 0.306640625, "learning_rate": 1.8563874199833008e-05, "loss": 0.8727, "step": 658 }, { "epoch": 0.16503881793137992, "grad_norm": 0.322265625, "learning_rate": 1.8558307820762597e-05, "loss": 0.8747, "step": 659 }, { "epoch": 0.1652892561983471, "grad_norm": 0.314453125, "learning_rate": 1.8552741441692183e-05, "loss": 1.0269, "step": 660 }, { "epoch": 0.1655396944653143, "grad_norm": 0.33203125, "learning_rate": 1.8547175062621765e-05, "loss": 1.0358, "step": 661 }, { "epoch": 0.1657901327322815, "grad_norm": 0.306640625, "learning_rate": 1.854160868355135e-05, "loss": 0.9882, "step": 662 }, { "epoch": 0.16604057099924868, "grad_norm": 0.33203125, "learning_rate": 1.8536042304480936e-05, "loss": 0.9086, "step": 663 }, { "epoch": 0.16629100926621587, "grad_norm": 0.37109375, "learning_rate": 1.8530475925410522e-05, "loss": 1.0657, "step": 664 }, { "epoch": 0.16654144753318306, "grad_norm": 0.318359375, "learning_rate": 1.8524909546340107e-05, "loss": 0.9946, "step": 665 }, { "epoch": 0.16679188580015025, "grad_norm": 0.314453125, "learning_rate": 1.8519343167269693e-05, "loss": 1.0351, "step": 666 }, { "epoch": 0.16704232406711744, "grad_norm": 0.326171875, "learning_rate": 1.851377678819928e-05, "loss": 1.0364, "step": 667 }, { "epoch": 0.16729276233408466, "grad_norm": 0.30078125, "learning_rate": 1.8508210409128864e-05, "loss": 0.8793, "step": 668 }, { "epoch": 0.16754320060105185, "grad_norm": 0.396484375, "learning_rate": 1.8502644030058446e-05, "loss": 0.8943, "step": 669 }, { "epoch": 0.16779363886801904, "grad_norm": 0.3046875, "learning_rate": 1.8497077650988032e-05, "loss": 0.9319, "step": 670 }, { "epoch": 0.16804407713498623, "grad_norm": 0.345703125, "learning_rate": 1.8491511271917618e-05, "loss": 1.0193, "step": 671 }, { "epoch": 0.16829451540195342, "grad_norm": 0.345703125, "learning_rate": 1.8485944892847207e-05, "loss": 0.9679, "step": 672 }, { "epoch": 0.1685449536689206, "grad_norm": 0.314453125, "learning_rate": 1.848037851377679e-05, "loss": 1.0453, "step": 673 }, { "epoch": 0.1687953919358878, "grad_norm": 0.341796875, "learning_rate": 1.8474812134706375e-05, "loss": 0.8309, "step": 674 }, { "epoch": 0.169045830202855, "grad_norm": 0.349609375, "learning_rate": 1.846924575563596e-05, "loss": 0.937, "step": 675 }, { "epoch": 0.16929626846982218, "grad_norm": 0.326171875, "learning_rate": 1.8463679376565546e-05, "loss": 0.9438, "step": 676 }, { "epoch": 0.16954670673678937, "grad_norm": 0.3125, "learning_rate": 1.845811299749513e-05, "loss": 0.9938, "step": 677 }, { "epoch": 0.16979714500375656, "grad_norm": 0.30859375, "learning_rate": 1.8452546618424717e-05, "loss": 0.9707, "step": 678 }, { "epoch": 0.17004758327072378, "grad_norm": 0.345703125, "learning_rate": 1.8446980239354303e-05, "loss": 0.9939, "step": 679 }, { "epoch": 0.17029802153769097, "grad_norm": 0.306640625, "learning_rate": 1.8441413860283888e-05, "loss": 0.9757, "step": 680 }, { "epoch": 0.17054845980465816, "grad_norm": 0.349609375, "learning_rate": 1.843584748121347e-05, "loss": 0.8796, "step": 681 }, { "epoch": 0.17079889807162535, "grad_norm": 0.30859375, "learning_rate": 1.8430281102143056e-05, "loss": 0.9673, "step": 682 }, { "epoch": 0.17104933633859254, "grad_norm": 0.326171875, "learning_rate": 1.842471472307264e-05, "loss": 1.002, "step": 683 }, { "epoch": 0.17129977460555973, "grad_norm": 0.318359375, "learning_rate": 1.8419148344002227e-05, "loss": 0.8473, "step": 684 }, { "epoch": 0.17155021287252692, "grad_norm": 0.361328125, "learning_rate": 1.8413581964931813e-05, "loss": 1.0638, "step": 685 }, { "epoch": 0.1718006511394941, "grad_norm": 0.314453125, "learning_rate": 1.84080155858614e-05, "loss": 0.8162, "step": 686 }, { "epoch": 0.1720510894064613, "grad_norm": 0.333984375, "learning_rate": 1.8402449206790984e-05, "loss": 0.8796, "step": 687 }, { "epoch": 0.1723015276734285, "grad_norm": 0.341796875, "learning_rate": 1.839688282772057e-05, "loss": 0.9171, "step": 688 }, { "epoch": 0.17255196594039568, "grad_norm": 0.2890625, "learning_rate": 1.8391316448650155e-05, "loss": 0.9071, "step": 689 }, { "epoch": 0.1728024042073629, "grad_norm": 0.255859375, "learning_rate": 1.838575006957974e-05, "loss": 0.7646, "step": 690 }, { "epoch": 0.1730528424743301, "grad_norm": 0.423828125, "learning_rate": 1.8380183690509327e-05, "loss": 0.9197, "step": 691 }, { "epoch": 0.17330328074129728, "grad_norm": 0.345703125, "learning_rate": 1.8374617311438912e-05, "loss": 0.8905, "step": 692 }, { "epoch": 0.17355371900826447, "grad_norm": 0.32421875, "learning_rate": 1.8369050932368494e-05, "loss": 0.8655, "step": 693 }, { "epoch": 0.17380415727523166, "grad_norm": 0.3203125, "learning_rate": 1.836348455329808e-05, "loss": 0.8498, "step": 694 }, { "epoch": 0.17405459554219885, "grad_norm": 0.2890625, "learning_rate": 1.8357918174227666e-05, "loss": 0.7596, "step": 695 }, { "epoch": 0.17430503380916604, "grad_norm": 0.314453125, "learning_rate": 1.835235179515725e-05, "loss": 0.9363, "step": 696 }, { "epoch": 0.17455547207613323, "grad_norm": 0.314453125, "learning_rate": 1.8346785416086837e-05, "loss": 1.1126, "step": 697 }, { "epoch": 0.17480591034310042, "grad_norm": 0.296875, "learning_rate": 1.8341219037016422e-05, "loss": 0.9549, "step": 698 }, { "epoch": 0.1750563486100676, "grad_norm": 0.310546875, "learning_rate": 1.8335652657946008e-05, "loss": 0.922, "step": 699 }, { "epoch": 0.1753067868770348, "grad_norm": 0.298828125, "learning_rate": 1.8330086278875594e-05, "loss": 1.162, "step": 700 }, { "epoch": 0.17555722514400202, "grad_norm": 0.373046875, "learning_rate": 1.832451989980518e-05, "loss": 1.0234, "step": 701 }, { "epoch": 0.1758076634109692, "grad_norm": 0.310546875, "learning_rate": 1.8318953520734765e-05, "loss": 0.9221, "step": 702 }, { "epoch": 0.1760581016779364, "grad_norm": 0.310546875, "learning_rate": 1.831338714166435e-05, "loss": 1.054, "step": 703 }, { "epoch": 0.1763085399449036, "grad_norm": 0.328125, "learning_rate": 1.8307820762593933e-05, "loss": 0.8999, "step": 704 }, { "epoch": 0.17655897821187078, "grad_norm": 0.31640625, "learning_rate": 1.830225438352352e-05, "loss": 1.018, "step": 705 }, { "epoch": 0.17680941647883797, "grad_norm": 0.380859375, "learning_rate": 1.8296688004453104e-05, "loss": 1.0039, "step": 706 }, { "epoch": 0.17705985474580516, "grad_norm": 0.328125, "learning_rate": 1.829112162538269e-05, "loss": 1.0541, "step": 707 }, { "epoch": 0.17731029301277235, "grad_norm": 0.373046875, "learning_rate": 1.8285555246312275e-05, "loss": 1.1083, "step": 708 }, { "epoch": 0.17756073127973954, "grad_norm": 0.357421875, "learning_rate": 1.827998886724186e-05, "loss": 1.2093, "step": 709 }, { "epoch": 0.17781116954670673, "grad_norm": 0.296875, "learning_rate": 1.8274422488171446e-05, "loss": 1.0437, "step": 710 }, { "epoch": 0.17806160781367392, "grad_norm": 0.3515625, "learning_rate": 1.8268856109101032e-05, "loss": 0.9704, "step": 711 }, { "epoch": 0.17831204608064113, "grad_norm": 0.353515625, "learning_rate": 1.8263289730030614e-05, "loss": 0.8119, "step": 712 }, { "epoch": 0.17856248434760832, "grad_norm": 0.337890625, "learning_rate": 1.82577233509602e-05, "loss": 1.0672, "step": 713 }, { "epoch": 0.17881292261457551, "grad_norm": 0.3125, "learning_rate": 1.825215697188979e-05, "loss": 0.9019, "step": 714 }, { "epoch": 0.1790633608815427, "grad_norm": 0.353515625, "learning_rate": 1.8246590592819374e-05, "loss": 0.9081, "step": 715 }, { "epoch": 0.1793137991485099, "grad_norm": 0.33984375, "learning_rate": 1.8241024213748957e-05, "loss": 1.0169, "step": 716 }, { "epoch": 0.17956423741547708, "grad_norm": 0.30859375, "learning_rate": 1.8235457834678542e-05, "loss": 0.9128, "step": 717 }, { "epoch": 0.17981467568244427, "grad_norm": 0.361328125, "learning_rate": 1.8229891455608128e-05, "loss": 1.0631, "step": 718 }, { "epoch": 0.18006511394941146, "grad_norm": 0.322265625, "learning_rate": 1.8224325076537714e-05, "loss": 0.9529, "step": 719 }, { "epoch": 0.18031555221637866, "grad_norm": 0.353515625, "learning_rate": 1.82187586974673e-05, "loss": 0.9648, "step": 720 }, { "epoch": 0.18056599048334585, "grad_norm": 0.322265625, "learning_rate": 1.8213192318396885e-05, "loss": 0.8872, "step": 721 }, { "epoch": 0.18081642875031304, "grad_norm": 0.3046875, "learning_rate": 1.820762593932647e-05, "loss": 0.97, "step": 722 }, { "epoch": 0.18106686701728025, "grad_norm": 0.32421875, "learning_rate": 1.8202059560256056e-05, "loss": 0.9071, "step": 723 }, { "epoch": 0.18131730528424744, "grad_norm": 0.306640625, "learning_rate": 1.8196493181185638e-05, "loss": 0.9561, "step": 724 }, { "epoch": 0.18156774355121463, "grad_norm": 0.328125, "learning_rate": 1.8190926802115224e-05, "loss": 0.9599, "step": 725 }, { "epoch": 0.18181818181818182, "grad_norm": 0.296875, "learning_rate": 1.818536042304481e-05, "loss": 1.0182, "step": 726 }, { "epoch": 0.182068620085149, "grad_norm": 0.314453125, "learning_rate": 1.81797940439744e-05, "loss": 0.9988, "step": 727 }, { "epoch": 0.1823190583521162, "grad_norm": 0.34375, "learning_rate": 1.817422766490398e-05, "loss": 0.9385, "step": 728 }, { "epoch": 0.1825694966190834, "grad_norm": 0.30859375, "learning_rate": 1.8168661285833566e-05, "loss": 0.9455, "step": 729 }, { "epoch": 0.18281993488605058, "grad_norm": 0.388671875, "learning_rate": 1.8163094906763152e-05, "loss": 0.8019, "step": 730 }, { "epoch": 0.18307037315301777, "grad_norm": 0.3515625, "learning_rate": 1.8157528527692737e-05, "loss": 0.9338, "step": 731 }, { "epoch": 0.18332081141998496, "grad_norm": 0.373046875, "learning_rate": 1.8151962148622323e-05, "loss": 1.003, "step": 732 }, { "epoch": 0.18357124968695215, "grad_norm": 0.3671875, "learning_rate": 1.814639576955191e-05, "loss": 0.9774, "step": 733 }, { "epoch": 0.18382168795391937, "grad_norm": 0.337890625, "learning_rate": 1.8140829390481494e-05, "loss": 0.9269, "step": 734 }, { "epoch": 0.18407212622088656, "grad_norm": 0.333984375, "learning_rate": 1.813526301141108e-05, "loss": 0.9492, "step": 735 }, { "epoch": 0.18432256448785375, "grad_norm": 0.37890625, "learning_rate": 1.8129696632340662e-05, "loss": 1.0293, "step": 736 }, { "epoch": 0.18457300275482094, "grad_norm": 0.3125, "learning_rate": 1.8124130253270248e-05, "loss": 0.9233, "step": 737 }, { "epoch": 0.18482344102178813, "grad_norm": 0.353515625, "learning_rate": 1.8118563874199833e-05, "loss": 1.0116, "step": 738 }, { "epoch": 0.18507387928875532, "grad_norm": 0.359375, "learning_rate": 1.8112997495129422e-05, "loss": 0.9853, "step": 739 }, { "epoch": 0.1853243175557225, "grad_norm": 0.353515625, "learning_rate": 1.8107431116059005e-05, "loss": 0.9849, "step": 740 }, { "epoch": 0.1855747558226897, "grad_norm": 0.314453125, "learning_rate": 1.810186473698859e-05, "loss": 0.9658, "step": 741 }, { "epoch": 0.1858251940896569, "grad_norm": 0.369140625, "learning_rate": 1.8096298357918176e-05, "loss": 0.9778, "step": 742 }, { "epoch": 0.18607563235662408, "grad_norm": 0.32421875, "learning_rate": 1.809073197884776e-05, "loss": 0.872, "step": 743 }, { "epoch": 0.18632607062359127, "grad_norm": 0.328125, "learning_rate": 1.8085165599777347e-05, "loss": 1.2427, "step": 744 }, { "epoch": 0.1865765088905585, "grad_norm": 0.328125, "learning_rate": 1.8079599220706933e-05, "loss": 0.9942, "step": 745 }, { "epoch": 0.18682694715752568, "grad_norm": 0.326171875, "learning_rate": 1.8074032841636518e-05, "loss": 0.8907, "step": 746 }, { "epoch": 0.18707738542449287, "grad_norm": 0.341796875, "learning_rate": 1.8068466462566104e-05, "loss": 0.9097, "step": 747 }, { "epoch": 0.18732782369146006, "grad_norm": 0.3046875, "learning_rate": 1.8062900083495686e-05, "loss": 0.9303, "step": 748 }, { "epoch": 0.18757826195842725, "grad_norm": 0.291015625, "learning_rate": 1.8057333704425272e-05, "loss": 0.9142, "step": 749 }, { "epoch": 0.18782870022539444, "grad_norm": 0.328125, "learning_rate": 1.8051767325354857e-05, "loss": 0.8467, "step": 750 }, { "epoch": 0.18807913849236163, "grad_norm": 0.37890625, "learning_rate": 1.8046200946284443e-05, "loss": 0.8521, "step": 751 }, { "epoch": 0.18832957675932882, "grad_norm": 0.30859375, "learning_rate": 1.804063456721403e-05, "loss": 0.9261, "step": 752 }, { "epoch": 0.188580015026296, "grad_norm": 0.2734375, "learning_rate": 1.8035068188143614e-05, "loss": 0.8321, "step": 753 }, { "epoch": 0.1888304532932632, "grad_norm": 0.341796875, "learning_rate": 1.80295018090732e-05, "loss": 0.9424, "step": 754 }, { "epoch": 0.1890808915602304, "grad_norm": 0.3046875, "learning_rate": 1.8023935430002782e-05, "loss": 0.928, "step": 755 }, { "epoch": 0.1893313298271976, "grad_norm": 0.35546875, "learning_rate": 1.801836905093237e-05, "loss": 1.0014, "step": 756 }, { "epoch": 0.1895817680941648, "grad_norm": 0.314453125, "learning_rate": 1.8012802671861957e-05, "loss": 0.9605, "step": 757 }, { "epoch": 0.189832206361132, "grad_norm": 0.36328125, "learning_rate": 1.8007236292791542e-05, "loss": 0.8549, "step": 758 }, { "epoch": 0.19008264462809918, "grad_norm": 0.28125, "learning_rate": 1.8001669913721124e-05, "loss": 0.9099, "step": 759 }, { "epoch": 0.19033308289506637, "grad_norm": 0.34765625, "learning_rate": 1.799610353465071e-05, "loss": 1.1121, "step": 760 }, { "epoch": 0.19058352116203356, "grad_norm": 0.3203125, "learning_rate": 1.7990537155580296e-05, "loss": 0.9575, "step": 761 }, { "epoch": 0.19083395942900075, "grad_norm": 0.3671875, "learning_rate": 1.798497077650988e-05, "loss": 0.9607, "step": 762 }, { "epoch": 0.19108439769596794, "grad_norm": 0.32421875, "learning_rate": 1.7979404397439467e-05, "loss": 1.0575, "step": 763 }, { "epoch": 0.19133483596293513, "grad_norm": 0.400390625, "learning_rate": 1.7973838018369052e-05, "loss": 1.0761, "step": 764 }, { "epoch": 0.19158527422990232, "grad_norm": 0.32421875, "learning_rate": 1.7968271639298638e-05, "loss": 0.8766, "step": 765 }, { "epoch": 0.1918357124968695, "grad_norm": 0.298828125, "learning_rate": 1.7962705260228224e-05, "loss": 1.0982, "step": 766 }, { "epoch": 0.19208615076383673, "grad_norm": 0.337890625, "learning_rate": 1.7957138881157806e-05, "loss": 0.8695, "step": 767 }, { "epoch": 0.19233658903080392, "grad_norm": 0.3125, "learning_rate": 1.795157250208739e-05, "loss": 1.0955, "step": 768 }, { "epoch": 0.1925870272977711, "grad_norm": 0.353515625, "learning_rate": 1.794600612301698e-05, "loss": 1.0232, "step": 769 }, { "epoch": 0.1928374655647383, "grad_norm": 0.3125, "learning_rate": 1.7940439743946566e-05, "loss": 0.9902, "step": 770 }, { "epoch": 0.1930879038317055, "grad_norm": 0.306640625, "learning_rate": 1.793487336487615e-05, "loss": 0.9324, "step": 771 }, { "epoch": 0.19333834209867268, "grad_norm": 0.345703125, "learning_rate": 1.7929306985805734e-05, "loss": 0.904, "step": 772 }, { "epoch": 0.19358878036563987, "grad_norm": 0.310546875, "learning_rate": 1.792374060673532e-05, "loss": 0.9704, "step": 773 }, { "epoch": 0.19383921863260706, "grad_norm": 0.27734375, "learning_rate": 1.7918174227664905e-05, "loss": 0.8025, "step": 774 }, { "epoch": 0.19408965689957425, "grad_norm": 0.36328125, "learning_rate": 1.791260784859449e-05, "loss": 1.0843, "step": 775 }, { "epoch": 0.19434009516654144, "grad_norm": 0.302734375, "learning_rate": 1.7907041469524076e-05, "loss": 1.0501, "step": 776 }, { "epoch": 0.19459053343350863, "grad_norm": 0.33984375, "learning_rate": 1.7901475090453662e-05, "loss": 1.1282, "step": 777 }, { "epoch": 0.19484097170047585, "grad_norm": 0.375, "learning_rate": 1.7895908711383248e-05, "loss": 0.9162, "step": 778 }, { "epoch": 0.19509140996744304, "grad_norm": 0.37109375, "learning_rate": 1.789034233231283e-05, "loss": 1.0632, "step": 779 }, { "epoch": 0.19534184823441023, "grad_norm": 0.310546875, "learning_rate": 1.7884775953242416e-05, "loss": 1.116, "step": 780 }, { "epoch": 0.19559228650137742, "grad_norm": 0.353515625, "learning_rate": 1.7879209574172005e-05, "loss": 0.968, "step": 781 }, { "epoch": 0.1958427247683446, "grad_norm": 0.35546875, "learning_rate": 1.787364319510159e-05, "loss": 0.8438, "step": 782 }, { "epoch": 0.1960931630353118, "grad_norm": 0.26171875, "learning_rate": 1.7868076816031172e-05, "loss": 0.5541, "step": 783 }, { "epoch": 0.19634360130227899, "grad_norm": 0.359375, "learning_rate": 1.7862510436960758e-05, "loss": 1.0321, "step": 784 }, { "epoch": 0.19659403956924618, "grad_norm": 0.337890625, "learning_rate": 1.7856944057890344e-05, "loss": 1.0018, "step": 785 }, { "epoch": 0.19684447783621337, "grad_norm": 0.3359375, "learning_rate": 1.785137767881993e-05, "loss": 0.9429, "step": 786 }, { "epoch": 0.19709491610318056, "grad_norm": 0.279296875, "learning_rate": 1.7845811299749515e-05, "loss": 0.8706, "step": 787 }, { "epoch": 0.19734535437014775, "grad_norm": 0.35546875, "learning_rate": 1.78402449206791e-05, "loss": 0.8828, "step": 788 }, { "epoch": 0.19759579263711496, "grad_norm": 0.376953125, "learning_rate": 1.7834678541608686e-05, "loss": 0.9271, "step": 789 }, { "epoch": 0.19784623090408215, "grad_norm": 0.341796875, "learning_rate": 1.782911216253827e-05, "loss": 0.8957, "step": 790 }, { "epoch": 0.19809666917104934, "grad_norm": 0.33984375, "learning_rate": 1.7823545783467854e-05, "loss": 1.0502, "step": 791 }, { "epoch": 0.19834710743801653, "grad_norm": 0.341796875, "learning_rate": 1.781797940439744e-05, "loss": 0.9718, "step": 792 }, { "epoch": 0.19859754570498372, "grad_norm": 0.337890625, "learning_rate": 1.7812413025327025e-05, "loss": 0.9938, "step": 793 }, { "epoch": 0.19884798397195091, "grad_norm": 0.326171875, "learning_rate": 1.7806846646256614e-05, "loss": 0.9091, "step": 794 }, { "epoch": 0.1990984222389181, "grad_norm": 0.314453125, "learning_rate": 1.7801280267186196e-05, "loss": 1.0816, "step": 795 }, { "epoch": 0.1993488605058853, "grad_norm": 0.322265625, "learning_rate": 1.7795713888115782e-05, "loss": 0.861, "step": 796 }, { "epoch": 0.19959929877285248, "grad_norm": 0.33203125, "learning_rate": 1.7790147509045368e-05, "loss": 1.0086, "step": 797 }, { "epoch": 0.19984973703981967, "grad_norm": 0.3984375, "learning_rate": 1.7784581129974953e-05, "loss": 0.9242, "step": 798 }, { "epoch": 0.20010017530678686, "grad_norm": 0.376953125, "learning_rate": 1.777901475090454e-05, "loss": 0.948, "step": 799 }, { "epoch": 0.20035061357375408, "grad_norm": 0.330078125, "learning_rate": 1.7773448371834124e-05, "loss": 0.8617, "step": 800 }, { "epoch": 0.20060105184072127, "grad_norm": 0.31640625, "learning_rate": 1.776788199276371e-05, "loss": 1.1166, "step": 801 }, { "epoch": 0.20085149010768846, "grad_norm": 0.369140625, "learning_rate": 1.7762315613693296e-05, "loss": 0.8901, "step": 802 }, { "epoch": 0.20110192837465565, "grad_norm": 0.326171875, "learning_rate": 1.7756749234622878e-05, "loss": 0.9784, "step": 803 }, { "epoch": 0.20135236664162284, "grad_norm": 0.318359375, "learning_rate": 1.7751182855552463e-05, "loss": 0.9558, "step": 804 }, { "epoch": 0.20160280490859003, "grad_norm": 0.314453125, "learning_rate": 1.774561647648205e-05, "loss": 1.0128, "step": 805 }, { "epoch": 0.20185324317555722, "grad_norm": 0.33984375, "learning_rate": 1.7740050097411635e-05, "loss": 0.9989, "step": 806 }, { "epoch": 0.2021036814425244, "grad_norm": 0.322265625, "learning_rate": 1.773448371834122e-05, "loss": 0.8685, "step": 807 }, { "epoch": 0.2023541197094916, "grad_norm": 0.353515625, "learning_rate": 1.7728917339270806e-05, "loss": 0.9718, "step": 808 }, { "epoch": 0.2026045579764588, "grad_norm": 0.3359375, "learning_rate": 1.772335096020039e-05, "loss": 1.1253, "step": 809 }, { "epoch": 0.20285499624342598, "grad_norm": 0.375, "learning_rate": 1.7717784581129974e-05, "loss": 1.0096, "step": 810 }, { "epoch": 0.2031054345103932, "grad_norm": 0.3125, "learning_rate": 1.7712218202059563e-05, "loss": 0.9347, "step": 811 }, { "epoch": 0.2033558727773604, "grad_norm": 0.30078125, "learning_rate": 1.7706651822989148e-05, "loss": 0.9649, "step": 812 }, { "epoch": 0.20360631104432758, "grad_norm": 0.337890625, "learning_rate": 1.7701085443918734e-05, "loss": 0.838, "step": 813 }, { "epoch": 0.20385674931129477, "grad_norm": 0.36328125, "learning_rate": 1.7695519064848316e-05, "loss": 0.8316, "step": 814 }, { "epoch": 0.20410718757826196, "grad_norm": 0.314453125, "learning_rate": 1.7689952685777902e-05, "loss": 1.1529, "step": 815 }, { "epoch": 0.20435762584522915, "grad_norm": 0.32421875, "learning_rate": 1.7684386306707487e-05, "loss": 0.9218, "step": 816 }, { "epoch": 0.20460806411219634, "grad_norm": 0.32421875, "learning_rate": 1.7678819927637073e-05, "loss": 1.0057, "step": 817 }, { "epoch": 0.20485850237916353, "grad_norm": 0.3046875, "learning_rate": 1.767325354856666e-05, "loss": 0.9849, "step": 818 }, { "epoch": 0.20510894064613072, "grad_norm": 0.330078125, "learning_rate": 1.7667687169496244e-05, "loss": 0.9791, "step": 819 }, { "epoch": 0.2053593789130979, "grad_norm": 0.333984375, "learning_rate": 1.766212079042583e-05, "loss": 0.9469, "step": 820 }, { "epoch": 0.2056098171800651, "grad_norm": 0.34375, "learning_rate": 1.7656554411355415e-05, "loss": 1.2021, "step": 821 }, { "epoch": 0.20586025544703232, "grad_norm": 0.337890625, "learning_rate": 1.7650988032284998e-05, "loss": 0.8614, "step": 822 }, { "epoch": 0.2061106937139995, "grad_norm": 0.34765625, "learning_rate": 1.7645421653214587e-05, "loss": 0.9944, "step": 823 }, { "epoch": 0.2063611319809667, "grad_norm": 0.337890625, "learning_rate": 1.7639855274144172e-05, "loss": 0.8894, "step": 824 }, { "epoch": 0.2066115702479339, "grad_norm": 0.34375, "learning_rate": 1.7634288895073758e-05, "loss": 1.0696, "step": 825 }, { "epoch": 0.20686200851490108, "grad_norm": 0.353515625, "learning_rate": 1.762872251600334e-05, "loss": 0.9562, "step": 826 }, { "epoch": 0.20711244678186827, "grad_norm": 0.263671875, "learning_rate": 1.7623156136932926e-05, "loss": 0.8318, "step": 827 }, { "epoch": 0.20736288504883546, "grad_norm": 0.33984375, "learning_rate": 1.761758975786251e-05, "loss": 1.0426, "step": 828 }, { "epoch": 0.20761332331580265, "grad_norm": 0.328125, "learning_rate": 1.7612023378792097e-05, "loss": 1.2103, "step": 829 }, { "epoch": 0.20786376158276984, "grad_norm": 0.3359375, "learning_rate": 1.7606456999721683e-05, "loss": 0.9663, "step": 830 }, { "epoch": 0.20811419984973703, "grad_norm": 0.291015625, "learning_rate": 1.7600890620651268e-05, "loss": 1.0011, "step": 831 }, { "epoch": 0.20836463811670422, "grad_norm": 0.3671875, "learning_rate": 1.7595324241580854e-05, "loss": 0.9371, "step": 832 }, { "epoch": 0.20861507638367144, "grad_norm": 0.7265625, "learning_rate": 1.758975786251044e-05, "loss": 0.9452, "step": 833 }, { "epoch": 0.20886551465063863, "grad_norm": 0.3359375, "learning_rate": 1.758419148344002e-05, "loss": 0.9173, "step": 834 }, { "epoch": 0.20911595291760582, "grad_norm": 0.3359375, "learning_rate": 1.7578625104369607e-05, "loss": 1.0514, "step": 835 }, { "epoch": 0.209366391184573, "grad_norm": 0.34375, "learning_rate": 1.7573058725299196e-05, "loss": 0.9833, "step": 836 }, { "epoch": 0.2096168294515402, "grad_norm": 0.361328125, "learning_rate": 1.7567492346228782e-05, "loss": 1.0897, "step": 837 }, { "epoch": 0.2098672677185074, "grad_norm": 0.333984375, "learning_rate": 1.7561925967158364e-05, "loss": 1.0562, "step": 838 }, { "epoch": 0.21011770598547458, "grad_norm": 0.32421875, "learning_rate": 1.755635958808795e-05, "loss": 0.9306, "step": 839 }, { "epoch": 0.21036814425244177, "grad_norm": 0.32421875, "learning_rate": 1.7550793209017535e-05, "loss": 1.0691, "step": 840 }, { "epoch": 0.21061858251940896, "grad_norm": 0.359375, "learning_rate": 1.754522682994712e-05, "loss": 1.1384, "step": 841 }, { "epoch": 0.21086902078637615, "grad_norm": 0.353515625, "learning_rate": 1.7539660450876706e-05, "loss": 0.9622, "step": 842 }, { "epoch": 0.21111945905334334, "grad_norm": 0.306640625, "learning_rate": 1.7534094071806292e-05, "loss": 0.8213, "step": 843 }, { "epoch": 0.21136989732031056, "grad_norm": 0.328125, "learning_rate": 1.7528527692735878e-05, "loss": 0.7357, "step": 844 }, { "epoch": 0.21162033558727775, "grad_norm": 0.3359375, "learning_rate": 1.7522961313665463e-05, "loss": 0.836, "step": 845 }, { "epoch": 0.21187077385424494, "grad_norm": 0.359375, "learning_rate": 1.7517394934595046e-05, "loss": 0.9284, "step": 846 }, { "epoch": 0.21212121212121213, "grad_norm": 0.33203125, "learning_rate": 1.751182855552463e-05, "loss": 0.9496, "step": 847 }, { "epoch": 0.21237165038817932, "grad_norm": 0.4375, "learning_rate": 1.7506262176454217e-05, "loss": 0.9056, "step": 848 }, { "epoch": 0.2126220886551465, "grad_norm": 0.33984375, "learning_rate": 1.7500695797383806e-05, "loss": 0.8355, "step": 849 }, { "epoch": 0.2128725269221137, "grad_norm": 0.3515625, "learning_rate": 1.7495129418313388e-05, "loss": 1.0357, "step": 850 }, { "epoch": 0.2131229651890809, "grad_norm": 0.32421875, "learning_rate": 1.7489563039242974e-05, "loss": 0.9597, "step": 851 }, { "epoch": 0.21337340345604808, "grad_norm": 0.34765625, "learning_rate": 1.748399666017256e-05, "loss": 0.9661, "step": 852 }, { "epoch": 0.21362384172301527, "grad_norm": 0.359375, "learning_rate": 1.7478430281102145e-05, "loss": 1.1483, "step": 853 }, { "epoch": 0.21387427998998246, "grad_norm": 0.3984375, "learning_rate": 1.747286390203173e-05, "loss": 1.0142, "step": 854 }, { "epoch": 0.21412471825694968, "grad_norm": 0.365234375, "learning_rate": 1.7467297522961316e-05, "loss": 0.9205, "step": 855 }, { "epoch": 0.21437515652391687, "grad_norm": 0.36328125, "learning_rate": 1.74617311438909e-05, "loss": 0.7216, "step": 856 }, { "epoch": 0.21462559479088406, "grad_norm": 0.353515625, "learning_rate": 1.7456164764820484e-05, "loss": 0.9348, "step": 857 }, { "epoch": 0.21487603305785125, "grad_norm": 0.357421875, "learning_rate": 1.745059838575007e-05, "loss": 0.9856, "step": 858 }, { "epoch": 0.21512647132481844, "grad_norm": 0.349609375, "learning_rate": 1.7445032006679655e-05, "loss": 1.0369, "step": 859 }, { "epoch": 0.21537690959178563, "grad_norm": 0.361328125, "learning_rate": 1.743946562760924e-05, "loss": 0.8489, "step": 860 }, { "epoch": 0.21562734785875282, "grad_norm": 0.3671875, "learning_rate": 1.7433899248538826e-05, "loss": 0.9183, "step": 861 }, { "epoch": 0.21587778612572, "grad_norm": 0.3671875, "learning_rate": 1.7428332869468412e-05, "loss": 0.8608, "step": 862 }, { "epoch": 0.2161282243926872, "grad_norm": 0.3203125, "learning_rate": 1.7422766490397998e-05, "loss": 1.0717, "step": 863 }, { "epoch": 0.21637866265965439, "grad_norm": 0.37109375, "learning_rate": 1.7417200111327583e-05, "loss": 0.9526, "step": 864 }, { "epoch": 0.21662910092662158, "grad_norm": 0.306640625, "learning_rate": 1.741163373225717e-05, "loss": 0.9301, "step": 865 }, { "epoch": 0.2168795391935888, "grad_norm": 0.322265625, "learning_rate": 1.7406067353186754e-05, "loss": 1.1205, "step": 866 }, { "epoch": 0.21712997746055598, "grad_norm": 0.31640625, "learning_rate": 1.740050097411634e-05, "loss": 0.9219, "step": 867 }, { "epoch": 0.21738041572752317, "grad_norm": 0.30078125, "learning_rate": 1.7394934595045926e-05, "loss": 0.852, "step": 868 }, { "epoch": 0.21763085399449036, "grad_norm": 0.3671875, "learning_rate": 1.7389368215975508e-05, "loss": 0.9639, "step": 869 }, { "epoch": 0.21788129226145755, "grad_norm": 0.322265625, "learning_rate": 1.7383801836905093e-05, "loss": 0.8446, "step": 870 }, { "epoch": 0.21813173052842474, "grad_norm": 0.384765625, "learning_rate": 1.737823545783468e-05, "loss": 0.9299, "step": 871 }, { "epoch": 0.21838216879539193, "grad_norm": 0.333984375, "learning_rate": 1.7372669078764265e-05, "loss": 0.9134, "step": 872 }, { "epoch": 0.21863260706235912, "grad_norm": 0.3203125, "learning_rate": 1.736710269969385e-05, "loss": 1.1053, "step": 873 }, { "epoch": 0.21888304532932631, "grad_norm": 0.3359375, "learning_rate": 1.7361536320623436e-05, "loss": 0.9379, "step": 874 }, { "epoch": 0.2191334835962935, "grad_norm": 0.361328125, "learning_rate": 1.735596994155302e-05, "loss": 1.0574, "step": 875 }, { "epoch": 0.2193839218632607, "grad_norm": 0.33203125, "learning_rate": 1.7350403562482607e-05, "loss": 0.837, "step": 876 }, { "epoch": 0.2196343601302279, "grad_norm": 0.345703125, "learning_rate": 1.734483718341219e-05, "loss": 0.8386, "step": 877 }, { "epoch": 0.2198847983971951, "grad_norm": 0.33203125, "learning_rate": 1.733927080434178e-05, "loss": 0.7914, "step": 878 }, { "epoch": 0.2201352366641623, "grad_norm": 0.3125, "learning_rate": 1.7333704425271364e-05, "loss": 0.8365, "step": 879 }, { "epoch": 0.22038567493112948, "grad_norm": 0.34765625, "learning_rate": 1.732813804620095e-05, "loss": 0.9445, "step": 880 }, { "epoch": 0.22063611319809667, "grad_norm": 0.3515625, "learning_rate": 1.7322571667130532e-05, "loss": 0.9407, "step": 881 }, { "epoch": 0.22088655146506386, "grad_norm": 0.298828125, "learning_rate": 1.7317005288060117e-05, "loss": 0.907, "step": 882 }, { "epoch": 0.22113698973203105, "grad_norm": 0.330078125, "learning_rate": 1.7311438908989703e-05, "loss": 0.9565, "step": 883 }, { "epoch": 0.22138742799899824, "grad_norm": 0.3515625, "learning_rate": 1.730587252991929e-05, "loss": 1.109, "step": 884 }, { "epoch": 0.22163786626596543, "grad_norm": 0.36328125, "learning_rate": 1.7300306150848874e-05, "loss": 1.0015, "step": 885 }, { "epoch": 0.22188830453293262, "grad_norm": 0.314453125, "learning_rate": 1.729473977177846e-05, "loss": 1.1156, "step": 886 }, { "epoch": 0.2221387427998998, "grad_norm": 0.345703125, "learning_rate": 1.7289173392708045e-05, "loss": 1.0553, "step": 887 }, { "epoch": 0.22238918106686703, "grad_norm": 0.326171875, "learning_rate": 1.728360701363763e-05, "loss": 0.904, "step": 888 }, { "epoch": 0.22263961933383422, "grad_norm": 0.3515625, "learning_rate": 1.7278040634567213e-05, "loss": 0.967, "step": 889 }, { "epoch": 0.2228900576008014, "grad_norm": 0.361328125, "learning_rate": 1.72724742554968e-05, "loss": 0.7794, "step": 890 }, { "epoch": 0.2231404958677686, "grad_norm": 0.373046875, "learning_rate": 1.7266907876426388e-05, "loss": 0.8876, "step": 891 }, { "epoch": 0.2233909341347358, "grad_norm": 0.359375, "learning_rate": 1.7261341497355974e-05, "loss": 0.7147, "step": 892 }, { "epoch": 0.22364137240170298, "grad_norm": 0.357421875, "learning_rate": 1.7255775118285556e-05, "loss": 1.0201, "step": 893 }, { "epoch": 0.22389181066867017, "grad_norm": 0.396484375, "learning_rate": 1.725020873921514e-05, "loss": 0.7825, "step": 894 }, { "epoch": 0.22414224893563736, "grad_norm": 0.32421875, "learning_rate": 1.7244642360144727e-05, "loss": 1.1405, "step": 895 }, { "epoch": 0.22439268720260455, "grad_norm": 0.326171875, "learning_rate": 1.7239075981074313e-05, "loss": 0.9299, "step": 896 }, { "epoch": 0.22464312546957174, "grad_norm": 0.322265625, "learning_rate": 1.7233509602003898e-05, "loss": 0.854, "step": 897 }, { "epoch": 0.22489356373653893, "grad_norm": 0.341796875, "learning_rate": 1.7227943222933484e-05, "loss": 0.794, "step": 898 }, { "epoch": 0.22514400200350615, "grad_norm": 0.322265625, "learning_rate": 1.722237684386307e-05, "loss": 0.9024, "step": 899 }, { "epoch": 0.22539444027047334, "grad_norm": 0.35546875, "learning_rate": 1.7216810464792655e-05, "loss": 1.1289, "step": 900 }, { "epoch": 0.22564487853744053, "grad_norm": 0.330078125, "learning_rate": 1.7211244085722237e-05, "loss": 0.8834, "step": 901 }, { "epoch": 0.22589531680440772, "grad_norm": 0.3125, "learning_rate": 1.7205677706651823e-05, "loss": 0.9572, "step": 902 }, { "epoch": 0.2261457550713749, "grad_norm": 0.365234375, "learning_rate": 1.7200111327581412e-05, "loss": 0.8628, "step": 903 }, { "epoch": 0.2263961933383421, "grad_norm": 0.330078125, "learning_rate": 1.7194544948510997e-05, "loss": 0.891, "step": 904 }, { "epoch": 0.2266466316053093, "grad_norm": 0.37890625, "learning_rate": 1.718897856944058e-05, "loss": 0.9407, "step": 905 }, { "epoch": 0.22689706987227648, "grad_norm": 0.34765625, "learning_rate": 1.7183412190370165e-05, "loss": 0.9593, "step": 906 }, { "epoch": 0.22714750813924367, "grad_norm": 0.326171875, "learning_rate": 1.717784581129975e-05, "loss": 0.9837, "step": 907 }, { "epoch": 0.22739794640621086, "grad_norm": 0.361328125, "learning_rate": 1.7172279432229337e-05, "loss": 0.9158, "step": 908 }, { "epoch": 0.22764838467317805, "grad_norm": 0.330078125, "learning_rate": 1.7166713053158922e-05, "loss": 0.7764, "step": 909 }, { "epoch": 0.22789882294014524, "grad_norm": 0.337890625, "learning_rate": 1.7161146674088508e-05, "loss": 0.849, "step": 910 }, { "epoch": 0.22814926120711246, "grad_norm": 0.357421875, "learning_rate": 1.7155580295018093e-05, "loss": 0.9898, "step": 911 }, { "epoch": 0.22839969947407965, "grad_norm": 0.361328125, "learning_rate": 1.7150013915947676e-05, "loss": 1.037, "step": 912 }, { "epoch": 0.22865013774104684, "grad_norm": 0.330078125, "learning_rate": 1.714444753687726e-05, "loss": 1.0361, "step": 913 }, { "epoch": 0.22890057600801403, "grad_norm": 0.328125, "learning_rate": 1.7138881157806847e-05, "loss": 0.7075, "step": 914 }, { "epoch": 0.22915101427498122, "grad_norm": 0.3515625, "learning_rate": 1.7133314778736432e-05, "loss": 0.8298, "step": 915 }, { "epoch": 0.2294014525419484, "grad_norm": 0.333984375, "learning_rate": 1.7127748399666018e-05, "loss": 0.8815, "step": 916 }, { "epoch": 0.2296518908089156, "grad_norm": 0.330078125, "learning_rate": 1.7122182020595604e-05, "loss": 0.8889, "step": 917 }, { "epoch": 0.2299023290758828, "grad_norm": 0.388671875, "learning_rate": 1.711661564152519e-05, "loss": 1.0292, "step": 918 }, { "epoch": 0.23015276734284998, "grad_norm": 0.3359375, "learning_rate": 1.7111049262454775e-05, "loss": 0.8492, "step": 919 }, { "epoch": 0.23040320560981717, "grad_norm": 0.34765625, "learning_rate": 1.710548288338436e-05, "loss": 0.8951, "step": 920 }, { "epoch": 0.23065364387678436, "grad_norm": 0.37890625, "learning_rate": 1.7099916504313946e-05, "loss": 0.9808, "step": 921 }, { "epoch": 0.23090408214375158, "grad_norm": 0.341796875, "learning_rate": 1.7094350125243532e-05, "loss": 0.9384, "step": 922 }, { "epoch": 0.23115452041071877, "grad_norm": 0.341796875, "learning_rate": 1.7088783746173117e-05, "loss": 1.0231, "step": 923 }, { "epoch": 0.23140495867768596, "grad_norm": 0.314453125, "learning_rate": 1.70832173671027e-05, "loss": 0.9498, "step": 924 }, { "epoch": 0.23165539694465315, "grad_norm": 0.35546875, "learning_rate": 1.7077650988032285e-05, "loss": 0.8826, "step": 925 }, { "epoch": 0.23190583521162034, "grad_norm": 0.314453125, "learning_rate": 1.707208460896187e-05, "loss": 1.0455, "step": 926 }, { "epoch": 0.23215627347858753, "grad_norm": 0.306640625, "learning_rate": 1.7066518229891456e-05, "loss": 0.8863, "step": 927 }, { "epoch": 0.23240671174555472, "grad_norm": 0.294921875, "learning_rate": 1.7060951850821042e-05, "loss": 0.8843, "step": 928 }, { "epoch": 0.2326571500125219, "grad_norm": 0.318359375, "learning_rate": 1.7055385471750628e-05, "loss": 1.0244, "step": 929 }, { "epoch": 0.2329075882794891, "grad_norm": 0.36328125, "learning_rate": 1.7049819092680213e-05, "loss": 1.1374, "step": 930 }, { "epoch": 0.2331580265464563, "grad_norm": 0.333984375, "learning_rate": 1.70442527136098e-05, "loss": 1.0453, "step": 931 }, { "epoch": 0.23340846481342348, "grad_norm": 0.3359375, "learning_rate": 1.703868633453938e-05, "loss": 0.9494, "step": 932 }, { "epoch": 0.2336589030803907, "grad_norm": 0.328125, "learning_rate": 1.703311995546897e-05, "loss": 1.1066, "step": 933 }, { "epoch": 0.23390934134735789, "grad_norm": 0.32421875, "learning_rate": 1.7027553576398556e-05, "loss": 0.9506, "step": 934 }, { "epoch": 0.23415977961432508, "grad_norm": 0.349609375, "learning_rate": 1.702198719732814e-05, "loss": 1.0162, "step": 935 }, { "epoch": 0.23441021788129227, "grad_norm": 0.306640625, "learning_rate": 1.7016420818257723e-05, "loss": 0.8091, "step": 936 }, { "epoch": 0.23466065614825946, "grad_norm": 0.345703125, "learning_rate": 1.701085443918731e-05, "loss": 0.9499, "step": 937 }, { "epoch": 0.23491109441522665, "grad_norm": 0.333984375, "learning_rate": 1.7005288060116895e-05, "loss": 1.0129, "step": 938 }, { "epoch": 0.23516153268219384, "grad_norm": 0.361328125, "learning_rate": 1.699972168104648e-05, "loss": 0.8747, "step": 939 }, { "epoch": 0.23541197094916103, "grad_norm": 0.375, "learning_rate": 1.6994155301976066e-05, "loss": 0.8333, "step": 940 }, { "epoch": 0.23566240921612822, "grad_norm": 0.333984375, "learning_rate": 1.698858892290565e-05, "loss": 1.0421, "step": 941 }, { "epoch": 0.2359128474830954, "grad_norm": 0.376953125, "learning_rate": 1.6983022543835237e-05, "loss": 0.9235, "step": 942 }, { "epoch": 0.2361632857500626, "grad_norm": 0.3046875, "learning_rate": 1.6977456164764823e-05, "loss": 0.9131, "step": 943 }, { "epoch": 0.2364137240170298, "grad_norm": 0.341796875, "learning_rate": 1.6971889785694405e-05, "loss": 0.8066, "step": 944 }, { "epoch": 0.236664162283997, "grad_norm": 0.337890625, "learning_rate": 1.6966323406623994e-05, "loss": 0.9769, "step": 945 }, { "epoch": 0.2369146005509642, "grad_norm": 0.36328125, "learning_rate": 1.696075702755358e-05, "loss": 0.8764, "step": 946 }, { "epoch": 0.23716503881793138, "grad_norm": 0.345703125, "learning_rate": 1.6955190648483165e-05, "loss": 1.0751, "step": 947 }, { "epoch": 0.23741547708489857, "grad_norm": 0.31640625, "learning_rate": 1.6949624269412747e-05, "loss": 0.9585, "step": 948 }, { "epoch": 0.23766591535186576, "grad_norm": 0.408203125, "learning_rate": 1.6944057890342333e-05, "loss": 0.847, "step": 949 }, { "epoch": 0.23791635361883295, "grad_norm": 0.369140625, "learning_rate": 1.693849151127192e-05, "loss": 1.0313, "step": 950 }, { "epoch": 0.23816679188580014, "grad_norm": 0.3671875, "learning_rate": 1.6932925132201504e-05, "loss": 1.0204, "step": 951 }, { "epoch": 0.23841723015276733, "grad_norm": 0.35546875, "learning_rate": 1.692735875313109e-05, "loss": 0.8754, "step": 952 }, { "epoch": 0.23866766841973452, "grad_norm": 0.349609375, "learning_rate": 1.6921792374060676e-05, "loss": 0.9289, "step": 953 }, { "epoch": 0.23891810668670171, "grad_norm": 0.341796875, "learning_rate": 1.691622599499026e-05, "loss": 0.9756, "step": 954 }, { "epoch": 0.23916854495366893, "grad_norm": 0.3828125, "learning_rate": 1.6910659615919847e-05, "loss": 0.9842, "step": 955 }, { "epoch": 0.23941898322063612, "grad_norm": 0.326171875, "learning_rate": 1.690509323684943e-05, "loss": 0.9916, "step": 956 }, { "epoch": 0.2396694214876033, "grad_norm": 0.330078125, "learning_rate": 1.6899526857779015e-05, "loss": 0.9224, "step": 957 }, { "epoch": 0.2399198597545705, "grad_norm": 0.359375, "learning_rate": 1.6893960478708604e-05, "loss": 0.8639, "step": 958 }, { "epoch": 0.2401702980215377, "grad_norm": 0.361328125, "learning_rate": 1.6888394099638186e-05, "loss": 1.0603, "step": 959 }, { "epoch": 0.24042073628850488, "grad_norm": 0.37890625, "learning_rate": 1.688282772056777e-05, "loss": 0.8359, "step": 960 }, { "epoch": 0.24067117455547207, "grad_norm": 0.328125, "learning_rate": 1.6877261341497357e-05, "loss": 0.9297, "step": 961 }, { "epoch": 0.24092161282243926, "grad_norm": 0.33984375, "learning_rate": 1.6871694962426943e-05, "loss": 0.8559, "step": 962 }, { "epoch": 0.24117205108940645, "grad_norm": 0.326171875, "learning_rate": 1.6866128583356528e-05, "loss": 0.8808, "step": 963 }, { "epoch": 0.24142248935637364, "grad_norm": 0.333984375, "learning_rate": 1.6860562204286114e-05, "loss": 0.767, "step": 964 }, { "epoch": 0.24167292762334083, "grad_norm": 0.322265625, "learning_rate": 1.68549958252157e-05, "loss": 0.9776, "step": 965 }, { "epoch": 0.24192336589030805, "grad_norm": 0.3203125, "learning_rate": 1.6849429446145285e-05, "loss": 0.9487, "step": 966 }, { "epoch": 0.24217380415727524, "grad_norm": 0.359375, "learning_rate": 1.6843863067074867e-05, "loss": 0.8409, "step": 967 }, { "epoch": 0.24242424242424243, "grad_norm": 0.3671875, "learning_rate": 1.6838296688004453e-05, "loss": 0.8985, "step": 968 }, { "epoch": 0.24267468069120962, "grad_norm": 0.3359375, "learning_rate": 1.683273030893404e-05, "loss": 0.8978, "step": 969 }, { "epoch": 0.2429251189581768, "grad_norm": 0.396484375, "learning_rate": 1.6827163929863624e-05, "loss": 0.9133, "step": 970 }, { "epoch": 0.243175557225144, "grad_norm": 0.33203125, "learning_rate": 1.682159755079321e-05, "loss": 0.7539, "step": 971 }, { "epoch": 0.2434259954921112, "grad_norm": 0.349609375, "learning_rate": 1.6816031171722795e-05, "loss": 0.7642, "step": 972 }, { "epoch": 0.24367643375907838, "grad_norm": 0.330078125, "learning_rate": 1.681046479265238e-05, "loss": 0.9163, "step": 973 }, { "epoch": 0.24392687202604557, "grad_norm": 0.36328125, "learning_rate": 1.6804898413581967e-05, "loss": 0.9501, "step": 974 }, { "epoch": 0.24417731029301276, "grad_norm": 0.3203125, "learning_rate": 1.6799332034511552e-05, "loss": 0.8738, "step": 975 }, { "epoch": 0.24442774855997995, "grad_norm": 0.365234375, "learning_rate": 1.6793765655441138e-05, "loss": 1.1221, "step": 976 }, { "epoch": 0.24467818682694717, "grad_norm": 0.357421875, "learning_rate": 1.6788199276370723e-05, "loss": 1.0958, "step": 977 }, { "epoch": 0.24492862509391436, "grad_norm": 0.34765625, "learning_rate": 1.678263289730031e-05, "loss": 0.8276, "step": 978 }, { "epoch": 0.24517906336088155, "grad_norm": 0.3125, "learning_rate": 1.677706651822989e-05, "loss": 1.1731, "step": 979 }, { "epoch": 0.24542950162784874, "grad_norm": 0.326171875, "learning_rate": 1.6771500139159477e-05, "loss": 0.9459, "step": 980 }, { "epoch": 0.24567993989481593, "grad_norm": 0.384765625, "learning_rate": 1.6765933760089062e-05, "loss": 0.9557, "step": 981 }, { "epoch": 0.24593037816178312, "grad_norm": 0.3359375, "learning_rate": 1.6760367381018648e-05, "loss": 0.9699, "step": 982 }, { "epoch": 0.2461808164287503, "grad_norm": 0.34375, "learning_rate": 1.6754801001948234e-05, "loss": 0.865, "step": 983 }, { "epoch": 0.2464312546957175, "grad_norm": 0.357421875, "learning_rate": 1.674923462287782e-05, "loss": 0.9874, "step": 984 }, { "epoch": 0.2466816929626847, "grad_norm": 0.3984375, "learning_rate": 1.6743668243807405e-05, "loss": 1.1065, "step": 985 }, { "epoch": 0.24693213122965188, "grad_norm": 0.35546875, "learning_rate": 1.673810186473699e-05, "loss": 0.7784, "step": 986 }, { "epoch": 0.24718256949661907, "grad_norm": 0.345703125, "learning_rate": 1.6732535485666576e-05, "loss": 0.8833, "step": 987 }, { "epoch": 0.2474330077635863, "grad_norm": 0.33203125, "learning_rate": 1.6726969106596162e-05, "loss": 1.0432, "step": 988 }, { "epoch": 0.24768344603055348, "grad_norm": 0.365234375, "learning_rate": 1.6721402727525747e-05, "loss": 0.8706, "step": 989 }, { "epoch": 0.24793388429752067, "grad_norm": 0.341796875, "learning_rate": 1.6715836348455333e-05, "loss": 0.9075, "step": 990 }, { "epoch": 0.24818432256448786, "grad_norm": 0.33984375, "learning_rate": 1.6710269969384915e-05, "loss": 1.0124, "step": 991 }, { "epoch": 0.24843476083145505, "grad_norm": 0.36328125, "learning_rate": 1.67047035903145e-05, "loss": 0.9504, "step": 992 }, { "epoch": 0.24868519909842224, "grad_norm": 0.3359375, "learning_rate": 1.6699137211244086e-05, "loss": 1.1009, "step": 993 }, { "epoch": 0.24893563736538943, "grad_norm": 0.40625, "learning_rate": 1.6693570832173672e-05, "loss": 0.9863, "step": 994 }, { "epoch": 0.24918607563235662, "grad_norm": 0.33203125, "learning_rate": 1.6688004453103258e-05, "loss": 1.0179, "step": 995 }, { "epoch": 0.2494365138993238, "grad_norm": 0.375, "learning_rate": 1.6682438074032843e-05, "loss": 1.0162, "step": 996 }, { "epoch": 0.249686952166291, "grad_norm": 0.34765625, "learning_rate": 1.667687169496243e-05, "loss": 0.9127, "step": 997 }, { "epoch": 0.2499373904332582, "grad_norm": 0.361328125, "learning_rate": 1.6671305315892014e-05, "loss": 0.7781, "step": 998 }, { "epoch": 0.2501878287002254, "grad_norm": 0.322265625, "learning_rate": 1.6665738936821597e-05, "loss": 0.9378, "step": 999 }, { "epoch": 0.25043826696719257, "grad_norm": 0.375, "learning_rate": 1.6660172557751186e-05, "loss": 0.8602, "step": 1000 }, { "epoch": 0.25068870523415976, "grad_norm": 0.337890625, "learning_rate": 1.665460617868077e-05, "loss": 0.8135, "step": 1001 }, { "epoch": 0.25093914350112695, "grad_norm": 0.337890625, "learning_rate": 1.6649039799610357e-05, "loss": 0.9041, "step": 1002 }, { "epoch": 0.25118958176809414, "grad_norm": 0.294921875, "learning_rate": 1.664347342053994e-05, "loss": 1.0341, "step": 1003 }, { "epoch": 0.2514400200350614, "grad_norm": 0.306640625, "learning_rate": 1.6637907041469525e-05, "loss": 0.9985, "step": 1004 }, { "epoch": 0.2516904583020286, "grad_norm": 0.328125, "learning_rate": 1.663234066239911e-05, "loss": 0.8111, "step": 1005 }, { "epoch": 0.25194089656899576, "grad_norm": 0.3515625, "learning_rate": 1.6626774283328696e-05, "loss": 0.903, "step": 1006 }, { "epoch": 0.25219133483596295, "grad_norm": 0.376953125, "learning_rate": 1.662120790425828e-05, "loss": 1.0931, "step": 1007 }, { "epoch": 0.25244177310293014, "grad_norm": 0.30859375, "learning_rate": 1.6615641525187867e-05, "loss": 0.8273, "step": 1008 }, { "epoch": 0.25269221136989733, "grad_norm": 0.41015625, "learning_rate": 1.6610075146117453e-05, "loss": 0.8298, "step": 1009 }, { "epoch": 0.2529426496368645, "grad_norm": 0.3515625, "learning_rate": 1.6604508767047035e-05, "loss": 1.0922, "step": 1010 }, { "epoch": 0.2531930879038317, "grad_norm": 0.328125, "learning_rate": 1.659894238797662e-05, "loss": 0.9062, "step": 1011 }, { "epoch": 0.2534435261707989, "grad_norm": 0.36328125, "learning_rate": 1.6593376008906206e-05, "loss": 0.7952, "step": 1012 }, { "epoch": 0.2536939644377661, "grad_norm": 0.353515625, "learning_rate": 1.6587809629835795e-05, "loss": 0.9657, "step": 1013 }, { "epoch": 0.2539444027047333, "grad_norm": 0.341796875, "learning_rate": 1.6582243250765377e-05, "loss": 0.8511, "step": 1014 }, { "epoch": 0.2541948409717005, "grad_norm": 0.30859375, "learning_rate": 1.6576676871694963e-05, "loss": 0.8741, "step": 1015 }, { "epoch": 0.25444527923866767, "grad_norm": 0.33203125, "learning_rate": 1.657111049262455e-05, "loss": 0.9071, "step": 1016 }, { "epoch": 0.25469571750563486, "grad_norm": 0.328125, "learning_rate": 1.6565544113554134e-05, "loss": 0.9357, "step": 1017 }, { "epoch": 0.25494615577260205, "grad_norm": 0.388671875, "learning_rate": 1.655997773448372e-05, "loss": 1.071, "step": 1018 }, { "epoch": 0.25519659403956924, "grad_norm": 0.380859375, "learning_rate": 1.6554411355413306e-05, "loss": 1.1644, "step": 1019 }, { "epoch": 0.2554470323065364, "grad_norm": 0.34765625, "learning_rate": 1.654884497634289e-05, "loss": 0.8998, "step": 1020 }, { "epoch": 0.2556974705735036, "grad_norm": 0.33984375, "learning_rate": 1.6543278597272477e-05, "loss": 0.9003, "step": 1021 }, { "epoch": 0.2559479088404708, "grad_norm": 0.34765625, "learning_rate": 1.653771221820206e-05, "loss": 0.612, "step": 1022 }, { "epoch": 0.256198347107438, "grad_norm": 0.3203125, "learning_rate": 1.6532145839131645e-05, "loss": 1.0007, "step": 1023 }, { "epoch": 0.2564487853744052, "grad_norm": 0.345703125, "learning_rate": 1.652657946006123e-05, "loss": 0.9669, "step": 1024 }, { "epoch": 0.2566992236413724, "grad_norm": 0.30078125, "learning_rate": 1.652101308099082e-05, "loss": 0.9344, "step": 1025 }, { "epoch": 0.2569496619083396, "grad_norm": 0.337890625, "learning_rate": 1.65154467019204e-05, "loss": 0.9293, "step": 1026 }, { "epoch": 0.2572001001753068, "grad_norm": 0.3515625, "learning_rate": 1.6509880322849987e-05, "loss": 1.0502, "step": 1027 }, { "epoch": 0.257450538442274, "grad_norm": 0.330078125, "learning_rate": 1.6504313943779573e-05, "loss": 0.8218, "step": 1028 }, { "epoch": 0.2577009767092412, "grad_norm": 0.322265625, "learning_rate": 1.6498747564709158e-05, "loss": 0.9827, "step": 1029 }, { "epoch": 0.2579514149762084, "grad_norm": 0.34375, "learning_rate": 1.6493181185638744e-05, "loss": 0.8389, "step": 1030 }, { "epoch": 0.25820185324317557, "grad_norm": 0.365234375, "learning_rate": 1.648761480656833e-05, "loss": 1.0229, "step": 1031 }, { "epoch": 0.25845229151014276, "grad_norm": 0.345703125, "learning_rate": 1.6482048427497915e-05, "loss": 0.931, "step": 1032 }, { "epoch": 0.25870272977710995, "grad_norm": 0.33984375, "learning_rate": 1.64764820484275e-05, "loss": 1.0762, "step": 1033 }, { "epoch": 0.25895316804407714, "grad_norm": 0.369140625, "learning_rate": 1.6470915669357083e-05, "loss": 0.9629, "step": 1034 }, { "epoch": 0.25920360631104433, "grad_norm": 0.341796875, "learning_rate": 1.646534929028667e-05, "loss": 0.7612, "step": 1035 }, { "epoch": 0.2594540445780115, "grad_norm": 0.31640625, "learning_rate": 1.6459782911216254e-05, "loss": 0.9699, "step": 1036 }, { "epoch": 0.2597044828449787, "grad_norm": 0.33203125, "learning_rate": 1.645421653214584e-05, "loss": 0.8603, "step": 1037 }, { "epoch": 0.2599549211119459, "grad_norm": 0.326171875, "learning_rate": 1.6448650153075425e-05, "loss": 1.0948, "step": 1038 }, { "epoch": 0.2602053593789131, "grad_norm": 0.3125, "learning_rate": 1.644308377400501e-05, "loss": 0.7829, "step": 1039 }, { "epoch": 0.2604557976458803, "grad_norm": 0.330078125, "learning_rate": 1.6437517394934597e-05, "loss": 1.0189, "step": 1040 }, { "epoch": 0.2607062359128475, "grad_norm": 0.310546875, "learning_rate": 1.6431951015864182e-05, "loss": 0.8384, "step": 1041 }, { "epoch": 0.26095667417981466, "grad_norm": 0.380859375, "learning_rate": 1.6426384636793768e-05, "loss": 0.7873, "step": 1042 }, { "epoch": 0.26120711244678185, "grad_norm": 0.32421875, "learning_rate": 1.6420818257723353e-05, "loss": 0.9616, "step": 1043 }, { "epoch": 0.26145755071374904, "grad_norm": 0.423828125, "learning_rate": 1.641525187865294e-05, "loss": 0.9613, "step": 1044 }, { "epoch": 0.26170798898071623, "grad_norm": 0.3359375, "learning_rate": 1.6409685499582525e-05, "loss": 0.8841, "step": 1045 }, { "epoch": 0.2619584272476834, "grad_norm": 0.33984375, "learning_rate": 1.6404119120512107e-05, "loss": 0.7935, "step": 1046 }, { "epoch": 0.2622088655146506, "grad_norm": 0.54296875, "learning_rate": 1.6398552741441693e-05, "loss": 0.9958, "step": 1047 }, { "epoch": 0.26245930378161786, "grad_norm": 0.384765625, "learning_rate": 1.6392986362371278e-05, "loss": 0.9594, "step": 1048 }, { "epoch": 0.26270974204858505, "grad_norm": 0.34765625, "learning_rate": 1.6387419983300864e-05, "loss": 0.8742, "step": 1049 }, { "epoch": 0.26296018031555224, "grad_norm": 0.322265625, "learning_rate": 1.638185360423045e-05, "loss": 1.0716, "step": 1050 }, { "epoch": 0.26321061858251943, "grad_norm": 0.3671875, "learning_rate": 1.6376287225160035e-05, "loss": 0.9639, "step": 1051 }, { "epoch": 0.2634610568494866, "grad_norm": 0.357421875, "learning_rate": 1.637072084608962e-05, "loss": 0.9289, "step": 1052 }, { "epoch": 0.2637114951164538, "grad_norm": 0.357421875, "learning_rate": 1.6365154467019206e-05, "loss": 0.9027, "step": 1053 }, { "epoch": 0.263961933383421, "grad_norm": 0.361328125, "learning_rate": 1.635958808794879e-05, "loss": 0.9159, "step": 1054 }, { "epoch": 0.2642123716503882, "grad_norm": 0.333984375, "learning_rate": 1.6354021708878377e-05, "loss": 1.0735, "step": 1055 }, { "epoch": 0.2644628099173554, "grad_norm": 0.341796875, "learning_rate": 1.6348455329807963e-05, "loss": 0.8871, "step": 1056 }, { "epoch": 0.26471324818432257, "grad_norm": 0.34765625, "learning_rate": 1.634288895073755e-05, "loss": 0.9629, "step": 1057 }, { "epoch": 0.26496368645128976, "grad_norm": 0.34375, "learning_rate": 1.633732257166713e-05, "loss": 0.8929, "step": 1058 }, { "epoch": 0.26521412471825695, "grad_norm": 0.3359375, "learning_rate": 1.6331756192596716e-05, "loss": 0.9731, "step": 1059 }, { "epoch": 0.26546456298522414, "grad_norm": 0.35546875, "learning_rate": 1.6326189813526302e-05, "loss": 0.807, "step": 1060 }, { "epoch": 0.26571500125219133, "grad_norm": 0.35546875, "learning_rate": 1.6320623434455888e-05, "loss": 0.885, "step": 1061 }, { "epoch": 0.2659654395191585, "grad_norm": 0.31640625, "learning_rate": 1.6315057055385473e-05, "loss": 0.8236, "step": 1062 }, { "epoch": 0.2662158777861257, "grad_norm": 0.365234375, "learning_rate": 1.630949067631506e-05, "loss": 0.8178, "step": 1063 }, { "epoch": 0.2664663160530929, "grad_norm": 0.34375, "learning_rate": 1.6303924297244645e-05, "loss": 0.9446, "step": 1064 }, { "epoch": 0.2667167543200601, "grad_norm": 0.33984375, "learning_rate": 1.6298357918174227e-05, "loss": 0.848, "step": 1065 }, { "epoch": 0.2669671925870273, "grad_norm": 0.388671875, "learning_rate": 1.6292791539103812e-05, "loss": 1.0588, "step": 1066 }, { "epoch": 0.26721763085399447, "grad_norm": 0.33203125, "learning_rate": 1.62872251600334e-05, "loss": 0.9495, "step": 1067 }, { "epoch": 0.26746806912096166, "grad_norm": 0.287109375, "learning_rate": 1.6281658780962987e-05, "loss": 0.7443, "step": 1068 }, { "epoch": 0.26771850738792885, "grad_norm": 0.31640625, "learning_rate": 1.627609240189257e-05, "loss": 0.8878, "step": 1069 }, { "epoch": 0.2679689456548961, "grad_norm": 0.330078125, "learning_rate": 1.6270526022822155e-05, "loss": 0.9581, "step": 1070 }, { "epoch": 0.2682193839218633, "grad_norm": 0.328125, "learning_rate": 1.626495964375174e-05, "loss": 0.8608, "step": 1071 }, { "epoch": 0.2684698221888305, "grad_norm": 0.337890625, "learning_rate": 1.6259393264681326e-05, "loss": 0.9253, "step": 1072 }, { "epoch": 0.26872026045579767, "grad_norm": 0.32421875, "learning_rate": 1.625382688561091e-05, "loss": 0.8988, "step": 1073 }, { "epoch": 0.26897069872276486, "grad_norm": 0.33203125, "learning_rate": 1.6248260506540497e-05, "loss": 1.0841, "step": 1074 }, { "epoch": 0.26922113698973205, "grad_norm": 0.357421875, "learning_rate": 1.6242694127470083e-05, "loss": 0.8677, "step": 1075 }, { "epoch": 0.26947157525669924, "grad_norm": 0.337890625, "learning_rate": 1.623712774839967e-05, "loss": 0.9325, "step": 1076 }, { "epoch": 0.2697220135236664, "grad_norm": 0.326171875, "learning_rate": 1.623156136932925e-05, "loss": 1.0437, "step": 1077 }, { "epoch": 0.2699724517906336, "grad_norm": 0.361328125, "learning_rate": 1.6225994990258836e-05, "loss": 0.8337, "step": 1078 }, { "epoch": 0.2702228900576008, "grad_norm": 0.33203125, "learning_rate": 1.6220428611188422e-05, "loss": 0.9835, "step": 1079 }, { "epoch": 0.270473328324568, "grad_norm": 0.322265625, "learning_rate": 1.621486223211801e-05, "loss": 1.0078, "step": 1080 }, { "epoch": 0.2707237665915352, "grad_norm": 0.306640625, "learning_rate": 1.6209295853047593e-05, "loss": 0.9689, "step": 1081 }, { "epoch": 0.2709742048585024, "grad_norm": 0.328125, "learning_rate": 1.620372947397718e-05, "loss": 1.0309, "step": 1082 }, { "epoch": 0.27122464312546957, "grad_norm": 0.375, "learning_rate": 1.6198163094906764e-05, "loss": 0.9206, "step": 1083 }, { "epoch": 0.27147508139243676, "grad_norm": 0.337890625, "learning_rate": 1.619259671583635e-05, "loss": 0.8112, "step": 1084 }, { "epoch": 0.27172551965940395, "grad_norm": 0.353515625, "learning_rate": 1.6187030336765936e-05, "loss": 1.0686, "step": 1085 }, { "epoch": 0.27197595792637114, "grad_norm": 0.3125, "learning_rate": 1.618146395769552e-05, "loss": 0.8836, "step": 1086 }, { "epoch": 0.2722263961933383, "grad_norm": 0.328125, "learning_rate": 1.6175897578625107e-05, "loss": 0.9759, "step": 1087 }, { "epoch": 0.2724768344603055, "grad_norm": 0.33203125, "learning_rate": 1.6170331199554692e-05, "loss": 0.8721, "step": 1088 }, { "epoch": 0.2727272727272727, "grad_norm": 0.37890625, "learning_rate": 1.6164764820484275e-05, "loss": 0.7612, "step": 1089 }, { "epoch": 0.2729777109942399, "grad_norm": 0.3203125, "learning_rate": 1.615919844141386e-05, "loss": 1.0038, "step": 1090 }, { "epoch": 0.2732281492612071, "grad_norm": 0.314453125, "learning_rate": 1.6153632062343446e-05, "loss": 0.8796, "step": 1091 }, { "epoch": 0.2734785875281743, "grad_norm": 0.3359375, "learning_rate": 1.614806568327303e-05, "loss": 1.0069, "step": 1092 }, { "epoch": 0.2737290257951415, "grad_norm": 0.37890625, "learning_rate": 1.6142499304202617e-05, "loss": 0.9298, "step": 1093 }, { "epoch": 0.2739794640621087, "grad_norm": 0.32421875, "learning_rate": 1.6136932925132203e-05, "loss": 1.1142, "step": 1094 }, { "epoch": 0.2742299023290759, "grad_norm": 0.357421875, "learning_rate": 1.613136654606179e-05, "loss": 0.9153, "step": 1095 }, { "epoch": 0.2744803405960431, "grad_norm": 0.3515625, "learning_rate": 1.6125800166991374e-05, "loss": 0.815, "step": 1096 }, { "epoch": 0.2747307788630103, "grad_norm": 0.375, "learning_rate": 1.612023378792096e-05, "loss": 0.9023, "step": 1097 }, { "epoch": 0.2749812171299775, "grad_norm": 0.3125, "learning_rate": 1.6114667408850545e-05, "loss": 0.7719, "step": 1098 }, { "epoch": 0.27523165539694466, "grad_norm": 0.30078125, "learning_rate": 1.610910102978013e-05, "loss": 1.0523, "step": 1099 }, { "epoch": 0.27548209366391185, "grad_norm": 0.388671875, "learning_rate": 1.6103534650709716e-05, "loss": 0.9742, "step": 1100 }, { "epoch": 0.27573253193087904, "grad_norm": 0.369140625, "learning_rate": 1.60979682716393e-05, "loss": 0.9024, "step": 1101 }, { "epoch": 0.27598297019784623, "grad_norm": 0.3515625, "learning_rate": 1.6092401892568884e-05, "loss": 0.8944, "step": 1102 }, { "epoch": 0.2762334084648134, "grad_norm": 0.3671875, "learning_rate": 1.608683551349847e-05, "loss": 0.8414, "step": 1103 }, { "epoch": 0.2764838467317806, "grad_norm": 0.375, "learning_rate": 1.6081269134428055e-05, "loss": 1.0816, "step": 1104 }, { "epoch": 0.2767342849987478, "grad_norm": 0.322265625, "learning_rate": 1.607570275535764e-05, "loss": 0.9052, "step": 1105 }, { "epoch": 0.276984723265715, "grad_norm": 0.345703125, "learning_rate": 1.6070136376287227e-05, "loss": 1.0332, "step": 1106 }, { "epoch": 0.2772351615326822, "grad_norm": 0.33984375, "learning_rate": 1.6064569997216812e-05, "loss": 0.9449, "step": 1107 }, { "epoch": 0.2774855997996494, "grad_norm": 0.3515625, "learning_rate": 1.6059003618146398e-05, "loss": 1.1651, "step": 1108 }, { "epoch": 0.27773603806661656, "grad_norm": 0.345703125, "learning_rate": 1.6053437239075984e-05, "loss": 1.0874, "step": 1109 }, { "epoch": 0.27798647633358375, "grad_norm": 0.326171875, "learning_rate": 1.604787086000557e-05, "loss": 0.9659, "step": 1110 }, { "epoch": 0.27823691460055094, "grad_norm": 0.361328125, "learning_rate": 1.6042304480935155e-05, "loss": 1.0249, "step": 1111 }, { "epoch": 0.27848735286751813, "grad_norm": 0.359375, "learning_rate": 1.6036738101864737e-05, "loss": 0.9373, "step": 1112 }, { "epoch": 0.2787377911344853, "grad_norm": 0.3828125, "learning_rate": 1.6031171722794323e-05, "loss": 1.0997, "step": 1113 }, { "epoch": 0.2789882294014525, "grad_norm": 0.375, "learning_rate": 1.6025605343723908e-05, "loss": 0.992, "step": 1114 }, { "epoch": 0.27923866766841976, "grad_norm": 0.376953125, "learning_rate": 1.6020038964653494e-05, "loss": 0.8685, "step": 1115 }, { "epoch": 0.27948910593538695, "grad_norm": 0.345703125, "learning_rate": 1.601447258558308e-05, "loss": 0.7706, "step": 1116 }, { "epoch": 0.27973954420235414, "grad_norm": 0.345703125, "learning_rate": 1.6008906206512665e-05, "loss": 0.9627, "step": 1117 }, { "epoch": 0.27998998246932133, "grad_norm": 0.3203125, "learning_rate": 1.600333982744225e-05, "loss": 0.8752, "step": 1118 }, { "epoch": 0.2802404207362885, "grad_norm": 0.373046875, "learning_rate": 1.5997773448371836e-05, "loss": 0.8439, "step": 1119 }, { "epoch": 0.2804908590032557, "grad_norm": 0.328125, "learning_rate": 1.599220706930142e-05, "loss": 0.9069, "step": 1120 }, { "epoch": 0.2807412972702229, "grad_norm": 0.357421875, "learning_rate": 1.5986640690231004e-05, "loss": 0.8106, "step": 1121 }, { "epoch": 0.2809917355371901, "grad_norm": 0.36328125, "learning_rate": 1.5981074311160593e-05, "loss": 1.0286, "step": 1122 }, { "epoch": 0.2812421738041573, "grad_norm": 0.314453125, "learning_rate": 1.597550793209018e-05, "loss": 1.0361, "step": 1123 }, { "epoch": 0.28149261207112447, "grad_norm": 0.34375, "learning_rate": 1.596994155301976e-05, "loss": 0.9694, "step": 1124 }, { "epoch": 0.28174305033809166, "grad_norm": 0.33984375, "learning_rate": 1.5964375173949347e-05, "loss": 0.917, "step": 1125 }, { "epoch": 0.28199348860505885, "grad_norm": 0.373046875, "learning_rate": 1.5958808794878932e-05, "loss": 0.8982, "step": 1126 }, { "epoch": 0.28224392687202604, "grad_norm": 0.380859375, "learning_rate": 1.5953242415808518e-05, "loss": 1.0088, "step": 1127 }, { "epoch": 0.28249436513899323, "grad_norm": 0.396484375, "learning_rate": 1.5947676036738103e-05, "loss": 0.9422, "step": 1128 }, { "epoch": 0.2827448034059604, "grad_norm": 0.357421875, "learning_rate": 1.594210965766769e-05, "loss": 1.1301, "step": 1129 }, { "epoch": 0.2829952416729276, "grad_norm": 0.337890625, "learning_rate": 1.5936543278597275e-05, "loss": 0.8494, "step": 1130 }, { "epoch": 0.2832456799398948, "grad_norm": 0.3515625, "learning_rate": 1.593097689952686e-05, "loss": 0.9998, "step": 1131 }, { "epoch": 0.283496118206862, "grad_norm": 0.330078125, "learning_rate": 1.5925410520456442e-05, "loss": 0.939, "step": 1132 }, { "epoch": 0.2837465564738292, "grad_norm": 0.34765625, "learning_rate": 1.5919844141386028e-05, "loss": 1.0043, "step": 1133 }, { "epoch": 0.28399699474079637, "grad_norm": 0.337890625, "learning_rate": 1.5914277762315614e-05, "loss": 0.9769, "step": 1134 }, { "epoch": 0.28424743300776356, "grad_norm": 0.326171875, "learning_rate": 1.5908711383245203e-05, "loss": 0.8956, "step": 1135 }, { "epoch": 0.28449787127473075, "grad_norm": 0.369140625, "learning_rate": 1.5903145004174785e-05, "loss": 1.0017, "step": 1136 }, { "epoch": 0.284748309541698, "grad_norm": 0.3515625, "learning_rate": 1.589757862510437e-05, "loss": 0.9668, "step": 1137 }, { "epoch": 0.2849987478086652, "grad_norm": 0.322265625, "learning_rate": 1.5892012246033956e-05, "loss": 1.0617, "step": 1138 }, { "epoch": 0.2852491860756324, "grad_norm": 0.353515625, "learning_rate": 1.588644586696354e-05, "loss": 0.8997, "step": 1139 }, { "epoch": 0.28549962434259957, "grad_norm": 0.34765625, "learning_rate": 1.5880879487893127e-05, "loss": 0.95, "step": 1140 }, { "epoch": 0.28575006260956676, "grad_norm": 0.318359375, "learning_rate": 1.5875313108822713e-05, "loss": 0.9693, "step": 1141 }, { "epoch": 0.28600050087653395, "grad_norm": 0.333984375, "learning_rate": 1.58697467297523e-05, "loss": 0.9196, "step": 1142 }, { "epoch": 0.28625093914350114, "grad_norm": 0.35546875, "learning_rate": 1.5864180350681884e-05, "loss": 0.8972, "step": 1143 }, { "epoch": 0.2865013774104683, "grad_norm": 0.326171875, "learning_rate": 1.5858613971611466e-05, "loss": 0.9922, "step": 1144 }, { "epoch": 0.2867518156774355, "grad_norm": 0.3125, "learning_rate": 1.5853047592541052e-05, "loss": 0.9647, "step": 1145 }, { "epoch": 0.2870022539444027, "grad_norm": 0.3671875, "learning_rate": 1.5847481213470638e-05, "loss": 1.0706, "step": 1146 }, { "epoch": 0.2872526922113699, "grad_norm": 0.384765625, "learning_rate": 1.5841914834400227e-05, "loss": 0.9657, "step": 1147 }, { "epoch": 0.2875031304783371, "grad_norm": 0.369140625, "learning_rate": 1.583634845532981e-05, "loss": 0.9103, "step": 1148 }, { "epoch": 0.2877535687453043, "grad_norm": 0.357421875, "learning_rate": 1.5830782076259394e-05, "loss": 0.9921, "step": 1149 }, { "epoch": 0.28800400701227147, "grad_norm": 0.34765625, "learning_rate": 1.582521569718898e-05, "loss": 0.8672, "step": 1150 }, { "epoch": 0.28825444527923866, "grad_norm": 0.388671875, "learning_rate": 1.5819649318118566e-05, "loss": 0.7638, "step": 1151 }, { "epoch": 0.28850488354620585, "grad_norm": 0.37109375, "learning_rate": 1.581408293904815e-05, "loss": 0.9118, "step": 1152 }, { "epoch": 0.28875532181317304, "grad_norm": 0.328125, "learning_rate": 1.5808516559977737e-05, "loss": 0.9735, "step": 1153 }, { "epoch": 0.28900576008014023, "grad_norm": 0.3984375, "learning_rate": 1.5802950180907322e-05, "loss": 1.0904, "step": 1154 }, { "epoch": 0.2892561983471074, "grad_norm": 0.31640625, "learning_rate": 1.5797383801836908e-05, "loss": 0.9894, "step": 1155 }, { "epoch": 0.2895066366140746, "grad_norm": 0.388671875, "learning_rate": 1.579181742276649e-05, "loss": 1.0647, "step": 1156 }, { "epoch": 0.2897570748810418, "grad_norm": 0.361328125, "learning_rate": 1.5786251043696076e-05, "loss": 0.7901, "step": 1157 }, { "epoch": 0.290007513148009, "grad_norm": 0.3203125, "learning_rate": 1.578068466462566e-05, "loss": 0.9349, "step": 1158 }, { "epoch": 0.29025795141497623, "grad_norm": 0.326171875, "learning_rate": 1.5775118285555247e-05, "loss": 0.9705, "step": 1159 }, { "epoch": 0.2905083896819434, "grad_norm": 0.34375, "learning_rate": 1.5769551906484833e-05, "loss": 0.9791, "step": 1160 }, { "epoch": 0.2907588279489106, "grad_norm": 0.30078125, "learning_rate": 1.576398552741442e-05, "loss": 0.7031, "step": 1161 }, { "epoch": 0.2910092662158778, "grad_norm": 0.322265625, "learning_rate": 1.5758419148344004e-05, "loss": 0.9068, "step": 1162 }, { "epoch": 0.291259704482845, "grad_norm": 0.359375, "learning_rate": 1.5752852769273586e-05, "loss": 1.0235, "step": 1163 }, { "epoch": 0.2915101427498122, "grad_norm": 0.345703125, "learning_rate": 1.5747286390203175e-05, "loss": 1.089, "step": 1164 }, { "epoch": 0.2917605810167794, "grad_norm": 0.33203125, "learning_rate": 1.574172001113276e-05, "loss": 1.0124, "step": 1165 }, { "epoch": 0.29201101928374656, "grad_norm": 0.3671875, "learning_rate": 1.5736153632062346e-05, "loss": 1.0629, "step": 1166 }, { "epoch": 0.29226145755071375, "grad_norm": 0.33203125, "learning_rate": 1.573058725299193e-05, "loss": 1.0185, "step": 1167 }, { "epoch": 0.29251189581768094, "grad_norm": 0.3671875, "learning_rate": 1.5725020873921514e-05, "loss": 0.9245, "step": 1168 }, { "epoch": 0.29276233408464813, "grad_norm": 0.359375, "learning_rate": 1.57194544948511e-05, "loss": 0.9939, "step": 1169 }, { "epoch": 0.2930127723516153, "grad_norm": 0.35546875, "learning_rate": 1.5713888115780685e-05, "loss": 1.0324, "step": 1170 }, { "epoch": 0.2932632106185825, "grad_norm": 0.34375, "learning_rate": 1.570832173671027e-05, "loss": 0.771, "step": 1171 }, { "epoch": 0.2935136488855497, "grad_norm": 0.3671875, "learning_rate": 1.5702755357639857e-05, "loss": 1.0573, "step": 1172 }, { "epoch": 0.2937640871525169, "grad_norm": 0.345703125, "learning_rate": 1.5697188978569442e-05, "loss": 0.9287, "step": 1173 }, { "epoch": 0.2940145254194841, "grad_norm": 0.41796875, "learning_rate": 1.5691622599499028e-05, "loss": 0.7558, "step": 1174 }, { "epoch": 0.2942649636864513, "grad_norm": 0.31640625, "learning_rate": 1.568605622042861e-05, "loss": 0.9862, "step": 1175 }, { "epoch": 0.29451540195341847, "grad_norm": 0.353515625, "learning_rate": 1.5680489841358196e-05, "loss": 0.9756, "step": 1176 }, { "epoch": 0.29476584022038566, "grad_norm": 0.365234375, "learning_rate": 1.5674923462287785e-05, "loss": 0.8102, "step": 1177 }, { "epoch": 0.29501627848735285, "grad_norm": 0.3671875, "learning_rate": 1.566935708321737e-05, "loss": 0.9814, "step": 1178 }, { "epoch": 0.29526671675432004, "grad_norm": 0.357421875, "learning_rate": 1.5663790704146953e-05, "loss": 0.9111, "step": 1179 }, { "epoch": 0.2955171550212872, "grad_norm": 0.37890625, "learning_rate": 1.5658224325076538e-05, "loss": 0.9277, "step": 1180 }, { "epoch": 0.29576759328825447, "grad_norm": 0.3671875, "learning_rate": 1.5652657946006124e-05, "loss": 0.9175, "step": 1181 }, { "epoch": 0.29601803155522166, "grad_norm": 0.359375, "learning_rate": 1.564709156693571e-05, "loss": 0.9663, "step": 1182 }, { "epoch": 0.29626846982218885, "grad_norm": 0.341796875, "learning_rate": 1.5641525187865295e-05, "loss": 1.169, "step": 1183 }, { "epoch": 0.29651890808915604, "grad_norm": 0.353515625, "learning_rate": 1.563595880879488e-05, "loss": 0.9612, "step": 1184 }, { "epoch": 0.29676934635612323, "grad_norm": 0.318359375, "learning_rate": 1.5630392429724466e-05, "loss": 0.8399, "step": 1185 }, { "epoch": 0.2970197846230904, "grad_norm": 0.384765625, "learning_rate": 1.5624826050654052e-05, "loss": 0.9441, "step": 1186 }, { "epoch": 0.2972702228900576, "grad_norm": 0.341796875, "learning_rate": 1.5619259671583634e-05, "loss": 0.8877, "step": 1187 }, { "epoch": 0.2975206611570248, "grad_norm": 0.345703125, "learning_rate": 1.561369329251322e-05, "loss": 0.9821, "step": 1188 }, { "epoch": 0.297771099423992, "grad_norm": 0.369140625, "learning_rate": 1.560812691344281e-05, "loss": 0.8512, "step": 1189 }, { "epoch": 0.2980215376909592, "grad_norm": 0.35546875, "learning_rate": 1.5602560534372394e-05, "loss": 0.9339, "step": 1190 }, { "epoch": 0.29827197595792637, "grad_norm": 0.328125, "learning_rate": 1.5596994155301977e-05, "loss": 0.9646, "step": 1191 }, { "epoch": 0.29852241422489356, "grad_norm": 0.4453125, "learning_rate": 1.5591427776231562e-05, "loss": 0.9859, "step": 1192 }, { "epoch": 0.29877285249186075, "grad_norm": 0.36328125, "learning_rate": 1.5585861397161148e-05, "loss": 0.8682, "step": 1193 }, { "epoch": 0.29902329075882794, "grad_norm": 0.33203125, "learning_rate": 1.5580295018090733e-05, "loss": 0.9323, "step": 1194 }, { "epoch": 0.29927372902579513, "grad_norm": 0.361328125, "learning_rate": 1.557472863902032e-05, "loss": 1.0404, "step": 1195 }, { "epoch": 0.2995241672927623, "grad_norm": 0.30078125, "learning_rate": 1.5569162259949905e-05, "loss": 0.7551, "step": 1196 }, { "epoch": 0.2997746055597295, "grad_norm": 0.36328125, "learning_rate": 1.556359588087949e-05, "loss": 0.7368, "step": 1197 }, { "epoch": 0.3000250438266967, "grad_norm": 0.4296875, "learning_rate": 1.5558029501809076e-05, "loss": 1.2039, "step": 1198 }, { "epoch": 0.3002754820936639, "grad_norm": 0.318359375, "learning_rate": 1.5552463122738658e-05, "loss": 0.9241, "step": 1199 }, { "epoch": 0.3005259203606311, "grad_norm": 0.35546875, "learning_rate": 1.5546896743668244e-05, "loss": 0.9627, "step": 1200 }, { "epoch": 0.3007763586275983, "grad_norm": 0.341796875, "learning_rate": 1.554133036459783e-05, "loss": 0.9635, "step": 1201 }, { "epoch": 0.30102679689456546, "grad_norm": 0.361328125, "learning_rate": 1.5535763985527418e-05, "loss": 0.8967, "step": 1202 }, { "epoch": 0.3012772351615327, "grad_norm": 0.380859375, "learning_rate": 1.5530197606457e-05, "loss": 0.9716, "step": 1203 }, { "epoch": 0.3015276734284999, "grad_norm": 0.333984375, "learning_rate": 1.5524631227386586e-05, "loss": 0.7822, "step": 1204 }, { "epoch": 0.3017781116954671, "grad_norm": 0.369140625, "learning_rate": 1.5519064848316172e-05, "loss": 0.9536, "step": 1205 }, { "epoch": 0.3020285499624343, "grad_norm": 0.333984375, "learning_rate": 1.5513498469245757e-05, "loss": 0.8733, "step": 1206 }, { "epoch": 0.30227898822940147, "grad_norm": 0.392578125, "learning_rate": 1.5507932090175343e-05, "loss": 1.1872, "step": 1207 }, { "epoch": 0.30252942649636866, "grad_norm": 0.421875, "learning_rate": 1.550236571110493e-05, "loss": 1.0737, "step": 1208 }, { "epoch": 0.30277986476333585, "grad_norm": 0.337890625, "learning_rate": 1.5496799332034514e-05, "loss": 1.029, "step": 1209 }, { "epoch": 0.30303030303030304, "grad_norm": 0.3359375, "learning_rate": 1.5491232952964096e-05, "loss": 1.0479, "step": 1210 }, { "epoch": 0.30328074129727023, "grad_norm": 0.37109375, "learning_rate": 1.5485666573893682e-05, "loss": 1.0225, "step": 1211 }, { "epoch": 0.3035311795642374, "grad_norm": 0.35546875, "learning_rate": 1.5480100194823268e-05, "loss": 0.8961, "step": 1212 }, { "epoch": 0.3037816178312046, "grad_norm": 0.3515625, "learning_rate": 1.5474533815752853e-05, "loss": 0.857, "step": 1213 }, { "epoch": 0.3040320560981718, "grad_norm": 0.330078125, "learning_rate": 1.546896743668244e-05, "loss": 0.9079, "step": 1214 }, { "epoch": 0.304282494365139, "grad_norm": 0.353515625, "learning_rate": 1.5463401057612024e-05, "loss": 0.9053, "step": 1215 }, { "epoch": 0.3045329326321062, "grad_norm": 0.328125, "learning_rate": 1.545783467854161e-05, "loss": 1.0683, "step": 1216 }, { "epoch": 0.30478337089907337, "grad_norm": 0.3515625, "learning_rate": 1.5452268299471196e-05, "loss": 1.0, "step": 1217 }, { "epoch": 0.30503380916604056, "grad_norm": 0.34375, "learning_rate": 1.5446701920400778e-05, "loss": 1.0392, "step": 1218 }, { "epoch": 0.30528424743300775, "grad_norm": 0.333984375, "learning_rate": 1.5441135541330367e-05, "loss": 0.9753, "step": 1219 }, { "epoch": 0.30553468569997494, "grad_norm": 0.3828125, "learning_rate": 1.5435569162259953e-05, "loss": 0.9271, "step": 1220 }, { "epoch": 0.30578512396694213, "grad_norm": 0.353515625, "learning_rate": 1.5430002783189538e-05, "loss": 0.9409, "step": 1221 }, { "epoch": 0.3060355622339093, "grad_norm": 0.3359375, "learning_rate": 1.542443640411912e-05, "loss": 0.9572, "step": 1222 }, { "epoch": 0.3062860005008765, "grad_norm": 0.326171875, "learning_rate": 1.5418870025048706e-05, "loss": 0.8373, "step": 1223 }, { "epoch": 0.3065364387678437, "grad_norm": 0.357421875, "learning_rate": 1.541330364597829e-05, "loss": 0.9202, "step": 1224 }, { "epoch": 0.30678687703481095, "grad_norm": 0.330078125, "learning_rate": 1.5407737266907877e-05, "loss": 0.9111, "step": 1225 }, { "epoch": 0.30703731530177814, "grad_norm": 0.37890625, "learning_rate": 1.5402170887837463e-05, "loss": 0.9699, "step": 1226 }, { "epoch": 0.3072877535687453, "grad_norm": 0.310546875, "learning_rate": 1.539660450876705e-05, "loss": 0.8579, "step": 1227 }, { "epoch": 0.3075381918357125, "grad_norm": 0.3046875, "learning_rate": 1.5391038129696634e-05, "loss": 1.1373, "step": 1228 }, { "epoch": 0.3077886301026797, "grad_norm": 0.373046875, "learning_rate": 1.538547175062622e-05, "loss": 0.8403, "step": 1229 }, { "epoch": 0.3080390683696469, "grad_norm": 0.369140625, "learning_rate": 1.5379905371555802e-05, "loss": 0.9502, "step": 1230 }, { "epoch": 0.3082895066366141, "grad_norm": 0.341796875, "learning_rate": 1.537433899248539e-05, "loss": 0.9665, "step": 1231 }, { "epoch": 0.3085399449035813, "grad_norm": 0.333984375, "learning_rate": 1.5368772613414976e-05, "loss": 1.0895, "step": 1232 }, { "epoch": 0.30879038317054847, "grad_norm": 0.337890625, "learning_rate": 1.5363206234344562e-05, "loss": 1.0027, "step": 1233 }, { "epoch": 0.30904082143751566, "grad_norm": 0.32421875, "learning_rate": 1.5357639855274144e-05, "loss": 1.0256, "step": 1234 }, { "epoch": 0.30929125970448285, "grad_norm": 0.384765625, "learning_rate": 1.535207347620373e-05, "loss": 0.9816, "step": 1235 }, { "epoch": 0.30954169797145004, "grad_norm": 0.32421875, "learning_rate": 1.5346507097133316e-05, "loss": 0.8217, "step": 1236 }, { "epoch": 0.3097921362384172, "grad_norm": 0.328125, "learning_rate": 1.53409407180629e-05, "loss": 0.8871, "step": 1237 }, { "epoch": 0.3100425745053844, "grad_norm": 0.353515625, "learning_rate": 1.5335374338992487e-05, "loss": 0.9716, "step": 1238 }, { "epoch": 0.3102930127723516, "grad_norm": 0.3359375, "learning_rate": 1.5329807959922072e-05, "loss": 0.9595, "step": 1239 }, { "epoch": 0.3105434510393188, "grad_norm": 0.35546875, "learning_rate": 1.5324241580851658e-05, "loss": 0.9555, "step": 1240 }, { "epoch": 0.310793889306286, "grad_norm": 0.349609375, "learning_rate": 1.5318675201781244e-05, "loss": 0.9181, "step": 1241 }, { "epoch": 0.3110443275732532, "grad_norm": 0.3203125, "learning_rate": 1.5313108822710826e-05, "loss": 0.8704, "step": 1242 }, { "epoch": 0.31129476584022037, "grad_norm": 0.365234375, "learning_rate": 1.530754244364041e-05, "loss": 0.8429, "step": 1243 }, { "epoch": 0.31154520410718756, "grad_norm": 0.3515625, "learning_rate": 1.530197606457e-05, "loss": 0.8458, "step": 1244 }, { "epoch": 0.31179564237415475, "grad_norm": 0.365234375, "learning_rate": 1.5296409685499586e-05, "loss": 1.025, "step": 1245 }, { "epoch": 0.31204608064112194, "grad_norm": 0.34375, "learning_rate": 1.5290843306429168e-05, "loss": 0.8863, "step": 1246 }, { "epoch": 0.3122965189080892, "grad_norm": 0.3515625, "learning_rate": 1.5285276927358754e-05, "loss": 0.7661, "step": 1247 }, { "epoch": 0.3125469571750564, "grad_norm": 0.375, "learning_rate": 1.527971054828834e-05, "loss": 0.937, "step": 1248 }, { "epoch": 0.31279739544202356, "grad_norm": 0.349609375, "learning_rate": 1.5274144169217925e-05, "loss": 0.9504, "step": 1249 }, { "epoch": 0.31304783370899075, "grad_norm": 0.328125, "learning_rate": 1.526857779014751e-05, "loss": 1.0453, "step": 1250 }, { "epoch": 0.31329827197595794, "grad_norm": 0.349609375, "learning_rate": 1.5263011411077096e-05, "loss": 0.8215, "step": 1251 }, { "epoch": 0.31354871024292513, "grad_norm": 0.388671875, "learning_rate": 1.5257445032006682e-05, "loss": 0.7387, "step": 1252 }, { "epoch": 0.3137991485098923, "grad_norm": 0.330078125, "learning_rate": 1.5251878652936268e-05, "loss": 0.9147, "step": 1253 }, { "epoch": 0.3140495867768595, "grad_norm": 0.37109375, "learning_rate": 1.5246312273865851e-05, "loss": 1.0078, "step": 1254 }, { "epoch": 0.3143000250438267, "grad_norm": 0.408203125, "learning_rate": 1.5240745894795437e-05, "loss": 0.9928, "step": 1255 }, { "epoch": 0.3145504633107939, "grad_norm": 0.3671875, "learning_rate": 1.5235179515725023e-05, "loss": 0.8904, "step": 1256 }, { "epoch": 0.3148009015777611, "grad_norm": 0.369140625, "learning_rate": 1.5229613136654608e-05, "loss": 0.8576, "step": 1257 }, { "epoch": 0.3150513398447283, "grad_norm": 0.333984375, "learning_rate": 1.5224046757584192e-05, "loss": 0.8054, "step": 1258 }, { "epoch": 0.31530177811169546, "grad_norm": 0.337890625, "learning_rate": 1.5218480378513778e-05, "loss": 0.7734, "step": 1259 }, { "epoch": 0.31555221637866265, "grad_norm": 0.359375, "learning_rate": 1.5212913999443363e-05, "loss": 0.9192, "step": 1260 }, { "epoch": 0.31580265464562984, "grad_norm": 0.35546875, "learning_rate": 1.5207347620372947e-05, "loss": 0.9088, "step": 1261 }, { "epoch": 0.31605309291259703, "grad_norm": 0.3984375, "learning_rate": 1.5201781241302533e-05, "loss": 1.1425, "step": 1262 }, { "epoch": 0.3163035311795642, "grad_norm": 0.439453125, "learning_rate": 1.5196214862232119e-05, "loss": 1.1356, "step": 1263 }, { "epoch": 0.3165539694465314, "grad_norm": 0.341796875, "learning_rate": 1.5190648483161706e-05, "loss": 1.0436, "step": 1264 }, { "epoch": 0.3168044077134986, "grad_norm": 0.349609375, "learning_rate": 1.5185082104091288e-05, "loss": 0.9113, "step": 1265 }, { "epoch": 0.3170548459804658, "grad_norm": 0.5546875, "learning_rate": 1.5179515725020875e-05, "loss": 0.9437, "step": 1266 }, { "epoch": 0.317305284247433, "grad_norm": 0.376953125, "learning_rate": 1.5173949345950461e-05, "loss": 1.0445, "step": 1267 }, { "epoch": 0.3175557225144002, "grad_norm": 0.341796875, "learning_rate": 1.5168382966880047e-05, "loss": 0.9157, "step": 1268 }, { "epoch": 0.3178061607813674, "grad_norm": 0.333984375, "learning_rate": 1.516281658780963e-05, "loss": 0.97, "step": 1269 }, { "epoch": 0.3180565990483346, "grad_norm": 0.349609375, "learning_rate": 1.5157250208739216e-05, "loss": 0.9929, "step": 1270 }, { "epoch": 0.3183070373153018, "grad_norm": 0.3203125, "learning_rate": 1.5151683829668802e-05, "loss": 0.9345, "step": 1271 }, { "epoch": 0.318557475582269, "grad_norm": 0.34765625, "learning_rate": 1.5146117450598387e-05, "loss": 0.9532, "step": 1272 }, { "epoch": 0.3188079138492362, "grad_norm": 0.3203125, "learning_rate": 1.5140551071527971e-05, "loss": 1.1327, "step": 1273 }, { "epoch": 0.31905835211620337, "grad_norm": 0.349609375, "learning_rate": 1.5134984692457557e-05, "loss": 0.7683, "step": 1274 }, { "epoch": 0.31930879038317056, "grad_norm": 0.34765625, "learning_rate": 1.5129418313387143e-05, "loss": 0.6921, "step": 1275 }, { "epoch": 0.31955922865013775, "grad_norm": 0.357421875, "learning_rate": 1.5123851934316728e-05, "loss": 0.8761, "step": 1276 }, { "epoch": 0.31980966691710494, "grad_norm": 0.32421875, "learning_rate": 1.5118285555246312e-05, "loss": 0.9921, "step": 1277 }, { "epoch": 0.32006010518407213, "grad_norm": 0.3359375, "learning_rate": 1.5112719176175898e-05, "loss": 0.8621, "step": 1278 }, { "epoch": 0.3203105434510393, "grad_norm": 0.373046875, "learning_rate": 1.5107152797105485e-05, "loss": 0.9364, "step": 1279 }, { "epoch": 0.3205609817180065, "grad_norm": 0.4296875, "learning_rate": 1.510158641803507e-05, "loss": 1.1225, "step": 1280 }, { "epoch": 0.3208114199849737, "grad_norm": 0.34375, "learning_rate": 1.5096020038964655e-05, "loss": 1.1897, "step": 1281 }, { "epoch": 0.3210618582519409, "grad_norm": 0.31640625, "learning_rate": 1.509045365989424e-05, "loss": 0.8721, "step": 1282 }, { "epoch": 0.3213122965189081, "grad_norm": 0.404296875, "learning_rate": 1.5084887280823826e-05, "loss": 1.0173, "step": 1283 }, { "epoch": 0.32156273478587527, "grad_norm": 0.3515625, "learning_rate": 1.5079320901753411e-05, "loss": 0.8799, "step": 1284 }, { "epoch": 0.32181317305284246, "grad_norm": 0.34765625, "learning_rate": 1.5073754522682995e-05, "loss": 0.9148, "step": 1285 }, { "epoch": 0.32206361131980965, "grad_norm": 0.408203125, "learning_rate": 1.5068188143612581e-05, "loss": 0.977, "step": 1286 }, { "epoch": 0.32231404958677684, "grad_norm": 0.34375, "learning_rate": 1.5062621764542166e-05, "loss": 0.9544, "step": 1287 }, { "epoch": 0.32256448785374403, "grad_norm": 0.376953125, "learning_rate": 1.5057055385471752e-05, "loss": 0.9729, "step": 1288 }, { "epoch": 0.3228149261207112, "grad_norm": 0.373046875, "learning_rate": 1.5051489006401336e-05, "loss": 0.8115, "step": 1289 }, { "epoch": 0.3230653643876784, "grad_norm": 0.38671875, "learning_rate": 1.5045922627330922e-05, "loss": 1.0172, "step": 1290 }, { "epoch": 0.32331580265464566, "grad_norm": 0.35546875, "learning_rate": 1.5040356248260509e-05, "loss": 0.9289, "step": 1291 }, { "epoch": 0.32356624092161285, "grad_norm": 0.36328125, "learning_rate": 1.5034789869190095e-05, "loss": 0.8976, "step": 1292 }, { "epoch": 0.32381667918858004, "grad_norm": 0.357421875, "learning_rate": 1.5029223490119678e-05, "loss": 0.9381, "step": 1293 }, { "epoch": 0.3240671174555472, "grad_norm": 0.365234375, "learning_rate": 1.5023657111049264e-05, "loss": 0.9329, "step": 1294 }, { "epoch": 0.3243175557225144, "grad_norm": 0.33984375, "learning_rate": 1.501809073197885e-05, "loss": 0.7989, "step": 1295 }, { "epoch": 0.3245679939894816, "grad_norm": 0.349609375, "learning_rate": 1.5012524352908435e-05, "loss": 0.8212, "step": 1296 }, { "epoch": 0.3248184322564488, "grad_norm": 0.359375, "learning_rate": 1.500695797383802e-05, "loss": 0.7046, "step": 1297 }, { "epoch": 0.325068870523416, "grad_norm": 0.3671875, "learning_rate": 1.5001391594767605e-05, "loss": 1.0314, "step": 1298 }, { "epoch": 0.3253193087903832, "grad_norm": 0.3515625, "learning_rate": 1.499582521569719e-05, "loss": 0.8921, "step": 1299 }, { "epoch": 0.32556974705735037, "grad_norm": 0.333984375, "learning_rate": 1.4990258836626776e-05, "loss": 0.9738, "step": 1300 }, { "epoch": 0.32582018532431756, "grad_norm": 0.380859375, "learning_rate": 1.498469245755636e-05, "loss": 0.9426, "step": 1301 }, { "epoch": 0.32607062359128475, "grad_norm": 0.380859375, "learning_rate": 1.4979126078485946e-05, "loss": 0.8288, "step": 1302 }, { "epoch": 0.32632106185825194, "grad_norm": 0.34765625, "learning_rate": 1.4973559699415531e-05, "loss": 0.8721, "step": 1303 }, { "epoch": 0.3265715001252191, "grad_norm": 0.37890625, "learning_rate": 1.4967993320345118e-05, "loss": 0.8692, "step": 1304 }, { "epoch": 0.3268219383921863, "grad_norm": 0.41796875, "learning_rate": 1.49624269412747e-05, "loss": 1.0306, "step": 1305 }, { "epoch": 0.3270723766591535, "grad_norm": 0.34765625, "learning_rate": 1.4956860562204288e-05, "loss": 0.8862, "step": 1306 }, { "epoch": 0.3273228149261207, "grad_norm": 0.35546875, "learning_rate": 1.4951294183133874e-05, "loss": 0.8444, "step": 1307 }, { "epoch": 0.3275732531930879, "grad_norm": 0.34375, "learning_rate": 1.494572780406346e-05, "loss": 0.8509, "step": 1308 }, { "epoch": 0.3278236914600551, "grad_norm": 0.39453125, "learning_rate": 1.4940161424993043e-05, "loss": 0.9461, "step": 1309 }, { "epoch": 0.32807412972702227, "grad_norm": 0.3359375, "learning_rate": 1.4934595045922629e-05, "loss": 1.0059, "step": 1310 }, { "epoch": 0.32832456799398946, "grad_norm": 0.373046875, "learning_rate": 1.4929028666852214e-05, "loss": 0.8898, "step": 1311 }, { "epoch": 0.32857500626095665, "grad_norm": 0.3828125, "learning_rate": 1.4923462287781798e-05, "loss": 1.0441, "step": 1312 }, { "epoch": 0.3288254445279239, "grad_norm": 0.388671875, "learning_rate": 1.4917895908711384e-05, "loss": 0.9464, "step": 1313 }, { "epoch": 0.3290758827948911, "grad_norm": 0.35546875, "learning_rate": 1.491232952964097e-05, "loss": 0.9828, "step": 1314 }, { "epoch": 0.3293263210618583, "grad_norm": 0.30859375, "learning_rate": 1.4906763150570555e-05, "loss": 0.7984, "step": 1315 }, { "epoch": 0.32957675932882546, "grad_norm": 0.34765625, "learning_rate": 1.4901196771500139e-05, "loss": 1.0172, "step": 1316 }, { "epoch": 0.32982719759579265, "grad_norm": 0.38671875, "learning_rate": 1.4895630392429725e-05, "loss": 1.0353, "step": 1317 }, { "epoch": 0.33007763586275984, "grad_norm": 0.341796875, "learning_rate": 1.489006401335931e-05, "loss": 0.9488, "step": 1318 }, { "epoch": 0.33032807412972703, "grad_norm": 0.353515625, "learning_rate": 1.4884497634288898e-05, "loss": 0.9973, "step": 1319 }, { "epoch": 0.3305785123966942, "grad_norm": 0.376953125, "learning_rate": 1.487893125521848e-05, "loss": 0.8744, "step": 1320 }, { "epoch": 0.3308289506636614, "grad_norm": 0.404296875, "learning_rate": 1.4873364876148067e-05, "loss": 1.0368, "step": 1321 }, { "epoch": 0.3310793889306286, "grad_norm": 0.375, "learning_rate": 1.4867798497077653e-05, "loss": 0.9799, "step": 1322 }, { "epoch": 0.3313298271975958, "grad_norm": 0.33203125, "learning_rate": 1.4862232118007238e-05, "loss": 0.8993, "step": 1323 }, { "epoch": 0.331580265464563, "grad_norm": 0.337890625, "learning_rate": 1.4856665738936822e-05, "loss": 0.7809, "step": 1324 }, { "epoch": 0.3318307037315302, "grad_norm": 0.392578125, "learning_rate": 1.4851099359866408e-05, "loss": 0.8656, "step": 1325 }, { "epoch": 0.33208114199849736, "grad_norm": 0.322265625, "learning_rate": 1.4845532980795993e-05, "loss": 0.9305, "step": 1326 }, { "epoch": 0.33233158026546455, "grad_norm": 0.369140625, "learning_rate": 1.4839966601725579e-05, "loss": 1.0723, "step": 1327 }, { "epoch": 0.33258201853243174, "grad_norm": 0.337890625, "learning_rate": 1.4834400222655163e-05, "loss": 1.0101, "step": 1328 }, { "epoch": 0.33283245679939893, "grad_norm": 0.357421875, "learning_rate": 1.4828833843584749e-05, "loss": 0.9292, "step": 1329 }, { "epoch": 0.3330828950663661, "grad_norm": 0.34765625, "learning_rate": 1.4823267464514334e-05, "loss": 0.8439, "step": 1330 }, { "epoch": 0.3333333333333333, "grad_norm": 0.373046875, "learning_rate": 1.4817701085443922e-05, "loss": 0.9148, "step": 1331 }, { "epoch": 0.3335837716003005, "grad_norm": 0.40234375, "learning_rate": 1.4812134706373504e-05, "loss": 0.9001, "step": 1332 }, { "epoch": 0.3338342098672677, "grad_norm": 0.349609375, "learning_rate": 1.4806568327303091e-05, "loss": 0.9135, "step": 1333 }, { "epoch": 0.3340846481342349, "grad_norm": 0.375, "learning_rate": 1.4801001948232677e-05, "loss": 1.2157, "step": 1334 }, { "epoch": 0.33433508640120213, "grad_norm": 0.34765625, "learning_rate": 1.4795435569162262e-05, "loss": 0.8822, "step": 1335 }, { "epoch": 0.3345855246681693, "grad_norm": 0.365234375, "learning_rate": 1.4789869190091846e-05, "loss": 0.8267, "step": 1336 }, { "epoch": 0.3348359629351365, "grad_norm": 0.314453125, "learning_rate": 1.4784302811021432e-05, "loss": 1.0537, "step": 1337 }, { "epoch": 0.3350864012021037, "grad_norm": 0.341796875, "learning_rate": 1.4778736431951017e-05, "loss": 0.9527, "step": 1338 }, { "epoch": 0.3353368394690709, "grad_norm": 0.40625, "learning_rate": 1.4773170052880603e-05, "loss": 0.9969, "step": 1339 }, { "epoch": 0.3355872777360381, "grad_norm": 0.3671875, "learning_rate": 1.4767603673810187e-05, "loss": 1.1816, "step": 1340 }, { "epoch": 0.33583771600300527, "grad_norm": 0.404296875, "learning_rate": 1.4762037294739773e-05, "loss": 0.8885, "step": 1341 }, { "epoch": 0.33608815426997246, "grad_norm": 0.353515625, "learning_rate": 1.4756470915669358e-05, "loss": 0.8246, "step": 1342 }, { "epoch": 0.33633859253693965, "grad_norm": 0.390625, "learning_rate": 1.4750904536598944e-05, "loss": 0.9166, "step": 1343 }, { "epoch": 0.33658903080390684, "grad_norm": 0.361328125, "learning_rate": 1.4745338157528528e-05, "loss": 0.9656, "step": 1344 }, { "epoch": 0.33683946907087403, "grad_norm": 0.3671875, "learning_rate": 1.4739771778458113e-05, "loss": 0.9109, "step": 1345 }, { "epoch": 0.3370899073378412, "grad_norm": 0.34375, "learning_rate": 1.47342053993877e-05, "loss": 0.9025, "step": 1346 }, { "epoch": 0.3373403456048084, "grad_norm": 0.349609375, "learning_rate": 1.4728639020317286e-05, "loss": 1.001, "step": 1347 }, { "epoch": 0.3375907838717756, "grad_norm": 0.390625, "learning_rate": 1.472307264124687e-05, "loss": 1.0965, "step": 1348 }, { "epoch": 0.3378412221387428, "grad_norm": 0.326171875, "learning_rate": 1.4717506262176456e-05, "loss": 0.7368, "step": 1349 }, { "epoch": 0.33809166040571, "grad_norm": 0.328125, "learning_rate": 1.4711939883106041e-05, "loss": 0.9943, "step": 1350 }, { "epoch": 0.33834209867267717, "grad_norm": 0.365234375, "learning_rate": 1.4706373504035627e-05, "loss": 0.961, "step": 1351 }, { "epoch": 0.33859253693964436, "grad_norm": 0.32421875, "learning_rate": 1.4700807124965211e-05, "loss": 0.9983, "step": 1352 }, { "epoch": 0.33884297520661155, "grad_norm": 0.34375, "learning_rate": 1.4695240745894797e-05, "loss": 0.9724, "step": 1353 }, { "epoch": 0.33909341347357874, "grad_norm": 0.412109375, "learning_rate": 1.4689674366824382e-05, "loss": 0.6742, "step": 1354 }, { "epoch": 0.33934385174054593, "grad_norm": 0.39453125, "learning_rate": 1.4684107987753968e-05, "loss": 1.0237, "step": 1355 }, { "epoch": 0.3395942900075131, "grad_norm": 0.359375, "learning_rate": 1.4678541608683552e-05, "loss": 0.8838, "step": 1356 }, { "epoch": 0.33984472827448037, "grad_norm": 0.380859375, "learning_rate": 1.4672975229613137e-05, "loss": 0.9787, "step": 1357 }, { "epoch": 0.34009516654144756, "grad_norm": 0.345703125, "learning_rate": 1.4667408850542723e-05, "loss": 0.8084, "step": 1358 }, { "epoch": 0.34034560480841475, "grad_norm": 0.369140625, "learning_rate": 1.466184247147231e-05, "loss": 0.879, "step": 1359 }, { "epoch": 0.34059604307538194, "grad_norm": 0.365234375, "learning_rate": 1.4656276092401892e-05, "loss": 0.8478, "step": 1360 }, { "epoch": 0.34084648134234913, "grad_norm": 0.369140625, "learning_rate": 1.465070971333148e-05, "loss": 1.0241, "step": 1361 }, { "epoch": 0.3410969196093163, "grad_norm": 0.33984375, "learning_rate": 1.4645143334261065e-05, "loss": 0.9734, "step": 1362 }, { "epoch": 0.3413473578762835, "grad_norm": 0.34765625, "learning_rate": 1.463957695519065e-05, "loss": 1.0178, "step": 1363 }, { "epoch": 0.3415977961432507, "grad_norm": 0.375, "learning_rate": 1.4634010576120235e-05, "loss": 0.8796, "step": 1364 }, { "epoch": 0.3418482344102179, "grad_norm": 0.34765625, "learning_rate": 1.462844419704982e-05, "loss": 0.8377, "step": 1365 }, { "epoch": 0.3420986726771851, "grad_norm": 0.36328125, "learning_rate": 1.4622877817979406e-05, "loss": 0.9296, "step": 1366 }, { "epoch": 0.34234911094415227, "grad_norm": 0.31640625, "learning_rate": 1.461731143890899e-05, "loss": 0.841, "step": 1367 }, { "epoch": 0.34259954921111946, "grad_norm": 0.357421875, "learning_rate": 1.4611745059838576e-05, "loss": 0.9712, "step": 1368 }, { "epoch": 0.34284998747808665, "grad_norm": 0.326171875, "learning_rate": 1.4606178680768161e-05, "loss": 0.7595, "step": 1369 }, { "epoch": 0.34310042574505384, "grad_norm": 0.37109375, "learning_rate": 1.4600612301697747e-05, "loss": 1.1777, "step": 1370 }, { "epoch": 0.34335086401202103, "grad_norm": 0.3671875, "learning_rate": 1.459504592262733e-05, "loss": 0.9395, "step": 1371 }, { "epoch": 0.3436013022789882, "grad_norm": 0.34765625, "learning_rate": 1.4589479543556916e-05, "loss": 0.8973, "step": 1372 }, { "epoch": 0.3438517405459554, "grad_norm": 0.36328125, "learning_rate": 1.4583913164486504e-05, "loss": 0.9443, "step": 1373 }, { "epoch": 0.3441021788129226, "grad_norm": 0.359375, "learning_rate": 1.457834678541609e-05, "loss": 0.9938, "step": 1374 }, { "epoch": 0.3443526170798898, "grad_norm": 0.35546875, "learning_rate": 1.4572780406345673e-05, "loss": 0.9491, "step": 1375 }, { "epoch": 0.344603055346857, "grad_norm": 0.388671875, "learning_rate": 1.4567214027275259e-05, "loss": 0.925, "step": 1376 }, { "epoch": 0.34485349361382417, "grad_norm": 0.400390625, "learning_rate": 1.4561647648204844e-05, "loss": 0.8753, "step": 1377 }, { "epoch": 0.34510393188079136, "grad_norm": 0.35546875, "learning_rate": 1.455608126913443e-05, "loss": 1.1367, "step": 1378 }, { "epoch": 0.3453543701477586, "grad_norm": 0.337890625, "learning_rate": 1.4550514890064014e-05, "loss": 0.7322, "step": 1379 }, { "epoch": 0.3456048084147258, "grad_norm": 0.361328125, "learning_rate": 1.45449485109936e-05, "loss": 1.0167, "step": 1380 }, { "epoch": 0.345855246681693, "grad_norm": 0.333984375, "learning_rate": 1.4539382131923185e-05, "loss": 1.0216, "step": 1381 }, { "epoch": 0.3461056849486602, "grad_norm": 0.365234375, "learning_rate": 1.453381575285277e-05, "loss": 0.9405, "step": 1382 }, { "epoch": 0.34635612321562737, "grad_norm": 0.359375, "learning_rate": 1.4528249373782355e-05, "loss": 0.9056, "step": 1383 }, { "epoch": 0.34660656148259456, "grad_norm": 0.419921875, "learning_rate": 1.452268299471194e-05, "loss": 0.9103, "step": 1384 }, { "epoch": 0.34685699974956175, "grad_norm": 0.359375, "learning_rate": 1.4517116615641526e-05, "loss": 0.9727, "step": 1385 }, { "epoch": 0.34710743801652894, "grad_norm": 0.357421875, "learning_rate": 1.4511550236571113e-05, "loss": 0.9744, "step": 1386 }, { "epoch": 0.3473578762834961, "grad_norm": 0.376953125, "learning_rate": 1.4505983857500695e-05, "loss": 0.8005, "step": 1387 }, { "epoch": 0.3476083145504633, "grad_norm": 0.400390625, "learning_rate": 1.4500417478430283e-05, "loss": 0.9366, "step": 1388 }, { "epoch": 0.3478587528174305, "grad_norm": 0.39453125, "learning_rate": 1.4494851099359868e-05, "loss": 0.9851, "step": 1389 }, { "epoch": 0.3481091910843977, "grad_norm": 0.328125, "learning_rate": 1.4489284720289454e-05, "loss": 1.0333, "step": 1390 }, { "epoch": 0.3483596293513649, "grad_norm": 0.404296875, "learning_rate": 1.4483718341219038e-05, "loss": 0.9155, "step": 1391 }, { "epoch": 0.3486100676183321, "grad_norm": 0.3828125, "learning_rate": 1.4478151962148624e-05, "loss": 1.0655, "step": 1392 }, { "epoch": 0.34886050588529927, "grad_norm": 0.36328125, "learning_rate": 1.4472585583078209e-05, "loss": 1.0611, "step": 1393 }, { "epoch": 0.34911094415226646, "grad_norm": 0.365234375, "learning_rate": 1.4467019204007795e-05, "loss": 0.8013, "step": 1394 }, { "epoch": 0.34936138241923365, "grad_norm": 0.361328125, "learning_rate": 1.4461452824937379e-05, "loss": 0.9555, "step": 1395 }, { "epoch": 0.34961182068620084, "grad_norm": 0.361328125, "learning_rate": 1.4455886445866964e-05, "loss": 1.0317, "step": 1396 }, { "epoch": 0.349862258953168, "grad_norm": 0.328125, "learning_rate": 1.445032006679655e-05, "loss": 0.8939, "step": 1397 }, { "epoch": 0.3501126972201352, "grad_norm": 0.400390625, "learning_rate": 1.4444753687726136e-05, "loss": 1.0065, "step": 1398 }, { "epoch": 0.3503631354871024, "grad_norm": 0.33984375, "learning_rate": 1.443918730865572e-05, "loss": 0.8934, "step": 1399 }, { "epoch": 0.3506135737540696, "grad_norm": 0.359375, "learning_rate": 1.4433620929585305e-05, "loss": 0.816, "step": 1400 }, { "epoch": 0.35086401202103684, "grad_norm": 0.359375, "learning_rate": 1.4428054550514892e-05, "loss": 1.0335, "step": 1401 }, { "epoch": 0.35111445028800403, "grad_norm": 0.373046875, "learning_rate": 1.4422488171444478e-05, "loss": 0.9018, "step": 1402 }, { "epoch": 0.3513648885549712, "grad_norm": 0.33984375, "learning_rate": 1.4416921792374062e-05, "loss": 0.793, "step": 1403 }, { "epoch": 0.3516153268219384, "grad_norm": 0.42578125, "learning_rate": 1.4411355413303647e-05, "loss": 1.0225, "step": 1404 }, { "epoch": 0.3518657650889056, "grad_norm": 0.314453125, "learning_rate": 1.4405789034233233e-05, "loss": 0.9752, "step": 1405 }, { "epoch": 0.3521162033558728, "grad_norm": 0.38671875, "learning_rate": 1.4400222655162819e-05, "loss": 1.1006, "step": 1406 }, { "epoch": 0.35236664162284, "grad_norm": 0.38671875, "learning_rate": 1.4394656276092403e-05, "loss": 0.852, "step": 1407 }, { "epoch": 0.3526170798898072, "grad_norm": 0.326171875, "learning_rate": 1.4389089897021988e-05, "loss": 0.8665, "step": 1408 }, { "epoch": 0.35286751815677436, "grad_norm": 0.349609375, "learning_rate": 1.4383523517951574e-05, "loss": 0.7181, "step": 1409 }, { "epoch": 0.35311795642374155, "grad_norm": 0.3359375, "learning_rate": 1.437795713888116e-05, "loss": 0.86, "step": 1410 }, { "epoch": 0.35336839469070874, "grad_norm": 0.357421875, "learning_rate": 1.4372390759810743e-05, "loss": 1.041, "step": 1411 }, { "epoch": 0.35361883295767593, "grad_norm": 0.390625, "learning_rate": 1.4366824380740329e-05, "loss": 0.914, "step": 1412 }, { "epoch": 0.3538692712246431, "grad_norm": 0.318359375, "learning_rate": 1.4361258001669916e-05, "loss": 0.9478, "step": 1413 }, { "epoch": 0.3541197094916103, "grad_norm": 0.37890625, "learning_rate": 1.4355691622599499e-05, "loss": 0.9092, "step": 1414 }, { "epoch": 0.3543701477585775, "grad_norm": 0.337890625, "learning_rate": 1.4350125243529086e-05, "loss": 0.8607, "step": 1415 }, { "epoch": 0.3546205860255447, "grad_norm": 0.404296875, "learning_rate": 1.4344558864458671e-05, "loss": 1.0404, "step": 1416 }, { "epoch": 0.3548710242925119, "grad_norm": 0.388671875, "learning_rate": 1.4338992485388257e-05, "loss": 1.0123, "step": 1417 }, { "epoch": 0.3551214625594791, "grad_norm": 0.388671875, "learning_rate": 1.4333426106317841e-05, "loss": 0.866, "step": 1418 }, { "epoch": 0.35537190082644626, "grad_norm": 0.353515625, "learning_rate": 1.4327859727247427e-05, "loss": 1.0008, "step": 1419 }, { "epoch": 0.35562233909341345, "grad_norm": 0.349609375, "learning_rate": 1.4322293348177012e-05, "loss": 0.9221, "step": 1420 }, { "epoch": 0.35587277736038064, "grad_norm": 0.3828125, "learning_rate": 1.4316726969106598e-05, "loss": 1.1782, "step": 1421 }, { "epoch": 0.35612321562734783, "grad_norm": 0.33203125, "learning_rate": 1.4311160590036182e-05, "loss": 0.9202, "step": 1422 }, { "epoch": 0.3563736538943151, "grad_norm": 0.41015625, "learning_rate": 1.4305594210965767e-05, "loss": 1.0152, "step": 1423 }, { "epoch": 0.35662409216128227, "grad_norm": 0.388671875, "learning_rate": 1.4300027831895353e-05, "loss": 0.9445, "step": 1424 }, { "epoch": 0.35687453042824946, "grad_norm": 0.341796875, "learning_rate": 1.4294461452824939e-05, "loss": 0.8224, "step": 1425 }, { "epoch": 0.35712496869521665, "grad_norm": 0.35546875, "learning_rate": 1.4288895073754522e-05, "loss": 0.9195, "step": 1426 }, { "epoch": 0.35737540696218384, "grad_norm": 0.380859375, "learning_rate": 1.4283328694684108e-05, "loss": 0.8933, "step": 1427 }, { "epoch": 0.35762584522915103, "grad_norm": 0.361328125, "learning_rate": 1.4277762315613695e-05, "loss": 0.8101, "step": 1428 }, { "epoch": 0.3578762834961182, "grad_norm": 0.384765625, "learning_rate": 1.4272195936543281e-05, "loss": 0.873, "step": 1429 }, { "epoch": 0.3581267217630854, "grad_norm": 0.341796875, "learning_rate": 1.4266629557472865e-05, "loss": 0.9764, "step": 1430 }, { "epoch": 0.3583771600300526, "grad_norm": 0.376953125, "learning_rate": 1.426106317840245e-05, "loss": 0.8884, "step": 1431 }, { "epoch": 0.3586275982970198, "grad_norm": 0.396484375, "learning_rate": 1.4255496799332036e-05, "loss": 1.0683, "step": 1432 }, { "epoch": 0.358878036563987, "grad_norm": 0.35546875, "learning_rate": 1.4249930420261622e-05, "loss": 0.9328, "step": 1433 }, { "epoch": 0.35912847483095417, "grad_norm": 0.3515625, "learning_rate": 1.4244364041191206e-05, "loss": 0.7933, "step": 1434 }, { "epoch": 0.35937891309792136, "grad_norm": 0.369140625, "learning_rate": 1.4238797662120791e-05, "loss": 1.1185, "step": 1435 }, { "epoch": 0.35962935136488855, "grad_norm": 0.3671875, "learning_rate": 1.4233231283050377e-05, "loss": 0.8543, "step": 1436 }, { "epoch": 0.35987978963185574, "grad_norm": 0.388671875, "learning_rate": 1.4227664903979963e-05, "loss": 0.9818, "step": 1437 }, { "epoch": 0.36013022789882293, "grad_norm": 0.361328125, "learning_rate": 1.4222098524909546e-05, "loss": 0.8872, "step": 1438 }, { "epoch": 0.3603806661657901, "grad_norm": 0.3515625, "learning_rate": 1.4216532145839132e-05, "loss": 0.9024, "step": 1439 }, { "epoch": 0.3606311044327573, "grad_norm": 0.31640625, "learning_rate": 1.4210965766768718e-05, "loss": 0.8402, "step": 1440 }, { "epoch": 0.3608815426997245, "grad_norm": 0.326171875, "learning_rate": 1.4205399387698305e-05, "loss": 0.8111, "step": 1441 }, { "epoch": 0.3611319809666917, "grad_norm": 0.36328125, "learning_rate": 1.4199833008627887e-05, "loss": 0.9906, "step": 1442 }, { "epoch": 0.3613824192336589, "grad_norm": 0.345703125, "learning_rate": 1.4194266629557474e-05, "loss": 0.9879, "step": 1443 }, { "epoch": 0.36163285750062607, "grad_norm": 0.37890625, "learning_rate": 1.418870025048706e-05, "loss": 0.865, "step": 1444 }, { "epoch": 0.3618832957675933, "grad_norm": 0.34375, "learning_rate": 1.4183133871416646e-05, "loss": 0.8683, "step": 1445 }, { "epoch": 0.3621337340345605, "grad_norm": 0.33203125, "learning_rate": 1.417756749234623e-05, "loss": 1.023, "step": 1446 }, { "epoch": 0.3623841723015277, "grad_norm": 0.4453125, "learning_rate": 1.4172001113275815e-05, "loss": 0.8866, "step": 1447 }, { "epoch": 0.3626346105684949, "grad_norm": 0.3671875, "learning_rate": 1.41664347342054e-05, "loss": 0.9091, "step": 1448 }, { "epoch": 0.3628850488354621, "grad_norm": 0.384765625, "learning_rate": 1.4160868355134986e-05, "loss": 1.0732, "step": 1449 }, { "epoch": 0.36313548710242927, "grad_norm": 0.333984375, "learning_rate": 1.415530197606457e-05, "loss": 0.8844, "step": 1450 }, { "epoch": 0.36338592536939646, "grad_norm": 0.365234375, "learning_rate": 1.4149735596994156e-05, "loss": 0.9374, "step": 1451 }, { "epoch": 0.36363636363636365, "grad_norm": 0.365234375, "learning_rate": 1.4144169217923742e-05, "loss": 0.9164, "step": 1452 }, { "epoch": 0.36388680190333084, "grad_norm": 0.37890625, "learning_rate": 1.4138602838853329e-05, "loss": 0.879, "step": 1453 }, { "epoch": 0.364137240170298, "grad_norm": 0.375, "learning_rate": 1.4133036459782911e-05, "loss": 1.0661, "step": 1454 }, { "epoch": 0.3643876784372652, "grad_norm": 0.306640625, "learning_rate": 1.4127470080712498e-05, "loss": 0.8968, "step": 1455 }, { "epoch": 0.3646381167042324, "grad_norm": 0.30078125, "learning_rate": 1.4121903701642084e-05, "loss": 1.0829, "step": 1456 }, { "epoch": 0.3648885549711996, "grad_norm": 0.37890625, "learning_rate": 1.411633732257167e-05, "loss": 1.012, "step": 1457 }, { "epoch": 0.3651389932381668, "grad_norm": 0.310546875, "learning_rate": 1.4110770943501254e-05, "loss": 0.7809, "step": 1458 }, { "epoch": 0.365389431505134, "grad_norm": 0.3125, "learning_rate": 1.410520456443084e-05, "loss": 0.903, "step": 1459 }, { "epoch": 0.36563986977210117, "grad_norm": 0.353515625, "learning_rate": 1.4099638185360425e-05, "loss": 0.9604, "step": 1460 }, { "epoch": 0.36589030803906836, "grad_norm": 0.359375, "learning_rate": 1.409407180629001e-05, "loss": 0.9147, "step": 1461 }, { "epoch": 0.36614074630603555, "grad_norm": 0.404296875, "learning_rate": 1.4088505427219594e-05, "loss": 1.1839, "step": 1462 }, { "epoch": 0.36639118457300274, "grad_norm": 0.357421875, "learning_rate": 1.408293904814918e-05, "loss": 0.8891, "step": 1463 }, { "epoch": 0.3666416228399699, "grad_norm": 0.3515625, "learning_rate": 1.4077372669078766e-05, "loss": 0.9754, "step": 1464 }, { "epoch": 0.3668920611069371, "grad_norm": 0.341796875, "learning_rate": 1.407180629000835e-05, "loss": 0.9486, "step": 1465 }, { "epoch": 0.3671424993739043, "grad_norm": 0.353515625, "learning_rate": 1.4066239910937935e-05, "loss": 0.85, "step": 1466 }, { "epoch": 0.3673929376408715, "grad_norm": 0.34765625, "learning_rate": 1.406067353186752e-05, "loss": 0.8847, "step": 1467 }, { "epoch": 0.36764337590783874, "grad_norm": 0.357421875, "learning_rate": 1.4055107152797108e-05, "loss": 0.9292, "step": 1468 }, { "epoch": 0.36789381417480593, "grad_norm": 0.3828125, "learning_rate": 1.404954077372669e-05, "loss": 0.8699, "step": 1469 }, { "epoch": 0.3681442524417731, "grad_norm": 0.34765625, "learning_rate": 1.4043974394656278e-05, "loss": 1.0921, "step": 1470 }, { "epoch": 0.3683946907087403, "grad_norm": 0.3828125, "learning_rate": 1.4038408015585863e-05, "loss": 1.0214, "step": 1471 }, { "epoch": 0.3686451289757075, "grad_norm": 0.318359375, "learning_rate": 1.4032841636515449e-05, "loss": 0.9152, "step": 1472 }, { "epoch": 0.3688955672426747, "grad_norm": 0.35546875, "learning_rate": 1.4027275257445033e-05, "loss": 0.9608, "step": 1473 }, { "epoch": 0.3691460055096419, "grad_norm": 0.34765625, "learning_rate": 1.4021708878374618e-05, "loss": 0.9206, "step": 1474 }, { "epoch": 0.3693964437766091, "grad_norm": 0.390625, "learning_rate": 1.4016142499304204e-05, "loss": 0.8766, "step": 1475 }, { "epoch": 0.36964688204357626, "grad_norm": 0.36328125, "learning_rate": 1.401057612023379e-05, "loss": 0.9808, "step": 1476 }, { "epoch": 0.36989732031054345, "grad_norm": 0.359375, "learning_rate": 1.4005009741163373e-05, "loss": 0.9843, "step": 1477 }, { "epoch": 0.37014775857751064, "grad_norm": 0.341796875, "learning_rate": 1.3999443362092959e-05, "loss": 0.9881, "step": 1478 }, { "epoch": 0.37039819684447783, "grad_norm": 0.3984375, "learning_rate": 1.3993876983022545e-05, "loss": 0.7726, "step": 1479 }, { "epoch": 0.370648635111445, "grad_norm": 0.330078125, "learning_rate": 1.398831060395213e-05, "loss": 0.9496, "step": 1480 }, { "epoch": 0.3708990733784122, "grad_norm": 0.375, "learning_rate": 1.3982744224881714e-05, "loss": 0.9383, "step": 1481 }, { "epoch": 0.3711495116453794, "grad_norm": 0.3671875, "learning_rate": 1.39771778458113e-05, "loss": 0.8327, "step": 1482 }, { "epoch": 0.3713999499123466, "grad_norm": 0.318359375, "learning_rate": 1.3971611466740887e-05, "loss": 1.069, "step": 1483 }, { "epoch": 0.3716503881793138, "grad_norm": 0.3359375, "learning_rate": 1.3966045087670473e-05, "loss": 0.9773, "step": 1484 }, { "epoch": 0.371900826446281, "grad_norm": 0.369140625, "learning_rate": 1.3960478708600057e-05, "loss": 1.0085, "step": 1485 }, { "epoch": 0.37215126471324816, "grad_norm": 0.37890625, "learning_rate": 1.3954912329529642e-05, "loss": 1.1499, "step": 1486 }, { "epoch": 0.37240170298021535, "grad_norm": 0.337890625, "learning_rate": 1.3949345950459228e-05, "loss": 1.0317, "step": 1487 }, { "epoch": 0.37265214124718254, "grad_norm": 0.349609375, "learning_rate": 1.3943779571388813e-05, "loss": 0.9861, "step": 1488 }, { "epoch": 0.37290257951414973, "grad_norm": 0.365234375, "learning_rate": 1.3938213192318397e-05, "loss": 0.786, "step": 1489 }, { "epoch": 0.373153017781117, "grad_norm": 0.3671875, "learning_rate": 1.3932646813247983e-05, "loss": 1.0139, "step": 1490 }, { "epoch": 0.37340345604808417, "grad_norm": 0.33203125, "learning_rate": 1.3927080434177569e-05, "loss": 0.8364, "step": 1491 }, { "epoch": 0.37365389431505136, "grad_norm": 0.37890625, "learning_rate": 1.3921514055107154e-05, "loss": 0.7859, "step": 1492 }, { "epoch": 0.37390433258201855, "grad_norm": 0.35546875, "learning_rate": 1.3915947676036738e-05, "loss": 0.8821, "step": 1493 }, { "epoch": 0.37415477084898574, "grad_norm": 0.353515625, "learning_rate": 1.3910381296966324e-05, "loss": 1.008, "step": 1494 }, { "epoch": 0.37440520911595293, "grad_norm": 0.369140625, "learning_rate": 1.3904814917895911e-05, "loss": 0.8991, "step": 1495 }, { "epoch": 0.3746556473829201, "grad_norm": 0.322265625, "learning_rate": 1.3899248538825497e-05, "loss": 0.8483, "step": 1496 }, { "epoch": 0.3749060856498873, "grad_norm": 0.31640625, "learning_rate": 1.389368215975508e-05, "loss": 0.8579, "step": 1497 }, { "epoch": 0.3751565239168545, "grad_norm": 0.34765625, "learning_rate": 1.3888115780684666e-05, "loss": 1.0372, "step": 1498 }, { "epoch": 0.3754069621838217, "grad_norm": 0.3515625, "learning_rate": 1.3882549401614252e-05, "loss": 1.0247, "step": 1499 }, { "epoch": 0.3756574004507889, "grad_norm": 0.328125, "learning_rate": 1.3876983022543837e-05, "loss": 1.0381, "step": 1500 }, { "epoch": 0.37590783871775607, "grad_norm": 0.3671875, "learning_rate": 1.3871416643473421e-05, "loss": 0.8035, "step": 1501 }, { "epoch": 0.37615827698472326, "grad_norm": 0.33984375, "learning_rate": 1.3865850264403007e-05, "loss": 1.1357, "step": 1502 }, { "epoch": 0.37640871525169045, "grad_norm": 0.353515625, "learning_rate": 1.3860283885332593e-05, "loss": 0.9804, "step": 1503 }, { "epoch": 0.37665915351865764, "grad_norm": 0.341796875, "learning_rate": 1.3854717506262178e-05, "loss": 0.9376, "step": 1504 }, { "epoch": 0.37690959178562483, "grad_norm": 0.330078125, "learning_rate": 1.3849151127191762e-05, "loss": 0.6914, "step": 1505 }, { "epoch": 0.377160030052592, "grad_norm": 0.3515625, "learning_rate": 1.3843584748121348e-05, "loss": 1.0285, "step": 1506 }, { "epoch": 0.3774104683195592, "grad_norm": 0.357421875, "learning_rate": 1.3838018369050933e-05, "loss": 1.0791, "step": 1507 }, { "epoch": 0.3776609065865264, "grad_norm": 0.345703125, "learning_rate": 1.383245198998052e-05, "loss": 0.8701, "step": 1508 }, { "epoch": 0.3779113448534936, "grad_norm": 0.33984375, "learning_rate": 1.3826885610910103e-05, "loss": 0.9327, "step": 1509 }, { "epoch": 0.3781617831204608, "grad_norm": 0.359375, "learning_rate": 1.382131923183969e-05, "loss": 1.0986, "step": 1510 }, { "epoch": 0.37841222138742797, "grad_norm": 0.38671875, "learning_rate": 1.3815752852769276e-05, "loss": 1.0526, "step": 1511 }, { "epoch": 0.3786626596543952, "grad_norm": 0.400390625, "learning_rate": 1.3810186473698861e-05, "loss": 0.8755, "step": 1512 }, { "epoch": 0.3789130979213624, "grad_norm": 0.35546875, "learning_rate": 1.3804620094628445e-05, "loss": 0.9918, "step": 1513 }, { "epoch": 0.3791635361883296, "grad_norm": 0.341796875, "learning_rate": 1.3799053715558031e-05, "loss": 0.9294, "step": 1514 }, { "epoch": 0.3794139744552968, "grad_norm": 0.365234375, "learning_rate": 1.3793487336487616e-05, "loss": 1.0604, "step": 1515 }, { "epoch": 0.379664412722264, "grad_norm": 0.3359375, "learning_rate": 1.37879209574172e-05, "loss": 1.0222, "step": 1516 }, { "epoch": 0.37991485098923117, "grad_norm": 0.376953125, "learning_rate": 1.3782354578346786e-05, "loss": 1.0329, "step": 1517 }, { "epoch": 0.38016528925619836, "grad_norm": 0.365234375, "learning_rate": 1.3776788199276372e-05, "loss": 0.8635, "step": 1518 }, { "epoch": 0.38041572752316555, "grad_norm": 0.345703125, "learning_rate": 1.3771221820205957e-05, "loss": 0.8984, "step": 1519 }, { "epoch": 0.38066616579013274, "grad_norm": 0.345703125, "learning_rate": 1.3765655441135541e-05, "loss": 0.9738, "step": 1520 }, { "epoch": 0.38091660405709993, "grad_norm": 0.349609375, "learning_rate": 1.3760089062065127e-05, "loss": 0.9591, "step": 1521 }, { "epoch": 0.3811670423240671, "grad_norm": 0.39453125, "learning_rate": 1.3754522682994712e-05, "loss": 0.9348, "step": 1522 }, { "epoch": 0.3814174805910343, "grad_norm": 0.408203125, "learning_rate": 1.37489563039243e-05, "loss": 1.0804, "step": 1523 }, { "epoch": 0.3816679188580015, "grad_norm": 0.31640625, "learning_rate": 1.3743389924853882e-05, "loss": 0.9219, "step": 1524 }, { "epoch": 0.3819183571249687, "grad_norm": 0.345703125, "learning_rate": 1.373782354578347e-05, "loss": 0.8247, "step": 1525 }, { "epoch": 0.3821687953919359, "grad_norm": 0.359375, "learning_rate": 1.3732257166713055e-05, "loss": 0.8512, "step": 1526 }, { "epoch": 0.38241923365890307, "grad_norm": 0.3359375, "learning_rate": 1.372669078764264e-05, "loss": 0.9198, "step": 1527 }, { "epoch": 0.38266967192587026, "grad_norm": 0.369140625, "learning_rate": 1.3721124408572224e-05, "loss": 0.872, "step": 1528 }, { "epoch": 0.38292011019283745, "grad_norm": 0.412109375, "learning_rate": 1.371555802950181e-05, "loss": 0.9299, "step": 1529 }, { "epoch": 0.38317054845980464, "grad_norm": 0.4296875, "learning_rate": 1.3709991650431396e-05, "loss": 0.8595, "step": 1530 }, { "epoch": 0.38342098672677183, "grad_norm": 0.3515625, "learning_rate": 1.3704425271360981e-05, "loss": 0.9002, "step": 1531 }, { "epoch": 0.383671424993739, "grad_norm": 0.369140625, "learning_rate": 1.3698858892290565e-05, "loss": 0.8363, "step": 1532 }, { "epoch": 0.3839218632607062, "grad_norm": 0.357421875, "learning_rate": 1.369329251322015e-05, "loss": 0.8385, "step": 1533 }, { "epoch": 0.38417230152767345, "grad_norm": 0.37890625, "learning_rate": 1.3687726134149736e-05, "loss": 0.8611, "step": 1534 }, { "epoch": 0.38442273979464064, "grad_norm": 0.365234375, "learning_rate": 1.3682159755079324e-05, "loss": 0.9108, "step": 1535 }, { "epoch": 0.38467317806160783, "grad_norm": 0.35546875, "learning_rate": 1.3676593376008906e-05, "loss": 0.8373, "step": 1536 }, { "epoch": 0.384923616328575, "grad_norm": 0.3671875, "learning_rate": 1.3671026996938493e-05, "loss": 0.8148, "step": 1537 }, { "epoch": 0.3851740545955422, "grad_norm": 0.380859375, "learning_rate": 1.3665460617868079e-05, "loss": 0.7485, "step": 1538 }, { "epoch": 0.3854244928625094, "grad_norm": 0.388671875, "learning_rate": 1.3659894238797664e-05, "loss": 0.9746, "step": 1539 }, { "epoch": 0.3856749311294766, "grad_norm": 0.375, "learning_rate": 1.3654327859727248e-05, "loss": 1.0289, "step": 1540 }, { "epoch": 0.3859253693964438, "grad_norm": 0.3828125, "learning_rate": 1.3648761480656834e-05, "loss": 1.0244, "step": 1541 }, { "epoch": 0.386175807663411, "grad_norm": 0.359375, "learning_rate": 1.364319510158642e-05, "loss": 0.8271, "step": 1542 }, { "epoch": 0.38642624593037816, "grad_norm": 0.291015625, "learning_rate": 1.3637628722516005e-05, "loss": 0.7456, "step": 1543 }, { "epoch": 0.38667668419734536, "grad_norm": 0.349609375, "learning_rate": 1.3632062343445589e-05, "loss": 0.8734, "step": 1544 }, { "epoch": 0.38692712246431255, "grad_norm": 0.3515625, "learning_rate": 1.3626495964375175e-05, "loss": 0.941, "step": 1545 }, { "epoch": 0.38717756073127974, "grad_norm": 0.359375, "learning_rate": 1.362092958530476e-05, "loss": 0.9715, "step": 1546 }, { "epoch": 0.3874279989982469, "grad_norm": 0.349609375, "learning_rate": 1.3615363206234346e-05, "loss": 1.0215, "step": 1547 }, { "epoch": 0.3876784372652141, "grad_norm": 0.375, "learning_rate": 1.360979682716393e-05, "loss": 0.9602, "step": 1548 }, { "epoch": 0.3879288755321813, "grad_norm": 0.36328125, "learning_rate": 1.3604230448093515e-05, "loss": 0.9639, "step": 1549 }, { "epoch": 0.3881793137991485, "grad_norm": 0.39453125, "learning_rate": 1.3598664069023103e-05, "loss": 0.8174, "step": 1550 }, { "epoch": 0.3884297520661157, "grad_norm": 0.375, "learning_rate": 1.3593097689952688e-05, "loss": 0.6792, "step": 1551 }, { "epoch": 0.3886801903330829, "grad_norm": 0.369140625, "learning_rate": 1.3587531310882272e-05, "loss": 1.0633, "step": 1552 }, { "epoch": 0.38893062860005007, "grad_norm": 0.38671875, "learning_rate": 1.3581964931811858e-05, "loss": 0.9176, "step": 1553 }, { "epoch": 0.38918106686701726, "grad_norm": 0.34375, "learning_rate": 1.3576398552741443e-05, "loss": 1.0067, "step": 1554 }, { "epoch": 0.38943150513398445, "grad_norm": 0.369140625, "learning_rate": 1.3570832173671029e-05, "loss": 0.8785, "step": 1555 }, { "epoch": 0.3896819434009517, "grad_norm": 0.341796875, "learning_rate": 1.3565265794600613e-05, "loss": 0.8364, "step": 1556 }, { "epoch": 0.3899323816679189, "grad_norm": 0.34765625, "learning_rate": 1.3559699415530199e-05, "loss": 0.849, "step": 1557 }, { "epoch": 0.39018281993488607, "grad_norm": 0.384765625, "learning_rate": 1.3554133036459784e-05, "loss": 0.7839, "step": 1558 }, { "epoch": 0.39043325820185326, "grad_norm": 0.4140625, "learning_rate": 1.354856665738937e-05, "loss": 0.8875, "step": 1559 }, { "epoch": 0.39068369646882045, "grad_norm": 0.369140625, "learning_rate": 1.3543000278318954e-05, "loss": 0.7888, "step": 1560 }, { "epoch": 0.39093413473578764, "grad_norm": 0.435546875, "learning_rate": 1.353743389924854e-05, "loss": 0.968, "step": 1561 }, { "epoch": 0.39118457300275483, "grad_norm": 0.369140625, "learning_rate": 1.3531867520178125e-05, "loss": 0.9195, "step": 1562 }, { "epoch": 0.391435011269722, "grad_norm": 0.34765625, "learning_rate": 1.3526301141107712e-05, "loss": 0.9909, "step": 1563 }, { "epoch": 0.3916854495366892, "grad_norm": 0.384765625, "learning_rate": 1.3520734762037295e-05, "loss": 0.8859, "step": 1564 }, { "epoch": 0.3919358878036564, "grad_norm": 0.361328125, "learning_rate": 1.3515168382966882e-05, "loss": 1.0221, "step": 1565 }, { "epoch": 0.3921863260706236, "grad_norm": 0.37890625, "learning_rate": 1.3509602003896467e-05, "loss": 0.91, "step": 1566 }, { "epoch": 0.3924367643375908, "grad_norm": 0.35546875, "learning_rate": 1.3504035624826051e-05, "loss": 0.8977, "step": 1567 }, { "epoch": 0.39268720260455797, "grad_norm": 0.34765625, "learning_rate": 1.3498469245755637e-05, "loss": 1.1175, "step": 1568 }, { "epoch": 0.39293764087152516, "grad_norm": 0.380859375, "learning_rate": 1.3492902866685223e-05, "loss": 0.8611, "step": 1569 }, { "epoch": 0.39318807913849235, "grad_norm": 0.365234375, "learning_rate": 1.3487336487614808e-05, "loss": 0.9256, "step": 1570 }, { "epoch": 0.39343851740545954, "grad_norm": 0.38671875, "learning_rate": 1.3481770108544392e-05, "loss": 0.99, "step": 1571 }, { "epoch": 0.39368895567242673, "grad_norm": 0.328125, "learning_rate": 1.3476203729473978e-05, "loss": 0.8671, "step": 1572 }, { "epoch": 0.3939393939393939, "grad_norm": 0.40625, "learning_rate": 1.3470637350403563e-05, "loss": 1.0646, "step": 1573 }, { "epoch": 0.3941898322063611, "grad_norm": 0.3671875, "learning_rate": 1.3465070971333149e-05, "loss": 1.0633, "step": 1574 }, { "epoch": 0.3944402704733283, "grad_norm": 0.349609375, "learning_rate": 1.3459504592262733e-05, "loss": 0.8441, "step": 1575 }, { "epoch": 0.3946907087402955, "grad_norm": 0.42578125, "learning_rate": 1.3453938213192318e-05, "loss": 0.8095, "step": 1576 }, { "epoch": 0.3949411470072627, "grad_norm": 0.361328125, "learning_rate": 1.3448371834121906e-05, "loss": 1.1217, "step": 1577 }, { "epoch": 0.39519158527422993, "grad_norm": 0.365234375, "learning_rate": 1.3442805455051491e-05, "loss": 0.8789, "step": 1578 }, { "epoch": 0.3954420235411971, "grad_norm": 0.37109375, "learning_rate": 1.3437239075981074e-05, "loss": 0.9388, "step": 1579 }, { "epoch": 0.3956924618081643, "grad_norm": 0.43359375, "learning_rate": 1.3431672696910661e-05, "loss": 1.0406, "step": 1580 }, { "epoch": 0.3959429000751315, "grad_norm": 0.365234375, "learning_rate": 1.3426106317840247e-05, "loss": 0.9807, "step": 1581 }, { "epoch": 0.3961933383420987, "grad_norm": 0.37109375, "learning_rate": 1.3420539938769832e-05, "loss": 0.8932, "step": 1582 }, { "epoch": 0.3964437766090659, "grad_norm": 0.36328125, "learning_rate": 1.3414973559699416e-05, "loss": 0.9395, "step": 1583 }, { "epoch": 0.39669421487603307, "grad_norm": 0.341796875, "learning_rate": 1.3409407180629002e-05, "loss": 0.9424, "step": 1584 }, { "epoch": 0.39694465314300026, "grad_norm": 0.37109375, "learning_rate": 1.3403840801558587e-05, "loss": 1.0844, "step": 1585 }, { "epoch": 0.39719509140996745, "grad_norm": 0.396484375, "learning_rate": 1.3398274422488173e-05, "loss": 0.8245, "step": 1586 }, { "epoch": 0.39744552967693464, "grad_norm": 0.392578125, "learning_rate": 1.3392708043417757e-05, "loss": 1.1073, "step": 1587 }, { "epoch": 0.39769596794390183, "grad_norm": 0.36328125, "learning_rate": 1.3387141664347342e-05, "loss": 0.9306, "step": 1588 }, { "epoch": 0.397946406210869, "grad_norm": 0.357421875, "learning_rate": 1.3381575285276928e-05, "loss": 0.9836, "step": 1589 }, { "epoch": 0.3981968444778362, "grad_norm": 0.400390625, "learning_rate": 1.3376008906206515e-05, "loss": 0.862, "step": 1590 }, { "epoch": 0.3984472827448034, "grad_norm": 0.34765625, "learning_rate": 1.3370442527136098e-05, "loss": 0.9677, "step": 1591 }, { "epoch": 0.3986977210117706, "grad_norm": 0.341796875, "learning_rate": 1.3364876148065685e-05, "loss": 0.9866, "step": 1592 }, { "epoch": 0.3989481592787378, "grad_norm": 0.37890625, "learning_rate": 1.335930976899527e-05, "loss": 0.9577, "step": 1593 }, { "epoch": 0.39919859754570497, "grad_norm": 0.345703125, "learning_rate": 1.3353743389924856e-05, "loss": 0.9243, "step": 1594 }, { "epoch": 0.39944903581267216, "grad_norm": 0.40234375, "learning_rate": 1.334817701085444e-05, "loss": 0.9614, "step": 1595 }, { "epoch": 0.39969947407963935, "grad_norm": 0.3671875, "learning_rate": 1.3342610631784026e-05, "loss": 0.9086, "step": 1596 }, { "epoch": 0.39994991234660654, "grad_norm": 0.328125, "learning_rate": 1.3337044252713611e-05, "loss": 0.9202, "step": 1597 }, { "epoch": 0.40020035061357373, "grad_norm": 0.353515625, "learning_rate": 1.3331477873643197e-05, "loss": 1.0877, "step": 1598 }, { "epoch": 0.4004507888805409, "grad_norm": 0.35546875, "learning_rate": 1.332591149457278e-05, "loss": 0.9676, "step": 1599 }, { "epoch": 0.40070122714750817, "grad_norm": 0.353515625, "learning_rate": 1.3320345115502366e-05, "loss": 1.0089, "step": 1600 }, { "epoch": 0.40095166541447536, "grad_norm": 0.421875, "learning_rate": 1.3314778736431952e-05, "loss": 0.8847, "step": 1601 }, { "epoch": 0.40120210368144255, "grad_norm": 0.3671875, "learning_rate": 1.3309212357361538e-05, "loss": 0.8251, "step": 1602 }, { "epoch": 0.40145254194840974, "grad_norm": 0.3359375, "learning_rate": 1.3303645978291122e-05, "loss": 0.9184, "step": 1603 }, { "epoch": 0.4017029802153769, "grad_norm": 0.404296875, "learning_rate": 1.3298079599220707e-05, "loss": 1.0213, "step": 1604 }, { "epoch": 0.4019534184823441, "grad_norm": 0.388671875, "learning_rate": 1.3292513220150294e-05, "loss": 0.8812, "step": 1605 }, { "epoch": 0.4022038567493113, "grad_norm": 0.404296875, "learning_rate": 1.328694684107988e-05, "loss": 0.9066, "step": 1606 }, { "epoch": 0.4024542950162785, "grad_norm": 0.359375, "learning_rate": 1.3281380462009464e-05, "loss": 1.0261, "step": 1607 }, { "epoch": 0.4027047332832457, "grad_norm": 0.337890625, "learning_rate": 1.327581408293905e-05, "loss": 0.8827, "step": 1608 }, { "epoch": 0.4029551715502129, "grad_norm": 0.361328125, "learning_rate": 1.3270247703868635e-05, "loss": 0.9631, "step": 1609 }, { "epoch": 0.40320560981718007, "grad_norm": 0.416015625, "learning_rate": 1.326468132479822e-05, "loss": 0.8449, "step": 1610 }, { "epoch": 0.40345604808414726, "grad_norm": 0.41015625, "learning_rate": 1.3259114945727805e-05, "loss": 0.9061, "step": 1611 }, { "epoch": 0.40370648635111445, "grad_norm": 0.37109375, "learning_rate": 1.325354856665739e-05, "loss": 0.9228, "step": 1612 }, { "epoch": 0.40395692461808164, "grad_norm": 0.359375, "learning_rate": 1.3247982187586976e-05, "loss": 0.8273, "step": 1613 }, { "epoch": 0.4042073628850488, "grad_norm": 0.3359375, "learning_rate": 1.3242415808516562e-05, "loss": 0.869, "step": 1614 }, { "epoch": 0.404457801152016, "grad_norm": 0.40625, "learning_rate": 1.3236849429446145e-05, "loss": 0.8383, "step": 1615 }, { "epoch": 0.4047082394189832, "grad_norm": 0.33984375, "learning_rate": 1.3231283050375731e-05, "loss": 1.1102, "step": 1616 }, { "epoch": 0.4049586776859504, "grad_norm": 0.35546875, "learning_rate": 1.3225716671305318e-05, "loss": 1.1303, "step": 1617 }, { "epoch": 0.4052091159529176, "grad_norm": 0.349609375, "learning_rate": 1.32201502922349e-05, "loss": 0.9325, "step": 1618 }, { "epoch": 0.4054595542198848, "grad_norm": 0.384765625, "learning_rate": 1.3214583913164486e-05, "loss": 0.7536, "step": 1619 }, { "epoch": 0.40570999248685197, "grad_norm": 0.361328125, "learning_rate": 1.3209017534094074e-05, "loss": 0.971, "step": 1620 }, { "epoch": 0.40596043075381916, "grad_norm": 0.421875, "learning_rate": 1.3203451155023659e-05, "loss": 0.9909, "step": 1621 }, { "epoch": 0.4062108690207864, "grad_norm": 0.43359375, "learning_rate": 1.3197884775953243e-05, "loss": 0.9081, "step": 1622 }, { "epoch": 0.4064613072877536, "grad_norm": 0.404296875, "learning_rate": 1.3192318396882829e-05, "loss": 1.1013, "step": 1623 }, { "epoch": 0.4067117455547208, "grad_norm": 0.359375, "learning_rate": 1.3186752017812414e-05, "loss": 0.949, "step": 1624 }, { "epoch": 0.406962183821688, "grad_norm": 0.328125, "learning_rate": 1.3181185638742e-05, "loss": 0.8654, "step": 1625 }, { "epoch": 0.40721262208865516, "grad_norm": 0.39453125, "learning_rate": 1.3175619259671584e-05, "loss": 0.8431, "step": 1626 }, { "epoch": 0.40746306035562235, "grad_norm": 0.359375, "learning_rate": 1.317005288060117e-05, "loss": 0.8471, "step": 1627 }, { "epoch": 0.40771349862258954, "grad_norm": 0.33984375, "learning_rate": 1.3164486501530755e-05, "loss": 0.8829, "step": 1628 }, { "epoch": 0.40796393688955673, "grad_norm": 0.337890625, "learning_rate": 1.315892012246034e-05, "loss": 0.9172, "step": 1629 }, { "epoch": 0.4082143751565239, "grad_norm": 0.3828125, "learning_rate": 1.3153353743389925e-05, "loss": 1.0681, "step": 1630 }, { "epoch": 0.4084648134234911, "grad_norm": 0.33203125, "learning_rate": 1.314778736431951e-05, "loss": 0.9378, "step": 1631 }, { "epoch": 0.4087152516904583, "grad_norm": 0.328125, "learning_rate": 1.3142220985249097e-05, "loss": 0.7972, "step": 1632 }, { "epoch": 0.4089656899574255, "grad_norm": 0.34375, "learning_rate": 1.3136654606178683e-05, "loss": 0.938, "step": 1633 }, { "epoch": 0.4092161282243927, "grad_norm": 0.380859375, "learning_rate": 1.3131088227108267e-05, "loss": 1.1028, "step": 1634 }, { "epoch": 0.4094665664913599, "grad_norm": 0.333984375, "learning_rate": 1.3125521848037853e-05, "loss": 0.8866, "step": 1635 }, { "epoch": 0.40971700475832706, "grad_norm": 0.35546875, "learning_rate": 1.3119955468967438e-05, "loss": 0.8866, "step": 1636 }, { "epoch": 0.40996744302529425, "grad_norm": 0.388671875, "learning_rate": 1.3114389089897024e-05, "loss": 0.9282, "step": 1637 }, { "epoch": 0.41021788129226144, "grad_norm": 0.3671875, "learning_rate": 1.3108822710826608e-05, "loss": 0.7945, "step": 1638 }, { "epoch": 0.41046831955922863, "grad_norm": 0.33984375, "learning_rate": 1.3103256331756193e-05, "loss": 1.0868, "step": 1639 }, { "epoch": 0.4107187578261958, "grad_norm": 0.36328125, "learning_rate": 1.3097689952685779e-05, "loss": 0.8539, "step": 1640 }, { "epoch": 0.410969196093163, "grad_norm": 0.35546875, "learning_rate": 1.3092123573615365e-05, "loss": 0.874, "step": 1641 }, { "epoch": 0.4112196343601302, "grad_norm": 0.36328125, "learning_rate": 1.3086557194544949e-05, "loss": 0.7723, "step": 1642 }, { "epoch": 0.4114700726270974, "grad_norm": 0.330078125, "learning_rate": 1.3080990815474534e-05, "loss": 0.8675, "step": 1643 }, { "epoch": 0.41172051089406464, "grad_norm": 0.373046875, "learning_rate": 1.307542443640412e-05, "loss": 0.801, "step": 1644 }, { "epoch": 0.41197094916103183, "grad_norm": 0.3203125, "learning_rate": 1.3069858057333707e-05, "loss": 0.9685, "step": 1645 }, { "epoch": 0.412221387427999, "grad_norm": 0.3671875, "learning_rate": 1.306429167826329e-05, "loss": 0.9553, "step": 1646 }, { "epoch": 0.4124718256949662, "grad_norm": 0.416015625, "learning_rate": 1.3058725299192877e-05, "loss": 0.7941, "step": 1647 }, { "epoch": 0.4127222639619334, "grad_norm": 0.3515625, "learning_rate": 1.3053158920122462e-05, "loss": 0.8819, "step": 1648 }, { "epoch": 0.4129727022289006, "grad_norm": 0.40234375, "learning_rate": 1.3047592541052048e-05, "loss": 0.8576, "step": 1649 }, { "epoch": 0.4132231404958678, "grad_norm": 0.345703125, "learning_rate": 1.3042026161981632e-05, "loss": 0.9688, "step": 1650 }, { "epoch": 0.41347357876283497, "grad_norm": 0.3125, "learning_rate": 1.3036459782911217e-05, "loss": 0.8883, "step": 1651 }, { "epoch": 0.41372401702980216, "grad_norm": 0.361328125, "learning_rate": 1.3030893403840803e-05, "loss": 0.9476, "step": 1652 }, { "epoch": 0.41397445529676935, "grad_norm": 0.38671875, "learning_rate": 1.3025327024770389e-05, "loss": 0.8556, "step": 1653 }, { "epoch": 0.41422489356373654, "grad_norm": 0.376953125, "learning_rate": 1.3019760645699972e-05, "loss": 1.2591, "step": 1654 }, { "epoch": 0.41447533183070373, "grad_norm": 0.357421875, "learning_rate": 1.3014194266629558e-05, "loss": 0.9733, "step": 1655 }, { "epoch": 0.4147257700976709, "grad_norm": 0.333984375, "learning_rate": 1.3008627887559144e-05, "loss": 0.9181, "step": 1656 }, { "epoch": 0.4149762083646381, "grad_norm": 0.353515625, "learning_rate": 1.3003061508488731e-05, "loss": 0.855, "step": 1657 }, { "epoch": 0.4152266466316053, "grad_norm": 0.337890625, "learning_rate": 1.2997495129418313e-05, "loss": 0.8874, "step": 1658 }, { "epoch": 0.4154770848985725, "grad_norm": 0.337890625, "learning_rate": 1.2991928750347899e-05, "loss": 0.8974, "step": 1659 }, { "epoch": 0.4157275231655397, "grad_norm": 0.361328125, "learning_rate": 1.2986362371277486e-05, "loss": 1.0082, "step": 1660 }, { "epoch": 0.41597796143250687, "grad_norm": 0.36328125, "learning_rate": 1.2980795992207072e-05, "loss": 0.9579, "step": 1661 }, { "epoch": 0.41622839969947406, "grad_norm": 0.3515625, "learning_rate": 1.2975229613136656e-05, "loss": 1.0697, "step": 1662 }, { "epoch": 0.41647883796644125, "grad_norm": 0.328125, "learning_rate": 1.2969663234066241e-05, "loss": 0.9445, "step": 1663 }, { "epoch": 0.41672927623340844, "grad_norm": 0.388671875, "learning_rate": 1.2964096854995827e-05, "loss": 0.9453, "step": 1664 }, { "epoch": 0.41697971450037563, "grad_norm": 0.369140625, "learning_rate": 1.2958530475925413e-05, "loss": 0.8963, "step": 1665 }, { "epoch": 0.4172301527673429, "grad_norm": 0.359375, "learning_rate": 1.2952964096854996e-05, "loss": 0.9017, "step": 1666 }, { "epoch": 0.41748059103431007, "grad_norm": 0.408203125, "learning_rate": 1.2947397717784582e-05, "loss": 0.7995, "step": 1667 }, { "epoch": 0.41773102930127726, "grad_norm": 0.3515625, "learning_rate": 1.2941831338714168e-05, "loss": 0.8872, "step": 1668 }, { "epoch": 0.41798146756824445, "grad_norm": 0.375, "learning_rate": 1.2936264959643752e-05, "loss": 0.9063, "step": 1669 }, { "epoch": 0.41823190583521164, "grad_norm": 0.453125, "learning_rate": 1.2930698580573337e-05, "loss": 1.0606, "step": 1670 }, { "epoch": 0.4184823441021788, "grad_norm": 0.3671875, "learning_rate": 1.2925132201502923e-05, "loss": 0.9871, "step": 1671 }, { "epoch": 0.418732782369146, "grad_norm": 0.416015625, "learning_rate": 1.291956582243251e-05, "loss": 0.9565, "step": 1672 }, { "epoch": 0.4189832206361132, "grad_norm": 0.330078125, "learning_rate": 1.2913999443362092e-05, "loss": 0.8695, "step": 1673 }, { "epoch": 0.4192336589030804, "grad_norm": 0.380859375, "learning_rate": 1.290843306429168e-05, "loss": 0.9244, "step": 1674 }, { "epoch": 0.4194840971700476, "grad_norm": 0.337890625, "learning_rate": 1.2902866685221265e-05, "loss": 0.8578, "step": 1675 }, { "epoch": 0.4197345354370148, "grad_norm": 0.333984375, "learning_rate": 1.2897300306150851e-05, "loss": 0.81, "step": 1676 }, { "epoch": 0.41998497370398197, "grad_norm": 0.388671875, "learning_rate": 1.2891733927080435e-05, "loss": 1.0604, "step": 1677 }, { "epoch": 0.42023541197094916, "grad_norm": 0.392578125, "learning_rate": 1.288616754801002e-05, "loss": 0.9742, "step": 1678 }, { "epoch": 0.42048585023791635, "grad_norm": 0.369140625, "learning_rate": 1.2880601168939606e-05, "loss": 0.8639, "step": 1679 }, { "epoch": 0.42073628850488354, "grad_norm": 0.388671875, "learning_rate": 1.2875034789869192e-05, "loss": 1.0008, "step": 1680 }, { "epoch": 0.42098672677185073, "grad_norm": 0.3671875, "learning_rate": 1.2869468410798776e-05, "loss": 0.8367, "step": 1681 }, { "epoch": 0.4212371650388179, "grad_norm": 0.384765625, "learning_rate": 1.2863902031728361e-05, "loss": 0.8531, "step": 1682 }, { "epoch": 0.4214876033057851, "grad_norm": 0.345703125, "learning_rate": 1.2858335652657947e-05, "loss": 1.0696, "step": 1683 }, { "epoch": 0.4217380415727523, "grad_norm": 0.41015625, "learning_rate": 1.2852769273587532e-05, "loss": 0.9135, "step": 1684 }, { "epoch": 0.4219884798397195, "grad_norm": 0.341796875, "learning_rate": 1.2847202894517116e-05, "loss": 0.8939, "step": 1685 }, { "epoch": 0.4222389181066867, "grad_norm": 0.36328125, "learning_rate": 1.2841636515446702e-05, "loss": 0.8644, "step": 1686 }, { "epoch": 0.42248935637365387, "grad_norm": 0.359375, "learning_rate": 1.283607013637629e-05, "loss": 0.9236, "step": 1687 }, { "epoch": 0.4227397946406211, "grad_norm": 0.3359375, "learning_rate": 1.2830503757305875e-05, "loss": 0.9042, "step": 1688 }, { "epoch": 0.4229902329075883, "grad_norm": 0.3984375, "learning_rate": 1.2824937378235459e-05, "loss": 0.9404, "step": 1689 }, { "epoch": 0.4232406711745555, "grad_norm": 0.365234375, "learning_rate": 1.2819370999165044e-05, "loss": 0.832, "step": 1690 }, { "epoch": 0.4234911094415227, "grad_norm": 0.375, "learning_rate": 1.281380462009463e-05, "loss": 1.0149, "step": 1691 }, { "epoch": 0.4237415477084899, "grad_norm": 0.365234375, "learning_rate": 1.2808238241024216e-05, "loss": 0.8712, "step": 1692 }, { "epoch": 0.42399198597545706, "grad_norm": 0.32421875, "learning_rate": 1.28026718619538e-05, "loss": 1.0223, "step": 1693 }, { "epoch": 0.42424242424242425, "grad_norm": 0.36328125, "learning_rate": 1.2797105482883385e-05, "loss": 0.9873, "step": 1694 }, { "epoch": 0.42449286250939144, "grad_norm": 0.396484375, "learning_rate": 1.279153910381297e-05, "loss": 0.9392, "step": 1695 }, { "epoch": 0.42474330077635863, "grad_norm": 0.37890625, "learning_rate": 1.2785972724742556e-05, "loss": 0.9485, "step": 1696 }, { "epoch": 0.4249937390433258, "grad_norm": 0.328125, "learning_rate": 1.278040634567214e-05, "loss": 1.0095, "step": 1697 }, { "epoch": 0.425244177310293, "grad_norm": 0.4140625, "learning_rate": 1.2774839966601726e-05, "loss": 0.8854, "step": 1698 }, { "epoch": 0.4254946155772602, "grad_norm": 0.40625, "learning_rate": 1.2769273587531311e-05, "loss": 0.8179, "step": 1699 }, { "epoch": 0.4257450538442274, "grad_norm": 0.330078125, "learning_rate": 1.2763707208460899e-05, "loss": 0.9714, "step": 1700 }, { "epoch": 0.4259954921111946, "grad_norm": 0.39453125, "learning_rate": 1.2758140829390481e-05, "loss": 1.015, "step": 1701 }, { "epoch": 0.4262459303781618, "grad_norm": 0.35546875, "learning_rate": 1.2752574450320068e-05, "loss": 0.9344, "step": 1702 }, { "epoch": 0.42649636864512896, "grad_norm": 0.34765625, "learning_rate": 1.2747008071249654e-05, "loss": 0.8171, "step": 1703 }, { "epoch": 0.42674680691209615, "grad_norm": 0.357421875, "learning_rate": 1.274144169217924e-05, "loss": 0.8666, "step": 1704 }, { "epoch": 0.42699724517906334, "grad_norm": 0.330078125, "learning_rate": 1.2735875313108823e-05, "loss": 0.8449, "step": 1705 }, { "epoch": 0.42724768344603054, "grad_norm": 0.34375, "learning_rate": 1.2730308934038409e-05, "loss": 1.0144, "step": 1706 }, { "epoch": 0.4274981217129977, "grad_norm": 0.333984375, "learning_rate": 1.2724742554967995e-05, "loss": 0.9221, "step": 1707 }, { "epoch": 0.4277485599799649, "grad_norm": 0.419921875, "learning_rate": 1.271917617589758e-05, "loss": 0.8898, "step": 1708 }, { "epoch": 0.4279989982469321, "grad_norm": 0.3671875, "learning_rate": 1.2713609796827164e-05, "loss": 0.9233, "step": 1709 }, { "epoch": 0.42824943651389935, "grad_norm": 0.34765625, "learning_rate": 1.270804341775675e-05, "loss": 0.8345, "step": 1710 }, { "epoch": 0.42849987478086654, "grad_norm": 0.341796875, "learning_rate": 1.2702477038686335e-05, "loss": 0.9783, "step": 1711 }, { "epoch": 0.42875031304783373, "grad_norm": 0.353515625, "learning_rate": 1.2696910659615923e-05, "loss": 0.9241, "step": 1712 }, { "epoch": 0.4290007513148009, "grad_norm": 0.369140625, "learning_rate": 1.2691344280545505e-05, "loss": 0.9573, "step": 1713 }, { "epoch": 0.4292511895817681, "grad_norm": 0.376953125, "learning_rate": 1.2685777901475092e-05, "loss": 0.9952, "step": 1714 }, { "epoch": 0.4295016278487353, "grad_norm": 0.41796875, "learning_rate": 1.2680211522404678e-05, "loss": 0.9439, "step": 1715 }, { "epoch": 0.4297520661157025, "grad_norm": 0.361328125, "learning_rate": 1.2674645143334262e-05, "loss": 1.0328, "step": 1716 }, { "epoch": 0.4300025043826697, "grad_norm": 0.412109375, "learning_rate": 1.2669078764263847e-05, "loss": 0.9384, "step": 1717 }, { "epoch": 0.43025294264963687, "grad_norm": 0.37890625, "learning_rate": 1.2663512385193433e-05, "loss": 0.9914, "step": 1718 }, { "epoch": 0.43050338091660406, "grad_norm": 0.421875, "learning_rate": 1.2657946006123019e-05, "loss": 1.0405, "step": 1719 }, { "epoch": 0.43075381918357125, "grad_norm": 0.32421875, "learning_rate": 1.2652379627052603e-05, "loss": 0.8323, "step": 1720 }, { "epoch": 0.43100425745053844, "grad_norm": 0.39453125, "learning_rate": 1.2646813247982188e-05, "loss": 0.7973, "step": 1721 }, { "epoch": 0.43125469571750563, "grad_norm": 0.39453125, "learning_rate": 1.2641246868911774e-05, "loss": 0.8776, "step": 1722 }, { "epoch": 0.4315051339844728, "grad_norm": 0.310546875, "learning_rate": 1.263568048984136e-05, "loss": 0.8565, "step": 1723 }, { "epoch": 0.43175557225144, "grad_norm": 0.365234375, "learning_rate": 1.2630114110770943e-05, "loss": 1.0114, "step": 1724 }, { "epoch": 0.4320060105184072, "grad_norm": 0.33203125, "learning_rate": 1.2624547731700529e-05, "loss": 0.7807, "step": 1725 }, { "epoch": 0.4322564487853744, "grad_norm": 0.341796875, "learning_rate": 1.2618981352630115e-05, "loss": 0.7942, "step": 1726 }, { "epoch": 0.4325068870523416, "grad_norm": 0.396484375, "learning_rate": 1.2613414973559702e-05, "loss": 0.8373, "step": 1727 }, { "epoch": 0.43275732531930877, "grad_norm": 0.349609375, "learning_rate": 1.2607848594489284e-05, "loss": 0.9137, "step": 1728 }, { "epoch": 0.43300776358627596, "grad_norm": 0.357421875, "learning_rate": 1.2602282215418871e-05, "loss": 0.9285, "step": 1729 }, { "epoch": 0.43325820185324315, "grad_norm": 0.359375, "learning_rate": 1.2596715836348457e-05, "loss": 0.8074, "step": 1730 }, { "epoch": 0.43350864012021034, "grad_norm": 0.4140625, "learning_rate": 1.2591149457278043e-05, "loss": 0.945, "step": 1731 }, { "epoch": 0.4337590783871776, "grad_norm": 0.34765625, "learning_rate": 1.2585583078207626e-05, "loss": 0.8305, "step": 1732 }, { "epoch": 0.4340095166541448, "grad_norm": 0.373046875, "learning_rate": 1.2580016699137212e-05, "loss": 1.0367, "step": 1733 }, { "epoch": 0.43425995492111197, "grad_norm": 0.376953125, "learning_rate": 1.2574450320066798e-05, "loss": 0.8874, "step": 1734 }, { "epoch": 0.43451039318807916, "grad_norm": 0.36328125, "learning_rate": 1.2568883940996383e-05, "loss": 0.9002, "step": 1735 }, { "epoch": 0.43476083145504635, "grad_norm": 0.357421875, "learning_rate": 1.2563317561925967e-05, "loss": 0.9621, "step": 1736 }, { "epoch": 0.43501126972201354, "grad_norm": 0.349609375, "learning_rate": 1.2557751182855553e-05, "loss": 1.0626, "step": 1737 }, { "epoch": 0.43526170798898073, "grad_norm": 0.361328125, "learning_rate": 1.2552184803785138e-05, "loss": 0.8822, "step": 1738 }, { "epoch": 0.4355121462559479, "grad_norm": 0.34765625, "learning_rate": 1.2546618424714724e-05, "loss": 0.9378, "step": 1739 }, { "epoch": 0.4357625845229151, "grad_norm": 0.42578125, "learning_rate": 1.2541052045644308e-05, "loss": 0.9001, "step": 1740 }, { "epoch": 0.4360130227898823, "grad_norm": 0.3671875, "learning_rate": 1.2535485666573894e-05, "loss": 0.6842, "step": 1741 }, { "epoch": 0.4362634610568495, "grad_norm": 0.37109375, "learning_rate": 1.2529919287503481e-05, "loss": 0.9077, "step": 1742 }, { "epoch": 0.4365138993238167, "grad_norm": 0.384765625, "learning_rate": 1.2524352908433067e-05, "loss": 0.9709, "step": 1743 }, { "epoch": 0.43676433759078387, "grad_norm": 0.421875, "learning_rate": 1.251878652936265e-05, "loss": 0.8225, "step": 1744 }, { "epoch": 0.43701477585775106, "grad_norm": 0.3515625, "learning_rate": 1.2513220150292236e-05, "loss": 0.9747, "step": 1745 }, { "epoch": 0.43726521412471825, "grad_norm": 0.34375, "learning_rate": 1.2507653771221822e-05, "loss": 0.9418, "step": 1746 }, { "epoch": 0.43751565239168544, "grad_norm": 0.357421875, "learning_rate": 1.2502087392151407e-05, "loss": 0.923, "step": 1747 }, { "epoch": 0.43776609065865263, "grad_norm": 0.3671875, "learning_rate": 1.2496521013080991e-05, "loss": 0.8989, "step": 1748 }, { "epoch": 0.4380165289256198, "grad_norm": 0.337890625, "learning_rate": 1.2490954634010577e-05, "loss": 0.8556, "step": 1749 }, { "epoch": 0.438266967192587, "grad_norm": 0.337890625, "learning_rate": 1.2485388254940162e-05, "loss": 0.8941, "step": 1750 }, { "epoch": 0.4385174054595542, "grad_norm": 0.41015625, "learning_rate": 1.2479821875869748e-05, "loss": 0.8154, "step": 1751 }, { "epoch": 0.4387678437265214, "grad_norm": 0.390625, "learning_rate": 1.2474255496799332e-05, "loss": 0.7994, "step": 1752 }, { "epoch": 0.4390182819934886, "grad_norm": 0.392578125, "learning_rate": 1.2468689117728918e-05, "loss": 0.821, "step": 1753 }, { "epoch": 0.4392687202604558, "grad_norm": 0.388671875, "learning_rate": 1.2463122738658505e-05, "loss": 0.8345, "step": 1754 }, { "epoch": 0.439519158527423, "grad_norm": 0.33203125, "learning_rate": 1.245755635958809e-05, "loss": 0.9781, "step": 1755 }, { "epoch": 0.4397695967943902, "grad_norm": 0.3828125, "learning_rate": 1.2451989980517674e-05, "loss": 0.9888, "step": 1756 }, { "epoch": 0.4400200350613574, "grad_norm": 0.35546875, "learning_rate": 1.244642360144726e-05, "loss": 1.0274, "step": 1757 }, { "epoch": 0.4402704733283246, "grad_norm": 0.361328125, "learning_rate": 1.2440857222376846e-05, "loss": 1.0293, "step": 1758 }, { "epoch": 0.4405209115952918, "grad_norm": 0.369140625, "learning_rate": 1.2435290843306431e-05, "loss": 0.9052, "step": 1759 }, { "epoch": 0.44077134986225897, "grad_norm": 0.359375, "learning_rate": 1.2429724464236015e-05, "loss": 0.7751, "step": 1760 }, { "epoch": 0.44102178812922616, "grad_norm": 0.36328125, "learning_rate": 1.24241580851656e-05, "loss": 1.0699, "step": 1761 }, { "epoch": 0.44127222639619335, "grad_norm": 0.3359375, "learning_rate": 1.2418591706095186e-05, "loss": 0.9247, "step": 1762 }, { "epoch": 0.44152266466316054, "grad_norm": 0.33984375, "learning_rate": 1.2413025327024772e-05, "loss": 0.9292, "step": 1763 }, { "epoch": 0.4417731029301277, "grad_norm": 0.375, "learning_rate": 1.2407458947954356e-05, "loss": 1.1209, "step": 1764 }, { "epoch": 0.4420235411970949, "grad_norm": 0.326171875, "learning_rate": 1.2401892568883941e-05, "loss": 1.0721, "step": 1765 }, { "epoch": 0.4422739794640621, "grad_norm": 0.349609375, "learning_rate": 1.2396326189813527e-05, "loss": 0.9217, "step": 1766 }, { "epoch": 0.4425244177310293, "grad_norm": 0.3359375, "learning_rate": 1.2390759810743111e-05, "loss": 1.0499, "step": 1767 }, { "epoch": 0.4427748559979965, "grad_norm": 0.373046875, "learning_rate": 1.2385193431672697e-05, "loss": 1.0284, "step": 1768 }, { "epoch": 0.4430252942649637, "grad_norm": 0.3515625, "learning_rate": 1.2379627052602284e-05, "loss": 0.9008, "step": 1769 }, { "epoch": 0.44327573253193087, "grad_norm": 0.470703125, "learning_rate": 1.237406067353187e-05, "loss": 0.9029, "step": 1770 }, { "epoch": 0.44352617079889806, "grad_norm": 0.40234375, "learning_rate": 1.2368494294461453e-05, "loss": 1.0334, "step": 1771 }, { "epoch": 0.44377660906586525, "grad_norm": 0.353515625, "learning_rate": 1.2362927915391039e-05, "loss": 1.0436, "step": 1772 }, { "epoch": 0.44402704733283244, "grad_norm": 0.330078125, "learning_rate": 1.2357361536320625e-05, "loss": 0.8065, "step": 1773 }, { "epoch": 0.4442774855997996, "grad_norm": 0.353515625, "learning_rate": 1.235179515725021e-05, "loss": 0.8939, "step": 1774 }, { "epoch": 0.4445279238667668, "grad_norm": 0.33984375, "learning_rate": 1.2346228778179794e-05, "loss": 0.9942, "step": 1775 }, { "epoch": 0.44477836213373406, "grad_norm": 0.373046875, "learning_rate": 1.234066239910938e-05, "loss": 0.9029, "step": 1776 }, { "epoch": 0.44502880040070125, "grad_norm": 0.400390625, "learning_rate": 1.2335096020038965e-05, "loss": 0.9699, "step": 1777 }, { "epoch": 0.44527923866766844, "grad_norm": 0.3515625, "learning_rate": 1.2329529640968551e-05, "loss": 1.1156, "step": 1778 }, { "epoch": 0.44552967693463563, "grad_norm": 0.34765625, "learning_rate": 1.2323963261898135e-05, "loss": 0.9116, "step": 1779 }, { "epoch": 0.4457801152016028, "grad_norm": 0.40234375, "learning_rate": 1.231839688282772e-05, "loss": 0.878, "step": 1780 }, { "epoch": 0.44603055346857, "grad_norm": 0.359375, "learning_rate": 1.2312830503757306e-05, "loss": 0.9487, "step": 1781 }, { "epoch": 0.4462809917355372, "grad_norm": 0.310546875, "learning_rate": 1.2307264124686894e-05, "loss": 0.8008, "step": 1782 }, { "epoch": 0.4465314300025044, "grad_norm": 0.3515625, "learning_rate": 1.2301697745616476e-05, "loss": 0.9663, "step": 1783 }, { "epoch": 0.4467818682694716, "grad_norm": 0.4296875, "learning_rate": 1.2296131366546063e-05, "loss": 0.9622, "step": 1784 }, { "epoch": 0.4470323065364388, "grad_norm": 0.384765625, "learning_rate": 1.2290564987475649e-05, "loss": 0.9241, "step": 1785 }, { "epoch": 0.44728274480340596, "grad_norm": 0.35546875, "learning_rate": 1.2284998608405234e-05, "loss": 0.9542, "step": 1786 }, { "epoch": 0.44753318307037315, "grad_norm": 0.40625, "learning_rate": 1.2279432229334818e-05, "loss": 0.813, "step": 1787 }, { "epoch": 0.44778362133734034, "grad_norm": 0.361328125, "learning_rate": 1.2273865850264404e-05, "loss": 0.9101, "step": 1788 }, { "epoch": 0.44803405960430753, "grad_norm": 0.337890625, "learning_rate": 1.226829947119399e-05, "loss": 0.8682, "step": 1789 }, { "epoch": 0.4482844978712747, "grad_norm": 0.388671875, "learning_rate": 1.2262733092123575e-05, "loss": 0.9044, "step": 1790 }, { "epoch": 0.4485349361382419, "grad_norm": 0.359375, "learning_rate": 1.2257166713053159e-05, "loss": 0.8999, "step": 1791 }, { "epoch": 0.4487853744052091, "grad_norm": 0.36328125, "learning_rate": 1.2251600333982745e-05, "loss": 1.0316, "step": 1792 }, { "epoch": 0.4490358126721763, "grad_norm": 0.341796875, "learning_rate": 1.224603395491233e-05, "loss": 0.9197, "step": 1793 }, { "epoch": 0.4492862509391435, "grad_norm": 0.330078125, "learning_rate": 1.2240467575841917e-05, "loss": 1.0522, "step": 1794 }, { "epoch": 0.4495366892061107, "grad_norm": 0.349609375, "learning_rate": 1.22349011967715e-05, "loss": 0.967, "step": 1795 }, { "epoch": 0.44978712747307786, "grad_norm": 0.357421875, "learning_rate": 1.2229334817701087e-05, "loss": 0.9415, "step": 1796 }, { "epoch": 0.45003756574004505, "grad_norm": 0.46875, "learning_rate": 1.2223768438630673e-05, "loss": 0.8682, "step": 1797 }, { "epoch": 0.4502880040070123, "grad_norm": 0.34375, "learning_rate": 1.2218202059560258e-05, "loss": 0.8391, "step": 1798 }, { "epoch": 0.4505384422739795, "grad_norm": 0.353515625, "learning_rate": 1.2212635680489842e-05, "loss": 1.1485, "step": 1799 }, { "epoch": 0.4507888805409467, "grad_norm": 0.404296875, "learning_rate": 1.2207069301419428e-05, "loss": 0.7904, "step": 1800 }, { "epoch": 0.45103931880791387, "grad_norm": 0.365234375, "learning_rate": 1.2201502922349013e-05, "loss": 0.8865, "step": 1801 }, { "epoch": 0.45128975707488106, "grad_norm": 0.376953125, "learning_rate": 1.2195936543278599e-05, "loss": 1.0443, "step": 1802 }, { "epoch": 0.45154019534184825, "grad_norm": 0.373046875, "learning_rate": 1.2190370164208183e-05, "loss": 0.8295, "step": 1803 }, { "epoch": 0.45179063360881544, "grad_norm": 0.337890625, "learning_rate": 1.2184803785137768e-05, "loss": 0.8808, "step": 1804 }, { "epoch": 0.45204107187578263, "grad_norm": 0.357421875, "learning_rate": 1.2179237406067354e-05, "loss": 0.8856, "step": 1805 }, { "epoch": 0.4522915101427498, "grad_norm": 0.396484375, "learning_rate": 1.217367102699694e-05, "loss": 0.8997, "step": 1806 }, { "epoch": 0.452541948409717, "grad_norm": 0.3671875, "learning_rate": 1.2168104647926524e-05, "loss": 0.974, "step": 1807 }, { "epoch": 0.4527923866766842, "grad_norm": 0.375, "learning_rate": 1.216253826885611e-05, "loss": 0.8895, "step": 1808 }, { "epoch": 0.4530428249436514, "grad_norm": 0.328125, "learning_rate": 1.2156971889785697e-05, "loss": 0.7588, "step": 1809 }, { "epoch": 0.4532932632106186, "grad_norm": 0.37890625, "learning_rate": 1.2151405510715282e-05, "loss": 1.0876, "step": 1810 }, { "epoch": 0.45354370147758577, "grad_norm": 0.3671875, "learning_rate": 1.2145839131644866e-05, "loss": 0.8107, "step": 1811 }, { "epoch": 0.45379413974455296, "grad_norm": 0.37890625, "learning_rate": 1.2140272752574452e-05, "loss": 0.936, "step": 1812 }, { "epoch": 0.45404457801152015, "grad_norm": 0.35546875, "learning_rate": 1.2134706373504037e-05, "loss": 0.8486, "step": 1813 }, { "epoch": 0.45429501627848734, "grad_norm": 0.396484375, "learning_rate": 1.2129139994433623e-05, "loss": 0.9498, "step": 1814 }, { "epoch": 0.45454545454545453, "grad_norm": 0.359375, "learning_rate": 1.2123573615363207e-05, "loss": 0.8053, "step": 1815 }, { "epoch": 0.4547958928124217, "grad_norm": 0.353515625, "learning_rate": 1.2118007236292792e-05, "loss": 0.9064, "step": 1816 }, { "epoch": 0.4550463310793889, "grad_norm": 0.396484375, "learning_rate": 1.2112440857222378e-05, "loss": 0.9596, "step": 1817 }, { "epoch": 0.4552967693463561, "grad_norm": 0.375, "learning_rate": 1.2106874478151962e-05, "loss": 0.9176, "step": 1818 }, { "epoch": 0.4555472076133233, "grad_norm": 0.3515625, "learning_rate": 1.2101308099081548e-05, "loss": 0.9909, "step": 1819 }, { "epoch": 0.4557976458802905, "grad_norm": 0.373046875, "learning_rate": 1.2095741720011133e-05, "loss": 0.942, "step": 1820 }, { "epoch": 0.4560480841472577, "grad_norm": 0.416015625, "learning_rate": 1.2090175340940719e-05, "loss": 0.9879, "step": 1821 }, { "epoch": 0.4562985224142249, "grad_norm": 0.365234375, "learning_rate": 1.2084608961870303e-05, "loss": 0.9405, "step": 1822 }, { "epoch": 0.4565489606811921, "grad_norm": 0.34765625, "learning_rate": 1.2079042582799888e-05, "loss": 1.0388, "step": 1823 }, { "epoch": 0.4567993989481593, "grad_norm": 0.314453125, "learning_rate": 1.2073476203729476e-05, "loss": 0.7601, "step": 1824 }, { "epoch": 0.4570498372151265, "grad_norm": 0.359375, "learning_rate": 1.2067909824659061e-05, "loss": 0.9349, "step": 1825 }, { "epoch": 0.4573002754820937, "grad_norm": 0.376953125, "learning_rate": 1.2062343445588645e-05, "loss": 1.0876, "step": 1826 }, { "epoch": 0.45755071374906087, "grad_norm": 0.375, "learning_rate": 1.205677706651823e-05, "loss": 1.0033, "step": 1827 }, { "epoch": 0.45780115201602806, "grad_norm": 0.33203125, "learning_rate": 1.2051210687447816e-05, "loss": 0.9004, "step": 1828 }, { "epoch": 0.45805159028299525, "grad_norm": 0.353515625, "learning_rate": 1.2045644308377402e-05, "loss": 0.8901, "step": 1829 }, { "epoch": 0.45830202854996244, "grad_norm": 0.41015625, "learning_rate": 1.2040077929306986e-05, "loss": 0.9698, "step": 1830 }, { "epoch": 0.4585524668169296, "grad_norm": 0.37890625, "learning_rate": 1.2034511550236572e-05, "loss": 1.0913, "step": 1831 }, { "epoch": 0.4588029050838968, "grad_norm": 0.53515625, "learning_rate": 1.2028945171166157e-05, "loss": 0.8948, "step": 1832 }, { "epoch": 0.459053343350864, "grad_norm": 0.34765625, "learning_rate": 1.2023378792095743e-05, "loss": 0.8911, "step": 1833 }, { "epoch": 0.4593037816178312, "grad_norm": 0.3515625, "learning_rate": 1.2017812413025327e-05, "loss": 0.9989, "step": 1834 }, { "epoch": 0.4595542198847984, "grad_norm": 0.365234375, "learning_rate": 1.2012246033954912e-05, "loss": 0.9135, "step": 1835 }, { "epoch": 0.4598046581517656, "grad_norm": 0.36328125, "learning_rate": 1.20066796548845e-05, "loss": 0.7852, "step": 1836 }, { "epoch": 0.46005509641873277, "grad_norm": 0.40234375, "learning_rate": 1.2001113275814085e-05, "loss": 0.89, "step": 1837 }, { "epoch": 0.46030553468569996, "grad_norm": 0.42578125, "learning_rate": 1.1995546896743669e-05, "loss": 0.9154, "step": 1838 }, { "epoch": 0.46055597295266715, "grad_norm": 0.384765625, "learning_rate": 1.1989980517673255e-05, "loss": 0.8745, "step": 1839 }, { "epoch": 0.46080641121963434, "grad_norm": 0.322265625, "learning_rate": 1.198441413860284e-05, "loss": 0.9628, "step": 1840 }, { "epoch": 0.4610568494866015, "grad_norm": 0.380859375, "learning_rate": 1.1978847759532426e-05, "loss": 0.9409, "step": 1841 }, { "epoch": 0.4613072877535687, "grad_norm": 0.388671875, "learning_rate": 1.197328138046201e-05, "loss": 1.1781, "step": 1842 }, { "epoch": 0.46155772602053596, "grad_norm": 0.408203125, "learning_rate": 1.1967715001391595e-05, "loss": 0.9734, "step": 1843 }, { "epoch": 0.46180816428750315, "grad_norm": 0.375, "learning_rate": 1.1962148622321181e-05, "loss": 0.9051, "step": 1844 }, { "epoch": 0.46205860255447034, "grad_norm": 0.447265625, "learning_rate": 1.1956582243250767e-05, "loss": 0.9229, "step": 1845 }, { "epoch": 0.46230904082143753, "grad_norm": 0.35546875, "learning_rate": 1.195101586418035e-05, "loss": 1.0154, "step": 1846 }, { "epoch": 0.4625594790884047, "grad_norm": 0.37109375, "learning_rate": 1.1945449485109936e-05, "loss": 0.9428, "step": 1847 }, { "epoch": 0.4628099173553719, "grad_norm": 0.37890625, "learning_rate": 1.1939883106039522e-05, "loss": 0.8292, "step": 1848 }, { "epoch": 0.4630603556223391, "grad_norm": 0.34375, "learning_rate": 1.193431672696911e-05, "loss": 0.9191, "step": 1849 }, { "epoch": 0.4633107938893063, "grad_norm": 0.373046875, "learning_rate": 1.1928750347898691e-05, "loss": 0.8871, "step": 1850 }, { "epoch": 0.4635612321562735, "grad_norm": 0.3515625, "learning_rate": 1.1923183968828279e-05, "loss": 0.6565, "step": 1851 }, { "epoch": 0.4638116704232407, "grad_norm": 0.337890625, "learning_rate": 1.1917617589757864e-05, "loss": 0.8378, "step": 1852 }, { "epoch": 0.46406210869020786, "grad_norm": 0.34765625, "learning_rate": 1.191205121068745e-05, "loss": 0.9105, "step": 1853 }, { "epoch": 0.46431254695717505, "grad_norm": 0.365234375, "learning_rate": 1.1906484831617034e-05, "loss": 0.9012, "step": 1854 }, { "epoch": 0.46456298522414224, "grad_norm": 0.380859375, "learning_rate": 1.190091845254662e-05, "loss": 1.0731, "step": 1855 }, { "epoch": 0.46481342349110943, "grad_norm": 0.365234375, "learning_rate": 1.1895352073476205e-05, "loss": 1.0026, "step": 1856 }, { "epoch": 0.4650638617580766, "grad_norm": 0.36328125, "learning_rate": 1.188978569440579e-05, "loss": 1.0016, "step": 1857 }, { "epoch": 0.4653143000250438, "grad_norm": 0.390625, "learning_rate": 1.1884219315335375e-05, "loss": 1.1108, "step": 1858 }, { "epoch": 0.465564738292011, "grad_norm": 0.39453125, "learning_rate": 1.187865293626496e-05, "loss": 0.8844, "step": 1859 }, { "epoch": 0.4658151765589782, "grad_norm": 0.3515625, "learning_rate": 1.1873086557194546e-05, "loss": 0.7014, "step": 1860 }, { "epoch": 0.4660656148259454, "grad_norm": 0.359375, "learning_rate": 1.1867520178124131e-05, "loss": 0.953, "step": 1861 }, { "epoch": 0.4663160530929126, "grad_norm": 0.423828125, "learning_rate": 1.1861953799053715e-05, "loss": 0.9633, "step": 1862 }, { "epoch": 0.46656649135987976, "grad_norm": 0.396484375, "learning_rate": 1.1856387419983301e-05, "loss": 1.0706, "step": 1863 }, { "epoch": 0.46681692962684695, "grad_norm": 0.396484375, "learning_rate": 1.1850821040912888e-05, "loss": 0.9219, "step": 1864 }, { "epoch": 0.4670673678938142, "grad_norm": 0.380859375, "learning_rate": 1.1845254661842474e-05, "loss": 1.0535, "step": 1865 }, { "epoch": 0.4673178061607814, "grad_norm": 0.365234375, "learning_rate": 1.1839688282772058e-05, "loss": 1.0174, "step": 1866 }, { "epoch": 0.4675682444277486, "grad_norm": 0.39453125, "learning_rate": 1.1834121903701643e-05, "loss": 0.8862, "step": 1867 }, { "epoch": 0.46781868269471577, "grad_norm": 0.42578125, "learning_rate": 1.1828555524631229e-05, "loss": 0.7778, "step": 1868 }, { "epoch": 0.46806912096168296, "grad_norm": 0.390625, "learning_rate": 1.1822989145560813e-05, "loss": 0.7697, "step": 1869 }, { "epoch": 0.46831955922865015, "grad_norm": 0.349609375, "learning_rate": 1.1817422766490399e-05, "loss": 0.8378, "step": 1870 }, { "epoch": 0.46856999749561734, "grad_norm": 0.337890625, "learning_rate": 1.1811856387419984e-05, "loss": 0.8985, "step": 1871 }, { "epoch": 0.46882043576258453, "grad_norm": 0.3671875, "learning_rate": 1.180629000834957e-05, "loss": 0.8718, "step": 1872 }, { "epoch": 0.4690708740295517, "grad_norm": 0.36328125, "learning_rate": 1.1800723629279154e-05, "loss": 0.9908, "step": 1873 }, { "epoch": 0.4693213122965189, "grad_norm": 0.328125, "learning_rate": 1.179515725020874e-05, "loss": 0.878, "step": 1874 }, { "epoch": 0.4695717505634861, "grad_norm": 0.37890625, "learning_rate": 1.1789590871138325e-05, "loss": 0.8864, "step": 1875 }, { "epoch": 0.4698221888304533, "grad_norm": 0.33203125, "learning_rate": 1.1784024492067912e-05, "loss": 0.8467, "step": 1876 }, { "epoch": 0.4700726270974205, "grad_norm": 0.376953125, "learning_rate": 1.1778458112997494e-05, "loss": 0.8907, "step": 1877 }, { "epoch": 0.47032306536438767, "grad_norm": 0.361328125, "learning_rate": 1.1772891733927082e-05, "loss": 0.8609, "step": 1878 }, { "epoch": 0.47057350363135486, "grad_norm": 0.392578125, "learning_rate": 1.1767325354856667e-05, "loss": 1.0118, "step": 1879 }, { "epoch": 0.47082394189832205, "grad_norm": 0.3203125, "learning_rate": 1.1761758975786253e-05, "loss": 0.8749, "step": 1880 }, { "epoch": 0.47107438016528924, "grad_norm": 0.390625, "learning_rate": 1.1756192596715837e-05, "loss": 0.8449, "step": 1881 }, { "epoch": 0.47132481843225643, "grad_norm": 0.3515625, "learning_rate": 1.1750626217645422e-05, "loss": 0.9541, "step": 1882 }, { "epoch": 0.4715752566992236, "grad_norm": 0.341796875, "learning_rate": 1.1745059838575008e-05, "loss": 0.7395, "step": 1883 }, { "epoch": 0.4718256949661908, "grad_norm": 0.2890625, "learning_rate": 1.1739493459504594e-05, "loss": 0.8018, "step": 1884 }, { "epoch": 0.472076133233158, "grad_norm": 0.373046875, "learning_rate": 1.1733927080434178e-05, "loss": 0.8608, "step": 1885 }, { "epoch": 0.4723265715001252, "grad_norm": 0.365234375, "learning_rate": 1.1728360701363763e-05, "loss": 0.9217, "step": 1886 }, { "epoch": 0.47257700976709244, "grad_norm": 0.380859375, "learning_rate": 1.1722794322293349e-05, "loss": 0.9491, "step": 1887 }, { "epoch": 0.4728274480340596, "grad_norm": 0.431640625, "learning_rate": 1.1717227943222934e-05, "loss": 1.0533, "step": 1888 }, { "epoch": 0.4730778863010268, "grad_norm": 0.34375, "learning_rate": 1.1711661564152518e-05, "loss": 0.8609, "step": 1889 }, { "epoch": 0.473328324567994, "grad_norm": 0.33984375, "learning_rate": 1.1706095185082104e-05, "loss": 0.8482, "step": 1890 }, { "epoch": 0.4735787628349612, "grad_norm": 0.380859375, "learning_rate": 1.1700528806011691e-05, "loss": 0.871, "step": 1891 }, { "epoch": 0.4738292011019284, "grad_norm": 0.353515625, "learning_rate": 1.1694962426941277e-05, "loss": 0.9075, "step": 1892 }, { "epoch": 0.4740796393688956, "grad_norm": 0.33984375, "learning_rate": 1.168939604787086e-05, "loss": 0.8843, "step": 1893 }, { "epoch": 0.47433007763586277, "grad_norm": 0.37109375, "learning_rate": 1.1683829668800446e-05, "loss": 0.9138, "step": 1894 }, { "epoch": 0.47458051590282996, "grad_norm": 0.337890625, "learning_rate": 1.1678263289730032e-05, "loss": 0.9261, "step": 1895 }, { "epoch": 0.47483095416979715, "grad_norm": 0.361328125, "learning_rate": 1.1672696910659618e-05, "loss": 0.8798, "step": 1896 }, { "epoch": 0.47508139243676434, "grad_norm": 0.408203125, "learning_rate": 1.1667130531589202e-05, "loss": 0.9939, "step": 1897 }, { "epoch": 0.47533183070373153, "grad_norm": 0.345703125, "learning_rate": 1.1661564152518787e-05, "loss": 0.8906, "step": 1898 }, { "epoch": 0.4755822689706987, "grad_norm": 0.3515625, "learning_rate": 1.1655997773448373e-05, "loss": 1.034, "step": 1899 }, { "epoch": 0.4758327072376659, "grad_norm": 0.35546875, "learning_rate": 1.1650431394377958e-05, "loss": 0.9798, "step": 1900 }, { "epoch": 0.4760831455046331, "grad_norm": 0.43359375, "learning_rate": 1.1644865015307542e-05, "loss": 0.8472, "step": 1901 }, { "epoch": 0.4763335837716003, "grad_norm": 0.388671875, "learning_rate": 1.1639298636237128e-05, "loss": 0.9082, "step": 1902 }, { "epoch": 0.4765840220385675, "grad_norm": 0.337890625, "learning_rate": 1.1633732257166714e-05, "loss": 0.7983, "step": 1903 }, { "epoch": 0.47683446030553467, "grad_norm": 0.388671875, "learning_rate": 1.1628165878096301e-05, "loss": 0.9365, "step": 1904 }, { "epoch": 0.47708489857250186, "grad_norm": 0.330078125, "learning_rate": 1.1622599499025883e-05, "loss": 1.0098, "step": 1905 }, { "epoch": 0.47733533683946905, "grad_norm": 0.40625, "learning_rate": 1.161703311995547e-05, "loss": 0.9819, "step": 1906 }, { "epoch": 0.47758577510643624, "grad_norm": 0.404296875, "learning_rate": 1.1611466740885056e-05, "loss": 1.0653, "step": 1907 }, { "epoch": 0.47783621337340343, "grad_norm": 0.4296875, "learning_rate": 1.1605900361814642e-05, "loss": 0.9315, "step": 1908 }, { "epoch": 0.4780866516403707, "grad_norm": 0.375, "learning_rate": 1.1600333982744226e-05, "loss": 0.9251, "step": 1909 }, { "epoch": 0.47833708990733786, "grad_norm": 0.3515625, "learning_rate": 1.1594767603673811e-05, "loss": 0.9167, "step": 1910 }, { "epoch": 0.47858752817430505, "grad_norm": 0.359375, "learning_rate": 1.1589201224603397e-05, "loss": 0.8096, "step": 1911 }, { "epoch": 0.47883796644127224, "grad_norm": 0.333984375, "learning_rate": 1.1583634845532982e-05, "loss": 0.862, "step": 1912 }, { "epoch": 0.47908840470823943, "grad_norm": 0.388671875, "learning_rate": 1.1578068466462566e-05, "loss": 0.9707, "step": 1913 }, { "epoch": 0.4793388429752066, "grad_norm": 0.3515625, "learning_rate": 1.1572502087392152e-05, "loss": 0.9663, "step": 1914 }, { "epoch": 0.4795892812421738, "grad_norm": 0.3359375, "learning_rate": 1.1566935708321738e-05, "loss": 1.0318, "step": 1915 }, { "epoch": 0.479839719509141, "grad_norm": 0.345703125, "learning_rate": 1.1561369329251325e-05, "loss": 0.9168, "step": 1916 }, { "epoch": 0.4800901577761082, "grad_norm": 0.3359375, "learning_rate": 1.1555802950180907e-05, "loss": 1.1032, "step": 1917 }, { "epoch": 0.4803405960430754, "grad_norm": 0.330078125, "learning_rate": 1.1550236571110494e-05, "loss": 0.9701, "step": 1918 }, { "epoch": 0.4805910343100426, "grad_norm": 0.419921875, "learning_rate": 1.154467019204008e-05, "loss": 0.8825, "step": 1919 }, { "epoch": 0.48084147257700977, "grad_norm": 0.37890625, "learning_rate": 1.1539103812969664e-05, "loss": 0.8662, "step": 1920 }, { "epoch": 0.48109191084397696, "grad_norm": 0.33203125, "learning_rate": 1.153353743389925e-05, "loss": 0.9066, "step": 1921 }, { "epoch": 0.48134234911094415, "grad_norm": 0.375, "learning_rate": 1.1527971054828835e-05, "loss": 0.8866, "step": 1922 }, { "epoch": 0.48159278737791134, "grad_norm": 0.361328125, "learning_rate": 1.152240467575842e-05, "loss": 0.943, "step": 1923 }, { "epoch": 0.4818432256448785, "grad_norm": 0.369140625, "learning_rate": 1.1516838296688005e-05, "loss": 0.9411, "step": 1924 }, { "epoch": 0.4820936639118457, "grad_norm": 0.33984375, "learning_rate": 1.151127191761759e-05, "loss": 0.9291, "step": 1925 }, { "epoch": 0.4823441021788129, "grad_norm": 0.33984375, "learning_rate": 1.1505705538547176e-05, "loss": 0.9727, "step": 1926 }, { "epoch": 0.4825945404457801, "grad_norm": 0.349609375, "learning_rate": 1.1500139159476761e-05, "loss": 0.9499, "step": 1927 }, { "epoch": 0.4828449787127473, "grad_norm": 0.375, "learning_rate": 1.1494572780406345e-05, "loss": 1.0517, "step": 1928 }, { "epoch": 0.4830954169797145, "grad_norm": 0.375, "learning_rate": 1.1489006401335931e-05, "loss": 0.9366, "step": 1929 }, { "epoch": 0.48334585524668167, "grad_norm": 0.3046875, "learning_rate": 1.1483440022265517e-05, "loss": 0.7205, "step": 1930 }, { "epoch": 0.4835962935136489, "grad_norm": 0.373046875, "learning_rate": 1.1477873643195104e-05, "loss": 0.9131, "step": 1931 }, { "epoch": 0.4838467317806161, "grad_norm": 0.361328125, "learning_rate": 1.1472307264124686e-05, "loss": 0.8732, "step": 1932 }, { "epoch": 0.4840971700475833, "grad_norm": 0.3828125, "learning_rate": 1.1466740885054273e-05, "loss": 0.954, "step": 1933 }, { "epoch": 0.4843476083145505, "grad_norm": 0.3203125, "learning_rate": 1.1461174505983859e-05, "loss": 0.8331, "step": 1934 }, { "epoch": 0.48459804658151767, "grad_norm": 0.359375, "learning_rate": 1.1455608126913445e-05, "loss": 0.7999, "step": 1935 }, { "epoch": 0.48484848484848486, "grad_norm": 0.296875, "learning_rate": 1.1450041747843029e-05, "loss": 0.7836, "step": 1936 }, { "epoch": 0.48509892311545205, "grad_norm": 0.375, "learning_rate": 1.1444475368772614e-05, "loss": 0.9806, "step": 1937 }, { "epoch": 0.48534936138241924, "grad_norm": 0.36328125, "learning_rate": 1.14389089897022e-05, "loss": 0.9966, "step": 1938 }, { "epoch": 0.48559979964938643, "grad_norm": 0.375, "learning_rate": 1.1433342610631785e-05, "loss": 0.9829, "step": 1939 }, { "epoch": 0.4858502379163536, "grad_norm": 0.380859375, "learning_rate": 1.142777623156137e-05, "loss": 1.0759, "step": 1940 }, { "epoch": 0.4861006761833208, "grad_norm": 0.34765625, "learning_rate": 1.1422209852490955e-05, "loss": 1.0034, "step": 1941 }, { "epoch": 0.486351114450288, "grad_norm": 0.365234375, "learning_rate": 1.141664347342054e-05, "loss": 0.9225, "step": 1942 }, { "epoch": 0.4866015527172552, "grad_norm": 0.357421875, "learning_rate": 1.1411077094350126e-05, "loss": 0.8466, "step": 1943 }, { "epoch": 0.4868519909842224, "grad_norm": 0.37890625, "learning_rate": 1.140551071527971e-05, "loss": 0.9671, "step": 1944 }, { "epoch": 0.4871024292511896, "grad_norm": 0.361328125, "learning_rate": 1.1399944336209296e-05, "loss": 0.9192, "step": 1945 }, { "epoch": 0.48735286751815676, "grad_norm": 0.359375, "learning_rate": 1.1394377957138883e-05, "loss": 0.9963, "step": 1946 }, { "epoch": 0.48760330578512395, "grad_norm": 0.380859375, "learning_rate": 1.1388811578068469e-05, "loss": 0.9742, "step": 1947 }, { "epoch": 0.48785374405209114, "grad_norm": 0.322265625, "learning_rate": 1.1383245198998053e-05, "loss": 0.8476, "step": 1948 }, { "epoch": 0.48810418231905833, "grad_norm": 0.36328125, "learning_rate": 1.1377678819927638e-05, "loss": 0.8779, "step": 1949 }, { "epoch": 0.4883546205860255, "grad_norm": 0.390625, "learning_rate": 1.1372112440857224e-05, "loss": 0.8482, "step": 1950 }, { "epoch": 0.4886050588529927, "grad_norm": 0.38671875, "learning_rate": 1.136654606178681e-05, "loss": 0.8336, "step": 1951 }, { "epoch": 0.4888554971199599, "grad_norm": 0.39453125, "learning_rate": 1.1360979682716393e-05, "loss": 0.978, "step": 1952 }, { "epoch": 0.48910593538692715, "grad_norm": 0.345703125, "learning_rate": 1.1355413303645979e-05, "loss": 0.8307, "step": 1953 }, { "epoch": 0.48935637365389434, "grad_norm": 0.349609375, "learning_rate": 1.1349846924575565e-05, "loss": 0.9137, "step": 1954 }, { "epoch": 0.48960681192086153, "grad_norm": 0.35546875, "learning_rate": 1.134428054550515e-05, "loss": 0.9355, "step": 1955 }, { "epoch": 0.4898572501878287, "grad_norm": 0.369140625, "learning_rate": 1.1338714166434734e-05, "loss": 1.0285, "step": 1956 }, { "epoch": 0.4901076884547959, "grad_norm": 0.439453125, "learning_rate": 1.133314778736432e-05, "loss": 1.0618, "step": 1957 }, { "epoch": 0.4903581267217631, "grad_norm": 0.4140625, "learning_rate": 1.1327581408293907e-05, "loss": 0.8821, "step": 1958 }, { "epoch": 0.4906085649887303, "grad_norm": 0.341796875, "learning_rate": 1.1322015029223493e-05, "loss": 0.8753, "step": 1959 }, { "epoch": 0.4908590032556975, "grad_norm": 0.42578125, "learning_rate": 1.1316448650153076e-05, "loss": 0.9734, "step": 1960 }, { "epoch": 0.49110944152266467, "grad_norm": 0.380859375, "learning_rate": 1.1310882271082662e-05, "loss": 0.9247, "step": 1961 }, { "epoch": 0.49135987978963186, "grad_norm": 0.376953125, "learning_rate": 1.1305315892012248e-05, "loss": 0.8959, "step": 1962 }, { "epoch": 0.49161031805659905, "grad_norm": 0.37109375, "learning_rate": 1.1299749512941833e-05, "loss": 0.9447, "step": 1963 }, { "epoch": 0.49186075632356624, "grad_norm": 0.36328125, "learning_rate": 1.1294183133871417e-05, "loss": 0.9996, "step": 1964 }, { "epoch": 0.49211119459053343, "grad_norm": 0.341796875, "learning_rate": 1.1288616754801003e-05, "loss": 0.7639, "step": 1965 }, { "epoch": 0.4923616328575006, "grad_norm": 0.353515625, "learning_rate": 1.1283050375730588e-05, "loss": 0.9913, "step": 1966 }, { "epoch": 0.4926120711244678, "grad_norm": 0.37109375, "learning_rate": 1.1277483996660174e-05, "loss": 0.9415, "step": 1967 }, { "epoch": 0.492862509391435, "grad_norm": 0.359375, "learning_rate": 1.1271917617589758e-05, "loss": 0.9111, "step": 1968 }, { "epoch": 0.4931129476584022, "grad_norm": 0.337890625, "learning_rate": 1.1266351238519344e-05, "loss": 1.0666, "step": 1969 }, { "epoch": 0.4933633859253694, "grad_norm": 0.345703125, "learning_rate": 1.126078485944893e-05, "loss": 0.8618, "step": 1970 }, { "epoch": 0.49361382419233657, "grad_norm": 0.39453125, "learning_rate": 1.1255218480378513e-05, "loss": 1.0167, "step": 1971 }, { "epoch": 0.49386426245930376, "grad_norm": 0.359375, "learning_rate": 1.1249652101308099e-05, "loss": 0.9535, "step": 1972 }, { "epoch": 0.49411470072627095, "grad_norm": 0.35546875, "learning_rate": 1.1244085722237686e-05, "loss": 0.8581, "step": 1973 }, { "epoch": 0.49436513899323814, "grad_norm": 0.365234375, "learning_rate": 1.1238519343167272e-05, "loss": 0.8342, "step": 1974 }, { "epoch": 0.4946155772602054, "grad_norm": 0.3515625, "learning_rate": 1.1232952964096856e-05, "loss": 0.9137, "step": 1975 }, { "epoch": 0.4948660155271726, "grad_norm": 0.337890625, "learning_rate": 1.1227386585026441e-05, "loss": 0.7534, "step": 1976 }, { "epoch": 0.49511645379413977, "grad_norm": 0.369140625, "learning_rate": 1.1221820205956027e-05, "loss": 0.8502, "step": 1977 }, { "epoch": 0.49536689206110696, "grad_norm": 0.3125, "learning_rate": 1.1216253826885612e-05, "loss": 0.7646, "step": 1978 }, { "epoch": 0.49561733032807415, "grad_norm": 0.330078125, "learning_rate": 1.1210687447815196e-05, "loss": 0.9329, "step": 1979 }, { "epoch": 0.49586776859504134, "grad_norm": 0.3671875, "learning_rate": 1.1205121068744782e-05, "loss": 0.9402, "step": 1980 }, { "epoch": 0.4961182068620085, "grad_norm": 0.345703125, "learning_rate": 1.1199554689674368e-05, "loss": 0.9393, "step": 1981 }, { "epoch": 0.4963686451289757, "grad_norm": 0.375, "learning_rate": 1.1193988310603953e-05, "loss": 0.9453, "step": 1982 }, { "epoch": 0.4966190833959429, "grad_norm": 0.357421875, "learning_rate": 1.1188421931533537e-05, "loss": 1.0299, "step": 1983 }, { "epoch": 0.4968695216629101, "grad_norm": 0.353515625, "learning_rate": 1.1182855552463123e-05, "loss": 0.9669, "step": 1984 }, { "epoch": 0.4971199599298773, "grad_norm": 0.384765625, "learning_rate": 1.1177289173392708e-05, "loss": 1.0358, "step": 1985 }, { "epoch": 0.4973703981968445, "grad_norm": 0.41015625, "learning_rate": 1.1171722794322296e-05, "loss": 0.7809, "step": 1986 }, { "epoch": 0.49762083646381167, "grad_norm": 0.38671875, "learning_rate": 1.1166156415251878e-05, "loss": 1.0971, "step": 1987 }, { "epoch": 0.49787127473077886, "grad_norm": 0.322265625, "learning_rate": 1.1160590036181465e-05, "loss": 0.7944, "step": 1988 }, { "epoch": 0.49812171299774605, "grad_norm": 0.388671875, "learning_rate": 1.115502365711105e-05, "loss": 0.844, "step": 1989 }, { "epoch": 0.49837215126471324, "grad_norm": 0.333984375, "learning_rate": 1.1149457278040636e-05, "loss": 0.9896, "step": 1990 }, { "epoch": 0.4986225895316804, "grad_norm": 0.376953125, "learning_rate": 1.114389089897022e-05, "loss": 0.9824, "step": 1991 }, { "epoch": 0.4988730277986476, "grad_norm": 0.380859375, "learning_rate": 1.1138324519899806e-05, "loss": 0.8195, "step": 1992 }, { "epoch": 0.4991234660656148, "grad_norm": 0.33984375, "learning_rate": 1.1132758140829392e-05, "loss": 0.9992, "step": 1993 }, { "epoch": 0.499373904332582, "grad_norm": 0.337890625, "learning_rate": 1.1127191761758977e-05, "loss": 0.967, "step": 1994 }, { "epoch": 0.4996243425995492, "grad_norm": 0.37109375, "learning_rate": 1.1121625382688561e-05, "loss": 1.1774, "step": 1995 }, { "epoch": 0.4998747808665164, "grad_norm": 0.34375, "learning_rate": 1.1116059003618147e-05, "loss": 0.8219, "step": 1996 }, { "epoch": 0.5001252191334836, "grad_norm": 0.357421875, "learning_rate": 1.1110492624547732e-05, "loss": 0.9202, "step": 1997 }, { "epoch": 0.5003756574004508, "grad_norm": 0.375, "learning_rate": 1.110492624547732e-05, "loss": 0.8928, "step": 1998 }, { "epoch": 0.500626095667418, "grad_norm": 0.390625, "learning_rate": 1.1099359866406902e-05, "loss": 0.8007, "step": 1999 }, { "epoch": 0.5008765339343851, "grad_norm": 0.359375, "learning_rate": 1.1093793487336489e-05, "loss": 0.9473, "step": 2000 }, { "epoch": 0.5011269722013524, "grad_norm": 0.4375, "learning_rate": 1.1088227108266075e-05, "loss": 0.926, "step": 2001 }, { "epoch": 0.5013774104683195, "grad_norm": 0.39453125, "learning_rate": 1.108266072919566e-05, "loss": 1.0108, "step": 2002 }, { "epoch": 0.5016278487352868, "grad_norm": 0.380859375, "learning_rate": 1.1077094350125244e-05, "loss": 0.9559, "step": 2003 }, { "epoch": 0.5018782870022539, "grad_norm": 0.35546875, "learning_rate": 1.107152797105483e-05, "loss": 0.9646, "step": 2004 }, { "epoch": 0.5021287252692211, "grad_norm": 0.36328125, "learning_rate": 1.1065961591984415e-05, "loss": 0.9389, "step": 2005 }, { "epoch": 0.5023791635361883, "grad_norm": 0.388671875, "learning_rate": 1.1060395212914001e-05, "loss": 1.1093, "step": 2006 }, { "epoch": 0.5026296018031555, "grad_norm": 0.392578125, "learning_rate": 1.1054828833843585e-05, "loss": 0.9806, "step": 2007 }, { "epoch": 0.5028800400701228, "grad_norm": 0.33984375, "learning_rate": 1.104926245477317e-05, "loss": 0.9351, "step": 2008 }, { "epoch": 0.5031304783370899, "grad_norm": 0.365234375, "learning_rate": 1.1043696075702756e-05, "loss": 0.8863, "step": 2009 }, { "epoch": 0.5033809166040571, "grad_norm": 0.392578125, "learning_rate": 1.1038129696632342e-05, "loss": 0.954, "step": 2010 }, { "epoch": 0.5036313548710243, "grad_norm": 0.369140625, "learning_rate": 1.1032563317561926e-05, "loss": 0.8257, "step": 2011 }, { "epoch": 0.5038817931379915, "grad_norm": 0.396484375, "learning_rate": 1.1026996938491511e-05, "loss": 0.8678, "step": 2012 }, { "epoch": 0.5041322314049587, "grad_norm": 0.38671875, "learning_rate": 1.1021430559421099e-05, "loss": 0.9436, "step": 2013 }, { "epoch": 0.5043826696719259, "grad_norm": 0.390625, "learning_rate": 1.1015864180350684e-05, "loss": 0.8897, "step": 2014 }, { "epoch": 0.504633107938893, "grad_norm": 0.35546875, "learning_rate": 1.1010297801280268e-05, "loss": 0.9214, "step": 2015 }, { "epoch": 0.5048835462058603, "grad_norm": 0.373046875, "learning_rate": 1.1004731422209854e-05, "loss": 1.0184, "step": 2016 }, { "epoch": 0.5051339844728274, "grad_norm": 0.33984375, "learning_rate": 1.099916504313944e-05, "loss": 0.8913, "step": 2017 }, { "epoch": 0.5053844227397947, "grad_norm": 0.365234375, "learning_rate": 1.0993598664069025e-05, "loss": 0.8702, "step": 2018 }, { "epoch": 0.5056348610067618, "grad_norm": 0.380859375, "learning_rate": 1.0988032284998609e-05, "loss": 0.8739, "step": 2019 }, { "epoch": 0.505885299273729, "grad_norm": 0.3359375, "learning_rate": 1.0982465905928195e-05, "loss": 1.0006, "step": 2020 }, { "epoch": 0.5061357375406962, "grad_norm": 0.35546875, "learning_rate": 1.097689952685778e-05, "loss": 0.9544, "step": 2021 }, { "epoch": 0.5063861758076634, "grad_norm": 0.357421875, "learning_rate": 1.0971333147787364e-05, "loss": 0.814, "step": 2022 }, { "epoch": 0.5066366140746306, "grad_norm": 0.39453125, "learning_rate": 1.096576676871695e-05, "loss": 0.9345, "step": 2023 }, { "epoch": 0.5068870523415978, "grad_norm": 0.34375, "learning_rate": 1.0960200389646535e-05, "loss": 0.9415, "step": 2024 }, { "epoch": 0.507137490608565, "grad_norm": 0.400390625, "learning_rate": 1.0954634010576121e-05, "loss": 1.1184, "step": 2025 }, { "epoch": 0.5073879288755322, "grad_norm": 0.326171875, "learning_rate": 1.0949067631505705e-05, "loss": 0.7769, "step": 2026 }, { "epoch": 0.5076383671424993, "grad_norm": 0.36328125, "learning_rate": 1.094350125243529e-05, "loss": 0.9907, "step": 2027 }, { "epoch": 0.5078888054094666, "grad_norm": 0.3671875, "learning_rate": 1.0937934873364878e-05, "loss": 0.9186, "step": 2028 }, { "epoch": 0.5081392436764337, "grad_norm": 0.345703125, "learning_rate": 1.0932368494294463e-05, "loss": 0.8462, "step": 2029 }, { "epoch": 0.508389681943401, "grad_norm": 0.375, "learning_rate": 1.0926802115224047e-05, "loss": 1.0243, "step": 2030 }, { "epoch": 0.5086401202103682, "grad_norm": 0.330078125, "learning_rate": 1.0921235736153633e-05, "loss": 0.8783, "step": 2031 }, { "epoch": 0.5088905584773353, "grad_norm": 0.37109375, "learning_rate": 1.0915669357083219e-05, "loss": 0.9633, "step": 2032 }, { "epoch": 0.5091409967443026, "grad_norm": 0.36328125, "learning_rate": 1.0910102978012804e-05, "loss": 0.7711, "step": 2033 }, { "epoch": 0.5093914350112697, "grad_norm": 0.32421875, "learning_rate": 1.0904536598942388e-05, "loss": 0.9098, "step": 2034 }, { "epoch": 0.509641873278237, "grad_norm": 0.33203125, "learning_rate": 1.0898970219871974e-05, "loss": 0.9326, "step": 2035 }, { "epoch": 0.5098923115452041, "grad_norm": 0.31640625, "learning_rate": 1.089340384080156e-05, "loss": 0.9671, "step": 2036 }, { "epoch": 0.5101427498121713, "grad_norm": 0.396484375, "learning_rate": 1.0887837461731145e-05, "loss": 0.8839, "step": 2037 }, { "epoch": 0.5103931880791385, "grad_norm": 0.37890625, "learning_rate": 1.0882271082660729e-05, "loss": 1.0262, "step": 2038 }, { "epoch": 0.5106436263461057, "grad_norm": 0.37109375, "learning_rate": 1.0876704703590314e-05, "loss": 0.9464, "step": 2039 }, { "epoch": 0.5108940646130729, "grad_norm": 0.36328125, "learning_rate": 1.0871138324519902e-05, "loss": 1.055, "step": 2040 }, { "epoch": 0.5111445028800401, "grad_norm": 0.359375, "learning_rate": 1.0865571945449487e-05, "loss": 0.8717, "step": 2041 }, { "epoch": 0.5113949411470072, "grad_norm": 0.41796875, "learning_rate": 1.0860005566379071e-05, "loss": 1.0247, "step": 2042 }, { "epoch": 0.5116453794139745, "grad_norm": 0.400390625, "learning_rate": 1.0854439187308657e-05, "loss": 0.8521, "step": 2043 }, { "epoch": 0.5118958176809416, "grad_norm": 0.396484375, "learning_rate": 1.0848872808238242e-05, "loss": 0.9128, "step": 2044 }, { "epoch": 0.5121462559479089, "grad_norm": 0.376953125, "learning_rate": 1.0843306429167828e-05, "loss": 0.8187, "step": 2045 }, { "epoch": 0.512396694214876, "grad_norm": 0.3984375, "learning_rate": 1.0837740050097412e-05, "loss": 0.9769, "step": 2046 }, { "epoch": 0.5126471324818432, "grad_norm": 0.353515625, "learning_rate": 1.0832173671026998e-05, "loss": 0.7871, "step": 2047 }, { "epoch": 0.5128975707488104, "grad_norm": 0.337890625, "learning_rate": 1.0826607291956583e-05, "loss": 1.0069, "step": 2048 }, { "epoch": 0.5131480090157776, "grad_norm": 0.375, "learning_rate": 1.0821040912886169e-05, "loss": 0.8764, "step": 2049 }, { "epoch": 0.5133984472827448, "grad_norm": 0.34765625, "learning_rate": 1.0815474533815753e-05, "loss": 0.9516, "step": 2050 }, { "epoch": 0.513648885549712, "grad_norm": 0.333984375, "learning_rate": 1.0809908154745338e-05, "loss": 0.9848, "step": 2051 }, { "epoch": 0.5138993238166792, "grad_norm": 0.349609375, "learning_rate": 1.0804341775674924e-05, "loss": 0.868, "step": 2052 }, { "epoch": 0.5141497620836464, "grad_norm": 0.341796875, "learning_rate": 1.0798775396604511e-05, "loss": 1.1806, "step": 2053 }, { "epoch": 0.5144002003506136, "grad_norm": 0.392578125, "learning_rate": 1.0793209017534094e-05, "loss": 0.9642, "step": 2054 }, { "epoch": 0.5146506386175808, "grad_norm": 0.400390625, "learning_rate": 1.078764263846368e-05, "loss": 0.8756, "step": 2055 }, { "epoch": 0.514901076884548, "grad_norm": 0.345703125, "learning_rate": 1.0782076259393266e-05, "loss": 0.7894, "step": 2056 }, { "epoch": 0.5151515151515151, "grad_norm": 0.345703125, "learning_rate": 1.0776509880322852e-05, "loss": 0.975, "step": 2057 }, { "epoch": 0.5154019534184824, "grad_norm": 0.388671875, "learning_rate": 1.0770943501252436e-05, "loss": 0.8025, "step": 2058 }, { "epoch": 0.5156523916854495, "grad_norm": 0.3671875, "learning_rate": 1.0765377122182022e-05, "loss": 0.9996, "step": 2059 }, { "epoch": 0.5159028299524168, "grad_norm": 0.31640625, "learning_rate": 1.0759810743111607e-05, "loss": 1.0128, "step": 2060 }, { "epoch": 0.5161532682193839, "grad_norm": 0.3359375, "learning_rate": 1.0754244364041193e-05, "loss": 1.0137, "step": 2061 }, { "epoch": 0.5164037064863511, "grad_norm": 0.37890625, "learning_rate": 1.0748677984970777e-05, "loss": 0.95, "step": 2062 }, { "epoch": 0.5166541447533183, "grad_norm": 0.365234375, "learning_rate": 1.0743111605900362e-05, "loss": 1.0246, "step": 2063 }, { "epoch": 0.5169045830202855, "grad_norm": 0.373046875, "learning_rate": 1.0737545226829948e-05, "loss": 0.9326, "step": 2064 }, { "epoch": 0.5171550212872527, "grad_norm": 0.423828125, "learning_rate": 1.0731978847759534e-05, "loss": 0.9265, "step": 2065 }, { "epoch": 0.5174054595542199, "grad_norm": 0.412109375, "learning_rate": 1.0726412468689117e-05, "loss": 0.9379, "step": 2066 }, { "epoch": 0.517655897821187, "grad_norm": 0.341796875, "learning_rate": 1.0720846089618703e-05, "loss": 0.9198, "step": 2067 }, { "epoch": 0.5179063360881543, "grad_norm": 0.361328125, "learning_rate": 1.071527971054829e-05, "loss": 0.9645, "step": 2068 }, { "epoch": 0.5181567743551214, "grad_norm": 0.3984375, "learning_rate": 1.0709713331477876e-05, "loss": 1.0075, "step": 2069 }, { "epoch": 0.5184072126220887, "grad_norm": 0.357421875, "learning_rate": 1.070414695240746e-05, "loss": 0.8749, "step": 2070 }, { "epoch": 0.5186576508890558, "grad_norm": 0.357421875, "learning_rate": 1.0698580573337046e-05, "loss": 1.0735, "step": 2071 }, { "epoch": 0.518908089156023, "grad_norm": 0.51171875, "learning_rate": 1.0693014194266631e-05, "loss": 0.8799, "step": 2072 }, { "epoch": 0.5191585274229902, "grad_norm": 0.375, "learning_rate": 1.0687447815196215e-05, "loss": 0.8343, "step": 2073 }, { "epoch": 0.5194089656899574, "grad_norm": 0.369140625, "learning_rate": 1.06818814361258e-05, "loss": 0.9411, "step": 2074 }, { "epoch": 0.5196594039569247, "grad_norm": 0.328125, "learning_rate": 1.0676315057055386e-05, "loss": 0.928, "step": 2075 }, { "epoch": 0.5199098422238918, "grad_norm": 0.365234375, "learning_rate": 1.0670748677984972e-05, "loss": 0.7928, "step": 2076 }, { "epoch": 0.520160280490859, "grad_norm": 0.3828125, "learning_rate": 1.0665182298914556e-05, "loss": 0.9426, "step": 2077 }, { "epoch": 0.5204107187578262, "grad_norm": 0.3515625, "learning_rate": 1.0659615919844141e-05, "loss": 1.003, "step": 2078 }, { "epoch": 0.5206611570247934, "grad_norm": 0.3671875, "learning_rate": 1.0654049540773727e-05, "loss": 0.8452, "step": 2079 }, { "epoch": 0.5209115952917606, "grad_norm": 0.3828125, "learning_rate": 1.0648483161703314e-05, "loss": 1.0066, "step": 2080 }, { "epoch": 0.5211620335587278, "grad_norm": 0.34375, "learning_rate": 1.0642916782632897e-05, "loss": 0.9068, "step": 2081 }, { "epoch": 0.521412471825695, "grad_norm": 0.35546875, "learning_rate": 1.0637350403562484e-05, "loss": 0.9386, "step": 2082 }, { "epoch": 0.5216629100926622, "grad_norm": 0.369140625, "learning_rate": 1.063178402449207e-05, "loss": 0.7473, "step": 2083 }, { "epoch": 0.5219133483596293, "grad_norm": 0.341796875, "learning_rate": 1.0626217645421655e-05, "loss": 0.8195, "step": 2084 }, { "epoch": 0.5221637866265966, "grad_norm": 0.33984375, "learning_rate": 1.0620651266351239e-05, "loss": 0.769, "step": 2085 }, { "epoch": 0.5224142248935637, "grad_norm": 0.369140625, "learning_rate": 1.0615084887280825e-05, "loss": 0.9619, "step": 2086 }, { "epoch": 0.522664663160531, "grad_norm": 0.353515625, "learning_rate": 1.060951850821041e-05, "loss": 0.9208, "step": 2087 }, { "epoch": 0.5229151014274981, "grad_norm": 0.32421875, "learning_rate": 1.0603952129139996e-05, "loss": 0.9879, "step": 2088 }, { "epoch": 0.5231655396944653, "grad_norm": 0.34765625, "learning_rate": 1.059838575006958e-05, "loss": 1.0493, "step": 2089 }, { "epoch": 0.5234159779614325, "grad_norm": 0.328125, "learning_rate": 1.0592819370999165e-05, "loss": 0.9576, "step": 2090 }, { "epoch": 0.5236664162283997, "grad_norm": 0.365234375, "learning_rate": 1.0587252991928751e-05, "loss": 0.9835, "step": 2091 }, { "epoch": 0.5239168544953668, "grad_norm": 0.3515625, "learning_rate": 1.0581686612858337e-05, "loss": 0.7908, "step": 2092 }, { "epoch": 0.5241672927623341, "grad_norm": 0.3203125, "learning_rate": 1.057612023378792e-05, "loss": 0.9834, "step": 2093 }, { "epoch": 0.5244177310293012, "grad_norm": 0.33984375, "learning_rate": 1.0570553854717506e-05, "loss": 1.1374, "step": 2094 }, { "epoch": 0.5246681692962685, "grad_norm": 0.3359375, "learning_rate": 1.0564987475647093e-05, "loss": 0.8125, "step": 2095 }, { "epoch": 0.5249186075632357, "grad_norm": 0.365234375, "learning_rate": 1.0559421096576679e-05, "loss": 0.956, "step": 2096 }, { "epoch": 0.5251690458302029, "grad_norm": 0.37890625, "learning_rate": 1.0553854717506263e-05, "loss": 0.9652, "step": 2097 }, { "epoch": 0.5254194840971701, "grad_norm": 0.373046875, "learning_rate": 1.0548288338435849e-05, "loss": 1.0216, "step": 2098 }, { "epoch": 0.5256699223641372, "grad_norm": 0.392578125, "learning_rate": 1.0542721959365434e-05, "loss": 0.7915, "step": 2099 }, { "epoch": 0.5259203606311045, "grad_norm": 0.388671875, "learning_rate": 1.053715558029502e-05, "loss": 0.816, "step": 2100 }, { "epoch": 0.5261707988980716, "grad_norm": 0.384765625, "learning_rate": 1.0531589201224604e-05, "loss": 0.9912, "step": 2101 }, { "epoch": 0.5264212371650389, "grad_norm": 0.39453125, "learning_rate": 1.052602282215419e-05, "loss": 0.9962, "step": 2102 }, { "epoch": 0.526671675432006, "grad_norm": 0.3828125, "learning_rate": 1.0520456443083775e-05, "loss": 0.9676, "step": 2103 }, { "epoch": 0.5269221136989732, "grad_norm": 0.357421875, "learning_rate": 1.051489006401336e-05, "loss": 0.8696, "step": 2104 }, { "epoch": 0.5271725519659404, "grad_norm": 0.33984375, "learning_rate": 1.0509323684942944e-05, "loss": 0.8587, "step": 2105 }, { "epoch": 0.5274229902329076, "grad_norm": 0.40234375, "learning_rate": 1.050375730587253e-05, "loss": 0.9292, "step": 2106 }, { "epoch": 0.5276734284998748, "grad_norm": 0.390625, "learning_rate": 1.0498190926802116e-05, "loss": 0.7759, "step": 2107 }, { "epoch": 0.527923866766842, "grad_norm": 0.3515625, "learning_rate": 1.0492624547731703e-05, "loss": 0.8015, "step": 2108 }, { "epoch": 0.5281743050338091, "grad_norm": 0.384765625, "learning_rate": 1.0487058168661285e-05, "loss": 0.7408, "step": 2109 }, { "epoch": 0.5284247433007764, "grad_norm": 0.400390625, "learning_rate": 1.0481491789590873e-05, "loss": 0.8828, "step": 2110 }, { "epoch": 0.5286751815677435, "grad_norm": 0.380859375, "learning_rate": 1.0475925410520458e-05, "loss": 0.9323, "step": 2111 }, { "epoch": 0.5289256198347108, "grad_norm": 0.357421875, "learning_rate": 1.0470359031450044e-05, "loss": 0.9014, "step": 2112 }, { "epoch": 0.5291760581016779, "grad_norm": 0.345703125, "learning_rate": 1.0464792652379628e-05, "loss": 0.9837, "step": 2113 }, { "epoch": 0.5294264963686451, "grad_norm": 0.369140625, "learning_rate": 1.0459226273309213e-05, "loss": 0.8692, "step": 2114 }, { "epoch": 0.5296769346356123, "grad_norm": 0.388671875, "learning_rate": 1.0453659894238799e-05, "loss": 0.935, "step": 2115 }, { "epoch": 0.5299273729025795, "grad_norm": 0.4140625, "learning_rate": 1.0448093515168384e-05, "loss": 1.1133, "step": 2116 }, { "epoch": 0.5301778111695467, "grad_norm": 0.30859375, "learning_rate": 1.0442527136097968e-05, "loss": 0.9988, "step": 2117 }, { "epoch": 0.5304282494365139, "grad_norm": 0.3828125, "learning_rate": 1.0436960757027554e-05, "loss": 1.0262, "step": 2118 }, { "epoch": 0.5306786877034811, "grad_norm": 0.369140625, "learning_rate": 1.043139437795714e-05, "loss": 0.8431, "step": 2119 }, { "epoch": 0.5309291259704483, "grad_norm": 0.361328125, "learning_rate": 1.0425827998886727e-05, "loss": 0.9476, "step": 2120 }, { "epoch": 0.5311795642374155, "grad_norm": 0.375, "learning_rate": 1.042026161981631e-05, "loss": 1.0248, "step": 2121 }, { "epoch": 0.5314300025043827, "grad_norm": 0.36328125, "learning_rate": 1.0414695240745896e-05, "loss": 0.934, "step": 2122 }, { "epoch": 0.5316804407713499, "grad_norm": 0.36328125, "learning_rate": 1.0409128861675482e-05, "loss": 1.0, "step": 2123 }, { "epoch": 0.531930879038317, "grad_norm": 0.341796875, "learning_rate": 1.0403562482605066e-05, "loss": 0.9413, "step": 2124 }, { "epoch": 0.5321813173052843, "grad_norm": 0.38671875, "learning_rate": 1.0397996103534652e-05, "loss": 0.6675, "step": 2125 }, { "epoch": 0.5324317555722514, "grad_norm": 0.400390625, "learning_rate": 1.0392429724464237e-05, "loss": 0.837, "step": 2126 }, { "epoch": 0.5326821938392187, "grad_norm": 0.36328125, "learning_rate": 1.0386863345393823e-05, "loss": 0.8952, "step": 2127 }, { "epoch": 0.5329326321061858, "grad_norm": 0.279296875, "learning_rate": 1.0381296966323407e-05, "loss": 1.1274, "step": 2128 }, { "epoch": 0.533183070373153, "grad_norm": 0.380859375, "learning_rate": 1.0375730587252992e-05, "loss": 0.8485, "step": 2129 }, { "epoch": 0.5334335086401202, "grad_norm": 0.35546875, "learning_rate": 1.0370164208182578e-05, "loss": 0.9519, "step": 2130 }, { "epoch": 0.5336839469070874, "grad_norm": 0.35546875, "learning_rate": 1.0364597829112164e-05, "loss": 0.9974, "step": 2131 }, { "epoch": 0.5339343851740546, "grad_norm": 0.369140625, "learning_rate": 1.0359031450041747e-05, "loss": 0.7853, "step": 2132 }, { "epoch": 0.5341848234410218, "grad_norm": 0.341796875, "learning_rate": 1.0353465070971333e-05, "loss": 1.0356, "step": 2133 }, { "epoch": 0.5344352617079889, "grad_norm": 0.359375, "learning_rate": 1.0347898691900919e-05, "loss": 0.8667, "step": 2134 }, { "epoch": 0.5346856999749562, "grad_norm": 0.353515625, "learning_rate": 1.0342332312830506e-05, "loss": 0.9949, "step": 2135 }, { "epoch": 0.5349361382419233, "grad_norm": 0.431640625, "learning_rate": 1.0336765933760088e-05, "loss": 0.9448, "step": 2136 }, { "epoch": 0.5351865765088906, "grad_norm": 0.30078125, "learning_rate": 1.0331199554689676e-05, "loss": 0.6942, "step": 2137 }, { "epoch": 0.5354370147758577, "grad_norm": 0.390625, "learning_rate": 1.0325633175619261e-05, "loss": 0.8925, "step": 2138 }, { "epoch": 0.535687453042825, "grad_norm": 0.380859375, "learning_rate": 1.0320066796548847e-05, "loss": 0.9858, "step": 2139 }, { "epoch": 0.5359378913097922, "grad_norm": 0.37109375, "learning_rate": 1.031450041747843e-05, "loss": 0.8641, "step": 2140 }, { "epoch": 0.5361883295767593, "grad_norm": 0.400390625, "learning_rate": 1.0308934038408016e-05, "loss": 1.0112, "step": 2141 }, { "epoch": 0.5364387678437266, "grad_norm": 0.384765625, "learning_rate": 1.0303367659337602e-05, "loss": 1.1675, "step": 2142 }, { "epoch": 0.5366892061106937, "grad_norm": 0.400390625, "learning_rate": 1.0297801280267188e-05, "loss": 0.8528, "step": 2143 }, { "epoch": 0.536939644377661, "grad_norm": 0.3671875, "learning_rate": 1.0292234901196771e-05, "loss": 0.7347, "step": 2144 }, { "epoch": 0.5371900826446281, "grad_norm": 0.392578125, "learning_rate": 1.0286668522126357e-05, "loss": 0.8178, "step": 2145 }, { "epoch": 0.5374405209115953, "grad_norm": 0.37890625, "learning_rate": 1.0281102143055943e-05, "loss": 0.9841, "step": 2146 }, { "epoch": 0.5376909591785625, "grad_norm": 0.33203125, "learning_rate": 1.0275535763985528e-05, "loss": 1.0699, "step": 2147 }, { "epoch": 0.5379413974455297, "grad_norm": 0.375, "learning_rate": 1.0269969384915112e-05, "loss": 1.0215, "step": 2148 }, { "epoch": 0.5381918357124968, "grad_norm": 0.3828125, "learning_rate": 1.0264403005844698e-05, "loss": 0.8976, "step": 2149 }, { "epoch": 0.5384422739794641, "grad_norm": 0.31640625, "learning_rate": 1.0258836626774285e-05, "loss": 0.9299, "step": 2150 }, { "epoch": 0.5386927122464312, "grad_norm": 0.3671875, "learning_rate": 1.025327024770387e-05, "loss": 0.8317, "step": 2151 }, { "epoch": 0.5389431505133985, "grad_norm": 0.390625, "learning_rate": 1.0247703868633455e-05, "loss": 0.8077, "step": 2152 }, { "epoch": 0.5391935887803656, "grad_norm": 0.35546875, "learning_rate": 1.024213748956304e-05, "loss": 1.036, "step": 2153 }, { "epoch": 0.5394440270473329, "grad_norm": 0.396484375, "learning_rate": 1.0236571110492626e-05, "loss": 0.9191, "step": 2154 }, { "epoch": 0.5396944653143, "grad_norm": 0.359375, "learning_rate": 1.0231004731422211e-05, "loss": 1.0331, "step": 2155 }, { "epoch": 0.5399449035812672, "grad_norm": 0.35546875, "learning_rate": 1.0225438352351795e-05, "loss": 0.9378, "step": 2156 }, { "epoch": 0.5401953418482344, "grad_norm": 0.408203125, "learning_rate": 1.0219871973281381e-05, "loss": 0.8466, "step": 2157 }, { "epoch": 0.5404457801152016, "grad_norm": 0.369140625, "learning_rate": 1.0214305594210967e-05, "loss": 0.7715, "step": 2158 }, { "epoch": 0.5406962183821687, "grad_norm": 0.39453125, "learning_rate": 1.0208739215140552e-05, "loss": 1.0957, "step": 2159 }, { "epoch": 0.540946656649136, "grad_norm": 0.3828125, "learning_rate": 1.0203172836070136e-05, "loss": 0.8775, "step": 2160 }, { "epoch": 0.5411970949161031, "grad_norm": 0.373046875, "learning_rate": 1.0197606456999722e-05, "loss": 0.9496, "step": 2161 }, { "epoch": 0.5414475331830704, "grad_norm": 0.376953125, "learning_rate": 1.0192040077929309e-05, "loss": 0.8679, "step": 2162 }, { "epoch": 0.5416979714500376, "grad_norm": 0.345703125, "learning_rate": 1.0186473698858895e-05, "loss": 0.91, "step": 2163 }, { "epoch": 0.5419484097170048, "grad_norm": 0.3671875, "learning_rate": 1.0180907319788479e-05, "loss": 0.9662, "step": 2164 }, { "epoch": 0.542198847983972, "grad_norm": 0.431640625, "learning_rate": 1.0175340940718064e-05, "loss": 0.9303, "step": 2165 }, { "epoch": 0.5424492862509391, "grad_norm": 0.357421875, "learning_rate": 1.016977456164765e-05, "loss": 1.0383, "step": 2166 }, { "epoch": 0.5426997245179064, "grad_norm": 0.35546875, "learning_rate": 1.0164208182577235e-05, "loss": 1.0347, "step": 2167 }, { "epoch": 0.5429501627848735, "grad_norm": 0.376953125, "learning_rate": 1.015864180350682e-05, "loss": 1.0574, "step": 2168 }, { "epoch": 0.5432006010518408, "grad_norm": 0.357421875, "learning_rate": 1.0153075424436405e-05, "loss": 0.937, "step": 2169 }, { "epoch": 0.5434510393188079, "grad_norm": 0.359375, "learning_rate": 1.014750904536599e-05, "loss": 0.9557, "step": 2170 }, { "epoch": 0.5437014775857751, "grad_norm": 0.361328125, "learning_rate": 1.0141942666295576e-05, "loss": 1.0403, "step": 2171 }, { "epoch": 0.5439519158527423, "grad_norm": 0.337890625, "learning_rate": 1.013637628722516e-05, "loss": 0.9562, "step": 2172 }, { "epoch": 0.5442023541197095, "grad_norm": 0.396484375, "learning_rate": 1.0130809908154746e-05, "loss": 0.9722, "step": 2173 }, { "epoch": 0.5444527923866767, "grad_norm": 0.390625, "learning_rate": 1.0125243529084331e-05, "loss": 0.9532, "step": 2174 }, { "epoch": 0.5447032306536439, "grad_norm": 0.390625, "learning_rate": 1.0119677150013915e-05, "loss": 0.9561, "step": 2175 }, { "epoch": 0.544953668920611, "grad_norm": 0.375, "learning_rate": 1.0114110770943501e-05, "loss": 0.9749, "step": 2176 }, { "epoch": 0.5452041071875783, "grad_norm": 0.369140625, "learning_rate": 1.0108544391873088e-05, "loss": 0.9264, "step": 2177 }, { "epoch": 0.5454545454545454, "grad_norm": 0.404296875, "learning_rate": 1.0102978012802674e-05, "loss": 0.912, "step": 2178 }, { "epoch": 0.5457049837215127, "grad_norm": 0.37890625, "learning_rate": 1.0097411633732258e-05, "loss": 0.9693, "step": 2179 }, { "epoch": 0.5459554219884798, "grad_norm": 0.35546875, "learning_rate": 1.0091845254661843e-05, "loss": 0.8739, "step": 2180 }, { "epoch": 0.546205860255447, "grad_norm": 0.41015625, "learning_rate": 1.0086278875591429e-05, "loss": 0.8963, "step": 2181 }, { "epoch": 0.5464562985224142, "grad_norm": 0.3671875, "learning_rate": 1.0080712496521015e-05, "loss": 0.953, "step": 2182 }, { "epoch": 0.5467067367893814, "grad_norm": 0.361328125, "learning_rate": 1.0075146117450598e-05, "loss": 0.8903, "step": 2183 }, { "epoch": 0.5469571750563486, "grad_norm": 0.361328125, "learning_rate": 1.0069579738380184e-05, "loss": 0.9356, "step": 2184 }, { "epoch": 0.5472076133233158, "grad_norm": 0.3515625, "learning_rate": 1.006401335930977e-05, "loss": 0.77, "step": 2185 }, { "epoch": 0.547458051590283, "grad_norm": 0.35546875, "learning_rate": 1.0058446980239355e-05, "loss": 0.9303, "step": 2186 }, { "epoch": 0.5477084898572502, "grad_norm": 0.345703125, "learning_rate": 1.005288060116894e-05, "loss": 0.757, "step": 2187 }, { "epoch": 0.5479589281242174, "grad_norm": 0.373046875, "learning_rate": 1.0047314222098525e-05, "loss": 0.8564, "step": 2188 }, { "epoch": 0.5482093663911846, "grad_norm": 0.4296875, "learning_rate": 1.004174784302811e-05, "loss": 1.0285, "step": 2189 }, { "epoch": 0.5484598046581518, "grad_norm": 0.400390625, "learning_rate": 1.0036181463957698e-05, "loss": 1.0281, "step": 2190 }, { "epoch": 0.5487102429251189, "grad_norm": 0.419921875, "learning_rate": 1.003061508488728e-05, "loss": 0.833, "step": 2191 }, { "epoch": 0.5489606811920862, "grad_norm": 0.34375, "learning_rate": 1.0025048705816867e-05, "loss": 0.8672, "step": 2192 }, { "epoch": 0.5492111194590533, "grad_norm": 0.380859375, "learning_rate": 1.0019482326746453e-05, "loss": 0.9104, "step": 2193 }, { "epoch": 0.5494615577260206, "grad_norm": 0.318359375, "learning_rate": 1.0013915947676038e-05, "loss": 0.7874, "step": 2194 }, { "epoch": 0.5497119959929877, "grad_norm": 0.4296875, "learning_rate": 1.0008349568605622e-05, "loss": 0.941, "step": 2195 }, { "epoch": 0.549962434259955, "grad_norm": 0.388671875, "learning_rate": 1.0002783189535208e-05, "loss": 0.8833, "step": 2196 }, { "epoch": 0.5502128725269221, "grad_norm": 0.376953125, "learning_rate": 9.997216810464794e-06, "loss": 0.9041, "step": 2197 }, { "epoch": 0.5504633107938893, "grad_norm": 0.357421875, "learning_rate": 9.99165043139438e-06, "loss": 0.9255, "step": 2198 }, { "epoch": 0.5507137490608565, "grad_norm": 0.376953125, "learning_rate": 9.986084052323965e-06, "loss": 0.9131, "step": 2199 }, { "epoch": 0.5509641873278237, "grad_norm": 0.3984375, "learning_rate": 9.980517673253549e-06, "loss": 0.8194, "step": 2200 }, { "epoch": 0.5512146255947908, "grad_norm": 0.38671875, "learning_rate": 9.974951294183134e-06, "loss": 0.9474, "step": 2201 }, { "epoch": 0.5514650638617581, "grad_norm": 0.357421875, "learning_rate": 9.96938491511272e-06, "loss": 1.0961, "step": 2202 }, { "epoch": 0.5517155021287252, "grad_norm": 0.322265625, "learning_rate": 9.963818536042306e-06, "loss": 0.8609, "step": 2203 }, { "epoch": 0.5519659403956925, "grad_norm": 0.37109375, "learning_rate": 9.958252156971891e-06, "loss": 0.8313, "step": 2204 }, { "epoch": 0.5522163786626596, "grad_norm": 0.396484375, "learning_rate": 9.952685777901477e-06, "loss": 0.9156, "step": 2205 }, { "epoch": 0.5524668169296268, "grad_norm": 0.3359375, "learning_rate": 9.94711939883106e-06, "loss": 0.9901, "step": 2206 }, { "epoch": 0.5527172551965941, "grad_norm": 0.38671875, "learning_rate": 9.941553019760646e-06, "loss": 0.9261, "step": 2207 }, { "epoch": 0.5529676934635612, "grad_norm": 0.375, "learning_rate": 9.935986640690232e-06, "loss": 1.0402, "step": 2208 }, { "epoch": 0.5532181317305285, "grad_norm": 0.359375, "learning_rate": 9.930420261619818e-06, "loss": 0.936, "step": 2209 }, { "epoch": 0.5534685699974956, "grad_norm": 0.396484375, "learning_rate": 9.924853882549401e-06, "loss": 0.977, "step": 2210 }, { "epoch": 0.5537190082644629, "grad_norm": 0.34765625, "learning_rate": 9.919287503478989e-06, "loss": 0.9784, "step": 2211 }, { "epoch": 0.55396944653143, "grad_norm": 0.3359375, "learning_rate": 9.913721124408573e-06, "loss": 1.0782, "step": 2212 }, { "epoch": 0.5542198847983972, "grad_norm": 0.349609375, "learning_rate": 9.908154745338158e-06, "loss": 0.9166, "step": 2213 }, { "epoch": 0.5544703230653644, "grad_norm": 0.361328125, "learning_rate": 9.902588366267744e-06, "loss": 0.9344, "step": 2214 }, { "epoch": 0.5547207613323316, "grad_norm": 0.337890625, "learning_rate": 9.89702198719733e-06, "loss": 0.8225, "step": 2215 }, { "epoch": 0.5549711995992987, "grad_norm": 0.40625, "learning_rate": 9.891455608126913e-06, "loss": 0.9497, "step": 2216 }, { "epoch": 0.555221637866266, "grad_norm": 0.376953125, "learning_rate": 9.8858892290565e-06, "loss": 0.8565, "step": 2217 }, { "epoch": 0.5554720761332331, "grad_norm": 0.466796875, "learning_rate": 9.880322849986085e-06, "loss": 0.9301, "step": 2218 }, { "epoch": 0.5557225144002004, "grad_norm": 0.384765625, "learning_rate": 9.87475647091567e-06, "loss": 0.9396, "step": 2219 }, { "epoch": 0.5559729526671675, "grad_norm": 0.35546875, "learning_rate": 9.869190091845256e-06, "loss": 1.0488, "step": 2220 }, { "epoch": 0.5562233909341348, "grad_norm": 0.361328125, "learning_rate": 9.863623712774842e-06, "loss": 0.8532, "step": 2221 }, { "epoch": 0.5564738292011019, "grad_norm": 0.37109375, "learning_rate": 9.858057333704425e-06, "loss": 1.0361, "step": 2222 }, { "epoch": 0.5567242674680691, "grad_norm": 0.349609375, "learning_rate": 9.852490954634011e-06, "loss": 0.9449, "step": 2223 }, { "epoch": 0.5569747057350363, "grad_norm": 0.404296875, "learning_rate": 9.846924575563597e-06, "loss": 1.0021, "step": 2224 }, { "epoch": 0.5572251440020035, "grad_norm": 0.36328125, "learning_rate": 9.841358196493182e-06, "loss": 0.9587, "step": 2225 }, { "epoch": 0.5574755822689706, "grad_norm": 0.3671875, "learning_rate": 9.835791817422768e-06, "loss": 1.0747, "step": 2226 }, { "epoch": 0.5577260205359379, "grad_norm": 0.380859375, "learning_rate": 9.830225438352352e-06, "loss": 1.0334, "step": 2227 }, { "epoch": 0.557976458802905, "grad_norm": 0.369140625, "learning_rate": 9.824659059281937e-06, "loss": 1.026, "step": 2228 }, { "epoch": 0.5582268970698723, "grad_norm": 0.384765625, "learning_rate": 9.819092680211523e-06, "loss": 0.9154, "step": 2229 }, { "epoch": 0.5584773353368395, "grad_norm": 0.390625, "learning_rate": 9.813526301141109e-06, "loss": 0.8392, "step": 2230 }, { "epoch": 0.5587277736038067, "grad_norm": 0.34375, "learning_rate": 9.807959922070693e-06, "loss": 0.8926, "step": 2231 }, { "epoch": 0.5589782118707739, "grad_norm": 0.35546875, "learning_rate": 9.80239354300028e-06, "loss": 0.9294, "step": 2232 }, { "epoch": 0.559228650137741, "grad_norm": 0.388671875, "learning_rate": 9.796827163929864e-06, "loss": 0.9901, "step": 2233 }, { "epoch": 0.5594790884047083, "grad_norm": 0.388671875, "learning_rate": 9.79126078485945e-06, "loss": 0.9098, "step": 2234 }, { "epoch": 0.5597295266716754, "grad_norm": 0.37890625, "learning_rate": 9.785694405789035e-06, "loss": 1.0387, "step": 2235 }, { "epoch": 0.5599799649386427, "grad_norm": 0.34375, "learning_rate": 9.78012802671862e-06, "loss": 0.9395, "step": 2236 }, { "epoch": 0.5602304032056098, "grad_norm": 0.361328125, "learning_rate": 9.774561647648205e-06, "loss": 0.8383, "step": 2237 }, { "epoch": 0.560480841472577, "grad_norm": 0.365234375, "learning_rate": 9.768995268577792e-06, "loss": 0.8734, "step": 2238 }, { "epoch": 0.5607312797395442, "grad_norm": 0.369140625, "learning_rate": 9.763428889507376e-06, "loss": 0.8908, "step": 2239 }, { "epoch": 0.5609817180065114, "grad_norm": 0.357421875, "learning_rate": 9.757862510436961e-06, "loss": 0.8608, "step": 2240 }, { "epoch": 0.5612321562734786, "grad_norm": 0.45703125, "learning_rate": 9.752296131366547e-06, "loss": 0.9641, "step": 2241 }, { "epoch": 0.5614825945404458, "grad_norm": 0.330078125, "learning_rate": 9.746729752296133e-06, "loss": 0.9733, "step": 2242 }, { "epoch": 0.5617330328074129, "grad_norm": 0.322265625, "learning_rate": 9.741163373225717e-06, "loss": 0.8947, "step": 2243 }, { "epoch": 0.5619834710743802, "grad_norm": 0.4609375, "learning_rate": 9.735596994155304e-06, "loss": 0.8349, "step": 2244 }, { "epoch": 0.5622339093413473, "grad_norm": 0.380859375, "learning_rate": 9.730030615084888e-06, "loss": 0.983, "step": 2245 }, { "epoch": 0.5624843476083146, "grad_norm": 0.43359375, "learning_rate": 9.724464236014473e-06, "loss": 0.9535, "step": 2246 }, { "epoch": 0.5627347858752817, "grad_norm": 0.388671875, "learning_rate": 9.718897856944059e-06, "loss": 0.9282, "step": 2247 }, { "epoch": 0.5629852241422489, "grad_norm": 0.361328125, "learning_rate": 9.713331477873645e-06, "loss": 1.1231, "step": 2248 }, { "epoch": 0.5632356624092161, "grad_norm": 0.34765625, "learning_rate": 9.707765098803228e-06, "loss": 0.9253, "step": 2249 }, { "epoch": 0.5634861006761833, "grad_norm": 0.380859375, "learning_rate": 9.702198719732814e-06, "loss": 0.9418, "step": 2250 }, { "epoch": 0.5637365389431506, "grad_norm": 0.42578125, "learning_rate": 9.6966323406624e-06, "loss": 0.7729, "step": 2251 }, { "epoch": 0.5639869772101177, "grad_norm": 0.390625, "learning_rate": 9.691065961591985e-06, "loss": 0.8778, "step": 2252 }, { "epoch": 0.564237415477085, "grad_norm": 0.333984375, "learning_rate": 9.685499582521571e-06, "loss": 0.926, "step": 2253 }, { "epoch": 0.5644878537440521, "grad_norm": 0.3828125, "learning_rate": 9.679933203451157e-06, "loss": 0.681, "step": 2254 }, { "epoch": 0.5647382920110193, "grad_norm": 0.400390625, "learning_rate": 9.67436682438074e-06, "loss": 0.8276, "step": 2255 }, { "epoch": 0.5649887302779865, "grad_norm": 0.337890625, "learning_rate": 9.668800445310326e-06, "loss": 0.9176, "step": 2256 }, { "epoch": 0.5652391685449537, "grad_norm": 0.361328125, "learning_rate": 9.663234066239912e-06, "loss": 1.0063, "step": 2257 }, { "epoch": 0.5654896068119208, "grad_norm": 0.33984375, "learning_rate": 9.657667687169497e-06, "loss": 0.8518, "step": 2258 }, { "epoch": 0.5657400450788881, "grad_norm": 0.359375, "learning_rate": 9.652101308099083e-06, "loss": 1.0679, "step": 2259 }, { "epoch": 0.5659904833458552, "grad_norm": 0.431640625, "learning_rate": 9.646534929028669e-06, "loss": 0.9634, "step": 2260 }, { "epoch": 0.5662409216128225, "grad_norm": 0.3828125, "learning_rate": 9.640968549958252e-06, "loss": 0.9799, "step": 2261 }, { "epoch": 0.5664913598797896, "grad_norm": 0.421875, "learning_rate": 9.635402170887838e-06, "loss": 0.944, "step": 2262 }, { "epoch": 0.5667417981467568, "grad_norm": 0.37109375, "learning_rate": 9.629835791817424e-06, "loss": 0.7252, "step": 2263 }, { "epoch": 0.566992236413724, "grad_norm": 0.3671875, "learning_rate": 9.62426941274701e-06, "loss": 0.9654, "step": 2264 }, { "epoch": 0.5672426746806912, "grad_norm": 0.380859375, "learning_rate": 9.618703033676595e-06, "loss": 1.0014, "step": 2265 }, { "epoch": 0.5674931129476584, "grad_norm": 0.36328125, "learning_rate": 9.61313665460618e-06, "loss": 0.8928, "step": 2266 }, { "epoch": 0.5677435512146256, "grad_norm": 0.33984375, "learning_rate": 9.607570275535764e-06, "loss": 0.794, "step": 2267 }, { "epoch": 0.5679939894815927, "grad_norm": 0.359375, "learning_rate": 9.60200389646535e-06, "loss": 0.9876, "step": 2268 }, { "epoch": 0.56824442774856, "grad_norm": 0.380859375, "learning_rate": 9.596437517394936e-06, "loss": 0.7184, "step": 2269 }, { "epoch": 0.5684948660155271, "grad_norm": 0.3984375, "learning_rate": 9.590871138324521e-06, "loss": 1.1506, "step": 2270 }, { "epoch": 0.5687453042824944, "grad_norm": 0.326171875, "learning_rate": 9.585304759254105e-06, "loss": 0.7908, "step": 2271 }, { "epoch": 0.5689957425494615, "grad_norm": 0.365234375, "learning_rate": 9.579738380183692e-06, "loss": 0.7743, "step": 2272 }, { "epoch": 0.5692461808164287, "grad_norm": 0.466796875, "learning_rate": 9.574172001113276e-06, "loss": 0.8372, "step": 2273 }, { "epoch": 0.569496619083396, "grad_norm": 0.41796875, "learning_rate": 9.568605622042862e-06, "loss": 0.9128, "step": 2274 }, { "epoch": 0.5697470573503631, "grad_norm": 0.375, "learning_rate": 9.563039242972448e-06, "loss": 0.8308, "step": 2275 }, { "epoch": 0.5699974956173304, "grad_norm": 0.38671875, "learning_rate": 9.557472863902032e-06, "loss": 0.8405, "step": 2276 }, { "epoch": 0.5702479338842975, "grad_norm": 0.361328125, "learning_rate": 9.551906484831617e-06, "loss": 0.9707, "step": 2277 }, { "epoch": 0.5704983721512648, "grad_norm": 0.38671875, "learning_rate": 9.546340105761203e-06, "loss": 0.7665, "step": 2278 }, { "epoch": 0.5707488104182319, "grad_norm": 0.34375, "learning_rate": 9.540773726690788e-06, "loss": 0.8507, "step": 2279 }, { "epoch": 0.5709992486851991, "grad_norm": 0.36328125, "learning_rate": 9.535207347620374e-06, "loss": 1.0427, "step": 2280 }, { "epoch": 0.5712496869521663, "grad_norm": 0.37890625, "learning_rate": 9.52964096854996e-06, "loss": 0.8678, "step": 2281 }, { "epoch": 0.5715001252191335, "grad_norm": 0.42578125, "learning_rate": 9.524074589479544e-06, "loss": 1.0613, "step": 2282 }, { "epoch": 0.5717505634861006, "grad_norm": 0.38671875, "learning_rate": 9.518508210409129e-06, "loss": 0.8908, "step": 2283 }, { "epoch": 0.5720010017530679, "grad_norm": 0.40234375, "learning_rate": 9.512941831338715e-06, "loss": 0.7972, "step": 2284 }, { "epoch": 0.572251440020035, "grad_norm": 0.435546875, "learning_rate": 9.5073754522683e-06, "loss": 0.8794, "step": 2285 }, { "epoch": 0.5725018782870023, "grad_norm": 0.283203125, "learning_rate": 9.501809073197886e-06, "loss": 0.6206, "step": 2286 }, { "epoch": 0.5727523165539694, "grad_norm": 0.349609375, "learning_rate": 9.496242694127472e-06, "loss": 0.8148, "step": 2287 }, { "epoch": 0.5730027548209367, "grad_norm": 0.5546875, "learning_rate": 9.490676315057055e-06, "loss": 0.8965, "step": 2288 }, { "epoch": 0.5732531930879038, "grad_norm": 0.3671875, "learning_rate": 9.485109935986641e-06, "loss": 0.9891, "step": 2289 }, { "epoch": 0.573503631354871, "grad_norm": 0.36328125, "learning_rate": 9.479543556916227e-06, "loss": 0.9593, "step": 2290 }, { "epoch": 0.5737540696218382, "grad_norm": 0.404296875, "learning_rate": 9.473977177845812e-06, "loss": 0.9458, "step": 2291 }, { "epoch": 0.5740045078888054, "grad_norm": 0.396484375, "learning_rate": 9.468410798775396e-06, "loss": 0.8296, "step": 2292 }, { "epoch": 0.5742549461557725, "grad_norm": 0.37890625, "learning_rate": 9.462844419704984e-06, "loss": 0.7844, "step": 2293 }, { "epoch": 0.5745053844227398, "grad_norm": 0.35546875, "learning_rate": 9.457278040634567e-06, "loss": 1.1213, "step": 2294 }, { "epoch": 0.574755822689707, "grad_norm": 0.392578125, "learning_rate": 9.451711661564153e-06, "loss": 0.8191, "step": 2295 }, { "epoch": 0.5750062609566742, "grad_norm": 0.40234375, "learning_rate": 9.446145282493739e-06, "loss": 0.9368, "step": 2296 }, { "epoch": 0.5752566992236414, "grad_norm": 0.40234375, "learning_rate": 9.440578903423324e-06, "loss": 0.874, "step": 2297 }, { "epoch": 0.5755071374906086, "grad_norm": 0.375, "learning_rate": 9.435012524352908e-06, "loss": 0.8357, "step": 2298 }, { "epoch": 0.5757575757575758, "grad_norm": 0.421875, "learning_rate": 9.429446145282496e-06, "loss": 0.9602, "step": 2299 }, { "epoch": 0.5760080140245429, "grad_norm": 0.349609375, "learning_rate": 9.42387976621208e-06, "loss": 0.8805, "step": 2300 }, { "epoch": 0.5762584522915102, "grad_norm": 0.39453125, "learning_rate": 9.418313387141665e-06, "loss": 0.9633, "step": 2301 }, { "epoch": 0.5765088905584773, "grad_norm": 0.3671875, "learning_rate": 9.41274700807125e-06, "loss": 1.0487, "step": 2302 }, { "epoch": 0.5767593288254446, "grad_norm": 0.3671875, "learning_rate": 9.407180629000836e-06, "loss": 0.7725, "step": 2303 }, { "epoch": 0.5770097670924117, "grad_norm": 0.349609375, "learning_rate": 9.40161424993042e-06, "loss": 1.051, "step": 2304 }, { "epoch": 0.5772602053593789, "grad_norm": 0.375, "learning_rate": 9.396047870860008e-06, "loss": 0.9596, "step": 2305 }, { "epoch": 0.5775106436263461, "grad_norm": 0.357421875, "learning_rate": 9.390481491789591e-06, "loss": 1.0281, "step": 2306 }, { "epoch": 0.5777610818933133, "grad_norm": 0.365234375, "learning_rate": 9.384915112719177e-06, "loss": 0.9223, "step": 2307 }, { "epoch": 0.5780115201602805, "grad_norm": 0.33984375, "learning_rate": 9.379348733648763e-06, "loss": 0.9171, "step": 2308 }, { "epoch": 0.5782619584272477, "grad_norm": 0.3828125, "learning_rate": 9.373782354578348e-06, "loss": 0.877, "step": 2309 }, { "epoch": 0.5785123966942148, "grad_norm": 0.36328125, "learning_rate": 9.368215975507932e-06, "loss": 0.8166, "step": 2310 }, { "epoch": 0.5787628349611821, "grad_norm": 0.35546875, "learning_rate": 9.362649596437518e-06, "loss": 0.7803, "step": 2311 }, { "epoch": 0.5790132732281492, "grad_norm": 0.380859375, "learning_rate": 9.357083217367103e-06, "loss": 1.0044, "step": 2312 }, { "epoch": 0.5792637114951165, "grad_norm": 0.4140625, "learning_rate": 9.351516838296689e-06, "loss": 0.8658, "step": 2313 }, { "epoch": 0.5795141497620836, "grad_norm": 0.38671875, "learning_rate": 9.345950459226275e-06, "loss": 0.9325, "step": 2314 }, { "epoch": 0.5797645880290508, "grad_norm": 0.373046875, "learning_rate": 9.34038408015586e-06, "loss": 1.0566, "step": 2315 }, { "epoch": 0.580015026296018, "grad_norm": 0.453125, "learning_rate": 9.334817701085444e-06, "loss": 1.0578, "step": 2316 }, { "epoch": 0.5802654645629852, "grad_norm": 0.392578125, "learning_rate": 9.32925132201503e-06, "loss": 1.0617, "step": 2317 }, { "epoch": 0.5805159028299525, "grad_norm": 0.50390625, "learning_rate": 9.323684942944615e-06, "loss": 0.8842, "step": 2318 }, { "epoch": 0.5807663410969196, "grad_norm": 0.349609375, "learning_rate": 9.318118563874201e-06, "loss": 0.869, "step": 2319 }, { "epoch": 0.5810167793638868, "grad_norm": 0.361328125, "learning_rate": 9.312552184803787e-06, "loss": 0.7407, "step": 2320 }, { "epoch": 0.581267217630854, "grad_norm": 0.40234375, "learning_rate": 9.306985805733372e-06, "loss": 0.8017, "step": 2321 }, { "epoch": 0.5815176558978212, "grad_norm": 0.318359375, "learning_rate": 9.301419426662956e-06, "loss": 0.8687, "step": 2322 }, { "epoch": 0.5817680941647884, "grad_norm": 0.359375, "learning_rate": 9.295853047592542e-06, "loss": 1.0901, "step": 2323 }, { "epoch": 0.5820185324317556, "grad_norm": 0.41015625, "learning_rate": 9.290286668522127e-06, "loss": 1.05, "step": 2324 }, { "epoch": 0.5822689706987227, "grad_norm": 0.35546875, "learning_rate": 9.284720289451711e-06, "loss": 0.9822, "step": 2325 }, { "epoch": 0.58251940896569, "grad_norm": 0.365234375, "learning_rate": 9.279153910381299e-06, "loss": 0.9504, "step": 2326 }, { "epoch": 0.5827698472326571, "grad_norm": 0.453125, "learning_rate": 9.273587531310882e-06, "loss": 0.6738, "step": 2327 }, { "epoch": 0.5830202854996244, "grad_norm": 0.40234375, "learning_rate": 9.268021152240468e-06, "loss": 0.8076, "step": 2328 }, { "epoch": 0.5832707237665915, "grad_norm": 0.345703125, "learning_rate": 9.262454773170054e-06, "loss": 0.8603, "step": 2329 }, { "epoch": 0.5835211620335587, "grad_norm": 0.34765625, "learning_rate": 9.25688839409964e-06, "loss": 0.8169, "step": 2330 }, { "epoch": 0.5837716003005259, "grad_norm": 0.359375, "learning_rate": 9.251322015029223e-06, "loss": 0.9439, "step": 2331 }, { "epoch": 0.5840220385674931, "grad_norm": 0.35546875, "learning_rate": 9.245755635958809e-06, "loss": 0.9694, "step": 2332 }, { "epoch": 0.5842724768344603, "grad_norm": 0.36328125, "learning_rate": 9.240189256888394e-06, "loss": 0.9108, "step": 2333 }, { "epoch": 0.5845229151014275, "grad_norm": 0.375, "learning_rate": 9.23462287781798e-06, "loss": 1.0504, "step": 2334 }, { "epoch": 0.5847733533683946, "grad_norm": 0.36328125, "learning_rate": 9.229056498747566e-06, "loss": 0.942, "step": 2335 }, { "epoch": 0.5850237916353619, "grad_norm": 0.353515625, "learning_rate": 9.223490119677151e-06, "loss": 0.8799, "step": 2336 }, { "epoch": 0.585274229902329, "grad_norm": 0.3515625, "learning_rate": 9.217923740606735e-06, "loss": 1.0408, "step": 2337 }, { "epoch": 0.5855246681692963, "grad_norm": 0.3984375, "learning_rate": 9.21235736153632e-06, "loss": 0.9931, "step": 2338 }, { "epoch": 0.5857751064362635, "grad_norm": 0.3984375, "learning_rate": 9.206790982465906e-06, "loss": 0.907, "step": 2339 }, { "epoch": 0.5860255447032306, "grad_norm": 0.412109375, "learning_rate": 9.201224603395492e-06, "loss": 0.957, "step": 2340 }, { "epoch": 0.5862759829701979, "grad_norm": 0.400390625, "learning_rate": 9.195658224325078e-06, "loss": 0.9464, "step": 2341 }, { "epoch": 0.586526421237165, "grad_norm": 0.421875, "learning_rate": 9.190091845254663e-06, "loss": 1.0832, "step": 2342 }, { "epoch": 0.5867768595041323, "grad_norm": 0.43359375, "learning_rate": 9.184525466184247e-06, "loss": 0.8858, "step": 2343 }, { "epoch": 0.5870272977710994, "grad_norm": 0.42578125, "learning_rate": 9.178959087113833e-06, "loss": 1.0321, "step": 2344 }, { "epoch": 0.5872777360380667, "grad_norm": 0.388671875, "learning_rate": 9.173392708043418e-06, "loss": 0.7842, "step": 2345 }, { "epoch": 0.5875281743050338, "grad_norm": 0.4375, "learning_rate": 9.167826328973004e-06, "loss": 0.7821, "step": 2346 }, { "epoch": 0.587778612572001, "grad_norm": 0.365234375, "learning_rate": 9.16225994990259e-06, "loss": 0.9107, "step": 2347 }, { "epoch": 0.5880290508389682, "grad_norm": 0.388671875, "learning_rate": 9.156693570832175e-06, "loss": 1.0473, "step": 2348 }, { "epoch": 0.5882794891059354, "grad_norm": 0.3828125, "learning_rate": 9.15112719176176e-06, "loss": 0.966, "step": 2349 }, { "epoch": 0.5885299273729026, "grad_norm": 0.3671875, "learning_rate": 9.145560812691345e-06, "loss": 0.9705, "step": 2350 }, { "epoch": 0.5887803656398698, "grad_norm": 0.390625, "learning_rate": 9.13999443362093e-06, "loss": 0.9237, "step": 2351 }, { "epoch": 0.5890308039068369, "grad_norm": 0.33984375, "learning_rate": 9.134428054550516e-06, "loss": 0.9976, "step": 2352 }, { "epoch": 0.5892812421738042, "grad_norm": 0.359375, "learning_rate": 9.1288616754801e-06, "loss": 0.9314, "step": 2353 }, { "epoch": 0.5895316804407713, "grad_norm": 0.3671875, "learning_rate": 9.123295296409687e-06, "loss": 0.741, "step": 2354 }, { "epoch": 0.5897821187077386, "grad_norm": 0.39453125, "learning_rate": 9.117728917339271e-06, "loss": 1.0663, "step": 2355 }, { "epoch": 0.5900325569747057, "grad_norm": 0.357421875, "learning_rate": 9.112162538268857e-06, "loss": 0.9573, "step": 2356 }, { "epoch": 0.5902829952416729, "grad_norm": 0.375, "learning_rate": 9.106596159198442e-06, "loss": 1.034, "step": 2357 }, { "epoch": 0.5905334335086401, "grad_norm": 0.35546875, "learning_rate": 9.101029780128028e-06, "loss": 0.8931, "step": 2358 }, { "epoch": 0.5907838717756073, "grad_norm": 0.345703125, "learning_rate": 9.095463401057612e-06, "loss": 1.0963, "step": 2359 }, { "epoch": 0.5910343100425745, "grad_norm": 0.369140625, "learning_rate": 9.0898970219872e-06, "loss": 1.0851, "step": 2360 }, { "epoch": 0.5912847483095417, "grad_norm": 0.43359375, "learning_rate": 9.084330642916783e-06, "loss": 0.9402, "step": 2361 }, { "epoch": 0.5915351865765089, "grad_norm": 0.396484375, "learning_rate": 9.078764263846369e-06, "loss": 0.9992, "step": 2362 }, { "epoch": 0.5917856248434761, "grad_norm": 0.41015625, "learning_rate": 9.073197884775954e-06, "loss": 0.9047, "step": 2363 }, { "epoch": 0.5920360631104433, "grad_norm": 0.404296875, "learning_rate": 9.06763150570554e-06, "loss": 0.8763, "step": 2364 }, { "epoch": 0.5922865013774105, "grad_norm": 0.359375, "learning_rate": 9.062065126635124e-06, "loss": 1.0111, "step": 2365 }, { "epoch": 0.5925369396443777, "grad_norm": 0.416015625, "learning_rate": 9.056498747564711e-06, "loss": 0.981, "step": 2366 }, { "epoch": 0.5927873779113448, "grad_norm": 0.376953125, "learning_rate": 9.050932368494295e-06, "loss": 1.0062, "step": 2367 }, { "epoch": 0.5930378161783121, "grad_norm": 0.328125, "learning_rate": 9.04536598942388e-06, "loss": 0.8944, "step": 2368 }, { "epoch": 0.5932882544452792, "grad_norm": 0.34375, "learning_rate": 9.039799610353466e-06, "loss": 0.988, "step": 2369 }, { "epoch": 0.5935386927122465, "grad_norm": 0.365234375, "learning_rate": 9.034233231283052e-06, "loss": 0.9548, "step": 2370 }, { "epoch": 0.5937891309792136, "grad_norm": 0.373046875, "learning_rate": 9.028666852212636e-06, "loss": 0.8794, "step": 2371 }, { "epoch": 0.5940395692461808, "grad_norm": 0.345703125, "learning_rate": 9.023100473142221e-06, "loss": 0.8101, "step": 2372 }, { "epoch": 0.594290007513148, "grad_norm": 0.380859375, "learning_rate": 9.017534094071807e-06, "loss": 0.9338, "step": 2373 }, { "epoch": 0.5945404457801152, "grad_norm": 0.419921875, "learning_rate": 9.011967715001391e-06, "loss": 0.9839, "step": 2374 }, { "epoch": 0.5947908840470824, "grad_norm": 0.3671875, "learning_rate": 9.006401335930978e-06, "loss": 0.9619, "step": 2375 }, { "epoch": 0.5950413223140496, "grad_norm": 0.34765625, "learning_rate": 9.000834956860562e-06, "loss": 0.9589, "step": 2376 }, { "epoch": 0.5952917605810167, "grad_norm": 0.376953125, "learning_rate": 8.995268577790148e-06, "loss": 0.873, "step": 2377 }, { "epoch": 0.595542198847984, "grad_norm": 0.34765625, "learning_rate": 8.989702198719733e-06, "loss": 0.9274, "step": 2378 }, { "epoch": 0.5957926371149511, "grad_norm": 0.40234375, "learning_rate": 8.984135819649319e-06, "loss": 0.9348, "step": 2379 }, { "epoch": 0.5960430753819184, "grad_norm": 0.375, "learning_rate": 8.978569440578903e-06, "loss": 0.9862, "step": 2380 }, { "epoch": 0.5962935136488855, "grad_norm": 0.384765625, "learning_rate": 8.97300306150849e-06, "loss": 0.9259, "step": 2381 }, { "epoch": 0.5965439519158527, "grad_norm": 0.373046875, "learning_rate": 8.967436682438074e-06, "loss": 0.9334, "step": 2382 }, { "epoch": 0.59679439018282, "grad_norm": 0.345703125, "learning_rate": 8.96187030336766e-06, "loss": 0.8814, "step": 2383 }, { "epoch": 0.5970448284497871, "grad_norm": 0.375, "learning_rate": 8.956303924297245e-06, "loss": 0.8564, "step": 2384 }, { "epoch": 0.5972952667167544, "grad_norm": 0.3515625, "learning_rate": 8.950737545226831e-06, "loss": 0.9511, "step": 2385 }, { "epoch": 0.5975457049837215, "grad_norm": 0.390625, "learning_rate": 8.945171166156415e-06, "loss": 1.0912, "step": 2386 }, { "epoch": 0.5977961432506887, "grad_norm": 0.361328125, "learning_rate": 8.939604787086002e-06, "loss": 0.9058, "step": 2387 }, { "epoch": 0.5980465815176559, "grad_norm": 0.41796875, "learning_rate": 8.934038408015586e-06, "loss": 0.8954, "step": 2388 }, { "epoch": 0.5982970197846231, "grad_norm": 0.3828125, "learning_rate": 8.928472028945172e-06, "loss": 0.9364, "step": 2389 }, { "epoch": 0.5985474580515903, "grad_norm": 0.3671875, "learning_rate": 8.922905649874757e-06, "loss": 0.9096, "step": 2390 }, { "epoch": 0.5987978963185575, "grad_norm": 0.40234375, "learning_rate": 8.917339270804343e-06, "loss": 0.9437, "step": 2391 }, { "epoch": 0.5990483345855246, "grad_norm": 0.337890625, "learning_rate": 8.911772891733927e-06, "loss": 0.8815, "step": 2392 }, { "epoch": 0.5992987728524919, "grad_norm": 0.375, "learning_rate": 8.906206512663513e-06, "loss": 0.9714, "step": 2393 }, { "epoch": 0.599549211119459, "grad_norm": 0.3515625, "learning_rate": 8.900640133593098e-06, "loss": 0.8352, "step": 2394 }, { "epoch": 0.5997996493864263, "grad_norm": 0.3671875, "learning_rate": 8.895073754522684e-06, "loss": 0.8402, "step": 2395 }, { "epoch": 0.6000500876533934, "grad_norm": 0.375, "learning_rate": 8.88950737545227e-06, "loss": 0.9832, "step": 2396 }, { "epoch": 0.6003005259203607, "grad_norm": 0.41796875, "learning_rate": 8.883940996381855e-06, "loss": 0.9267, "step": 2397 }, { "epoch": 0.6005509641873278, "grad_norm": 0.40234375, "learning_rate": 8.878374617311439e-06, "loss": 1.0042, "step": 2398 }, { "epoch": 0.600801402454295, "grad_norm": 0.369140625, "learning_rate": 8.872808238241025e-06, "loss": 0.9266, "step": 2399 }, { "epoch": 0.6010518407212622, "grad_norm": 0.37109375, "learning_rate": 8.86724185917061e-06, "loss": 0.9374, "step": 2400 }, { "epoch": 0.6013022789882294, "grad_norm": 0.439453125, "learning_rate": 8.861675480100196e-06, "loss": 0.9414, "step": 2401 }, { "epoch": 0.6015527172551965, "grad_norm": 0.375, "learning_rate": 8.856109101029781e-06, "loss": 0.9312, "step": 2402 }, { "epoch": 0.6018031555221638, "grad_norm": 0.33984375, "learning_rate": 8.850542721959367e-06, "loss": 0.907, "step": 2403 }, { "epoch": 0.6020535937891309, "grad_norm": 0.357421875, "learning_rate": 8.844976342888951e-06, "loss": 1.0881, "step": 2404 }, { "epoch": 0.6023040320560982, "grad_norm": 0.39453125, "learning_rate": 8.839409963818536e-06, "loss": 0.9154, "step": 2405 }, { "epoch": 0.6025544703230654, "grad_norm": 0.341796875, "learning_rate": 8.833843584748122e-06, "loss": 0.9929, "step": 2406 }, { "epoch": 0.6028049085900326, "grad_norm": 0.396484375, "learning_rate": 8.828277205677708e-06, "loss": 0.8767, "step": 2407 }, { "epoch": 0.6030553468569998, "grad_norm": 0.33984375, "learning_rate": 8.822710826607293e-06, "loss": 0.8525, "step": 2408 }, { "epoch": 0.6033057851239669, "grad_norm": 0.3671875, "learning_rate": 8.817144447536879e-06, "loss": 1.042, "step": 2409 }, { "epoch": 0.6035562233909342, "grad_norm": 0.41796875, "learning_rate": 8.811578068466463e-06, "loss": 0.9832, "step": 2410 }, { "epoch": 0.6038066616579013, "grad_norm": 0.369140625, "learning_rate": 8.806011689396048e-06, "loss": 0.9336, "step": 2411 }, { "epoch": 0.6040570999248686, "grad_norm": 0.390625, "learning_rate": 8.800445310325634e-06, "loss": 0.8982, "step": 2412 }, { "epoch": 0.6043075381918357, "grad_norm": 0.423828125, "learning_rate": 8.79487893125522e-06, "loss": 0.8187, "step": 2413 }, { "epoch": 0.6045579764588029, "grad_norm": 0.3671875, "learning_rate": 8.789312552184804e-06, "loss": 0.7894, "step": 2414 }, { "epoch": 0.6048084147257701, "grad_norm": 0.384765625, "learning_rate": 8.783746173114391e-06, "loss": 0.7385, "step": 2415 }, { "epoch": 0.6050588529927373, "grad_norm": 0.32421875, "learning_rate": 8.778179794043975e-06, "loss": 0.808, "step": 2416 }, { "epoch": 0.6053092912597045, "grad_norm": 0.380859375, "learning_rate": 8.77261341497356e-06, "loss": 0.8989, "step": 2417 }, { "epoch": 0.6055597295266717, "grad_norm": 0.3671875, "learning_rate": 8.767047035903146e-06, "loss": 0.745, "step": 2418 }, { "epoch": 0.6058101677936388, "grad_norm": 0.345703125, "learning_rate": 8.761480656832732e-06, "loss": 0.9834, "step": 2419 }, { "epoch": 0.6060606060606061, "grad_norm": 0.3671875, "learning_rate": 8.755914277762316e-06, "loss": 1.0601, "step": 2420 }, { "epoch": 0.6063110443275732, "grad_norm": 0.404296875, "learning_rate": 8.750347898691903e-06, "loss": 0.8915, "step": 2421 }, { "epoch": 0.6065614825945405, "grad_norm": 0.37109375, "learning_rate": 8.744781519621487e-06, "loss": 0.8857, "step": 2422 }, { "epoch": 0.6068119208615076, "grad_norm": 0.36328125, "learning_rate": 8.739215140551072e-06, "loss": 0.8787, "step": 2423 }, { "epoch": 0.6070623591284748, "grad_norm": 0.40234375, "learning_rate": 8.733648761480658e-06, "loss": 0.9096, "step": 2424 }, { "epoch": 0.607312797395442, "grad_norm": 0.384765625, "learning_rate": 8.728082382410242e-06, "loss": 0.8839, "step": 2425 }, { "epoch": 0.6075632356624092, "grad_norm": 0.458984375, "learning_rate": 8.722516003339828e-06, "loss": 0.9709, "step": 2426 }, { "epoch": 0.6078136739293765, "grad_norm": 0.388671875, "learning_rate": 8.716949624269413e-06, "loss": 0.9201, "step": 2427 }, { "epoch": 0.6080641121963436, "grad_norm": 0.365234375, "learning_rate": 8.711383245198999e-06, "loss": 0.8775, "step": 2428 }, { "epoch": 0.6083145504633108, "grad_norm": 0.48046875, "learning_rate": 8.705816866128584e-06, "loss": 1.0512, "step": 2429 }, { "epoch": 0.608564988730278, "grad_norm": 0.3515625, "learning_rate": 8.70025048705817e-06, "loss": 0.9613, "step": 2430 }, { "epoch": 0.6088154269972452, "grad_norm": 0.37890625, "learning_rate": 8.694684107987754e-06, "loss": 0.894, "step": 2431 }, { "epoch": 0.6090658652642124, "grad_norm": 0.373046875, "learning_rate": 8.68911772891734e-06, "loss": 1.1358, "step": 2432 }, { "epoch": 0.6093163035311796, "grad_norm": 0.337890625, "learning_rate": 8.683551349846925e-06, "loss": 0.8359, "step": 2433 }, { "epoch": 0.6095667417981467, "grad_norm": 0.4453125, "learning_rate": 8.67798497077651e-06, "loss": 0.8908, "step": 2434 }, { "epoch": 0.609817180065114, "grad_norm": 0.365234375, "learning_rate": 8.672418591706095e-06, "loss": 1.0151, "step": 2435 }, { "epoch": 0.6100676183320811, "grad_norm": 0.349609375, "learning_rate": 8.666852212635682e-06, "loss": 0.9615, "step": 2436 }, { "epoch": 0.6103180565990484, "grad_norm": 0.373046875, "learning_rate": 8.661285833565266e-06, "loss": 1.0532, "step": 2437 }, { "epoch": 0.6105684948660155, "grad_norm": 0.345703125, "learning_rate": 8.655719454494852e-06, "loss": 0.9616, "step": 2438 }, { "epoch": 0.6108189331329827, "grad_norm": 0.349609375, "learning_rate": 8.650153075424437e-06, "loss": 0.8541, "step": 2439 }, { "epoch": 0.6110693713999499, "grad_norm": 0.345703125, "learning_rate": 8.644586696354023e-06, "loss": 0.8911, "step": 2440 }, { "epoch": 0.6113198096669171, "grad_norm": 0.369140625, "learning_rate": 8.639020317283607e-06, "loss": 0.9315, "step": 2441 }, { "epoch": 0.6115702479338843, "grad_norm": 0.291015625, "learning_rate": 8.633453938213194e-06, "loss": 0.7251, "step": 2442 }, { "epoch": 0.6118206862008515, "grad_norm": 0.412109375, "learning_rate": 8.627887559142778e-06, "loss": 0.8551, "step": 2443 }, { "epoch": 0.6120711244678186, "grad_norm": 0.3828125, "learning_rate": 8.622321180072363e-06, "loss": 1.0052, "step": 2444 }, { "epoch": 0.6123215627347859, "grad_norm": 0.369140625, "learning_rate": 8.616754801001949e-06, "loss": 0.7919, "step": 2445 }, { "epoch": 0.612572001001753, "grad_norm": 0.3984375, "learning_rate": 8.611188421931535e-06, "loss": 1.0393, "step": 2446 }, { "epoch": 0.6128224392687203, "grad_norm": 0.37109375, "learning_rate": 8.605622042861119e-06, "loss": 0.8869, "step": 2447 }, { "epoch": 0.6130728775356874, "grad_norm": 0.38671875, "learning_rate": 8.600055663790706e-06, "loss": 0.9806, "step": 2448 }, { "epoch": 0.6133233158026546, "grad_norm": 0.359375, "learning_rate": 8.59448928472029e-06, "loss": 0.9738, "step": 2449 }, { "epoch": 0.6135737540696219, "grad_norm": 0.416015625, "learning_rate": 8.588922905649875e-06, "loss": 0.8163, "step": 2450 }, { "epoch": 0.613824192336589, "grad_norm": 0.38671875, "learning_rate": 8.583356526579461e-06, "loss": 0.8406, "step": 2451 }, { "epoch": 0.6140746306035563, "grad_norm": 0.390625, "learning_rate": 8.577790147509047e-06, "loss": 0.9411, "step": 2452 }, { "epoch": 0.6143250688705234, "grad_norm": 0.384765625, "learning_rate": 8.57222376843863e-06, "loss": 0.873, "step": 2453 }, { "epoch": 0.6145755071374907, "grad_norm": 0.38671875, "learning_rate": 8.566657389368216e-06, "loss": 1.1125, "step": 2454 }, { "epoch": 0.6148259454044578, "grad_norm": 0.337890625, "learning_rate": 8.561091010297802e-06, "loss": 0.9499, "step": 2455 }, { "epoch": 0.615076383671425, "grad_norm": 0.369140625, "learning_rate": 8.555524631227387e-06, "loss": 1.1153, "step": 2456 }, { "epoch": 0.6153268219383922, "grad_norm": 0.388671875, "learning_rate": 8.549958252156973e-06, "loss": 1.0847, "step": 2457 }, { "epoch": 0.6155772602053594, "grad_norm": 0.359375, "learning_rate": 8.544391873086559e-06, "loss": 0.9848, "step": 2458 }, { "epoch": 0.6158276984723265, "grad_norm": 0.3671875, "learning_rate": 8.538825494016143e-06, "loss": 1.0085, "step": 2459 }, { "epoch": 0.6160781367392938, "grad_norm": 0.373046875, "learning_rate": 8.533259114945728e-06, "loss": 0.8953, "step": 2460 }, { "epoch": 0.6163285750062609, "grad_norm": 0.34765625, "learning_rate": 8.527692735875314e-06, "loss": 1.0314, "step": 2461 }, { "epoch": 0.6165790132732282, "grad_norm": 0.388671875, "learning_rate": 8.5221263568049e-06, "loss": 1.277, "step": 2462 }, { "epoch": 0.6168294515401953, "grad_norm": 0.357421875, "learning_rate": 8.516559977734485e-06, "loss": 1.0265, "step": 2463 }, { "epoch": 0.6170798898071626, "grad_norm": 0.33984375, "learning_rate": 8.51099359866407e-06, "loss": 0.93, "step": 2464 }, { "epoch": 0.6173303280741297, "grad_norm": 0.373046875, "learning_rate": 8.505427219593655e-06, "loss": 1.0065, "step": 2465 }, { "epoch": 0.6175807663410969, "grad_norm": 0.373046875, "learning_rate": 8.49986084052324e-06, "loss": 1.0032, "step": 2466 }, { "epoch": 0.6178312046080641, "grad_norm": 0.34765625, "learning_rate": 8.494294461452826e-06, "loss": 0.9588, "step": 2467 }, { "epoch": 0.6180816428750313, "grad_norm": 0.333984375, "learning_rate": 8.488728082382411e-06, "loss": 0.8411, "step": 2468 }, { "epoch": 0.6183320811419984, "grad_norm": 0.365234375, "learning_rate": 8.483161703311997e-06, "loss": 0.9231, "step": 2469 }, { "epoch": 0.6185825194089657, "grad_norm": 0.345703125, "learning_rate": 8.477595324241583e-06, "loss": 0.9724, "step": 2470 }, { "epoch": 0.6188329576759329, "grad_norm": 0.380859375, "learning_rate": 8.472028945171167e-06, "loss": 0.9323, "step": 2471 }, { "epoch": 0.6190833959429001, "grad_norm": 0.3359375, "learning_rate": 8.466462566100752e-06, "loss": 0.9217, "step": 2472 }, { "epoch": 0.6193338342098673, "grad_norm": 0.392578125, "learning_rate": 8.460896187030338e-06, "loss": 0.9073, "step": 2473 }, { "epoch": 0.6195842724768345, "grad_norm": 0.359375, "learning_rate": 8.455329807959923e-06, "loss": 0.9294, "step": 2474 }, { "epoch": 0.6198347107438017, "grad_norm": 0.375, "learning_rate": 8.449763428889507e-06, "loss": 1.084, "step": 2475 }, { "epoch": 0.6200851490107688, "grad_norm": 0.34375, "learning_rate": 8.444197049819093e-06, "loss": 1.0432, "step": 2476 }, { "epoch": 0.6203355872777361, "grad_norm": 0.39453125, "learning_rate": 8.438630670748679e-06, "loss": 0.8866, "step": 2477 }, { "epoch": 0.6205860255447032, "grad_norm": 0.337890625, "learning_rate": 8.433064291678264e-06, "loss": 0.9168, "step": 2478 }, { "epoch": 0.6208364638116705, "grad_norm": 0.3671875, "learning_rate": 8.42749791260785e-06, "loss": 0.9875, "step": 2479 }, { "epoch": 0.6210869020786376, "grad_norm": 0.37109375, "learning_rate": 8.421931533537434e-06, "loss": 0.987, "step": 2480 }, { "epoch": 0.6213373403456048, "grad_norm": 0.3984375, "learning_rate": 8.41636515446702e-06, "loss": 0.9684, "step": 2481 }, { "epoch": 0.621587778612572, "grad_norm": 0.427734375, "learning_rate": 8.410798775396605e-06, "loss": 0.9351, "step": 2482 }, { "epoch": 0.6218382168795392, "grad_norm": 0.3984375, "learning_rate": 8.40523239632619e-06, "loss": 1.0467, "step": 2483 }, { "epoch": 0.6220886551465064, "grad_norm": 0.365234375, "learning_rate": 8.399666017255776e-06, "loss": 1.0278, "step": 2484 }, { "epoch": 0.6223390934134736, "grad_norm": 0.380859375, "learning_rate": 8.394099638185362e-06, "loss": 1.079, "step": 2485 }, { "epoch": 0.6225895316804407, "grad_norm": 0.341796875, "learning_rate": 8.388533259114946e-06, "loss": 0.884, "step": 2486 }, { "epoch": 0.622839969947408, "grad_norm": 0.3515625, "learning_rate": 8.382966880044531e-06, "loss": 0.8483, "step": 2487 }, { "epoch": 0.6230904082143751, "grad_norm": 0.38671875, "learning_rate": 8.377400500974117e-06, "loss": 0.9948, "step": 2488 }, { "epoch": 0.6233408464813424, "grad_norm": 0.392578125, "learning_rate": 8.371834121903702e-06, "loss": 0.8839, "step": 2489 }, { "epoch": 0.6235912847483095, "grad_norm": 0.400390625, "learning_rate": 8.366267742833288e-06, "loss": 0.8103, "step": 2490 }, { "epoch": 0.6238417230152767, "grad_norm": 0.330078125, "learning_rate": 8.360701363762874e-06, "loss": 0.8665, "step": 2491 }, { "epoch": 0.6240921612822439, "grad_norm": 0.3671875, "learning_rate": 8.355134984692458e-06, "loss": 0.9868, "step": 2492 }, { "epoch": 0.6243425995492111, "grad_norm": 0.392578125, "learning_rate": 8.349568605622043e-06, "loss": 0.9366, "step": 2493 }, { "epoch": 0.6245930378161784, "grad_norm": 0.365234375, "learning_rate": 8.344002226551629e-06, "loss": 0.9005, "step": 2494 }, { "epoch": 0.6248434760831455, "grad_norm": 0.38671875, "learning_rate": 8.338435847481214e-06, "loss": 0.8091, "step": 2495 }, { "epoch": 0.6250939143501127, "grad_norm": 0.345703125, "learning_rate": 8.332869468410798e-06, "loss": 0.9089, "step": 2496 }, { "epoch": 0.6253443526170799, "grad_norm": 0.396484375, "learning_rate": 8.327303089340386e-06, "loss": 0.9752, "step": 2497 }, { "epoch": 0.6255947908840471, "grad_norm": 0.3984375, "learning_rate": 8.32173671026997e-06, "loss": 0.9292, "step": 2498 }, { "epoch": 0.6258452291510143, "grad_norm": 0.361328125, "learning_rate": 8.316170331199555e-06, "loss": 0.8837, "step": 2499 }, { "epoch": 0.6260956674179815, "grad_norm": 0.373046875, "learning_rate": 8.31060395212914e-06, "loss": 0.8276, "step": 2500 }, { "epoch": 0.6263461056849486, "grad_norm": 0.353515625, "learning_rate": 8.305037573058726e-06, "loss": 0.8528, "step": 2501 }, { "epoch": 0.6265965439519159, "grad_norm": 0.39453125, "learning_rate": 8.29947119398831e-06, "loss": 0.9766, "step": 2502 }, { "epoch": 0.626846982218883, "grad_norm": 0.34765625, "learning_rate": 8.293904814917898e-06, "loss": 0.8569, "step": 2503 }, { "epoch": 0.6270974204858503, "grad_norm": 0.361328125, "learning_rate": 8.288338435847482e-06, "loss": 1.0843, "step": 2504 }, { "epoch": 0.6273478587528174, "grad_norm": 0.404296875, "learning_rate": 8.282772056777067e-06, "loss": 0.9015, "step": 2505 }, { "epoch": 0.6275982970197846, "grad_norm": 0.37890625, "learning_rate": 8.277205677706653e-06, "loss": 0.9209, "step": 2506 }, { "epoch": 0.6278487352867518, "grad_norm": 0.34375, "learning_rate": 8.271639298636238e-06, "loss": 0.9511, "step": 2507 }, { "epoch": 0.628099173553719, "grad_norm": 0.380859375, "learning_rate": 8.266072919565822e-06, "loss": 0.9614, "step": 2508 }, { "epoch": 0.6283496118206862, "grad_norm": 0.36328125, "learning_rate": 8.26050654049541e-06, "loss": 0.8283, "step": 2509 }, { "epoch": 0.6286000500876534, "grad_norm": 0.3828125, "learning_rate": 8.254940161424994e-06, "loss": 1.0043, "step": 2510 }, { "epoch": 0.6288504883546205, "grad_norm": 0.337890625, "learning_rate": 8.249373782354579e-06, "loss": 0.8886, "step": 2511 }, { "epoch": 0.6291009266215878, "grad_norm": 0.341796875, "learning_rate": 8.243807403284165e-06, "loss": 0.988, "step": 2512 }, { "epoch": 0.6293513648885549, "grad_norm": 0.3671875, "learning_rate": 8.23824102421375e-06, "loss": 0.9484, "step": 2513 }, { "epoch": 0.6296018031555222, "grad_norm": 0.45703125, "learning_rate": 8.232674645143334e-06, "loss": 0.843, "step": 2514 }, { "epoch": 0.6298522414224894, "grad_norm": 0.41015625, "learning_rate": 8.22710826607292e-06, "loss": 0.9415, "step": 2515 }, { "epoch": 0.6301026796894565, "grad_norm": 0.3671875, "learning_rate": 8.221541887002506e-06, "loss": 0.9313, "step": 2516 }, { "epoch": 0.6303531179564238, "grad_norm": 0.3359375, "learning_rate": 8.215975507932091e-06, "loss": 0.8708, "step": 2517 }, { "epoch": 0.6306035562233909, "grad_norm": 0.345703125, "learning_rate": 8.210409128861677e-06, "loss": 0.9561, "step": 2518 }, { "epoch": 0.6308539944903582, "grad_norm": 0.3671875, "learning_rate": 8.204842749791262e-06, "loss": 0.8254, "step": 2519 }, { "epoch": 0.6311044327573253, "grad_norm": 0.32421875, "learning_rate": 8.199276370720846e-06, "loss": 0.8167, "step": 2520 }, { "epoch": 0.6313548710242926, "grad_norm": 0.3515625, "learning_rate": 8.193709991650432e-06, "loss": 0.9279, "step": 2521 }, { "epoch": 0.6316053092912597, "grad_norm": 0.349609375, "learning_rate": 8.188143612580017e-06, "loss": 0.8883, "step": 2522 }, { "epoch": 0.6318557475582269, "grad_norm": 0.341796875, "learning_rate": 8.182577233509603e-06, "loss": 0.8628, "step": 2523 }, { "epoch": 0.6321061858251941, "grad_norm": 0.361328125, "learning_rate": 8.177010854439189e-06, "loss": 0.8174, "step": 2524 }, { "epoch": 0.6323566240921613, "grad_norm": 0.380859375, "learning_rate": 8.171444475368774e-06, "loss": 0.86, "step": 2525 }, { "epoch": 0.6326070623591284, "grad_norm": 0.349609375, "learning_rate": 8.165878096298358e-06, "loss": 0.9812, "step": 2526 }, { "epoch": 0.6328575006260957, "grad_norm": 0.349609375, "learning_rate": 8.160311717227944e-06, "loss": 0.8161, "step": 2527 }, { "epoch": 0.6331079388930628, "grad_norm": 0.369140625, "learning_rate": 8.15474533815753e-06, "loss": 1.1175, "step": 2528 }, { "epoch": 0.6333583771600301, "grad_norm": 0.359375, "learning_rate": 8.149178959087113e-06, "loss": 0.8823, "step": 2529 }, { "epoch": 0.6336088154269972, "grad_norm": 0.404296875, "learning_rate": 8.1436125800167e-06, "loss": 0.7572, "step": 2530 }, { "epoch": 0.6338592536939645, "grad_norm": 0.34765625, "learning_rate": 8.138046200946285e-06, "loss": 0.8002, "step": 2531 }, { "epoch": 0.6341096919609316, "grad_norm": 0.39453125, "learning_rate": 8.13247982187587e-06, "loss": 0.8526, "step": 2532 }, { "epoch": 0.6343601302278988, "grad_norm": 0.3828125, "learning_rate": 8.126913442805456e-06, "loss": 1.0335, "step": 2533 }, { "epoch": 0.634610568494866, "grad_norm": 0.36328125, "learning_rate": 8.121347063735041e-06, "loss": 1.0231, "step": 2534 }, { "epoch": 0.6348610067618332, "grad_norm": 0.435546875, "learning_rate": 8.115780684664625e-06, "loss": 0.9954, "step": 2535 }, { "epoch": 0.6351114450288003, "grad_norm": 0.357421875, "learning_rate": 8.110214305594211e-06, "loss": 0.8602, "step": 2536 }, { "epoch": 0.6353618832957676, "grad_norm": 0.337890625, "learning_rate": 8.104647926523797e-06, "loss": 1.0337, "step": 2537 }, { "epoch": 0.6356123215627348, "grad_norm": 0.369140625, "learning_rate": 8.099081547453382e-06, "loss": 0.8592, "step": 2538 }, { "epoch": 0.635862759829702, "grad_norm": 0.36328125, "learning_rate": 8.093515168382968e-06, "loss": 0.9601, "step": 2539 }, { "epoch": 0.6361131980966692, "grad_norm": 0.365234375, "learning_rate": 8.087948789312553e-06, "loss": 0.8817, "step": 2540 }, { "epoch": 0.6363636363636364, "grad_norm": 0.3671875, "learning_rate": 8.082382410242137e-06, "loss": 0.7749, "step": 2541 }, { "epoch": 0.6366140746306036, "grad_norm": 0.41015625, "learning_rate": 8.076816031171723e-06, "loss": 1.0429, "step": 2542 }, { "epoch": 0.6368645128975707, "grad_norm": 0.359375, "learning_rate": 8.071249652101309e-06, "loss": 0.9874, "step": 2543 }, { "epoch": 0.637114951164538, "grad_norm": 0.4140625, "learning_rate": 8.065683273030894e-06, "loss": 0.834, "step": 2544 }, { "epoch": 0.6373653894315051, "grad_norm": 0.41796875, "learning_rate": 8.06011689396048e-06, "loss": 0.9721, "step": 2545 }, { "epoch": 0.6376158276984724, "grad_norm": 0.4140625, "learning_rate": 8.054550514890065e-06, "loss": 0.9966, "step": 2546 }, { "epoch": 0.6378662659654395, "grad_norm": 0.3984375, "learning_rate": 8.04898413581965e-06, "loss": 0.8372, "step": 2547 }, { "epoch": 0.6381167042324067, "grad_norm": 0.359375, "learning_rate": 8.043417756749235e-06, "loss": 1.0421, "step": 2548 }, { "epoch": 0.6383671424993739, "grad_norm": 0.40234375, "learning_rate": 8.03785137767882e-06, "loss": 0.8344, "step": 2549 }, { "epoch": 0.6386175807663411, "grad_norm": 0.423828125, "learning_rate": 8.032284998608406e-06, "loss": 1.0737, "step": 2550 }, { "epoch": 0.6388680190333083, "grad_norm": 0.353515625, "learning_rate": 8.026718619537992e-06, "loss": 0.9135, "step": 2551 }, { "epoch": 0.6391184573002755, "grad_norm": 0.3203125, "learning_rate": 8.021152240467577e-06, "loss": 0.9238, "step": 2552 }, { "epoch": 0.6393688955672426, "grad_norm": 0.37890625, "learning_rate": 8.015585861397161e-06, "loss": 0.8619, "step": 2553 }, { "epoch": 0.6396193338342099, "grad_norm": 0.412109375, "learning_rate": 8.010019482326747e-06, "loss": 0.7984, "step": 2554 }, { "epoch": 0.639869772101177, "grad_norm": 0.353515625, "learning_rate": 8.004453103256333e-06, "loss": 1.0795, "step": 2555 }, { "epoch": 0.6401202103681443, "grad_norm": 0.390625, "learning_rate": 7.998886724185918e-06, "loss": 0.8064, "step": 2556 }, { "epoch": 0.6403706486351114, "grad_norm": 0.50390625, "learning_rate": 7.993320345115502e-06, "loss": 0.9692, "step": 2557 }, { "epoch": 0.6406210869020786, "grad_norm": 0.34375, "learning_rate": 7.98775396604509e-06, "loss": 1.0319, "step": 2558 }, { "epoch": 0.6408715251690458, "grad_norm": 0.38671875, "learning_rate": 7.982187586974673e-06, "loss": 0.9134, "step": 2559 }, { "epoch": 0.641121963436013, "grad_norm": 0.3984375, "learning_rate": 7.976621207904259e-06, "loss": 0.8248, "step": 2560 }, { "epoch": 0.6413724017029803, "grad_norm": 0.380859375, "learning_rate": 7.971054828833844e-06, "loss": 0.9422, "step": 2561 }, { "epoch": 0.6416228399699474, "grad_norm": 0.423828125, "learning_rate": 7.96548844976343e-06, "loss": 0.8825, "step": 2562 }, { "epoch": 0.6418732782369146, "grad_norm": 0.349609375, "learning_rate": 7.959922070693014e-06, "loss": 1.0275, "step": 2563 }, { "epoch": 0.6421237165038818, "grad_norm": 0.36328125, "learning_rate": 7.954355691622601e-06, "loss": 0.9453, "step": 2564 }, { "epoch": 0.642374154770849, "grad_norm": 0.38671875, "learning_rate": 7.948789312552185e-06, "loss": 0.8792, "step": 2565 }, { "epoch": 0.6426245930378162, "grad_norm": 0.361328125, "learning_rate": 7.94322293348177e-06, "loss": 0.9884, "step": 2566 }, { "epoch": 0.6428750313047834, "grad_norm": 0.373046875, "learning_rate": 7.937656554411356e-06, "loss": 0.8806, "step": 2567 }, { "epoch": 0.6431254695717505, "grad_norm": 0.427734375, "learning_rate": 7.932090175340942e-06, "loss": 0.9153, "step": 2568 }, { "epoch": 0.6433759078387178, "grad_norm": 0.376953125, "learning_rate": 7.926523796270526e-06, "loss": 0.9543, "step": 2569 }, { "epoch": 0.6436263461056849, "grad_norm": 0.359375, "learning_rate": 7.920957417200113e-06, "loss": 0.9281, "step": 2570 }, { "epoch": 0.6438767843726522, "grad_norm": 0.35546875, "learning_rate": 7.915391038129697e-06, "loss": 0.9422, "step": 2571 }, { "epoch": 0.6441272226396193, "grad_norm": 0.33984375, "learning_rate": 7.909824659059283e-06, "loss": 0.8215, "step": 2572 }, { "epoch": 0.6443776609065865, "grad_norm": 0.39453125, "learning_rate": 7.904258279988868e-06, "loss": 1.0418, "step": 2573 }, { "epoch": 0.6446280991735537, "grad_norm": 0.376953125, "learning_rate": 7.898691900918454e-06, "loss": 0.8361, "step": 2574 }, { "epoch": 0.6448785374405209, "grad_norm": 0.384765625, "learning_rate": 7.893125521848038e-06, "loss": 1.0089, "step": 2575 }, { "epoch": 0.6451289757074881, "grad_norm": 0.353515625, "learning_rate": 7.887559142777624e-06, "loss": 0.9323, "step": 2576 }, { "epoch": 0.6453794139744553, "grad_norm": 0.373046875, "learning_rate": 7.88199276370721e-06, "loss": 0.9296, "step": 2577 }, { "epoch": 0.6456298522414224, "grad_norm": 0.357421875, "learning_rate": 7.876426384636793e-06, "loss": 0.9845, "step": 2578 }, { "epoch": 0.6458802905083897, "grad_norm": 0.3984375, "learning_rate": 7.87086000556638e-06, "loss": 0.9852, "step": 2579 }, { "epoch": 0.6461307287753568, "grad_norm": 0.34765625, "learning_rate": 7.865293626495964e-06, "loss": 1.0416, "step": 2580 }, { "epoch": 0.6463811670423241, "grad_norm": 0.375, "learning_rate": 7.85972724742555e-06, "loss": 0.8365, "step": 2581 }, { "epoch": 0.6466316053092913, "grad_norm": 0.37890625, "learning_rate": 7.854160868355136e-06, "loss": 0.8748, "step": 2582 }, { "epoch": 0.6468820435762584, "grad_norm": 0.359375, "learning_rate": 7.848594489284721e-06, "loss": 1.051, "step": 2583 }, { "epoch": 0.6471324818432257, "grad_norm": 0.361328125, "learning_rate": 7.843028110214305e-06, "loss": 0.9509, "step": 2584 }, { "epoch": 0.6473829201101928, "grad_norm": 0.35546875, "learning_rate": 7.837461731143892e-06, "loss": 0.8295, "step": 2585 }, { "epoch": 0.6476333583771601, "grad_norm": 0.326171875, "learning_rate": 7.831895352073476e-06, "loss": 1.0339, "step": 2586 }, { "epoch": 0.6478837966441272, "grad_norm": 0.357421875, "learning_rate": 7.826328973003062e-06, "loss": 0.9653, "step": 2587 }, { "epoch": 0.6481342349110945, "grad_norm": 0.4453125, "learning_rate": 7.820762593932648e-06, "loss": 0.9366, "step": 2588 }, { "epoch": 0.6483846731780616, "grad_norm": 0.34375, "learning_rate": 7.815196214862233e-06, "loss": 0.9452, "step": 2589 }, { "epoch": 0.6486351114450288, "grad_norm": 0.353515625, "learning_rate": 7.809629835791817e-06, "loss": 0.9342, "step": 2590 }, { "epoch": 0.648885549711996, "grad_norm": 0.61328125, "learning_rate": 7.804063456721404e-06, "loss": 0.9003, "step": 2591 }, { "epoch": 0.6491359879789632, "grad_norm": 0.36328125, "learning_rate": 7.798497077650988e-06, "loss": 0.9286, "step": 2592 }, { "epoch": 0.6493864262459303, "grad_norm": 0.353515625, "learning_rate": 7.792930698580574e-06, "loss": 0.9843, "step": 2593 }, { "epoch": 0.6496368645128976, "grad_norm": 0.357421875, "learning_rate": 7.78736431951016e-06, "loss": 0.8703, "step": 2594 }, { "epoch": 0.6498873027798647, "grad_norm": 0.357421875, "learning_rate": 7.781797940439745e-06, "loss": 0.9668, "step": 2595 }, { "epoch": 0.650137741046832, "grad_norm": 0.466796875, "learning_rate": 7.776231561369329e-06, "loss": 0.8886, "step": 2596 }, { "epoch": 0.6503881793137991, "grad_norm": 0.349609375, "learning_rate": 7.770665182298915e-06, "loss": 0.8566, "step": 2597 }, { "epoch": 0.6506386175807664, "grad_norm": 0.373046875, "learning_rate": 7.7650988032285e-06, "loss": 0.7997, "step": 2598 }, { "epoch": 0.6508890558477335, "grad_norm": 0.3828125, "learning_rate": 7.759532424158086e-06, "loss": 1.0653, "step": 2599 }, { "epoch": 0.6511394941147007, "grad_norm": 0.384765625, "learning_rate": 7.753966045087671e-06, "loss": 0.8746, "step": 2600 }, { "epoch": 0.6513899323816679, "grad_norm": 0.35546875, "learning_rate": 7.748399666017257e-06, "loss": 0.8827, "step": 2601 }, { "epoch": 0.6516403706486351, "grad_norm": 0.384765625, "learning_rate": 7.742833286946841e-06, "loss": 0.838, "step": 2602 }, { "epoch": 0.6518908089156022, "grad_norm": 0.37109375, "learning_rate": 7.737266907876427e-06, "loss": 0.8989, "step": 2603 }, { "epoch": 0.6521412471825695, "grad_norm": 0.38671875, "learning_rate": 7.731700528806012e-06, "loss": 0.7978, "step": 2604 }, { "epoch": 0.6523916854495367, "grad_norm": 0.349609375, "learning_rate": 7.726134149735598e-06, "loss": 0.9887, "step": 2605 }, { "epoch": 0.6526421237165039, "grad_norm": 0.390625, "learning_rate": 7.720567770665183e-06, "loss": 1.1775, "step": 2606 }, { "epoch": 0.6528925619834711, "grad_norm": 0.35546875, "learning_rate": 7.715001391594769e-06, "loss": 0.9525, "step": 2607 }, { "epoch": 0.6531430002504383, "grad_norm": 0.365234375, "learning_rate": 7.709435012524353e-06, "loss": 1.0097, "step": 2608 }, { "epoch": 0.6533934385174055, "grad_norm": 0.388671875, "learning_rate": 7.703868633453939e-06, "loss": 0.8483, "step": 2609 }, { "epoch": 0.6536438767843726, "grad_norm": 0.376953125, "learning_rate": 7.698302254383524e-06, "loss": 1.005, "step": 2610 }, { "epoch": 0.6538943150513399, "grad_norm": 0.345703125, "learning_rate": 7.69273587531311e-06, "loss": 0.8515, "step": 2611 }, { "epoch": 0.654144753318307, "grad_norm": 0.33203125, "learning_rate": 7.687169496242695e-06, "loss": 0.8958, "step": 2612 }, { "epoch": 0.6543951915852743, "grad_norm": 0.34375, "learning_rate": 7.681603117172281e-06, "loss": 1.0472, "step": 2613 }, { "epoch": 0.6546456298522414, "grad_norm": 0.423828125, "learning_rate": 7.676036738101865e-06, "loss": 0.9375, "step": 2614 }, { "epoch": 0.6548960681192086, "grad_norm": 0.375, "learning_rate": 7.67047035903145e-06, "loss": 0.9604, "step": 2615 }, { "epoch": 0.6551465063861758, "grad_norm": 0.384765625, "learning_rate": 7.664903979961036e-06, "loss": 0.7503, "step": 2616 }, { "epoch": 0.655396944653143, "grad_norm": 0.3828125, "learning_rate": 7.659337600890622e-06, "loss": 0.9451, "step": 2617 }, { "epoch": 0.6556473829201102, "grad_norm": 0.376953125, "learning_rate": 7.653771221820206e-06, "loss": 0.8721, "step": 2618 }, { "epoch": 0.6558978211870774, "grad_norm": 0.369140625, "learning_rate": 7.648204842749793e-06, "loss": 0.8616, "step": 2619 }, { "epoch": 0.6561482594540445, "grad_norm": 0.474609375, "learning_rate": 7.642638463679377e-06, "loss": 0.9034, "step": 2620 }, { "epoch": 0.6563986977210118, "grad_norm": 0.3671875, "learning_rate": 7.637072084608963e-06, "loss": 0.7026, "step": 2621 }, { "epoch": 0.6566491359879789, "grad_norm": 0.365234375, "learning_rate": 7.631505705538548e-06, "loss": 0.9583, "step": 2622 }, { "epoch": 0.6568995742549462, "grad_norm": 0.376953125, "learning_rate": 7.625939326468134e-06, "loss": 0.973, "step": 2623 }, { "epoch": 0.6571500125219133, "grad_norm": 0.361328125, "learning_rate": 7.6203729473977185e-06, "loss": 0.9374, "step": 2624 }, { "epoch": 0.6574004507888805, "grad_norm": 0.369140625, "learning_rate": 7.614806568327304e-06, "loss": 0.8669, "step": 2625 }, { "epoch": 0.6576508890558478, "grad_norm": 0.373046875, "learning_rate": 7.609240189256889e-06, "loss": 0.9244, "step": 2626 }, { "epoch": 0.6579013273228149, "grad_norm": 0.3828125, "learning_rate": 7.603673810186474e-06, "loss": 0.8223, "step": 2627 }, { "epoch": 0.6581517655897822, "grad_norm": 0.392578125, "learning_rate": 7.598107431116059e-06, "loss": 0.9503, "step": 2628 }, { "epoch": 0.6584022038567493, "grad_norm": 0.34765625, "learning_rate": 7.592541052045644e-06, "loss": 0.8935, "step": 2629 }, { "epoch": 0.6586526421237165, "grad_norm": 0.453125, "learning_rate": 7.5869746729752305e-06, "loss": 0.9146, "step": 2630 }, { "epoch": 0.6589030803906837, "grad_norm": 0.357421875, "learning_rate": 7.581408293904815e-06, "loss": 0.8135, "step": 2631 }, { "epoch": 0.6591535186576509, "grad_norm": 0.384765625, "learning_rate": 7.575841914834401e-06, "loss": 0.9409, "step": 2632 }, { "epoch": 0.6594039569246181, "grad_norm": 0.35546875, "learning_rate": 7.570275535763986e-06, "loss": 1.0373, "step": 2633 }, { "epoch": 0.6596543951915853, "grad_norm": 0.361328125, "learning_rate": 7.564709156693571e-06, "loss": 1.0062, "step": 2634 }, { "epoch": 0.6599048334585524, "grad_norm": 0.33984375, "learning_rate": 7.559142777623156e-06, "loss": 1.1157, "step": 2635 }, { "epoch": 0.6601552717255197, "grad_norm": 0.337890625, "learning_rate": 7.5535763985527425e-06, "loss": 1.0162, "step": 2636 }, { "epoch": 0.6604057099924868, "grad_norm": 0.357421875, "learning_rate": 7.548010019482327e-06, "loss": 0.8657, "step": 2637 }, { "epoch": 0.6606561482594541, "grad_norm": 0.392578125, "learning_rate": 7.542443640411913e-06, "loss": 1.0081, "step": 2638 }, { "epoch": 0.6609065865264212, "grad_norm": 0.349609375, "learning_rate": 7.536877261341498e-06, "loss": 0.8609, "step": 2639 }, { "epoch": 0.6611570247933884, "grad_norm": 0.337890625, "learning_rate": 7.531310882271083e-06, "loss": 0.9818, "step": 2640 }, { "epoch": 0.6614074630603556, "grad_norm": 0.373046875, "learning_rate": 7.525744503200668e-06, "loss": 0.8013, "step": 2641 }, { "epoch": 0.6616579013273228, "grad_norm": 0.35546875, "learning_rate": 7.5201781241302545e-06, "loss": 1.0323, "step": 2642 }, { "epoch": 0.66190833959429, "grad_norm": 0.361328125, "learning_rate": 7.514611745059839e-06, "loss": 0.9152, "step": 2643 }, { "epoch": 0.6621587778612572, "grad_norm": 0.3828125, "learning_rate": 7.509045365989425e-06, "loss": 1.0509, "step": 2644 }, { "epoch": 0.6624092161282243, "grad_norm": 0.375, "learning_rate": 7.50347898691901e-06, "loss": 0.8906, "step": 2645 }, { "epoch": 0.6626596543951916, "grad_norm": 0.4296875, "learning_rate": 7.497912607848595e-06, "loss": 0.8642, "step": 2646 }, { "epoch": 0.6629100926621587, "grad_norm": 0.361328125, "learning_rate": 7.49234622877818e-06, "loss": 0.8857, "step": 2647 }, { "epoch": 0.663160530929126, "grad_norm": 0.37890625, "learning_rate": 7.486779849707766e-06, "loss": 0.8732, "step": 2648 }, { "epoch": 0.6634109691960932, "grad_norm": 0.40234375, "learning_rate": 7.48121347063735e-06, "loss": 1.0918, "step": 2649 }, { "epoch": 0.6636614074630603, "grad_norm": 0.373046875, "learning_rate": 7.475647091566937e-06, "loss": 1.0774, "step": 2650 }, { "epoch": 0.6639118457300276, "grad_norm": 0.38671875, "learning_rate": 7.470080712496522e-06, "loss": 0.9871, "step": 2651 }, { "epoch": 0.6641622839969947, "grad_norm": 0.369140625, "learning_rate": 7.464514333426107e-06, "loss": 0.8677, "step": 2652 }, { "epoch": 0.664412722263962, "grad_norm": 0.37109375, "learning_rate": 7.458947954355692e-06, "loss": 1.3707, "step": 2653 }, { "epoch": 0.6646631605309291, "grad_norm": 0.42578125, "learning_rate": 7.4533815752852776e-06, "loss": 0.904, "step": 2654 }, { "epoch": 0.6649135987978964, "grad_norm": 0.380859375, "learning_rate": 7.447815196214862e-06, "loss": 1.0131, "step": 2655 }, { "epoch": 0.6651640370648635, "grad_norm": 0.365234375, "learning_rate": 7.442248817144449e-06, "loss": 0.978, "step": 2656 }, { "epoch": 0.6654144753318307, "grad_norm": 0.34375, "learning_rate": 7.4366824380740336e-06, "loss": 0.9748, "step": 2657 }, { "epoch": 0.6656649135987979, "grad_norm": 0.361328125, "learning_rate": 7.431116059003619e-06, "loss": 0.9531, "step": 2658 }, { "epoch": 0.6659153518657651, "grad_norm": 0.37890625, "learning_rate": 7.425549679933204e-06, "loss": 0.7765, "step": 2659 }, { "epoch": 0.6661657901327322, "grad_norm": 0.37890625, "learning_rate": 7.4199833008627895e-06, "loss": 0.8097, "step": 2660 }, { "epoch": 0.6664162283996995, "grad_norm": 0.359375, "learning_rate": 7.414416921792374e-06, "loss": 0.8496, "step": 2661 }, { "epoch": 0.6666666666666666, "grad_norm": 0.369140625, "learning_rate": 7.408850542721961e-06, "loss": 0.8486, "step": 2662 }, { "epoch": 0.6669171049336339, "grad_norm": 0.3671875, "learning_rate": 7.4032841636515455e-06, "loss": 1.0001, "step": 2663 }, { "epoch": 0.667167543200601, "grad_norm": 0.431640625, "learning_rate": 7.397717784581131e-06, "loss": 1.0049, "step": 2664 }, { "epoch": 0.6674179814675683, "grad_norm": 0.44921875, "learning_rate": 7.392151405510716e-06, "loss": 0.979, "step": 2665 }, { "epoch": 0.6676684197345354, "grad_norm": 0.412109375, "learning_rate": 7.3865850264403015e-06, "loss": 0.9806, "step": 2666 }, { "epoch": 0.6679188580015026, "grad_norm": 0.337890625, "learning_rate": 7.381018647369886e-06, "loss": 0.9101, "step": 2667 }, { "epoch": 0.6681692962684698, "grad_norm": 0.34765625, "learning_rate": 7.375452268299472e-06, "loss": 1.0579, "step": 2668 }, { "epoch": 0.668419734535437, "grad_norm": 0.341796875, "learning_rate": 7.369885889229057e-06, "loss": 0.9024, "step": 2669 }, { "epoch": 0.6686701728024043, "grad_norm": 0.34765625, "learning_rate": 7.364319510158643e-06, "loss": 1.009, "step": 2670 }, { "epoch": 0.6689206110693714, "grad_norm": 0.40234375, "learning_rate": 7.358753131088228e-06, "loss": 1.0422, "step": 2671 }, { "epoch": 0.6691710493363386, "grad_norm": 0.37890625, "learning_rate": 7.3531867520178135e-06, "loss": 0.7292, "step": 2672 }, { "epoch": 0.6694214876033058, "grad_norm": 0.388671875, "learning_rate": 7.347620372947398e-06, "loss": 0.8736, "step": 2673 }, { "epoch": 0.669671925870273, "grad_norm": 0.33984375, "learning_rate": 7.342053993876984e-06, "loss": 0.997, "step": 2674 }, { "epoch": 0.6699223641372402, "grad_norm": 0.3671875, "learning_rate": 7.336487614806569e-06, "loss": 0.8113, "step": 2675 }, { "epoch": 0.6701728024042074, "grad_norm": 0.388671875, "learning_rate": 7.330921235736155e-06, "loss": 0.9138, "step": 2676 }, { "epoch": 0.6704232406711745, "grad_norm": 0.3671875, "learning_rate": 7.32535485666574e-06, "loss": 0.903, "step": 2677 }, { "epoch": 0.6706736789381418, "grad_norm": 0.4140625, "learning_rate": 7.319788477595325e-06, "loss": 0.9374, "step": 2678 }, { "epoch": 0.6709241172051089, "grad_norm": 0.369140625, "learning_rate": 7.31422209852491e-06, "loss": 0.9252, "step": 2679 }, { "epoch": 0.6711745554720762, "grad_norm": 0.34765625, "learning_rate": 7.308655719454495e-06, "loss": 0.8335, "step": 2680 }, { "epoch": 0.6714249937390433, "grad_norm": 0.345703125, "learning_rate": 7.303089340384081e-06, "loss": 0.9509, "step": 2681 }, { "epoch": 0.6716754320060105, "grad_norm": 0.369140625, "learning_rate": 7.297522961313665e-06, "loss": 0.978, "step": 2682 }, { "epoch": 0.6719258702729777, "grad_norm": 0.359375, "learning_rate": 7.291956582243252e-06, "loss": 0.8766, "step": 2683 }, { "epoch": 0.6721763085399449, "grad_norm": 0.357421875, "learning_rate": 7.286390203172837e-06, "loss": 0.8355, "step": 2684 }, { "epoch": 0.6724267468069121, "grad_norm": 0.39453125, "learning_rate": 7.280823824102422e-06, "loss": 0.9519, "step": 2685 }, { "epoch": 0.6726771850738793, "grad_norm": 0.337890625, "learning_rate": 7.275257445032007e-06, "loss": 0.9376, "step": 2686 }, { "epoch": 0.6729276233408464, "grad_norm": 0.37109375, "learning_rate": 7.269691065961593e-06, "loss": 0.8711, "step": 2687 }, { "epoch": 0.6731780616078137, "grad_norm": 0.37109375, "learning_rate": 7.264124686891177e-06, "loss": 0.8957, "step": 2688 }, { "epoch": 0.6734284998747808, "grad_norm": 0.373046875, "learning_rate": 7.258558307820763e-06, "loss": 0.9939, "step": 2689 }, { "epoch": 0.6736789381417481, "grad_norm": 0.43359375, "learning_rate": 7.252991928750348e-06, "loss": 1.0192, "step": 2690 }, { "epoch": 0.6739293764087152, "grad_norm": 0.345703125, "learning_rate": 7.247425549679934e-06, "loss": 1.0264, "step": 2691 }, { "epoch": 0.6741798146756824, "grad_norm": 0.341796875, "learning_rate": 7.241859170609519e-06, "loss": 1.0233, "step": 2692 }, { "epoch": 0.6744302529426497, "grad_norm": 0.3671875, "learning_rate": 7.2362927915391046e-06, "loss": 0.8945, "step": 2693 }, { "epoch": 0.6746806912096168, "grad_norm": 0.384765625, "learning_rate": 7.230726412468689e-06, "loss": 0.642, "step": 2694 }, { "epoch": 0.6749311294765841, "grad_norm": 0.392578125, "learning_rate": 7.225160033398275e-06, "loss": 0.9397, "step": 2695 }, { "epoch": 0.6751815677435512, "grad_norm": 0.35546875, "learning_rate": 7.21959365432786e-06, "loss": 0.8341, "step": 2696 }, { "epoch": 0.6754320060105184, "grad_norm": 0.3515625, "learning_rate": 7.214027275257446e-06, "loss": 0.8483, "step": 2697 }, { "epoch": 0.6756824442774856, "grad_norm": 0.357421875, "learning_rate": 7.208460896187031e-06, "loss": 1.1159, "step": 2698 }, { "epoch": 0.6759328825444528, "grad_norm": 0.357421875, "learning_rate": 7.2028945171166165e-06, "loss": 0.9205, "step": 2699 }, { "epoch": 0.67618332081142, "grad_norm": 0.4375, "learning_rate": 7.197328138046201e-06, "loss": 0.9912, "step": 2700 }, { "epoch": 0.6764337590783872, "grad_norm": 0.412109375, "learning_rate": 7.191761758975787e-06, "loss": 0.8767, "step": 2701 }, { "epoch": 0.6766841973453543, "grad_norm": 0.345703125, "learning_rate": 7.186195379905372e-06, "loss": 0.8978, "step": 2702 }, { "epoch": 0.6769346356123216, "grad_norm": 0.388671875, "learning_rate": 7.180629000834958e-06, "loss": 1.0048, "step": 2703 }, { "epoch": 0.6771850738792887, "grad_norm": 0.287109375, "learning_rate": 7.175062621764543e-06, "loss": 0.8024, "step": 2704 }, { "epoch": 0.677435512146256, "grad_norm": 0.365234375, "learning_rate": 7.1694962426941285e-06, "loss": 0.8769, "step": 2705 }, { "epoch": 0.6776859504132231, "grad_norm": 0.38671875, "learning_rate": 7.163929863623713e-06, "loss": 0.9565, "step": 2706 }, { "epoch": 0.6779363886801903, "grad_norm": 0.330078125, "learning_rate": 7.158363484553299e-06, "loss": 0.7602, "step": 2707 }, { "epoch": 0.6781868269471575, "grad_norm": 0.3828125, "learning_rate": 7.152797105482884e-06, "loss": 0.8988, "step": 2708 }, { "epoch": 0.6784372652141247, "grad_norm": 0.40234375, "learning_rate": 7.147230726412469e-06, "loss": 1.0807, "step": 2709 }, { "epoch": 0.6786877034810919, "grad_norm": 0.40625, "learning_rate": 7.141664347342054e-06, "loss": 0.7483, "step": 2710 }, { "epoch": 0.6789381417480591, "grad_norm": 0.361328125, "learning_rate": 7.1360979682716405e-06, "loss": 1.0035, "step": 2711 }, { "epoch": 0.6791885800150262, "grad_norm": 0.373046875, "learning_rate": 7.130531589201225e-06, "loss": 0.7686, "step": 2712 }, { "epoch": 0.6794390182819935, "grad_norm": 0.361328125, "learning_rate": 7.124965210130811e-06, "loss": 0.8904, "step": 2713 }, { "epoch": 0.6796894565489607, "grad_norm": 0.375, "learning_rate": 7.119398831060396e-06, "loss": 1.0356, "step": 2714 }, { "epoch": 0.6799398948159279, "grad_norm": 0.40234375, "learning_rate": 7.113832451989981e-06, "loss": 1.0056, "step": 2715 }, { "epoch": 0.6801903330828951, "grad_norm": 0.375, "learning_rate": 7.108266072919566e-06, "loss": 0.9337, "step": 2716 }, { "epoch": 0.6804407713498623, "grad_norm": 0.365234375, "learning_rate": 7.1026996938491525e-06, "loss": 0.8727, "step": 2717 }, { "epoch": 0.6806912096168295, "grad_norm": 0.326171875, "learning_rate": 7.097133314778737e-06, "loss": 0.9439, "step": 2718 }, { "epoch": 0.6809416478837966, "grad_norm": 0.357421875, "learning_rate": 7.091566935708323e-06, "loss": 0.9191, "step": 2719 }, { "epoch": 0.6811920861507639, "grad_norm": 0.341796875, "learning_rate": 7.086000556637908e-06, "loss": 0.8317, "step": 2720 }, { "epoch": 0.681442524417731, "grad_norm": 0.380859375, "learning_rate": 7.080434177567493e-06, "loss": 1.0402, "step": 2721 }, { "epoch": 0.6816929626846983, "grad_norm": 0.392578125, "learning_rate": 7.074867798497078e-06, "loss": 0.8942, "step": 2722 }, { "epoch": 0.6819434009516654, "grad_norm": 0.396484375, "learning_rate": 7.0693014194266645e-06, "loss": 1.0373, "step": 2723 }, { "epoch": 0.6821938392186326, "grad_norm": 0.341796875, "learning_rate": 7.063735040356249e-06, "loss": 0.8899, "step": 2724 }, { "epoch": 0.6824442774855998, "grad_norm": 0.349609375, "learning_rate": 7.058168661285835e-06, "loss": 0.859, "step": 2725 }, { "epoch": 0.682694715752567, "grad_norm": 0.359375, "learning_rate": 7.05260228221542e-06, "loss": 0.882, "step": 2726 }, { "epoch": 0.6829451540195342, "grad_norm": 0.4453125, "learning_rate": 7.047035903145005e-06, "loss": 1.0481, "step": 2727 }, { "epoch": 0.6831955922865014, "grad_norm": 0.37890625, "learning_rate": 7.04146952407459e-06, "loss": 0.9793, "step": 2728 }, { "epoch": 0.6834460305534685, "grad_norm": 0.4453125, "learning_rate": 7.035903145004175e-06, "loss": 0.8652, "step": 2729 }, { "epoch": 0.6836964688204358, "grad_norm": 0.412109375, "learning_rate": 7.03033676593376e-06, "loss": 0.861, "step": 2730 }, { "epoch": 0.6839469070874029, "grad_norm": 0.3203125, "learning_rate": 7.024770386863345e-06, "loss": 0.9723, "step": 2731 }, { "epoch": 0.6841973453543702, "grad_norm": 0.416015625, "learning_rate": 7.0192040077929316e-06, "loss": 0.8421, "step": 2732 }, { "epoch": 0.6844477836213373, "grad_norm": 0.412109375, "learning_rate": 7.013637628722516e-06, "loss": 0.7655, "step": 2733 }, { "epoch": 0.6846982218883045, "grad_norm": 0.3359375, "learning_rate": 7.008071249652102e-06, "loss": 1.0774, "step": 2734 }, { "epoch": 0.6849486601552717, "grad_norm": 0.380859375, "learning_rate": 7.002504870581687e-06, "loss": 0.9101, "step": 2735 }, { "epoch": 0.6851990984222389, "grad_norm": 0.384765625, "learning_rate": 6.996938491511272e-06, "loss": 1.1218, "step": 2736 }, { "epoch": 0.6854495366892062, "grad_norm": 0.4453125, "learning_rate": 6.991372112440857e-06, "loss": 1.1036, "step": 2737 }, { "epoch": 0.6856999749561733, "grad_norm": 0.36328125, "learning_rate": 6.9858057333704435e-06, "loss": 1.0227, "step": 2738 }, { "epoch": 0.6859504132231405, "grad_norm": 0.349609375, "learning_rate": 6.980239354300028e-06, "loss": 0.959, "step": 2739 }, { "epoch": 0.6862008514901077, "grad_norm": 0.38671875, "learning_rate": 6.974672975229614e-06, "loss": 0.9887, "step": 2740 }, { "epoch": 0.6864512897570749, "grad_norm": 0.361328125, "learning_rate": 6.969106596159199e-06, "loss": 0.9747, "step": 2741 }, { "epoch": 0.6867017280240421, "grad_norm": 0.392578125, "learning_rate": 6.963540217088784e-06, "loss": 0.9127, "step": 2742 }, { "epoch": 0.6869521662910093, "grad_norm": 0.365234375, "learning_rate": 6.957973838018369e-06, "loss": 0.883, "step": 2743 }, { "epoch": 0.6872026045579764, "grad_norm": 0.32421875, "learning_rate": 6.9524074589479555e-06, "loss": 0.9148, "step": 2744 }, { "epoch": 0.6874530428249437, "grad_norm": 0.3984375, "learning_rate": 6.94684107987754e-06, "loss": 0.837, "step": 2745 }, { "epoch": 0.6877034810919108, "grad_norm": 0.3359375, "learning_rate": 6.941274700807126e-06, "loss": 0.8742, "step": 2746 }, { "epoch": 0.6879539193588781, "grad_norm": 0.51171875, "learning_rate": 6.935708321736711e-06, "loss": 0.8869, "step": 2747 }, { "epoch": 0.6882043576258452, "grad_norm": 0.357421875, "learning_rate": 6.930141942666296e-06, "loss": 0.86, "step": 2748 }, { "epoch": 0.6884547958928124, "grad_norm": 0.345703125, "learning_rate": 6.924575563595881e-06, "loss": 0.9291, "step": 2749 }, { "epoch": 0.6887052341597796, "grad_norm": 0.373046875, "learning_rate": 6.919009184525467e-06, "loss": 0.9268, "step": 2750 }, { "epoch": 0.6889556724267468, "grad_norm": 0.37109375, "learning_rate": 6.913442805455051e-06, "loss": 0.8578, "step": 2751 }, { "epoch": 0.689206110693714, "grad_norm": 0.400390625, "learning_rate": 6.907876426384638e-06, "loss": 0.9791, "step": 2752 }, { "epoch": 0.6894565489606812, "grad_norm": 0.380859375, "learning_rate": 6.902310047314223e-06, "loss": 0.9916, "step": 2753 }, { "epoch": 0.6897069872276483, "grad_norm": 0.341796875, "learning_rate": 6.896743668243808e-06, "loss": 1.0314, "step": 2754 }, { "epoch": 0.6899574254946156, "grad_norm": 0.376953125, "learning_rate": 6.891177289173393e-06, "loss": 1.0047, "step": 2755 }, { "epoch": 0.6902078637615827, "grad_norm": 0.345703125, "learning_rate": 6.885610910102979e-06, "loss": 0.9766, "step": 2756 }, { "epoch": 0.69045830202855, "grad_norm": 0.34765625, "learning_rate": 6.880044531032563e-06, "loss": 0.9732, "step": 2757 }, { "epoch": 0.6907087402955172, "grad_norm": 0.34375, "learning_rate": 6.87447815196215e-06, "loss": 0.9438, "step": 2758 }, { "epoch": 0.6909591785624843, "grad_norm": 0.361328125, "learning_rate": 6.868911772891735e-06, "loss": 0.8516, "step": 2759 }, { "epoch": 0.6912096168294516, "grad_norm": 0.353515625, "learning_rate": 6.86334539382132e-06, "loss": 0.8831, "step": 2760 }, { "epoch": 0.6914600550964187, "grad_norm": 0.375, "learning_rate": 6.857779014750905e-06, "loss": 1.0795, "step": 2761 }, { "epoch": 0.691710493363386, "grad_norm": 0.32421875, "learning_rate": 6.852212635680491e-06, "loss": 0.8474, "step": 2762 }, { "epoch": 0.6919609316303531, "grad_norm": 0.373046875, "learning_rate": 6.846646256610075e-06, "loss": 0.8939, "step": 2763 }, { "epoch": 0.6922113698973203, "grad_norm": 0.3515625, "learning_rate": 6.841079877539662e-06, "loss": 0.9103, "step": 2764 }, { "epoch": 0.6924618081642875, "grad_norm": 0.3515625, "learning_rate": 6.835513498469247e-06, "loss": 1.0127, "step": 2765 }, { "epoch": 0.6927122464312547, "grad_norm": 0.380859375, "learning_rate": 6.829947119398832e-06, "loss": 0.9261, "step": 2766 }, { "epoch": 0.6929626846982219, "grad_norm": 0.375, "learning_rate": 6.824380740328417e-06, "loss": 0.873, "step": 2767 }, { "epoch": 0.6932131229651891, "grad_norm": 0.359375, "learning_rate": 6.818814361258003e-06, "loss": 0.8626, "step": 2768 }, { "epoch": 0.6934635612321562, "grad_norm": 0.353515625, "learning_rate": 6.813247982187587e-06, "loss": 0.8943, "step": 2769 }, { "epoch": 0.6937139994991235, "grad_norm": 0.392578125, "learning_rate": 6.807681603117173e-06, "loss": 1.06, "step": 2770 }, { "epoch": 0.6939644377660906, "grad_norm": 0.404296875, "learning_rate": 6.802115224046758e-06, "loss": 0.911, "step": 2771 }, { "epoch": 0.6942148760330579, "grad_norm": 0.373046875, "learning_rate": 6.796548844976344e-06, "loss": 0.8886, "step": 2772 }, { "epoch": 0.694465314300025, "grad_norm": 0.36328125, "learning_rate": 6.790982465905929e-06, "loss": 0.9474, "step": 2773 }, { "epoch": 0.6947157525669923, "grad_norm": 0.380859375, "learning_rate": 6.7854160868355146e-06, "loss": 0.9735, "step": 2774 }, { "epoch": 0.6949661908339594, "grad_norm": 0.376953125, "learning_rate": 6.779849707765099e-06, "loss": 0.7835, "step": 2775 }, { "epoch": 0.6952166291009266, "grad_norm": 0.34765625, "learning_rate": 6.774283328694685e-06, "loss": 0.923, "step": 2776 }, { "epoch": 0.6954670673678938, "grad_norm": 0.388671875, "learning_rate": 6.76871694962427e-06, "loss": 0.9284, "step": 2777 }, { "epoch": 0.695717505634861, "grad_norm": 0.33203125, "learning_rate": 6.763150570553856e-06, "loss": 1.1035, "step": 2778 }, { "epoch": 0.6959679439018281, "grad_norm": 0.392578125, "learning_rate": 6.757584191483441e-06, "loss": 0.9046, "step": 2779 }, { "epoch": 0.6962183821687954, "grad_norm": 0.37109375, "learning_rate": 6.752017812413026e-06, "loss": 0.8962, "step": 2780 }, { "epoch": 0.6964688204357626, "grad_norm": 0.365234375, "learning_rate": 6.746451433342611e-06, "loss": 0.8384, "step": 2781 }, { "epoch": 0.6967192587027298, "grad_norm": 0.435546875, "learning_rate": 6.740885054272196e-06, "loss": 0.9583, "step": 2782 }, { "epoch": 0.696969696969697, "grad_norm": 0.3671875, "learning_rate": 6.735318675201782e-06, "loss": 1.0236, "step": 2783 }, { "epoch": 0.6972201352366642, "grad_norm": 0.376953125, "learning_rate": 6.7297522961313664e-06, "loss": 0.9806, "step": 2784 }, { "epoch": 0.6974705735036314, "grad_norm": 0.357421875, "learning_rate": 6.724185917060953e-06, "loss": 1.0224, "step": 2785 }, { "epoch": 0.6977210117705985, "grad_norm": 0.359375, "learning_rate": 6.718619537990537e-06, "loss": 0.8554, "step": 2786 }, { "epoch": 0.6979714500375658, "grad_norm": 0.345703125, "learning_rate": 6.713053158920123e-06, "loss": 0.8503, "step": 2787 }, { "epoch": 0.6982218883045329, "grad_norm": 0.330078125, "learning_rate": 6.707486779849708e-06, "loss": 0.8432, "step": 2788 }, { "epoch": 0.6984723265715002, "grad_norm": 0.40625, "learning_rate": 6.701920400779294e-06, "loss": 1.039, "step": 2789 }, { "epoch": 0.6987227648384673, "grad_norm": 0.369140625, "learning_rate": 6.696354021708878e-06, "loss": 0.9284, "step": 2790 }, { "epoch": 0.6989732031054345, "grad_norm": 0.384765625, "learning_rate": 6.690787642638464e-06, "loss": 0.9696, "step": 2791 }, { "epoch": 0.6992236413724017, "grad_norm": 0.349609375, "learning_rate": 6.685221263568049e-06, "loss": 0.8766, "step": 2792 }, { "epoch": 0.6994740796393689, "grad_norm": 0.33984375, "learning_rate": 6.679654884497635e-06, "loss": 0.9212, "step": 2793 }, { "epoch": 0.699724517906336, "grad_norm": 0.373046875, "learning_rate": 6.67408850542722e-06, "loss": 0.8891, "step": 2794 }, { "epoch": 0.6999749561733033, "grad_norm": 0.392578125, "learning_rate": 6.668522126356806e-06, "loss": 1.0344, "step": 2795 }, { "epoch": 0.7002253944402704, "grad_norm": 0.3671875, "learning_rate": 6.66295574728639e-06, "loss": 0.9096, "step": 2796 }, { "epoch": 0.7004758327072377, "grad_norm": 0.3515625, "learning_rate": 6.657389368215976e-06, "loss": 0.8873, "step": 2797 }, { "epoch": 0.7007262709742048, "grad_norm": 0.349609375, "learning_rate": 6.651822989145561e-06, "loss": 0.8547, "step": 2798 }, { "epoch": 0.7009767092411721, "grad_norm": 0.388671875, "learning_rate": 6.646256610075147e-06, "loss": 1.0497, "step": 2799 }, { "epoch": 0.7012271475081392, "grad_norm": 0.3671875, "learning_rate": 6.640690231004732e-06, "loss": 0.8585, "step": 2800 }, { "epoch": 0.7014775857751064, "grad_norm": 0.375, "learning_rate": 6.635123851934318e-06, "loss": 0.9577, "step": 2801 }, { "epoch": 0.7017280240420737, "grad_norm": 0.353515625, "learning_rate": 6.629557472863902e-06, "loss": 1.0559, "step": 2802 }, { "epoch": 0.7019784623090408, "grad_norm": 0.38671875, "learning_rate": 6.623991093793488e-06, "loss": 1.0097, "step": 2803 }, { "epoch": 0.7022289005760081, "grad_norm": 0.416015625, "learning_rate": 6.618424714723073e-06, "loss": 0.9656, "step": 2804 }, { "epoch": 0.7024793388429752, "grad_norm": 0.384765625, "learning_rate": 6.612858335652659e-06, "loss": 0.9715, "step": 2805 }, { "epoch": 0.7027297771099424, "grad_norm": 0.353515625, "learning_rate": 6.607291956582243e-06, "loss": 0.9817, "step": 2806 }, { "epoch": 0.7029802153769096, "grad_norm": 0.392578125, "learning_rate": 6.6017255775118296e-06, "loss": 0.7714, "step": 2807 }, { "epoch": 0.7032306536438768, "grad_norm": 0.38671875, "learning_rate": 6.596159198441414e-06, "loss": 1.117, "step": 2808 }, { "epoch": 0.703481091910844, "grad_norm": 0.36328125, "learning_rate": 6.590592819371e-06, "loss": 0.9983, "step": 2809 }, { "epoch": 0.7037315301778112, "grad_norm": 0.359375, "learning_rate": 6.585026440300585e-06, "loss": 0.8889, "step": 2810 }, { "epoch": 0.7039819684447783, "grad_norm": 0.373046875, "learning_rate": 6.57946006123017e-06, "loss": 1.0408, "step": 2811 }, { "epoch": 0.7042324067117456, "grad_norm": 0.376953125, "learning_rate": 6.573893682159755e-06, "loss": 0.8176, "step": 2812 }, { "epoch": 0.7044828449787127, "grad_norm": 0.3671875, "learning_rate": 6.5683273030893416e-06, "loss": 0.8815, "step": 2813 }, { "epoch": 0.70473328324568, "grad_norm": 0.39453125, "learning_rate": 6.562760924018926e-06, "loss": 0.8672, "step": 2814 }, { "epoch": 0.7049837215126471, "grad_norm": 0.3984375, "learning_rate": 6.557194544948512e-06, "loss": 0.8786, "step": 2815 }, { "epoch": 0.7052341597796143, "grad_norm": 0.37890625, "learning_rate": 6.551628165878097e-06, "loss": 1.2045, "step": 2816 }, { "epoch": 0.7054845980465815, "grad_norm": 0.373046875, "learning_rate": 6.546061786807682e-06, "loss": 0.8616, "step": 2817 }, { "epoch": 0.7057350363135487, "grad_norm": 0.404296875, "learning_rate": 6.540495407737267e-06, "loss": 0.8836, "step": 2818 }, { "epoch": 0.7059854745805159, "grad_norm": 0.34375, "learning_rate": 6.5349290286668535e-06, "loss": 0.9349, "step": 2819 }, { "epoch": 0.7062359128474831, "grad_norm": 0.326171875, "learning_rate": 6.529362649596438e-06, "loss": 0.9773, "step": 2820 }, { "epoch": 0.7064863511144502, "grad_norm": 0.35546875, "learning_rate": 6.523796270526024e-06, "loss": 0.8533, "step": 2821 }, { "epoch": 0.7067367893814175, "grad_norm": 0.34765625, "learning_rate": 6.518229891455609e-06, "loss": 0.9236, "step": 2822 }, { "epoch": 0.7069872276483846, "grad_norm": 0.384765625, "learning_rate": 6.512663512385194e-06, "loss": 0.9036, "step": 2823 }, { "epoch": 0.7072376659153519, "grad_norm": 0.359375, "learning_rate": 6.507097133314779e-06, "loss": 1.0554, "step": 2824 }, { "epoch": 0.7074881041823191, "grad_norm": 0.373046875, "learning_rate": 6.5015307542443655e-06, "loss": 0.8453, "step": 2825 }, { "epoch": 0.7077385424492862, "grad_norm": 0.388671875, "learning_rate": 6.495964375173949e-06, "loss": 0.8667, "step": 2826 }, { "epoch": 0.7079889807162535, "grad_norm": 0.3515625, "learning_rate": 6.490397996103536e-06, "loss": 0.8649, "step": 2827 }, { "epoch": 0.7082394189832206, "grad_norm": 0.3671875, "learning_rate": 6.484831617033121e-06, "loss": 0.9244, "step": 2828 }, { "epoch": 0.7084898572501879, "grad_norm": 0.365234375, "learning_rate": 6.479265237962706e-06, "loss": 1.0132, "step": 2829 }, { "epoch": 0.708740295517155, "grad_norm": 0.361328125, "learning_rate": 6.473698858892291e-06, "loss": 0.9053, "step": 2830 }, { "epoch": 0.7089907337841223, "grad_norm": 0.392578125, "learning_rate": 6.468132479821876e-06, "loss": 0.8619, "step": 2831 }, { "epoch": 0.7092411720510894, "grad_norm": 0.419921875, "learning_rate": 6.462566100751461e-06, "loss": 0.9491, "step": 2832 }, { "epoch": 0.7094916103180566, "grad_norm": 0.34375, "learning_rate": 6.456999721681046e-06, "loss": 0.9161, "step": 2833 }, { "epoch": 0.7097420485850238, "grad_norm": 0.423828125, "learning_rate": 6.451433342610633e-06, "loss": 1.0145, "step": 2834 }, { "epoch": 0.709992486851991, "grad_norm": 0.40625, "learning_rate": 6.445866963540217e-06, "loss": 0.9711, "step": 2835 }, { "epoch": 0.7102429251189581, "grad_norm": 0.341796875, "learning_rate": 6.440300584469803e-06, "loss": 0.9184, "step": 2836 }, { "epoch": 0.7104933633859254, "grad_norm": 0.34375, "learning_rate": 6.434734205399388e-06, "loss": 0.9357, "step": 2837 }, { "epoch": 0.7107438016528925, "grad_norm": 0.421875, "learning_rate": 6.429167826328973e-06, "loss": 0.907, "step": 2838 }, { "epoch": 0.7109942399198598, "grad_norm": 0.369140625, "learning_rate": 6.423601447258558e-06, "loss": 0.9199, "step": 2839 }, { "epoch": 0.7112446781868269, "grad_norm": 0.412109375, "learning_rate": 6.418035068188145e-06, "loss": 0.9983, "step": 2840 }, { "epoch": 0.7114951164537942, "grad_norm": 0.369140625, "learning_rate": 6.412468689117729e-06, "loss": 1.016, "step": 2841 }, { "epoch": 0.7117455547207613, "grad_norm": 0.375, "learning_rate": 6.406902310047315e-06, "loss": 0.9285, "step": 2842 }, { "epoch": 0.7119959929877285, "grad_norm": 0.384765625, "learning_rate": 6.4013359309769e-06, "loss": 1.0011, "step": 2843 }, { "epoch": 0.7122464312546957, "grad_norm": 0.376953125, "learning_rate": 6.395769551906485e-06, "loss": 0.9938, "step": 2844 }, { "epoch": 0.7124968695216629, "grad_norm": 0.396484375, "learning_rate": 6.39020317283607e-06, "loss": 0.9857, "step": 2845 }, { "epoch": 0.7127473077886302, "grad_norm": 0.38671875, "learning_rate": 6.384636793765656e-06, "loss": 0.7967, "step": 2846 }, { "epoch": 0.7129977460555973, "grad_norm": 0.419921875, "learning_rate": 6.3790704146952405e-06, "loss": 0.826, "step": 2847 }, { "epoch": 0.7132481843225645, "grad_norm": 0.3671875, "learning_rate": 6.373504035624827e-06, "loss": 0.9924, "step": 2848 }, { "epoch": 0.7134986225895317, "grad_norm": 0.359375, "learning_rate": 6.367937656554412e-06, "loss": 0.9941, "step": 2849 }, { "epoch": 0.7137490608564989, "grad_norm": 0.3359375, "learning_rate": 6.362371277483997e-06, "loss": 1.0454, "step": 2850 }, { "epoch": 0.713999499123466, "grad_norm": 0.388671875, "learning_rate": 6.356804898413582e-06, "loss": 0.87, "step": 2851 }, { "epoch": 0.7142499373904333, "grad_norm": 0.35546875, "learning_rate": 6.351238519343168e-06, "loss": 0.7773, "step": 2852 }, { "epoch": 0.7145003756574004, "grad_norm": 0.357421875, "learning_rate": 6.3456721402727525e-06, "loss": 0.8816, "step": 2853 }, { "epoch": 0.7147508139243677, "grad_norm": 0.375, "learning_rate": 6.340105761202339e-06, "loss": 1.0005, "step": 2854 }, { "epoch": 0.7150012521913348, "grad_norm": 0.376953125, "learning_rate": 6.334539382131924e-06, "loss": 0.908, "step": 2855 }, { "epoch": 0.7152516904583021, "grad_norm": 0.376953125, "learning_rate": 6.328973003061509e-06, "loss": 1.0416, "step": 2856 }, { "epoch": 0.7155021287252692, "grad_norm": 0.359375, "learning_rate": 6.323406623991094e-06, "loss": 0.8825, "step": 2857 }, { "epoch": 0.7157525669922364, "grad_norm": 0.3359375, "learning_rate": 6.31784024492068e-06, "loss": 0.9793, "step": 2858 }, { "epoch": 0.7160030052592036, "grad_norm": 0.3515625, "learning_rate": 6.3122738658502644e-06, "loss": 1.0243, "step": 2859 }, { "epoch": 0.7162534435261708, "grad_norm": 0.4140625, "learning_rate": 6.306707486779851e-06, "loss": 0.8619, "step": 2860 }, { "epoch": 0.716503881793138, "grad_norm": 0.396484375, "learning_rate": 6.301141107709436e-06, "loss": 0.7849, "step": 2861 }, { "epoch": 0.7167543200601052, "grad_norm": 0.388671875, "learning_rate": 6.295574728639021e-06, "loss": 0.8406, "step": 2862 }, { "epoch": 0.7170047583270723, "grad_norm": 0.421875, "learning_rate": 6.290008349568606e-06, "loss": 0.9557, "step": 2863 }, { "epoch": 0.7172551965940396, "grad_norm": 0.404296875, "learning_rate": 6.284441970498192e-06, "loss": 0.8587, "step": 2864 }, { "epoch": 0.7175056348610067, "grad_norm": 0.37890625, "learning_rate": 6.278875591427776e-06, "loss": 1.0228, "step": 2865 }, { "epoch": 0.717756073127974, "grad_norm": 0.337890625, "learning_rate": 6.273309212357362e-06, "loss": 0.8853, "step": 2866 }, { "epoch": 0.7180065113949411, "grad_norm": 0.380859375, "learning_rate": 6.267742833286947e-06, "loss": 0.9016, "step": 2867 }, { "epoch": 0.7182569496619083, "grad_norm": 0.40625, "learning_rate": 6.262176454216533e-06, "loss": 1.0649, "step": 2868 }, { "epoch": 0.7185073879288756, "grad_norm": 0.42578125, "learning_rate": 6.256610075146118e-06, "loss": 0.9425, "step": 2869 }, { "epoch": 0.7187578261958427, "grad_norm": 0.36328125, "learning_rate": 6.251043696075704e-06, "loss": 0.8352, "step": 2870 }, { "epoch": 0.71900826446281, "grad_norm": 0.359375, "learning_rate": 6.245477317005288e-06, "loss": 0.9667, "step": 2871 }, { "epoch": 0.7192587027297771, "grad_norm": 0.380859375, "learning_rate": 6.239910937934874e-06, "loss": 0.9546, "step": 2872 }, { "epoch": 0.7195091409967443, "grad_norm": 0.35546875, "learning_rate": 6.234344558864459e-06, "loss": 0.9135, "step": 2873 }, { "epoch": 0.7197595792637115, "grad_norm": 0.3984375, "learning_rate": 6.228778179794045e-06, "loss": 0.8915, "step": 2874 }, { "epoch": 0.7200100175306787, "grad_norm": 0.375, "learning_rate": 6.22321180072363e-06, "loss": 0.9801, "step": 2875 }, { "epoch": 0.7202604557976459, "grad_norm": 0.380859375, "learning_rate": 6.217645421653216e-06, "loss": 0.9461, "step": 2876 }, { "epoch": 0.7205108940646131, "grad_norm": 0.380859375, "learning_rate": 6.2120790425828e-06, "loss": 1.0001, "step": 2877 }, { "epoch": 0.7207613323315802, "grad_norm": 0.384765625, "learning_rate": 6.206512663512386e-06, "loss": 0.8513, "step": 2878 }, { "epoch": 0.7210117705985475, "grad_norm": 0.353515625, "learning_rate": 6.200946284441971e-06, "loss": 0.8614, "step": 2879 }, { "epoch": 0.7212622088655146, "grad_norm": 0.349609375, "learning_rate": 6.1953799053715555e-06, "loss": 0.9846, "step": 2880 }, { "epoch": 0.7215126471324819, "grad_norm": 0.353515625, "learning_rate": 6.189813526301142e-06, "loss": 0.8709, "step": 2881 }, { "epoch": 0.721763085399449, "grad_norm": 0.40234375, "learning_rate": 6.184247147230727e-06, "loss": 0.9022, "step": 2882 }, { "epoch": 0.7220135236664162, "grad_norm": 0.314453125, "learning_rate": 6.178680768160312e-06, "loss": 1.1253, "step": 2883 }, { "epoch": 0.7222639619333834, "grad_norm": 0.408203125, "learning_rate": 6.173114389089897e-06, "loss": 1.0277, "step": 2884 }, { "epoch": 0.7225144002003506, "grad_norm": 0.357421875, "learning_rate": 6.167548010019483e-06, "loss": 0.9309, "step": 2885 }, { "epoch": 0.7227648384673178, "grad_norm": 0.38671875, "learning_rate": 6.1619816309490675e-06, "loss": 0.8803, "step": 2886 }, { "epoch": 0.723015276734285, "grad_norm": 0.39453125, "learning_rate": 6.156415251878653e-06, "loss": 0.883, "step": 2887 }, { "epoch": 0.7232657150012521, "grad_norm": 0.380859375, "learning_rate": 6.150848872808238e-06, "loss": 0.9538, "step": 2888 }, { "epoch": 0.7235161532682194, "grad_norm": 0.36328125, "learning_rate": 6.145282493737824e-06, "loss": 0.9877, "step": 2889 }, { "epoch": 0.7237665915351866, "grad_norm": 0.390625, "learning_rate": 6.139716114667409e-06, "loss": 0.8913, "step": 2890 }, { "epoch": 0.7240170298021538, "grad_norm": 0.34375, "learning_rate": 6.134149735596995e-06, "loss": 0.9508, "step": 2891 }, { "epoch": 0.724267468069121, "grad_norm": 0.376953125, "learning_rate": 6.1285833565265795e-06, "loss": 0.9672, "step": 2892 }, { "epoch": 0.7245179063360881, "grad_norm": 0.36328125, "learning_rate": 6.123016977456165e-06, "loss": 0.9175, "step": 2893 }, { "epoch": 0.7247683446030554, "grad_norm": 0.359375, "learning_rate": 6.11745059838575e-06, "loss": 1.0413, "step": 2894 }, { "epoch": 0.7250187828700225, "grad_norm": 0.359375, "learning_rate": 6.111884219315336e-06, "loss": 0.9426, "step": 2895 }, { "epoch": 0.7252692211369898, "grad_norm": 0.34375, "learning_rate": 6.106317840244921e-06, "loss": 1.0984, "step": 2896 }, { "epoch": 0.7255196594039569, "grad_norm": 0.353515625, "learning_rate": 6.100751461174507e-06, "loss": 0.9396, "step": 2897 }, { "epoch": 0.7257700976709242, "grad_norm": 0.37109375, "learning_rate": 6.0951850821040914e-06, "loss": 0.8809, "step": 2898 }, { "epoch": 0.7260205359378913, "grad_norm": 0.388671875, "learning_rate": 6.089618703033677e-06, "loss": 0.7562, "step": 2899 }, { "epoch": 0.7262709742048585, "grad_norm": 0.373046875, "learning_rate": 6.084052323963262e-06, "loss": 0.9204, "step": 2900 }, { "epoch": 0.7265214124718257, "grad_norm": 0.361328125, "learning_rate": 6.078485944892848e-06, "loss": 0.8751, "step": 2901 }, { "epoch": 0.7267718507387929, "grad_norm": 0.37109375, "learning_rate": 6.072919565822433e-06, "loss": 1.1465, "step": 2902 }, { "epoch": 0.72702228900576, "grad_norm": 0.390625, "learning_rate": 6.067353186752019e-06, "loss": 0.9794, "step": 2903 }, { "epoch": 0.7272727272727273, "grad_norm": 0.3828125, "learning_rate": 6.061786807681603e-06, "loss": 1.2447, "step": 2904 }, { "epoch": 0.7275231655396944, "grad_norm": 0.380859375, "learning_rate": 6.056220428611189e-06, "loss": 0.9492, "step": 2905 }, { "epoch": 0.7277736038066617, "grad_norm": 0.3671875, "learning_rate": 6.050654049540774e-06, "loss": 0.8534, "step": 2906 }, { "epoch": 0.7280240420736288, "grad_norm": 0.369140625, "learning_rate": 6.045087670470359e-06, "loss": 0.9769, "step": 2907 }, { "epoch": 0.728274480340596, "grad_norm": 0.375, "learning_rate": 6.039521291399944e-06, "loss": 1.0071, "step": 2908 }, { "epoch": 0.7285249186075632, "grad_norm": 0.375, "learning_rate": 6.033954912329531e-06, "loss": 0.9012, "step": 2909 }, { "epoch": 0.7287753568745304, "grad_norm": 0.404296875, "learning_rate": 6.028388533259115e-06, "loss": 0.9251, "step": 2910 }, { "epoch": 0.7290257951414976, "grad_norm": 0.373046875, "learning_rate": 6.022822154188701e-06, "loss": 0.7471, "step": 2911 }, { "epoch": 0.7292762334084648, "grad_norm": 0.369140625, "learning_rate": 6.017255775118286e-06, "loss": 0.8312, "step": 2912 }, { "epoch": 0.7295266716754321, "grad_norm": 0.392578125, "learning_rate": 6.011689396047871e-06, "loss": 0.8807, "step": 2913 }, { "epoch": 0.7297771099423992, "grad_norm": 0.376953125, "learning_rate": 6.006123016977456e-06, "loss": 0.9215, "step": 2914 }, { "epoch": 0.7300275482093664, "grad_norm": 0.373046875, "learning_rate": 6.000556637907043e-06, "loss": 0.9363, "step": 2915 }, { "epoch": 0.7302779864763336, "grad_norm": 0.390625, "learning_rate": 5.994990258836627e-06, "loss": 0.9935, "step": 2916 }, { "epoch": 0.7305284247433008, "grad_norm": 0.349609375, "learning_rate": 5.989423879766213e-06, "loss": 0.8057, "step": 2917 }, { "epoch": 0.730778863010268, "grad_norm": 0.353515625, "learning_rate": 5.983857500695798e-06, "loss": 0.8494, "step": 2918 }, { "epoch": 0.7310293012772352, "grad_norm": 0.3828125, "learning_rate": 5.978291121625383e-06, "loss": 0.8693, "step": 2919 }, { "epoch": 0.7312797395442023, "grad_norm": 0.37890625, "learning_rate": 5.972724742554968e-06, "loss": 0.9245, "step": 2920 }, { "epoch": 0.7315301778111696, "grad_norm": 0.35546875, "learning_rate": 5.967158363484555e-06, "loss": 0.9749, "step": 2921 }, { "epoch": 0.7317806160781367, "grad_norm": 0.490234375, "learning_rate": 5.961591984414139e-06, "loss": 0.9118, "step": 2922 }, { "epoch": 0.732031054345104, "grad_norm": 0.341796875, "learning_rate": 5.956025605343725e-06, "loss": 0.9811, "step": 2923 }, { "epoch": 0.7322814926120711, "grad_norm": 0.328125, "learning_rate": 5.95045922627331e-06, "loss": 0.896, "step": 2924 }, { "epoch": 0.7325319308790383, "grad_norm": 0.4453125, "learning_rate": 5.944892847202895e-06, "loss": 1.0302, "step": 2925 }, { "epoch": 0.7327823691460055, "grad_norm": 0.365234375, "learning_rate": 5.93932646813248e-06, "loss": 1.0271, "step": 2926 }, { "epoch": 0.7330328074129727, "grad_norm": 0.349609375, "learning_rate": 5.933760089062066e-06, "loss": 0.8853, "step": 2927 }, { "epoch": 0.7332832456799399, "grad_norm": 0.36328125, "learning_rate": 5.9281937099916505e-06, "loss": 0.9591, "step": 2928 }, { "epoch": 0.7335336839469071, "grad_norm": 0.392578125, "learning_rate": 5.922627330921237e-06, "loss": 0.9198, "step": 2929 }, { "epoch": 0.7337841222138742, "grad_norm": 0.376953125, "learning_rate": 5.917060951850822e-06, "loss": 1.1049, "step": 2930 }, { "epoch": 0.7340345604808415, "grad_norm": 0.376953125, "learning_rate": 5.9114945727804065e-06, "loss": 0.8966, "step": 2931 }, { "epoch": 0.7342849987478086, "grad_norm": 0.35546875, "learning_rate": 5.905928193709992e-06, "loss": 0.8952, "step": 2932 }, { "epoch": 0.7345354370147759, "grad_norm": 0.42578125, "learning_rate": 5.900361814639577e-06, "loss": 0.9956, "step": 2933 }, { "epoch": 0.734785875281743, "grad_norm": 0.373046875, "learning_rate": 5.8947954355691625e-06, "loss": 0.9249, "step": 2934 }, { "epoch": 0.7350363135487102, "grad_norm": 0.322265625, "learning_rate": 5.889229056498747e-06, "loss": 0.7478, "step": 2935 }, { "epoch": 0.7352867518156775, "grad_norm": 0.40234375, "learning_rate": 5.883662677428334e-06, "loss": 0.8453, "step": 2936 }, { "epoch": 0.7355371900826446, "grad_norm": 0.384765625, "learning_rate": 5.8780962983579184e-06, "loss": 0.9685, "step": 2937 }, { "epoch": 0.7357876283496119, "grad_norm": 0.43359375, "learning_rate": 5.872529919287504e-06, "loss": 0.8894, "step": 2938 }, { "epoch": 0.736038066616579, "grad_norm": 0.40234375, "learning_rate": 5.866963540217089e-06, "loss": 0.9645, "step": 2939 }, { "epoch": 0.7362885048835462, "grad_norm": 0.376953125, "learning_rate": 5.8613971611466744e-06, "loss": 0.9797, "step": 2940 }, { "epoch": 0.7365389431505134, "grad_norm": 0.349609375, "learning_rate": 5.855830782076259e-06, "loss": 0.8722, "step": 2941 }, { "epoch": 0.7367893814174806, "grad_norm": 0.359375, "learning_rate": 5.850264403005846e-06, "loss": 0.876, "step": 2942 }, { "epoch": 0.7370398196844478, "grad_norm": 0.40234375, "learning_rate": 5.84469802393543e-06, "loss": 1.0827, "step": 2943 }, { "epoch": 0.737290257951415, "grad_norm": 0.376953125, "learning_rate": 5.839131644865016e-06, "loss": 1.0642, "step": 2944 }, { "epoch": 0.7375406962183821, "grad_norm": 0.390625, "learning_rate": 5.833565265794601e-06, "loss": 0.9518, "step": 2945 }, { "epoch": 0.7377911344853494, "grad_norm": 0.40234375, "learning_rate": 5.827998886724186e-06, "loss": 1.0627, "step": 2946 }, { "epoch": 0.7380415727523165, "grad_norm": 0.318359375, "learning_rate": 5.822432507653771e-06, "loss": 0.8656, "step": 2947 }, { "epoch": 0.7382920110192838, "grad_norm": 0.421875, "learning_rate": 5.816866128583357e-06, "loss": 0.7071, "step": 2948 }, { "epoch": 0.7385424492862509, "grad_norm": 0.41796875, "learning_rate": 5.8112997495129415e-06, "loss": 0.9229, "step": 2949 }, { "epoch": 0.7387928875532181, "grad_norm": 0.361328125, "learning_rate": 5.805733370442528e-06, "loss": 0.876, "step": 2950 }, { "epoch": 0.7390433258201853, "grad_norm": 0.408203125, "learning_rate": 5.800166991372113e-06, "loss": 0.9336, "step": 2951 }, { "epoch": 0.7392937640871525, "grad_norm": 0.37109375, "learning_rate": 5.794600612301698e-06, "loss": 0.9781, "step": 2952 }, { "epoch": 0.7395442023541197, "grad_norm": 0.337890625, "learning_rate": 5.789034233231283e-06, "loss": 0.9271, "step": 2953 }, { "epoch": 0.7397946406210869, "grad_norm": 0.36328125, "learning_rate": 5.783467854160869e-06, "loss": 0.9348, "step": 2954 }, { "epoch": 0.740045078888054, "grad_norm": 0.412109375, "learning_rate": 5.7779014750904535e-06, "loss": 1.0053, "step": 2955 }, { "epoch": 0.7402955171550213, "grad_norm": 0.376953125, "learning_rate": 5.77233509602004e-06, "loss": 0.9355, "step": 2956 }, { "epoch": 0.7405459554219885, "grad_norm": 0.421875, "learning_rate": 5.766768716949625e-06, "loss": 0.9964, "step": 2957 }, { "epoch": 0.7407963936889557, "grad_norm": 0.390625, "learning_rate": 5.76120233787921e-06, "loss": 0.9972, "step": 2958 }, { "epoch": 0.7410468319559229, "grad_norm": 0.38671875, "learning_rate": 5.755635958808795e-06, "loss": 0.8741, "step": 2959 }, { "epoch": 0.74129727022289, "grad_norm": 0.38671875, "learning_rate": 5.750069579738381e-06, "loss": 0.9284, "step": 2960 }, { "epoch": 0.7415477084898573, "grad_norm": 0.37109375, "learning_rate": 5.7445032006679655e-06, "loss": 1.1091, "step": 2961 }, { "epoch": 0.7417981467568244, "grad_norm": 0.462890625, "learning_rate": 5.738936821597552e-06, "loss": 0.8855, "step": 2962 }, { "epoch": 0.7420485850237917, "grad_norm": 0.357421875, "learning_rate": 5.733370442527137e-06, "loss": 1.0994, "step": 2963 }, { "epoch": 0.7422990232907588, "grad_norm": 0.40625, "learning_rate": 5.727804063456722e-06, "loss": 0.9763, "step": 2964 }, { "epoch": 0.742549461557726, "grad_norm": 0.375, "learning_rate": 5.722237684386307e-06, "loss": 0.9069, "step": 2965 }, { "epoch": 0.7427998998246932, "grad_norm": 0.390625, "learning_rate": 5.716671305315893e-06, "loss": 1.0319, "step": 2966 }, { "epoch": 0.7430503380916604, "grad_norm": 0.34765625, "learning_rate": 5.7111049262454775e-06, "loss": 0.8455, "step": 2967 }, { "epoch": 0.7433007763586276, "grad_norm": 0.4140625, "learning_rate": 5.705538547175063e-06, "loss": 1.0452, "step": 2968 }, { "epoch": 0.7435512146255948, "grad_norm": 0.359375, "learning_rate": 5.699972168104648e-06, "loss": 0.687, "step": 2969 }, { "epoch": 0.743801652892562, "grad_norm": 0.365234375, "learning_rate": 5.694405789034234e-06, "loss": 0.8198, "step": 2970 }, { "epoch": 0.7440520911595292, "grad_norm": 0.392578125, "learning_rate": 5.688839409963819e-06, "loss": 1.0179, "step": 2971 }, { "epoch": 0.7443025294264963, "grad_norm": 0.361328125, "learning_rate": 5.683273030893405e-06, "loss": 1.0469, "step": 2972 }, { "epoch": 0.7445529676934636, "grad_norm": 0.345703125, "learning_rate": 5.6777066518229895e-06, "loss": 0.9033, "step": 2973 }, { "epoch": 0.7448034059604307, "grad_norm": 0.373046875, "learning_rate": 5.672140272752575e-06, "loss": 0.8783, "step": 2974 }, { "epoch": 0.745053844227398, "grad_norm": 0.38671875, "learning_rate": 5.66657389368216e-06, "loss": 0.9086, "step": 2975 }, { "epoch": 0.7453042824943651, "grad_norm": 0.39453125, "learning_rate": 5.661007514611746e-06, "loss": 0.8295, "step": 2976 }, { "epoch": 0.7455547207613323, "grad_norm": 0.41796875, "learning_rate": 5.655441135541331e-06, "loss": 0.7921, "step": 2977 }, { "epoch": 0.7458051590282995, "grad_norm": 0.33203125, "learning_rate": 5.649874756470917e-06, "loss": 0.9258, "step": 2978 }, { "epoch": 0.7460555972952667, "grad_norm": 0.369140625, "learning_rate": 5.6443083774005014e-06, "loss": 0.9215, "step": 2979 }, { "epoch": 0.746306035562234, "grad_norm": 0.330078125, "learning_rate": 5.638741998330087e-06, "loss": 1.0464, "step": 2980 }, { "epoch": 0.7465564738292011, "grad_norm": 0.373046875, "learning_rate": 5.633175619259672e-06, "loss": 0.9217, "step": 2981 }, { "epoch": 0.7468069120961683, "grad_norm": 0.37890625, "learning_rate": 5.6276092401892566e-06, "loss": 1.0268, "step": 2982 }, { "epoch": 0.7470573503631355, "grad_norm": 0.37890625, "learning_rate": 5.622042861118843e-06, "loss": 0.9547, "step": 2983 }, { "epoch": 0.7473077886301027, "grad_norm": 0.373046875, "learning_rate": 5.616476482048428e-06, "loss": 1.071, "step": 2984 }, { "epoch": 0.7475582268970699, "grad_norm": 0.349609375, "learning_rate": 5.610910102978013e-06, "loss": 0.8709, "step": 2985 }, { "epoch": 0.7478086651640371, "grad_norm": 0.375, "learning_rate": 5.605343723907598e-06, "loss": 1.0117, "step": 2986 }, { "epoch": 0.7480591034310042, "grad_norm": 0.400390625, "learning_rate": 5.599777344837184e-06, "loss": 0.8699, "step": 2987 }, { "epoch": 0.7483095416979715, "grad_norm": 0.4140625, "learning_rate": 5.5942109657667685e-06, "loss": 0.9761, "step": 2988 }, { "epoch": 0.7485599799649386, "grad_norm": 0.388671875, "learning_rate": 5.588644586696354e-06, "loss": 1.0118, "step": 2989 }, { "epoch": 0.7488104182319059, "grad_norm": 0.359375, "learning_rate": 5.583078207625939e-06, "loss": 1.1591, "step": 2990 }, { "epoch": 0.749060856498873, "grad_norm": 0.390625, "learning_rate": 5.577511828555525e-06, "loss": 0.9077, "step": 2991 }, { "epoch": 0.7493112947658402, "grad_norm": 0.390625, "learning_rate": 5.57194544948511e-06, "loss": 1.0208, "step": 2992 }, { "epoch": 0.7495617330328074, "grad_norm": 0.3515625, "learning_rate": 5.566379070414696e-06, "loss": 1.0566, "step": 2993 }, { "epoch": 0.7498121712997746, "grad_norm": 0.373046875, "learning_rate": 5.5608126913442805e-06, "loss": 0.84, "step": 2994 }, { "epoch": 0.7500626095667418, "grad_norm": 0.376953125, "learning_rate": 5.555246312273866e-06, "loss": 1.0197, "step": 2995 }, { "epoch": 0.750313047833709, "grad_norm": 0.361328125, "learning_rate": 5.549679933203451e-06, "loss": 0.9373, "step": 2996 }, { "epoch": 0.7505634861006761, "grad_norm": 0.3671875, "learning_rate": 5.544113554133037e-06, "loss": 0.9534, "step": 2997 }, { "epoch": 0.7508139243676434, "grad_norm": 0.37890625, "learning_rate": 5.538547175062622e-06, "loss": 0.8877, "step": 2998 }, { "epoch": 0.7510643626346105, "grad_norm": 0.345703125, "learning_rate": 5.532980795992208e-06, "loss": 0.9317, "step": 2999 }, { "epoch": 0.7513148009015778, "grad_norm": 0.3671875, "learning_rate": 5.5274144169217925e-06, "loss": 0.9607, "step": 3000 }, { "epoch": 0.751565239168545, "grad_norm": 0.36328125, "learning_rate": 5.521848037851378e-06, "loss": 0.9948, "step": 3001 }, { "epoch": 0.7518156774355121, "grad_norm": 0.423828125, "learning_rate": 5.516281658780963e-06, "loss": 1.0135, "step": 3002 }, { "epoch": 0.7520661157024794, "grad_norm": 0.37109375, "learning_rate": 5.510715279710549e-06, "loss": 0.9374, "step": 3003 }, { "epoch": 0.7523165539694465, "grad_norm": 0.396484375, "learning_rate": 5.505148900640134e-06, "loss": 1.017, "step": 3004 }, { "epoch": 0.7525669922364138, "grad_norm": 0.365234375, "learning_rate": 5.49958252156972e-06, "loss": 1.0479, "step": 3005 }, { "epoch": 0.7528174305033809, "grad_norm": 0.34375, "learning_rate": 5.4940161424993045e-06, "loss": 1.0202, "step": 3006 }, { "epoch": 0.7530678687703481, "grad_norm": 0.37109375, "learning_rate": 5.48844976342889e-06, "loss": 0.9623, "step": 3007 }, { "epoch": 0.7533183070373153, "grad_norm": 0.3515625, "learning_rate": 5.482883384358475e-06, "loss": 1.0058, "step": 3008 }, { "epoch": 0.7535687453042825, "grad_norm": 0.359375, "learning_rate": 5.4773170052880605e-06, "loss": 0.9383, "step": 3009 }, { "epoch": 0.7538191835712497, "grad_norm": 0.46484375, "learning_rate": 5.471750626217645e-06, "loss": 0.7022, "step": 3010 }, { "epoch": 0.7540696218382169, "grad_norm": 0.322265625, "learning_rate": 5.466184247147232e-06, "loss": 0.9368, "step": 3011 }, { "epoch": 0.754320060105184, "grad_norm": 0.37109375, "learning_rate": 5.4606178680768165e-06, "loss": 1.174, "step": 3012 }, { "epoch": 0.7545704983721513, "grad_norm": 0.359375, "learning_rate": 5.455051489006402e-06, "loss": 0.7647, "step": 3013 }, { "epoch": 0.7548209366391184, "grad_norm": 0.396484375, "learning_rate": 5.449485109935987e-06, "loss": 1.1069, "step": 3014 }, { "epoch": 0.7550713749060857, "grad_norm": 0.37890625, "learning_rate": 5.4439187308655724e-06, "loss": 0.7777, "step": 3015 }, { "epoch": 0.7553218131730528, "grad_norm": 0.419921875, "learning_rate": 5.438352351795157e-06, "loss": 0.8624, "step": 3016 }, { "epoch": 0.75557225144002, "grad_norm": 0.40625, "learning_rate": 5.432785972724744e-06, "loss": 0.9614, "step": 3017 }, { "epoch": 0.7558226897069872, "grad_norm": 0.435546875, "learning_rate": 5.4272195936543284e-06, "loss": 0.9508, "step": 3018 }, { "epoch": 0.7560731279739544, "grad_norm": 0.3515625, "learning_rate": 5.421653214583914e-06, "loss": 0.9999, "step": 3019 }, { "epoch": 0.7563235662409216, "grad_norm": 0.359375, "learning_rate": 5.416086835513499e-06, "loss": 0.8208, "step": 3020 }, { "epoch": 0.7565740045078888, "grad_norm": 0.34765625, "learning_rate": 5.410520456443084e-06, "loss": 0.7984, "step": 3021 }, { "epoch": 0.7568244427748559, "grad_norm": 0.404296875, "learning_rate": 5.404954077372669e-06, "loss": 0.7869, "step": 3022 }, { "epoch": 0.7570748810418232, "grad_norm": 0.369140625, "learning_rate": 5.399387698302256e-06, "loss": 0.8086, "step": 3023 }, { "epoch": 0.7573253193087904, "grad_norm": 0.328125, "learning_rate": 5.39382131923184e-06, "loss": 0.8312, "step": 3024 }, { "epoch": 0.7575757575757576, "grad_norm": 0.3984375, "learning_rate": 5.388254940161426e-06, "loss": 0.9219, "step": 3025 }, { "epoch": 0.7578261958427248, "grad_norm": 0.376953125, "learning_rate": 5.382688561091011e-06, "loss": 0.7537, "step": 3026 }, { "epoch": 0.758076634109692, "grad_norm": 0.43359375, "learning_rate": 5.377122182020596e-06, "loss": 0.9406, "step": 3027 }, { "epoch": 0.7583270723766592, "grad_norm": 0.345703125, "learning_rate": 5.371555802950181e-06, "loss": 0.7574, "step": 3028 }, { "epoch": 0.7585775106436263, "grad_norm": 0.404296875, "learning_rate": 5.365989423879767e-06, "loss": 1.038, "step": 3029 }, { "epoch": 0.7588279489105936, "grad_norm": 0.369140625, "learning_rate": 5.3604230448093515e-06, "loss": 0.8461, "step": 3030 }, { "epoch": 0.7590783871775607, "grad_norm": 0.427734375, "learning_rate": 5.354856665738938e-06, "loss": 0.7644, "step": 3031 }, { "epoch": 0.759328825444528, "grad_norm": 0.35546875, "learning_rate": 5.349290286668523e-06, "loss": 0.8296, "step": 3032 }, { "epoch": 0.7595792637114951, "grad_norm": 0.40625, "learning_rate": 5.3437239075981075e-06, "loss": 0.9905, "step": 3033 }, { "epoch": 0.7598297019784623, "grad_norm": 0.33984375, "learning_rate": 5.338157528527693e-06, "loss": 0.9112, "step": 3034 }, { "epoch": 0.7600801402454295, "grad_norm": 0.404296875, "learning_rate": 5.332591149457278e-06, "loss": 0.8899, "step": 3035 }, { "epoch": 0.7603305785123967, "grad_norm": 0.369140625, "learning_rate": 5.3270247703868635e-06, "loss": 1.0252, "step": 3036 }, { "epoch": 0.7605810167793638, "grad_norm": 0.375, "learning_rate": 5.321458391316448e-06, "loss": 0.9248, "step": 3037 }, { "epoch": 0.7608314550463311, "grad_norm": 0.375, "learning_rate": 5.315892012246035e-06, "loss": 0.8194, "step": 3038 }, { "epoch": 0.7610818933132982, "grad_norm": 0.39453125, "learning_rate": 5.3103256331756195e-06, "loss": 1.1127, "step": 3039 }, { "epoch": 0.7613323315802655, "grad_norm": 0.375, "learning_rate": 5.304759254105205e-06, "loss": 0.9079, "step": 3040 }, { "epoch": 0.7615827698472326, "grad_norm": 0.39453125, "learning_rate": 5.29919287503479e-06, "loss": 0.8569, "step": 3041 }, { "epoch": 0.7618332081141999, "grad_norm": 0.37890625, "learning_rate": 5.2936264959643755e-06, "loss": 0.9737, "step": 3042 }, { "epoch": 0.762083646381167, "grad_norm": 0.369140625, "learning_rate": 5.28806011689396e-06, "loss": 0.8756, "step": 3043 }, { "epoch": 0.7623340846481342, "grad_norm": 0.359375, "learning_rate": 5.282493737823547e-06, "loss": 0.9028, "step": 3044 }, { "epoch": 0.7625845229151015, "grad_norm": 0.36328125, "learning_rate": 5.2769273587531315e-06, "loss": 1.092, "step": 3045 }, { "epoch": 0.7628349611820686, "grad_norm": 0.396484375, "learning_rate": 5.271360979682717e-06, "loss": 1.0488, "step": 3046 }, { "epoch": 0.7630853994490359, "grad_norm": 0.39453125, "learning_rate": 5.265794600612302e-06, "loss": 0.9557, "step": 3047 }, { "epoch": 0.763335837716003, "grad_norm": 0.388671875, "learning_rate": 5.2602282215418875e-06, "loss": 0.9136, "step": 3048 }, { "epoch": 0.7635862759829702, "grad_norm": 0.3828125, "learning_rate": 5.254661842471472e-06, "loss": 0.9767, "step": 3049 }, { "epoch": 0.7638367142499374, "grad_norm": 0.4140625, "learning_rate": 5.249095463401058e-06, "loss": 1.0298, "step": 3050 }, { "epoch": 0.7640871525169046, "grad_norm": 0.39453125, "learning_rate": 5.243529084330643e-06, "loss": 1.123, "step": 3051 }, { "epoch": 0.7643375907838718, "grad_norm": 0.37890625, "learning_rate": 5.237962705260229e-06, "loss": 0.7218, "step": 3052 }, { "epoch": 0.764588029050839, "grad_norm": 0.375, "learning_rate": 5.232396326189814e-06, "loss": 0.9711, "step": 3053 }, { "epoch": 0.7648384673178061, "grad_norm": 0.400390625, "learning_rate": 5.2268299471193994e-06, "loss": 1.0244, "step": 3054 }, { "epoch": 0.7650889055847734, "grad_norm": 0.345703125, "learning_rate": 5.221263568048984e-06, "loss": 0.953, "step": 3055 }, { "epoch": 0.7653393438517405, "grad_norm": 0.375, "learning_rate": 5.21569718897857e-06, "loss": 0.868, "step": 3056 }, { "epoch": 0.7655897821187078, "grad_norm": 0.431640625, "learning_rate": 5.210130809908155e-06, "loss": 0.8782, "step": 3057 }, { "epoch": 0.7658402203856749, "grad_norm": 0.388671875, "learning_rate": 5.204564430837741e-06, "loss": 0.8075, "step": 3058 }, { "epoch": 0.7660906586526421, "grad_norm": 0.361328125, "learning_rate": 5.198998051767326e-06, "loss": 0.8932, "step": 3059 }, { "epoch": 0.7663410969196093, "grad_norm": 0.390625, "learning_rate": 5.193431672696911e-06, "loss": 0.9441, "step": 3060 }, { "epoch": 0.7665915351865765, "grad_norm": 0.38671875, "learning_rate": 5.187865293626496e-06, "loss": 0.8879, "step": 3061 }, { "epoch": 0.7668419734535437, "grad_norm": 0.375, "learning_rate": 5.182298914556082e-06, "loss": 0.9682, "step": 3062 }, { "epoch": 0.7670924117205109, "grad_norm": 0.3515625, "learning_rate": 5.1767325354856666e-06, "loss": 1.1116, "step": 3063 }, { "epoch": 0.767342849987478, "grad_norm": 0.388671875, "learning_rate": 5.171166156415253e-06, "loss": 0.9237, "step": 3064 }, { "epoch": 0.7675932882544453, "grad_norm": 0.390625, "learning_rate": 5.165599777344838e-06, "loss": 1.075, "step": 3065 }, { "epoch": 0.7678437265214124, "grad_norm": 0.45703125, "learning_rate": 5.160033398274423e-06, "loss": 0.8286, "step": 3066 }, { "epoch": 0.7680941647883797, "grad_norm": 0.376953125, "learning_rate": 5.154467019204008e-06, "loss": 0.9604, "step": 3067 }, { "epoch": 0.7683446030553469, "grad_norm": 0.333984375, "learning_rate": 5.148900640133594e-06, "loss": 1.0125, "step": 3068 }, { "epoch": 0.768595041322314, "grad_norm": 0.3359375, "learning_rate": 5.1433342610631785e-06, "loss": 0.943, "step": 3069 }, { "epoch": 0.7688454795892813, "grad_norm": 0.337890625, "learning_rate": 5.137767881992764e-06, "loss": 0.9245, "step": 3070 }, { "epoch": 0.7690959178562484, "grad_norm": 0.392578125, "learning_rate": 5.132201502922349e-06, "loss": 0.9006, "step": 3071 }, { "epoch": 0.7693463561232157, "grad_norm": 0.38671875, "learning_rate": 5.126635123851935e-06, "loss": 0.9022, "step": 3072 }, { "epoch": 0.7695967943901828, "grad_norm": 0.3984375, "learning_rate": 5.12106874478152e-06, "loss": 0.9915, "step": 3073 }, { "epoch": 0.76984723265715, "grad_norm": 0.369140625, "learning_rate": 5.115502365711106e-06, "loss": 0.8796, "step": 3074 }, { "epoch": 0.7700976709241172, "grad_norm": 0.400390625, "learning_rate": 5.1099359866406905e-06, "loss": 0.826, "step": 3075 }, { "epoch": 0.7703481091910844, "grad_norm": 0.373046875, "learning_rate": 5.104369607570276e-06, "loss": 0.9669, "step": 3076 }, { "epoch": 0.7705985474580516, "grad_norm": 0.365234375, "learning_rate": 5.098803228499861e-06, "loss": 0.7445, "step": 3077 }, { "epoch": 0.7708489857250188, "grad_norm": 0.357421875, "learning_rate": 5.093236849429447e-06, "loss": 0.9131, "step": 3078 }, { "epoch": 0.7710994239919859, "grad_norm": 0.365234375, "learning_rate": 5.087670470359032e-06, "loss": 0.9291, "step": 3079 }, { "epoch": 0.7713498622589532, "grad_norm": 0.369140625, "learning_rate": 5.082104091288618e-06, "loss": 0.9775, "step": 3080 }, { "epoch": 0.7716003005259203, "grad_norm": 0.365234375, "learning_rate": 5.0765377122182025e-06, "loss": 1.0064, "step": 3081 }, { "epoch": 0.7718507387928876, "grad_norm": 0.353515625, "learning_rate": 5.070971333147788e-06, "loss": 0.8227, "step": 3082 }, { "epoch": 0.7721011770598547, "grad_norm": 0.37890625, "learning_rate": 5.065404954077373e-06, "loss": 0.8632, "step": 3083 }, { "epoch": 0.772351615326822, "grad_norm": 0.3828125, "learning_rate": 5.059838575006958e-06, "loss": 0.9023, "step": 3084 }, { "epoch": 0.7726020535937891, "grad_norm": 0.369140625, "learning_rate": 5.054272195936544e-06, "loss": 0.9433, "step": 3085 }, { "epoch": 0.7728524918607563, "grad_norm": 0.3828125, "learning_rate": 5.048705816866129e-06, "loss": 1.0254, "step": 3086 }, { "epoch": 0.7731029301277235, "grad_norm": 0.37109375, "learning_rate": 5.0431394377957145e-06, "loss": 0.7525, "step": 3087 }, { "epoch": 0.7733533683946907, "grad_norm": 0.39453125, "learning_rate": 5.037573058725299e-06, "loss": 0.9539, "step": 3088 }, { "epoch": 0.773603806661658, "grad_norm": 0.3828125, "learning_rate": 5.032006679654885e-06, "loss": 0.9415, "step": 3089 }, { "epoch": 0.7738542449286251, "grad_norm": 0.390625, "learning_rate": 5.02644030058447e-06, "loss": 0.96, "step": 3090 }, { "epoch": 0.7741046831955923, "grad_norm": 0.36328125, "learning_rate": 5.020873921514055e-06, "loss": 0.9591, "step": 3091 }, { "epoch": 0.7743551214625595, "grad_norm": 0.35546875, "learning_rate": 5.01530754244364e-06, "loss": 0.9321, "step": 3092 }, { "epoch": 0.7746055597295267, "grad_norm": 0.345703125, "learning_rate": 5.0097411633732264e-06, "loss": 0.8776, "step": 3093 }, { "epoch": 0.7748559979964939, "grad_norm": 0.349609375, "learning_rate": 5.004174784302811e-06, "loss": 0.9699, "step": 3094 }, { "epoch": 0.7751064362634611, "grad_norm": 0.388671875, "learning_rate": 4.998608405232397e-06, "loss": 0.9587, "step": 3095 }, { "epoch": 0.7753568745304282, "grad_norm": 0.361328125, "learning_rate": 4.9930420261619824e-06, "loss": 0.804, "step": 3096 }, { "epoch": 0.7756073127973955, "grad_norm": 0.35546875, "learning_rate": 4.987475647091567e-06, "loss": 0.9202, "step": 3097 }, { "epoch": 0.7758577510643626, "grad_norm": 0.349609375, "learning_rate": 4.981909268021153e-06, "loss": 0.9297, "step": 3098 }, { "epoch": 0.7761081893313299, "grad_norm": 0.353515625, "learning_rate": 4.976342888950738e-06, "loss": 0.9717, "step": 3099 }, { "epoch": 0.776358627598297, "grad_norm": 0.359375, "learning_rate": 4.970776509880323e-06, "loss": 0.7925, "step": 3100 }, { "epoch": 0.7766090658652642, "grad_norm": 0.36328125, "learning_rate": 4.965210130809909e-06, "loss": 1.0062, "step": 3101 }, { "epoch": 0.7768595041322314, "grad_norm": 0.37890625, "learning_rate": 4.959643751739494e-06, "loss": 0.9941, "step": 3102 }, { "epoch": 0.7771099423991986, "grad_norm": 0.353515625, "learning_rate": 4.954077372669079e-06, "loss": 0.961, "step": 3103 }, { "epoch": 0.7773603806661658, "grad_norm": 0.361328125, "learning_rate": 4.948510993598665e-06, "loss": 0.9252, "step": 3104 }, { "epoch": 0.777610818933133, "grad_norm": 0.35546875, "learning_rate": 4.94294461452825e-06, "loss": 0.9969, "step": 3105 }, { "epoch": 0.7778612572001001, "grad_norm": 0.359375, "learning_rate": 4.937378235457835e-06, "loss": 0.897, "step": 3106 }, { "epoch": 0.7781116954670674, "grad_norm": 0.345703125, "learning_rate": 4.931811856387421e-06, "loss": 0.9958, "step": 3107 }, { "epoch": 0.7783621337340345, "grad_norm": 0.3828125, "learning_rate": 4.9262454773170055e-06, "loss": 0.855, "step": 3108 }, { "epoch": 0.7786125720010018, "grad_norm": 0.3828125, "learning_rate": 4.920679098246591e-06, "loss": 0.7568, "step": 3109 }, { "epoch": 0.7788630102679689, "grad_norm": 0.36328125, "learning_rate": 4.915112719176176e-06, "loss": 0.8311, "step": 3110 }, { "epoch": 0.7791134485349361, "grad_norm": 0.35546875, "learning_rate": 4.9095463401057615e-06, "loss": 0.9239, "step": 3111 }, { "epoch": 0.7793638868019034, "grad_norm": 0.388671875, "learning_rate": 4.903979961035346e-06, "loss": 0.9346, "step": 3112 }, { "epoch": 0.7796143250688705, "grad_norm": 0.37109375, "learning_rate": 4.898413581964932e-06, "loss": 1.0148, "step": 3113 }, { "epoch": 0.7798647633358378, "grad_norm": 0.5234375, "learning_rate": 4.8928472028945175e-06, "loss": 0.9585, "step": 3114 }, { "epoch": 0.7801152016028049, "grad_norm": 0.392578125, "learning_rate": 4.887280823824102e-06, "loss": 0.869, "step": 3115 }, { "epoch": 0.7803656398697721, "grad_norm": 0.416015625, "learning_rate": 4.881714444753688e-06, "loss": 1.0159, "step": 3116 }, { "epoch": 0.7806160781367393, "grad_norm": 0.3828125, "learning_rate": 4.8761480656832735e-06, "loss": 0.9437, "step": 3117 }, { "epoch": 0.7808665164037065, "grad_norm": 0.375, "learning_rate": 4.870581686612858e-06, "loss": 1.0001, "step": 3118 }, { "epoch": 0.7811169546706737, "grad_norm": 0.365234375, "learning_rate": 4.865015307542444e-06, "loss": 0.8845, "step": 3119 }, { "epoch": 0.7813673929376409, "grad_norm": 0.37109375, "learning_rate": 4.8594489284720295e-06, "loss": 0.9217, "step": 3120 }, { "epoch": 0.781617831204608, "grad_norm": 0.373046875, "learning_rate": 4.853882549401614e-06, "loss": 0.9979, "step": 3121 }, { "epoch": 0.7818682694715753, "grad_norm": 0.380859375, "learning_rate": 4.8483161703312e-06, "loss": 0.9456, "step": 3122 }, { "epoch": 0.7821187077385424, "grad_norm": 0.365234375, "learning_rate": 4.8427497912607855e-06, "loss": 0.8291, "step": 3123 }, { "epoch": 0.7823691460055097, "grad_norm": 0.3671875, "learning_rate": 4.83718341219037e-06, "loss": 1.0128, "step": 3124 }, { "epoch": 0.7826195842724768, "grad_norm": 0.388671875, "learning_rate": 4.831617033119956e-06, "loss": 0.9936, "step": 3125 }, { "epoch": 0.782870022539444, "grad_norm": 0.37890625, "learning_rate": 4.8260506540495415e-06, "loss": 0.9388, "step": 3126 }, { "epoch": 0.7831204608064112, "grad_norm": 0.4140625, "learning_rate": 4.820484274979126e-06, "loss": 0.974, "step": 3127 }, { "epoch": 0.7833708990733784, "grad_norm": 0.37890625, "learning_rate": 4.814917895908712e-06, "loss": 1.0194, "step": 3128 }, { "epoch": 0.7836213373403456, "grad_norm": 0.361328125, "learning_rate": 4.8093515168382974e-06, "loss": 0.9894, "step": 3129 }, { "epoch": 0.7838717756073128, "grad_norm": 0.34765625, "learning_rate": 4.803785137767882e-06, "loss": 0.8131, "step": 3130 }, { "epoch": 0.7841222138742799, "grad_norm": 0.375, "learning_rate": 4.798218758697468e-06, "loss": 0.8935, "step": 3131 }, { "epoch": 0.7843726521412472, "grad_norm": 0.333984375, "learning_rate": 4.792652379627053e-06, "loss": 1.0811, "step": 3132 }, { "epoch": 0.7846230904082144, "grad_norm": 0.357421875, "learning_rate": 4.787086000556638e-06, "loss": 1.1045, "step": 3133 }, { "epoch": 0.7848735286751816, "grad_norm": 0.357421875, "learning_rate": 4.781519621486224e-06, "loss": 0.9452, "step": 3134 }, { "epoch": 0.7851239669421488, "grad_norm": 0.375, "learning_rate": 4.7759532424158086e-06, "loss": 0.7603, "step": 3135 }, { "epoch": 0.7853744052091159, "grad_norm": 0.375, "learning_rate": 4.770386863345394e-06, "loss": 0.9785, "step": 3136 }, { "epoch": 0.7856248434760832, "grad_norm": 0.361328125, "learning_rate": 4.76482048427498e-06, "loss": 0.7341, "step": 3137 }, { "epoch": 0.7858752817430503, "grad_norm": 0.419921875, "learning_rate": 4.7592541052045646e-06, "loss": 1.0327, "step": 3138 }, { "epoch": 0.7861257200100176, "grad_norm": 0.353515625, "learning_rate": 4.75368772613415e-06, "loss": 1.0495, "step": 3139 }, { "epoch": 0.7863761582769847, "grad_norm": 0.37890625, "learning_rate": 4.748121347063736e-06, "loss": 1.0208, "step": 3140 }, { "epoch": 0.786626596543952, "grad_norm": 0.333984375, "learning_rate": 4.7425549679933206e-06, "loss": 0.8319, "step": 3141 }, { "epoch": 0.7868770348109191, "grad_norm": 0.423828125, "learning_rate": 4.736988588922906e-06, "loss": 1.1015, "step": 3142 }, { "epoch": 0.7871274730778863, "grad_norm": 0.384765625, "learning_rate": 4.731422209852492e-06, "loss": 0.9865, "step": 3143 }, { "epoch": 0.7873779113448535, "grad_norm": 0.384765625, "learning_rate": 4.7258558307820765e-06, "loss": 0.9189, "step": 3144 }, { "epoch": 0.7876283496118207, "grad_norm": 0.3828125, "learning_rate": 4.720289451711662e-06, "loss": 0.9535, "step": 3145 }, { "epoch": 0.7878787878787878, "grad_norm": 0.38671875, "learning_rate": 4.714723072641248e-06, "loss": 0.9586, "step": 3146 }, { "epoch": 0.7881292261457551, "grad_norm": 0.37109375, "learning_rate": 4.7091566935708325e-06, "loss": 0.7718, "step": 3147 }, { "epoch": 0.7883796644127222, "grad_norm": 0.33984375, "learning_rate": 4.703590314500418e-06, "loss": 0.8714, "step": 3148 }, { "epoch": 0.7886301026796895, "grad_norm": 0.369140625, "learning_rate": 4.698023935430004e-06, "loss": 0.8679, "step": 3149 }, { "epoch": 0.7888805409466566, "grad_norm": 0.388671875, "learning_rate": 4.6924575563595885e-06, "loss": 0.9891, "step": 3150 }, { "epoch": 0.7891309792136239, "grad_norm": 0.314453125, "learning_rate": 4.686891177289174e-06, "loss": 0.809, "step": 3151 }, { "epoch": 0.789381417480591, "grad_norm": 0.373046875, "learning_rate": 4.681324798218759e-06, "loss": 1.0026, "step": 3152 }, { "epoch": 0.7896318557475582, "grad_norm": 0.341796875, "learning_rate": 4.6757584191483445e-06, "loss": 0.9184, "step": 3153 }, { "epoch": 0.7898822940145254, "grad_norm": 0.390625, "learning_rate": 4.67019204007793e-06, "loss": 0.9935, "step": 3154 }, { "epoch": 0.7901327322814926, "grad_norm": 0.423828125, "learning_rate": 4.664625661007515e-06, "loss": 0.9647, "step": 3155 }, { "epoch": 0.7903831705484599, "grad_norm": 0.40625, "learning_rate": 4.6590592819371005e-06, "loss": 0.8784, "step": 3156 }, { "epoch": 0.790633608815427, "grad_norm": 0.34765625, "learning_rate": 4.653492902866686e-06, "loss": 0.8858, "step": 3157 }, { "epoch": 0.7908840470823942, "grad_norm": 0.359375, "learning_rate": 4.647926523796271e-06, "loss": 0.9814, "step": 3158 }, { "epoch": 0.7911344853493614, "grad_norm": 0.30078125, "learning_rate": 4.642360144725856e-06, "loss": 0.8692, "step": 3159 }, { "epoch": 0.7913849236163286, "grad_norm": 0.349609375, "learning_rate": 4.636793765655441e-06, "loss": 0.8383, "step": 3160 }, { "epoch": 0.7916353618832958, "grad_norm": 0.36328125, "learning_rate": 4.631227386585027e-06, "loss": 0.9354, "step": 3161 }, { "epoch": 0.791885800150263, "grad_norm": 0.373046875, "learning_rate": 4.625661007514612e-06, "loss": 0.9785, "step": 3162 }, { "epoch": 0.7921362384172301, "grad_norm": 0.37109375, "learning_rate": 4.620094628444197e-06, "loss": 0.8837, "step": 3163 }, { "epoch": 0.7923866766841974, "grad_norm": 0.3984375, "learning_rate": 4.614528249373783e-06, "loss": 1.0918, "step": 3164 }, { "epoch": 0.7926371149511645, "grad_norm": 0.40625, "learning_rate": 4.608961870303368e-06, "loss": 0.9347, "step": 3165 }, { "epoch": 0.7928875532181318, "grad_norm": 0.35546875, "learning_rate": 4.603395491232953e-06, "loss": 0.9221, "step": 3166 }, { "epoch": 0.7931379914850989, "grad_norm": 0.345703125, "learning_rate": 4.597829112162539e-06, "loss": 0.8469, "step": 3167 }, { "epoch": 0.7933884297520661, "grad_norm": 0.388671875, "learning_rate": 4.592262733092124e-06, "loss": 0.9276, "step": 3168 }, { "epoch": 0.7936388680190333, "grad_norm": 0.39453125, "learning_rate": 4.586696354021709e-06, "loss": 0.8969, "step": 3169 }, { "epoch": 0.7938893062860005, "grad_norm": 0.369140625, "learning_rate": 4.581129974951295e-06, "loss": 0.9654, "step": 3170 }, { "epoch": 0.7941397445529677, "grad_norm": 0.3125, "learning_rate": 4.57556359588088e-06, "loss": 0.8294, "step": 3171 }, { "epoch": 0.7943901828199349, "grad_norm": 0.3359375, "learning_rate": 4.569997216810465e-06, "loss": 0.9314, "step": 3172 }, { "epoch": 0.794640621086902, "grad_norm": 0.384765625, "learning_rate": 4.56443083774005e-06, "loss": 0.784, "step": 3173 }, { "epoch": 0.7948910593538693, "grad_norm": 0.373046875, "learning_rate": 4.5588644586696356e-06, "loss": 0.9619, "step": 3174 }, { "epoch": 0.7951414976208364, "grad_norm": 0.396484375, "learning_rate": 4.553298079599221e-06, "loss": 0.8727, "step": 3175 }, { "epoch": 0.7953919358878037, "grad_norm": 0.4140625, "learning_rate": 4.547731700528806e-06, "loss": 0.9455, "step": 3176 }, { "epoch": 0.7956423741547709, "grad_norm": 0.380859375, "learning_rate": 4.5421653214583916e-06, "loss": 0.8785, "step": 3177 }, { "epoch": 0.795892812421738, "grad_norm": 0.359375, "learning_rate": 4.536598942387977e-06, "loss": 0.7712, "step": 3178 }, { "epoch": 0.7961432506887053, "grad_norm": 0.361328125, "learning_rate": 4.531032563317562e-06, "loss": 0.8972, "step": 3179 }, { "epoch": 0.7963936889556724, "grad_norm": 0.3359375, "learning_rate": 4.5254661842471475e-06, "loss": 0.8891, "step": 3180 }, { "epoch": 0.7966441272226397, "grad_norm": 0.390625, "learning_rate": 4.519899805176733e-06, "loss": 0.7346, "step": 3181 }, { "epoch": 0.7968945654896068, "grad_norm": 0.36328125, "learning_rate": 4.514333426106318e-06, "loss": 0.9586, "step": 3182 }, { "epoch": 0.797145003756574, "grad_norm": 0.3984375, "learning_rate": 4.5087670470359035e-06, "loss": 0.8945, "step": 3183 }, { "epoch": 0.7973954420235412, "grad_norm": 0.345703125, "learning_rate": 4.503200667965489e-06, "loss": 0.9902, "step": 3184 }, { "epoch": 0.7976458802905084, "grad_norm": 0.35546875, "learning_rate": 4.497634288895074e-06, "loss": 0.9817, "step": 3185 }, { "epoch": 0.7978963185574756, "grad_norm": 0.337890625, "learning_rate": 4.4920679098246595e-06, "loss": 0.8228, "step": 3186 }, { "epoch": 0.7981467568244428, "grad_norm": 0.39453125, "learning_rate": 4.486501530754245e-06, "loss": 0.9176, "step": 3187 }, { "epoch": 0.7983971950914099, "grad_norm": 0.373046875, "learning_rate": 4.48093515168383e-06, "loss": 0.9449, "step": 3188 }, { "epoch": 0.7986476333583772, "grad_norm": 0.337890625, "learning_rate": 4.4753687726134155e-06, "loss": 0.8375, "step": 3189 }, { "epoch": 0.7988980716253443, "grad_norm": 0.322265625, "learning_rate": 4.469802393543001e-06, "loss": 0.9164, "step": 3190 }, { "epoch": 0.7991485098923116, "grad_norm": 0.375, "learning_rate": 4.464236014472586e-06, "loss": 1.0282, "step": 3191 }, { "epoch": 0.7993989481592787, "grad_norm": 0.4375, "learning_rate": 4.4586696354021715e-06, "loss": 0.8887, "step": 3192 }, { "epoch": 0.799649386426246, "grad_norm": 0.384765625, "learning_rate": 4.453103256331756e-06, "loss": 1.0545, "step": 3193 }, { "epoch": 0.7998998246932131, "grad_norm": 0.359375, "learning_rate": 4.447536877261342e-06, "loss": 0.7649, "step": 3194 }, { "epoch": 0.8001502629601803, "grad_norm": 0.408203125, "learning_rate": 4.4419704981909275e-06, "loss": 0.832, "step": 3195 }, { "epoch": 0.8004007012271475, "grad_norm": 0.34765625, "learning_rate": 4.436404119120512e-06, "loss": 1.0547, "step": 3196 }, { "epoch": 0.8006511394941147, "grad_norm": 0.349609375, "learning_rate": 4.430837740050098e-06, "loss": 1.0033, "step": 3197 }, { "epoch": 0.8009015777610818, "grad_norm": 0.3828125, "learning_rate": 4.4252713609796835e-06, "loss": 1.1599, "step": 3198 }, { "epoch": 0.8011520160280491, "grad_norm": 0.3984375, "learning_rate": 4.419704981909268e-06, "loss": 0.8439, "step": 3199 }, { "epoch": 0.8014024542950163, "grad_norm": 0.35546875, "learning_rate": 4.414138602838854e-06, "loss": 1.0042, "step": 3200 }, { "epoch": 0.8016528925619835, "grad_norm": 0.37109375, "learning_rate": 4.4085722237684395e-06, "loss": 0.8895, "step": 3201 }, { "epoch": 0.8019033308289507, "grad_norm": 0.373046875, "learning_rate": 4.403005844698024e-06, "loss": 0.9915, "step": 3202 }, { "epoch": 0.8021537690959178, "grad_norm": 0.37890625, "learning_rate": 4.39743946562761e-06, "loss": 0.9546, "step": 3203 }, { "epoch": 0.8024042073628851, "grad_norm": 0.404296875, "learning_rate": 4.3918730865571955e-06, "loss": 0.8465, "step": 3204 }, { "epoch": 0.8026546456298522, "grad_norm": 0.380859375, "learning_rate": 4.38630670748678e-06, "loss": 0.8933, "step": 3205 }, { "epoch": 0.8029050838968195, "grad_norm": 0.40625, "learning_rate": 4.380740328416366e-06, "loss": 0.9651, "step": 3206 }, { "epoch": 0.8031555221637866, "grad_norm": 0.392578125, "learning_rate": 4.3751739493459514e-06, "loss": 0.899, "step": 3207 }, { "epoch": 0.8034059604307539, "grad_norm": 0.4140625, "learning_rate": 4.369607570275536e-06, "loss": 0.9389, "step": 3208 }, { "epoch": 0.803656398697721, "grad_norm": 0.3515625, "learning_rate": 4.364041191205121e-06, "loss": 0.8534, "step": 3209 }, { "epoch": 0.8039068369646882, "grad_norm": 0.34765625, "learning_rate": 4.358474812134707e-06, "loss": 0.9216, "step": 3210 }, { "epoch": 0.8041572752316554, "grad_norm": 0.380859375, "learning_rate": 4.352908433064292e-06, "loss": 1.0775, "step": 3211 }, { "epoch": 0.8044077134986226, "grad_norm": 0.390625, "learning_rate": 4.347342053993877e-06, "loss": 0.8527, "step": 3212 }, { "epoch": 0.8046581517655897, "grad_norm": 0.38671875, "learning_rate": 4.3417756749234626e-06, "loss": 0.9113, "step": 3213 }, { "epoch": 0.804908590032557, "grad_norm": 0.41015625, "learning_rate": 4.336209295853047e-06, "loss": 0.9213, "step": 3214 }, { "epoch": 0.8051590282995241, "grad_norm": 0.4375, "learning_rate": 4.330642916782633e-06, "loss": 0.7602, "step": 3215 }, { "epoch": 0.8054094665664914, "grad_norm": 0.375, "learning_rate": 4.3250765377122186e-06, "loss": 0.8689, "step": 3216 }, { "epoch": 0.8056599048334585, "grad_norm": 0.384765625, "learning_rate": 4.319510158641803e-06, "loss": 0.8849, "step": 3217 }, { "epoch": 0.8059103431004258, "grad_norm": 0.392578125, "learning_rate": 4.313943779571389e-06, "loss": 0.806, "step": 3218 }, { "epoch": 0.8061607813673929, "grad_norm": 0.423828125, "learning_rate": 4.3083774005009745e-06, "loss": 1.103, "step": 3219 }, { "epoch": 0.8064112196343601, "grad_norm": 0.41015625, "learning_rate": 4.302811021430559e-06, "loss": 0.8569, "step": 3220 }, { "epoch": 0.8066616579013274, "grad_norm": 0.396484375, "learning_rate": 4.297244642360145e-06, "loss": 0.8339, "step": 3221 }, { "epoch": 0.8069120961682945, "grad_norm": 0.357421875, "learning_rate": 4.2916782632897305e-06, "loss": 0.9118, "step": 3222 }, { "epoch": 0.8071625344352618, "grad_norm": 0.349609375, "learning_rate": 4.286111884219315e-06, "loss": 0.8706, "step": 3223 }, { "epoch": 0.8074129727022289, "grad_norm": 0.388671875, "learning_rate": 4.280545505148901e-06, "loss": 0.8622, "step": 3224 }, { "epoch": 0.8076634109691961, "grad_norm": 0.384765625, "learning_rate": 4.2749791260784865e-06, "loss": 0.8697, "step": 3225 }, { "epoch": 0.8079138492361633, "grad_norm": 0.392578125, "learning_rate": 4.269412747008071e-06, "loss": 1.0578, "step": 3226 }, { "epoch": 0.8081642875031305, "grad_norm": 0.390625, "learning_rate": 4.263846367937657e-06, "loss": 0.8855, "step": 3227 }, { "epoch": 0.8084147257700977, "grad_norm": 0.38671875, "learning_rate": 4.2582799888672425e-06, "loss": 0.9374, "step": 3228 }, { "epoch": 0.8086651640370649, "grad_norm": 0.353515625, "learning_rate": 4.252713609796827e-06, "loss": 1.1912, "step": 3229 }, { "epoch": 0.808915602304032, "grad_norm": 0.39453125, "learning_rate": 4.247147230726413e-06, "loss": 0.9501, "step": 3230 }, { "epoch": 0.8091660405709993, "grad_norm": 0.353515625, "learning_rate": 4.2415808516559985e-06, "loss": 0.8687, "step": 3231 }, { "epoch": 0.8094164788379664, "grad_norm": 0.34375, "learning_rate": 4.236014472585583e-06, "loss": 0.9423, "step": 3232 }, { "epoch": 0.8096669171049337, "grad_norm": 0.365234375, "learning_rate": 4.230448093515169e-06, "loss": 0.788, "step": 3233 }, { "epoch": 0.8099173553719008, "grad_norm": 0.357421875, "learning_rate": 4.224881714444754e-06, "loss": 0.8919, "step": 3234 }, { "epoch": 0.810167793638868, "grad_norm": 0.400390625, "learning_rate": 4.219315335374339e-06, "loss": 0.7979, "step": 3235 }, { "epoch": 0.8104182319058352, "grad_norm": 0.359375, "learning_rate": 4.213748956303925e-06, "loss": 0.9089, "step": 3236 }, { "epoch": 0.8106686701728024, "grad_norm": 0.37109375, "learning_rate": 4.20818257723351e-06, "loss": 0.8262, "step": 3237 }, { "epoch": 0.8109191084397696, "grad_norm": 0.37109375, "learning_rate": 4.202616198163095e-06, "loss": 0.9139, "step": 3238 }, { "epoch": 0.8111695467067368, "grad_norm": 0.388671875, "learning_rate": 4.197049819092681e-06, "loss": 0.9031, "step": 3239 }, { "epoch": 0.8114199849737039, "grad_norm": 0.359375, "learning_rate": 4.191483440022266e-06, "loss": 0.8548, "step": 3240 }, { "epoch": 0.8116704232406712, "grad_norm": 0.349609375, "learning_rate": 4.185917060951851e-06, "loss": 0.8764, "step": 3241 }, { "epoch": 0.8119208615076383, "grad_norm": 0.392578125, "learning_rate": 4.180350681881437e-06, "loss": 1.0376, "step": 3242 }, { "epoch": 0.8121712997746056, "grad_norm": 0.369140625, "learning_rate": 4.174784302811022e-06, "loss": 1.0292, "step": 3243 }, { "epoch": 0.8124217380415728, "grad_norm": 0.314453125, "learning_rate": 4.169217923740607e-06, "loss": 0.8098, "step": 3244 }, { "epoch": 0.8126721763085399, "grad_norm": 0.33984375, "learning_rate": 4.163651544670193e-06, "loss": 1.0431, "step": 3245 }, { "epoch": 0.8129226145755072, "grad_norm": 0.341796875, "learning_rate": 4.158085165599778e-06, "loss": 0.9979, "step": 3246 }, { "epoch": 0.8131730528424743, "grad_norm": 0.400390625, "learning_rate": 4.152518786529363e-06, "loss": 0.9415, "step": 3247 }, { "epoch": 0.8134234911094416, "grad_norm": 0.30859375, "learning_rate": 4.146952407458949e-06, "loss": 0.9213, "step": 3248 }, { "epoch": 0.8136739293764087, "grad_norm": 0.384765625, "learning_rate": 4.141386028388534e-06, "loss": 0.9914, "step": 3249 }, { "epoch": 0.813924367643376, "grad_norm": 0.3828125, "learning_rate": 4.135819649318119e-06, "loss": 0.8159, "step": 3250 }, { "epoch": 0.8141748059103431, "grad_norm": 0.341796875, "learning_rate": 4.130253270247705e-06, "loss": 1.0162, "step": 3251 }, { "epoch": 0.8144252441773103, "grad_norm": 0.375, "learning_rate": 4.1246868911772896e-06, "loss": 0.9781, "step": 3252 }, { "epoch": 0.8146756824442775, "grad_norm": 0.375, "learning_rate": 4.119120512106875e-06, "loss": 0.9682, "step": 3253 }, { "epoch": 0.8149261207112447, "grad_norm": 0.400390625, "learning_rate": 4.11355413303646e-06, "loss": 1.0108, "step": 3254 }, { "epoch": 0.8151765589782118, "grad_norm": 0.37890625, "learning_rate": 4.1079877539660456e-06, "loss": 0.9632, "step": 3255 }, { "epoch": 0.8154269972451791, "grad_norm": 0.39453125, "learning_rate": 4.102421374895631e-06, "loss": 1.2284, "step": 3256 }, { "epoch": 0.8156774355121462, "grad_norm": 0.345703125, "learning_rate": 4.096854995825216e-06, "loss": 1.0738, "step": 3257 }, { "epoch": 0.8159278737791135, "grad_norm": 0.37109375, "learning_rate": 4.0912886167548015e-06, "loss": 0.9862, "step": 3258 }, { "epoch": 0.8161783120460806, "grad_norm": 0.3359375, "learning_rate": 4.085722237684387e-06, "loss": 0.8704, "step": 3259 }, { "epoch": 0.8164287503130478, "grad_norm": 0.376953125, "learning_rate": 4.080155858613972e-06, "loss": 0.9221, "step": 3260 }, { "epoch": 0.816679188580015, "grad_norm": 0.36328125, "learning_rate": 4.074589479543557e-06, "loss": 0.9649, "step": 3261 }, { "epoch": 0.8169296268469822, "grad_norm": 0.3671875, "learning_rate": 4.069023100473142e-06, "loss": 0.8703, "step": 3262 }, { "epoch": 0.8171800651139494, "grad_norm": 0.388671875, "learning_rate": 4.063456721402728e-06, "loss": 0.788, "step": 3263 }, { "epoch": 0.8174305033809166, "grad_norm": 0.3828125, "learning_rate": 4.057890342332313e-06, "loss": 0.9012, "step": 3264 }, { "epoch": 0.8176809416478839, "grad_norm": 0.32421875, "learning_rate": 4.052323963261898e-06, "loss": 0.8884, "step": 3265 }, { "epoch": 0.817931379914851, "grad_norm": 0.376953125, "learning_rate": 4.046757584191484e-06, "loss": 0.8107, "step": 3266 }, { "epoch": 0.8181818181818182, "grad_norm": 0.40625, "learning_rate": 4.041191205121069e-06, "loss": 0.9962, "step": 3267 }, { "epoch": 0.8184322564487854, "grad_norm": 0.34375, "learning_rate": 4.035624826050654e-06, "loss": 1.0612, "step": 3268 }, { "epoch": 0.8186826947157526, "grad_norm": 0.3828125, "learning_rate": 4.03005844698024e-06, "loss": 0.9074, "step": 3269 }, { "epoch": 0.8189331329827197, "grad_norm": 0.39453125, "learning_rate": 4.024492067909825e-06, "loss": 0.7941, "step": 3270 }, { "epoch": 0.819183571249687, "grad_norm": 0.28515625, "learning_rate": 4.01892568883941e-06, "loss": 0.6043, "step": 3271 }, { "epoch": 0.8194340095166541, "grad_norm": 0.36328125, "learning_rate": 4.013359309768996e-06, "loss": 0.9451, "step": 3272 }, { "epoch": 0.8196844477836214, "grad_norm": 0.36328125, "learning_rate": 4.007792930698581e-06, "loss": 0.871, "step": 3273 }, { "epoch": 0.8199348860505885, "grad_norm": 0.421875, "learning_rate": 4.002226551628166e-06, "loss": 0.9778, "step": 3274 }, { "epoch": 0.8201853243175558, "grad_norm": 0.365234375, "learning_rate": 3.996660172557751e-06, "loss": 0.8617, "step": 3275 }, { "epoch": 0.8204357625845229, "grad_norm": 0.412109375, "learning_rate": 3.991093793487337e-06, "loss": 0.8272, "step": 3276 }, { "epoch": 0.8206862008514901, "grad_norm": 0.3828125, "learning_rate": 3.985527414416922e-06, "loss": 1.0193, "step": 3277 }, { "epoch": 0.8209366391184573, "grad_norm": 0.353515625, "learning_rate": 3.979961035346507e-06, "loss": 0.836, "step": 3278 }, { "epoch": 0.8211870773854245, "grad_norm": 0.390625, "learning_rate": 3.974394656276093e-06, "loss": 0.9055, "step": 3279 }, { "epoch": 0.8214375156523916, "grad_norm": 0.455078125, "learning_rate": 3.968828277205678e-06, "loss": 0.9421, "step": 3280 }, { "epoch": 0.8216879539193589, "grad_norm": 0.33203125, "learning_rate": 3.963261898135263e-06, "loss": 1.1165, "step": 3281 }, { "epoch": 0.821938392186326, "grad_norm": 0.400390625, "learning_rate": 3.957695519064849e-06, "loss": 0.993, "step": 3282 }, { "epoch": 0.8221888304532933, "grad_norm": 0.392578125, "learning_rate": 3.952129139994434e-06, "loss": 0.8906, "step": 3283 }, { "epoch": 0.8224392687202604, "grad_norm": 0.345703125, "learning_rate": 3.946562760924019e-06, "loss": 0.9415, "step": 3284 }, { "epoch": 0.8226897069872277, "grad_norm": 0.380859375, "learning_rate": 3.940996381853605e-06, "loss": 0.9422, "step": 3285 }, { "epoch": 0.8229401452541948, "grad_norm": 0.39453125, "learning_rate": 3.93543000278319e-06, "loss": 0.9706, "step": 3286 }, { "epoch": 0.823190583521162, "grad_norm": 0.400390625, "learning_rate": 3.929863623712775e-06, "loss": 0.9757, "step": 3287 }, { "epoch": 0.8234410217881293, "grad_norm": 0.408203125, "learning_rate": 3.924297244642361e-06, "loss": 1.1313, "step": 3288 }, { "epoch": 0.8236914600550964, "grad_norm": 0.33984375, "learning_rate": 3.918730865571946e-06, "loss": 0.9741, "step": 3289 }, { "epoch": 0.8239418983220637, "grad_norm": 0.341796875, "learning_rate": 3.913164486501531e-06, "loss": 0.7928, "step": 3290 }, { "epoch": 0.8241923365890308, "grad_norm": 0.412109375, "learning_rate": 3.9075981074311166e-06, "loss": 0.9239, "step": 3291 }, { "epoch": 0.824442774855998, "grad_norm": 0.35546875, "learning_rate": 3.902031728360702e-06, "loss": 0.9726, "step": 3292 }, { "epoch": 0.8246932131229652, "grad_norm": 0.388671875, "learning_rate": 3.896465349290287e-06, "loss": 0.8234, "step": 3293 }, { "epoch": 0.8249436513899324, "grad_norm": 0.37890625, "learning_rate": 3.8908989702198726e-06, "loss": 0.8142, "step": 3294 }, { "epoch": 0.8251940896568996, "grad_norm": 0.3671875, "learning_rate": 3.885332591149457e-06, "loss": 0.826, "step": 3295 }, { "epoch": 0.8254445279238668, "grad_norm": 0.353515625, "learning_rate": 3.879766212079043e-06, "loss": 0.9135, "step": 3296 }, { "epoch": 0.8256949661908339, "grad_norm": 0.357421875, "learning_rate": 3.8741998330086285e-06, "loss": 0.8999, "step": 3297 }, { "epoch": 0.8259454044578012, "grad_norm": 0.359375, "learning_rate": 3.868633453938213e-06, "loss": 0.8644, "step": 3298 }, { "epoch": 0.8261958427247683, "grad_norm": 0.357421875, "learning_rate": 3.863067074867799e-06, "loss": 0.9703, "step": 3299 }, { "epoch": 0.8264462809917356, "grad_norm": 0.3828125, "learning_rate": 3.8575006957973845e-06, "loss": 0.8957, "step": 3300 }, { "epoch": 0.8266967192587027, "grad_norm": 0.34765625, "learning_rate": 3.851934316726969e-06, "loss": 0.8264, "step": 3301 }, { "epoch": 0.8269471575256699, "grad_norm": 0.33984375, "learning_rate": 3.846367937656555e-06, "loss": 1.0286, "step": 3302 }, { "epoch": 0.8271975957926371, "grad_norm": 0.35546875, "learning_rate": 3.8408015585861405e-06, "loss": 0.9251, "step": 3303 }, { "epoch": 0.8274480340596043, "grad_norm": 0.333984375, "learning_rate": 3.835235179515725e-06, "loss": 0.9623, "step": 3304 }, { "epoch": 0.8276984723265715, "grad_norm": 0.34765625, "learning_rate": 3.829668800445311e-06, "loss": 0.9644, "step": 3305 }, { "epoch": 0.8279489105935387, "grad_norm": 0.37109375, "learning_rate": 3.8241024213748965e-06, "loss": 0.9504, "step": 3306 }, { "epoch": 0.8281993488605058, "grad_norm": 0.380859375, "learning_rate": 3.818536042304481e-06, "loss": 0.8844, "step": 3307 }, { "epoch": 0.8284497871274731, "grad_norm": 0.390625, "learning_rate": 3.812969663234067e-06, "loss": 0.7976, "step": 3308 }, { "epoch": 0.8287002253944402, "grad_norm": 0.40234375, "learning_rate": 3.807403284163652e-06, "loss": 0.935, "step": 3309 }, { "epoch": 0.8289506636614075, "grad_norm": 0.35546875, "learning_rate": 3.801836905093237e-06, "loss": 0.958, "step": 3310 }, { "epoch": 0.8292011019283747, "grad_norm": 0.3515625, "learning_rate": 3.796270526022822e-06, "loss": 0.8615, "step": 3311 }, { "epoch": 0.8294515401953418, "grad_norm": 0.37890625, "learning_rate": 3.7907041469524076e-06, "loss": 0.8534, "step": 3312 }, { "epoch": 0.8297019784623091, "grad_norm": 0.404296875, "learning_rate": 3.785137767881993e-06, "loss": 1.0572, "step": 3313 }, { "epoch": 0.8299524167292762, "grad_norm": 0.39453125, "learning_rate": 3.779571388811578e-06, "loss": 0.9903, "step": 3314 }, { "epoch": 0.8302028549962435, "grad_norm": 0.380859375, "learning_rate": 3.7740050097411636e-06, "loss": 0.8912, "step": 3315 }, { "epoch": 0.8304532932632106, "grad_norm": 0.369140625, "learning_rate": 3.768438630670749e-06, "loss": 0.8663, "step": 3316 }, { "epoch": 0.8307037315301778, "grad_norm": 0.419921875, "learning_rate": 3.762872251600334e-06, "loss": 1.0676, "step": 3317 }, { "epoch": 0.830954169797145, "grad_norm": 0.33984375, "learning_rate": 3.7573058725299196e-06, "loss": 0.9324, "step": 3318 }, { "epoch": 0.8312046080641122, "grad_norm": 0.357421875, "learning_rate": 3.751739493459505e-06, "loss": 1.0976, "step": 3319 }, { "epoch": 0.8314550463310794, "grad_norm": 0.369140625, "learning_rate": 3.74617311438909e-06, "loss": 0.9678, "step": 3320 }, { "epoch": 0.8317054845980466, "grad_norm": 0.353515625, "learning_rate": 3.740606735318675e-06, "loss": 1.0093, "step": 3321 }, { "epoch": 0.8319559228650137, "grad_norm": 0.380859375, "learning_rate": 3.735040356248261e-06, "loss": 1.0071, "step": 3322 }, { "epoch": 0.832206361131981, "grad_norm": 0.3671875, "learning_rate": 3.729473977177846e-06, "loss": 0.8246, "step": 3323 }, { "epoch": 0.8324567993989481, "grad_norm": 0.3359375, "learning_rate": 3.723907598107431e-06, "loss": 0.9795, "step": 3324 }, { "epoch": 0.8327072376659154, "grad_norm": 0.333984375, "learning_rate": 3.7183412190370168e-06, "loss": 0.742, "step": 3325 }, { "epoch": 0.8329576759328825, "grad_norm": 0.3359375, "learning_rate": 3.712774839966602e-06, "loss": 0.9068, "step": 3326 }, { "epoch": 0.8332081141998497, "grad_norm": 0.373046875, "learning_rate": 3.707208460896187e-06, "loss": 1.0358, "step": 3327 }, { "epoch": 0.8334585524668169, "grad_norm": 0.36328125, "learning_rate": 3.7016420818257728e-06, "loss": 0.9819, "step": 3328 }, { "epoch": 0.8337089907337841, "grad_norm": 0.373046875, "learning_rate": 3.696075702755358e-06, "loss": 0.9662, "step": 3329 }, { "epoch": 0.8339594290007513, "grad_norm": 0.41796875, "learning_rate": 3.690509323684943e-06, "loss": 0.9368, "step": 3330 }, { "epoch": 0.8342098672677185, "grad_norm": 0.380859375, "learning_rate": 3.6849429446145283e-06, "loss": 0.891, "step": 3331 }, { "epoch": 0.8344603055346858, "grad_norm": 0.357421875, "learning_rate": 3.679376565544114e-06, "loss": 0.9621, "step": 3332 }, { "epoch": 0.8347107438016529, "grad_norm": 0.380859375, "learning_rate": 3.673810186473699e-06, "loss": 0.8609, "step": 3333 }, { "epoch": 0.8349611820686201, "grad_norm": 0.404296875, "learning_rate": 3.6682438074032843e-06, "loss": 0.7057, "step": 3334 }, { "epoch": 0.8352116203355873, "grad_norm": 0.359375, "learning_rate": 3.66267742833287e-06, "loss": 0.9489, "step": 3335 }, { "epoch": 0.8354620586025545, "grad_norm": 0.4296875, "learning_rate": 3.657111049262455e-06, "loss": 1.0889, "step": 3336 }, { "epoch": 0.8357124968695216, "grad_norm": 0.369140625, "learning_rate": 3.6515446701920403e-06, "loss": 0.8522, "step": 3337 }, { "epoch": 0.8359629351364889, "grad_norm": 0.404296875, "learning_rate": 3.645978291121626e-06, "loss": 1.081, "step": 3338 }, { "epoch": 0.836213373403456, "grad_norm": 0.375, "learning_rate": 3.640411912051211e-06, "loss": 0.826, "step": 3339 }, { "epoch": 0.8364638116704233, "grad_norm": 0.361328125, "learning_rate": 3.6348455329807963e-06, "loss": 0.9112, "step": 3340 }, { "epoch": 0.8367142499373904, "grad_norm": 0.373046875, "learning_rate": 3.6292791539103815e-06, "loss": 0.9211, "step": 3341 }, { "epoch": 0.8369646882043577, "grad_norm": 0.404296875, "learning_rate": 3.623712774839967e-06, "loss": 1.1685, "step": 3342 }, { "epoch": 0.8372151264713248, "grad_norm": 0.380859375, "learning_rate": 3.6181463957695523e-06, "loss": 0.963, "step": 3343 }, { "epoch": 0.837465564738292, "grad_norm": 0.365234375, "learning_rate": 3.6125800166991375e-06, "loss": 0.8649, "step": 3344 }, { "epoch": 0.8377160030052592, "grad_norm": 0.37109375, "learning_rate": 3.607013637628723e-06, "loss": 0.9137, "step": 3345 }, { "epoch": 0.8379664412722264, "grad_norm": 0.470703125, "learning_rate": 3.6014472585583083e-06, "loss": 0.9953, "step": 3346 }, { "epoch": 0.8382168795391935, "grad_norm": 0.4140625, "learning_rate": 3.5958808794878935e-06, "loss": 0.9443, "step": 3347 }, { "epoch": 0.8384673178061608, "grad_norm": 0.33984375, "learning_rate": 3.590314500417479e-06, "loss": 0.7846, "step": 3348 }, { "epoch": 0.8387177560731279, "grad_norm": 0.373046875, "learning_rate": 3.5847481213470643e-06, "loss": 0.8493, "step": 3349 }, { "epoch": 0.8389681943400952, "grad_norm": 0.400390625, "learning_rate": 3.5791817422766494e-06, "loss": 0.916, "step": 3350 }, { "epoch": 0.8392186326070623, "grad_norm": 0.431640625, "learning_rate": 3.5736153632062346e-06, "loss": 0.8991, "step": 3351 }, { "epoch": 0.8394690708740296, "grad_norm": 0.384765625, "learning_rate": 3.5680489841358202e-06, "loss": 0.9109, "step": 3352 }, { "epoch": 0.8397195091409967, "grad_norm": 0.365234375, "learning_rate": 3.5624826050654054e-06, "loss": 0.9154, "step": 3353 }, { "epoch": 0.8399699474079639, "grad_norm": 0.388671875, "learning_rate": 3.5569162259949906e-06, "loss": 0.9898, "step": 3354 }, { "epoch": 0.8402203856749312, "grad_norm": 0.3515625, "learning_rate": 3.5513498469245762e-06, "loss": 0.9686, "step": 3355 }, { "epoch": 0.8404708239418983, "grad_norm": 0.337890625, "learning_rate": 3.5457834678541614e-06, "loss": 0.8128, "step": 3356 }, { "epoch": 0.8407212622088656, "grad_norm": 0.34375, "learning_rate": 3.5402170887837466e-06, "loss": 0.9424, "step": 3357 }, { "epoch": 0.8409717004758327, "grad_norm": 0.384765625, "learning_rate": 3.5346507097133322e-06, "loss": 0.915, "step": 3358 }, { "epoch": 0.8412221387427999, "grad_norm": 0.384765625, "learning_rate": 3.5290843306429174e-06, "loss": 0.8526, "step": 3359 }, { "epoch": 0.8414725770097671, "grad_norm": 0.365234375, "learning_rate": 3.5235179515725026e-06, "loss": 1.0156, "step": 3360 }, { "epoch": 0.8417230152767343, "grad_norm": 0.34765625, "learning_rate": 3.5179515725020874e-06, "loss": 0.9467, "step": 3361 }, { "epoch": 0.8419734535437015, "grad_norm": 0.376953125, "learning_rate": 3.5123851934316726e-06, "loss": 0.7004, "step": 3362 }, { "epoch": 0.8422238918106687, "grad_norm": 0.34375, "learning_rate": 3.506818814361258e-06, "loss": 0.8083, "step": 3363 }, { "epoch": 0.8424743300776358, "grad_norm": 0.380859375, "learning_rate": 3.5012524352908434e-06, "loss": 1.0563, "step": 3364 }, { "epoch": 0.8427247683446031, "grad_norm": 0.353515625, "learning_rate": 3.4956860562204285e-06, "loss": 1.0951, "step": 3365 }, { "epoch": 0.8429752066115702, "grad_norm": 0.427734375, "learning_rate": 3.490119677150014e-06, "loss": 1.0412, "step": 3366 }, { "epoch": 0.8432256448785375, "grad_norm": 0.392578125, "learning_rate": 3.4845532980795993e-06, "loss": 1.0089, "step": 3367 }, { "epoch": 0.8434760831455046, "grad_norm": 0.365234375, "learning_rate": 3.4789869190091845e-06, "loss": 0.8252, "step": 3368 }, { "epoch": 0.8437265214124718, "grad_norm": 0.33203125, "learning_rate": 3.47342053993877e-06, "loss": 0.8593, "step": 3369 }, { "epoch": 0.843976959679439, "grad_norm": 0.388671875, "learning_rate": 3.4678541608683553e-06, "loss": 0.9091, "step": 3370 }, { "epoch": 0.8442273979464062, "grad_norm": 0.40234375, "learning_rate": 3.4622877817979405e-06, "loss": 0.9216, "step": 3371 }, { "epoch": 0.8444778362133734, "grad_norm": 0.3828125, "learning_rate": 3.4567214027275257e-06, "loss": 0.8604, "step": 3372 }, { "epoch": 0.8447282744803406, "grad_norm": 0.380859375, "learning_rate": 3.4511550236571113e-06, "loss": 0.8507, "step": 3373 }, { "epoch": 0.8449787127473077, "grad_norm": 0.361328125, "learning_rate": 3.4455886445866965e-06, "loss": 0.9624, "step": 3374 }, { "epoch": 0.845229151014275, "grad_norm": 0.3828125, "learning_rate": 3.4400222655162817e-06, "loss": 0.8338, "step": 3375 }, { "epoch": 0.8454795892812422, "grad_norm": 0.361328125, "learning_rate": 3.4344558864458673e-06, "loss": 0.8376, "step": 3376 }, { "epoch": 0.8457300275482094, "grad_norm": 0.384765625, "learning_rate": 3.4288895073754525e-06, "loss": 0.9798, "step": 3377 }, { "epoch": 0.8459804658151766, "grad_norm": 0.388671875, "learning_rate": 3.4233231283050377e-06, "loss": 0.8777, "step": 3378 }, { "epoch": 0.8462309040821437, "grad_norm": 0.41015625, "learning_rate": 3.4177567492346233e-06, "loss": 0.935, "step": 3379 }, { "epoch": 0.846481342349111, "grad_norm": 0.36328125, "learning_rate": 3.4121903701642085e-06, "loss": 0.775, "step": 3380 }, { "epoch": 0.8467317806160781, "grad_norm": 0.341796875, "learning_rate": 3.4066239910937937e-06, "loss": 0.8081, "step": 3381 }, { "epoch": 0.8469822188830454, "grad_norm": 0.37890625, "learning_rate": 3.401057612023379e-06, "loss": 0.9172, "step": 3382 }, { "epoch": 0.8472326571500125, "grad_norm": 0.3984375, "learning_rate": 3.3954912329529645e-06, "loss": 0.8986, "step": 3383 }, { "epoch": 0.8474830954169797, "grad_norm": 0.357421875, "learning_rate": 3.3899248538825497e-06, "loss": 0.8646, "step": 3384 }, { "epoch": 0.8477335336839469, "grad_norm": 0.37890625, "learning_rate": 3.384358474812135e-06, "loss": 0.9033, "step": 3385 }, { "epoch": 0.8479839719509141, "grad_norm": 0.375, "learning_rate": 3.3787920957417205e-06, "loss": 1.0642, "step": 3386 }, { "epoch": 0.8482344102178813, "grad_norm": 0.359375, "learning_rate": 3.3732257166713056e-06, "loss": 0.8564, "step": 3387 }, { "epoch": 0.8484848484848485, "grad_norm": 0.359375, "learning_rate": 3.367659337600891e-06, "loss": 0.9944, "step": 3388 }, { "epoch": 0.8487352867518156, "grad_norm": 0.37890625, "learning_rate": 3.3620929585304764e-06, "loss": 0.8467, "step": 3389 }, { "epoch": 0.8489857250187829, "grad_norm": 0.357421875, "learning_rate": 3.3565265794600616e-06, "loss": 0.9718, "step": 3390 }, { "epoch": 0.84923616328575, "grad_norm": 0.404296875, "learning_rate": 3.350960200389647e-06, "loss": 0.8946, "step": 3391 }, { "epoch": 0.8494866015527173, "grad_norm": 0.361328125, "learning_rate": 3.345393821319232e-06, "loss": 1.0052, "step": 3392 }, { "epoch": 0.8497370398196844, "grad_norm": 0.3828125, "learning_rate": 3.3398274422488176e-06, "loss": 0.9996, "step": 3393 }, { "epoch": 0.8499874780866516, "grad_norm": 0.3984375, "learning_rate": 3.334261063178403e-06, "loss": 0.9069, "step": 3394 }, { "epoch": 0.8502379163536188, "grad_norm": 0.380859375, "learning_rate": 3.328694684107988e-06, "loss": 0.8603, "step": 3395 }, { "epoch": 0.850488354620586, "grad_norm": 0.37109375, "learning_rate": 3.3231283050375736e-06, "loss": 0.6935, "step": 3396 }, { "epoch": 0.8507387928875532, "grad_norm": 0.392578125, "learning_rate": 3.317561925967159e-06, "loss": 0.9972, "step": 3397 }, { "epoch": 0.8509892311545204, "grad_norm": 0.392578125, "learning_rate": 3.311995546896744e-06, "loss": 1.0493, "step": 3398 }, { "epoch": 0.8512396694214877, "grad_norm": 0.404296875, "learning_rate": 3.3064291678263296e-06, "loss": 1.0094, "step": 3399 }, { "epoch": 0.8514901076884548, "grad_norm": 0.419921875, "learning_rate": 3.3008627887559148e-06, "loss": 1.0024, "step": 3400 }, { "epoch": 0.851740545955422, "grad_norm": 0.341796875, "learning_rate": 3.2952964096855e-06, "loss": 0.8872, "step": 3401 }, { "epoch": 0.8519909842223892, "grad_norm": 0.3828125, "learning_rate": 3.289730030615085e-06, "loss": 0.9769, "step": 3402 }, { "epoch": 0.8522414224893564, "grad_norm": 0.34375, "learning_rate": 3.2841636515446708e-06, "loss": 1.0425, "step": 3403 }, { "epoch": 0.8524918607563235, "grad_norm": 0.3984375, "learning_rate": 3.278597272474256e-06, "loss": 0.8653, "step": 3404 }, { "epoch": 0.8527422990232908, "grad_norm": 0.376953125, "learning_rate": 3.273030893403841e-06, "loss": 0.9251, "step": 3405 }, { "epoch": 0.8529927372902579, "grad_norm": 0.357421875, "learning_rate": 3.2674645143334268e-06, "loss": 0.8083, "step": 3406 }, { "epoch": 0.8532431755572252, "grad_norm": 0.384765625, "learning_rate": 3.261898135263012e-06, "loss": 1.0806, "step": 3407 }, { "epoch": 0.8534936138241923, "grad_norm": 0.353515625, "learning_rate": 3.256331756192597e-06, "loss": 1.0364, "step": 3408 }, { "epoch": 0.8537440520911596, "grad_norm": 0.369140625, "learning_rate": 3.2507653771221828e-06, "loss": 1.056, "step": 3409 }, { "epoch": 0.8539944903581267, "grad_norm": 0.36328125, "learning_rate": 3.245198998051768e-06, "loss": 1.0234, "step": 3410 }, { "epoch": 0.8542449286250939, "grad_norm": 0.365234375, "learning_rate": 3.239632618981353e-06, "loss": 0.9718, "step": 3411 }, { "epoch": 0.8544953668920611, "grad_norm": 0.416015625, "learning_rate": 3.234066239910938e-06, "loss": 0.9103, "step": 3412 }, { "epoch": 0.8547458051590283, "grad_norm": 0.3984375, "learning_rate": 3.228499860840523e-06, "loss": 0.8555, "step": 3413 }, { "epoch": 0.8549962434259955, "grad_norm": 0.412109375, "learning_rate": 3.2229334817701087e-06, "loss": 1.0186, "step": 3414 }, { "epoch": 0.8552466816929627, "grad_norm": 0.349609375, "learning_rate": 3.217367102699694e-06, "loss": 0.9662, "step": 3415 }, { "epoch": 0.8554971199599298, "grad_norm": 0.376953125, "learning_rate": 3.211800723629279e-06, "loss": 1.0766, "step": 3416 }, { "epoch": 0.8557475582268971, "grad_norm": 0.404296875, "learning_rate": 3.2062343445588647e-06, "loss": 0.834, "step": 3417 }, { "epoch": 0.8559979964938642, "grad_norm": 0.380859375, "learning_rate": 3.20066796548845e-06, "loss": 0.9032, "step": 3418 }, { "epoch": 0.8562484347608315, "grad_norm": 0.373046875, "learning_rate": 3.195101586418035e-06, "loss": 0.8381, "step": 3419 }, { "epoch": 0.8564988730277987, "grad_norm": 0.42578125, "learning_rate": 3.1895352073476202e-06, "loss": 0.935, "step": 3420 }, { "epoch": 0.8567493112947658, "grad_norm": 0.361328125, "learning_rate": 3.183968828277206e-06, "loss": 0.9442, "step": 3421 }, { "epoch": 0.8569997495617331, "grad_norm": 0.39453125, "learning_rate": 3.178402449206791e-06, "loss": 1.035, "step": 3422 }, { "epoch": 0.8572501878287002, "grad_norm": 0.37890625, "learning_rate": 3.1728360701363762e-06, "loss": 1.0052, "step": 3423 }, { "epoch": 0.8575006260956675, "grad_norm": 0.423828125, "learning_rate": 3.167269691065962e-06, "loss": 1.0179, "step": 3424 }, { "epoch": 0.8577510643626346, "grad_norm": 0.36328125, "learning_rate": 3.161703311995547e-06, "loss": 0.9108, "step": 3425 }, { "epoch": 0.8580015026296018, "grad_norm": 0.408203125, "learning_rate": 3.1561369329251322e-06, "loss": 0.8812, "step": 3426 }, { "epoch": 0.858251940896569, "grad_norm": 0.3671875, "learning_rate": 3.150570553854718e-06, "loss": 0.7943, "step": 3427 }, { "epoch": 0.8585023791635362, "grad_norm": 0.4375, "learning_rate": 3.145004174784303e-06, "loss": 1.1932, "step": 3428 }, { "epoch": 0.8587528174305034, "grad_norm": 0.388671875, "learning_rate": 3.139437795713888e-06, "loss": 1.023, "step": 3429 }, { "epoch": 0.8590032556974706, "grad_norm": 0.361328125, "learning_rate": 3.1338714166434734e-06, "loss": 0.9431, "step": 3430 }, { "epoch": 0.8592536939644377, "grad_norm": 0.42578125, "learning_rate": 3.128305037573059e-06, "loss": 0.85, "step": 3431 }, { "epoch": 0.859504132231405, "grad_norm": 0.357421875, "learning_rate": 3.122738658502644e-06, "loss": 0.7986, "step": 3432 }, { "epoch": 0.8597545704983721, "grad_norm": 0.373046875, "learning_rate": 3.1171722794322294e-06, "loss": 1.0227, "step": 3433 }, { "epoch": 0.8600050087653394, "grad_norm": 0.3515625, "learning_rate": 3.111605900361815e-06, "loss": 0.7061, "step": 3434 }, { "epoch": 0.8602554470323065, "grad_norm": 0.3671875, "learning_rate": 3.1060395212914e-06, "loss": 0.7958, "step": 3435 }, { "epoch": 0.8605058852992737, "grad_norm": 0.341796875, "learning_rate": 3.1004731422209854e-06, "loss": 0.78, "step": 3436 }, { "epoch": 0.8607563235662409, "grad_norm": 0.375, "learning_rate": 3.094906763150571e-06, "loss": 1.0384, "step": 3437 }, { "epoch": 0.8610067618332081, "grad_norm": 0.365234375, "learning_rate": 3.089340384080156e-06, "loss": 0.8372, "step": 3438 }, { "epoch": 0.8612572001001753, "grad_norm": 0.36328125, "learning_rate": 3.0837740050097414e-06, "loss": 0.8296, "step": 3439 }, { "epoch": 0.8615076383671425, "grad_norm": 0.375, "learning_rate": 3.0782076259393266e-06, "loss": 0.8069, "step": 3440 }, { "epoch": 0.8617580766341096, "grad_norm": 0.349609375, "learning_rate": 3.072641246868912e-06, "loss": 0.9119, "step": 3441 }, { "epoch": 0.8620085149010769, "grad_norm": 0.35546875, "learning_rate": 3.0670748677984974e-06, "loss": 0.9843, "step": 3442 }, { "epoch": 0.8622589531680441, "grad_norm": 0.396484375, "learning_rate": 3.0615084887280825e-06, "loss": 0.8327, "step": 3443 }, { "epoch": 0.8625093914350113, "grad_norm": 0.38671875, "learning_rate": 3.055942109657668e-06, "loss": 0.8839, "step": 3444 }, { "epoch": 0.8627598297019785, "grad_norm": 0.388671875, "learning_rate": 3.0503757305872533e-06, "loss": 1.0183, "step": 3445 }, { "epoch": 0.8630102679689456, "grad_norm": 0.361328125, "learning_rate": 3.0448093515168385e-06, "loss": 0.8935, "step": 3446 }, { "epoch": 0.8632607062359129, "grad_norm": 0.37109375, "learning_rate": 3.039242972446424e-06, "loss": 0.8576, "step": 3447 }, { "epoch": 0.86351114450288, "grad_norm": 0.34765625, "learning_rate": 3.0336765933760093e-06, "loss": 0.9381, "step": 3448 }, { "epoch": 0.8637615827698473, "grad_norm": 0.384765625, "learning_rate": 3.0281102143055945e-06, "loss": 0.9745, "step": 3449 }, { "epoch": 0.8640120210368144, "grad_norm": 0.3671875, "learning_rate": 3.0225438352351797e-06, "loss": 0.8868, "step": 3450 }, { "epoch": 0.8642624593037816, "grad_norm": 0.3984375, "learning_rate": 3.0169774561647653e-06, "loss": 0.7628, "step": 3451 }, { "epoch": 0.8645128975707488, "grad_norm": 0.37109375, "learning_rate": 3.0114110770943505e-06, "loss": 1.0593, "step": 3452 }, { "epoch": 0.864763335837716, "grad_norm": 0.3359375, "learning_rate": 3.0058446980239357e-06, "loss": 0.9416, "step": 3453 }, { "epoch": 0.8650137741046832, "grad_norm": 0.333984375, "learning_rate": 3.0002783189535213e-06, "loss": 0.7779, "step": 3454 }, { "epoch": 0.8652642123716504, "grad_norm": 0.40234375, "learning_rate": 2.9947119398831065e-06, "loss": 0.9563, "step": 3455 }, { "epoch": 0.8655146506386175, "grad_norm": 0.35546875, "learning_rate": 2.9891455608126917e-06, "loss": 0.9149, "step": 3456 }, { "epoch": 0.8657650889055848, "grad_norm": 0.31640625, "learning_rate": 2.9835791817422773e-06, "loss": 0.7465, "step": 3457 }, { "epoch": 0.8660155271725519, "grad_norm": 0.37890625, "learning_rate": 2.9780128026718625e-06, "loss": 1.062, "step": 3458 }, { "epoch": 0.8662659654395192, "grad_norm": 0.404296875, "learning_rate": 2.9724464236014477e-06, "loss": 0.8071, "step": 3459 }, { "epoch": 0.8665164037064863, "grad_norm": 0.359375, "learning_rate": 2.966880044531033e-06, "loss": 0.9, "step": 3460 }, { "epoch": 0.8667668419734536, "grad_norm": 0.38671875, "learning_rate": 2.9613136654606185e-06, "loss": 1.0515, "step": 3461 }, { "epoch": 0.8670172802404207, "grad_norm": 0.37890625, "learning_rate": 2.9557472863902032e-06, "loss": 0.726, "step": 3462 }, { "epoch": 0.8672677185073879, "grad_norm": 0.384765625, "learning_rate": 2.9501809073197884e-06, "loss": 1.1766, "step": 3463 }, { "epoch": 0.8675181567743552, "grad_norm": 0.384765625, "learning_rate": 2.9446145282493736e-06, "loss": 0.869, "step": 3464 }, { "epoch": 0.8677685950413223, "grad_norm": 0.46484375, "learning_rate": 2.9390481491789592e-06, "loss": 0.9205, "step": 3465 }, { "epoch": 0.8680190333082896, "grad_norm": 0.423828125, "learning_rate": 2.9334817701085444e-06, "loss": 0.9446, "step": 3466 }, { "epoch": 0.8682694715752567, "grad_norm": 0.34765625, "learning_rate": 2.9279153910381296e-06, "loss": 0.9127, "step": 3467 }, { "epoch": 0.8685199098422239, "grad_norm": 0.3828125, "learning_rate": 2.922349011967715e-06, "loss": 0.9394, "step": 3468 }, { "epoch": 0.8687703481091911, "grad_norm": 0.36328125, "learning_rate": 2.9167826328973004e-06, "loss": 0.8812, "step": 3469 }, { "epoch": 0.8690207863761583, "grad_norm": 0.34375, "learning_rate": 2.9112162538268856e-06, "loss": 0.9328, "step": 3470 }, { "epoch": 0.8692712246431255, "grad_norm": 0.380859375, "learning_rate": 2.9056498747564708e-06, "loss": 1.1826, "step": 3471 }, { "epoch": 0.8695216629100927, "grad_norm": 0.365234375, "learning_rate": 2.9000834956860564e-06, "loss": 0.874, "step": 3472 }, { "epoch": 0.8697721011770598, "grad_norm": 0.365234375, "learning_rate": 2.8945171166156416e-06, "loss": 0.9293, "step": 3473 }, { "epoch": 0.8700225394440271, "grad_norm": 0.34765625, "learning_rate": 2.8889507375452268e-06, "loss": 1.0614, "step": 3474 }, { "epoch": 0.8702729777109942, "grad_norm": 0.36328125, "learning_rate": 2.8833843584748124e-06, "loss": 0.9444, "step": 3475 }, { "epoch": 0.8705234159779615, "grad_norm": 0.396484375, "learning_rate": 2.8778179794043976e-06, "loss": 0.9544, "step": 3476 }, { "epoch": 0.8707738542449286, "grad_norm": 0.443359375, "learning_rate": 2.8722516003339827e-06, "loss": 0.9205, "step": 3477 }, { "epoch": 0.8710242925118958, "grad_norm": 0.376953125, "learning_rate": 2.8666852212635684e-06, "loss": 0.9075, "step": 3478 }, { "epoch": 0.871274730778863, "grad_norm": 0.349609375, "learning_rate": 2.8611188421931535e-06, "loss": 1.0326, "step": 3479 }, { "epoch": 0.8715251690458302, "grad_norm": 0.349609375, "learning_rate": 2.8555524631227387e-06, "loss": 1.0381, "step": 3480 }, { "epoch": 0.8717756073127974, "grad_norm": 0.357421875, "learning_rate": 2.849986084052324e-06, "loss": 1.0407, "step": 3481 }, { "epoch": 0.8720260455797646, "grad_norm": 0.357421875, "learning_rate": 2.8444197049819095e-06, "loss": 1.0717, "step": 3482 }, { "epoch": 0.8722764838467317, "grad_norm": 0.349609375, "learning_rate": 2.8388533259114947e-06, "loss": 1.1316, "step": 3483 }, { "epoch": 0.872526922113699, "grad_norm": 0.36328125, "learning_rate": 2.83328694684108e-06, "loss": 0.904, "step": 3484 }, { "epoch": 0.8727773603806661, "grad_norm": 0.380859375, "learning_rate": 2.8277205677706655e-06, "loss": 0.8337, "step": 3485 }, { "epoch": 0.8730277986476334, "grad_norm": 0.361328125, "learning_rate": 2.8221541887002507e-06, "loss": 0.8944, "step": 3486 }, { "epoch": 0.8732782369146006, "grad_norm": 0.37890625, "learning_rate": 2.816587809629836e-06, "loss": 1.09, "step": 3487 }, { "epoch": 0.8735286751815677, "grad_norm": 0.36328125, "learning_rate": 2.8110214305594215e-06, "loss": 0.8897, "step": 3488 }, { "epoch": 0.873779113448535, "grad_norm": 0.33984375, "learning_rate": 2.8054550514890067e-06, "loss": 0.9634, "step": 3489 }, { "epoch": 0.8740295517155021, "grad_norm": 0.392578125, "learning_rate": 2.799888672418592e-06, "loss": 0.9116, "step": 3490 }, { "epoch": 0.8742799899824694, "grad_norm": 0.380859375, "learning_rate": 2.794322293348177e-06, "loss": 0.8461, "step": 3491 }, { "epoch": 0.8745304282494365, "grad_norm": 0.388671875, "learning_rate": 2.7887559142777627e-06, "loss": 0.9916, "step": 3492 }, { "epoch": 0.8747808665164037, "grad_norm": 0.408203125, "learning_rate": 2.783189535207348e-06, "loss": 1.0086, "step": 3493 }, { "epoch": 0.8750313047833709, "grad_norm": 0.427734375, "learning_rate": 2.777623156136933e-06, "loss": 1.0253, "step": 3494 }, { "epoch": 0.8752817430503381, "grad_norm": 0.412109375, "learning_rate": 2.7720567770665187e-06, "loss": 0.9604, "step": 3495 }, { "epoch": 0.8755321813173053, "grad_norm": 0.37890625, "learning_rate": 2.766490397996104e-06, "loss": 0.9665, "step": 3496 }, { "epoch": 0.8757826195842725, "grad_norm": 0.365234375, "learning_rate": 2.760924018925689e-06, "loss": 0.9453, "step": 3497 }, { "epoch": 0.8760330578512396, "grad_norm": 0.419921875, "learning_rate": 2.7553576398552747e-06, "loss": 0.8721, "step": 3498 }, { "epoch": 0.8762834961182069, "grad_norm": 0.455078125, "learning_rate": 2.74979126078486e-06, "loss": 1.0077, "step": 3499 }, { "epoch": 0.876533934385174, "grad_norm": 0.396484375, "learning_rate": 2.744224881714445e-06, "loss": 0.9614, "step": 3500 }, { "epoch": 0.8767843726521413, "grad_norm": 0.423828125, "learning_rate": 2.7386585026440302e-06, "loss": 0.8044, "step": 3501 }, { "epoch": 0.8770348109191084, "grad_norm": 0.35546875, "learning_rate": 2.733092123573616e-06, "loss": 0.9372, "step": 3502 }, { "epoch": 0.8772852491860756, "grad_norm": 0.345703125, "learning_rate": 2.727525744503201e-06, "loss": 0.9318, "step": 3503 }, { "epoch": 0.8775356874530428, "grad_norm": 0.375, "learning_rate": 2.7219593654327862e-06, "loss": 0.9278, "step": 3504 }, { "epoch": 0.87778612572001, "grad_norm": 0.357421875, "learning_rate": 2.716392986362372e-06, "loss": 0.919, "step": 3505 }, { "epoch": 0.8780365639869772, "grad_norm": 0.36328125, "learning_rate": 2.710826607291957e-06, "loss": 0.8995, "step": 3506 }, { "epoch": 0.8782870022539444, "grad_norm": 0.373046875, "learning_rate": 2.705260228221542e-06, "loss": 0.9865, "step": 3507 }, { "epoch": 0.8785374405209116, "grad_norm": 0.36328125, "learning_rate": 2.699693849151128e-06, "loss": 0.8431, "step": 3508 }, { "epoch": 0.8787878787878788, "grad_norm": 0.388671875, "learning_rate": 2.694127470080713e-06, "loss": 0.9064, "step": 3509 }, { "epoch": 0.879038317054846, "grad_norm": 0.26953125, "learning_rate": 2.688561091010298e-06, "loss": 0.6018, "step": 3510 }, { "epoch": 0.8792887553218132, "grad_norm": 0.365234375, "learning_rate": 2.6829947119398834e-06, "loss": 0.9453, "step": 3511 }, { "epoch": 0.8795391935887804, "grad_norm": 0.376953125, "learning_rate": 2.677428332869469e-06, "loss": 0.9235, "step": 3512 }, { "epoch": 0.8797896318557475, "grad_norm": 0.328125, "learning_rate": 2.6718619537990538e-06, "loss": 0.7942, "step": 3513 }, { "epoch": 0.8800400701227148, "grad_norm": 0.4375, "learning_rate": 2.666295574728639e-06, "loss": 0.9813, "step": 3514 }, { "epoch": 0.8802905083896819, "grad_norm": 0.359375, "learning_rate": 2.660729195658224e-06, "loss": 0.9884, "step": 3515 }, { "epoch": 0.8805409466566492, "grad_norm": 0.3671875, "learning_rate": 2.6551628165878097e-06, "loss": 0.8863, "step": 3516 }, { "epoch": 0.8807913849236163, "grad_norm": 0.41796875, "learning_rate": 2.649596437517395e-06, "loss": 1.0659, "step": 3517 }, { "epoch": 0.8810418231905836, "grad_norm": 0.36328125, "learning_rate": 2.64403005844698e-06, "loss": 0.8577, "step": 3518 }, { "epoch": 0.8812922614575507, "grad_norm": 0.359375, "learning_rate": 2.6384636793765657e-06, "loss": 0.9232, "step": 3519 }, { "epoch": 0.8815426997245179, "grad_norm": 0.376953125, "learning_rate": 2.632897300306151e-06, "loss": 0.9227, "step": 3520 }, { "epoch": 0.8817931379914851, "grad_norm": 0.359375, "learning_rate": 2.627330921235736e-06, "loss": 1.0202, "step": 3521 }, { "epoch": 0.8820435762584523, "grad_norm": 0.373046875, "learning_rate": 2.6217645421653213e-06, "loss": 0.8878, "step": 3522 }, { "epoch": 0.8822940145254194, "grad_norm": 0.392578125, "learning_rate": 2.616198163094907e-06, "loss": 0.8165, "step": 3523 }, { "epoch": 0.8825444527923867, "grad_norm": 0.40234375, "learning_rate": 2.610631784024492e-06, "loss": 1.0197, "step": 3524 }, { "epoch": 0.8827948910593538, "grad_norm": 0.396484375, "learning_rate": 2.6050654049540773e-06, "loss": 0.9882, "step": 3525 }, { "epoch": 0.8830453293263211, "grad_norm": 0.359375, "learning_rate": 2.599499025883663e-06, "loss": 0.9811, "step": 3526 }, { "epoch": 0.8832957675932882, "grad_norm": 0.3828125, "learning_rate": 2.593932646813248e-06, "loss": 0.8831, "step": 3527 }, { "epoch": 0.8835462058602555, "grad_norm": 0.380859375, "learning_rate": 2.5883662677428333e-06, "loss": 1.0624, "step": 3528 }, { "epoch": 0.8837966441272226, "grad_norm": 0.421875, "learning_rate": 2.582799888672419e-06, "loss": 0.8527, "step": 3529 }, { "epoch": 0.8840470823941898, "grad_norm": 0.359375, "learning_rate": 2.577233509602004e-06, "loss": 0.892, "step": 3530 }, { "epoch": 0.8842975206611571, "grad_norm": 0.37890625, "learning_rate": 2.5716671305315893e-06, "loss": 1.103, "step": 3531 }, { "epoch": 0.8845479589281242, "grad_norm": 0.34375, "learning_rate": 2.5661007514611745e-06, "loss": 0.8713, "step": 3532 }, { "epoch": 0.8847983971950915, "grad_norm": 0.361328125, "learning_rate": 2.56053437239076e-06, "loss": 0.7836, "step": 3533 }, { "epoch": 0.8850488354620586, "grad_norm": 0.3828125, "learning_rate": 2.5549679933203453e-06, "loss": 1.0027, "step": 3534 }, { "epoch": 0.8852992737290258, "grad_norm": 0.416015625, "learning_rate": 2.5494016142499304e-06, "loss": 1.0689, "step": 3535 }, { "epoch": 0.885549711995993, "grad_norm": 0.345703125, "learning_rate": 2.543835235179516e-06, "loss": 1.0192, "step": 3536 }, { "epoch": 0.8858001502629602, "grad_norm": 0.38671875, "learning_rate": 2.5382688561091012e-06, "loss": 1.0064, "step": 3537 }, { "epoch": 0.8860505885299274, "grad_norm": 0.40625, "learning_rate": 2.5327024770386864e-06, "loss": 1.0034, "step": 3538 }, { "epoch": 0.8863010267968946, "grad_norm": 0.357421875, "learning_rate": 2.527136097968272e-06, "loss": 1.0141, "step": 3539 }, { "epoch": 0.8865514650638617, "grad_norm": 0.35546875, "learning_rate": 2.5215697188978572e-06, "loss": 0.7797, "step": 3540 }, { "epoch": 0.886801903330829, "grad_norm": 0.369140625, "learning_rate": 2.5160033398274424e-06, "loss": 0.8805, "step": 3541 }, { "epoch": 0.8870523415977961, "grad_norm": 0.380859375, "learning_rate": 2.5104369607570276e-06, "loss": 0.9913, "step": 3542 }, { "epoch": 0.8873027798647634, "grad_norm": 0.404296875, "learning_rate": 2.5048705816866132e-06, "loss": 1.1122, "step": 3543 }, { "epoch": 0.8875532181317305, "grad_norm": 0.337890625, "learning_rate": 2.4993042026161984e-06, "loss": 1.0873, "step": 3544 }, { "epoch": 0.8878036563986977, "grad_norm": 0.38671875, "learning_rate": 2.4937378235457836e-06, "loss": 0.9585, "step": 3545 }, { "epoch": 0.8880540946656649, "grad_norm": 0.349609375, "learning_rate": 2.488171444475369e-06, "loss": 1.042, "step": 3546 }, { "epoch": 0.8883045329326321, "grad_norm": 0.34375, "learning_rate": 2.4826050654049544e-06, "loss": 0.8744, "step": 3547 }, { "epoch": 0.8885549711995993, "grad_norm": 0.42578125, "learning_rate": 2.4770386863345396e-06, "loss": 0.9142, "step": 3548 }, { "epoch": 0.8888054094665665, "grad_norm": 0.396484375, "learning_rate": 2.471472307264125e-06, "loss": 0.9827, "step": 3549 }, { "epoch": 0.8890558477335336, "grad_norm": 0.39453125, "learning_rate": 2.4659059281937104e-06, "loss": 1.0491, "step": 3550 }, { "epoch": 0.8893062860005009, "grad_norm": 0.404296875, "learning_rate": 2.4603395491232956e-06, "loss": 0.8424, "step": 3551 }, { "epoch": 0.8895567242674681, "grad_norm": 0.376953125, "learning_rate": 2.4547731700528808e-06, "loss": 0.9257, "step": 3552 }, { "epoch": 0.8898071625344353, "grad_norm": 0.357421875, "learning_rate": 2.449206790982466e-06, "loss": 0.9973, "step": 3553 }, { "epoch": 0.8900576008014025, "grad_norm": 0.302734375, "learning_rate": 2.443640411912051e-06, "loss": 0.6641, "step": 3554 }, { "epoch": 0.8903080390683696, "grad_norm": 0.373046875, "learning_rate": 2.4380740328416367e-06, "loss": 0.7865, "step": 3555 }, { "epoch": 0.8905584773353369, "grad_norm": 0.39453125, "learning_rate": 2.432507653771222e-06, "loss": 0.9629, "step": 3556 }, { "epoch": 0.890808915602304, "grad_norm": 0.37890625, "learning_rate": 2.426941274700807e-06, "loss": 1.0173, "step": 3557 }, { "epoch": 0.8910593538692713, "grad_norm": 0.365234375, "learning_rate": 2.4213748956303927e-06, "loss": 0.8987, "step": 3558 }, { "epoch": 0.8913097921362384, "grad_norm": 0.365234375, "learning_rate": 2.415808516559978e-06, "loss": 0.8612, "step": 3559 }, { "epoch": 0.8915602304032056, "grad_norm": 0.375, "learning_rate": 2.410242137489563e-06, "loss": 0.9586, "step": 3560 }, { "epoch": 0.8918106686701728, "grad_norm": 0.412109375, "learning_rate": 2.4046757584191487e-06, "loss": 0.9362, "step": 3561 }, { "epoch": 0.89206110693714, "grad_norm": 0.35546875, "learning_rate": 2.399109379348734e-06, "loss": 0.7857, "step": 3562 }, { "epoch": 0.8923115452041072, "grad_norm": 0.359375, "learning_rate": 2.393543000278319e-06, "loss": 0.8934, "step": 3563 }, { "epoch": 0.8925619834710744, "grad_norm": 0.3671875, "learning_rate": 2.3879766212079043e-06, "loss": 1.0256, "step": 3564 }, { "epoch": 0.8928124217380415, "grad_norm": 0.36328125, "learning_rate": 2.38241024213749e-06, "loss": 1.0695, "step": 3565 }, { "epoch": 0.8930628600050088, "grad_norm": 0.380859375, "learning_rate": 2.376843863067075e-06, "loss": 0.9462, "step": 3566 }, { "epoch": 0.8933132982719759, "grad_norm": 0.349609375, "learning_rate": 2.3712774839966603e-06, "loss": 0.9821, "step": 3567 }, { "epoch": 0.8935637365389432, "grad_norm": 0.361328125, "learning_rate": 2.365711104926246e-06, "loss": 0.8628, "step": 3568 }, { "epoch": 0.8938141748059103, "grad_norm": 0.337890625, "learning_rate": 2.360144725855831e-06, "loss": 0.7628, "step": 3569 }, { "epoch": 0.8940646130728775, "grad_norm": 0.392578125, "learning_rate": 2.3545783467854163e-06, "loss": 0.9705, "step": 3570 }, { "epoch": 0.8943150513398447, "grad_norm": 0.404296875, "learning_rate": 2.349011967715002e-06, "loss": 0.9628, "step": 3571 }, { "epoch": 0.8945654896068119, "grad_norm": 0.38671875, "learning_rate": 2.343445588644587e-06, "loss": 1.0879, "step": 3572 }, { "epoch": 0.8948159278737791, "grad_norm": 0.361328125, "learning_rate": 2.3378792095741723e-06, "loss": 0.9176, "step": 3573 }, { "epoch": 0.8950663661407463, "grad_norm": 0.3515625, "learning_rate": 2.3323128305037574e-06, "loss": 1.0155, "step": 3574 }, { "epoch": 0.8953168044077136, "grad_norm": 0.37109375, "learning_rate": 2.326746451433343e-06, "loss": 0.9943, "step": 3575 }, { "epoch": 0.8955672426746807, "grad_norm": 0.390625, "learning_rate": 2.321180072362928e-06, "loss": 0.7816, "step": 3576 }, { "epoch": 0.8958176809416479, "grad_norm": 0.369140625, "learning_rate": 2.3156136932925134e-06, "loss": 0.9269, "step": 3577 }, { "epoch": 0.8960681192086151, "grad_norm": 0.388671875, "learning_rate": 2.3100473142220986e-06, "loss": 1.0335, "step": 3578 }, { "epoch": 0.8963185574755823, "grad_norm": 0.36328125, "learning_rate": 2.304480935151684e-06, "loss": 0.9685, "step": 3579 }, { "epoch": 0.8965689957425494, "grad_norm": 0.416015625, "learning_rate": 2.2989145560812694e-06, "loss": 0.9904, "step": 3580 }, { "epoch": 0.8968194340095167, "grad_norm": 0.416015625, "learning_rate": 2.2933481770108546e-06, "loss": 0.9017, "step": 3581 }, { "epoch": 0.8970698722764838, "grad_norm": 0.3828125, "learning_rate": 2.28778179794044e-06, "loss": 0.8696, "step": 3582 }, { "epoch": 0.8973203105434511, "grad_norm": 0.4140625, "learning_rate": 2.282215418870025e-06, "loss": 1.0684, "step": 3583 }, { "epoch": 0.8975707488104182, "grad_norm": 0.349609375, "learning_rate": 2.2766490397996106e-06, "loss": 0.7711, "step": 3584 }, { "epoch": 0.8978211870773855, "grad_norm": 0.369140625, "learning_rate": 2.2710826607291958e-06, "loss": 0.9041, "step": 3585 }, { "epoch": 0.8980716253443526, "grad_norm": 0.37890625, "learning_rate": 2.265516281658781e-06, "loss": 0.9085, "step": 3586 }, { "epoch": 0.8983220636113198, "grad_norm": 0.361328125, "learning_rate": 2.2599499025883666e-06, "loss": 1.0913, "step": 3587 }, { "epoch": 0.898572501878287, "grad_norm": 0.345703125, "learning_rate": 2.2543835235179518e-06, "loss": 0.9197, "step": 3588 }, { "epoch": 0.8988229401452542, "grad_norm": 0.380859375, "learning_rate": 2.248817144447537e-06, "loss": 0.8162, "step": 3589 }, { "epoch": 0.8990733784122213, "grad_norm": 0.38671875, "learning_rate": 2.2432507653771226e-06, "loss": 0.7775, "step": 3590 }, { "epoch": 0.8993238166791886, "grad_norm": 0.41015625, "learning_rate": 2.2376843863067078e-06, "loss": 0.8556, "step": 3591 }, { "epoch": 0.8995742549461557, "grad_norm": 0.353515625, "learning_rate": 2.232118007236293e-06, "loss": 0.9414, "step": 3592 }, { "epoch": 0.899824693213123, "grad_norm": 0.380859375, "learning_rate": 2.226551628165878e-06, "loss": 0.7506, "step": 3593 }, { "epoch": 0.9000751314800901, "grad_norm": 0.3671875, "learning_rate": 2.2209852490954637e-06, "loss": 0.8825, "step": 3594 }, { "epoch": 0.9003255697470574, "grad_norm": 0.34375, "learning_rate": 2.215418870025049e-06, "loss": 0.978, "step": 3595 }, { "epoch": 0.9005760080140246, "grad_norm": 0.388671875, "learning_rate": 2.209852490954634e-06, "loss": 0.9473, "step": 3596 }, { "epoch": 0.9008264462809917, "grad_norm": 0.369140625, "learning_rate": 2.2042861118842197e-06, "loss": 0.917, "step": 3597 }, { "epoch": 0.901076884547959, "grad_norm": 0.349609375, "learning_rate": 2.198719732813805e-06, "loss": 0.8388, "step": 3598 }, { "epoch": 0.9013273228149261, "grad_norm": 0.41015625, "learning_rate": 2.19315335374339e-06, "loss": 0.8658, "step": 3599 }, { "epoch": 0.9015777610818934, "grad_norm": 0.384765625, "learning_rate": 2.1875869746729757e-06, "loss": 0.8391, "step": 3600 }, { "epoch": 0.9018281993488605, "grad_norm": 0.3515625, "learning_rate": 2.1820205956025605e-06, "loss": 1.156, "step": 3601 }, { "epoch": 0.9020786376158277, "grad_norm": 0.341796875, "learning_rate": 2.176454216532146e-06, "loss": 0.7244, "step": 3602 }, { "epoch": 0.9023290758827949, "grad_norm": 0.318359375, "learning_rate": 2.1708878374617313e-06, "loss": 0.9684, "step": 3603 }, { "epoch": 0.9025795141497621, "grad_norm": 0.384765625, "learning_rate": 2.1653214583913165e-06, "loss": 1.0505, "step": 3604 }, { "epoch": 0.9028299524167293, "grad_norm": 0.390625, "learning_rate": 2.1597550793209017e-06, "loss": 0.9087, "step": 3605 }, { "epoch": 0.9030803906836965, "grad_norm": 0.373046875, "learning_rate": 2.1541887002504873e-06, "loss": 0.9725, "step": 3606 }, { "epoch": 0.9033308289506636, "grad_norm": 0.396484375, "learning_rate": 2.1486223211800725e-06, "loss": 1.0718, "step": 3607 }, { "epoch": 0.9035812672176309, "grad_norm": 0.380859375, "learning_rate": 2.1430559421096577e-06, "loss": 1.0006, "step": 3608 }, { "epoch": 0.903831705484598, "grad_norm": 0.373046875, "learning_rate": 2.1374895630392433e-06, "loss": 0.9006, "step": 3609 }, { "epoch": 0.9040821437515653, "grad_norm": 0.3671875, "learning_rate": 2.1319231839688285e-06, "loss": 1.0086, "step": 3610 }, { "epoch": 0.9043325820185324, "grad_norm": 0.353515625, "learning_rate": 2.1263568048984136e-06, "loss": 1.0276, "step": 3611 }, { "epoch": 0.9045830202854996, "grad_norm": 0.400390625, "learning_rate": 2.1207904258279993e-06, "loss": 0.791, "step": 3612 }, { "epoch": 0.9048334585524668, "grad_norm": 0.34375, "learning_rate": 2.1152240467575844e-06, "loss": 0.8608, "step": 3613 }, { "epoch": 0.905083896819434, "grad_norm": 0.341796875, "learning_rate": 2.1096576676871696e-06, "loss": 0.968, "step": 3614 }, { "epoch": 0.9053343350864012, "grad_norm": 0.3515625, "learning_rate": 2.104091288616755e-06, "loss": 0.8087, "step": 3615 }, { "epoch": 0.9055847733533684, "grad_norm": 0.38671875, "learning_rate": 2.0985249095463404e-06, "loss": 0.8808, "step": 3616 }, { "epoch": 0.9058352116203355, "grad_norm": 0.390625, "learning_rate": 2.0929585304759256e-06, "loss": 1.1031, "step": 3617 }, { "epoch": 0.9060856498873028, "grad_norm": 0.37109375, "learning_rate": 2.087392151405511e-06, "loss": 1.0355, "step": 3618 }, { "epoch": 0.90633608815427, "grad_norm": 0.34375, "learning_rate": 2.0818257723350964e-06, "loss": 0.9092, "step": 3619 }, { "epoch": 0.9065865264212372, "grad_norm": 0.37890625, "learning_rate": 2.0762593932646816e-06, "loss": 0.8386, "step": 3620 }, { "epoch": 0.9068369646882044, "grad_norm": 0.365234375, "learning_rate": 2.070693014194267e-06, "loss": 0.9141, "step": 3621 }, { "epoch": 0.9070874029551715, "grad_norm": 0.357421875, "learning_rate": 2.0651266351238524e-06, "loss": 0.8176, "step": 3622 }, { "epoch": 0.9073378412221388, "grad_norm": 0.404296875, "learning_rate": 2.0595602560534376e-06, "loss": 0.8747, "step": 3623 }, { "epoch": 0.9075882794891059, "grad_norm": 0.353515625, "learning_rate": 2.0539938769830228e-06, "loss": 1.0075, "step": 3624 }, { "epoch": 0.9078387177560732, "grad_norm": 0.41015625, "learning_rate": 2.048427497912608e-06, "loss": 0.854, "step": 3625 }, { "epoch": 0.9080891560230403, "grad_norm": 0.400390625, "learning_rate": 2.0428611188421936e-06, "loss": 0.8769, "step": 3626 }, { "epoch": 0.9083395942900075, "grad_norm": 0.4140625, "learning_rate": 2.0372947397717783e-06, "loss": 1.0743, "step": 3627 }, { "epoch": 0.9085900325569747, "grad_norm": 0.45703125, "learning_rate": 2.031728360701364e-06, "loss": 1.0495, "step": 3628 }, { "epoch": 0.9088404708239419, "grad_norm": 0.3203125, "learning_rate": 2.026161981630949e-06, "loss": 1.1537, "step": 3629 }, { "epoch": 0.9090909090909091, "grad_norm": 0.38671875, "learning_rate": 2.0205956025605343e-06, "loss": 0.9328, "step": 3630 }, { "epoch": 0.9093413473578763, "grad_norm": 0.380859375, "learning_rate": 2.01502922349012e-06, "loss": 0.8191, "step": 3631 }, { "epoch": 0.9095917856248434, "grad_norm": 0.365234375, "learning_rate": 2.009462844419705e-06, "loss": 0.7944, "step": 3632 }, { "epoch": 0.9098422238918107, "grad_norm": 0.373046875, "learning_rate": 2.0038964653492903e-06, "loss": 0.8715, "step": 3633 }, { "epoch": 0.9100926621587778, "grad_norm": 0.359375, "learning_rate": 1.9983300862788755e-06, "loss": 0.916, "step": 3634 }, { "epoch": 0.9103431004257451, "grad_norm": 0.33984375, "learning_rate": 1.992763707208461e-06, "loss": 0.9675, "step": 3635 }, { "epoch": 0.9105935386927122, "grad_norm": 0.421875, "learning_rate": 1.9871973281380463e-06, "loss": 0.9943, "step": 3636 }, { "epoch": 0.9108439769596794, "grad_norm": 0.353515625, "learning_rate": 1.9816309490676315e-06, "loss": 0.8705, "step": 3637 }, { "epoch": 0.9110944152266466, "grad_norm": 0.408203125, "learning_rate": 1.976064569997217e-06, "loss": 0.8182, "step": 3638 }, { "epoch": 0.9113448534936138, "grad_norm": 0.353515625, "learning_rate": 1.9704981909268023e-06, "loss": 0.9669, "step": 3639 }, { "epoch": 0.911595291760581, "grad_norm": 0.4296875, "learning_rate": 1.9649318118563875e-06, "loss": 0.9834, "step": 3640 }, { "epoch": 0.9118457300275482, "grad_norm": 0.3671875, "learning_rate": 1.959365432785973e-06, "loss": 0.8963, "step": 3641 }, { "epoch": 0.9120961682945155, "grad_norm": 0.322265625, "learning_rate": 1.9537990537155583e-06, "loss": 0.891, "step": 3642 }, { "epoch": 0.9123466065614826, "grad_norm": 0.37890625, "learning_rate": 1.9482326746451435e-06, "loss": 0.7869, "step": 3643 }, { "epoch": 0.9125970448284498, "grad_norm": 0.37109375, "learning_rate": 1.9426662955747287e-06, "loss": 1.0886, "step": 3644 }, { "epoch": 0.912847483095417, "grad_norm": 0.345703125, "learning_rate": 1.9370999165043143e-06, "loss": 0.9431, "step": 3645 }, { "epoch": 0.9130979213623842, "grad_norm": 0.412109375, "learning_rate": 1.9315335374338995e-06, "loss": 0.8039, "step": 3646 }, { "epoch": 0.9133483596293513, "grad_norm": 0.423828125, "learning_rate": 1.9259671583634846e-06, "loss": 0.8779, "step": 3647 }, { "epoch": 0.9135987978963186, "grad_norm": 0.365234375, "learning_rate": 1.9204007792930703e-06, "loss": 0.8672, "step": 3648 }, { "epoch": 0.9138492361632857, "grad_norm": 0.380859375, "learning_rate": 1.9148344002226554e-06, "loss": 0.9019, "step": 3649 }, { "epoch": 0.914099674430253, "grad_norm": 0.408203125, "learning_rate": 1.9092680211522406e-06, "loss": 0.8305, "step": 3650 }, { "epoch": 0.9143501126972201, "grad_norm": 0.3828125, "learning_rate": 1.903701642081826e-06, "loss": 0.9058, "step": 3651 }, { "epoch": 0.9146005509641874, "grad_norm": 0.40625, "learning_rate": 1.898135263011411e-06, "loss": 0.9217, "step": 3652 }, { "epoch": 0.9148509892311545, "grad_norm": 0.33203125, "learning_rate": 1.8925688839409964e-06, "loss": 0.7402, "step": 3653 }, { "epoch": 0.9151014274981217, "grad_norm": 0.42578125, "learning_rate": 1.8870025048705818e-06, "loss": 0.864, "step": 3654 }, { "epoch": 0.9153518657650889, "grad_norm": 0.384765625, "learning_rate": 1.881436125800167e-06, "loss": 0.9566, "step": 3655 }, { "epoch": 0.9156023040320561, "grad_norm": 0.337890625, "learning_rate": 1.8758697467297524e-06, "loss": 1.0082, "step": 3656 }, { "epoch": 0.9158527422990232, "grad_norm": 0.369140625, "learning_rate": 1.8703033676593376e-06, "loss": 0.8343, "step": 3657 }, { "epoch": 0.9161031805659905, "grad_norm": 0.408203125, "learning_rate": 1.864736988588923e-06, "loss": 0.8164, "step": 3658 }, { "epoch": 0.9163536188329576, "grad_norm": 0.359375, "learning_rate": 1.8591706095185084e-06, "loss": 0.7859, "step": 3659 }, { "epoch": 0.9166040570999249, "grad_norm": 0.42578125, "learning_rate": 1.8536042304480936e-06, "loss": 0.9243, "step": 3660 }, { "epoch": 0.916854495366892, "grad_norm": 0.396484375, "learning_rate": 1.848037851377679e-06, "loss": 0.8646, "step": 3661 }, { "epoch": 0.9171049336338593, "grad_norm": 0.3515625, "learning_rate": 1.8424714723072642e-06, "loss": 0.8976, "step": 3662 }, { "epoch": 0.9173553719008265, "grad_norm": 0.3359375, "learning_rate": 1.8369050932368496e-06, "loss": 0.9785, "step": 3663 }, { "epoch": 0.9176058101677936, "grad_norm": 0.353515625, "learning_rate": 1.831338714166435e-06, "loss": 0.9853, "step": 3664 }, { "epoch": 0.9178562484347609, "grad_norm": 0.33984375, "learning_rate": 1.8257723350960202e-06, "loss": 0.9454, "step": 3665 }, { "epoch": 0.918106686701728, "grad_norm": 0.40234375, "learning_rate": 1.8202059560256056e-06, "loss": 1.0007, "step": 3666 }, { "epoch": 0.9183571249686953, "grad_norm": 0.30859375, "learning_rate": 1.8146395769551907e-06, "loss": 0.7998, "step": 3667 }, { "epoch": 0.9186075632356624, "grad_norm": 0.3515625, "learning_rate": 1.8090731978847761e-06, "loss": 0.948, "step": 3668 }, { "epoch": 0.9188580015026296, "grad_norm": 0.369140625, "learning_rate": 1.8035068188143615e-06, "loss": 0.8744, "step": 3669 }, { "epoch": 0.9191084397695968, "grad_norm": 0.396484375, "learning_rate": 1.7979404397439467e-06, "loss": 1.0838, "step": 3670 }, { "epoch": 0.919358878036564, "grad_norm": 0.376953125, "learning_rate": 1.7923740606735321e-06, "loss": 1.0237, "step": 3671 }, { "epoch": 0.9196093163035312, "grad_norm": 0.404296875, "learning_rate": 1.7868076816031173e-06, "loss": 0.8887, "step": 3672 }, { "epoch": 0.9198597545704984, "grad_norm": 0.41796875, "learning_rate": 1.7812413025327027e-06, "loss": 0.8965, "step": 3673 }, { "epoch": 0.9201101928374655, "grad_norm": 0.36328125, "learning_rate": 1.7756749234622881e-06, "loss": 0.9755, "step": 3674 }, { "epoch": 0.9203606311044328, "grad_norm": 0.39453125, "learning_rate": 1.7701085443918733e-06, "loss": 1.0064, "step": 3675 }, { "epoch": 0.9206110693713999, "grad_norm": 0.392578125, "learning_rate": 1.7645421653214587e-06, "loss": 0.8004, "step": 3676 }, { "epoch": 0.9208615076383672, "grad_norm": 0.37109375, "learning_rate": 1.7589757862510437e-06, "loss": 0.8435, "step": 3677 }, { "epoch": 0.9211119459053343, "grad_norm": 0.416015625, "learning_rate": 1.753409407180629e-06, "loss": 0.9899, "step": 3678 }, { "epoch": 0.9213623841723015, "grad_norm": 0.3828125, "learning_rate": 1.7478430281102143e-06, "loss": 0.8572, "step": 3679 }, { "epoch": 0.9216128224392687, "grad_norm": 0.375, "learning_rate": 1.7422766490397997e-06, "loss": 0.8018, "step": 3680 }, { "epoch": 0.9218632607062359, "grad_norm": 0.37109375, "learning_rate": 1.736710269969385e-06, "loss": 0.8903, "step": 3681 }, { "epoch": 0.922113698973203, "grad_norm": 0.357421875, "learning_rate": 1.7311438908989703e-06, "loss": 0.8741, "step": 3682 }, { "epoch": 0.9223641372401703, "grad_norm": 0.359375, "learning_rate": 1.7255775118285557e-06, "loss": 1.0922, "step": 3683 }, { "epoch": 0.9226145755071374, "grad_norm": 0.373046875, "learning_rate": 1.7200111327581408e-06, "loss": 0.9732, "step": 3684 }, { "epoch": 0.9228650137741047, "grad_norm": 0.39453125, "learning_rate": 1.7144447536877262e-06, "loss": 0.8783, "step": 3685 }, { "epoch": 0.9231154520410719, "grad_norm": 0.384765625, "learning_rate": 1.7088783746173116e-06, "loss": 0.9784, "step": 3686 }, { "epoch": 0.9233658903080391, "grad_norm": 0.369140625, "learning_rate": 1.7033119955468968e-06, "loss": 0.8954, "step": 3687 }, { "epoch": 0.9236163285750063, "grad_norm": 0.369140625, "learning_rate": 1.6977456164764822e-06, "loss": 0.884, "step": 3688 }, { "epoch": 0.9238667668419734, "grad_norm": 0.380859375, "learning_rate": 1.6921792374060674e-06, "loss": 1.012, "step": 3689 }, { "epoch": 0.9241172051089407, "grad_norm": 0.33203125, "learning_rate": 1.6866128583356528e-06, "loss": 0.9465, "step": 3690 }, { "epoch": 0.9243676433759078, "grad_norm": 0.375, "learning_rate": 1.6810464792652382e-06, "loss": 0.8985, "step": 3691 }, { "epoch": 0.9246180816428751, "grad_norm": 0.408203125, "learning_rate": 1.6754801001948234e-06, "loss": 0.9499, "step": 3692 }, { "epoch": 0.9248685199098422, "grad_norm": 0.42578125, "learning_rate": 1.6699137211244088e-06, "loss": 0.904, "step": 3693 }, { "epoch": 0.9251189581768094, "grad_norm": 0.369140625, "learning_rate": 1.664347342053994e-06, "loss": 1.0759, "step": 3694 }, { "epoch": 0.9253693964437766, "grad_norm": 0.37109375, "learning_rate": 1.6587809629835794e-06, "loss": 1.1201, "step": 3695 }, { "epoch": 0.9256198347107438, "grad_norm": 0.34375, "learning_rate": 1.6532145839131648e-06, "loss": 0.8785, "step": 3696 }, { "epoch": 0.925870272977711, "grad_norm": 0.39453125, "learning_rate": 1.64764820484275e-06, "loss": 0.9744, "step": 3697 }, { "epoch": 0.9261207112446782, "grad_norm": 0.41015625, "learning_rate": 1.6420818257723354e-06, "loss": 0.8447, "step": 3698 }, { "epoch": 0.9263711495116453, "grad_norm": 0.37109375, "learning_rate": 1.6365154467019206e-06, "loss": 0.8799, "step": 3699 }, { "epoch": 0.9266215877786126, "grad_norm": 0.427734375, "learning_rate": 1.630949067631506e-06, "loss": 0.8566, "step": 3700 }, { "epoch": 0.9268720260455797, "grad_norm": 0.369140625, "learning_rate": 1.6253826885610914e-06, "loss": 0.8768, "step": 3701 }, { "epoch": 0.927122464312547, "grad_norm": 0.4375, "learning_rate": 1.6198163094906766e-06, "loss": 0.9013, "step": 3702 }, { "epoch": 0.9273729025795141, "grad_norm": 0.349609375, "learning_rate": 1.6142499304202615e-06, "loss": 0.9493, "step": 3703 }, { "epoch": 0.9276233408464813, "grad_norm": 0.361328125, "learning_rate": 1.608683551349847e-06, "loss": 0.9572, "step": 3704 }, { "epoch": 0.9278737791134485, "grad_norm": 0.3125, "learning_rate": 1.6031171722794323e-06, "loss": 0.7931, "step": 3705 }, { "epoch": 0.9281242173804157, "grad_norm": 0.357421875, "learning_rate": 1.5975507932090175e-06, "loss": 1.0408, "step": 3706 }, { "epoch": 0.928374655647383, "grad_norm": 0.384765625, "learning_rate": 1.591984414138603e-06, "loss": 0.9237, "step": 3707 }, { "epoch": 0.9286250939143501, "grad_norm": 0.345703125, "learning_rate": 1.5864180350681881e-06, "loss": 0.8956, "step": 3708 }, { "epoch": 0.9288755321813174, "grad_norm": 0.326171875, "learning_rate": 1.5808516559977735e-06, "loss": 0.9046, "step": 3709 }, { "epoch": 0.9291259704482845, "grad_norm": 0.376953125, "learning_rate": 1.575285276927359e-06, "loss": 0.8506, "step": 3710 }, { "epoch": 0.9293764087152517, "grad_norm": 0.3828125, "learning_rate": 1.569718897856944e-06, "loss": 0.8768, "step": 3711 }, { "epoch": 0.9296268469822189, "grad_norm": 0.43359375, "learning_rate": 1.5641525187865295e-06, "loss": 1.0355, "step": 3712 }, { "epoch": 0.9298772852491861, "grad_norm": 0.369140625, "learning_rate": 1.5585861397161147e-06, "loss": 1.0065, "step": 3713 }, { "epoch": 0.9301277235161532, "grad_norm": 0.375, "learning_rate": 1.5530197606457e-06, "loss": 0.9213, "step": 3714 }, { "epoch": 0.9303781617831205, "grad_norm": 0.35546875, "learning_rate": 1.5474533815752855e-06, "loss": 0.9203, "step": 3715 }, { "epoch": 0.9306286000500876, "grad_norm": 0.388671875, "learning_rate": 1.5418870025048707e-06, "loss": 0.8879, "step": 3716 }, { "epoch": 0.9308790383170549, "grad_norm": 0.4375, "learning_rate": 1.536320623434456e-06, "loss": 1.0322, "step": 3717 }, { "epoch": 0.931129476584022, "grad_norm": 0.423828125, "learning_rate": 1.5307542443640413e-06, "loss": 0.9138, "step": 3718 }, { "epoch": 0.9313799148509893, "grad_norm": 0.388671875, "learning_rate": 1.5251878652936267e-06, "loss": 1.0181, "step": 3719 }, { "epoch": 0.9316303531179564, "grad_norm": 0.40625, "learning_rate": 1.519621486223212e-06, "loss": 0.9042, "step": 3720 }, { "epoch": 0.9318807913849236, "grad_norm": 0.35546875, "learning_rate": 1.5140551071527973e-06, "loss": 1.0593, "step": 3721 }, { "epoch": 0.9321312296518908, "grad_norm": 0.34765625, "learning_rate": 1.5084887280823827e-06, "loss": 0.9074, "step": 3722 }, { "epoch": 0.932381667918858, "grad_norm": 0.412109375, "learning_rate": 1.5029223490119678e-06, "loss": 0.7959, "step": 3723 }, { "epoch": 0.9326321061858251, "grad_norm": 0.38671875, "learning_rate": 1.4973559699415532e-06, "loss": 0.9679, "step": 3724 }, { "epoch": 0.9328825444527924, "grad_norm": 0.380859375, "learning_rate": 1.4917895908711386e-06, "loss": 1.0167, "step": 3725 }, { "epoch": 0.9331329827197595, "grad_norm": 0.3515625, "learning_rate": 1.4862232118007238e-06, "loss": 0.8775, "step": 3726 }, { "epoch": 0.9333834209867268, "grad_norm": 0.3984375, "learning_rate": 1.4806568327303092e-06, "loss": 1.0105, "step": 3727 }, { "epoch": 0.9336338592536939, "grad_norm": 0.35546875, "learning_rate": 1.4750904536598942e-06, "loss": 1.1133, "step": 3728 }, { "epoch": 0.9338842975206612, "grad_norm": 0.36328125, "learning_rate": 1.4695240745894796e-06, "loss": 0.8114, "step": 3729 }, { "epoch": 0.9341347357876284, "grad_norm": 0.412109375, "learning_rate": 1.4639576955190648e-06, "loss": 1.0739, "step": 3730 }, { "epoch": 0.9343851740545955, "grad_norm": 0.390625, "learning_rate": 1.4583913164486502e-06, "loss": 1.1119, "step": 3731 }, { "epoch": 0.9346356123215628, "grad_norm": 0.419921875, "learning_rate": 1.4528249373782354e-06, "loss": 0.944, "step": 3732 }, { "epoch": 0.9348860505885299, "grad_norm": 0.388671875, "learning_rate": 1.4472585583078208e-06, "loss": 1.0902, "step": 3733 }, { "epoch": 0.9351364888554972, "grad_norm": 0.392578125, "learning_rate": 1.4416921792374062e-06, "loss": 1.2142, "step": 3734 }, { "epoch": 0.9353869271224643, "grad_norm": 0.369140625, "learning_rate": 1.4361258001669914e-06, "loss": 0.937, "step": 3735 }, { "epoch": 0.9356373653894315, "grad_norm": 0.375, "learning_rate": 1.4305594210965768e-06, "loss": 1.1325, "step": 3736 }, { "epoch": 0.9358878036563987, "grad_norm": 0.37109375, "learning_rate": 1.424993042026162e-06, "loss": 0.8875, "step": 3737 }, { "epoch": 0.9361382419233659, "grad_norm": 0.341796875, "learning_rate": 1.4194266629557474e-06, "loss": 1.1563, "step": 3738 }, { "epoch": 0.9363886801903331, "grad_norm": 0.37890625, "learning_rate": 1.4138602838853328e-06, "loss": 0.8259, "step": 3739 }, { "epoch": 0.9366391184573003, "grad_norm": 0.3359375, "learning_rate": 1.408293904814918e-06, "loss": 0.8373, "step": 3740 }, { "epoch": 0.9368895567242674, "grad_norm": 0.380859375, "learning_rate": 1.4027275257445034e-06, "loss": 0.9078, "step": 3741 }, { "epoch": 0.9371399949912347, "grad_norm": 0.34375, "learning_rate": 1.3971611466740885e-06, "loss": 0.9693, "step": 3742 }, { "epoch": 0.9373904332582018, "grad_norm": 0.357421875, "learning_rate": 1.391594767603674e-06, "loss": 0.7672, "step": 3743 }, { "epoch": 0.9376408715251691, "grad_norm": 0.376953125, "learning_rate": 1.3860283885332593e-06, "loss": 0.7589, "step": 3744 }, { "epoch": 0.9378913097921362, "grad_norm": 0.373046875, "learning_rate": 1.3804620094628445e-06, "loss": 0.9723, "step": 3745 }, { "epoch": 0.9381417480591034, "grad_norm": 0.376953125, "learning_rate": 1.37489563039243e-06, "loss": 0.9996, "step": 3746 }, { "epoch": 0.9383921863260706, "grad_norm": 0.369140625, "learning_rate": 1.3693292513220151e-06, "loss": 0.8595, "step": 3747 }, { "epoch": 0.9386426245930378, "grad_norm": 0.3671875, "learning_rate": 1.3637628722516005e-06, "loss": 0.9995, "step": 3748 }, { "epoch": 0.938893062860005, "grad_norm": 0.40625, "learning_rate": 1.358196493181186e-06, "loss": 0.8604, "step": 3749 }, { "epoch": 0.9391435011269722, "grad_norm": 0.39453125, "learning_rate": 1.352630114110771e-06, "loss": 1.0763, "step": 3750 }, { "epoch": 0.9393939393939394, "grad_norm": 0.421875, "learning_rate": 1.3470637350403565e-06, "loss": 1.0165, "step": 3751 }, { "epoch": 0.9396443776609066, "grad_norm": 0.390625, "learning_rate": 1.3414973559699417e-06, "loss": 1.0443, "step": 3752 }, { "epoch": 0.9398948159278738, "grad_norm": 0.361328125, "learning_rate": 1.3359309768995269e-06, "loss": 0.9704, "step": 3753 }, { "epoch": 0.940145254194841, "grad_norm": 0.36328125, "learning_rate": 1.330364597829112e-06, "loss": 0.9802, "step": 3754 }, { "epoch": 0.9403956924618082, "grad_norm": 0.37109375, "learning_rate": 1.3247982187586975e-06, "loss": 0.8053, "step": 3755 }, { "epoch": 0.9406461307287753, "grad_norm": 0.36328125, "learning_rate": 1.3192318396882829e-06, "loss": 0.962, "step": 3756 }, { "epoch": 0.9408965689957426, "grad_norm": 0.388671875, "learning_rate": 1.313665460617868e-06, "loss": 1.0883, "step": 3757 }, { "epoch": 0.9411470072627097, "grad_norm": 0.341796875, "learning_rate": 1.3080990815474535e-06, "loss": 0.8231, "step": 3758 }, { "epoch": 0.941397445529677, "grad_norm": 0.376953125, "learning_rate": 1.3025327024770386e-06, "loss": 0.8932, "step": 3759 }, { "epoch": 0.9416478837966441, "grad_norm": 0.40625, "learning_rate": 1.296966323406624e-06, "loss": 0.8858, "step": 3760 }, { "epoch": 0.9418983220636113, "grad_norm": 0.396484375, "learning_rate": 1.2913999443362094e-06, "loss": 1.0393, "step": 3761 }, { "epoch": 0.9421487603305785, "grad_norm": 0.37890625, "learning_rate": 1.2858335652657946e-06, "loss": 0.8837, "step": 3762 }, { "epoch": 0.9423991985975457, "grad_norm": 0.3828125, "learning_rate": 1.28026718619538e-06, "loss": 0.7632, "step": 3763 }, { "epoch": 0.9426496368645129, "grad_norm": 0.3828125, "learning_rate": 1.2747008071249652e-06, "loss": 0.9344, "step": 3764 }, { "epoch": 0.9429000751314801, "grad_norm": 0.3984375, "learning_rate": 1.2691344280545506e-06, "loss": 0.922, "step": 3765 }, { "epoch": 0.9431505133984472, "grad_norm": 0.37890625, "learning_rate": 1.263568048984136e-06, "loss": 0.9543, "step": 3766 }, { "epoch": 0.9434009516654145, "grad_norm": 0.375, "learning_rate": 1.2580016699137212e-06, "loss": 0.991, "step": 3767 }, { "epoch": 0.9436513899323816, "grad_norm": 0.361328125, "learning_rate": 1.2524352908433066e-06, "loss": 1.0147, "step": 3768 }, { "epoch": 0.9439018281993489, "grad_norm": 0.384765625, "learning_rate": 1.2468689117728918e-06, "loss": 0.9477, "step": 3769 }, { "epoch": 0.944152266466316, "grad_norm": 0.375, "learning_rate": 1.2413025327024772e-06, "loss": 0.9385, "step": 3770 }, { "epoch": 0.9444027047332832, "grad_norm": 0.388671875, "learning_rate": 1.2357361536320626e-06, "loss": 0.9472, "step": 3771 }, { "epoch": 0.9446531430002504, "grad_norm": 0.35546875, "learning_rate": 1.2301697745616478e-06, "loss": 0.9922, "step": 3772 }, { "epoch": 0.9449035812672176, "grad_norm": 0.37109375, "learning_rate": 1.224603395491233e-06, "loss": 0.8912, "step": 3773 }, { "epoch": 0.9451540195341849, "grad_norm": 0.337890625, "learning_rate": 1.2190370164208184e-06, "loss": 0.9183, "step": 3774 }, { "epoch": 0.945404457801152, "grad_norm": 0.423828125, "learning_rate": 1.2134706373504036e-06, "loss": 1.0134, "step": 3775 }, { "epoch": 0.9456548960681193, "grad_norm": 0.3671875, "learning_rate": 1.207904258279989e-06, "loss": 0.9786, "step": 3776 }, { "epoch": 0.9459053343350864, "grad_norm": 0.37109375, "learning_rate": 1.2023378792095744e-06, "loss": 0.8638, "step": 3777 }, { "epoch": 0.9461557726020536, "grad_norm": 0.353515625, "learning_rate": 1.1967715001391595e-06, "loss": 1.0288, "step": 3778 }, { "epoch": 0.9464062108690208, "grad_norm": 0.359375, "learning_rate": 1.191205121068745e-06, "loss": 0.8836, "step": 3779 }, { "epoch": 0.946656649135988, "grad_norm": 0.361328125, "learning_rate": 1.1856387419983301e-06, "loss": 0.9562, "step": 3780 }, { "epoch": 0.9469070874029551, "grad_norm": 0.37890625, "learning_rate": 1.1800723629279155e-06, "loss": 0.9671, "step": 3781 }, { "epoch": 0.9471575256699224, "grad_norm": 0.365234375, "learning_rate": 1.174505983857501e-06, "loss": 0.7919, "step": 3782 }, { "epoch": 0.9474079639368895, "grad_norm": 0.345703125, "learning_rate": 1.1689396047870861e-06, "loss": 1.1186, "step": 3783 }, { "epoch": 0.9476584022038568, "grad_norm": 0.388671875, "learning_rate": 1.1633732257166715e-06, "loss": 0.8877, "step": 3784 }, { "epoch": 0.9479088404708239, "grad_norm": 0.37890625, "learning_rate": 1.1578068466462567e-06, "loss": 0.9712, "step": 3785 }, { "epoch": 0.9481592787377912, "grad_norm": 0.3671875, "learning_rate": 1.152240467575842e-06, "loss": 0.8041, "step": 3786 }, { "epoch": 0.9484097170047583, "grad_norm": 0.412109375, "learning_rate": 1.1466740885054273e-06, "loss": 0.7579, "step": 3787 }, { "epoch": 0.9486601552717255, "grad_norm": 0.416015625, "learning_rate": 1.1411077094350125e-06, "loss": 0.9497, "step": 3788 }, { "epoch": 0.9489105935386927, "grad_norm": 0.3359375, "learning_rate": 1.1355413303645979e-06, "loss": 0.9484, "step": 3789 }, { "epoch": 0.9491610318056599, "grad_norm": 0.3828125, "learning_rate": 1.1299749512941833e-06, "loss": 0.8484, "step": 3790 }, { "epoch": 0.949411470072627, "grad_norm": 0.427734375, "learning_rate": 1.1244085722237685e-06, "loss": 0.9192, "step": 3791 }, { "epoch": 0.9496619083395943, "grad_norm": 0.361328125, "learning_rate": 1.1188421931533539e-06, "loss": 0.9746, "step": 3792 }, { "epoch": 0.9499123466065614, "grad_norm": 0.345703125, "learning_rate": 1.113275814082939e-06, "loss": 0.9635, "step": 3793 }, { "epoch": 0.9501627848735287, "grad_norm": 0.408203125, "learning_rate": 1.1077094350125245e-06, "loss": 1.189, "step": 3794 }, { "epoch": 0.9504132231404959, "grad_norm": 0.359375, "learning_rate": 1.1021430559421099e-06, "loss": 0.8843, "step": 3795 }, { "epoch": 0.9506636614074631, "grad_norm": 0.365234375, "learning_rate": 1.096576676871695e-06, "loss": 1.0447, "step": 3796 }, { "epoch": 0.9509140996744303, "grad_norm": 0.34375, "learning_rate": 1.0910102978012802e-06, "loss": 0.8367, "step": 3797 }, { "epoch": 0.9511645379413974, "grad_norm": 0.361328125, "learning_rate": 1.0854439187308656e-06, "loss": 1.0959, "step": 3798 }, { "epoch": 0.9514149762083647, "grad_norm": 0.365234375, "learning_rate": 1.0798775396604508e-06, "loss": 0.9607, "step": 3799 }, { "epoch": 0.9516654144753318, "grad_norm": 0.3984375, "learning_rate": 1.0743111605900362e-06, "loss": 1.008, "step": 3800 }, { "epoch": 0.9519158527422991, "grad_norm": 0.3359375, "learning_rate": 1.0687447815196216e-06, "loss": 0.986, "step": 3801 }, { "epoch": 0.9521662910092662, "grad_norm": 0.34375, "learning_rate": 1.0631784024492068e-06, "loss": 0.9313, "step": 3802 }, { "epoch": 0.9524167292762334, "grad_norm": 0.359375, "learning_rate": 1.0576120233787922e-06, "loss": 0.8964, "step": 3803 }, { "epoch": 0.9526671675432006, "grad_norm": 0.37890625, "learning_rate": 1.0520456443083774e-06, "loss": 0.9294, "step": 3804 }, { "epoch": 0.9529176058101678, "grad_norm": 0.39453125, "learning_rate": 1.0464792652379628e-06, "loss": 0.8678, "step": 3805 }, { "epoch": 0.953168044077135, "grad_norm": 0.376953125, "learning_rate": 1.0409128861675482e-06, "loss": 0.97, "step": 3806 }, { "epoch": 0.9534184823441022, "grad_norm": 0.38671875, "learning_rate": 1.0353465070971334e-06, "loss": 0.8796, "step": 3807 }, { "epoch": 0.9536689206110693, "grad_norm": 0.3515625, "learning_rate": 1.0297801280267188e-06, "loss": 0.8157, "step": 3808 }, { "epoch": 0.9539193588780366, "grad_norm": 0.3515625, "learning_rate": 1.024213748956304e-06, "loss": 0.889, "step": 3809 }, { "epoch": 0.9541697971450037, "grad_norm": 0.404296875, "learning_rate": 1.0186473698858892e-06, "loss": 0.8641, "step": 3810 }, { "epoch": 0.954420235411971, "grad_norm": 0.3828125, "learning_rate": 1.0130809908154746e-06, "loss": 0.981, "step": 3811 }, { "epoch": 0.9546706736789381, "grad_norm": 0.345703125, "learning_rate": 1.00751461174506e-06, "loss": 0.8779, "step": 3812 }, { "epoch": 0.9549211119459053, "grad_norm": 0.375, "learning_rate": 1.0019482326746452e-06, "loss": 0.9569, "step": 3813 }, { "epoch": 0.9551715502128725, "grad_norm": 0.396484375, "learning_rate": 9.963818536042306e-07, "loss": 0.743, "step": 3814 }, { "epoch": 0.9554219884798397, "grad_norm": 0.359375, "learning_rate": 9.908154745338157e-07, "loss": 1.0031, "step": 3815 }, { "epoch": 0.9556724267468069, "grad_norm": 0.373046875, "learning_rate": 9.852490954634011e-07, "loss": 0.7628, "step": 3816 }, { "epoch": 0.9559228650137741, "grad_norm": 0.39453125, "learning_rate": 9.796827163929865e-07, "loss": 0.862, "step": 3817 }, { "epoch": 0.9561733032807413, "grad_norm": 0.376953125, "learning_rate": 9.741163373225717e-07, "loss": 0.8529, "step": 3818 }, { "epoch": 0.9564237415477085, "grad_norm": 0.373046875, "learning_rate": 9.685499582521571e-07, "loss": 0.7541, "step": 3819 }, { "epoch": 0.9566741798146757, "grad_norm": 0.390625, "learning_rate": 9.629835791817423e-07, "loss": 0.9032, "step": 3820 }, { "epoch": 0.9569246180816429, "grad_norm": 0.35546875, "learning_rate": 9.574172001113277e-07, "loss": 0.9612, "step": 3821 }, { "epoch": 0.9571750563486101, "grad_norm": 0.333984375, "learning_rate": 9.51850821040913e-07, "loss": 0.9938, "step": 3822 }, { "epoch": 0.9574254946155772, "grad_norm": 0.3515625, "learning_rate": 9.462844419704982e-07, "loss": 1.0594, "step": 3823 }, { "epoch": 0.9576759328825445, "grad_norm": 0.390625, "learning_rate": 9.407180629000835e-07, "loss": 0.9478, "step": 3824 }, { "epoch": 0.9579263711495116, "grad_norm": 0.384765625, "learning_rate": 9.351516838296688e-07, "loss": 1.0828, "step": 3825 }, { "epoch": 0.9581768094164789, "grad_norm": 0.3203125, "learning_rate": 9.295853047592542e-07, "loss": 0.8888, "step": 3826 }, { "epoch": 0.958427247683446, "grad_norm": 0.439453125, "learning_rate": 9.240189256888395e-07, "loss": 0.8591, "step": 3827 }, { "epoch": 0.9586776859504132, "grad_norm": 0.37890625, "learning_rate": 9.184525466184248e-07, "loss": 0.9891, "step": 3828 }, { "epoch": 0.9589281242173804, "grad_norm": 0.345703125, "learning_rate": 9.128861675480101e-07, "loss": 0.957, "step": 3829 }, { "epoch": 0.9591785624843476, "grad_norm": 0.38671875, "learning_rate": 9.073197884775954e-07, "loss": 0.7686, "step": 3830 }, { "epoch": 0.9594290007513148, "grad_norm": 0.4609375, "learning_rate": 9.017534094071808e-07, "loss": 0.8535, "step": 3831 }, { "epoch": 0.959679439018282, "grad_norm": 0.37890625, "learning_rate": 8.961870303367661e-07, "loss": 0.9299, "step": 3832 }, { "epoch": 0.9599298772852491, "grad_norm": 0.404296875, "learning_rate": 8.906206512663514e-07, "loss": 1.1116, "step": 3833 }, { "epoch": 0.9601803155522164, "grad_norm": 0.35546875, "learning_rate": 8.850542721959367e-07, "loss": 0.8279, "step": 3834 }, { "epoch": 0.9604307538191835, "grad_norm": 0.3828125, "learning_rate": 8.794878931255218e-07, "loss": 0.9352, "step": 3835 }, { "epoch": 0.9606811920861508, "grad_norm": 0.353515625, "learning_rate": 8.739215140551071e-07, "loss": 0.9539, "step": 3836 }, { "epoch": 0.9609316303531179, "grad_norm": 0.396484375, "learning_rate": 8.683551349846925e-07, "loss": 1.0475, "step": 3837 }, { "epoch": 0.9611820686200852, "grad_norm": 0.333984375, "learning_rate": 8.627887559142778e-07, "loss": 0.9032, "step": 3838 }, { "epoch": 0.9614325068870524, "grad_norm": 0.37109375, "learning_rate": 8.572223768438631e-07, "loss": 0.7756, "step": 3839 }, { "epoch": 0.9616829451540195, "grad_norm": 0.392578125, "learning_rate": 8.516559977734484e-07, "loss": 0.9347, "step": 3840 }, { "epoch": 0.9619333834209868, "grad_norm": 0.369140625, "learning_rate": 8.460896187030337e-07, "loss": 0.9832, "step": 3841 }, { "epoch": 0.9621838216879539, "grad_norm": 0.404296875, "learning_rate": 8.405232396326191e-07, "loss": 0.9952, "step": 3842 }, { "epoch": 0.9624342599549212, "grad_norm": 0.34375, "learning_rate": 8.349568605622044e-07, "loss": 1.1114, "step": 3843 }, { "epoch": 0.9626846982218883, "grad_norm": 0.40625, "learning_rate": 8.293904814917897e-07, "loss": 0.869, "step": 3844 }, { "epoch": 0.9629351364888555, "grad_norm": 0.369140625, "learning_rate": 8.23824102421375e-07, "loss": 0.9478, "step": 3845 }, { "epoch": 0.9631855747558227, "grad_norm": 0.341796875, "learning_rate": 8.182577233509603e-07, "loss": 1.0513, "step": 3846 }, { "epoch": 0.9634360130227899, "grad_norm": 0.40234375, "learning_rate": 8.126913442805457e-07, "loss": 0.8885, "step": 3847 }, { "epoch": 0.963686451289757, "grad_norm": 0.390625, "learning_rate": 8.071249652101308e-07, "loss": 0.9092, "step": 3848 }, { "epoch": 0.9639368895567243, "grad_norm": 0.365234375, "learning_rate": 8.015585861397162e-07, "loss": 1.0106, "step": 3849 }, { "epoch": 0.9641873278236914, "grad_norm": 0.388671875, "learning_rate": 7.959922070693015e-07, "loss": 0.9817, "step": 3850 }, { "epoch": 0.9644377660906587, "grad_norm": 0.466796875, "learning_rate": 7.904258279988868e-07, "loss": 0.8926, "step": 3851 }, { "epoch": 0.9646882043576258, "grad_norm": 0.373046875, "learning_rate": 7.84859448928472e-07, "loss": 1.0785, "step": 3852 }, { "epoch": 0.9649386426245931, "grad_norm": 0.376953125, "learning_rate": 7.792930698580573e-07, "loss": 0.8068, "step": 3853 }, { "epoch": 0.9651890808915602, "grad_norm": 0.373046875, "learning_rate": 7.737266907876427e-07, "loss": 1.089, "step": 3854 }, { "epoch": 0.9654395191585274, "grad_norm": 0.404296875, "learning_rate": 7.68160311717228e-07, "loss": 0.9047, "step": 3855 }, { "epoch": 0.9656899574254946, "grad_norm": 0.337890625, "learning_rate": 7.625939326468133e-07, "loss": 0.7164, "step": 3856 }, { "epoch": 0.9659403956924618, "grad_norm": 0.359375, "learning_rate": 7.570275535763986e-07, "loss": 1.0832, "step": 3857 }, { "epoch": 0.966190833959429, "grad_norm": 0.447265625, "learning_rate": 7.514611745059839e-07, "loss": 0.7072, "step": 3858 }, { "epoch": 0.9664412722263962, "grad_norm": 0.37890625, "learning_rate": 7.458947954355693e-07, "loss": 1.0318, "step": 3859 }, { "epoch": 0.9666917104933633, "grad_norm": 0.369140625, "learning_rate": 7.403284163651546e-07, "loss": 0.9135, "step": 3860 }, { "epoch": 0.9669421487603306, "grad_norm": 0.40625, "learning_rate": 7.347620372947398e-07, "loss": 0.9162, "step": 3861 }, { "epoch": 0.9671925870272978, "grad_norm": 0.3984375, "learning_rate": 7.291956582243251e-07, "loss": 0.8843, "step": 3862 }, { "epoch": 0.967443025294265, "grad_norm": 0.3515625, "learning_rate": 7.236292791539104e-07, "loss": 0.9276, "step": 3863 }, { "epoch": 0.9676934635612322, "grad_norm": 0.35546875, "learning_rate": 7.180629000834957e-07, "loss": 0.9225, "step": 3864 }, { "epoch": 0.9679439018281993, "grad_norm": 0.3515625, "learning_rate": 7.12496521013081e-07, "loss": 0.9792, "step": 3865 }, { "epoch": 0.9681943400951666, "grad_norm": 0.376953125, "learning_rate": 7.069301419426664e-07, "loss": 0.9246, "step": 3866 }, { "epoch": 0.9684447783621337, "grad_norm": 0.373046875, "learning_rate": 7.013637628722517e-07, "loss": 0.937, "step": 3867 }, { "epoch": 0.968695216629101, "grad_norm": 0.373046875, "learning_rate": 6.95797383801837e-07, "loss": 0.7529, "step": 3868 }, { "epoch": 0.9689456548960681, "grad_norm": 0.333984375, "learning_rate": 6.902310047314223e-07, "loss": 0.8027, "step": 3869 }, { "epoch": 0.9691960931630353, "grad_norm": 0.40234375, "learning_rate": 6.846646256610076e-07, "loss": 0.8321, "step": 3870 }, { "epoch": 0.9694465314300025, "grad_norm": 0.345703125, "learning_rate": 6.79098246590593e-07, "loss": 0.9732, "step": 3871 }, { "epoch": 0.9696969696969697, "grad_norm": 0.365234375, "learning_rate": 6.735318675201783e-07, "loss": 1.0145, "step": 3872 }, { "epoch": 0.9699474079639369, "grad_norm": 0.373046875, "learning_rate": 6.679654884497634e-07, "loss": 0.7817, "step": 3873 }, { "epoch": 0.9701978462309041, "grad_norm": 0.388671875, "learning_rate": 6.623991093793487e-07, "loss": 0.9759, "step": 3874 }, { "epoch": 0.9704482844978712, "grad_norm": 0.384765625, "learning_rate": 6.56832730308934e-07, "loss": 0.8554, "step": 3875 }, { "epoch": 0.9706987227648385, "grad_norm": 0.390625, "learning_rate": 6.512663512385193e-07, "loss": 1.0591, "step": 3876 }, { "epoch": 0.9709491610318056, "grad_norm": 0.361328125, "learning_rate": 6.456999721681047e-07, "loss": 0.7801, "step": 3877 }, { "epoch": 0.9711995992987729, "grad_norm": 0.322265625, "learning_rate": 6.4013359309769e-07, "loss": 0.9417, "step": 3878 }, { "epoch": 0.97145003756574, "grad_norm": 0.357421875, "learning_rate": 6.345672140272753e-07, "loss": 0.942, "step": 3879 }, { "epoch": 0.9717004758327072, "grad_norm": 0.38671875, "learning_rate": 6.290008349568606e-07, "loss": 0.7535, "step": 3880 }, { "epoch": 0.9719509140996744, "grad_norm": 0.3359375, "learning_rate": 6.234344558864459e-07, "loss": 0.8683, "step": 3881 }, { "epoch": 0.9722013523666416, "grad_norm": 0.45703125, "learning_rate": 6.178680768160313e-07, "loss": 0.8554, "step": 3882 }, { "epoch": 0.9724517906336089, "grad_norm": 0.349609375, "learning_rate": 6.123016977456165e-07, "loss": 0.8301, "step": 3883 }, { "epoch": 0.972702228900576, "grad_norm": 0.4140625, "learning_rate": 6.067353186752018e-07, "loss": 0.9646, "step": 3884 }, { "epoch": 0.9729526671675433, "grad_norm": 0.38671875, "learning_rate": 6.011689396047872e-07, "loss": 0.9513, "step": 3885 }, { "epoch": 0.9732031054345104, "grad_norm": 0.40625, "learning_rate": 5.956025605343725e-07, "loss": 1.0072, "step": 3886 }, { "epoch": 0.9734535437014776, "grad_norm": 0.392578125, "learning_rate": 5.900361814639578e-07, "loss": 0.9826, "step": 3887 }, { "epoch": 0.9737039819684448, "grad_norm": 0.3671875, "learning_rate": 5.844698023935431e-07, "loss": 1.1155, "step": 3888 }, { "epoch": 0.973954420235412, "grad_norm": 0.375, "learning_rate": 5.789034233231284e-07, "loss": 0.7846, "step": 3889 }, { "epoch": 0.9742048585023791, "grad_norm": 0.390625, "learning_rate": 5.733370442527137e-07, "loss": 0.8132, "step": 3890 }, { "epoch": 0.9744552967693464, "grad_norm": 0.3515625, "learning_rate": 5.677706651822989e-07, "loss": 0.9378, "step": 3891 }, { "epoch": 0.9747057350363135, "grad_norm": 0.392578125, "learning_rate": 5.622042861118842e-07, "loss": 0.8492, "step": 3892 }, { "epoch": 0.9749561733032808, "grad_norm": 0.3671875, "learning_rate": 5.566379070414695e-07, "loss": 0.8572, "step": 3893 }, { "epoch": 0.9752066115702479, "grad_norm": 0.36328125, "learning_rate": 5.510715279710549e-07, "loss": 0.7995, "step": 3894 }, { "epoch": 0.9754570498372152, "grad_norm": 0.38671875, "learning_rate": 5.455051489006401e-07, "loss": 0.8423, "step": 3895 }, { "epoch": 0.9757074881041823, "grad_norm": 0.376953125, "learning_rate": 5.399387698302254e-07, "loss": 0.9894, "step": 3896 }, { "epoch": 0.9759579263711495, "grad_norm": 0.375, "learning_rate": 5.343723907598108e-07, "loss": 0.9995, "step": 3897 }, { "epoch": 0.9762083646381167, "grad_norm": 0.37890625, "learning_rate": 5.288060116893961e-07, "loss": 0.9285, "step": 3898 }, { "epoch": 0.9764588029050839, "grad_norm": 0.373046875, "learning_rate": 5.232396326189814e-07, "loss": 0.9719, "step": 3899 }, { "epoch": 0.976709241172051, "grad_norm": 0.412109375, "learning_rate": 5.176732535485667e-07, "loss": 1.0798, "step": 3900 }, { "epoch": 0.9769596794390183, "grad_norm": 0.400390625, "learning_rate": 5.12106874478152e-07, "loss": 1.0798, "step": 3901 }, { "epoch": 0.9772101177059854, "grad_norm": 0.41015625, "learning_rate": 5.065404954077373e-07, "loss": 1.0867, "step": 3902 }, { "epoch": 0.9774605559729527, "grad_norm": 0.38671875, "learning_rate": 5.009741163373226e-07, "loss": 1.0114, "step": 3903 }, { "epoch": 0.9777109942399198, "grad_norm": 0.357421875, "learning_rate": 4.954077372669079e-07, "loss": 0.8888, "step": 3904 }, { "epoch": 0.977961432506887, "grad_norm": 0.349609375, "learning_rate": 4.898413581964933e-07, "loss": 0.8263, "step": 3905 }, { "epoch": 0.9782118707738543, "grad_norm": 0.40625, "learning_rate": 4.842749791260786e-07, "loss": 1.0027, "step": 3906 }, { "epoch": 0.9784623090408214, "grad_norm": 0.40625, "learning_rate": 4.787086000556639e-07, "loss": 0.9285, "step": 3907 }, { "epoch": 0.9787127473077887, "grad_norm": 0.37109375, "learning_rate": 4.731422209852491e-07, "loss": 0.9092, "step": 3908 }, { "epoch": 0.9789631855747558, "grad_norm": 0.39453125, "learning_rate": 4.675758419148344e-07, "loss": 1.0513, "step": 3909 }, { "epoch": 0.9792136238417231, "grad_norm": 0.37109375, "learning_rate": 4.6200946284441974e-07, "loss": 0.8776, "step": 3910 }, { "epoch": 0.9794640621086902, "grad_norm": 0.369140625, "learning_rate": 4.5644308377400504e-07, "loss": 1.009, "step": 3911 }, { "epoch": 0.9797145003756574, "grad_norm": 0.396484375, "learning_rate": 4.508767047035904e-07, "loss": 0.803, "step": 3912 }, { "epoch": 0.9799649386426246, "grad_norm": 0.353515625, "learning_rate": 4.453103256331757e-07, "loss": 0.867, "step": 3913 }, { "epoch": 0.9802153769095918, "grad_norm": 0.349609375, "learning_rate": 4.397439465627609e-07, "loss": 0.9385, "step": 3914 }, { "epoch": 0.980465815176559, "grad_norm": 0.34765625, "learning_rate": 4.3417756749234627e-07, "loss": 0.8417, "step": 3915 }, { "epoch": 0.9807162534435262, "grad_norm": 0.349609375, "learning_rate": 4.2861118842193156e-07, "loss": 0.8285, "step": 3916 }, { "epoch": 0.9809666917104933, "grad_norm": 0.392578125, "learning_rate": 4.2304480935151686e-07, "loss": 0.8226, "step": 3917 }, { "epoch": 0.9812171299774606, "grad_norm": 0.443359375, "learning_rate": 4.174784302811022e-07, "loss": 0.9553, "step": 3918 }, { "epoch": 0.9814675682444277, "grad_norm": 0.392578125, "learning_rate": 4.119120512106875e-07, "loss": 0.9362, "step": 3919 }, { "epoch": 0.981718006511395, "grad_norm": 0.35546875, "learning_rate": 4.0634567214027284e-07, "loss": 1.0373, "step": 3920 }, { "epoch": 0.9819684447783621, "grad_norm": 0.375, "learning_rate": 4.007792930698581e-07, "loss": 0.865, "step": 3921 }, { "epoch": 0.9822188830453293, "grad_norm": 0.357421875, "learning_rate": 3.952129139994434e-07, "loss": 0.9876, "step": 3922 }, { "epoch": 0.9824693213122965, "grad_norm": 0.380859375, "learning_rate": 3.8964653492902867e-07, "loss": 1.0811, "step": 3923 }, { "epoch": 0.9827197595792637, "grad_norm": 0.341796875, "learning_rate": 3.84080155858614e-07, "loss": 0.9965, "step": 3924 }, { "epoch": 0.9829701978462309, "grad_norm": 0.38671875, "learning_rate": 3.785137767881993e-07, "loss": 0.7412, "step": 3925 }, { "epoch": 0.9832206361131981, "grad_norm": 0.390625, "learning_rate": 3.7294739771778466e-07, "loss": 0.91, "step": 3926 }, { "epoch": 0.9834710743801653, "grad_norm": 0.396484375, "learning_rate": 3.673810186473699e-07, "loss": 1.0123, "step": 3927 }, { "epoch": 0.9837215126471325, "grad_norm": 0.439453125, "learning_rate": 3.618146395769552e-07, "loss": 0.9442, "step": 3928 }, { "epoch": 0.9839719509140997, "grad_norm": 0.3671875, "learning_rate": 3.562482605065405e-07, "loss": 1.0783, "step": 3929 }, { "epoch": 0.9842223891810669, "grad_norm": 0.3828125, "learning_rate": 3.5068188143612584e-07, "loss": 0.9728, "step": 3930 }, { "epoch": 0.9844728274480341, "grad_norm": 0.38671875, "learning_rate": 3.4511550236571113e-07, "loss": 0.8642, "step": 3931 }, { "epoch": 0.9847232657150012, "grad_norm": 0.369140625, "learning_rate": 3.395491232952965e-07, "loss": 1.0642, "step": 3932 }, { "epoch": 0.9849737039819685, "grad_norm": 0.388671875, "learning_rate": 3.339827442248817e-07, "loss": 0.8321, "step": 3933 }, { "epoch": 0.9852241422489356, "grad_norm": 0.3828125, "learning_rate": 3.28416365154467e-07, "loss": 0.9711, "step": 3934 }, { "epoch": 0.9854745805159029, "grad_norm": 0.3359375, "learning_rate": 3.2284998608405236e-07, "loss": 0.805, "step": 3935 }, { "epoch": 0.98572501878287, "grad_norm": 0.39453125, "learning_rate": 3.1728360701363766e-07, "loss": 0.8304, "step": 3936 }, { "epoch": 0.9859754570498372, "grad_norm": 0.37890625, "learning_rate": 3.1171722794322295e-07, "loss": 0.859, "step": 3937 }, { "epoch": 0.9862258953168044, "grad_norm": 0.390625, "learning_rate": 3.0615084887280824e-07, "loss": 0.7727, "step": 3938 }, { "epoch": 0.9864763335837716, "grad_norm": 0.37890625, "learning_rate": 3.005844698023936e-07, "loss": 0.8287, "step": 3939 }, { "epoch": 0.9867267718507388, "grad_norm": 0.365234375, "learning_rate": 2.950180907319789e-07, "loss": 0.8935, "step": 3940 }, { "epoch": 0.986977210117706, "grad_norm": 0.341796875, "learning_rate": 2.894517116615642e-07, "loss": 1.138, "step": 3941 }, { "epoch": 0.9872276483846731, "grad_norm": 0.37109375, "learning_rate": 2.8388533259114947e-07, "loss": 0.9885, "step": 3942 }, { "epoch": 0.9874780866516404, "grad_norm": 0.39453125, "learning_rate": 2.7831895352073477e-07, "loss": 1.0066, "step": 3943 }, { "epoch": 0.9877285249186075, "grad_norm": 0.396484375, "learning_rate": 2.7275257445032006e-07, "loss": 1.0971, "step": 3944 }, { "epoch": 0.9879789631855748, "grad_norm": 0.412109375, "learning_rate": 2.671861953799054e-07, "loss": 1.0118, "step": 3945 }, { "epoch": 0.9882294014525419, "grad_norm": 0.375, "learning_rate": 2.616198163094907e-07, "loss": 1.0484, "step": 3946 }, { "epoch": 0.9884798397195091, "grad_norm": 0.435546875, "learning_rate": 2.56053437239076e-07, "loss": 0.9757, "step": 3947 }, { "epoch": 0.9887302779864763, "grad_norm": 0.375, "learning_rate": 2.504870581686613e-07, "loss": 0.8277, "step": 3948 }, { "epoch": 0.9889807162534435, "grad_norm": 0.373046875, "learning_rate": 2.4492067909824664e-07, "loss": 1.058, "step": 3949 }, { "epoch": 0.9892311545204108, "grad_norm": 0.373046875, "learning_rate": 2.3935430002783193e-07, "loss": 0.881, "step": 3950 }, { "epoch": 0.9894815927873779, "grad_norm": 0.380859375, "learning_rate": 2.337879209574172e-07, "loss": 1.0544, "step": 3951 }, { "epoch": 0.9897320310543452, "grad_norm": 0.3515625, "learning_rate": 2.2822154188700252e-07, "loss": 0.9189, "step": 3952 }, { "epoch": 0.9899824693213123, "grad_norm": 0.392578125, "learning_rate": 2.2265516281658784e-07, "loss": 1.0123, "step": 3953 }, { "epoch": 0.9902329075882795, "grad_norm": 0.37109375, "learning_rate": 2.1708878374617313e-07, "loss": 1.1597, "step": 3954 }, { "epoch": 0.9904833458552467, "grad_norm": 0.3671875, "learning_rate": 2.1152240467575843e-07, "loss": 0.9674, "step": 3955 }, { "epoch": 0.9907337841222139, "grad_norm": 0.3671875, "learning_rate": 2.0595602560534375e-07, "loss": 0.8452, "step": 3956 }, { "epoch": 0.990984222389181, "grad_norm": 0.388671875, "learning_rate": 2.0038964653492904e-07, "loss": 0.9797, "step": 3957 }, { "epoch": 0.9912346606561483, "grad_norm": 0.341796875, "learning_rate": 1.9482326746451434e-07, "loss": 1.0572, "step": 3958 }, { "epoch": 0.9914850989231154, "grad_norm": 0.373046875, "learning_rate": 1.8925688839409966e-07, "loss": 0.826, "step": 3959 }, { "epoch": 0.9917355371900827, "grad_norm": 0.34765625, "learning_rate": 1.8369050932368495e-07, "loss": 0.8908, "step": 3960 }, { "epoch": 0.9919859754570498, "grad_norm": 0.37109375, "learning_rate": 1.7812413025327025e-07, "loss": 0.9604, "step": 3961 }, { "epoch": 0.992236413724017, "grad_norm": 0.35546875, "learning_rate": 1.7255775118285557e-07, "loss": 1.0462, "step": 3962 }, { "epoch": 0.9924868519909842, "grad_norm": 0.337890625, "learning_rate": 1.6699137211244086e-07, "loss": 0.8795, "step": 3963 }, { "epoch": 0.9927372902579514, "grad_norm": 0.330078125, "learning_rate": 1.6142499304202618e-07, "loss": 1.0437, "step": 3964 }, { "epoch": 0.9929877285249186, "grad_norm": 0.412109375, "learning_rate": 1.5585861397161147e-07, "loss": 0.9282, "step": 3965 }, { "epoch": 0.9932381667918858, "grad_norm": 0.384765625, "learning_rate": 1.502922349011968e-07, "loss": 0.9199, "step": 3966 }, { "epoch": 0.993488605058853, "grad_norm": 0.33984375, "learning_rate": 1.447258558307821e-07, "loss": 0.9768, "step": 3967 }, { "epoch": 0.9937390433258202, "grad_norm": 0.3671875, "learning_rate": 1.3915947676036738e-07, "loss": 0.8065, "step": 3968 }, { "epoch": 0.9939894815927873, "grad_norm": 0.3203125, "learning_rate": 1.335930976899527e-07, "loss": 1.0434, "step": 3969 }, { "epoch": 0.9942399198597546, "grad_norm": 0.3828125, "learning_rate": 1.28026718619538e-07, "loss": 0.9573, "step": 3970 }, { "epoch": 0.9944903581267218, "grad_norm": 0.390625, "learning_rate": 1.2246033954912332e-07, "loss": 0.8754, "step": 3971 }, { "epoch": 0.994740796393689, "grad_norm": 0.36328125, "learning_rate": 1.168939604787086e-07, "loss": 0.8823, "step": 3972 }, { "epoch": 0.9949912346606562, "grad_norm": 0.37109375, "learning_rate": 1.1132758140829392e-07, "loss": 0.9248, "step": 3973 }, { "epoch": 0.9952416729276233, "grad_norm": 0.369140625, "learning_rate": 1.0576120233787921e-07, "loss": 0.801, "step": 3974 }, { "epoch": 0.9954921111945906, "grad_norm": 0.388671875, "learning_rate": 1.0019482326746452e-07, "loss": 0.7747, "step": 3975 }, { "epoch": 0.9957425494615577, "grad_norm": 0.361328125, "learning_rate": 9.462844419704983e-08, "loss": 0.7657, "step": 3976 }, { "epoch": 0.995992987728525, "grad_norm": 0.3203125, "learning_rate": 8.906206512663512e-08, "loss": 0.8555, "step": 3977 }, { "epoch": 0.9962434259954921, "grad_norm": 0.361328125, "learning_rate": 8.349568605622043e-08, "loss": 0.9688, "step": 3978 }, { "epoch": 0.9964938642624593, "grad_norm": 0.369140625, "learning_rate": 7.792930698580574e-08, "loss": 1.0132, "step": 3979 }, { "epoch": 0.9967443025294265, "grad_norm": 0.380859375, "learning_rate": 7.236292791539104e-08, "loss": 0.8096, "step": 3980 }, { "epoch": 0.9969947407963937, "grad_norm": 0.3984375, "learning_rate": 6.679654884497635e-08, "loss": 0.9051, "step": 3981 }, { "epoch": 0.9972451790633609, "grad_norm": 0.380859375, "learning_rate": 6.123016977456166e-08, "loss": 0.9161, "step": 3982 }, { "epoch": 0.9974956173303281, "grad_norm": 0.37890625, "learning_rate": 5.566379070414696e-08, "loss": 0.9621, "step": 3983 }, { "epoch": 0.9977460555972952, "grad_norm": 0.416015625, "learning_rate": 5.009741163373226e-08, "loss": 1.0299, "step": 3984 }, { "epoch": 0.9979964938642625, "grad_norm": 0.392578125, "learning_rate": 4.453103256331756e-08, "loss": 0.9688, "step": 3985 }, { "epoch": 0.9982469321312296, "grad_norm": 0.365234375, "learning_rate": 3.896465349290287e-08, "loss": 0.9249, "step": 3986 }, { "epoch": 0.9984973703981969, "grad_norm": 0.375, "learning_rate": 3.3398274422488176e-08, "loss": 0.8967, "step": 3987 }, { "epoch": 0.998747808665164, "grad_norm": 0.353515625, "learning_rate": 2.783189535207348e-08, "loss": 0.8576, "step": 3988 }, { "epoch": 0.9989982469321312, "grad_norm": 0.34375, "learning_rate": 2.226551628165878e-08, "loss": 0.9573, "step": 3989 }, { "epoch": 0.9992486851990984, "grad_norm": 0.37890625, "learning_rate": 1.6699137211244088e-08, "loss": 0.9369, "step": 3990 }, { "epoch": 0.9994991234660656, "grad_norm": 0.3828125, "learning_rate": 1.113275814082939e-08, "loss": 0.7923, "step": 3991 }, { "epoch": 0.9997495617330328, "grad_norm": 0.3984375, "learning_rate": 5.566379070414695e-09, "loss": 1.1062, "step": 3992 }, { "epoch": 1.0, "grad_norm": 0.490234375, "learning_rate": 0.0, "loss": 0.9161, "step": 3993 } ], "logging_steps": 1, "max_steps": 3993, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 6.491335287845683e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }