{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4035087719298245, "eval_steps": 500, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-05, "loss": 1.0506, "step": 1 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 0.9988, "step": 2 }, { "epoch": 0.01, "learning_rate": 0.00015000000000000001, "loss": 0.9783, "step": 3 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 0.9849, "step": 4 }, { "epoch": 0.02, "learning_rate": 0.00025, "loss": 1.0159, "step": 5 }, { "epoch": 0.02, "learning_rate": 0.00030000000000000003, "loss": 0.9847, "step": 6 }, { "epoch": 0.02, "learning_rate": 0.00034999999999999994, "loss": 0.9101, "step": 7 }, { "epoch": 0.02, "learning_rate": 0.0004, "loss": 0.9445, "step": 8 }, { "epoch": 0.03, "learning_rate": 0.00045, "loss": 0.8578, "step": 9 }, { "epoch": 0.03, "learning_rate": 0.0005, "loss": 0.9356, "step": 10 }, { "epoch": 0.03, "learning_rate": 0.0005499999999999999, "loss": 0.8395, "step": 11 }, { "epoch": 0.04, "learning_rate": 0.0006000000000000001, "loss": 0.9002, "step": 12 }, { "epoch": 0.04, "learning_rate": 0.00065, "loss": 0.8955, "step": 13 }, { "epoch": 0.04, "learning_rate": 0.0006499959204043461, "loss": 0.902, "step": 14 }, { "epoch": 0.05, "learning_rate": 0.0006499836817198032, "loss": 0.8578, "step": 15 }, { "epoch": 0.05, "learning_rate": 0.0006499632842536263, "loss": 0.9005, "step": 16 }, { "epoch": 0.05, "learning_rate": 0.0006499347285178979, "loss": 0.8539, "step": 17 }, { "epoch": 0.06, "learning_rate": 0.0006498980152295153, "loss": 0.8595, "step": 18 }, { "epoch": 0.06, "learning_rate": 0.0006498531453101735, "loss": 0.8845, "step": 19 }, { "epoch": 0.06, "learning_rate": 0.0006498001198863406, "loss": 0.8924, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.000649738940289231, "loss": 0.8365, "step": 21 }, { "epoch": 0.07, "learning_rate": 0.0006496696080547707, "loss": 0.8462, "step": 22 }, { "epoch": 0.07, "learning_rate": 0.0006495921249235596, "loss": 0.8528, "step": 23 }, { "epoch": 0.07, "learning_rate": 0.0006495064928408277, "loss": 0.8159, "step": 24 }, { "epoch": 0.08, "learning_rate": 0.0006494127139563859, "loss": 0.8245, "step": 25 }, { "epoch": 0.08, "learning_rate": 0.000649310790624572, "loss": 0.8081, "step": 26 }, { "epoch": 0.08, "learning_rate": 0.0006492007254041924, "loss": 0.8535, "step": 27 }, { "epoch": 0.09, "learning_rate": 0.0006490825210584566, "loss": 0.8162, "step": 28 }, { "epoch": 0.09, "learning_rate": 0.0006489561805549089, "loss": 0.8456, "step": 29 }, { "epoch": 0.09, "learning_rate": 0.0006488217070653535, "loss": 0.7799, "step": 30 }, { "epoch": 0.1, "learning_rate": 0.0006486791039657748, "loss": 0.8088, "step": 31 }, { "epoch": 0.1, "learning_rate": 0.0006485283748362524, "loss": 0.8683, "step": 32 }, { "epoch": 0.1, "learning_rate": 0.0006483695234608723, "loss": 0.8871, "step": 33 }, { "epoch": 0.11, "learning_rate": 0.0006482025538276304, "loss": 0.7711, "step": 34 }, { "epoch": 0.11, "learning_rate": 0.0006480274701283335, "loss": 0.7621, "step": 35 }, { "epoch": 0.11, "learning_rate": 0.0006478442767584937, "loss": 0.8243, "step": 36 }, { "epoch": 0.12, "learning_rate": 0.0006476529783172177, "loss": 0.8257, "step": 37 }, { "epoch": 0.12, "learning_rate": 0.0006474535796070919, "loss": 0.8141, "step": 38 }, { "epoch": 0.12, "learning_rate": 0.0006472460856340619, "loss": 0.8109, "step": 39 }, { "epoch": 0.12, "learning_rate": 0.000647030501607306, "loss": 0.7873, "step": 40 }, { "epoch": 0.13, "learning_rate": 0.000646806832939105, "loss": 0.7386, "step": 41 }, { "epoch": 0.13, "learning_rate": 0.0006465750852447068, "loss": 0.8636, "step": 42 }, { "epoch": 0.13, "learning_rate": 0.0006463352643421846, "loss": 0.7357, "step": 43 }, { "epoch": 0.14, "learning_rate": 0.0006460873762522906, "loss": 0.8142, "step": 44 }, { "epoch": 0.14, "learning_rate": 0.0006458314271983063, "loss": 0.7275, "step": 45 }, { "epoch": 0.14, "learning_rate": 0.0006455674236058847, "loss": 0.8029, "step": 46 }, { "epoch": 0.15, "learning_rate": 0.00064529537210289, "loss": 0.7901, "step": 47 }, { "epoch": 0.15, "learning_rate": 0.0006450152795192307, "loss": 0.7788, "step": 48 }, { "epoch": 0.15, "learning_rate": 0.0006447271528866881, "loss": 0.7621, "step": 49 }, { "epoch": 0.16, "learning_rate": 0.0006444309994387402, "loss": 0.7537, "step": 50 }, { "epoch": 0.16, "learning_rate": 0.0006441268266103796, "loss": 0.7917, "step": 51 }, { "epoch": 0.16, "learning_rate": 0.0006438146420379274, "loss": 0.8451, "step": 52 }, { "epoch": 0.17, "learning_rate": 0.0006434944535588411, "loss": 0.8369, "step": 53 }, { "epoch": 0.17, "learning_rate": 0.0006431662692115173, "loss": 0.7637, "step": 54 }, { "epoch": 0.17, "learning_rate": 0.0006428300972350914, "loss": 0.8365, "step": 55 }, { "epoch": 0.17, "learning_rate": 0.0006424859460692295, "loss": 0.7633, "step": 56 }, { "epoch": 0.18, "learning_rate": 0.0006421338243539165, "loss": 0.7718, "step": 57 }, { "epoch": 0.18, "learning_rate": 0.0006417737409292403, "loss": 0.7672, "step": 58 }, { "epoch": 0.18, "learning_rate": 0.0006414057048351684, "loss": 0.8107, "step": 59 }, { "epoch": 0.19, "learning_rate": 0.0006410297253113221, "loss": 0.7979, "step": 60 }, { "epoch": 0.19, "learning_rate": 0.0006406458117967443, "loss": 0.7634, "step": 61 }, { "epoch": 0.19, "learning_rate": 0.0006402539739296618, "loss": 0.7504, "step": 62 }, { "epoch": 0.2, "learning_rate": 0.0006398542215472443, "loss": 0.8082, "step": 63 }, { "epoch": 0.2, "learning_rate": 0.0006394465646853571, "loss": 0.8355, "step": 64 }, { "epoch": 0.2, "learning_rate": 0.0006390310135783086, "loss": 0.7458, "step": 65 }, { "epoch": 0.21, "learning_rate": 0.0006386075786585944, "loss": 0.7525, "step": 66 }, { "epoch": 0.21, "learning_rate": 0.0006381762705566343, "loss": 0.7464, "step": 67 }, { "epoch": 0.21, "learning_rate": 0.0006377371001005063, "loss": 0.78, "step": 68 }, { "epoch": 0.22, "learning_rate": 0.0006372900783156745, "loss": 0.7752, "step": 69 }, { "epoch": 0.22, "learning_rate": 0.0006368352164247117, "loss": 0.7299, "step": 70 }, { "epoch": 0.22, "learning_rate": 0.0006363725258470184, "loss": 0.7722, "step": 71 }, { "epoch": 0.22, "learning_rate": 0.0006359020181985365, "loss": 0.8236, "step": 72 }, { "epoch": 0.23, "learning_rate": 0.0006354237052914561, "loss": 0.7589, "step": 73 }, { "epoch": 0.23, "learning_rate": 0.0006349375991339202, "loss": 0.7948, "step": 74 }, { "epoch": 0.23, "learning_rate": 0.0006344437119297233, "loss": 0.7528, "step": 75 }, { "epoch": 0.24, "learning_rate": 0.0006339420560780045, "loss": 0.7842, "step": 76 }, { "epoch": 0.24, "learning_rate": 0.0006334326441729361, "loss": 0.7541, "step": 77 }, { "epoch": 0.24, "learning_rate": 0.000632915489003408, "loss": 0.7425, "step": 78 }, { "epoch": 0.25, "learning_rate": 0.0006323906035527062, "loss": 0.8168, "step": 79 }, { "epoch": 0.25, "learning_rate": 0.0006318580009981871, "loss": 0.8074, "step": 80 }, { "epoch": 0.25, "learning_rate": 0.0006313176947109465, "loss": 0.7679, "step": 81 }, { "epoch": 0.26, "learning_rate": 0.0006307696982554838, "loss": 0.7465, "step": 82 }, { "epoch": 0.26, "learning_rate": 0.0006302140253893622, "loss": 0.7073, "step": 83 }, { "epoch": 0.26, "learning_rate": 0.0006296506900628619, "loss": 0.7687, "step": 84 }, { "epoch": 0.27, "learning_rate": 0.0006290797064186315, "loss": 0.7578, "step": 85 }, { "epoch": 0.27, "learning_rate": 0.0006285010887913319, "loss": 0.7494, "step": 86 }, { "epoch": 0.27, "learning_rate": 0.0006279148517072765, "loss": 0.7326, "step": 87 }, { "epoch": 0.27, "learning_rate": 0.000627321009884067, "loss": 0.7603, "step": 88 }, { "epoch": 0.28, "learning_rate": 0.0006267195782302236, "loss": 0.8141, "step": 89 }, { "epoch": 0.28, "learning_rate": 0.0006261105718448105, "loss": 0.7542, "step": 90 }, { "epoch": 0.28, "learning_rate": 0.0006254940060170575, "loss": 0.7597, "step": 91 }, { "epoch": 0.29, "learning_rate": 0.0006248698962259753, "loss": 0.7332, "step": 92 }, { "epoch": 0.29, "learning_rate": 0.0006242382581399676, "loss": 0.7031, "step": 93 }, { "epoch": 0.29, "learning_rate": 0.0006235991076164375, "loss": 0.7258, "step": 94 }, { "epoch": 0.3, "learning_rate": 0.0006229524607013892, "loss": 0.7634, "step": 95 }, { "epoch": 0.3, "learning_rate": 0.0006222983336290254, "loss": 0.765, "step": 96 }, { "epoch": 0.3, "learning_rate": 0.0006216367428213398, "loss": 0.7246, "step": 97 }, { "epoch": 0.31, "learning_rate": 0.0006209677048877046, "loss": 0.7115, "step": 98 }, { "epoch": 0.31, "learning_rate": 0.0006202912366244535, "loss": 0.6748, "step": 99 }, { "epoch": 0.31, "learning_rate": 0.0006196073550144604, "loss": 0.6995, "step": 100 }, { "epoch": 0.32, "learning_rate": 0.0006189160772267127, "loss": 0.7764, "step": 101 }, { "epoch": 0.32, "learning_rate": 0.00061821742061588, "loss": 0.8628, "step": 102 }, { "epoch": 0.32, "learning_rate": 0.0006175114027218794, "loss": 0.7266, "step": 103 }, { "epoch": 0.32, "learning_rate": 0.0006167980412694342, "loss": 0.7557, "step": 104 }, { "epoch": 0.33, "learning_rate": 0.0006160773541676288, "loss": 0.7518, "step": 105 }, { "epoch": 0.33, "learning_rate": 0.0006153493595094602, "loss": 0.7589, "step": 106 }, { "epoch": 0.33, "learning_rate": 0.000614614075571383, "loss": 0.7506, "step": 107 }, { "epoch": 0.34, "learning_rate": 0.0006138715208128501, "loss": 0.6617, "step": 108 }, { "epoch": 0.34, "learning_rate": 0.0006131217138758505, "loss": 0.7396, "step": 109 }, { "epoch": 0.34, "learning_rate": 0.0006123646735844401, "loss": 0.7666, "step": 110 }, { "epoch": 0.35, "learning_rate": 0.00061160041894427, "loss": 0.7555, "step": 111 }, { "epoch": 0.35, "learning_rate": 0.0006108289691421089, "loss": 0.7301, "step": 112 }, { "epoch": 0.35, "learning_rate": 0.0006100503435453614, "loss": 0.7364, "step": 113 }, { "epoch": 0.36, "learning_rate": 0.0006092645617015822, "loss": 0.7461, "step": 114 }, { "epoch": 0.36, "learning_rate": 0.0006084716433379844, "loss": 0.8086, "step": 115 }, { "epoch": 0.36, "learning_rate": 0.0006076716083609456, "loss": 0.7577, "step": 116 }, { "epoch": 0.36, "learning_rate": 0.0006068644768555068, "loss": 0.7094, "step": 117 }, { "epoch": 0.37, "learning_rate": 0.0006060502690848696, "loss": 0.726, "step": 118 }, { "epoch": 0.37, "learning_rate": 0.0006052290054898859, "loss": 0.7243, "step": 119 }, { "epoch": 0.37, "learning_rate": 0.0006044007066885458, "loss": 0.7119, "step": 120 }, { "epoch": 0.38, "learning_rate": 0.0006035653934754598, "loss": 0.7049, "step": 121 }, { "epoch": 0.38, "learning_rate": 0.0006027230868213366, "loss": 0.7424, "step": 122 }, { "epoch": 0.38, "learning_rate": 0.0006018738078724563, "loss": 0.7271, "step": 123 }, { "epoch": 0.39, "learning_rate": 0.0006010175779501405, "loss": 0.7996, "step": 124 }, { "epoch": 0.39, "learning_rate": 0.0006001544185502158, "loss": 0.7468, "step": 125 }, { "epoch": 0.39, "learning_rate": 0.0005992843513424754, "loss": 0.7513, "step": 126 }, { "epoch": 0.4, "learning_rate": 0.0005984073981701338, "loss": 0.7461, "step": 127 }, { "epoch": 0.4, "learning_rate": 0.0005975235810492794, "loss": 0.6821, "step": 128 }, { "epoch": 0.4, "learning_rate": 0.0005966329221683215, "loss": 0.7314, "step": 129 }, { "epoch": 0.41, "learning_rate": 0.0005957354438874327, "loss": 0.714, "step": 130 }, { "epoch": 0.41, "learning_rate": 0.0005948311687379884, "loss": 0.7339, "step": 131 }, { "epoch": 0.41, "learning_rate": 0.000593920119422001, "loss": 0.7021, "step": 132 }, { "epoch": 0.41, "learning_rate": 0.0005930023188115492, "loss": 0.7228, "step": 133 }, { "epoch": 0.42, "learning_rate": 0.0005920777899482046, "loss": 0.7107, "step": 134 }, { "epoch": 0.42, "learning_rate": 0.0005911465560424532, "loss": 0.659, "step": 135 }, { "epoch": 0.42, "learning_rate": 0.0005902086404731118, "loss": 0.7028, "step": 136 }, { "epoch": 0.43, "learning_rate": 0.0005892640667867423, "loss": 0.7275, "step": 137 }, { "epoch": 0.43, "learning_rate": 0.00058831285869706, "loss": 0.6889, "step": 138 }, { "epoch": 0.43, "learning_rate": 0.0005873550400843378, "loss": 0.7891, "step": 139 }, { "epoch": 0.44, "learning_rate": 0.0005863906349948074, "loss": 0.7904, "step": 140 }, { "epoch": 0.44, "learning_rate": 0.0005854196676400555, "loss": 0.6674, "step": 141 }, { "epoch": 0.44, "learning_rate": 0.0005844421623964157, "loss": 0.7352, "step": 142 }, { "epoch": 0.45, "learning_rate": 0.0005834581438043563, "loss": 0.6965, "step": 143 }, { "epoch": 0.45, "learning_rate": 0.000582467636567865, "loss": 0.7238, "step": 144 }, { "epoch": 0.45, "learning_rate": 0.0005814706655538279, "loss": 0.7064, "step": 145 }, { "epoch": 0.46, "learning_rate": 0.0005804672557914059, "loss": 0.6984, "step": 146 }, { "epoch": 0.46, "learning_rate": 0.0005794574324714057, "loss": 0.7594, "step": 147 }, { "epoch": 0.46, "learning_rate": 0.0005784412209456479, "loss": 0.6884, "step": 148 }, { "epoch": 0.46, "learning_rate": 0.00057741864672633, "loss": 0.7141, "step": 149 }, { "epoch": 0.47, "learning_rate": 0.0005763897354853866, "loss": 0.705, "step": 150 }, { "epoch": 0.47, "learning_rate": 0.0005753545130538441, "loss": 0.7613, "step": 151 }, { "epoch": 0.47, "learning_rate": 0.0005743130054211732, "loss": 0.736, "step": 152 }, { "epoch": 0.48, "learning_rate": 0.0005732652387346351, "loss": 0.6814, "step": 153 }, { "epoch": 0.48, "learning_rate": 0.0005722112392986265, "loss": 0.7002, "step": 154 }, { "epoch": 0.48, "learning_rate": 0.0005711510335740182, "loss": 0.7023, "step": 155 }, { "epoch": 0.49, "learning_rate": 0.0005700846481774913, "loss": 0.7617, "step": 156 }, { "epoch": 0.49, "learning_rate": 0.0005690121098808687, "loss": 0.7079, "step": 157 }, { "epoch": 0.49, "learning_rate": 0.0005679334456104429, "loss": 0.7614, "step": 158 }, { "epoch": 0.5, "learning_rate": 0.000566848682446301, "loss": 0.6786, "step": 159 }, { "epoch": 0.5, "learning_rate": 0.0005657578476216432, "loss": 0.6773, "step": 160 }, { "epoch": 0.5, "learning_rate": 0.0005646609685221003, "loss": 0.7085, "step": 161 }, { "epoch": 0.51, "learning_rate": 0.0005635580726850462, "loss": 0.7167, "step": 162 }, { "epoch": 0.51, "learning_rate": 0.0005624491877989055, "loss": 0.7192, "step": 163 }, { "epoch": 0.51, "learning_rate": 0.0005613343417024599, "loss": 0.6761, "step": 164 }, { "epoch": 0.51, "learning_rate": 0.0005602135623841478, "loss": 0.7508, "step": 165 }, { "epoch": 0.52, "learning_rate": 0.0005590868779813627, "loss": 0.6978, "step": 166 }, { "epoch": 0.52, "learning_rate": 0.0005579543167797467, "loss": 0.7459, "step": 167 }, { "epoch": 0.52, "learning_rate": 0.0005568159072124794, "loss": 0.7438, "step": 168 }, { "epoch": 0.53, "learning_rate": 0.0005556716778595654, "loss": 0.7073, "step": 169 }, { "epoch": 0.53, "learning_rate": 0.0005545216574471164, "loss": 0.6385, "step": 170 }, { "epoch": 0.53, "learning_rate": 0.0005533658748466291, "loss": 0.6993, "step": 171 }, { "epoch": 0.54, "learning_rate": 0.0005522043590742615, "loss": 0.7258, "step": 172 }, { "epoch": 0.54, "learning_rate": 0.0005510371392901041, "loss": 0.7405, "step": 173 }, { "epoch": 0.54, "learning_rate": 0.0005498642447974479, "loss": 0.7525, "step": 174 }, { "epoch": 0.55, "learning_rate": 0.0005486857050420481, "loss": 0.6639, "step": 175 }, { "epoch": 0.55, "learning_rate": 0.0005475015496113861, "loss": 0.7415, "step": 176 }, { "epoch": 0.55, "learning_rate": 0.0005463118082339253, "loss": 0.7816, "step": 177 }, { "epoch": 0.56, "learning_rate": 0.0005451165107783659, "loss": 0.711, "step": 178 }, { "epoch": 0.56, "learning_rate": 0.0005439156872528941, "loss": 0.7138, "step": 179 }, { "epoch": 0.56, "learning_rate": 0.0005427093678044299, "loss": 0.7069, "step": 180 }, { "epoch": 0.56, "learning_rate": 0.0005414975827178688, "loss": 0.7553, "step": 181 }, { "epoch": 0.57, "learning_rate": 0.000540280362415323, "loss": 0.7045, "step": 182 }, { "epoch": 0.57, "learning_rate": 0.0005390577374553561, "loss": 0.7011, "step": 183 }, { "epoch": 0.57, "learning_rate": 0.0005378297385322177, "loss": 0.7441, "step": 184 }, { "epoch": 0.58, "learning_rate": 0.0005365963964750707, "loss": 0.6797, "step": 185 }, { "epoch": 0.58, "learning_rate": 0.0005353577422472196, "loss": 0.6901, "step": 186 }, { "epoch": 0.58, "learning_rate": 0.0005341138069453313, "loss": 0.7136, "step": 187 }, { "epoch": 0.59, "learning_rate": 0.0005328646217986553, "loss": 0.7459, "step": 188 }, { "epoch": 0.59, "learning_rate": 0.0005316102181682396, "loss": 0.7064, "step": 189 }, { "epoch": 0.59, "learning_rate": 0.0005303506275461433, "loss": 0.6705, "step": 190 }, { "epoch": 0.6, "learning_rate": 0.0005290858815546459, "loss": 0.7008, "step": 191 }, { "epoch": 0.6, "learning_rate": 0.0005278160119454536, "loss": 0.7538, "step": 192 }, { "epoch": 0.6, "learning_rate": 0.0005265410505989021, "loss": 0.7726, "step": 193 }, { "epoch": 0.61, "learning_rate": 0.000525261029523156, "loss": 0.7532, "step": 194 }, { "epoch": 0.61, "learning_rate": 0.0005239759808534055, "loss": 0.6978, "step": 195 }, { "epoch": 0.61, "learning_rate": 0.0005226859368510599, "loss": 0.7182, "step": 196 }, { "epoch": 0.61, "learning_rate": 0.0005213909299029368, "loss": 0.6776, "step": 197 }, { "epoch": 0.62, "learning_rate": 0.0005200909925204501, "loss": 0.7447, "step": 198 }, { "epoch": 0.62, "learning_rate": 0.0005187861573387928, "loss": 0.7298, "step": 199 }, { "epoch": 0.62, "learning_rate": 0.0005174764571161185, "loss": 0.6833, "step": 200 }, { "epoch": 0.63, "learning_rate": 0.0005161619247327185, "loss": 0.7518, "step": 201 }, { "epoch": 0.63, "learning_rate": 0.0005148425931901961, "loss": 0.7429, "step": 202 }, { "epoch": 0.63, "learning_rate": 0.0005135184956106394, "loss": 0.763, "step": 203 }, { "epoch": 0.64, "learning_rate": 0.000512189665235788, "loss": 0.7682, "step": 204 }, { "epoch": 0.64, "learning_rate": 0.0005108561354261996, "loss": 0.7063, "step": 205 }, { "epoch": 0.64, "learning_rate": 0.0005095179396604121, "loss": 0.6956, "step": 206 }, { "epoch": 0.65, "learning_rate": 0.0005081751115341034, "loss": 0.7434, "step": 207 }, { "epoch": 0.65, "learning_rate": 0.0005068276847592474, "loss": 0.6673, "step": 208 }, { "epoch": 0.65, "learning_rate": 0.0005054756931632682, "loss": 0.6448, "step": 209 }, { "epoch": 0.65, "learning_rate": 0.0005041191706881909, "loss": 0.7095, "step": 210 }, { "epoch": 0.66, "learning_rate": 0.0005027581513897888, "loss": 0.673, "step": 211 }, { "epoch": 0.66, "learning_rate": 0.000501392669436729, "loss": 0.6363, "step": 212 }, { "epoch": 0.66, "learning_rate": 0.0005000227591097145, "loss": 0.6711, "step": 213 }, { "epoch": 0.67, "learning_rate": 0.0004986484548006237, "loss": 0.6375, "step": 214 }, { "epoch": 0.67, "learning_rate": 0.0004972697910116468, "loss": 0.7466, "step": 215 }, { "epoch": 0.67, "learning_rate": 0.0004958868023544192, "loss": 0.7147, "step": 216 }, { "epoch": 0.68, "learning_rate": 0.0004944995235491534, "loss": 0.714, "step": 217 }, { "epoch": 0.68, "learning_rate": 0.0004931079894237669, "loss": 0.7377, "step": 218 }, { "epoch": 0.68, "learning_rate": 0.0004917122349130078, "loss": 0.7087, "step": 219 }, { "epoch": 0.69, "learning_rate": 0.000490312295057578, "loss": 0.6716, "step": 220 }, { "epoch": 0.69, "learning_rate": 0.0004889082050032529, "loss": 0.7298, "step": 221 }, { "epoch": 0.69, "learning_rate": 0.0004875, "loss": 0.6557, "step": 222 }, { "epoch": 0.7, "learning_rate": 0.0004860877154010932, "loss": 0.7042, "step": 223 }, { "epoch": 0.7, "learning_rate": 0.00048467138666222534, "loss": 0.6617, "step": 224 }, { "epoch": 0.7, "learning_rate": 0.00048325104934061853, "loss": 0.7019, "step": 225 }, { "epoch": 0.7, "learning_rate": 0.00048182673909413103, "loss": 0.6756, "step": 226 }, { "epoch": 0.71, "learning_rate": 0.00048039849168036205, "loss": 0.709, "step": 227 }, { "epoch": 0.71, "learning_rate": 0.00047896634295575434, "loss": 0.7434, "step": 228 }, { "epoch": 0.71, "learning_rate": 0.00047753032887469385, "loss": 0.7533, "step": 229 }, { "epoch": 0.72, "learning_rate": 0.0004760904854886072, "loss": 0.7019, "step": 230 }, { "epoch": 0.72, "learning_rate": 0.0004746468489450562, "loss": 0.6852, "step": 231 }, { "epoch": 0.72, "learning_rate": 0.0004731994554868307, "loss": 0.7228, "step": 232 }, { "epoch": 0.73, "learning_rate": 0.000471748341451039, "loss": 0.7513, "step": 233 }, { "epoch": 0.73, "learning_rate": 0.0004702935432681949, "loss": 0.6896, "step": 234 }, { "epoch": 0.73, "learning_rate": 0.0004688350974613038, "loss": 0.6815, "step": 235 }, { "epoch": 0.74, "learning_rate": 0.0004673730406449449, "loss": 0.7682, "step": 236 }, { "epoch": 0.74, "learning_rate": 0.00046590740952435323, "loss": 0.7025, "step": 237 }, { "epoch": 0.74, "learning_rate": 0.0004644382408944968, "loss": 0.6662, "step": 238 }, { "epoch": 0.75, "learning_rate": 0.00046296557163915395, "loss": 0.7541, "step": 239 }, { "epoch": 0.75, "learning_rate": 0.0004614894387299867, "loss": 0.7336, "step": 240 }, { "epoch": 0.75, "learning_rate": 0.0004600098792256131, "loss": 0.6618, "step": 241 }, { "epoch": 0.75, "learning_rate": 0.0004585269302706762, "loss": 0.6729, "step": 242 }, { "epoch": 0.76, "learning_rate": 0.0004570406290949121, "loss": 0.7327, "step": 243 }, { "epoch": 0.76, "learning_rate": 0.0004555510130122151, "loss": 0.6778, "step": 244 }, { "epoch": 0.76, "learning_rate": 0.0004540581194197008, "loss": 0.6219, "step": 245 }, { "epoch": 0.77, "learning_rate": 0.00045256198579676755, "loss": 0.6984, "step": 246 }, { "epoch": 0.77, "learning_rate": 0.000451062649704155, "loss": 0.637, "step": 247 }, { "epoch": 0.77, "learning_rate": 0.000449560148783002, "loss": 0.658, "step": 248 }, { "epoch": 0.78, "learning_rate": 0.0004480545207539004, "loss": 0.7305, "step": 249 }, { "epoch": 0.78, "learning_rate": 0.0004465458034159491, "loss": 0.6788, "step": 250 }, { "epoch": 0.78, "learning_rate": 0.00044503403464580475, "loss": 0.7096, "step": 251 }, { "epoch": 0.79, "learning_rate": 0.00044351925239673087, "loss": 0.7108, "step": 252 }, { "epoch": 0.79, "learning_rate": 0.0004420014946976447, "loss": 0.6518, "step": 253 }, { "epoch": 0.79, "learning_rate": 0.00044048079965216294, "loss": 0.7262, "step": 254 }, { "epoch": 0.8, "learning_rate": 0.0004389572054376452, "loss": 0.6988, "step": 255 }, { "epoch": 0.8, "learning_rate": 0.00043743075030423475, "loss": 0.6637, "step": 256 }, { "epoch": 0.8, "learning_rate": 0.0004359014725738994, "loss": 0.7055, "step": 257 }, { "epoch": 0.8, "learning_rate": 0.00043436941063946843, "loss": 0.7179, "step": 258 }, { "epoch": 0.81, "learning_rate": 0.0004328346029636694, "loss": 0.6955, "step": 259 }, { "epoch": 0.81, "learning_rate": 0.0004312970880781621, "loss": 0.6749, "step": 260 }, { "epoch": 0.81, "learning_rate": 0.0004297569045825713, "loss": 0.6711, "step": 261 }, { "epoch": 0.82, "learning_rate": 0.00042821409114351803, "loss": 0.6366, "step": 262 }, { "epoch": 0.82, "learning_rate": 0.00042666868649364844, "loss": 0.7144, "step": 263 }, { "epoch": 0.82, "learning_rate": 0.0004251207294306617, "loss": 0.656, "step": 264 }, { "epoch": 0.83, "learning_rate": 0.00042357025881633535, "loss": 0.6803, "step": 265 }, { "epoch": 0.83, "learning_rate": 0.00042201731357555073, "loss": 0.7044, "step": 266 }, { "epoch": 0.83, "learning_rate": 0.0004204619326953149, "loss": 0.6488, "step": 267 }, { "epoch": 0.84, "learning_rate": 0.00041890415522378223, "loss": 0.6928, "step": 268 }, { "epoch": 0.84, "learning_rate": 0.00041734402026927394, "loss": 0.6764, "step": 269 }, { "epoch": 0.84, "learning_rate": 0.00041578156699929636, "loss": 0.6278, "step": 270 }, { "epoch": 0.85, "learning_rate": 0.0004142168346395577, "loss": 0.691, "step": 271 }, { "epoch": 0.85, "learning_rate": 0.0004126498624729829, "loss": 0.6865, "step": 272 }, { "epoch": 0.85, "learning_rate": 0.000411080689838728, "loss": 0.6715, "step": 273 }, { "epoch": 0.85, "learning_rate": 0.00040950935613119226, "loss": 0.6563, "step": 274 }, { "epoch": 0.86, "learning_rate": 0.00040793590079902885, "loss": 0.7608, "step": 275 }, { "epoch": 0.86, "learning_rate": 0.00040636036334415487, "loss": 0.6189, "step": 276 }, { "epoch": 0.86, "learning_rate": 0.0004047827833207597, "loss": 0.6981, "step": 277 }, { "epoch": 0.87, "learning_rate": 0.0004032032003343117, "loss": 0.644, "step": 278 }, { "epoch": 0.87, "learning_rate": 0.0004016216540405639, "loss": 0.7286, "step": 279 }, { "epoch": 0.87, "learning_rate": 0.0004000381841445586, "loss": 0.6694, "step": 280 }, { "epoch": 0.88, "learning_rate": 0.00039845283039963093, "loss": 0.7204, "step": 281 }, { "epoch": 0.88, "learning_rate": 0.0003968656326064099, "loss": 0.7042, "step": 282 }, { "epoch": 0.88, "learning_rate": 0.00039527663061181983, "loss": 0.712, "step": 283 }, { "epoch": 0.89, "learning_rate": 0.00039368586430808014, "loss": 0.7179, "step": 284 }, { "epoch": 0.89, "learning_rate": 0.00039209337363170347, "loss": 0.6903, "step": 285 }, { "epoch": 0.89, "learning_rate": 0.00039049919856249315, "loss": 0.6924, "step": 286 }, { "epoch": 0.9, "learning_rate": 0.0003889033791225395, "loss": 0.6713, "step": 287 }, { "epoch": 0.9, "learning_rate": 0.000387305955375215, "loss": 0.7852, "step": 288 }, { "epoch": 0.9, "learning_rate": 0.0003857069674241689, "loss": 0.6517, "step": 289 }, { "epoch": 0.9, "learning_rate": 0.00038410645541232, "loss": 0.6764, "step": 290 }, { "epoch": 0.91, "learning_rate": 0.0003825044595208488, "loss": 0.7183, "step": 291 }, { "epoch": 0.91, "learning_rate": 0.000380901019968189, "loss": 0.6826, "step": 292 }, { "epoch": 0.91, "learning_rate": 0.0003792961770090178, "loss": 0.6936, "step": 293 }, { "epoch": 0.92, "learning_rate": 0.0003776899709332449, "loss": 0.718, "step": 294 }, { "epoch": 0.92, "learning_rate": 0.00037608244206500176, "loss": 0.6795, "step": 295 }, { "epoch": 0.92, "learning_rate": 0.00037447363076162853, "loss": 0.6517, "step": 296 }, { "epoch": 0.93, "learning_rate": 0.0003728635774126613, "loss": 0.6849, "step": 297 }, { "epoch": 0.93, "learning_rate": 0.0003712523224388177, "loss": 0.6663, "step": 298 }, { "epoch": 0.93, "learning_rate": 0.00036963990629098264, "loss": 0.6585, "step": 299 }, { "epoch": 0.94, "learning_rate": 0.0003680263694491925, "loss": 0.7054, "step": 300 }, { "epoch": 0.94, "learning_rate": 0.00036641175242161907, "loss": 0.6662, "step": 301 }, { "epoch": 0.94, "learning_rate": 0.000364796095743552, "loss": 0.6306, "step": 302 }, { "epoch": 0.95, "learning_rate": 0.00036317943997638187, "loss": 0.6544, "step": 303 }, { "epoch": 0.95, "learning_rate": 0.0003615618257065817, "loss": 0.7078, "step": 304 }, { "epoch": 0.95, "learning_rate": 0.00035994329354468763, "loss": 0.6511, "step": 305 }, { "epoch": 0.95, "learning_rate": 0.00035832388412427983, "loss": 0.668, "step": 306 }, { "epoch": 0.96, "learning_rate": 0.00035670363810096214, "loss": 0.678, "step": 307 }, { "epoch": 0.96, "learning_rate": 0.0003550825961513418, "loss": 0.6596, "step": 308 }, { "epoch": 0.96, "learning_rate": 0.00035346079897200736, "loss": 0.674, "step": 309 }, { "epoch": 0.97, "learning_rate": 0.00035183828727850804, "loss": 0.6888, "step": 310 }, { "epoch": 0.97, "learning_rate": 0.0003502151018043309, "loss": 0.6864, "step": 311 }, { "epoch": 0.97, "learning_rate": 0.0003485912832998785, "loss": 0.6576, "step": 312 }, { "epoch": 0.98, "learning_rate": 0.0003469668725314458, "loss": 0.6989, "step": 313 }, { "epoch": 0.98, "learning_rate": 0.0003453419102801962, "loss": 0.6519, "step": 314 }, { "epoch": 0.98, "learning_rate": 0.0003437164373411389, "loss": 0.6754, "step": 315 }, { "epoch": 0.99, "learning_rate": 0.00034209049452210347, "loss": 0.6706, "step": 316 }, { "epoch": 0.99, "learning_rate": 0.0003404641226427163, "loss": 0.7295, "step": 317 }, { "epoch": 0.99, "learning_rate": 0.000338837362533375, "loss": 0.7137, "step": 318 }, { "epoch": 0.99, "learning_rate": 0.0003372102550342242, "loss": 0.7131, "step": 319 }, { "epoch": 1.0, "learning_rate": 0.0003355828409941296, "loss": 0.6404, "step": 320 }, { "epoch": 1.0, "learning_rate": 0.00033395516126965267, "loss": 0.6896, "step": 321 }, { "epoch": 1.0, "learning_rate": 0.0003323272567240249, "loss": 0.5439, "step": 322 }, { "epoch": 1.01, "learning_rate": 0.0003306991682261223, "loss": 0.5435, "step": 323 }, { "epoch": 1.01, "learning_rate": 0.0003290709366494386, "loss": 0.5861, "step": 324 }, { "epoch": 1.01, "learning_rate": 0.0003274426028710596, "loss": 0.5743, "step": 325 }, { "epoch": 1.02, "learning_rate": 0.0003258142077706373, "loss": 0.4928, "step": 326 }, { "epoch": 1.02, "learning_rate": 0.0003241857922293627, "loss": 0.5045, "step": 327 }, { "epoch": 1.02, "learning_rate": 0.00032255739712894036, "loss": 0.5733, "step": 328 }, { "epoch": 1.03, "learning_rate": 0.00032092906335056147, "loss": 0.517, "step": 329 }, { "epoch": 1.03, "learning_rate": 0.00031930083177387765, "loss": 0.5836, "step": 330 }, { "epoch": 1.03, "learning_rate": 0.0003176727432759751, "loss": 0.5745, "step": 331 }, { "epoch": 1.04, "learning_rate": 0.00031604483873034735, "loss": 0.5165, "step": 332 }, { "epoch": 1.04, "learning_rate": 0.0003144171590058705, "loss": 0.5716, "step": 333 }, { "epoch": 1.04, "learning_rate": 0.0003127897449657758, "loss": 0.5605, "step": 334 }, { "epoch": 1.04, "learning_rate": 0.0003111626374666249, "loss": 0.5454, "step": 335 }, { "epoch": 1.05, "learning_rate": 0.00030953587735728377, "loss": 0.5743, "step": 336 }, { "epoch": 1.05, "learning_rate": 0.0003079095054778965, "loss": 0.5874, "step": 337 }, { "epoch": 1.05, "learning_rate": 0.0003062835626588612, "loss": 0.5518, "step": 338 }, { "epoch": 1.06, "learning_rate": 0.0003046580897198038, "loss": 0.5589, "step": 339 }, { "epoch": 1.06, "learning_rate": 0.00030303312746855434, "loss": 0.5639, "step": 340 }, { "epoch": 1.06, "learning_rate": 0.0003014087167001215, "loss": 0.5249, "step": 341 }, { "epoch": 1.07, "learning_rate": 0.00029978489819566903, "loss": 0.5328, "step": 342 }, { "epoch": 1.07, "learning_rate": 0.000298161712721492, "loss": 0.5494, "step": 343 }, { "epoch": 1.07, "learning_rate": 0.00029653920102799266, "loss": 0.5593, "step": 344 }, { "epoch": 1.08, "learning_rate": 0.00029491740384865835, "loss": 0.5149, "step": 345 }, { "epoch": 1.08, "learning_rate": 0.00029329636189903783, "loss": 0.5434, "step": 346 }, { "epoch": 1.08, "learning_rate": 0.00029167611587572014, "loss": 0.5099, "step": 347 }, { "epoch": 1.09, "learning_rate": 0.0002900567064553124, "loss": 0.5175, "step": 348 }, { "epoch": 1.09, "learning_rate": 0.00028843817429341826, "loss": 0.5598, "step": 349 }, { "epoch": 1.09, "learning_rate": 0.00028682056002361816, "loss": 0.524, "step": 350 }, { "epoch": 1.09, "learning_rate": 0.00028520390425644797, "loss": 0.5155, "step": 351 }, { "epoch": 1.1, "learning_rate": 0.00028358824757838085, "loss": 0.5419, "step": 352 }, { "epoch": 1.1, "learning_rate": 0.00028197363055080746, "loss": 0.5145, "step": 353 }, { "epoch": 1.1, "learning_rate": 0.00028036009370901733, "loss": 0.5089, "step": 354 }, { "epoch": 1.11, "learning_rate": 0.0002787476775611823, "loss": 0.5361, "step": 355 }, { "epoch": 1.11, "learning_rate": 0.00027713642258733875, "loss": 0.5519, "step": 356 }, { "epoch": 1.11, "learning_rate": 0.0002755263692383714, "loss": 0.579, "step": 357 }, { "epoch": 1.12, "learning_rate": 0.00027391755793499826, "loss": 0.5669, "step": 358 }, { "epoch": 1.12, "learning_rate": 0.000272310029066755, "loss": 0.5641, "step": 359 }, { "epoch": 1.12, "learning_rate": 0.00027070382299098234, "loss": 0.5093, "step": 360 }, { "epoch": 1.13, "learning_rate": 0.000269098980031811, "loss": 0.5726, "step": 361 }, { "epoch": 1.13, "learning_rate": 0.0002674955404791512, "loss": 0.5405, "step": 362 }, { "epoch": 1.13, "learning_rate": 0.00026589354458768003, "loss": 0.576, "step": 363 }, { "epoch": 1.14, "learning_rate": 0.00026429303257583106, "loss": 0.5078, "step": 364 }, { "epoch": 1.14, "learning_rate": 0.0002626940446247851, "loss": 0.4941, "step": 365 }, { "epoch": 1.14, "learning_rate": 0.00026109662087746055, "loss": 0.5337, "step": 366 }, { "epoch": 1.14, "learning_rate": 0.0002595008014375069, "loss": 0.5511, "step": 367 }, { "epoch": 1.15, "learning_rate": 0.0002579066263682965, "loss": 0.555, "step": 368 }, { "epoch": 1.15, "learning_rate": 0.0002563141356919198, "loss": 0.5451, "step": 369 }, { "epoch": 1.15, "learning_rate": 0.00025472336938818014, "loss": 0.5281, "step": 370 }, { "epoch": 1.16, "learning_rate": 0.0002531343673935901, "loss": 0.5649, "step": 371 }, { "epoch": 1.16, "learning_rate": 0.00025154716960036914, "loss": 0.5726, "step": 372 }, { "epoch": 1.16, "learning_rate": 0.00024996181585544135, "loss": 0.5352, "step": 373 }, { "epoch": 1.17, "learning_rate": 0.00024837834595943607, "loss": 0.4992, "step": 374 }, { "epoch": 1.17, "learning_rate": 0.0002467967996656884, "loss": 0.5032, "step": 375 }, { "epoch": 1.17, "learning_rate": 0.0002452172166792403, "loss": 0.5247, "step": 376 }, { "epoch": 1.18, "learning_rate": 0.00024363963665584513, "loss": 0.5449, "step": 377 }, { "epoch": 1.18, "learning_rate": 0.00024206409920097117, "loss": 0.5387, "step": 378 }, { "epoch": 1.18, "learning_rate": 0.00024049064386880769, "loss": 0.5229, "step": 379 }, { "epoch": 1.19, "learning_rate": 0.00023891931016127203, "loss": 0.5491, "step": 380 }, { "epoch": 1.19, "learning_rate": 0.00023735013752701714, "loss": 0.5686, "step": 381 }, { "epoch": 1.19, "learning_rate": 0.00023578316536044242, "loss": 0.5077, "step": 382 }, { "epoch": 1.19, "learning_rate": 0.0002342184330007036, "loss": 0.55, "step": 383 }, { "epoch": 1.2, "learning_rate": 0.000232655979730726, "loss": 0.5133, "step": 384 }, { "epoch": 1.2, "learning_rate": 0.00023109584477621774, "loss": 0.5088, "step": 385 }, { "epoch": 1.2, "learning_rate": 0.00022953806730468502, "loss": 0.4863, "step": 386 }, { "epoch": 1.21, "learning_rate": 0.00022798268642444934, "loss": 0.5305, "step": 387 }, { "epoch": 1.21, "learning_rate": 0.0002264297411836647, "loss": 0.5002, "step": 388 }, { "epoch": 1.21, "learning_rate": 0.00022487927056933846, "loss": 0.5388, "step": 389 }, { "epoch": 1.22, "learning_rate": 0.00022333131350635153, "loss": 0.5467, "step": 390 }, { "epoch": 1.22, "learning_rate": 0.00022178590885648191, "loss": 0.4839, "step": 391 }, { "epoch": 1.22, "learning_rate": 0.00022024309541742872, "loss": 0.51, "step": 392 }, { "epoch": 1.23, "learning_rate": 0.00021870291192183796, "loss": 0.5215, "step": 393 }, { "epoch": 1.23, "learning_rate": 0.00021716539703633072, "loss": 0.5349, "step": 394 }, { "epoch": 1.23, "learning_rate": 0.0002156305893605316, "loss": 0.5916, "step": 395 }, { "epoch": 1.24, "learning_rate": 0.00021409852742610062, "loss": 0.54, "step": 396 }, { "epoch": 1.24, "learning_rate": 0.00021256924969576528, "loss": 0.5376, "step": 397 }, { "epoch": 1.24, "learning_rate": 0.00021104279456235484, "loss": 0.5585, "step": 398 }, { "epoch": 1.24, "learning_rate": 0.000209519200347837, "loss": 0.5605, "step": 399 }, { "epoch": 1.25, "learning_rate": 0.00020799850530235525, "loss": 0.5282, "step": 400 }, { "epoch": 1.25, "learning_rate": 0.0002064807476032691, "loss": 0.5246, "step": 401 }, { "epoch": 1.25, "learning_rate": 0.00020496596535419522, "loss": 0.502, "step": 402 }, { "epoch": 1.26, "learning_rate": 0.00020345419658405092, "loss": 0.5265, "step": 403 }, { "epoch": 1.26, "learning_rate": 0.0002019454792460997, "loss": 0.5488, "step": 404 }, { "epoch": 1.26, "learning_rate": 0.00020043985121699805, "loss": 0.5234, "step": 405 }, { "epoch": 1.27, "learning_rate": 0.00019893735029584487, "loss": 0.4983, "step": 406 }, { "epoch": 1.27, "learning_rate": 0.00019743801420323248, "loss": 0.5674, "step": 407 }, { "epoch": 1.27, "learning_rate": 0.00019594188058029918, "loss": 0.5402, "step": 408 }, { "epoch": 1.28, "learning_rate": 0.000194448986987785, "loss": 0.5408, "step": 409 }, { "epoch": 1.28, "learning_rate": 0.00019295937090508795, "loss": 0.5441, "step": 410 }, { "epoch": 1.28, "learning_rate": 0.00019147306972932385, "loss": 0.5598, "step": 411 }, { "epoch": 1.28, "learning_rate": 0.00018999012077438693, "loss": 0.5638, "step": 412 }, { "epoch": 1.29, "learning_rate": 0.00018851056127001318, "loss": 0.5316, "step": 413 }, { "epoch": 1.29, "learning_rate": 0.0001870344283608461, "loss": 0.5337, "step": 414 }, { "epoch": 1.29, "learning_rate": 0.0001855617591055032, "loss": 0.5346, "step": 415 }, { "epoch": 1.3, "learning_rate": 0.00018409259047564688, "loss": 0.5795, "step": 416 }, { "epoch": 1.3, "learning_rate": 0.00018262695935505508, "loss": 0.5852, "step": 417 }, { "epoch": 1.3, "learning_rate": 0.00018116490253869622, "loss": 0.5616, "step": 418 }, { "epoch": 1.31, "learning_rate": 0.00017970645673180512, "loss": 0.5335, "step": 419 }, { "epoch": 1.31, "learning_rate": 0.000178251658548961, "loss": 0.5204, "step": 420 }, { "epoch": 1.31, "learning_rate": 0.0001768005445131693, "loss": 0.5139, "step": 421 }, { "epoch": 1.32, "learning_rate": 0.00017535315105494386, "loss": 0.5507, "step": 422 }, { "epoch": 1.32, "learning_rate": 0.00017390951451139275, "loss": 0.5024, "step": 423 }, { "epoch": 1.32, "learning_rate": 0.00017246967112530612, "loss": 0.4963, "step": 424 }, { "epoch": 1.33, "learning_rate": 0.00017103365704424566, "loss": 0.5252, "step": 425 }, { "epoch": 1.33, "learning_rate": 0.00016960150831963805, "loss": 0.4911, "step": 426 }, { "epoch": 1.33, "learning_rate": 0.00016817326090586907, "loss": 0.5111, "step": 427 }, { "epoch": 1.33, "learning_rate": 0.00016674895065938136, "loss": 0.5361, "step": 428 }, { "epoch": 1.34, "learning_rate": 0.00016532861333777463, "loss": 0.5082, "step": 429 }, { "epoch": 1.34, "learning_rate": 0.0001639122845989068, "loss": 0.5484, "step": 430 }, { "epoch": 1.34, "learning_rate": 0.00016250000000000007, "loss": 0.5316, "step": 431 }, { "epoch": 1.35, "learning_rate": 0.00016109179499674712, "loss": 0.5541, "step": 432 }, { "epoch": 1.35, "learning_rate": 0.00015968770494242203, "loss": 0.5497, "step": 433 }, { "epoch": 1.35, "learning_rate": 0.00015828776508699223, "loss": 0.5233, "step": 434 }, { "epoch": 1.36, "learning_rate": 0.00015689201057623314, "loss": 0.5506, "step": 435 }, { "epoch": 1.36, "learning_rate": 0.00015550047645084662, "loss": 0.5369, "step": 436 }, { "epoch": 1.36, "learning_rate": 0.00015411319764558083, "loss": 0.4958, "step": 437 }, { "epoch": 1.37, "learning_rate": 0.00015273020898835333, "loss": 0.5657, "step": 438 }, { "epoch": 1.37, "learning_rate": 0.00015135154519937627, "loss": 0.5329, "step": 439 }, { "epoch": 1.37, "learning_rate": 0.0001499772408902855, "loss": 0.4827, "step": 440 }, { "epoch": 1.38, "learning_rate": 0.00014860733056327118, "loss": 0.5911, "step": 441 }, { "epoch": 1.38, "learning_rate": 0.0001472418486102113, "loss": 0.5175, "step": 442 }, { "epoch": 1.38, "learning_rate": 0.0001458808293118091, "loss": 0.495, "step": 443 }, { "epoch": 1.38, "learning_rate": 0.00014452430683673166, "loss": 0.4897, "step": 444 }, { "epoch": 1.39, "learning_rate": 0.0001431723152407525, "loss": 0.5176, "step": 445 }, { "epoch": 1.39, "learning_rate": 0.0001418248884658966, "loss": 0.4719, "step": 446 }, { "epoch": 1.39, "learning_rate": 0.00014048206033958783, "loss": 0.5197, "step": 447 }, { "epoch": 1.4, "learning_rate": 0.00013914386457380048, "loss": 0.4717, "step": 448 }, { "epoch": 1.4, "learning_rate": 0.00013781033476421205, "loss": 0.4571, "step": 449 }, { "epoch": 1.4, "learning_rate": 0.00013648150438936064, "loss": 0.5262, "step": 450 } ], "logging_steps": 1, "max_steps": 640, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "total_flos": 1.1676754391855923e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }