{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7797270955165692, "eval_steps": 500, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-05, "loss": 1.0506, "step": 1 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 0.9988, "step": 2 }, { "epoch": 0.01, "learning_rate": 0.00015000000000000001, "loss": 0.9783, "step": 3 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 0.9849, "step": 4 }, { "epoch": 0.02, "learning_rate": 0.00025, "loss": 1.0159, "step": 5 }, { "epoch": 0.02, "learning_rate": 0.00030000000000000003, "loss": 0.9847, "step": 6 }, { "epoch": 0.02, "learning_rate": 0.00034999999999999994, "loss": 0.9101, "step": 7 }, { "epoch": 0.02, "learning_rate": 0.0004, "loss": 0.9445, "step": 8 }, { "epoch": 0.03, "learning_rate": 0.00045, "loss": 0.8578, "step": 9 }, { "epoch": 0.03, "learning_rate": 0.0005, "loss": 0.9356, "step": 10 }, { "epoch": 0.03, "learning_rate": 0.0005499999999999999, "loss": 0.8395, "step": 11 }, { "epoch": 0.04, "learning_rate": 0.0006000000000000001, "loss": 0.9002, "step": 12 }, { "epoch": 0.04, "learning_rate": 0.00065, "loss": 0.8955, "step": 13 }, { "epoch": 0.04, "learning_rate": 0.0006499959204043461, "loss": 0.902, "step": 14 }, { "epoch": 0.05, "learning_rate": 0.0006499836817198032, "loss": 0.8578, "step": 15 }, { "epoch": 0.05, "learning_rate": 0.0006499632842536263, "loss": 0.9005, "step": 16 }, { "epoch": 0.05, "learning_rate": 0.0006499347285178979, "loss": 0.8539, "step": 17 }, { "epoch": 0.06, "learning_rate": 0.0006498980152295153, "loss": 0.8595, "step": 18 }, { "epoch": 0.06, "learning_rate": 0.0006498531453101735, "loss": 0.8845, "step": 19 }, { "epoch": 0.06, "learning_rate": 0.0006498001198863406, "loss": 0.8924, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.000649738940289231, "loss": 0.8365, "step": 21 }, { "epoch": 0.07, "learning_rate": 0.0006496696080547707, "loss": 0.8462, "step": 22 }, { "epoch": 0.07, "learning_rate": 0.0006495921249235596, "loss": 0.8528, "step": 23 }, { "epoch": 0.07, "learning_rate": 0.0006495064928408277, "loss": 0.8159, "step": 24 }, { "epoch": 0.08, "learning_rate": 0.0006494127139563859, "loss": 0.8245, "step": 25 }, { "epoch": 0.08, "learning_rate": 0.000649310790624572, "loss": 0.8081, "step": 26 }, { "epoch": 0.08, "learning_rate": 0.0006492007254041924, "loss": 0.8535, "step": 27 }, { "epoch": 0.09, "learning_rate": 0.0006490825210584566, "loss": 0.8162, "step": 28 }, { "epoch": 0.09, "learning_rate": 0.0006489561805549089, "loss": 0.8456, "step": 29 }, { "epoch": 0.09, "learning_rate": 0.0006488217070653535, "loss": 0.7799, "step": 30 }, { "epoch": 0.1, "learning_rate": 0.0006486791039657748, "loss": 0.8088, "step": 31 }, { "epoch": 0.1, "learning_rate": 0.0006485283748362524, "loss": 0.8683, "step": 32 }, { "epoch": 0.1, "learning_rate": 0.0006483695234608723, "loss": 0.8871, "step": 33 }, { "epoch": 0.11, "learning_rate": 0.0006482025538276304, "loss": 0.7711, "step": 34 }, { "epoch": 0.11, "learning_rate": 0.0006480274701283335, "loss": 0.7621, "step": 35 }, { "epoch": 0.11, "learning_rate": 0.0006478442767584937, "loss": 0.8243, "step": 36 }, { "epoch": 0.12, "learning_rate": 0.0006476529783172177, "loss": 0.8257, "step": 37 }, { "epoch": 0.12, "learning_rate": 0.0006474535796070919, "loss": 0.8141, "step": 38 }, { "epoch": 0.12, "learning_rate": 0.0006472460856340619, "loss": 0.8109, "step": 39 }, { "epoch": 0.12, "learning_rate": 0.000647030501607306, "loss": 0.7873, "step": 40 }, { "epoch": 0.13, "learning_rate": 0.000646806832939105, "loss": 0.7386, "step": 41 }, { "epoch": 0.13, "learning_rate": 0.0006465750852447068, "loss": 0.8636, "step": 42 }, { "epoch": 0.13, "learning_rate": 0.0006463352643421846, "loss": 0.7357, "step": 43 }, { "epoch": 0.14, "learning_rate": 0.0006460873762522906, "loss": 0.8142, "step": 44 }, { "epoch": 0.14, "learning_rate": 0.0006458314271983063, "loss": 0.7275, "step": 45 }, { "epoch": 0.14, "learning_rate": 0.0006455674236058847, "loss": 0.8029, "step": 46 }, { "epoch": 0.15, "learning_rate": 0.00064529537210289, "loss": 0.7901, "step": 47 }, { "epoch": 0.15, "learning_rate": 0.0006450152795192307, "loss": 0.7788, "step": 48 }, { "epoch": 0.15, "learning_rate": 0.0006447271528866881, "loss": 0.7621, "step": 49 }, { "epoch": 0.16, "learning_rate": 0.0006444309994387402, "loss": 0.7537, "step": 50 }, { "epoch": 0.16, "learning_rate": 0.0006441268266103796, "loss": 0.7917, "step": 51 }, { "epoch": 0.16, "learning_rate": 0.0006438146420379274, "loss": 0.8451, "step": 52 }, { "epoch": 0.17, "learning_rate": 0.0006434944535588411, "loss": 0.8369, "step": 53 }, { "epoch": 0.17, "learning_rate": 0.0006431662692115173, "loss": 0.7637, "step": 54 }, { "epoch": 0.17, "learning_rate": 0.0006428300972350914, "loss": 0.8365, "step": 55 }, { "epoch": 0.17, "learning_rate": 0.0006424859460692295, "loss": 0.7633, "step": 56 }, { "epoch": 0.18, "learning_rate": 0.0006421338243539165, "loss": 0.7718, "step": 57 }, { "epoch": 0.18, "learning_rate": 0.0006417737409292403, "loss": 0.7672, "step": 58 }, { "epoch": 0.18, "learning_rate": 0.0006414057048351684, "loss": 0.8107, "step": 59 }, { "epoch": 0.19, "learning_rate": 0.0006410297253113221, "loss": 0.7979, "step": 60 }, { "epoch": 0.19, "learning_rate": 0.0006406458117967443, "loss": 0.7634, "step": 61 }, { "epoch": 0.19, "learning_rate": 0.0006402539739296618, "loss": 0.7504, "step": 62 }, { "epoch": 0.2, "learning_rate": 0.0006398542215472443, "loss": 0.8082, "step": 63 }, { "epoch": 0.2, "learning_rate": 0.0006394465646853571, "loss": 0.8355, "step": 64 }, { "epoch": 0.2, "learning_rate": 0.0006390310135783086, "loss": 0.7458, "step": 65 }, { "epoch": 0.21, "learning_rate": 0.0006386075786585944, "loss": 0.7525, "step": 66 }, { "epoch": 0.21, "learning_rate": 0.0006381762705566343, "loss": 0.7464, "step": 67 }, { "epoch": 0.21, "learning_rate": 0.0006377371001005063, "loss": 0.78, "step": 68 }, { "epoch": 0.22, "learning_rate": 0.0006372900783156745, "loss": 0.7752, "step": 69 }, { "epoch": 0.22, "learning_rate": 0.0006368352164247117, "loss": 0.7299, "step": 70 }, { "epoch": 0.22, "learning_rate": 0.0006363725258470184, "loss": 0.7722, "step": 71 }, { "epoch": 0.22, "learning_rate": 0.0006359020181985365, "loss": 0.8236, "step": 72 }, { "epoch": 0.23, "learning_rate": 0.0006354237052914561, "loss": 0.7589, "step": 73 }, { "epoch": 0.23, "learning_rate": 0.0006349375991339202, "loss": 0.7948, "step": 74 }, { "epoch": 0.23, "learning_rate": 0.0006344437119297233, "loss": 0.7528, "step": 75 }, { "epoch": 0.24, "learning_rate": 0.0006339420560780045, "loss": 0.7842, "step": 76 }, { "epoch": 0.24, "learning_rate": 0.0006334326441729361, "loss": 0.7541, "step": 77 }, { "epoch": 0.24, "learning_rate": 0.000632915489003408, "loss": 0.7425, "step": 78 }, { "epoch": 0.25, "learning_rate": 0.0006323906035527062, "loss": 0.8168, "step": 79 }, { "epoch": 0.25, "learning_rate": 0.0006318580009981871, "loss": 0.8074, "step": 80 }, { "epoch": 0.25, "learning_rate": 0.0006313176947109465, "loss": 0.7679, "step": 81 }, { "epoch": 0.26, "learning_rate": 0.0006307696982554838, "loss": 0.7465, "step": 82 }, { "epoch": 0.26, "learning_rate": 0.0006302140253893622, "loss": 0.7073, "step": 83 }, { "epoch": 0.26, "learning_rate": 0.0006296506900628619, "loss": 0.7687, "step": 84 }, { "epoch": 0.27, "learning_rate": 0.0006290797064186315, "loss": 0.7578, "step": 85 }, { "epoch": 0.27, "learning_rate": 0.0006285010887913319, "loss": 0.7494, "step": 86 }, { "epoch": 0.27, "learning_rate": 0.0006279148517072765, "loss": 0.7326, "step": 87 }, { "epoch": 0.27, "learning_rate": 0.000627321009884067, "loss": 0.7603, "step": 88 }, { "epoch": 0.28, "learning_rate": 0.0006267195782302236, "loss": 0.8141, "step": 89 }, { "epoch": 0.28, "learning_rate": 0.0006261105718448105, "loss": 0.7542, "step": 90 }, { "epoch": 0.28, "learning_rate": 0.0006254940060170575, "loss": 0.7597, "step": 91 }, { "epoch": 0.29, "learning_rate": 0.0006248698962259753, "loss": 0.7332, "step": 92 }, { "epoch": 0.29, "learning_rate": 0.0006242382581399676, "loss": 0.7031, "step": 93 }, { "epoch": 0.29, "learning_rate": 0.0006235991076164375, "loss": 0.7258, "step": 94 }, { "epoch": 0.3, "learning_rate": 0.0006229524607013892, "loss": 0.7634, "step": 95 }, { "epoch": 0.3, "learning_rate": 0.0006222983336290254, "loss": 0.765, "step": 96 }, { "epoch": 0.3, "learning_rate": 0.0006216367428213398, "loss": 0.7246, "step": 97 }, { "epoch": 0.31, "learning_rate": 0.0006209677048877046, "loss": 0.7115, "step": 98 }, { "epoch": 0.31, "learning_rate": 0.0006202912366244535, "loss": 0.6748, "step": 99 }, { "epoch": 0.31, "learning_rate": 0.0006196073550144604, "loss": 0.6995, "step": 100 }, { "epoch": 0.32, "learning_rate": 0.0006189160772267127, "loss": 0.7764, "step": 101 }, { "epoch": 0.32, "learning_rate": 0.00061821742061588, "loss": 0.8628, "step": 102 }, { "epoch": 0.32, "learning_rate": 0.0006175114027218794, "loss": 0.7266, "step": 103 }, { "epoch": 0.32, "learning_rate": 0.0006167980412694342, "loss": 0.7557, "step": 104 }, { "epoch": 0.33, "learning_rate": 0.0006160773541676288, "loss": 0.7518, "step": 105 }, { "epoch": 0.33, "learning_rate": 0.0006153493595094602, "loss": 0.7589, "step": 106 }, { "epoch": 0.33, "learning_rate": 0.000614614075571383, "loss": 0.7506, "step": 107 }, { "epoch": 0.34, "learning_rate": 0.0006138715208128501, "loss": 0.6617, "step": 108 }, { "epoch": 0.34, "learning_rate": 0.0006131217138758505, "loss": 0.7396, "step": 109 }, { "epoch": 0.34, "learning_rate": 0.0006123646735844401, "loss": 0.7666, "step": 110 }, { "epoch": 0.35, "learning_rate": 0.00061160041894427, "loss": 0.7555, "step": 111 }, { "epoch": 0.35, "learning_rate": 0.0006108289691421089, "loss": 0.7301, "step": 112 }, { "epoch": 0.35, "learning_rate": 0.0006100503435453614, "loss": 0.7364, "step": 113 }, { "epoch": 0.36, "learning_rate": 0.0006092645617015822, "loss": 0.7461, "step": 114 }, { "epoch": 0.36, "learning_rate": 0.0006084716433379844, "loss": 0.8086, "step": 115 }, { "epoch": 0.36, "learning_rate": 0.0006076716083609456, "loss": 0.7577, "step": 116 }, { "epoch": 0.36, "learning_rate": 0.0006068644768555068, "loss": 0.7094, "step": 117 }, { "epoch": 0.37, "learning_rate": 0.0006060502690848696, "loss": 0.726, "step": 118 }, { "epoch": 0.37, "learning_rate": 0.0006052290054898859, "loss": 0.7243, "step": 119 }, { "epoch": 0.37, "learning_rate": 0.0006044007066885458, "loss": 0.7119, "step": 120 }, { "epoch": 0.38, "learning_rate": 0.0006035653934754598, "loss": 0.7049, "step": 121 }, { "epoch": 0.38, "learning_rate": 0.0006027230868213366, "loss": 0.7424, "step": 122 }, { "epoch": 0.38, "learning_rate": 0.0006018738078724563, "loss": 0.7271, "step": 123 }, { "epoch": 0.39, "learning_rate": 0.0006010175779501405, "loss": 0.7996, "step": 124 }, { "epoch": 0.39, "learning_rate": 0.0006001544185502158, "loss": 0.7468, "step": 125 }, { "epoch": 0.39, "learning_rate": 0.0005992843513424754, "loss": 0.7513, "step": 126 }, { "epoch": 0.4, "learning_rate": 0.0005984073981701338, "loss": 0.7461, "step": 127 }, { "epoch": 0.4, "learning_rate": 0.0005975235810492794, "loss": 0.6821, "step": 128 }, { "epoch": 0.4, "learning_rate": 0.0005966329221683215, "loss": 0.7314, "step": 129 }, { "epoch": 0.41, "learning_rate": 0.0005957354438874327, "loss": 0.714, "step": 130 }, { "epoch": 0.41, "learning_rate": 0.0005948311687379884, "loss": 0.7339, "step": 131 }, { "epoch": 0.41, "learning_rate": 0.000593920119422001, "loss": 0.7021, "step": 132 }, { "epoch": 0.41, "learning_rate": 0.0005930023188115492, "loss": 0.7228, "step": 133 }, { "epoch": 0.42, "learning_rate": 0.0005920777899482046, "loss": 0.7107, "step": 134 }, { "epoch": 0.42, "learning_rate": 0.0005911465560424532, "loss": 0.659, "step": 135 }, { "epoch": 0.42, "learning_rate": 0.0005902086404731118, "loss": 0.7028, "step": 136 }, { "epoch": 0.43, "learning_rate": 0.0005892640667867423, "loss": 0.7275, "step": 137 }, { "epoch": 0.43, "learning_rate": 0.00058831285869706, "loss": 0.6889, "step": 138 }, { "epoch": 0.43, "learning_rate": 0.0005873550400843378, "loss": 0.7891, "step": 139 }, { "epoch": 0.44, "learning_rate": 0.0005863906349948074, "loss": 0.7904, "step": 140 }, { "epoch": 0.44, "learning_rate": 0.0005854196676400555, "loss": 0.6674, "step": 141 }, { "epoch": 0.44, "learning_rate": 0.0005844421623964157, "loss": 0.7352, "step": 142 }, { "epoch": 0.45, "learning_rate": 0.0005834581438043563, "loss": 0.6965, "step": 143 }, { "epoch": 0.45, "learning_rate": 0.000582467636567865, "loss": 0.7238, "step": 144 }, { "epoch": 0.45, "learning_rate": 0.0005814706655538279, "loss": 0.7064, "step": 145 }, { "epoch": 0.46, "learning_rate": 0.0005804672557914059, "loss": 0.6984, "step": 146 }, { "epoch": 0.46, "learning_rate": 0.0005794574324714057, "loss": 0.7594, "step": 147 }, { "epoch": 0.46, "learning_rate": 0.0005784412209456479, "loss": 0.6884, "step": 148 }, { "epoch": 0.46, "learning_rate": 0.00057741864672633, "loss": 0.7141, "step": 149 }, { "epoch": 0.47, "learning_rate": 0.0005763897354853866, "loss": 0.705, "step": 150 }, { "epoch": 0.47, "learning_rate": 0.0005753545130538441, "loss": 0.7613, "step": 151 }, { "epoch": 0.47, "learning_rate": 0.0005743130054211732, "loss": 0.736, "step": 152 }, { "epoch": 0.48, "learning_rate": 0.0005732652387346351, "loss": 0.6814, "step": 153 }, { "epoch": 0.48, "learning_rate": 0.0005722112392986265, "loss": 0.7002, "step": 154 }, { "epoch": 0.48, "learning_rate": 0.0005711510335740182, "loss": 0.7023, "step": 155 }, { "epoch": 0.49, "learning_rate": 0.0005700846481774913, "loss": 0.7617, "step": 156 }, { "epoch": 0.49, "learning_rate": 0.0005690121098808687, "loss": 0.7079, "step": 157 }, { "epoch": 0.49, "learning_rate": 0.0005679334456104429, "loss": 0.7614, "step": 158 }, { "epoch": 0.5, "learning_rate": 0.000566848682446301, "loss": 0.6786, "step": 159 }, { "epoch": 0.5, "learning_rate": 0.0005657578476216432, "loss": 0.6773, "step": 160 }, { "epoch": 0.5, "learning_rate": 0.0005646609685221003, "loss": 0.7085, "step": 161 }, { "epoch": 0.51, "learning_rate": 0.0005635580726850462, "loss": 0.7167, "step": 162 }, { "epoch": 0.51, "learning_rate": 0.0005624491877989055, "loss": 0.7192, "step": 163 }, { "epoch": 0.51, "learning_rate": 0.0005613343417024599, "loss": 0.6761, "step": 164 }, { "epoch": 0.51, "learning_rate": 0.0005602135623841478, "loss": 0.7508, "step": 165 }, { "epoch": 0.52, "learning_rate": 0.0005590868779813627, "loss": 0.6978, "step": 166 }, { "epoch": 0.52, "learning_rate": 0.0005579543167797467, "loss": 0.7459, "step": 167 }, { "epoch": 0.52, "learning_rate": 0.0005568159072124794, "loss": 0.7438, "step": 168 }, { "epoch": 0.53, "learning_rate": 0.0005556716778595654, "loss": 0.7073, "step": 169 }, { "epoch": 0.53, "learning_rate": 0.0005545216574471164, "loss": 0.6385, "step": 170 }, { "epoch": 0.53, "learning_rate": 0.0005533658748466291, "loss": 0.6993, "step": 171 }, { "epoch": 0.54, "learning_rate": 0.0005522043590742615, "loss": 0.7258, "step": 172 }, { "epoch": 0.54, "learning_rate": 0.0005510371392901041, "loss": 0.7405, "step": 173 }, { "epoch": 0.54, "learning_rate": 0.0005498642447974479, "loss": 0.7525, "step": 174 }, { "epoch": 0.55, "learning_rate": 0.0005486857050420481, "loss": 0.6639, "step": 175 }, { "epoch": 0.55, "learning_rate": 0.0005475015496113861, "loss": 0.7415, "step": 176 }, { "epoch": 0.55, "learning_rate": 0.0005463118082339253, "loss": 0.7816, "step": 177 }, { "epoch": 0.56, "learning_rate": 0.0005451165107783659, "loss": 0.711, "step": 178 }, { "epoch": 0.56, "learning_rate": 0.0005439156872528941, "loss": 0.7138, "step": 179 }, { "epoch": 0.56, "learning_rate": 0.0005427093678044299, "loss": 0.7069, "step": 180 }, { "epoch": 0.56, "learning_rate": 0.0005414975827178688, "loss": 0.7553, "step": 181 }, { "epoch": 0.57, "learning_rate": 0.000540280362415323, "loss": 0.7045, "step": 182 }, { "epoch": 0.57, "learning_rate": 0.0005390577374553561, "loss": 0.7011, "step": 183 }, { "epoch": 0.57, "learning_rate": 0.0005378297385322177, "loss": 0.7441, "step": 184 }, { "epoch": 0.58, "learning_rate": 0.0005365963964750707, "loss": 0.6797, "step": 185 }, { "epoch": 0.58, "learning_rate": 0.0005353577422472196, "loss": 0.6901, "step": 186 }, { "epoch": 0.58, "learning_rate": 0.0005341138069453313, "loss": 0.7136, "step": 187 }, { "epoch": 0.59, "learning_rate": 0.0005328646217986553, "loss": 0.7459, "step": 188 }, { "epoch": 0.59, "learning_rate": 0.0005316102181682396, "loss": 0.7064, "step": 189 }, { "epoch": 0.59, "learning_rate": 0.0005303506275461433, "loss": 0.6705, "step": 190 }, { "epoch": 0.6, "learning_rate": 0.0005290858815546459, "loss": 0.7008, "step": 191 }, { "epoch": 0.6, "learning_rate": 0.0005278160119454536, "loss": 0.7538, "step": 192 }, { "epoch": 0.6, "learning_rate": 0.0005265410505989021, "loss": 0.7726, "step": 193 }, { "epoch": 0.61, "learning_rate": 0.000525261029523156, "loss": 0.7532, "step": 194 }, { "epoch": 0.61, "learning_rate": 0.0005239759808534055, "loss": 0.6978, "step": 195 }, { "epoch": 0.61, "learning_rate": 0.0005226859368510599, "loss": 0.7182, "step": 196 }, { "epoch": 0.61, "learning_rate": 0.0005213909299029368, "loss": 0.6776, "step": 197 }, { "epoch": 0.62, "learning_rate": 0.0005200909925204501, "loss": 0.7447, "step": 198 }, { "epoch": 0.62, "learning_rate": 0.0005187861573387928, "loss": 0.7298, "step": 199 }, { "epoch": 0.62, "learning_rate": 0.0005174764571161185, "loss": 0.6833, "step": 200 }, { "epoch": 0.63, "learning_rate": 0.0005161619247327185, "loss": 0.7518, "step": 201 }, { "epoch": 0.63, "learning_rate": 0.0005148425931901961, "loss": 0.7429, "step": 202 }, { "epoch": 0.63, "learning_rate": 0.0005135184956106394, "loss": 0.763, "step": 203 }, { "epoch": 0.64, "learning_rate": 0.000512189665235788, "loss": 0.7682, "step": 204 }, { "epoch": 0.64, "learning_rate": 0.0005108561354261996, "loss": 0.7063, "step": 205 }, { "epoch": 0.64, "learning_rate": 0.0005095179396604121, "loss": 0.6956, "step": 206 }, { "epoch": 0.65, "learning_rate": 0.0005081751115341034, "loss": 0.7434, "step": 207 }, { "epoch": 0.65, "learning_rate": 0.0005068276847592474, "loss": 0.6673, "step": 208 }, { "epoch": 0.65, "learning_rate": 0.0005054756931632682, "loss": 0.6448, "step": 209 }, { "epoch": 0.65, "learning_rate": 0.0005041191706881909, "loss": 0.7095, "step": 210 }, { "epoch": 0.66, "learning_rate": 0.0005027581513897888, "loss": 0.673, "step": 211 }, { "epoch": 0.66, "learning_rate": 0.000501392669436729, "loss": 0.6363, "step": 212 }, { "epoch": 0.66, "learning_rate": 0.0005000227591097145, "loss": 0.6711, "step": 213 }, { "epoch": 0.67, "learning_rate": 0.0004986484548006237, "loss": 0.6375, "step": 214 }, { "epoch": 0.67, "learning_rate": 0.0004972697910116468, "loss": 0.7466, "step": 215 }, { "epoch": 0.67, "learning_rate": 0.0004958868023544192, "loss": 0.7147, "step": 216 }, { "epoch": 0.68, "learning_rate": 0.0004944995235491534, "loss": 0.714, "step": 217 }, { "epoch": 0.68, "learning_rate": 0.0004931079894237669, "loss": 0.7377, "step": 218 }, { "epoch": 0.68, "learning_rate": 0.0004917122349130078, "loss": 0.7087, "step": 219 }, { "epoch": 0.69, "learning_rate": 0.000490312295057578, "loss": 0.6716, "step": 220 }, { "epoch": 0.69, "learning_rate": 0.0004889082050032529, "loss": 0.7298, "step": 221 }, { "epoch": 0.69, "learning_rate": 0.0004875, "loss": 0.6557, "step": 222 }, { "epoch": 0.7, "learning_rate": 0.0004860877154010932, "loss": 0.7042, "step": 223 }, { "epoch": 0.7, "learning_rate": 0.00048467138666222534, "loss": 0.6617, "step": 224 }, { "epoch": 0.7, "learning_rate": 0.00048325104934061853, "loss": 0.7019, "step": 225 }, { "epoch": 0.7, "learning_rate": 0.00048182673909413103, "loss": 0.6756, "step": 226 }, { "epoch": 0.71, "learning_rate": 0.00048039849168036205, "loss": 0.709, "step": 227 }, { "epoch": 0.71, "learning_rate": 0.00047896634295575434, "loss": 0.7434, "step": 228 }, { "epoch": 0.71, "learning_rate": 0.00047753032887469385, "loss": 0.7533, "step": 229 }, { "epoch": 0.72, "learning_rate": 0.0004760904854886072, "loss": 0.7019, "step": 230 }, { "epoch": 0.72, "learning_rate": 0.0004746468489450562, "loss": 0.6852, "step": 231 }, { "epoch": 0.72, "learning_rate": 0.0004731994554868307, "loss": 0.7228, "step": 232 }, { "epoch": 0.73, "learning_rate": 0.000471748341451039, "loss": 0.7513, "step": 233 }, { "epoch": 0.73, "learning_rate": 0.0004702935432681949, "loss": 0.6896, "step": 234 }, { "epoch": 0.73, "learning_rate": 0.0004688350974613038, "loss": 0.6815, "step": 235 }, { "epoch": 0.74, "learning_rate": 0.0004673730406449449, "loss": 0.7682, "step": 236 }, { "epoch": 0.74, "learning_rate": 0.00046590740952435323, "loss": 0.7025, "step": 237 }, { "epoch": 0.74, "learning_rate": 0.0004644382408944968, "loss": 0.6662, "step": 238 }, { "epoch": 0.75, "learning_rate": 0.00046296557163915395, "loss": 0.7541, "step": 239 }, { "epoch": 0.75, "learning_rate": 0.0004614894387299867, "loss": 0.7336, "step": 240 }, { "epoch": 0.75, "learning_rate": 0.0004600098792256131, "loss": 0.6618, "step": 241 }, { "epoch": 0.75, "learning_rate": 0.0004585269302706762, "loss": 0.6729, "step": 242 }, { "epoch": 0.76, "learning_rate": 0.0004570406290949121, "loss": 0.7327, "step": 243 }, { "epoch": 0.76, "learning_rate": 0.0004555510130122151, "loss": 0.6778, "step": 244 }, { "epoch": 0.76, "learning_rate": 0.0004540581194197008, "loss": 0.6219, "step": 245 }, { "epoch": 0.77, "learning_rate": 0.00045256198579676755, "loss": 0.6984, "step": 246 }, { "epoch": 0.77, "learning_rate": 0.000451062649704155, "loss": 0.637, "step": 247 }, { "epoch": 0.77, "learning_rate": 0.000449560148783002, "loss": 0.658, "step": 248 }, { "epoch": 0.78, "learning_rate": 0.0004480545207539004, "loss": 0.7305, "step": 249 }, { "epoch": 0.78, "learning_rate": 0.0004465458034159491, "loss": 0.6788, "step": 250 } ], "logging_steps": 1, "max_steps": 640, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "total_flos": 6.485217104687923e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }