|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.15840619002650258, |
|
"global_step": 325, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.7142857142857135e-05, |
|
"loss": 1.9649, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00011428571428571427, |
|
"loss": 2.0221, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001714285714285714, |
|
"loss": 1.9956, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00022857142857142854, |
|
"loss": 1.9373, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002857142857142857, |
|
"loss": 2.0042, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0003428571428571428, |
|
"loss": 1.9593, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00039999999999999996, |
|
"loss": 2.031, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004571428571428571, |
|
"loss": 1.9644, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005142857142857142, |
|
"loss": 1.9968, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005714285714285714, |
|
"loss": 1.9694, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005999996407482917, |
|
"loss": 1.9885, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005999967667397879, |
|
"loss": 1.9295, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005999910187503132, |
|
"loss": 1.9646, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0005999823968349338, |
|
"loss": 1.9615, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000599970901076248, |
|
"loss": 1.9456, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005999565315843857, |
|
"loss": 2.014, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005999392884970068, |
|
"loss": 1.9118, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005999191719793011, |
|
"loss": 1.944, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005998961822239856, |
|
"loss": 1.9475, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000599870319451303, |
|
"loss": 2.038, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005998415839090198, |
|
"loss": 1.9438, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005998099758724235, |
|
"loss": 2.0804, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005997754956443205, |
|
"loss": 1.9767, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005997381435550326, |
|
"loss": 1.9322, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005996979199623944, |
|
"loss": 1.9276, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005996548252517495, |
|
"loss": 1.9933, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005996088598359469, |
|
"loss": 1.8901, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005995600241553371, |
|
"loss": 1.9472, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000599508318677768, |
|
"loss": 1.9855, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00059945374389858, |
|
"loss": 1.9887, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005993963003406018, |
|
"loss": 1.9798, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005993359885541448, |
|
"loss": 1.9956, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005992728091169984, |
|
"loss": 1.9411, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0005992067626344242, |
|
"loss": 1.9722, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00059913784973915, |
|
"loss": 1.97, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005990660710913641, |
|
"loss": 1.9612, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005989914273787089, |
|
"loss": 1.8503, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005989139193162741, |
|
"loss": 1.992, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00059883354764659, |
|
"loss": 1.9675, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005987503131396204, |
|
"loss": 1.9609, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005986642165927551, |
|
"loss": 2.0349, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005985752588308026, |
|
"loss": 1.9824, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005984834407059817, |
|
"loss": 1.9017, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0005983887630979137, |
|
"loss": 1.903, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000598291226913614, |
|
"loss": 1.9067, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000598190833087483, |
|
"loss": 1.941, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005980875825812974, |
|
"loss": 1.9856, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005979814763842014, |
|
"loss": 1.9555, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005978725155126967, |
|
"loss": 1.9408, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005977607010106324, |
|
"loss": 2.0131, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005976460339491963, |
|
"loss": 1.9499, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000597528515426903, |
|
"loss": 1.9381, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005974081465695849, |
|
"loss": 1.9805, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005972849285303804, |
|
"loss": 1.8787, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005971588624897232, |
|
"loss": 1.8912, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0005970299496553309, |
|
"loss": 1.9536, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005968981912621937, |
|
"loss": 1.9388, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005967635885725623, |
|
"loss": 2.0041, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005966261428759357, |
|
"loss": 1.9447, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005964858554890492, |
|
"loss": 2.0031, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005963427277558616, |
|
"loss": 1.9063, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005961967610475422, |
|
"loss": 1.9492, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005960479567624578, |
|
"loss": 1.9956, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005958963163261595, |
|
"loss": 1.9329, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005957418411913688, |
|
"loss": 1.9424, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0005955845328379636, |
|
"loss": 1.9105, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000595424392772964, |
|
"loss": 1.9439, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005952614225305184, |
|
"loss": 1.9586, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005950956236718882, |
|
"loss": 1.8851, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005949269977854329, |
|
"loss": 1.9031, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005947555464865954, |
|
"loss": 1.9294, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000594581271417886, |
|
"loss": 1.9779, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005944041742488665, |
|
"loss": 1.9515, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005942242566761351, |
|
"loss": 1.9249, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005940415204233092, |
|
"loss": 1.9104, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0005938559672410093, |
|
"loss": 1.9548, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005936675989068425, |
|
"loss": 1.9314, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005934764172253849, |
|
"loss": 1.9468, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005932824240281645, |
|
"loss": 1.9821, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005930856211736438, |
|
"loss": 1.9609, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005928860105472022, |
|
"loss": 1.9261, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005926835940611172, |
|
"loss": 1.9594, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000592478373654547, |
|
"loss": 1.8914, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005922703512935113, |
|
"loss": 1.9509, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005920595289708723, |
|
"loss": 1.9988, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005918459087063165, |
|
"loss": 1.9886, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005916294925463346, |
|
"loss": 2.0024, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005914102825642018, |
|
"loss": 1.859, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005911882808599586, |
|
"loss": 1.9439, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005909634895603902, |
|
"loss": 1.9823, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000590735910819006, |
|
"loss": 1.9308, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005905055468160197, |
|
"loss": 1.9459, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005902723997583274, |
|
"loss": 1.9146, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005900364718794873, |
|
"loss": 1.9036, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005897977654396977, |
|
"loss": 1.9035, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000589556282725776, |
|
"loss": 1.9607, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005893120260511362, |
|
"loss": 2.0468, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005890649977557668, |
|
"loss": 1.9687, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005888152002062089, |
|
"loss": 1.9958, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005885626357955329, |
|
"loss": 1.9025, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005883073069433159, |
|
"loss": 1.9077, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005880492160956185, |
|
"loss": 1.9494, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005877883657249612, |
|
"loss": 1.8716, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000587524758330301, |
|
"loss": 1.9075, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005872583964370073, |
|
"loss": 1.9406, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005869892825968375, |
|
"loss": 1.9179, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005867174193879131, |
|
"loss": 1.9702, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005864428094146943, |
|
"loss": 1.9297, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005861654553079557, |
|
"loss": 1.8467, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005858853597247606, |
|
"loss": 1.9145, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005856025253484358, |
|
"loss": 1.944, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005853169548885461, |
|
"loss": 1.9321, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005850286510808675, |
|
"loss": 1.9838, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005847376166873624, |
|
"loss": 1.9891, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005844438544961515, |
|
"loss": 1.9384, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005841473673214886, |
|
"loss": 1.8826, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005838481580037324, |
|
"loss": 1.8983, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005835462294093202, |
|
"loss": 1.8804, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00058324158443074, |
|
"loss": 1.8997, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005829342259865026, |
|
"loss": 1.9478, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005826241570211144, |
|
"loss": 1.9727, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005823113805050482, |
|
"loss": 1.9216, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005819958994347157, |
|
"loss": 1.9208, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000581677716832438, |
|
"loss": 1.9201, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005813568357464172, |
|
"loss": 1.869, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005810332592507066, |
|
"loss": 1.9111, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005807069904451822, |
|
"loss": 1.8696, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005803780324555121, |
|
"loss": 1.8946, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005800463884331269, |
|
"loss": 1.9641, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005797120615551896, |
|
"loss": 1.8923, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005793750550245648, |
|
"loss": 1.8612, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005790353720697887, |
|
"loss": 1.927, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005786930159450374, |
|
"loss": 1.9709, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005783479899300962, |
|
"loss": 1.9665, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005780002973303283, |
|
"loss": 1.8657, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005776499414766424, |
|
"loss": 2.0055, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005772969257254615, |
|
"loss": 1.9147, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0005769412534586908, |
|
"loss": 1.9383, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005765829280836846, |
|
"loss": 1.9575, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005762219530332142, |
|
"loss": 1.9192, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005758583317654352, |
|
"loss": 1.8842, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005754920677638535, |
|
"loss": 1.9905, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000575123164537293, |
|
"loss": 1.9686, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005747516256198616, |
|
"loss": 2.0003, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005743774545709163, |
|
"loss": 1.9195, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000574000654975031, |
|
"loss": 1.8899, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005736212304419609, |
|
"loss": 1.9143, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000573239184606608, |
|
"loss": 1.8431, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005728545211289866, |
|
"loss": 1.8978, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005724672436941882, |
|
"loss": 1.9017, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005720773560123461, |
|
"loss": 1.8912, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005716848618185996, |
|
"loss": 1.9412, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000571289764873059, |
|
"loss": 1.8843, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005708920689607684, |
|
"loss": 1.8971, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005704917778916709, |
|
"loss": 1.9243, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005700888955005706, |
|
"loss": 1.8342, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000569683425647097, |
|
"loss": 1.8725, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005692753722156673, |
|
"loss": 1.8597, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0005688647391154496, |
|
"loss": 1.954, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0005684515302803256, |
|
"loss": 1.9454, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000568035749668852, |
|
"loss": 1.9336, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000567617401264224, |
|
"loss": 1.96, |
|
"step": 324 |
|
} |
|
], |
|
"max_steps": 2051, |
|
"num_train_epochs": 1, |
|
"total_flos": 4.557933379584e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|