{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.9730941704035874,
  "eval_steps": 500,
  "global_step": 220,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017937219730941704,
      "grad_norm": 1.1350524425506592,
      "learning_rate": 4e-05,
      "loss": 2.626,
      "step": 1
    },
    {
      "epoch": 0.03587443946188341,
      "grad_norm": 1.1098030805587769,
      "learning_rate": 8e-05,
      "loss": 2.564,
      "step": 2
    },
    {
      "epoch": 0.053811659192825115,
      "grad_norm": 1.004073143005371,
      "learning_rate": 0.00012,
      "loss": 2.5371,
      "step": 3
    },
    {
      "epoch": 0.07174887892376682,
      "grad_norm": 1.0235692262649536,
      "learning_rate": 0.00016,
      "loss": 2.3217,
      "step": 4
    },
    {
      "epoch": 0.08968609865470852,
      "grad_norm": 0.9896207451820374,
      "learning_rate": 0.0002,
      "loss": 2.1635,
      "step": 5
    },
    {
      "epoch": 0.10762331838565023,
      "grad_norm": 1.0160284042358398,
      "learning_rate": 0.00019906976744186048,
      "loss": 1.9974,
      "step": 6
    },
    {
      "epoch": 0.12556053811659193,
      "grad_norm": 1.6710572242736816,
      "learning_rate": 0.00019813953488372096,
      "loss": 1.721,
      "step": 7
    },
    {
      "epoch": 0.14349775784753363,
      "grad_norm": 1.414752721786499,
      "learning_rate": 0.0001972093023255814,
      "loss": 1.5117,
      "step": 8
    },
    {
      "epoch": 0.16143497757847533,
      "grad_norm": 1.0044751167297363,
      "learning_rate": 0.00019627906976744185,
      "loss": 1.3689,
      "step": 9
    },
    {
      "epoch": 0.17937219730941703,
      "grad_norm": 0.8621335029602051,
      "learning_rate": 0.00019534883720930232,
      "loss": 1.3251,
      "step": 10
    },
    {
      "epoch": 0.19730941704035873,
      "grad_norm": 0.7370575666427612,
      "learning_rate": 0.0001944186046511628,
      "loss": 1.2459,
      "step": 11
    },
    {
      "epoch": 0.21524663677130046,
      "grad_norm": 0.7463206648826599,
      "learning_rate": 0.00019348837209302326,
      "loss": 1.1113,
      "step": 12
    },
    {
      "epoch": 0.23318385650224216,
      "grad_norm": 0.9221929907798767,
      "learning_rate": 0.00019255813953488374,
      "loss": 1.1779,
      "step": 13
    },
    {
      "epoch": 0.25112107623318386,
      "grad_norm": 0.8653731346130371,
      "learning_rate": 0.0001916279069767442,
      "loss": 1.056,
      "step": 14
    },
    {
      "epoch": 0.26905829596412556,
      "grad_norm": 0.8368218541145325,
      "learning_rate": 0.00019069767441860466,
      "loss": 1.0355,
      "step": 15
    },
    {
      "epoch": 0.28699551569506726,
      "grad_norm": 0.9069833755493164,
      "learning_rate": 0.00018976744186046513,
      "loss": 1.076,
      "step": 16
    },
    {
      "epoch": 0.30493273542600896,
      "grad_norm": 1.010295033454895,
      "learning_rate": 0.00018883720930232557,
      "loss": 1.037,
      "step": 17
    },
    {
      "epoch": 0.32286995515695066,
      "grad_norm": 0.9165616631507874,
      "learning_rate": 0.00018790697674418605,
      "loss": 1.0841,
      "step": 18
    },
    {
      "epoch": 0.34080717488789236,
      "grad_norm": 1.2438362836837769,
      "learning_rate": 0.00018697674418604652,
      "loss": 0.928,
      "step": 19
    },
    {
      "epoch": 0.35874439461883406,
      "grad_norm": 1.1386940479278564,
      "learning_rate": 0.000186046511627907,
      "loss": 0.9742,
      "step": 20
    },
    {
      "epoch": 0.37668161434977576,
      "grad_norm": 1.0614705085754395,
      "learning_rate": 0.00018511627906976744,
      "loss": 0.986,
      "step": 21
    },
    {
      "epoch": 0.39461883408071746,
      "grad_norm": 1.1421048641204834,
      "learning_rate": 0.0001841860465116279,
      "loss": 0.9306,
      "step": 22
    },
    {
      "epoch": 0.4125560538116592,
      "grad_norm": 0.9451465606689453,
      "learning_rate": 0.00018325581395348838,
      "loss": 0.9888,
      "step": 23
    },
    {
      "epoch": 0.4304932735426009,
      "grad_norm": 0.753145158290863,
      "learning_rate": 0.00018232558139534886,
      "loss": 0.9598,
      "step": 24
    },
    {
      "epoch": 0.4484304932735426,
      "grad_norm": 0.6006896495819092,
      "learning_rate": 0.0001813953488372093,
      "loss": 0.8188,
      "step": 25
    },
    {
      "epoch": 0.4663677130044843,
      "grad_norm": 0.6499263048171997,
      "learning_rate": 0.00018046511627906977,
      "loss": 0.9449,
      "step": 26
    },
    {
      "epoch": 0.484304932735426,
      "grad_norm": 0.6340591907501221,
      "learning_rate": 0.00017953488372093025,
      "loss": 1.0643,
      "step": 27
    },
    {
      "epoch": 0.5022421524663677,
      "grad_norm": 0.7478179335594177,
      "learning_rate": 0.0001786046511627907,
      "loss": 0.9822,
      "step": 28
    },
    {
      "epoch": 0.5201793721973094,
      "grad_norm": 0.6700637936592102,
      "learning_rate": 0.00017767441860465117,
      "loss": 0.9793,
      "step": 29
    },
    {
      "epoch": 0.5381165919282511,
      "grad_norm": 0.6026176810264587,
      "learning_rate": 0.00017674418604651164,
      "loss": 1.0611,
      "step": 30
    },
    {
      "epoch": 0.5560538116591929,
      "grad_norm": 0.5661296248435974,
      "learning_rate": 0.0001758139534883721,
      "loss": 0.9952,
      "step": 31
    },
    {
      "epoch": 0.5739910313901345,
      "grad_norm": 0.6180285811424255,
      "learning_rate": 0.00017488372093023258,
      "loss": 0.9497,
      "step": 32
    },
    {
      "epoch": 0.5919282511210763,
      "grad_norm": 0.6067416667938232,
      "learning_rate": 0.00017395348837209303,
      "loss": 0.9015,
      "step": 33
    },
    {
      "epoch": 0.6098654708520179,
      "grad_norm": 0.6353489756584167,
      "learning_rate": 0.00017302325581395348,
      "loss": 0.9297,
      "step": 34
    },
    {
      "epoch": 0.6278026905829597,
      "grad_norm": 0.6017511487007141,
      "learning_rate": 0.00017209302325581395,
      "loss": 0.8634,
      "step": 35
    },
    {
      "epoch": 0.6457399103139013,
      "grad_norm": 0.701750636100769,
      "learning_rate": 0.00017116279069767442,
      "loss": 0.9088,
      "step": 36
    },
    {
      "epoch": 0.6636771300448431,
      "grad_norm": 0.6852689385414124,
      "learning_rate": 0.0001702325581395349,
      "loss": 0.9195,
      "step": 37
    },
    {
      "epoch": 0.6816143497757847,
      "grad_norm": 0.6971113681793213,
      "learning_rate": 0.00016930232558139537,
      "loss": 0.8599,
      "step": 38
    },
    {
      "epoch": 0.6995515695067265,
      "grad_norm": 0.6576591730117798,
      "learning_rate": 0.00016837209302325584,
      "loss": 0.764,
      "step": 39
    },
    {
      "epoch": 0.7174887892376681,
      "grad_norm": 0.8312844038009644,
      "learning_rate": 0.00016744186046511629,
      "loss": 0.8577,
      "step": 40
    },
    {
      "epoch": 0.7354260089686099,
      "grad_norm": 0.7586076259613037,
      "learning_rate": 0.00016651162790697673,
      "loss": 0.8069,
      "step": 41
    },
    {
      "epoch": 0.7533632286995515,
      "grad_norm": 0.6356410384178162,
      "learning_rate": 0.0001655813953488372,
      "loss": 0.7753,
      "step": 42
    },
    {
      "epoch": 0.7713004484304933,
      "grad_norm": 0.6421555280685425,
      "learning_rate": 0.00016465116279069768,
      "loss": 0.8733,
      "step": 43
    },
    {
      "epoch": 0.7892376681614349,
      "grad_norm": 0.8002834916114807,
      "learning_rate": 0.00016372093023255815,
      "loss": 1.0058,
      "step": 44
    },
    {
      "epoch": 0.8071748878923767,
      "grad_norm": 0.6567667126655579,
      "learning_rate": 0.00016279069767441862,
      "loss": 0.8771,
      "step": 45
    },
    {
      "epoch": 0.8251121076233184,
      "grad_norm": 0.5926035642623901,
      "learning_rate": 0.00016186046511627907,
      "loss": 0.8556,
      "step": 46
    },
    {
      "epoch": 0.8430493273542601,
      "grad_norm": 0.613197922706604,
      "learning_rate": 0.00016093023255813954,
      "loss": 0.85,
      "step": 47
    },
    {
      "epoch": 0.8609865470852018,
      "grad_norm": 0.7108270525932312,
      "learning_rate": 0.00016,
      "loss": 0.8383,
      "step": 48
    },
    {
      "epoch": 0.8789237668161435,
      "grad_norm": 0.6039162874221802,
      "learning_rate": 0.00015906976744186046,
      "loss": 0.898,
      "step": 49
    },
    {
      "epoch": 0.8968609865470852,
      "grad_norm": 0.6543579697608948,
      "learning_rate": 0.00015813953488372093,
      "loss": 0.8442,
      "step": 50
    },
    {
      "epoch": 0.9147982062780269,
      "grad_norm": 0.6246331334114075,
      "learning_rate": 0.0001572093023255814,
      "loss": 0.7756,
      "step": 51
    },
    {
      "epoch": 0.9327354260089686,
      "grad_norm": 0.6133050322532654,
      "learning_rate": 0.00015627906976744188,
      "loss": 0.8701,
      "step": 52
    },
    {
      "epoch": 0.9506726457399103,
      "grad_norm": 0.6625930070877075,
      "learning_rate": 0.00015534883720930232,
      "loss": 0.9818,
      "step": 53
    },
    {
      "epoch": 0.968609865470852,
      "grad_norm": 0.6724585294723511,
      "learning_rate": 0.0001544186046511628,
      "loss": 0.957,
      "step": 54
    },
    {
      "epoch": 0.9865470852017937,
      "grad_norm": 0.5864427089691162,
      "learning_rate": 0.00015348837209302327,
      "loss": 0.7588,
      "step": 55
    },
    {
      "epoch": 1.0134529147982063,
      "grad_norm": 1.4326505661010742,
      "learning_rate": 0.00015255813953488374,
      "loss": 1.2876,
      "step": 56
    },
    {
      "epoch": 1.031390134529148,
      "grad_norm": 0.7354760766029358,
      "learning_rate": 0.0001516279069767442,
      "loss": 0.6682,
      "step": 57
    },
    {
      "epoch": 1.0493273542600896,
      "grad_norm": 0.7175352573394775,
      "learning_rate": 0.00015069767441860466,
      "loss": 0.6862,
      "step": 58
    },
    {
      "epoch": 1.0672645739910314,
      "grad_norm": 0.7436112761497498,
      "learning_rate": 0.0001497674418604651,
      "loss": 0.7319,
      "step": 59
    },
    {
      "epoch": 1.0852017937219731,
      "grad_norm": 0.7228017449378967,
      "learning_rate": 0.00014883720930232558,
      "loss": 0.842,
      "step": 60
    },
    {
      "epoch": 1.1031390134529149,
      "grad_norm": 0.7761241793632507,
      "learning_rate": 0.00014790697674418605,
      "loss": 0.6666,
      "step": 61
    },
    {
      "epoch": 1.1210762331838564,
      "grad_norm": 0.8203029632568359,
      "learning_rate": 0.00014697674418604652,
      "loss": 0.7403,
      "step": 62
    },
    {
      "epoch": 1.1390134529147982,
      "grad_norm": 0.7372130751609802,
      "learning_rate": 0.000146046511627907,
      "loss": 0.7426,
      "step": 63
    },
    {
      "epoch": 1.15695067264574,
      "grad_norm": 0.7927672863006592,
      "learning_rate": 0.00014511627906976747,
      "loss": 0.7717,
      "step": 64
    },
    {
      "epoch": 1.1748878923766817,
      "grad_norm": 0.7056854367256165,
      "learning_rate": 0.00014418604651162791,
      "loss": 0.6855,
      "step": 65
    },
    {
      "epoch": 1.1928251121076232,
      "grad_norm": 0.8383380174636841,
      "learning_rate": 0.00014325581395348836,
      "loss": 0.7222,
      "step": 66
    },
    {
      "epoch": 1.210762331838565,
      "grad_norm": 0.6172988414764404,
      "learning_rate": 0.00014232558139534883,
      "loss": 0.6318,
      "step": 67
    },
    {
      "epoch": 1.2286995515695067,
      "grad_norm": 0.7639912962913513,
      "learning_rate": 0.0001413953488372093,
      "loss": 0.6523,
      "step": 68
    },
    {
      "epoch": 1.2466367713004485,
      "grad_norm": 0.7420268654823303,
      "learning_rate": 0.00014046511627906978,
      "loss": 0.7358,
      "step": 69
    },
    {
      "epoch": 1.2645739910313902,
      "grad_norm": 0.781150758266449,
      "learning_rate": 0.00013953488372093025,
      "loss": 0.6927,
      "step": 70
    },
    {
      "epoch": 1.2825112107623318,
      "grad_norm": 0.7435458302497864,
      "learning_rate": 0.00013860465116279072,
      "loss": 0.7508,
      "step": 71
    },
    {
      "epoch": 1.3004484304932735,
      "grad_norm": 0.7637338042259216,
      "learning_rate": 0.00013767441860465117,
      "loss": 0.5798,
      "step": 72
    },
    {
      "epoch": 1.3183856502242153,
      "grad_norm": 0.9575199484825134,
      "learning_rate": 0.00013674418604651162,
      "loss": 0.8045,
      "step": 73
    },
    {
      "epoch": 1.336322869955157,
      "grad_norm": 0.836318850517273,
      "learning_rate": 0.0001358139534883721,
      "loss": 0.7916,
      "step": 74
    },
    {
      "epoch": 1.3542600896860986,
      "grad_norm": 0.7818664908409119,
      "learning_rate": 0.00013488372093023256,
      "loss": 0.7037,
      "step": 75
    },
    {
      "epoch": 1.3721973094170403,
      "grad_norm": 0.7612494826316833,
      "learning_rate": 0.00013395348837209303,
      "loss": 0.6847,
      "step": 76
    },
    {
      "epoch": 1.390134529147982,
      "grad_norm": 0.6829874515533447,
      "learning_rate": 0.0001330232558139535,
      "loss": 0.6848,
      "step": 77
    },
    {
      "epoch": 1.4080717488789238,
      "grad_norm": 0.6923062801361084,
      "learning_rate": 0.00013209302325581395,
      "loss": 0.6928,
      "step": 78
    },
    {
      "epoch": 1.4260089686098656,
      "grad_norm": 0.6936827898025513,
      "learning_rate": 0.00013116279069767442,
      "loss": 0.8813,
      "step": 79
    },
    {
      "epoch": 1.4439461883408071,
      "grad_norm": 0.7367523908615112,
      "learning_rate": 0.0001302325581395349,
      "loss": 0.7386,
      "step": 80
    },
    {
      "epoch": 1.4618834080717489,
      "grad_norm": 0.7084159255027771,
      "learning_rate": 0.00012930232558139534,
      "loss": 0.604,
      "step": 81
    },
    {
      "epoch": 1.4798206278026906,
      "grad_norm": 0.817794144153595,
      "learning_rate": 0.00012837209302325582,
      "loss": 0.7644,
      "step": 82
    },
    {
      "epoch": 1.4977578475336322,
      "grad_norm": 0.7807640433311462,
      "learning_rate": 0.0001274418604651163,
      "loss": 0.8061,
      "step": 83
    },
    {
      "epoch": 1.515695067264574,
      "grad_norm": 0.7616767883300781,
      "learning_rate": 0.00012651162790697676,
      "loss": 0.7786,
      "step": 84
    },
    {
      "epoch": 1.5336322869955157,
      "grad_norm": 0.7925138473510742,
      "learning_rate": 0.0001255813953488372,
      "loss": 0.6859,
      "step": 85
    },
    {
      "epoch": 1.5515695067264574,
      "grad_norm": 0.7205699682235718,
      "learning_rate": 0.00012465116279069768,
      "loss": 0.7581,
      "step": 86
    },
    {
      "epoch": 1.5695067264573992,
      "grad_norm": 0.6984810829162598,
      "learning_rate": 0.00012372093023255815,
      "loss": 0.6652,
      "step": 87
    },
    {
      "epoch": 1.587443946188341,
      "grad_norm": 0.7267066836357117,
      "learning_rate": 0.00012279069767441863,
      "loss": 0.6419,
      "step": 88
    },
    {
      "epoch": 1.6053811659192825,
      "grad_norm": 0.7686505913734436,
      "learning_rate": 0.00012186046511627907,
      "loss": 0.7497,
      "step": 89
    },
    {
      "epoch": 1.6233183856502242,
      "grad_norm": 0.755163311958313,
      "learning_rate": 0.00012093023255813953,
      "loss": 0.7757,
      "step": 90
    },
    {
      "epoch": 1.6412556053811658,
      "grad_norm": 0.7927355766296387,
      "learning_rate": 0.00012,
      "loss": 0.7754,
      "step": 91
    },
    {
      "epoch": 1.6591928251121075,
      "grad_norm": 0.6950364708900452,
      "learning_rate": 0.00011906976744186048,
      "loss": 0.6059,
      "step": 92
    },
    {
      "epoch": 1.6771300448430493,
      "grad_norm": 0.7365448474884033,
      "learning_rate": 0.00011813953488372094,
      "loss": 0.673,
      "step": 93
    },
    {
      "epoch": 1.695067264573991,
      "grad_norm": 0.7940488457679749,
      "learning_rate": 0.00011720930232558141,
      "loss": 0.5726,
      "step": 94
    },
    {
      "epoch": 1.7130044843049328,
      "grad_norm": 0.8307069540023804,
      "learning_rate": 0.00011627906976744187,
      "loss": 0.6949,
      "step": 95
    },
    {
      "epoch": 1.7309417040358746,
      "grad_norm": 0.876649796962738,
      "learning_rate": 0.00011534883720930234,
      "loss": 0.6415,
      "step": 96
    },
    {
      "epoch": 1.7488789237668163,
      "grad_norm": 0.9207577109336853,
      "learning_rate": 0.00011441860465116279,
      "loss": 0.674,
      "step": 97
    },
    {
      "epoch": 1.7668161434977578,
      "grad_norm": 0.8050037026405334,
      "learning_rate": 0.00011348837209302326,
      "loss": 0.5656,
      "step": 98
    },
    {
      "epoch": 1.7847533632286996,
      "grad_norm": 0.878441333770752,
      "learning_rate": 0.00011255813953488372,
      "loss": 0.7596,
      "step": 99
    },
    {
      "epoch": 1.8026905829596411,
      "grad_norm": 0.91168612241745,
      "learning_rate": 0.00011162790697674419,
      "loss": 0.8177,
      "step": 100
    },
    {
      "epoch": 1.8206278026905829,
      "grad_norm": 0.7757384777069092,
      "learning_rate": 0.00011069767441860466,
      "loss": 0.652,
      "step": 101
    },
    {
      "epoch": 1.8385650224215246,
      "grad_norm": 0.8266631960868835,
      "learning_rate": 0.00010976744186046512,
      "loss": 0.8207,
      "step": 102
    },
    {
      "epoch": 1.8565022421524664,
      "grad_norm": 0.7787818312644958,
      "learning_rate": 0.0001088372093023256,
      "loss": 0.6802,
      "step": 103
    },
    {
      "epoch": 1.8744394618834082,
      "grad_norm": 0.8642717003822327,
      "learning_rate": 0.00010790697674418607,
      "loss": 0.7011,
      "step": 104
    },
    {
      "epoch": 1.89237668161435,
      "grad_norm": 0.7537955641746521,
      "learning_rate": 0.00010697674418604651,
      "loss": 0.6297,
      "step": 105
    },
    {
      "epoch": 1.9103139013452914,
      "grad_norm": 0.8498083353042603,
      "learning_rate": 0.00010604651162790697,
      "loss": 0.6764,
      "step": 106
    },
    {
      "epoch": 1.9282511210762332,
      "grad_norm": 0.8197365403175354,
      "learning_rate": 0.00010511627906976745,
      "loss": 0.7126,
      "step": 107
    },
    {
      "epoch": 1.9461883408071747,
      "grad_norm": 0.797406792640686,
      "learning_rate": 0.0001041860465116279,
      "loss": 0.7664,
      "step": 108
    },
    {
      "epoch": 1.9641255605381165,
      "grad_norm": 0.8513347506523132,
      "learning_rate": 0.00010325581395348838,
      "loss": 0.7572,
      "step": 109
    },
    {
      "epoch": 1.9820627802690582,
      "grad_norm": 0.8408546447753906,
      "learning_rate": 0.00010232558139534885,
      "loss": 0.5291,
      "step": 110
    },
    {
      "epoch": 2.008968609865471,
      "grad_norm": 1.9233133792877197,
      "learning_rate": 0.00010139534883720931,
      "loss": 1.3532,
      "step": 111
    },
    {
      "epoch": 2.0269058295964126,
      "grad_norm": 0.782871663570404,
      "learning_rate": 0.00010046511627906978,
      "loss": 0.4679,
      "step": 112
    },
    {
      "epoch": 2.0448430493273544,
      "grad_norm": 0.7588692307472229,
      "learning_rate": 9.953488372093024e-05,
      "loss": 0.4962,
      "step": 113
    },
    {
      "epoch": 2.062780269058296,
      "grad_norm": 0.8635579943656921,
      "learning_rate": 9.86046511627907e-05,
      "loss": 0.474,
      "step": 114
    },
    {
      "epoch": 2.0807174887892375,
      "grad_norm": 0.8822935819625854,
      "learning_rate": 9.767441860465116e-05,
      "loss": 0.4129,
      "step": 115
    },
    {
      "epoch": 2.098654708520179,
      "grad_norm": 0.9706945419311523,
      "learning_rate": 9.674418604651163e-05,
      "loss": 0.4961,
      "step": 116
    },
    {
      "epoch": 2.116591928251121,
      "grad_norm": 1.1239269971847534,
      "learning_rate": 9.58139534883721e-05,
      "loss": 0.5805,
      "step": 117
    },
    {
      "epoch": 2.1345291479820627,
      "grad_norm": 1.1263116598129272,
      "learning_rate": 9.488372093023256e-05,
      "loss": 0.4252,
      "step": 118
    },
    {
      "epoch": 2.1524663677130045,
      "grad_norm": 1.2772897481918335,
      "learning_rate": 9.395348837209302e-05,
      "loss": 0.4534,
      "step": 119
    },
    {
      "epoch": 2.1704035874439462,
      "grad_norm": 1.0017895698547363,
      "learning_rate": 9.30232558139535e-05,
      "loss": 0.3835,
      "step": 120
    },
    {
      "epoch": 2.188340807174888,
      "grad_norm": 1.1340886354446411,
      "learning_rate": 9.209302325581396e-05,
      "loss": 0.4089,
      "step": 121
    },
    {
      "epoch": 2.2062780269058297,
      "grad_norm": 1.011704921722412,
      "learning_rate": 9.116279069767443e-05,
      "loss": 0.4298,
      "step": 122
    },
    {
      "epoch": 2.2242152466367715,
      "grad_norm": 0.9829872250556946,
      "learning_rate": 9.023255813953489e-05,
      "loss": 0.4568,
      "step": 123
    },
    {
      "epoch": 2.242152466367713,
      "grad_norm": 1.0418094396591187,
      "learning_rate": 8.930232558139535e-05,
      "loss": 0.5086,
      "step": 124
    },
    {
      "epoch": 2.2600896860986546,
      "grad_norm": 0.9814426302909851,
      "learning_rate": 8.837209302325582e-05,
      "loss": 0.5033,
      "step": 125
    },
    {
      "epoch": 2.2780269058295963,
      "grad_norm": 1.0506559610366821,
      "learning_rate": 8.744186046511629e-05,
      "loss": 0.455,
      "step": 126
    },
    {
      "epoch": 2.295964125560538,
      "grad_norm": 0.9949473738670349,
      "learning_rate": 8.651162790697674e-05,
      "loss": 0.4637,
      "step": 127
    },
    {
      "epoch": 2.31390134529148,
      "grad_norm": 1.0186420679092407,
      "learning_rate": 8.558139534883721e-05,
      "loss": 0.4877,
      "step": 128
    },
    {
      "epoch": 2.3318385650224216,
      "grad_norm": 1.0602444410324097,
      "learning_rate": 8.465116279069768e-05,
      "loss": 0.4941,
      "step": 129
    },
    {
      "epoch": 2.3497757847533634,
      "grad_norm": 1.0107648372650146,
      "learning_rate": 8.372093023255814e-05,
      "loss": 0.4339,
      "step": 130
    },
    {
      "epoch": 2.367713004484305,
      "grad_norm": 1.1475372314453125,
      "learning_rate": 8.27906976744186e-05,
      "loss": 0.5158,
      "step": 131
    },
    {
      "epoch": 2.3856502242152464,
      "grad_norm": 1.0330064296722412,
      "learning_rate": 8.186046511627907e-05,
      "loss": 0.4266,
      "step": 132
    },
    {
      "epoch": 2.403587443946188,
      "grad_norm": 1.3457512855529785,
      "learning_rate": 8.093023255813953e-05,
      "loss": 0.617,
      "step": 133
    },
    {
      "epoch": 2.42152466367713,
      "grad_norm": 1.1562917232513428,
      "learning_rate": 8e-05,
      "loss": 0.4976,
      "step": 134
    },
    {
      "epoch": 2.4394618834080717,
      "grad_norm": 1.087751030921936,
      "learning_rate": 7.906976744186047e-05,
      "loss": 0.4836,
      "step": 135
    },
    {
      "epoch": 2.4573991031390134,
      "grad_norm": 1.0045045614242554,
      "learning_rate": 7.813953488372094e-05,
      "loss": 0.4294,
      "step": 136
    },
    {
      "epoch": 2.475336322869955,
      "grad_norm": 1.0355446338653564,
      "learning_rate": 7.72093023255814e-05,
      "loss": 0.4472,
      "step": 137
    },
    {
      "epoch": 2.493273542600897,
      "grad_norm": 1.163203239440918,
      "learning_rate": 7.627906976744187e-05,
      "loss": 0.4853,
      "step": 138
    },
    {
      "epoch": 2.5112107623318387,
      "grad_norm": 1.0705980062484741,
      "learning_rate": 7.534883720930233e-05,
      "loss": 0.4142,
      "step": 139
    },
    {
      "epoch": 2.5291479820627805,
      "grad_norm": 1.172975778579712,
      "learning_rate": 7.441860465116279e-05,
      "loss": 0.5348,
      "step": 140
    },
    {
      "epoch": 2.547085201793722,
      "grad_norm": 0.9890033006668091,
      "learning_rate": 7.348837209302326e-05,
      "loss": 0.3867,
      "step": 141
    },
    {
      "epoch": 2.5650224215246635,
      "grad_norm": 1.3716145753860474,
      "learning_rate": 7.255813953488373e-05,
      "loss": 0.4829,
      "step": 142
    },
    {
      "epoch": 2.5829596412556053,
      "grad_norm": 1.1363354921340942,
      "learning_rate": 7.162790697674418e-05,
      "loss": 0.4112,
      "step": 143
    },
    {
      "epoch": 2.600896860986547,
      "grad_norm": 1.180514931678772,
      "learning_rate": 7.069767441860465e-05,
      "loss": 0.4212,
      "step": 144
    },
    {
      "epoch": 2.618834080717489,
      "grad_norm": 1.1589065790176392,
      "learning_rate": 6.976744186046513e-05,
      "loss": 0.4382,
      "step": 145
    },
    {
      "epoch": 2.6367713004484306,
      "grad_norm": 1.1208486557006836,
      "learning_rate": 6.883720930232558e-05,
      "loss": 0.4852,
      "step": 146
    },
    {
      "epoch": 2.6547085201793723,
      "grad_norm": 1.1670925617218018,
      "learning_rate": 6.790697674418604e-05,
      "loss": 0.4801,
      "step": 147
    },
    {
      "epoch": 2.672645739910314,
      "grad_norm": 1.1497890949249268,
      "learning_rate": 6.697674418604652e-05,
      "loss": 0.4581,
      "step": 148
    },
    {
      "epoch": 2.6905829596412554,
      "grad_norm": 1.1380338668823242,
      "learning_rate": 6.604651162790698e-05,
      "loss": 0.4974,
      "step": 149
    },
    {
      "epoch": 2.708520179372197,
      "grad_norm": 1.2095478773117065,
      "learning_rate": 6.511627906976745e-05,
      "loss": 0.4958,
      "step": 150
    },
    {
      "epoch": 2.726457399103139,
      "grad_norm": 1.1369256973266602,
      "learning_rate": 6.418604651162791e-05,
      "loss": 0.4518,
      "step": 151
    },
    {
      "epoch": 2.7443946188340806,
      "grad_norm": 1.1578013896942139,
      "learning_rate": 6.325581395348838e-05,
      "loss": 0.5655,
      "step": 152
    },
    {
      "epoch": 2.7623318385650224,
      "grad_norm": 1.0805268287658691,
      "learning_rate": 6.232558139534884e-05,
      "loss": 0.5004,
      "step": 153
    },
    {
      "epoch": 2.780269058295964,
      "grad_norm": 1.1408129930496216,
      "learning_rate": 6.139534883720931e-05,
      "loss": 0.4059,
      "step": 154
    },
    {
      "epoch": 2.798206278026906,
      "grad_norm": 1.0206074714660645,
      "learning_rate": 6.0465116279069765e-05,
      "loss": 0.4119,
      "step": 155
    },
    {
      "epoch": 2.8161434977578477,
      "grad_norm": 0.9685718417167664,
      "learning_rate": 5.953488372093024e-05,
      "loss": 0.4574,
      "step": 156
    },
    {
      "epoch": 2.8340807174887894,
      "grad_norm": 1.0425866842269897,
      "learning_rate": 5.8604651162790704e-05,
      "loss": 0.4774,
      "step": 157
    },
    {
      "epoch": 2.852017937219731,
      "grad_norm": 1.0325255393981934,
      "learning_rate": 5.767441860465117e-05,
      "loss": 0.4317,
      "step": 158
    },
    {
      "epoch": 2.8699551569506725,
      "grad_norm": 1.0784574747085571,
      "learning_rate": 5.674418604651163e-05,
      "loss": 0.3921,
      "step": 159
    },
    {
      "epoch": 2.8878923766816142,
      "grad_norm": 1.081007957458496,
      "learning_rate": 5.5813953488372095e-05,
      "loss": 0.4515,
      "step": 160
    },
    {
      "epoch": 2.905829596412556,
      "grad_norm": 1.1916303634643555,
      "learning_rate": 5.488372093023256e-05,
      "loss": 0.4556,
      "step": 161
    },
    {
      "epoch": 2.9237668161434978,
      "grad_norm": 1.2342188358306885,
      "learning_rate": 5.3953488372093034e-05,
      "loss": 0.4772,
      "step": 162
    },
    {
      "epoch": 2.9417040358744395,
      "grad_norm": 1.0315567255020142,
      "learning_rate": 5.3023255813953486e-05,
      "loss": 0.3233,
      "step": 163
    },
    {
      "epoch": 2.9596412556053813,
      "grad_norm": 1.3380693197250366,
      "learning_rate": 5.209302325581395e-05,
      "loss": 0.5098,
      "step": 164
    },
    {
      "epoch": 2.977578475336323,
      "grad_norm": 1.2268236875534058,
      "learning_rate": 5.1162790697674425e-05,
      "loss": 0.4125,
      "step": 165
    },
    {
      "epoch": 3.004484304932735,
      "grad_norm": 2.589136838912964,
      "learning_rate": 5.023255813953489e-05,
      "loss": 0.7637,
      "step": 166
    },
    {
      "epoch": 3.022421524663677,
      "grad_norm": 0.9620673060417175,
      "learning_rate": 4.930232558139535e-05,
      "loss": 0.2501,
      "step": 167
    },
    {
      "epoch": 3.0403587443946187,
      "grad_norm": 1.0851328372955322,
      "learning_rate": 4.8372093023255816e-05,
      "loss": 0.3299,
      "step": 168
    },
    {
      "epoch": 3.0582959641255605,
      "grad_norm": 1.081047773361206,
      "learning_rate": 4.744186046511628e-05,
      "loss": 0.319,
      "step": 169
    },
    {
      "epoch": 3.0762331838565022,
      "grad_norm": 0.9016939997673035,
      "learning_rate": 4.651162790697675e-05,
      "loss": 0.2505,
      "step": 170
    },
    {
      "epoch": 3.094170403587444,
      "grad_norm": 1.279685616493225,
      "learning_rate": 4.5581395348837214e-05,
      "loss": 0.3574,
      "step": 171
    },
    {
      "epoch": 3.1121076233183858,
      "grad_norm": 1.1288567781448364,
      "learning_rate": 4.465116279069767e-05,
      "loss": 0.2322,
      "step": 172
    },
    {
      "epoch": 3.1300448430493275,
      "grad_norm": 1.0982707738876343,
      "learning_rate": 4.3720930232558146e-05,
      "loss": 0.2557,
      "step": 173
    },
    {
      "epoch": 3.1479820627802693,
      "grad_norm": 1.2716487646102905,
      "learning_rate": 4.2790697674418605e-05,
      "loss": 0.2683,
      "step": 174
    },
    {
      "epoch": 3.1659192825112106,
      "grad_norm": 1.277907371520996,
      "learning_rate": 4.186046511627907e-05,
      "loss": 0.3359,
      "step": 175
    },
    {
      "epoch": 3.1838565022421523,
      "grad_norm": 1.767809510231018,
      "learning_rate": 4.093023255813954e-05,
      "loss": 0.3141,
      "step": 176
    },
    {
      "epoch": 3.201793721973094,
      "grad_norm": 1.5723196268081665,
      "learning_rate": 4e-05,
      "loss": 0.2696,
      "step": 177
    },
    {
      "epoch": 3.219730941704036,
      "grad_norm": 1.2438582181930542,
      "learning_rate": 3.906976744186047e-05,
      "loss": 0.2261,
      "step": 178
    },
    {
      "epoch": 3.2376681614349776,
      "grad_norm": 1.3772393465042114,
      "learning_rate": 3.8139534883720935e-05,
      "loss": 0.2699,
      "step": 179
    },
    {
      "epoch": 3.2556053811659194,
      "grad_norm": 1.1731289625167847,
      "learning_rate": 3.7209302325581394e-05,
      "loss": 0.2809,
      "step": 180
    },
    {
      "epoch": 3.273542600896861,
      "grad_norm": 1.3203359842300415,
      "learning_rate": 3.627906976744187e-05,
      "loss": 0.2798,
      "step": 181
    },
    {
      "epoch": 3.291479820627803,
      "grad_norm": 1.0982232093811035,
      "learning_rate": 3.5348837209302326e-05,
      "loss": 0.2503,
      "step": 182
    },
    {
      "epoch": 3.3094170403587446,
      "grad_norm": 1.2753369808197021,
      "learning_rate": 3.441860465116279e-05,
      "loss": 0.2893,
      "step": 183
    },
    {
      "epoch": 3.327354260089686,
      "grad_norm": 1.2293989658355713,
      "learning_rate": 3.348837209302326e-05,
      "loss": 0.2585,
      "step": 184
    },
    {
      "epoch": 3.3452914798206277,
      "grad_norm": 1.3043240308761597,
      "learning_rate": 3.2558139534883724e-05,
      "loss": 0.2203,
      "step": 185
    },
    {
      "epoch": 3.3632286995515694,
      "grad_norm": 1.1034027338027954,
      "learning_rate": 3.162790697674419e-05,
      "loss": 0.2501,
      "step": 186
    },
    {
      "epoch": 3.381165919282511,
      "grad_norm": 0.9731037020683289,
      "learning_rate": 3.0697674418604656e-05,
      "loss": 0.1944,
      "step": 187
    },
    {
      "epoch": 3.399103139013453,
      "grad_norm": 1.069287657737732,
      "learning_rate": 2.976744186046512e-05,
      "loss": 0.2041,
      "step": 188
    },
    {
      "epoch": 3.4170403587443947,
      "grad_norm": 1.3233182430267334,
      "learning_rate": 2.8837209302325585e-05,
      "loss": 0.2713,
      "step": 189
    },
    {
      "epoch": 3.4349775784753365,
      "grad_norm": 1.2428154945373535,
      "learning_rate": 2.7906976744186048e-05,
      "loss": 0.2002,
      "step": 190
    },
    {
      "epoch": 3.452914798206278,
      "grad_norm": 1.20328688621521,
      "learning_rate": 2.6976744186046517e-05,
      "loss": 0.2646,
      "step": 191
    },
    {
      "epoch": 3.4708520179372195,
      "grad_norm": 1.3479125499725342,
      "learning_rate": 2.6046511627906976e-05,
      "loss": 0.2911,
      "step": 192
    },
    {
      "epoch": 3.4887892376681613,
      "grad_norm": 1.2266180515289307,
      "learning_rate": 2.5116279069767445e-05,
      "loss": 0.2811,
      "step": 193
    },
    {
      "epoch": 3.506726457399103,
      "grad_norm": 1.2345128059387207,
      "learning_rate": 2.4186046511627908e-05,
      "loss": 0.2237,
      "step": 194
    },
    {
      "epoch": 3.524663677130045,
      "grad_norm": 1.3437424898147583,
      "learning_rate": 2.3255813953488374e-05,
      "loss": 0.283,
      "step": 195
    },
    {
      "epoch": 3.5426008968609866,
      "grad_norm": 1.4216517210006714,
      "learning_rate": 2.2325581395348837e-05,
      "loss": 0.2871,
      "step": 196
    },
    {
      "epoch": 3.5605381165919283,
      "grad_norm": 1.1113003492355347,
      "learning_rate": 2.1395348837209303e-05,
      "loss": 0.219,
      "step": 197
    },
    {
      "epoch": 3.57847533632287,
      "grad_norm": 1.592371940612793,
      "learning_rate": 2.046511627906977e-05,
      "loss": 0.3161,
      "step": 198
    },
    {
      "epoch": 3.596412556053812,
      "grad_norm": 1.2963297367095947,
      "learning_rate": 1.9534883720930235e-05,
      "loss": 0.2242,
      "step": 199
    },
    {
      "epoch": 3.6143497757847536,
      "grad_norm": 1.1588383913040161,
      "learning_rate": 1.8604651162790697e-05,
      "loss": 0.2079,
      "step": 200
    },
    {
      "epoch": 3.6322869955156953,
      "grad_norm": 1.2683604955673218,
      "learning_rate": 1.7674418604651163e-05,
      "loss": 0.2501,
      "step": 201
    },
    {
      "epoch": 3.6502242152466366,
      "grad_norm": 1.3655322790145874,
      "learning_rate": 1.674418604651163e-05,
      "loss": 0.2299,
      "step": 202
    },
    {
      "epoch": 3.6681614349775784,
      "grad_norm": 1.3018665313720703,
      "learning_rate": 1.5813953488372095e-05,
      "loss": 0.2148,
      "step": 203
    },
    {
      "epoch": 3.68609865470852,
      "grad_norm": 1.388330101966858,
      "learning_rate": 1.488372093023256e-05,
      "loss": 0.2512,
      "step": 204
    },
    {
      "epoch": 3.704035874439462,
      "grad_norm": 1.535142421722412,
      "learning_rate": 1.3953488372093024e-05,
      "loss": 0.2579,
      "step": 205
    },
    {
      "epoch": 3.7219730941704037,
      "grad_norm": 1.4287734031677246,
      "learning_rate": 1.3023255813953488e-05,
      "loss": 0.2708,
      "step": 206
    },
    {
      "epoch": 3.7399103139013454,
      "grad_norm": 1.5674840211868286,
      "learning_rate": 1.2093023255813954e-05,
      "loss": 0.3017,
      "step": 207
    },
    {
      "epoch": 3.7578475336322867,
      "grad_norm": 1.261733889579773,
      "learning_rate": 1.1162790697674418e-05,
      "loss": 0.2584,
      "step": 208
    },
    {
      "epoch": 3.7757847533632285,
      "grad_norm": 1.4881441593170166,
      "learning_rate": 1.0232558139534884e-05,
      "loss": 0.3086,
      "step": 209
    },
    {
      "epoch": 3.7937219730941703,
      "grad_norm": 1.1449949741363525,
      "learning_rate": 9.302325581395349e-06,
      "loss": 0.2277,
      "step": 210
    },
    {
      "epoch": 3.811659192825112,
      "grad_norm": 1.3948498964309692,
      "learning_rate": 8.372093023255815e-06,
      "loss": 0.2668,
      "step": 211
    },
    {
      "epoch": 3.8295964125560538,
      "grad_norm": 1.1462297439575195,
      "learning_rate": 7.44186046511628e-06,
      "loss": 0.2142,
      "step": 212
    },
    {
      "epoch": 3.8475336322869955,
      "grad_norm": 1.4967782497406006,
      "learning_rate": 6.511627906976744e-06,
      "loss": 0.275,
      "step": 213
    },
    {
      "epoch": 3.8654708520179373,
      "grad_norm": 1.3958649635314941,
      "learning_rate": 5.581395348837209e-06,
      "loss": 0.2833,
      "step": 214
    },
    {
      "epoch": 3.883408071748879,
      "grad_norm": 1.4644280672073364,
      "learning_rate": 4.651162790697674e-06,
      "loss": 0.3475,
      "step": 215
    },
    {
      "epoch": 3.901345291479821,
      "grad_norm": 1.3760302066802979,
      "learning_rate": 3.72093023255814e-06,
      "loss": 0.2892,
      "step": 216
    },
    {
      "epoch": 3.9192825112107625,
      "grad_norm": 1.320532202720642,
      "learning_rate": 2.7906976744186046e-06,
      "loss": 0.2542,
      "step": 217
    },
    {
      "epoch": 3.9372197309417043,
      "grad_norm": 1.1825841665267944,
      "learning_rate": 1.86046511627907e-06,
      "loss": 0.2306,
      "step": 218
    },
    {
      "epoch": 3.9551569506726456,
      "grad_norm": 1.3918488025665283,
      "learning_rate": 9.30232558139535e-07,
      "loss": 0.2958,
      "step": 219
    },
    {
      "epoch": 3.9730941704035874,
      "grad_norm": 1.3532480001449585,
      "learning_rate": 0.0,
      "loss": 0.2748,
      "step": 220
    }
  ],
  "logging_steps": 1,
  "max_steps": 220,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9925730977234944.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}