|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9790419161676647, |
|
"eval_steps": 167, |
|
"global_step": 1336, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0014970059880239522, |
|
"grad_norm": 0.09437490254640579, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.5976, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0014970059880239522, |
|
"eval_loss": 0.4935530424118042, |
|
"eval_runtime": 96.1134, |
|
"eval_samples_per_second": 7.658, |
|
"eval_steps_per_second": 0.957, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0029940119760479044, |
|
"grad_norm": 0.07851643115282059, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.5631, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.004491017964071856, |
|
"grad_norm": 0.07771598547697067, |
|
"learning_rate": 3e-06, |
|
"loss": 0.4231, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.005988023952095809, |
|
"grad_norm": 0.07919719070196152, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.3963, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0074850299401197605, |
|
"grad_norm": 0.08890288323163986, |
|
"learning_rate": 5e-06, |
|
"loss": 0.57, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.008982035928143712, |
|
"grad_norm": 0.0997990146279335, |
|
"learning_rate": 6e-06, |
|
"loss": 0.3526, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.010479041916167664, |
|
"grad_norm": 0.09869150817394257, |
|
"learning_rate": 7e-06, |
|
"loss": 0.5726, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.011976047904191617, |
|
"grad_norm": 0.08410096168518066, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.4443, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01347305389221557, |
|
"grad_norm": 0.10099457949399948, |
|
"learning_rate": 9e-06, |
|
"loss": 0.67, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.014970059880239521, |
|
"grad_norm": 0.08075608313083649, |
|
"learning_rate": 1e-05, |
|
"loss": 0.512, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.016467065868263474, |
|
"grad_norm": 0.0809493139386177, |
|
"learning_rate": 9.9999859669361e-06, |
|
"loss": 0.4853, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.017964071856287425, |
|
"grad_norm": 0.0719803050160408, |
|
"learning_rate": 9.999943867823174e-06, |
|
"loss": 0.3089, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.019461077844311378, |
|
"grad_norm": 0.09597136080265045, |
|
"learning_rate": 9.999873702897528e-06, |
|
"loss": 0.5944, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.020958083832335328, |
|
"grad_norm": 0.05794112756848335, |
|
"learning_rate": 9.999775472553019e-06, |
|
"loss": 0.2852, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02245508982035928, |
|
"grad_norm": 0.08182471245527267, |
|
"learning_rate": 9.999649177341036e-06, |
|
"loss": 0.4125, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.023952095808383235, |
|
"grad_norm": 0.0892665907740593, |
|
"learning_rate": 9.999494817970498e-06, |
|
"loss": 0.5022, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.025449101796407185, |
|
"grad_norm": 0.11126008629798889, |
|
"learning_rate": 9.999312395307861e-06, |
|
"loss": 0.6561, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02694610778443114, |
|
"grad_norm": 0.08598649501800537, |
|
"learning_rate": 9.999101910377107e-06, |
|
"loss": 0.423, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02844311377245509, |
|
"grad_norm": 0.0690973624587059, |
|
"learning_rate": 9.998863364359734e-06, |
|
"loss": 0.2771, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.029940119760479042, |
|
"grad_norm": 0.10876584053039551, |
|
"learning_rate": 9.998596758594752e-06, |
|
"loss": 0.4564, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03143712574850299, |
|
"grad_norm": 0.08262284100055695, |
|
"learning_rate": 9.998302094578685e-06, |
|
"loss": 0.4124, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03293413173652695, |
|
"grad_norm": 0.1151677817106247, |
|
"learning_rate": 9.997979373965542e-06, |
|
"loss": 0.594, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0344311377245509, |
|
"grad_norm": 0.06878229230642319, |
|
"learning_rate": 9.99762859856683e-06, |
|
"loss": 0.367, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03592814371257485, |
|
"grad_norm": 0.09845534712076187, |
|
"learning_rate": 9.997249770351531e-06, |
|
"loss": 0.4928, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0374251497005988, |
|
"grad_norm": 0.11114847660064697, |
|
"learning_rate": 9.996842891446092e-06, |
|
"loss": 0.513, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.038922155688622756, |
|
"grad_norm": 0.0876867026090622, |
|
"learning_rate": 9.996407964134416e-06, |
|
"loss": 0.4446, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.040419161676646706, |
|
"grad_norm": 0.08587277680635452, |
|
"learning_rate": 9.995944990857848e-06, |
|
"loss": 0.4212, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.041916167664670656, |
|
"grad_norm": 0.1094743087887764, |
|
"learning_rate": 9.995453974215164e-06, |
|
"loss": 0.5395, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04341317365269461, |
|
"grad_norm": 0.09168315678834915, |
|
"learning_rate": 9.994934916962547e-06, |
|
"loss": 0.5382, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04491017964071856, |
|
"grad_norm": 0.0933656170964241, |
|
"learning_rate": 9.994387822013586e-06, |
|
"loss": 0.4935, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04640718562874251, |
|
"grad_norm": 0.09951834380626678, |
|
"learning_rate": 9.993812692439247e-06, |
|
"loss": 0.4884, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04790419161676647, |
|
"grad_norm": 0.08234664052724838, |
|
"learning_rate": 9.99320953146786e-06, |
|
"loss": 0.3578, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04940119760479042, |
|
"grad_norm": 0.0895133763551712, |
|
"learning_rate": 9.992578342485107e-06, |
|
"loss": 0.5871, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.05089820359281437, |
|
"grad_norm": 0.12492594122886658, |
|
"learning_rate": 9.991919129033994e-06, |
|
"loss": 0.5202, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05239520958083832, |
|
"grad_norm": 0.09857629239559174, |
|
"learning_rate": 9.99123189481483e-06, |
|
"loss": 0.5833, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.05389221556886228, |
|
"grad_norm": 0.13012069463729858, |
|
"learning_rate": 9.990516643685222e-06, |
|
"loss": 0.6599, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05538922155688623, |
|
"grad_norm": 0.08107103407382965, |
|
"learning_rate": 9.98977337966003e-06, |
|
"loss": 0.4434, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05688622754491018, |
|
"grad_norm": 0.08469574898481369, |
|
"learning_rate": 9.989002106911368e-06, |
|
"loss": 0.4689, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.058383233532934134, |
|
"grad_norm": 0.06690481305122375, |
|
"learning_rate": 9.988202829768562e-06, |
|
"loss": 0.4292, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.059880239520958084, |
|
"grad_norm": 0.07395423203706741, |
|
"learning_rate": 9.987375552718133e-06, |
|
"loss": 0.3295, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.061377245508982034, |
|
"grad_norm": 0.10279028117656708, |
|
"learning_rate": 9.986520280403775e-06, |
|
"loss": 0.5975, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.06287425149700598, |
|
"grad_norm": 0.08501698821783066, |
|
"learning_rate": 9.985637017626326e-06, |
|
"loss": 0.4974, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.06437125748502993, |
|
"grad_norm": 0.08820093423128128, |
|
"learning_rate": 9.984725769343737e-06, |
|
"loss": 0.5446, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0658682634730539, |
|
"grad_norm": 0.0912579894065857, |
|
"learning_rate": 9.983786540671052e-06, |
|
"loss": 0.4359, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06736526946107785, |
|
"grad_norm": 0.06515525281429291, |
|
"learning_rate": 9.982819336880369e-06, |
|
"loss": 0.3298, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0688622754491018, |
|
"grad_norm": 0.09884097427129745, |
|
"learning_rate": 9.981824163400827e-06, |
|
"loss": 0.4986, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.07035928143712575, |
|
"grad_norm": 0.09501009434461594, |
|
"learning_rate": 9.980801025818556e-06, |
|
"loss": 0.5314, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0718562874251497, |
|
"grad_norm": 0.09527381509542465, |
|
"learning_rate": 9.979749929876658e-06, |
|
"loss": 0.4653, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.07335329341317365, |
|
"grad_norm": 0.07869047671556473, |
|
"learning_rate": 9.978670881475173e-06, |
|
"loss": 0.3973, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0748502994011976, |
|
"grad_norm": 0.0786195695400238, |
|
"learning_rate": 9.977563886671043e-06, |
|
"loss": 0.4364, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07634730538922156, |
|
"grad_norm": 0.08818861842155457, |
|
"learning_rate": 9.976428951678077e-06, |
|
"loss": 0.5149, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.07784431137724551, |
|
"grad_norm": 0.0732090175151825, |
|
"learning_rate": 9.975266082866923e-06, |
|
"loss": 0.3853, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07934131736526946, |
|
"grad_norm": 0.07830777764320374, |
|
"learning_rate": 9.974075286765027e-06, |
|
"loss": 0.5106, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.08083832335329341, |
|
"grad_norm": 0.08423200249671936, |
|
"learning_rate": 9.972856570056594e-06, |
|
"loss": 0.4341, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.08233532934131736, |
|
"grad_norm": 0.12318050861358643, |
|
"learning_rate": 9.971609939582556e-06, |
|
"loss": 0.3335, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.08383233532934131, |
|
"grad_norm": 0.10663409531116486, |
|
"learning_rate": 9.970335402340534e-06, |
|
"loss": 0.6584, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.08532934131736528, |
|
"grad_norm": 0.09056587517261505, |
|
"learning_rate": 9.969032965484789e-06, |
|
"loss": 0.4559, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.08682634730538923, |
|
"grad_norm": 0.08034185320138931, |
|
"learning_rate": 9.967702636326195e-06, |
|
"loss": 0.3497, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.08832335329341318, |
|
"grad_norm": 0.10354162752628326, |
|
"learning_rate": 9.96634442233219e-06, |
|
"loss": 0.6226, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.08982035928143713, |
|
"grad_norm": 0.0716947466135025, |
|
"learning_rate": 9.964958331126735e-06, |
|
"loss": 0.3743, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09131736526946108, |
|
"grad_norm": 0.06358286738395691, |
|
"learning_rate": 9.96354437049027e-06, |
|
"loss": 0.3273, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.09281437125748503, |
|
"grad_norm": 0.07961272448301315, |
|
"learning_rate": 9.96210254835968e-06, |
|
"loss": 0.4669, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.09431137724550898, |
|
"grad_norm": 0.07491420954465866, |
|
"learning_rate": 9.960632872828233e-06, |
|
"loss": 0.4228, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.09580838323353294, |
|
"grad_norm": 0.07608773559331894, |
|
"learning_rate": 9.959135352145552e-06, |
|
"loss": 0.447, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.09730538922155689, |
|
"grad_norm": 0.0806804671883583, |
|
"learning_rate": 9.957609994717559e-06, |
|
"loss": 0.5003, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.09880239520958084, |
|
"grad_norm": 0.08095613121986389, |
|
"learning_rate": 9.956056809106426e-06, |
|
"loss": 0.3929, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.10029940119760479, |
|
"grad_norm": 0.09194113314151764, |
|
"learning_rate": 9.954475804030539e-06, |
|
"loss": 0.5485, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.10179640718562874, |
|
"grad_norm": 0.08301645517349243, |
|
"learning_rate": 9.952866988364431e-06, |
|
"loss": 0.3857, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.10329341317365269, |
|
"grad_norm": 0.08787499368190765, |
|
"learning_rate": 9.95123037113875e-06, |
|
"loss": 0.5438, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.10479041916167664, |
|
"grad_norm": 0.08403324335813522, |
|
"learning_rate": 9.9495659615402e-06, |
|
"loss": 0.5825, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1062874251497006, |
|
"grad_norm": 0.07904759049415588, |
|
"learning_rate": 9.947873768911483e-06, |
|
"loss": 0.3771, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.10778443113772455, |
|
"grad_norm": 0.07713301479816437, |
|
"learning_rate": 9.946153802751257e-06, |
|
"loss": 0.4679, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1092814371257485, |
|
"grad_norm": 0.07544517517089844, |
|
"learning_rate": 9.944406072714086e-06, |
|
"loss": 0.3567, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.11077844311377245, |
|
"grad_norm": 0.0836024358868599, |
|
"learning_rate": 9.942630588610368e-06, |
|
"loss": 0.4644, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.1122754491017964, |
|
"grad_norm": 0.08232344686985016, |
|
"learning_rate": 9.940827360406297e-06, |
|
"loss": 0.4551, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.11377245508982035, |
|
"grad_norm": 0.0864156037569046, |
|
"learning_rate": 9.938996398223802e-06, |
|
"loss": 0.4393, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.11526946107784432, |
|
"grad_norm": 0.06776531785726547, |
|
"learning_rate": 9.937137712340483e-06, |
|
"loss": 0.2865, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.11676646706586827, |
|
"grad_norm": 0.092351995408535, |
|
"learning_rate": 9.935251313189564e-06, |
|
"loss": 0.4635, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.11826347305389222, |
|
"grad_norm": 0.10527883470058441, |
|
"learning_rate": 9.933337211359833e-06, |
|
"loss": 0.4317, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.11976047904191617, |
|
"grad_norm": 0.06179358810186386, |
|
"learning_rate": 9.931395417595568e-06, |
|
"loss": 0.3493, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12125748502994012, |
|
"grad_norm": 0.07964818924665451, |
|
"learning_rate": 9.929425942796502e-06, |
|
"loss": 0.4229, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.12275449101796407, |
|
"grad_norm": 0.07189369946718216, |
|
"learning_rate": 9.927428798017738e-06, |
|
"loss": 0.2973, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.12425149700598802, |
|
"grad_norm": 0.07853538542985916, |
|
"learning_rate": 9.925403994469702e-06, |
|
"loss": 0.4828, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.12574850299401197, |
|
"grad_norm": 0.08836135268211365, |
|
"learning_rate": 9.92335154351807e-06, |
|
"loss": 0.544, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.12724550898203593, |
|
"grad_norm": 0.08305364102125168, |
|
"learning_rate": 9.921271456683716e-06, |
|
"loss": 0.3718, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.12874251497005987, |
|
"grad_norm": 0.1029515191912651, |
|
"learning_rate": 9.919163745642633e-06, |
|
"loss": 0.5272, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.13023952095808383, |
|
"grad_norm": 0.09859377145767212, |
|
"learning_rate": 9.91702842222588e-06, |
|
"loss": 0.6271, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.1317365269461078, |
|
"grad_norm": 0.08649040758609772, |
|
"learning_rate": 9.91486549841951e-06, |
|
"loss": 0.3192, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.13323353293413173, |
|
"grad_norm": 0.07640533149242401, |
|
"learning_rate": 9.912674986364502e-06, |
|
"loss": 0.315, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.1347305389221557, |
|
"grad_norm": 0.0803142637014389, |
|
"learning_rate": 9.91045689835669e-06, |
|
"loss": 0.3875, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.13622754491017963, |
|
"grad_norm": 0.08381187170743942, |
|
"learning_rate": 9.908211246846708e-06, |
|
"loss": 0.2947, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.1377245508982036, |
|
"grad_norm": 0.06045156344771385, |
|
"learning_rate": 9.905938044439904e-06, |
|
"loss": 0.3409, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.13922155688622753, |
|
"grad_norm": 0.0911700651049614, |
|
"learning_rate": 9.903637303896272e-06, |
|
"loss": 0.5051, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.1407185628742515, |
|
"grad_norm": 0.08980625867843628, |
|
"learning_rate": 9.901309038130392e-06, |
|
"loss": 0.512, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.14221556886227546, |
|
"grad_norm": 0.1140458956360817, |
|
"learning_rate": 9.89895326021134e-06, |
|
"loss": 0.5091, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.1437125748502994, |
|
"grad_norm": 0.06530901044607162, |
|
"learning_rate": 9.896569983362632e-06, |
|
"loss": 0.1685, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.14520958083832336, |
|
"grad_norm": 0.09205546230077744, |
|
"learning_rate": 9.894159220962138e-06, |
|
"loss": 0.4877, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.1467065868263473, |
|
"grad_norm": 0.074747733771801, |
|
"learning_rate": 9.891720986542011e-06, |
|
"loss": 0.4084, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.14820359281437126, |
|
"grad_norm": 0.09208723902702332, |
|
"learning_rate": 9.889255293788613e-06, |
|
"loss": 0.4498, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.1497005988023952, |
|
"grad_norm": 0.06876664608716965, |
|
"learning_rate": 9.886762156542428e-06, |
|
"loss": 0.3346, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15119760479041916, |
|
"grad_norm": 0.09977904707193375, |
|
"learning_rate": 9.884241588798004e-06, |
|
"loss": 0.4219, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.15269461077844312, |
|
"grad_norm": 0.0814763605594635, |
|
"learning_rate": 9.881693604703853e-06, |
|
"loss": 0.4438, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.15419161676646706, |
|
"grad_norm": 0.07697126269340515, |
|
"learning_rate": 9.879118218562384e-06, |
|
"loss": 0.4969, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.15568862275449102, |
|
"grad_norm": 0.10940787941217422, |
|
"learning_rate": 9.876515444829822e-06, |
|
"loss": 0.5485, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.15718562874251496, |
|
"grad_norm": 0.09441516548395157, |
|
"learning_rate": 9.873885298116123e-06, |
|
"loss": 0.5654, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.15868263473053892, |
|
"grad_norm": 0.07836193591356277, |
|
"learning_rate": 9.871227793184893e-06, |
|
"loss": 0.4448, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.1601796407185629, |
|
"grad_norm": 0.08903878927230835, |
|
"learning_rate": 9.868542944953304e-06, |
|
"loss": 0.4793, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.16167664670658682, |
|
"grad_norm": 0.0880504921078682, |
|
"learning_rate": 9.865830768492019e-06, |
|
"loss": 0.4365, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.1631736526946108, |
|
"grad_norm": 0.06745749711990356, |
|
"learning_rate": 9.863091279025095e-06, |
|
"loss": 0.2266, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.16467065868263472, |
|
"grad_norm": 0.137594074010849, |
|
"learning_rate": 9.860324491929905e-06, |
|
"loss": 0.3052, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1661676646706587, |
|
"grad_norm": 0.07614412158727646, |
|
"learning_rate": 9.857530422737045e-06, |
|
"loss": 0.3063, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.16766467065868262, |
|
"grad_norm": 0.07592976093292236, |
|
"learning_rate": 9.854709087130261e-06, |
|
"loss": 0.3864, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.1691616766467066, |
|
"grad_norm": 0.07913090288639069, |
|
"learning_rate": 9.851860500946342e-06, |
|
"loss": 0.4046, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.17065868263473055, |
|
"grad_norm": 0.08463918417692184, |
|
"learning_rate": 9.848984680175049e-06, |
|
"loss": 0.3976, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.1721556886227545, |
|
"grad_norm": 0.08824311941862106, |
|
"learning_rate": 9.846081640959008e-06, |
|
"loss": 0.3154, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.17365269461077845, |
|
"grad_norm": 0.07617625594139099, |
|
"learning_rate": 9.843151399593636e-06, |
|
"loss": 0.4338, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.1751497005988024, |
|
"grad_norm": 0.0766025111079216, |
|
"learning_rate": 9.840193972527037e-06, |
|
"loss": 0.336, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.17664670658682635, |
|
"grad_norm": 0.07912255823612213, |
|
"learning_rate": 9.837209376359918e-06, |
|
"loss": 0.3919, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.1781437125748503, |
|
"grad_norm": 0.10425494611263275, |
|
"learning_rate": 9.834197627845488e-06, |
|
"loss": 0.3993, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.17964071856287425, |
|
"grad_norm": 0.09615318477153778, |
|
"learning_rate": 9.831158743889373e-06, |
|
"loss": 0.434, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.18113772455089822, |
|
"grad_norm": 0.0898306667804718, |
|
"learning_rate": 9.828092741549513e-06, |
|
"loss": 0.5143, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.18263473053892215, |
|
"grad_norm": 0.09334494918584824, |
|
"learning_rate": 9.82499963803607e-06, |
|
"loss": 0.4423, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.18413173652694612, |
|
"grad_norm": 0.09541749954223633, |
|
"learning_rate": 9.821879450711336e-06, |
|
"loss": 0.4826, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.18562874251497005, |
|
"grad_norm": 0.09210306406021118, |
|
"learning_rate": 9.81873219708962e-06, |
|
"loss": 0.512, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.18712574850299402, |
|
"grad_norm": 0.08098453283309937, |
|
"learning_rate": 9.815557894837171e-06, |
|
"loss": 0.2722, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.18862275449101795, |
|
"grad_norm": 0.10569058358669281, |
|
"learning_rate": 9.81235656177206e-06, |
|
"loss": 0.4803, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.19011976047904192, |
|
"grad_norm": 0.07749304175376892, |
|
"learning_rate": 9.809128215864096e-06, |
|
"loss": 0.2676, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.19161676646706588, |
|
"grad_norm": 0.08790621906518936, |
|
"learning_rate": 9.80587287523471e-06, |
|
"loss": 0.3458, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.19311377245508982, |
|
"grad_norm": 0.07159163057804108, |
|
"learning_rate": 9.802590558156863e-06, |
|
"loss": 0.2287, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.19461077844311378, |
|
"grad_norm": 0.08052971214056015, |
|
"learning_rate": 9.79928128305494e-06, |
|
"loss": 0.3894, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.19610778443113772, |
|
"grad_norm": 0.08126161992549896, |
|
"learning_rate": 9.795945068504654e-06, |
|
"loss": 0.3881, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.19760479041916168, |
|
"grad_norm": 0.1161087155342102, |
|
"learning_rate": 9.792581933232924e-06, |
|
"loss": 0.574, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.19910179640718562, |
|
"grad_norm": 0.13015466928482056, |
|
"learning_rate": 9.789191896117786e-06, |
|
"loss": 0.4662, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.20059880239520958, |
|
"grad_norm": 0.07646424323320389, |
|
"learning_rate": 9.78577497618829e-06, |
|
"loss": 0.3165, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.20209580838323354, |
|
"grad_norm": 0.07416396588087082, |
|
"learning_rate": 9.782331192624372e-06, |
|
"loss": 0.2962, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.20359281437125748, |
|
"grad_norm": 0.10298296809196472, |
|
"learning_rate": 9.778860564756769e-06, |
|
"loss": 0.4891, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.20508982035928144, |
|
"grad_norm": 0.10729490220546722, |
|
"learning_rate": 9.775363112066897e-06, |
|
"loss": 0.4275, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.20658682634730538, |
|
"grad_norm": 0.08321131765842438, |
|
"learning_rate": 9.771838854186748e-06, |
|
"loss": 0.3733, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.20808383233532934, |
|
"grad_norm": 0.0714513510465622, |
|
"learning_rate": 9.768287810898773e-06, |
|
"loss": 0.2682, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.20958083832335328, |
|
"grad_norm": 0.10221531242132187, |
|
"learning_rate": 9.764710002135784e-06, |
|
"loss": 0.434, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.21107784431137724, |
|
"grad_norm": 0.09798867255449295, |
|
"learning_rate": 9.761105447980824e-06, |
|
"loss": 0.4254, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.2125748502994012, |
|
"grad_norm": 0.0933586061000824, |
|
"learning_rate": 9.757474168667072e-06, |
|
"loss": 0.4123, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.21407185628742514, |
|
"grad_norm": 0.10116757452487946, |
|
"learning_rate": 9.753816184577715e-06, |
|
"loss": 0.4565, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.2155688622754491, |
|
"grad_norm": 0.09178843349218369, |
|
"learning_rate": 9.750131516245844e-06, |
|
"loss": 0.3424, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.21706586826347304, |
|
"grad_norm": 0.12792278826236725, |
|
"learning_rate": 9.746420184354334e-06, |
|
"loss": 0.3526, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.218562874251497, |
|
"grad_norm": 0.10231195390224457, |
|
"learning_rate": 9.742682209735727e-06, |
|
"loss": 0.432, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.22005988023952097, |
|
"grad_norm": 0.10359717905521393, |
|
"learning_rate": 9.738917613372121e-06, |
|
"loss": 0.4257, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.2215568862275449, |
|
"grad_norm": 0.10195307433605194, |
|
"learning_rate": 9.73512641639504e-06, |
|
"loss": 0.413, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.22305389221556887, |
|
"grad_norm": 0.09156057238578796, |
|
"learning_rate": 9.731308640085329e-06, |
|
"loss": 0.377, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.2245508982035928, |
|
"grad_norm": 0.10193546861410141, |
|
"learning_rate": 9.72746430587303e-06, |
|
"loss": 0.3818, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.22604790419161677, |
|
"grad_norm": 0.09497050940990448, |
|
"learning_rate": 9.723593435337252e-06, |
|
"loss": 0.4343, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2275449101796407, |
|
"grad_norm": 0.08387334644794464, |
|
"learning_rate": 9.719696050206072e-06, |
|
"loss": 0.3136, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.22904191616766467, |
|
"grad_norm": 0.1258871853351593, |
|
"learning_rate": 9.715772172356388e-06, |
|
"loss": 0.4261, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.23053892215568864, |
|
"grad_norm": 0.11211127787828445, |
|
"learning_rate": 9.711821823813812e-06, |
|
"loss": 0.5238, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.23203592814371257, |
|
"grad_norm": 0.10336726158857346, |
|
"learning_rate": 9.70784502675254e-06, |
|
"loss": 0.4502, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.23353293413173654, |
|
"grad_norm": 0.06710167229175568, |
|
"learning_rate": 9.703841803495234e-06, |
|
"loss": 0.1438, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.23502994011976047, |
|
"grad_norm": 0.07125142216682434, |
|
"learning_rate": 9.699812176512887e-06, |
|
"loss": 0.2077, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.23652694610778444, |
|
"grad_norm": 0.1243571862578392, |
|
"learning_rate": 9.695756168424703e-06, |
|
"loss": 0.4075, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.23802395209580837, |
|
"grad_norm": 0.09641040116548538, |
|
"learning_rate": 9.691673801997974e-06, |
|
"loss": 0.5389, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.23952095808383234, |
|
"grad_norm": 0.10448335111141205, |
|
"learning_rate": 9.68756510014794e-06, |
|
"loss": 0.2794, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2410179640718563, |
|
"grad_norm": 0.0995652824640274, |
|
"learning_rate": 9.683430085937672e-06, |
|
"loss": 0.4319, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.24251497005988024, |
|
"grad_norm": 0.09269218891859055, |
|
"learning_rate": 9.67926878257794e-06, |
|
"loss": 0.3553, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.2440119760479042, |
|
"grad_norm": 0.11025439202785492, |
|
"learning_rate": 9.675081213427076e-06, |
|
"loss": 0.445, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.24550898203592814, |
|
"grad_norm": 0.0948467031121254, |
|
"learning_rate": 9.67086740199085e-06, |
|
"loss": 0.4348, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.2470059880239521, |
|
"grad_norm": 0.09545961767435074, |
|
"learning_rate": 9.666627371922335e-06, |
|
"loss": 0.364, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.24850299401197604, |
|
"grad_norm": 0.11892738938331604, |
|
"learning_rate": 9.66236114702178e-06, |
|
"loss": 0.4303, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.09273416548967361, |
|
"learning_rate": 9.658068751236464e-06, |
|
"loss": 0.3282, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 0.385356605052948, |
|
"eval_runtime": 96.1718, |
|
"eval_samples_per_second": 7.653, |
|
"eval_steps_per_second": 0.957, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.25149700598802394, |
|
"grad_norm": 0.13071858882904053, |
|
"learning_rate": 9.653750208660577e-06, |
|
"loss": 0.5143, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.25299401197604793, |
|
"grad_norm": 0.13147501647472382, |
|
"learning_rate": 9.649405543535067e-06, |
|
"loss": 0.4322, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.25449101796407186, |
|
"grad_norm": 0.12748856842517853, |
|
"learning_rate": 9.645034780247521e-06, |
|
"loss": 0.3911, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2559880239520958, |
|
"grad_norm": 0.11920400708913803, |
|
"learning_rate": 9.640637943332025e-06, |
|
"loss": 0.3082, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.25748502994011974, |
|
"grad_norm": 0.1189412921667099, |
|
"learning_rate": 9.636215057469009e-06, |
|
"loss": 0.3137, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.25898203592814373, |
|
"grad_norm": 0.11915755271911621, |
|
"learning_rate": 9.631766147485131e-06, |
|
"loss": 0.3861, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.26047904191616766, |
|
"grad_norm": 0.10477300733327866, |
|
"learning_rate": 9.627291238353127e-06, |
|
"loss": 0.4244, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.2619760479041916, |
|
"grad_norm": 0.11633849143981934, |
|
"learning_rate": 9.622790355191672e-06, |
|
"loss": 0.4226, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.2634730538922156, |
|
"grad_norm": 0.1252908855676651, |
|
"learning_rate": 9.618263523265238e-06, |
|
"loss": 0.4436, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.26497005988023953, |
|
"grad_norm": 0.10194990783929825, |
|
"learning_rate": 9.613710767983953e-06, |
|
"loss": 0.4307, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.26646706586826346, |
|
"grad_norm": 0.11449652910232544, |
|
"learning_rate": 9.609132114903458e-06, |
|
"loss": 0.4771, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.2679640718562874, |
|
"grad_norm": 0.08113080263137817, |
|
"learning_rate": 9.60452758972477e-06, |
|
"loss": 0.2791, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.2694610778443114, |
|
"grad_norm": 0.11075828969478607, |
|
"learning_rate": 9.599897218294122e-06, |
|
"loss": 0.389, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.27095808383233533, |
|
"grad_norm": 0.09724461287260056, |
|
"learning_rate": 9.595241026602836e-06, |
|
"loss": 0.2496, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.27245508982035926, |
|
"grad_norm": 0.09227737784385681, |
|
"learning_rate": 9.590559040787168e-06, |
|
"loss": 0.2457, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.27395209580838326, |
|
"grad_norm": 0.10152530670166016, |
|
"learning_rate": 9.585851287128157e-06, |
|
"loss": 0.4295, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.2754491017964072, |
|
"grad_norm": 0.12347866594791412, |
|
"learning_rate": 9.581117792051487e-06, |
|
"loss": 0.5124, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.27694610778443113, |
|
"grad_norm": 0.09101920574903488, |
|
"learning_rate": 9.576358582127334e-06, |
|
"loss": 0.3037, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.27844311377245506, |
|
"grad_norm": 0.11212054640054703, |
|
"learning_rate": 9.57157368407022e-06, |
|
"loss": 0.4074, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.27994011976047906, |
|
"grad_norm": 0.12494377791881561, |
|
"learning_rate": 9.56676312473885e-06, |
|
"loss": 0.3777, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.281437125748503, |
|
"grad_norm": 0.09690834581851959, |
|
"learning_rate": 9.561926931135985e-06, |
|
"loss": 0.3416, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.28293413173652693, |
|
"grad_norm": 0.0795978531241417, |
|
"learning_rate": 9.557065130408267e-06, |
|
"loss": 0.214, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.2844311377245509, |
|
"grad_norm": 0.11251098662614822, |
|
"learning_rate": 9.552177749846083e-06, |
|
"loss": 0.3523, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.28592814371257486, |
|
"grad_norm": 0.11772778630256653, |
|
"learning_rate": 9.5472648168834e-06, |
|
"loss": 0.3147, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.2874251497005988, |
|
"grad_norm": 0.1310431957244873, |
|
"learning_rate": 9.542326359097619e-06, |
|
"loss": 0.3656, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.28892215568862273, |
|
"grad_norm": 0.10823767632246017, |
|
"learning_rate": 9.537362404209419e-06, |
|
"loss": 0.4717, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.2904191616766467, |
|
"grad_norm": 0.1049995943903923, |
|
"learning_rate": 9.532372980082598e-06, |
|
"loss": 0.3117, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.29191616766467066, |
|
"grad_norm": 0.10875418782234192, |
|
"learning_rate": 9.527358114723917e-06, |
|
"loss": 0.4098, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.2934131736526946, |
|
"grad_norm": 0.10218145698308945, |
|
"learning_rate": 9.522317836282949e-06, |
|
"loss": 0.3279, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.2949101796407186, |
|
"grad_norm": 0.13674795627593994, |
|
"learning_rate": 9.517252173051912e-06, |
|
"loss": 0.4229, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.2964071856287425, |
|
"grad_norm": 0.18106026947498322, |
|
"learning_rate": 9.512161153465518e-06, |
|
"loss": 0.4479, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.29790419161676646, |
|
"grad_norm": 0.1282949596643448, |
|
"learning_rate": 9.507044806100806e-06, |
|
"loss": 0.5003, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.2994011976047904, |
|
"grad_norm": 0.10633736848831177, |
|
"learning_rate": 9.501903159676993e-06, |
|
"loss": 0.3367, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3008982035928144, |
|
"grad_norm": 0.14237818121910095, |
|
"learning_rate": 9.496736243055293e-06, |
|
"loss": 0.5059, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.3023952095808383, |
|
"grad_norm": 0.14547429978847504, |
|
"learning_rate": 9.491544085238778e-06, |
|
"loss": 0.4906, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.30389221556886226, |
|
"grad_norm": 0.12434766441583633, |
|
"learning_rate": 9.486326715372201e-06, |
|
"loss": 0.4527, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.30538922155688625, |
|
"grad_norm": 0.11812443286180496, |
|
"learning_rate": 9.481084162741835e-06, |
|
"loss": 0.4048, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.3068862275449102, |
|
"grad_norm": 0.1250404566526413, |
|
"learning_rate": 9.475816456775313e-06, |
|
"loss": 0.48, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.3083832335329341, |
|
"grad_norm": 0.11976836621761322, |
|
"learning_rate": 9.470523627041452e-06, |
|
"loss": 0.3601, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.30988023952095806, |
|
"grad_norm": 0.10545119643211365, |
|
"learning_rate": 9.465205703250105e-06, |
|
"loss": 0.332, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.31137724550898205, |
|
"grad_norm": 0.12188291549682617, |
|
"learning_rate": 9.459862715251973e-06, |
|
"loss": 0.4266, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.312874251497006, |
|
"grad_norm": 0.17091204226016998, |
|
"learning_rate": 9.454494693038455e-06, |
|
"loss": 0.3737, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.3143712574850299, |
|
"grad_norm": 0.10543544590473175, |
|
"learning_rate": 9.44910166674147e-06, |
|
"loss": 0.291, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3158682634730539, |
|
"grad_norm": 0.12531983852386475, |
|
"learning_rate": 9.44368366663329e-06, |
|
"loss": 0.3832, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.31736526946107785, |
|
"grad_norm": 0.12134228646755219, |
|
"learning_rate": 9.438240723126376e-06, |
|
"loss": 0.2531, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.3188622754491018, |
|
"grad_norm": 0.12980221211910248, |
|
"learning_rate": 9.43277286677319e-06, |
|
"loss": 0.3047, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.3203592814371258, |
|
"grad_norm": 0.13721132278442383, |
|
"learning_rate": 9.427280128266049e-06, |
|
"loss": 0.3008, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.3218562874251497, |
|
"grad_norm": 0.12022245675325394, |
|
"learning_rate": 9.421762538436933e-06, |
|
"loss": 0.4336, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.32335329341317365, |
|
"grad_norm": 0.13329699635505676, |
|
"learning_rate": 9.416220128257317e-06, |
|
"loss": 0.4004, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.3248502994011976, |
|
"grad_norm": 0.10903549939393997, |
|
"learning_rate": 9.410652928837998e-06, |
|
"loss": 0.2668, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.3263473053892216, |
|
"grad_norm": 0.11044671386480331, |
|
"learning_rate": 9.405060971428924e-06, |
|
"loss": 0.2933, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.3278443113772455, |
|
"grad_norm": 0.13679952919483185, |
|
"learning_rate": 9.399444287419012e-06, |
|
"loss": 0.4049, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.32934131736526945, |
|
"grad_norm": 0.1408192664384842, |
|
"learning_rate": 9.393802908335978e-06, |
|
"loss": 0.4256, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.33083832335329344, |
|
"grad_norm": 0.14757537841796875, |
|
"learning_rate": 9.388136865846153e-06, |
|
"loss": 0.4321, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.3323353293413174, |
|
"grad_norm": 0.09091170132160187, |
|
"learning_rate": 9.382446191754313e-06, |
|
"loss": 0.2193, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.3338323353293413, |
|
"grad_norm": 0.10384230315685272, |
|
"learning_rate": 9.376730918003495e-06, |
|
"loss": 0.2178, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.33532934131736525, |
|
"grad_norm": 0.163813054561615, |
|
"learning_rate": 9.370991076674821e-06, |
|
"loss": 0.3887, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.33682634730538924, |
|
"grad_norm": 0.129890576004982, |
|
"learning_rate": 9.36522669998731e-06, |
|
"loss": 0.3996, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3383233532934132, |
|
"grad_norm": 0.12134243547916412, |
|
"learning_rate": 9.359437820297716e-06, |
|
"loss": 0.3961, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.3398203592814371, |
|
"grad_norm": 0.142124205827713, |
|
"learning_rate": 9.353624470100321e-06, |
|
"loss": 0.3307, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.3413173652694611, |
|
"grad_norm": 0.11390755325555801, |
|
"learning_rate": 9.347786682026774e-06, |
|
"loss": 0.2885, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.34281437125748504, |
|
"grad_norm": 0.1425858587026596, |
|
"learning_rate": 9.341924488845892e-06, |
|
"loss": 0.3786, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.344311377245509, |
|
"grad_norm": 0.12148135900497437, |
|
"learning_rate": 9.336037923463494e-06, |
|
"loss": 0.4657, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3458083832335329, |
|
"grad_norm": 0.13119304180145264, |
|
"learning_rate": 9.330127018922195e-06, |
|
"loss": 0.4115, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.3473053892215569, |
|
"grad_norm": 0.12425543367862701, |
|
"learning_rate": 9.324191808401235e-06, |
|
"loss": 0.3414, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.34880239520958084, |
|
"grad_norm": 0.0954410657286644, |
|
"learning_rate": 9.31823232521629e-06, |
|
"loss": 0.1629, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.3502994011976048, |
|
"grad_norm": 0.14815054833889008, |
|
"learning_rate": 9.312248602819284e-06, |
|
"loss": 0.4772, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.35179640718562877, |
|
"grad_norm": 0.13234567642211914, |
|
"learning_rate": 9.306240674798203e-06, |
|
"loss": 0.3067, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.3532934131736527, |
|
"grad_norm": 0.10531976073980331, |
|
"learning_rate": 9.300208574876897e-06, |
|
"loss": 0.2415, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.35479041916167664, |
|
"grad_norm": 0.11296534538269043, |
|
"learning_rate": 9.294152336914907e-06, |
|
"loss": 0.2706, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.3562874251497006, |
|
"grad_norm": 0.16313917934894562, |
|
"learning_rate": 9.288071994907262e-06, |
|
"loss": 0.4391, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.35778443113772457, |
|
"grad_norm": 0.13125663995742798, |
|
"learning_rate": 9.281967582984292e-06, |
|
"loss": 0.4955, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.3592814371257485, |
|
"grad_norm": 0.1586073786020279, |
|
"learning_rate": 9.275839135411439e-06, |
|
"loss": 0.4565, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.36077844311377244, |
|
"grad_norm": 0.1046639159321785, |
|
"learning_rate": 9.269686686589063e-06, |
|
"loss": 0.2976, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.36227544910179643, |
|
"grad_norm": 0.15066610276699066, |
|
"learning_rate": 9.263510271052243e-06, |
|
"loss": 0.4158, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.36377245508982037, |
|
"grad_norm": 0.16172873973846436, |
|
"learning_rate": 9.257309923470596e-06, |
|
"loss": 0.3956, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.3652694610778443, |
|
"grad_norm": 0.18391874432563782, |
|
"learning_rate": 9.251085678648072e-06, |
|
"loss": 0.5078, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.36676646706586824, |
|
"grad_norm": 0.1277581751346588, |
|
"learning_rate": 9.244837571522758e-06, |
|
"loss": 0.2947, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.36826347305389223, |
|
"grad_norm": 0.16508063673973083, |
|
"learning_rate": 9.238565637166692e-06, |
|
"loss": 0.4845, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.36976047904191617, |
|
"grad_norm": 0.13770635426044464, |
|
"learning_rate": 9.232269910785651e-06, |
|
"loss": 0.413, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.3712574850299401, |
|
"grad_norm": 0.16841325163841248, |
|
"learning_rate": 9.225950427718974e-06, |
|
"loss": 0.4875, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.3727544910179641, |
|
"grad_norm": 0.16400140523910522, |
|
"learning_rate": 9.219607223439343e-06, |
|
"loss": 0.436, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.37425149700598803, |
|
"grad_norm": 0.18191541731357574, |
|
"learning_rate": 9.213240333552589e-06, |
|
"loss": 0.4318, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.37574850299401197, |
|
"grad_norm": 0.1227971538901329, |
|
"learning_rate": 9.206849793797508e-06, |
|
"loss": 0.3745, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.3772455089820359, |
|
"grad_norm": 0.13938282430171967, |
|
"learning_rate": 9.200435640045637e-06, |
|
"loss": 0.2679, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.3787425149700599, |
|
"grad_norm": 0.190107524394989, |
|
"learning_rate": 9.193997908301069e-06, |
|
"loss": 0.4332, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.38023952095808383, |
|
"grad_norm": 0.14340659976005554, |
|
"learning_rate": 9.187536634700244e-06, |
|
"loss": 0.3839, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.38173652694610777, |
|
"grad_norm": 0.14708517491817474, |
|
"learning_rate": 9.181051855511749e-06, |
|
"loss": 0.4324, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.38323353293413176, |
|
"grad_norm": 0.1463197022676468, |
|
"learning_rate": 9.174543607136111e-06, |
|
"loss": 0.4632, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.3847305389221557, |
|
"grad_norm": 0.1626986563205719, |
|
"learning_rate": 9.168011926105598e-06, |
|
"loss": 0.4655, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.38622754491017963, |
|
"grad_norm": 0.13527408242225647, |
|
"learning_rate": 9.161456849084007e-06, |
|
"loss": 0.3422, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.38772455089820357, |
|
"grad_norm": 0.1195373684167862, |
|
"learning_rate": 9.154878412866465e-06, |
|
"loss": 0.2734, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.38922155688622756, |
|
"grad_norm": 0.16617508232593536, |
|
"learning_rate": 9.14827665437922e-06, |
|
"loss": 0.3768, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3907185628742515, |
|
"grad_norm": 0.09267516434192657, |
|
"learning_rate": 9.141651610679427e-06, |
|
"loss": 0.1452, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.39221556886227543, |
|
"grad_norm": 0.13747261464595795, |
|
"learning_rate": 9.135003318954954e-06, |
|
"loss": 0.3527, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.3937125748502994, |
|
"grad_norm": 0.1347353607416153, |
|
"learning_rate": 9.12833181652416e-06, |
|
"loss": 0.3078, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.39520958083832336, |
|
"grad_norm": 0.10927855968475342, |
|
"learning_rate": 9.121637140835696e-06, |
|
"loss": 0.2403, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.3967065868263473, |
|
"grad_norm": 0.14766469597816467, |
|
"learning_rate": 9.114919329468283e-06, |
|
"loss": 0.4768, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.39820359281437123, |
|
"grad_norm": 0.17013344168663025, |
|
"learning_rate": 9.108178420130514e-06, |
|
"loss": 0.4459, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.3997005988023952, |
|
"grad_norm": 0.16462120413780212, |
|
"learning_rate": 9.101414450660633e-06, |
|
"loss": 0.3027, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.40119760479041916, |
|
"grad_norm": 0.15908250212669373, |
|
"learning_rate": 9.094627459026326e-06, |
|
"loss": 0.4534, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.4026946107784431, |
|
"grad_norm": 0.11128409951925278, |
|
"learning_rate": 9.087817483324507e-06, |
|
"loss": 0.2694, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.4041916167664671, |
|
"grad_norm": 0.15646331012248993, |
|
"learning_rate": 9.08098456178111e-06, |
|
"loss": 0.3605, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.405688622754491, |
|
"grad_norm": 0.17366009950637817, |
|
"learning_rate": 9.074128732750859e-06, |
|
"loss": 0.3855, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.40718562874251496, |
|
"grad_norm": 0.204384908080101, |
|
"learning_rate": 9.067250034717072e-06, |
|
"loss": 0.4084, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.4086826347305389, |
|
"grad_norm": 0.11996804177761078, |
|
"learning_rate": 9.060348506291432e-06, |
|
"loss": 0.2457, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.4101796407185629, |
|
"grad_norm": 0.18958471715450287, |
|
"learning_rate": 9.053424186213776e-06, |
|
"loss": 0.3692, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.4116766467065868, |
|
"grad_norm": 0.16402485966682434, |
|
"learning_rate": 9.046477113351871e-06, |
|
"loss": 0.3062, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.41317365269461076, |
|
"grad_norm": 0.1784193068742752, |
|
"learning_rate": 9.039507326701207e-06, |
|
"loss": 0.4527, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.41467065868263475, |
|
"grad_norm": 0.12558554112911224, |
|
"learning_rate": 9.032514865384767e-06, |
|
"loss": 0.2494, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.4161676646706587, |
|
"grad_norm": 0.1545354127883911, |
|
"learning_rate": 9.025499768652817e-06, |
|
"loss": 0.4017, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.4176646706586826, |
|
"grad_norm": 0.18937596678733826, |
|
"learning_rate": 9.018462075882673e-06, |
|
"loss": 0.4244, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.41916167664670656, |
|
"grad_norm": 0.1287037581205368, |
|
"learning_rate": 9.011401826578492e-06, |
|
"loss": 0.2686, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.42065868263473055, |
|
"grad_norm": 0.11539726704359055, |
|
"learning_rate": 9.00431906037105e-06, |
|
"loss": 0.3171, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.4221556886227545, |
|
"grad_norm": 0.15977144241333008, |
|
"learning_rate": 8.997213817017508e-06, |
|
"loss": 0.3253, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.4236526946107784, |
|
"grad_norm": 0.1216585785150528, |
|
"learning_rate": 8.990086136401199e-06, |
|
"loss": 0.2122, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.4251497005988024, |
|
"grad_norm": 0.13447438180446625, |
|
"learning_rate": 8.982936058531403e-06, |
|
"loss": 0.3615, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.42664670658682635, |
|
"grad_norm": 0.12826186418533325, |
|
"learning_rate": 8.975763623543121e-06, |
|
"loss": 0.368, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.4281437125748503, |
|
"grad_norm": 0.12770900130271912, |
|
"learning_rate": 8.968568871696847e-06, |
|
"loss": 0.2612, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.4296407185628742, |
|
"grad_norm": 0.23060199618339539, |
|
"learning_rate": 8.961351843378349e-06, |
|
"loss": 0.3835, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.4311377245508982, |
|
"grad_norm": 0.11921186745166779, |
|
"learning_rate": 8.95411257909843e-06, |
|
"loss": 0.2427, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.43263473053892215, |
|
"grad_norm": 0.13262028992176056, |
|
"learning_rate": 8.946851119492717e-06, |
|
"loss": 0.25, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.4341317365269461, |
|
"grad_norm": 0.14844539761543274, |
|
"learning_rate": 8.939567505321418e-06, |
|
"loss": 0.3788, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4356287425149701, |
|
"grad_norm": 0.23767727613449097, |
|
"learning_rate": 8.932261777469105e-06, |
|
"loss": 0.3833, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.437125748502994, |
|
"grad_norm": 0.18635216355323792, |
|
"learning_rate": 8.924933976944474e-06, |
|
"loss": 0.3438, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.43862275449101795, |
|
"grad_norm": 0.2509608566761017, |
|
"learning_rate": 8.917584144880124e-06, |
|
"loss": 0.3578, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.44011976047904194, |
|
"grad_norm": 0.2058945745229721, |
|
"learning_rate": 8.910212322532317e-06, |
|
"loss": 0.3542, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.4416167664670659, |
|
"grad_norm": 0.17262375354766846, |
|
"learning_rate": 8.902818551280758e-06, |
|
"loss": 0.4504, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.4431137724550898, |
|
"grad_norm": 0.1604480743408203, |
|
"learning_rate": 8.895402872628352e-06, |
|
"loss": 0.2744, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.44461077844311375, |
|
"grad_norm": 0.14820334315299988, |
|
"learning_rate": 8.887965328200975e-06, |
|
"loss": 0.3349, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.44610778443113774, |
|
"grad_norm": 0.15106992423534393, |
|
"learning_rate": 8.880505959747245e-06, |
|
"loss": 0.2446, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.4476047904191617, |
|
"grad_norm": 0.10235021263360977, |
|
"learning_rate": 8.873024809138272e-06, |
|
"loss": 0.2411, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.4491017964071856, |
|
"grad_norm": 0.1582055240869522, |
|
"learning_rate": 8.86552191836745e-06, |
|
"loss": 0.4247, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4505988023952096, |
|
"grad_norm": 0.1799710988998413, |
|
"learning_rate": 8.857997329550195e-06, |
|
"loss": 0.4035, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.45209580838323354, |
|
"grad_norm": 0.17223840951919556, |
|
"learning_rate": 8.850451084923717e-06, |
|
"loss": 0.2541, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.4535928143712575, |
|
"grad_norm": 0.21316657960414886, |
|
"learning_rate": 8.842883226846792e-06, |
|
"loss": 0.5618, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.4550898203592814, |
|
"grad_norm": 0.19103510677814484, |
|
"learning_rate": 8.835293797799517e-06, |
|
"loss": 0.3129, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.4565868263473054, |
|
"grad_norm": 0.14965221285820007, |
|
"learning_rate": 8.827682840383065e-06, |
|
"loss": 0.4006, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.45808383233532934, |
|
"grad_norm": 0.2670743465423584, |
|
"learning_rate": 8.82005039731946e-06, |
|
"loss": 0.4408, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.4595808383233533, |
|
"grad_norm": 0.1590014398097992, |
|
"learning_rate": 8.812396511451324e-06, |
|
"loss": 0.3762, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.46107784431137727, |
|
"grad_norm": 0.27246150374412537, |
|
"learning_rate": 8.804721225741646e-06, |
|
"loss": 0.4455, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.4625748502994012, |
|
"grad_norm": 0.1429322361946106, |
|
"learning_rate": 8.797024583273536e-06, |
|
"loss": 0.1568, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.46407185628742514, |
|
"grad_norm": 0.14144589006900787, |
|
"learning_rate": 8.789306627249985e-06, |
|
"loss": 0.2598, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4655688622754491, |
|
"grad_norm": 0.1922285109758377, |
|
"learning_rate": 8.781567400993617e-06, |
|
"loss": 0.3722, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.46706586826347307, |
|
"grad_norm": 0.15292677283287048, |
|
"learning_rate": 8.77380694794646e-06, |
|
"loss": 0.3738, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.468562874251497, |
|
"grad_norm": 0.16995134949684143, |
|
"learning_rate": 8.766025311669685e-06, |
|
"loss": 0.4324, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.47005988023952094, |
|
"grad_norm": 0.2128669172525406, |
|
"learning_rate": 8.75822253584337e-06, |
|
"loss": 0.4218, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.47155688622754494, |
|
"grad_norm": 0.13214029371738434, |
|
"learning_rate": 8.75039866426626e-06, |
|
"loss": 0.3243, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.47305389221556887, |
|
"grad_norm": 0.17079909145832062, |
|
"learning_rate": 8.742553740855507e-06, |
|
"loss": 0.3964, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.4745508982035928, |
|
"grad_norm": 0.12262991815805435, |
|
"learning_rate": 8.734687809646437e-06, |
|
"loss": 0.2449, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.47604790419161674, |
|
"grad_norm": 0.14602302014827728, |
|
"learning_rate": 8.726800914792296e-06, |
|
"loss": 0.2784, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.47754491017964074, |
|
"grad_norm": 0.22383739054203033, |
|
"learning_rate": 8.718893100564002e-06, |
|
"loss": 0.337, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.47904191616766467, |
|
"grad_norm": 0.102297842502594, |
|
"learning_rate": 8.710964411349902e-06, |
|
"loss": 0.2467, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4805389221556886, |
|
"grad_norm": 0.1584213376045227, |
|
"learning_rate": 8.703014891655518e-06, |
|
"loss": 0.4198, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.4820359281437126, |
|
"grad_norm": 0.1733960062265396, |
|
"learning_rate": 8.695044586103297e-06, |
|
"loss": 0.3446, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.48353293413173654, |
|
"grad_norm": 0.18755660951137543, |
|
"learning_rate": 8.687053539432358e-06, |
|
"loss": 0.3846, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.48502994011976047, |
|
"grad_norm": 0.13098154962062836, |
|
"learning_rate": 8.679041796498253e-06, |
|
"loss": 0.2938, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.4865269461077844, |
|
"grad_norm": 0.14376209676265717, |
|
"learning_rate": 8.6710094022727e-06, |
|
"loss": 0.2437, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.4880239520958084, |
|
"grad_norm": 0.17319519817829132, |
|
"learning_rate": 8.66295640184334e-06, |
|
"loss": 0.3055, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.48952095808383234, |
|
"grad_norm": 0.168416827917099, |
|
"learning_rate": 8.65488284041348e-06, |
|
"loss": 0.3925, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.49101796407185627, |
|
"grad_norm": 0.16502583026885986, |
|
"learning_rate": 8.646788763301842e-06, |
|
"loss": 0.3697, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.49251497005988026, |
|
"grad_norm": 0.13545477390289307, |
|
"learning_rate": 8.638674215942307e-06, |
|
"loss": 0.3017, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.4940119760479042, |
|
"grad_norm": 0.14142753183841705, |
|
"learning_rate": 8.630539243883659e-06, |
|
"loss": 0.3287, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.49550898203592814, |
|
"grad_norm": 0.17961451411247253, |
|
"learning_rate": 8.62238389278933e-06, |
|
"loss": 0.3309, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.49700598802395207, |
|
"grad_norm": 0.14708763360977173, |
|
"learning_rate": 8.61420820843715e-06, |
|
"loss": 0.3669, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.49850299401197606, |
|
"grad_norm": 0.21809980273246765, |
|
"learning_rate": 8.606012236719073e-06, |
|
"loss": 0.31, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.22139891982078552, |
|
"learning_rate": 8.59779602364094e-06, |
|
"loss": 0.4451, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 0.3353196680545807, |
|
"eval_runtime": 96.09, |
|
"eval_samples_per_second": 7.659, |
|
"eval_steps_per_second": 0.957, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.5014970059880239, |
|
"grad_norm": 0.13361559808254242, |
|
"learning_rate": 8.58955961532221e-06, |
|
"loss": 0.198, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.5029940119760479, |
|
"grad_norm": 0.17649687826633453, |
|
"learning_rate": 8.581303057995697e-06, |
|
"loss": 0.3268, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.5044910179640718, |
|
"grad_norm": 0.1601666957139969, |
|
"learning_rate": 8.573026398007323e-06, |
|
"loss": 0.4069, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.5059880239520959, |
|
"grad_norm": 0.189590185880661, |
|
"learning_rate": 8.564729681815846e-06, |
|
"loss": 0.3302, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.5074850299401198, |
|
"grad_norm": 0.18572555482387543, |
|
"learning_rate": 8.556412955992604e-06, |
|
"loss": 0.4025, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.5089820359281437, |
|
"grad_norm": 0.1558508276939392, |
|
"learning_rate": 8.548076267221258e-06, |
|
"loss": 0.396, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5104790419161677, |
|
"grad_norm": 0.19267967343330383, |
|
"learning_rate": 8.539719662297519e-06, |
|
"loss": 0.418, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.5119760479041916, |
|
"grad_norm": 0.13435742259025574, |
|
"learning_rate": 8.531343188128896e-06, |
|
"loss": 0.266, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.5134730538922155, |
|
"grad_norm": 0.2617822289466858, |
|
"learning_rate": 8.52294689173443e-06, |
|
"loss": 0.2821, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.5149700598802395, |
|
"grad_norm": 0.17945440113544464, |
|
"learning_rate": 8.514530820244427e-06, |
|
"loss": 0.3387, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.5164670658682635, |
|
"grad_norm": 0.13977111876010895, |
|
"learning_rate": 8.506095020900192e-06, |
|
"loss": 0.3336, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.5179640718562875, |
|
"grad_norm": 0.17441023886203766, |
|
"learning_rate": 8.497639541053769e-06, |
|
"loss": 0.3207, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.5194610778443114, |
|
"grad_norm": 0.16159437596797943, |
|
"learning_rate": 8.489164428167677e-06, |
|
"loss": 0.395, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.5209580838323353, |
|
"grad_norm": 0.14497527480125427, |
|
"learning_rate": 8.480669729814635e-06, |
|
"loss": 0.3077, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.5224550898203593, |
|
"grad_norm": 0.17960812151432037, |
|
"learning_rate": 8.472155493677299e-06, |
|
"loss": 0.3512, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.5239520958083832, |
|
"grad_norm": 0.11662229150533676, |
|
"learning_rate": 8.463621767547998e-06, |
|
"loss": 0.1755, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5254491017964071, |
|
"grad_norm": 0.17493830621242523, |
|
"learning_rate": 8.455068599328462e-06, |
|
"loss": 0.4597, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.5269461077844312, |
|
"grad_norm": 0.15957583487033844, |
|
"learning_rate": 8.446496037029555e-06, |
|
"loss": 0.3225, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.5284431137724551, |
|
"grad_norm": 0.18921981751918793, |
|
"learning_rate": 8.437904128770999e-06, |
|
"loss": 0.3952, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.5299401197604791, |
|
"grad_norm": 0.15334290266036987, |
|
"learning_rate": 8.429292922781115e-06, |
|
"loss": 0.4056, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.531437125748503, |
|
"grad_norm": 0.16442593932151794, |
|
"learning_rate": 8.420662467396548e-06, |
|
"loss": 0.3826, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.5329341317365269, |
|
"grad_norm": 0.17233920097351074, |
|
"learning_rate": 8.412012811061985e-06, |
|
"loss": 0.2516, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.5344311377245509, |
|
"grad_norm": 0.16326873004436493, |
|
"learning_rate": 8.403344002329901e-06, |
|
"loss": 0.3362, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.5359281437125748, |
|
"grad_norm": 0.20600523054599762, |
|
"learning_rate": 8.394656089860274e-06, |
|
"loss": 0.3266, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.5374251497005988, |
|
"grad_norm": 0.19378286600112915, |
|
"learning_rate": 8.385949122420318e-06, |
|
"loss": 0.3118, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.5389221556886228, |
|
"grad_norm": 0.17837578058242798, |
|
"learning_rate": 8.377223148884202e-06, |
|
"loss": 0.3373, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5404191616766467, |
|
"grad_norm": 0.17606143653392792, |
|
"learning_rate": 8.368478218232787e-06, |
|
"loss": 0.2534, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.5419161676646707, |
|
"grad_norm": 0.15157122910022736, |
|
"learning_rate": 8.359714379553338e-06, |
|
"loss": 0.1722, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.5434131736526946, |
|
"grad_norm": 0.23432698845863342, |
|
"learning_rate": 8.350931682039262e-06, |
|
"loss": 0.4558, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.5449101796407185, |
|
"grad_norm": 0.1647920310497284, |
|
"learning_rate": 8.342130174989819e-06, |
|
"loss": 0.3593, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.5464071856287425, |
|
"grad_norm": 0.17406417429447174, |
|
"learning_rate": 8.333309907809852e-06, |
|
"loss": 0.2793, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.5479041916167665, |
|
"grad_norm": 0.1906813532114029, |
|
"learning_rate": 8.324470930009514e-06, |
|
"loss": 0.4096, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.5494011976047904, |
|
"grad_norm": 0.1391858011484146, |
|
"learning_rate": 8.315613291203977e-06, |
|
"loss": 0.3213, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.5508982035928144, |
|
"grad_norm": 0.20214834809303284, |
|
"learning_rate": 8.306737041113169e-06, |
|
"loss": 0.3283, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.5523952095808383, |
|
"grad_norm": 0.2006218433380127, |
|
"learning_rate": 8.29784222956148e-06, |
|
"loss": 0.3451, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.5538922155688623, |
|
"grad_norm": 0.21113942563533783, |
|
"learning_rate": 8.288928906477497e-06, |
|
"loss": 0.3681, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5553892215568862, |
|
"grad_norm": 0.1564386785030365, |
|
"learning_rate": 8.279997121893713e-06, |
|
"loss": 0.2562, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.5568862275449101, |
|
"grad_norm": 0.18803362548351288, |
|
"learning_rate": 8.271046925946247e-06, |
|
"loss": 0.2923, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.5583832335329342, |
|
"grad_norm": 0.15986989438533783, |
|
"learning_rate": 8.262078368874566e-06, |
|
"loss": 0.3348, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.5598802395209581, |
|
"grad_norm": 0.18634189665317535, |
|
"learning_rate": 8.25309150102121e-06, |
|
"loss": 0.3538, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.561377245508982, |
|
"grad_norm": 0.18781672418117523, |
|
"learning_rate": 8.244086372831492e-06, |
|
"loss": 0.3305, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.562874251497006, |
|
"grad_norm": 0.22481568157672882, |
|
"learning_rate": 8.235063034853228e-06, |
|
"loss": 0.3972, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.5643712574850299, |
|
"grad_norm": 0.1664562225341797, |
|
"learning_rate": 8.226021537736449e-06, |
|
"loss": 0.5461, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.5658682634730539, |
|
"grad_norm": 0.133816659450531, |
|
"learning_rate": 8.216961932233118e-06, |
|
"loss": 0.1557, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5673652694610778, |
|
"grad_norm": 0.23958347737789154, |
|
"learning_rate": 8.207884269196845e-06, |
|
"loss": 0.4871, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5688622754491018, |
|
"grad_norm": 0.1876394897699356, |
|
"learning_rate": 8.198788599582596e-06, |
|
"loss": 0.2644, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5703592814371258, |
|
"grad_norm": 0.18707850575447083, |
|
"learning_rate": 8.189674974446423e-06, |
|
"loss": 0.3008, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5718562874251497, |
|
"grad_norm": 0.21212701499462128, |
|
"learning_rate": 8.180543444945154e-06, |
|
"loss": 0.4367, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5733532934131736, |
|
"grad_norm": 0.22406694293022156, |
|
"learning_rate": 8.171394062336127e-06, |
|
"loss": 0.3987, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5748502994011976, |
|
"grad_norm": 0.1596011221408844, |
|
"learning_rate": 8.162226877976886e-06, |
|
"loss": 0.3971, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.5763473053892215, |
|
"grad_norm": 0.19374164938926697, |
|
"learning_rate": 8.153041943324912e-06, |
|
"loss": 0.2919, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.5778443113772455, |
|
"grad_norm": 0.17940644919872284, |
|
"learning_rate": 8.143839309937307e-06, |
|
"loss": 0.268, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.5793413173652695, |
|
"grad_norm": 0.2111639380455017, |
|
"learning_rate": 8.134619029470535e-06, |
|
"loss": 0.3569, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.5808383233532934, |
|
"grad_norm": 0.19277545809745789, |
|
"learning_rate": 8.125381153680103e-06, |
|
"loss": 0.5122, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.5823353293413174, |
|
"grad_norm": 0.1731099933385849, |
|
"learning_rate": 8.116125734420297e-06, |
|
"loss": 0.3467, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.5838323353293413, |
|
"grad_norm": 0.1366906315088272, |
|
"learning_rate": 8.10685282364387e-06, |
|
"loss": 0.2594, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5853293413173652, |
|
"grad_norm": 0.22104769945144653, |
|
"learning_rate": 8.097562473401764e-06, |
|
"loss": 0.3096, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.5868263473053892, |
|
"grad_norm": 0.17460548877716064, |
|
"learning_rate": 8.088254735842808e-06, |
|
"loss": 0.2145, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.5883233532934131, |
|
"grad_norm": 0.20823459327220917, |
|
"learning_rate": 8.078929663213432e-06, |
|
"loss": 0.2993, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.5898203592814372, |
|
"grad_norm": 0.15346956253051758, |
|
"learning_rate": 8.069587307857377e-06, |
|
"loss": 0.2628, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.5913173652694611, |
|
"grad_norm": 0.24852439761161804, |
|
"learning_rate": 8.060227722215385e-06, |
|
"loss": 0.3749, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.592814371257485, |
|
"grad_norm": 0.17771868407726288, |
|
"learning_rate": 8.050850958824926e-06, |
|
"loss": 0.4067, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.594311377245509, |
|
"grad_norm": 0.2503078281879425, |
|
"learning_rate": 8.041457070319884e-06, |
|
"loss": 0.4665, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.5958083832335329, |
|
"grad_norm": 0.26984649896621704, |
|
"learning_rate": 8.032046109430276e-06, |
|
"loss": 0.3278, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.5973053892215568, |
|
"grad_norm": 0.25289425253868103, |
|
"learning_rate": 8.02261812898195e-06, |
|
"loss": 0.2592, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.5988023952095808, |
|
"grad_norm": 0.1754600703716278, |
|
"learning_rate": 8.013173181896283e-06, |
|
"loss": 0.2665, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6002994011976048, |
|
"grad_norm": 0.22235795855522156, |
|
"learning_rate": 8.003711321189895e-06, |
|
"loss": 0.2513, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.6017964071856288, |
|
"grad_norm": 0.36419808864593506, |
|
"learning_rate": 7.994232599974346e-06, |
|
"loss": 0.4715, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.6032934131736527, |
|
"grad_norm": 0.166230246424675, |
|
"learning_rate": 7.984737071455834e-06, |
|
"loss": 0.2649, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.6047904191616766, |
|
"grad_norm": 0.27781379222869873, |
|
"learning_rate": 7.975224788934903e-06, |
|
"loss": 0.3881, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.6062874251497006, |
|
"grad_norm": 0.18180640041828156, |
|
"learning_rate": 7.965695805806141e-06, |
|
"loss": 0.1962, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.6077844311377245, |
|
"grad_norm": 0.1591605544090271, |
|
"learning_rate": 7.95615017555788e-06, |
|
"loss": 0.2133, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.6092814371257484, |
|
"grad_norm": 0.16064637899398804, |
|
"learning_rate": 7.946587951771894e-06, |
|
"loss": 0.3003, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.6107784431137725, |
|
"grad_norm": 0.22653892636299133, |
|
"learning_rate": 7.937009188123102e-06, |
|
"loss": 0.3814, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.6122754491017964, |
|
"grad_norm": 0.19928650557994843, |
|
"learning_rate": 7.927413938379268e-06, |
|
"loss": 0.3206, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.6137724550898204, |
|
"grad_norm": 0.2117091864347458, |
|
"learning_rate": 7.917802256400688e-06, |
|
"loss": 0.4773, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6152694610778443, |
|
"grad_norm": 0.1339869201183319, |
|
"learning_rate": 7.908174196139907e-06, |
|
"loss": 0.1727, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.6167664670658682, |
|
"grad_norm": 0.1618313491344452, |
|
"learning_rate": 7.898529811641393e-06, |
|
"loss": 0.3444, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.6182634730538922, |
|
"grad_norm": 0.1486457735300064, |
|
"learning_rate": 7.888869157041257e-06, |
|
"loss": 0.2223, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.6197604790419161, |
|
"grad_norm": 0.19519484043121338, |
|
"learning_rate": 7.879192286566929e-06, |
|
"loss": 0.426, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.6212574850299402, |
|
"grad_norm": 0.20848150551319122, |
|
"learning_rate": 7.869499254536865e-06, |
|
"loss": 0.3986, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.6227544910179641, |
|
"grad_norm": 0.21209968626499176, |
|
"learning_rate": 7.859790115360243e-06, |
|
"loss": 0.3407, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.624251497005988, |
|
"grad_norm": 0.23103849589824677, |
|
"learning_rate": 7.850064923536649e-06, |
|
"loss": 0.4584, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.625748502994012, |
|
"grad_norm": 0.16797013580799103, |
|
"learning_rate": 7.84032373365578e-06, |
|
"loss": 0.3119, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.6272455089820359, |
|
"grad_norm": 0.16341273486614227, |
|
"learning_rate": 7.83056660039713e-06, |
|
"loss": 0.2981, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.6287425149700598, |
|
"grad_norm": 0.19721773266792297, |
|
"learning_rate": 7.82079357852969e-06, |
|
"loss": 0.4564, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6302395209580839, |
|
"grad_norm": 0.18293291330337524, |
|
"learning_rate": 7.811004722911637e-06, |
|
"loss": 0.437, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.6317365269461078, |
|
"grad_norm": 0.18640290200710297, |
|
"learning_rate": 7.801200088490026e-06, |
|
"loss": 0.3639, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.6332335329341318, |
|
"grad_norm": 0.22067849338054657, |
|
"learning_rate": 7.791379730300476e-06, |
|
"loss": 0.4027, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.6347305389221557, |
|
"grad_norm": 0.13944509625434875, |
|
"learning_rate": 7.781543703466881e-06, |
|
"loss": 0.2165, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.6362275449101796, |
|
"grad_norm": 0.187015101313591, |
|
"learning_rate": 7.771692063201072e-06, |
|
"loss": 0.2628, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.6377245508982036, |
|
"grad_norm": 0.23688088357448578, |
|
"learning_rate": 7.76182486480253e-06, |
|
"loss": 0.3946, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.6392215568862275, |
|
"grad_norm": 0.22390404343605042, |
|
"learning_rate": 7.751942163658066e-06, |
|
"loss": 0.3308, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.6407185628742516, |
|
"grad_norm": 0.26804405450820923, |
|
"learning_rate": 7.742044015241508e-06, |
|
"loss": 0.3903, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.6422155688622755, |
|
"grad_norm": 0.14200535416603088, |
|
"learning_rate": 7.7321304751134e-06, |
|
"loss": 0.3099, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.6437125748502994, |
|
"grad_norm": 0.17333939671516418, |
|
"learning_rate": 7.722201598920673e-06, |
|
"loss": 0.3332, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.6452095808383234, |
|
"grad_norm": 0.1965278834104538, |
|
"learning_rate": 7.712257442396355e-06, |
|
"loss": 0.344, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.6467065868263473, |
|
"grad_norm": 0.18956130743026733, |
|
"learning_rate": 7.702298061359236e-06, |
|
"loss": 0.3551, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.6482035928143712, |
|
"grad_norm": 0.2348889261484146, |
|
"learning_rate": 7.692323511713568e-06, |
|
"loss": 0.2989, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.6497005988023952, |
|
"grad_norm": 0.23371171951293945, |
|
"learning_rate": 7.682333849448749e-06, |
|
"loss": 0.4422, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.6511976047904192, |
|
"grad_norm": 0.18746024370193481, |
|
"learning_rate": 7.672329130639007e-06, |
|
"loss": 0.2378, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.6526946107784432, |
|
"grad_norm": 0.21659070253372192, |
|
"learning_rate": 7.662309411443084e-06, |
|
"loss": 0.3655, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.6541916167664671, |
|
"grad_norm": 0.24405577778816223, |
|
"learning_rate": 7.652274748103924e-06, |
|
"loss": 0.4455, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.655688622754491, |
|
"grad_norm": 0.2173536866903305, |
|
"learning_rate": 7.642225196948357e-06, |
|
"loss": 0.3585, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.657185628742515, |
|
"grad_norm": 0.21628788113594055, |
|
"learning_rate": 7.63216081438678e-06, |
|
"loss": 0.2962, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.6586826347305389, |
|
"grad_norm": 0.1556631177663803, |
|
"learning_rate": 7.622081656912842e-06, |
|
"loss": 0.1789, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6601796407185628, |
|
"grad_norm": 0.21247422695159912, |
|
"learning_rate": 7.611987781103128e-06, |
|
"loss": 0.243, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.6616766467065869, |
|
"grad_norm": 0.16574670374393463, |
|
"learning_rate": 7.601879243616838e-06, |
|
"loss": 0.2957, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.6631736526946108, |
|
"grad_norm": 0.19471777975559235, |
|
"learning_rate": 7.5917561011954755e-06, |
|
"loss": 0.2709, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.6646706586826348, |
|
"grad_norm": 0.2093854397535324, |
|
"learning_rate": 7.581618410662519e-06, |
|
"loss": 0.3786, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.6661676646706587, |
|
"grad_norm": 0.1995527446269989, |
|
"learning_rate": 7.571466228923115e-06, |
|
"loss": 0.282, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.6676646706586826, |
|
"grad_norm": 0.20918631553649902, |
|
"learning_rate": 7.56129961296375e-06, |
|
"loss": 0.3328, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.6691616766467066, |
|
"grad_norm": 0.3352525532245636, |
|
"learning_rate": 7.551118619851929e-06, |
|
"loss": 0.5474, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.6706586826347305, |
|
"grad_norm": 0.16128239035606384, |
|
"learning_rate": 7.540923306735868e-06, |
|
"loss": 0.2257, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.6721556886227545, |
|
"grad_norm": 0.19775696098804474, |
|
"learning_rate": 7.530713730844153e-06, |
|
"loss": 0.399, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.6736526946107785, |
|
"grad_norm": 0.16437461972236633, |
|
"learning_rate": 7.5204899494854415e-06, |
|
"loss": 0.3229, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6751497005988024, |
|
"grad_norm": 0.1884375959634781, |
|
"learning_rate": 7.510252020048121e-06, |
|
"loss": 0.2652, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.6766467065868264, |
|
"grad_norm": 0.22506847977638245, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.293, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.6781437125748503, |
|
"grad_norm": 0.24144884943962097, |
|
"learning_rate": 7.489733946887982e-06, |
|
"loss": 0.2653, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.6796407185628742, |
|
"grad_norm": 0.2532469928264618, |
|
"learning_rate": 7.479453918337733e-06, |
|
"loss": 0.3346, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.6811377245508982, |
|
"grad_norm": 0.20879314839839935, |
|
"learning_rate": 7.469159972053377e-06, |
|
"loss": 0.4094, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.6826347305389222, |
|
"grad_norm": 0.3141922354698181, |
|
"learning_rate": 7.458852165817153e-06, |
|
"loss": 0.5201, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.6841317365269461, |
|
"grad_norm": 0.22677458822727203, |
|
"learning_rate": 7.448530557489105e-06, |
|
"loss": 0.2836, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.6856287425149701, |
|
"grad_norm": 0.20220258831977844, |
|
"learning_rate": 7.438195205006749e-06, |
|
"loss": 0.331, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.687125748502994, |
|
"grad_norm": 0.22227731347084045, |
|
"learning_rate": 7.427846166384747e-06, |
|
"loss": 0.3344, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.688622754491018, |
|
"grad_norm": 0.2184399664402008, |
|
"learning_rate": 7.417483499714589e-06, |
|
"loss": 0.2932, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6901197604790419, |
|
"grad_norm": 0.16358090937137604, |
|
"learning_rate": 7.40710726316426e-06, |
|
"loss": 0.2363, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.6916167664670658, |
|
"grad_norm": 0.25432059168815613, |
|
"learning_rate": 7.396717514977916e-06, |
|
"loss": 0.3201, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.6931137724550899, |
|
"grad_norm": 0.30511903762817383, |
|
"learning_rate": 7.386314313475557e-06, |
|
"loss": 0.4459, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.6946107784431138, |
|
"grad_norm": 0.26864519715309143, |
|
"learning_rate": 7.3758977170527e-06, |
|
"loss": 0.2934, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.6961077844311377, |
|
"grad_norm": 0.21390244364738464, |
|
"learning_rate": 7.365467784180051e-06, |
|
"loss": 0.1999, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.6976047904191617, |
|
"grad_norm": 0.17398962378501892, |
|
"learning_rate": 7.355024573403174e-06, |
|
"loss": 0.3066, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.6991017964071856, |
|
"grad_norm": 0.3039548695087433, |
|
"learning_rate": 7.3445681433421675e-06, |
|
"loss": 0.2559, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.7005988023952096, |
|
"grad_norm": 0.24252189695835114, |
|
"learning_rate": 7.3340985526913335e-06, |
|
"loss": 0.2586, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.7020958083832335, |
|
"grad_norm": 0.25120341777801514, |
|
"learning_rate": 7.323615860218844e-06, |
|
"loss": 0.3042, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.7035928143712575, |
|
"grad_norm": 0.38001149892807007, |
|
"learning_rate": 7.313120124766417e-06, |
|
"loss": 0.3586, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7050898203592815, |
|
"grad_norm": 0.20646889507770538, |
|
"learning_rate": 7.30261140524898e-06, |
|
"loss": 0.2903, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.7065868263473054, |
|
"grad_norm": 0.26575809717178345, |
|
"learning_rate": 7.292089760654352e-06, |
|
"loss": 0.2822, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.7080838323353293, |
|
"grad_norm": 0.18457849323749542, |
|
"learning_rate": 7.281555250042893e-06, |
|
"loss": 0.3372, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.7095808383233533, |
|
"grad_norm": 0.16875137388706207, |
|
"learning_rate": 7.271007932547188e-06, |
|
"loss": 0.2172, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.7110778443113772, |
|
"grad_norm": 0.22017961740493774, |
|
"learning_rate": 7.2604478673717095e-06, |
|
"loss": 0.3113, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.7125748502994012, |
|
"grad_norm": 0.31057754158973694, |
|
"learning_rate": 7.249875113792485e-06, |
|
"loss": 0.3059, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.7140718562874252, |
|
"grad_norm": 0.22742144763469696, |
|
"learning_rate": 7.239289731156767e-06, |
|
"loss": 0.4326, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.7155688622754491, |
|
"grad_norm": 0.19704562425613403, |
|
"learning_rate": 7.2286917788826926e-06, |
|
"loss": 0.3139, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.7170658682634731, |
|
"grad_norm": 0.18587222695350647, |
|
"learning_rate": 7.218081316458959e-06, |
|
"loss": 0.3197, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.718562874251497, |
|
"grad_norm": 0.2095022052526474, |
|
"learning_rate": 7.207458403444488e-06, |
|
"loss": 0.3275, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7200598802395209, |
|
"grad_norm": 0.2593580186367035, |
|
"learning_rate": 7.196823099468084e-06, |
|
"loss": 0.2981, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.7215568862275449, |
|
"grad_norm": 0.2533735930919647, |
|
"learning_rate": 7.186175464228109e-06, |
|
"loss": 0.3067, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.7230538922155688, |
|
"grad_norm": 0.16174565255641937, |
|
"learning_rate": 7.175515557492139e-06, |
|
"loss": 0.2737, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.7245508982035929, |
|
"grad_norm": 0.2620272934436798, |
|
"learning_rate": 7.1648434390966356e-06, |
|
"loss": 0.2914, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.7260479041916168, |
|
"grad_norm": 0.27633848786354065, |
|
"learning_rate": 7.154159168946607e-06, |
|
"loss": 0.4116, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.7275449101796407, |
|
"grad_norm": 0.242568239569664, |
|
"learning_rate": 7.143462807015271e-06, |
|
"loss": 0.357, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.7290419161676647, |
|
"grad_norm": 0.18494291603565216, |
|
"learning_rate": 7.132754413343721e-06, |
|
"loss": 0.4115, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.7305389221556886, |
|
"grad_norm": 0.3592863976955414, |
|
"learning_rate": 7.122034048040586e-06, |
|
"loss": 0.3621, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.7320359281437125, |
|
"grad_norm": 0.19837193191051483, |
|
"learning_rate": 7.111301771281692e-06, |
|
"loss": 0.2239, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.7335329341317365, |
|
"grad_norm": 0.18162252008914948, |
|
"learning_rate": 7.100557643309732e-06, |
|
"loss": 0.2108, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.7350299401197605, |
|
"grad_norm": 0.17453975975513458, |
|
"learning_rate": 7.089801724433918e-06, |
|
"loss": 0.2776, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.7365269461077845, |
|
"grad_norm": 0.21002128720283508, |
|
"learning_rate": 7.079034075029651e-06, |
|
"loss": 0.2821, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.7380239520958084, |
|
"grad_norm": 0.2540780007839203, |
|
"learning_rate": 7.0682547555381734e-06, |
|
"loss": 0.368, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.7395209580838323, |
|
"grad_norm": 0.28589752316474915, |
|
"learning_rate": 7.057463826466235e-06, |
|
"loss": 0.3174, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.7410179640718563, |
|
"grad_norm": 0.2610958516597748, |
|
"learning_rate": 7.0466613483857615e-06, |
|
"loss": 0.2885, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.7425149700598802, |
|
"grad_norm": 0.13012602925300598, |
|
"learning_rate": 7.035847381933494e-06, |
|
"loss": 0.1997, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.7440119760479041, |
|
"grad_norm": 0.18959909677505493, |
|
"learning_rate": 7.025021987810664e-06, |
|
"loss": 0.273, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.7455089820359282, |
|
"grad_norm": 0.20305494964122772, |
|
"learning_rate": 7.014185226782655e-06, |
|
"loss": 0.3904, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.7470059880239521, |
|
"grad_norm": 0.2603055536746979, |
|
"learning_rate": 7.003337159678649e-06, |
|
"loss": 0.3786, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.7485029940119761, |
|
"grad_norm": 0.23833239078521729, |
|
"learning_rate": 6.992477847391292e-06, |
|
"loss": 0.2107, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.20404885709285736, |
|
"learning_rate": 6.981607350876357e-06, |
|
"loss": 0.2572, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.3029465675354004, |
|
"eval_runtime": 95.9587, |
|
"eval_samples_per_second": 7.67, |
|
"eval_steps_per_second": 0.959, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.7514970059880239, |
|
"grad_norm": 0.23784396052360535, |
|
"learning_rate": 6.970725731152389e-06, |
|
"loss": 0.2849, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.7529940119760479, |
|
"grad_norm": 0.23206007480621338, |
|
"learning_rate": 6.959833049300376e-06, |
|
"loss": 0.3136, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.7544910179640718, |
|
"grad_norm": 0.19017294049263, |
|
"learning_rate": 6.948929366463397e-06, |
|
"loss": 0.2339, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.7559880239520959, |
|
"grad_norm": 0.21896733343601227, |
|
"learning_rate": 6.938014743846285e-06, |
|
"loss": 0.3311, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.7574850299401198, |
|
"grad_norm": 0.20357593894004822, |
|
"learning_rate": 6.927089242715277e-06, |
|
"loss": 0.2871, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.7589820359281437, |
|
"grad_norm": 0.2007334679365158, |
|
"learning_rate": 6.916152924397676e-06, |
|
"loss": 0.2615, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.7604790419161677, |
|
"grad_norm": 0.284795880317688, |
|
"learning_rate": 6.905205850281502e-06, |
|
"loss": 0.4047, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.7619760479041916, |
|
"grad_norm": 0.25428882241249084, |
|
"learning_rate": 6.894248081815155e-06, |
|
"loss": 0.3606, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.7634730538922155, |
|
"grad_norm": 0.2195863127708435, |
|
"learning_rate": 6.883279680507057e-06, |
|
"loss": 0.4163, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7649700598802395, |
|
"grad_norm": 0.19675293564796448, |
|
"learning_rate": 6.872300707925319e-06, |
|
"loss": 0.3199, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.7664670658682635, |
|
"grad_norm": 0.16144664585590363, |
|
"learning_rate": 6.861311225697392e-06, |
|
"loss": 0.2514, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.7679640718562875, |
|
"grad_norm": 0.2618183493614197, |
|
"learning_rate": 6.850311295509719e-06, |
|
"loss": 0.298, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.7694610778443114, |
|
"grad_norm": 0.2717672288417816, |
|
"learning_rate": 6.8393009791073895e-06, |
|
"loss": 0.4989, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.7709580838323353, |
|
"grad_norm": 0.24221351742744446, |
|
"learning_rate": 6.828280338293792e-06, |
|
"loss": 0.3629, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.7724550898203593, |
|
"grad_norm": 0.18478509783744812, |
|
"learning_rate": 6.817249434930267e-06, |
|
"loss": 0.1947, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.7739520958083832, |
|
"grad_norm": 0.2637442648410797, |
|
"learning_rate": 6.806208330935766e-06, |
|
"loss": 0.4101, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.7754491017964071, |
|
"grad_norm": 0.2035938799381256, |
|
"learning_rate": 6.7951570882864944e-06, |
|
"loss": 0.2898, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.7769461077844312, |
|
"grad_norm": 0.15909993648529053, |
|
"learning_rate": 6.784095769015573e-06, |
|
"loss": 0.2155, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.7784431137724551, |
|
"grad_norm": 0.27301713824272156, |
|
"learning_rate": 6.773024435212678e-06, |
|
"loss": 0.5023, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7799401197604791, |
|
"grad_norm": 0.2372361719608307, |
|
"learning_rate": 6.761943149023706e-06, |
|
"loss": 0.2363, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.781437125748503, |
|
"grad_norm": 0.2195385992527008, |
|
"learning_rate": 6.750851972650416e-06, |
|
"loss": 0.261, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.7829341317365269, |
|
"grad_norm": 0.22001579403877258, |
|
"learning_rate": 6.739750968350081e-06, |
|
"loss": 0.2957, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.7844311377245509, |
|
"grad_norm": 0.17113597691059113, |
|
"learning_rate": 6.728640198435143e-06, |
|
"loss": 0.2352, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.7859281437125748, |
|
"grad_norm": 0.21069037914276123, |
|
"learning_rate": 6.717519725272859e-06, |
|
"loss": 0.2657, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.7874251497005988, |
|
"grad_norm": 0.22734540700912476, |
|
"learning_rate": 6.706389611284953e-06, |
|
"loss": 0.2924, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.7889221556886228, |
|
"grad_norm": 0.30117544531822205, |
|
"learning_rate": 6.6952499189472665e-06, |
|
"loss": 0.2928, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.7904191616766467, |
|
"grad_norm": 0.1863144040107727, |
|
"learning_rate": 6.684100710789405e-06, |
|
"loss": 0.2685, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.7919161676646707, |
|
"grad_norm": 0.28917059302330017, |
|
"learning_rate": 6.6729420493943875e-06, |
|
"loss": 0.3043, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.7934131736526946, |
|
"grad_norm": 0.24295863509178162, |
|
"learning_rate": 6.6617739973982985e-06, |
|
"loss": 0.3415, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.7949101796407185, |
|
"grad_norm": 0.16020585596561432, |
|
"learning_rate": 6.6505966174899326e-06, |
|
"loss": 0.1827, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.7964071856287425, |
|
"grad_norm": 0.23983198404312134, |
|
"learning_rate": 6.639409972410446e-06, |
|
"loss": 0.341, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.7979041916167665, |
|
"grad_norm": 0.1791592538356781, |
|
"learning_rate": 6.628214124952999e-06, |
|
"loss": 0.232, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.7994011976047904, |
|
"grad_norm": 0.22436876595020294, |
|
"learning_rate": 6.617009137962407e-06, |
|
"loss": 0.3309, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.8008982035928144, |
|
"grad_norm": 0.20181064307689667, |
|
"learning_rate": 6.605795074334793e-06, |
|
"loss": 0.4341, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.8023952095808383, |
|
"grad_norm": 0.26198679208755493, |
|
"learning_rate": 6.594571997017224e-06, |
|
"loss": 0.3157, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.8038922155688623, |
|
"grad_norm": 0.182569220662117, |
|
"learning_rate": 6.583339969007364e-06, |
|
"loss": 0.1959, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.8053892215568862, |
|
"grad_norm": 0.23903851211071014, |
|
"learning_rate": 6.57209905335312e-06, |
|
"loss": 0.3192, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.8068862275449101, |
|
"grad_norm": 0.26894667744636536, |
|
"learning_rate": 6.560849313152287e-06, |
|
"loss": 0.2271, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.8083832335329342, |
|
"grad_norm": 0.24519504606723785, |
|
"learning_rate": 6.549590811552193e-06, |
|
"loss": 0.2898, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8098802395209581, |
|
"grad_norm": 0.25122636556625366, |
|
"learning_rate": 6.538323611749351e-06, |
|
"loss": 0.4239, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.811377245508982, |
|
"grad_norm": 0.21076048910617828, |
|
"learning_rate": 6.5270477769890906e-06, |
|
"loss": 0.3781, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.812874251497006, |
|
"grad_norm": 0.17968085408210754, |
|
"learning_rate": 6.515763370565218e-06, |
|
"loss": 0.2529, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.8143712574850299, |
|
"grad_norm": 0.37112146615982056, |
|
"learning_rate": 6.504470455819651e-06, |
|
"loss": 0.4063, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.8158682634730539, |
|
"grad_norm": 0.25475284457206726, |
|
"learning_rate": 6.493169096142068e-06, |
|
"loss": 0.2871, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.8173652694610778, |
|
"grad_norm": 0.17944371700286865, |
|
"learning_rate": 6.481859354969549e-06, |
|
"loss": 0.2466, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.8188622754491018, |
|
"grad_norm": 0.22027306258678436, |
|
"learning_rate": 6.470541295786222e-06, |
|
"loss": 0.31, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.8203592814371258, |
|
"grad_norm": 0.21977883577346802, |
|
"learning_rate": 6.4592149821229064e-06, |
|
"loss": 0.2706, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.8218562874251497, |
|
"grad_norm": 0.2781427502632141, |
|
"learning_rate": 6.447880477556757e-06, |
|
"loss": 0.3271, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.8233532934131736, |
|
"grad_norm": 0.1752307116985321, |
|
"learning_rate": 6.436537845710904e-06, |
|
"loss": 0.2063, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8248502994011976, |
|
"grad_norm": 0.18458420038223267, |
|
"learning_rate": 6.425187150254097e-06, |
|
"loss": 0.2665, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.8263473053892215, |
|
"grad_norm": 0.25183022022247314, |
|
"learning_rate": 6.413828454900351e-06, |
|
"loss": 0.3594, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.8278443113772455, |
|
"grad_norm": 0.21946825087070465, |
|
"learning_rate": 6.402461823408584e-06, |
|
"loss": 0.2277, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.8293413173652695, |
|
"grad_norm": 0.2452111393213272, |
|
"learning_rate": 6.391087319582264e-06, |
|
"loss": 0.3541, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.8308383233532934, |
|
"grad_norm": 0.20177938044071198, |
|
"learning_rate": 6.379705007269046e-06, |
|
"loss": 0.2672, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.8323353293413174, |
|
"grad_norm": 0.23342332243919373, |
|
"learning_rate": 6.368314950360416e-06, |
|
"loss": 0.3052, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.8338323353293413, |
|
"grad_norm": 0.1889418214559555, |
|
"learning_rate": 6.356917212791332e-06, |
|
"loss": 0.1696, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.8353293413173652, |
|
"grad_norm": 0.21418286859989166, |
|
"learning_rate": 6.3455118585398676e-06, |
|
"loss": 0.298, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.8368263473053892, |
|
"grad_norm": 0.28259849548339844, |
|
"learning_rate": 6.334098951626847e-06, |
|
"loss": 0.3329, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.8383233532934131, |
|
"grad_norm": 0.20598739385604858, |
|
"learning_rate": 6.3226785561154914e-06, |
|
"loss": 0.3018, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.8398203592814372, |
|
"grad_norm": 0.22180384397506714, |
|
"learning_rate": 6.311250736111058e-06, |
|
"loss": 0.3665, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.8413173652694611, |
|
"grad_norm": 0.25343960523605347, |
|
"learning_rate": 6.299815555760478e-06, |
|
"loss": 0.4075, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.842814371257485, |
|
"grad_norm": 0.20312725007534027, |
|
"learning_rate": 6.288373079251996e-06, |
|
"loss": 0.2432, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.844311377245509, |
|
"grad_norm": 0.19411613047122955, |
|
"learning_rate": 6.276923370814815e-06, |
|
"loss": 0.268, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.8458083832335329, |
|
"grad_norm": 0.2687308192253113, |
|
"learning_rate": 6.265466494718731e-06, |
|
"loss": 0.224, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.8473053892215568, |
|
"grad_norm": 0.25632116198539734, |
|
"learning_rate": 6.254002515273775e-06, |
|
"loss": 0.2666, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.8488023952095808, |
|
"grad_norm": 0.18523173034191132, |
|
"learning_rate": 6.242531496829848e-06, |
|
"loss": 0.2491, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.8502994011976048, |
|
"grad_norm": 0.5198162198066711, |
|
"learning_rate": 6.231053503776363e-06, |
|
"loss": 0.3999, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.8517964071856288, |
|
"grad_norm": 0.302183598279953, |
|
"learning_rate": 6.219568600541886e-06, |
|
"loss": 0.2657, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.8532934131736527, |
|
"grad_norm": 0.25520554184913635, |
|
"learning_rate": 6.208076851593768e-06, |
|
"loss": 0.3349, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.8547904191616766, |
|
"grad_norm": 0.2884499132633209, |
|
"learning_rate": 6.1965783214377895e-06, |
|
"loss": 0.2423, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.8562874251497006, |
|
"grad_norm": 0.20894049108028412, |
|
"learning_rate": 6.185073074617793e-06, |
|
"loss": 0.287, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.8577844311377245, |
|
"grad_norm": 0.2774272859096527, |
|
"learning_rate": 6.173561175715323e-06, |
|
"loss": 0.3144, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.8592814371257484, |
|
"grad_norm": 0.20034471154212952, |
|
"learning_rate": 6.1620426893492645e-06, |
|
"loss": 0.2634, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.8607784431137725, |
|
"grad_norm": 0.28622931241989136, |
|
"learning_rate": 6.150517680175482e-06, |
|
"loss": 0.3312, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.8622754491017964, |
|
"grad_norm": 0.15074674785137177, |
|
"learning_rate": 6.13898621288645e-06, |
|
"loss": 0.1718, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.8637724550898204, |
|
"grad_norm": 0.24118809401988983, |
|
"learning_rate": 6.127448352210894e-06, |
|
"loss": 0.4585, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.8652694610778443, |
|
"grad_norm": 0.24849183857440948, |
|
"learning_rate": 6.115904162913431e-06, |
|
"loss": 0.2945, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.8667664670658682, |
|
"grad_norm": 0.1957549899816513, |
|
"learning_rate": 6.1043537097941985e-06, |
|
"loss": 0.2029, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.8682634730538922, |
|
"grad_norm": 0.22294564545154572, |
|
"learning_rate": 6.092797057688496e-06, |
|
"loss": 0.2482, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.8697604790419161, |
|
"grad_norm": 0.22241006791591644, |
|
"learning_rate": 6.081234271466416e-06, |
|
"loss": 0.2561, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.8712574850299402, |
|
"grad_norm": 0.19968393445014954, |
|
"learning_rate": 6.0696654160324875e-06, |
|
"loss": 0.232, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.8727544910179641, |
|
"grad_norm": 0.23499524593353271, |
|
"learning_rate": 6.058090556325305e-06, |
|
"loss": 0.2794, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.874251497005988, |
|
"grad_norm": 0.23651576042175293, |
|
"learning_rate": 6.046509757317168e-06, |
|
"loss": 0.4183, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.875748502994012, |
|
"grad_norm": 0.1924506276845932, |
|
"learning_rate": 6.034923084013713e-06, |
|
"loss": 0.2618, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.8772455089820359, |
|
"grad_norm": 0.23315298557281494, |
|
"learning_rate": 6.0233306014535505e-06, |
|
"loss": 0.2963, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.8787425149700598, |
|
"grad_norm": 0.19225433468818665, |
|
"learning_rate": 6.0117323747079e-06, |
|
"loss": 0.229, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.8802395209580839, |
|
"grad_norm": 0.1963253617286682, |
|
"learning_rate": 6.000128468880223e-06, |
|
"loss": 0.2386, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.8817365269461078, |
|
"grad_norm": 0.1923033744096756, |
|
"learning_rate": 5.988518949105862e-06, |
|
"loss": 0.2318, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.8832335329341318, |
|
"grad_norm": 0.20783311128616333, |
|
"learning_rate": 5.976903880551669e-06, |
|
"loss": 0.2633, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8847305389221557, |
|
"grad_norm": 0.23813506960868835, |
|
"learning_rate": 5.965283328415644e-06, |
|
"loss": 0.2994, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.8862275449101796, |
|
"grad_norm": 0.31241193413734436, |
|
"learning_rate": 5.953657357926569e-06, |
|
"loss": 0.3336, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.8877245508982036, |
|
"grad_norm": 0.20485684275627136, |
|
"learning_rate": 5.942026034343636e-06, |
|
"loss": 0.2006, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.8892215568862275, |
|
"grad_norm": 0.23816922307014465, |
|
"learning_rate": 5.930389422956088e-06, |
|
"loss": 0.3098, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.8907185628742516, |
|
"grad_norm": 0.23085439205169678, |
|
"learning_rate": 5.918747589082853e-06, |
|
"loss": 0.2973, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.8922155688622755, |
|
"grad_norm": 0.22914822399616241, |
|
"learning_rate": 5.907100598072166e-06, |
|
"loss": 0.2719, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.8937125748502994, |
|
"grad_norm": 0.2522124946117401, |
|
"learning_rate": 5.895448515301218e-06, |
|
"loss": 0.2547, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.8952095808383234, |
|
"grad_norm": 0.20887671411037445, |
|
"learning_rate": 5.883791406175775e-06, |
|
"loss": 0.2112, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.8967065868263473, |
|
"grad_norm": 0.35924533009529114, |
|
"learning_rate": 5.872129336129821e-06, |
|
"loss": 0.2568, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.8982035928143712, |
|
"grad_norm": 0.18609894812107086, |
|
"learning_rate": 5.860462370625189e-06, |
|
"loss": 0.2106, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8997005988023952, |
|
"grad_norm": 0.19369381666183472, |
|
"learning_rate": 5.848790575151181e-06, |
|
"loss": 0.2701, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.9011976047904192, |
|
"grad_norm": 0.25602883100509644, |
|
"learning_rate": 5.837114015224223e-06, |
|
"loss": 0.4195, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.9026946107784432, |
|
"grad_norm": 0.2999608516693115, |
|
"learning_rate": 5.8254327563874794e-06, |
|
"loss": 0.4362, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.9041916167664671, |
|
"grad_norm": 0.29497092962265015, |
|
"learning_rate": 5.813746864210489e-06, |
|
"loss": 0.3599, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.905688622754491, |
|
"grad_norm": 0.27466148138046265, |
|
"learning_rate": 5.8020564042888015e-06, |
|
"loss": 0.2137, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.907185628742515, |
|
"grad_norm": 0.1898701786994934, |
|
"learning_rate": 5.790361442243605e-06, |
|
"loss": 0.2576, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.9086826347305389, |
|
"grad_norm": 0.2565309405326843, |
|
"learning_rate": 5.778662043721359e-06, |
|
"loss": 0.2865, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.9101796407185628, |
|
"grad_norm": 0.19474193453788757, |
|
"learning_rate": 5.766958274393428e-06, |
|
"loss": 0.2358, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.9116766467065869, |
|
"grad_norm": 0.2232877016067505, |
|
"learning_rate": 5.7552501999557065e-06, |
|
"loss": 0.2392, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.9131736526946108, |
|
"grad_norm": 0.19385279715061188, |
|
"learning_rate": 5.743537886128258e-06, |
|
"loss": 0.196, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.9146706586826348, |
|
"grad_norm": 0.15841762721538544, |
|
"learning_rate": 5.731821398654944e-06, |
|
"loss": 0.179, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.9161676646706587, |
|
"grad_norm": 0.17763687670230865, |
|
"learning_rate": 5.72010080330305e-06, |
|
"loss": 0.1782, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.9176646706586826, |
|
"grad_norm": 0.21254192292690277, |
|
"learning_rate": 5.708376165862921e-06, |
|
"loss": 0.219, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.9191616766467066, |
|
"grad_norm": 0.1851448267698288, |
|
"learning_rate": 5.696647552147589e-06, |
|
"loss": 0.1812, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.9206586826347305, |
|
"grad_norm": 0.23409013450145721, |
|
"learning_rate": 5.684915027992415e-06, |
|
"loss": 0.2064, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.9221556886227545, |
|
"grad_norm": 0.249491885304451, |
|
"learning_rate": 5.673178659254698e-06, |
|
"loss": 0.2612, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.9236526946107785, |
|
"grad_norm": 0.25312793254852295, |
|
"learning_rate": 5.661438511813324e-06, |
|
"loss": 0.2762, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.9251497005988024, |
|
"grad_norm": 0.23058243095874786, |
|
"learning_rate": 5.64969465156839e-06, |
|
"loss": 0.2876, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.9266467065868264, |
|
"grad_norm": 0.19073981046676636, |
|
"learning_rate": 5.637947144440832e-06, |
|
"loss": 0.2177, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.9281437125748503, |
|
"grad_norm": 0.23557421565055847, |
|
"learning_rate": 5.626196056372056e-06, |
|
"loss": 0.3985, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.9296407185628742, |
|
"grad_norm": 0.18695135414600372, |
|
"learning_rate": 5.614441453323571e-06, |
|
"loss": 0.1995, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.9311377245508982, |
|
"grad_norm": 0.18378764390945435, |
|
"learning_rate": 5.6026834012766155e-06, |
|
"loss": 0.2558, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.9326347305389222, |
|
"grad_norm": 0.25633805990219116, |
|
"learning_rate": 5.590921966231788e-06, |
|
"loss": 0.3359, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.9341317365269461, |
|
"grad_norm": 0.27903831005096436, |
|
"learning_rate": 5.579157214208675e-06, |
|
"loss": 0.3387, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.9356287425149701, |
|
"grad_norm": 0.28710755705833435, |
|
"learning_rate": 5.567389211245486e-06, |
|
"loss": 0.2983, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.937125748502994, |
|
"grad_norm": 0.26144298911094666, |
|
"learning_rate": 5.555618023398671e-06, |
|
"loss": 0.2697, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.938622754491018, |
|
"grad_norm": 0.20057110488414764, |
|
"learning_rate": 5.5438437167425675e-06, |
|
"loss": 0.2945, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.9401197604790419, |
|
"grad_norm": 0.25036031007766724, |
|
"learning_rate": 5.532066357369012e-06, |
|
"loss": 0.3688, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.9416167664670658, |
|
"grad_norm": 0.23812642693519592, |
|
"learning_rate": 5.52028601138698e-06, |
|
"loss": 0.2967, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.9431137724550899, |
|
"grad_norm": 0.30330294370651245, |
|
"learning_rate": 5.508502744922212e-06, |
|
"loss": 0.3328, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.9446107784431138, |
|
"grad_norm": 0.2676711678504944, |
|
"learning_rate": 5.496716624116836e-06, |
|
"loss": 0.2188, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.9461077844311377, |
|
"grad_norm": 0.37458300590515137, |
|
"learning_rate": 5.484927715129011e-06, |
|
"loss": 0.265, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.9476047904191617, |
|
"grad_norm": 0.2473772168159485, |
|
"learning_rate": 5.4731360841325405e-06, |
|
"loss": 0.2214, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.9491017964071856, |
|
"grad_norm": 0.25858768820762634, |
|
"learning_rate": 5.46134179731651e-06, |
|
"loss": 0.2649, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.9505988023952096, |
|
"grad_norm": 0.25391316413879395, |
|
"learning_rate": 5.449544920884912e-06, |
|
"loss": 0.3284, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.9520958083832335, |
|
"grad_norm": 0.1776520311832428, |
|
"learning_rate": 5.437745521056272e-06, |
|
"loss": 0.1544, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.9535928143712575, |
|
"grad_norm": 0.2536100447177887, |
|
"learning_rate": 5.425943664063284e-06, |
|
"loss": 0.2558, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.9550898203592815, |
|
"grad_norm": 0.27622076869010925, |
|
"learning_rate": 5.414139416152435e-06, |
|
"loss": 0.2889, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.9565868263473054, |
|
"grad_norm": 0.19522008299827576, |
|
"learning_rate": 5.402332843583631e-06, |
|
"loss": 0.2173, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.9580838323353293, |
|
"grad_norm": 0.19289469718933105, |
|
"learning_rate": 5.390524012629824e-06, |
|
"loss": 0.3045, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.9595808383233533, |
|
"grad_norm": 0.21840733289718628, |
|
"learning_rate": 5.3787129895766484e-06, |
|
"loss": 0.3464, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.9610778443113772, |
|
"grad_norm": 0.3051481246948242, |
|
"learning_rate": 5.3668998407220385e-06, |
|
"loss": 0.3759, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.9625748502994012, |
|
"grad_norm": 0.21447789669036865, |
|
"learning_rate": 5.3550846323758666e-06, |
|
"loss": 0.2464, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.9640718562874252, |
|
"grad_norm": 0.25622251629829407, |
|
"learning_rate": 5.343267430859559e-06, |
|
"loss": 0.2389, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.9655688622754491, |
|
"grad_norm": 0.2348499894142151, |
|
"learning_rate": 5.331448302505736e-06, |
|
"loss": 0.2629, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.9670658682634731, |
|
"grad_norm": 0.2669554650783539, |
|
"learning_rate": 5.319627313657829e-06, |
|
"loss": 0.2561, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.968562874251497, |
|
"grad_norm": 0.30076301097869873, |
|
"learning_rate": 5.3078045306697154e-06, |
|
"loss": 0.4078, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.9700598802395209, |
|
"grad_norm": 0.23209704458713531, |
|
"learning_rate": 5.295980019905342e-06, |
|
"loss": 0.2687, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.9715568862275449, |
|
"grad_norm": 0.25834420323371887, |
|
"learning_rate": 5.284153847738356e-06, |
|
"loss": 0.2643, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.9730538922155688, |
|
"grad_norm": 0.27238282561302185, |
|
"learning_rate": 5.272326080551729e-06, |
|
"loss": 0.1859, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9745508982035929, |
|
"grad_norm": 0.21913808584213257, |
|
"learning_rate": 5.260496784737386e-06, |
|
"loss": 0.2564, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.9760479041916168, |
|
"grad_norm": 0.3546600043773651, |
|
"learning_rate": 5.248666026695835e-06, |
|
"loss": 0.4955, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.9775449101796407, |
|
"grad_norm": 0.20646998286247253, |
|
"learning_rate": 5.236833872835785e-06, |
|
"loss": 0.2132, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.9790419161676647, |
|
"grad_norm": 0.2146719992160797, |
|
"learning_rate": 5.2250003895737865e-06, |
|
"loss": 0.2016, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.9805389221556886, |
|
"grad_norm": 0.27419614791870117, |
|
"learning_rate": 5.213165643333851e-06, |
|
"loss": 0.3216, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.9820359281437125, |
|
"grad_norm": 0.3328908681869507, |
|
"learning_rate": 5.201329700547077e-06, |
|
"loss": 0.2583, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.9835329341317365, |
|
"grad_norm": 0.4016421437263489, |
|
"learning_rate": 5.1894926276512824e-06, |
|
"loss": 0.2865, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.9850299401197605, |
|
"grad_norm": 0.22060658037662506, |
|
"learning_rate": 5.177654491090627e-06, |
|
"loss": 0.3583, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.9865269461077845, |
|
"grad_norm": 0.2804822325706482, |
|
"learning_rate": 5.1658153573152405e-06, |
|
"loss": 0.4259, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.9880239520958084, |
|
"grad_norm": 0.35486575961112976, |
|
"learning_rate": 5.153975292780852e-06, |
|
"loss": 0.2605, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9895209580838323, |
|
"grad_norm": 0.32918664813041687, |
|
"learning_rate": 5.1421343639484165e-06, |
|
"loss": 0.3118, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.9910179640718563, |
|
"grad_norm": 0.3175852298736572, |
|
"learning_rate": 5.130292637283735e-06, |
|
"loss": 0.3451, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.9925149700598802, |
|
"grad_norm": 0.23685918748378754, |
|
"learning_rate": 5.118450179257091e-06, |
|
"loss": 0.3018, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.9940119760479041, |
|
"grad_norm": 0.32527291774749756, |
|
"learning_rate": 5.1066070563428736e-06, |
|
"loss": 0.5126, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.9955089820359282, |
|
"grad_norm": 0.2735530734062195, |
|
"learning_rate": 5.0947633350192035e-06, |
|
"loss": 0.2648, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.9970059880239521, |
|
"grad_norm": 0.2354782074689865, |
|
"learning_rate": 5.082919081767558e-06, |
|
"loss": 0.3093, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.9985029940119761, |
|
"grad_norm": 0.24940013885498047, |
|
"learning_rate": 5.071074363072403e-06, |
|
"loss": 0.2115, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.19145430624485016, |
|
"learning_rate": 5.059229245420819e-06, |
|
"loss": 0.2216, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.2865866720676422, |
|
"eval_runtime": 96.0569, |
|
"eval_samples_per_second": 7.662, |
|
"eval_steps_per_second": 0.958, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.001497005988024, |
|
"grad_norm": 0.3310520648956299, |
|
"learning_rate": 5.047383795302119e-06, |
|
"loss": 0.202, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.0029940119760479, |
|
"grad_norm": 0.21326784789562225, |
|
"learning_rate": 5.035538079207488e-06, |
|
"loss": 0.2274, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.0044910179640718, |
|
"grad_norm": 0.1706678867340088, |
|
"learning_rate": 5.023692163629603e-06, |
|
"loss": 0.1668, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.0059880239520957, |
|
"grad_norm": 0.2599492073059082, |
|
"learning_rate": 5.01184611506226e-06, |
|
"loss": 0.1964, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.0074850299401197, |
|
"grad_norm": 0.4854053556919098, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4238, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.0089820359281436, |
|
"grad_norm": 0.2861318290233612, |
|
"learning_rate": 4.988153884937742e-06, |
|
"loss": 0.3526, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.0104790419161678, |
|
"grad_norm": 0.3655579388141632, |
|
"learning_rate": 4.9763078363703975e-06, |
|
"loss": 0.3014, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.0119760479041917, |
|
"grad_norm": 0.22971998155117035, |
|
"learning_rate": 4.964461920792512e-06, |
|
"loss": 0.3414, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.0134730538922156, |
|
"grad_norm": 0.3435530364513397, |
|
"learning_rate": 4.952616204697882e-06, |
|
"loss": 0.316, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.0149700598802396, |
|
"grad_norm": 0.15627485513687134, |
|
"learning_rate": 4.940770754579183e-06, |
|
"loss": 0.1875, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.0164670658682635, |
|
"grad_norm": 0.34559714794158936, |
|
"learning_rate": 4.928925636927597e-06, |
|
"loss": 0.3933, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.0179640718562875, |
|
"grad_norm": 0.28742656111717224, |
|
"learning_rate": 4.917080918232444e-06, |
|
"loss": 0.4621, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.0194610778443114, |
|
"grad_norm": 0.21044401824474335, |
|
"learning_rate": 4.905236664980797e-06, |
|
"loss": 0.282, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.0209580838323353, |
|
"grad_norm": 0.2607158124446869, |
|
"learning_rate": 4.893392943657127e-06, |
|
"loss": 0.3244, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.001497005988024, |
|
"grad_norm": 0.2220315784215927, |
|
"learning_rate": 4.88154982074291e-06, |
|
"loss": 0.2843, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.0029940119760479, |
|
"grad_norm": 0.2902830243110657, |
|
"learning_rate": 4.8697073627162675e-06, |
|
"loss": 0.3467, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.0044910179640718, |
|
"grad_norm": 0.2681308388710022, |
|
"learning_rate": 4.857865636051586e-06, |
|
"loss": 0.2249, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.0059880239520957, |
|
"grad_norm": 0.2687780559062958, |
|
"learning_rate": 4.846024707219149e-06, |
|
"loss": 0.3296, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.0074850299401197, |
|
"grad_norm": 0.3211975693702698, |
|
"learning_rate": 4.834184642684762e-06, |
|
"loss": 0.2454, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.0089820359281436, |
|
"grad_norm": 0.27423396706581116, |
|
"learning_rate": 4.822345508909376e-06, |
|
"loss": 0.1711, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.0104790419161676, |
|
"grad_norm": 0.17680241167545319, |
|
"learning_rate": 4.810507372348721e-06, |
|
"loss": 0.2281, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.0119760479041917, |
|
"grad_norm": 0.299165278673172, |
|
"learning_rate": 4.798670299452926e-06, |
|
"loss": 0.2054, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.0134730538922156, |
|
"grad_norm": 0.181101456284523, |
|
"learning_rate": 4.786834356666153e-06, |
|
"loss": 0.1524, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.0149700598802396, |
|
"grad_norm": 0.28579655289649963, |
|
"learning_rate": 4.774999610426216e-06, |
|
"loss": 0.3557, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.0164670658682635, |
|
"grad_norm": 0.21840621531009674, |
|
"learning_rate": 4.7631661271642185e-06, |
|
"loss": 0.2558, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.0179640718562875, |
|
"grad_norm": 0.25449657440185547, |
|
"learning_rate": 4.751333973304166e-06, |
|
"loss": 0.4695, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.0194610778443114, |
|
"grad_norm": 0.2505941092967987, |
|
"learning_rate": 4.739503215262614e-06, |
|
"loss": 0.2168, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.0209580838323353, |
|
"grad_norm": 0.1709732562303543, |
|
"learning_rate": 4.727673919448271e-06, |
|
"loss": 0.2167, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.0224550898203593, |
|
"grad_norm": 0.2961471378803253, |
|
"learning_rate": 4.715846152261645e-06, |
|
"loss": 0.2312, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.0239520958083832, |
|
"grad_norm": 0.21817496418952942, |
|
"learning_rate": 4.704019980094659e-06, |
|
"loss": 0.2802, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.0254491017964071, |
|
"grad_norm": 0.2459694743156433, |
|
"learning_rate": 4.692195469330286e-06, |
|
"loss": 0.2981, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.026946107784431, |
|
"grad_norm": 0.24430765211582184, |
|
"learning_rate": 4.680372686342173e-06, |
|
"loss": 0.2718, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.028443113772455, |
|
"grad_norm": 0.27209728956222534, |
|
"learning_rate": 4.668551697494265e-06, |
|
"loss": 0.4779, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.029940119760479, |
|
"grad_norm": 0.2800404727458954, |
|
"learning_rate": 4.656732569140441e-06, |
|
"loss": 0.2695, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.031437125748503, |
|
"grad_norm": 0.21455827355384827, |
|
"learning_rate": 4.644915367624134e-06, |
|
"loss": 0.2927, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.032934131736527, |
|
"grad_norm": 0.3750689625740051, |
|
"learning_rate": 4.6331001592779615e-06, |
|
"loss": 0.3039, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.034431137724551, |
|
"grad_norm": 0.35561925172805786, |
|
"learning_rate": 4.621287010423353e-06, |
|
"loss": 0.3393, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.035928143712575, |
|
"grad_norm": 0.3378138244152069, |
|
"learning_rate": 4.609475987370177e-06, |
|
"loss": 0.3245, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.0374251497005988, |
|
"grad_norm": 0.24566203355789185, |
|
"learning_rate": 4.597667156416371e-06, |
|
"loss": 0.2596, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.0389221556886228, |
|
"grad_norm": 0.17309314012527466, |
|
"learning_rate": 4.585860583847566e-06, |
|
"loss": 0.2294, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.0404191616766467, |
|
"grad_norm": 0.3359794616699219, |
|
"learning_rate": 4.5740563359367164e-06, |
|
"loss": 0.4138, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.0419161676646707, |
|
"grad_norm": 0.24172459542751312, |
|
"learning_rate": 4.562254478943729e-06, |
|
"loss": 0.2675, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.0434131736526946, |
|
"grad_norm": 0.22163425385951996, |
|
"learning_rate": 4.550455079115091e-06, |
|
"loss": 0.4267, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.0449101796407185, |
|
"grad_norm": 0.2985590696334839, |
|
"learning_rate": 4.53865820268349e-06, |
|
"loss": 0.3657, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.0464071856287425, |
|
"grad_norm": 0.2542339861392975, |
|
"learning_rate": 4.52686391586746e-06, |
|
"loss": 0.2772, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.0479041916167664, |
|
"grad_norm": 0.253623902797699, |
|
"learning_rate": 4.51507228487099e-06, |
|
"loss": 0.4212, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.0494011976047903, |
|
"grad_norm": 0.3196866512298584, |
|
"learning_rate": 4.503283375883165e-06, |
|
"loss": 0.2925, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.0508982035928143, |
|
"grad_norm": 0.22368155419826508, |
|
"learning_rate": 4.49149725507779e-06, |
|
"loss": 0.2496, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.0523952095808382, |
|
"grad_norm": 0.32750844955444336, |
|
"learning_rate": 4.479713988613021e-06, |
|
"loss": 0.1986, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.0538922155688624, |
|
"grad_norm": 0.3797385096549988, |
|
"learning_rate": 4.467933642630989e-06, |
|
"loss": 0.2568, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.0553892215568863, |
|
"grad_norm": 0.3641880750656128, |
|
"learning_rate": 4.456156283257433e-06, |
|
"loss": 0.2927, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.0568862275449102, |
|
"grad_norm": 0.24843014776706696, |
|
"learning_rate": 4.44438197660133e-06, |
|
"loss": 0.2483, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.0583832335329342, |
|
"grad_norm": 0.22964823246002197, |
|
"learning_rate": 4.432610788754517e-06, |
|
"loss": 0.2175, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.0598802395209581, |
|
"grad_norm": 0.23855966329574585, |
|
"learning_rate": 4.420842785791326e-06, |
|
"loss": 0.3055, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.061377245508982, |
|
"grad_norm": 0.3656154274940491, |
|
"learning_rate": 4.409078033768214e-06, |
|
"loss": 0.3667, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.062874251497006, |
|
"grad_norm": 0.23534388840198517, |
|
"learning_rate": 4.397316598723385e-06, |
|
"loss": 0.2423, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.06437125748503, |
|
"grad_norm": 0.29222187399864197, |
|
"learning_rate": 4.3855585466764305e-06, |
|
"loss": 0.2366, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.0658682634730539, |
|
"grad_norm": 0.3136173188686371, |
|
"learning_rate": 4.373803943627946e-06, |
|
"loss": 0.2871, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.0673652694610778, |
|
"grad_norm": 0.24266062676906586, |
|
"learning_rate": 4.362052855559171e-06, |
|
"loss": 0.2742, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.0688622754491017, |
|
"grad_norm": 0.24898295104503632, |
|
"learning_rate": 4.350305348431612e-06, |
|
"loss": 0.3362, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.0703592814371257, |
|
"grad_norm": 0.28294843435287476, |
|
"learning_rate": 4.338561488186678e-06, |
|
"loss": 0.2887, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.0718562874251496, |
|
"grad_norm": 0.28065305948257446, |
|
"learning_rate": 4.326821340745304e-06, |
|
"loss": 0.3043, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.0733532934131738, |
|
"grad_norm": 0.23319000005722046, |
|
"learning_rate": 4.315084972007587e-06, |
|
"loss": 0.214, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.0748502994011977, |
|
"grad_norm": 0.2753102481365204, |
|
"learning_rate": 4.303352447852412e-06, |
|
"loss": 0.2536, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.0763473053892216, |
|
"grad_norm": 0.2678392827510834, |
|
"learning_rate": 4.291623834137082e-06, |
|
"loss": 0.3256, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.0778443113772456, |
|
"grad_norm": 0.39115357398986816, |
|
"learning_rate": 4.279899196696953e-06, |
|
"loss": 0.3188, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.0793413173652695, |
|
"grad_norm": 0.312159925699234, |
|
"learning_rate": 4.268178601345057e-06, |
|
"loss": 0.2588, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.0808383233532934, |
|
"grad_norm": 0.18832477927207947, |
|
"learning_rate": 4.256462113871741e-06, |
|
"loss": 0.2005, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.0823353293413174, |
|
"grad_norm": 0.3424656391143799, |
|
"learning_rate": 4.2447498000442935e-06, |
|
"loss": 0.2885, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.0838323353293413, |
|
"grad_norm": 0.1885494589805603, |
|
"learning_rate": 4.233041725606573e-06, |
|
"loss": 0.1686, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.0853293413173652, |
|
"grad_norm": 0.25505173206329346, |
|
"learning_rate": 4.2213379562786406e-06, |
|
"loss": 0.3603, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.0868263473053892, |
|
"grad_norm": 0.2631552815437317, |
|
"learning_rate": 4.209638557756396e-06, |
|
"loss": 0.3339, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.0883233532934131, |
|
"grad_norm": 0.2027507871389389, |
|
"learning_rate": 4.1979435957111984e-06, |
|
"loss": 0.4054, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.089820359281437, |
|
"grad_norm": 0.24192075431346893, |
|
"learning_rate": 4.186253135789511e-06, |
|
"loss": 0.319, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.091317365269461, |
|
"grad_norm": 0.23616907000541687, |
|
"learning_rate": 4.1745672436125205e-06, |
|
"loss": 0.2824, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.092814371257485, |
|
"grad_norm": 0.18234433233737946, |
|
"learning_rate": 4.162885984775777e-06, |
|
"loss": 0.1787, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.0943113772455089, |
|
"grad_norm": 0.2815669775009155, |
|
"learning_rate": 4.15120942484882e-06, |
|
"loss": 0.389, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.095808383233533, |
|
"grad_norm": 0.255751371383667, |
|
"learning_rate": 4.139537629374814e-06, |
|
"loss": 0.2521, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.097305389221557, |
|
"grad_norm": 0.25240209698677063, |
|
"learning_rate": 4.12787066387018e-06, |
|
"loss": 0.2305, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.098802395209581, |
|
"grad_norm": 0.24336189031600952, |
|
"learning_rate": 4.116208593824227e-06, |
|
"loss": 0.3735, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.1002994011976048, |
|
"grad_norm": 0.25405654311180115, |
|
"learning_rate": 4.104551484698785e-06, |
|
"loss": 0.1988, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.1017964071856288, |
|
"grad_norm": 0.3620491325855255, |
|
"learning_rate": 4.092899401927836e-06, |
|
"loss": 0.4211, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.1032934131736527, |
|
"grad_norm": 0.37368297576904297, |
|
"learning_rate": 4.081252410917148e-06, |
|
"loss": 0.346, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.1047904191616766, |
|
"grad_norm": 0.20550665259361267, |
|
"learning_rate": 4.069610577043912e-06, |
|
"loss": 0.2211, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.1062874251497006, |
|
"grad_norm": 0.20395252108573914, |
|
"learning_rate": 4.057973965656365e-06, |
|
"loss": 0.1803, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.1077844311377245, |
|
"grad_norm": 0.16643236577510834, |
|
"learning_rate": 4.046342642073433e-06, |
|
"loss": 0.202, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.1092814371257484, |
|
"grad_norm": 0.32340267300605774, |
|
"learning_rate": 4.034716671584357e-06, |
|
"loss": 0.2943, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.1107784431137724, |
|
"grad_norm": 0.218381866812706, |
|
"learning_rate": 4.0230961194483325e-06, |
|
"loss": 0.2293, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.1122754491017963, |
|
"grad_norm": 0.2537286579608917, |
|
"learning_rate": 4.01148105089414e-06, |
|
"loss": 0.2854, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.1137724550898203, |
|
"grad_norm": 0.24806788563728333, |
|
"learning_rate": 3.999871531119779e-06, |
|
"loss": 0.3641, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.1152694610778444, |
|
"grad_norm": 0.32125091552734375, |
|
"learning_rate": 3.988267625292102e-06, |
|
"loss": 0.3263, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.1167664670658684, |
|
"grad_norm": 0.23608674108982086, |
|
"learning_rate": 3.976669398546451e-06, |
|
"loss": 0.237, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.1182634730538923, |
|
"grad_norm": 0.2589946687221527, |
|
"learning_rate": 3.9650769159862875e-06, |
|
"loss": 0.3088, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.1197604790419162, |
|
"grad_norm": 0.2909379005432129, |
|
"learning_rate": 3.9534902426828325e-06, |
|
"loss": 0.2574, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.1212574850299402, |
|
"grad_norm": 0.1950826793909073, |
|
"learning_rate": 3.941909443674696e-06, |
|
"loss": 0.1856, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.122754491017964, |
|
"grad_norm": 0.21967299282550812, |
|
"learning_rate": 3.930334583967514e-06, |
|
"loss": 0.2938, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.124251497005988, |
|
"grad_norm": 0.225912407040596, |
|
"learning_rate": 3.918765728533586e-06, |
|
"loss": 0.1812, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.125748502994012, |
|
"grad_norm": 0.26577168703079224, |
|
"learning_rate": 3.907202942311506e-06, |
|
"loss": 0.3021, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.127245508982036, |
|
"grad_norm": 0.20895415544509888, |
|
"learning_rate": 3.895646290205803e-06, |
|
"loss": 0.2644, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.1287425149700598, |
|
"grad_norm": 0.2302289605140686, |
|
"learning_rate": 3.884095837086571e-06, |
|
"loss": 0.2647, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.1302395209580838, |
|
"grad_norm": 0.28787434101104736, |
|
"learning_rate": 3.872551647789108e-06, |
|
"loss": 0.3042, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.1317365269461077, |
|
"grad_norm": 0.2890431880950928, |
|
"learning_rate": 3.861013787113553e-06, |
|
"loss": 0.3888, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.1332335329341316, |
|
"grad_norm": 0.2250119298696518, |
|
"learning_rate": 3.849482319824521e-06, |
|
"loss": 0.3427, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.1347305389221556, |
|
"grad_norm": 0.3872185945510864, |
|
"learning_rate": 3.837957310650738e-06, |
|
"loss": 0.4126, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.1362275449101795, |
|
"grad_norm": 0.3162260353565216, |
|
"learning_rate": 3.82643882428468e-06, |
|
"loss": 0.2721, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.1377245508982037, |
|
"grad_norm": 0.2870970368385315, |
|
"learning_rate": 3.81492692538221e-06, |
|
"loss": 0.3195, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.1392215568862276, |
|
"grad_norm": 0.27217966318130493, |
|
"learning_rate": 3.803421678562213e-06, |
|
"loss": 0.394, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.1407185628742516, |
|
"grad_norm": 0.32144343852996826, |
|
"learning_rate": 3.7919231484062334e-06, |
|
"loss": 0.4021, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.1422155688622755, |
|
"grad_norm": 0.2571192979812622, |
|
"learning_rate": 3.7804313994581143e-06, |
|
"loss": 0.3301, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.1437125748502994, |
|
"grad_norm": 0.19559802114963531, |
|
"learning_rate": 3.7689464962236367e-06, |
|
"loss": 0.3152, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.1452095808383234, |
|
"grad_norm": 0.21712540090084076, |
|
"learning_rate": 3.757468503170153e-06, |
|
"loss": 0.2456, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.1467065868263473, |
|
"grad_norm": 0.2776012420654297, |
|
"learning_rate": 3.7459974847262253e-06, |
|
"loss": 0.3182, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.1482035928143712, |
|
"grad_norm": 0.2935360372066498, |
|
"learning_rate": 3.734533505281269e-06, |
|
"loss": 0.2671, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.1497005988023952, |
|
"grad_norm": 0.30140233039855957, |
|
"learning_rate": 3.723076629185186e-06, |
|
"loss": 0.3733, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.151197604790419, |
|
"grad_norm": 0.2267109453678131, |
|
"learning_rate": 3.7116269207480055e-06, |
|
"loss": 0.3015, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.152694610778443, |
|
"grad_norm": 0.24247941374778748, |
|
"learning_rate": 3.700184444239524e-06, |
|
"loss": 0.1761, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.154191616766467, |
|
"grad_norm": 0.14026691019535065, |
|
"learning_rate": 3.6887492638889433e-06, |
|
"loss": 0.152, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.1556886227544911, |
|
"grad_norm": 0.24493476748466492, |
|
"learning_rate": 3.677321443884509e-06, |
|
"loss": 0.2549, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.157185628742515, |
|
"grad_norm": 0.22861488163471222, |
|
"learning_rate": 3.6659010483731543e-06, |
|
"loss": 0.2145, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.158682634730539, |
|
"grad_norm": 0.21037505567073822, |
|
"learning_rate": 3.654488141460134e-06, |
|
"loss": 0.2116, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.160179640718563, |
|
"grad_norm": 0.24431665241718292, |
|
"learning_rate": 3.6430827872086694e-06, |
|
"loss": 0.3154, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.1616766467065869, |
|
"grad_norm": 0.288648784160614, |
|
"learning_rate": 3.6316850496395863e-06, |
|
"loss": 0.4031, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.1631736526946108, |
|
"grad_norm": 0.2301226109266281, |
|
"learning_rate": 3.6202949927309555e-06, |
|
"loss": 0.2984, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.1646706586826348, |
|
"grad_norm": 0.3974512815475464, |
|
"learning_rate": 3.6089126804177373e-06, |
|
"loss": 0.341, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.1661676646706587, |
|
"grad_norm": 0.25364506244659424, |
|
"learning_rate": 3.597538176591417e-06, |
|
"loss": 0.2507, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.1676646706586826, |
|
"grad_norm": 0.1950305849313736, |
|
"learning_rate": 3.5861715450996505e-06, |
|
"loss": 0.1766, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.1691616766467066, |
|
"grad_norm": 0.265399694442749, |
|
"learning_rate": 3.5748128497459044e-06, |
|
"loss": 0.2574, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.1706586826347305, |
|
"grad_norm": 0.3101622462272644, |
|
"learning_rate": 3.563462154289098e-06, |
|
"loss": 0.3014, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.1721556886227544, |
|
"grad_norm": 0.22397872805595398, |
|
"learning_rate": 3.5521195224432436e-06, |
|
"loss": 0.2439, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.1736526946107784, |
|
"grad_norm": 0.22311492264270782, |
|
"learning_rate": 3.5407850178770944e-06, |
|
"loss": 0.1595, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.1751497005988023, |
|
"grad_norm": 0.24088788032531738, |
|
"learning_rate": 3.5294587042137796e-06, |
|
"loss": 0.2284, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.1766467065868262, |
|
"grad_norm": 0.30875658988952637, |
|
"learning_rate": 3.5181406450304536e-06, |
|
"loss": 0.3085, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.1781437125748502, |
|
"grad_norm": 0.2661195993423462, |
|
"learning_rate": 3.506830903857933e-06, |
|
"loss": 0.3207, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.1796407185628743, |
|
"grad_norm": 0.237378790974617, |
|
"learning_rate": 3.49552954418035e-06, |
|
"loss": 0.3883, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.1811377245508983, |
|
"grad_norm": 0.2710888385772705, |
|
"learning_rate": 3.484236629434783e-06, |
|
"loss": 0.2923, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.1826347305389222, |
|
"grad_norm": 0.2072073370218277, |
|
"learning_rate": 3.4729522230109103e-06, |
|
"loss": 0.2178, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.1841317365269461, |
|
"grad_norm": 0.22102674841880798, |
|
"learning_rate": 3.461676388250651e-06, |
|
"loss": 0.2868, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.18562874251497, |
|
"grad_norm": 0.28038355708122253, |
|
"learning_rate": 3.4504091884478076e-06, |
|
"loss": 0.3112, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.187125748502994, |
|
"grad_norm": 0.22115986049175262, |
|
"learning_rate": 3.4391506868477153e-06, |
|
"loss": 0.2361, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.188622754491018, |
|
"grad_norm": 0.24288050830364227, |
|
"learning_rate": 3.4279009466468825e-06, |
|
"loss": 0.2372, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.1901197604790419, |
|
"grad_norm": 0.225479856133461, |
|
"learning_rate": 3.416660030992639e-06, |
|
"loss": 0.2474, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.1916167664670658, |
|
"grad_norm": 0.19721685349941254, |
|
"learning_rate": 3.405428002982779e-06, |
|
"loss": 0.1992, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.1931137724550898, |
|
"grad_norm": 0.27366501092910767, |
|
"learning_rate": 3.3942049256652093e-06, |
|
"loss": 0.3074, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.1946107784431137, |
|
"grad_norm": 0.3281024694442749, |
|
"learning_rate": 3.3829908620375953e-06, |
|
"loss": 0.3376, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.1961077844311376, |
|
"grad_norm": 0.20951387286186218, |
|
"learning_rate": 3.3717858750470046e-06, |
|
"loss": 0.2649, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.1976047904191618, |
|
"grad_norm": 0.2254786491394043, |
|
"learning_rate": 3.3605900275895565e-06, |
|
"loss": 0.3744, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.1991017964071857, |
|
"grad_norm": 0.31041470170021057, |
|
"learning_rate": 3.349403382510068e-06, |
|
"loss": 0.3571, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.2005988023952097, |
|
"grad_norm": 0.3175371289253235, |
|
"learning_rate": 3.3382260026017027e-06, |
|
"loss": 0.2337, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.2020958083832336, |
|
"grad_norm": 0.23739373683929443, |
|
"learning_rate": 3.3270579506056146e-06, |
|
"loss": 0.3272, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.2035928143712575, |
|
"grad_norm": 0.216998890042305, |
|
"learning_rate": 3.3158992892105975e-06, |
|
"loss": 0.2777, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.2050898203592815, |
|
"grad_norm": 0.25838586688041687, |
|
"learning_rate": 3.3047500810527343e-06, |
|
"loss": 0.2233, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.2065868263473054, |
|
"grad_norm": 0.2895229756832123, |
|
"learning_rate": 3.2936103887150484e-06, |
|
"loss": 0.3911, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.2080838323353293, |
|
"grad_norm": 0.27420610189437866, |
|
"learning_rate": 3.2824802747271424e-06, |
|
"loss": 0.381, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.2095808383233533, |
|
"grad_norm": 0.24717342853546143, |
|
"learning_rate": 3.271359801564858e-06, |
|
"loss": 0.3571, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.2110778443113772, |
|
"grad_norm": 0.23982776701450348, |
|
"learning_rate": 3.2602490316499197e-06, |
|
"loss": 0.2092, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.2125748502994012, |
|
"grad_norm": 0.25339189171791077, |
|
"learning_rate": 3.2491480273495847e-06, |
|
"loss": 0.263, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.214071856287425, |
|
"grad_norm": 0.18809233605861664, |
|
"learning_rate": 3.2380568509762935e-06, |
|
"loss": 0.2132, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.215568862275449, |
|
"grad_norm": 0.24008889496326447, |
|
"learning_rate": 3.226975564787322e-06, |
|
"loss": 0.3366, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.217065868263473, |
|
"grad_norm": 0.2413085252046585, |
|
"learning_rate": 3.215904230984428e-06, |
|
"loss": 0.2753, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.218562874251497, |
|
"grad_norm": 0.2766128182411194, |
|
"learning_rate": 3.204842911713506e-06, |
|
"loss": 0.3486, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.220059880239521, |
|
"grad_norm": 0.24891407787799835, |
|
"learning_rate": 3.1937916690642356e-06, |
|
"loss": 0.2632, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.221556886227545, |
|
"grad_norm": 0.2821076214313507, |
|
"learning_rate": 3.182750565069735e-06, |
|
"loss": 0.202, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.223053892215569, |
|
"grad_norm": 0.2556501626968384, |
|
"learning_rate": 3.171719661706211e-06, |
|
"loss": 0.3678, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.2245508982035929, |
|
"grad_norm": 0.304360032081604, |
|
"learning_rate": 3.1606990208926125e-06, |
|
"loss": 0.3085, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.2260479041916168, |
|
"grad_norm": 0.25805479288101196, |
|
"learning_rate": 3.1496887044902815e-06, |
|
"loss": 0.2892, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.2275449101796407, |
|
"grad_norm": 0.2673249840736389, |
|
"learning_rate": 3.1386887743026083e-06, |
|
"loss": 0.2122, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.2290419161676647, |
|
"grad_norm": 0.21123580634593964, |
|
"learning_rate": 3.127699292074683e-06, |
|
"loss": 0.2108, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.2290419161676647, |
|
"eval_loss": 0.27716606855392456, |
|
"eval_runtime": 95.8808, |
|
"eval_samples_per_second": 7.676, |
|
"eval_steps_per_second": 0.96, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.2305389221556886, |
|
"grad_norm": 0.23262470960617065, |
|
"learning_rate": 3.1167203194929447e-06, |
|
"loss": 0.2687, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.2320359281437125, |
|
"grad_norm": 0.23696181178092957, |
|
"learning_rate": 3.1057519181848474e-06, |
|
"loss": 0.2368, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.2335329341317365, |
|
"grad_norm": 0.17337414622306824, |
|
"learning_rate": 3.0947941497184985e-06, |
|
"loss": 0.2181, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.2350299401197604, |
|
"grad_norm": 0.299317330121994, |
|
"learning_rate": 3.0838470756023253e-06, |
|
"loss": 0.2551, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.2365269461077844, |
|
"grad_norm": 0.33490660786628723, |
|
"learning_rate": 3.0729107572847244e-06, |
|
"loss": 0.2473, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.2380239520958083, |
|
"grad_norm": 0.4856211841106415, |
|
"learning_rate": 3.0619852561537165e-06, |
|
"loss": 0.3014, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.2395209580838324, |
|
"grad_norm": 0.26796865463256836, |
|
"learning_rate": 3.0510706335366034e-06, |
|
"loss": 0.2985, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.2410179640718564, |
|
"grad_norm": 0.27012571692466736, |
|
"learning_rate": 3.040166950699626e-06, |
|
"loss": 0.2157, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.2425149700598803, |
|
"grad_norm": 0.2794853150844574, |
|
"learning_rate": 3.0292742688476125e-06, |
|
"loss": 0.3638, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.2440119760479043, |
|
"grad_norm": 0.243549183011055, |
|
"learning_rate": 3.018392649123645e-06, |
|
"loss": 0.2581, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.2455089820359282, |
|
"grad_norm": 0.23559677600860596, |
|
"learning_rate": 3.0075221526087083e-06, |
|
"loss": 0.2066, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.2470059880239521, |
|
"grad_norm": 0.21091359853744507, |
|
"learning_rate": 2.9966628403213528e-06, |
|
"loss": 0.1868, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.248502994011976, |
|
"grad_norm": 0.2931704521179199, |
|
"learning_rate": 2.985814773217346e-06, |
|
"loss": 0.3558, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.3139488399028778, |
|
"learning_rate": 2.9749780121893366e-06, |
|
"loss": 0.3084, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.251497005988024, |
|
"grad_norm": 0.3278771638870239, |
|
"learning_rate": 2.964152618066508e-06, |
|
"loss": 0.3552, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.2529940119760479, |
|
"grad_norm": 0.17210358381271362, |
|
"learning_rate": 2.9533386516142402e-06, |
|
"loss": 0.1554, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.2544910179640718, |
|
"grad_norm": 0.23768174648284912, |
|
"learning_rate": 2.9425361735337655e-06, |
|
"loss": 0.2715, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.2559880239520957, |
|
"grad_norm": 0.26038113236427307, |
|
"learning_rate": 2.93174524446183e-06, |
|
"loss": 0.2104, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.2574850299401197, |
|
"grad_norm": 0.2509795129299164, |
|
"learning_rate": 2.920965924970352e-06, |
|
"loss": 0.2998, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.2589820359281436, |
|
"grad_norm": 0.18279989063739777, |
|
"learning_rate": 2.910198275566085e-06, |
|
"loss": 0.1925, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.2604790419161676, |
|
"grad_norm": 0.2960183620452881, |
|
"learning_rate": 2.899442356690271e-06, |
|
"loss": 0.2593, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.2619760479041915, |
|
"grad_norm": 0.3304743766784668, |
|
"learning_rate": 2.8886982287183092e-06, |
|
"loss": 0.283, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.2634730538922156, |
|
"grad_norm": 0.31268325448036194, |
|
"learning_rate": 2.8779659519594173e-06, |
|
"loss": 0.3402, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.2649700598802396, |
|
"grad_norm": 0.3021641671657562, |
|
"learning_rate": 2.8672455866562797e-06, |
|
"loss": 0.2746, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.2664670658682635, |
|
"grad_norm": 0.2867257297039032, |
|
"learning_rate": 2.8565371929847286e-06, |
|
"loss": 0.4056, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.2679640718562875, |
|
"grad_norm": 0.26254844665527344, |
|
"learning_rate": 2.8458408310533948e-06, |
|
"loss": 0.1884, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.2694610778443114, |
|
"grad_norm": 0.25362345576286316, |
|
"learning_rate": 2.835156560903365e-06, |
|
"loss": 0.3639, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.2709580838323353, |
|
"grad_norm": 0.2275954782962799, |
|
"learning_rate": 2.824484442507863e-06, |
|
"loss": 0.2712, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.2724550898203593, |
|
"grad_norm": 0.30598387122154236, |
|
"learning_rate": 2.813824535771892e-06, |
|
"loss": 0.324, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.2739520958083832, |
|
"grad_norm": 0.20137560367584229, |
|
"learning_rate": 2.803176900531915e-06, |
|
"loss": 0.2698, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.2754491017964071, |
|
"grad_norm": 0.2496221363544464, |
|
"learning_rate": 2.7925415965555126e-06, |
|
"loss": 0.2704, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.276946107784431, |
|
"grad_norm": 0.20142346620559692, |
|
"learning_rate": 2.78191868354104e-06, |
|
"loss": 0.3, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.278443113772455, |
|
"grad_norm": 0.303371787071228, |
|
"learning_rate": 2.771308221117309e-06, |
|
"loss": 0.3695, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.2799401197604792, |
|
"grad_norm": 0.30154815316200256, |
|
"learning_rate": 2.760710268843234e-06, |
|
"loss": 0.1902, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.281437125748503, |
|
"grad_norm": 0.24857012927532196, |
|
"learning_rate": 2.7501248862075163e-06, |
|
"loss": 0.3152, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.282934131736527, |
|
"grad_norm": 0.21458423137664795, |
|
"learning_rate": 2.7395521326282913e-06, |
|
"loss": 0.1814, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.284431137724551, |
|
"grad_norm": 0.27455374598503113, |
|
"learning_rate": 2.7289920674528142e-06, |
|
"loss": 0.3064, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.285928143712575, |
|
"grad_norm": 0.2155693769454956, |
|
"learning_rate": 2.718444749957109e-06, |
|
"loss": 0.2616, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.2874251497005988, |
|
"grad_norm": 0.2787468433380127, |
|
"learning_rate": 2.7079102393456503e-06, |
|
"loss": 0.4756, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.2889221556886228, |
|
"grad_norm": 0.24366062879562378, |
|
"learning_rate": 2.69738859475102e-06, |
|
"loss": 0.3375, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.2904191616766467, |
|
"grad_norm": 0.3134368658065796, |
|
"learning_rate": 2.6868798752335867e-06, |
|
"loss": 0.305, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.2919161676646707, |
|
"grad_norm": 0.15974688529968262, |
|
"learning_rate": 2.6763841397811576e-06, |
|
"loss": 0.1503, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.2934131736526946, |
|
"grad_norm": 0.2607382833957672, |
|
"learning_rate": 2.6659014473086665e-06, |
|
"loss": 0.2365, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.2949101796407185, |
|
"grad_norm": 0.23047217726707458, |
|
"learning_rate": 2.655431856657833e-06, |
|
"loss": 0.215, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.2964071856287425, |
|
"grad_norm": 0.23638281226158142, |
|
"learning_rate": 2.6449754265968263e-06, |
|
"loss": 0.3338, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.2979041916167664, |
|
"grad_norm": 0.25799238681793213, |
|
"learning_rate": 2.6345322158199503e-06, |
|
"loss": 0.2688, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.2994011976047903, |
|
"grad_norm": 0.22320988774299622, |
|
"learning_rate": 2.6241022829473e-06, |
|
"loss": 0.3521, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.3008982035928143, |
|
"grad_norm": 0.2436802089214325, |
|
"learning_rate": 2.6136856865244443e-06, |
|
"loss": 0.2838, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.3023952095808382, |
|
"grad_norm": 0.27055466175079346, |
|
"learning_rate": 2.603282485022085e-06, |
|
"loss": 0.3206, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.3038922155688621, |
|
"grad_norm": 0.3011034429073334, |
|
"learning_rate": 2.592892736835742e-06, |
|
"loss": 0.2213, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.3053892215568863, |
|
"grad_norm": 0.3129655718803406, |
|
"learning_rate": 2.5825165002854124e-06, |
|
"loss": 0.2309, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.3068862275449102, |
|
"grad_norm": 0.308241605758667, |
|
"learning_rate": 2.5721538336152553e-06, |
|
"loss": 0.3263, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.3083832335329342, |
|
"grad_norm": 0.3574126660823822, |
|
"learning_rate": 2.5618047949932524e-06, |
|
"loss": 0.198, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.3098802395209581, |
|
"grad_norm": 0.2755787968635559, |
|
"learning_rate": 2.5514694425108968e-06, |
|
"loss": 0.251, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.311377245508982, |
|
"grad_norm": 0.3657899796962738, |
|
"learning_rate": 2.5411478341828475e-06, |
|
"loss": 0.2594, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.312874251497006, |
|
"grad_norm": 0.2408738136291504, |
|
"learning_rate": 2.5308400279466262e-06, |
|
"loss": 0.3013, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.31437125748503, |
|
"grad_norm": 0.21882514655590057, |
|
"learning_rate": 2.5205460816622684e-06, |
|
"loss": 0.2877, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.3158682634730539, |
|
"grad_norm": 0.22941765189170837, |
|
"learning_rate": 2.5102660531120204e-06, |
|
"loss": 0.1921, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.3173652694610778, |
|
"grad_norm": 0.25530606508255005, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 0.2747, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.3188622754491017, |
|
"grad_norm": 0.2586192786693573, |
|
"learning_rate": 2.4897479799518797e-06, |
|
"loss": 0.2441, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.3203592814371259, |
|
"grad_norm": 0.2807331383228302, |
|
"learning_rate": 2.479510050514561e-06, |
|
"loss": 0.285, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.3218562874251498, |
|
"grad_norm": 0.2594211995601654, |
|
"learning_rate": 2.469286269155848e-06, |
|
"loss": 0.2154, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.3233532934131738, |
|
"grad_norm": 0.3582344055175781, |
|
"learning_rate": 2.4590766932641353e-06, |
|
"loss": 0.3159, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.3248502994011977, |
|
"grad_norm": 0.33185476064682007, |
|
"learning_rate": 2.4488813801480717e-06, |
|
"loss": 0.3023, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.3263473053892216, |
|
"grad_norm": 0.20739290118217468, |
|
"learning_rate": 2.438700387036253e-06, |
|
"loss": 0.2176, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.3278443113772456, |
|
"grad_norm": 0.30346420407295227, |
|
"learning_rate": 2.4285337710768843e-06, |
|
"loss": 0.2937, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.3293413173652695, |
|
"grad_norm": 0.2638108432292938, |
|
"learning_rate": 2.4183815893374817e-06, |
|
"loss": 0.2442, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.3308383233532934, |
|
"grad_norm": 0.23496340215206146, |
|
"learning_rate": 2.4082438988045253e-06, |
|
"loss": 0.2105, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.3323353293413174, |
|
"grad_norm": 0.4059658944606781, |
|
"learning_rate": 2.3981207563831633e-06, |
|
"loss": 0.4157, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.3338323353293413, |
|
"grad_norm": 0.28790730237960815, |
|
"learning_rate": 2.388012218896873e-06, |
|
"loss": 0.4796, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.3353293413173652, |
|
"grad_norm": 0.26836466789245605, |
|
"learning_rate": 2.3779183430871596e-06, |
|
"loss": 0.2747, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.3368263473053892, |
|
"grad_norm": 0.26263725757598877, |
|
"learning_rate": 2.3678391856132203e-06, |
|
"loss": 0.2096, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.3383233532934131, |
|
"grad_norm": 0.32726365327835083, |
|
"learning_rate": 2.3577748030516443e-06, |
|
"loss": 0.373, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.339820359281437, |
|
"grad_norm": 0.295310378074646, |
|
"learning_rate": 2.3477252518960764e-06, |
|
"loss": 0.2601, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.341317365269461, |
|
"grad_norm": 0.26851561665534973, |
|
"learning_rate": 2.3376905885569185e-06, |
|
"loss": 0.2078, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.342814371257485, |
|
"grad_norm": 0.33125850558280945, |
|
"learning_rate": 2.3276708693609947e-06, |
|
"loss": 0.3367, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.3443113772455089, |
|
"grad_norm": 0.23281192779541016, |
|
"learning_rate": 2.3176661505512534e-06, |
|
"loss": 0.2787, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.3458083832335328, |
|
"grad_norm": 0.5017257928848267, |
|
"learning_rate": 2.3076764882864333e-06, |
|
"loss": 0.3425, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.347305389221557, |
|
"grad_norm": 0.249893456697464, |
|
"learning_rate": 2.2977019386407653e-06, |
|
"loss": 0.3495, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.348802395209581, |
|
"grad_norm": 0.3614089787006378, |
|
"learning_rate": 2.2877425576036467e-06, |
|
"loss": 0.3012, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.3502994011976048, |
|
"grad_norm": 0.26093000173568726, |
|
"learning_rate": 2.2777984010793264e-06, |
|
"loss": 0.3139, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.3517964071856288, |
|
"grad_norm": 0.28172314167022705, |
|
"learning_rate": 2.267869524886603e-06, |
|
"loss": 0.3505, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.3532934131736527, |
|
"grad_norm": 0.5635291934013367, |
|
"learning_rate": 2.2579559847584924e-06, |
|
"loss": 0.3933, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.3547904191616766, |
|
"grad_norm": 0.2114657163619995, |
|
"learning_rate": 2.2480578363419363e-06, |
|
"loss": 0.1635, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.3562874251497006, |
|
"grad_norm": 0.28377217054367065, |
|
"learning_rate": 2.238175135197471e-06, |
|
"loss": 0.2308, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.3577844311377245, |
|
"grad_norm": 0.22767485678195953, |
|
"learning_rate": 2.2283079367989303e-06, |
|
"loss": 0.1877, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.3592814371257484, |
|
"grad_norm": 0.4151133894920349, |
|
"learning_rate": 2.2184562965331203e-06, |
|
"loss": 0.3405, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.3607784431137724, |
|
"grad_norm": 0.17500266432762146, |
|
"learning_rate": 2.2086202696995248e-06, |
|
"loss": 0.1802, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.3622754491017965, |
|
"grad_norm": 0.2124902904033661, |
|
"learning_rate": 2.1987999115099763e-06, |
|
"loss": 0.2348, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.3637724550898205, |
|
"grad_norm": 0.21619191765785217, |
|
"learning_rate": 2.1889952770883644e-06, |
|
"loss": 0.2918, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.3652694610778444, |
|
"grad_norm": 0.2912244498729706, |
|
"learning_rate": 2.17920642147031e-06, |
|
"loss": 0.2907, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.3667664670658684, |
|
"grad_norm": 0.3516407608985901, |
|
"learning_rate": 2.169433399602872e-06, |
|
"loss": 0.3611, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.3682634730538923, |
|
"grad_norm": 0.23837457597255707, |
|
"learning_rate": 2.159676266344222e-06, |
|
"loss": 0.3391, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.3697604790419162, |
|
"grad_norm": 0.3627464175224304, |
|
"learning_rate": 2.1499350764633513e-06, |
|
"loss": 0.309, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.3712574850299402, |
|
"grad_norm": 0.23673026263713837, |
|
"learning_rate": 2.140209884639759e-06, |
|
"loss": 0.2918, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.372754491017964, |
|
"grad_norm": 0.27986136078834534, |
|
"learning_rate": 2.130500745463136e-06, |
|
"loss": 0.3377, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.374251497005988, |
|
"grad_norm": 0.2681880295276642, |
|
"learning_rate": 2.120807713433074e-06, |
|
"loss": 0.2247, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.375748502994012, |
|
"grad_norm": 0.21259628236293793, |
|
"learning_rate": 2.1111308429587446e-06, |
|
"loss": 0.2602, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.377245508982036, |
|
"grad_norm": 0.2214302122592926, |
|
"learning_rate": 2.1014701883586087e-06, |
|
"loss": 0.2005, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.3787425149700598, |
|
"grad_norm": 0.17799615859985352, |
|
"learning_rate": 2.091825803860095e-06, |
|
"loss": 0.2805, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.3802395209580838, |
|
"grad_norm": 0.23212796449661255, |
|
"learning_rate": 2.082197743599314e-06, |
|
"loss": 0.2076, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.3817365269461077, |
|
"grad_norm": 0.28191059827804565, |
|
"learning_rate": 2.072586061620735e-06, |
|
"loss": 0.2368, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.3832335329341316, |
|
"grad_norm": 0.2307487428188324, |
|
"learning_rate": 2.0629908118769004e-06, |
|
"loss": 0.3296, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.3847305389221556, |
|
"grad_norm": 0.17868001759052277, |
|
"learning_rate": 2.0534120482281087e-06, |
|
"loss": 0.1473, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.3862275449101795, |
|
"grad_norm": 0.24826651811599731, |
|
"learning_rate": 2.043849824442124e-06, |
|
"loss": 0.296, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.3877245508982035, |
|
"grad_norm": 0.22485694289207458, |
|
"learning_rate": 2.034304194193861e-06, |
|
"loss": 0.2683, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.3892215568862276, |
|
"grad_norm": 0.2466598004102707, |
|
"learning_rate": 2.024775211065098e-06, |
|
"loss": 0.3431, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.3907185628742516, |
|
"grad_norm": 0.23691681027412415, |
|
"learning_rate": 2.0152629285441668e-06, |
|
"loss": 0.3196, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.3922155688622755, |
|
"grad_norm": 0.22169752418994904, |
|
"learning_rate": 2.0057674000256556e-06, |
|
"loss": 0.2795, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.3937125748502994, |
|
"grad_norm": 0.21425652503967285, |
|
"learning_rate": 1.996288678810105e-06, |
|
"loss": 0.2318, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.3952095808383234, |
|
"grad_norm": 0.346021831035614, |
|
"learning_rate": 1.9868268181037186e-06, |
|
"loss": 0.3006, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.3967065868263473, |
|
"grad_norm": 0.29640334844589233, |
|
"learning_rate": 1.9773818710180514e-06, |
|
"loss": 0.4372, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.3982035928143712, |
|
"grad_norm": 0.3824538588523865, |
|
"learning_rate": 1.967953890569723e-06, |
|
"loss": 0.4661, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.3997005988023952, |
|
"grad_norm": 0.27732986211776733, |
|
"learning_rate": 1.958542929680117e-06, |
|
"loss": 0.221, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.401197604790419, |
|
"grad_norm": 0.35698726773262024, |
|
"learning_rate": 1.9491490411750745e-06, |
|
"loss": 0.3821, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.402694610778443, |
|
"grad_norm": 0.3157179653644562, |
|
"learning_rate": 1.9397722777846153e-06, |
|
"loss": 0.3621, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.4041916167664672, |
|
"grad_norm": 0.23464487493038177, |
|
"learning_rate": 1.9304126921426235e-06, |
|
"loss": 0.3528, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.4056886227544911, |
|
"grad_norm": 0.26018989086151123, |
|
"learning_rate": 1.921070336786568e-06, |
|
"loss": 0.2786, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.407185628742515, |
|
"grad_norm": 0.2732929289340973, |
|
"learning_rate": 1.9117452641571934e-06, |
|
"loss": 0.328, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.408682634730539, |
|
"grad_norm": 0.3107425570487976, |
|
"learning_rate": 1.9024375265982386e-06, |
|
"loss": 0.2317, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.410179640718563, |
|
"grad_norm": 0.2310229390859604, |
|
"learning_rate": 1.893147176356131e-06, |
|
"loss": 0.3121, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.4116766467065869, |
|
"grad_norm": 0.24809664487838745, |
|
"learning_rate": 1.8838742655797053e-06, |
|
"loss": 0.3083, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.4131736526946108, |
|
"grad_norm": 0.36014804244041443, |
|
"learning_rate": 1.8746188463198983e-06, |
|
"loss": 0.2166, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.4146706586826348, |
|
"grad_norm": 0.20624969899654388, |
|
"learning_rate": 1.865380970529469e-06, |
|
"loss": 0.2533, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.4161676646706587, |
|
"grad_norm": 0.2667752504348755, |
|
"learning_rate": 1.8561606900626938e-06, |
|
"loss": 0.306, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.4176646706586826, |
|
"grad_norm": 0.27965497970581055, |
|
"learning_rate": 1.8469580566750911e-06, |
|
"loss": 0.2644, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.4191616766467066, |
|
"grad_norm": 0.2576799690723419, |
|
"learning_rate": 1.8377731220231144e-06, |
|
"loss": 0.2376, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.4206586826347305, |
|
"grad_norm": 0.383226215839386, |
|
"learning_rate": 1.8286059376638748e-06, |
|
"loss": 0.3097, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.4221556886227544, |
|
"grad_norm": 0.26679062843322754, |
|
"learning_rate": 1.8194565550548477e-06, |
|
"loss": 0.2423, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.4236526946107784, |
|
"grad_norm": 0.321591854095459, |
|
"learning_rate": 1.810325025553578e-06, |
|
"loss": 0.3952, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.4251497005988023, |
|
"grad_norm": 0.26733914017677307, |
|
"learning_rate": 1.8012114004174048e-06, |
|
"loss": 0.3133, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.4266467065868262, |
|
"grad_norm": 0.2287786900997162, |
|
"learning_rate": 1.7921157308031567e-06, |
|
"loss": 0.2384, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.4281437125748502, |
|
"grad_norm": 0.2414834201335907, |
|
"learning_rate": 1.7830380677668836e-06, |
|
"loss": 0.2552, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.4296407185628741, |
|
"grad_norm": 0.32560208439826965, |
|
"learning_rate": 1.7739784622635514e-06, |
|
"loss": 0.3029, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.4311377245508983, |
|
"grad_norm": 0.344568133354187, |
|
"learning_rate": 1.764936965146773e-06, |
|
"loss": 0.3252, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.4326347305389222, |
|
"grad_norm": 0.3280235528945923, |
|
"learning_rate": 1.7559136271685079e-06, |
|
"loss": 0.2814, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.4341317365269461, |
|
"grad_norm": 0.3319287896156311, |
|
"learning_rate": 1.746908498978791e-06, |
|
"loss": 0.2855, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.43562874251497, |
|
"grad_norm": 0.26920896768569946, |
|
"learning_rate": 1.7379216311254339e-06, |
|
"loss": 0.2206, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.437125748502994, |
|
"grad_norm": 0.2052461802959442, |
|
"learning_rate": 1.7289530740537569e-06, |
|
"loss": 0.2528, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.438622754491018, |
|
"grad_norm": 0.33078446984291077, |
|
"learning_rate": 1.72000287810629e-06, |
|
"loss": 0.2986, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.4401197604790419, |
|
"grad_norm": 0.3203030228614807, |
|
"learning_rate": 1.7110710935225055e-06, |
|
"loss": 0.2862, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.4416167664670658, |
|
"grad_norm": 0.25592538714408875, |
|
"learning_rate": 1.7021577704385218e-06, |
|
"loss": 0.2645, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.4431137724550898, |
|
"grad_norm": 0.26727235317230225, |
|
"learning_rate": 1.6932629588868332e-06, |
|
"loss": 0.2634, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.4446107784431137, |
|
"grad_norm": 0.19937850534915924, |
|
"learning_rate": 1.6843867087960252e-06, |
|
"loss": 0.2024, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.4461077844311379, |
|
"grad_norm": 0.21263986825942993, |
|
"learning_rate": 1.6755290699904881e-06, |
|
"loss": 0.2111, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.4476047904191618, |
|
"grad_norm": 0.20202231407165527, |
|
"learning_rate": 1.6666900921901497e-06, |
|
"loss": 0.2371, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.4491017964071857, |
|
"grad_norm": 0.43511268496513367, |
|
"learning_rate": 1.6578698250101828e-06, |
|
"loss": 0.3341, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.4505988023952097, |
|
"grad_norm": 0.3771866261959076, |
|
"learning_rate": 1.6490683179607403e-06, |
|
"loss": 0.5046, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.4520958083832336, |
|
"grad_norm": 0.25945380330085754, |
|
"learning_rate": 1.6402856204466611e-06, |
|
"loss": 0.2812, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.4535928143712575, |
|
"grad_norm": 0.24568547308444977, |
|
"learning_rate": 1.6315217817672142e-06, |
|
"loss": 0.1704, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.4550898203592815, |
|
"grad_norm": 0.26437002420425415, |
|
"learning_rate": 1.6227768511157976e-06, |
|
"loss": 0.1989, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.4565868263473054, |
|
"grad_norm": 0.3020990788936615, |
|
"learning_rate": 1.6140508775796832e-06, |
|
"loss": 0.2377, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.4580838323353293, |
|
"grad_norm": 0.15546730160713196, |
|
"learning_rate": 1.6053439101397257e-06, |
|
"loss": 0.1476, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.4595808383233533, |
|
"grad_norm": 0.22115832567214966, |
|
"learning_rate": 1.5966559976701e-06, |
|
"loss": 0.3021, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.4610778443113772, |
|
"grad_norm": 0.1683429330587387, |
|
"learning_rate": 1.5879871889380155e-06, |
|
"loss": 0.1581, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.4625748502994012, |
|
"grad_norm": 0.2293458878993988, |
|
"learning_rate": 1.5793375326034539e-06, |
|
"loss": 0.2103, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.464071856287425, |
|
"grad_norm": 0.2591915428638458, |
|
"learning_rate": 1.5707070772188843e-06, |
|
"loss": 0.2493, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.465568862275449, |
|
"grad_norm": 0.3226713538169861, |
|
"learning_rate": 1.5620958712290023e-06, |
|
"loss": 0.3839, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.467065868263473, |
|
"grad_norm": 0.3980361223220825, |
|
"learning_rate": 1.5535039629704467e-06, |
|
"loss": 0.2999, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.468562874251497, |
|
"grad_norm": 0.2781156003475189, |
|
"learning_rate": 1.5449314006715394e-06, |
|
"loss": 0.2316, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.4700598802395208, |
|
"grad_norm": 0.332856684923172, |
|
"learning_rate": 1.5363782324520033e-06, |
|
"loss": 0.3408, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.471556886227545, |
|
"grad_norm": 0.28347525000572205, |
|
"learning_rate": 1.5278445063227038e-06, |
|
"loss": 0.3741, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.473053892215569, |
|
"grad_norm": 0.2651091516017914, |
|
"learning_rate": 1.5193302701853674e-06, |
|
"loss": 0.3159, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.4745508982035929, |
|
"grad_norm": 0.2445843368768692, |
|
"learning_rate": 1.5108355718323236e-06, |
|
"loss": 0.2487, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.4760479041916168, |
|
"grad_norm": 0.31026923656463623, |
|
"learning_rate": 1.502360458946232e-06, |
|
"loss": 0.3436, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4775449101796407, |
|
"grad_norm": 0.4750896990299225, |
|
"learning_rate": 1.4939049790998095e-06, |
|
"loss": 0.3669, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.4790419161676647, |
|
"grad_norm": 0.2514893114566803, |
|
"learning_rate": 1.4854691797555753e-06, |
|
"loss": 0.2849, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.4790419161676647, |
|
"eval_loss": 0.27240613102912903, |
|
"eval_runtime": 96.0291, |
|
"eval_samples_per_second": 7.664, |
|
"eval_steps_per_second": 0.958, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.4805389221556886, |
|
"grad_norm": 0.23577263951301575, |
|
"learning_rate": 1.4770531082655704e-06, |
|
"loss": 0.2443, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 1.4820359281437125, |
|
"grad_norm": 0.29413002729415894, |
|
"learning_rate": 1.4686568118711054e-06, |
|
"loss": 0.3396, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.4835329341317365, |
|
"grad_norm": 0.2968040704727173, |
|
"learning_rate": 1.4602803377024833e-06, |
|
"loss": 0.2327, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.4850299401197604, |
|
"grad_norm": 0.29157525300979614, |
|
"learning_rate": 1.451923732778745e-06, |
|
"loss": 0.2629, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.4865269461077844, |
|
"grad_norm": 0.2867380976676941, |
|
"learning_rate": 1.4435870440073968e-06, |
|
"loss": 0.4332, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 1.4880239520958085, |
|
"grad_norm": 0.22759690880775452, |
|
"learning_rate": 1.435270318184156e-06, |
|
"loss": 0.2434, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.4895209580838324, |
|
"grad_norm": 0.22279328107833862, |
|
"learning_rate": 1.4269736019926778e-06, |
|
"loss": 0.2133, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 1.4910179640718564, |
|
"grad_norm": 0.21220585703849792, |
|
"learning_rate": 1.418696942004304e-06, |
|
"loss": 0.1834, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.4925149700598803, |
|
"grad_norm": 0.22208869457244873, |
|
"learning_rate": 1.410440384677791e-06, |
|
"loss": 0.3176, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 1.4940119760479043, |
|
"grad_norm": 0.25137487053871155, |
|
"learning_rate": 1.4022039763590595e-06, |
|
"loss": 0.303, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.4955089820359282, |
|
"grad_norm": 0.3287590444087982, |
|
"learning_rate": 1.3939877632809279e-06, |
|
"loss": 0.2652, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 1.4970059880239521, |
|
"grad_norm": 0.22760730981826782, |
|
"learning_rate": 1.3857917915628516e-06, |
|
"loss": 0.2777, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.498502994011976, |
|
"grad_norm": 0.20680049061775208, |
|
"learning_rate": 1.3776161072106703e-06, |
|
"loss": 0.2284, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.4575501084327698, |
|
"learning_rate": 1.369460756116342e-06, |
|
"loss": 0.31, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.501497005988024, |
|
"grad_norm": 0.26314109563827515, |
|
"learning_rate": 1.3613257840576954e-06, |
|
"loss": 0.2972, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 1.5029940119760479, |
|
"grad_norm": 0.25917747616767883, |
|
"learning_rate": 1.3532112366981598e-06, |
|
"loss": 0.3022, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.5044910179640718, |
|
"grad_norm": 0.22802290320396423, |
|
"learning_rate": 1.3451171595865226e-06, |
|
"loss": 0.2922, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 1.5059880239520957, |
|
"grad_norm": 0.2178899347782135, |
|
"learning_rate": 1.3370435981566622e-06, |
|
"loss": 0.2445, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.5074850299401197, |
|
"grad_norm": 0.39115995168685913, |
|
"learning_rate": 1.3289905977273027e-06, |
|
"loss": 0.3144, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 1.5089820359281436, |
|
"grad_norm": 0.23431116342544556, |
|
"learning_rate": 1.3209582035017487e-06, |
|
"loss": 0.2239, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.5104790419161676, |
|
"grad_norm": 0.2912103235721588, |
|
"learning_rate": 1.312946460567644e-06, |
|
"loss": 0.3606, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 1.5119760479041915, |
|
"grad_norm": 0.20403842628002167, |
|
"learning_rate": 1.3049554138967052e-06, |
|
"loss": 0.1988, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.5134730538922154, |
|
"grad_norm": 0.1640235036611557, |
|
"learning_rate": 1.2969851083444834e-06, |
|
"loss": 0.1336, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.5149700598802394, |
|
"grad_norm": 0.22775056958198547, |
|
"learning_rate": 1.2890355886500971e-06, |
|
"loss": 0.1849, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 1.5164670658682635, |
|
"grad_norm": 0.315218061208725, |
|
"learning_rate": 1.2811068994359992e-06, |
|
"loss": 0.2266, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 1.5179640718562875, |
|
"grad_norm": 0.361473023891449, |
|
"learning_rate": 1.273199085207706e-06, |
|
"loss": 0.3429, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.5194610778443114, |
|
"grad_norm": 0.22169454395771027, |
|
"learning_rate": 1.2653121903535653e-06, |
|
"loss": 0.1868, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 1.5209580838323353, |
|
"grad_norm": 0.20558999478816986, |
|
"learning_rate": 1.257446259144494e-06, |
|
"loss": 0.2067, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.5224550898203593, |
|
"grad_norm": 0.37300822138786316, |
|
"learning_rate": 1.2496013357337416e-06, |
|
"loss": 0.2873, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 1.5239520958083832, |
|
"grad_norm": 0.24679654836654663, |
|
"learning_rate": 1.2417774641566298e-06, |
|
"loss": 0.2406, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.5254491017964071, |
|
"grad_norm": 0.25380274653434753, |
|
"learning_rate": 1.233974688330315e-06, |
|
"loss": 0.284, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 1.5269461077844313, |
|
"grad_norm": 0.24675562977790833, |
|
"learning_rate": 1.2261930520535403e-06, |
|
"loss": 0.2421, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 1.5284431137724552, |
|
"grad_norm": 0.23980629444122314, |
|
"learning_rate": 1.2184325990063822e-06, |
|
"loss": 0.2885, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.5299401197604792, |
|
"grad_norm": 0.24391433596611023, |
|
"learning_rate": 1.210693372750017e-06, |
|
"loss": 0.1621, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.531437125748503, |
|
"grad_norm": 0.2686309218406677, |
|
"learning_rate": 1.202975416726464e-06, |
|
"loss": 0.2907, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 1.532934131736527, |
|
"grad_norm": 0.45841044187545776, |
|
"learning_rate": 1.1952787742583549e-06, |
|
"loss": 0.2877, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 1.534431137724551, |
|
"grad_norm": 0.1831907480955124, |
|
"learning_rate": 1.1876034885486764e-06, |
|
"loss": 0.2025, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 1.535928143712575, |
|
"grad_norm": 0.28029313683509827, |
|
"learning_rate": 1.1799496026805413e-06, |
|
"loss": 0.2588, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.5374251497005988, |
|
"grad_norm": 0.2655869424343109, |
|
"learning_rate": 1.1723171596169353e-06, |
|
"loss": 0.2087, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 1.5389221556886228, |
|
"grad_norm": 0.3924802243709564, |
|
"learning_rate": 1.1647062022004845e-06, |
|
"loss": 0.2952, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 1.5404191616766467, |
|
"grad_norm": 0.3049009144306183, |
|
"learning_rate": 1.157116773153208e-06, |
|
"loss": 0.2123, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 1.5419161676646707, |
|
"grad_norm": 0.23748943209648132, |
|
"learning_rate": 1.1495489150762851e-06, |
|
"loss": 0.2867, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.5434131736526946, |
|
"grad_norm": 0.18197742104530334, |
|
"learning_rate": 1.1420026704498077e-06, |
|
"loss": 0.2095, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.5449101796407185, |
|
"grad_norm": 0.2282951921224594, |
|
"learning_rate": 1.1344780816325512e-06, |
|
"loss": 0.1673, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 1.5464071856287425, |
|
"grad_norm": 0.26313188672065735, |
|
"learning_rate": 1.1269751908617277e-06, |
|
"loss": 0.3961, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 1.5479041916167664, |
|
"grad_norm": 0.2284393161535263, |
|
"learning_rate": 1.1194940402527566e-06, |
|
"loss": 0.1948, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 1.5494011976047903, |
|
"grad_norm": 0.28718557953834534, |
|
"learning_rate": 1.112034671799025e-06, |
|
"loss": 0.3257, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 1.5508982035928143, |
|
"grad_norm": 0.23936651647090912, |
|
"learning_rate": 1.1045971273716476e-06, |
|
"loss": 0.1952, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.5523952095808382, |
|
"grad_norm": 0.3323695957660675, |
|
"learning_rate": 1.0971814487192429e-06, |
|
"loss": 0.3468, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 1.5538922155688621, |
|
"grad_norm": 0.26825106143951416, |
|
"learning_rate": 1.089787677467683e-06, |
|
"loss": 0.2717, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 1.555389221556886, |
|
"grad_norm": 0.34908008575439453, |
|
"learning_rate": 1.0824158551198783e-06, |
|
"loss": 0.3218, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 1.55688622754491, |
|
"grad_norm": 0.27587297558784485, |
|
"learning_rate": 1.075066023055527e-06, |
|
"loss": 0.2816, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 1.5583832335329342, |
|
"grad_norm": 0.2504119277000427, |
|
"learning_rate": 1.0677382225308969e-06, |
|
"loss": 0.3119, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.5598802395209581, |
|
"grad_norm": 0.3709030747413635, |
|
"learning_rate": 1.0604324946785826e-06, |
|
"loss": 0.2381, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 1.561377245508982, |
|
"grad_norm": 0.27703753113746643, |
|
"learning_rate": 1.0531488805072848e-06, |
|
"loss": 0.1919, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.562874251497006, |
|
"grad_norm": 0.27848151326179504, |
|
"learning_rate": 1.0458874209015708e-06, |
|
"loss": 0.2071, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 1.56437125748503, |
|
"grad_norm": 0.2879417836666107, |
|
"learning_rate": 1.0386481566216532e-06, |
|
"loss": 0.3086, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 1.5658682634730539, |
|
"grad_norm": 0.21330009400844574, |
|
"learning_rate": 1.0314311283031531e-06, |
|
"loss": 0.1942, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.5673652694610778, |
|
"grad_norm": 0.27636274695396423, |
|
"learning_rate": 1.0242363764568808e-06, |
|
"loss": 0.2228, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 1.568862275449102, |
|
"grad_norm": 0.5699312686920166, |
|
"learning_rate": 1.0170639414685985e-06, |
|
"loss": 0.3452, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 1.5703592814371259, |
|
"grad_norm": 0.27370771765708923, |
|
"learning_rate": 1.0099138635988026e-06, |
|
"loss": 0.2548, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 1.5718562874251498, |
|
"grad_norm": 0.24189740419387817, |
|
"learning_rate": 1.0027861829824953e-06, |
|
"loss": 0.2262, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.5733532934131738, |
|
"grad_norm": 0.2238190919160843, |
|
"learning_rate": 9.956809396289519e-07, |
|
"loss": 0.2553, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.5748502994011977, |
|
"grad_norm": 0.2638553977012634, |
|
"learning_rate": 9.885981734215094e-07, |
|
"loss": 0.3157, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 1.5763473053892216, |
|
"grad_norm": 0.22409968078136444, |
|
"learning_rate": 9.815379241173295e-07, |
|
"loss": 0.3396, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 1.5778443113772456, |
|
"grad_norm": 0.25611743330955505, |
|
"learning_rate": 9.745002313471847e-07, |
|
"loss": 0.2865, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 1.5793413173652695, |
|
"grad_norm": 0.2451084852218628, |
|
"learning_rate": 9.67485134615232e-07, |
|
"loss": 0.292, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 1.5808383233532934, |
|
"grad_norm": 0.26699185371398926, |
|
"learning_rate": 9.60492673298794e-07, |
|
"loss": 0.297, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.5823353293413174, |
|
"grad_norm": 0.23169200122356415, |
|
"learning_rate": 9.535228866481295e-07, |
|
"loss": 0.2808, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 1.5838323353293413, |
|
"grad_norm": 0.37093454599380493, |
|
"learning_rate": 9.465758137862264e-07, |
|
"loss": 0.2549, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 1.5853293413173652, |
|
"grad_norm": 0.4046109914779663, |
|
"learning_rate": 9.396514937085682e-07, |
|
"loss": 0.2644, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 1.5868263473053892, |
|
"grad_norm": 0.24264554679393768, |
|
"learning_rate": 9.327499652829292e-07, |
|
"loss": 0.1966, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.5883233532934131, |
|
"grad_norm": 0.35896986722946167, |
|
"learning_rate": 9.258712672491416e-07, |
|
"loss": 0.4137, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.589820359281437, |
|
"grad_norm": 0.3543803095817566, |
|
"learning_rate": 9.190154382188921e-07, |
|
"loss": 0.2922, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.591317365269461, |
|
"grad_norm": 0.18955141305923462, |
|
"learning_rate": 9.121825166754927e-07, |
|
"loss": 0.1885, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 1.592814371257485, |
|
"grad_norm": 0.2830042839050293, |
|
"learning_rate": 9.053725409736752e-07, |
|
"loss": 0.3324, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.5943113772455089, |
|
"grad_norm": 0.2516660988330841, |
|
"learning_rate": 8.98585549339368e-07, |
|
"loss": 0.3912, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 1.5958083832335328, |
|
"grad_norm": 0.25523948669433594, |
|
"learning_rate": 8.918215798694879e-07, |
|
"loss": 0.2574, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.5973053892215567, |
|
"grad_norm": 0.3023395538330078, |
|
"learning_rate": 8.850806705317183e-07, |
|
"loss": 0.2945, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 1.5988023952095807, |
|
"grad_norm": 0.2778310775756836, |
|
"learning_rate": 8.783628591643056e-07, |
|
"loss": 0.3833, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.6002994011976048, |
|
"grad_norm": 0.27976202964782715, |
|
"learning_rate": 8.716681834758411e-07, |
|
"loss": 0.2099, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 1.6017964071856288, |
|
"grad_norm": 0.33491119742393494, |
|
"learning_rate": 8.649966810450472e-07, |
|
"loss": 0.3326, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.6032934131736527, |
|
"grad_norm": 0.24264518916606903, |
|
"learning_rate": 8.583483893205746e-07, |
|
"loss": 0.2515, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.6047904191616766, |
|
"grad_norm": 0.24761344492435455, |
|
"learning_rate": 8.517233456207819e-07, |
|
"loss": 0.1778, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.6062874251497006, |
|
"grad_norm": 0.30928072333335876, |
|
"learning_rate": 8.451215871335355e-07, |
|
"loss": 0.3494, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 1.6077844311377245, |
|
"grad_norm": 0.2120644450187683, |
|
"learning_rate": 8.38543150915993e-07, |
|
"loss": 0.1977, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.6092814371257484, |
|
"grad_norm": 0.36671245098114014, |
|
"learning_rate": 8.31988073894403e-07, |
|
"loss": 0.2415, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 1.6107784431137726, |
|
"grad_norm": 0.2658866047859192, |
|
"learning_rate": 8.254563928638892e-07, |
|
"loss": 0.2486, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.6122754491017965, |
|
"grad_norm": 0.21688513457775116, |
|
"learning_rate": 8.189481444882524e-07, |
|
"loss": 0.243, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 1.6137724550898205, |
|
"grad_norm": 0.24383318424224854, |
|
"learning_rate": 8.124633652997571e-07, |
|
"loss": 0.2836, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.6152694610778444, |
|
"grad_norm": 0.21168118715286255, |
|
"learning_rate": 8.060020916989331e-07, |
|
"loss": 0.2159, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 1.6167664670658684, |
|
"grad_norm": 0.235800638794899, |
|
"learning_rate": 7.995643599543645e-07, |
|
"loss": 0.3152, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.6182634730538923, |
|
"grad_norm": 0.20235656201839447, |
|
"learning_rate": 7.931502062024949e-07, |
|
"loss": 0.2067, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.6197604790419162, |
|
"grad_norm": 0.21362897753715515, |
|
"learning_rate": 7.86759666447412e-07, |
|
"loss": 0.2366, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.6212574850299402, |
|
"grad_norm": 0.297219455242157, |
|
"learning_rate": 7.803927765606595e-07, |
|
"loss": 0.2066, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 1.622754491017964, |
|
"grad_norm": 0.1866680532693863, |
|
"learning_rate": 7.740495722810271e-07, |
|
"loss": 0.1817, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 1.624251497005988, |
|
"grad_norm": 0.25540104508399963, |
|
"learning_rate": 7.677300892143485e-07, |
|
"loss": 0.3215, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 1.625748502994012, |
|
"grad_norm": 0.2844352424144745, |
|
"learning_rate": 7.614343628333104e-07, |
|
"loss": 0.3789, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.627245508982036, |
|
"grad_norm": 0.32128477096557617, |
|
"learning_rate": 7.55162428477243e-07, |
|
"loss": 0.3061, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 1.6287425149700598, |
|
"grad_norm": 0.19842462241649628, |
|
"learning_rate": 7.489143213519301e-07, |
|
"loss": 0.2446, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 1.6302395209580838, |
|
"grad_norm": 0.21685732901096344, |
|
"learning_rate": 7.426900765294043e-07, |
|
"loss": 0.1991, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 1.6317365269461077, |
|
"grad_norm": 0.3464147746562958, |
|
"learning_rate": 7.364897289477585e-07, |
|
"loss": 0.2764, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.6332335329341316, |
|
"grad_norm": 0.30749276280403137, |
|
"learning_rate": 7.303133134109391e-07, |
|
"loss": 0.1874, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.6347305389221556, |
|
"grad_norm": 0.310250461101532, |
|
"learning_rate": 7.241608645885629e-07, |
|
"loss": 0.3505, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 1.6362275449101795, |
|
"grad_norm": 0.3631021976470947, |
|
"learning_rate": 7.180324170157094e-07, |
|
"loss": 0.4384, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 1.6377245508982035, |
|
"grad_norm": 0.21250741183757782, |
|
"learning_rate": 7.119280050927407e-07, |
|
"loss": 0.213, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.6392215568862274, |
|
"grad_norm": 0.2978675365447998, |
|
"learning_rate": 7.058476630850935e-07, |
|
"loss": 0.2757, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 1.6407185628742516, |
|
"grad_norm": 0.2608672082424164, |
|
"learning_rate": 6.997914251231036e-07, |
|
"loss": 0.2849, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.6422155688622755, |
|
"grad_norm": 0.3758143484592438, |
|
"learning_rate": 6.937593252017983e-07, |
|
"loss": 0.2907, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 1.6437125748502994, |
|
"grad_norm": 0.2593882083892822, |
|
"learning_rate": 6.87751397180716e-07, |
|
"loss": 0.2183, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.6452095808383234, |
|
"grad_norm": 0.46381810307502747, |
|
"learning_rate": 6.817676747837104e-07, |
|
"loss": 0.4085, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 1.6467065868263473, |
|
"grad_norm": 0.32167354226112366, |
|
"learning_rate": 6.758081915987669e-07, |
|
"loss": 0.1879, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 1.6482035928143712, |
|
"grad_norm": 0.4099140167236328, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 0.3501, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.6497005988023952, |
|
"grad_norm": 0.234484001994133, |
|
"learning_rate": 6.639620765365074e-07, |
|
"loss": 0.1925, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.6511976047904193, |
|
"grad_norm": 0.22274383902549744, |
|
"learning_rate": 6.580755111541076e-07, |
|
"loss": 0.2334, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 1.6526946107784433, |
|
"grad_norm": 0.27316105365753174, |
|
"learning_rate": 6.522133179732271e-07, |
|
"loss": 0.2915, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 1.6541916167664672, |
|
"grad_norm": 0.2743278443813324, |
|
"learning_rate": 6.463755298996799e-07, |
|
"loss": 0.3385, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 1.6556886227544911, |
|
"grad_norm": 0.24365045130252838, |
|
"learning_rate": 6.405621797022848e-07, |
|
"loss": 0.2271, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.657185628742515, |
|
"grad_norm": 0.2597195506095886, |
|
"learning_rate": 6.347733000126899e-07, |
|
"loss": 0.2802, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 1.658682634730539, |
|
"grad_norm": 0.3242190182209015, |
|
"learning_rate": 6.290089233251811e-07, |
|
"loss": 0.2547, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 1.660179640718563, |
|
"grad_norm": 0.23406442999839783, |
|
"learning_rate": 6.232690819965065e-07, |
|
"loss": 0.2242, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 1.6616766467065869, |
|
"grad_norm": 0.32079535722732544, |
|
"learning_rate": 6.175538082456883e-07, |
|
"loss": 0.3538, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 1.6631736526946108, |
|
"grad_norm": 0.32577863335609436, |
|
"learning_rate": 6.118631341538489e-07, |
|
"loss": 0.3625, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.6646706586826348, |
|
"grad_norm": 0.3936420977115631, |
|
"learning_rate": 6.061970916640236e-07, |
|
"loss": 0.2731, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 1.6661676646706587, |
|
"grad_norm": 0.34293246269226074, |
|
"learning_rate": 6.005557125809896e-07, |
|
"loss": 0.3033, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 1.6676646706586826, |
|
"grad_norm": 0.20506998896598816, |
|
"learning_rate": 5.949390285710777e-07, |
|
"loss": 0.294, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 1.6691616766467066, |
|
"grad_norm": 0.17009401321411133, |
|
"learning_rate": 5.893470711620036e-07, |
|
"loss": 0.1798, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 1.6706586826347305, |
|
"grad_norm": 0.2312851995229721, |
|
"learning_rate": 5.837798717426846e-07, |
|
"loss": 0.2047, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.6721556886227544, |
|
"grad_norm": 0.16008791327476501, |
|
"learning_rate": 5.782374615630682e-07, |
|
"loss": 0.142, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 1.6736526946107784, |
|
"grad_norm": 0.20815108716487885, |
|
"learning_rate": 5.727198717339511e-07, |
|
"loss": 0.2526, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 1.6751497005988023, |
|
"grad_norm": 0.22949810326099396, |
|
"learning_rate": 5.672271332268098e-07, |
|
"loss": 0.2564, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 1.6766467065868262, |
|
"grad_norm": 0.3489370048046112, |
|
"learning_rate": 5.617592768736269e-07, |
|
"loss": 0.2188, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 1.6781437125748502, |
|
"grad_norm": 0.2736472487449646, |
|
"learning_rate": 5.563163333667098e-07, |
|
"loss": 0.308, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.6796407185628741, |
|
"grad_norm": 0.2761566936969757, |
|
"learning_rate": 5.508983332585316e-07, |
|
"loss": 0.1934, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 1.681137724550898, |
|
"grad_norm": 0.21854543685913086, |
|
"learning_rate": 5.455053069615456e-07, |
|
"loss": 0.2165, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 1.6826347305389222, |
|
"grad_norm": 0.32581812143325806, |
|
"learning_rate": 5.401372847480285e-07, |
|
"loss": 0.3124, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 1.6841317365269461, |
|
"grad_norm": 0.3002076745033264, |
|
"learning_rate": 5.347942967498965e-07, |
|
"loss": 0.36, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 1.68562874251497, |
|
"grad_norm": 0.22961673140525818, |
|
"learning_rate": 5.294763729585484e-07, |
|
"loss": 0.2958, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.687125748502994, |
|
"grad_norm": 0.2980080842971802, |
|
"learning_rate": 5.241835432246888e-07, |
|
"loss": 0.183, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 1.688622754491018, |
|
"grad_norm": 0.34977275133132935, |
|
"learning_rate": 5.18915837258166e-07, |
|
"loss": 0.3158, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 1.6901197604790419, |
|
"grad_norm": 0.2116621881723404, |
|
"learning_rate": 5.136732846278003e-07, |
|
"loss": 0.3113, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 1.6916167664670658, |
|
"grad_norm": 0.3168148398399353, |
|
"learning_rate": 5.084559147612244e-07, |
|
"loss": 0.3469, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 1.69311377245509, |
|
"grad_norm": 0.3314928412437439, |
|
"learning_rate": 5.032637569447091e-07, |
|
"loss": 0.3255, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.694610778443114, |
|
"grad_norm": 0.2442367523908615, |
|
"learning_rate": 4.980968403230097e-07, |
|
"loss": 0.2865, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 1.6961077844311379, |
|
"grad_norm": 0.23955686390399933, |
|
"learning_rate": 4.929551938991945e-07, |
|
"loss": 0.3061, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 1.6976047904191618, |
|
"grad_norm": 0.20683260262012482, |
|
"learning_rate": 4.87838846534483e-07, |
|
"loss": 0.1759, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 1.6991017964071857, |
|
"grad_norm": 0.27983346581459045, |
|
"learning_rate": 4.827478269480895e-07, |
|
"loss": 0.2992, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 1.7005988023952097, |
|
"grad_norm": 0.33870527148246765, |
|
"learning_rate": 4.776821637170525e-07, |
|
"loss": 0.3379, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.7020958083832336, |
|
"grad_norm": 0.2928355038166046, |
|
"learning_rate": 4.726418852760839e-07, |
|
"loss": 0.3088, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 1.7035928143712575, |
|
"grad_norm": 0.3086561858654022, |
|
"learning_rate": 4.6762701991740434e-07, |
|
"loss": 0.394, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 1.7050898203592815, |
|
"grad_norm": 0.2599639594554901, |
|
"learning_rate": 4.626375957905821e-07, |
|
"loss": 0.2423, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 1.7065868263473054, |
|
"grad_norm": 0.24536360800266266, |
|
"learning_rate": 4.576736409023813e-07, |
|
"loss": 0.2244, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 1.7080838323353293, |
|
"grad_norm": 0.27678626775741577, |
|
"learning_rate": 4.5273518311660103e-07, |
|
"loss": 0.2654, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.7095808383233533, |
|
"grad_norm": 0.25134310126304626, |
|
"learning_rate": 4.4782225015391754e-07, |
|
"loss": 0.2651, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 1.7110778443113772, |
|
"grad_norm": 0.2866244316101074, |
|
"learning_rate": 4.429348695917329e-07, |
|
"loss": 0.362, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 1.7125748502994012, |
|
"grad_norm": 0.3232477605342865, |
|
"learning_rate": 4.3807306886401555e-07, |
|
"loss": 0.3016, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 1.714071856287425, |
|
"grad_norm": 0.21404534578323364, |
|
"learning_rate": 4.3323687526115045e-07, |
|
"loss": 0.238, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 1.715568862275449, |
|
"grad_norm": 0.28791582584381104, |
|
"learning_rate": 4.284263159297819e-07, |
|
"loss": 0.2013, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.717065868263473, |
|
"grad_norm": 0.33721908926963806, |
|
"learning_rate": 4.2364141787266613e-07, |
|
"loss": 0.278, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 1.718562874251497, |
|
"grad_norm": 0.3573349714279175, |
|
"learning_rate": 4.1888220794851386e-07, |
|
"loss": 0.2282, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 1.7200598802395208, |
|
"grad_norm": 0.2762373685836792, |
|
"learning_rate": 4.141487128718452e-07, |
|
"loss": 0.3332, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 1.7215568862275448, |
|
"grad_norm": 0.2202342003583908, |
|
"learning_rate": 4.0944095921283347e-07, |
|
"loss": 0.2471, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.7230538922155687, |
|
"grad_norm": 0.49206018447875977, |
|
"learning_rate": 4.0475897339716466e-07, |
|
"loss": 0.2495, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.7245508982035929, |
|
"grad_norm": 0.25945720076560974, |
|
"learning_rate": 4.001027817058789e-07, |
|
"loss": 0.2775, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 1.7260479041916168, |
|
"grad_norm": 0.2269635945558548, |
|
"learning_rate": 3.9547241027523164e-07, |
|
"loss": 0.344, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 1.7275449101796407, |
|
"grad_norm": 0.1602826565504074, |
|
"learning_rate": 3.908678850965425e-07, |
|
"loss": 0.1716, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 1.7290419161676647, |
|
"grad_norm": 0.4009440839290619, |
|
"learning_rate": 3.862892320160483e-07, |
|
"loss": 0.3302, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 1.7290419161676647, |
|
"eval_loss": 0.27044132351875305, |
|
"eval_runtime": 96.2127, |
|
"eval_samples_per_second": 7.65, |
|
"eval_steps_per_second": 0.956, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 1.7305389221556886, |
|
"grad_norm": 0.3490601181983948, |
|
"learning_rate": 3.8173647673476366e-07, |
|
"loss": 0.4149, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.7320359281437125, |
|
"grad_norm": 0.32757532596588135, |
|
"learning_rate": 3.7720964480832847e-07, |
|
"loss": 0.3498, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 1.7335329341317365, |
|
"grad_norm": 0.17421932518482208, |
|
"learning_rate": 3.727087616468739e-07, |
|
"loss": 0.1484, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 1.7350299401197606, |
|
"grad_norm": 0.2285478264093399, |
|
"learning_rate": 3.682338525148699e-07, |
|
"loss": 0.2416, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 1.7365269461077846, |
|
"grad_norm": 0.2338390350341797, |
|
"learning_rate": 3.6378494253099307e-07, |
|
"loss": 0.1934, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 1.7380239520958085, |
|
"grad_norm": 0.34419965744018555, |
|
"learning_rate": 3.5936205666797675e-07, |
|
"loss": 0.3123, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.7395209580838324, |
|
"grad_norm": 0.2365407645702362, |
|
"learning_rate": 3.549652197524783e-07, |
|
"loss": 0.3097, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 1.7410179640718564, |
|
"grad_norm": 0.36558160185813904, |
|
"learning_rate": 3.505944564649344e-07, |
|
"loss": 0.3637, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 1.7425149700598803, |
|
"grad_norm": 0.261536180973053, |
|
"learning_rate": 3.462497913394258e-07, |
|
"loss": 0.2309, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 1.7440119760479043, |
|
"grad_norm": 0.21218152344226837, |
|
"learning_rate": 3.419312487635362e-07, |
|
"loss": 0.2562, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 1.7455089820359282, |
|
"grad_norm": 0.29205673933029175, |
|
"learning_rate": 3.3763885297822153e-07, |
|
"loss": 0.4012, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.7470059880239521, |
|
"grad_norm": 0.24798201024532318, |
|
"learning_rate": 3.333726280776656e-07, |
|
"loss": 0.2164, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 1.748502994011976, |
|
"grad_norm": 0.2931423485279083, |
|
"learning_rate": 3.2913259800915196e-07, |
|
"loss": 0.2651, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.16753946244716644, |
|
"learning_rate": 3.2491878657292643e-07, |
|
"loss": 0.2041, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 1.751497005988024, |
|
"grad_norm": 0.22982138395309448, |
|
"learning_rate": 3.2073121742206117e-07, |
|
"loss": 0.1714, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 1.7529940119760479, |
|
"grad_norm": 0.23351234197616577, |
|
"learning_rate": 3.165699140623285e-07, |
|
"loss": 0.2545, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.7544910179640718, |
|
"grad_norm": 0.37126511335372925, |
|
"learning_rate": 3.1243489985206097e-07, |
|
"loss": 0.3697, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 1.7559880239520957, |
|
"grad_norm": 0.23918622732162476, |
|
"learning_rate": 3.0832619800202746e-07, |
|
"loss": 0.2375, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 1.7574850299401197, |
|
"grad_norm": 0.25686293840408325, |
|
"learning_rate": 3.0424383157529716e-07, |
|
"loss": 0.2728, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.7589820359281436, |
|
"grad_norm": 0.2703104019165039, |
|
"learning_rate": 3.001878234871147e-07, |
|
"loss": 0.27, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 1.7604790419161676, |
|
"grad_norm": 0.20858930051326752, |
|
"learning_rate": 2.961581965047672e-07, |
|
"loss": 0.2294, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.7619760479041915, |
|
"grad_norm": 0.2548951506614685, |
|
"learning_rate": 2.921549732474599e-07, |
|
"loss": 0.3003, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 1.7634730538922154, |
|
"grad_norm": 0.3152521252632141, |
|
"learning_rate": 2.8817817618618846e-07, |
|
"loss": 0.3112, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 1.7649700598802394, |
|
"grad_norm": 0.35076630115509033, |
|
"learning_rate": 2.842278276436128e-07, |
|
"loss": 0.2925, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 1.7664670658682635, |
|
"grad_norm": 0.3196788430213928, |
|
"learning_rate": 2.803039497939281e-07, |
|
"loss": 0.2372, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 1.7679640718562875, |
|
"grad_norm": 0.20753467082977295, |
|
"learning_rate": 2.7640656466274785e-07, |
|
"loss": 0.2861, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.7694610778443114, |
|
"grad_norm": 0.31822919845581055, |
|
"learning_rate": 2.7253569412697244e-07, |
|
"loss": 0.3193, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 1.7709580838323353, |
|
"grad_norm": 0.21876436471939087, |
|
"learning_rate": 2.686913599146723e-07, |
|
"loss": 0.2215, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 1.7724550898203593, |
|
"grad_norm": 0.23750582337379456, |
|
"learning_rate": 2.648735836049615e-07, |
|
"loss": 0.2909, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 1.7739520958083832, |
|
"grad_norm": 0.2619275450706482, |
|
"learning_rate": 2.6108238662788057e-07, |
|
"loss": 0.3952, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 1.7754491017964071, |
|
"grad_norm": 0.25464171171188354, |
|
"learning_rate": 2.573177902642726e-07, |
|
"loss": 0.2432, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.7769461077844313, |
|
"grad_norm": 0.31740161776542664, |
|
"learning_rate": 2.5357981564566647e-07, |
|
"loss": 0.4162, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 1.7784431137724552, |
|
"grad_norm": 0.24966104328632355, |
|
"learning_rate": 2.4986848375415653e-07, |
|
"loss": 0.229, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 1.7799401197604792, |
|
"grad_norm": 0.39699026942253113, |
|
"learning_rate": 2.4618381542228565e-07, |
|
"loss": 0.4622, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 1.781437125748503, |
|
"grad_norm": 0.2294144630432129, |
|
"learning_rate": 2.4252583133292927e-07, |
|
"loss": 0.2457, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 1.782934131736527, |
|
"grad_norm": 0.1940593421459198, |
|
"learning_rate": 2.3889455201917655e-07, |
|
"loss": 0.2306, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.784431137724551, |
|
"grad_norm": 0.4276283085346222, |
|
"learning_rate": 2.3528999786421758e-07, |
|
"loss": 0.2661, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 1.785928143712575, |
|
"grad_norm": 0.2756398618221283, |
|
"learning_rate": 2.3171218910122695e-07, |
|
"loss": 0.2456, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 1.7874251497005988, |
|
"grad_norm": 0.22976361215114594, |
|
"learning_rate": 2.2816114581325377e-07, |
|
"loss": 0.2645, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 1.7889221556886228, |
|
"grad_norm": 0.2517649233341217, |
|
"learning_rate": 2.2463688793310345e-07, |
|
"loss": 0.3377, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 1.7904191616766467, |
|
"grad_norm": 0.1688610017299652, |
|
"learning_rate": 2.2113943524323167e-07, |
|
"loss": 0.1488, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.7919161676646707, |
|
"grad_norm": 0.2724323868751526, |
|
"learning_rate": 2.1766880737562833e-07, |
|
"loss": 0.252, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 1.7934131736526946, |
|
"grad_norm": 0.229396253824234, |
|
"learning_rate": 2.1422502381171163e-07, |
|
"loss": 0.1885, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 1.7949101796407185, |
|
"grad_norm": 0.276480108499527, |
|
"learning_rate": 2.1080810388221406e-07, |
|
"loss": 0.2015, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 1.7964071856287425, |
|
"grad_norm": 0.27435392141342163, |
|
"learning_rate": 2.0741806676707887e-07, |
|
"loss": 0.2593, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 1.7979041916167664, |
|
"grad_norm": 0.41313275694847107, |
|
"learning_rate": 2.0405493149534828e-07, |
|
"loss": 0.3106, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.7994011976047903, |
|
"grad_norm": 0.2007514089345932, |
|
"learning_rate": 2.007187169450603e-07, |
|
"loss": 0.2197, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 1.8008982035928143, |
|
"grad_norm": 0.2099570631980896, |
|
"learning_rate": 1.9740944184313882e-07, |
|
"loss": 0.1886, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 1.8023952095808382, |
|
"grad_norm": 0.22008772194385529, |
|
"learning_rate": 1.941271247652915e-07, |
|
"loss": 0.2284, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 1.8038922155688621, |
|
"grad_norm": 0.3029489815235138, |
|
"learning_rate": 1.908717841359048e-07, |
|
"loss": 0.3553, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 1.805389221556886, |
|
"grad_norm": 0.24110634624958038, |
|
"learning_rate": 1.8764343822793962e-07, |
|
"loss": 0.2374, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.80688622754491, |
|
"grad_norm": 0.2744743824005127, |
|
"learning_rate": 1.8444210516283035e-07, |
|
"loss": 0.2079, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 1.8083832335329342, |
|
"grad_norm": 0.23340438306331635, |
|
"learning_rate": 1.8126780291038037e-07, |
|
"loss": 0.2714, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 1.8098802395209581, |
|
"grad_norm": 0.30072230100631714, |
|
"learning_rate": 1.7812054928866617e-07, |
|
"loss": 0.2633, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 1.811377245508982, |
|
"grad_norm": 0.30632907152175903, |
|
"learning_rate": 1.7500036196392956e-07, |
|
"loss": 0.2352, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 1.812874251497006, |
|
"grad_norm": 0.23265916109085083, |
|
"learning_rate": 1.7190725845048827e-07, |
|
"loss": 0.2612, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.81437125748503, |
|
"grad_norm": 0.22589431703090668, |
|
"learning_rate": 1.688412561106284e-07, |
|
"loss": 0.2423, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 1.8158682634730539, |
|
"grad_norm": 0.24091245234012604, |
|
"learning_rate": 1.6580237215451378e-07, |
|
"loss": 0.2122, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 1.8173652694610778, |
|
"grad_norm": 0.3324667513370514, |
|
"learning_rate": 1.6279062364008446e-07, |
|
"loss": 0.4563, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 1.818862275449102, |
|
"grad_norm": 0.33297663927078247, |
|
"learning_rate": 1.5980602747296513e-07, |
|
"loss": 0.2925, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 1.8203592814371259, |
|
"grad_norm": 0.2548763155937195, |
|
"learning_rate": 1.5684860040636573e-07, |
|
"loss": 0.232, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.8218562874251498, |
|
"grad_norm": 0.2788844406604767, |
|
"learning_rate": 1.5391835904099316e-07, |
|
"loss": 0.2535, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 1.8233532934131738, |
|
"grad_norm": 0.3000269830226898, |
|
"learning_rate": 1.510153198249531e-07, |
|
"loss": 0.2428, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 1.8248502994011977, |
|
"grad_norm": 0.25358203053474426, |
|
"learning_rate": 1.4813949905365833e-07, |
|
"loss": 0.2758, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 1.8263473053892216, |
|
"grad_norm": 0.22794456779956818, |
|
"learning_rate": 1.4529091286973994e-07, |
|
"loss": 0.1812, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 1.8278443113772456, |
|
"grad_norm": 0.29520854353904724, |
|
"learning_rate": 1.424695772629553e-07, |
|
"loss": 0.2238, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.8293413173652695, |
|
"grad_norm": 0.23672978579998016, |
|
"learning_rate": 1.3967550807009677e-07, |
|
"loss": 0.2423, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 1.8308383233532934, |
|
"grad_norm": 0.1825004518032074, |
|
"learning_rate": 1.3690872097490481e-07, |
|
"loss": 0.2453, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 1.8323353293413174, |
|
"grad_norm": 0.22814802825450897, |
|
"learning_rate": 1.3416923150798123e-07, |
|
"loss": 0.2367, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 1.8338323353293413, |
|
"grad_norm": 0.23552152514457703, |
|
"learning_rate": 1.3145705504669592e-07, |
|
"loss": 0.2364, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 1.8353293413173652, |
|
"grad_norm": 0.2058655321598053, |
|
"learning_rate": 1.2877220681510927e-07, |
|
"loss": 0.1397, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.8368263473053892, |
|
"grad_norm": 0.243401437997818, |
|
"learning_rate": 1.261147018838782e-07, |
|
"loss": 0.2931, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 1.8383233532934131, |
|
"grad_norm": 0.2716439366340637, |
|
"learning_rate": 1.2348455517017855e-07, |
|
"loss": 0.2844, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 1.839820359281437, |
|
"grad_norm": 0.349772185087204, |
|
"learning_rate": 1.208817814376162e-07, |
|
"loss": 0.312, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 1.841317365269461, |
|
"grad_norm": 0.21398629248142242, |
|
"learning_rate": 1.1830639529614774e-07, |
|
"loss": 0.1563, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 1.842814371257485, |
|
"grad_norm": 0.22315660119056702, |
|
"learning_rate": 1.157584112019966e-07, |
|
"loss": 0.2364, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.8443113772455089, |
|
"grad_norm": 0.27086567878723145, |
|
"learning_rate": 1.1323784345757205e-07, |
|
"loss": 0.3122, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 1.8458083832335328, |
|
"grad_norm": 0.32250702381134033, |
|
"learning_rate": 1.1074470621138866e-07, |
|
"loss": 0.2097, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 1.8473053892215567, |
|
"grad_norm": 0.40964534878730774, |
|
"learning_rate": 1.0827901345798919e-07, |
|
"loss": 0.1931, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.8488023952095807, |
|
"grad_norm": 0.26256802678108215, |
|
"learning_rate": 1.0584077903786238e-07, |
|
"loss": 0.3613, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 1.8502994011976048, |
|
"grad_norm": 0.3172236979007721, |
|
"learning_rate": 1.0343001663736918e-07, |
|
"loss": 0.2944, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.8517964071856288, |
|
"grad_norm": 0.2462015599012375, |
|
"learning_rate": 1.0104673978866164e-07, |
|
"loss": 0.2264, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 1.8532934131736527, |
|
"grad_norm": 0.2893504798412323, |
|
"learning_rate": 9.869096186961025e-08, |
|
"loss": 0.3066, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 1.8547904191616766, |
|
"grad_norm": 0.33563166856765747, |
|
"learning_rate": 9.636269610372895e-08, |
|
"loss": 0.4775, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 1.8562874251497006, |
|
"grad_norm": 0.36685824394226074, |
|
"learning_rate": 9.406195556009745e-08, |
|
"loss": 0.3233, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 1.8577844311377245, |
|
"grad_norm": 0.35795897245407104, |
|
"learning_rate": 9.178875315329183e-08, |
|
"loss": 0.4117, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.8592814371257484, |
|
"grad_norm": 0.21380247175693512, |
|
"learning_rate": 8.954310164331015e-08, |
|
"loss": 0.263, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 1.8607784431137726, |
|
"grad_norm": 0.29042747616767883, |
|
"learning_rate": 8.732501363550028e-08, |
|
"loss": 0.2964, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 1.8622754491017965, |
|
"grad_norm": 0.3003634512424469, |
|
"learning_rate": 8.513450158049109e-08, |
|
"loss": 0.3321, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 1.8637724550898205, |
|
"grad_norm": 0.2523651719093323, |
|
"learning_rate": 8.29715777741208e-08, |
|
"loss": 0.2319, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 1.8652694610778444, |
|
"grad_norm": 0.30345892906188965, |
|
"learning_rate": 8.08362543573682e-08, |
|
"loss": 0.1789, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.8667664670658684, |
|
"grad_norm": 0.1946685016155243, |
|
"learning_rate": 7.872854331628599e-08, |
|
"loss": 0.2507, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 1.8682634730538923, |
|
"grad_norm": 0.28906092047691345, |
|
"learning_rate": 7.664845648193087e-08, |
|
"loss": 0.3652, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 1.8697604790419162, |
|
"grad_norm": 0.20177105069160461, |
|
"learning_rate": 7.459600553029966e-08, |
|
"loss": 0.2312, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 1.8712574850299402, |
|
"grad_norm": 0.3082532584667206, |
|
"learning_rate": 7.257120198226219e-08, |
|
"loss": 0.2132, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 1.872754491017964, |
|
"grad_norm": 0.25730371475219727, |
|
"learning_rate": 7.057405720349853e-08, |
|
"loss": 0.2497, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.874251497005988, |
|
"grad_norm": 0.21774837374687195, |
|
"learning_rate": 6.860458240443179e-08, |
|
"loss": 0.2716, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 1.875748502994012, |
|
"grad_norm": 0.2254839390516281, |
|
"learning_rate": 6.666278864016884e-08, |
|
"loss": 0.2995, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 1.877245508982036, |
|
"grad_norm": 0.23497092723846436, |
|
"learning_rate": 6.474868681043578e-08, |
|
"loss": 0.2641, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 1.8787425149700598, |
|
"grad_norm": 0.28167974948883057, |
|
"learning_rate": 6.286228765951807e-08, |
|
"loss": 0.3233, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 1.8802395209580838, |
|
"grad_norm": 0.29625046253204346, |
|
"learning_rate": 6.100360177619946e-08, |
|
"loss": 0.2986, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.8817365269461077, |
|
"grad_norm": 0.408067524433136, |
|
"learning_rate": 5.917263959370312e-08, |
|
"loss": 0.3761, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 1.8832335329341316, |
|
"grad_norm": 0.3008968234062195, |
|
"learning_rate": 5.736941138963281e-08, |
|
"loss": 0.3179, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 1.8847305389221556, |
|
"grad_norm": 0.188938170671463, |
|
"learning_rate": 5.559392728591517e-08, |
|
"loss": 0.1488, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 1.8862275449101795, |
|
"grad_norm": 0.3134278655052185, |
|
"learning_rate": 5.384619724874307e-08, |
|
"loss": 0.3308, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 1.8877245508982035, |
|
"grad_norm": 0.259612113237381, |
|
"learning_rate": 5.2126231088519e-08, |
|
"loss": 0.195, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.8892215568862274, |
|
"grad_norm": 0.22781744599342346, |
|
"learning_rate": 5.0434038459801213e-08, |
|
"loss": 0.2226, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 1.8907185628742516, |
|
"grad_norm": 0.1933296024799347, |
|
"learning_rate": 4.876962886124936e-08, |
|
"loss": 0.1725, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 1.8922155688622755, |
|
"grad_norm": 0.2313513308763504, |
|
"learning_rate": 4.713301163556894e-08, |
|
"loss": 0.2243, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 1.8937125748502994, |
|
"grad_norm": 0.2517513036727905, |
|
"learning_rate": 4.5524195969461895e-08, |
|
"loss": 0.3134, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 1.8952095808383234, |
|
"grad_norm": 0.2580326795578003, |
|
"learning_rate": 4.394319089357335e-08, |
|
"loss": 0.266, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.8967065868263473, |
|
"grad_norm": 0.2893156409263611, |
|
"learning_rate": 4.239000528244164e-08, |
|
"loss": 0.2997, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 1.8982035928143712, |
|
"grad_norm": 0.2750832736492157, |
|
"learning_rate": 4.086464785444777e-08, |
|
"loss": 0.2071, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 1.8997005988023952, |
|
"grad_norm": 0.38243815302848816, |
|
"learning_rate": 3.936712717176716e-08, |
|
"loss": 0.3132, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 1.9011976047904193, |
|
"grad_norm": 0.34103187918663025, |
|
"learning_rate": 3.7897451640321326e-08, |
|
"loss": 0.3574, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 1.9026946107784433, |
|
"grad_norm": 0.22488778829574585, |
|
"learning_rate": 3.645562950973014e-08, |
|
"loss": 0.2411, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.9041916167664672, |
|
"grad_norm": 0.24086658656597137, |
|
"learning_rate": 3.504166887326688e-08, |
|
"loss": 0.2503, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 1.9056886227544911, |
|
"grad_norm": 0.2731955945491791, |
|
"learning_rate": 3.365557766781047e-08, |
|
"loss": 0.2224, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 1.907185628742515, |
|
"grad_norm": 0.34961998462677, |
|
"learning_rate": 3.229736367380498e-08, |
|
"loss": 0.4016, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 1.908682634730539, |
|
"grad_norm": 0.18793629109859467, |
|
"learning_rate": 3.0967034515211323e-08, |
|
"loss": 0.1871, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 1.910179640718563, |
|
"grad_norm": 0.24469394981861115, |
|
"learning_rate": 2.966459765946672e-08, |
|
"loss": 0.3008, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.9116766467065869, |
|
"grad_norm": 0.2874278128147125, |
|
"learning_rate": 2.8390060417443632e-08, |
|
"loss": 0.3207, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 1.9131736526946108, |
|
"grad_norm": 0.3286723494529724, |
|
"learning_rate": 2.714342994340646e-08, |
|
"loss": 0.2332, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 1.9146706586826348, |
|
"grad_norm": 0.24583743512630463, |
|
"learning_rate": 2.592471323497381e-08, |
|
"loss": 0.2854, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 1.9161676646706587, |
|
"grad_norm": 0.37019965052604675, |
|
"learning_rate": 2.4733917133077378e-08, |
|
"loss": 0.3804, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 1.9176646706586826, |
|
"grad_norm": 0.242394357919693, |
|
"learning_rate": 2.3571048321923694e-08, |
|
"loss": 0.1835, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 1.9191616766467066, |
|
"grad_norm": 0.27056631445884705, |
|
"learning_rate": 2.2436113328958565e-08, |
|
"loss": 0.3272, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 1.9206586826347305, |
|
"grad_norm": 0.2912746071815491, |
|
"learning_rate": 2.1329118524827662e-08, |
|
"loss": 0.3609, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 1.9221556886227544, |
|
"grad_norm": 0.267817884683609, |
|
"learning_rate": 2.0250070123342124e-08, |
|
"loss": 0.3424, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 1.9236526946107784, |
|
"grad_norm": 0.250742644071579, |
|
"learning_rate": 1.9198974181444675e-08, |
|
"loss": 0.237, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 1.9251497005988023, |
|
"grad_norm": 0.281894713640213, |
|
"learning_rate": 1.8175836599173545e-08, |
|
"loss": 0.2917, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.9266467065868262, |
|
"grad_norm": 0.20631025731563568, |
|
"learning_rate": 1.7180663119630846e-08, |
|
"loss": 0.195, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 1.9281437125748502, |
|
"grad_norm": 0.2538008689880371, |
|
"learning_rate": 1.6213459328950355e-08, |
|
"loss": 0.2873, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 1.9296407185628741, |
|
"grad_norm": 0.26172444224357605, |
|
"learning_rate": 1.5274230656263656e-08, |
|
"loss": 0.1752, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 1.931137724550898, |
|
"grad_norm": 0.3013996183872223, |
|
"learning_rate": 1.4362982373675171e-08, |
|
"loss": 0.3118, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 1.9326347305389222, |
|
"grad_norm": 0.29984888434410095, |
|
"learning_rate": 1.347971959622496e-08, |
|
"loss": 0.3903, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.9341317365269461, |
|
"grad_norm": 0.3161168098449707, |
|
"learning_rate": 1.2624447281867625e-08, |
|
"loss": 0.2794, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 1.93562874251497, |
|
"grad_norm": 0.20692503452301025, |
|
"learning_rate": 1.1797170231439004e-08, |
|
"loss": 0.2728, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 1.937125748502994, |
|
"grad_norm": 0.2914261221885681, |
|
"learning_rate": 1.0997893088632306e-08, |
|
"loss": 0.2823, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 1.938622754491018, |
|
"grad_norm": 0.2750595808029175, |
|
"learning_rate": 1.0226620339969795e-08, |
|
"loss": 0.2931, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 1.9401197604790419, |
|
"grad_norm": 0.28438398241996765, |
|
"learning_rate": 9.48335631477948e-09, |
|
"loss": 0.2434, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.9416167664670658, |
|
"grad_norm": 0.6122963428497314, |
|
"learning_rate": 8.768105185170683e-09, |
|
"loss": 0.3048, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 1.94311377245509, |
|
"grad_norm": 0.25166431069374084, |
|
"learning_rate": 8.080870966008513e-09, |
|
"loss": 0.3405, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 1.944610778443114, |
|
"grad_norm": 0.1666012555360794, |
|
"learning_rate": 7.421657514893321e-09, |
|
"loss": 0.1841, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 1.9461077844311379, |
|
"grad_norm": 0.2463662028312683, |
|
"learning_rate": 6.79046853214016e-09, |
|
"loss": 0.2044, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 1.9476047904191618, |
|
"grad_norm": 0.21893510222434998, |
|
"learning_rate": 6.187307560754363e-09, |
|
"loss": 0.3512, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 1.9491017964071857, |
|
"grad_norm": 0.3041020333766937, |
|
"learning_rate": 5.612177986414891e-09, |
|
"loss": 0.2655, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 1.9505988023952097, |
|
"grad_norm": 0.2836143672466278, |
|
"learning_rate": 5.065083037453234e-09, |
|
"loss": 0.503, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 1.9520958083832336, |
|
"grad_norm": 0.2756477892398834, |
|
"learning_rate": 4.546025784837316e-09, |
|
"loss": 0.2959, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 1.9535928143712575, |
|
"grad_norm": 0.23872525990009308, |
|
"learning_rate": 4.055009142152066e-09, |
|
"loss": 0.2107, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 1.9550898203592815, |
|
"grad_norm": 0.33056017756462097, |
|
"learning_rate": 3.5920358655844312e-09, |
|
"loss": 0.2467, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.9565868263473054, |
|
"grad_norm": 0.27123406529426575, |
|
"learning_rate": 3.1571085539089384e-09, |
|
"loss": 0.2182, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 1.9580838323353293, |
|
"grad_norm": 0.2840729355812073, |
|
"learning_rate": 2.7502296484699374e-09, |
|
"loss": 0.327, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 1.9595808383233533, |
|
"grad_norm": 0.3229871690273285, |
|
"learning_rate": 2.371401433170495e-09, |
|
"loss": 0.3329, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 1.9610778443113772, |
|
"grad_norm": 0.23311461508274078, |
|
"learning_rate": 2.0206260344590724e-09, |
|
"loss": 0.1865, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 1.9625748502994012, |
|
"grad_norm": 0.20590674877166748, |
|
"learning_rate": 1.6979054213173141e-09, |
|
"loss": 0.2345, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.964071856287425, |
|
"grad_norm": 0.2407190203666687, |
|
"learning_rate": 1.4032414052478348e-09, |
|
"loss": 0.2488, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 1.965568862275449, |
|
"grad_norm": 0.27954548597335815, |
|
"learning_rate": 1.136635640267003e-09, |
|
"loss": 0.3113, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 1.967065868263473, |
|
"grad_norm": 0.2925473153591156, |
|
"learning_rate": 8.980896228932834e-10, |
|
"loss": 0.3184, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 1.968562874251497, |
|
"grad_norm": 0.23581089079380035, |
|
"learning_rate": 6.876046921389102e-10, |
|
"loss": 0.2076, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 1.9700598802395208, |
|
"grad_norm": 0.21226383745670319, |
|
"learning_rate": 5.051820295032262e-10, |
|
"loss": 0.2688, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.9715568862275448, |
|
"grad_norm": 0.2070101648569107, |
|
"learning_rate": 3.508226589660213e-10, |
|
"loss": 0.2151, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 1.9730538922155687, |
|
"grad_norm": 0.19046247005462646, |
|
"learning_rate": 2.2452744698087114e-10, |
|
"loss": 0.2351, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 1.9745508982035929, |
|
"grad_norm": 0.21798643469810486, |
|
"learning_rate": 1.2629710247180626e-10, |
|
"loss": 0.1911, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 1.9760479041916168, |
|
"grad_norm": 0.2827717959880829, |
|
"learning_rate": 5.613217682720606e-11, |
|
"loss": 0.2928, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 1.9775449101796407, |
|
"grad_norm": 0.259986937046051, |
|
"learning_rate": 1.4033063899243637e-11, |
|
"loss": 0.2227, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 1.9790419161676647, |
|
"grad_norm": 0.2563554644584656, |
|
"learning_rate": 0.0, |
|
"loss": 0.2769, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 1.9790419161676647, |
|
"eval_loss": 0.27016571164131165, |
|
"eval_runtime": 95.929, |
|
"eval_samples_per_second": 7.672, |
|
"eval_steps_per_second": 0.959, |
|
"step": 1336 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1336, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 334, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.390317365432418e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|