|
{ |
|
"best_metric": 0.01777876727283001, |
|
"best_model_checkpoint": "autotrain-vp92t-1q2id/checkpoint-20391", |
|
"epoch": 2.9998896612600685, |
|
"eval_steps": 500, |
|
"global_step": 20391, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003677957997719666, |
|
"grad_norm": 4.279318809509277, |
|
"learning_rate": 3.6764705882352943e-07, |
|
"loss": 0.685, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.007355915995439332, |
|
"grad_norm": 5.647529602050781, |
|
"learning_rate": 7.352941176470589e-07, |
|
"loss": 0.6757, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.011033873993158997, |
|
"grad_norm": 5.691235065460205, |
|
"learning_rate": 1.1029411764705884e-06, |
|
"loss": 0.6611, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.014711831990878664, |
|
"grad_norm": 3.6039223670959473, |
|
"learning_rate": 1.4705882352941177e-06, |
|
"loss": 0.6402, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.018389789988598332, |
|
"grad_norm": 5.447757244110107, |
|
"learning_rate": 1.8382352941176471e-06, |
|
"loss": 0.5745, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.022067747986317995, |
|
"grad_norm": 11.862848281860352, |
|
"learning_rate": 2.2058823529411767e-06, |
|
"loss": 0.4869, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02574570598403766, |
|
"grad_norm": 8.085945129394531, |
|
"learning_rate": 2.573529411764706e-06, |
|
"loss": 0.3687, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.029423663981757327, |
|
"grad_norm": 7.246406555175781, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.3029, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.033101621979477, |
|
"grad_norm": 17.822601318359375, |
|
"learning_rate": 3.308823529411765e-06, |
|
"loss": 0.2098, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.036779579977196664, |
|
"grad_norm": 21.39044952392578, |
|
"learning_rate": 3.6764705882352942e-06, |
|
"loss": 0.1483, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04045753797491632, |
|
"grad_norm": 1.4089692831039429, |
|
"learning_rate": 4.044117647058823e-06, |
|
"loss": 0.0803, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.04413549597263599, |
|
"grad_norm": 0.7371423840522766, |
|
"learning_rate": 4.411764705882353e-06, |
|
"loss": 0.0396, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.047813453970355656, |
|
"grad_norm": 0.22398647665977478, |
|
"learning_rate": 4.779411764705882e-06, |
|
"loss": 0.0381, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.05149141196807532, |
|
"grad_norm": 0.25332173705101013, |
|
"learning_rate": 5.147058823529412e-06, |
|
"loss": 0.0306, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.05516936996579499, |
|
"grad_norm": 0.24578100442886353, |
|
"learning_rate": 5.5147058823529415e-06, |
|
"loss": 0.0342, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.058847327963514655, |
|
"grad_norm": 0.09213005006313324, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 0.0094, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06252528596123433, |
|
"grad_norm": 11.745342254638672, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.0627, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.066203243958954, |
|
"grad_norm": 12.147088050842285, |
|
"learning_rate": 6.61764705882353e-06, |
|
"loss": 0.0755, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.06988120195667366, |
|
"grad_norm": 0.14864382147789001, |
|
"learning_rate": 6.985294117647059e-06, |
|
"loss": 0.0532, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.07355915995439333, |
|
"grad_norm": 0.06755024939775467, |
|
"learning_rate": 7.3529411764705884e-06, |
|
"loss": 0.049, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.077237117952113, |
|
"grad_norm": 0.0582403726875782, |
|
"learning_rate": 7.720588235294117e-06, |
|
"loss": 0.0213, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.08091507594983265, |
|
"grad_norm": 0.04456046596169472, |
|
"learning_rate": 8.088235294117646e-06, |
|
"loss": 0.0446, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.08459303394755231, |
|
"grad_norm": 0.058339089155197144, |
|
"learning_rate": 8.455882352941177e-06, |
|
"loss": 0.0442, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.08827099194527198, |
|
"grad_norm": 0.03951073810458183, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.0131, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09194894994299165, |
|
"grad_norm": 0.08159155398607254, |
|
"learning_rate": 9.191176470588236e-06, |
|
"loss": 0.0429, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.09562690794071131, |
|
"grad_norm": 0.0362938717007637, |
|
"learning_rate": 9.558823529411764e-06, |
|
"loss": 0.0336, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.09930486593843098, |
|
"grad_norm": 0.10369551926851273, |
|
"learning_rate": 9.926470588235293e-06, |
|
"loss": 0.0277, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.10298282393615064, |
|
"grad_norm": 0.032908402383327484, |
|
"learning_rate": 1.0294117647058824e-05, |
|
"loss": 0.0213, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.10666078193387031, |
|
"grad_norm": 0.017092719674110413, |
|
"learning_rate": 1.0661764705882354e-05, |
|
"loss": 0.0784, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.11033873993158998, |
|
"grad_norm": 0.015081583522260189, |
|
"learning_rate": 1.1029411764705883e-05, |
|
"loss": 0.0265, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.11401669792930964, |
|
"grad_norm": 0.09147176891565323, |
|
"learning_rate": 1.139705882352941e-05, |
|
"loss": 0.0381, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.11769465592702931, |
|
"grad_norm": 0.08481771498918533, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 0.128, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12137261392474898, |
|
"grad_norm": 0.014765892177820206, |
|
"learning_rate": 1.2132352941176471e-05, |
|
"loss": 0.0141, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.12505057192246866, |
|
"grad_norm": 0.018918083980679512, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0538, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.12872852992018832, |
|
"grad_norm": 0.015013976022601128, |
|
"learning_rate": 1.2867647058823528e-05, |
|
"loss": 0.0019, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.132406487917908, |
|
"grad_norm": 11.109874725341797, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 0.0394, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.13608444591562766, |
|
"grad_norm": 0.015857884660363197, |
|
"learning_rate": 1.3602941176470589e-05, |
|
"loss": 0.0005, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.13976240391334732, |
|
"grad_norm": 0.015862109139561653, |
|
"learning_rate": 1.3970588235294118e-05, |
|
"loss": 0.0214, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.143440361911067, |
|
"grad_norm": 0.020424969494342804, |
|
"learning_rate": 1.4338235294117647e-05, |
|
"loss": 0.0367, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.14711831990878665, |
|
"grad_norm": 0.027131319046020508, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 0.08, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15079627790650632, |
|
"grad_norm": 0.03147580847144127, |
|
"learning_rate": 1.5073529411764706e-05, |
|
"loss": 0.0329, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.154474235904226, |
|
"grad_norm": 24.15705680847168, |
|
"learning_rate": 1.5441176470588234e-05, |
|
"loss": 0.0064, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.15815219390194565, |
|
"grad_norm": 0.011522412300109863, |
|
"learning_rate": 1.5808823529411767e-05, |
|
"loss": 0.0762, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.1618301518996653, |
|
"grad_norm": 0.04401927441358566, |
|
"learning_rate": 1.6176470588235293e-05, |
|
"loss": 0.0656, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.16550810989738496, |
|
"grad_norm": 0.034190475940704346, |
|
"learning_rate": 1.6544117647058825e-05, |
|
"loss": 0.0308, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.16918606789510462, |
|
"grad_norm": 0.021350180730223656, |
|
"learning_rate": 1.6911764705882355e-05, |
|
"loss": 0.0539, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.1728640258928243, |
|
"grad_norm": 0.0446242094039917, |
|
"learning_rate": 1.727941176470588e-05, |
|
"loss": 0.0681, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.17654198389054396, |
|
"grad_norm": 1.6311242580413818, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 0.0293, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.18021994188826362, |
|
"grad_norm": 0.00914335809648037, |
|
"learning_rate": 1.801470588235294e-05, |
|
"loss": 0.0386, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.1838978998859833, |
|
"grad_norm": 0.009417989291250706, |
|
"learning_rate": 1.8382352941176472e-05, |
|
"loss": 0.0004, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.18757585788370296, |
|
"grad_norm": 0.9801831245422363, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.057, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.19125381588142262, |
|
"grad_norm": 0.005170044023543596, |
|
"learning_rate": 1.9117647058823528e-05, |
|
"loss": 0.0002, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1949317738791423, |
|
"grad_norm": 0.03409629687666893, |
|
"learning_rate": 1.948529411764706e-05, |
|
"loss": 0.0394, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.19860973187686196, |
|
"grad_norm": 0.008645136840641499, |
|
"learning_rate": 1.9852941176470586e-05, |
|
"loss": 0.0053, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.20228768987458162, |
|
"grad_norm": 0.004454991314560175, |
|
"learning_rate": 2.022058823529412e-05, |
|
"loss": 0.0002, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.2059656478723013, |
|
"grad_norm": 0.00453265942633152, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.0216, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.20964360587002095, |
|
"grad_norm": 0.005607594270259142, |
|
"learning_rate": 2.0955882352941175e-05, |
|
"loss": 0.0206, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.21332156386774062, |
|
"grad_norm": 0.002988673048093915, |
|
"learning_rate": 2.1323529411764707e-05, |
|
"loss": 0.0002, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.2169995218654603, |
|
"grad_norm": 0.15066058933734894, |
|
"learning_rate": 2.1691176470588237e-05, |
|
"loss": 0.0002, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.22067747986317995, |
|
"grad_norm": 0.2750360071659088, |
|
"learning_rate": 2.2058823529411766e-05, |
|
"loss": 0.0002, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.22435543786089962, |
|
"grad_norm": 0.00299286050722003, |
|
"learning_rate": 2.2426470588235296e-05, |
|
"loss": 0.0004, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.2280333958586193, |
|
"grad_norm": 0.004124614410102367, |
|
"learning_rate": 2.279411764705882e-05, |
|
"loss": 0.0001, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.23171135385633895, |
|
"grad_norm": 0.5924888849258423, |
|
"learning_rate": 2.3161764705882354e-05, |
|
"loss": 0.1041, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.23538931185405862, |
|
"grad_norm": 0.011225424706935883, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 0.0528, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.23906726985177829, |
|
"grad_norm": 0.008940880186855793, |
|
"learning_rate": 2.389705882352941e-05, |
|
"loss": 0.0972, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.24274522784949795, |
|
"grad_norm": 0.008405734784901142, |
|
"learning_rate": 2.4264705882352942e-05, |
|
"loss": 0.0495, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.24642318584721762, |
|
"grad_norm": 0.008656616322696209, |
|
"learning_rate": 2.4632352941176472e-05, |
|
"loss": 0.0216, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.2501011438449373, |
|
"grad_norm": 0.02361353114247322, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0385, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.25377910184265695, |
|
"grad_norm": 0.05051364749670029, |
|
"learning_rate": 2.536764705882353e-05, |
|
"loss": 0.0814, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.25745705984037665, |
|
"grad_norm": 0.00914950855076313, |
|
"learning_rate": 2.5735294117647057e-05, |
|
"loss": 0.0006, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.2611350178380963, |
|
"grad_norm": 0.008635000325739384, |
|
"learning_rate": 2.610294117647059e-05, |
|
"loss": 0.0003, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.264812975835816, |
|
"grad_norm": 0.005725966300815344, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 0.0002, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.2684909338335356, |
|
"grad_norm": 0.014133188873529434, |
|
"learning_rate": 2.6838235294117648e-05, |
|
"loss": 0.0562, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.2721688918312553, |
|
"grad_norm": 0.0024135392159223557, |
|
"learning_rate": 2.7205882352941177e-05, |
|
"loss": 0.003, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.27584684982897495, |
|
"grad_norm": 0.03300013393163681, |
|
"learning_rate": 2.7573529411764707e-05, |
|
"loss": 0.0947, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.27952480782669464, |
|
"grad_norm": 0.046893417835235596, |
|
"learning_rate": 2.7941176470588236e-05, |
|
"loss": 0.0613, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.2832027658244143, |
|
"grad_norm": 0.009986027143895626, |
|
"learning_rate": 2.8308823529411766e-05, |
|
"loss": 0.0616, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.286880723822134, |
|
"grad_norm": 0.05905308201909065, |
|
"learning_rate": 2.8676470588235295e-05, |
|
"loss": 0.1182, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.2905586818198536, |
|
"grad_norm": 0.00858025811612606, |
|
"learning_rate": 2.9044117647058824e-05, |
|
"loss": 0.0007, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.2942366398175733, |
|
"grad_norm": 95.36400604248047, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 0.0869, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.29791459781529295, |
|
"grad_norm": 0.005693793762475252, |
|
"learning_rate": 2.9779411764705883e-05, |
|
"loss": 0.0348, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.30159255581301264, |
|
"grad_norm": 0.009921176359057426, |
|
"learning_rate": 2.998365211705084e-05, |
|
"loss": 0.0514, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.3052705138107323, |
|
"grad_norm": 0.01893715187907219, |
|
"learning_rate": 2.9942782409677946e-05, |
|
"loss": 0.0575, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.308948471808452, |
|
"grad_norm": 0.012356853112578392, |
|
"learning_rate": 2.990191270230505e-05, |
|
"loss": 0.0085, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.3126264298061716, |
|
"grad_norm": 0.027572082355618477, |
|
"learning_rate": 2.9861042994932156e-05, |
|
"loss": 0.0071, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.3163043878038913, |
|
"grad_norm": 0.0033696063328534365, |
|
"learning_rate": 2.9820173287559262e-05, |
|
"loss": 0.0153, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.31998234580161095, |
|
"grad_norm": 0.0032816240563988686, |
|
"learning_rate": 2.9779303580186367e-05, |
|
"loss": 0.0425, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.3236603037993306, |
|
"grad_norm": 0.013268062844872475, |
|
"learning_rate": 2.9738433872813472e-05, |
|
"loss": 0.0529, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3273382617970503, |
|
"grad_norm": 13.58910083770752, |
|
"learning_rate": 2.9697564165440574e-05, |
|
"loss": 0.1147, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.3310162197947699, |
|
"grad_norm": 80.59501647949219, |
|
"learning_rate": 2.965669445806768e-05, |
|
"loss": 0.0043, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.3346941777924896, |
|
"grad_norm": 81.63990020751953, |
|
"learning_rate": 2.9615824750694785e-05, |
|
"loss": 0.0355, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.33837213579020925, |
|
"grad_norm": 0.014708608388900757, |
|
"learning_rate": 2.957495504332189e-05, |
|
"loss": 0.0668, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.34205009378792894, |
|
"grad_norm": 0.015443817712366581, |
|
"learning_rate": 2.9534085335948996e-05, |
|
"loss": 0.0006, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.3457280517856486, |
|
"grad_norm": 0.005841911304742098, |
|
"learning_rate": 2.94932156285761e-05, |
|
"loss": 0.0487, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.3494060097833683, |
|
"grad_norm": 0.06746743619441986, |
|
"learning_rate": 2.9452345921203207e-05, |
|
"loss": 0.0229, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.3530839677810879, |
|
"grad_norm": 0.002315863035619259, |
|
"learning_rate": 2.941147621383031e-05, |
|
"loss": 0.0003, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3567619257788076, |
|
"grad_norm": 0.0017154604429379106, |
|
"learning_rate": 2.9370606506457414e-05, |
|
"loss": 0.0308, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.36043988377652725, |
|
"grad_norm": 0.009691163897514343, |
|
"learning_rate": 2.932973679908452e-05, |
|
"loss": 0.0256, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.36411784177424694, |
|
"grad_norm": 0.011957678943872452, |
|
"learning_rate": 2.9288867091711625e-05, |
|
"loss": 0.0713, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.3677957997719666, |
|
"grad_norm": 0.02373524010181427, |
|
"learning_rate": 2.924799738433873e-05, |
|
"loss": 0.0504, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3714737577696863, |
|
"grad_norm": 0.0031693673226982355, |
|
"learning_rate": 2.9207127676965836e-05, |
|
"loss": 0.0251, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.3751517157674059, |
|
"grad_norm": 0.025995498523116112, |
|
"learning_rate": 2.916625796959294e-05, |
|
"loss": 0.0459, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.3788296737651256, |
|
"grad_norm": 0.006027763709425926, |
|
"learning_rate": 2.9125388262220043e-05, |
|
"loss": 0.0013, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.38250763176284525, |
|
"grad_norm": 0.014026220887899399, |
|
"learning_rate": 2.9084518554847148e-05, |
|
"loss": 0.0831, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.38618558976056494, |
|
"grad_norm": 0.025293108075857162, |
|
"learning_rate": 2.9043648847474254e-05, |
|
"loss": 0.0275, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.3898635477582846, |
|
"grad_norm": 0.0041050901636481285, |
|
"learning_rate": 2.900277914010136e-05, |
|
"loss": 0.0007, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.3935415057560043, |
|
"grad_norm": 0.027650628238916397, |
|
"learning_rate": 2.8961909432728464e-05, |
|
"loss": 0.0002, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.3972194637537239, |
|
"grad_norm": 0.06839253753423691, |
|
"learning_rate": 2.8921039725355566e-05, |
|
"loss": 0.0353, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.4008974217514436, |
|
"grad_norm": 0.005996390245854855, |
|
"learning_rate": 2.8880170017982672e-05, |
|
"loss": 0.0134, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.40457537974916324, |
|
"grad_norm": 0.0020017025526612997, |
|
"learning_rate": 2.8839300310609774e-05, |
|
"loss": 0.0002, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.40825333774688294, |
|
"grad_norm": 0.04533281922340393, |
|
"learning_rate": 2.879843060323688e-05, |
|
"loss": 0.0367, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.4119312957446026, |
|
"grad_norm": 0.005575124174356461, |
|
"learning_rate": 2.8757560895863984e-05, |
|
"loss": 0.064, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.41560925374232227, |
|
"grad_norm": 0.0019691460765898228, |
|
"learning_rate": 2.871669118849109e-05, |
|
"loss": 0.0005, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.4192872117400419, |
|
"grad_norm": 0.004097863100469112, |
|
"learning_rate": 2.8675821481118195e-05, |
|
"loss": 0.0557, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.4229651697377616, |
|
"grad_norm": 0.0018194678705185652, |
|
"learning_rate": 2.86349517737453e-05, |
|
"loss": 0.0009, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.42664312773548124, |
|
"grad_norm": 0.006679282058030367, |
|
"learning_rate": 2.8594082066372406e-05, |
|
"loss": 0.0882, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.43032108573320094, |
|
"grad_norm": 0.0030163535848259926, |
|
"learning_rate": 2.8553212358999508e-05, |
|
"loss": 0.0005, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.4339990437309206, |
|
"grad_norm": 0.2035808265209198, |
|
"learning_rate": 2.8512342651626613e-05, |
|
"loss": 0.0007, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.43767700172864027, |
|
"grad_norm": 0.022791976109147072, |
|
"learning_rate": 2.847147294425372e-05, |
|
"loss": 0.0941, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.4413549597263599, |
|
"grad_norm": 0.007299873046576977, |
|
"learning_rate": 2.8430603236880824e-05, |
|
"loss": 0.0511, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4450329177240796, |
|
"grad_norm": 0.003951882012188435, |
|
"learning_rate": 2.838973352950793e-05, |
|
"loss": 0.0096, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.44871087572179924, |
|
"grad_norm": 0.009184204041957855, |
|
"learning_rate": 2.8348863822135035e-05, |
|
"loss": 0.0372, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.45238883371951893, |
|
"grad_norm": 0.032343972474336624, |
|
"learning_rate": 2.830799411476214e-05, |
|
"loss": 0.0197, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.4560667917172386, |
|
"grad_norm": 0.002111822599545121, |
|
"learning_rate": 2.8267124407389246e-05, |
|
"loss": 0.0004, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.45974474971495827, |
|
"grad_norm": 0.0024695848114788532, |
|
"learning_rate": 2.8226254700016348e-05, |
|
"loss": 0.0757, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.4634227077126779, |
|
"grad_norm": 0.001880451338365674, |
|
"learning_rate": 2.8185384992643453e-05, |
|
"loss": 0.0181, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.4671006657103976, |
|
"grad_norm": 0.02519827149808407, |
|
"learning_rate": 2.814451528527056e-05, |
|
"loss": 0.0006, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.47077862370811724, |
|
"grad_norm": 0.0016058700857684016, |
|
"learning_rate": 2.8103645577897664e-05, |
|
"loss": 0.0088, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.47445658170583693, |
|
"grad_norm": 0.02888200432062149, |
|
"learning_rate": 2.806277587052477e-05, |
|
"loss": 0.0496, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.47813453970355657, |
|
"grad_norm": 0.0747719258069992, |
|
"learning_rate": 2.8021906163151874e-05, |
|
"loss": 0.1338, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.48181249770127627, |
|
"grad_norm": 4.114503860473633, |
|
"learning_rate": 2.798103645577898e-05, |
|
"loss": 0.063, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.4854904556989959, |
|
"grad_norm": 0.10994021594524384, |
|
"learning_rate": 2.7940166748406082e-05, |
|
"loss": 0.0525, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.4891684136967156, |
|
"grad_norm": 0.016218269243836403, |
|
"learning_rate": 2.7899297041033187e-05, |
|
"loss": 0.0547, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.49284637169443524, |
|
"grad_norm": 0.08570988476276398, |
|
"learning_rate": 2.7858427333660293e-05, |
|
"loss": 0.0551, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.49652432969215493, |
|
"grad_norm": 0.013475511223077774, |
|
"learning_rate": 2.7817557626287398e-05, |
|
"loss": 0.0445, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.5002022876898746, |
|
"grad_norm": 0.006590835750102997, |
|
"learning_rate": 2.7776687918914503e-05, |
|
"loss": 0.0005, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.5038802456875943, |
|
"grad_norm": 0.004501729272305965, |
|
"learning_rate": 2.773581821154161e-05, |
|
"loss": 0.0219, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.5075582036853139, |
|
"grad_norm": 0.06999096274375916, |
|
"learning_rate": 2.769494850416871e-05, |
|
"loss": 0.0569, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.5112361616830335, |
|
"grad_norm": 0.003883685451000929, |
|
"learning_rate": 2.7654078796795813e-05, |
|
"loss": 0.001, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.5149141196807533, |
|
"grad_norm": 0.0029312793631106615, |
|
"learning_rate": 2.7613209089422918e-05, |
|
"loss": 0.0229, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.5185920776784729, |
|
"grad_norm": 0.006315870210528374, |
|
"learning_rate": 2.7572339382050023e-05, |
|
"loss": 0.0731, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.5222700356761926, |
|
"grad_norm": 0.0030722382944077253, |
|
"learning_rate": 2.753146967467713e-05, |
|
"loss": 0.0392, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.5259479936739122, |
|
"grad_norm": 0.005796592216938734, |
|
"learning_rate": 2.7490599967304234e-05, |
|
"loss": 0.0139, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.529625951671632, |
|
"grad_norm": 0.6967291831970215, |
|
"learning_rate": 2.744973025993134e-05, |
|
"loss": 0.0325, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.5333039096693516, |
|
"grad_norm": 0.017705194652080536, |
|
"learning_rate": 2.7408860552558445e-05, |
|
"loss": 0.0433, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.5369818676670712, |
|
"grad_norm": 0.020230021327733994, |
|
"learning_rate": 2.7367990845185547e-05, |
|
"loss": 0.0007, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.5406598256647909, |
|
"grad_norm": 0.0023030710872262716, |
|
"learning_rate": 2.7327121137812652e-05, |
|
"loss": 0.0002, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.5443377836625106, |
|
"grad_norm": 0.0020703673362731934, |
|
"learning_rate": 2.7286251430439758e-05, |
|
"loss": 0.0002, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.5480157416602303, |
|
"grad_norm": 0.002691243775188923, |
|
"learning_rate": 2.7245381723066863e-05, |
|
"loss": 0.0005, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.5516936996579499, |
|
"grad_norm": 0.002691768342629075, |
|
"learning_rate": 2.720451201569397e-05, |
|
"loss": 0.0957, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.5553716576556695, |
|
"grad_norm": 0.05962933972477913, |
|
"learning_rate": 2.7163642308321074e-05, |
|
"loss": 0.1317, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.5590496156533893, |
|
"grad_norm": 0.06475093215703964, |
|
"learning_rate": 2.712277260094818e-05, |
|
"loss": 0.0257, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.5627275736511089, |
|
"grad_norm": 0.0121241370216012, |
|
"learning_rate": 2.708190289357528e-05, |
|
"loss": 0.0609, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.5664055316488286, |
|
"grad_norm": 0.007753327488899231, |
|
"learning_rate": 2.7041033186202387e-05, |
|
"loss": 0.0008, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.5700834896465482, |
|
"grad_norm": 0.005270315799862146, |
|
"learning_rate": 2.7000163478829492e-05, |
|
"loss": 0.0002, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.573761447644268, |
|
"grad_norm": 0.004358434583991766, |
|
"learning_rate": 2.6959293771456597e-05, |
|
"loss": 0.0174, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.5774394056419876, |
|
"grad_norm": 0.003769191913306713, |
|
"learning_rate": 2.6918424064083703e-05, |
|
"loss": 0.0513, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.5811173636397072, |
|
"grad_norm": 0.0043784258887171745, |
|
"learning_rate": 2.6877554356710808e-05, |
|
"loss": 0.0185, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.5847953216374269, |
|
"grad_norm": 0.004602793138474226, |
|
"learning_rate": 2.6836684649337913e-05, |
|
"loss": 0.0236, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.5884732796351466, |
|
"grad_norm": 0.002638947917148471, |
|
"learning_rate": 2.679581494196502e-05, |
|
"loss": 0.0155, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5921512376328663, |
|
"grad_norm": 0.002830574056133628, |
|
"learning_rate": 2.675494523459212e-05, |
|
"loss": 0.0161, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.5958291956305859, |
|
"grad_norm": 0.015412558801472187, |
|
"learning_rate": 2.6714075527219226e-05, |
|
"loss": 0.131, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.5995071536283055, |
|
"grad_norm": 0.016349300742149353, |
|
"learning_rate": 2.667320581984633e-05, |
|
"loss": 0.0132, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.6031851116260253, |
|
"grad_norm": 0.013236075639724731, |
|
"learning_rate": 2.6632336112473437e-05, |
|
"loss": 0.0005, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.6068630696237449, |
|
"grad_norm": 0.007088659331202507, |
|
"learning_rate": 2.6591466405100542e-05, |
|
"loss": 0.0004, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.6105410276214646, |
|
"grad_norm": 0.02121301181614399, |
|
"learning_rate": 2.6550596697727648e-05, |
|
"loss": 0.0397, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.6142189856191842, |
|
"grad_norm": 0.030070917680859566, |
|
"learning_rate": 2.650972699035475e-05, |
|
"loss": 0.0754, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.617896943616904, |
|
"grad_norm": 10.173595428466797, |
|
"learning_rate": 2.646885728298185e-05, |
|
"loss": 0.0336, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.6215749016146236, |
|
"grad_norm": 0.014447388239204884, |
|
"learning_rate": 2.6427987575608957e-05, |
|
"loss": 0.003, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.6252528596123432, |
|
"grad_norm": 0.012096612714231014, |
|
"learning_rate": 2.6387117868236062e-05, |
|
"loss": 0.0481, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.6289308176100629, |
|
"grad_norm": 0.02047719806432724, |
|
"learning_rate": 2.6346248160863168e-05, |
|
"loss": 0.051, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.6326087756077826, |
|
"grad_norm": 0.01152089238166809, |
|
"learning_rate": 2.6305378453490273e-05, |
|
"loss": 0.011, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.6362867336055023, |
|
"grad_norm": 0.01178329810500145, |
|
"learning_rate": 2.626450874611738e-05, |
|
"loss": 0.0187, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.6399646916032219, |
|
"grad_norm": 0.012962247245013714, |
|
"learning_rate": 2.6223639038744484e-05, |
|
"loss": 0.041, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.6436426496009415, |
|
"grad_norm": 0.012993029318749905, |
|
"learning_rate": 2.6182769331371586e-05, |
|
"loss": 0.03, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.6473206075986612, |
|
"grad_norm": 0.01311455201357603, |
|
"learning_rate": 2.614189962399869e-05, |
|
"loss": 0.0421, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.6509985655963809, |
|
"grad_norm": 0.022407829761505127, |
|
"learning_rate": 2.6101029916625797e-05, |
|
"loss": 0.0312, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.6546765235941006, |
|
"grad_norm": 0.007614122703671455, |
|
"learning_rate": 2.6060160209252902e-05, |
|
"loss": 0.0014, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.6583544815918202, |
|
"grad_norm": 0.006891134660691023, |
|
"learning_rate": 2.6019290501880007e-05, |
|
"loss": 0.0966, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.6620324395895398, |
|
"grad_norm": 0.026897389441728592, |
|
"learning_rate": 2.5978420794507113e-05, |
|
"loss": 0.0387, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6657103975872596, |
|
"grad_norm": 0.013364088721573353, |
|
"learning_rate": 2.5937551087134218e-05, |
|
"loss": 0.0007, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.6693883555849792, |
|
"grad_norm": 0.006984102539718151, |
|
"learning_rate": 2.589668137976132e-05, |
|
"loss": 0.0008, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.6730663135826989, |
|
"grad_norm": 0.005882107652723789, |
|
"learning_rate": 2.5855811672388425e-05, |
|
"loss": 0.0003, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.6767442715804185, |
|
"grad_norm": 0.008882598020136356, |
|
"learning_rate": 2.581494196501553e-05, |
|
"loss": 0.0389, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.6804222295781382, |
|
"grad_norm": 0.01086785364896059, |
|
"learning_rate": 2.5774072257642636e-05, |
|
"loss": 0.0305, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.6841001875758579, |
|
"grad_norm": 0.005837304517626762, |
|
"learning_rate": 2.573320255026974e-05, |
|
"loss": 0.0277, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.6877781455735775, |
|
"grad_norm": 0.006613869220018387, |
|
"learning_rate": 2.5692332842896847e-05, |
|
"loss": 0.0003, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.6914561035712972, |
|
"grad_norm": 0.012274155393242836, |
|
"learning_rate": 2.5651463135523952e-05, |
|
"loss": 0.0383, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.6951340615690169, |
|
"grad_norm": 0.0031378071289509535, |
|
"learning_rate": 2.5610593428151054e-05, |
|
"loss": 0.0065, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.6988120195667366, |
|
"grad_norm": 0.12304351478815079, |
|
"learning_rate": 2.556972372077816e-05, |
|
"loss": 0.0103, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.7024899775644562, |
|
"grad_norm": 0.005349988583475351, |
|
"learning_rate": 2.5528854013405265e-05, |
|
"loss": 0.0292, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.7061679355621758, |
|
"grad_norm": 0.0023686892818659544, |
|
"learning_rate": 2.548798430603237e-05, |
|
"loss": 0.0169, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7098458935598956, |
|
"grad_norm": 0.0018137163715437055, |
|
"learning_rate": 2.5447114598659476e-05, |
|
"loss": 0.0444, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.7135238515576152, |
|
"grad_norm": 0.0029049592558294535, |
|
"learning_rate": 2.540624489128658e-05, |
|
"loss": 0.0591, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.7172018095553349, |
|
"grad_norm": 0.0024209930561482906, |
|
"learning_rate": 2.5365375183913687e-05, |
|
"loss": 0.0311, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.7208797675530545, |
|
"grad_norm": 62.02742385864258, |
|
"learning_rate": 2.532450547654079e-05, |
|
"loss": 0.0324, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.7245577255507742, |
|
"grad_norm": 0.002258418360725045, |
|
"learning_rate": 2.528363576916789e-05, |
|
"loss": 0.0003, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.7282356835484939, |
|
"grad_norm": 0.001804179628379643, |
|
"learning_rate": 2.5242766061794996e-05, |
|
"loss": 0.0499, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.7319136415462135, |
|
"grad_norm": 0.33615773916244507, |
|
"learning_rate": 2.52018963544221e-05, |
|
"loss": 0.0005, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.7355915995439332, |
|
"grad_norm": 0.0010956133482977748, |
|
"learning_rate": 2.5161026647049207e-05, |
|
"loss": 0.0008, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7392695575416529, |
|
"grad_norm": 0.0012902173912152648, |
|
"learning_rate": 2.5120156939676312e-05, |
|
"loss": 0.0273, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.7429475155393725, |
|
"grad_norm": 0.013881388120353222, |
|
"learning_rate": 2.5079287232303417e-05, |
|
"loss": 0.0488, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.7466254735370922, |
|
"grad_norm": 0.011136908084154129, |
|
"learning_rate": 2.5038417524930523e-05, |
|
"loss": 0.0003, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.7503034315348118, |
|
"grad_norm": 0.020626788958907127, |
|
"learning_rate": 2.4997547817557625e-05, |
|
"loss": 0.0621, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.7539813895325316, |
|
"grad_norm": 0.039804015308618546, |
|
"learning_rate": 2.495667811018473e-05, |
|
"loss": 0.0941, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.7576593475302512, |
|
"grad_norm": 0.019914086908102036, |
|
"learning_rate": 2.4915808402811835e-05, |
|
"loss": 0.0021, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.7613373055279709, |
|
"grad_norm": 0.027103891596198082, |
|
"learning_rate": 2.487493869543894e-05, |
|
"loss": 0.0375, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.7650152635256905, |
|
"grad_norm": 0.008572924882173538, |
|
"learning_rate": 2.4834068988066046e-05, |
|
"loss": 0.0007, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.7686932215234102, |
|
"grad_norm": 0.011288322508335114, |
|
"learning_rate": 2.479319928069315e-05, |
|
"loss": 0.0339, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.7723711795211299, |
|
"grad_norm": 11.5412015914917, |
|
"learning_rate": 2.4752329573320257e-05, |
|
"loss": 0.0414, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.7760491375188495, |
|
"grad_norm": 0.016787946224212646, |
|
"learning_rate": 2.471145986594736e-05, |
|
"loss": 0.0817, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.7797270955165692, |
|
"grad_norm": 0.3828181326389313, |
|
"learning_rate": 2.4670590158574464e-05, |
|
"loss": 0.0013, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.7834050535142889, |
|
"grad_norm": 0.00423394562676549, |
|
"learning_rate": 2.462972045120157e-05, |
|
"loss": 0.0009, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.7870830115120085, |
|
"grad_norm": 0.0038542733527719975, |
|
"learning_rate": 2.4588850743828675e-05, |
|
"loss": 0.0006, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.7907609695097282, |
|
"grad_norm": 0.002444320358335972, |
|
"learning_rate": 2.454798103645578e-05, |
|
"loss": 0.0208, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.7944389275074478, |
|
"grad_norm": 0.06983044743537903, |
|
"learning_rate": 2.4507111329082886e-05, |
|
"loss": 0.0611, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.7981168855051676, |
|
"grad_norm": 0.0033168047666549683, |
|
"learning_rate": 2.446624162170999e-05, |
|
"loss": 0.0016, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.8017948435028872, |
|
"grad_norm": 0.0031268312595784664, |
|
"learning_rate": 2.4425371914337093e-05, |
|
"loss": 0.0119, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.8054728015006068, |
|
"grad_norm": 0.0019544719252735376, |
|
"learning_rate": 2.43845022069642e-05, |
|
"loss": 0.0337, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.8091507594983265, |
|
"grad_norm": 0.017085539177060127, |
|
"learning_rate": 2.4343632499591304e-05, |
|
"loss": 0.0776, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.8128287174960462, |
|
"grad_norm": 0.916741669178009, |
|
"learning_rate": 2.430276279221841e-05, |
|
"loss": 0.0009, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.8165066754937659, |
|
"grad_norm": 0.0018967619398608804, |
|
"learning_rate": 2.4261893084845515e-05, |
|
"loss": 0.0005, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.8201846334914855, |
|
"grad_norm": 0.002946459921076894, |
|
"learning_rate": 2.422102337747262e-05, |
|
"loss": 0.052, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.8238625914892052, |
|
"grad_norm": 0.00457314308732748, |
|
"learning_rate": 2.4180153670099725e-05, |
|
"loss": 0.0386, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.8275405494869249, |
|
"grad_norm": 0.00886601209640503, |
|
"learning_rate": 2.4139283962726827e-05, |
|
"loss": 0.0003, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.8312185074846445, |
|
"grad_norm": 0.004110053181648254, |
|
"learning_rate": 2.409841425535393e-05, |
|
"loss": 0.0002, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.8348964654823642, |
|
"grad_norm": 0.002550973556935787, |
|
"learning_rate": 2.4057544547981035e-05, |
|
"loss": 0.0893, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 0.8385744234800838, |
|
"grad_norm": 0.0047971270978450775, |
|
"learning_rate": 2.401667484060814e-05, |
|
"loss": 0.0649, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.8422523814778036, |
|
"grad_norm": 22.79808235168457, |
|
"learning_rate": 2.3975805133235246e-05, |
|
"loss": 0.0509, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 0.8459303394755232, |
|
"grad_norm": 0.02145661599934101, |
|
"learning_rate": 2.393493542586235e-05, |
|
"loss": 0.0456, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.8496082974732428, |
|
"grad_norm": 0.8593617081642151, |
|
"learning_rate": 2.3894065718489456e-05, |
|
"loss": 0.0727, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 0.8532862554709625, |
|
"grad_norm": 0.004426372237503529, |
|
"learning_rate": 2.385319601111656e-05, |
|
"loss": 0.0004, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.8569642134686822, |
|
"grad_norm": 0.0030661604832857847, |
|
"learning_rate": 2.3812326303743664e-05, |
|
"loss": 0.0001, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 0.8606421714664019, |
|
"grad_norm": 0.0062530264258384705, |
|
"learning_rate": 2.377145659637077e-05, |
|
"loss": 0.044, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.8643201294641215, |
|
"grad_norm": 0.023851774632930756, |
|
"learning_rate": 2.3730586888997874e-05, |
|
"loss": 0.0412, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 0.8679980874618412, |
|
"grad_norm": 0.013338697142899036, |
|
"learning_rate": 2.368971718162498e-05, |
|
"loss": 0.0006, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.8716760454595609, |
|
"grad_norm": 0.01904129609465599, |
|
"learning_rate": 2.3648847474252085e-05, |
|
"loss": 0.0726, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 0.8753540034572805, |
|
"grad_norm": 0.010262302123010159, |
|
"learning_rate": 2.360797776687919e-05, |
|
"loss": 0.0087, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.8790319614550002, |
|
"grad_norm": 0.006104280706495047, |
|
"learning_rate": 2.3567108059506296e-05, |
|
"loss": 0.0004, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 0.8827099194527198, |
|
"grad_norm": 0.019870450720191002, |
|
"learning_rate": 2.3526238352133398e-05, |
|
"loss": 0.0795, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.8863878774504396, |
|
"grad_norm": 0.021579677239060402, |
|
"learning_rate": 2.3485368644760503e-05, |
|
"loss": 0.0009, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 0.8900658354481592, |
|
"grad_norm": 0.007828918285667896, |
|
"learning_rate": 2.344449893738761e-05, |
|
"loss": 0.0017, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.8937437934458788, |
|
"grad_norm": 0.006341638043522835, |
|
"learning_rate": 2.3403629230014714e-05, |
|
"loss": 0.0198, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 0.8974217514435985, |
|
"grad_norm": 0.004665954038500786, |
|
"learning_rate": 2.336275952264182e-05, |
|
"loss": 0.0002, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.9010997094413182, |
|
"grad_norm": 0.0059740557335317135, |
|
"learning_rate": 2.3321889815268925e-05, |
|
"loss": 0.0398, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.9047776674390379, |
|
"grad_norm": 0.09372496604919434, |
|
"learning_rate": 2.328102010789603e-05, |
|
"loss": 0.0596, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.9084556254367575, |
|
"grad_norm": 0.06878636032342911, |
|
"learning_rate": 2.3240150400523132e-05, |
|
"loss": 0.0858, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 0.9121335834344771, |
|
"grad_norm": 5.581681728363037, |
|
"learning_rate": 2.3199280693150238e-05, |
|
"loss": 0.0728, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.9158115414321969, |
|
"grad_norm": 0.017690079286694527, |
|
"learning_rate": 2.3158410985777343e-05, |
|
"loss": 0.0109, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 0.9194894994299165, |
|
"grad_norm": 0.009789933450520039, |
|
"learning_rate": 2.3117541278404448e-05, |
|
"loss": 0.003, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.9231674574276362, |
|
"grad_norm": 0.007185524329543114, |
|
"learning_rate": 2.3076671571031554e-05, |
|
"loss": 0.0003, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 0.9268454154253558, |
|
"grad_norm": 0.29879918694496155, |
|
"learning_rate": 2.303580186365866e-05, |
|
"loss": 0.0004, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.9305233734230756, |
|
"grad_norm": 0.005276743322610855, |
|
"learning_rate": 2.2994932156285764e-05, |
|
"loss": 0.0209, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 0.9342013314207952, |
|
"grad_norm": 4.756071090698242, |
|
"learning_rate": 2.2954062448912866e-05, |
|
"loss": 0.0696, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.9378792894185148, |
|
"grad_norm": 0.005177750252187252, |
|
"learning_rate": 2.291319274153997e-05, |
|
"loss": 0.0294, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 0.9415572474162345, |
|
"grad_norm": 0.005691983737051487, |
|
"learning_rate": 2.2872323034167074e-05, |
|
"loss": 0.0102, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.9452352054139542, |
|
"grad_norm": 0.012254934757947922, |
|
"learning_rate": 2.283145332679418e-05, |
|
"loss": 0.0204, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 0.9489131634116739, |
|
"grad_norm": 0.007204866968095303, |
|
"learning_rate": 2.2790583619421284e-05, |
|
"loss": 0.001, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.9525911214093935, |
|
"grad_norm": 0.0022422156762331724, |
|
"learning_rate": 2.274971391204839e-05, |
|
"loss": 0.0074, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 0.9562690794071131, |
|
"grad_norm": 0.0029815786983817816, |
|
"learning_rate": 2.2708844204675495e-05, |
|
"loss": 0.0001, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.9599470374048328, |
|
"grad_norm": 0.0027428902685642242, |
|
"learning_rate": 2.26679744973026e-05, |
|
"loss": 0.0534, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 0.9636249954025525, |
|
"grad_norm": 0.0038738884031772614, |
|
"learning_rate": 2.2627104789929703e-05, |
|
"loss": 0.0167, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.9673029534002722, |
|
"grad_norm": 0.002053373260423541, |
|
"learning_rate": 2.2586235082556808e-05, |
|
"loss": 0.0119, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 0.9709809113979918, |
|
"grad_norm": 0.015416144393384457, |
|
"learning_rate": 2.2545365375183913e-05, |
|
"loss": 0.0436, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.9746588693957114, |
|
"grad_norm": 0.028199590742588043, |
|
"learning_rate": 2.250449566781102e-05, |
|
"loss": 0.06, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 0.9783368273934312, |
|
"grad_norm": 0.00808124803006649, |
|
"learning_rate": 2.2463625960438124e-05, |
|
"loss": 0.0082, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.9820147853911508, |
|
"grad_norm": 0.896677553653717, |
|
"learning_rate": 2.242275625306523e-05, |
|
"loss": 0.0004, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 0.9856927433888705, |
|
"grad_norm": 0.014748472720384598, |
|
"learning_rate": 2.2381886545692335e-05, |
|
"loss": 0.0554, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.9893707013865901, |
|
"grad_norm": 0.08279622346162796, |
|
"learning_rate": 2.2341016838319437e-05, |
|
"loss": 0.0727, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 0.9930486593843099, |
|
"grad_norm": 0.0343361496925354, |
|
"learning_rate": 2.2300147130946542e-05, |
|
"loss": 0.0653, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.9967266173820295, |
|
"grad_norm": 0.01778659224510193, |
|
"learning_rate": 2.2259277423573648e-05, |
|
"loss": 0.0468, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 0.9999632204200228, |
|
"eval_accuracy": 0.9960279514527399, |
|
"eval_auc": 0.9999026317054973, |
|
"eval_f1": 0.9960253201825409, |
|
"eval_loss": 0.020395906642079353, |
|
"eval_precision": 0.9967589864466706, |
|
"eval_recall": 0.9952927331568108, |
|
"eval_runtime": 2488.2544, |
|
"eval_samples_per_second": 5.464, |
|
"eval_steps_per_second": 1.366, |
|
"step": 6797 |
|
}, |
|
{ |
|
"epoch": 1.0004045753797493, |
|
"grad_norm": 0.038309529423713684, |
|
"learning_rate": 2.2218407716200753e-05, |
|
"loss": 0.0867, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.0040825333774688, |
|
"grad_norm": 0.03099379874765873, |
|
"learning_rate": 2.217753800882786e-05, |
|
"loss": 0.0251, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 1.0077604913751885, |
|
"grad_norm": 0.014889312908053398, |
|
"learning_rate": 2.2136668301454964e-05, |
|
"loss": 0.0007, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.011438449372908, |
|
"grad_norm": 0.011484134942293167, |
|
"learning_rate": 2.209579859408207e-05, |
|
"loss": 0.0042, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 1.0151164073706278, |
|
"grad_norm": 0.008166844956576824, |
|
"learning_rate": 2.205492888670917e-05, |
|
"loss": 0.0003, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.0187943653683476, |
|
"grad_norm": 0.006568376440554857, |
|
"learning_rate": 2.2014059179336276e-05, |
|
"loss": 0.0003, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 1.022472323366067, |
|
"grad_norm": 0.0057509117759764194, |
|
"learning_rate": 2.1973189471963382e-05, |
|
"loss": 0.0084, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.0261502813637868, |
|
"grad_norm": 0.004868589341640472, |
|
"learning_rate": 2.1932319764590487e-05, |
|
"loss": 0.0043, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 1.0298282393615066, |
|
"grad_norm": 0.004712184425443411, |
|
"learning_rate": 2.1891450057217593e-05, |
|
"loss": 0.0029, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.033506197359226, |
|
"grad_norm": 0.0035947624128311872, |
|
"learning_rate": 2.1850580349844698e-05, |
|
"loss": 0.0051, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 1.0371841553569459, |
|
"grad_norm": 0.0033714687451720238, |
|
"learning_rate": 2.1809710642471803e-05, |
|
"loss": 0.0377, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.0408621133546654, |
|
"grad_norm": 12.332621574401855, |
|
"learning_rate": 2.1768840935098905e-05, |
|
"loss": 0.0061, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 1.0445400713523851, |
|
"grad_norm": 0.002749204868450761, |
|
"learning_rate": 2.172797122772601e-05, |
|
"loss": 0.0003, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.0482180293501049, |
|
"grad_norm": 0.0026924049016088247, |
|
"learning_rate": 2.1687101520353113e-05, |
|
"loss": 0.0001, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 1.0518959873478244, |
|
"grad_norm": 0.006290792487561703, |
|
"learning_rate": 2.1646231812980218e-05, |
|
"loss": 0.0443, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.0555739453455442, |
|
"grad_norm": 0.0048763868398964405, |
|
"learning_rate": 2.1605362105607323e-05, |
|
"loss": 0.0002, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 1.059251903343264, |
|
"grad_norm": 0.003825924126431346, |
|
"learning_rate": 2.156449239823443e-05, |
|
"loss": 0.0002, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.0629298613409834, |
|
"grad_norm": 0.0068919663317501545, |
|
"learning_rate": 2.1523622690861534e-05, |
|
"loss": 0.0001, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 1.0666078193387032, |
|
"grad_norm": 0.0029492308385670185, |
|
"learning_rate": 2.1482752983488636e-05, |
|
"loss": 0.0001, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.0702857773364227, |
|
"grad_norm": 0.0031761634163558483, |
|
"learning_rate": 2.144188327611574e-05, |
|
"loss": 0.0001, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 1.0739637353341425, |
|
"grad_norm": 0.004821736365556717, |
|
"learning_rate": 2.1401013568742847e-05, |
|
"loss": 0.0373, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.0776416933318622, |
|
"grad_norm": 0.003594837849959731, |
|
"learning_rate": 2.1360143861369952e-05, |
|
"loss": 0.0004, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 1.0813196513295817, |
|
"grad_norm": 0.004811630584299564, |
|
"learning_rate": 2.1319274153997058e-05, |
|
"loss": 0.0001, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.0849976093273015, |
|
"grad_norm": 0.006440363824367523, |
|
"learning_rate": 2.1278404446624163e-05, |
|
"loss": 0.0453, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 1.0886755673250212, |
|
"grad_norm": 0.007900132797658443, |
|
"learning_rate": 2.123753473925127e-05, |
|
"loss": 0.0003, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.0923535253227408, |
|
"grad_norm": 0.00898217223584652, |
|
"learning_rate": 2.1196665031878374e-05, |
|
"loss": 0.0811, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 1.0960314833204605, |
|
"grad_norm": 0.031215157359838486, |
|
"learning_rate": 2.1155795324505476e-05, |
|
"loss": 0.035, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.09970944131818, |
|
"grad_norm": 0.022409003227949142, |
|
"learning_rate": 2.111492561713258e-05, |
|
"loss": 0.0014, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 1.1033873993158998, |
|
"grad_norm": 0.0137456264346838, |
|
"learning_rate": 2.1074055909759686e-05, |
|
"loss": 0.0006, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.1070653573136195, |
|
"grad_norm": 0.006075088866055012, |
|
"learning_rate": 2.1033186202386792e-05, |
|
"loss": 0.0005, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 1.110743315311339, |
|
"grad_norm": 0.007382239680737257, |
|
"learning_rate": 2.0992316495013897e-05, |
|
"loss": 0.0003, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.1144212733090588, |
|
"grad_norm": 0.016082163900136948, |
|
"learning_rate": 2.0951446787641003e-05, |
|
"loss": 0.0469, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 1.1180992313067786, |
|
"grad_norm": 0.02028113603591919, |
|
"learning_rate": 2.0910577080268108e-05, |
|
"loss": 0.0398, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.121777189304498, |
|
"grad_norm": 0.014643259346485138, |
|
"learning_rate": 2.086970737289521e-05, |
|
"loss": 0.0007, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 1.1254551473022179, |
|
"grad_norm": 0.010461482219398022, |
|
"learning_rate": 2.0828837665522315e-05, |
|
"loss": 0.0004, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.1291331052999374, |
|
"grad_norm": 0.009396770037710667, |
|
"learning_rate": 2.078796795814942e-05, |
|
"loss": 0.0004, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 1.1328110632976571, |
|
"grad_norm": 0.007909806445240974, |
|
"learning_rate": 2.0747098250776526e-05, |
|
"loss": 0.016, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.1364890212953769, |
|
"grad_norm": 0.006153750233352184, |
|
"learning_rate": 2.070622854340363e-05, |
|
"loss": 0.0055, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 1.1401669792930964, |
|
"grad_norm": 0.006996823474764824, |
|
"learning_rate": 2.0665358836030737e-05, |
|
"loss": 0.0002, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.1438449372908162, |
|
"grad_norm": 0.006032935809344053, |
|
"learning_rate": 2.0624489128657842e-05, |
|
"loss": 0.0331, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 1.1475228952885357, |
|
"grad_norm": 0.003607578342780471, |
|
"learning_rate": 2.0583619421284944e-05, |
|
"loss": 0.0002, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.1512008532862554, |
|
"grad_norm": 0.004726866725832224, |
|
"learning_rate": 2.054274971391205e-05, |
|
"loss": 0.0002, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 1.1548788112839752, |
|
"grad_norm": 0.004033273551613092, |
|
"learning_rate": 2.050188000653915e-05, |
|
"loss": 0.0001, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.1585567692816947, |
|
"grad_norm": 0.0035559283569455147, |
|
"learning_rate": 2.0461010299166257e-05, |
|
"loss": 0.0001, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 1.1622347272794145, |
|
"grad_norm": 0.002765959594398737, |
|
"learning_rate": 2.0420140591793362e-05, |
|
"loss": 0.0001, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.1659126852771342, |
|
"grad_norm": 0.003123935777693987, |
|
"learning_rate": 2.0379270884420468e-05, |
|
"loss": 0.0001, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 1.1695906432748537, |
|
"grad_norm": 0.0030226910021156073, |
|
"learning_rate": 2.0338401177047573e-05, |
|
"loss": 0.0443, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.1732686012725735, |
|
"grad_norm": 0.002675386844202876, |
|
"learning_rate": 2.0297531469674675e-05, |
|
"loss": 0.0001, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 1.1769465592702932, |
|
"grad_norm": 0.002876314101740718, |
|
"learning_rate": 2.025666176230178e-05, |
|
"loss": 0.0001, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.1806245172680128, |
|
"grad_norm": 0.003930400125682354, |
|
"learning_rate": 2.0215792054928886e-05, |
|
"loss": 0.0463, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 1.1843024752657325, |
|
"grad_norm": 0.004908836912363768, |
|
"learning_rate": 2.017492234755599e-05, |
|
"loss": 0.0002, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.187980433263452, |
|
"grad_norm": 0.005489639472216368, |
|
"learning_rate": 2.0134052640183097e-05, |
|
"loss": 0.0014, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 1.1916583912611718, |
|
"grad_norm": 0.0054463837295770645, |
|
"learning_rate": 2.0093182932810202e-05, |
|
"loss": 0.0418, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.1953363492588915, |
|
"grad_norm": 0.004771388601511717, |
|
"learning_rate": 2.0052313225437307e-05, |
|
"loss": 0.0002, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 1.199014307256611, |
|
"grad_norm": 0.004579597618430853, |
|
"learning_rate": 2.001144351806441e-05, |
|
"loss": 0.0002, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.2026922652543308, |
|
"grad_norm": 0.005399708636105061, |
|
"learning_rate": 1.9970573810691515e-05, |
|
"loss": 0.0002, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 1.2063702232520503, |
|
"grad_norm": 0.0028218550141900778, |
|
"learning_rate": 1.992970410331862e-05, |
|
"loss": 0.0001, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.21004818124977, |
|
"grad_norm": 0.0270390622317791, |
|
"learning_rate": 1.9888834395945725e-05, |
|
"loss": 0.1464, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 1.2137261392474898, |
|
"grad_norm": 0.007817487232387066, |
|
"learning_rate": 1.984796468857283e-05, |
|
"loss": 0.0005, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.2174040972452094, |
|
"grad_norm": 0.009673170745372772, |
|
"learning_rate": 1.9807094981199936e-05, |
|
"loss": 0.0003, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 1.2210820552429291, |
|
"grad_norm": 0.006883264984935522, |
|
"learning_rate": 1.976622527382704e-05, |
|
"loss": 0.0364, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.2247600132406489, |
|
"grad_norm": 0.038729436695575714, |
|
"learning_rate": 1.9725355566454147e-05, |
|
"loss": 0.0002, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 1.2284379712383684, |
|
"grad_norm": 0.004570882301777601, |
|
"learning_rate": 1.968448585908125e-05, |
|
"loss": 0.0002, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.2321159292360881, |
|
"grad_norm": 0.010231226682662964, |
|
"learning_rate": 1.9643616151708354e-05, |
|
"loss": 0.0463, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 1.235793887233808, |
|
"grad_norm": 0.008044122718274593, |
|
"learning_rate": 1.960274644433546e-05, |
|
"loss": 0.0003, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.2394718452315274, |
|
"grad_norm": 0.005202152766287327, |
|
"learning_rate": 1.9561876736962565e-05, |
|
"loss": 0.0391, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 1.2431498032292472, |
|
"grad_norm": 0.0054007298313081264, |
|
"learning_rate": 1.952100702958967e-05, |
|
"loss": 0.0182, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.2468277612269667, |
|
"grad_norm": 0.005195588804781437, |
|
"learning_rate": 1.9480137322216776e-05, |
|
"loss": 0.0392, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 1.2505057192246865, |
|
"grad_norm": 0.00451032817363739, |
|
"learning_rate": 1.943926761484388e-05, |
|
"loss": 0.0002, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.2541836772224062, |
|
"grad_norm": 0.00390147278085351, |
|
"learning_rate": 1.9398397907470983e-05, |
|
"loss": 0.0002, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 1.2578616352201257, |
|
"grad_norm": 0.0030624952632933855, |
|
"learning_rate": 1.935752820009809e-05, |
|
"loss": 0.0001, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.2615395932178455, |
|
"grad_norm": 0.0030448674224317074, |
|
"learning_rate": 1.931665849272519e-05, |
|
"loss": 0.0001, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 1.265217551215565, |
|
"grad_norm": 0.003369387937709689, |
|
"learning_rate": 1.9275788785352296e-05, |
|
"loss": 0.0001, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.2688955092132848, |
|
"grad_norm": 0.0026294661220163107, |
|
"learning_rate": 1.92349190779794e-05, |
|
"loss": 0.0001, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 1.2725734672110045, |
|
"grad_norm": 0.002674271585419774, |
|
"learning_rate": 1.9194049370606507e-05, |
|
"loss": 0.0001, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.276251425208724, |
|
"grad_norm": 0.016562707722187042, |
|
"learning_rate": 1.9153179663233612e-05, |
|
"loss": 0.0001, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 1.2799293832064438, |
|
"grad_norm": 0.002845450770109892, |
|
"learning_rate": 1.9112309955860714e-05, |
|
"loss": 0.0376, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.2836073412041635, |
|
"grad_norm": 0.002954358235001564, |
|
"learning_rate": 1.907144024848782e-05, |
|
"loss": 0.0001, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 1.287285299201883, |
|
"grad_norm": 0.002028050599619746, |
|
"learning_rate": 1.9030570541114925e-05, |
|
"loss": 0.0047, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.2909632571996028, |
|
"grad_norm": 0.002608607057482004, |
|
"learning_rate": 1.898970083374203e-05, |
|
"loss": 0.0001, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 1.2946412151973226, |
|
"grad_norm": 0.0024424525909125805, |
|
"learning_rate": 1.8948831126369135e-05, |
|
"loss": 0.0001, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.298319173195042, |
|
"grad_norm": 0.001993270590901375, |
|
"learning_rate": 1.890796141899624e-05, |
|
"loss": 0.0001, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 1.3019971311927618, |
|
"grad_norm": 0.009992810897529125, |
|
"learning_rate": 1.8867091711623346e-05, |
|
"loss": 0.0001, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.3056750891904814, |
|
"grad_norm": 0.003959705121815205, |
|
"learning_rate": 1.8826222004250448e-05, |
|
"loss": 0.0336, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 1.3093530471882011, |
|
"grad_norm": 0.002648918190971017, |
|
"learning_rate": 1.8785352296877554e-05, |
|
"loss": 0.0002, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.3130310051859206, |
|
"grad_norm": 0.001997936749830842, |
|
"learning_rate": 1.874448258950466e-05, |
|
"loss": 0.0001, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 1.3167089631836404, |
|
"grad_norm": 0.0019702455028891563, |
|
"learning_rate": 1.8703612882131764e-05, |
|
"loss": 0.0001, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.3203869211813601, |
|
"grad_norm": 0.0019666815642267466, |
|
"learning_rate": 1.866274317475887e-05, |
|
"loss": 0.015, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 1.3240648791790797, |
|
"grad_norm": 0.016209330409765244, |
|
"learning_rate": 1.8621873467385975e-05, |
|
"loss": 0.0499, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.3277428371767994, |
|
"grad_norm": 0.002770668361335993, |
|
"learning_rate": 1.858100376001308e-05, |
|
"loss": 0.0001, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 1.3314207951745192, |
|
"grad_norm": 0.0025566229596734047, |
|
"learning_rate": 1.8540134052640182e-05, |
|
"loss": 0.0429, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.3350987531722387, |
|
"grad_norm": 0.00490075396373868, |
|
"learning_rate": 1.8499264345267288e-05, |
|
"loss": 0.0391, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 1.3387767111699584, |
|
"grad_norm": 0.002448379760608077, |
|
"learning_rate": 1.8458394637894393e-05, |
|
"loss": 0.0002, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.3424546691676782, |
|
"grad_norm": 0.0027882566209882498, |
|
"learning_rate": 1.84175249305215e-05, |
|
"loss": 0.0001, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 1.3461326271653977, |
|
"grad_norm": 0.0021890706848353148, |
|
"learning_rate": 1.8376655223148604e-05, |
|
"loss": 0.0001, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.3498105851631175, |
|
"grad_norm": 0.002767590805888176, |
|
"learning_rate": 1.833578551577571e-05, |
|
"loss": 0.0001, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 1.3534885431608372, |
|
"grad_norm": 0.0018375491490587592, |
|
"learning_rate": 1.8294915808402815e-05, |
|
"loss": 0.0003, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.3571665011585567, |
|
"grad_norm": 0.0020680581219494343, |
|
"learning_rate": 1.825404610102992e-05, |
|
"loss": 0.0001, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 1.3608444591562765, |
|
"grad_norm": 0.001452911994419992, |
|
"learning_rate": 1.8213176393657022e-05, |
|
"loss": 0.0001, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.364522417153996, |
|
"grad_norm": 0.011856326833367348, |
|
"learning_rate": 1.8172306686284127e-05, |
|
"loss": 0.0498, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 1.3682003751517158, |
|
"grad_norm": 0.005070924758911133, |
|
"learning_rate": 1.813143697891123e-05, |
|
"loss": 0.0003, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.3718783331494353, |
|
"grad_norm": 0.003941578324884176, |
|
"learning_rate": 1.8090567271538335e-05, |
|
"loss": 0.0001, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 1.375556291147155, |
|
"grad_norm": 0.0044369762763381, |
|
"learning_rate": 1.804969756416544e-05, |
|
"loss": 0.0395, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.3792342491448748, |
|
"grad_norm": 0.003973621409386396, |
|
"learning_rate": 1.8008827856792546e-05, |
|
"loss": 0.0002, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 1.3829122071425943, |
|
"grad_norm": 0.00455184280872345, |
|
"learning_rate": 1.796795814941965e-05, |
|
"loss": 0.0001, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.386590165140314, |
|
"grad_norm": 0.0031091428827494383, |
|
"learning_rate": 1.7927088442046753e-05, |
|
"loss": 0.0001, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 1.3902681231380338, |
|
"grad_norm": 0.0024325144477188587, |
|
"learning_rate": 1.7886218734673858e-05, |
|
"loss": 0.0001, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.3939460811357534, |
|
"grad_norm": 0.0036399061791598797, |
|
"learning_rate": 1.7845349027300964e-05, |
|
"loss": 0.0001, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 1.397624039133473, |
|
"grad_norm": 0.0023723021149635315, |
|
"learning_rate": 1.780447931992807e-05, |
|
"loss": 0.0001, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.4013019971311929, |
|
"grad_norm": 0.0027509965002536774, |
|
"learning_rate": 1.7763609612555174e-05, |
|
"loss": 0.0001, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 1.4049799551289124, |
|
"grad_norm": 0.0033826676663011312, |
|
"learning_rate": 1.772273990518228e-05, |
|
"loss": 0.0001, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.4086579131266321, |
|
"grad_norm": 0.011138912290334702, |
|
"learning_rate": 1.7681870197809385e-05, |
|
"loss": 0.0398, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 1.4123358711243519, |
|
"grad_norm": 0.023271048441529274, |
|
"learning_rate": 1.7641000490436487e-05, |
|
"loss": 0.0747, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.4160138291220714, |
|
"grad_norm": 0.18063010275363922, |
|
"learning_rate": 1.7600130783063593e-05, |
|
"loss": 0.0009, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 1.4196917871197912, |
|
"grad_norm": 0.012859140522778034, |
|
"learning_rate": 1.7559261075690698e-05, |
|
"loss": 0.0444, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.4233697451175107, |
|
"grad_norm": 0.003733620513230562, |
|
"learning_rate": 1.7518391368317803e-05, |
|
"loss": 0.0219, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 1.4270477031152304, |
|
"grad_norm": 4.048089504241943, |
|
"learning_rate": 1.747752166094491e-05, |
|
"loss": 0.052, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.43072566111295, |
|
"grad_norm": 0.02329842559993267, |
|
"learning_rate": 1.7436651953572014e-05, |
|
"loss": 0.0033, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 1.4344036191106697, |
|
"grad_norm": 0.5609085559844971, |
|
"learning_rate": 1.739578224619912e-05, |
|
"loss": 0.0468, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.4380815771083895, |
|
"grad_norm": 0.010268951766192913, |
|
"learning_rate": 1.735491253882622e-05, |
|
"loss": 0.0004, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 1.441759535106109, |
|
"grad_norm": 0.005183890461921692, |
|
"learning_rate": 1.7314042831453327e-05, |
|
"loss": 0.0002, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.4454374931038287, |
|
"grad_norm": 0.006362477317452431, |
|
"learning_rate": 1.7273173124080432e-05, |
|
"loss": 0.0623, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 1.4491154511015485, |
|
"grad_norm": 0.004158661235123873, |
|
"learning_rate": 1.7232303416707537e-05, |
|
"loss": 0.0002, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.452793409099268, |
|
"grad_norm": 0.003037210088223219, |
|
"learning_rate": 1.7191433709334643e-05, |
|
"loss": 0.0001, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 1.4564713670969878, |
|
"grad_norm": 0.006479774601757526, |
|
"learning_rate": 1.7150564001961748e-05, |
|
"loss": 0.0562, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.4601493250947075, |
|
"grad_norm": 34.625465393066406, |
|
"learning_rate": 1.7109694294588854e-05, |
|
"loss": 0.0423, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 1.463827283092427, |
|
"grad_norm": 0.003740801243111491, |
|
"learning_rate": 1.706882458721596e-05, |
|
"loss": 0.0001, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.4675052410901468, |
|
"grad_norm": 0.06391607969999313, |
|
"learning_rate": 1.702795487984306e-05, |
|
"loss": 0.0211, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 1.4711831990878665, |
|
"grad_norm": 0.0029998337849974632, |
|
"learning_rate": 1.6987085172470166e-05, |
|
"loss": 0.0012, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.474861157085586, |
|
"grad_norm": 0.002598424442112446, |
|
"learning_rate": 1.6946215465097272e-05, |
|
"loss": 0.0056, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 1.4785391150833058, |
|
"grad_norm": 0.0026498546358197927, |
|
"learning_rate": 1.6905345757724374e-05, |
|
"loss": 0.0003, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 1.4822170730810253, |
|
"grad_norm": 0.002896289573982358, |
|
"learning_rate": 1.686447605035148e-05, |
|
"loss": 0.0244, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 1.485895031078745, |
|
"grad_norm": 0.002737634815275669, |
|
"learning_rate": 1.6823606342978584e-05, |
|
"loss": 0.0002, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.4895729890764646, |
|
"grad_norm": 0.002295145532116294, |
|
"learning_rate": 1.678273663560569e-05, |
|
"loss": 0.0001, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 1.4932509470741844, |
|
"grad_norm": 0.0018749627051874995, |
|
"learning_rate": 1.6741866928232792e-05, |
|
"loss": 0.0001, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 1.4969289050719041, |
|
"grad_norm": 0.002252426231279969, |
|
"learning_rate": 1.6700997220859897e-05, |
|
"loss": 0.0091, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 1.5006068630696237, |
|
"grad_norm": 0.001987684750929475, |
|
"learning_rate": 1.6660127513487003e-05, |
|
"loss": 0.0059, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.5042848210673434, |
|
"grad_norm": 0.0018681609071791172, |
|
"learning_rate": 1.6619257806114108e-05, |
|
"loss": 0.0036, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 1.5079627790650632, |
|
"grad_norm": 0.002243634080514312, |
|
"learning_rate": 1.6578388098741213e-05, |
|
"loss": 0.0001, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.5116407370627827, |
|
"grad_norm": 0.005282828118652105, |
|
"learning_rate": 1.653751839136832e-05, |
|
"loss": 0.0508, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 1.5153186950605024, |
|
"grad_norm": 0.0033266160171478987, |
|
"learning_rate": 1.6496648683995424e-05, |
|
"loss": 0.0036, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.5189966530582222, |
|
"grad_norm": 0.0024327326100319624, |
|
"learning_rate": 1.6455778976622526e-05, |
|
"loss": 0.0001, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 1.5226746110559417, |
|
"grad_norm": 0.0037725428119301796, |
|
"learning_rate": 1.641490926924963e-05, |
|
"loss": 0.0859, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 1.5263525690536615, |
|
"grad_norm": 0.01479677390307188, |
|
"learning_rate": 1.6374039561876737e-05, |
|
"loss": 0.0002, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 1.5300305270513812, |
|
"grad_norm": 0.002465145429596305, |
|
"learning_rate": 1.6333169854503842e-05, |
|
"loss": 0.0009, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.5337084850491007, |
|
"grad_norm": 0.002028359565883875, |
|
"learning_rate": 1.6292300147130948e-05, |
|
"loss": 0.0001, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 1.5373864430468203, |
|
"grad_norm": 0.0017766653327271342, |
|
"learning_rate": 1.6251430439758053e-05, |
|
"loss": 0.0001, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 1.5410644010445402, |
|
"grad_norm": 0.002013767370954156, |
|
"learning_rate": 1.6210560732385158e-05, |
|
"loss": 0.0255, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 1.5447423590422598, |
|
"grad_norm": 0.0019861028995364904, |
|
"learning_rate": 1.616969102501226e-05, |
|
"loss": 0.0109, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.5484203170399793, |
|
"grad_norm": 0.0017919589299708605, |
|
"learning_rate": 1.6128821317639366e-05, |
|
"loss": 0.0063, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 1.552098275037699, |
|
"grad_norm": 0.001575242611579597, |
|
"learning_rate": 1.608795161026647e-05, |
|
"loss": 0.0001, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 1.5557762330354188, |
|
"grad_norm": 0.0017625424079596996, |
|
"learning_rate": 1.6047081902893576e-05, |
|
"loss": 0.0001, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 1.5594541910331383, |
|
"grad_norm": 0.0014293509302660823, |
|
"learning_rate": 1.6006212195520682e-05, |
|
"loss": 0.0001, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.563132149030858, |
|
"grad_norm": 3.637284994125366, |
|
"learning_rate": 1.5965342488147787e-05, |
|
"loss": 0.0319, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 1.5668101070285778, |
|
"grad_norm": 0.0015190584817901254, |
|
"learning_rate": 1.5924472780774893e-05, |
|
"loss": 0.1112, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 1.5704880650262973, |
|
"grad_norm": 0.0019073854200541973, |
|
"learning_rate": 1.5883603073401995e-05, |
|
"loss": 0.0001, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 1.574166023024017, |
|
"grad_norm": 0.15334878861904144, |
|
"learning_rate": 1.58427333660291e-05, |
|
"loss": 0.0001, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.5778439810217368, |
|
"grad_norm": 0.0013233659556135535, |
|
"learning_rate": 1.5801863658656205e-05, |
|
"loss": 0.0006, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 1.5815219390194564, |
|
"grad_norm": 60.88636779785156, |
|
"learning_rate": 1.576099395128331e-05, |
|
"loss": 0.0794, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 1.5851998970171761, |
|
"grad_norm": 0.006810314953327179, |
|
"learning_rate": 1.5720124243910413e-05, |
|
"loss": 0.0272, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 1.5888778550148959, |
|
"grad_norm": 0.006012595724314451, |
|
"learning_rate": 1.5679254536537518e-05, |
|
"loss": 0.0177, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.5925558130126154, |
|
"grad_norm": 0.0041669500060379505, |
|
"learning_rate": 1.5638384829164623e-05, |
|
"loss": 0.0007, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 1.596233771010335, |
|
"grad_norm": 0.0024410944897681475, |
|
"learning_rate": 1.559751512179173e-05, |
|
"loss": 0.0001, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 1.5999117290080547, |
|
"grad_norm": 0.002287843730300665, |
|
"learning_rate": 1.555664541441883e-05, |
|
"loss": 0.0001, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 1.6035896870057744, |
|
"grad_norm": 0.002450288040563464, |
|
"learning_rate": 1.5515775707045936e-05, |
|
"loss": 0.0001, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.607267645003494, |
|
"grad_norm": 0.0017540917033329606, |
|
"learning_rate": 1.547490599967304e-05, |
|
"loss": 0.0001, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 1.6109456030012137, |
|
"grad_norm": 0.0018945990595966578, |
|
"learning_rate": 1.5434036292300147e-05, |
|
"loss": 0.0001, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 1.6146235609989334, |
|
"grad_norm": 0.38427916169166565, |
|
"learning_rate": 1.5393166584927252e-05, |
|
"loss": 0.0478, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 1.618301518996653, |
|
"grad_norm": 0.005249540787190199, |
|
"learning_rate": 1.5352296877554358e-05, |
|
"loss": 0.0005, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.6219794769943727, |
|
"grad_norm": 0.049626559019088745, |
|
"learning_rate": 1.5311427170181463e-05, |
|
"loss": 0.0803, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.6256574349920925, |
|
"grad_norm": 0.006765100173652172, |
|
"learning_rate": 1.5270557462808565e-05, |
|
"loss": 0.021, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 1.629335392989812, |
|
"grad_norm": 0.012057892046868801, |
|
"learning_rate": 1.522968775543567e-05, |
|
"loss": 0.0005, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 1.6330133509875318, |
|
"grad_norm": 0.012171362526714802, |
|
"learning_rate": 1.5188818048062776e-05, |
|
"loss": 0.0171, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.6366913089852515, |
|
"grad_norm": 0.006173169240355492, |
|
"learning_rate": 1.5147948340689881e-05, |
|
"loss": 0.0183, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 1.640369266982971, |
|
"grad_norm": 0.025982793420553207, |
|
"learning_rate": 1.5107078633316986e-05, |
|
"loss": 0.041, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 1.6440472249806906, |
|
"grad_norm": 0.0121184466406703, |
|
"learning_rate": 1.5066208925944092e-05, |
|
"loss": 0.0066, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 1.6477251829784105, |
|
"grad_norm": 0.008928947150707245, |
|
"learning_rate": 1.5025339218571197e-05, |
|
"loss": 0.0013, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.65140314097613, |
|
"grad_norm": 0.003572331042960286, |
|
"learning_rate": 1.4984469511198301e-05, |
|
"loss": 0.0448, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 1.6550810989738496, |
|
"grad_norm": 0.012093408964574337, |
|
"learning_rate": 1.4943599803825406e-05, |
|
"loss": 0.0003, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 1.6587590569715693, |
|
"grad_norm": 0.005746824201196432, |
|
"learning_rate": 1.490273009645251e-05, |
|
"loss": 0.0002, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 1.662437014969289, |
|
"grad_norm": 0.005075458902865648, |
|
"learning_rate": 1.4861860389079615e-05, |
|
"loss": 0.0431, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.6661149729670086, |
|
"grad_norm": 0.006644480861723423, |
|
"learning_rate": 1.4820990681706719e-05, |
|
"loss": 0.0003, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 1.6697929309647284, |
|
"grad_norm": 0.016171354800462723, |
|
"learning_rate": 1.4780120974333823e-05, |
|
"loss": 0.0163, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 1.673470888962448, |
|
"grad_norm": 0.005658384878188372, |
|
"learning_rate": 1.4739251266960928e-05, |
|
"loss": 0.0022, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 1.6771488469601676, |
|
"grad_norm": 0.010968804359436035, |
|
"learning_rate": 1.4698381559588033e-05, |
|
"loss": 0.0804, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.6808268049578874, |
|
"grad_norm": 0.029876096174120903, |
|
"learning_rate": 1.4657511852215139e-05, |
|
"loss": 0.067, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 1.6845047629556071, |
|
"grad_norm": 0.03841656073927879, |
|
"learning_rate": 1.4616642144842242e-05, |
|
"loss": 0.0349, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 1.6881827209533267, |
|
"grad_norm": 0.017025554552674294, |
|
"learning_rate": 1.4575772437469348e-05, |
|
"loss": 0.001, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 1.6918606789510464, |
|
"grad_norm": 0.024776197969913483, |
|
"learning_rate": 1.4534902730096453e-05, |
|
"loss": 0.0356, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.6955386369487662, |
|
"grad_norm": 0.018094466999173164, |
|
"learning_rate": 1.4494033022723559e-05, |
|
"loss": 0.0006, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 1.6992165949464857, |
|
"grad_norm": 0.010948434472084045, |
|
"learning_rate": 1.4453163315350662e-05, |
|
"loss": 0.0566, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 1.7028945529442052, |
|
"grad_norm": 0.06060256063938141, |
|
"learning_rate": 1.4412293607977768e-05, |
|
"loss": 0.087, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 1.7065725109419252, |
|
"grad_norm": 0.0425218902528286, |
|
"learning_rate": 1.4371423900604873e-05, |
|
"loss": 0.0014, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.7102504689396447, |
|
"grad_norm": 0.03931298479437828, |
|
"learning_rate": 1.4330554193231977e-05, |
|
"loss": 0.0329, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 1.7139284269373642, |
|
"grad_norm": 0.05203554406762123, |
|
"learning_rate": 1.4289684485859082e-05, |
|
"loss": 0.0667, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 1.717606384935084, |
|
"grad_norm": 0.059145841747522354, |
|
"learning_rate": 1.4248814778486187e-05, |
|
"loss": 0.0464, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 1.7212843429328037, |
|
"grad_norm": 0.053441960364580154, |
|
"learning_rate": 1.4207945071113291e-05, |
|
"loss": 0.0598, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.7249623009305233, |
|
"grad_norm": 0.0338728241622448, |
|
"learning_rate": 1.4167075363740395e-05, |
|
"loss": 0.0014, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 1.728640258928243, |
|
"grad_norm": 0.03298606723546982, |
|
"learning_rate": 1.41262056563675e-05, |
|
"loss": 0.0011, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 1.7323182169259628, |
|
"grad_norm": 0.007968394085764885, |
|
"learning_rate": 1.4085335948994606e-05, |
|
"loss": 0.0332, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 1.7359961749236823, |
|
"grad_norm": 0.033015619963407516, |
|
"learning_rate": 1.404446624162171e-05, |
|
"loss": 0.0471, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.739674132921402, |
|
"grad_norm": 0.03123684972524643, |
|
"learning_rate": 1.4003596534248815e-05, |
|
"loss": 0.0008, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 1.7433520909191218, |
|
"grad_norm": 0.026270106434822083, |
|
"learning_rate": 1.396272682687592e-05, |
|
"loss": 0.027, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 1.7470300489168413, |
|
"grad_norm": 0.025614146143198013, |
|
"learning_rate": 1.3921857119503025e-05, |
|
"loss": 0.0006, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 1.750708006914561, |
|
"grad_norm": 0.011196363717317581, |
|
"learning_rate": 1.3880987412130129e-05, |
|
"loss": 0.0004, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 0.014085380360484123, |
|
"learning_rate": 1.3840117704757234e-05, |
|
"loss": 0.0007, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 1.7580639229100004, |
|
"grad_norm": 0.2520334720611572, |
|
"learning_rate": 1.379924799738434e-05, |
|
"loss": 0.0012, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 1.7617418809077199, |
|
"grad_norm": 0.0027042387519031763, |
|
"learning_rate": 1.3758378290011445e-05, |
|
"loss": 0.0657, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 1.7654198389054399, |
|
"grad_norm": 0.007959190756082535, |
|
"learning_rate": 1.3717508582638549e-05, |
|
"loss": 0.0009, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.7690977969031594, |
|
"grad_norm": 0.006802896503359079, |
|
"learning_rate": 1.3676638875265654e-05, |
|
"loss": 0.0002, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 1.772775754900879, |
|
"grad_norm": 0.0037322076968848705, |
|
"learning_rate": 1.3635769167892758e-05, |
|
"loss": 0.0002, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 1.7764537128985987, |
|
"grad_norm": 0.004444212652742863, |
|
"learning_rate": 1.3594899460519862e-05, |
|
"loss": 0.0113, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 1.7801316708963184, |
|
"grad_norm": 0.0029294530395418406, |
|
"learning_rate": 1.3554029753146967e-05, |
|
"loss": 0.0024, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.783809628894038, |
|
"grad_norm": 0.006351064890623093, |
|
"learning_rate": 1.3513160045774072e-05, |
|
"loss": 0.0339, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 1.7874875868917577, |
|
"grad_norm": 0.0033591645769774914, |
|
"learning_rate": 1.3472290338401178e-05, |
|
"loss": 0.003, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 1.7911655448894774, |
|
"grad_norm": 0.003340468741953373, |
|
"learning_rate": 1.3431420631028281e-05, |
|
"loss": 0.0002, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 1.794843502887197, |
|
"grad_norm": 0.12212031334638596, |
|
"learning_rate": 1.3390550923655387e-05, |
|
"loss": 0.0836, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.7985214608849167, |
|
"grad_norm": 0.014243889600038528, |
|
"learning_rate": 1.3349681216282492e-05, |
|
"loss": 0.0318, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 1.8021994188826365, |
|
"grad_norm": 0.016160359606146812, |
|
"learning_rate": 1.3308811508909596e-05, |
|
"loss": 0.0003, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 1.805877376880356, |
|
"grad_norm": 0.011376752518117428, |
|
"learning_rate": 1.3267941801536701e-05, |
|
"loss": 0.0003, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 1.8095553348780757, |
|
"grad_norm": 0.00865715742111206, |
|
"learning_rate": 1.3227072094163807e-05, |
|
"loss": 0.0133, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.8132332928757955, |
|
"grad_norm": 0.007116909604519606, |
|
"learning_rate": 1.3186202386790912e-05, |
|
"loss": 0.0003, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 1.816911250873515, |
|
"grad_norm": 0.008155121468007565, |
|
"learning_rate": 1.3145332679418016e-05, |
|
"loss": 0.0385, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 1.8205892088712345, |
|
"grad_norm": 0.013204419054090977, |
|
"learning_rate": 1.3104462972045121e-05, |
|
"loss": 0.0356, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 1.8242671668689545, |
|
"grad_norm": 0.013173281215131283, |
|
"learning_rate": 1.3063593264672226e-05, |
|
"loss": 0.0004, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.827945124866674, |
|
"grad_norm": 0.010820701718330383, |
|
"learning_rate": 1.302272355729933e-05, |
|
"loss": 0.0003, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 1.8316230828643936, |
|
"grad_norm": 0.00571137759834528, |
|
"learning_rate": 1.2981853849926434e-05, |
|
"loss": 0.0011, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 1.8353010408621133, |
|
"grad_norm": 0.007815693505108356, |
|
"learning_rate": 1.2940984142553539e-05, |
|
"loss": 0.0002, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 1.838978998859833, |
|
"grad_norm": 0.04561807960271835, |
|
"learning_rate": 1.2900114435180645e-05, |
|
"loss": 0.0002, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.8426569568575526, |
|
"grad_norm": 0.007523215841501951, |
|
"learning_rate": 1.2859244727807748e-05, |
|
"loss": 0.0203, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 1.8463349148552723, |
|
"grad_norm": 0.007975575514137745, |
|
"learning_rate": 1.2818375020434854e-05, |
|
"loss": 0.0002, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 1.850012872852992, |
|
"grad_norm": 0.007269065361469984, |
|
"learning_rate": 1.2777505313061959e-05, |
|
"loss": 0.0002, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 1.8536908308507116, |
|
"grad_norm": 0.004501336719840765, |
|
"learning_rate": 1.2736635605689064e-05, |
|
"loss": 0.0001, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.8573687888484314, |
|
"grad_norm": 0.004011464770883322, |
|
"learning_rate": 1.2695765898316168e-05, |
|
"loss": 0.0003, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 1.8610467468461511, |
|
"grad_norm": 0.002334051998332143, |
|
"learning_rate": 1.2654896190943273e-05, |
|
"loss": 0.0234, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 1.8647247048438707, |
|
"grad_norm": 0.004475513007491827, |
|
"learning_rate": 1.2614026483570379e-05, |
|
"loss": 0.0002, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 1.8684026628415904, |
|
"grad_norm": 0.003851409535855055, |
|
"learning_rate": 1.2573156776197482e-05, |
|
"loss": 0.0001, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.8720806208393101, |
|
"grad_norm": 0.0028481779154390097, |
|
"learning_rate": 1.2532287068824588e-05, |
|
"loss": 0.0255, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 1.8757585788370297, |
|
"grad_norm": 0.0030939916614443064, |
|
"learning_rate": 1.2491417361451693e-05, |
|
"loss": 0.0332, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 1.8794365368347492, |
|
"grad_norm": 0.0065445504151284695, |
|
"learning_rate": 1.2450547654078797e-05, |
|
"loss": 0.0422, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 1.8831144948324692, |
|
"grad_norm": 0.005459626670926809, |
|
"learning_rate": 1.24096779467059e-05, |
|
"loss": 0.0113, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.8867924528301887, |
|
"grad_norm": 0.002942801220342517, |
|
"learning_rate": 1.2368808239333006e-05, |
|
"loss": 0.0002, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 1.8904704108279082, |
|
"grad_norm": 0.0067766509018838406, |
|
"learning_rate": 1.2327938531960111e-05, |
|
"loss": 0.018, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 1.894148368825628, |
|
"grad_norm": 0.005411918740719557, |
|
"learning_rate": 1.2287068824587217e-05, |
|
"loss": 0.0486, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 1.8978263268233477, |
|
"grad_norm": 0.006009817123413086, |
|
"learning_rate": 1.224619911721432e-05, |
|
"loss": 0.0002, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.9015042848210673, |
|
"grad_norm": 0.005595459137111902, |
|
"learning_rate": 1.2205329409841426e-05, |
|
"loss": 0.0408, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 1.905182242818787, |
|
"grad_norm": 0.012987248599529266, |
|
"learning_rate": 1.2164459702468531e-05, |
|
"loss": 0.0823, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 1.9088602008165068, |
|
"grad_norm": 0.16368244588375092, |
|
"learning_rate": 1.2123589995095635e-05, |
|
"loss": 0.0018, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 1.9125381588142263, |
|
"grad_norm": 0.00949876382946968, |
|
"learning_rate": 1.208272028772274e-05, |
|
"loss": 0.0163, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.916216116811946, |
|
"grad_norm": 14.246623039245605, |
|
"learning_rate": 1.2041850580349846e-05, |
|
"loss": 0.0342, |
|
"step": 13025 |
|
}, |
|
{ |
|
"epoch": 1.9198940748096658, |
|
"grad_norm": 0.00826562475413084, |
|
"learning_rate": 1.2000980872976951e-05, |
|
"loss": 0.0003, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 1.9235720328073853, |
|
"grad_norm": 0.006868135649710894, |
|
"learning_rate": 1.1960111165604055e-05, |
|
"loss": 0.0003, |
|
"step": 13075 |
|
}, |
|
{ |
|
"epoch": 1.9272499908051048, |
|
"grad_norm": 0.00808362290263176, |
|
"learning_rate": 1.191924145823116e-05, |
|
"loss": 0.0402, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.9309279488028248, |
|
"grad_norm": 0.010807299055159092, |
|
"learning_rate": 1.1878371750858265e-05, |
|
"loss": 0.078, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 1.9346059068005443, |
|
"grad_norm": 0.01139509491622448, |
|
"learning_rate": 1.1837502043485367e-05, |
|
"loss": 0.0007, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 1.9382838647982639, |
|
"grad_norm": 0.00977110955864191, |
|
"learning_rate": 1.1796632336112473e-05, |
|
"loss": 0.0004, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 1.9419618227959836, |
|
"grad_norm": 0.006910570897161961, |
|
"learning_rate": 1.1755762628739578e-05, |
|
"loss": 0.0003, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.9456397807937034, |
|
"grad_norm": 4.1620564460754395, |
|
"learning_rate": 1.1714892921366683e-05, |
|
"loss": 0.0667, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 1.949317738791423, |
|
"grad_norm": 0.015238853171467781, |
|
"learning_rate": 1.1674023213993787e-05, |
|
"loss": 0.0015, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 1.9529956967891426, |
|
"grad_norm": 0.007931707426905632, |
|
"learning_rate": 1.1633153506620892e-05, |
|
"loss": 0.0003, |
|
"step": 13275 |
|
}, |
|
{ |
|
"epoch": 1.9566736547868624, |
|
"grad_norm": 0.009560568258166313, |
|
"learning_rate": 1.1592283799247998e-05, |
|
"loss": 0.0003, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.960351612784582, |
|
"grad_norm": 0.008578946813941002, |
|
"learning_rate": 1.1551414091875103e-05, |
|
"loss": 0.0008, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 1.9640295707823017, |
|
"grad_norm": 0.011748207733035088, |
|
"learning_rate": 1.1510544384502207e-05, |
|
"loss": 0.0002, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 1.9677075287800214, |
|
"grad_norm": 0.007073475047945976, |
|
"learning_rate": 1.1469674677129312e-05, |
|
"loss": 0.0002, |
|
"step": 13375 |
|
}, |
|
{ |
|
"epoch": 1.971385486777741, |
|
"grad_norm": 0.003219211706891656, |
|
"learning_rate": 1.1428804969756418e-05, |
|
"loss": 0.0323, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.9750634447754607, |
|
"grad_norm": 0.0061137378215789795, |
|
"learning_rate": 1.1387935262383521e-05, |
|
"loss": 0.0002, |
|
"step": 13425 |
|
}, |
|
{ |
|
"epoch": 1.9787414027731804, |
|
"grad_norm": 0.006435078103095293, |
|
"learning_rate": 1.1347065555010627e-05, |
|
"loss": 0.0409, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 1.9824193607709, |
|
"grad_norm": 0.002217411994934082, |
|
"learning_rate": 1.1306195847637732e-05, |
|
"loss": 0.0002, |
|
"step": 13475 |
|
}, |
|
{ |
|
"epoch": 1.9860973187686195, |
|
"grad_norm": 0.009155605919659138, |
|
"learning_rate": 1.1265326140264837e-05, |
|
"loss": 0.0487, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.9897752767663395, |
|
"grad_norm": 0.011870177462697029, |
|
"learning_rate": 1.122445643289194e-05, |
|
"loss": 0.0004, |
|
"step": 13525 |
|
}, |
|
{ |
|
"epoch": 1.993453234764059, |
|
"grad_norm": 0.008746917359530926, |
|
"learning_rate": 1.1183586725519045e-05, |
|
"loss": 0.0411, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 1.9971311927617785, |
|
"grad_norm": 0.005829541012644768, |
|
"learning_rate": 1.114271701814615e-05, |
|
"loss": 0.0003, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 1.9999264408400457, |
|
"eval_accuracy": 0.9963221772710555, |
|
"eval_auc": 0.9999360039904769, |
|
"eval_f1": 0.9963186570460905, |
|
"eval_loss": 0.023916827514767647, |
|
"eval_precision": 0.9973466981132075, |
|
"eval_recall": 0.9952927331568108, |
|
"eval_runtime": 2353.5774, |
|
"eval_samples_per_second": 5.776, |
|
"eval_steps_per_second": 1.444, |
|
"step": 13594 |
|
}, |
|
{ |
|
"epoch": 2.0008091507594985, |
|
"grad_norm": 0.010372490622103214, |
|
"learning_rate": 1.1101847310773254e-05, |
|
"loss": 0.0003, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.004487108757218, |
|
"grad_norm": 0.014902903698384762, |
|
"learning_rate": 1.106097760340036e-05, |
|
"loss": 0.0673, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 2.0081650667549376, |
|
"grad_norm": 0.005416123196482658, |
|
"learning_rate": 1.1020107896027465e-05, |
|
"loss": 0.0003, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.0118430247526575, |
|
"grad_norm": 0.007089643273502588, |
|
"learning_rate": 1.097923818865457e-05, |
|
"loss": 0.0003, |
|
"step": 13675 |
|
}, |
|
{ |
|
"epoch": 2.015520982750377, |
|
"grad_norm": 0.005935342982411385, |
|
"learning_rate": 1.0938368481281674e-05, |
|
"loss": 0.0004, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.0191989407480966, |
|
"grad_norm": 0.004356461577117443, |
|
"learning_rate": 1.0897498773908779e-05, |
|
"loss": 0.0003, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 2.022876898745816, |
|
"grad_norm": 0.010521539486944675, |
|
"learning_rate": 1.0856629066535884e-05, |
|
"loss": 0.0839, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.026554856743536, |
|
"grad_norm": 0.007211623247712851, |
|
"learning_rate": 1.081575935916299e-05, |
|
"loss": 0.0215, |
|
"step": 13775 |
|
}, |
|
{ |
|
"epoch": 2.0302328147412556, |
|
"grad_norm": 0.008732822723686695, |
|
"learning_rate": 1.0774889651790093e-05, |
|
"loss": 0.0002, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.033910772738975, |
|
"grad_norm": 0.005103670991957188, |
|
"learning_rate": 1.0734019944417199e-05, |
|
"loss": 0.0002, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 2.037588730736695, |
|
"grad_norm": 0.00569286709651351, |
|
"learning_rate": 1.0693150237044304e-05, |
|
"loss": 0.0002, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 2.0412666887344146, |
|
"grad_norm": 0.004690663423389196, |
|
"learning_rate": 1.0652280529671408e-05, |
|
"loss": 0.0002, |
|
"step": 13875 |
|
}, |
|
{ |
|
"epoch": 2.044944646732134, |
|
"grad_norm": 0.003813117044046521, |
|
"learning_rate": 1.0611410822298512e-05, |
|
"loss": 0.0001, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.048622604729854, |
|
"grad_norm": 0.0031241225078701973, |
|
"learning_rate": 1.0570541114925617e-05, |
|
"loss": 0.0001, |
|
"step": 13925 |
|
}, |
|
{ |
|
"epoch": 2.0523005627275737, |
|
"grad_norm": 0.001760639250278473, |
|
"learning_rate": 1.0529671407552722e-05, |
|
"loss": 0.0003, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 2.055978520725293, |
|
"grad_norm": 0.00507943844422698, |
|
"learning_rate": 1.0488801700179826e-05, |
|
"loss": 0.0465, |
|
"step": 13975 |
|
}, |
|
{ |
|
"epoch": 2.059656478723013, |
|
"grad_norm": 0.005704312119632959, |
|
"learning_rate": 1.0447931992806931e-05, |
|
"loss": 0.0002, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.0633344367207327, |
|
"grad_norm": 0.0037137740291655064, |
|
"learning_rate": 1.0407062285434037e-05, |
|
"loss": 0.0002, |
|
"step": 14025 |
|
}, |
|
{ |
|
"epoch": 2.067012394718452, |
|
"grad_norm": 0.004969414323568344, |
|
"learning_rate": 1.036619257806114e-05, |
|
"loss": 0.0002, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 2.070690352716172, |
|
"grad_norm": 0.002151261083781719, |
|
"learning_rate": 1.0325322870688246e-05, |
|
"loss": 0.0001, |
|
"step": 14075 |
|
}, |
|
{ |
|
"epoch": 2.0743683107138917, |
|
"grad_norm": 0.004214055370539427, |
|
"learning_rate": 1.0284453163315351e-05, |
|
"loss": 0.0001, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.0780462687116112, |
|
"grad_norm": 0.004696809686720371, |
|
"learning_rate": 1.0243583455942457e-05, |
|
"loss": 0.0001, |
|
"step": 14125 |
|
}, |
|
{ |
|
"epoch": 2.0817242267093308, |
|
"grad_norm": 8.668023109436035, |
|
"learning_rate": 1.020271374856956e-05, |
|
"loss": 0.0642, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 2.0854021847070507, |
|
"grad_norm": 0.00823593232780695, |
|
"learning_rate": 1.0161844041196666e-05, |
|
"loss": 0.0004, |
|
"step": 14175 |
|
}, |
|
{ |
|
"epoch": 2.0890801427047703, |
|
"grad_norm": 0.006173284724354744, |
|
"learning_rate": 1.0120974333823771e-05, |
|
"loss": 0.0002, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.09275810070249, |
|
"grad_norm": 0.004422744270414114, |
|
"learning_rate": 1.0080104626450876e-05, |
|
"loss": 0.0002, |
|
"step": 14225 |
|
}, |
|
{ |
|
"epoch": 2.0964360587002098, |
|
"grad_norm": 0.0038796046283096075, |
|
"learning_rate": 1.0039234919077978e-05, |
|
"loss": 0.0002, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 2.1001140166979293, |
|
"grad_norm": 0.003889993764460087, |
|
"learning_rate": 9.998365211705084e-06, |
|
"loss": 0.0008, |
|
"step": 14275 |
|
}, |
|
{ |
|
"epoch": 2.103791974695649, |
|
"grad_norm": 0.0035641242284327745, |
|
"learning_rate": 9.957495504332189e-06, |
|
"loss": 0.0001, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.107469932693369, |
|
"grad_norm": 0.0037507452070713043, |
|
"learning_rate": 9.916625796959293e-06, |
|
"loss": 0.0001, |
|
"step": 14325 |
|
}, |
|
{ |
|
"epoch": 2.1111478906910883, |
|
"grad_norm": 0.002810309175401926, |
|
"learning_rate": 9.875756089586398e-06, |
|
"loss": 0.0001, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 2.114825848688808, |
|
"grad_norm": 0.0030445558950304985, |
|
"learning_rate": 9.834886382213504e-06, |
|
"loss": 0.0001, |
|
"step": 14375 |
|
}, |
|
{ |
|
"epoch": 2.118503806686528, |
|
"grad_norm": 0.0025213556364178658, |
|
"learning_rate": 9.794016674840609e-06, |
|
"loss": 0.0001, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.1221817646842474, |
|
"grad_norm": 0.0027236223686486483, |
|
"learning_rate": 9.753146967467713e-06, |
|
"loss": 0.0001, |
|
"step": 14425 |
|
}, |
|
{ |
|
"epoch": 2.125859722681967, |
|
"grad_norm": 0.002416795352473855, |
|
"learning_rate": 9.712277260094818e-06, |
|
"loss": 0.0004, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 2.129537680679687, |
|
"grad_norm": 0.0019158340292051435, |
|
"learning_rate": 9.671407552721923e-06, |
|
"loss": 0.0001, |
|
"step": 14475 |
|
}, |
|
{ |
|
"epoch": 2.1332156386774064, |
|
"grad_norm": 0.002519650151953101, |
|
"learning_rate": 9.630537845349029e-06, |
|
"loss": 0.0001, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.136893596675126, |
|
"grad_norm": 0.002294061239808798, |
|
"learning_rate": 9.589668137976132e-06, |
|
"loss": 0.0001, |
|
"step": 14525 |
|
}, |
|
{ |
|
"epoch": 2.1405715546728454, |
|
"grad_norm": 0.0021358055528253317, |
|
"learning_rate": 9.548798430603238e-06, |
|
"loss": 0.0471, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 2.1442495126705654, |
|
"grad_norm": 0.001824073726311326, |
|
"learning_rate": 9.507928723230343e-06, |
|
"loss": 0.0001, |
|
"step": 14575 |
|
}, |
|
{ |
|
"epoch": 2.147927470668285, |
|
"grad_norm": 0.001960406079888344, |
|
"learning_rate": 9.467059015857447e-06, |
|
"loss": 0.0001, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.1516054286660045, |
|
"grad_norm": 0.0018290438456460834, |
|
"learning_rate": 9.42618930848455e-06, |
|
"loss": 0.0001, |
|
"step": 14625 |
|
}, |
|
{ |
|
"epoch": 2.1552833866637244, |
|
"grad_norm": 0.0019052918069064617, |
|
"learning_rate": 9.385319601111656e-06, |
|
"loss": 0.0001, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 2.158961344661444, |
|
"grad_norm": 0.0018661071080714464, |
|
"learning_rate": 9.344449893738761e-06, |
|
"loss": 0.0001, |
|
"step": 14675 |
|
}, |
|
{ |
|
"epoch": 2.1626393026591635, |
|
"grad_norm": 0.0031746248714625835, |
|
"learning_rate": 9.303580186365865e-06, |
|
"loss": 0.049, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.1663172606568835, |
|
"grad_norm": 0.003573804395273328, |
|
"learning_rate": 9.26271047899297e-06, |
|
"loss": 0.0001, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 2.169995218654603, |
|
"grad_norm": 0.003289070213213563, |
|
"learning_rate": 9.221840771620076e-06, |
|
"loss": 0.0113, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 2.1736731766523225, |
|
"grad_norm": 0.00257130921818316, |
|
"learning_rate": 9.18097106424718e-06, |
|
"loss": 0.0483, |
|
"step": 14775 |
|
}, |
|
{ |
|
"epoch": 2.1773511346500425, |
|
"grad_norm": 0.005980730522423983, |
|
"learning_rate": 9.140101356874285e-06, |
|
"loss": 0.0002, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.181029092647762, |
|
"grad_norm": 0.005953842308372259, |
|
"learning_rate": 9.09923164950139e-06, |
|
"loss": 0.0002, |
|
"step": 14825 |
|
}, |
|
{ |
|
"epoch": 2.1847070506454815, |
|
"grad_norm": 0.037090156227350235, |
|
"learning_rate": 9.058361942128496e-06, |
|
"loss": 0.0785, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 2.188385008643201, |
|
"grad_norm": 0.007919345051050186, |
|
"learning_rate": 9.0174922347556e-06, |
|
"loss": 0.0006, |
|
"step": 14875 |
|
}, |
|
{ |
|
"epoch": 2.192062966640921, |
|
"grad_norm": 0.021819893270730972, |
|
"learning_rate": 8.976622527382705e-06, |
|
"loss": 0.0376, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.1957409246386406, |
|
"grad_norm": 0.024493372067809105, |
|
"learning_rate": 8.93575282000981e-06, |
|
"loss": 0.0439, |
|
"step": 14925 |
|
}, |
|
{ |
|
"epoch": 2.19941888263636, |
|
"grad_norm": 0.038370776921510696, |
|
"learning_rate": 8.894883112636915e-06, |
|
"loss": 0.0802, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 2.20309684063408, |
|
"grad_norm": 0.019332151859998703, |
|
"learning_rate": 8.854013405264019e-06, |
|
"loss": 0.0012, |
|
"step": 14975 |
|
}, |
|
{ |
|
"epoch": 2.2067747986317996, |
|
"grad_norm": 0.03362823650240898, |
|
"learning_rate": 8.813143697891123e-06, |
|
"loss": 0.0369, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.210452756629519, |
|
"grad_norm": 0.024772603064775467, |
|
"learning_rate": 8.772273990518228e-06, |
|
"loss": 0.0008, |
|
"step": 15025 |
|
}, |
|
{ |
|
"epoch": 2.214130714627239, |
|
"grad_norm": 0.02276591770350933, |
|
"learning_rate": 8.731404283145332e-06, |
|
"loss": 0.1007, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 2.2178086726249586, |
|
"grad_norm": 0.016099456697702408, |
|
"learning_rate": 8.690534575772437e-06, |
|
"loss": 0.0009, |
|
"step": 15075 |
|
}, |
|
{ |
|
"epoch": 2.221486630622678, |
|
"grad_norm": 0.003277967683970928, |
|
"learning_rate": 8.649664868399542e-06, |
|
"loss": 0.0069, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.225164588620398, |
|
"grad_norm": 0.011233772151172161, |
|
"learning_rate": 8.608795161026648e-06, |
|
"loss": 0.0386, |
|
"step": 15125 |
|
}, |
|
{ |
|
"epoch": 2.2288425466181176, |
|
"grad_norm": 0.007455474231392145, |
|
"learning_rate": 8.567925453653752e-06, |
|
"loss": 0.0003, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 2.232520504615837, |
|
"grad_norm": 0.011497107334434986, |
|
"learning_rate": 8.527055746280857e-06, |
|
"loss": 0.0004, |
|
"step": 15175 |
|
}, |
|
{ |
|
"epoch": 2.236198462613557, |
|
"grad_norm": 0.003145186696201563, |
|
"learning_rate": 8.486186038907962e-06, |
|
"loss": 0.0003, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.2398764206112767, |
|
"grad_norm": 0.00954380352050066, |
|
"learning_rate": 8.445316331535066e-06, |
|
"loss": 0.0595, |
|
"step": 15225 |
|
}, |
|
{ |
|
"epoch": 2.243554378608996, |
|
"grad_norm": 0.007323611527681351, |
|
"learning_rate": 8.404446624162171e-06, |
|
"loss": 0.0004, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 2.247232336606716, |
|
"grad_norm": 0.011944909580051899, |
|
"learning_rate": 8.363576916789277e-06, |
|
"loss": 0.0003, |
|
"step": 15275 |
|
}, |
|
{ |
|
"epoch": 2.2509102946044357, |
|
"grad_norm": 0.01304931566119194, |
|
"learning_rate": 8.322707209416382e-06, |
|
"loss": 0.0389, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.2545882526021552, |
|
"grad_norm": 0.008787041530013084, |
|
"learning_rate": 8.281837502043486e-06, |
|
"loss": 0.0004, |
|
"step": 15325 |
|
}, |
|
{ |
|
"epoch": 2.2582662105998748, |
|
"grad_norm": 0.011969480663537979, |
|
"learning_rate": 8.24096779467059e-06, |
|
"loss": 0.0004, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 2.2619441685975947, |
|
"grad_norm": 0.011229045689105988, |
|
"learning_rate": 8.200098087297695e-06, |
|
"loss": 0.0003, |
|
"step": 15375 |
|
}, |
|
{ |
|
"epoch": 2.2656221265953143, |
|
"grad_norm": 0.00922977551817894, |
|
"learning_rate": 8.1592283799248e-06, |
|
"loss": 0.0004, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.269300084593034, |
|
"grad_norm": 0.008094431832432747, |
|
"learning_rate": 8.118358672551904e-06, |
|
"loss": 0.0003, |
|
"step": 15425 |
|
}, |
|
{ |
|
"epoch": 2.2729780425907538, |
|
"grad_norm": 0.0032492594327777624, |
|
"learning_rate": 8.07748896517901e-06, |
|
"loss": 0.0002, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 2.2766560005884733, |
|
"grad_norm": 0.004196746740490198, |
|
"learning_rate": 8.036619257806115e-06, |
|
"loss": 0.0002, |
|
"step": 15475 |
|
}, |
|
{ |
|
"epoch": 2.280333958586193, |
|
"grad_norm": 0.005214506760239601, |
|
"learning_rate": 7.995749550433218e-06, |
|
"loss": 0.0002, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.284011916583913, |
|
"grad_norm": 0.0034893976990133524, |
|
"learning_rate": 7.954879843060324e-06, |
|
"loss": 0.0002, |
|
"step": 15525 |
|
}, |
|
{ |
|
"epoch": 2.2876898745816323, |
|
"grad_norm": 0.0036745897959917784, |
|
"learning_rate": 7.914010135687429e-06, |
|
"loss": 0.0002, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 2.291367832579352, |
|
"grad_norm": 0.0020664865151047707, |
|
"learning_rate": 7.873140428314534e-06, |
|
"loss": 0.0001, |
|
"step": 15575 |
|
}, |
|
{ |
|
"epoch": 2.2950457905770714, |
|
"grad_norm": 0.005072563886642456, |
|
"learning_rate": 7.832270720941638e-06, |
|
"loss": 0.0417, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.2987237485747913, |
|
"grad_norm": 0.004465815611183643, |
|
"learning_rate": 7.791401013568743e-06, |
|
"loss": 0.0002, |
|
"step": 15625 |
|
}, |
|
{ |
|
"epoch": 2.302401706572511, |
|
"grad_norm": 0.005166616756469011, |
|
"learning_rate": 7.750531306195849e-06, |
|
"loss": 0.016, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 2.3060796645702304, |
|
"grad_norm": 0.0010274857049807906, |
|
"learning_rate": 7.709661598822953e-06, |
|
"loss": 0.0002, |
|
"step": 15675 |
|
}, |
|
{ |
|
"epoch": 2.3097576225679504, |
|
"grad_norm": 0.006900500506162643, |
|
"learning_rate": 7.668791891450058e-06, |
|
"loss": 0.0002, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.31343558056567, |
|
"grad_norm": 0.004663816653192043, |
|
"learning_rate": 7.6279221840771624e-06, |
|
"loss": 0.0001, |
|
"step": 15725 |
|
}, |
|
{ |
|
"epoch": 2.3171135385633894, |
|
"grad_norm": 0.006946474779397249, |
|
"learning_rate": 7.587052476704268e-06, |
|
"loss": 0.0001, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 2.3207914965611094, |
|
"grad_norm": 0.003868917003273964, |
|
"learning_rate": 7.5461827693313715e-06, |
|
"loss": 0.0342, |
|
"step": 15775 |
|
}, |
|
{ |
|
"epoch": 2.324469454558829, |
|
"grad_norm": 0.0028817090205848217, |
|
"learning_rate": 7.505313061958477e-06, |
|
"loss": 0.0138, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.3281474125565484, |
|
"grad_norm": 0.0059151784516870975, |
|
"learning_rate": 7.464443354585581e-06, |
|
"loss": 0.0733, |
|
"step": 15825 |
|
}, |
|
{ |
|
"epoch": 2.3318253705542684, |
|
"grad_norm": 0.004359770100563765, |
|
"learning_rate": 7.423573647212686e-06, |
|
"loss": 0.0421, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 2.335503328551988, |
|
"grad_norm": 0.011809108778834343, |
|
"learning_rate": 7.3827039398397904e-06, |
|
"loss": 0.0003, |
|
"step": 15875 |
|
}, |
|
{ |
|
"epoch": 2.3391812865497075, |
|
"grad_norm": 0.005823772866278887, |
|
"learning_rate": 7.341834232466896e-06, |
|
"loss": 0.0003, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.3428592445474274, |
|
"grad_norm": 0.003460386535152793, |
|
"learning_rate": 7.300964525094e-06, |
|
"loss": 0.0002, |
|
"step": 15925 |
|
}, |
|
{ |
|
"epoch": 2.346537202545147, |
|
"grad_norm": 0.008056416176259518, |
|
"learning_rate": 7.260094817721106e-06, |
|
"loss": 0.0381, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 2.3502151605428665, |
|
"grad_norm": 0.007788171526044607, |
|
"learning_rate": 7.21922511034821e-06, |
|
"loss": 0.0002, |
|
"step": 15975 |
|
}, |
|
{ |
|
"epoch": 2.3538931185405865, |
|
"grad_norm": 0.0066045369021594524, |
|
"learning_rate": 7.178355402975315e-06, |
|
"loss": 0.0002, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.357571076538306, |
|
"grad_norm": 0.004805906675755978, |
|
"learning_rate": 7.137485695602419e-06, |
|
"loss": 0.0053, |
|
"step": 16025 |
|
}, |
|
{ |
|
"epoch": 2.3612490345360255, |
|
"grad_norm": 0.010813217610120773, |
|
"learning_rate": 7.096615988229525e-06, |
|
"loss": 0.0381, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 2.3649269925337455, |
|
"grad_norm": 0.009302555583417416, |
|
"learning_rate": 7.055746280856629e-06, |
|
"loss": 0.0393, |
|
"step": 16075 |
|
}, |
|
{ |
|
"epoch": 2.368604950531465, |
|
"grad_norm": 0.011496507562696934, |
|
"learning_rate": 7.014876573483734e-06, |
|
"loss": 0.0386, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.3722829085291846, |
|
"grad_norm": 0.025231193751096725, |
|
"learning_rate": 6.974006866110839e-06, |
|
"loss": 0.0367, |
|
"step": 16125 |
|
}, |
|
{ |
|
"epoch": 2.375960866526904, |
|
"grad_norm": 0.020235830917954445, |
|
"learning_rate": 6.933137158737944e-06, |
|
"loss": 0.0006, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 2.379638824524624, |
|
"grad_norm": 0.006687480956315994, |
|
"learning_rate": 6.892267451365048e-06, |
|
"loss": 0.0004, |
|
"step": 16175 |
|
}, |
|
{ |
|
"epoch": 2.3833167825223436, |
|
"grad_norm": 0.003918817732483149, |
|
"learning_rate": 6.851397743992153e-06, |
|
"loss": 0.0003, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.386994740520063, |
|
"grad_norm": 0.011175381019711494, |
|
"learning_rate": 6.810528036619258e-06, |
|
"loss": 0.0003, |
|
"step": 16225 |
|
}, |
|
{ |
|
"epoch": 2.390672698517783, |
|
"grad_norm": 0.007755937986075878, |
|
"learning_rate": 6.769658329246363e-06, |
|
"loss": 0.0002, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 2.3943506565155026, |
|
"grad_norm": 0.004887331277132034, |
|
"learning_rate": 6.728788621873468e-06, |
|
"loss": 0.0002, |
|
"step": 16275 |
|
}, |
|
{ |
|
"epoch": 2.398028614513222, |
|
"grad_norm": 0.0048552751541137695, |
|
"learning_rate": 6.6879189145005725e-06, |
|
"loss": 0.0002, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.401706572510942, |
|
"grad_norm": 0.011255592107772827, |
|
"learning_rate": 6.647049207127677e-06, |
|
"loss": 0.0002, |
|
"step": 16325 |
|
}, |
|
{ |
|
"epoch": 2.4053845305086616, |
|
"grad_norm": 0.009114415384829044, |
|
"learning_rate": 6.6061794997547816e-06, |
|
"loss": 0.0002, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 2.409062488506381, |
|
"grad_norm": 0.009386932477355003, |
|
"learning_rate": 6.565309792381886e-06, |
|
"loss": 0.0395, |
|
"step": 16375 |
|
}, |
|
{ |
|
"epoch": 2.4127404465041007, |
|
"grad_norm": 0.005927698221057653, |
|
"learning_rate": 6.5244400850089915e-06, |
|
"loss": 0.0002, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.4164184045018207, |
|
"grad_norm": 0.0084453159943223, |
|
"learning_rate": 6.483570377636096e-06, |
|
"loss": 0.0506, |
|
"step": 16425 |
|
}, |
|
{ |
|
"epoch": 2.42009636249954, |
|
"grad_norm": 0.008083072490990162, |
|
"learning_rate": 6.442700670263201e-06, |
|
"loss": 0.0003, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 2.4237743204972597, |
|
"grad_norm": 0.00735598336905241, |
|
"learning_rate": 6.401830962890306e-06, |
|
"loss": 0.0003, |
|
"step": 16475 |
|
}, |
|
{ |
|
"epoch": 2.4274522784949797, |
|
"grad_norm": 0.007824303582310677, |
|
"learning_rate": 6.360961255517411e-06, |
|
"loss": 0.0398, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.431130236492699, |
|
"grad_norm": 0.009155460633337498, |
|
"learning_rate": 6.320091548144516e-06, |
|
"loss": 0.0003, |
|
"step": 16525 |
|
}, |
|
{ |
|
"epoch": 2.4348081944904187, |
|
"grad_norm": 0.005739257670938969, |
|
"learning_rate": 6.27922184077162e-06, |
|
"loss": 0.0003, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 2.4384861524881387, |
|
"grad_norm": 0.006940542254596949, |
|
"learning_rate": 6.238352133398725e-06, |
|
"loss": 0.0003, |
|
"step": 16575 |
|
}, |
|
{ |
|
"epoch": 2.4421641104858582, |
|
"grad_norm": 0.0053449515253305435, |
|
"learning_rate": 6.197482426025829e-06, |
|
"loss": 0.0002, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.4458420684835778, |
|
"grad_norm": 0.005325790494680405, |
|
"learning_rate": 6.156612718652935e-06, |
|
"loss": 0.0002, |
|
"step": 16625 |
|
}, |
|
{ |
|
"epoch": 2.4495200264812977, |
|
"grad_norm": 0.006259521469473839, |
|
"learning_rate": 6.115743011280039e-06, |
|
"loss": 0.0002, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 2.4531979844790173, |
|
"grad_norm": 0.006854058708995581, |
|
"learning_rate": 6.074873303907145e-06, |
|
"loss": 0.0002, |
|
"step": 16675 |
|
}, |
|
{ |
|
"epoch": 2.456875942476737, |
|
"grad_norm": 0.004361658822745085, |
|
"learning_rate": 6.034003596534249e-06, |
|
"loss": 0.0002, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.4605539004744568, |
|
"grad_norm": 0.0055083055049180984, |
|
"learning_rate": 5.993133889161354e-06, |
|
"loss": 0.0002, |
|
"step": 16725 |
|
}, |
|
{ |
|
"epoch": 2.4642318584721763, |
|
"grad_norm": 0.0033617918379604816, |
|
"learning_rate": 5.952264181788458e-06, |
|
"loss": 0.0002, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 2.467909816469896, |
|
"grad_norm": 0.0048737069591879845, |
|
"learning_rate": 5.911394474415564e-06, |
|
"loss": 0.0001, |
|
"step": 16775 |
|
}, |
|
{ |
|
"epoch": 2.471587774467616, |
|
"grad_norm": 0.0036280914209783077, |
|
"learning_rate": 5.870524767042668e-06, |
|
"loss": 0.0001, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.4752657324653353, |
|
"grad_norm": 0.003542742459103465, |
|
"learning_rate": 5.829655059669773e-06, |
|
"loss": 0.0001, |
|
"step": 16825 |
|
}, |
|
{ |
|
"epoch": 2.478943690463055, |
|
"grad_norm": 0.004226271994411945, |
|
"learning_rate": 5.788785352296878e-06, |
|
"loss": 0.0001, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 2.482621648460775, |
|
"grad_norm": 0.0033333373721688986, |
|
"learning_rate": 5.7479156449239826e-06, |
|
"loss": 0.0001, |
|
"step": 16875 |
|
}, |
|
{ |
|
"epoch": 2.4862996064584943, |
|
"grad_norm": 0.003888545325025916, |
|
"learning_rate": 5.707045937551087e-06, |
|
"loss": 0.0001, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.489977564456214, |
|
"grad_norm": 0.0031992702279239893, |
|
"learning_rate": 5.666176230178192e-06, |
|
"loss": 0.0001, |
|
"step": 16925 |
|
}, |
|
{ |
|
"epoch": 2.4936555224539334, |
|
"grad_norm": 0.0026705926284193993, |
|
"learning_rate": 5.625306522805297e-06, |
|
"loss": 0.0001, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 2.4973334804516534, |
|
"grad_norm": 0.001754347002133727, |
|
"learning_rate": 5.5844368154324015e-06, |
|
"loss": 0.0001, |
|
"step": 16975 |
|
}, |
|
{ |
|
"epoch": 2.501011438449373, |
|
"grad_norm": 0.0018643263028934598, |
|
"learning_rate": 5.543567108059507e-06, |
|
"loss": 0.0006, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.5046893964470924, |
|
"grad_norm": 0.002491478342562914, |
|
"learning_rate": 5.502697400686611e-06, |
|
"loss": 0.0001, |
|
"step": 17025 |
|
}, |
|
{ |
|
"epoch": 2.5083673544448124, |
|
"grad_norm": 0.002735487651079893, |
|
"learning_rate": 5.461827693313716e-06, |
|
"loss": 0.0001, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 2.512045312442532, |
|
"grad_norm": 0.002121156081557274, |
|
"learning_rate": 5.420957985940821e-06, |
|
"loss": 0.0013, |
|
"step": 17075 |
|
}, |
|
{ |
|
"epoch": 2.5157232704402515, |
|
"grad_norm": 0.001368986559100449, |
|
"learning_rate": 5.380088278567925e-06, |
|
"loss": 0.0001, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.519401228437971, |
|
"grad_norm": 0.0018654069863259792, |
|
"learning_rate": 5.33921857119503e-06, |
|
"loss": 0.0001, |
|
"step": 17125 |
|
}, |
|
{ |
|
"epoch": 2.523079186435691, |
|
"grad_norm": 0.0008688032394275069, |
|
"learning_rate": 5.298348863822135e-06, |
|
"loss": 0.0001, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 2.5267571444334105, |
|
"grad_norm": 0.0014730022521689534, |
|
"learning_rate": 5.25747915644924e-06, |
|
"loss": 0.0001, |
|
"step": 17175 |
|
}, |
|
{ |
|
"epoch": 2.53043510243113, |
|
"grad_norm": 589.290283203125, |
|
"learning_rate": 5.216609449076345e-06, |
|
"loss": 0.0295, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.53411306042885, |
|
"grad_norm": 0.0014689558884128928, |
|
"learning_rate": 5.17573974170345e-06, |
|
"loss": 0.0, |
|
"step": 17225 |
|
}, |
|
{ |
|
"epoch": 2.5377910184265695, |
|
"grad_norm": 0.001330269267782569, |
|
"learning_rate": 5.134870034330555e-06, |
|
"loss": 0.0, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 2.541468976424289, |
|
"grad_norm": 0.001491030678153038, |
|
"learning_rate": 5.094000326957658e-06, |
|
"loss": 0.0, |
|
"step": 17275 |
|
}, |
|
{ |
|
"epoch": 2.545146934422009, |
|
"grad_norm": 0.002089619869366288, |
|
"learning_rate": 5.053130619584764e-06, |
|
"loss": 0.0778, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.5488248924197285, |
|
"grad_norm": 0.0015247270930558443, |
|
"learning_rate": 5.012260912211868e-06, |
|
"loss": 0.0188, |
|
"step": 17325 |
|
}, |
|
{ |
|
"epoch": 2.552502850417448, |
|
"grad_norm": 0.002242110203951597, |
|
"learning_rate": 4.971391204838974e-06, |
|
"loss": 0.0179, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 2.556180808415168, |
|
"grad_norm": 0.0018629367696121335, |
|
"learning_rate": 4.930521497466078e-06, |
|
"loss": 0.0181, |
|
"step": 17375 |
|
}, |
|
{ |
|
"epoch": 2.5598587664128876, |
|
"grad_norm": 0.0014634733088314533, |
|
"learning_rate": 4.8896517900931836e-06, |
|
"loss": 0.0328, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.563536724410607, |
|
"grad_norm": 0.001321232644841075, |
|
"learning_rate": 4.848782082720288e-06, |
|
"loss": 0.0, |
|
"step": 17425 |
|
}, |
|
{ |
|
"epoch": 2.567214682408327, |
|
"grad_norm": 0.0012456915574148297, |
|
"learning_rate": 4.807912375347393e-06, |
|
"loss": 0.0003, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 2.5708926404060466, |
|
"grad_norm": 0.0009979073656722903, |
|
"learning_rate": 4.767042667974497e-06, |
|
"loss": 0.0001, |
|
"step": 17475 |
|
}, |
|
{ |
|
"epoch": 2.574570598403766, |
|
"grad_norm": 0.001377744134515524, |
|
"learning_rate": 4.726172960601602e-06, |
|
"loss": 0.0, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.578248556401486, |
|
"grad_norm": 0.0022715404629707336, |
|
"learning_rate": 4.685303253228707e-06, |
|
"loss": 0.0498, |
|
"step": 17525 |
|
}, |
|
{ |
|
"epoch": 2.5819265143992056, |
|
"grad_norm": 0.002307375194504857, |
|
"learning_rate": 4.644433545855812e-06, |
|
"loss": 0.0001, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 2.585604472396925, |
|
"grad_norm": 0.002744297729805112, |
|
"learning_rate": 4.603563838482917e-06, |
|
"loss": 0.0444, |
|
"step": 17575 |
|
}, |
|
{ |
|
"epoch": 2.589282430394645, |
|
"grad_norm": 0.004225959535688162, |
|
"learning_rate": 4.5626941311100215e-06, |
|
"loss": 0.0148, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.5929603883923646, |
|
"grad_norm": 0.0028173536993563175, |
|
"learning_rate": 4.521824423737127e-06, |
|
"loss": 0.0033, |
|
"step": 17625 |
|
}, |
|
{ |
|
"epoch": 2.596638346390084, |
|
"grad_norm": 0.00215067807585001, |
|
"learning_rate": 4.4809547163642305e-06, |
|
"loss": 0.0001, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 2.600316304387804, |
|
"grad_norm": 0.004402931313961744, |
|
"learning_rate": 4.440085008991336e-06, |
|
"loss": 0.0001, |
|
"step": 17675 |
|
}, |
|
{ |
|
"epoch": 2.6039942623855237, |
|
"grad_norm": 0.0019863785710185766, |
|
"learning_rate": 4.3992153016184404e-06, |
|
"loss": 0.0001, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.607672220383243, |
|
"grad_norm": 0.0032948977313935757, |
|
"learning_rate": 4.358345594245545e-06, |
|
"loss": 0.0001, |
|
"step": 17725 |
|
}, |
|
{ |
|
"epoch": 2.6113501783809627, |
|
"grad_norm": 0.0017591605428606272, |
|
"learning_rate": 4.31747588687265e-06, |
|
"loss": 0.0001, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 2.6150281363786827, |
|
"grad_norm": 0.5669000148773193, |
|
"learning_rate": 4.276606179499755e-06, |
|
"loss": 0.0002, |
|
"step": 17775 |
|
}, |
|
{ |
|
"epoch": 2.6187060943764022, |
|
"grad_norm": 0.0018617259338498116, |
|
"learning_rate": 4.23573647212686e-06, |
|
"loss": 0.044, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.6223840523741218, |
|
"grad_norm": 0.004173843190073967, |
|
"learning_rate": 4.194866764753964e-06, |
|
"loss": 0.0001, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 2.6260620103718413, |
|
"grad_norm": 0.005529914982616901, |
|
"learning_rate": 4.153997057381069e-06, |
|
"loss": 0.0001, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 2.6297399683695613, |
|
"grad_norm": 0.003100366098806262, |
|
"learning_rate": 4.113127350008174e-06, |
|
"loss": 0.0001, |
|
"step": 17875 |
|
}, |
|
{ |
|
"epoch": 2.633417926367281, |
|
"grad_norm": 0.0017961232224479318, |
|
"learning_rate": 4.072257642635279e-06, |
|
"loss": 0.012, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.6370958843650003, |
|
"grad_norm": 0.0022237550001591444, |
|
"learning_rate": 4.031387935262384e-06, |
|
"loss": 0.0001, |
|
"step": 17925 |
|
}, |
|
{ |
|
"epoch": 2.6407738423627203, |
|
"grad_norm": 0.002973005408421159, |
|
"learning_rate": 3.990518227889488e-06, |
|
"loss": 0.0438, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 2.64445180036044, |
|
"grad_norm": 0.003434759797528386, |
|
"learning_rate": 3.949648520516594e-06, |
|
"loss": 0.0003, |
|
"step": 17975 |
|
}, |
|
{ |
|
"epoch": 2.6481297583581593, |
|
"grad_norm": 0.003463399363681674, |
|
"learning_rate": 3.908778813143697e-06, |
|
"loss": 0.0001, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.6518077163558793, |
|
"grad_norm": 0.003393635619431734, |
|
"learning_rate": 3.867909105770803e-06, |
|
"loss": 0.0002, |
|
"step": 18025 |
|
}, |
|
{ |
|
"epoch": 2.655485674353599, |
|
"grad_norm": 0.0027733049355447292, |
|
"learning_rate": 3.827039398397907e-06, |
|
"loss": 0.0001, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 2.6591636323513184, |
|
"grad_norm": 0.0038054571487009525, |
|
"learning_rate": 3.7861696910250126e-06, |
|
"loss": 0.0001, |
|
"step": 18075 |
|
}, |
|
{ |
|
"epoch": 2.6628415903490383, |
|
"grad_norm": 0.0029823731165379286, |
|
"learning_rate": 3.745299983652117e-06, |
|
"loss": 0.0001, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.666519548346758, |
|
"grad_norm": 0.0019862265326082706, |
|
"learning_rate": 3.704430276279222e-06, |
|
"loss": 0.0001, |
|
"step": 18125 |
|
}, |
|
{ |
|
"epoch": 2.6701975063444774, |
|
"grad_norm": 0.003500757971778512, |
|
"learning_rate": 3.6635605689063266e-06, |
|
"loss": 0.0001, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 2.6738754643421974, |
|
"grad_norm": 0.002085187705233693, |
|
"learning_rate": 3.6226908615334315e-06, |
|
"loss": 0.0001, |
|
"step": 18175 |
|
}, |
|
{ |
|
"epoch": 2.677553422339917, |
|
"grad_norm": 0.0023257972206920385, |
|
"learning_rate": 3.5818211541605365e-06, |
|
"loss": 0.0001, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.6812313803376364, |
|
"grad_norm": 0.0022203666158020496, |
|
"learning_rate": 3.5409514467876414e-06, |
|
"loss": 0.0001, |
|
"step": 18225 |
|
}, |
|
{ |
|
"epoch": 2.6849093383353564, |
|
"grad_norm": 0.0012388962786644697, |
|
"learning_rate": 3.500081739414746e-06, |
|
"loss": 0.0, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 2.688587296333076, |
|
"grad_norm": 0.0008910479955375195, |
|
"learning_rate": 3.4592120320418505e-06, |
|
"loss": 0.0003, |
|
"step": 18275 |
|
}, |
|
{ |
|
"epoch": 2.6922652543307954, |
|
"grad_norm": 0.0010503758676350117, |
|
"learning_rate": 3.4183423246689554e-06, |
|
"loss": 0.0, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.6959432123285154, |
|
"grad_norm": 0.000730241066776216, |
|
"learning_rate": 3.37747261729606e-06, |
|
"loss": 0.0001, |
|
"step": 18325 |
|
}, |
|
{ |
|
"epoch": 2.699621170326235, |
|
"grad_norm": 0.000822307774797082, |
|
"learning_rate": 3.336602909923165e-06, |
|
"loss": 0.0, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 2.7032991283239545, |
|
"grad_norm": 1.4722820520401, |
|
"learning_rate": 3.29573320255027e-06, |
|
"loss": 0.1083, |
|
"step": 18375 |
|
}, |
|
{ |
|
"epoch": 2.7069770863216744, |
|
"grad_norm": 0.004885438829660416, |
|
"learning_rate": 3.254863495177375e-06, |
|
"loss": 0.0002, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.710655044319394, |
|
"grad_norm": 0.0033965399488806725, |
|
"learning_rate": 3.2139937878044794e-06, |
|
"loss": 0.0001, |
|
"step": 18425 |
|
}, |
|
{ |
|
"epoch": 2.7143330023171135, |
|
"grad_norm": 0.004250906407833099, |
|
"learning_rate": 3.1731240804315843e-06, |
|
"loss": 0.0231, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 2.7180109603148335, |
|
"grad_norm": 0.003409018972888589, |
|
"learning_rate": 3.1322543730586893e-06, |
|
"loss": 0.0002, |
|
"step": 18475 |
|
}, |
|
{ |
|
"epoch": 2.721688918312553, |
|
"grad_norm": 0.0036356241907924414, |
|
"learning_rate": 3.0913846656857938e-06, |
|
"loss": 0.0409, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.7253668763102725, |
|
"grad_norm": 0.006237304303795099, |
|
"learning_rate": 3.0505149583128983e-06, |
|
"loss": 0.0386, |
|
"step": 18525 |
|
}, |
|
{ |
|
"epoch": 2.729044834307992, |
|
"grad_norm": 0.006783687509596348, |
|
"learning_rate": 3.0096452509400033e-06, |
|
"loss": 0.0002, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 2.732722792305712, |
|
"grad_norm": 0.04287054389715195, |
|
"learning_rate": 2.9687755435671082e-06, |
|
"loss": 0.0321, |
|
"step": 18575 |
|
}, |
|
{ |
|
"epoch": 2.7364007503034316, |
|
"grad_norm": 0.0038001120556145906, |
|
"learning_rate": 2.9279058361942127e-06, |
|
"loss": 0.0003, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.740078708301151, |
|
"grad_norm": 0.003841620171442628, |
|
"learning_rate": 2.8870361288213177e-06, |
|
"loss": 0.0001, |
|
"step": 18625 |
|
}, |
|
{ |
|
"epoch": 2.7437566662988706, |
|
"grad_norm": 0.002676568925380707, |
|
"learning_rate": 2.8461664214484226e-06, |
|
"loss": 0.0001, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 2.7474346242965906, |
|
"grad_norm": 0.008307211101055145, |
|
"learning_rate": 2.8052967140755276e-06, |
|
"loss": 0.0001, |
|
"step": 18675 |
|
}, |
|
{ |
|
"epoch": 2.75111258229431, |
|
"grad_norm": 0.0034743708092719316, |
|
"learning_rate": 2.764427006702632e-06, |
|
"loss": 0.0001, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 2.7547905402920296, |
|
"grad_norm": 0.0020617684349417686, |
|
"learning_rate": 2.7235572993297367e-06, |
|
"loss": 0.0001, |
|
"step": 18725 |
|
}, |
|
{ |
|
"epoch": 2.7584684982897496, |
|
"grad_norm": 0.0017286173533648252, |
|
"learning_rate": 2.6826875919568416e-06, |
|
"loss": 0.0001, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 2.762146456287469, |
|
"grad_norm": 0.001774169155396521, |
|
"learning_rate": 2.6418178845839466e-06, |
|
"loss": 0.0001, |
|
"step": 18775 |
|
}, |
|
{ |
|
"epoch": 2.7658244142851887, |
|
"grad_norm": 0.003061393741518259, |
|
"learning_rate": 2.600948177211051e-06, |
|
"loss": 0.0298, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.7695023722829086, |
|
"grad_norm": 0.00195386353880167, |
|
"learning_rate": 2.560078469838156e-06, |
|
"loss": 0.0001, |
|
"step": 18825 |
|
}, |
|
{ |
|
"epoch": 2.773180330280628, |
|
"grad_norm": 0.0015053004026412964, |
|
"learning_rate": 2.519208762465261e-06, |
|
"loss": 0.0001, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 2.7768582882783477, |
|
"grad_norm": 0.002827111864462495, |
|
"learning_rate": 2.4783390550923655e-06, |
|
"loss": 0.0001, |
|
"step": 18875 |
|
}, |
|
{ |
|
"epoch": 2.7805362462760677, |
|
"grad_norm": 0.0010932940058410168, |
|
"learning_rate": 2.4374693477194705e-06, |
|
"loss": 0.0001, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 2.784214204273787, |
|
"grad_norm": 7.858973026275635, |
|
"learning_rate": 2.3965996403465754e-06, |
|
"loss": 0.0468, |
|
"step": 18925 |
|
}, |
|
{ |
|
"epoch": 2.7878921622715067, |
|
"grad_norm": 0.002107949461787939, |
|
"learning_rate": 2.35572993297368e-06, |
|
"loss": 0.0001, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 2.7915701202692267, |
|
"grad_norm": 0.001860212185420096, |
|
"learning_rate": 2.3148602256007845e-06, |
|
"loss": 0.0001, |
|
"step": 18975 |
|
}, |
|
{ |
|
"epoch": 2.795248078266946, |
|
"grad_norm": 0.002180658746510744, |
|
"learning_rate": 2.2739905182278894e-06, |
|
"loss": 0.0001, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.7989260362646657, |
|
"grad_norm": 0.001684672199189663, |
|
"learning_rate": 2.2331208108549944e-06, |
|
"loss": 0.0001, |
|
"step": 19025 |
|
}, |
|
{ |
|
"epoch": 2.8026039942623857, |
|
"grad_norm": 0.0015821090200915933, |
|
"learning_rate": 2.1922511034820993e-06, |
|
"loss": 0.0001, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 2.8062819522601052, |
|
"grad_norm": 0.0031413165852427483, |
|
"learning_rate": 2.151381396109204e-06, |
|
"loss": 0.0222, |
|
"step": 19075 |
|
}, |
|
{ |
|
"epoch": 2.8099599102578248, |
|
"grad_norm": 0.001654456602409482, |
|
"learning_rate": 2.110511688736309e-06, |
|
"loss": 0.0001, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 2.8136378682555447, |
|
"grad_norm": 0.0025208396837115288, |
|
"learning_rate": 2.0696419813634138e-06, |
|
"loss": 0.0297, |
|
"step": 19125 |
|
}, |
|
{ |
|
"epoch": 2.8173158262532643, |
|
"grad_norm": 0.0016039038309827447, |
|
"learning_rate": 2.0287722739905183e-06, |
|
"loss": 0.0001, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 2.820993784250984, |
|
"grad_norm": 0.0015692878514528275, |
|
"learning_rate": 1.987902566617623e-06, |
|
"loss": 0.0001, |
|
"step": 19175 |
|
}, |
|
{ |
|
"epoch": 2.8246717422487038, |
|
"grad_norm": 0.0014573705848306417, |
|
"learning_rate": 1.9470328592447278e-06, |
|
"loss": 0.0001, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.8283497002464233, |
|
"grad_norm": 0.005317123141139746, |
|
"learning_rate": 1.9061631518718325e-06, |
|
"loss": 0.0001, |
|
"step": 19225 |
|
}, |
|
{ |
|
"epoch": 2.832027658244143, |
|
"grad_norm": 0.0014695243444293737, |
|
"learning_rate": 1.8652934444989374e-06, |
|
"loss": 0.0312, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 2.835705616241863, |
|
"grad_norm": 0.04826376587152481, |
|
"learning_rate": 1.8244237371260422e-06, |
|
"loss": 0.0263, |
|
"step": 19275 |
|
}, |
|
{ |
|
"epoch": 2.8393835742395823, |
|
"grad_norm": 0.0012747733853757381, |
|
"learning_rate": 1.7835540297531471e-06, |
|
"loss": 0.0001, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 2.843061532237302, |
|
"grad_norm": 0.0011536297388374805, |
|
"learning_rate": 1.7426843223802519e-06, |
|
"loss": 0.025, |
|
"step": 19325 |
|
}, |
|
{ |
|
"epoch": 2.8467394902350214, |
|
"grad_norm": 0.00559173384681344, |
|
"learning_rate": 1.7018146150073564e-06, |
|
"loss": 0.0001, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 2.8504174482327413, |
|
"grad_norm": 0.0011801973450928926, |
|
"learning_rate": 1.6609449076344614e-06, |
|
"loss": 0.0001, |
|
"step": 19375 |
|
}, |
|
{ |
|
"epoch": 2.854095406230461, |
|
"grad_norm": 0.020327366888523102, |
|
"learning_rate": 1.620075200261566e-06, |
|
"loss": 0.0001, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.8577733642281804, |
|
"grad_norm": 0.0012536696158349514, |
|
"learning_rate": 1.579205492888671e-06, |
|
"loss": 0.0001, |
|
"step": 19425 |
|
}, |
|
{ |
|
"epoch": 2.8614513222259, |
|
"grad_norm": 0.0010541353840380907, |
|
"learning_rate": 1.5383357855157758e-06, |
|
"loss": 0.0, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 2.86512928022362, |
|
"grad_norm": 0.0011492278426885605, |
|
"learning_rate": 1.4974660781428805e-06, |
|
"loss": 0.0001, |
|
"step": 19475 |
|
}, |
|
{ |
|
"epoch": 2.8688072382213394, |
|
"grad_norm": 0.002121875062584877, |
|
"learning_rate": 1.4565963707699853e-06, |
|
"loss": 0.0339, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.872485196219059, |
|
"grad_norm": 0.0013062648940831423, |
|
"learning_rate": 1.4157266633970902e-06, |
|
"loss": 0.0001, |
|
"step": 19525 |
|
}, |
|
{ |
|
"epoch": 2.876163154216779, |
|
"grad_norm": 0.0012365735601633787, |
|
"learning_rate": 1.374856956024195e-06, |
|
"loss": 0.0001, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 2.8798411122144985, |
|
"grad_norm": 0.001490547088906169, |
|
"learning_rate": 1.3339872486512997e-06, |
|
"loss": 0.0389, |
|
"step": 19575 |
|
}, |
|
{ |
|
"epoch": 2.883519070212218, |
|
"grad_norm": 0.0010857345769181848, |
|
"learning_rate": 1.2931175412784044e-06, |
|
"loss": 0.0002, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.887197028209938, |
|
"grad_norm": 0.0016767021734267473, |
|
"learning_rate": 1.2522478339055092e-06, |
|
"loss": 0.0001, |
|
"step": 19625 |
|
}, |
|
{ |
|
"epoch": 2.8908749862076575, |
|
"grad_norm": 0.004218839108943939, |
|
"learning_rate": 1.2113781265326141e-06, |
|
"loss": 0.0001, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 2.894552944205377, |
|
"grad_norm": 0.0010596220381557941, |
|
"learning_rate": 1.1705084191597189e-06, |
|
"loss": 0.0001, |
|
"step": 19675 |
|
}, |
|
{ |
|
"epoch": 2.898230902203097, |
|
"grad_norm": 0.005758639425039291, |
|
"learning_rate": 1.1296387117868236e-06, |
|
"loss": 0.0001, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 2.9019088602008165, |
|
"grad_norm": 0.004077006597071886, |
|
"learning_rate": 1.0887690044139283e-06, |
|
"loss": 0.0001, |
|
"step": 19725 |
|
}, |
|
{ |
|
"epoch": 2.905586818198536, |
|
"grad_norm": 0.023057300597429276, |
|
"learning_rate": 1.0478992970410333e-06, |
|
"loss": 0.0001, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 2.909264776196256, |
|
"grad_norm": 0.0010171595495194197, |
|
"learning_rate": 1.007029589668138e-06, |
|
"loss": 0.0002, |
|
"step": 19775 |
|
}, |
|
{ |
|
"epoch": 2.9129427341939755, |
|
"grad_norm": 0.0021811590995639563, |
|
"learning_rate": 9.661598822952428e-07, |
|
"loss": 0.0018, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.916620692191695, |
|
"grad_norm": 0.0007530258735641837, |
|
"learning_rate": 9.252901749223475e-07, |
|
"loss": 0.0, |
|
"step": 19825 |
|
}, |
|
{ |
|
"epoch": 2.920298650189415, |
|
"grad_norm": 0.0008248965605162084, |
|
"learning_rate": 8.844204675494524e-07, |
|
"loss": 0.0, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 2.9239766081871346, |
|
"grad_norm": 0.0008437008364126086, |
|
"learning_rate": 8.435507601765572e-07, |
|
"loss": 0.0001, |
|
"step": 19875 |
|
}, |
|
{ |
|
"epoch": 2.927654566184854, |
|
"grad_norm": 0.0011598097626119852, |
|
"learning_rate": 8.026810528036619e-07, |
|
"loss": 0.029, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 2.931332524182574, |
|
"grad_norm": 0.000989201944321394, |
|
"learning_rate": 7.618113454307668e-07, |
|
"loss": 0.0001, |
|
"step": 19925 |
|
}, |
|
{ |
|
"epoch": 2.9350104821802936, |
|
"grad_norm": 0.0009332878980785608, |
|
"learning_rate": 7.209416380578715e-07, |
|
"loss": 0.0001, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 2.938688440178013, |
|
"grad_norm": 0.0010302929440513253, |
|
"learning_rate": 6.800719306849764e-07, |
|
"loss": 0.0316, |
|
"step": 19975 |
|
}, |
|
{ |
|
"epoch": 2.942366398175733, |
|
"grad_norm": 0.0011053696507588029, |
|
"learning_rate": 6.392022233120811e-07, |
|
"loss": 0.0001, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.9460443561734526, |
|
"grad_norm": 0.001087658922187984, |
|
"learning_rate": 5.983325159391858e-07, |
|
"loss": 0.0, |
|
"step": 20025 |
|
}, |
|
{ |
|
"epoch": 2.949722314171172, |
|
"grad_norm": 0.0008900929242372513, |
|
"learning_rate": 5.574628085662906e-07, |
|
"loss": 0.0001, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 2.9534002721688917, |
|
"grad_norm": 0.001053415471687913, |
|
"learning_rate": 5.165931011933954e-07, |
|
"loss": 0.0, |
|
"step": 20075 |
|
}, |
|
{ |
|
"epoch": 2.9570782301666116, |
|
"grad_norm": 0.0008429349982179701, |
|
"learning_rate": 4.757233938205003e-07, |
|
"loss": 0.0, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 2.960756188164331, |
|
"grad_norm": 0.0009649925632402301, |
|
"learning_rate": 4.34853686447605e-07, |
|
"loss": 0.0, |
|
"step": 20125 |
|
}, |
|
{ |
|
"epoch": 2.9644341461620507, |
|
"grad_norm": 0.0009367198217660189, |
|
"learning_rate": 3.939839790747098e-07, |
|
"loss": 0.0002, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 2.9681121041597702, |
|
"grad_norm": 0.0008432368049398065, |
|
"learning_rate": 3.5311427170181465e-07, |
|
"loss": 0.0, |
|
"step": 20175 |
|
}, |
|
{ |
|
"epoch": 2.97179006215749, |
|
"grad_norm": 2.9367611408233643, |
|
"learning_rate": 3.1224456432891944e-07, |
|
"loss": 0.0002, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 2.9754680201552097, |
|
"grad_norm": 0.0008842748356983066, |
|
"learning_rate": 2.7137485695602424e-07, |
|
"loss": 0.0001, |
|
"step": 20225 |
|
}, |
|
{ |
|
"epoch": 2.9791459781529293, |
|
"grad_norm": 0.3803035616874695, |
|
"learning_rate": 2.30505149583129e-07, |
|
"loss": 0.0303, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 2.9828239361506492, |
|
"grad_norm": 0.001255788840353489, |
|
"learning_rate": 1.8963544221023377e-07, |
|
"loss": 0.0001, |
|
"step": 20275 |
|
}, |
|
{ |
|
"epoch": 2.9865018941483688, |
|
"grad_norm": 0.0012517735594883561, |
|
"learning_rate": 1.4876573483733856e-07, |
|
"loss": 0.0001, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 2.9901798521460883, |
|
"grad_norm": 0.0008377633057534695, |
|
"learning_rate": 1.0789602746444335e-07, |
|
"loss": 0.0001, |
|
"step": 20325 |
|
}, |
|
{ |
|
"epoch": 2.9938578101438083, |
|
"grad_norm": 0.0008699085447005928, |
|
"learning_rate": 6.702632009154815e-08, |
|
"loss": 0.0001, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 2.997535768141528, |
|
"grad_norm": 0.000927777262404561, |
|
"learning_rate": 2.6156612718652934e-08, |
|
"loss": 0.023, |
|
"step": 20375 |
|
}, |
|
{ |
|
"epoch": 2.9998896612600685, |
|
"eval_accuracy": 0.9969841853622655, |
|
"eval_auc": 0.9999289486306174, |
|
"eval_f1": 0.9969837416317222, |
|
"eval_loss": 0.01777876727283001, |
|
"eval_precision": 0.9972038263428992, |
|
"eval_recall": 0.9967637540453075, |
|
"eval_runtime": 2385.463, |
|
"eval_samples_per_second": 5.699, |
|
"eval_steps_per_second": 1.425, |
|
"step": 20391 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 20391, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.073019505969152e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|