|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9977761304670127,
  "eval_steps": 500,
  "global_step": 1011,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0029651593773165306,
      "grad_norm": 11.262527465820312,
      "learning_rate": 1.9607843137254904e-07,
      "loss": 0.9138,
      "step": 1
    },
    {
      "epoch": 0.005930318754633061,
      "grad_norm": 10.613697052001953,
      "learning_rate": 3.921568627450981e-07,
      "loss": 0.9147,
      "step": 2
    },
    {
      "epoch": 0.008895478131949592,
      "grad_norm": 11.271403312683105,
      "learning_rate": 5.882352941176471e-07,
      "loss": 0.9118,
      "step": 3
    },
    {
      "epoch": 0.011860637509266123,
      "grad_norm": 11.093155860900879,
      "learning_rate": 7.843137254901962e-07,
      "loss": 0.9137,
      "step": 4
    },
    {
      "epoch": 0.014825796886582653,
      "grad_norm": 11.175023078918457,
      "learning_rate": 9.80392156862745e-07,
      "loss": 0.8879,
      "step": 5
    },
    {
      "epoch": 0.017790956263899184,
      "grad_norm": 10.275071144104004,
      "learning_rate": 1.1764705882352942e-06,
      "loss": 0.864,
      "step": 6
    },
    {
      "epoch": 0.020756115641215715,
      "grad_norm": 8.285501480102539,
      "learning_rate": 1.3725490196078434e-06,
      "loss": 0.8612,
      "step": 7
    },
    {
      "epoch": 0.023721275018532245,
      "grad_norm": 6.519635200500488,
      "learning_rate": 1.5686274509803923e-06,
      "loss": 0.8372,
      "step": 8
    },
    {
      "epoch": 0.026686434395848776,
      "grad_norm": 6.018601894378662,
      "learning_rate": 1.7647058823529414e-06,
      "loss": 0.8244,
      "step": 9
    },
    {
      "epoch": 0.029651593773165306,
      "grad_norm": 5.061045169830322,
      "learning_rate": 1.96078431372549e-06,
      "loss": 0.8057,
      "step": 10
    },
    {
      "epoch": 0.03261675315048184,
      "grad_norm": 5.859638214111328,
      "learning_rate": 2.1568627450980393e-06,
      "loss": 0.7734,
      "step": 11
    },
    {
      "epoch": 0.03558191252779837,
      "grad_norm": 5.410571098327637,
      "learning_rate": 2.3529411764705885e-06,
      "loss": 0.7635,
      "step": 12
    },
    {
      "epoch": 0.0385470719051149,
      "grad_norm": 3.8421123027801514,
      "learning_rate": 2.549019607843137e-06,
      "loss": 0.7373,
      "step": 13
    },
    {
      "epoch": 0.04151223128243143,
      "grad_norm": 2.3517632484436035,
      "learning_rate": 2.7450980392156867e-06,
      "loss": 0.7035,
      "step": 14
    },
    {
      "epoch": 0.04447739065974796,
      "grad_norm": 2.1120362281799316,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 0.6795,
      "step": 15
    },
    {
      "epoch": 0.04744255003706449,
      "grad_norm": 2.042616605758667,
      "learning_rate": 3.1372549019607846e-06,
      "loss": 0.6596,
      "step": 16
    },
    {
      "epoch": 0.050407709414381024,
      "grad_norm": 1.781117558479309,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.6325,
      "step": 17
    },
    {
      "epoch": 0.05337286879169755,
      "grad_norm": 1.464235782623291,
      "learning_rate": 3.529411764705883e-06,
      "loss": 0.6265,
      "step": 18
    },
    {
      "epoch": 0.056338028169014086,
      "grad_norm": 1.1197887659072876,
      "learning_rate": 3.7254901960784316e-06,
      "loss": 0.6251,
      "step": 19
    },
    {
      "epoch": 0.05930318754633061,
      "grad_norm": 1.1305307149887085,
      "learning_rate": 3.92156862745098e-06,
      "loss": 0.6271,
      "step": 20
    },
    {
      "epoch": 0.06226834692364715,
      "grad_norm": 1.1442177295684814,
      "learning_rate": 4.11764705882353e-06,
      "loss": 0.605,
      "step": 21
    },
    {
      "epoch": 0.06523350630096368,
      "grad_norm": 0.8627598881721497,
      "learning_rate": 4.313725490196079e-06,
      "loss": 0.5979,
      "step": 22
    },
    {
      "epoch": 0.0681986656782802,
      "grad_norm": 0.9222763776779175,
      "learning_rate": 4.509803921568628e-06,
      "loss": 0.6027,
      "step": 23
    },
    {
      "epoch": 0.07116382505559674,
      "grad_norm": 0.787282407283783,
      "learning_rate": 4.705882352941177e-06,
      "loss": 0.5928,
      "step": 24
    },
    {
      "epoch": 0.07412898443291327,
      "grad_norm": 0.8055775165557861,
      "learning_rate": 4.901960784313726e-06,
      "loss": 0.5842,
      "step": 25
    },
    {
      "epoch": 0.0770941438102298,
      "grad_norm": 0.713017463684082,
      "learning_rate": 5.098039215686274e-06,
      "loss": 0.5694,
      "step": 26
    },
    {
      "epoch": 0.08005930318754632,
      "grad_norm": 0.7474880814552307,
      "learning_rate": 5.294117647058824e-06,
      "loss": 0.5558,
      "step": 27
    },
    {
      "epoch": 0.08302446256486286,
      "grad_norm": 0.7316311001777649,
      "learning_rate": 5.4901960784313735e-06,
      "loss": 0.5629,
      "step": 28
    },
    {
      "epoch": 0.08598962194217939,
      "grad_norm": 0.760550856590271,
      "learning_rate": 5.686274509803922e-06,
      "loss": 0.5574,
      "step": 29
    },
    {
      "epoch": 0.08895478131949593,
      "grad_norm": 0.7376196384429932,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.5562,
      "step": 30
    },
    {
      "epoch": 0.09191994069681246,
      "grad_norm": 0.7215123176574707,
      "learning_rate": 6.07843137254902e-06,
      "loss": 0.5438,
      "step": 31
    },
    {
      "epoch": 0.09488510007412898,
      "grad_norm": 0.7079214453697205,
      "learning_rate": 6.274509803921569e-06,
      "loss": 0.5418,
      "step": 32
    },
    {
      "epoch": 0.09785025945144551,
      "grad_norm": 0.6675574779510498,
      "learning_rate": 6.470588235294119e-06,
      "loss": 0.5402,
      "step": 33
    },
    {
      "epoch": 0.10081541882876205,
      "grad_norm": 0.6604759693145752,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.5344,
      "step": 34
    },
    {
      "epoch": 0.10378057820607858,
      "grad_norm": 0.7341721057891846,
      "learning_rate": 6.862745098039216e-06,
      "loss": 0.5336,
      "step": 35
    },
    {
      "epoch": 0.1067457375833951,
      "grad_norm": 0.6764810681343079,
      "learning_rate": 7.058823529411766e-06,
      "loss": 0.5327,
      "step": 36
    },
    {
      "epoch": 0.10971089696071164,
      "grad_norm": 0.6292859315872192,
      "learning_rate": 7.2549019607843145e-06,
      "loss": 0.5275,
      "step": 37
    },
    {
      "epoch": 0.11267605633802817,
      "grad_norm": 0.7222408652305603,
      "learning_rate": 7.450980392156863e-06,
      "loss": 0.5207,
      "step": 38
    },
    {
      "epoch": 0.1156412157153447,
      "grad_norm": 0.592737078666687,
      "learning_rate": 7.647058823529411e-06,
      "loss": 0.5202,
      "step": 39
    },
    {
      "epoch": 0.11860637509266123,
      "grad_norm": 0.7391071915626526,
      "learning_rate": 7.84313725490196e-06,
      "loss": 0.5088,
      "step": 40
    },
    {
      "epoch": 0.12157153446997776,
      "grad_norm": 0.5978769659996033,
      "learning_rate": 8.03921568627451e-06,
      "loss": 0.5059,
      "step": 41
    },
    {
      "epoch": 0.1245366938472943,
      "grad_norm": 0.7067713737487793,
      "learning_rate": 8.23529411764706e-06,
      "loss": 0.5079,
      "step": 42
    },
    {
      "epoch": 0.12750185322461083,
      "grad_norm": 0.6121165752410889,
      "learning_rate": 8.43137254901961e-06,
      "loss": 0.4998,
      "step": 43
    },
    {
      "epoch": 0.13046701260192736,
      "grad_norm": 0.7495785355567932,
      "learning_rate": 8.627450980392157e-06,
      "loss": 0.4877,
      "step": 44
    },
    {
      "epoch": 0.1334321719792439,
      "grad_norm": 0.6476943492889404,
      "learning_rate": 8.823529411764707e-06,
      "loss": 0.4971,
      "step": 45
    },
    {
      "epoch": 0.1363973313565604,
      "grad_norm": 0.7655041813850403,
      "learning_rate": 9.019607843137256e-06,
      "loss": 0.5002,
      "step": 46
    },
    {
      "epoch": 0.13936249073387694,
      "grad_norm": 0.6622442007064819,
      "learning_rate": 9.215686274509804e-06,
      "loss": 0.484,
      "step": 47
    },
    {
      "epoch": 0.14232765011119347,
      "grad_norm": 0.7732651233673096,
      "learning_rate": 9.411764705882354e-06,
      "loss": 0.4922,
      "step": 48
    },
    {
      "epoch": 0.14529280948851,
      "grad_norm": 0.6692637205123901,
      "learning_rate": 9.607843137254903e-06,
      "loss": 0.4733,
      "step": 49
    },
    {
      "epoch": 0.14825796886582654,
      "grad_norm": 0.705590546131134,
      "learning_rate": 9.803921568627451e-06,
      "loss": 0.4734,
      "step": 50
    },
    {
      "epoch": 0.15122312824314307,
      "grad_norm": 0.6731917858123779,
      "learning_rate": 1e-05,
      "loss": 0.4651,
      "step": 51
    },
    {
      "epoch": 0.1541882876204596,
      "grad_norm": 0.6704531908035278,
      "learning_rate": 1.0196078431372549e-05,
      "loss": 0.4689,
      "step": 52
    },
    {
      "epoch": 0.15715344699777614,
      "grad_norm": 0.6448220610618591,
      "learning_rate": 1.03921568627451e-05,
      "loss": 0.4675,
      "step": 53
    },
    {
      "epoch": 0.16011860637509265,
      "grad_norm": 0.6441836953163147,
      "learning_rate": 1.0588235294117648e-05,
      "loss": 0.4557,
      "step": 54
    },
    {
      "epoch": 0.16308376575240918,
      "grad_norm": 0.7347533106803894,
      "learning_rate": 1.0784313725490196e-05,
      "loss": 0.4622,
      "step": 55
    },
    {
      "epoch": 0.16604892512972572,
      "grad_norm": 0.6999682784080505,
      "learning_rate": 1.0980392156862747e-05,
      "loss": 0.4446,
      "step": 56
    },
    {
      "epoch": 0.16901408450704225,
      "grad_norm": 0.6985459327697754,
      "learning_rate": 1.1176470588235295e-05,
      "loss": 0.4471,
      "step": 57
    },
    {
      "epoch": 0.17197924388435878,
      "grad_norm": 0.7167170643806458,
      "learning_rate": 1.1372549019607844e-05,
      "loss": 0.4465,
      "step": 58
    },
    {
      "epoch": 0.17494440326167532,
      "grad_norm": 0.6770612001419067,
      "learning_rate": 1.1568627450980394e-05,
      "loss": 0.4374,
      "step": 59
    },
    {
      "epoch": 0.17790956263899185,
      "grad_norm": 0.7454700469970703,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 0.4346,
      "step": 60
    },
    {
      "epoch": 0.1808747220163084,
      "grad_norm": 0.726898193359375,
      "learning_rate": 1.1960784313725491e-05,
      "loss": 0.4287,
      "step": 61
    },
    {
      "epoch": 0.18383988139362492,
      "grad_norm": 0.7026724219322205,
      "learning_rate": 1.215686274509804e-05,
      "loss": 0.4242,
      "step": 62
    },
    {
      "epoch": 0.18680504077094143,
      "grad_norm": 1.0427573919296265,
      "learning_rate": 1.235294117647059e-05,
      "loss": 0.4301,
      "step": 63
    },
    {
      "epoch": 0.18977020014825796,
      "grad_norm": 0.9116256833076477,
      "learning_rate": 1.2549019607843138e-05,
      "loss": 0.4131,
      "step": 64
    },
    {
      "epoch": 0.1927353595255745,
      "grad_norm": 0.7025630474090576,
      "learning_rate": 1.2745098039215686e-05,
      "loss": 0.4175,
      "step": 65
    },
    {
      "epoch": 0.19570051890289103,
      "grad_norm": 1.24030339717865,
      "learning_rate": 1.2941176470588238e-05,
      "loss": 0.4166,
      "step": 66
    },
    {
      "epoch": 0.19866567828020756,
      "grad_norm": 0.7674146294593811,
      "learning_rate": 1.3137254901960785e-05,
      "loss": 0.4042,
      "step": 67
    },
    {
      "epoch": 0.2016308376575241,
      "grad_norm": 0.7968058586120605,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.4015,
      "step": 68
    },
    {
      "epoch": 0.20459599703484063,
      "grad_norm": 0.9057684540748596,
      "learning_rate": 1.3529411764705885e-05,
      "loss": 0.3992,
      "step": 69
    },
    {
      "epoch": 0.20756115641215717,
      "grad_norm": 0.8404118418693542,
      "learning_rate": 1.3725490196078432e-05,
      "loss": 0.3974,
      "step": 70
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 0.8619468212127686,
      "learning_rate": 1.392156862745098e-05,
      "loss": 0.4023,
      "step": 71
    },
    {
      "epoch": 0.2134914751667902,
      "grad_norm": 0.745784342288971,
      "learning_rate": 1.4117647058823532e-05,
      "loss": 0.3929,
      "step": 72
    },
    {
      "epoch": 0.21645663454410674,
      "grad_norm": 0.8499307632446289,
      "learning_rate": 1.431372549019608e-05,
      "loss": 0.3827,
      "step": 73
    },
    {
      "epoch": 0.21942179392142327,
      "grad_norm": 0.8255784511566162,
      "learning_rate": 1.4509803921568629e-05,
      "loss": 0.3831,
      "step": 74
    },
    {
      "epoch": 0.2223869532987398,
      "grad_norm": 0.8738009333610535,
      "learning_rate": 1.4705882352941179e-05,
      "loss": 0.377,
      "step": 75
    },
    {
      "epoch": 0.22535211267605634,
      "grad_norm": 0.8723142147064209,
      "learning_rate": 1.4901960784313726e-05,
      "loss": 0.3685,
      "step": 76
    },
    {
      "epoch": 0.22831727205337288,
      "grad_norm": 0.8929502964019775,
      "learning_rate": 1.5098039215686276e-05,
      "loss": 0.3787,
      "step": 77
    },
    {
      "epoch": 0.2312824314306894,
      "grad_norm": 1.0882786512374878,
      "learning_rate": 1.5294117647058822e-05,
      "loss": 0.3652,
      "step": 78
    },
    {
      "epoch": 0.23424759080800592,
      "grad_norm": 0.9075109362602234,
      "learning_rate": 1.5490196078431373e-05,
      "loss": 0.3674,
      "step": 79
    },
    {
      "epoch": 0.23721275018532245,
      "grad_norm": 1.1592175960540771,
      "learning_rate": 1.568627450980392e-05,
      "loss": 0.3644,
      "step": 80
    },
    {
      "epoch": 0.24017790956263899,
      "grad_norm": 0.8505756258964539,
      "learning_rate": 1.5882352941176473e-05,
      "loss": 0.3642,
      "step": 81
    },
    {
      "epoch": 0.24314306893995552,
      "grad_norm": 0.9724293947219849,
      "learning_rate": 1.607843137254902e-05,
      "loss": 0.3467,
      "step": 82
    },
    {
      "epoch": 0.24610822831727205,
      "grad_norm": 1.0010569095611572,
      "learning_rate": 1.627450980392157e-05,
      "loss": 0.3582,
      "step": 83
    },
    {
      "epoch": 0.2490733876945886,
      "grad_norm": 0.9776509404182434,
      "learning_rate": 1.647058823529412e-05,
      "loss": 0.3494,
      "step": 84
    },
    {
      "epoch": 0.2520385470719051,
      "grad_norm": 0.9763832688331604,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.3487,
      "step": 85
    },
    {
      "epoch": 0.25500370644922166,
      "grad_norm": 0.8749181628227234,
      "learning_rate": 1.686274509803922e-05,
      "loss": 0.3425,
      "step": 86
    },
    {
      "epoch": 0.25796886582653816,
      "grad_norm": 0.922757089138031,
      "learning_rate": 1.7058823529411767e-05,
      "loss": 0.3431,
      "step": 87
    },
    {
      "epoch": 0.2609340252038547,
      "grad_norm": 0.8772656321525574,
      "learning_rate": 1.7254901960784314e-05,
      "loss": 0.3424,
      "step": 88
    },
    {
      "epoch": 0.26389918458117123,
      "grad_norm": 0.8626474738121033,
      "learning_rate": 1.7450980392156866e-05,
      "loss": 0.3351,
      "step": 89
    },
    {
      "epoch": 0.2668643439584878,
      "grad_norm": 0.8123406767845154,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 0.3274,
      "step": 90
    },
    {
      "epoch": 0.2698295033358043,
      "grad_norm": 0.8629675507545471,
      "learning_rate": 1.7843137254901965e-05,
      "loss": 0.3332,
      "step": 91
    },
    {
      "epoch": 0.2727946627131208,
      "grad_norm": 0.7453241944313049,
      "learning_rate": 1.8039215686274513e-05,
      "loss": 0.3264,
      "step": 92
    },
    {
      "epoch": 0.27575982209043737,
      "grad_norm": 0.8055425882339478,
      "learning_rate": 1.823529411764706e-05,
      "loss": 0.3196,
      "step": 93
    },
    {
      "epoch": 0.2787249814677539,
      "grad_norm": 0.8176495432853699,
      "learning_rate": 1.843137254901961e-05,
      "loss": 0.3167,
      "step": 94
    },
    {
      "epoch": 0.28169014084507044,
      "grad_norm": 0.7777736186981201,
      "learning_rate": 1.862745098039216e-05,
      "loss": 0.318,
      "step": 95
    },
    {
      "epoch": 0.28465530022238694,
      "grad_norm": 0.8604575395584106,
      "learning_rate": 1.8823529411764708e-05,
      "loss": 0.3231,
      "step": 96
    },
    {
      "epoch": 0.2876204595997035,
      "grad_norm": 0.821183979511261,
      "learning_rate": 1.9019607843137255e-05,
      "loss": 0.3176,
      "step": 97
    },
    {
      "epoch": 0.29058561897702,
      "grad_norm": 0.8958712816238403,
      "learning_rate": 1.9215686274509807e-05,
      "loss": 0.3155,
      "step": 98
    },
    {
      "epoch": 0.2935507783543366,
      "grad_norm": 0.9813326001167297,
      "learning_rate": 1.9411764705882355e-05,
      "loss": 0.3182,
      "step": 99
    },
    {
      "epoch": 0.2965159377316531,
      "grad_norm": 0.9215829968452454,
      "learning_rate": 1.9607843137254903e-05,
      "loss": 0.3084,
      "step": 100
    },
    {
      "epoch": 0.2994810971089696,
      "grad_norm": 0.8247601389884949,
      "learning_rate": 1.9803921568627454e-05,
      "loss": 0.3032,
      "step": 101
    },
    {
      "epoch": 0.30244625648628615,
      "grad_norm": 0.8188148736953735,
      "learning_rate": 2e-05,
      "loss": 0.3059,
      "step": 102
    },
    {
      "epoch": 0.30541141586360265,
      "grad_norm": 0.8999500870704651,
      "learning_rate": 1.9999940277008807e-05,
      "loss": 0.3086,
      "step": 103
    },
    {
      "epoch": 0.3083765752409192,
      "grad_norm": 0.8770850300788879,
      "learning_rate": 1.99997611087486e-05,
      "loss": 0.299,
      "step": 104
    },
    {
      "epoch": 0.3113417346182357,
      "grad_norm": 0.8018732070922852,
      "learning_rate": 1.9999462497359468e-05,
      "loss": 0.3034,
      "step": 105
    },
    {
      "epoch": 0.3143068939955523,
      "grad_norm": 0.8308204412460327,
      "learning_rate": 1.9999044446408203e-05,
      "loss": 0.3009,
      "step": 106
    },
    {
      "epoch": 0.3172720533728688,
      "grad_norm": 1.1407383680343628,
      "learning_rate": 1.9998506960888258e-05,
      "loss": 0.2982,
      "step": 107
    },
    {
      "epoch": 0.3202372127501853,
      "grad_norm": 0.8638990521430969,
      "learning_rate": 1.999785004721968e-05,
      "loss": 0.2957,
      "step": 108
    },
    {
      "epoch": 0.32320237212750186,
      "grad_norm": 0.8289093971252441,
      "learning_rate": 1.999707371324904e-05,
      "loss": 0.3004,
      "step": 109
    },
    {
      "epoch": 0.32616753150481836,
      "grad_norm": 1.0886658430099487,
      "learning_rate": 1.9996177968249336e-05,
      "loss": 0.2953,
      "step": 110
    },
    {
      "epoch": 0.3291326908821349,
      "grad_norm": 0.7261621356010437,
      "learning_rate": 1.999516282291988e-05,
      "loss": 0.2945,
      "step": 111
    },
    {
      "epoch": 0.33209785025945143,
      "grad_norm": 0.9758589267730713,
      "learning_rate": 1.999402828938618e-05,
      "loss": 0.2946,
      "step": 112
    },
    {
      "epoch": 0.335063009636768,
      "grad_norm": 0.7505866885185242,
      "learning_rate": 1.999277438119978e-05,
      "loss": 0.2997,
      "step": 113
    },
    {
      "epoch": 0.3380281690140845,
      "grad_norm": 0.801395833492279,
      "learning_rate": 1.9991401113338103e-05,
      "loss": 0.2885,
      "step": 114
    },
    {
      "epoch": 0.34099332839140106,
      "grad_norm": 0.7377513647079468,
      "learning_rate": 1.9989908502204295e-05,
      "loss": 0.2863,
      "step": 115
    },
    {
      "epoch": 0.34395848776871757,
      "grad_norm": 0.728659987449646,
      "learning_rate": 1.9988296565626988e-05,
      "loss": 0.2863,
      "step": 116
    },
    {
      "epoch": 0.3469236471460341,
      "grad_norm": 0.7147101759910583,
      "learning_rate": 1.9986565322860117e-05,
      "loss": 0.2813,
      "step": 117
    },
    {
      "epoch": 0.34988880652335064,
      "grad_norm": 0.7080392837524414,
      "learning_rate": 1.9984714794582682e-05,
      "loss": 0.281,
      "step": 118
    },
    {
      "epoch": 0.35285396590066714,
      "grad_norm": 0.7131238579750061,
      "learning_rate": 1.99827450028985e-05,
      "loss": 0.2776,
      "step": 119
    },
    {
      "epoch": 0.3558191252779837,
      "grad_norm": 0.6940627694129944,
      "learning_rate": 1.9980655971335944e-05,
      "loss": 0.2814,
      "step": 120
    },
    {
      "epoch": 0.3587842846553002,
      "grad_norm": 0.655299186706543,
      "learning_rate": 1.9978447724847655e-05,
      "loss": 0.2752,
      "step": 121
    },
    {
      "epoch": 0.3617494440326168,
      "grad_norm": 0.676629900932312,
      "learning_rate": 1.9976120289810247e-05,
      "loss": 0.2818,
      "step": 122
    },
    {
      "epoch": 0.3647146034099333,
      "grad_norm": 0.6595851182937622,
      "learning_rate": 1.9973673694024002e-05,
      "loss": 0.2801,
      "step": 123
    },
    {
      "epoch": 0.36767976278724984,
      "grad_norm": 0.6573948860168457,
      "learning_rate": 1.9971107966712518e-05,
      "loss": 0.2829,
      "step": 124
    },
    {
      "epoch": 0.37064492216456635,
      "grad_norm": 0.6650752425193787,
      "learning_rate": 1.9968423138522382e-05,
      "loss": 0.2774,
      "step": 125
    },
    {
      "epoch": 0.37361008154188285,
      "grad_norm": 0.6870672106742859,
      "learning_rate": 1.996561924152278e-05,
      "loss": 0.279,
      "step": 126
    },
    {
      "epoch": 0.3765752409191994,
      "grad_norm": 0.6355287432670593,
      "learning_rate": 1.9962696309205146e-05,
      "loss": 0.2745,
      "step": 127
    },
    {
      "epoch": 0.3795404002965159,
      "grad_norm": 0.6908348798751831,
      "learning_rate": 1.995965437648273e-05,
      "loss": 0.2735,
      "step": 128
    },
    {
      "epoch": 0.3825055596738325,
      "grad_norm": 0.6098238229751587,
      "learning_rate": 1.995649347969019e-05,
      "loss": 0.2658,
      "step": 129
    },
    {
      "epoch": 0.385470719051149,
      "grad_norm": 0.6651309728622437,
      "learning_rate": 1.995321365658317e-05,
      "loss": 0.2696,
      "step": 130
    },
    {
      "epoch": 0.38843587842846555,
      "grad_norm": 0.6579580903053284,
      "learning_rate": 1.994981494633784e-05,
      "loss": 0.2639,
      "step": 131
    },
    {
      "epoch": 0.39140103780578206,
      "grad_norm": 0.650133490562439,
      "learning_rate": 1.9946297389550433e-05,
      "loss": 0.2664,
      "step": 132
    },
    {
      "epoch": 0.39436619718309857,
      "grad_norm": 0.6148021221160889,
      "learning_rate": 1.9942661028236746e-05,
      "loss": 0.2691,
      "step": 133
    },
    {
      "epoch": 0.3973313565604151,
      "grad_norm": 0.6839851140975952,
      "learning_rate": 1.9938905905831657e-05,
      "loss": 0.2619,
      "step": 134
    },
    {
      "epoch": 0.40029651593773163,
      "grad_norm": 0.6269260048866272,
      "learning_rate": 1.993503206718859e-05,
      "loss": 0.2679,
      "step": 135
    },
    {
      "epoch": 0.4032616753150482,
      "grad_norm": 0.6698904633522034,
      "learning_rate": 1.9931039558578997e-05,
      "loss": 0.2737,
      "step": 136
    },
    {
      "epoch": 0.4062268346923647,
      "grad_norm": 0.6404738426208496,
      "learning_rate": 1.9926928427691788e-05,
      "loss": 0.2702,
      "step": 137
    },
    {
      "epoch": 0.40919199406968126,
      "grad_norm": 0.6118369698524475,
      "learning_rate": 1.992269872363277e-05,
      "loss": 0.2644,
      "step": 138
    },
    {
      "epoch": 0.41215715344699777,
      "grad_norm": 0.6277154684066772,
      "learning_rate": 1.991835049692405e-05,
      "loss": 0.2657,
      "step": 139
    },
    {
      "epoch": 0.41512231282431433,
      "grad_norm": 0.5819361805915833,
      "learning_rate": 1.991388379950346e-05,
      "loss": 0.252,
      "step": 140
    },
    {
      "epoch": 0.41808747220163084,
      "grad_norm": 0.672166109085083,
      "learning_rate": 1.9909298684723905e-05,
      "loss": 0.2606,
      "step": 141
    },
    {
      "epoch": 0.42105263157894735,
      "grad_norm": 0.5884442925453186,
      "learning_rate": 1.9904595207352736e-05,
      "loss": 0.2557,
      "step": 142
    },
    {
      "epoch": 0.4240177909562639,
      "grad_norm": 0.5893815755844116,
      "learning_rate": 1.9899773423571102e-05,
      "loss": 0.2595,
      "step": 143
    },
    {
      "epoch": 0.4269829503335804,
      "grad_norm": 0.6969826221466064,
      "learning_rate": 1.9894833390973266e-05,
      "loss": 0.2595,
      "step": 144
    },
    {
      "epoch": 0.429948109710897,
      "grad_norm": 0.5909337997436523,
      "learning_rate": 1.9889775168565942e-05,
      "loss": 0.2522,
      "step": 145
    },
    {
      "epoch": 0.4329132690882135,
      "grad_norm": 0.5902915000915527,
      "learning_rate": 1.9884598816767563e-05,
      "loss": 0.2532,
      "step": 146
    },
    {
      "epoch": 0.43587842846553004,
      "grad_norm": 0.6261239647865295,
      "learning_rate": 1.987930439740757e-05,
      "loss": 0.2566,
      "step": 147
    },
    {
      "epoch": 0.43884358784284655,
      "grad_norm": 0.579250156879425,
      "learning_rate": 1.9873891973725673e-05,
      "loss": 0.2587,
      "step": 148
    },
    {
      "epoch": 0.44180874722016306,
      "grad_norm": 0.5678402185440063,
      "learning_rate": 1.98683616103711e-05,
      "loss": 0.2494,
      "step": 149
    },
    {
      "epoch": 0.4447739065974796,
      "grad_norm": 0.6142150163650513,
      "learning_rate": 1.986271337340182e-05,
      "loss": 0.2507,
      "step": 150
    },
    {
      "epoch": 0.4477390659747961,
      "grad_norm": 0.6132687926292419,
      "learning_rate": 1.9856947330283752e-05,
      "loss": 0.2547,
      "step": 151
    },
    {
      "epoch": 0.4507042253521127,
      "grad_norm": 0.5993427038192749,
      "learning_rate": 1.985106354988997e-05,
      "loss": 0.2478,
      "step": 152
    },
    {
      "epoch": 0.4536693847294292,
      "grad_norm": 0.6638728380203247,
      "learning_rate": 1.984506210249986e-05,
      "loss": 0.2547,
      "step": 153
    },
    {
      "epoch": 0.45663454410674575,
      "grad_norm": 0.6074317097663879,
      "learning_rate": 1.9838943059798305e-05,
      "loss": 0.2521,
      "step": 154
    },
    {
      "epoch": 0.45959970348406226,
      "grad_norm": 0.6486067175865173,
      "learning_rate": 1.9832706494874812e-05,
      "loss": 0.2562,
      "step": 155
    },
    {
      "epoch": 0.4625648628613788,
      "grad_norm": 0.6348186135292053,
      "learning_rate": 1.982635248222264e-05,
      "loss": 0.2528,
      "step": 156
    },
    {
      "epoch": 0.46553002223869533,
      "grad_norm": 0.5568612217903137,
      "learning_rate": 1.9819881097737917e-05,
      "loss": 0.2471,
      "step": 157
    },
    {
      "epoch": 0.46849518161601184,
      "grad_norm": 0.5930222272872925,
      "learning_rate": 1.9813292418718734e-05,
      "loss": 0.2434,
      "step": 158
    },
    {
      "epoch": 0.4714603409933284,
      "grad_norm": 0.6412246823310852,
      "learning_rate": 1.9806586523864212e-05,
      "loss": 0.2482,
      "step": 159
    },
    {
      "epoch": 0.4744255003706449,
      "grad_norm": 0.5488153696060181,
      "learning_rate": 1.9799763493273572e-05,
      "loss": 0.2416,
      "step": 160
    },
    {
      "epoch": 0.47739065974796147,
      "grad_norm": 0.6217798590660095,
      "learning_rate": 1.9792823408445173e-05,
      "loss": 0.2508,
      "step": 161
    },
    {
      "epoch": 0.48035581912527797,
      "grad_norm": 0.5728364586830139,
      "learning_rate": 1.978576635227554e-05,
      "loss": 0.2488,
      "step": 162
    },
    {
      "epoch": 0.48332097850259453,
      "grad_norm": 0.6427583694458008,
      "learning_rate": 1.9778592409058376e-05,
      "loss": 0.2483,
      "step": 163
    },
    {
      "epoch": 0.48628613787991104,
      "grad_norm": 0.6554081439971924,
      "learning_rate": 1.9771301664483548e-05,
      "loss": 0.2426,
      "step": 164
    },
    {
      "epoch": 0.4892512972572276,
      "grad_norm": 0.5885781049728394,
      "learning_rate": 1.976389420563607e-05,
      "loss": 0.2551,
      "step": 165
    },
    {
      "epoch": 0.4922164566345441,
      "grad_norm": 0.5944969058036804,
      "learning_rate": 1.975637012099507e-05,
      "loss": 0.2466,
      "step": 166
    },
    {
      "epoch": 0.4951816160118606,
      "grad_norm": 0.6138084530830383,
      "learning_rate": 1.97487295004327e-05,
      "loss": 0.2414,
      "step": 167
    },
    {
      "epoch": 0.4981467753891772,
      "grad_norm": 0.5585880279541016,
      "learning_rate": 1.9740972435213114e-05,
      "loss": 0.2352,
      "step": 168
    },
    {
      "epoch": 0.5011119347664937,
      "grad_norm": 0.6357447504997253,
      "learning_rate": 1.9733099017991342e-05,
      "loss": 0.2454,
      "step": 169
    },
    {
      "epoch": 0.5040770941438102,
      "grad_norm": 0.5774321556091309,
      "learning_rate": 1.972510934281218e-05,
      "loss": 0.2424,
      "step": 170
    },
    {
      "epoch": 0.5070422535211268,
      "grad_norm": 0.6422623991966248,
      "learning_rate": 1.9717003505109097e-05,
      "loss": 0.2361,
      "step": 171
    },
    {
      "epoch": 0.5100074128984433,
      "grad_norm": 0.5912754535675049,
      "learning_rate": 1.9708781601703066e-05,
      "loss": 0.243,
      "step": 172
    },
    {
      "epoch": 0.5129725722757599,
      "grad_norm": 0.5881178379058838,
      "learning_rate": 1.9700443730801412e-05,
      "loss": 0.2394,
      "step": 173
    },
    {
      "epoch": 0.5159377316530763,
      "grad_norm": 0.6363380551338196,
      "learning_rate": 1.9691989991996663e-05,
      "loss": 0.2407,
      "step": 174
    },
    {
      "epoch": 0.5189028910303929,
      "grad_norm": 0.55989670753479,
      "learning_rate": 1.9683420486265328e-05,
      "loss": 0.2438,
      "step": 175
    },
    {
      "epoch": 0.5218680504077094,
      "grad_norm": 0.6781154274940491,
      "learning_rate": 1.967473531596671e-05,
      "loss": 0.2424,
      "step": 176
    },
    {
      "epoch": 0.5248332097850259,
      "grad_norm": 0.5050660967826843,
      "learning_rate": 1.966593458484168e-05,
      "loss": 0.2341,
      "step": 177
    },
    {
      "epoch": 0.5277983691623425,
      "grad_norm": 0.6881943345069885,
      "learning_rate": 1.9657018398011435e-05,
      "loss": 0.2433,
      "step": 178
    },
    {
      "epoch": 0.530763528539659,
      "grad_norm": 0.553970992565155,
      "learning_rate": 1.9647986861976246e-05,
      "loss": 0.237,
      "step": 179
    },
    {
      "epoch": 0.5337286879169756,
      "grad_norm": 0.6539415121078491,
      "learning_rate": 1.9638840084614182e-05,
      "loss": 0.238,
      "step": 180
    },
    {
      "epoch": 0.536693847294292,
      "grad_norm": 0.5665425658226013,
      "learning_rate": 1.9629578175179823e-05,
      "loss": 0.2399,
      "step": 181
    },
    {
      "epoch": 0.5396590066716086,
      "grad_norm": 0.6046749949455261,
      "learning_rate": 1.9620201244302952e-05,
      "loss": 0.2359,
      "step": 182
    },
    {
      "epoch": 0.5426241660489252,
      "grad_norm": 0.6772344708442688,
      "learning_rate": 1.9610709403987248e-05,
      "loss": 0.2382,
      "step": 183
    },
    {
      "epoch": 0.5455893254262416,
      "grad_norm": 0.473206490278244,
      "learning_rate": 1.9601102767608924e-05,
      "loss": 0.2321,
      "step": 184
    },
    {
      "epoch": 0.5485544848035582,
      "grad_norm": 0.6189218163490295,
      "learning_rate": 1.95913814499154e-05,
      "loss": 0.2356,
      "step": 185
    },
    {
      "epoch": 0.5515196441808747,
      "grad_norm": 0.5345617532730103,
      "learning_rate": 1.95815455670239e-05,
      "loss": 0.2394,
      "step": 186
    },
    {
      "epoch": 0.5544848035581913,
      "grad_norm": 0.5871132016181946,
      "learning_rate": 1.9571595236420103e-05,
      "loss": 0.2359,
      "step": 187
    },
    {
      "epoch": 0.5574499629355077,
      "grad_norm": 0.5409566760063171,
      "learning_rate": 1.9561530576956703e-05,
      "loss": 0.2396,
      "step": 188
    },
    {
      "epoch": 0.5604151223128243,
      "grad_norm": 0.5904874205589294,
      "learning_rate": 1.955135170885202e-05,
      "loss": 0.2361,
      "step": 189
    },
    {
      "epoch": 0.5633802816901409,
      "grad_norm": 0.5407031178474426,
      "learning_rate": 1.9541058753688538e-05,
      "loss": 0.2368,
      "step": 190
    },
    {
      "epoch": 0.5663454410674573,
      "grad_norm": 0.5759615302085876,
      "learning_rate": 1.9530651834411477e-05,
      "loss": 0.2358,
      "step": 191
    },
    {
      "epoch": 0.5693106004447739,
      "grad_norm": 0.6436863541603088,
      "learning_rate": 1.95201310753273e-05,
      "loss": 0.2299,
      "step": 192
    },
    {
      "epoch": 0.5722757598220904,
      "grad_norm": 0.5067325830459595,
      "learning_rate": 1.9509496602102253e-05,
      "loss": 0.2275,
      "step": 193
    },
    {
      "epoch": 0.575240919199407,
      "grad_norm": 0.5916472673416138,
      "learning_rate": 1.9498748541760845e-05,
      "loss": 0.229,
      "step": 194
    },
    {
      "epoch": 0.5782060785767235,
      "grad_norm": 0.49817144870758057,
      "learning_rate": 1.9487887022684336e-05,
      "loss": 0.2277,
      "step": 195
    },
    {
      "epoch": 0.58117123795404,
      "grad_norm": 0.6111854910850525,
      "learning_rate": 1.947691217460921e-05,
      "loss": 0.2395,
      "step": 196
    },
    {
      "epoch": 0.5841363973313566,
      "grad_norm": 0.524508535861969,
      "learning_rate": 1.946582412862562e-05,
      "loss": 0.2372,
      "step": 197
    },
    {
      "epoch": 0.5871015567086731,
      "grad_norm": 0.5496771335601807,
      "learning_rate": 1.9454623017175814e-05,
      "loss": 0.2338,
      "step": 198
    },
    {
      "epoch": 0.5900667160859896,
      "grad_norm": 0.5417652726173401,
      "learning_rate": 1.9443308974052574e-05,
      "loss": 0.2328,
      "step": 199
    },
    {
      "epoch": 0.5930318754633062,
      "grad_norm": 0.49683743715286255,
      "learning_rate": 1.9431882134397596e-05,
      "loss": 0.2289,
      "step": 200
    },
    {
      "epoch": 0.5959970348406227,
      "grad_norm": 0.5067436099052429,
      "learning_rate": 1.9420342634699893e-05,
      "loss": 0.2303,
      "step": 201
    },
    {
      "epoch": 0.5989621942179392,
      "grad_norm": 0.532744288444519,
      "learning_rate": 1.9408690612794146e-05,
      "loss": 0.2219,
      "step": 202
    },
    {
      "epoch": 0.6019273535952557,
      "grad_norm": 0.5270218253135681,
      "learning_rate": 1.9396926207859085e-05,
      "loss": 0.2324,
      "step": 203
    },
    {
      "epoch": 0.6048925129725723,
      "grad_norm": 0.4947966933250427,
      "learning_rate": 1.9385049560415794e-05,
      "loss": 0.2282,
      "step": 204
    },
    {
      "epoch": 0.6078576723498889,
      "grad_norm": 0.5205817222595215,
      "learning_rate": 1.9373060812326053e-05,
      "loss": 0.2279,
      "step": 205
    },
    {
      "epoch": 0.6108228317272053,
      "grad_norm": 0.5304152369499207,
      "learning_rate": 1.9360960106790645e-05,
      "loss": 0.2288,
      "step": 206
    },
    {
      "epoch": 0.6137879911045219,
      "grad_norm": 0.49558138847351074,
      "learning_rate": 1.9348747588347637e-05,
      "loss": 0.2284,
      "step": 207
    },
    {
      "epoch": 0.6167531504818384,
      "grad_norm": 0.48547008633613586,
      "learning_rate": 1.9336423402870655e-05,
      "loss": 0.2297,
      "step": 208
    },
    {
      "epoch": 0.6197183098591549,
      "grad_norm": 0.5189692974090576,
      "learning_rate": 1.932398769756714e-05,
      "loss": 0.2293,
      "step": 209
    },
    {
      "epoch": 0.6226834692364714,
      "grad_norm": 0.5088484287261963,
      "learning_rate": 1.9311440620976597e-05,
      "loss": 0.2311,
      "step": 210
    },
    {
      "epoch": 0.625648628613788,
      "grad_norm": 0.5324704051017761,
      "learning_rate": 1.9298782322968817e-05,
      "loss": 0.2377,
      "step": 211
    },
    {
      "epoch": 0.6286137879911046,
      "grad_norm": 0.5019773840904236,
      "learning_rate": 1.9286012954742078e-05,
      "loss": 0.2256,
      "step": 212
    },
    {
      "epoch": 0.631578947368421,
      "grad_norm": 0.5624535083770752,
      "learning_rate": 1.9273132668821363e-05,
      "loss": 0.2291,
      "step": 213
    },
    {
      "epoch": 0.6345441067457376,
      "grad_norm": 0.5227831602096558,
      "learning_rate": 1.9260141619056507e-05,
      "loss": 0.2268,
      "step": 214
    },
    {
      "epoch": 0.6375092661230541,
      "grad_norm": 0.5904820561408997,
      "learning_rate": 1.924703996062038e-05,
      "loss": 0.227,
      "step": 215
    },
    {
      "epoch": 0.6404744255003706,
      "grad_norm": 0.561266303062439,
      "learning_rate": 1.9233827850007028e-05,
      "loss": 0.2294,
      "step": 216
    },
    {
      "epoch": 0.6434395848776872,
      "grad_norm": 0.5293812155723572,
      "learning_rate": 1.9220505445029803e-05,
      "loss": 0.2228,
      "step": 217
    },
    {
      "epoch": 0.6464047442550037,
      "grad_norm": 0.5227711200714111,
      "learning_rate": 1.9207072904819484e-05,
      "loss": 0.2261,
      "step": 218
    },
    {
      "epoch": 0.6493699036323203,
      "grad_norm": 0.5241237282752991,
      "learning_rate": 1.9193530389822364e-05,
      "loss": 0.2247,
      "step": 219
    },
    {
      "epoch": 0.6523350630096367,
      "grad_norm": 0.5190705060958862,
      "learning_rate": 1.9179878061798347e-05,
      "loss": 0.2266,
      "step": 220
    },
    {
      "epoch": 0.6553002223869533,
      "grad_norm": 0.4801787734031677,
      "learning_rate": 1.9166116083819002e-05,
      "loss": 0.2211,
      "step": 221
    },
    {
      "epoch": 0.6582653817642699,
      "grad_norm": 0.5298479795455933,
      "learning_rate": 1.915224462026563e-05,
      "loss": 0.2145,
      "step": 222
    },
    {
      "epoch": 0.6612305411415864,
      "grad_norm": 0.5878245830535889,
      "learning_rate": 1.913826383682729e-05,
      "loss": 0.2249,
      "step": 223
    },
    {
      "epoch": 0.6641957005189029,
      "grad_norm": 0.4641963839530945,
      "learning_rate": 1.912417390049882e-05,
      "loss": 0.2195,
      "step": 224
    },
    {
      "epoch": 0.6671608598962194,
      "grad_norm": 0.4989553391933441,
      "learning_rate": 1.9109974979578852e-05,
      "loss": 0.2306,
      "step": 225
    },
    {
      "epoch": 0.670126019273536,
      "grad_norm": 0.5732155442237854,
      "learning_rate": 1.909566724366779e-05,
      "loss": 0.2246,
      "step": 226
    },
    {
      "epoch": 0.6730911786508524,
      "grad_norm": 0.5080471038818359,
      "learning_rate": 1.9081250863665794e-05,
      "loss": 0.2253,
      "step": 227
    },
    {
      "epoch": 0.676056338028169,
      "grad_norm": 0.5161991119384766,
      "learning_rate": 1.9066726011770725e-05,
      "loss": 0.2248,
      "step": 228
    },
    {
      "epoch": 0.6790214974054856,
      "grad_norm": 0.5189105868339539,
      "learning_rate": 1.905209286147611e-05,
      "loss": 0.227,
      "step": 229
    },
    {
      "epoch": 0.6819866567828021,
      "grad_norm": 0.5306798219680786,
      "learning_rate": 1.903735158756905e-05,
      "loss": 0.2253,
      "step": 230
    },
    {
      "epoch": 0.6849518161601186,
      "grad_norm": 0.523923933506012,
      "learning_rate": 1.9022502366128136e-05,
      "loss": 0.2295,
      "step": 231
    },
    {
      "epoch": 0.6879169755374351,
      "grad_norm": 0.5236137509346008,
      "learning_rate": 1.9007545374521354e-05,
      "loss": 0.222,
      "step": 232
    },
    {
      "epoch": 0.6908821349147517,
      "grad_norm": 0.5138505697250366,
      "learning_rate": 1.8992480791403957e-05,
      "loss": 0.2143,
      "step": 233
    },
    {
      "epoch": 0.6938472942920682,
      "grad_norm": 0.5385280251502991,
      "learning_rate": 1.897730879671634e-05,
      "loss": 0.2227,
      "step": 234
    },
    {
      "epoch": 0.6968124536693847,
      "grad_norm": 0.5067414045333862,
      "learning_rate": 1.8962029571681887e-05,
      "loss": 0.2223,
      "step": 235
    },
    {
      "epoch": 0.6997776130467013,
      "grad_norm": 0.4815332591533661,
      "learning_rate": 1.8946643298804794e-05,
      "loss": 0.2188,
      "step": 236
    },
    {
      "epoch": 0.7027427724240178,
      "grad_norm": 0.4668591618537903,
      "learning_rate": 1.8931150161867917e-05,
      "loss": 0.2206,
      "step": 237
    },
    {
      "epoch": 0.7057079318013343,
      "grad_norm": 0.5026832222938538,
      "learning_rate": 1.891555034593055e-05,
      "loss": 0.2228,
      "step": 238
    },
    {
      "epoch": 0.7086730911786508,
      "grad_norm": 0.5014287233352661,
      "learning_rate": 1.8899844037326227e-05,
      "loss": 0.216,
      "step": 239
    },
    {
      "epoch": 0.7116382505559674,
      "grad_norm": 0.4586634933948517,
      "learning_rate": 1.8884031423660492e-05,
      "loss": 0.2206,
      "step": 240
    },
    {
      "epoch": 0.7146034099332839,
      "grad_norm": 0.500434398651123,
      "learning_rate": 1.8868112693808664e-05,
      "loss": 0.2163,
      "step": 241
    },
    {
      "epoch": 0.7175685693106004,
      "grad_norm": 0.46279287338256836,
      "learning_rate": 1.8852088037913577e-05,
      "loss": 0.2161,
      "step": 242
    },
    {
      "epoch": 0.720533728687917,
      "grad_norm": 0.5185891389846802,
      "learning_rate": 1.8835957647383304e-05,
      "loss": 0.2221,
      "step": 243
    },
    {
      "epoch": 0.7234988880652335,
      "grad_norm": 0.48801976442337036,
      "learning_rate": 1.8819721714888878e-05,
      "loss": 0.225,
      "step": 244
    },
    {
      "epoch": 0.72646404744255,
      "grad_norm": 0.4899084270000458,
      "learning_rate": 1.8803380434362e-05,
      "loss": 0.2169,
      "step": 245
    },
    {
      "epoch": 0.7294292068198666,
      "grad_norm": 0.5264920592308044,
      "learning_rate": 1.878693400099269e-05,
      "loss": 0.2207,
      "step": 246
    },
    {
      "epoch": 0.7323943661971831,
      "grad_norm": 0.48303139209747314,
      "learning_rate": 1.877038261122699e-05,
      "loss": 0.2244,
      "step": 247
    },
    {
      "epoch": 0.7353595255744997,
      "grad_norm": 0.46109214425086975,
      "learning_rate": 1.87537264627646e-05,
      "loss": 0.216,
      "step": 248
    },
    {
      "epoch": 0.7383246849518161,
      "grad_norm": 0.4971975088119507,
      "learning_rate": 1.8736965754556527e-05,
      "loss": 0.2235,
      "step": 249
    },
    {
      "epoch": 0.7412898443291327,
      "grad_norm": 0.4700891077518463,
      "learning_rate": 1.8720100686802693e-05,
      "loss": 0.2175,
      "step": 250
    },
    {
      "epoch": 0.7442550037064493,
      "grad_norm": 0.45833539962768555,
      "learning_rate": 1.8703131460949555e-05,
      "loss": 0.216,
      "step": 251
    },
    {
      "epoch": 0.7472201630837657,
      "grad_norm": 0.47551876306533813,
      "learning_rate": 1.86860582796877e-05,
      "loss": 0.2222,
      "step": 252
    },
    {
      "epoch": 0.7501853224610823,
      "grad_norm": 0.4569433629512787,
      "learning_rate": 1.866888134694942e-05,
      "loss": 0.2165,
      "step": 253
    },
    {
      "epoch": 0.7531504818383988,
      "grad_norm": 0.43670737743377686,
      "learning_rate": 1.865160086790627e-05,
      "loss": 0.2128,
      "step": 254
    },
    {
      "epoch": 0.7561156412157154,
      "grad_norm": 0.517746090888977,
      "learning_rate": 1.8634217048966638e-05,
      "loss": 0.2149,
      "step": 255
    },
    {
      "epoch": 0.7590808005930318,
      "grad_norm": 0.46699458360671997,
      "learning_rate": 1.861673009777325e-05,
      "loss": 0.2187,
      "step": 256
    },
    {
      "epoch": 0.7620459599703484,
      "grad_norm": 0.46238595247268677,
      "learning_rate": 1.8599140223200716e-05,
      "loss": 0.2137,
      "step": 257
    },
    {
      "epoch": 0.765011119347665,
      "grad_norm": 0.47764065861701965,
      "learning_rate": 1.858144763535302e-05,
      "loss": 0.2221,
      "step": 258
    },
    {
      "epoch": 0.7679762787249814,
      "grad_norm": 0.4717821180820465,
      "learning_rate": 1.8563652545561014e-05,
      "loss": 0.2188,
      "step": 259
    },
    {
      "epoch": 0.770941438102298,
      "grad_norm": 0.4471701383590698,
      "learning_rate": 1.8545755166379898e-05,
      "loss": 0.2171,
      "step": 260
    },
    {
      "epoch": 0.7739065974796145,
      "grad_norm": 0.49311378598213196,
      "learning_rate": 1.852775571158668e-05,
      "loss": 0.2157,
      "step": 261
    },
    {
      "epoch": 0.7768717568569311,
      "grad_norm": 0.4882054924964905,
      "learning_rate": 1.850965439617761e-05,
      "loss": 0.2167,
      "step": 262
    },
    {
      "epoch": 0.7798369162342476,
      "grad_norm": 0.45021718740463257,
      "learning_rate": 1.8491451436365628e-05,
      "loss": 0.2191,
      "step": 263
    },
    {
      "epoch": 0.7828020756115641,
      "grad_norm": 0.5516721606254578,
      "learning_rate": 1.8473147049577777e-05,
      "loss": 0.2152,
      "step": 264
    },
    {
      "epoch": 0.7857672349888807,
      "grad_norm": 0.4654419422149658,
      "learning_rate": 1.8454741454452604e-05,
      "loss": 0.2216,
      "step": 265
    },
    {
      "epoch": 0.7887323943661971,
      "grad_norm": 0.4703727066516876,
      "learning_rate": 1.843623487083755e-05,
      "loss": 0.2164,
      "step": 266
    },
    {
      "epoch": 0.7916975537435137,
      "grad_norm": 0.479714959859848,
      "learning_rate": 1.8417627519786317e-05,
      "loss": 0.2152,
      "step": 267
    },
    {
      "epoch": 0.7946627131208303,
      "grad_norm": 0.4948756992816925,
      "learning_rate": 1.839891962355624e-05,
      "loss": 0.2219,
      "step": 268
    },
    {
      "epoch": 0.7976278724981468,
      "grad_norm": 0.45587557554244995,
      "learning_rate": 1.838011140560562e-05,
      "loss": 0.2157,
      "step": 269
    },
    {
      "epoch": 0.8005930318754633,
      "grad_norm": 0.46080151200294495,
      "learning_rate": 1.836120309059107e-05,
      "loss": 0.2122,
      "step": 270
    },
    {
      "epoch": 0.8035581912527798,
      "grad_norm": 0.4493560492992401,
      "learning_rate": 1.8342194904364815e-05,
      "loss": 0.2163,
      "step": 271
    },
    {
      "epoch": 0.8065233506300964,
      "grad_norm": 0.4825652539730072,
      "learning_rate": 1.8323087073971996e-05,
      "loss": 0.2116,
      "step": 272
    },
    {
      "epoch": 0.809488510007413,
      "grad_norm": 0.4308413863182068,
      "learning_rate": 1.8303879827647977e-05,
      "loss": 0.2172,
      "step": 273
    },
    {
      "epoch": 0.8124536693847294,
      "grad_norm": 0.508596658706665,
      "learning_rate": 1.8284573394815596e-05,
      "loss": 0.2186,
      "step": 274
    },
    {
      "epoch": 0.815418828762046,
      "grad_norm": 0.4650067090988159,
      "learning_rate": 1.826516800608244e-05,
      "loss": 0.2069,
      "step": 275
    },
    {
      "epoch": 0.8183839881393625,
      "grad_norm": 0.42739060521125793,
      "learning_rate": 1.8245663893238075e-05,
      "loss": 0.2102,
      "step": 276
    },
    {
      "epoch": 0.821349147516679,
      "grad_norm": 0.46640655398368835,
      "learning_rate": 1.8226061289251297e-05,
      "loss": 0.2145,
      "step": 277
    },
    {
      "epoch": 0.8243143068939955,
      "grad_norm": 0.4410681426525116,
      "learning_rate": 1.8206360428267332e-05,
      "loss": 0.2131,
      "step": 278
    },
    {
      "epoch": 0.8272794662713121,
      "grad_norm": 0.44091495871543884,
      "learning_rate": 1.8186561545605055e-05,
      "loss": 0.2122,
      "step": 279
    },
    {
      "epoch": 0.8302446256486287,
      "grad_norm": 0.4652099311351776,
      "learning_rate": 1.816666487775416e-05,
      "loss": 0.2179,
      "step": 280
    },
    {
      "epoch": 0.8332097850259451,
      "grad_norm": 0.4468926787376404,
      "learning_rate": 1.8146670662372353e-05,
      "loss": 0.219,
      "step": 281
    },
    {
      "epoch": 0.8361749444032617,
      "grad_norm": 0.4693123400211334,
      "learning_rate": 1.8126579138282502e-05,
      "loss": 0.2145,
      "step": 282
    },
    {
      "epoch": 0.8391401037805782,
      "grad_norm": 0.43998247385025024,
      "learning_rate": 1.8106390545469797e-05,
      "loss": 0.212,
      "step": 283
    },
    {
      "epoch": 0.8421052631578947,
      "grad_norm": 0.4576677978038788,
      "learning_rate": 1.8086105125078858e-05,
      "loss": 0.2141,
      "step": 284
    },
    {
      "epoch": 0.8450704225352113,
      "grad_norm": 0.42104509472846985,
      "learning_rate": 1.8065723119410885e-05,
      "loss": 0.2126,
      "step": 285
    },
    {
      "epoch": 0.8480355819125278,
      "grad_norm": 0.4544185996055603,
      "learning_rate": 1.804524477192075e-05,
      "loss": 0.2122,
      "step": 286
    },
    {
      "epoch": 0.8510007412898444,
      "grad_norm": 0.4285774528980255,
      "learning_rate": 1.8024670327214084e-05,
      "loss": 0.211,
      "step": 287
    },
    {
      "epoch": 0.8539659006671608,
      "grad_norm": 0.43197640776634216,
      "learning_rate": 1.8004000031044363e-05,
      "loss": 0.2103,
      "step": 288
    },
    {
      "epoch": 0.8569310600444774,
      "grad_norm": 0.4368259906768799,
      "learning_rate": 1.798323413030997e-05,
      "loss": 0.2134,
      "step": 289
    },
    {
      "epoch": 0.859896219421794,
      "grad_norm": 0.4898151159286499,
      "learning_rate": 1.796237287305125e-05,
      "loss": 0.2137,
      "step": 290
    },
    {
      "epoch": 0.8628613787991104,
      "grad_norm": 0.42249011993408203,
      "learning_rate": 1.7941416508447537e-05,
      "loss": 0.2052,
      "step": 291
    },
    {
      "epoch": 0.865826538176427,
      "grad_norm": 0.45801860094070435,
      "learning_rate": 1.792036528681418e-05,
      "loss": 0.2146,
      "step": 292
    },
    {
      "epoch": 0.8687916975537435,
      "grad_norm": 0.44352859258651733,
      "learning_rate": 1.789921945959958e-05,
      "loss": 0.2053,
      "step": 293
    },
    {
      "epoch": 0.8717568569310601,
      "grad_norm": 0.4158633351325989,
      "learning_rate": 1.7877979279382135e-05,
      "loss": 0.2137,
      "step": 294
    },
    {
      "epoch": 0.8747220163083765,
      "grad_norm": 0.41102075576782227,
      "learning_rate": 1.7856644999867264e-05,
      "loss": 0.2109,
      "step": 295
    },
    {
      "epoch": 0.8776871756856931,
      "grad_norm": 0.41784408688545227,
      "learning_rate": 1.783521687588437e-05,
      "loss": 0.2128,
      "step": 296
    },
    {
      "epoch": 0.8806523350630097,
      "grad_norm": 0.4097442626953125,
      "learning_rate": 1.781369516338378e-05,
      "loss": 0.2116,
      "step": 297
    },
    {
      "epoch": 0.8836174944403261,
      "grad_norm": 0.4172267019748688,
      "learning_rate": 1.779208011943371e-05,
      "loss": 0.2096,
      "step": 298
    },
    {
      "epoch": 0.8865826538176427,
      "grad_norm": 0.4201764464378357,
      "learning_rate": 1.777037200221717e-05,
      "loss": 0.2144,
      "step": 299
    },
    {
      "epoch": 0.8895478131949592,
      "grad_norm": 0.4283645451068878,
      "learning_rate": 1.77485710710289e-05,
      "loss": 0.2159,
      "step": 300
    },
    {
      "epoch": 0.8925129725722758,
      "grad_norm": 0.4021233022212982,
      "learning_rate": 1.7726677586272263e-05,
      "loss": 0.2147,
      "step": 301
    },
    {
      "epoch": 0.8954781319495922,
      "grad_norm": 0.4146812856197357,
      "learning_rate": 1.7704691809456142e-05,
      "loss": 0.2136,
      "step": 302
    },
    {
      "epoch": 0.8984432913269088,
      "grad_norm": 0.41466352343559265,
      "learning_rate": 1.7682614003191807e-05,
      "loss": 0.2117,
      "step": 303
    },
    {
      "epoch": 0.9014084507042254,
      "grad_norm": 0.45098355412483215,
      "learning_rate": 1.766044443118978e-05,
      "loss": 0.2141,
      "step": 304
    },
    {
      "epoch": 0.9043736100815419,
      "grad_norm": 0.39802679419517517,
      "learning_rate": 1.76381833582567e-05,
      "loss": 0.2119,
      "step": 305
    },
    {
      "epoch": 0.9073387694588584,
      "grad_norm": 0.4417196214199066,
      "learning_rate": 1.761583105029213e-05,
      "loss": 0.2148,
      "step": 306
    },
    {
      "epoch": 0.910303928836175,
      "grad_norm": 0.4523768723011017,
      "learning_rate": 1.7593387774285412e-05,
      "loss": 0.2116,
      "step": 307
    },
    {
      "epoch": 0.9132690882134915,
      "grad_norm": 0.42361876368522644,
      "learning_rate": 1.7570853798312462e-05,
      "loss": 0.2091,
      "step": 308
    },
    {
      "epoch": 0.916234247590808,
      "grad_norm": 0.44734466075897217,
      "learning_rate": 1.7548229391532572e-05,
      "loss": 0.2098,
      "step": 309
    },
    {
      "epoch": 0.9191994069681245,
      "grad_norm": 0.4427475333213806,
      "learning_rate": 1.7525514824185187e-05,
      "loss": 0.2159,
      "step": 310
    },
    {
      "epoch": 0.9221645663454411,
      "grad_norm": 0.4229927659034729,
      "learning_rate": 1.750271036758669e-05,
      "loss": 0.2104,
      "step": 311
    },
    {
      "epoch": 0.9251297257227576,
      "grad_norm": 0.4121291935443878,
      "learning_rate": 1.747981629412715e-05,
      "loss": 0.2076,
      "step": 312
    },
    {
      "epoch": 0.9280948851000741,
      "grad_norm": 0.45084404945373535,
      "learning_rate": 1.7456832877267083e-05,
      "loss": 0.215,
      "step": 313
    },
    {
      "epoch": 0.9310600444773907,
      "grad_norm": 0.423123836517334,
      "learning_rate": 1.7433760391534166e-05,
      "loss": 0.2082,
      "step": 314
    },
    {
      "epoch": 0.9340252038547072,
      "grad_norm": 0.4547256827354431,
      "learning_rate": 1.741059911251997e-05,
      "loss": 0.2089,
      "step": 315
    },
    {
      "epoch": 0.9369903632320237,
      "grad_norm": 0.4248969852924347,
      "learning_rate": 1.7387349316876668e-05,
      "loss": 0.2039,
      "step": 316
    },
    {
      "epoch": 0.9399555226093402,
      "grad_norm": 0.46414193511009216,
      "learning_rate": 1.7364011282313732e-05,
      "loss": 0.2081,
      "step": 317
    },
    {
      "epoch": 0.9429206819866568,
      "grad_norm": 0.4844679534435272,
      "learning_rate": 1.7340585287594605e-05,
      "loss": 0.2142,
      "step": 318
    },
    {
      "epoch": 0.9458858413639734,
      "grad_norm": 0.4147413372993469,
      "learning_rate": 1.731707161253338e-05,
      "loss": 0.2128,
      "step": 319
    },
    {
      "epoch": 0.9488510007412898,
      "grad_norm": 0.4431176781654358,
      "learning_rate": 1.7293470537991463e-05,
      "loss": 0.2104,
      "step": 320
    },
    {
      "epoch": 0.9518161601186064,
      "grad_norm": 0.45323607325553894,
      "learning_rate": 1.7269782345874204e-05,
      "loss": 0.2083,
      "step": 321
    },
    {
      "epoch": 0.9547813194959229,
      "grad_norm": 0.4210136830806732,
      "learning_rate": 1.7246007319127547e-05,
      "loss": 0.2069,
      "step": 322
    },
    {
      "epoch": 0.9577464788732394,
      "grad_norm": 0.440244197845459,
      "learning_rate": 1.7222145741734625e-05,
      "loss": 0.2021,
      "step": 323
    },
    {
      "epoch": 0.9607116382505559,
      "grad_norm": 0.41491949558258057,
      "learning_rate": 1.7198197898712402e-05,
      "loss": 0.2086,
      "step": 324
    },
    {
      "epoch": 0.9636767976278725,
      "grad_norm": 0.4270980954170227,
      "learning_rate": 1.717416407610824e-05,
      "loss": 0.2063,
      "step": 325
    },
    {
      "epoch": 0.9666419570051891,
      "grad_norm": 0.436722993850708,
      "learning_rate": 1.7150044560996488e-05,
      "loss": 0.2095,
      "step": 326
    },
    {
      "epoch": 0.9696071163825055,
      "grad_norm": 0.42856717109680176,
      "learning_rate": 1.7125839641475074e-05,
      "loss": 0.2151,
      "step": 327
    },
    {
      "epoch": 0.9725722757598221,
      "grad_norm": 0.4263397753238678,
      "learning_rate": 1.7101549606662025e-05,
      "loss": 0.21,
      "step": 328
    },
    {
      "epoch": 0.9755374351371386,
      "grad_norm": 0.43046820163726807,
      "learning_rate": 1.7077174746692054e-05,
      "loss": 0.211,
      "step": 329
    },
    {
      "epoch": 0.9785025945144552,
      "grad_norm": 0.4144728481769562,
      "learning_rate": 1.7052715352713076e-05,
      "loss": 0.2069,
      "step": 330
    },
    {
      "epoch": 0.9814677538917717,
      "grad_norm": 0.4112738072872162,
      "learning_rate": 1.7028171716882714e-05,
      "loss": 0.209,
      "step": 331
    },
    {
      "epoch": 0.9844329132690882,
      "grad_norm": 0.4484747052192688,
      "learning_rate": 1.7003544132364847e-05,
      "loss": 0.2118,
      "step": 332
    },
    {
      "epoch": 0.9873980726464048,
      "grad_norm": 0.4388020634651184,
      "learning_rate": 1.6978832893326074e-05,
      "loss": 0.2069,
      "step": 333
    },
    {
      "epoch": 0.9903632320237212,
      "grad_norm": 0.45029163360595703,
      "learning_rate": 1.6954038294932215e-05,
      "loss": 0.2153,
      "step": 334
    },
    {
      "epoch": 0.9933283914010378,
      "grad_norm": 0.4059215486049652,
      "learning_rate": 1.692916063334479e-05,
      "loss": 0.1999,
      "step": 335
    },
    {
      "epoch": 0.9962935507783544,
      "grad_norm": 0.430908739566803,
      "learning_rate": 1.690420020571747e-05,
      "loss": 0.2101,
      "step": 336
    },
    {
      "epoch": 0.9992587101556709,
      "grad_norm": 0.4230971336364746,
      "learning_rate": 1.6879157310192537e-05,
      "loss": 0.2033,
      "step": 337
    },
    {
      "epoch": 1.0022238695329875,
      "grad_norm": 0.37717196345329285,
      "learning_rate": 1.685403224589731e-05,
      "loss": 0.1831,
      "step": 338
    },
    {
      "epoch": 1.005189028910304,
      "grad_norm": 0.4386158287525177,
      "learning_rate": 1.6828825312940594e-05,
      "loss": 0.1782,
      "step": 339
    },
    {
      "epoch": 1.0081541882876204,
      "grad_norm": 0.3862016201019287,
      "learning_rate": 1.6803536812409077e-05,
      "loss": 0.1779,
      "step": 340
    },
    {
      "epoch": 1.011119347664937,
      "grad_norm": 0.4159914553165436,
      "learning_rate": 1.6778167046363735e-05,
      "loss": 0.1699,
      "step": 341
    },
    {
      "epoch": 1.0140845070422535,
      "grad_norm": 0.5072054266929626,
      "learning_rate": 1.675271631783623e-05,
      "loss": 0.1738,
      "step": 342
    },
    {
      "epoch": 1.01704966641957,
      "grad_norm": 0.41934165358543396,
      "learning_rate": 1.672718493082529e-05,
      "loss": 0.1722,
      "step": 343
    },
    {
      "epoch": 1.0200148257968866,
      "grad_norm": 0.4099801480770111,
      "learning_rate": 1.6701573190293076e-05,
      "loss": 0.1713,
      "step": 344
    },
    {
      "epoch": 1.0229799851742032,
      "grad_norm": 0.44231241941452026,
      "learning_rate": 1.667588140216154e-05,
      "loss": 0.1675,
      "step": 345
    },
    {
      "epoch": 1.0259451445515197,
      "grad_norm": 0.4088985323905945,
      "learning_rate": 1.6650109873308763e-05,
      "loss": 0.1736,
      "step": 346
    },
    {
      "epoch": 1.028910303928836,
      "grad_norm": 0.4394180476665497,
      "learning_rate": 1.6624258911565312e-05,
      "loss": 0.1727,
      "step": 347
    },
    {
      "epoch": 1.0318754633061527,
      "grad_norm": 0.4399167001247406,
      "learning_rate": 1.6598328825710536e-05,
      "loss": 0.1732,
      "step": 348
    },
    {
      "epoch": 1.0348406226834692,
      "grad_norm": 0.46241313219070435,
      "learning_rate": 1.6572319925468892e-05,
      "loss": 0.1759,
      "step": 349
    },
    {
      "epoch": 1.0378057820607858,
      "grad_norm": 0.40860143303871155,
      "learning_rate": 1.654623252150624e-05,
      "loss": 0.1711,
      "step": 350
    },
    {
      "epoch": 1.0407709414381023,
      "grad_norm": 0.4109824597835541,
      "learning_rate": 1.6520066925426146e-05,
      "loss": 0.1799,
      "step": 351
    },
    {
      "epoch": 1.043736100815419,
      "grad_norm": 0.40983447432518005,
      "learning_rate": 1.6493823449766137e-05,
      "loss": 0.1752,
      "step": 352
    },
    {
      "epoch": 1.0467012601927355,
      "grad_norm": 0.4187794029712677,
      "learning_rate": 1.6467502407993995e-05,
      "loss": 0.1753,
      "step": 353
    },
    {
      "epoch": 1.0496664195700518,
      "grad_norm": 0.40739187598228455,
      "learning_rate": 1.644110411450398e-05,
      "loss": 0.1771,
      "step": 354
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 0.41065889596939087,
      "learning_rate": 1.6414628884613106e-05,
      "loss": 0.1711,
      "step": 355
    },
    {
      "epoch": 1.055596738324685,
      "grad_norm": 0.43635791540145874,
      "learning_rate": 1.6388077034557355e-05,
      "loss": 0.175,
      "step": 356
    },
    {
      "epoch": 1.0585618977020015,
      "grad_norm": 0.432016521692276,
      "learning_rate": 1.6361448881487913e-05,
      "loss": 0.1754,
      "step": 357
    },
    {
      "epoch": 1.061527057079318,
      "grad_norm": 0.43051794171333313,
      "learning_rate": 1.6334744743467366e-05,
      "loss": 0.177,
      "step": 358
    },
    {
      "epoch": 1.0644922164566346,
      "grad_norm": 0.39719873666763306,
      "learning_rate": 1.6307964939465914e-05,
      "loss": 0.1732,
      "step": 359
    },
    {
      "epoch": 1.0674573758339512,
      "grad_norm": 0.40763285756111145,
      "learning_rate": 1.628110978935756e-05,
      "loss": 0.1744,
      "step": 360
    },
    {
      "epoch": 1.0704225352112675,
      "grad_norm": 0.40124091506004333,
      "learning_rate": 1.625417961391628e-05,
      "loss": 0.1759,
      "step": 361
    },
    {
      "epoch": 1.073387694588584,
      "grad_norm": 0.41654643416404724,
      "learning_rate": 1.62271747348122e-05,
      "loss": 0.1751,
      "step": 362
    },
    {
      "epoch": 1.0763528539659006,
      "grad_norm": 0.39688020944595337,
      "learning_rate": 1.6200095474607753e-05,
      "loss": 0.1704,
      "step": 363
    },
    {
      "epoch": 1.0793180133432172,
      "grad_norm": 0.3920522928237915,
      "learning_rate": 1.6172942156753822e-05,
      "loss": 0.168,
      "step": 364
    },
    {
      "epoch": 1.0822831727205338,
      "grad_norm": 0.4264538586139679,
      "learning_rate": 1.614571510558588e-05,
      "loss": 0.174,
      "step": 365
    },
    {
      "epoch": 1.0852483320978503,
      "grad_norm": 0.3995387554168701,
      "learning_rate": 1.6118414646320115e-05,
      "loss": 0.1718,
      "step": 366
    },
    {
      "epoch": 1.0882134914751669,
      "grad_norm": 0.36994609236717224,
      "learning_rate": 1.6091041105049542e-05,
      "loss": 0.1726,
      "step": 367
    },
    {
      "epoch": 1.0911786508524832,
      "grad_norm": 0.3809909224510193,
      "learning_rate": 1.6063594808740112e-05,
      "loss": 0.1741,
      "step": 368
    },
    {
      "epoch": 1.0941438102297998,
      "grad_norm": 0.4052869975566864,
|
"learning_rate": 1.6036076085226813e-05, |
|
"loss": 0.1728, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.0971089696071163, |
|
"grad_norm": 0.38783711194992065, |
|
"learning_rate": 1.6008485263209742e-05, |
|
"loss": 0.1701, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.100074128984433, |
|
"grad_norm": 0.4025594890117645, |
|
"learning_rate": 1.598082267225018e-05, |
|
"loss": 0.1743, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.1030392883617495, |
|
"grad_norm": 0.4071436822414398, |
|
"learning_rate": 1.595308864276666e-05, |
|
"loss": 0.1726, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.106004447739066, |
|
"grad_norm": 0.446532279253006, |
|
"learning_rate": 1.592528350603103e-05, |
|
"loss": 0.1708, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.1089696071163826, |
|
"grad_norm": 0.3993205726146698, |
|
"learning_rate": 1.5897407594164468e-05, |
|
"loss": 0.1805, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.111934766493699, |
|
"grad_norm": 0.42292505502700806, |
|
"learning_rate": 1.586946124013354e-05, |
|
"loss": 0.1823, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.1148999258710155, |
|
"grad_norm": 0.41676023602485657, |
|
"learning_rate": 1.5841444777746232e-05, |
|
"loss": 0.1756, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.117865085248332, |
|
"grad_norm": 0.3944017291069031, |
|
"learning_rate": 1.5813358541647915e-05, |
|
"loss": 0.1734, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.1208302446256486, |
|
"grad_norm": 0.38493022322654724, |
|
"learning_rate": 1.578520286731741e-05, |
|
"loss": 0.1772, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.1237954040029652, |
|
"grad_norm": 0.4245246350765228, |
|
"learning_rate": 1.575697809106292e-05, |
|
"loss": 0.1743, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.1267605633802817, |
|
"grad_norm": 0.3895925283432007, |
|
"learning_rate": 1.5728684550018066e-05, |
|
"loss": 0.1704, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.1297257227575983, |
|
"grad_norm": 0.3827330768108368, |
|
"learning_rate": 1.570032258213783e-05, |
|
"loss": 0.1746, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.1326908821349146, |
|
"grad_norm": 0.3874651789665222, |
|
"learning_rate": 1.5671892526194515e-05, |
|
"loss": 0.1751, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.1356560415122312, |
|
"grad_norm": 0.4029993712902069, |
|
"learning_rate": 1.564339472177373e-05, |
|
"loss": 0.1771, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.1386212008895478, |
|
"grad_norm": 0.3838706314563751, |
|
"learning_rate": 1.561482950927029e-05, |
|
"loss": 0.1732, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.1415863602668643, |
|
"grad_norm": 0.3896842896938324, |
|
"learning_rate": 1.5586197229884185e-05, |
|
"loss": 0.1737, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.144551519644181, |
|
"grad_norm": 0.4098159372806549, |
|
"learning_rate": 1.5557498225616488e-05, |
|
"loss": 0.1769, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.1475166790214975, |
|
"grad_norm": 0.4123744070529938, |
|
"learning_rate": 1.5528732839265272e-05, |
|
"loss": 0.177, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.150481838398814, |
|
"grad_norm": 0.3826339542865753, |
|
"learning_rate": 1.549990141442153e-05, |
|
"loss": 0.1708, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.1534469977761304, |
|
"grad_norm": 0.38323384523391724, |
|
"learning_rate": 1.5471004295465034e-05, |
|
"loss": 0.1759, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.156412157153447, |
|
"grad_norm": 0.3751480281352997, |
|
"learning_rate": 1.5442041827560274e-05, |
|
"loss": 0.1742, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.1593773165307635, |
|
"grad_norm": 0.42600059509277344, |
|
"learning_rate": 1.5413014356652287e-05, |
|
"loss": 0.1726, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.16234247590808, |
|
"grad_norm": 0.4077330529689789, |
|
"learning_rate": 1.538392222946255e-05, |
|
"loss": 0.1708, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.1653076352853966, |
|
"grad_norm": 0.39985400438308716, |
|
"learning_rate": 1.5354765793484834e-05, |
|
"loss": 0.1753, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.1682727946627132, |
|
"grad_norm": 0.4099324941635132, |
|
"learning_rate": 1.5325545396981053e-05, |
|
"loss": 0.172, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.1712379540400297, |
|
"grad_norm": 0.39008331298828125, |
|
"learning_rate": 1.5296261388977107e-05, |
|
"loss": 0.172, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.174203113417346, |
|
"grad_norm": 0.36513862013816833, |
|
"learning_rate": 1.52669141192587e-05, |
|
"loss": 0.1699, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.1771682727946626, |
|
"grad_norm": 0.43505406379699707, |
|
"learning_rate": 1.5237503938367186e-05, |
|
"loss": 0.1766, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.1801334321719792, |
|
"grad_norm": 0.4039159417152405, |
|
"learning_rate": 1.5208031197595357e-05, |
|
"loss": 0.1744, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.1830985915492958, |
|
"grad_norm": 0.3673771619796753, |
|
"learning_rate": 1.5178496248983254e-05, |
|
"loss": 0.1719, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.1860637509266123, |
|
"grad_norm": 0.3980352580547333, |
|
"learning_rate": 1.5148899445313983e-05, |
|
"loss": 0.1722, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1890289103039289, |
|
"grad_norm": 0.39053529500961304, |
|
"learning_rate": 1.5119241140109466e-05, |
|
"loss": 0.1743, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.1919940696812454, |
|
"grad_norm": 0.3899192214012146, |
|
"learning_rate": 1.5089521687626243e-05, |
|
"loss": 0.1723, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.1949592290585618, |
|
"grad_norm": 0.4070497453212738, |
|
"learning_rate": 1.505974144285124e-05, |
|
"loss": 0.1692, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.1979243884358783, |
|
"grad_norm": 0.3976007103919983, |
|
"learning_rate": 1.5029900761497507e-05, |
|
"loss": 0.1781, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.200889547813195, |
|
"grad_norm": 0.41118377447128296, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.1746, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.2038547071905115, |
|
"grad_norm": 0.41726142168045044, |
|
"learning_rate": 1.4970039515511303e-05, |
|
"loss": 0.179, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.206819866567828, |
|
"grad_norm": 0.3854449391365051, |
|
"learning_rate": 1.4940019665897363e-05, |
|
"loss": 0.1737, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.2097850259451446, |
|
"grad_norm": 0.45727819204330444, |
|
"learning_rate": 1.4909940809733223e-05, |
|
"loss": 0.1723, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.2127501853224611, |
|
"grad_norm": 0.3889809250831604, |
|
"learning_rate": 1.4879803306298736e-05, |
|
"loss": 0.1714, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.2157153446997775, |
|
"grad_norm": 0.4237361550331116, |
|
"learning_rate": 1.4849607515574276e-05, |
|
"loss": 0.1724, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.218680504077094, |
|
"grad_norm": 0.4138452112674713, |
|
"learning_rate": 1.4819353798236427e-05, |
|
"loss": 0.1725, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.2216456634544106, |
|
"grad_norm": 0.4682404100894928, |
|
"learning_rate": 1.4789042515653687e-05, |
|
"loss": 0.1727, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.2246108228317272, |
|
"grad_norm": 0.38663214445114136, |
|
"learning_rate": 1.4758674029882152e-05, |
|
"loss": 0.176, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.2275759822090437, |
|
"grad_norm": 0.391353577375412, |
|
"learning_rate": 1.4728248703661183e-05, |
|
"loss": 0.1775, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.2305411415863603, |
|
"grad_norm": 0.4257277846336365, |
|
"learning_rate": 1.4697766900409076e-05, |
|
"loss": 0.1773, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.2335063009636769, |
|
"grad_norm": 0.38307616114616394, |
|
"learning_rate": 1.466722898421873e-05, |
|
"loss": 0.1739, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.2364714603409934, |
|
"grad_norm": 0.3973027467727661, |
|
"learning_rate": 1.4636635319853274e-05, |
|
"loss": 0.1738, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.2394366197183098, |
|
"grad_norm": 0.4155060052871704, |
|
"learning_rate": 1.4605986272741748e-05, |
|
"loss": 0.1737, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.2424017790956263, |
|
"grad_norm": 0.40221065282821655, |
|
"learning_rate": 1.4575282208974704e-05, |
|
"loss": 0.1718, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.2453669384729429, |
|
"grad_norm": 0.41945594549179077, |
|
"learning_rate": 1.4544523495299843e-05, |
|
"loss": 0.1772, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.2483320978502594, |
|
"grad_norm": 0.4217647612094879, |
|
"learning_rate": 1.4513710499117648e-05, |
|
"loss": 0.1816, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.251297257227576, |
|
"grad_norm": 0.4151117205619812, |
|
"learning_rate": 1.4482843588476976e-05, |
|
"loss": 0.1718, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.2542624166048926, |
|
"grad_norm": 0.38060155510902405, |
|
"learning_rate": 1.445192313207067e-05, |
|
"loss": 0.1725, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.257227575982209, |
|
"grad_norm": 0.4043025076389313, |
|
"learning_rate": 1.4420949499231172e-05, |
|
"loss": 0.1735, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.2601927353595257, |
|
"grad_norm": 0.40334248542785645, |
|
"learning_rate": 1.4389923059926064e-05, |
|
"loss": 0.1748, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.263157894736842, |
|
"grad_norm": 0.3861962854862213, |
|
"learning_rate": 1.4358844184753713e-05, |
|
"loss": 0.1751, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.2661230541141586, |
|
"grad_norm": 0.3862569034099579, |
|
"learning_rate": 1.432771324493879e-05, |
|
"loss": 0.1766, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.2690882134914752, |
|
"grad_norm": 0.3655155897140503, |
|
"learning_rate": 1.4296530612327864e-05, |
|
"loss": 0.1738, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.2720533728687917, |
|
"grad_norm": 0.45015332102775574, |
|
"learning_rate": 1.4265296659384956e-05, |
|
"loss": 0.1758, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.2750185322461083, |
|
"grad_norm": 0.40792006254196167, |
|
"learning_rate": 1.4234011759187084e-05, |
|
"loss": 0.1753, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.2779836916234246, |
|
"grad_norm": 0.3909926116466522, |
|
"learning_rate": 1.4202676285419811e-05, |
|
"loss": 0.1775, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.2809488510007414, |
|
"grad_norm": 0.38805529475212097, |
|
"learning_rate": 1.4171290612372781e-05, |
|
"loss": 0.1772, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.2839140103780577, |
|
"grad_norm": 0.3860710859298706, |
|
"learning_rate": 1.4139855114935253e-05, |
|
"loss": 0.17, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.2868791697553743, |
|
"grad_norm": 0.42617350816726685, |
|
"learning_rate": 1.410837016859161e-05, |
|
"loss": 0.1743, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.2898443291326909, |
|
"grad_norm": 0.3832889795303345, |
|
"learning_rate": 1.4076836149416889e-05, |
|
"loss": 0.1698, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.2928094885100074, |
|
"grad_norm": 0.4039870500564575, |
|
"learning_rate": 1.4045253434072278e-05, |
|
"loss": 0.1752, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.295774647887324, |
|
"grad_norm": 0.38493219017982483, |
|
"learning_rate": 1.4013622399800628e-05, |
|
"loss": 0.1737, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.2987398072646406, |
|
"grad_norm": 0.4500020146369934, |
|
"learning_rate": 1.3981943424421932e-05, |
|
"loss": 0.1704, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.3017049666419571, |
|
"grad_norm": 0.4027196764945984, |
|
"learning_rate": 1.3950216886328818e-05, |
|
"loss": 0.1699, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.3046701260192735, |
|
"grad_norm": 0.37555673718452454, |
|
"learning_rate": 1.3918443164482048e-05, |
|
"loss": 0.1733, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.30763528539659, |
|
"grad_norm": 0.3900480568408966, |
|
"learning_rate": 1.3886622638405953e-05, |
|
"loss": 0.168, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.3106004447739066, |
|
"grad_norm": 0.40044647455215454, |
|
"learning_rate": 1.3854755688183941e-05, |
|
"loss": 0.1681, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.3135656041512231, |
|
"grad_norm": 0.39409545063972473, |
|
"learning_rate": 1.3822842694453923e-05, |
|
"loss": 0.1731, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.3165307635285397, |
|
"grad_norm": 0.37648630142211914, |
|
"learning_rate": 1.3790884038403796e-05, |
|
"loss": 0.1711, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.3194959229058563, |
|
"grad_norm": 0.3983948826789856, |
|
"learning_rate": 1.375888010176686e-05, |
|
"loss": 0.1782, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.3224610822831728, |
|
"grad_norm": 0.42869091033935547, |
|
"learning_rate": 1.3726831266817278e-05, |
|
"loss": 0.1714, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.3254262416604892, |
|
"grad_norm": 0.43148529529571533, |
|
"learning_rate": 1.3694737916365517e-05, |
|
"loss": 0.1734, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.3283914010378057, |
|
"grad_norm": 0.37700700759887695, |
|
"learning_rate": 1.3662600433753746e-05, |
|
"loss": 0.1732, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.3313565604151223, |
|
"grad_norm": 0.3717349171638489, |
|
"learning_rate": 1.3630419202851287e-05, |
|
"loss": 0.1722, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.3343217197924389, |
|
"grad_norm": 0.401803195476532, |
|
"learning_rate": 1.3598194608050011e-05, |
|
"loss": 0.1727, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.3372868791697554, |
|
"grad_norm": 0.373855322599411, |
|
"learning_rate": 1.3565927034259757e-05, |
|
"loss": 0.1724, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.340252038547072, |
|
"grad_norm": 0.40752193331718445, |
|
"learning_rate": 1.3533616866903736e-05, |
|
"loss": 0.1741, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.3432171979243885, |
|
"grad_norm": 0.37844231724739075, |
|
"learning_rate": 1.3501264491913909e-05, |
|
"loss": 0.1759, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.3461823573017049, |
|
"grad_norm": 0.37028035521507263, |
|
"learning_rate": 1.3468870295726399e-05, |
|
"loss": 0.1743, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.3491475166790214, |
|
"grad_norm": 0.3744882047176361, |
|
"learning_rate": 1.3436434665276865e-05, |
|
"loss": 0.176, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.352112676056338, |
|
"grad_norm": 0.34571152925491333, |
|
"learning_rate": 1.3403957987995884e-05, |
|
"loss": 0.1725, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.3550778354336546, |
|
"grad_norm": 0.3648885488510132, |
|
"learning_rate": 1.3371440651804313e-05, |
|
"loss": 0.1752, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.3580429948109711, |
|
"grad_norm": 0.37405288219451904, |
|
"learning_rate": 1.3338883045108674e-05, |
|
"loss": 0.1716, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.3610081541882877, |
|
"grad_norm": 0.3600881099700928, |
|
"learning_rate": 1.3306285556796494e-05, |
|
"loss": 0.166, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.3639733135656043, |
|
"grad_norm": 0.38361856341362, |
|
"learning_rate": 1.327364857623168e-05, |
|
"loss": 0.1686, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.3669384729429206, |
|
"grad_norm": 0.4009436070919037, |
|
"learning_rate": 1.3240972493249846e-05, |
|
"loss": 0.1765, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.3699036323202372, |
|
"grad_norm": 0.3752938508987427, |
|
"learning_rate": 1.3208257698153677e-05, |
|
"loss": 0.1673, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.3728687916975537, |
|
"grad_norm": 0.3697980046272278, |
|
"learning_rate": 1.3175504581708261e-05, |
|
"loss": 0.1696, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.3758339510748703, |
|
"grad_norm": 0.4123381972312927, |
|
"learning_rate": 1.3142713535136413e-05, |
|
"loss": 0.1751, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.3787991104521868, |
|
"grad_norm": 0.3773389458656311, |
|
"learning_rate": 1.3109884950114007e-05, |
|
"loss": 0.175, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.3817642698295034, |
|
"grad_norm": 0.37522801756858826, |
|
"learning_rate": 1.3077019218765306e-05, |
|
"loss": 0.1721, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.38472942920682, |
|
"grad_norm": 0.3822220265865326, |
|
"learning_rate": 1.3044116733658261e-05, |
|
"loss": 0.1741, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.3876945885841363, |
|
"grad_norm": 0.33929958939552307, |
|
"learning_rate": 1.3011177887799846e-05, |
|
"loss": 0.1669, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.3906597479614529, |
|
"grad_norm": 0.3751008212566376, |
|
"learning_rate": 1.2978203074631335e-05, |
|
"loss": 0.173, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.3936249073387694, |
|
"grad_norm": 0.3586931526660919, |
|
"learning_rate": 1.2945192688023625e-05, |
|
"loss": 0.1707, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.396590066716086, |
|
"grad_norm": 0.3598410189151764, |
|
"learning_rate": 1.2912147122272523e-05, |
|
"loss": 0.1673, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.3995552260934025, |
|
"grad_norm": 0.37330952286720276, |
|
"learning_rate": 1.287906677209403e-05, |
|
"loss": 0.1705, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.402520385470719, |
|
"grad_norm": 0.3800138533115387, |
|
"learning_rate": 1.2845952032619651e-05, |
|
"loss": 0.1707, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.4054855448480357, |
|
"grad_norm": 0.34873542189598083, |
|
"learning_rate": 1.2812803299391629e-05, |
|
"loss": 0.1716, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"grad_norm": 0.39961710572242737, |
|
"learning_rate": 1.2779620968358276e-05, |
|
"loss": 0.1713, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.4114158636026686, |
|
"grad_norm": 0.37982645630836487, |
|
"learning_rate": 1.2746405435869198e-05, |
|
"loss": 0.1713, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.4143810229799851, |
|
"grad_norm": 0.3620937764644623, |
|
"learning_rate": 1.271315709867059e-05, |
|
"loss": 0.1712, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.4173461823573017, |
|
"grad_norm": 0.36581623554229736, |
|
"learning_rate": 1.2679876353900482e-05, |
|
"loss": 0.1725, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.4203113417346183, |
|
"grad_norm": 0.36710691452026367, |
|
"learning_rate": 1.2646563599083997e-05, |
|
"loss": 0.1706, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.4232765011119348, |
|
"grad_norm": 0.3968733847141266, |
|
"learning_rate": 1.2613219232128608e-05, |
|
"loss": 0.1704, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.4262416604892514, |
|
"grad_norm": 0.38720619678497314, |
|
"learning_rate": 1.2579843651319382e-05, |
|
"loss": 0.1714, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.4292068198665677, |
|
"grad_norm": 0.36827707290649414, |
|
"learning_rate": 1.2546437255314223e-05, |
|
"loss": 0.1715, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.4321719792438843, |
|
"grad_norm": 0.37707608938217163, |
|
"learning_rate": 1.2513000443139112e-05, |
|
"loss": 0.1735, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.4351371386212008, |
|
"grad_norm": 0.40368345379829407, |
|
"learning_rate": 1.2479533614183334e-05, |
|
"loss": 0.1726, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.4381022979985174, |
|
"grad_norm": 0.3910945951938629, |
|
"learning_rate": 1.2446037168194716e-05, |
|
"loss": 0.1755, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.441067457375834, |
|
"grad_norm": 0.37151867151260376, |
|
"learning_rate": 1.2412511505274845e-05, |
|
"loss": 0.1771, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.4440326167531505, |
|
"grad_norm": 0.35527053475379944, |
|
"learning_rate": 1.23789570258743e-05, |
|
"loss": 0.1677, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.446997776130467, |
|
"grad_norm": 0.3575199544429779, |
|
"learning_rate": 1.2345374130787855e-05, |
|
"loss": 0.1715, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.4499629355077834, |
|
"grad_norm": 0.35391053557395935, |
|
"learning_rate": 1.23117632211497e-05, |
|
"loss": 0.1716, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.4529280948851, |
|
"grad_norm": 0.3692530691623688, |
|
"learning_rate": 1.2278124698428643e-05, |
|
"loss": 0.1689, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.4558932542624166, |
|
"grad_norm": 0.35716333985328674, |
|
"learning_rate": 1.2244458964423328e-05, |
|
"loss": 0.1682, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.4588584136397331, |
|
"grad_norm": 0.3617175221443176, |
|
"learning_rate": 1.221076642125742e-05, |
|
"loss": 0.1749, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.4618235730170497, |
|
"grad_norm": 0.3705756366252899, |
|
"learning_rate": 1.2177047471374808e-05, |
|
"loss": 0.1706, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.4647887323943662, |
|
"grad_norm": 0.35617804527282715, |
|
"learning_rate": 1.214330251753481e-05, |
|
"loss": 0.1704, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.4677538917716828, |
|
"grad_norm": 0.3682483732700348, |
|
"learning_rate": 1.2109531962807333e-05, |
|
"loss": 0.1688, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.4707190511489991, |
|
"grad_norm": 0.3654380738735199, |
|
"learning_rate": 1.207573621056809e-05, |
|
"loss": 0.1654, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.4736842105263157, |
|
"grad_norm": 0.39695996046066284, |
|
"learning_rate": 1.2041915664493763e-05, |
|
"loss": 0.1705, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.4766493699036323, |
|
"grad_norm": 0.3834567964076996, |
|
"learning_rate": 1.2008070728557186e-05, |
|
"loss": 0.1737, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.4796145292809488, |
|
"grad_norm": 0.3756810426712036, |
|
"learning_rate": 1.1974201807022525e-05, |
|
"loss": 0.1744, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.4825796886582654, |
|
"grad_norm": 0.43872207403182983, |
|
"learning_rate": 1.1940309304440434e-05, |
|
"loss": 0.1725, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.485544848035582, |
|
"grad_norm": 0.4155595004558563, |
|
"learning_rate": 1.1906393625643244e-05, |
|
"loss": 0.167, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.4885100074128985, |
|
"grad_norm": 0.37012434005737305, |
|
"learning_rate": 1.1872455175740111e-05, |
|
"loss": 0.1714, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.4914751667902149, |
|
"grad_norm": 0.4194466173648834, |
|
"learning_rate": 1.1838494360112185e-05, |
|
"loss": 0.1731, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.4944403261675316, |
|
"grad_norm": 0.38535988330841064, |
|
"learning_rate": 1.1804511584407763e-05, |
|
"loss": 0.1719, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.497405485544848, |
|
"grad_norm": 0.3790641725063324, |
|
"learning_rate": 1.1770507254537454e-05, |
|
"loss": 0.1715, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.5003706449221645, |
|
"grad_norm": 0.40725064277648926, |
|
"learning_rate": 1.1736481776669307e-05, |
|
"loss": 0.1672, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.503335804299481, |
|
"grad_norm": 0.3657318949699402, |
|
"learning_rate": 1.1702435557223988e-05, |
|
"loss": 0.1701, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.5063009636767977, |
|
"grad_norm": 0.41225719451904297, |
|
"learning_rate": 1.1668369002869912e-05, |
|
"loss": 0.1703, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.5092661230541142, |
|
"grad_norm": 0.38106808066368103, |
|
"learning_rate": 1.1634282520518382e-05, |
|
"loss": 0.1705, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.5122312824314306, |
|
"grad_norm": 0.43504565954208374, |
|
"learning_rate": 1.1600176517318742e-05, |
|
"loss": 0.1712, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.5151964418087474, |
|
"grad_norm": 0.37367385625839233, |
|
"learning_rate": 1.1566051400653486e-05, |
|
"loss": 0.1708, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.5181616011860637, |
|
"grad_norm": 0.3934025168418884, |
|
"learning_rate": 1.153190757813343e-05, |
|
"loss": 0.1723, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.5211267605633803, |
|
"grad_norm": 0.35954198241233826, |
|
"learning_rate": 1.1497745457592817e-05, |
|
"loss": 0.1686, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.5240919199406968, |
|
"grad_norm": 0.3657681345939636, |
|
"learning_rate": 1.1463565447084446e-05, |
|
"loss": 0.1715, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.5270570793180134, |
|
"grad_norm": 0.3832554817199707, |
|
"learning_rate": 1.142936795487482e-05, |
|
"loss": 0.1725, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.53002223869533, |
|
"grad_norm": 0.36780476570129395, |
|
"learning_rate": 1.1395153389439232e-05, |
|
"loss": 0.1686, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.5329873980726463, |
|
"grad_norm": 0.37948641180992126, |
|
"learning_rate": 1.1360922159456929e-05, |
|
"loss": 0.169, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.535952557449963, |
|
"grad_norm": 0.37667399644851685, |
|
"learning_rate": 1.1326674673806195e-05, |
|
"loss": 0.1694, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.5389177168272794, |
|
"grad_norm": 0.3817925751209259, |
|
"learning_rate": 1.129241134155949e-05, |
|
"loss": 0.1684, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.541882876204596, |
|
"grad_norm": 0.3880022168159485, |
|
"learning_rate": 1.1258132571978555e-05, |
|
"loss": 0.1681, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.5448480355819125, |
|
"grad_norm": 0.39235079288482666, |
|
"learning_rate": 1.1223838774509515e-05, |
|
"loss": 0.1724, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.547813194959229, |
|
"grad_norm": 0.3959818184375763, |
|
"learning_rate": 1.1189530358778005e-05, |
|
"loss": 0.1653, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.5507783543365457, |
|
"grad_norm": 0.3723091185092926, |
|
"learning_rate": 1.1155207734584264e-05, |
|
"loss": 0.1715, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.553743513713862, |
|
"grad_norm": 0.3744927644729614, |
|
"learning_rate": 1.1120871311898254e-05, |
|
"loss": 0.1709, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.5567086730911788, |
|
"grad_norm": 0.37305641174316406, |
|
"learning_rate": 1.1086521500854746e-05, |
|
"loss": 0.1705, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.5596738324684951, |
|
"grad_norm": 0.3628908693790436, |
|
"learning_rate": 1.1052158711748435e-05, |
|
"loss": 0.1703, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.5626389918458117, |
|
"grad_norm": 0.3602434992790222, |
|
"learning_rate": 1.1017783355029027e-05, |
|
"loss": 0.1733, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.5656041512231282, |
|
"grad_norm": 0.3662010133266449, |
|
"learning_rate": 1.0983395841296349e-05, |
|
"loss": 0.1722, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.5685693106004448, |
|
"grad_norm": 0.38595232367515564, |
|
"learning_rate": 1.0948996581295437e-05, |
|
"loss": 0.1722, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.5715344699777614, |
|
"grad_norm": 0.3809836804866791, |
|
"learning_rate": 1.0914585985911632e-05, |
|
"loss": 0.1704, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.5744996293550777, |
|
"grad_norm": 0.3592289686203003, |
|
"learning_rate": 1.0880164466165675e-05, |
|
"loss": 0.1732, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.5774647887323945, |
|
"grad_norm": 0.3625737428665161, |
|
"learning_rate": 1.084573243320878e-05, |
|
"loss": 0.1743, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.5804299481097108, |
|
"grad_norm": 0.3582081198692322, |
|
"learning_rate": 1.0811290298317755e-05, |
|
"loss": 0.171, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.5833951074870274, |
|
"grad_norm": 0.3777657449245453, |
|
"learning_rate": 1.0776838472890065e-05, |
|
"loss": 0.1711, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.586360266864344, |
|
"grad_norm": 0.34954240918159485, |
|
"learning_rate": 1.0742377368438915e-05, |
|
"loss": 0.1685, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.5893254262416605, |
|
"grad_norm": 0.3632443845272064, |
|
"learning_rate": 1.0707907396588362e-05, |
|
"loss": 0.1689, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.592290585618977, |
|
"grad_norm": 0.35810449719429016, |
|
"learning_rate": 1.0673428969068365e-05, |
|
"loss": 0.1714, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.5952557449962934, |
|
"grad_norm": 0.36739829182624817, |
|
"learning_rate": 1.063894249770989e-05, |
|
"loss": 0.17, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.5982209043736102, |
|
"grad_norm": 0.35011234879493713, |
|
"learning_rate": 1.0604448394439983e-05, |
|
"loss": 0.1661, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.6011860637509265, |
|
"grad_norm": 0.37619051337242126, |
|
"learning_rate": 1.0569947071276847e-05, |
|
"loss": 0.1708, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.604151223128243, |
|
"grad_norm": 0.36766669154167175, |
|
"learning_rate": 1.053543894032493e-05, |
|
"loss": 0.1699, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.6071163825055597, |
|
"grad_norm": 0.3799968361854553, |
|
"learning_rate": 1.0500924413769988e-05, |
|
"loss": 0.175, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.6100815418828762, |
|
"grad_norm": 0.35972005128860474, |
|
"learning_rate": 1.0466403903874176e-05, |
|
"loss": 0.1709, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.6130467012601928, |
|
"grad_norm": 0.38818514347076416, |
|
"learning_rate": 1.0431877822971118e-05, |
|
"loss": 0.1729, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.6160118606375091, |
|
"grad_norm": 0.35318616032600403, |
|
"learning_rate": 1.0397346583460972e-05, |
|
"loss": 0.1708, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.618977020014826, |
|
"grad_norm": 0.34682103991508484, |
|
"learning_rate": 1.0362810597805526e-05, |
|
"loss": 0.172, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.6219421793921422, |
|
"grad_norm": 0.37605708837509155, |
|
"learning_rate": 1.0328270278523256e-05, |
|
"loss": 0.1733, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.6249073387694588, |
|
"grad_norm": 0.3474465608596802, |
|
"learning_rate": 1.0293726038184393e-05, |
|
"loss": 0.1659, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.6278724981467754, |
|
"grad_norm": 0.3567797839641571, |
|
"learning_rate": 1.0259178289406011e-05, |
|
"loss": 0.1692, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.630837657524092, |
|
"grad_norm": 0.35859590768814087, |
|
"learning_rate": 1.022462744484709e-05, |
|
"loss": 0.1725, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.6338028169014085, |
|
"grad_norm": 0.4004250168800354, |
|
"learning_rate": 1.019007391720359e-05, |
|
"loss": 0.171, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.6367679762787248, |
|
"grad_norm": 0.3502226769924164, |
|
"learning_rate": 1.0155518119203511e-05, |
|
"loss": 0.1669, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.6397331356560416, |
|
"grad_norm": 0.35019659996032715, |
|
"learning_rate": 1.0120960463601977e-05, |
|
"loss": 0.162, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.642698295033358, |
|
"grad_norm": 0.3413262963294983, |
|
"learning_rate": 1.0086401363176306e-05, |
|
"loss": 0.1671, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.6456634544106745, |
|
"grad_norm": 0.3686580955982208, |
|
"learning_rate": 1.0051841230721065e-05, |
|
"loss": 0.1723, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.648628613787991, |
|
"grad_norm": 0.4102790355682373, |
|
"learning_rate": 1.0017280479043148e-05, |
|
"loss": 0.1737, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.6515937731653076, |
|
"grad_norm": 0.3648839592933655, |
|
"learning_rate": 9.982719520956856e-06, |
|
"loss": 0.1701, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.6545589325426242, |
|
"grad_norm": 0.35376548767089844, |
|
"learning_rate": 9.948158769278939e-06, |
|
"loss": 0.1665, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.6575240919199405, |
|
"grad_norm": 0.34262967109680176, |
|
"learning_rate": 9.913598636823694e-06, |
|
"loss": 0.1637, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.6604892512972573, |
|
"grad_norm": 0.3623892068862915, |
|
"learning_rate": 9.879039536398023e-06, |
|
"loss": 0.1688, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.6634544106745737, |
|
"grad_norm": 0.36795225739479065, |
|
"learning_rate": 9.844481880796492e-06, |
|
"loss": 0.1716, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.6664195700518905, |
|
"grad_norm": 0.3584054112434387, |
|
"learning_rate": 9.809926082796415e-06, |
|
"loss": 0.1717, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.6693847294292068, |
|
"grad_norm": 0.3560091555118561, |
|
"learning_rate": 9.775372555152912e-06, |
|
"loss": 0.1685, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.6723498888065234, |
|
"grad_norm": 0.36741241812705994, |
|
"learning_rate": 9.740821710593989e-06, |
|
"loss": 0.1685, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.67531504818384, |
|
"grad_norm": 0.3397235870361328, |
|
"learning_rate": 9.70627396181561e-06, |
|
"loss": 0.1613, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.6782802075611563, |
|
"grad_norm": 0.3634246289730072, |
|
"learning_rate": 9.671729721476747e-06, |
|
"loss": 0.1681, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.681245366938473, |
|
"grad_norm": 0.3582555949687958, |
|
"learning_rate": 9.637189402194477e-06, |
|
"loss": 0.1687, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.6842105263157894, |
|
"grad_norm": 0.34005481004714966, |
|
"learning_rate": 9.602653416539031e-06, |
|
"loss": 0.1689, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.6871756856931062, |
|
"grad_norm": 0.3448920249938965, |
|
"learning_rate": 9.568122177028884e-06, |
|
"loss": 0.1688, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.6901408450704225, |
|
"grad_norm": 0.3394884169101715, |
|
"learning_rate": 9.533596096125826e-06, |
|
"loss": 0.163, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.693106004447739, |
|
"grad_norm": 0.35604503750801086, |
|
"learning_rate": 9.499075586230014e-06, |
|
"loss": 0.1709, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.6960711638250556, |
|
"grad_norm": 0.34704917669296265, |
|
"learning_rate": 9.464561059675073e-06, |
|
"loss": 0.1686, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.699036323202372, |
|
"grad_norm": 0.3488229811191559, |
|
"learning_rate": 9.430052928723153e-06, |
|
"loss": 0.1705, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.7020014825796888, |
|
"grad_norm": 0.349729984998703, |
|
"learning_rate": 9.395551605560018e-06, |
|
"loss": 0.1656, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.704966641957005, |
|
"grad_norm": 0.3426892161369324, |
|
"learning_rate": 9.361057502290112e-06, |
|
"loss": 0.1652, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.7079318013343219, |
|
"grad_norm": 0.3359294533729553, |
|
"learning_rate": 9.326571030931636e-06, |
|
"loss": 0.1668, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.7108969607116382, |
|
"grad_norm": 0.32818013429641724, |
|
"learning_rate": 9.292092603411642e-06, |
|
"loss": 0.1641, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.7138621200889548, |
|
"grad_norm": 0.3587988317012787, |
|
"learning_rate": 9.257622631561085e-06, |
|
"loss": 0.1692, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.7168272794662713, |
|
"grad_norm": 0.3606449365615845, |
|
"learning_rate": 9.223161527109938e-06, |
|
"loss": 0.1732, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.7197924388435877, |
|
"grad_norm": 0.33454060554504395, |
|
"learning_rate": 9.188709701682246e-06, |
|
"loss": 0.1707, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.7227575982209045, |
|
"grad_norm": 0.3533168435096741, |
|
"learning_rate": 9.154267566791224e-06, |
|
"loss": 0.1647, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.7257227575982208, |
|
"grad_norm": 0.3588050901889801, |
|
"learning_rate": 9.119835533834332e-06, |
|
"loss": 0.1709, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.7286879169755376, |
|
"grad_norm": 0.35869184136390686, |
|
"learning_rate": 9.085414014088368e-06, |
|
"loss": 0.1721, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.731653076352854, |
|
"grad_norm": 0.33058810234069824, |
|
"learning_rate": 9.051003418704566e-06, |
|
"loss": 0.1687, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.7346182357301705, |
|
"grad_norm": 0.35373157262802124, |
|
"learning_rate": 9.016604158703654e-06, |
|
"loss": 0.1685, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.737583395107487, |
|
"grad_norm": 0.3870552182197571, |
|
"learning_rate": 8.982216644970978e-06, |
|
"loss": 0.1698, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.7405485544848036, |
|
"grad_norm": 0.35172680020332336, |
|
"learning_rate": 8.947841288251568e-06, |
|
"loss": 0.167, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.7435137138621202, |
|
"grad_norm": 0.3640024960041046, |
|
"learning_rate": 8.913478499145255e-06, |
|
"loss": 0.1659, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.7464788732394365, |
|
"grad_norm": 0.36789610981941223, |
|
"learning_rate": 8.879128688101749e-06, |
|
"loss": 0.1708, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.7494440326167533, |
|
"grad_norm": 0.3513200283050537, |
|
"learning_rate": 8.844792265415738e-06, |
|
"loss": 0.1652, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.7524091919940696, |
|
"grad_norm": 0.3880747854709625, |
|
"learning_rate": 8.810469641222001e-06, |
|
"loss": 0.1699, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.7553743513713862, |
|
"grad_norm": 0.33801934123039246, |
|
"learning_rate": 8.776161225490488e-06, |
|
"loss": 0.1675, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.7583395107487028, |
|
"grad_norm": 0.3653337359428406, |
|
"learning_rate": 8.741867428021447e-06, |
|
"loss": 0.1648, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.7613046701260193, |
|
"grad_norm": 0.36136823892593384, |
|
"learning_rate": 8.707588658440511e-06, |
|
"loss": 0.1696, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.7642698295033359, |
|
"grad_norm": 0.33816996216773987, |
|
"learning_rate": 8.673325326193806e-06, |
|
"loss": 0.1665, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.7672349888806522, |
|
"grad_norm": 0.33847707509994507, |
|
"learning_rate": 8.639077840543078e-06, |
|
"loss": 0.1678, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.770200148257969, |
|
"grad_norm": 0.3402957022190094, |
|
"learning_rate": 8.604846610560771e-06, |
|
"loss": 0.1643, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.7731653076352853, |
|
"grad_norm": 0.37062397599220276, |
|
"learning_rate": 8.570632045125185e-06, |
|
"loss": 0.1679, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.776130467012602, |
|
"grad_norm": 0.34380587935447693, |
|
"learning_rate": 8.536434552915555e-06, |
|
"loss": 0.1652, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.7790956263899185, |
|
"grad_norm": 0.33917438983917236, |
|
"learning_rate": 8.502254542407186e-06, |
|
"loss": 0.1652, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.782060785767235, |
|
"grad_norm": 0.3372032940387726, |
|
"learning_rate": 8.468092421866575e-06, |
|
"loss": 0.1629, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.7850259451445516, |
|
"grad_norm": 0.34099259972572327, |
|
"learning_rate": 8.433948599346516e-06, |
|
"loss": 0.1678, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.787991104521868, |
|
"grad_norm": 0.370136559009552, |
|
"learning_rate": 8.399823482681263e-06, |
|
"loss": 0.1671, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.7909562638991847, |
|
"grad_norm": 0.3444167375564575, |
|
"learning_rate": 8.36571747948162e-06, |
|
"loss": 0.1652, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.793921423276501, |
|
"grad_norm": 0.3237707018852234, |
|
"learning_rate": 8.331630997130091e-06, |
|
"loss": 0.1647, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.7968865826538176, |
|
"grad_norm": 0.3346817195415497, |
|
"learning_rate": 8.297564442776014e-06, |
|
"loss": 0.168, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.7998517420311342, |
|
"grad_norm": 0.3474122881889343, |
|
"learning_rate": 8.263518223330698e-06, |
|
"loss": 0.1665, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.8028169014084507, |
|
"grad_norm": 0.37336310744285583, |
|
"learning_rate": 8.229492745462551e-06, |
|
"loss": 0.1628, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.8057820607857673, |
|
"grad_norm": 0.3516935706138611, |
|
"learning_rate": 8.195488415592238e-06, |
|
"loss": 0.1669, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.8087472201630836, |
|
"grad_norm": 0.3758098781108856, |
|
"learning_rate": 8.161505639887818e-06, |
|
"loss": 0.1709, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.8117123795404004, |
|
"grad_norm": 0.34178832173347473, |
|
"learning_rate": 8.12754482425989e-06, |
|
"loss": 0.1659, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.8146775389177168, |
|
"grad_norm": 0.3256490230560303, |
|
"learning_rate": 8.09360637435676e-06, |
|
"loss": 0.1649, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.8176426982950333, |
|
"grad_norm": 0.3661201596260071, |
|
"learning_rate": 8.05969069555957e-06, |
|
"loss": 0.167, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.82060785767235, |
|
"grad_norm": 0.34554868936538696, |
|
"learning_rate": 8.025798192977482e-06, |
|
"loss": 0.1685, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.8235730170496665, |
|
"grad_norm": 0.3409639298915863, |
|
"learning_rate": 7.991929271442817e-06, |
|
"loss": 0.1674, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.826538176426983, |
|
"grad_norm": 0.36160513758659363, |
|
"learning_rate": 7.958084335506239e-06, |
|
"loss": 0.167, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.8295033358042994, |
|
"grad_norm": 0.3661399483680725, |
|
"learning_rate": 7.924263789431913e-06, |
|
"loss": 0.1658, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.8324684951816161, |
|
"grad_norm": 0.3356715738773346, |
|
"learning_rate": 7.89046803719267e-06, |
|
"loss": 0.1709, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.8354336545589325, |
|
"grad_norm": 0.3546086549758911, |
|
"learning_rate": 7.856697482465195e-06, |
|
"loss": 0.1626, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.838398813936249, |
|
"grad_norm": 0.3332943320274353, |
|
"learning_rate": 7.822952528625192e-06, |
|
"loss": 0.1678, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.8413639733135656, |
|
"grad_norm": 0.34793728590011597, |
|
"learning_rate": 7.789233578742583e-06, |
|
"loss": 0.1659, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.8443291326908822, |
|
"grad_norm": 0.33829519152641296, |
|
"learning_rate": 7.755541035576677e-06, |
|
"loss": 0.1647, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.8472942920681987, |
|
"grad_norm": 0.33728060126304626, |
|
"learning_rate": 7.721875301571359e-06, |
|
"loss": 0.169, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.850259451445515, |
|
"grad_norm": 0.34719350934028625, |
|
"learning_rate": 7.688236778850307e-06, |
|
"loss": 0.1694, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.8532246108228319, |
|
"grad_norm": 0.328850656747818, |
|
"learning_rate": 7.654625869212147e-06, |
|
"loss": 0.1627, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.8561897702001482, |
|
"grad_norm": 0.34174007177352905, |
|
"learning_rate": 7.621042974125701e-06, |
|
"loss": 0.1693, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.8591549295774648, |
|
"grad_norm": 0.3343605101108551, |
|
"learning_rate": 7.587488494725157e-06, |
|
"loss": 0.1639, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.8621200889547813, |
|
"grad_norm": 0.3540162742137909, |
|
"learning_rate": 7.553962831805291e-06, |
|
"loss": 0.1667, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.8650852483320979, |
|
"grad_norm": 0.32035985589027405, |
|
"learning_rate": 7.520466385816672e-06, |
|
"loss": 0.1632, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.8680504077094144, |
|
"grad_norm": 0.3598351776599884, |
|
"learning_rate": 7.48699955686089e-06, |
|
"loss": 0.1674, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.8710155670867308, |
|
"grad_norm": 0.35652288794517517, |
|
"learning_rate": 7.453562744685779e-06, |
|
"loss": 0.1661, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.8739807264640476, |
|
"grad_norm": 0.32800406217575073, |
|
"learning_rate": 7.420156348680621e-06, |
|
"loss": 0.1617, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.876945885841364, |
|
"grad_norm": 0.3622135818004608, |
|
"learning_rate": 7.3867807678713965e-06, |
|
"loss": 0.1656, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.8799110452186805, |
|
"grad_norm": 0.34809359908103943, |
|
"learning_rate": 7.353436400916006e-06, |
|
"loss": 0.1677, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.882876204595997, |
|
"grad_norm": 0.31972047686576843, |
|
"learning_rate": 7.32012364609952e-06, |
|
"loss": 0.1614, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.8858413639733136, |
|
"grad_norm": 0.3630245327949524, |
|
"learning_rate": 7.286842901329413e-06, |
|
"loss": 0.1674, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.8888065233506302, |
|
"grad_norm": 0.337985098361969, |
|
"learning_rate": 7.253594564130804e-06, |
|
"loss": 0.1661, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.8917716827279465, |
|
"grad_norm": 0.35212552547454834, |
|
"learning_rate": 7.22037903164173e-06, |
|
"loss": 0.1667, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.8947368421052633, |
|
"grad_norm": 0.3282301127910614, |
|
"learning_rate": 7.187196700608373e-06, |
|
"loss": 0.1642, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.8977020014825796, |
|
"grad_norm": 0.3764040470123291, |
|
"learning_rate": 7.154047967380353e-06, |
|
"loss": 0.1664, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.9006671608598962, |
|
"grad_norm": 0.35521620512008667, |
|
"learning_rate": 7.120933227905971e-06, |
|
"loss": 0.1674, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.9036323202372127, |
|
"grad_norm": 0.34446388483047485, |
|
"learning_rate": 7.0878528777274814e-06, |
|
"loss": 0.167, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.9065974796145293, |
|
"grad_norm": 0.3320685029029846, |
|
"learning_rate": 7.05480731197638e-06, |
|
"loss": 0.1617, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.9095626389918459, |
|
"grad_norm": 0.3581525385379791, |
|
"learning_rate": 7.021796925368667e-06, |
|
"loss": 0.1672, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.9125277983691622, |
|
"grad_norm": 0.3463064730167389, |
|
"learning_rate": 6.988822112200157e-06, |
|
"loss": 0.1616, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.915492957746479, |
|
"grad_norm": 0.3198210597038269, |
|
"learning_rate": 6.955883266341741e-06, |
|
"loss": 0.1644, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.9184581171237953, |
|
"grad_norm": 0.3308843672275543, |
|
"learning_rate": 6.9229807812346985e-06, |
|
"loss": 0.166, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.9214232765011119, |
|
"grad_norm": 0.357516884803772, |
|
"learning_rate": 6.890115049885995e-06, |
|
"loss": 0.1653, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.9243884358784284, |
|
"grad_norm": 0.3382551074028015, |
|
"learning_rate": 6.85728646486359e-06, |
|
"loss": 0.1643, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.927353595255745, |
|
"grad_norm": 0.34474846720695496, |
|
"learning_rate": 6.824495418291741e-06, |
|
"loss": 0.1664, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.9303187546330616, |
|
"grad_norm": 0.3264622390270233, |
|
"learning_rate": 6.791742301846325e-06, |
|
"loss": 0.1651, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.933283914010378, |
|
"grad_norm": 0.34936872124671936, |
|
"learning_rate": 6.759027506750159e-06, |
|
"loss": 0.1664, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.9362490733876947, |
|
"grad_norm": 0.34215009212493896, |
|
"learning_rate": 6.726351423768323e-06, |
|
"loss": 0.1671, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.939214232765011, |
|
"grad_norm": 0.33212119340896606, |
|
"learning_rate": 6.693714443203507e-06, |
|
"loss": 0.1642, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.9421793921423276, |
|
"grad_norm": 0.3262803256511688, |
|
"learning_rate": 6.661116954891329e-06, |
|
"loss": 0.1618, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.9451445515196442, |
|
"grad_norm": 0.3281456530094147, |
|
"learning_rate": 6.62855934819569e-06, |
|
"loss": 0.1661, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.9481097108969607, |
|
"grad_norm": 0.3286809027194977, |
|
"learning_rate": 6.59604201200412e-06, |
|
"loss": 0.1639, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.9510748702742773, |
|
"grad_norm": 0.33260786533355713, |
|
"learning_rate": 6.563565334723134e-06, |
|
"loss": 0.1623, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.9540400296515936, |
|
"grad_norm": 0.34506213665008545, |
|
"learning_rate": 6.5311297042736046e-06, |
|
"loss": 0.168, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.9570051890289104, |
|
"grad_norm": 0.3355524241924286, |
|
"learning_rate": 6.498735508086094e-06, |
|
"loss": 0.1659, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.9599703484062267, |
|
"grad_norm": 0.3466867506504059, |
|
"learning_rate": 6.466383133096268e-06, |
|
"loss": 0.1658, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.9629355077835435, |
|
"grad_norm": 0.32791653275489807, |
|
"learning_rate": 6.4340729657402424e-06, |
|
"loss": 0.1656, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.9659006671608599, |
|
"grad_norm": 0.3234347403049469, |
|
"learning_rate": 6.40180539194999e-06, |
|
"loss": 0.1626, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.9688658265381764, |
|
"grad_norm": 0.33013296127319336, |
|
"learning_rate": 6.3695807971487175e-06, |
|
"loss": 0.1666, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.971830985915493, |
|
"grad_norm": 0.3211277425289154, |
|
"learning_rate": 6.337399566246257e-06, |
|
"loss": 0.1631, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.9747961452928093, |
|
"grad_norm": 0.32406309247016907, |
|
"learning_rate": 6.305262083634488e-06, |
|
"loss": 0.1648, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.9777613046701261, |
|
"grad_norm": 0.31896913051605225, |
|
"learning_rate": 6.2731687331827214e-06, |
|
"loss": 0.1639, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.9807264640474425, |
|
"grad_norm": 0.32637953758239746, |
|
"learning_rate": 6.2411198982331435e-06, |
|
"loss": 0.1616, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.9836916234247592, |
|
"grad_norm": 0.3404463231563568, |
|
"learning_rate": 6.209115961596208e-06, |
|
"loss": 0.1639, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.9866567828020756, |
|
"grad_norm": 0.33349186182022095, |
|
"learning_rate": 6.177157305546077e-06, |
|
"loss": 0.1656, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.9896219421793921, |
|
"grad_norm": 0.33086422085762024, |
|
"learning_rate": 6.145244311816063e-06, |
|
"loss": 0.1659, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.9925871015567087, |
|
"grad_norm": 0.325844407081604, |
|
"learning_rate": 6.113377361594048e-06, |
|
"loss": 0.1627, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.995552260934025, |
|
"grad_norm": 0.3200128376483917, |
|
"learning_rate": 6.081556835517955e-06, |
|
"loss": 0.1648, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.9985174203113418, |
|
"grad_norm": 0.3372708261013031, |
|
"learning_rate": 6.049783113671184e-06, |
|
"loss": 0.1647, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 2.001482579688658, |
|
"grad_norm": 0.3364209830760956, |
|
"learning_rate": 6.018056575578075e-06, |
|
"loss": 0.1502, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.004447739065975, |
|
"grad_norm": 0.48207002878189087, |
|
"learning_rate": 5.986377600199371e-06, |
|
"loss": 0.135, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 2.0074128984432913, |
|
"grad_norm": 0.4162459671497345, |
|
"learning_rate": 5.9547465659277215e-06, |
|
"loss": 0.1348, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 2.010378057820608, |
|
"grad_norm": 0.3513208329677582, |
|
"learning_rate": 5.923163850583114e-06, |
|
"loss": 0.1341, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 2.0133432171979244, |
|
"grad_norm": 0.3508232831954956, |
|
"learning_rate": 5.891629831408392e-06, |
|
"loss": 0.1293, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 2.0163083765752408, |
|
"grad_norm": 0.3477492034435272, |
|
"learning_rate": 5.8601448850647515e-06, |
|
"loss": 0.1301, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.0192735359525575, |
|
"grad_norm": 0.4429793357849121, |
|
"learning_rate": 5.828709387627219e-06, |
|
"loss": 0.1342, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 2.022238695329874, |
|
"grad_norm": 0.45886561274528503, |
|
"learning_rate": 5.797323714580192e-06, |
|
"loss": 0.1315, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 2.0252038547071907, |
|
"grad_norm": 0.4540359377861023, |
|
"learning_rate": 5.7659882408129204e-06, |
|
"loss": 0.1331, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.028169014084507, |
|
"grad_norm": 0.3975986838340759, |
|
"learning_rate": 5.7347033406150494e-06, |
|
"loss": 0.1336, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 2.031134173461824, |
|
"grad_norm": 0.40435880422592163, |
|
"learning_rate": 5.703469387672138e-06, |
|
"loss": 0.1314, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 2.03409933283914, |
|
"grad_norm": 0.36956506967544556, |
|
"learning_rate": 5.672286755061212e-06, |
|
"loss": 0.1292, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 2.0370644922164565, |
|
"grad_norm": 0.3341839909553528, |
|
"learning_rate": 5.64115581524629e-06, |
|
"loss": 0.1284, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 2.0400296515937733, |
|
"grad_norm": 0.3514721989631653, |
|
"learning_rate": 5.610076940073939e-06, |
|
"loss": 0.1293, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 2.0429948109710896, |
|
"grad_norm": 0.3784818947315216, |
|
"learning_rate": 5.579050500768837e-06, |
|
"loss": 0.1317, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 2.0459599703484064, |
|
"grad_norm": 0.37682050466537476, |
|
"learning_rate": 5.548076867929331e-06, |
|
"loss": 0.1299, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.0489251297257227, |
|
"grad_norm": 0.3408771753311157, |
|
"learning_rate": 5.517156411523026e-06, |
|
"loss": 0.1294, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 2.0518902891030395, |
|
"grad_norm": 0.3548658788204193, |
|
"learning_rate": 5.486289500882355e-06, |
|
"loss": 0.1319, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 2.054855448480356, |
|
"grad_norm": 0.3722321391105652, |
|
"learning_rate": 5.455476504700161e-06, |
|
"loss": 0.1317, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 2.057820607857672, |
|
"grad_norm": 0.3734947144985199, |
|
"learning_rate": 5.424717791025302e-06, |
|
"loss": 0.1333, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 2.060785767234989, |
|
"grad_norm": 0.3377910554409027, |
|
"learning_rate": 5.3940137272582534e-06, |
|
"loss": 0.1298, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 2.0637509266123053, |
|
"grad_norm": 0.371706485748291, |
|
"learning_rate": 5.3633646801467255e-06, |
|
"loss": 0.1315, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 2.066716085989622, |
|
"grad_norm": 0.37259435653686523, |
|
"learning_rate": 5.332771015781275e-06, |
|
"loss": 0.1312, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 2.0696812453669384, |
|
"grad_norm": 0.357020765542984, |
|
"learning_rate": 5.302233099590928e-06, |
|
"loss": 0.1298, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 2.072646404744255, |
|
"grad_norm": 0.3563533425331116, |
|
"learning_rate": 5.271751296338823e-06, |
|
"loss": 0.13, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 2.0756115641215716, |
|
"grad_norm": 0.35207492113113403, |
|
"learning_rate": 5.241325970117851e-06, |
|
"loss": 0.1331, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.078576723498888, |
|
"grad_norm": 0.3274974524974823, |
|
"learning_rate": 5.210957484346314e-06, |
|
"loss": 0.1277, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 2.0815418828762047, |
|
"grad_norm": 0.36036819219589233, |
|
"learning_rate": 5.1806462017635775e-06, |
|
"loss": 0.13, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 2.084507042253521, |
|
"grad_norm": 0.36002618074417114, |
|
"learning_rate": 5.150392484425728e-06, |
|
"loss": 0.1319, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 2.087472201630838, |
|
"grad_norm": 0.3353933095932007, |
|
"learning_rate": 5.120196693701267e-06, |
|
"loss": 0.1312, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 2.090437361008154, |
|
"grad_norm": 0.36249250173568726, |
|
"learning_rate": 5.090059190266779e-06, |
|
"loss": 0.1331, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 2.093402520385471, |
|
"grad_norm": 0.3565695583820343, |
|
"learning_rate": 5.059980334102637e-06, |
|
"loss": 0.1306, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 2.0963676797627873, |
|
"grad_norm": 0.35264307260513306, |
|
"learning_rate": 5.0299604844886985e-06, |
|
"loss": 0.1288, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 2.0993328391401036, |
|
"grad_norm": 0.34676074981689453, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 0.1289, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 2.1022979985174204, |
|
"grad_norm": 0.34547021985054016, |
|
"learning_rate": 4.970099238502494e-06, |
|
"loss": 0.13, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.3479922413825989, |
|
"learning_rate": 4.940258557148765e-06, |
|
"loss": 0.1289, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.1082283172720535, |
|
"grad_norm": 0.34653687477111816, |
|
"learning_rate": 4.910478312373757e-06, |
|
"loss": 0.1274, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 2.11119347664937, |
|
"grad_norm": 0.35768458247184753, |
|
"learning_rate": 4.8807588598905364e-06, |
|
"loss": 0.1263, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 2.1141586360266866, |
|
"grad_norm": 0.3689504563808441, |
|
"learning_rate": 4.8511005546860214e-06, |
|
"loss": 0.131, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 2.117123795404003, |
|
"grad_norm": 0.36393120884895325, |
|
"learning_rate": 4.821503751016746e-06, |
|
"loss": 0.132, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 2.1200889547813193, |
|
"grad_norm": 0.331999272108078, |
|
"learning_rate": 4.791968802404648e-06, |
|
"loss": 0.1315, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 2.123054114158636, |
|
"grad_norm": 0.3519760072231293, |
|
"learning_rate": 4.762496061632814e-06, |
|
"loss": 0.1283, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 2.1260192735359524, |
|
"grad_norm": 0.3459528088569641, |
|
"learning_rate": 4.733085880741301e-06, |
|
"loss": 0.1281, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 2.128984432913269, |
|
"grad_norm": 0.3726547956466675, |
|
"learning_rate": 4.703738611022899e-06, |
|
"loss": 0.1304, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 2.1319495922905856, |
|
"grad_norm": 0.33399152755737305, |
|
"learning_rate": 4.674454603018949e-06, |
|
"loss": 0.1281, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 2.1349147516679023, |
|
"grad_norm": 0.3605293333530426, |
|
"learning_rate": 4.645234206515171e-06, |
|
"loss": 0.1276, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.1378799110452187, |
|
"grad_norm": 0.34493979811668396, |
|
"learning_rate": 4.616077770537453e-06, |
|
"loss": 0.1297, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 2.140845070422535, |
|
"grad_norm": 0.34895074367523193, |
|
"learning_rate": 4.586985643347716e-06, |
|
"loss": 0.1325, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 2.143810229799852, |
|
"grad_norm": 0.3834850788116455, |
|
"learning_rate": 4.557958172439726e-06, |
|
"loss": 0.1307, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 2.146775389177168, |
|
"grad_norm": 0.36474162340164185, |
|
"learning_rate": 4.5289957045349655e-06, |
|
"loss": 0.1319, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 2.149740548554485, |
|
"grad_norm": 0.36195108294487, |
|
"learning_rate": 4.500098585578475e-06, |
|
"loss": 0.1291, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.1527057079318013, |
|
"grad_norm": 0.3537023663520813, |
|
"learning_rate": 4.471267160734731e-06, |
|
"loss": 0.1301, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 2.155670867309118, |
|
"grad_norm": 0.33723926544189453, |
|
"learning_rate": 4.4425017743835155e-06, |
|
"loss": 0.1305, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 2.1586360266864344, |
|
"grad_norm": 0.3564864993095398, |
|
"learning_rate": 4.413802770115816e-06, |
|
"loss": 0.127, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 2.1616011860637507, |
|
"grad_norm": 0.35434702038764954, |
|
"learning_rate": 4.385170490729712e-06, |
|
"loss": 0.13, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 2.1645663454410675, |
|
"grad_norm": 0.35794782638549805, |
|
"learning_rate": 4.356605278226274e-06, |
|
"loss": 0.1312, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.167531504818384, |
|
"grad_norm": 0.3571893870830536, |
|
"learning_rate": 4.328107473805487e-06, |
|
"loss": 0.1275, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 2.1704966641957006, |
|
"grad_norm": 0.34572115540504456, |
|
"learning_rate": 4.299677417862174e-06, |
|
"loss": 0.1319, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.173461823573017, |
|
"grad_norm": 0.33964014053344727, |
|
"learning_rate": 4.2713154499819345e-06, |
|
"loss": 0.1304, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.1764269829503338, |
|
"grad_norm": 0.34920188784599304, |
|
"learning_rate": 4.243021908937083e-06, |
|
"loss": 0.1294, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 2.17939214232765, |
|
"grad_norm": 0.33510822057724, |
|
"learning_rate": 4.214797132682597e-06, |
|
"loss": 0.129, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.1823573017049664, |
|
"grad_norm": 0.33283138275146484, |
|
"learning_rate": 4.186641458352088e-06, |
|
"loss": 0.1271, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.1853224610822832, |
|
"grad_norm": 0.3561367988586426, |
|
"learning_rate": 4.158555222253772e-06, |
|
"loss": 0.1311, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 2.1882876204595996, |
|
"grad_norm": 0.36620989441871643, |
|
"learning_rate": 4.130538759866457e-06, |
|
"loss": 0.1317, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.1912527798369164, |
|
"grad_norm": 0.337467759847641, |
|
"learning_rate": 4.102592405835536e-06, |
|
"loss": 0.1297, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.1942179392142327, |
|
"grad_norm": 0.3357710540294647, |
|
"learning_rate": 4.074716493968976e-06, |
|
"loss": 0.1314, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.1971830985915495, |
|
"grad_norm": 0.35487931966781616, |
|
"learning_rate": 4.046911357233343e-06, |
|
"loss": 0.1299, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.200148257968866, |
|
"grad_norm": 0.34735655784606934, |
|
"learning_rate": 4.019177327749822e-06, |
|
"loss": 0.1324, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 2.203113417346182, |
|
"grad_norm": 0.3381595313549042, |
|
"learning_rate": 3.991514736790259e-06, |
|
"loss": 0.1325, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 2.206078576723499, |
|
"grad_norm": 0.33680617809295654, |
|
"learning_rate": 3.9639239147731865e-06, |
|
"loss": 0.1299, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.2090437361008153, |
|
"grad_norm": 0.33319249749183655, |
|
"learning_rate": 3.936405191259891e-06, |
|
"loss": 0.1291, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.212008895478132, |
|
"grad_norm": 0.32937324047088623, |
|
"learning_rate": 3.908958894950465e-06, |
|
"loss": 0.1306, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.2149740548554484, |
|
"grad_norm": 0.3424176573753357, |
|
"learning_rate": 3.881585353679891e-06, |
|
"loss": 0.1294, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 2.217939214232765, |
|
"grad_norm": 0.33413031697273254, |
|
"learning_rate": 3.854284894414122e-06, |
|
"loss": 0.1293, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.2209043736100815, |
|
"grad_norm": 0.33275535702705383, |
|
"learning_rate": 3.827057843246181e-06, |
|
"loss": 0.131, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.223869532987398, |
|
"grad_norm": 0.33580246567726135, |
|
"learning_rate": 3.799904525392251e-06, |
|
"loss": 0.1305, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.2268346923647147, |
|
"grad_norm": 0.34102970361709595, |
|
"learning_rate": 3.7728252651878018e-06, |
|
"loss": 0.1304, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 2.229799851742031, |
|
"grad_norm": 0.3335299491882324, |
|
"learning_rate": 3.745820386083724e-06, |
|
"loss": 0.1301, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 2.2327650111193478, |
|
"grad_norm": 0.33244848251342773, |
|
"learning_rate": 3.718890210642442e-06, |
|
"loss": 0.1289, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 2.235730170496664, |
|
"grad_norm": 0.3514939248561859, |
|
"learning_rate": 3.6920350605340883e-06, |
|
"loss": 0.1292, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.238695329873981, |
|
"grad_norm": 0.34593474864959717, |
|
"learning_rate": 3.6652552565326382e-06, |
|
"loss": 0.1308, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.2416604892512972, |
|
"grad_norm": 0.3418528139591217, |
|
"learning_rate": 3.638551118512089e-06, |
|
"loss": 0.13, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.244625648628614, |
|
"grad_norm": 0.35408085584640503, |
|
"learning_rate": 3.611922965442648e-06, |
|
"loss": 0.1278, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 2.2475908080059304, |
|
"grad_norm": 0.33797547221183777, |
|
"learning_rate": 3.5853711153868962e-06, |
|
"loss": 0.1296, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 2.2505559673832467, |
|
"grad_norm": 0.3515971302986145, |
|
"learning_rate": 3.558895885496023e-06, |
|
"loss": 0.1311, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 2.2535211267605635, |
|
"grad_norm": 0.34564974904060364, |
|
"learning_rate": 3.53249759200601e-06, |
|
"loss": 0.1271, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.25648628613788, |
|
"grad_norm": 0.3417358696460724, |
|
"learning_rate": 3.506176550233863e-06, |
|
"loss": 0.1323, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 2.2594514455151966, |
|
"grad_norm": 0.342887282371521, |
|
"learning_rate": 3.479933074573858e-06, |
|
"loss": 0.1305, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.262416604892513, |
|
"grad_norm": 0.3478313386440277, |
|
"learning_rate": 3.453767478493761e-06, |
|
"loss": 0.1303, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 2.2653817642698293, |
|
"grad_norm": 0.33934473991394043, |
|
"learning_rate": 3.4276800745311135e-06, |
|
"loss": 0.1288, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.268346923647146, |
|
"grad_norm": 0.31560465693473816, |
|
"learning_rate": 3.401671174289469e-06, |
|
"loss": 0.128, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.2713120830244624, |
|
"grad_norm": 0.34385186433792114, |
|
"learning_rate": 3.37574108843469e-06, |
|
"loss": 0.127, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.274277242401779, |
|
"grad_norm": 0.32480913400650024, |
|
"learning_rate": 3.3498901266912397e-06, |
|
"loss": 0.128, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.2772424017790955, |
|
"grad_norm": 0.3512122631072998, |
|
"learning_rate": 3.3241185978384636e-06, |
|
"loss": 0.1276, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.2802075611564123, |
|
"grad_norm": 0.34826409816741943, |
|
"learning_rate": 3.2984268097069284e-06, |
|
"loss": 0.13, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 2.2831727205337287, |
|
"grad_norm": 0.34755197167396545, |
|
"learning_rate": 3.2728150691747117e-06, |
|
"loss": 0.1314, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.2861378799110454, |
|
"grad_norm": 0.3306916654109955, |
|
"learning_rate": 3.2472836821637744e-06, |
|
"loss": 0.1314, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 2.289103039288362, |
|
"grad_norm": 0.3324066698551178, |
|
"learning_rate": 3.22183295363627e-06, |
|
"loss": 0.1304, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 2.292068198665678, |
|
"grad_norm": 0.34940484166145325, |
|
"learning_rate": 3.196463187590929e-06, |
|
"loss": 0.1351, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 2.295033358042995, |
|
"grad_norm": 0.34311389923095703, |
|
"learning_rate": 3.1711746870594083e-06, |
|
"loss": 0.1299, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 2.2979985174203112, |
|
"grad_norm": 0.3504948318004608, |
|
"learning_rate": 3.145967754102691e-06, |
|
"loss": 0.1314, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.300963676797628, |
|
"grad_norm": 0.33524277806282043, |
|
"learning_rate": 3.1208426898074685e-06, |
|
"loss": 0.1326, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 2.3039288361749444, |
|
"grad_norm": 0.3370322287082672, |
|
"learning_rate": 3.0957997942825337e-06, |
|
"loss": 0.1301, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 2.3068939955522607, |
|
"grad_norm": 0.33946508169174194, |
|
"learning_rate": 3.070839366655215e-06, |
|
"loss": 0.1297, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 2.3098591549295775, |
|
"grad_norm": 0.343334436416626, |
|
"learning_rate": 3.045961705067787e-06, |
|
"loss": 0.1279, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 2.312824314306894, |
|
"grad_norm": 0.34265804290771484, |
|
"learning_rate": 3.021167106673928e-06, |
|
"loss": 0.1314, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.3157894736842106, |
|
"grad_norm": 0.33246049284935, |
|
"learning_rate": 2.996455867635155e-06, |
|
"loss": 0.1306, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.318754633061527, |
|
"grad_norm": 0.34323611855506897, |
|
"learning_rate": 2.9718282831172885e-06, |
|
"loss": 0.1318, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 2.3217197924388437, |
|
"grad_norm": 0.34068265557289124, |
|
"learning_rate": 2.94728464728693e-06, |
|
"loss": 0.1292, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 2.32468495181616, |
|
"grad_norm": 0.3370424807071686, |
|
"learning_rate": 2.922825253307947e-06, |
|
"loss": 0.129, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 2.327650111193477, |
|
"grad_norm": 0.3519260883331299, |
|
"learning_rate": 2.898450393337977e-06, |
|
"loss": 0.1292, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.330615270570793, |
|
"grad_norm": 0.33347323536872864, |
|
"learning_rate": 2.8741603585249312e-06, |
|
"loss": 0.1261, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 2.3335804299481095, |
|
"grad_norm": 0.3215949833393097, |
|
"learning_rate": 2.8499554390035144e-06, |
|
"loss": 0.1294, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 2.3365455893254263, |
|
"grad_norm": 0.32965582609176636, |
|
"learning_rate": 2.8258359238917665e-06, |
|
"loss": 0.1281, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 2.3395107487027427, |
|
"grad_norm": 0.33794647455215454, |
|
"learning_rate": 2.8018021012875994e-06, |
|
"loss": 0.1285, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 2.3424759080800595, |
|
"grad_norm": 0.32937586307525635, |
|
"learning_rate": 2.7778542582653746e-06, |
|
"loss": 0.128, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.345441067457376, |
|
"grad_norm": 0.3328467607498169, |
|
"learning_rate": 2.753992680872457e-06, |
|
"loss": 0.129, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 2.348406226834692, |
|
"grad_norm": 0.32725760340690613, |
|
"learning_rate": 2.7302176541257984e-06, |
|
"loss": 0.1294, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 2.351371386212009, |
|
"grad_norm": 0.3363383114337921, |
|
"learning_rate": 2.7065294620085425e-06, |
|
"loss": 0.129, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 2.3543365455893253, |
|
"grad_norm": 0.33696410059928894, |
|
"learning_rate": 2.6829283874666236e-06, |
|
"loss": 0.1285, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 2.357301704966642, |
|
"grad_norm": 0.33244121074676514, |
|
"learning_rate": 2.6594147124053983e-06, |
|
"loss": 0.1277, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.3602668643439584, |
|
"grad_norm": 0.3353787958621979, |
|
"learning_rate": 2.635988717686272e-06, |
|
"loss": 0.1304, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 2.363232023721275, |
|
"grad_norm": 0.34110620617866516, |
|
"learning_rate": 2.6126506831233343e-06, |
|
"loss": 0.1306, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 2.3661971830985915, |
|
"grad_norm": 0.33648866415023804, |
|
"learning_rate": 2.5894008874800323e-06, |
|
"loss": 0.1286, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 2.3691623424759083, |
|
"grad_norm": 0.34967437386512756, |
|
"learning_rate": 2.5662396084658383e-06, |
|
"loss": 0.133, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 2.3721275018532246, |
|
"grad_norm": 0.33198100328445435, |
|
"learning_rate": 2.543167122732918e-06, |
|
"loss": 0.1277, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.375092661230541, |
|
"grad_norm": 0.34363454580307007, |
|
"learning_rate": 2.5201837058728506e-06, |
|
"loss": 0.1277, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 2.3780578206078578, |
|
"grad_norm": 0.3609948456287384, |
|
"learning_rate": 2.4972896324133143e-06, |
|
"loss": 0.1295, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 2.381022979985174, |
|
"grad_norm": 0.34460243582725525, |
|
"learning_rate": 2.474485175814816e-06, |
|
"loss": 0.1319, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 2.383988139362491, |
|
"grad_norm": 0.3403383493423462, |
|
"learning_rate": 2.451770608467432e-06, |
|
"loss": 0.1284, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 2.386953298739807, |
|
"grad_norm": 0.333807110786438, |
|
"learning_rate": 2.429146201687538e-06, |
|
"loss": 0.1257, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 2.3899184581171236, |
|
"grad_norm": 0.33072689175605774, |
|
"learning_rate": 2.4066122257145898e-06, |
|
"loss": 0.1309, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 2.3928836174944403, |
|
"grad_norm": 0.32463690638542175, |
|
"learning_rate": 2.3841689497078746e-06, |
|
"loss": 0.1289, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 2.3958487768717567, |
|
"grad_norm": 0.34213897585868835, |
|
"learning_rate": 2.361816641743303e-06, |
|
"loss": 0.1286, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 2.3988139362490735, |
|
"grad_norm": 0.3414537310600281, |
|
"learning_rate": 2.339555568810221e-06, |
|
"loss": 0.126, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 2.40177909562639, |
|
"grad_norm": 0.32957902550697327, |
|
"learning_rate": 2.317385996808195e-06, |
|
"loss": 0.1302, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.4047442550037066, |
|
"grad_norm": 0.3390369713306427, |
|
"learning_rate": 2.295308190543859e-06, |
|
"loss": 0.132, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 2.407709414381023, |
|
"grad_norm": 0.3288882076740265, |
|
"learning_rate": 2.2733224137277366e-06, |
|
"loss": 0.1271, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.4106745737583397, |
|
"grad_norm": 0.3289991021156311, |
|
"learning_rate": 2.251428928971102e-06, |
|
"loss": 0.1304, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 2.413639733135656, |
|
"grad_norm": 0.33164265751838684, |
|
"learning_rate": 2.229627997782834e-06, |
|
"loss": 0.1296, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 2.4166048925129724, |
|
"grad_norm": 0.33751052618026733, |
|
"learning_rate": 2.2079198805662917e-06, |
|
"loss": 0.1282, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.419570051890289, |
|
"grad_norm": 0.3279024362564087, |
|
"learning_rate": 2.186304836616221e-06, |
|
"loss": 0.1295, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 2.4225352112676055, |
|
"grad_norm": 0.3452274203300476, |
|
"learning_rate": 2.1647831241156304e-06, |
|
"loss": 0.1299, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 2.4255003706449223, |
|
"grad_norm": 0.3305584788322449, |
|
"learning_rate": 2.1433550001327376e-06, |
|
"loss": 0.1285, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 2.4284655300222386, |
|
"grad_norm": 0.33620432019233704, |
|
"learning_rate": 2.122020720617869e-06, |
|
"loss": 0.1304, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.431430689399555, |
|
"grad_norm": 0.3142911493778229, |
|
"learning_rate": 2.1007805404004247e-06, |
|
"loss": 0.125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.4343958487768718, |
|
"grad_norm": 0.3442496657371521, |
|
"learning_rate": 2.0796347131858187e-06, |
|
"loss": 0.1286, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.437361008154188, |
|
"grad_norm": 0.34949377179145813, |
|
"learning_rate": 2.058583491552465e-06, |
|
"loss": 0.1284, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.440326167531505, |
|
"grad_norm": 0.36079153418540955, |
|
"learning_rate": 2.037627126948751e-06, |
|
"loss": 0.1303, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.4432913269088212, |
|
"grad_norm": 0.32977890968322754, |
|
"learning_rate": 2.0167658696900317e-06, |
|
"loss": 0.1279, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 2.446256486286138, |
|
"grad_norm": 0.3395943343639374, |
|
"learning_rate": 1.9959999689556407e-06, |
|
"loss": 0.1295, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.4492216456634543, |
|
"grad_norm": 0.3250430226325989, |
|
"learning_rate": 1.9753296727859195e-06, |
|
"loss": 0.1287, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 2.452186805040771, |
|
"grad_norm": 0.3329125642776489, |
|
"learning_rate": 1.9547552280792528e-06, |
|
"loss": 0.1278, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 2.4551519644180875, |
|
"grad_norm": 0.31633639335632324, |
|
"learning_rate": 1.9342768805891176e-06, |
|
"loss": 0.1291, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 2.458117123795404, |
|
"grad_norm": 0.3292962610721588, |
|
"learning_rate": 1.9138948749211473e-06, |
|
"loss": 0.1297, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 2.4610822831727206, |
|
"grad_norm": 0.34126242995262146, |
|
"learning_rate": 1.8936094545302098e-06, |
|
"loss": 0.1293, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.464047442550037, |
|
"grad_norm": 0.3327971398830414, |
|
"learning_rate": 1.8734208617174986e-06, |
|
"loss": 0.1284, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 2.4670126019273537, |
|
"grad_norm": 0.340774804353714, |
|
"learning_rate": 1.8533293376276473e-06, |
|
"loss": 0.129, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 2.46997776130467, |
|
"grad_norm": 0.3464578688144684, |
|
"learning_rate": 1.8333351222458407e-06, |
|
"loss": 0.1277, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.472942920681987, |
|
"grad_norm": 0.340108722448349, |
|
"learning_rate": 1.813438454394948e-06, |
|
"loss": 0.1304, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 2.475908080059303, |
|
"grad_norm": 0.36126676201820374, |
|
"learning_rate": 1.7936395717326705e-06, |
|
"loss": 0.1275, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.4788732394366195, |
|
"grad_norm": 0.3317781388759613, |
|
"learning_rate": 1.773938710748706e-06, |
|
"loss": 0.1301, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.4818383988139363, |
|
"grad_norm": 0.34120678901672363, |
|
"learning_rate": 1.7543361067619269e-06, |
|
"loss": 0.1287, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 2.4848035581912526, |
|
"grad_norm": 0.3353835642337799, |
|
"learning_rate": 1.734831993917564e-06, |
|
"loss": 0.1296, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.4877687175685694, |
|
"grad_norm": 0.34985971450805664, |
|
"learning_rate": 1.715426605184407e-06, |
|
"loss": 0.129, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 2.4907338769458858, |
|
"grad_norm": 0.3302218019962311, |
|
"learning_rate": 1.6961201723520248e-06, |
|
"loss": 0.131, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.4936990363232026, |
|
"grad_norm": 0.34012821316719055, |
|
"learning_rate": 1.676912926028007e-06, |
|
"loss": 0.1301, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 2.496664195700519, |
|
"grad_norm": 0.3237687945365906, |
|
"learning_rate": 1.6578050956351887e-06, |
|
"loss": 0.1257, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 2.4996293550778352, |
|
"grad_norm": 0.3470035791397095, |
|
"learning_rate": 1.6387969094089318e-06, |
|
"loss": 0.1287, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 2.502594514455152, |
|
"grad_norm": 0.35050496459007263, |
|
"learning_rate": 1.619888594394382e-06, |
|
"loss": 0.1314, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 2.5055596738324684, |
|
"grad_norm": 0.3287401795387268, |
|
"learning_rate": 1.6010803764437633e-06, |
|
"loss": 0.1285, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.508524833209785, |
|
"grad_norm": 0.34805530309677124, |
|
"learning_rate": 1.5823724802136863e-06, |
|
"loss": 0.1313, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 2.5114899925871015, |
|
"grad_norm": 0.33040815591812134, |
|
"learning_rate": 1.5637651291624522e-06, |
|
"loss": 0.1284, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 2.514455151964418, |
|
"grad_norm": 0.340082049369812, |
|
"learning_rate": 1.545258545547398e-06, |
|
"loss": 0.1258, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 2.5174203113417346, |
|
"grad_norm": 0.3319970965385437, |
|
"learning_rate": 1.5268529504222262e-06, |
|
"loss": 0.1278, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 2.5203854707190514, |
|
"grad_norm": 0.327903151512146, |
|
"learning_rate": 1.5085485636343755e-06, |
|
"loss": 0.1272, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.5233506300963677, |
|
"grad_norm": 0.3466844856739044, |
|
"learning_rate": 1.4903456038223941e-06, |
|
"loss": 0.131, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.526315789473684, |
|
"grad_norm": 0.3274025619029999, |
|
"learning_rate": 1.4722442884133214e-06, |
|
"loss": 0.127, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 2.529280948851001, |
|
"grad_norm": 0.32809337973594666, |
|
"learning_rate": 1.4542448336201021e-06, |
|
"loss": 0.1265, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 2.532246108228317, |
|
"grad_norm": 0.3453335165977478, |
|
"learning_rate": 1.4363474544389876e-06, |
|
"loss": 0.1295, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 2.535211267605634, |
|
"grad_norm": 0.3447280824184418, |
|
"learning_rate": 1.4185523646469822e-06, |
|
"loss": 0.1312, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.5381764269829503, |
|
"grad_norm": 0.33509477972984314, |
|
"learning_rate": 1.4008597767992872e-06, |
|
"loss": 0.1283, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 2.5411415863602667, |
|
"grad_norm": 0.3374352753162384, |
|
"learning_rate": 1.3832699022267516e-06, |
|
"loss": 0.1277, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 2.5441067457375834, |
|
"grad_norm": 0.3189197778701782, |
|
"learning_rate": 1.3657829510333653e-06, |
|
"loss": 0.1261, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.5470719051149, |
|
"grad_norm": 0.34467366337776184, |
|
"learning_rate": 1.3483991320937307e-06, |
|
"loss": 0.1295, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 2.5500370644922166, |
|
"grad_norm": 0.33318278193473816, |
|
"learning_rate": 1.3311186530505838e-06, |
|
"loss": 0.1271, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.553002223869533, |
|
"grad_norm": 0.3337114453315735, |
|
"learning_rate": 1.313941720312303e-06, |
|
"loss": 0.133, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 2.5559673832468492, |
|
"grad_norm": 0.33227020502090454, |
|
"learning_rate": 1.2968685390504465e-06, |
|
"loss": 0.1277, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 2.558932542624166, |
|
"grad_norm": 0.3402811288833618, |
|
"learning_rate": 1.2798993131973093e-06, |
|
"loss": 0.128, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 2.561897702001483, |
|
"grad_norm": 0.32487955689430237, |
|
"learning_rate": 1.263034245443473e-06, |
|
"loss": 0.1296, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 2.564862861378799, |
|
"grad_norm": 0.3243284523487091, |
|
"learning_rate": 1.2462735372353996e-06, |
|
"loss": 0.1262, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.5678280207561155, |
|
"grad_norm": 0.33498314023017883, |
|
"learning_rate": 1.2296173887730122e-06, |
|
"loss": 0.1311, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 2.5707931801334323, |
|
"grad_norm": 0.32444214820861816, |
|
"learning_rate": 1.2130659990073146e-06, |
|
"loss": 0.1251, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.5737583395107486, |
|
"grad_norm": 0.3283936083316803, |
|
"learning_rate": 1.196619565638003e-06, |
|
"loss": 0.1266, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 2.5767234988880654, |
|
"grad_norm": 0.33177807927131653, |
|
"learning_rate": 1.1802782851111206e-06, |
|
"loss": 0.1277, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 2.5796886582653817, |
|
"grad_norm": 0.327374130487442, |
|
"learning_rate": 1.1640423526166987e-06, |
|
"loss": 0.1273, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.582653817642698, |
|
"grad_norm": 0.3298618495464325, |
|
"learning_rate": 1.1479119620864277e-06, |
|
"loss": 0.1278, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.585618977020015, |
|
"grad_norm": 0.34262576699256897, |
|
"learning_rate": 1.1318873061913405e-06, |
|
"loss": 0.1253, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 2.588584136397331, |
|
"grad_norm": 0.33369916677474976, |
|
"learning_rate": 1.1159685763395113e-06, |
|
"loss": 0.1277, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 2.591549295774648, |
|
"grad_norm": 0.32637131214141846, |
|
"learning_rate": 1.1001559626737757e-06, |
|
"loss": 0.1285, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.5945144551519643, |
|
"grad_norm": 0.33180394768714905, |
|
"learning_rate": 1.0844496540694515e-06, |
|
"loss": 0.1294, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.597479614529281, |
|
"grad_norm": 0.36661967635154724, |
|
"learning_rate": 1.0688498381320855e-06, |
|
"loss": 0.127, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 2.6004447739065975, |
|
"grad_norm": 0.32528406381607056, |
|
"learning_rate": 1.0533567011952094e-06, |
|
"loss": 0.1253, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 2.6034099332839142, |
|
"grad_norm": 0.33627548813819885, |
|
"learning_rate": 1.037970428318118e-06, |
|
"loss": 0.1258, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 2.6063750926612306, |
|
"grad_norm": 0.329609215259552, |
|
"learning_rate": 1.022691203283661e-06, |
|
"loss": 0.1268, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.609340252038547, |
|
"grad_norm": 0.3270719647407532, |
|
"learning_rate": 1.0075192085960451e-06, |
|
"loss": 0.1282, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.6123054114158637, |
|
"grad_norm": 0.3354145586490631, |
|
"learning_rate": 9.924546254786493e-07, |
|
"loss": 0.1285, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.61527057079318, |
|
"grad_norm": 0.32381850481033325, |
|
"learning_rate": 9.77497633871868e-07, |
|
"loss": 0.1294, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.618235730170497, |
|
"grad_norm": 0.32297268509864807, |
|
"learning_rate": 9.62648412430951e-07, |
|
"loss": 0.1268, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.621200889547813, |
|
"grad_norm": 0.3353489339351654, |
|
"learning_rate": 9.479071385238892e-07, |
|
"loss": 0.1263, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.6241660489251295, |
|
"grad_norm": 0.331815630197525, |
|
"learning_rate": 9.332739882292752e-07, |
|
"loss": 0.128, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.6271312083024463, |
|
"grad_norm": 0.33713892102241516, |
|
"learning_rate": 9.187491363342094e-07, |
|
"loss": 0.1269, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.6300963676797626, |
|
"grad_norm": 0.3313647508621216, |
|
"learning_rate": 9.043327563322113e-07, |
|
"loss": 0.1305, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.6330615270570794, |
|
"grad_norm": 0.32444262504577637, |
|
"learning_rate": 8.900250204211513e-07, |
|
"loss": 0.1291, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.6360266864343957, |
|
"grad_norm": 0.34167933464050293, |
|
"learning_rate": 8.758260995011825e-07, |
|
"loss": 0.1263, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.6389918458117125, |
|
"grad_norm": 0.3300521671772003, |
|
"learning_rate": 8.617361631727139e-07, |
|
"loss": 0.1258, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.641957005189029, |
|
"grad_norm": 0.3591514527797699, |
|
"learning_rate": 8.477553797343729e-07, |
|
"loss": 0.1268, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.6449221645663457, |
|
"grad_norm": 0.3284503221511841, |
|
"learning_rate": 8.338839161809997e-07, |
|
"loss": 0.127, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.647887323943662, |
|
"grad_norm": 0.3253602981567383, |
|
"learning_rate": 8.201219382016556e-07, |
|
"loss": 0.1259, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.6508524833209783, |
|
"grad_norm": 0.3226112723350525, |
|
"learning_rate": 8.06469610177636e-07, |
|
"loss": 0.1264, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.653817642698295, |
|
"grad_norm": 0.3329734206199646, |
|
"learning_rate": 7.92927095180518e-07, |
|
"loss": 0.1277, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.6567828020756115, |
|
"grad_norm": 0.36240342259407043, |
|
"learning_rate": 7.794945549701993e-07, |
|
"loss": 0.1286, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.6597479614529282, |
|
"grad_norm": 0.3200359642505646, |
|
"learning_rate": 7.661721499929753e-07, |
|
"loss": 0.1277, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.6627131208302446, |
|
"grad_norm": 0.33148688077926636, |
|
"learning_rate": 7.529600393796232e-07, |
|
"loss": 0.1277, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.665678280207561, |
|
"grad_norm": 0.32987260818481445, |
|
"learning_rate": 7.398583809434944e-07, |
|
"loss": 0.128, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.6686434395848777, |
|
"grad_norm": 0.33015844225883484, |
|
"learning_rate": 7.268673311786378e-07, |
|
"loss": 0.1307, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.6716085989621945, |
|
"grad_norm": 0.32374393939971924, |
|
"learning_rate": 7.1398704525792e-07, |
|
"loss": 0.1277, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.674573758339511, |
|
"grad_norm": 0.318718284368515, |
|
"learning_rate": 7.012176770311863e-07, |
|
"loss": 0.1266, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.677538917716827, |
|
"grad_norm": 0.3262283205986023, |
|
"learning_rate": 6.885593790234057e-07, |
|
"loss": 0.1251, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.680504077094144, |
|
"grad_norm": 0.3396647274494171, |
|
"learning_rate": 6.760123024328624e-07, |
|
"loss": 0.1303, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.6834692364714603, |
|
"grad_norm": 0.3207716643810272, |
|
"learning_rate": 6.635765971293484e-07, |
|
"loss": 0.1274, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.686434395848777, |
|
"grad_norm": 0.32596075534820557, |
|
"learning_rate": 6.512524116523633e-07, |
|
"loss": 0.1257, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.6893995552260934, |
|
"grad_norm": 0.322693407535553, |
|
"learning_rate": 6.390398932093555e-07, |
|
"loss": 0.1248, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.6923647146034098, |
|
"grad_norm": 0.3405155837535858, |
|
"learning_rate": 6.269391876739494e-07, |
|
"loss": 0.1291, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.6953298739807265, |
|
"grad_norm": 0.32777202129364014, |
|
"learning_rate": 6.149504395842087e-07, |
|
"loss": 0.1288, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.698295033358043, |
|
"grad_norm": 0.3245905935764313, |
|
"learning_rate": 6.030737921409169e-07, |
|
"loss": 0.1261, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.7012601927353597, |
|
"grad_norm": 0.33581435680389404, |
|
"learning_rate": 5.913093872058528e-07, |
|
"loss": 0.1302, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.704225352112676, |
|
"grad_norm": 0.3299258053302765, |
|
"learning_rate": 5.796573653001091e-07, |
|
"loss": 0.1264, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.7071905114899923, |
|
"grad_norm": 0.3272840082645416, |
|
"learning_rate": 5.681178656024055e-07, |
|
"loss": 0.1269, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.710155670867309, |
|
"grad_norm": 0.34318456053733826, |
|
"learning_rate": 5.56691025947429e-07, |
|
"loss": 0.1267, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.713120830244626, |
|
"grad_norm": 0.32936856150627136, |
|
"learning_rate": 5.453769828241872e-07, |
|
"loss": 0.1259, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.7160859896219423, |
|
"grad_norm": 0.32494810223579407, |
|
"learning_rate": 5.341758713743828e-07, |
|
"loss": 0.1281, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.7190511489992586, |
|
"grad_norm": 0.3184977173805237, |
|
"learning_rate": 5.230878253907911e-07, |
|
"loss": 0.1271, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.7220163083765754, |
|
"grad_norm": 0.3371221125125885, |
|
"learning_rate": 5.121129773156663e-07, |
|
"loss": 0.1322, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.7249814677538917, |
|
"grad_norm": 0.32113394141197205, |
|
"learning_rate": 5.012514582391592e-07, |
|
"loss": 0.1272, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.7279466271312085, |
|
"grad_norm": 0.34688618779182434, |
|
"learning_rate": 4.905033978977492e-07, |
|
"loss": 0.1265, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.730911786508525, |
|
"grad_norm": 0.3413783311843872, |
|
"learning_rate": 4.798689246727006e-07, |
|
"loss": 0.1307, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.733876945885841, |
|
"grad_norm": 0.33555951714515686, |
|
"learning_rate": 4.693481655885257e-07, |
|
"loss": 0.1269, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.736842105263158, |
|
"grad_norm": 0.3346193730831146, |
|
"learning_rate": 4.58941246311464e-07, |
|
"loss": 0.1268, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.7398072646404743, |
|
"grad_norm": 0.3286806344985962, |
|
"learning_rate": 4.4864829114798394e-07, |
|
"loss": 0.1288, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.742772424017791, |
|
"grad_norm": 0.33568400144577026, |
|
"learning_rate": 4.384694230432984e-07, |
|
"loss": 0.1269, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.7457375833951074, |
|
"grad_norm": 0.3334142565727234, |
|
"learning_rate": 4.2840476357989825e-07, |
|
"loss": 0.1272, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.7487027427724238, |
|
"grad_norm": 0.32712700963020325, |
|
"learning_rate": 4.184544329761009e-07, |
|
"loss": 0.1271, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.7516679021497406, |
|
"grad_norm": 0.3435976803302765, |
|
"learning_rate": 4.0861855008460403e-07, |
|
"loss": 0.1286, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.7546330615270573, |
|
"grad_norm": 0.3265362083911896, |
|
"learning_rate": 3.988972323910778e-07, |
|
"loss": 0.1281, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.7575982209043737, |
|
"grad_norm": 0.32593265175819397, |
|
"learning_rate": 3.8929059601275463e-07, |
|
"loss": 0.1273, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.76056338028169, |
|
"grad_norm": 0.3315712511539459, |
|
"learning_rate": 3.797987556970495e-07, |
|
"loss": 0.1296, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.763528539659007, |
|
"grad_norm": 0.32149094343185425, |
|
"learning_rate": 3.7042182482018074e-07, |
|
"loss": 0.1284, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.766493699036323, |
|
"grad_norm": 0.3222528100013733, |
|
"learning_rate": 3.611599153858214e-07, |
|
"loss": 0.1259, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.76945885841364, |
|
"grad_norm": 0.32701918482780457, |
|
"learning_rate": 3.520131380237546e-07, |
|
"loss": 0.1287, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.7724240177909563, |
|
"grad_norm": 0.32082316279411316, |
|
"learning_rate": 3.429816019885657e-07, |
|
"loss": 0.1279, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.7753891771682726, |
|
"grad_norm": 0.32540014386177063, |
|
"learning_rate": 3.3406541515832e-07, |
|
"loss": 0.1273, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.7783543365455894, |
|
"grad_norm": 0.32940953969955444, |
|
"learning_rate": 3.252646840332918e-07, |
|
"loss": 0.1264, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.7813194959229057, |
|
"grad_norm": 0.32957059144973755, |
|
"learning_rate": 3.16579513734675e-07, |
|
"loss": 0.128, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.7842846553002225, |
|
"grad_norm": 0.31844544410705566, |
|
"learning_rate": 3.080100080033388e-07, |
|
"loss": 0.1268, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.787249814677539, |
|
"grad_norm": 0.3339882493019104, |
|
"learning_rate": 2.995562691985898e-07, |
|
"loss": 0.1259, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.790214974054855, |
|
"grad_norm": 0.3345246911048889, |
|
"learning_rate": 2.9121839829693857e-07, |
|
"loss": 0.1284, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.793180133432172, |
|
"grad_norm": 0.34839048981666565, |
|
"learning_rate": 2.829964948909048e-07, |
|
"loss": 0.1263, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.7961452928094888, |
|
"grad_norm": 0.3264479637145996, |
|
"learning_rate": 2.748906571878207e-07, |
|
"loss": 0.1253, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.799110452186805, |
|
"grad_norm": 0.3153613805770874, |
|
"learning_rate": 2.6690098200866097e-07, |
|
"loss": 0.1242, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.8020756115641214, |
|
"grad_norm": 0.3332012891769409, |
|
"learning_rate": 2.5902756478688674e-07, |
|
"loss": 0.1271, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.805040770941438, |
|
"grad_norm": 0.3120848536491394, |
|
"learning_rate": 2.5127049956730207e-07, |
|
"loss": 0.128, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.8080059303187546, |
|
"grad_norm": 0.32999473810195923, |
|
"learning_rate": 2.436298790049363e-07, |
|
"loss": 0.126, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.8109710896960713, |
|
"grad_norm": 0.32779738306999207, |
|
"learning_rate": 2.3610579436392999e-07, |
|
"loss": 0.1259, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.8139362490733877, |
|
"grad_norm": 0.31936830282211304, |
|
"learning_rate": 2.2869833551645293e-07, |
|
"loss": 0.1241, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"grad_norm": 0.33030980825424194, |
|
"learning_rate": 2.2140759094162468e-07, |
|
"loss": 0.1274, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.819866567828021, |
|
"grad_norm": 0.34059956669807434, |
|
"learning_rate": 2.1423364772445886e-07, |
|
"loss": 0.1277, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.822831727205337, |
|
"grad_norm": 0.3205658793449402, |
|
"learning_rate": 2.071765915548274e-07, |
|
"loss": 0.1306, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.825796886582654, |
|
"grad_norm": 0.34191787242889404, |
|
"learning_rate": 2.002365067264289e-07, |
|
"loss": 0.1269, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.8287620459599703, |
|
"grad_norm": 0.3269594609737396, |
|
"learning_rate": 1.9341347613579086e-07, |
|
"loss": 0.1283, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.8317272053372866, |
|
"grad_norm": 0.33148735761642456, |
|
"learning_rate": 1.867075812812691e-07, |
|
"loss": 0.1298, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.8346923647146034, |
|
"grad_norm": 0.3186003267765045, |
|
"learning_rate": 1.8011890226208527e-07, |
|
"loss": 0.1274, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.83765752409192, |
|
"grad_norm": 0.32092559337615967, |
|
"learning_rate": 1.7364751777736334e-07, |
|
"loss": 0.1242, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.8406226834692365, |
|
"grad_norm": 0.36179545521736145, |
|
"learning_rate": 1.6729350512519006e-07, |
|
"loss": 0.129, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.843587842846553, |
|
"grad_norm": 0.33006298542022705, |
|
"learning_rate": 1.6105694020169594e-07, |
|
"loss": 0.1258, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.8465530022238696, |
|
"grad_norm": 0.31537604331970215, |
|
"learning_rate": 1.5493789750014032e-07, |
|
"loss": 0.1283, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.849518161601186, |
|
"grad_norm": 0.33820608258247375, |
|
"learning_rate": 1.489364501100332e-07, |
|
"loss": 0.1275, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.8524833209785028, |
|
"grad_norm": 0.3154459297657013, |
|
"learning_rate": 1.430526697162482e-07, |
|
"loss": 0.1258, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.855448480355819, |
|
"grad_norm": 0.31913918256759644, |
|
"learning_rate": 1.3728662659818205e-07, |
|
"loss": 0.1253, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.8584136397331354, |
|
"grad_norm": 0.32766804099082947, |
|
"learning_rate": 1.3163838962890196e-07, |
|
"loss": 0.129, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.8613787991104522, |
|
"grad_norm": 0.3298415541648865, |
|
"learning_rate": 1.2610802627432972e-07, |
|
"loss": 0.1278, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.8643439584877686, |
|
"grad_norm": 0.32275769114494324, |
|
"learning_rate": 1.206956025924333e-07, |
|
"loss": 0.126, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.8673091178650854, |
|
"grad_norm": 0.3340933918952942, |
|
"learning_rate": 1.1540118323243866e-07, |
|
"loss": 0.1272, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.8702742772424017, |
|
"grad_norm": 0.33475035429000854, |
|
"learning_rate": 1.1022483143405705e-07, |
|
"loss": 0.1265, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.873239436619718, |
|
"grad_norm": 0.32354483008384705, |
|
"learning_rate": 1.0516660902673448e-07, |
|
"loss": 0.1255, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.876204595997035, |
|
"grad_norm": 0.3189190924167633, |
|
"learning_rate": 1.0022657642890232e-07, |
|
"loss": 0.1254, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.8791697553743516, |
|
"grad_norm": 0.3238016366958618, |
|
"learning_rate": 9.540479264726676e-08, |
|
"loss": 0.1274, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.882134914751668, |
|
"grad_norm": 0.3224412798881531, |
|
"learning_rate": 9.070131527609604e-08, |
|
"loss": 0.1271, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.8851000741289843, |
|
"grad_norm": 0.34490659832954407, |
|
"learning_rate": 8.61162004965388e-08, |
|
"loss": 0.1277, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.888065233506301, |
|
"grad_norm": 0.3256824016571045, |
|
"learning_rate": 8.16495030759501e-08, |
|
"loss": 0.1304, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.8910303928836174, |
|
"grad_norm": 0.326412171125412, |
|
"learning_rate": 7.730127636723539e-08, |
|
"loss": 0.1271, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.893995552260934, |
|
"grad_norm": 0.32723942399024963, |
|
"learning_rate": 7.307157230821426e-08, |
|
"loss": 0.1291, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.8969607116382505, |
|
"grad_norm": 0.33483996987342834, |
|
"learning_rate": 6.896044142100433e-08, |
|
"loss": 0.1271, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.899925871015567, |
|
"grad_norm": 0.3145699203014374, |
|
"learning_rate": 6.496793281141056e-08, |
|
"loss": 0.1257, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.9028910303928837, |
|
"grad_norm": 0.3338087201118469, |
|
"learning_rate": 6.109409416834689e-08, |
|
"loss": 0.1272, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.9058561897702, |
|
"grad_norm": 0.3297833502292633, |
|
"learning_rate": 5.7338971763256646e-08, |
|
"loss": 0.1263, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.9088213491475168, |
|
"grad_norm": 0.32836630940437317, |
|
"learning_rate": 5.37026104495697e-08, |
|
"loss": 0.1264, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.911786508524833, |
|
"grad_norm": 0.32583150267601013, |
|
"learning_rate": 5.0185053662161756e-08, |
|
"loss": 0.1265, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.91475166790215, |
|
"grad_norm": 0.32299482822418213, |
|
"learning_rate": 4.678634341683252e-08, |
|
"loss": 0.1253, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.9177168272794662, |
|
"grad_norm": 0.32840579748153687, |
|
"learning_rate": 4.350652030981395e-08, |
|
"loss": 0.1286, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.920681986656783, |
|
"grad_norm": 0.3269804120063782, |
|
"learning_rate": 4.0345623517273894e-08, |
|
"loss": 0.1284, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.9236471460340994, |
|
"grad_norm": 0.31736278533935547, |
|
"learning_rate": 3.7303690794854296e-08, |
|
"loss": 0.1246, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.9266123054114157, |
|
"grad_norm": 0.3197997212409973, |
|
"learning_rate": 3.438075847721933e-08, |
|
"loss": 0.1247, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.9295774647887325, |
|
"grad_norm": 0.3263581395149231, |
|
"learning_rate": 3.157686147762129e-08, |
|
"loss": 0.1273, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.932542624166049, |
|
"grad_norm": 0.32051053643226624, |
|
"learning_rate": 2.8892033287484245e-08, |
|
"loss": 0.1265, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.9355077835433656, |
|
"grad_norm": 0.33849623799324036, |
|
"learning_rate": 2.6326305976001054e-08, |
|
"loss": 0.1287, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.938472942920682, |
|
"grad_norm": 0.3170969486236572, |
|
"learning_rate": 2.3879710189753657e-08, |
|
"loss": 0.1252, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.9414381022979983, |
|
"grad_norm": 0.32798030972480774, |
|
"learning_rate": 2.1552275152346702e-08, |
|
"loss": 0.1282, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.944403261675315, |
|
"grad_norm": 0.3274080157279968, |
|
"learning_rate": 1.9344028664056715e-08, |
|
"loss": 0.1249, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.9473684210526314, |
|
"grad_norm": 0.3368877172470093, |
|
"learning_rate": 1.7254997101500137e-08, |
|
"loss": 0.1287, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.950333580429948, |
|
"grad_norm": 0.32225024700164795, |
|
"learning_rate": 1.528520541731915e-08, |
|
"loss": 0.1259, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.9532987398072645, |
|
"grad_norm": 0.33008435368537903, |
|
"learning_rate": 1.3434677139885222e-08, |
|
"loss": 0.1262, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.9562638991845813, |
|
"grad_norm": 0.3370579183101654, |
|
"learning_rate": 1.170343437301491e-08, |
|
"loss": 0.126, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.9592290585618977, |
|
"grad_norm": 0.31601622700691223, |
|
"learning_rate": 1.0091497795706728e-08, |
|
"loss": 0.1269, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.9621942179392144, |
|
"grad_norm": 0.3216618299484253, |
|
"learning_rate": 8.59888666189579e-09, |
|
"loss": 0.126, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.965159377316531, |
|
"grad_norm": 0.3355175852775574, |
|
"learning_rate": 7.225618800222878e-09, |
|
"loss": 0.1278, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.968124536693847, |
|
"grad_norm": 0.32904869318008423, |
|
"learning_rate": 5.971710613821291e-09, |
|
"loss": 0.1284, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.971089696071164, |
|
"grad_norm": 0.351557195186615, |
|
"learning_rate": 4.837177080119215e-09, |
|
"loss": 0.1265, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.9740548554484803, |
|
"grad_norm": 0.32986804842948914, |
|
"learning_rate": 3.8220317506654226e-09, |
|
"loss": 0.1269, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.977020014825797, |
|
"grad_norm": 0.3295051157474518, |
|
"learning_rate": 2.9262867509605164e-09, |
|
"loss": 0.1261, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.9799851742031134, |
|
"grad_norm": 0.3266933858394623, |
|
"learning_rate": 2.149952780321485e-09, |
|
"loss": 0.1248, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.9829503335804297, |
|
"grad_norm": 0.32243990898132324, |
|
"learning_rate": 1.4930391117451427e-09, |
|
"loss": 0.1262, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.9859154929577465, |
|
"grad_norm": 0.34273526072502136, |
|
"learning_rate": 9.555535917993297e-10, |
|
"loss": 0.129, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.9888806523350633, |
|
"grad_norm": 0.3207569718360901, |
|
"learning_rate": 5.375026405352035e-10, |
|
"loss": 0.1259, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.9918458117123796, |
|
"grad_norm": 0.3252420723438263, |
|
"learning_rate": 2.388912514017516e-10, |
|
"loss": 0.1273, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.994810971089696, |
|
"grad_norm": 0.3213896155357361, |
|
"learning_rate": 5.972299119250124e-11, |
|
"loss": 0.1258, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.9977761304670127, |
|
"grad_norm": 0.3191134035587311, |
|
"learning_rate": 0.0, |
|
"loss": 0.1258, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.9977761304670127, |
|
"step": 1011, |
|
"total_flos": 1.1934072323532915e+19, |
|
"train_loss": 0.20480146514054692, |
|
"train_runtime": 12349.0973, |
|
"train_samples_per_second": 10.482, |
|
"train_steps_per_second": 0.082 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1011, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 999999, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1934072323532915e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|