{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.5500910746812386,
  "eval_steps": 500,
  "global_step": 1400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0, "grad_norm": 0.46431864432643544, "learning_rate": 1.2121212121212122e-06, "loss": 1.4151, "step": 1},
    {"epoch": 0.0, "grad_norm": 0.42969176658539837, "learning_rate": 2.4242424242424244e-06, "loss": 1.3729, "step": 2},
    {"epoch": 0.01, "grad_norm": 0.5004307936270223, "learning_rate": 3.636363636363636e-06, "loss": 1.3989, "step": 3},
    {"epoch": 0.01, "grad_norm": 0.43666634920041486, "learning_rate": 4.848484848484849e-06, "loss": 1.3363, "step": 4},
    {"epoch": 0.01, "grad_norm": 0.4691114419353825, "learning_rate": 6.060606060606061e-06, "loss": 1.4293, "step": 5},
    {"epoch": 0.01, "grad_norm": 0.4277596061377729, "learning_rate": 7.272727272727272e-06, "loss": 1.4343, "step": 6},
    {"epoch": 0.01, "grad_norm": 0.4238339229382504, "learning_rate": 8.484848484848486e-06, "loss": 1.4462, "step": 7},
    {"epoch": 0.01, "grad_norm": 0.40992048534183273, "learning_rate": 9.696969696969698e-06, "loss": 1.2756, "step": 8},
    {"epoch": 0.02, "grad_norm": 0.37885700313540693, "learning_rate": 1.0909090909090909e-05, "loss": 1.3464, "step": 9},
    {"epoch": 0.02, "grad_norm": 0.36066141170123023, "learning_rate": 1.2121212121212122e-05, "loss": 1.3419, "step": 10},
    {"epoch": 0.02, "grad_norm": 0.35617169386863406, "learning_rate": 1.3333333333333333e-05, "loss": 1.3533, "step": 11},
    {"epoch": 0.02, "grad_norm": 0.3040188564782602, "learning_rate": 1.4545454545454545e-05, "loss": 1.2395, "step": 12},
    {"epoch": 0.02, "grad_norm": 0.31038319439216566, "learning_rate": 1.5757575757575756e-05, "loss": 1.3082, "step": 13},
    {"epoch": 0.03, "grad_norm": 0.26683768372135835, "learning_rate": 1.6969696969696972e-05, "loss": 1.3063, "step": 14},
    {"epoch": 0.03, "grad_norm": 0.3652323682563078, "learning_rate": 1.8181818181818182e-05, "loss": 1.3045, "step": 15},
    {"epoch": 0.03, "grad_norm": 0.23559121485457843, "learning_rate": 1.9393939393939395e-05, "loss": 1.2366, "step": 16},
    {"epoch": 0.03, "grad_norm": 0.2342299313020104, "learning_rate": 2.0606060606060608e-05, "loss": 1.2831, "step": 17},
    {"epoch": 0.03, "grad_norm": 0.2202931700357255, "learning_rate": 2.1818181818181818e-05, "loss": 1.3064, "step": 18},
    {"epoch": 0.03, "grad_norm": 0.2097660599292375, "learning_rate": 2.3030303030303034e-05, "loss": 1.2376, "step": 19},
    {"epoch": 0.04, "grad_norm": 0.2356785314652122, "learning_rate": 2.4242424242424244e-05, "loss": 1.2802, "step": 20},
    {"epoch": 0.04, "grad_norm": 0.24639302530564244, "learning_rate": 2.5454545454545454e-05, "loss": 1.3016, "step": 21},
    {"epoch": 0.04, "grad_norm": 0.24373126133228787, "learning_rate": 2.6666666666666667e-05, "loss": 1.3407, "step": 22},
    {"epoch": 0.04, "grad_norm": 0.24488805144123432, "learning_rate": 2.7878787878787883e-05, "loss": 1.3325, "step": 23},
    {"epoch": 0.04, "grad_norm": 0.2653033507571198, "learning_rate": 2.909090909090909e-05, "loss": 1.2811, "step": 24},
    {"epoch": 0.05, "grad_norm": 1.2841724819336817, "learning_rate": 3.0303030303030306e-05, "loss": 1.2837, "step": 25},
    {"epoch": 0.05, "grad_norm": 0.2183883020111492, "learning_rate": 3.151515151515151e-05, "loss": 1.2472, "step": 26},
    {"epoch": 0.05, "grad_norm": 0.2137995163762026, "learning_rate": 3.272727272727273e-05, "loss": 1.2854, "step": 27},
    {"epoch": 0.05, "grad_norm": 0.19499006223503876, "learning_rate": 3.3939393939393945e-05, "loss": 1.3018, "step": 28},
    {"epoch": 0.05, "grad_norm": 0.17367919355340256, "learning_rate": 3.515151515151515e-05, "loss": 1.2824, "step": 29},
    {"epoch": 0.05, "grad_norm": 0.18326045693683557, "learning_rate": 3.6363636363636364e-05, "loss": 1.2192, "step": 30},
    {"epoch": 0.06, "grad_norm": 0.17474388188066411, "learning_rate": 3.757575757575758e-05, "loss": 1.2078, "step": 31},
    {"epoch": 0.06, "grad_norm": 0.17856970178098716, "learning_rate": 3.878787878787879e-05, "loss": 1.2683, "step": 32},
    {"epoch": 0.06, "grad_norm": 0.18617589704298348, "learning_rate": 4e-05, "loss": 1.2265, "step": 33},
    {"epoch": 0.06, "grad_norm": 0.17653209733215317, "learning_rate": 4.1212121212121216e-05, "loss": 1.319, "step": 34},
    {"epoch": 0.06, "grad_norm": 0.1722921367585233, "learning_rate": 4.242424242424243e-05, "loss": 1.2117, "step": 35},
    {"epoch": 0.07, "grad_norm": 0.176642606378719, "learning_rate": 4.3636363636363636e-05, "loss": 1.2512, "step": 36},
    {"epoch": 0.07, "grad_norm": 0.16696442324691066, "learning_rate": 4.484848484848485e-05, "loss": 1.2637, "step": 37},
    {"epoch": 0.07, "grad_norm": 0.17035384059517106, "learning_rate": 4.606060606060607e-05, "loss": 1.2699, "step": 38},
    {"epoch": 0.07, "grad_norm": 0.15545801881444482, "learning_rate": 4.7272727272727275e-05, "loss": 1.2939, "step": 39},
    {"epoch": 0.07, "grad_norm": 0.17111439344347512, "learning_rate": 4.848484848484849e-05, "loss": 1.3033, "step": 40},
    {"epoch": 0.07, "grad_norm": 0.16994151343455458, "learning_rate": 4.9696969696969694e-05, "loss": 1.2603, "step": 41},
    {"epoch": 0.08, "grad_norm": 0.15929214926453447, "learning_rate": 5.090909090909091e-05, "loss": 1.2626, "step": 42},
    {"epoch": 0.08, "grad_norm": 0.16761261516238699, "learning_rate": 5.212121212121213e-05, "loss": 1.296, "step": 43},
    {"epoch": 0.08, "grad_norm": 0.15754700542426123, "learning_rate": 5.333333333333333e-05, "loss": 1.278, "step": 44},
    {"epoch": 0.08, "grad_norm": 0.15522526683877644, "learning_rate": 5.4545454545454546e-05, "loss": 1.2355, "step": 45},
    {"epoch": 0.08, "grad_norm": 0.1577929926930023, "learning_rate": 5.5757575757575766e-05, "loss": 1.2879, "step": 46},
    {"epoch": 0.09, "grad_norm": 0.31075066632858317, "learning_rate": 5.696969696969697e-05, "loss": 1.2202, "step": 47},
    {"epoch": 0.09, "grad_norm": 0.1663780653395111, "learning_rate": 5.818181818181818e-05, "loss": 1.2319, "step": 48},
    {"epoch": 0.09, "grad_norm": 0.16049499655883026, "learning_rate": 5.93939393939394e-05, "loss": 1.2801, "step": 49},
    {"epoch": 0.09, "grad_norm": 0.14515773124436285, "learning_rate": 6.060606060606061e-05, "loss": 1.2588, "step": 50},
    {"epoch": 0.09, "grad_norm": 0.14653064850325623, "learning_rate": 6.181818181818182e-05, "loss": 1.2677, "step": 51},
    {"epoch": 0.09, "grad_norm": 0.17193239746689878, "learning_rate": 6.303030303030302e-05, "loss": 1.2742, "step": 52},
    {"epoch": 0.1, "grad_norm": 0.1967020450342533, "learning_rate": 6.424242424242424e-05, "loss": 1.1545, "step": 53},
    {"epoch": 0.1, "grad_norm": 0.16247531997247225, "learning_rate": 6.545454545454546e-05, "loss": 1.222, "step": 54},
    {"epoch": 0.1, "grad_norm": 0.14990706377244528, "learning_rate": 6.666666666666667e-05, "loss": 1.2103, "step": 55},
    {"epoch": 0.1, "grad_norm": 0.1412817445239095, "learning_rate": 6.787878787878789e-05, "loss": 1.2169, "step": 56},
    {"epoch": 0.1, "grad_norm": 0.14575971073482757, "learning_rate": 6.90909090909091e-05, "loss": 1.2751, "step": 57},
    {"epoch": 0.11, "grad_norm": 0.13714747569950891, "learning_rate": 7.03030303030303e-05, "loss": 1.2508, "step": 58},
    {"epoch": 0.11, "grad_norm": 0.14334695156859903, "learning_rate": 7.151515151515152e-05, "loss": 1.2721, "step": 59},
    {"epoch": 0.11, "grad_norm": 0.1456824177522916, "learning_rate": 7.272727272727273e-05, "loss": 1.2649, "step": 60},
    {"epoch": 0.11, "grad_norm": 0.15030318240210044, "learning_rate": 7.393939393939395e-05, "loss": 1.2167, "step": 61},
    {"epoch": 0.11, "grad_norm": 0.1651326066719482, "learning_rate": 7.515151515151515e-05, "loss": 1.3126, "step": 62},
    {"epoch": 0.11, "grad_norm": 0.1408250406479118, "learning_rate": 7.636363636363637e-05, "loss": 1.2891, "step": 63},
    {"epoch": 0.12, "grad_norm": 0.21501384376905694, "learning_rate": 7.757575757575758e-05, "loss": 1.3019, "step": 64},
    {"epoch": 0.12, "grad_norm": 0.1365168726167339, "learning_rate": 7.878787878787879e-05, "loss": 1.2498, "step": 65},
    {"epoch": 0.12, "grad_norm": 0.1431463689660936, "learning_rate": 8e-05, "loss": 1.2793, "step": 66},
    {"epoch": 0.12, "grad_norm": 0.13689045214286194, "learning_rate": 8.121212121212121e-05, "loss": 1.2295, "step": 67},
    {"epoch": 0.12, "grad_norm": 0.13483608710081227, "learning_rate": 8.242424242424243e-05, "loss": 1.2258, "step": 68},
    {"epoch": 0.13, "grad_norm": 0.13707618564415613, "learning_rate": 8.363636363636364e-05, "loss": 1.2252, "step": 69},
    {"epoch": 0.13, "grad_norm": 0.13780236215967515, "learning_rate": 8.484848484848486e-05, "loss": 1.2565, "step": 70},
    {"epoch": 0.13, "grad_norm": 0.14036805493494423, "learning_rate": 8.606060606060606e-05, "loss": 1.3023, "step": 71},
    {"epoch": 0.13, "grad_norm": 0.12776919439147982, "learning_rate": 8.727272727272727e-05, "loss": 1.2292, "step": 72},
    {"epoch": 0.13, "grad_norm": 0.1289941815481437, "learning_rate": 8.848484848484849e-05, "loss": 1.2191, "step": 73},
    {"epoch": 0.13, "grad_norm": 0.13943952294847306, "learning_rate": 8.96969696969697e-05, "loss": 1.2915, "step": 74},
    {"epoch": 0.14, "grad_norm": 0.1493528502117281, "learning_rate": 9.090909090909092e-05, "loss": 1.2797, "step": 75},
    {"epoch": 0.14, "grad_norm": 0.1252401242451818, "learning_rate": 9.212121212121214e-05, "loss": 1.2552, "step": 76},
    {"epoch": 0.14, "grad_norm": 0.13969800467546992, "learning_rate": 9.333333333333334e-05, "loss": 1.3147, "step": 77},
    {"epoch": 0.14, "grad_norm": 0.1277258491470434, "learning_rate": 9.454545454545455e-05, "loss": 1.2089, "step": 78},
    {"epoch": 0.14, "grad_norm": 0.133041369314817, "learning_rate": 9.575757575757576e-05, "loss": 1.2761, "step": 79},
    {"epoch": 0.15, "grad_norm": 0.14564572037181842, "learning_rate": 9.696969696969698e-05, "loss": 1.1901, "step": 80},
    {"epoch": 0.15, "grad_norm": 0.13666505656492195, "learning_rate": 9.818181818181818e-05, "loss": 1.2615, "step": 81},
    {"epoch": 0.15, "grad_norm": 0.135007805210003, "learning_rate": 9.939393939393939e-05, "loss": 1.2669, "step": 82},
    {"epoch": 0.15, "grad_norm": 0.17287563365884975, "learning_rate": 0.00010060606060606062, "loss": 1.2669, "step": 83},
    {"epoch": 0.15, "grad_norm": 0.12934306326048103, "learning_rate": 0.00010181818181818181, "loss": 1.1979, "step": 84},
    {"epoch": 0.15, "grad_norm": 0.13517436169178096, "learning_rate": 0.00010303030303030303, "loss": 1.2226, "step": 85},
    {"epoch": 0.16, "grad_norm": 0.12105351159271568, "learning_rate": 0.00010424242424242425, "loss": 1.1172, "step": 86},
    {"epoch": 0.16, "grad_norm": 0.1281676431775383, "learning_rate": 0.00010545454545454545, "loss": 1.2046, "step": 87},
    {"epoch": 0.16, "grad_norm": 0.11730963057933333, "learning_rate": 0.00010666666666666667, "loss": 1.1883, "step": 88},
    {"epoch": 0.16, "grad_norm": 0.12655235108503246, "learning_rate": 0.00010787878787878789, "loss": 1.1331, "step": 89},
    {"epoch": 0.16, "grad_norm": 0.13047560307970027, "learning_rate": 0.00010909090909090909, "loss": 1.2731, "step": 90},
    {"epoch": 0.17, "grad_norm": 0.12193522973752649, "learning_rate": 0.00011030303030303031, "loss": 1.2161, "step": 91},
    {"epoch": 0.17, "grad_norm": 0.12804360300116346, "learning_rate": 0.00011151515151515153, "loss": 1.3062, "step": 92},
    {"epoch": 0.17, "grad_norm": 0.15991741754516206, "learning_rate": 0.00011272727272727272, "loss": 1.239, "step": 93},
    {"epoch": 0.17, "grad_norm": 0.15140182244454561, "learning_rate": 0.00011393939393939394, "loss": 1.2349, "step": 94},
    {"epoch": 0.17, "grad_norm": 0.12320241076263434, "learning_rate": 0.00011515151515151516, "loss": 1.2875, "step": 95},
    {"epoch": 0.17, "grad_norm": 0.13235998458230466, "learning_rate": 0.00011636363636363636, "loss": 1.2218, "step": 96},
    {"epoch": 0.18, "grad_norm": 0.11783688734798668, "learning_rate": 0.00011757575757575758, "loss": 1.1864, "step": 97},
    {"epoch": 0.18, "grad_norm": 0.3151933420750235, "learning_rate": 0.0001187878787878788, "loss": 1.3023, "step": 98},
    {"epoch": 0.18, "grad_norm": 0.12665632567219295, "learning_rate": 0.00012, "loss": 1.2249, "step": 99},
    {"epoch": 0.18, "grad_norm": 0.1228886740460738, "learning_rate": 0.00012121212121212122, "loss": 1.2517, "step": 100},
    {"epoch": 0.18, "grad_norm": 0.11892005244989344, "learning_rate": 0.00012242424242424243, "loss": 1.2586, "step": 101},
    {"epoch": 0.19, "grad_norm": 0.1232340827222201, "learning_rate": 0.00012363636363636364, "loss": 1.3217, "step": 102},
    {"epoch": 0.19, "grad_norm": 0.13837226869323116, "learning_rate": 0.00012484848484848487, "loss": 1.2693, "step": 103},
    {"epoch": 0.19, "grad_norm": 0.12068217991774362, "learning_rate": 0.00012606060606060605, "loss": 1.2623, "step": 104},
    {"epoch": 0.19, "grad_norm": 0.16779277284606545, "learning_rate": 0.00012727272727272728, "loss": 1.2415, "step": 105},
    {"epoch": 0.19, "grad_norm": 0.13396891539963085, "learning_rate": 0.0001284848484848485, "loss": 1.2313, "step": 106},
    {"epoch": 0.19, "grad_norm": 0.12457104490772812, "learning_rate": 0.0001296969696969697, "loss": 1.1758, "step": 107},
    {"epoch": 0.2, "grad_norm": 0.12676816816563452, "learning_rate": 0.00013090909090909093, "loss": 1.2478, "step": 108},
    {"epoch": 0.2, "grad_norm": 0.11973639622066906, "learning_rate": 0.00013212121212121213, "loss": 1.2335, "step": 109},
    {"epoch": 0.2, "grad_norm": 0.1330159646034068, "learning_rate": 0.00013333333333333334, "loss": 1.26, "step": 110},
    {"epoch": 0.2, "grad_norm": 0.1298003025338099, "learning_rate": 0.00013454545454545455, "loss": 1.1907, "step": 111},
    {"epoch": 0.2, "grad_norm": 0.1226154813287666, "learning_rate": 0.00013575757575757578, "loss": 1.1807, "step": 112},
    {"epoch": 0.21, "grad_norm": 0.12533753244302145, "learning_rate": 0.00013696969696969696, "loss": 1.2098, "step": 113},
    {"epoch": 0.21, "grad_norm": 0.12673266503840944, "learning_rate": 0.0001381818181818182, "loss": 1.2265, "step": 114},
    {"epoch": 0.21, "grad_norm": 0.1299039569361384, "learning_rate": 0.0001393939393939394, "loss": 1.2534, "step": 115},
    {"epoch": 0.21, "grad_norm": 0.13023496663090803, "learning_rate": 0.0001406060606060606, "loss": 1.2453, "step": 116},
    {"epoch": 0.21, "grad_norm": 0.12001793500864573, "learning_rate": 0.00014181818181818184, "loss": 1.1608, "step": 117},
    {"epoch": 0.21, "grad_norm": 0.14561862193041028, "learning_rate": 0.00014303030303030304, "loss": 1.2233, "step": 118},
    {"epoch": 0.22, "grad_norm": 0.12636130876430832, "learning_rate": 0.00014424242424242425, "loss": 1.2833, "step": 119},
    {"epoch": 0.22, "grad_norm": 0.189556849271166, "learning_rate": 0.00014545454545454546, "loss": 1.3105, "step": 120},
    {"epoch": 0.22, "grad_norm": 0.12409073764495662, "learning_rate": 0.00014666666666666666, "loss": 1.1534, "step": 121},
    {"epoch": 0.22, "grad_norm": 0.12149212466969316, "learning_rate": 0.0001478787878787879, "loss": 1.3039, "step": 122},
    {"epoch": 0.22, "grad_norm": 0.12147336887953522, "learning_rate": 0.0001490909090909091, "loss": 1.326, "step": 123},
    {"epoch": 0.23, "grad_norm": 0.1176585016163167, "learning_rate": 0.0001503030303030303, "loss": 1.191, "step": 124},
    {"epoch": 0.23, "grad_norm": 0.2066428974234372, "learning_rate": 0.00015151515151515152, "loss": 1.3054, "step": 125},
    {"epoch": 0.23, "grad_norm": 0.29582724255710047, "learning_rate": 0.00015272727272727275, "loss": 1.2032, "step": 126},
    {"epoch": 0.23, "grad_norm": 0.13084381204119358, "learning_rate": 0.00015393939393939393, "loss": 1.2289, "step": 127},
    {"epoch": 0.23, "grad_norm": 0.1294157600411397, "learning_rate": 0.00015515151515151516, "loss": 1.2561, "step": 128},
    {"epoch": 0.23, "grad_norm": 0.14039614543447027, "learning_rate": 0.00015636363636363637, "loss": 1.243, "step": 129},
    {"epoch": 0.24, "grad_norm": 0.19939984917282128, "learning_rate": 0.00015757575757575757, "loss": 1.1286, "step": 130},
    {"epoch": 0.24, "grad_norm": 0.14402764349968203, "learning_rate": 0.0001587878787878788, "loss": 1.1959, "step": 131},
    {"epoch": 0.24, "grad_norm": 0.13970978861500938, "learning_rate": 0.00016, "loss": 1.1814, "step": 132},
    {"epoch": 0.24, "grad_norm": 0.14539538472563127, "learning_rate": 0.00016121212121212122, "loss": 1.2317, "step": 133},
    {"epoch": 0.24, "grad_norm": 0.13456425455391557, "learning_rate": 0.00016242424242424243, "loss": 1.2239, "step": 134},
    {"epoch": 0.25, "grad_norm": 0.1314997837157779, "learning_rate": 0.00016363636363636366, "loss": 1.1986, "step": 135},
    {"epoch": 0.25, "grad_norm": 0.14046946525591422, "learning_rate": 0.00016484848484848487, "loss": 1.2238, "step": 136},
    {"epoch": 0.25, "grad_norm": 0.6095538041505763, "learning_rate": 0.00016606060606060607, "loss": 1.2332, "step": 137},
    {"epoch": 0.25, "grad_norm": 0.17707289712054367, "learning_rate": 0.00016727272727272728, "loss": 1.2401, "step": 138},
    {"epoch": 0.25, "grad_norm": 0.19335172179099247, "learning_rate": 0.00016848484848484848, "loss": 1.2361, "step": 139},
    {"epoch": 0.26, "grad_norm": 0.13725591818701255, "learning_rate": 0.00016969696969696972, "loss": 1.193, "step": 140},
    {"epoch": 0.26, "grad_norm": 0.15535575462507384, "learning_rate": 0.0001709090909090909, "loss": 1.2769, "step": 141},
    {"epoch": 0.26, "grad_norm": 0.14909436560898923, "learning_rate": 0.00017212121212121213, "loss": 1.2602, "step": 142},
    {"epoch": 0.26, "grad_norm": 0.15054368082407957, "learning_rate": 0.00017333333333333334, "loss": 1.2607, "step": 143},
    {"epoch": 0.26, "grad_norm": 0.13386897838741724, "learning_rate": 0.00017454545454545454, "loss": 1.168, "step": 144},
    {"epoch": 0.26, "grad_norm": 0.13567889528730145, "learning_rate": 0.00017575757575757578, "loss": 1.1984, "step": 145},
    {"epoch": 0.27, "grad_norm": 0.13994382298003089, "learning_rate": 0.00017696969696969698, "loss": 1.2795, "step": 146},
    {"epoch": 0.27, "grad_norm": 0.13941573210713187, "learning_rate": 0.0001781818181818182, "loss": 1.2303, "step": 147},
    {"epoch": 0.27, "grad_norm": 0.18302605925485763, "learning_rate": 0.0001793939393939394, "loss": 1.2696, "step": 148},
    {"epoch": 0.27, "grad_norm": 0.1547402223275396, "learning_rate": 0.00018060606060606063, "loss": 1.1276, "step": 149},
    {"epoch": 0.27, "grad_norm": 0.19947594494850646, "learning_rate": 0.00018181818181818183, "loss": 1.271, "step": 150},
    {"epoch": 0.28, "grad_norm": 0.1517101450465788, "learning_rate": 0.00018303030303030304, "loss": 1.2193, "step": 151},
    {"epoch": 0.28, "grad_norm": 0.19251063116857103, "learning_rate": 0.00018424242424242427, "loss": 1.2703, "step": 152},
    {"epoch": 0.28, "grad_norm": 0.16789099560498666, "learning_rate": 0.00018545454545454545, "loss": 1.2244, "step": 153},
    {"epoch": 0.28, "grad_norm": 0.14907376557922342, "learning_rate": 0.0001866666666666667, "loss": 1.264, "step": 154},
    {"epoch": 0.28, "grad_norm": 0.14276598263036905, "learning_rate": 0.0001878787878787879, "loss": 1.2545, "step": 155},
    {"epoch": 0.28, "grad_norm": 0.14526753816999002, "learning_rate": 0.0001890909090909091, "loss": 1.2912, "step": 156},
    {"epoch": 0.29, "grad_norm": 0.1627048894660859, "learning_rate": 0.0001903030303030303, "loss": 1.2573, "step": 157},
    {"epoch": 0.29, "grad_norm": 0.16405036632332695, "learning_rate": 0.0001915151515151515, "loss": 1.2359, "step": 158},
    {"epoch": 0.29, "grad_norm": 0.14533427219788658, "learning_rate": 0.00019272727272727274, "loss": 1.1718, "step": 159},
    {"epoch": 0.29, "grad_norm": 0.13802382666732702, "learning_rate": 0.00019393939393939395, "loss": 1.2297, "step": 160},
    {"epoch": 0.29, "grad_norm": 0.15620193618511755, "learning_rate": 0.00019515151515151516, "loss": 1.2287, "step": 161},
    {"epoch": 0.3, "grad_norm": 0.1401696295700075, "learning_rate": 0.00019636363636363636, "loss": 1.2231, "step": 162},
    {"epoch": 0.3, "grad_norm": 0.15816133304035035, "learning_rate": 0.0001975757575757576, "loss": 1.2804, "step": 163},
    {"epoch": 0.3, "grad_norm": 0.14626275180535692, "learning_rate": 0.00019878787878787878, "loss": 1.2115, "step": 164},
    {"epoch": 0.3, "grad_norm": 0.13100680398305042, "learning_rate": 0.0002, "loss": 1.2524, "step": 165},
    {"epoch": 0.3, "grad_norm": 0.14849458896148926, "learning_rate": 0.00019999977531546566, "loss": 1.2161, "step": 166},
    {"epoch": 0.3, "grad_norm": 0.13628125499037252, "learning_rate": 0.0001999991012628722, "loss": 1.2452, "step": 167},
    {"epoch": 0.31, "grad_norm": 0.18617698759086793, "learning_rate": 0.00019999797784524866, "loss": 1.2197, "step": 168},
    {"epoch": 0.31, "grad_norm": 0.14416004826313944, "learning_rate": 0.00019999640506764336, "loss": 1.2796, "step": 169},
    {"epoch": 0.31, "grad_norm": 0.13807081386834757, "learning_rate": 0.0001999943829371238, "loss": 1.2732, "step": 170},
    {"epoch": 0.31, "grad_norm": 0.16526927436841996, "learning_rate": 0.0001999919114627769, "loss": 1.3016, "step": 171},
    {"epoch": 0.31, "grad_norm": 0.14479672734919855, "learning_rate": 0.0001999889906557086, "loss": 1.3106, "step": 172},
    {"epoch": 0.32, "grad_norm": 0.13829284006072087, "learning_rate": 0.00019998562052904418, "loss": 1.3355, "step": 173},
    {"epoch": 0.32, "grad_norm": 0.13484630104616105, "learning_rate": 0.0001999818010979279, "loss": 1.1928, "step": 174},
    {"epoch": 0.32, "grad_norm": 0.14972770674556948, "learning_rate": 0.00019997753237952317, "loss": 1.2559, "step": 175},
    {"epoch": 0.32, "grad_norm": 0.13378525020528342, "learning_rate": 0.00019997281439301218, "loss": 1.2673, "step": 176},
    {"epoch": 0.32, "grad_norm": 0.13242998699125438, "learning_rate": 0.00019996764715959618, "loss": 1.2272, "step": 177},
    {"epoch": 0.32, "grad_norm": 0.12938881004364342, "learning_rate": 0.00019996203070249516, "loss": 1.2035, "step": 178},
    {"epoch": 0.33, "grad_norm": 0.13388032350164566, "learning_rate": 0.00019995596504694763, "loss": 1.2642, "step": 179},
    {"epoch": 0.33, "grad_norm": 0.13893372222140873, "learning_rate": 0.00019994945022021082, "loss": 1.2235, "step": 180},
    {"epoch": 0.33, "grad_norm": 0.14131710715500717, "learning_rate": 0.00019994248625156038, "loss": 1.1095, "step": 181},
    {"epoch": 0.33, "grad_norm": 0.13448100369103572, "learning_rate": 0.0001999350731722902, "loss": 1.1879, "step": 182},
    {"epoch": 0.33, "grad_norm": 0.13862444003216381, "learning_rate": 0.00019992721101571236, "loss": 1.2227, "step": 183},
    {"epoch": 0.34, "grad_norm": 0.13506115547921224, "learning_rate": 0.00019991889981715698, "loss": 1.2833, "step": 184},
    {"epoch": 0.34, "grad_norm": 0.13174857502600473, "learning_rate": 0.00019991013961397197, "loss": 1.2394, "step": 185},
    {"epoch": 0.34, "grad_norm": 0.1290276308949748, "learning_rate": 0.00019990093044552304, "loss": 1.2659, "step": 186},
    {"epoch": 0.34, "grad_norm": 0.1388159912078538, "learning_rate": 0.0001998912723531933, "loss": 1.3052, "step": 187},
    {"epoch": 0.34, "grad_norm": 0.1256806205303357, "learning_rate": 0.00019988116538038325, "loss": 1.2031, "step": 188},
    {"epoch": 0.34, "grad_norm": 0.13256850855084143, "learning_rate": 0.00019987060957251047, "loss": 1.211, "step": 189},
    {"epoch": 0.35, "grad_norm": 0.13197363789890235, "learning_rate": 0.0001998596049770095, "loss": 1.2256, "step": 190},
    {"epoch": 0.35, "grad_norm": 0.13277364593883098, "learning_rate": 0.00019984815164333163, "loss": 1.2174, "step": 191},
    {"epoch": 0.35, "grad_norm": 0.13838072824574454, "learning_rate": 0.00019983624962294458, "loss": 1.3128, "step": 192},
    {"epoch": 0.35, "grad_norm": 0.13524759737199996, "learning_rate": 0.0001998238989693323, "loss": 1.1806, "step": 193},
    {"epoch": 0.35, "grad_norm": 0.12669987683723832, "learning_rate": 0.0001998110997379949, "loss": 1.2171, "step": 194},
    {"epoch": 0.36, "grad_norm": 0.1461834612451898, "learning_rate": 0.00019979785198644806, "loss": 1.2231, "step": 195},
    {"epoch": 0.36, "grad_norm": 0.13265793664862735, "learning_rate": 0.0001997841557742232, "loss": 1.1718, "step": 196},
    {"epoch": 0.36, "grad_norm": 0.12842971557690963, "learning_rate": 0.00019977001116286674, "loss": 1.2758, "step": 197},
    {"epoch": 0.36, "grad_norm": 0.12188365921206967, "learning_rate": 0.00019975541821594026, "loss": 1.2457, "step": 198},
    {"epoch": 0.36, "grad_norm": 0.12679949330022622, "learning_rate": 0.00019974037699901993, "loss": 1.1825, "step": 199},
    {"epoch": 0.36, "grad_norm": 0.12949746150357985, "learning_rate": 0.00019972488757969635, "loss": 1.2666, "step": 200},
    {"epoch": 0.37, "grad_norm": 0.1363496149379173, "learning_rate": 0.00019970895002757413, "loss": 1.2031, "step": 201},
    {"epoch": 0.37, "grad_norm": 0.14218340110669314, "learning_rate": 0.0001996925644142717, "loss": 1.3073, "step": 202},
    {"epoch": 0.37, "grad_norm": 0.14234535389443218, "learning_rate": 0.00019967573081342103, "loss": 1.2444, "step": 203},
    {"epoch": 0.37, "grad_norm": 0.12866113026310516, "learning_rate": 0.000199658449300667, "loss": 1.2257, "step": 204},
    {"epoch": 0.37, "grad_norm": 0.1324053366295965, "learning_rate": 0.00019964071995366744, "loss": 1.2374, "step": 205},
    {"epoch": 0.38, "grad_norm": 0.12906841330218152, "learning_rate": 0.00019962254285209254, "loss": 1.2334, "step": 206},
    {"epoch": 0.38, "grad_norm": 0.13620873131846425, "learning_rate": 0.00019960391807762463, "loss": 1.242, "step": 207},
    {"epoch": 0.38, "grad_norm": 0.14877366842835116, "learning_rate": 0.00019958484571395757, "loss": 1.1772, "step": 208},
    {"epoch": 0.38, "grad_norm": 0.13914108740445985, "learning_rate": 0.00019956532584679675, "loss": 1.2734, "step": 209},
    {"epoch": 0.38, "grad_norm": 0.13198394930310692, "learning_rate": 0.00019954535856385837, "loss": 1.1728, "step": 210},
    {"epoch": 0.38, "grad_norm": 0.3807736597404611, "learning_rate": 0.0001995249439548693, "loss": 1.2089, "step": 211},
    {"epoch": 0.39, "grad_norm": 0.1682550557564819, "learning_rate": 0.00019950408211156636, "loss": 1.2423, "step": 212},
    {"epoch": 0.39, "grad_norm": 0.2102196862007261, "learning_rate": 0.0001994827731276963, "loss": 1.2096, "step": 213},
    {"epoch": 0.39, "grad_norm": 0.154346739470422, "learning_rate": 0.00019946101709901514, "loss": 1.2847, "step": 214},
    {"epoch": 0.39, "grad_norm": 0.16416668358293746, "learning_rate": 0.0001994388141232876, "loss": 1.2503, "step": 215},
    {"epoch": 0.39, "grad_norm": 0.13134349458231093, "learning_rate": 0.0001994161643002871, "loss": 1.1231, "step": 216},
    {"epoch": 0.4, "grad_norm": 0.15083246389185287, "learning_rate": 0.00019939306773179497, "loss": 1.1614, "step": 217},
    {"epoch": 0.4, "grad_norm": 0.1742387260929692, "learning_rate": 0.00019936952452159995, "loss": 1.3568, "step": 218},
    {"epoch": 0.4, "grad_norm": 0.18146911432436974, "learning_rate": 0.00019934553477549794, "loss": 1.2686, "step": 219},
    {"epoch": 0.4, "grad_norm": 0.1393593447949332, "learning_rate": 0.00019932109860129154, "loss": 1.1141, "step": 220},
    {"epoch": 0.4, "grad_norm": 0.14856124153987935, "learning_rate": 0.00019929621610878927, "loss": 1.234, "step": 221},
    {"epoch": 0.4, "grad_norm": 0.14820851831477327, "learning_rate": 0.0001992708874098054, "loss": 1.2069, "step": 222},
    {"epoch": 0.41, "grad_norm": 0.17893142790958147, "learning_rate": 0.00019924511261815926, "loss": 1.1278, "step": 223},
    {"epoch": 0.41, "grad_norm": 0.14573658703265605, "learning_rate": 0.00019921889184967476, "loss": 1.2292, "step": 224},
    {"epoch": 0.41, "grad_norm": 0.15282321197574994, "learning_rate": 0.00019919222522217996, "loss": 1.2482, "step": 225},
    {"epoch": 0.41, "grad_norm": 0.16342112084119492, "learning_rate": 0.00019916511285550642, "loss": 1.2172, "step": 226},
    {"epoch": 0.41, "grad_norm": 0.1475889153814455, "learning_rate": 0.00019913755487148876, "loss": 1.1747, "step": 227},
    {"epoch": 0.42, "grad_norm": 0.163738064491857, "learning_rate": 0.00019910955139396396, "loss": 1.3007, "step": 228},
    {"epoch": 0.42, "grad_norm": 0.14427856196022704, "learning_rate": 0.00019908110254877106, "loss": 1.2464, "step": 229},
    {"epoch": 0.42, "grad_norm": 0.20204742660246344, "learning_rate": 0.00019905220846375032, "loss": 1.2515, "step": 230},
    {"epoch": 0.42, "grad_norm": 0.15134144918251685, "learning_rate": 0.0001990228692687429, "loss": 1.1786, "step": 231},
    {"epoch": 0.42, "grad_norm": 0.1636590177812163, "learning_rate": 0.00019899308509558998, "loss": 1.1974, "step": 232},
    {"epoch": 0.42, "grad_norm": 0.15552319776955892, "learning_rate": 0.00019896285607813244, "loss": 1.2308, "step": 233},
    {"epoch": 0.43, "grad_norm": 0.17104898009833774, "learning_rate": 0.00019893218235221015, "loss": 1.2828, "step": 234},
    {"epoch": 0.43, "grad_norm": 0.16387378763964267, "learning_rate": 0.00019890106405566138, "loss": 1.2779, "step": 235},
    {"epoch": 0.43, "grad_norm": 0.14622126798612248, "learning_rate": 0.00019886950132832207, "loss": 1.2894, "step": 236},
    {"epoch": 0.43, "grad_norm": 0.16619841547518147, "learning_rate": 0.0001988374943120254, "loss": 1.2133, "step": 237},
    {"epoch": 0.43, "grad_norm": 0.12664832399697545, "learning_rate": 0.00019880504315060096, "loss": 1.1807, "step": 238},
    {"epoch": 0.44, "grad_norm": 0.2015108381613456, "learning_rate": 0.00019877214798987426, "loss": 1.1876, "step": 239},
    {"epoch": 0.44, "grad_norm": 0.14468620723711506, "learning_rate": 0.00019873880897766598, "loss": 1.1883, "step": 240},
    {"epoch": 0.44, "grad_norm": 0.1549018650770757, "learning_rate": 0.00019870502626379127, "loss": 1.2896, "step": 241},
    {"epoch": 0.44, "grad_norm": 0.1492917963684983, "learning_rate": 0.0001986708000000593, "loss": 1.2102, "step": 242},
    {"epoch": 0.44, "grad_norm": 0.178606606459489, "learning_rate": 0.00019863613034027224, "loss": 1.2292, "step": 243},
    {"epoch": 0.44, "grad_norm": 0.206170239681528, "learning_rate": 0.00019860101744022485, "loss": 1.2666, "step": 244},
    {"epoch": 0.45, "grad_norm": 0.13741043007948167, "learning_rate": 0.0001985654614577036, "loss": 1.2022, "step": 245},
    {"epoch": 0.45, "grad_norm": 0.1595080658199459, "learning_rate": 0.0001985294625524861, "loss": 1.1203, "step": 246},
    {"epoch": 0.45, "grad_norm": 0.13929705183853777, "learning_rate": 0.00019849302088634034, "loss": 1.1505, "step": 247},
    {"epoch": 0.45, "grad_norm": 0.14045247607912964, "learning_rate": 0.00019845613662302383, "loss": 1.1897, "step": 248},
    {"epoch": 0.45, "grad_norm": 0.15002651347444407, "learning_rate": 0.00019841880992828306, "loss": 1.2133, "step": 249},
    {"epoch": 0.46, "grad_norm": 0.1567929487810952, "learning_rate": 0.00019838104096985267, "loss": 1.129, "step": 250},
    {"epoch": 0.46, "grad_norm": 0.15240634543877116, "learning_rate": 0.00019834282991745464, "loss": 1.1995, "step": 251},
    {"epoch": 0.46, "grad_norm": 0.151807679821367, "learning_rate": 0.00019830417694279766, "loss": 1.25, "step": 252},
    {"epoch": 0.46, "grad_norm": 0.1648599156208311, "learning_rate": 0.0001982650822195762, "loss": 1.2511, "step": 253},
    {"epoch": 0.46, "grad_norm": 0.15363401233808713, "learning_rate": 0.00019822554592346993, "loss": 1.1794, "step": 254},
    {"epoch": 0.46, "grad_norm": 0.1569644350778875, "learning_rate": 0.00019818556823214268, "loss": 1.2033, "step": 255},
    {"epoch": 0.47, "grad_norm": 0.15996552747294254, "learning_rate": 0.0001981451493252418, "loss": 1.2809, "step": 256},
    {"epoch": 0.47, "grad_norm": 0.15863104885072635, "learning_rate": 0.0001981042893843974, "loss": 1.1667, "step": 257},
    {"epoch": 0.47, "grad_norm": 0.2887466971861171, "learning_rate": 0.0001980629885932214, "loss": 1.1915, "step": 258},
    {"epoch": 0.47, "grad_norm": 0.15233015979193984, "learning_rate": 0.00019802124713730681, "loss": 1.1734, "step": 259},
    {"epoch": 0.47, "grad_norm": 0.18207884538436447, "learning_rate": 0.00019797906520422677, "loss": 1.2575, "step": 260},
    {"epoch": 0.48, "grad_norm": 0.17323546756038308, "learning_rate": 0.0001979364429835339, "loss": 1.1704, "step": 261},
    {"epoch": 0.48, "grad_norm": 0.14592153602263633, "learning_rate": 0.00019789338066675922, "loss": 1.192, "step": 262},
    {"epoch": 0.48, "grad_norm": 0.19250697792287097, "learning_rate": 0.0001978498784474115, "loss": 1.2779, "step": 263},
    {"epoch": 0.48, "grad_norm": 0.1429107680887097, "learning_rate": 0.0001978059365209762, "loss": 1.2529, "step": 264},
    {"epoch": 0.48, "grad_norm": 0.48514081074992116, "learning_rate": 0.00019776155508491482, "loss": 1.1917, "step": 265},
    {"epoch": 0.48, "grad_norm": 0.1534376167748161, "learning_rate": 0.0001977167343386638, "loss": 1.2384, "step": 266},
    {"epoch": 0.49, "grad_norm": 0.16744875760032166, "learning_rate": 0.00019767147448363366, "loss": 1.1744, "step": 267},
    {"epoch": 0.49, "grad_norm": 0.29195538170738244, "learning_rate": 0.00019762577572320824, "loss": 1.1418, "step": 268},
    {"epoch": 0.49, "grad_norm": 0.1820804717651353, "learning_rate": 0.00019757963826274357, "loss": 1.2815, "step": 269},
    {"epoch": 0.49, "grad_norm": 0.17522345110441973, "learning_rate": 0.00019753306230956718, "loss": 1.2363, "step": 270},
    {"epoch": 0.49, "grad_norm": 0.16354388270886613, "learning_rate": 0.000197486048072977, "loss": 1.2845, "step": 271},
    {"epoch": 0.5, "grad_norm": 0.17590082756401024, "learning_rate": 0.0001974385957642404, "loss": 1.192, "step": 272},
    {"epoch": 0.5, "grad_norm": 0.17345720403188775, "learning_rate": 0.00019739070559659347, "loss": 1.2068, "step": 273},
    {"epoch": 0.5, "grad_norm": 0.16070434867766506, "learning_rate": 0.00019734237778523976, "loss": 1.189, "step": 274},
    {"epoch": 0.5, "grad_norm": 0.18983443066710415, "learning_rate": 0.0001972936125473495, "loss": 1.2223, "step": 275},
    {"epoch": 0.5, "grad_norm": 0.15724400187981355, "learning_rate": 0.00019724441010205863, "loss": 1.2292, "step": 276},
    {"epoch": 0.5, "grad_norm": 0.14570729442956004, "learning_rate": 0.00019719477067046766, "loss": 1.1421, "step": 277},
    {"epoch": 0.51, "grad_norm": 0.1559242881177266, "learning_rate": 0.00019714469447564088, "loss": 1.2598, "step": 278},
    {"epoch": 0.51, "grad_norm": 0.16621830243096108, "learning_rate": 0.0001970941817426052, "loss": 1.3038, "step": 279},
    {"epoch": 0.51, "grad_norm": 4.675483994100576, "learning_rate": 0.00019704323269834927, "loss": 1.2298, "step": 280},
    {"epoch": 0.51, "grad_norm": 0.2769699381619058, "learning_rate": 0.00019699184757182225, "loss": 1.2566, "step": 281},
    {"epoch": 0.51, "grad_norm": 0.20189839889100783, "learning_rate": 0.00019694002659393305, "loss": 1.3181, "step": 282},
    {"epoch": 0.52, "grad_norm": 0.19497107359413876, "learning_rate": 0.00019688776999754912, "loss": 1.1502, "step": 283},
    {"epoch": 0.52, "grad_norm": 0.1982266815755412, "learning_rate": 0.00019683507801749545, "loss": 1.2053, "step": 284},
    {"epoch": 0.52, "grad_norm": 0.1924340950322314, "learning_rate": 0.00019678195089055346, "loss": 1.2149, "step": 285},
    {"epoch": 0.52, "grad_norm": 0.1725322346446431, "learning_rate": 0.00019672838885546008, "loss": 1.2553, "step": 286},
    {"epoch": 0.52, "grad_norm": 0.2535488743520272, "learning_rate": 0.00019667439215290648, "loss": 1.2576, "step": 287},
    {"epoch": 0.52, "grad_norm": 0.37837586860064026, "learning_rate": 0.00019661996102553718, "loss": 1.1815, "step": 288},
    {"epoch": 0.53, "grad_norm": 0.17520419597901843, "learning_rate": 0.00019656509571794878, "loss": 1.1932, "step": 289},
    {"epoch": 0.53, "grad_norm": 0.17056234784450633, "learning_rate": 0.00019650979647668906, "loss": 1.163, "step": 290},
    {"epoch": 0.53, "grad_norm": 0.18272246580207432, "learning_rate": 0.00019645406355025565, "loss": 1.1887, "step": 291},
    {"epoch": 0.53, "grad_norm": 0.17889037954429915, "learning_rate": 0.00019639789718909508, "loss": 1.2126, "step": 292},
    {"epoch": 0.53, "grad_norm": 0.23993734971101424, "learning_rate": 0.00019634129764560168, "loss": 1.2485, "step": 293},
    {"epoch": 0.54, "grad_norm": 0.1847578318208199, "learning_rate": 0.00019628426517411625, "loss": 1.2549, "step": 294},
    {"epoch": 0.54, "grad_norm": 0.23185098827091005, "learning_rate": 0.00019622680003092503, "loss": 1.1599, "step": 295},
    {"epoch": 0.54, "grad_norm": 0.220638044092583, "learning_rate": 0.00019616890247425866, "loss": 1.2281, "step": 296},
    {"epoch": 0.54, "grad_norm": 0.2303439219825616, "learning_rate": 0.00019611057276429085, "loss": 1.2208, "step": 297},
    {"epoch": 0.54, "grad_norm": 0.1744807302230573, "learning_rate": 0.00019605181116313724, "loss": 1.2303, "step": 298},
    {"epoch": 0.54, "grad_norm": 0.17510946821872422, "learning_rate": 0.0001959926179348543, "loss": 1.2385, "step": 299},
    {"epoch": 0.55, "grad_norm": 0.2218474349751746, "learning_rate": 0.00019593299334543808, "loss": 1.2153, "step": 300},
    {"epoch": 0.55, "grad_norm": 0.1742070481516402, "learning_rate": 0.00019587293766282308, "loss": 1.1628, "step": 301},
    {"epoch": 0.55, "grad_norm": 0.15250311715180823, "learning_rate": 0.00019581245115688094, "loss": 1.1632, "step": 302},
    {"epoch": 0.55, "grad_norm": 0.1744397677094501, "learning_rate": 0.0001957515340994193, "loss": 1.254, "step": 303},
    {"epoch": 0.55, "grad_norm": 0.1686772182789891, "learning_rate": 0.00019569018676418053, "loss": 1.2169, "step": 304},
    {"epoch": 0.56, "grad_norm": 0.16404966161017623, "learning_rate": 0.00019562840942684067, "loss": 1.2221, "step": 305},
    {"epoch": 0.56, "grad_norm": 0.16052011449463713, "learning_rate": 0.00019556620236500793, "loss": 1.2045, "step": 306},
    {"epoch": 0.56, "grad_norm": 0.16343251390831215, "learning_rate": 0.0001955035658582216, "loss": 1.2289, "step": 307},
    {"epoch": 0.56, "grad_norm": 0.14387162360389305, "learning_rate": 0.00019544050018795075, "loss": 1.1365, "step": 308},
    {"epoch": 0.56, "grad_norm": 0.15304461439740238, "learning_rate": 0.00019537700563759304, "loss": 1.1931, "step": 309},
    {"epoch": 0.56, "grad_norm": 0.17059958050065627, "learning_rate": 0.00019531308249247327, "loss": 1.2166, "step": 310},
    {"epoch": 0.57, "grad_norm": 0.17633385530926995, "learning_rate": 0.00019524873103984235, "loss": 1.2604, "step": 311},
    {"epoch": 0.57, "grad_norm": 0.17855814403303746, "learning_rate": 0.00019518395156887576, "loss": 1.1615, "step": 312},
    {"epoch": 0.57, "grad_norm": 0.19823982444256988, "learning_rate": 0.00019511874437067243, "loss": 1.2153, "step": 313},
    {"epoch": 0.57, "grad_norm": 0.1570784627362585, "learning_rate": 0.0001950531097382533, "loss": 1.2788, "step": 314},
    {"epoch": 0.57, "grad_norm": 0.2183125402112695, "learning_rate": 0.00019498704796656018, "loss": 1.2966, "step": 315},
    {"epoch": 0.58, "grad_norm": 0.18173933276147194, "learning_rate": 0.00019492055935245418, "loss": 1.2978, "step": 316},
    {"epoch": 0.58, "grad_norm": 0.17483116680914407, "learning_rate": 0.00019485364419471454, "loss": 1.258, "step": 317},
    {"epoch": 0.58, "grad_norm": 0.15490767356815494, "learning_rate": 0.0001947863027940374, "loss": 1.2088, "step": 318},
    {"epoch": 0.58, "grad_norm": 0.14703966491934156, "learning_rate": 0.00019471853545303405, "loss": 1.2355, "step": 319},
    {"epoch": 0.58, "grad_norm": 0.14386689086661608, "learning_rate": 0.00019465034247623003, "loss": 1.2583, "step": 320},
    {"epoch": 0.58, "grad_norm": 0.18818904376313625, "learning_rate": 0.00019458172417006347, "loss": 1.2181, "step": 321},
    {"epoch": 0.59, "grad_norm": 0.17393313719202513, "learning_rate": 0.00019451268084288385, "loss": 1.3453, "step": 322},
    {"epoch": 0.59, "grad_norm": 0.14706823379985753, "learning_rate": 0.00019444321280495043, "loss": 1.2234, "step": 323},
    {"epoch": 0.59, "grad_norm": 0.15282014755252687, "learning_rate": 0.00019437332036843118, "loss": 1.1262, "step": 324},
    {"epoch": 0.59, "grad_norm": 0.1618727884326225, "learning_rate": 0.00019430300384740105, "loss": 1.3136, "step": 325},
    {"epoch": 0.59, "grad_norm": 0.16090758705378874, "learning_rate": 0.00019423226355784077, "loss": 1.2055, "step": 326},
    {"epoch": 0.6, "grad_norm": 0.15241156801091013, "learning_rate": 0.00019416109981763526, "loss": 1.2678, "step": 327},
    {"epoch": 0.6, "grad_norm": 0.14216697909809062, "learning_rate": 0.0001940895129465724, "loss": 1.2841, "step": 328},
    {"epoch": 0.6, "grad_norm": 0.15790232415414485, "learning_rate": 0.00019401750326634144, "loss": 1.3119, "step": 329},
    {"epoch": 0.6, "grad_norm": 0.13322691961062616, "learning_rate": 0.0001939450711005316, "loss": 1.1293, "step": 330},
    {"epoch": 0.6, "grad_norm": 0.14075018938835404, "learning_rate": 0.00019387221677463062, "loss": 1.2176, "step": 331},
    {"epoch": 0.6, "grad_norm": 0.21565975459393052, "learning_rate": 0.00019379894061602335, "loss": 1.1723, "step": 332},
    {"epoch": 0.61, "grad_norm": 0.17967631394222838, "learning_rate": 0.00019372524295399013, "loss": 1.239, "step": 333},
    {"epoch": 0.61, "grad_norm": 0.21187969201978435, "learning_rate": 0.0001936511241197055, "loss": 1.2207, "step": 334},
    {"epoch": 0.61, "grad_norm": 0.16967789022974608, "learning_rate": 0.00019357658444623654, "loss": 1.2478, "step": 335},
    {"epoch": 0.61, "grad_norm": 0.14810621660374448, "learning_rate": 0.0001935016242685415, "loss": 1.1223, "step": 336},
    {"epoch": 0.61, "grad_norm": 0.1489106421847434, "learning_rate": 0.00019342624392346824, "loss": 1.1592, "step": 337},
    {"epoch": 0.62, "grad_norm": 0.17625176068748855, "learning_rate": 0.0001933504437497527, "loss": 1.2145, "step": 338},
    {"epoch": 0.62, "grad_norm": 0.17250255512763446, "learning_rate": 0.00019327422408801744, "loss": 1.2504, "step": 339},
    {"epoch": 0.62, "grad_norm": 0.16079375745566896, "learning_rate": 0.00019319758528077, "loss": 1.1795, "step": 340},
    {"epoch": 0.62, "grad_norm": 0.15454466809245995, "learning_rate": 0.0001931205276724015, "loss": 1.2123, "step": 341},
    {"epoch": 0.62, "grad_norm": 0.7021323604447972, "learning_rate": 0.000193043051609185, "loss": 1.2239, "step": 342},
    {"epoch": 0.62, "grad_norm": 0.1572764339385847, "learning_rate": 0.00019296515743927399, "loss": 1.2516, "step": 343},
    {"epoch": 0.63, "grad_norm": 0.2136637778252246, "learning_rate": 0.00019288684551270073, "loss": 1.2321, "step": 344},
    {"epoch": 0.63, "grad_norm": 0.4546540454773654, "learning_rate": 0.00019280811618137484, "loss": 1.18, "step": 345},
    {"epoch": 0.63, "grad_norm": 0.9809832576786297, "learning_rate": 0.00019272896979908154, "loss": 1.2081, "step": 346},
    {"epoch": 0.63, "grad_norm": 0.5246256133291822, "learning_rate": 0.00019264940672148018, "loss": 1.2722, "step": 347},
    {"epoch": 0.63, "grad_norm": 0.24941717134878091, "learning_rate": 0.00019256942730610268, "loss": 1.2352, "step": 348},
    {"epoch": 0.64, "grad_norm": 0.3356068462072784, "learning_rate": 0.00019248903191235176, "loss": 1.2225, "step": 349},
    {"epoch": 0.64, "grad_norm": 0.19535845221880543, "learning_rate": 0.00019240822090149944, "loss": 1.1669, "step": 350},
    {"epoch": 0.64, "grad_norm": 0.22306941566416597, "learning_rate": 0.00019232699463668542, "loss": 1.2281, "step": 351},
    {"epoch": 0.64, "grad_norm": 0.2700134013989352, "learning_rate": 0.00019224535348291542, "loss": 1.1939, "step": 352},
    {"epoch": 0.64, "grad_norm": 0.24406908935562743, "learning_rate": 0.00019216329780705953, "loss": 1.1839, "step": 353},
    {"epoch": 0.64, "grad_norm": 0.20465183000217488, "learning_rate": 0.00019208082797785055, "loss": 1.2277, "step": 354},
    {"epoch": 0.65, "grad_norm": 0.21324820828129784, "learning_rate": 0.00019199794436588243, "loss": 1.2072, "step": 355},
    {"epoch": 0.65, "grad_norm": 0.1780562512431263, "learning_rate": 0.00019191464734360844, "loss": 1.2082, "step": 356},
    {"epoch": 0.65, "grad_norm": 0.16547971467615655, "learning_rate": 0.00019183093728533966, "loss": 1.1978, "step": 357},
    {"epoch": 0.65, "grad_norm": 0.22904664933247196, "learning_rate": 0.00019174681456724318, "loss": 1.1562, "step": 358},
    {"epoch": 0.65, "grad_norm": 0.1737397860007602, "learning_rate": 0.00019166227956734052, "loss": 1.2383, "step": 359},
    {"epoch": 0.66, "grad_norm": 0.1589465455917568, "learning_rate": 0.00019157733266550575, "loss": 1.2158, "step": 360},
    {"epoch": 0.66, "grad_norm": 0.16253126221999709, "learning_rate": 0.00019149197424346405, "loss": 1.1952, "step": 361},
    {"epoch": 0.66, "grad_norm": 0.22436676243032663, "learning_rate": 0.00019140620468478968, "loss": 1.2315, "step": 362},
    {"epoch": 0.66, "grad_norm": 0.19291682612950423, "learning_rate": 0.00019132002437490458, "loss": 1.2283, "step": 363},
    {"epoch": 0.66, "grad_norm": 0.1519191258459668, "learning_rate": 0.00019123343370107637, "loss": 1.1151, "step": 364},
    {"epoch": 0.66, "grad_norm": 0.17179909633547025, "learning_rate": 0.00019114643305241676, "loss": 1.1576, "step": 365},
    {"epoch": 0.67, "grad_norm": 0.17992599023321432, "learning_rate": 0.00019105902281987976, "loss": 1.2592, "step": 366},
    {"epoch": 0.67, "grad_norm": 0.17714099390314453, "learning_rate": 0.00019097120339625994, "loss": 1.2578, "step": 367},
    {"epoch": 0.67, "grad_norm": 0.2455577642687935, "learning_rate": 0.00019088297517619055, "loss": 1.2361, "step": 368},
    {"epoch": 0.67, "grad_norm": 0.18398518628783986, "learning_rate": 0.00019079433855614201, "loss": 1.1906, "step": 369},
    {"epoch": 0.67, "grad_norm": 0.18944067022821645, "learning_rate": 0.00019070529393441985, "loss": 1.237, "step": 370},
    {"epoch": 0.68, "grad_norm": 0.17639967519781063, "learning_rate": 0.00019061584171116303, "loss": 1.1841, "step": 371},
    {"epoch": 0.68, "grad_norm": 0.15947129998283005, "learning_rate": 0.00019052598228834217, "loss": 1.1722, "step": 372},
    {"epoch": 0.68, "grad_norm": 0.1693354353719105, "learning_rate": 0.00019043571606975777, "loss": 1.2204, "step": 373},
    {"epoch": 0.68, "grad_norm": 0.16236190451963983, "learning_rate": 0.00019034504346103823, "loss": 1.1778, "step": 374},
    {"epoch": 0.68, "grad_norm": 0.17702370729269964, "learning_rate": 0.00019025396486963827, "loss": 1.2065, "step": 375},
    {"epoch": 0.68, "grad_norm": 0.19388150596154238, "learning_rate": 0.00019016248070483687, "loss": 1.2942, "step": 376},
    {"epoch": 0.69, "grad_norm": 0.16152000400319103, "learning_rate": 0.0001900705913777356, "loss": 1.1784, "step": 377},
    {"epoch": 0.69, "grad_norm": 0.1545267913996029, "learning_rate": 0.00018997829730125663, "loss": 1.1829, "step": 378},
    {"epoch": 0.69, "grad_norm": 0.15421727704318197, "learning_rate": 0.000189885598890141, "loss": 1.177, "step": 379},
    {"epoch": 0.69, "grad_norm": 0.1624966073814206, "learning_rate": 0.00018979249656094673, "loss": 1.2439, "step": 380},
    {"epoch": 0.69, "grad_norm": 0.9490737312904575, "learning_rate": 0.00018969899073204686, "loss": 1.2085, "step": 381},
    {"epoch": 0.7, "grad_norm": 0.8982903208613089, "learning_rate": 0.00018960508182362768, "loss": 1.2347, "step": 382},
    {"epoch": 0.7, "grad_norm": 0.3771428474797688, "learning_rate": 0.00018951077025768678, "loss": 1.2546, "step": 383},
    {"epoch": 0.7, "grad_norm": 0.4776152950069111, "learning_rate": 0.00018941605645803115, "loss": 1.2904, "step": 384},
    {"epoch": 0.7, "grad_norm": 0.18786943849618057, "learning_rate": 0.00018932094085027533, "loss": 1.2122, "step": 385},
    {"epoch": 0.7, "grad_norm": 1.6297025984167128, "learning_rate": 0.0001892254238618394, "loss": 1.171, "step": 386},
    {"epoch": 0.7, "grad_norm": 0.20382660707264952, "learning_rate": 0.0001891295059219472, "loss": 1.1874, "step": 387},
    {"epoch": 0.71, "grad_norm": 1.3580819775908755, "learning_rate": 0.00018903318746162429, "loss": 1.1531, "step": 388},
    {"epoch": 0.71, "grad_norm": 0.43619056173016185, "learning_rate": 0.00018893646891369602, "loss": 1.2289, "step": 389},
    {"epoch": 0.71, "grad_norm": 0.29385240705823723, "learning_rate": 0.0001888393507127856, "loss": 1.2073, "step": 390},
    {"epoch": 0.71, "grad_norm": 0.3136086850525623, "learning_rate": 0.00018874183329531223, "loss": 1.1898, "step": 391},
    {"epoch": 0.71, "grad_norm": 0.2307767217662562, "learning_rate": 0.000188643917099489, "loss": 1.207, "step": 392},
    {"epoch": 0.72, "grad_norm": 0.18703654518135468, "learning_rate": 0.000188545602565321, "loss": 1.1688, "step": 393},
    {"epoch": 0.72, "grad_norm": 0.4809351333934126, "learning_rate": 0.00018844689013460336, "loss": 1.2519, "step": 394},
    {"epoch": 0.72, "grad_norm": 0.40370101428544464, "learning_rate": 0.0001883477802509192, "loss": 1.2411, "step": 395},
    {"epoch": 0.72, "grad_norm": 0.2858848636432859, "learning_rate": 0.00018824827335963765, "loss": 1.194, "step": 396},
    {"epoch": 0.72, "grad_norm": 0.32195602638999565, "learning_rate": 0.000188148369907912, "loss": 1.0988, "step": 397},
    {"epoch": 0.72, "grad_norm": 0.23790306908901832, "learning_rate": 0.00018804807034467733, "loss": 1.2237, "step": 398},
    {"epoch": 0.73, "grad_norm": 0.20126988767112128, "learning_rate": 0.0001879473751206489, "loss": 1.2731, "step": 399},
    {"epoch": 0.73, "grad_norm": 0.3336380339194037, "learning_rate": 0.00018784628468831996, "loss": 1.2369, "step": 400},
    {"epoch": 0.73, "grad_norm": 0.5054330893305989, "learning_rate": 0.0001877447995019596, "loss": 1.2443, "step": 401},
    {"epoch": 0.73, "grad_norm": 0.2297866279715136, "learning_rate": 0.0001876429200176108, "loss": 1.2376, "step": 402},
    {"epoch": 0.73, "grad_norm": 0.39350567174184636,
|
"learning_rate": 0.00018754064669308858, |
|
"loss": 1.2126, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.2025361091435325, |
|
"learning_rate": 0.00018743797998797753, |
|
"loss": 1.2224, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.31824903419753814, |
|
"learning_rate": 0.00018733492036363005, |
|
"loss": 1.2942, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.4642066748643017, |
|
"learning_rate": 0.00018723146828316428, |
|
"loss": 1.2515, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.7833055646295342, |
|
"learning_rate": 0.00018712762421146183, |
|
"loss": 1.2207, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.2810249021786599, |
|
"learning_rate": 0.00018702338861516587, |
|
"loss": 1.2755, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.460995724241333, |
|
"learning_rate": 0.0001869187619626789, |
|
"loss": 1.2856, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.49139203044984286, |
|
"learning_rate": 0.00018681374472416073, |
|
"loss": 1.2392, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.691604613969173, |
|
"learning_rate": 0.0001867083373715264, |
|
"loss": 1.2992, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.8014112047318501, |
|
"learning_rate": 0.00018660254037844388, |
|
"loss": 1.2683, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.31614342841331383, |
|
"learning_rate": 0.00018649635422033215, |
|
"loss": 1.2356, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.2559855196513244, |
|
"learning_rate": 0.000186389779374359, |
|
"loss": 1.2053, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.6613999986014714, |
|
"learning_rate": 0.0001862828163194388, |
|
"loss": 1.2568, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.27190082167109786, |
|
"learning_rate": 0.0001861754655362304, |
|
"loss": 1.1288, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.43819582203066043, |
|
"learning_rate": 0.00018606772750713504, |
|
"loss": 1.1758, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.41738497400383384, |
|
"learning_rate": 0.0001859596027162941, |
|
"loss": 1.2993, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.2595142634740817, |
|
"learning_rate": 0.000185851091649587, |
|
"loss": 1.269, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.2795314201020271, |
|
"learning_rate": 0.00018574219479462878, |
|
"loss": 1.1915, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.2502992494749938, |
|
"learning_rate": 0.00018563291264076835, |
|
"loss": 1.2157, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.27422512335538374, |
|
"learning_rate": 0.00018552324567908585, |
|
"loss": 1.2541, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.3360989016060905, |
|
"learning_rate": 0.00018541319440239066, |
|
"loss": 1.2666, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.9441131913572127, |
|
"learning_rate": 0.00018530275930521924, |
|
"loss": 1.2924, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.3772373301771213, |
|
"learning_rate": 0.00018519194088383273, |
|
"loss": 1.1952, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.22091753616251295, |
|
"learning_rate": 0.0001850807396362148, |
|
"loss": 1.1858, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.21423504993321807, |
|
"learning_rate": 0.00018496915606206951, |
|
"loss": 1.2245, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.5238946238105926, |
|
"learning_rate": 0.00018485719066281892, |
|
"loss": 1.2351, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.3037858949309141, |
|
"learning_rate": 0.0001847448439416009, |
|
"loss": 1.1669, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.21553286799952254, |
|
"learning_rate": 0.00018463211640326686, |
|
"loss": 1.1454, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.202875547805464, |
|
"learning_rate": 0.0001845190085543795, |
|
"loss": 1.188, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.24385408620619278, |
|
"learning_rate": 0.00018440552090321047, |
|
"loss": 1.2307, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.23793944272430378, |
|
"learning_rate": 0.0001842916539597382, |
|
"loss": 1.2253, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.17062488448810784, |
|
"learning_rate": 0.0001841774082356455, |
|
"loss": 1.2681, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.20003742001916064, |
|
"learning_rate": 0.00018406278424431736, |
|
"loss": 1.2428, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.2696052831337752, |
|
"learning_rate": 0.0001839477825008385, |
|
"loss": 1.2945, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.23302960820538443, |
|
"learning_rate": 0.00018383240352199117, |
|
"loss": 1.1718, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.38187833239777536, |
|
"learning_rate": 0.00018371664782625287, |
|
"loss": 1.2311, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.4052561772533732, |
|
"learning_rate": 0.00018360051593379383, |
|
"loss": 1.1639, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.23379763821020377, |
|
"learning_rate": 0.0001834840083664749, |
|
"loss": 1.1809, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.2368414607613928, |
|
"learning_rate": 0.00018336712564784503, |
|
"loss": 1.2357, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.20230633988510938, |
|
"learning_rate": 0.000183249868303139, |
|
"loss": 1.1851, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.170513157244292, |
|
"learning_rate": 0.00018313223685927505, |
|
"loss": 1.205, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.18082295035256266, |
|
"learning_rate": 0.0001830142318448525, |
|
"loss": 1.2305, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.18286299264146286, |
|
"learning_rate": 0.00018289585379014942, |
|
"loss": 1.23, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.17868104103482751, |
|
"learning_rate": 0.00018277710322712012, |
|
"loss": 1.2894, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.1820411127336495, |
|
"learning_rate": 0.00018265798068939294, |
|
"loss": 1.2395, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.1738237541783663, |
|
"learning_rate": 0.0001825384867122677, |
|
"loss": 1.1576, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.15693445967795147, |
|
"learning_rate": 0.0001824186218327134, |
|
"loss": 1.0809, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.18509145652208978, |
|
"learning_rate": 0.00018229838658936564, |
|
"loss": 1.2717, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.14702488366564262, |
|
"learning_rate": 0.0001821777815225245, |
|
"loss": 1.2236, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.1828399354418095, |
|
"learning_rate": 0.00018205680717415187, |
|
"loss": 1.2565, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.17460984182013486, |
|
"learning_rate": 0.00018193546408786898, |
|
"loss": 1.2474, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.2001623109673152, |
|
"learning_rate": 0.00018181375280895416, |
|
"loss": 1.2544, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.17228631742863837, |
|
"learning_rate": 0.00018169167388434025, |
|
"loss": 1.1851, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.1644862232819482, |
|
"learning_rate": 0.00018156922786261216, |
|
"loss": 1.1817, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.19775186397477057, |
|
"learning_rate": 0.00018144641529400446, |
|
"loss": 1.257, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.1626281991220394, |
|
"learning_rate": 0.00018132323673039885, |
|
"loss": 1.2277, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.16158256707311264, |
|
"learning_rate": 0.00018119969272532166, |
|
"loss": 1.1624, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.17705809207051687, |
|
"learning_rate": 0.00018107578383394146, |
|
"loss": 1.2421, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.17639060401882287, |
|
"learning_rate": 0.00018095151061306645, |
|
"loss": 1.285, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.16918796486576196, |
|
"learning_rate": 0.00018082687362114212, |
|
"loss": 1.2606, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.15968377185965665, |
|
"learning_rate": 0.0001807018734182485, |
|
"loss": 1.194, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.17537027967397978, |
|
"learning_rate": 0.00018057651056609784, |
|
"loss": 1.1594, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.15753665403127565, |
|
"learning_rate": 0.00018045078562803203, |
|
"loss": 1.1382, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.17121200763916436, |
|
"learning_rate": 0.00018032469916902003, |
|
"loss": 1.2286, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.19120510133331003, |
|
"learning_rate": 0.00018019825175565542, |
|
"loss": 1.2835, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.1671735980123817, |
|
"learning_rate": 0.0001800714439561538, |
|
"loss": 1.2201, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.1579098534969056, |
|
"learning_rate": 0.00017994427634035015, |
|
"loss": 1.2156, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.1746075421158512, |
|
"learning_rate": 0.00017981674947969636, |
|
"loss": 1.2049, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.16878182886737042, |
|
"learning_rate": 0.00017968886394725874, |
|
"loss": 1.2204, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.16725956538286493, |
|
"learning_rate": 0.00017956062031771535, |
|
"loss": 1.2091, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.18877845951705005, |
|
"learning_rate": 0.00017943201916735335, |
|
"loss": 1.241, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.180337447476004, |
|
"learning_rate": 0.00017930306107406653, |
|
"loss": 1.2253, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.16688572366717752, |
|
"learning_rate": 0.0001791737466173527, |
|
"loss": 1.239, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.15385917621135983, |
|
"learning_rate": 0.00017904407637831099, |
|
"loss": 1.2476, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.17725645269055587, |
|
"learning_rate": 0.00017891405093963938, |
|
"loss": 1.2599, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.14758551718901028, |
|
"learning_rate": 0.00017878367088563195, |
|
"loss": 1.2249, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.15216962408661316, |
|
"learning_rate": 0.00017865293680217637, |
|
"loss": 1.2346, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.16679282848599514, |
|
"learning_rate": 0.00017852184927675112, |
|
"loss": 1.2443, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.16723562739069214, |
|
"learning_rate": 0.00017839040889842305, |
|
"loss": 1.224, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.15922276239929914, |
|
"learning_rate": 0.00017825861625784455, |
|
"loss": 1.2739, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.1510107938469514, |
|
"learning_rate": 0.00017812647194725094, |
|
"loss": 1.1764, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.16446999054333494, |
|
"learning_rate": 0.00017799397656045792, |
|
"loss": 1.2498, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.18566301651865832, |
|
"learning_rate": 0.00017786113069285874, |
|
"loss": 1.232, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.20592971655306183, |
|
"learning_rate": 0.00017772793494142167, |
|
"loss": 1.1586, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.1581947714375729, |
|
"learning_rate": 0.00017759438990468725, |
|
"loss": 1.2502, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.15466760695169174, |
|
"learning_rate": 0.00017746049618276545, |
|
"loss": 1.1605, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.16041506222444918, |
|
"learning_rate": 0.00017732625437733335, |
|
"loss": 1.2778, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.17168109661676773, |
|
"learning_rate": 0.0001771916650916321, |
|
"loss": 1.262, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.1788973186498254, |
|
"learning_rate": 0.00017705672893046425, |
|
"loss": 1.2111, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1759644359346382, |
|
"learning_rate": 0.00017692144650019125, |
|
"loss": 1.2546, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.15710749736088767, |
|
"learning_rate": 0.0001767858184087304, |
|
"loss": 1.2487, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1648235522911144, |
|
"learning_rate": 0.00017664984526555248, |
|
"loss": 1.2469, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.15452607969890703, |
|
"learning_rate": 0.0001765135276816787, |
|
"loss": 1.1855, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1837695597880219, |
|
"learning_rate": 0.00017637686626967812, |
|
"loss": 1.2185, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.15861390725762364, |
|
"learning_rate": 0.00017623986164366486, |
|
"loss": 1.2056, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.1663260460966887, |
|
"learning_rate": 0.00017610251441929533, |
|
"loss": 1.1242, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.1803309720529981, |
|
"learning_rate": 0.00017596482521376546, |
|
"loss": 1.2938, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.14909085011764342, |
|
"learning_rate": 0.00017582679464580797, |
|
"loss": 1.1953, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.15779022242482527, |
|
"learning_rate": 0.00017568842333568952, |
|
"loss": 1.2792, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.1553327313967345, |
|
"learning_rate": 0.00017554971190520798, |
|
"loss": 1.2286, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.16363964666273684, |
|
"learning_rate": 0.00017541066097768963, |
|
"loss": 1.2753, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.16668099163659675, |
|
"learning_rate": 0.00017527127117798635, |
|
"loss": 1.185, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.13957568397594883, |
|
"learning_rate": 0.0001751315431324727, |
|
"loss": 1.143, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.1553111736740035, |
|
"learning_rate": 0.00017499147746904335, |
|
"loss": 1.2492, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.1691517335818193, |
|
"learning_rate": 0.00017485107481711012, |
|
"loss": 1.2619, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.15480883994395986, |
|
"learning_rate": 0.00017471033580759903, |
|
"loss": 1.2396, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.1451690143792058, |
|
"learning_rate": 0.00017456926107294765, |
|
"loss": 1.1732, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.1524398957482947, |
|
"learning_rate": 0.00017442785124710227, |
|
"loss": 1.2083, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.16790264977550012, |
|
"learning_rate": 0.0001742861069655148, |
|
"loss": 1.2201, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.1529847047636337, |
|
"learning_rate": 0.0001741440288651403, |
|
"loss": 1.243, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.1485875402374676, |
|
"learning_rate": 0.00017400161758443375, |
|
"loss": 1.2053, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.16950094279079617, |
|
"learning_rate": 0.00017385887376334742, |
|
"loss": 1.1944, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.15289337084330445, |
|
"learning_rate": 0.00017371579804332789, |
|
"loss": 1.2503, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.15337063655317973, |
|
"learning_rate": 0.00017357239106731317, |
|
"loss": 1.3092, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.1458937961897621, |
|
"learning_rate": 0.00017342865347972988, |
|
"loss": 1.2244, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.19897118610161338, |
|
"learning_rate": 0.00017328458592649027, |
|
"loss": 1.2238, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.15850805264911003, |
|
"learning_rate": 0.00017314018905498931, |
|
"loss": 1.195, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.14445183074519347, |
|
"learning_rate": 0.00017299546351410197, |
|
"loss": 1.1974, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.18180731722745677, |
|
"learning_rate": 0.00017285040995418, |
|
"loss": 1.2107, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.14943874953193587, |
|
"learning_rate": 0.00017270502902704926, |
|
"loss": 1.1843, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.15767466790910512, |
|
"learning_rate": 0.00017255932138600665, |
|
"loss": 1.1409, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.16402921378654775, |
|
"learning_rate": 0.00017241328768581726, |
|
"loss": 1.2135, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.15526246786505485, |
|
"learning_rate": 0.00017226692858271134, |
|
"loss": 1.2255, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.16608155892622348, |
|
"learning_rate": 0.00017212024473438147, |
|
"loss": 1.2691, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.14913271520144072, |
|
"learning_rate": 0.00017197323679997943, |
|
"loss": 1.1574, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.1471910610421707, |
|
"learning_rate": 0.00017182590544011347, |
|
"loss": 1.2774, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.1417464185073962, |
|
"learning_rate": 0.00017167825131684513, |
|
"loss": 1.2446, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.1610488125634495, |
|
"learning_rate": 0.0001715302750936864, |
|
"loss": 1.2862, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.20227974555123074, |
|
"learning_rate": 0.00017138197743559654, |
|
"loss": 1.207, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.1355502559749413, |
|
"learning_rate": 0.00017123335900897946, |
|
"loss": 1.1019, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.1559423167028215, |
|
"learning_rate": 0.00017108442048168038, |
|
"loss": 1.2549, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.15898973818185586, |
|
"learning_rate": 0.00017093516252298296, |
|
"loss": 1.2705, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.15169569998999652, |
|
"learning_rate": 0.00017078558580360632, |
|
"loss": 1.2454, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.15976111665597925, |
|
"learning_rate": 0.00017063569099570196, |
|
"loss": 1.2585, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.14488877221999352, |
|
"learning_rate": 0.00017048547877285077, |
|
"loss": 1.2169, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.14919533098974924, |
|
"learning_rate": 0.00017033494981006002, |
|
"loss": 1.2358, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.15251746717084805, |
|
"learning_rate": 0.00017018410478376032, |
|
"loss": 1.2241, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.1456060482002663, |
|
"learning_rate": 0.00017003294437180255, |
|
"loss": 1.2298, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.17048886778787248, |
|
"learning_rate": 0.00016988146925345484, |
|
"loss": 1.2707, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.15304381059310815, |
|
"learning_rate": 0.00016972968010939954, |
|
"loss": 1.1498, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.16590055969071696, |
|
"learning_rate": 0.0001695775776217301, |
|
"loss": 1.2481, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.14299575837437278, |
|
"learning_rate": 0.00016942516247394807, |
|
"loss": 1.2058, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.14275107775859475, |
|
"learning_rate": 0.00016927243535095997, |
|
"loss": 1.2178, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.1554250137491414, |
|
"learning_rate": 0.0001691193969390742, |
|
"loss": 1.1197, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.16958418467021688, |
|
"learning_rate": 0.0001689660479259981, |
|
"loss": 1.1768, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.1546216583314497, |
|
"learning_rate": 0.00016881238900083473, |
|
"loss": 1.1741, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.15287056494787424, |
|
"learning_rate": 0.0001686584208540797, |
|
"loss": 1.2328, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.1419329373337611, |
|
"learning_rate": 0.0001685041441776183, |
|
"loss": 1.1743, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.15662192296485464, |
|
"learning_rate": 0.00016834955966472213, |
|
"loss": 1.1861, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.16304778894798697, |
|
"learning_rate": 0.00016819466801004621, |
|
"loss": 1.2045, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.15690090424895087, |
|
"learning_rate": 0.00016803946990962576, |
|
"loss": 1.1553, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.15227523196027068, |
|
"learning_rate": 0.000167883966060873, |
|
"loss": 1.126, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.15442134859978873, |
|
"learning_rate": 0.00016772815716257412, |
|
"loss": 1.169, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.16235578615890994, |
|
"learning_rate": 0.00016757204391488613, |
|
"loss": 1.198, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.16631365293435893, |
|
"learning_rate": 0.00016741562701933367, |
|
"loss": 1.2191, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.15668252106453312, |
|
"learning_rate": 0.0001672589071788059, |
|
"loss": 1.1094, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.14883987789397163, |
|
"learning_rate": 0.00016710188509755329, |
|
"loss": 1.0653, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.14723412204861966, |
|
"learning_rate": 0.00016694456148118452, |
|
"loss": 1.0632, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.2694273773199372, |
|
"learning_rate": 0.00016678693703666325, |
|
"loss": 1.1577, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.3010203762394184, |
|
"learning_rate": 0.00016662901247230502, |
|
"loss": 1.1049, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.16960159731526514, |
|
"learning_rate": 0.0001664707884977739, |
|
"loss": 1.2185, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.1593257551859938, |
|
"learning_rate": 0.00016631226582407952, |
|
"loss": 1.1085, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.1695114549114674, |
|
"learning_rate": 0.00016615344516357378, |
|
"loss": 1.1863, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.17116727204552032, |
|
"learning_rate": 0.00016599432722994755, |
|
"loss": 1.0921, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.16265948588203852, |
|
"learning_rate": 0.00016583491273822765, |
|
"loss": 1.1905, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.16500105151611763, |
|
"learning_rate": 0.00016567520240477344, |
|
"loss": 1.1477, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.15445707774433456, |
|
"learning_rate": 0.00016551519694727381, |
|
"loss": 1.1247, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.17740657850033337, |
|
"learning_rate": 0.0001653548970847438, |
|
"loss": 1.0164, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.16152198390194278, |
|
"learning_rate": 0.0001651943035375214, |
|
"loss": 1.1779, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.16299638453443582, |
|
"learning_rate": 0.00016503341702726426, |
|
"loss": 1.1847, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.1486770667934901, |
|
"learning_rate": 0.00016487223827694672, |
|
"loss": 1.1002, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.15907088594322072, |
|
"learning_rate": 0.00016471076801085615, |
|
"loss": 1.1127, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.15491780431797444, |
|
"learning_rate": 0.00016454900695458998, |
|
"loss": 1.1196, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.16584586610927665, |
|
"learning_rate": 0.00016438695583505242, |
|
"loss": 1.1441, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.15197831968756778, |
|
"learning_rate": 0.00016422461538045103, |
|
"loss": 1.0532, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.15669296934233234, |
|
"learning_rate": 0.00016406198632029357, |
|
"loss": 1.1753, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.1523894998655591, |
|
"learning_rate": 0.0001638990693853848, |
|
"loss": 1.1339, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.1570057583413597, |
|
"learning_rate": 0.000163735865307823, |
|
"loss": 1.1117, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.1637341350288717, |
|
"learning_rate": 0.00016357237482099684, |
|
"loss": 1.1369, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.15703259344496112, |
|
"learning_rate": 0.0001634085986595819, |
|
"loss": 1.1448, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.1672273979359662, |
|
"learning_rate": 0.00016324453755953773, |
|
"loss": 1.1972, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.16032943590057747, |
|
"learning_rate": 0.0001630801922581041, |
|
"loss": 1.1811, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.15414179865764022, |
|
"learning_rate": 0.00016291556349379795, |
|
"loss": 1.1126, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.16133591336152153, |
|
"learning_rate": 0.00016275065200641004, |
|
"loss": 1.0664, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.17496271909044792, |
|
"learning_rate": 0.00016258545853700158, |
|
"loss": 1.2042, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.16189829224809252, |
|
"learning_rate": 0.00016241998382790095, |
|
"loss": 1.1896, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.1508460775909727, |
|
"learning_rate": 0.00016225422862270027, |
|
"loss": 1.1205, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.16848110855261456, |
|
"learning_rate": 0.00016208819366625218, |
|
"loss": 1.1132, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.15996856299765072, |
|
"learning_rate": 0.00016192187970466644, |
|
"loss": 1.1022, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.15697088450293906, |
|
"learning_rate": 0.0001617552874853065, |
|
"loss": 1.0483, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.16338270251123885, |
|
"learning_rate": 0.0001615884177567863, |
|
"loss": 1.1049, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.1605597226615145, |
|
"learning_rate": 0.0001614212712689668, |
|
"loss": 1.1319, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.1694169270615921, |
|
"learning_rate": 0.00016125384877295257, |
|
"loss": 1.2143, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.16306866904399633, |
|
"learning_rate": 0.00016108615102108855, |
|
"loss": 1.164, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.1646758516051161, |
|
"learning_rate": 0.00016091817876695655, |
|
"loss": 1.2176, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.16334394129553162, |
|
"learning_rate": 0.00016074993276537198, |
|
"loss": 1.1526, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.16103491072240317, |
|
"learning_rate": 0.00016058141377238026, |
|
"loss": 1.1022, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.16029258404100036, |
|
"learning_rate": 0.00016041262254525362, |
|
"loss": 1.1508, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.15686217802450533, |
|
"learning_rate": 0.00016024355984248768, |
|
"loss": 1.1471, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.1520812354157866, |
|
"learning_rate": 0.0001600742264237979, |
|
"loss": 1.0994, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.161954690104593, |
|
"learning_rate": 0.0001599046230501163, |
|
"loss": 1.1475, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.16470046818858164, |
|
"learning_rate": 0.00015973475048358795, |
|
"loss": 1.1425, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.17783426542152342, |
|
"learning_rate": 0.00015956460948756765, |
|
"loss": 1.1933, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.20834095672163827, |
|
"learning_rate": 0.0001593942008266164, |
|
"loss": 1.1545, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.1601922371369916, |
|
"learning_rate": 0.00015922352526649803, |
|
"loss": 1.1626, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.17590795577463478, |
|
"learning_rate": 0.00015905258357417569, |
|
"loss": 1.1428, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.16438364538914646, |
|
"learning_rate": 0.00015888137651780845, |
|
"loss": 1.1237, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.1532404311365829, |
|
"learning_rate": 0.00015870990486674792, |
|
"loss": 1.1292, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.1658092976448754, |
|
"learning_rate": 0.0001585381693915346, |
|
"loss": 1.2051, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.17009266166893572, |
|
"learning_rate": 0.00015836617086389468, |
|
"loss": 1.1574, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.17117032466097273, |
|
"learning_rate": 0.00015819391005673626, |
|
"loss": 1.1715, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.16766945870038483, |
|
"learning_rate": 0.00015802138774414622, |
|
"loss": 1.1322, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.16603674100502688, |
|
"learning_rate": 0.00015784860470138633, |
|
"loss": 1.1682, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.1538296997751752, |
|
"learning_rate": 0.00015767556170489025, |
|
"loss": 1.0979, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.16355862351540293, |
|
"learning_rate": 0.00015750225953225968, |
|
"loss": 1.0859, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.17100029950170442, |
|
"learning_rate": 0.00015732869896226094, |
|
"loss": 1.1513, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.16981502045992727, |
|
"learning_rate": 0.0001571548807748215, |
|
"loss": 1.1665, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.15981257682597005, |
|
"learning_rate": 0.00015698080575102661, |
|
"loss": 1.0862, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.17114664647857297, |
|
"learning_rate": 0.00015680647467311557, |
|
"loss": 1.1883, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.16818704328900685, |
|
"learning_rate": 0.00015663188832447833, |
|
"loss": 1.1345, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.16716958487866437, |
|
"learning_rate": 0.0001564570474896519, |
|
"loss": 1.1613, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.16425076739154273, |
|
"learning_rate": 0.00015628195295431697, |
|
"loss": 1.1153, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.1668267227341853, |
|
"learning_rate": 0.0001561066055052941, |
|
"loss": 1.1796, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.15748311738845247, |
|
"learning_rate": 0.00015593100593054064, |
|
"loss": 1.125, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.1578229320002536, |
|
"learning_rate": 0.00015575515501914668, |
|
"loss": 1.1751, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.17990453419427754, |
|
"learning_rate": 0.0001555790535613318, |
|
"loss": 1.2107, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.17170087096955436, |
|
"learning_rate": 0.0001554027023484416, |
|
"loss": 1.1704, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.1641233552033525, |
|
"learning_rate": 0.00015522610217294375, |
|
"loss": 1.1399, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.22816800313793714, |
|
"learning_rate": 0.00015504925382842487, |
|
"loss": 1.1374, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.1699514304776241, |
|
"learning_rate": 0.00015487215810958675, |
|
"loss": 1.1668, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.16580705156733896, |
|
"learning_rate": 0.00015469481581224272, |
|
"loss": 1.2467, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.1621295978537637, |
|
"learning_rate": 0.0001545172277333142, |
|
"loss": 1.095, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.15661235975760687, |
|
"learning_rate": 0.00015433939467082713, |
|
"loss": 1.1437, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.15913894864881573, |
|
"learning_rate": 0.00015416131742390827, |
|
"loss": 1.1682, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.17078546916822446, |
|
"learning_rate": 0.00015398299679278172, |
|
"loss": 1.1673, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.17593093163481005, |
|
"learning_rate": 0.00015380443357876518, |
|
"loss": 1.1588, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.16563306408223222, |
|
"learning_rate": 0.00015362562858426654, |
|
"loss": 1.1542, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.15824017459526013, |
|
"learning_rate": 0.0001534465826127801, |
|
"loss": 1.136, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.1555748015065671, |
|
"learning_rate": 0.00015326729646888314, |
|
"loss": 1.1744, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.15054650982383877, |
|
"learning_rate": 0.0001530877709582321, |
|
"loss": 1.0996, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.1732483344367302, |
|
"learning_rate": 0.00015290800688755907, |
|
"loss": 1.1375, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.17086276533942882, |
|
"learning_rate": 0.0001527280050646682, |
|
"loss": 1.1399, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.14791498583408913, |
|
"learning_rate": 0.00015254776629843205, |
|
"loss": 1.0112, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.16492159023612152, |
|
"learning_rate": 0.00015236729139878782, |
|
"loss": 1.1179, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.15550107018855622, |
|
"learning_rate": 0.0001521865811767339, |
|
"loss": 1.0872, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.17086993477527834, |
|
"learning_rate": 0.00015200563644432612, |
|
"loss": 1.1747, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.16832629109129904, |
|
"learning_rate": 0.0001518244580146742, |
|
"loss": 1.2623, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.15555098188988337, |
|
"learning_rate": 0.00015164304670193792, |
|
"loss": 1.1172, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.16504655864531245, |
|
"learning_rate": 0.00015146140332132358, |
|
"loss": 1.1615, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.15904615747067738, |
|
"learning_rate": 0.00015127952868908043, |
|
"loss": 1.1841, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.1664190253819244, |
|
"learning_rate": 0.00015109742362249672, |
|
"loss": 1.2258, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.17622098883404838, |
|
"learning_rate": 0.00015091508893989633, |
|
"loss": 1.2141, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.17253106779139057, |
|
"learning_rate": 0.00015073252546063493, |
|
"loss": 1.1289, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.15924181512457228, |
|
"learning_rate": 0.0001505497340050963, |
|
"loss": 1.1836, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.1755114666296529, |
|
"learning_rate": 0.00015036671539468878, |
|
"loss": 1.2109, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.1623211151951732, |
|
"learning_rate": 0.00015018347045184132, |
|
"loss": 1.1922, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.16895842955233992, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 1.2402, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.17126264186496687, |
|
"learning_rate": 0.00014981630486362435, |
|
"loss": 1.2327, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.19632534974860108, |
|
"learning_rate": 0.00014963238586818345, |
|
"loss": 1.1586, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.16708414744148634, |
|
"learning_rate": 0.00014944824384015236, |
|
"loss": 1.1766, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.1800283830073218, |
|
"learning_rate": 0.00014926387960700842, |
|
"loss": 1.0902, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.17570100482291343, |
|
"learning_rate": 0.0001490792939972275, |
|
"loss": 1.1875, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.15904497800793038, |
|
"learning_rate": 0.0001488944878402802, |
|
"loss": 1.083, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.1629549994998532, |
|
"learning_rate": 0.00014870946196662822, |
|
"loss": 1.1505, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.17495490666041913, |
|
"learning_rate": 0.00014852421720772062, |
|
"loss": 1.1107, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.1814722009088628, |
|
"learning_rate": 0.00014833875439599004, |
|
"loss": 1.2089, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.16312442272141373, |
|
"learning_rate": 0.00014815307436484898, |
|
"loss": 1.1518, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.18878089936272002, |
|
"learning_rate": 0.00014796717794868607, |
|
"loss": 1.1806, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.171347321491223, |
|
"learning_rate": 0.00014778106598286234, |
|
"loss": 1.3189, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.15561777538677532, |
|
"learning_rate": 0.00014759473930370736, |
|
"loss": 1.1071, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.16095434527629326, |
|
"learning_rate": 0.0001474081987485156, |
|
"loss": 1.173, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.1594519070011647, |
|
"learning_rate": 0.00014722144515554264, |
|
"loss": 1.1099, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.15853021638823037, |
|
"learning_rate": 0.00014703447936400134, |
|
"loss": 1.1563, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.21942200368949344, |
|
"learning_rate": 0.00014684730221405814, |
|
"loss": 1.1226, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.16328115550493413, |
|
"learning_rate": 0.00014665991454682924, |
|
"loss": 1.1878, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.16551970389762746, |
|
"learning_rate": 0.00014647231720437686, |
|
"loss": 1.055, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.17275962400330097, |
|
"learning_rate": 0.00014628451102970547, |
|
"loss": 1.1302, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.17269943862734122, |
|
"learning_rate": 0.00014609649686675785, |
|
"loss": 1.1635, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.18932844828433326, |
|
"learning_rate": 0.00014590827556041158, |
|
"loss": 1.0954, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.1670913427771278, |
|
"learning_rate": 0.00014571984795647494, |
|
"loss": 1.1336, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.1727053222121231, |
|
"learning_rate": 0.00014553121490168332, |
|
"loss": 1.1477, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.20281162440108022, |
|
"learning_rate": 0.00014534237724369534, |
|
"loss": 1.1364, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.17595782723751183, |
|
"learning_rate": 0.00014515333583108896, |
|
"loss": 1.1977, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.16755958800968615, |
|
"learning_rate": 0.00014496409151335785, |
|
"loss": 1.1315, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.17908885139779734, |
|
"learning_rate": 0.00014477464514090743, |
|
"loss": 1.1851, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.16047769884625865, |
|
"learning_rate": 0.00014458499756505116, |
|
"loss": 1.1137, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.18864957014640948, |
|
"learning_rate": 0.00014439514963800648, |
|
"loss": 1.1962, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.16074020271372416, |
|
"learning_rate": 0.00014420510221289137, |
|
"loss": 1.1448, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.17166433855606958, |
|
"learning_rate": 0.00014401485614372008, |
|
"loss": 1.0487, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.1773682331611409, |
|
"learning_rate": 0.0001438244122853996, |
|
"loss": 1.16, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.16596601643110168, |
|
"learning_rate": 0.00014363377149372584, |
|
"loss": 1.0843, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.16748936526973612, |
|
"learning_rate": 0.0001434429346253794, |
|
"loss": 1.1619, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.17600274748860967, |
|
"learning_rate": 0.00014325190253792222, |
|
"loss": 1.151, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.1721019830304793, |
|
"learning_rate": 0.0001430606760897934, |
|
"loss": 1.0609, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.172361054646693, |
|
"learning_rate": 0.00014286925614030542, |
|
"loss": 1.1777, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.1938123204770535, |
|
"learning_rate": 0.00014267764354964038, |
|
"loss": 1.1078, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.171152731950173, |
|
"learning_rate": 0.00014248583917884594, |
|
"loss": 1.2344, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.176915280903416, |
|
"learning_rate": 0.00014229384388983167, |
|
"loss": 1.1443, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.16620426221482948, |
|
"learning_rate": 0.00014210165854536494, |
|
"loss": 1.1635, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.17368168932295722, |
|
"learning_rate": 0.0001419092840090673, |
|
"loss": 1.2076, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.16396003568599715, |
|
"learning_rate": 0.0001417167211454104, |
|
"loss": 1.1798, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.17474861300401356, |
|
"learning_rate": 0.0001415239708197122, |
|
"loss": 1.1951, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.17687469132536782, |
|
"learning_rate": 0.00014133103389813302, |
|
"loss": 1.1863, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.1729350163866909, |
|
"learning_rate": 0.0001411379112476717, |
|
"loss": 1.1791, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.16183372577765218, |
|
"learning_rate": 0.0001409446037361617, |
|
"loss": 1.11, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.17468439327379603, |
|
"learning_rate": 0.0001407511122322672, |
|
"loss": 1.1459, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.1746393388604993, |
|
"learning_rate": 0.00014055743760547917, |
|
"loss": 1.1782, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.1709730768079813, |
|
"learning_rate": 0.00014036358072611147, |
|
"loss": 1.1729, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.16338226575160056, |
|
"learning_rate": 0.00014016954246529696, |
|
"loss": 1.1637, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.16522796461612693, |
|
"learning_rate": 0.00013997532369498355, |
|
"loss": 1.1786, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.16443017398263532, |
|
"learning_rate": 0.0001397809252879303, |
|
"loss": 1.1885, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.16678395716368374, |
|
"learning_rate": 0.0001395863481177036, |
|
"loss": 1.1576, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.15796662849679724, |
|
"learning_rate": 0.000139391593058673, |
|
"loss": 1.1719, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.1605406960709786, |
|
"learning_rate": 0.00013919666098600753, |
|
"loss": 1.1442, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.16720350808668913, |
|
"learning_rate": 0.00013900155277567157, |
|
"loss": 1.1231, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.16025549611436293, |
|
"learning_rate": 0.00013880626930442113, |
|
"loss": 1.0959, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.1682327263659866, |
|
"learning_rate": 0.00013861081144979974, |
|
"loss": 1.1816, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.16843691606816277, |
|
"learning_rate": 0.00013841518009013445, |
|
"loss": 1.1607, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.17753480625595375, |
|
"learning_rate": 0.0001382193761045322, |
|
"loss": 1.2085, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.175444570487372, |
|
"learning_rate": 0.0001380234003728754, |
|
"loss": 1.1883, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.17844909537485765, |
|
"learning_rate": 0.00013782725377581848, |
|
"loss": 1.2548, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.17827531614400435, |
|
"learning_rate": 0.00013763093719478358, |
|
"loss": 1.1753, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.16568782268956592, |
|
"learning_rate": 0.00013743445151195657, |
|
"loss": 1.1269, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.16229148230338714, |
|
"learning_rate": 0.00013723779761028347, |
|
"loss": 1.1233, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.18071555500117806, |
|
"learning_rate": 0.000137040976373466, |
|
"loss": 1.1691, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.16528568438217509, |
|
"learning_rate": 0.000136843988685958, |
|
"loss": 1.1044, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.15621961073605914, |
|
"learning_rate": 0.00013664683543296112, |
|
"loss": 1.0585, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.1821012365890871, |
|
"learning_rate": 0.00013644951750042114, |
|
"loss": 1.1061, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.18832728793079637, |
|
"learning_rate": 0.00013625203577502382, |
|
"loss": 1.2088, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.17821269011741103, |
|
"learning_rate": 0.00013605439114419094, |
|
"loss": 1.1076, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.15816928920099374, |
|
"learning_rate": 0.00013585658449607633, |
|
"loss": 1.057, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.1634582673036166, |
|
"learning_rate": 0.00013565861671956187, |
|
"loss": 1.2206, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.16402056158577089, |
|
"learning_rate": 0.00013546048870425356, |
|
"loss": 1.0809, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.18674575425072043, |
|
"learning_rate": 0.0001352622013404774, |
|
"loss": 1.0979, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.16768128529630819, |
|
"learning_rate": 0.00013506375551927547, |
|
"loss": 1.1813, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.17842009039615817, |
|
"learning_rate": 0.00013486515213240188, |
|
"loss": 1.1767, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.16460609654310757, |
|
"learning_rate": 0.0001346663920723188, |
|
"loss": 1.1711, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.16130836204900895, |
|
"learning_rate": 0.00013446747623219255, |
|
"loss": 1.1506, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.19357518099915463, |
|
"learning_rate": 0.00013426840550588933, |
|
"loss": 1.1454, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.16818483070815926, |
|
"learning_rate": 0.0001340691807879714, |
|
"loss": 1.105, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.17634247767232533, |
|
"learning_rate": 0.00013386980297369307, |
|
"loss": 1.1173, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.17275588243641285, |
|
"learning_rate": 0.0001336702729589965, |
|
"loss": 1.1614, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.18630211892037335, |
|
"learning_rate": 0.00013347059164050794, |
|
"loss": 1.0822, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.1610612417401003, |
|
"learning_rate": 0.0001332707599155334, |
|
"loss": 1.11, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.1710900937487699, |
|
"learning_rate": 0.00013307077868205487, |
|
"loss": 1.1556, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.16789088046962233, |
|
"learning_rate": 0.00013287064883872612, |
|
"loss": 1.166, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.17152792861267624, |
|
"learning_rate": 0.00013267037128486883, |
|
"loss": 1.2076, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.16987939988427406, |
|
"learning_rate": 0.00013246994692046836, |
|
"loss": 1.1892, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.17050807620104472, |
|
"learning_rate": 0.00013226937664616976, |
|
"loss": 1.1405, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.1619859173580362, |
|
"learning_rate": 0.00013206866136327388, |
|
"loss": 1.15, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.1693102604231561, |
|
"learning_rate": 0.00013186780197373306, |
|
"loss": 1.1108, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.15751817301555188, |
|
"learning_rate": 0.00013166679938014726, |
|
"loss": 1.1075, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.17344055609711165, |
|
"learning_rate": 0.00013146565448576004, |
|
"loss": 1.2239, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.16419033762795351, |
|
"learning_rate": 0.00013126436819445422, |
|
"loss": 1.2146, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.17241446484871606, |
|
"learning_rate": 0.00013106294141074825, |
|
"loss": 1.1808, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.16299230805525922, |
|
"learning_rate": 0.0001308613750397917, |
|
"loss": 1.1639, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.15993384139183944, |
|
"learning_rate": 0.00013065966998736155, |
|
"loss": 1.0664, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.17951699730636844, |
|
"learning_rate": 0.00013045782715985792, |
|
"loss": 1.1565, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.16420615622643212, |
|
"learning_rate": 0.0001302558474643, |
|
"loss": 1.1675, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.1718558627273243, |
|
"learning_rate": 0.0001300537318083221, |
|
"loss": 1.1758, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.16040004880583997, |
|
"learning_rate": 0.00012985148110016947, |
|
"loss": 1.1286, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.1644924005506398, |
|
"learning_rate": 0.0001296490962486942, |
|
"loss": 1.0901, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.17508769995003778, |
|
"learning_rate": 0.00012944657816335123, |
|
"loss": 1.158, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.16636235063611218, |
|
"learning_rate": 0.0001292439277541942, |
|
"loss": 1.1506, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.1651752039403403, |
|
"learning_rate": 0.00012904114593187136, |
|
"loss": 1.1167, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.1727449182027404, |
|
"learning_rate": 0.0001288382336076215, |
|
"loss": 1.1208, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.1678888505155555, |
|
"learning_rate": 0.00012863519169326984, |
|
"loss": 1.1164, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.1717914946350261, |
|
"learning_rate": 0.0001284320211012239, |
|
"loss": 1.174, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.15833038102003086, |
|
"learning_rate": 0.00012822872274446958, |
|
"loss": 1.0557, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.16693919668597118, |
|
"learning_rate": 0.00012802529753656668, |
|
"loss": 1.2202, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.1608034670310254, |
|
"learning_rate": 0.0001278217463916453, |
|
"loss": 1.0761, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.16522372158185394, |
|
"learning_rate": 0.0001276180702244012, |
|
"loss": 1.114, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.15699403016449, |
|
"learning_rate": 0.00012741426995009213, |
|
"loss": 1.0487, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.1724966872571711, |
|
"learning_rate": 0.00012721034648453353, |
|
"loss": 1.2198, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.16612867386952304, |
|
"learning_rate": 0.00012700630074409427, |
|
"loss": 1.1481, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.1755115998846381, |
|
"learning_rate": 0.0001268021336456929, |
|
"loss": 1.1295, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.16610612624318807, |
|
"learning_rate": 0.00012659784610679318, |
|
"loss": 1.0735, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.17210665028457767, |
|
"learning_rate": 0.0001263934390454001, |
|
"loss": 1.128, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.17115180879996908, |
|
"learning_rate": 0.00012618891338005573, |
|
"loss": 1.2114, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.1708121456807184, |
|
"learning_rate": 0.0001259842700298352, |
|
"loss": 1.2037, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.16413369503599784, |
|
"learning_rate": 0.00012577950991434248, |
|
"loss": 1.115, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.16575779512688402, |
|
"learning_rate": 0.0001255746339537061, |
|
"loss": 1.13, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.16787768159350958, |
|
"learning_rate": 0.00012536964306857526, |
|
"loss": 1.1187, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.161257935533138, |
|
"learning_rate": 0.00012516453818011566, |
|
"loss": 1.1272, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.17163817923820932, |
|
"learning_rate": 0.00012495932021000517, |
|
"loss": 1.1371, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.16451801686420228, |
|
"learning_rate": 0.0001247539900804299, |
|
"loss": 1.1234, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.4897545674498291, |
|
"learning_rate": 0.00012454854871407994, |
|
"loss": 1.1276, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.1674813107615878, |
|
"learning_rate": 0.00012434299703414524, |
|
"loss": 1.0717, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.16889265391095676, |
|
"learning_rate": 0.0001241373359643114, |
|
"loss": 1.1094, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.17807200243744, |
|
"learning_rate": 0.0001239315664287558, |
|
"loss": 1.2072, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.1821416845887793, |
|
"learning_rate": 0.00012372568935214298, |
|
"loss": 1.1574, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.16787645348154986, |
|
"learning_rate": 0.00012351970565962085, |
|
"loss": 1.0884, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.17767787321450104, |
|
"learning_rate": 0.00012331361627681645, |
|
"loss": 1.1561, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.16990542040230855, |
|
"learning_rate": 0.00012310742212983167, |
|
"loss": 1.1454, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.17040623149046621, |
|
"learning_rate": 0.00012290112414523928, |
|
"loss": 1.1143, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.17383642491755713, |
|
"learning_rate": 0.00012269472325007858, |
|
"loss": 1.1502, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.1670271756098212, |
|
"learning_rate": 0.00012248822037185138, |
|
"loss": 1.1872, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.17338597290651167, |
|
"learning_rate": 0.0001222816164385177, |
|
"loss": 1.201, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.16342647338148947, |
|
"learning_rate": 0.00012207491237849172, |
|
"loss": 1.0994, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.15847559756714308, |
|
"learning_rate": 0.0001218681091206376, |
|
"loss": 1.1272, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.17295023549456498, |
|
"learning_rate": 0.00012166120759426514, |
|
"loss": 1.1026, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.17574482838436642, |
|
"learning_rate": 0.00012145420872912585, |
|
"loss": 1.1785, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.1636907763577917, |
|
"learning_rate": 0.0001212471134554086, |
|
"loss": 1.125, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.1683476239550862, |
|
"learning_rate": 0.00012103992270373547, |
|
"loss": 1.1196, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.1634846923426851, |
|
"learning_rate": 0.00012083263740515765, |
|
"loss": 1.1666, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.20362360607016913, |
|
"learning_rate": 0.00012062525849115107, |
|
"loss": 1.2166, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.15520795046759722, |
|
"learning_rate": 0.00012041778689361254, |
|
"loss": 1.1046, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.17498352958051774, |
|
"learning_rate": 0.00012021022354485514, |
|
"loss": 1.1331, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.17105423893572264, |
|
"learning_rate": 0.00012000256937760445, |
|
"loss": 1.0799, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.16837214432904796, |
|
"learning_rate": 0.00011979482532499401, |
|
"loss": 1.1856, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.1717737899447442, |
|
"learning_rate": 0.00011958699232056134, |
|
"loss": 1.1738, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.16901679352588425, |
|
"learning_rate": 0.0001193790712982437, |
|
"loss": 1.1361, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.39457461047161113, |
|
"learning_rate": 0.00011917106319237386, |
|
"loss": 1.1095, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.1689082831860728, |
|
"learning_rate": 0.00011896296893767587, |
|
"loss": 1.1518, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.17381008421938537, |
|
"learning_rate": 0.00011875478946926093, |
|
"loss": 1.1251, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.1723002103848165, |
|
"learning_rate": 0.00011854652572262323, |
|
"loss": 1.1128, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.1799457776202214, |
|
"learning_rate": 0.00011833817863363564, |
|
"loss": 1.1702, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.18319688210909812, |
|
"learning_rate": 0.00011812974913854545, |
|
"loss": 1.1537, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.1716604871697964, |
|
"learning_rate": 0.0001179212381739704, |
|
"loss": 1.1567, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.17098997576829758, |
|
"learning_rate": 0.00011771264667689427, |
|
"loss": 1.1309, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.2607304915983766, |
|
"learning_rate": 0.00011750397558466273, |
|
"loss": 1.1151, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.1869030292628108, |
|
"learning_rate": 0.00011729522583497912, |
|
"loss": 1.0764, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.1969233515350324, |
|
"learning_rate": 0.00011708639836590023, |
|
"loss": 1.0986, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.17794486028770468, |
|
"learning_rate": 0.00011687749411583213, |
|
"loss": 1.1327, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.19408346665780069, |
|
"learning_rate": 0.00011666851402352588, |
|
"loss": 1.0768, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.5122187922492092, |
|
"learning_rate": 0.00011645945902807341, |
|
"loss": 1.1223, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.18948615786386766, |
|
"learning_rate": 0.00011625033006890315, |
|
"loss": 1.1452, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.16917117240636045, |
|
"learning_rate": 0.00011604112808577603, |
|
"loss": 1.131, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.177460873056157, |
|
"learning_rate": 0.00011583185401878101, |
|
"loss": 1.1691, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.2727518607538819, |
|
"learning_rate": 0.00011562250880833104, |
|
"loss": 1.185, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.1722336921590558, |
|
"learning_rate": 0.00011541309339515869, |
|
"loss": 1.2202, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.16731429410912582, |
|
"learning_rate": 0.00011520360872031209, |
|
"loss": 1.1114, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.1747423744661194, |
|
"learning_rate": 0.00011499405572515059, |
|
"loss": 1.0929, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.18705677818301408, |
|
"learning_rate": 0.00011478443535134049, |
|
"loss": 1.101, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.18090585152937122, |
|
"learning_rate": 0.00011457474854085096, |
|
"loss": 1.2376, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.1572625515956593, |
|
"learning_rate": 0.00011436499623594963, |
|
"loss": 1.1039, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.18868561701551964, |
|
"learning_rate": 0.00011415517937919846, |
|
"loss": 1.1091, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.3765201515989829, |
|
"learning_rate": 0.00011394529891344958, |
|
"loss": 1.1904, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.17659930390592027, |
|
"learning_rate": 0.00011373535578184082, |
|
"loss": 1.1767, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.16250915193456436, |
|
"learning_rate": 0.00011352535092779173, |
|
"loss": 1.0396, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.17679862621844297, |
|
"learning_rate": 0.00011331528529499909, |
|
"loss": 1.0899, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.17303502214887417, |
|
"learning_rate": 0.00011310515982743293, |
|
"loss": 1.1623, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.17241019588300063, |
|
"learning_rate": 0.00011289497546933212, |
|
"loss": 1.1761, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.19940689264510733, |
|
"learning_rate": 0.00011268473316520007, |
|
"loss": 1.186, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.17205111928537736, |
|
"learning_rate": 0.00011247443385980078, |
|
"loss": 1.1669, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.168610859884427, |
|
"learning_rate": 0.0001122640784981542, |
|
"loss": 1.1223, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.1571679218925349, |
|
"learning_rate": 0.0001120536680255323, |
|
"loss": 1.1017, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.1731635445936976, |
|
"learning_rate": 0.00011184320338745467, |
|
"loss": 1.1549, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.16986390394426032, |
|
"learning_rate": 0.00011163268552968423, |
|
"loss": 1.1561, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.17807860518694296, |
|
"learning_rate": 0.00011142211539822318, |
|
"loss": 1.1191, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.1848822566355218, |
|
"learning_rate": 0.0001112114939393085, |
|
"loss": 1.1265, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.16154933469362054, |
|
"learning_rate": 0.00011100082209940795, |
|
"loss": 1.1696, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.17539593545795423, |
|
"learning_rate": 0.00011079010082521557, |
|
"loss": 1.2098, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.16955097983529596, |
|
"learning_rate": 0.00011057933106364758, |
|
"loss": 1.134, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.1627286025891699, |
|
"learning_rate": 0.00011036851376183812, |
|
"loss": 1.0968, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.18241009216885773, |
|
"learning_rate": 0.0001101576498671349, |
|
"loss": 1.0784, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.1602213810951013, |
|
"learning_rate": 0.00010994674032709513, |
|
"loss": 1.0272, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.17710273355839293, |
|
"learning_rate": 0.00010973578608948094, |
|
"loss": 1.1807, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.16846043025211901, |
|
"learning_rate": 0.00010952478810225548, |
|
"loss": 1.1237, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.17578602240771765, |
|
"learning_rate": 0.00010931374731357841, |
|
"loss": 1.1305, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.1741114211076835, |
|
"learning_rate": 0.0001091026646718018, |
|
"loss": 1.0663, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.17419627851243186, |
|
"learning_rate": 0.0001088915411254657, |
|
"loss": 1.0864, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.1941495171655647, |
|
"learning_rate": 0.00010868037762329404, |
|
"loss": 1.1471, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.16828482820770493, |
|
"learning_rate": 0.0001084691751141903, |
|
"loss": 1.1386, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.1812212801001379, |
|
"learning_rate": 0.00010825793454723325, |
|
"loss": 1.193, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.31177412914918357, |
|
"learning_rate": 0.00010804665687167262, |
|
"loss": 1.1416, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.16307908317649347, |
|
"learning_rate": 0.00010783534303692493, |
|
"loss": 1.0742, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.17264040918528867, |
|
"learning_rate": 0.00010762399399256917, |
|
"loss": 1.1196, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.16662882952717148, |
|
"learning_rate": 0.00010741261068834265, |
|
"loss": 1.1095, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.16909860632274898, |
|
"learning_rate": 0.00010720119407413647, |
|
"loss": 1.0978, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.1841227749300958, |
|
"learning_rate": 0.00010698974509999158, |
|
"loss": 1.1249, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.163410587005958, |
|
"learning_rate": 0.00010677826471609422, |
|
"loss": 1.1027, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.16473194468337188, |
|
"learning_rate": 0.00010656675387277182, |
|
"loss": 1.0452, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.16465304101505895, |
|
"learning_rate": 0.00010635521352048872, |
|
"loss": 1.0975, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.1648559678877552, |
|
"learning_rate": 0.00010614364460984176, |
|
"loss": 1.1865, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.17182083260240222, |
|
"learning_rate": 0.00010593204809155628, |
|
"loss": 1.1666, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.262043699845629, |
|
"learning_rate": 0.00010572042491648149, |
|
"loss": 1.1705, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.1642146793634745, |
|
"learning_rate": 0.00010550877603558655, |
|
"loss": 1.0943, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.1740460470258003, |
|
"learning_rate": 0.00010529710239995605, |
|
"loss": 1.0492, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.16424504391420278, |
|
"learning_rate": 0.0001050854049607858, |
|
"loss": 1.0877, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.1754464752434157, |
|
"learning_rate": 0.00010487368466937866, |
|
"loss": 1.2004, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.17019599469065266, |
|
"learning_rate": 0.00010466194247714008, |
|
"loss": 1.1736, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.16453531456339282, |
|
"learning_rate": 0.00010445017933557404, |
|
"loss": 1.0682, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.16576841634828685, |
|
"learning_rate": 0.00010423839619627853, |
|
"loss": 1.1239, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.17959933112877494, |
|
"learning_rate": 0.00010402659401094152, |
|
"loss": 1.1741, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.1764446278189948, |
|
"learning_rate": 0.00010381477373133652, |
|
"loss": 1.1465, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.1704634166756464, |
|
"learning_rate": 0.0001036029363093183, |
|
"loss": 1.2092, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.16952721202785456, |
|
"learning_rate": 0.00010339108269681874, |
|
"loss": 1.1478, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.1787903145977452, |
|
"learning_rate": 0.00010317921384584244, |
|
"loss": 1.1818, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.1623533169508836, |
|
"learning_rate": 0.00010296733070846252, |
|
"loss": 1.1352, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.17651784025914782, |
|
"learning_rate": 0.00010275543423681621, |
|
"loss": 1.175, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.16473007072288753, |
|
"learning_rate": 0.00010254352538310075, |
|
"loss": 1.1837, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.16909571695790346, |
|
"learning_rate": 0.00010233160509956894, |
|
"loss": 1.1435, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.1904744004900901, |
|
"learning_rate": 0.000102119674338525, |
|
"loss": 1.1549, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.17373949711988154, |
|
"learning_rate": 0.00010190773405232024, |
|
"loss": 1.202, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.18192103377408994, |
|
"learning_rate": 0.00010169578519334873, |
|
"loss": 1.1851, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.18229237608440976, |
|
"learning_rate": 0.0001014838287140431, |
|
"loss": 1.1233, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.16940278596266165, |
|
"learning_rate": 0.00010127186556687019, |
|
"loss": 1.169, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.18262078712459354, |
|
"learning_rate": 0.00010105989670432681, |
|
"loss": 1.199, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.16747513400418446, |
|
"learning_rate": 0.00010084792307893552, |
|
"loss": 1.0666, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.1746037115339502, |
|
"learning_rate": 0.00010063594564324012, |
|
"loss": 1.1885, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.1732153359208303, |
|
"learning_rate": 0.00010042396534980176, |
|
"loss": 1.1155, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.18911672921606115, |
|
"learning_rate": 0.00010021198315119424, |
|
"loss": 1.1495, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.1810967486335703, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2386, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.16208600336701465, |
|
"learning_rate": 9.978801684880578e-05, |
|
"loss": 1.1016, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.17108242412955507, |
|
"learning_rate": 9.957603465019826e-05, |
|
"loss": 1.1659, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.17777524975299183, |
|
"learning_rate": 9.93640543567599e-05, |
|
"loss": 1.1877, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.32271912031116656, |
|
"learning_rate": 9.91520769210645e-05, |
|
"loss": 1.1254, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.16598806172835462, |
|
"learning_rate": 9.894010329567323e-05, |
|
"loss": 1.187, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.17032736087667158, |
|
"learning_rate": 9.872813443312984e-05, |
|
"loss": 1.2073, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.16171590357915658, |
|
"learning_rate": 9.851617128595694e-05, |
|
"loss": 1.0839, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.22200097454760384, |
|
"learning_rate": 9.830421480665128e-05, |
|
"loss": 1.1299, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.18159512381331983, |
|
"learning_rate": 9.809226594767978e-05, |
|
"loss": 1.1991, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.1729262170321024, |
|
"learning_rate": 9.788032566147505e-05, |
|
"loss": 1.0962, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.17432638214872634, |
|
"learning_rate": 9.766839490043108e-05, |
|
"loss": 1.1669, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.16088749813770933, |
|
"learning_rate": 9.745647461689931e-05, |
|
"loss": 1.0548, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.1741820101835776, |
|
"learning_rate": 9.724456576318381e-05, |
|
"loss": 1.1986, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.1749247436598961, |
|
"learning_rate": 9.70326692915375e-05, |
|
"loss": 1.1474, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.1637038092120215, |
|
"learning_rate": 9.682078615415754e-05, |
|
"loss": 1.1231, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.18807058316399383, |
|
"learning_rate": 9.660891730318128e-05, |
|
"loss": 1.2045, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.1748352594360658, |
|
"learning_rate": 9.639706369068171e-05, |
|
"loss": 1.1689, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.1716288593635444, |
|
"learning_rate": 9.61852262686635e-05, |
|
"loss": 1.1572, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.18271775085545605, |
|
"learning_rate": 9.597340598905852e-05, |
|
"loss": 1.21, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.1717878271479738, |
|
"learning_rate": 9.576160380372149e-05, |
|
"loss": 1.0988, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.1680182822271049, |
|
"learning_rate": 9.5549820664426e-05, |
|
"loss": 1.0968, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.1742620341628087, |
|
"learning_rate": 9.533805752285993e-05, |
|
"loss": 1.1635, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.16338762068272702, |
|
"learning_rate": 9.512631533062138e-05, |
|
"loss": 0.9968, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.17102172627425277, |
|
"learning_rate": 9.491459503921421e-05, |
|
"loss": 1.1481, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.166342175891061, |
|
"learning_rate": 9.470289760004398e-05, |
|
"loss": 1.0721, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.2215993724024064, |
|
"learning_rate": 9.449122396441345e-05, |
|
"loss": 1.1877, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.1659659894828161, |
|
"learning_rate": 9.427957508351852e-05, |
|
"loss": 1.1691, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.1656871284817402, |
|
"learning_rate": 9.406795190844376e-05, |
|
"loss": 1.06, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.1586845482565193, |
|
"learning_rate": 9.385635539015825e-05, |
|
"loss": 1.1, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.16465420669616035, |
|
"learning_rate": 9.364478647951133e-05, |
|
"loss": 1.1403, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.15668641868741373, |
|
"learning_rate": 9.343324612722819e-05, |
|
"loss": 1.1075, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.16611169088286057, |
|
"learning_rate": 9.32217352839058e-05, |
|
"loss": 1.1256, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.17301783500328655, |
|
"learning_rate": 9.301025490000841e-05, |
|
"loss": 1.1797, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.17015053210523748, |
|
"learning_rate": 9.279880592586354e-05, |
|
"loss": 1.1087, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.17913490315482883, |
|
"learning_rate": 9.25873893116574e-05, |
|
"loss": 1.1673, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.18716341523653843, |
|
"learning_rate": 9.237600600743085e-05, |
|
"loss": 1.1448, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.16020067287870843, |
|
"learning_rate": 9.216465696307512e-05, |
|
"loss": 1.1152, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.18082661292174337, |
|
"learning_rate": 9.195334312832742e-05, |
|
"loss": 1.1923, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.16932315705059348, |
|
"learning_rate": 9.174206545276677e-05, |
|
"loss": 1.2025, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.1773723182281052, |
|
"learning_rate": 9.153082488580968e-05, |
|
"loss": 1.1583, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.1650084811263765, |
|
"learning_rate": 9.131962237670598e-05, |
|
"loss": 1.1456, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.17296637806848497, |
|
"learning_rate": 9.11084588745343e-05, |
|
"loss": 1.1188, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.18441757049220042, |
|
"learning_rate": 9.089733532819824e-05, |
|
"loss": 1.1057, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.16479278966433117, |
|
"learning_rate": 9.068625268642161e-05, |
|
"loss": 0.9686, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.1721522611977686, |
|
"learning_rate": 9.047521189774455e-05, |
|
"loss": 1.1593, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.17693809369137542, |
|
"learning_rate": 9.026421391051907e-05, |
|
"loss": 1.141, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.16748786371543328, |
|
"learning_rate": 9.005325967290488e-05, |
|
"loss": 1.0954, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.16738864271495277, |
|
"learning_rate": 8.984235013286511e-05, |
|
"loss": 1.1954, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.17608800377093817, |
|
"learning_rate": 8.963148623816191e-05, |
|
"loss": 1.2054, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.1572056318124515, |
|
"learning_rate": 8.942066893635246e-05, |
|
"loss": 1.062, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.17321331852374994, |
|
"learning_rate": 8.920989917478447e-05, |
|
"loss": 1.1774, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.24022668199221645, |
|
"learning_rate": 8.899917790059208e-05, |
|
"loss": 1.1422, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.17078130002322278, |
|
"learning_rate": 8.878850606069153e-05, |
|
"loss": 1.102, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.18816676453808523, |
|
"learning_rate": 8.857788460177686e-05, |
|
"loss": 1.1228, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.17061330738089536, |
|
"learning_rate": 8.836731447031581e-05, |
|
"loss": 1.1664, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.15744686305416977, |
|
"learning_rate": 8.815679661254537e-05, |
|
"loss": 1.0526, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.17140964720406338, |
|
"learning_rate": 8.79463319744677e-05, |
|
"loss": 1.1015, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.177839632569439, |
|
"learning_rate": 8.77359215018458e-05, |
|
"loss": 1.179, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.18976111884283367, |
|
"learning_rate": 8.752556614019923e-05, |
|
"loss": 1.1901, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.17757617341847579, |
|
"learning_rate": 8.731526683479992e-05, |
|
"loss": 1.1195, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.18025608157061349, |
|
"learning_rate": 8.710502453066792e-05, |
|
"loss": 1.1681, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.17991548690786335, |
|
"learning_rate": 8.689484017256711e-05, |
|
"loss": 1.0382, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.18377930665665893, |
|
"learning_rate": 8.668471470500095e-05, |
|
"loss": 1.2159, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.16785777204268718, |
|
"learning_rate": 8.647464907220832e-05, |
|
"loss": 1.1001, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.1718542715910871, |
|
"learning_rate": 8.626464421815919e-05, |
|
"loss": 1.1362, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.17567452985919224, |
|
"learning_rate": 8.605470108655045e-05, |
|
"loss": 1.0701, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.16645014392512258, |
|
"learning_rate": 8.584482062080154e-05, |
|
"loss": 1.1224, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.1683034297078507, |
|
"learning_rate": 8.563500376405041e-05, |
|
"loss": 1.1833, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.17027006999646288, |
|
"learning_rate": 8.542525145914905e-05, |
|
"loss": 1.2105, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.16661484142693264, |
|
"learning_rate": 8.521556464865954e-05, |
|
"loss": 1.1345, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.16535108955142738, |
|
"learning_rate": 8.500594427484946e-05, |
|
"loss": 1.0801, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.1619767851731314, |
|
"learning_rate": 8.479639127968792e-05, |
|
"loss": 1.1398, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.16148830590153632, |
|
"learning_rate": 8.458690660484134e-05, |
|
"loss": 1.1714, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.1718526387843433, |
|
"learning_rate": 8.4377491191669e-05, |
|
"loss": 1.1196, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.1589791532312397, |
|
"learning_rate": 8.4168145981219e-05, |
|
"loss": 1.0975, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.18228960673193118, |
|
"learning_rate": 8.395887191422397e-05, |
|
"loss": 1.0911, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.17051635436251017, |
|
"learning_rate": 8.374966993109687e-05, |
|
"loss": 1.1156, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.1734092864786433, |
|
"learning_rate": 8.35405409719266e-05, |
|
"loss": 1.1387, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.17960160313198717, |
|
"learning_rate": 8.333148597647414e-05, |
|
"loss": 1.1255, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.1667460148360989, |
|
"learning_rate": 8.312250588416791e-05, |
|
"loss": 1.0877, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.17227304205526361, |
|
"learning_rate": 8.291360163409978e-05, |
|
"loss": 1.1643, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.16172787345643957, |
|
"learning_rate": 8.270477416502091e-05, |
|
"loss": 1.0813, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.17717380607042416, |
|
"learning_rate": 8.249602441533726e-05, |
|
"loss": 1.1653, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.17646325594155327, |
|
"learning_rate": 8.228735332310575e-05, |
|
"loss": 1.1671, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.16889550666185796, |
|
"learning_rate": 8.207876182602958e-05, |
|
"loss": 1.1068, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.1668114849825541, |
|
"learning_rate": 8.187025086145458e-05, |
|
"loss": 1.1467, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.1721245962364379, |
|
"learning_rate": 8.16618213663644e-05, |
|
"loss": 1.1377, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.1929335873892253, |
|
"learning_rate": 8.145347427737678e-05, |
|
"loss": 1.119, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.16623130316399012, |
|
"learning_rate": 8.12452105307391e-05, |
|
"loss": 1.0687, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.18085333131496864, |
|
"learning_rate": 8.103703106232416e-05, |
|
"loss": 1.1517, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.17906886829733978, |
|
"learning_rate": 8.082893680762619e-05, |
|
"loss": 1.1325, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.16359826556853824, |
|
"learning_rate": 8.062092870175628e-05, |
|
"loss": 1.0353, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.1626009671933035, |
|
"learning_rate": 8.041300767943867e-05, |
|
"loss": 1.1598, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.16643269286545453, |
|
"learning_rate": 8.0205174675006e-05, |
|
"loss": 1.1226, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.1699654548245084, |
|
"learning_rate": 7.999743062239557e-05, |
|
"loss": 1.0999, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.1827266429905637, |
|
"learning_rate": 7.978977645514487e-05, |
|
"loss": 1.2791, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.44434388351574605, |
|
"learning_rate": 7.958221310638749e-05, |
|
"loss": 1.1693, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.17634755268047692, |
|
"learning_rate": 7.937474150884897e-05, |
|
"loss": 1.0845, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.177726814790075, |
|
"learning_rate": 7.916736259484239e-05, |
|
"loss": 1.1849, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.16479013332848777, |
|
"learning_rate": 7.896007729626457e-05, |
|
"loss": 1.1296, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.1607507116543152, |
|
"learning_rate": 7.875288654459144e-05, |
|
"loss": 1.0829, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.16387244898588724, |
|
"learning_rate": 7.854579127087417e-05, |
|
"loss": 1.1604, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.163742117997135, |
|
"learning_rate": 7.833879240573487e-05, |
|
"loss": 1.0757, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.16638334168591146, |
|
"learning_rate": 7.813189087936243e-05, |
|
"loss": 1.1637, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.16818864513814694, |
|
"learning_rate": 7.792508762150833e-05, |
|
"loss": 1.1564, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.17901829622442805, |
|
"learning_rate": 7.771838356148232e-05, |
|
"loss": 1.1841, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.16614583867770816, |
|
"learning_rate": 7.751177962814866e-05, |
|
"loss": 1.1255, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.19013974114118293, |
|
"learning_rate": 7.730527674992143e-05, |
|
"loss": 1.1488, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.17442438513245137, |
|
"learning_rate": 7.709887585476075e-05, |
|
"loss": 1.1066, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.17701832779209928, |
|
"learning_rate": 7.689257787016834e-05, |
|
"loss": 1.1755, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.16936912919421254, |
|
"learning_rate": 7.668638372318359e-05, |
|
"loss": 1.1057, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.19112857435989614, |
|
"learning_rate": 7.648029434037915e-05, |
|
"loss": 1.1412, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.16103962948152212, |
|
"learning_rate": 7.627431064785706e-05, |
|
"loss": 1.0929, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.16677548612612422, |
|
"learning_rate": 7.606843357124426e-05, |
|
"loss": 1.2046, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.16233738083605964, |
|
"learning_rate": 7.58626640356886e-05, |
|
"loss": 1.0678, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.16602043351095722, |
|
"learning_rate": 7.565700296585483e-05, |
|
"loss": 1.1216, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.16627151579102536, |
|
"learning_rate": 7.54514512859201e-05, |
|
"loss": 1.0911, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.20281510390834787, |
|
"learning_rate": 7.524600991957012e-05, |
|
"loss": 1.1722, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.16069933741616732, |
|
"learning_rate": 7.504067978999484e-05, |
|
"loss": 1.0839, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.18107514785611215, |
|
"learning_rate": 7.483546181988436e-05, |
|
"loss": 1.0717, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.1721242495617235, |
|
"learning_rate": 7.463035693142473e-05, |
|
"loss": 1.1599, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.17918262771658913, |
|
"learning_rate": 7.442536604629395e-05, |
|
"loss": 1.089, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.17784491718923554, |
|
"learning_rate": 7.422049008565757e-05, |
|
"loss": 1.176, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.15630505382249996, |
|
"learning_rate": 7.401572997016479e-05, |
|
"loss": 1.039, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.17070239450442767, |
|
"learning_rate": 7.381108661994429e-05, |
|
"loss": 1.1059, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.16222624124278331, |
|
"learning_rate": 7.360656095459995e-05, |
|
"loss": 1.024, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.17125506848213806, |
|
"learning_rate": 7.340215389320687e-05, |
|
"loss": 1.1567, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.17959953833242476, |
|
"learning_rate": 7.31978663543071e-05, |
|
"loss": 1.141, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.17010870239393844, |
|
"learning_rate": 7.299369925590574e-05, |
|
"loss": 1.1046, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.15998424148316903, |
|
"learning_rate": 7.278965351546648e-05, |
|
"loss": 1.0627, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.1710612967988063, |
|
"learning_rate": 7.258573004990788e-05, |
|
"loss": 1.1486, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.1607397255850716, |
|
"learning_rate": 7.238192977559884e-05, |
|
"loss": 1.0751, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.17530360487610264, |
|
"learning_rate": 7.217825360835473e-05, |
|
"loss": 1.1281, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.17374375355320706, |
|
"learning_rate": 7.197470246343333e-05, |
|
"loss": 1.1493, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.15987413285182245, |
|
"learning_rate": 7.177127725553045e-05, |
|
"loss": 1.1446, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.1789355664165954, |
|
"learning_rate": 7.156797889877613e-05, |
|
"loss": 1.2304, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.17282586483463735, |
|
"learning_rate": 7.136480830673019e-05, |
|
"loss": 1.1909, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.159213004708279, |
|
"learning_rate": 7.116176639237852e-05, |
|
"loss": 1.0624, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.16197929506166975, |
|
"learning_rate": 7.095885406812866e-05, |
|
"loss": 1.1283, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.16495179889101508, |
|
"learning_rate": 7.075607224580581e-05, |
|
"loss": 1.1378, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.1662731898113741, |
|
"learning_rate": 7.05534218366488e-05, |
|
"loss": 1.0542, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.1650210560996418, |
|
"learning_rate": 7.035090375130581e-05, |
|
"loss": 1.1326, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.1708049460231556, |
|
"learning_rate": 7.014851889983057e-05, |
|
"loss": 1.137, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.17558230008855352, |
|
"learning_rate": 6.994626819167789e-05, |
|
"loss": 1.1359, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.16645826974884173, |
|
"learning_rate": 6.974415253570003e-05, |
|
"loss": 1.1742, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.17184188151368943, |
|
"learning_rate": 6.954217284014211e-05, |
|
"loss": 1.1953, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.17068592296841717, |
|
"learning_rate": 6.934033001263847e-05, |
|
"loss": 1.1301, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.16302700472895656, |
|
"learning_rate": 6.913862496020831e-05, |
|
"loss": 1.1232, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.17057720351205383, |
|
"learning_rate": 6.893705858925178e-05, |
|
"loss": 1.1247, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.1582652133644645, |
|
"learning_rate": 6.873563180554583e-05, |
|
"loss": 1.1203, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.16895339487465857, |
|
"learning_rate": 6.853434551424e-05, |
|
"loss": 1.1846, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.1726993947504367, |
|
"learning_rate": 6.833320061985277e-05, |
|
"loss": 1.1963, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.16055788377798388, |
|
"learning_rate": 6.813219802626698e-05, |
|
"loss": 1.1439, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.17265920805658908, |
|
"learning_rate": 6.793133863672616e-05, |
|
"loss": 1.192, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.1833601588530699, |
|
"learning_rate": 6.773062335383024e-05, |
|
"loss": 1.1128, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.1717674054389487, |
|
"learning_rate": 6.753005307953167e-05, |
|
"loss": 1.134, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.18207071739270747, |
|
"learning_rate": 6.73296287151312e-05, |
|
"loss": 1.1723, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.1624856029472637, |
|
"learning_rate": 6.712935116127389e-05, |
|
"loss": 1.1144, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.19915390833072513, |
|
"learning_rate": 6.692922131794517e-05, |
|
"loss": 1.1072, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.1722567762020263, |
|
"learning_rate": 6.672924008446662e-05, |
|
"loss": 1.1425, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.15963511981578907, |
|
"learning_rate": 6.652940835949208e-05, |
|
"loss": 1.1211, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.16578458195408932, |
|
"learning_rate": 6.632972704100349e-05, |
|
"loss": 1.1244, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.17110384656595054, |
|
"learning_rate": 6.613019702630694e-05, |
|
"loss": 1.1795, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.16990207952122507, |
|
"learning_rate": 6.593081921202859e-05, |
|
"loss": 1.0817, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.16467375370533938, |
|
"learning_rate": 6.57315944941107e-05, |
|
"loss": 1.0977, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.17152460282733728, |
|
"learning_rate": 6.553252376780748e-05, |
|
"loss": 1.1868, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.18132610186641904, |
|
"learning_rate": 6.533360792768122e-05, |
|
"loss": 1.2005, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.16698085910075294, |
|
"learning_rate": 6.513484786759818e-05, |
|
"loss": 1.1362, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.16504199012773973, |
|
"learning_rate": 6.493624448072457e-05, |
|
"loss": 1.1303, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.1779596734054924, |
|
"learning_rate": 6.473779865952263e-05, |
|
"loss": 1.1894, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.16364715074236819, |
|
"learning_rate": 6.453951129574644e-05, |
|
"loss": 1.1443, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.16251550362999542, |
|
"learning_rate": 6.434138328043815e-05, |
|
"loss": 1.1238, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.1666990603079008, |
|
"learning_rate": 6.414341550392368e-05, |
|
"loss": 1.0909, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.15779118156939148, |
|
"learning_rate": 6.39456088558091e-05, |
|
"loss": 1.1109, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.1677767995314692, |
|
"learning_rate": 6.374796422497621e-05, |
|
"loss": 1.2103, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.17165919802575422, |
|
"learning_rate": 6.355048249957886e-05, |
|
"loss": 1.1879, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.1918026223221136, |
|
"learning_rate": 6.33531645670389e-05, |
|
"loss": 1.1733, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.1787527260843118, |
|
"learning_rate": 6.3156011314042e-05, |
|
"loss": 1.1338, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.1694260433762424, |
|
"learning_rate": 6.2959023626534e-05, |
|
"loss": 1.1537, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.18311039386845188, |
|
"learning_rate": 6.276220238971652e-05, |
|
"loss": 1.1976, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.17113962947413622, |
|
"learning_rate": 6.256554848804343e-05, |
|
"loss": 1.0813, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.1664054538940142, |
|
"learning_rate": 6.236906280521646e-05, |
|
"loss": 1.1514, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.16227842164425618, |
|
"learning_rate": 6.217274622418153e-05, |
|
"loss": 1.0597, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.17755371081391708, |
|
"learning_rate": 6.197659962712461e-05, |
|
"loss": 1.19, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.15844523213403552, |
|
"learning_rate": 6.178062389546784e-05, |
|
"loss": 1.0587, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.16903107223771263, |
|
"learning_rate": 6.158481990986557e-05, |
|
"loss": 1.1339, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.17002303254909312, |
|
"learning_rate": 6.138918855020028e-05, |
|
"loss": 1.1158, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.16613843815618992, |
|
"learning_rate": 6.11937306955789e-05, |
|
"loss": 1.1894, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.17111783959067672, |
|
"learning_rate": 6.099844722432843e-05, |
|
"loss": 1.1974, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.1670020953276936, |
|
"learning_rate": 6.080333901399251e-05, |
|
"loss": 1.1018, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.18080514034863332, |
|
"learning_rate": 6.060840694132701e-05, |
|
"loss": 1.1845, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.18525347317947505, |
|
"learning_rate": 6.0413651882296406e-05, |
|
"loss": 1.0796, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.20400403778138837, |
|
"learning_rate": 6.021907471206971e-05, |
|
"loss": 1.0117, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.1641784925803925, |
|
"learning_rate": 6.002467630501646e-05, |
|
"loss": 1.097, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.16623722195850973, |
|
"learning_rate": 5.983045753470308e-05, |
|
"loss": 1.0141, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.1810578331051459, |
|
"learning_rate": 5.9636419273888546e-05, |
|
"loss": 1.0277, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.18889563641964546, |
|
"learning_rate": 5.944256239452085e-05, |
|
"loss": 0.9148, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.18087970961891994, |
|
"learning_rate": 5.924888776773281e-05, |
|
"loss": 1.0529, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.1733509400885039, |
|
"learning_rate": 5.9055396263838315e-05, |
|
"loss": 0.9925, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.17285204625514128, |
|
"learning_rate": 5.886208875232833e-05, |
|
"loss": 1.0022, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.17977965852266828, |
|
"learning_rate": 5.8668966101867005e-05, |
|
"loss": 1.0279, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.1805768384069582, |
|
"learning_rate": 5.847602918028785e-05, |
|
"loss": 0.9688, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.19307882052881373, |
|
"learning_rate": 5.82832788545896e-05, |
|
"loss": 1.0361, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.18038343158251127, |
|
"learning_rate": 5.809071599093272e-05, |
|
"loss": 0.9618, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.18081635249066572, |
|
"learning_rate": 5.789834145463506e-05, |
|
"loss": 0.9634, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.2132256408954024, |
|
"learning_rate": 5.7706156110168384e-05, |
|
"loss": 0.9917, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.20894695338897498, |
|
"learning_rate": 5.751416082115408e-05, |
|
"loss": 1.0608, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.18561065850228853, |
|
"learning_rate": 5.732235645035964e-05, |
|
"loss": 1.0319, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.19972592759433763, |
|
"learning_rate": 5.713074385969457e-05, |
|
"loss": 1.068, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.18875304906041382, |
|
"learning_rate": 5.6939323910206645e-05, |
|
"loss": 1.0706, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.20051381946623245, |
|
"learning_rate": 5.6748097462077796e-05, |
|
"loss": 1.0879, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.18028693825056266, |
|
"learning_rate": 5.65570653746206e-05, |
|
"loss": 0.9736, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.18371629750739465, |
|
"learning_rate": 5.63662285062742e-05, |
|
"loss": 1.0459, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.18620839376911252, |
|
"learning_rate": 5.6175587714600385e-05, |
|
"loss": 1.0564, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.1761497636850895, |
|
"learning_rate": 5.598514385627996e-05, |
|
"loss": 0.9928, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.18717899836165747, |
|
"learning_rate": 5.579489778710867e-05, |
|
"loss": 0.9903, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.1792885931651523, |
|
"learning_rate": 5.56048503619935e-05, |
|
"loss": 0.9889, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.18624892968399204, |
|
"learning_rate": 5.541500243494888e-05, |
|
"loss": 1.0291, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.17837324324146603, |
|
"learning_rate": 5.522535485909257e-05, |
|
"loss": 1.0182, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.18979627533160237, |
|
"learning_rate": 5.5035908486642195e-05, |
|
"loss": 0.9828, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.19273836099434338, |
|
"learning_rate": 5.484666416891109e-05, |
|
"loss": 1.0496, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.19543417583922992, |
|
"learning_rate": 5.4657622756304704e-05, |
|
"loss": 0.9901, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.19508414506527536, |
|
"learning_rate": 5.4468785098316674e-05, |
|
"loss": 1.0326, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.185374829651796, |
|
"learning_rate": 5.428015204352508e-05, |
|
"loss": 0.9779, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.19699548343809967, |
|
"learning_rate": 5.409172443958843e-05, |
|
"loss": 1.0246, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.20300075118398447, |
|
"learning_rate": 5.3903503133242136e-05, |
|
"loss": 0.969, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.19110544018613584, |
|
"learning_rate": 5.371548897029457e-05, |
|
"loss": 1.0747, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.18550194883382987, |
|
"learning_rate": 5.3527682795623146e-05, |
|
"loss": 1.0157, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.18331490400990383, |
|
"learning_rate": 5.334008545317082e-05, |
|
"loss": 0.9947, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.18751165571321418, |
|
"learning_rate": 5.31526977859419e-05, |
|
"loss": 1.0502, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.19211604448762654, |
|
"learning_rate": 5.296552063599868e-05, |
|
"loss": 1.0145, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.186297345787599, |
|
"learning_rate": 5.277855484445735e-05, |
|
"loss": 1.0537, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.18982650964521386, |
|
"learning_rate": 5.259180125148442e-05, |
|
"loss": 0.9838, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.19925867583848977, |
|
"learning_rate": 5.240526069629265e-05, |
|
"loss": 1.0231, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.1884322287285855, |
|
"learning_rate": 5.22189340171377e-05, |
|
"loss": 0.9296, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.1796241132232387, |
|
"learning_rate": 5.203282205131395e-05, |
|
"loss": 0.9681, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.1839238478548194, |
|
"learning_rate": 5.1846925635151045e-05, |
|
"loss": 0.9101, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.19345190226496187, |
|
"learning_rate": 5.166124560401002e-05, |
|
"loss": 0.9651, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.18755030741172238, |
|
"learning_rate": 5.1475782792279426e-05, |
|
"loss": 1.0174, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.18631640479941738, |
|
"learning_rate": 5.129053803337181e-05, |
|
"loss": 1.035, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.1975877349106955, |
|
"learning_rate": 5.1105512159719805e-05, |
|
"loss": 0.9989, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.18636484306751205, |
|
"learning_rate": 5.092070600277252e-05, |
|
"loss": 1.0426, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.18611358587003984, |
|
"learning_rate": 5.073612039299157e-05, |
|
"loss": 0.9949, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.1910181776639786, |
|
"learning_rate": 5.0551756159847666e-05, |
|
"loss": 0.9716, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.1896157384615499, |
|
"learning_rate": 5.036761413181659e-05, |
|
"loss": 1.0205, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.18470372913964078, |
|
"learning_rate": 5.0183695136375664e-05, |
|
"loss": 0.9938, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.19182549114492065, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 1.0717, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.20299654057754923, |
|
"learning_rate": 4.9816529548158706e-05, |
|
"loss": 1.0932, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.18753899816476238, |
|
"learning_rate": 4.963328460531127e-05, |
|
"loss": 1.0785, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.1912398459667478, |
|
"learning_rate": 4.9450265994903664e-05, |
|
"loss": 1.0354, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.1970798090720647, |
|
"learning_rate": 4.9267474539365086e-05, |
|
"loss": 1.0394, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.19438353474466769, |
|
"learning_rate": 4.908491106010368e-05, |
|
"loss": 1.0563, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.18583224152081138, |
|
"learning_rate": 4.8902576377503316e-05, |
|
"loss": 1.0375, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.21067204344598053, |
|
"learning_rate": 4.87204713109196e-05, |
|
"loss": 1.0275, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.19271054378491778, |
|
"learning_rate": 4.8538596678676406e-05, |
|
"loss": 1.0241, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.20698647247820803, |
|
"learning_rate": 4.8356953298062115e-05, |
|
"loss": 1.0183, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.19147199804367737, |
|
"learning_rate": 4.817554198532581e-05, |
|
"loss": 1.0464, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.18948541542489386, |
|
"learning_rate": 4.79943635556739e-05, |
|
"loss": 0.942, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.20063322256200955, |
|
"learning_rate": 4.7813418823266146e-05, |
|
"loss": 1.0207, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.20277313248067322, |
|
"learning_rate": 4.763270860121222e-05, |
|
"loss": 1.1196, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.18916247962333935, |
|
"learning_rate": 4.745223370156797e-05, |
|
"loss": 1.0635, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.20770456045992683, |
|
"learning_rate": 4.727199493533181e-05, |
|
"loss": 1.0641, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.1928796702717179, |
|
"learning_rate": 4.709199311244098e-05, |
|
"loss": 1.0173, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.19067624131226646, |
|
"learning_rate": 4.691222904176791e-05, |
|
"loss": 1.0739, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.18794968941934675, |
|
"learning_rate": 4.6732703531116874e-05, |
|
"loss": 0.9853, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.18128537252239216, |
|
"learning_rate": 4.6553417387219886e-05, |
|
"loss": 0.9791, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.19969579280811037, |
|
"learning_rate": 4.6374371415733496e-05, |
|
"loss": 1.0328, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.17771947900276572, |
|
"learning_rate": 4.619556642123484e-05, |
|
"loss": 0.9977, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.18877925953999256, |
|
"learning_rate": 4.601700320721829e-05, |
|
"loss": 1.0163, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.1976777570336078, |
|
"learning_rate": 4.583868257609171e-05, |
|
"loss": 0.9831, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.1926309732820373, |
|
"learning_rate": 4.566060532917288e-05, |
|
"loss": 1.0369, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.1981154480792855, |
|
"learning_rate": 4.5482772266685844e-05, |
|
"loss": 1.0265, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.18961229347269684, |
|
"learning_rate": 4.530518418775733e-05, |
|
"loss": 0.978, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.1902416426203216, |
|
"learning_rate": 4.512784189041328e-05, |
|
"loss": 1.0476, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.1963783030038318, |
|
"learning_rate": 4.495074617157513e-05, |
|
"loss": 1.1034, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.1976594259307984, |
|
"learning_rate": 4.477389782705628e-05, |
|
"loss": 1.0411, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.19599497931317425, |
|
"learning_rate": 4.459729765155842e-05, |
|
"loss": 0.9831, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.18768317847907803, |
|
"learning_rate": 4.4420946438668164e-05, |
|
"loss": 0.9667, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.1893955423774859, |
|
"learning_rate": 4.424484498085335e-05, |
|
"loss": 1.0176, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.185214177313968, |
|
"learning_rate": 4.4068994069459376e-05, |
|
"loss": 0.9672, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.2001674222762866, |
|
"learning_rate": 4.389339449470592e-05, |
|
"loss": 1.0872, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.18927674242494813, |
|
"learning_rate": 4.371804704568309e-05, |
|
"loss": 1.0305, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.189797567081393, |
|
"learning_rate": 4.354295251034811e-05, |
|
"loss": 0.9866, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.19621874043278917, |
|
"learning_rate": 4.336811167552167e-05, |
|
"loss": 0.9989, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.1961781136176054, |
|
"learning_rate": 4.3193525326884435e-05, |
|
"loss": 1.1191, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.19610651673820242, |
|
"learning_rate": 4.301919424897338e-05, |
|
"loss": 1.0448, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.19393858136213526, |
|
"learning_rate": 4.284511922517853e-05, |
|
"loss": 1.021, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.19611938956585503, |
|
"learning_rate": 4.267130103773911e-05, |
|
"loss": 1.003, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.19521183418608554, |
|
"learning_rate": 4.249774046774034e-05, |
|
"loss": 0.9956, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.1903820141620171, |
|
"learning_rate": 4.232443829510977e-05, |
|
"loss": 0.988, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.19917471096571515, |
|
"learning_rate": 4.215139529861367e-05, |
|
"loss": 1.0294, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.1989120051737976, |
|
"learning_rate": 4.19786122558538e-05, |
|
"loss": 0.9865, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.1878933904328363, |
|
"learning_rate": 4.1806089943263706e-05, |
|
"loss": 0.9914, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.1889686688150144, |
|
"learning_rate": 4.163382913610533e-05, |
|
"loss": 0.9183, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.19599798287453252, |
|
"learning_rate": 4.146183060846538e-05, |
|
"loss": 0.9809, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.19159410980792513, |
|
"learning_rate": 4.129009513325212e-05, |
|
"loss": 1.0689, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.1916180919600845, |
|
"learning_rate": 4.111862348219158e-05, |
|
"loss": 1.0453, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.18629810473389968, |
|
"learning_rate": 4.094741642582434e-05, |
|
"loss": 0.9991, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.18983910244997687, |
|
"learning_rate": 4.077647473350201e-05, |
|
"loss": 1.0355, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.19604918383133388, |
|
"learning_rate": 4.060579917338362e-05, |
|
"loss": 0.9918, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.19914658125329224, |
|
"learning_rate": 4.043539051243239e-05, |
|
"loss": 1.0083, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.19984032461233703, |
|
"learning_rate": 4.026524951641204e-05, |
|
"loss": 1.0402, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.19713889646881289, |
|
"learning_rate": 4.009537694988372e-05, |
|
"loss": 1.0814, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.18984773417984263, |
|
"learning_rate": 3.99257735762021e-05, |
|
"loss": 0.9727, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.1969234745130071, |
|
"learning_rate": 3.975644015751234e-05, |
|
"loss": 1.1246, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.20095976880290764, |
|
"learning_rate": 3.958737745474638e-05, |
|
"loss": 1.0264, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.19573419897255526, |
|
"learning_rate": 3.9418586227619746e-05, |
|
"loss": 0.9699, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.20794714498925052, |
|
"learning_rate": 3.9250067234628054e-05, |
|
"loss": 1.0871, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.200309362571014, |
|
"learning_rate": 3.9081821233043436e-05, |
|
"loss": 1.0869, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.21221715688656814, |
|
"learning_rate": 3.891384897891148e-05, |
|
"loss": 1.0829, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.1895941245282512, |
|
"learning_rate": 3.874615122704746e-05, |
|
"loss": 0.9934, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.20282251657546532, |
|
"learning_rate": 3.857872873103322e-05, |
|
"loss": 1.0394, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.20092601921573158, |
|
"learning_rate": 3.8411582243213694e-05, |
|
"loss": 1.0626, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.1967616729990936, |
|
"learning_rate": 3.824471251469353e-05, |
|
"loss": 1.1027, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.1974168096006322, |
|
"learning_rate": 3.807812029533362e-05, |
|
"loss": 1.0285, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.2005755550749436, |
|
"learning_rate": 3.791180633374785e-05, |
|
"loss": 1.0579, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.22982259456971435, |
|
"learning_rate": 3.7745771377299755e-05, |
|
"loss": 0.9251, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.19486627427023995, |
|
"learning_rate": 3.758001617209906e-05, |
|
"loss": 0.9991, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.19268654100626367, |
|
"learning_rate": 3.7414541462998446e-05, |
|
"loss": 1.0843, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.19142859148030442, |
|
"learning_rate": 3.724934799358998e-05, |
|
"loss": 0.9734, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.2078245745909079, |
|
"learning_rate": 3.708443650620206e-05, |
|
"loss": 1.079, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.18957789857941185, |
|
"learning_rate": 3.691980774189591e-05, |
|
"loss": 0.9865, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.2022379994557547, |
|
"learning_rate": 3.675546244046228e-05, |
|
"loss": 1.0147, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.20484113736244272, |
|
"learning_rate": 3.6591401340418116e-05, |
|
"loss": 1.0491, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.19932572874171609, |
|
"learning_rate": 3.642762517900322e-05, |
|
"loss": 1.0187, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.19513525838609894, |
|
"learning_rate": 3.626413469217702e-05, |
|
"loss": 1.0346, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.18447148945806335, |
|
"learning_rate": 3.6100930614615205e-05, |
|
"loss": 0.9713, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.19615772374760426, |
|
"learning_rate": 3.593801367970645e-05, |
|
"loss": 1.0279, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.195942402850121, |
|
"learning_rate": 3.5775384619549e-05, |
|
"loss": 1.044, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.20033402806459327, |
|
"learning_rate": 3.561304416494762e-05, |
|
"loss": 0.9634, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.20133608619277885, |
|
"learning_rate": 3.5450993045409996e-05, |
|
"loss": 0.9424, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.1976979767858137, |
|
"learning_rate": 3.5289231989143865e-05, |
|
"loss": 1.0329, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.1866564132240131, |
|
"learning_rate": 3.512776172305331e-05, |
|
"loss": 0.9951, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.20643351098366144, |
|
"learning_rate": 3.496658297273574e-05, |
|
"loss": 1.0768, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.2016711813980471, |
|
"learning_rate": 3.4805696462478634e-05, |
|
"loss": 1.0351, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.19967214131051508, |
|
"learning_rate": 3.46451029152562e-05, |
|
"loss": 1.0102, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.1983504742901789, |
|
"learning_rate": 3.448480305272619e-05, |
|
"loss": 1.0268, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.19682722769626732, |
|
"learning_rate": 3.4324797595226565e-05, |
|
"loss": 1.0053, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.20088167609003055, |
|
"learning_rate": 3.41650872617724e-05, |
|
"loss": 0.9733, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.20335900600853737, |
|
"learning_rate": 3.400567277005247e-05, |
|
"loss": 0.9861, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.20886916546979215, |
|
"learning_rate": 3.3846554836426234e-05, |
|
"loss": 1.0473, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.2010663342671541, |
|
"learning_rate": 3.36877341759205e-05, |
|
"loss": 1.0358, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.21062432438070422, |
|
"learning_rate": 3.3529211502226123e-05, |
|
"loss": 1.0166, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.2208916600042823, |
|
"learning_rate": 3.337098752769503e-05, |
|
"loss": 1.1117, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.18314118494949805, |
|
"learning_rate": 3.321306296333673e-05, |
|
"loss": 0.9577, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.2165551432625507, |
|
"learning_rate": 3.3055438518815486e-05, |
|
"loss": 1.0629, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.1919444697411042, |
|
"learning_rate": 3.289811490244671e-05, |
|
"loss": 1.0157, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.19605731642039462, |
|
"learning_rate": 3.274109282119413e-05, |
|
"loss": 1.0183, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.1978411361540464, |
|
"learning_rate": 3.258437298066634e-05, |
|
"loss": 1.0333, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.1954273247181233, |
|
"learning_rate": 3.242795608511388e-05, |
|
"loss": 1.0204, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.19539709125757848, |
|
"learning_rate": 3.227184283742591e-05, |
|
"loss": 1.0393, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.19509958919620607, |
|
"learning_rate": 3.2116033939127024e-05, |
|
"loss": 1.0703, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.19145181109714973, |
|
"learning_rate": 3.196053009037428e-05, |
|
"loss": 0.963, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.19905167727180642, |
|
"learning_rate": 3.180533198995379e-05, |
|
"loss": 1.109, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.20242244581363775, |
|
"learning_rate": 3.165044033527789e-05, |
|
"loss": 1.0477, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.19788917203746156, |
|
"learning_rate": 3.1495855822381715e-05, |
|
"loss": 1.0331, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.20117465106402488, |
|
"learning_rate": 3.134157914592032e-05, |
|
"loss": 1.0634, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.20012651544737803, |
|
"learning_rate": 3.1187610999165316e-05, |
|
"loss": 1.0525, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.2043849789970364, |
|
"learning_rate": 3.1033952074001884e-05, |
|
"loss": 1.0009, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.20436413566516082, |
|
"learning_rate": 3.088060306092582e-05, |
|
"loss": 1.0194, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.2471012507273153, |
|
"learning_rate": 3.072756464904006e-05, |
|
"loss": 1.0174, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.19900543237914783, |
|
"learning_rate": 3.057483752605196e-05, |
|
"loss": 1.1252, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.21526226315725566, |
|
"learning_rate": 3.042242237826991e-05, |
|
"loss": 1.0376, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.20806176621387384, |
|
"learning_rate": 3.0270319890600462e-05, |
|
"loss": 1.0543, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.2067694678280775, |
|
"learning_rate": 3.0118530746545148e-05, |
|
"loss": 1.0625, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.1984329234985004, |
|
"learning_rate": 2.9967055628197472e-05, |
|
"loss": 1.0445, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.2051174512399526, |
|
"learning_rate": 2.9815895216239732e-05, |
|
"loss": 1.0023, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.19190337658134426, |
|
"learning_rate": 2.9665050189940015e-05, |
|
"loss": 1.0081, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.19812292614431837, |
|
"learning_rate": 2.951452122714926e-05, |
|
"loss": 1.0347, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.20217031978231875, |
|
"learning_rate": 2.9364309004298053e-05, |
|
"loss": 1.0683, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.2002443376802822, |
|
"learning_rate": 2.9214414196393704e-05, |
|
"loss": 1.0534, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.19168165480576083, |
|
"learning_rate": 2.9064837477017048e-05, |
|
"loss": 1.0077, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.19459332024136894, |
|
"learning_rate": 2.8915579518319624e-05, |
|
"loss": 1.041, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.20329713707478517, |
|
"learning_rate": 2.876664099102053e-05, |
|
"loss": 1.0385, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.1969232285759602, |
|
"learning_rate": 2.861802256440348e-05, |
|
"loss": 1.018, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.21865970166706636, |
|
"learning_rate": 2.8469724906313678e-05, |
|
"loss": 1.0325, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.2045555091720532, |
|
"learning_rate": 2.8321748683154893e-05, |
|
"loss": 1.0077, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.19858767150496162, |
|
"learning_rate": 2.8174094559886534e-05, |
|
"loss": 1.0477, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.1979848674300343, |
|
"learning_rate": 2.8026763200020555e-05, |
|
"loss": 1.0647, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.18999100747223083, |
|
"learning_rate": 2.7879755265618555e-05, |
|
"loss": 0.9991, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.19318555274581345, |
|
"learning_rate": 2.773307141728867e-05, |
|
"loss": 1.0689, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.2001048343990683, |
|
"learning_rate": 2.7586712314182773e-05, |
|
"loss": 0.9894, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.1941010794439273, |
|
"learning_rate": 2.7440678613993332e-05, |
|
"loss": 1.0117, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.20543809678008965, |
|
"learning_rate": 2.729497097295075e-05, |
|
"loss": 0.9712, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.19570648696674608, |
|
"learning_rate": 2.7149590045820028e-05, |
|
"loss": 1.0332, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.20214826838793598, |
|
"learning_rate": 2.7004536485898045e-05, |
|
"loss": 1.024, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.2115211012217786, |
|
"learning_rate": 2.685981094501069e-05, |
|
"loss": 0.9931, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.21220137438280212, |
|
"learning_rate": 2.6715414073509748e-05, |
|
"loss": 1.0435, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.20609154009368855, |
|
"learning_rate": 2.6571346520270147e-05, |
|
"loss": 1.1253, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.1923052032802701, |
|
"learning_rate": 2.6427608932686843e-05, |
|
"loss": 0.9451, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.19730527167467898, |
|
"learning_rate": 2.6284201956672137e-05, |
|
"loss": 1.0458, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.19989767746964465, |
|
"learning_rate": 2.614112623665259e-05, |
|
"loss": 0.9961, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.20743521472072698, |
|
"learning_rate": 2.599838241556626e-05, |
|
"loss": 0.986, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.211185028570926, |
|
"learning_rate": 2.5855971134859736e-05, |
|
"loss": 1.0033, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.18980333720726505, |
|
"learning_rate": 2.5713893034485214e-05, |
|
"loss": 1.0542, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.19527708839800334, |
|
"learning_rate": 2.5572148752897795e-05, |
|
"loss": 0.9458, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.19737801451718315, |
|
"learning_rate": 2.5430738927052344e-05, |
|
"loss": 1.0229, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.19944968925099527, |
|
"learning_rate": 2.5289664192400997e-05, |
|
"loss": 1.0187, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.19850167089007587, |
|
"learning_rate": 2.514892518288988e-05, |
|
"loss": 1.0301, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.20869345185658936, |
|
"learning_rate": 2.5008522530956646e-05, |
|
"loss": 1.0882, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.192429822864874, |
|
"learning_rate": 2.4868456867527313e-05, |
|
"loss": 1.0768, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.20041667374784053, |
|
"learning_rate": 2.472872882201368e-05, |
|
"loss": 0.9564, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.19278196420916055, |
|
"learning_rate": 2.4589339022310386e-05, |
|
"loss": 1.0012, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.20632547530350015, |
|
"learning_rate": 2.445028809479203e-05, |
|
"loss": 1.0691, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.20026721503616646, |
|
"learning_rate": 2.431157666431052e-05, |
|
"loss": 1.0614, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.19489852973676156, |
|
"learning_rate": 2.4173205354192063e-05, |
|
"loss": 0.9797, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.19651304043270207, |
|
"learning_rate": 2.403517478623456e-05, |
|
"loss": 1.0636, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.20753886611474595, |
|
"learning_rate": 2.3897485580704682e-05, |
|
"loss": 1.0488, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.1902807052625808, |
|
"learning_rate": 2.376013835633517e-05, |
|
"loss": 1.0122, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.2128547880840383, |
|
"learning_rate": 2.3623133730321922e-05, |
|
"loss": 1.0662, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.19178946945027847, |
|
"learning_rate": 2.3486472318321307e-05, |
|
"loss": 0.999, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.19790833247794626, |
|
"learning_rate": 2.3350154734447537e-05, |
|
"loss": 1.0543, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.19615861400513737, |
|
"learning_rate": 2.32141815912696e-05, |
|
"loss": 1.0323, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.19111434134530353, |
|
"learning_rate": 2.3078553499808797e-05, |
|
"loss": 1.0206, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.1967812553288128, |
|
"learning_rate": 2.2943271069535755e-05, |
|
"loss": 1.0213, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.19874170003799302, |
|
"learning_rate": 2.2808334908367914e-05, |
|
"loss": 0.9631, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.23014562765430222, |
|
"learning_rate": 2.267374562266662e-05, |
|
"loss": 1.0333, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.21310855355754507, |
|
"learning_rate": 2.2539503817234553e-05, |
|
"loss": 1.0808, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.19972397701820568, |
|
"learning_rate": 2.240561009531281e-05, |
|
"loss": 1.0527, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.1927849389481322, |
|
"learning_rate": 2.227206505857834e-05, |
|
"loss": 1.0776, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.19740116004042996, |
|
"learning_rate": 2.2138869307141263e-05, |
|
"loss": 1.014, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.20289387092281236, |
|
"learning_rate": 2.2006023439542088e-05, |
|
"loss": 1.0103, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.20394382008794595, |
|
"learning_rate": 2.1873528052749092e-05, |
|
"loss": 0.9351, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.20548016222534313, |
|
"learning_rate": 2.1741383742155474e-05, |
|
"loss": 1.0066, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.19563695871816103, |
|
"learning_rate": 2.1609591101576942e-05, |
|
"loss": 1.0477, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.21242314187055802, |
|
"learning_rate": 2.1478150723248857e-05, |
|
"loss": 0.9558, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.21115961226521227, |
|
"learning_rate": 2.1347063197823647e-05, |
|
"loss": 1.013, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.1994263855293853, |
|
"learning_rate": 2.1216329114368084e-05, |
|
"loss": 1.0287, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.20246037782306023, |
|
"learning_rate": 2.1085949060360654e-05, |
|
"loss": 1.0011, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.19513564076453144, |
|
"learning_rate": 2.0955923621689034e-05, |
|
"loss": 0.9725, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.22329723996513792, |
|
"learning_rate": 2.0826253382647333e-05, |
|
"loss": 1.1098, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.20377464973870935, |
|
"learning_rate": 2.0696938925933506e-05, |
|
"loss": 1.0675, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.19102737130076528, |
|
"learning_rate": 2.056798083264667e-05, |
|
"loss": 0.9492, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.2073014798883011, |
|
"learning_rate": 2.043937968228469e-05, |
|
"loss": 1.0387, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.24290986242591792, |
|
"learning_rate": 2.0311136052741277e-05, |
|
"loss": 1.0558, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.2085266713259981, |
|
"learning_rate": 2.0183250520303652e-05, |
|
"loss": 1.0638, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.1955217978555781, |
|
"learning_rate": 2.0055723659649904e-05, |
|
"loss": 1.0096, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.19927692668612149, |
|
"learning_rate": 1.9928556043846214e-05, |
|
"loss": 1.0701, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.19744727496920214, |
|
"learning_rate": 1.9801748244344585e-05, |
|
"loss": 1.0438, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.2055358202234184, |
|
"learning_rate": 1.967530083097996e-05, |
|
"loss": 1.0575, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.2022241159482209, |
|
"learning_rate": 1.9549214371968004e-05, |
|
"loss": 1.0226, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.19649509124937034, |
|
"learning_rate": 1.9423489433902186e-05, |
|
"loss": 1.0202, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.20600851551211816, |
|
"learning_rate": 1.929812658175154e-05, |
|
"loss": 1.021, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.19294116399601477, |
|
"learning_rate": 1.9173126378857907e-05, |
|
"loss": 1.0213, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.1938777663276966, |
|
"learning_rate": 1.9048489386933543e-05, |
|
"loss": 0.9898, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.19185115735103692, |
|
"learning_rate": 1.892421616605857e-05, |
|
"loss": 0.9944, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.19670853913434655, |
|
"learning_rate": 1.8800307274678364e-05, |
|
"loss": 0.9919, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.20990234935789304, |
|
"learning_rate": 1.8676763269601194e-05, |
|
"loss": 0.9881, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.19572956406722788, |
|
"learning_rate": 1.8553584705995562e-05, |
|
"loss": 1.0294, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.19113731201482373, |
|
"learning_rate": 1.8430772137387853e-05, |
|
"loss": 1.0309, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.20326050779588162, |
|
"learning_rate": 1.8308326115659757e-05, |
|
"loss": 1.0583, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.20127668715102875, |
|
"learning_rate": 1.8186247191045856e-05, |
|
"loss": 1.0456, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.20570924872407959, |
|
"learning_rate": 1.806453591213103e-05, |
|
"loss": 1.0397, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.19884839319020708, |
|
"learning_rate": 1.794319282584813e-05, |
|
"loss": 1.008, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.22176454676380103, |
|
"learning_rate": 1.7822218477475494e-05, |
|
"loss": 0.9774, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.20018826586404945, |
|
"learning_rate": 1.7701613410634365e-05, |
|
"loss": 1.0615, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.20389686562227474, |
|
"learning_rate": 1.7581378167286656e-05, |
|
"loss": 1.043, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.19726813509954272, |
|
"learning_rate": 1.7461513287732313e-05, |
|
"loss": 1.0322, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.1951457034588497, |
|
"learning_rate": 1.734201931060706e-05, |
|
"loss": 1.0342, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.2013088268621123, |
|
"learning_rate": 1.722289677287987e-05, |
|
"loss": 0.9935, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.20117633179429092, |
|
"learning_rate": 1.710414620985059e-05, |
|
"loss": 1.0012, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.2033234102340342, |
|
"learning_rate": 1.6985768155147496e-05, |
|
"loss": 0.9818, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.20104360454517034, |
|
"learning_rate": 1.686776314072497e-05, |
|
"loss": 1.0999, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.20940487689123094, |
|
"learning_rate": 1.6750131696861015e-05, |
|
"loss": 1.0285, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.21680835461628276, |
|
"learning_rate": 1.663287435215498e-05, |
|
"loss": 0.9571, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.19827472641956026, |
|
"learning_rate": 1.6515991633525118e-05, |
|
"loss": 1.0499, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.1942134691900752, |
|
"learning_rate": 1.6399484066206183e-05, |
|
"loss": 1.0287, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.1971846855407925, |
|
"learning_rate": 1.6283352173747145e-05, |
|
"loss": 1.0295, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.2004843637777262, |
|
"learning_rate": 1.6167596478008817e-05, |
|
"loss": 0.9847, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.19534562714480122, |
|
"learning_rate": 1.6052217499161515e-05, |
|
"loss": 0.9995, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.20788157295613702, |
|
"learning_rate": 1.5937215755682665e-05, |
|
"loss": 1.0213, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.19455795086744224, |
|
"learning_rate": 1.582259176435451e-05, |
|
"loss": 1.032, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.20159782581581032, |
|
"learning_rate": 1.5708346040261813e-05, |
|
"loss": 1.0669, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.20640526620902436, |
|
"learning_rate": 1.5594479096789537e-05, |
|
"loss": 0.9989, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.19589322284099392, |
|
"learning_rate": 1.5480991445620542e-05, |
|
"loss": 1.0104, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.2036185439151077, |
|
"learning_rate": 1.5367883596733155e-05, |
|
"loss": 1.0446, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.18952222603214305, |
|
"learning_rate": 1.5255156058399122e-05, |
|
"loss": 0.9417, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.19652022738548006, |
|
"learning_rate": 1.5142809337181063e-05, |
|
"loss": 1.0583, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.20045206927797848, |
|
"learning_rate": 1.5030843937930483e-05, |
|
"loss": 1.0201, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.2055930699724937, |
|
"learning_rate": 1.4919260363785215e-05, |
|
"loss": 1.0243, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 0.2162787120662479, |
|
"learning_rate": 1.4808059116167305e-05, |
|
"loss": 1.0907, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 0.1977866282036809, |
|
"learning_rate": 1.4697240694780767e-05, |
|
"loss": 0.986, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 0.20811708085703706, |
|
"learning_rate": 1.4586805597609331e-05, |
|
"loss": 1.0941, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 0.20740381984269454, |
|
"learning_rate": 1.4476754320914188e-05, |
|
"loss": 1.0413, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 0.20638961593596633, |
|
"learning_rate": 1.4367087359231668e-05, |
|
"loss": 1.0453, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.20334283004311088, |
|
"learning_rate": 1.4257805205371234e-05, |
|
"loss": 1.0005, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.20299293540353064, |
|
"learning_rate": 1.4148908350413049e-05, |
|
"loss": 1.0365, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.19832697349630915, |
|
"learning_rate": 1.4040397283705897e-05, |
|
"loss": 1.0487, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.21129996076963695, |
|
"learning_rate": 1.3932272492864984e-05, |
|
"loss": 1.0439, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.19706789983721826, |
|
"learning_rate": 1.3824534463769633e-05, |
|
"loss": 1.045, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.19765871458568013, |
|
"learning_rate": 1.3717183680561252e-05, |
|
"loss": 0.9727, |
|
"step": 1397 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 0.20274254850720416, |
|
"learning_rate": 1.3610220625641002e-05, |
|
"loss": 1.003, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 0.2027777493566929, |
|
"learning_rate": 1.350364577966785e-05, |
|
"loss": 1.0572, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 0.19979864676290385, |
|
"learning_rate": 1.339745962155613e-05, |
|
"loss": 1.0418, |
|
"step": 1400 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1647, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"total_flos": 1.2994679641473024e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|