|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9988694177501414, |
|
"eval_steps": 500, |
|
"global_step": 1326, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0015074429998115696, |
|
"grad_norm": 2.758618933635527, |
|
"learning_rate": 1.5037593984962406e-07, |
|
"loss": 0.7902, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.003014885999623139, |
|
"grad_norm": 2.6991611264758757, |
|
"learning_rate": 3.007518796992481e-07, |
|
"loss": 0.7844, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0045223289994347085, |
|
"grad_norm": 2.7272565236365143, |
|
"learning_rate": 4.511278195488722e-07, |
|
"loss": 0.7626, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.006029771999246278, |
|
"grad_norm": 2.7528117182790965, |
|
"learning_rate": 6.015037593984962e-07, |
|
"loss": 0.7858, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.007537214999057848, |
|
"grad_norm": 2.775699578303916, |
|
"learning_rate": 7.518796992481203e-07, |
|
"loss": 0.8026, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.009044657998869417, |
|
"grad_norm": 2.6767851045683204, |
|
"learning_rate": 9.022556390977444e-07, |
|
"loss": 0.769, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.010552100998680987, |
|
"grad_norm": 2.5261120500748224, |
|
"learning_rate": 1.0526315789473685e-06, |
|
"loss": 0.7706, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.012059543998492557, |
|
"grad_norm": 2.514977460751076, |
|
"learning_rate": 1.2030075187969925e-06, |
|
"loss": 0.7785, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.013566986998304126, |
|
"grad_norm": 2.200241814288396, |
|
"learning_rate": 1.3533834586466167e-06, |
|
"loss": 0.7499, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.015074429998115696, |
|
"grad_norm": 2.212492267506947, |
|
"learning_rate": 1.5037593984962406e-06, |
|
"loss": 0.784, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.016581872997927266, |
|
"grad_norm": 1.9319703389138259, |
|
"learning_rate": 1.6541353383458648e-06, |
|
"loss": 0.7302, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.018089315997738834, |
|
"grad_norm": 1.2550044831716305, |
|
"learning_rate": 1.8045112781954887e-06, |
|
"loss": 0.7056, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.019596758997550406, |
|
"grad_norm": 1.2195135372913062, |
|
"learning_rate": 1.9548872180451127e-06, |
|
"loss": 0.7014, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.021104201997361974, |
|
"grad_norm": 1.2455313883450765, |
|
"learning_rate": 2.105263157894737e-06, |
|
"loss": 0.7374, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.022611644997173545, |
|
"grad_norm": 1.1872107473955416, |
|
"learning_rate": 2.255639097744361e-06, |
|
"loss": 0.7186, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.024119087996985113, |
|
"grad_norm": 1.1982009143546264, |
|
"learning_rate": 2.406015037593985e-06, |
|
"loss": 0.6828, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.025626530996796685, |
|
"grad_norm": 2.3539969715580384, |
|
"learning_rate": 2.556390977443609e-06, |
|
"loss": 0.6636, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.027133973996608253, |
|
"grad_norm": 2.6228143835706765, |
|
"learning_rate": 2.7067669172932333e-06, |
|
"loss": 0.6967, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.028641416996419825, |
|
"grad_norm": 2.224580122320562, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.6761, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.030148859996231393, |
|
"grad_norm": 1.6908938829069853, |
|
"learning_rate": 3.007518796992481e-06, |
|
"loss": 0.6711, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03165630299604296, |
|
"grad_norm": 1.2313675848377437, |
|
"learning_rate": 3.157894736842105e-06, |
|
"loss": 0.6731, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03316374599585453, |
|
"grad_norm": 1.0688239960942414, |
|
"learning_rate": 3.3082706766917295e-06, |
|
"loss": 0.6766, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.034671188995666104, |
|
"grad_norm": 1.0400364411240592, |
|
"learning_rate": 3.4586466165413535e-06, |
|
"loss": 0.6388, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03617863199547767, |
|
"grad_norm": 1.1273406799386165, |
|
"learning_rate": 3.6090225563909775e-06, |
|
"loss": 0.6406, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03768607499528924, |
|
"grad_norm": 1.1097242153227487, |
|
"learning_rate": 3.7593984962406014e-06, |
|
"loss": 0.6316, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03919351799510081, |
|
"grad_norm": 0.9446010057713108, |
|
"learning_rate": 3.909774436090225e-06, |
|
"loss": 0.6023, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04070096099491238, |
|
"grad_norm": 0.8323088497080903, |
|
"learning_rate": 4.06015037593985e-06, |
|
"loss": 0.6183, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04220840399472395, |
|
"grad_norm": 0.7872962129475931, |
|
"learning_rate": 4.210526315789474e-06, |
|
"loss": 0.603, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04371584699453552, |
|
"grad_norm": 0.7767297100748087, |
|
"learning_rate": 4.360902255639098e-06, |
|
"loss": 0.6217, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04522328999434709, |
|
"grad_norm": 0.8604923635307716, |
|
"learning_rate": 4.511278195488722e-06, |
|
"loss": 0.5855, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.046730732994158655, |
|
"grad_norm": 0.8294236539215625, |
|
"learning_rate": 4.661654135338346e-06, |
|
"loss": 0.5933, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04823817599397023, |
|
"grad_norm": 0.7967164005183986, |
|
"learning_rate": 4.81203007518797e-06, |
|
"loss": 0.6159, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0497456189937818, |
|
"grad_norm": 0.6830740716112117, |
|
"learning_rate": 4.962406015037594e-06, |
|
"loss": 0.5752, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.05125306199359337, |
|
"grad_norm": 0.6829489822638658, |
|
"learning_rate": 5.112781954887218e-06, |
|
"loss": 0.5792, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.052760504993404934, |
|
"grad_norm": 0.7505911666509206, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 0.602, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.054267947993216506, |
|
"grad_norm": 0.7281087618287696, |
|
"learning_rate": 5.413533834586467e-06, |
|
"loss": 0.5994, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05577539099302808, |
|
"grad_norm": 0.7061965815038841, |
|
"learning_rate": 5.56390977443609e-06, |
|
"loss": 0.5734, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05728283399283965, |
|
"grad_norm": 0.6836377512068608, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.5543, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.058790276992651214, |
|
"grad_norm": 0.6751595521927064, |
|
"learning_rate": 5.864661654135339e-06, |
|
"loss": 0.5983, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.060297719992462785, |
|
"grad_norm": 0.6517307660683558, |
|
"learning_rate": 6.015037593984962e-06, |
|
"loss": 0.5482, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06180516299227436, |
|
"grad_norm": 0.670740477832527, |
|
"learning_rate": 6.165413533834587e-06, |
|
"loss": 0.5771, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.06331260599208592, |
|
"grad_norm": 0.6386183426057812, |
|
"learning_rate": 6.31578947368421e-06, |
|
"loss": 0.5338, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0648200489918975, |
|
"grad_norm": 0.6325251598452951, |
|
"learning_rate": 6.466165413533835e-06, |
|
"loss": 0.5409, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06632749199170906, |
|
"grad_norm": 0.6190880971896819, |
|
"learning_rate": 6.616541353383459e-06, |
|
"loss": 0.5386, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06783493499152063, |
|
"grad_norm": 0.6390822260018926, |
|
"learning_rate": 6.766917293233083e-06, |
|
"loss": 0.5824, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06934237799133221, |
|
"grad_norm": 0.6866065901609671, |
|
"learning_rate": 6.917293233082707e-06, |
|
"loss": 0.5661, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.07084982099114377, |
|
"grad_norm": 0.6253742924174672, |
|
"learning_rate": 7.067669172932331e-06, |
|
"loss": 0.5371, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.07235726399095534, |
|
"grad_norm": 0.6086976797344416, |
|
"learning_rate": 7.218045112781955e-06, |
|
"loss": 0.5394, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.07386470699076692, |
|
"grad_norm": 0.6617551336853821, |
|
"learning_rate": 7.368421052631579e-06, |
|
"loss": 0.5481, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.07537214999057848, |
|
"grad_norm": 0.6511571841438215, |
|
"learning_rate": 7.518796992481203e-06, |
|
"loss": 0.557, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07687959299039004, |
|
"grad_norm": 0.6424690635636273, |
|
"learning_rate": 7.669172932330828e-06, |
|
"loss": 0.554, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.07838703599020162, |
|
"grad_norm": 0.6208252498748196, |
|
"learning_rate": 7.81954887218045e-06, |
|
"loss": 0.5316, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07989447899001319, |
|
"grad_norm": 0.6178927978791646, |
|
"learning_rate": 7.969924812030075e-06, |
|
"loss": 0.5303, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.08140192198982477, |
|
"grad_norm": 0.6246216171964205, |
|
"learning_rate": 8.1203007518797e-06, |
|
"loss": 0.5551, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.08290936498963633, |
|
"grad_norm": 0.6071119189590479, |
|
"learning_rate": 8.270676691729324e-06, |
|
"loss": 0.5324, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0844168079894479, |
|
"grad_norm": 0.6123261613238393, |
|
"learning_rate": 8.421052631578948e-06, |
|
"loss": 0.5503, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.08592425098925947, |
|
"grad_norm": 0.620387110972641, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.5353, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.08743169398907104, |
|
"grad_norm": 0.6185030072259556, |
|
"learning_rate": 8.721804511278195e-06, |
|
"loss": 0.5544, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0889391369888826, |
|
"grad_norm": 0.6244700607026835, |
|
"learning_rate": 8.87218045112782e-06, |
|
"loss": 0.5768, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.09044657998869418, |
|
"grad_norm": 0.655865564676625, |
|
"learning_rate": 9.022556390977444e-06, |
|
"loss": 0.5541, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09195402298850575, |
|
"grad_norm": 0.6355623162806917, |
|
"learning_rate": 9.172932330827068e-06, |
|
"loss": 0.5317, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.09346146598831731, |
|
"grad_norm": 0.6620650186277286, |
|
"learning_rate": 9.323308270676693e-06, |
|
"loss": 0.5825, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.09496890898812889, |
|
"grad_norm": 0.6544984607034259, |
|
"learning_rate": 9.473684210526315e-06, |
|
"loss": 0.5367, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.09647635198794045, |
|
"grad_norm": 0.6566999876216955, |
|
"learning_rate": 9.62406015037594e-06, |
|
"loss": 0.5334, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.09798379498775203, |
|
"grad_norm": 0.6538652733227992, |
|
"learning_rate": 9.774436090225564e-06, |
|
"loss": 0.5088, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0994912379875636, |
|
"grad_norm": 0.7184816645886852, |
|
"learning_rate": 9.924812030075189e-06, |
|
"loss": 0.5015, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.10099868098737516, |
|
"grad_norm": 0.6287887378220718, |
|
"learning_rate": 1.0075187969924813e-05, |
|
"loss": 0.5171, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.10250612398718674, |
|
"grad_norm": 0.7045986205120561, |
|
"learning_rate": 1.0225563909774436e-05, |
|
"loss": 0.5499, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1040135669869983, |
|
"grad_norm": 0.6263524660452249, |
|
"learning_rate": 1.0375939849624062e-05, |
|
"loss": 0.5319, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.10552100998680987, |
|
"grad_norm": 0.6340009161866458, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 0.5295, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.10702845298662145, |
|
"grad_norm": 0.6930114141626272, |
|
"learning_rate": 1.0676691729323309e-05, |
|
"loss": 0.543, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.10853589598643301, |
|
"grad_norm": 0.6582132112309497, |
|
"learning_rate": 1.0827067669172933e-05, |
|
"loss": 0.5164, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.11004333898624458, |
|
"grad_norm": 0.6774079053656817, |
|
"learning_rate": 1.0977443609022558e-05, |
|
"loss": 0.5202, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.11155078198605616, |
|
"grad_norm": 0.6435562490480392, |
|
"learning_rate": 1.112781954887218e-05, |
|
"loss": 0.5095, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.11305822498586772, |
|
"grad_norm": 0.7112297517037395, |
|
"learning_rate": 1.1278195488721806e-05, |
|
"loss": 0.5316, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1145656679856793, |
|
"grad_norm": 0.709494451956929, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 0.4935, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.11607311098549086, |
|
"grad_norm": 0.6777802836075782, |
|
"learning_rate": 1.1578947368421053e-05, |
|
"loss": 0.5043, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.11758055398530243, |
|
"grad_norm": 0.6296151489375509, |
|
"learning_rate": 1.1729323308270678e-05, |
|
"loss": 0.4874, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.119087996985114, |
|
"grad_norm": 0.6808431409244452, |
|
"learning_rate": 1.1879699248120302e-05, |
|
"loss": 0.4788, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.12059543998492557, |
|
"grad_norm": 0.6704429377361576, |
|
"learning_rate": 1.2030075187969925e-05, |
|
"loss": 0.5011, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12210288298473713, |
|
"grad_norm": 0.6926069766970787, |
|
"learning_rate": 1.2180451127819551e-05, |
|
"loss": 0.496, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.12361032598454871, |
|
"grad_norm": 0.639818862010909, |
|
"learning_rate": 1.2330827067669174e-05, |
|
"loss": 0.5308, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.12511776898436028, |
|
"grad_norm": 0.6204899572762589, |
|
"learning_rate": 1.2481203007518798e-05, |
|
"loss": 0.5063, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.12662521198417184, |
|
"grad_norm": 0.6865925022658576, |
|
"learning_rate": 1.263157894736842e-05, |
|
"loss": 0.507, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1281326549839834, |
|
"grad_norm": 0.7029706975479946, |
|
"learning_rate": 1.2781954887218047e-05, |
|
"loss": 0.5209, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.129640097983795, |
|
"grad_norm": 0.6524424672188123, |
|
"learning_rate": 1.293233082706767e-05, |
|
"loss": 0.5527, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.13114754098360656, |
|
"grad_norm": 0.6074805023838824, |
|
"learning_rate": 1.3082706766917295e-05, |
|
"loss": 0.4873, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.13265498398341813, |
|
"grad_norm": 0.6891851394415897, |
|
"learning_rate": 1.3233082706766918e-05, |
|
"loss": 0.5335, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.1341624269832297, |
|
"grad_norm": 0.6322157680641546, |
|
"learning_rate": 1.3383458646616543e-05, |
|
"loss": 0.5163, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.13566986998304126, |
|
"grad_norm": 0.6652911326311045, |
|
"learning_rate": 1.3533834586466165e-05, |
|
"loss": 0.5227, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.13717731298285282, |
|
"grad_norm": 0.7241927650908743, |
|
"learning_rate": 1.3684210526315791e-05, |
|
"loss": 0.5271, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.13868475598266441, |
|
"grad_norm": 0.654474241851782, |
|
"learning_rate": 1.3834586466165414e-05, |
|
"loss": 0.5154, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.14019219898247598, |
|
"grad_norm": 0.7184414157305912, |
|
"learning_rate": 1.3984962406015038e-05, |
|
"loss": 0.5077, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.14169964198228754, |
|
"grad_norm": 0.6449190071052974, |
|
"learning_rate": 1.4135338345864663e-05, |
|
"loss": 0.5038, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.1432070849820991, |
|
"grad_norm": 0.691580302982374, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 0.4667, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.14471452798191067, |
|
"grad_norm": 0.6288139344926038, |
|
"learning_rate": 1.443609022556391e-05, |
|
"loss": 0.49, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.14622197098172227, |
|
"grad_norm": 0.643873596183986, |
|
"learning_rate": 1.4586466165413536e-05, |
|
"loss": 0.4749, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.14772941398153383, |
|
"grad_norm": 0.6755660192421138, |
|
"learning_rate": 1.4736842105263159e-05, |
|
"loss": 0.5075, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.1492368569813454, |
|
"grad_norm": 0.646796309870399, |
|
"learning_rate": 1.4887218045112783e-05, |
|
"loss": 0.5296, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.15074429998115696, |
|
"grad_norm": 0.7044467423953411, |
|
"learning_rate": 1.5037593984962406e-05, |
|
"loss": 0.5303, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15225174298096852, |
|
"grad_norm": 0.7730989605725896, |
|
"learning_rate": 1.5187969924812032e-05, |
|
"loss": 0.5099, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.1537591859807801, |
|
"grad_norm": 0.6620556405595589, |
|
"learning_rate": 1.5338345864661656e-05, |
|
"loss": 0.5189, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.15526662898059168, |
|
"grad_norm": 0.8038364131821449, |
|
"learning_rate": 1.548872180451128e-05, |
|
"loss": 0.5152, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.15677407198040325, |
|
"grad_norm": 0.6537353177538859, |
|
"learning_rate": 1.56390977443609e-05, |
|
"loss": 0.5012, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.1582815149802148, |
|
"grad_norm": 0.7802019309424624, |
|
"learning_rate": 1.578947368421053e-05, |
|
"loss": 0.4851, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.15978895798002637, |
|
"grad_norm": 0.7590456113216669, |
|
"learning_rate": 1.593984962406015e-05, |
|
"loss": 0.5077, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.16129640097983794, |
|
"grad_norm": 0.6740526095538228, |
|
"learning_rate": 1.6090225563909775e-05, |
|
"loss": 0.4794, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.16280384397964953, |
|
"grad_norm": 0.7266285917065574, |
|
"learning_rate": 1.62406015037594e-05, |
|
"loss": 0.5368, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.1643112869794611, |
|
"grad_norm": 0.7202106895600753, |
|
"learning_rate": 1.6390977443609023e-05, |
|
"loss": 0.5077, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.16581872997927266, |
|
"grad_norm": 0.7646664609937389, |
|
"learning_rate": 1.6541353383458648e-05, |
|
"loss": 0.517, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.16732617297908423, |
|
"grad_norm": 0.7090240598112959, |
|
"learning_rate": 1.6691729323308272e-05, |
|
"loss": 0.5217, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.1688336159788958, |
|
"grad_norm": 0.7260255784190195, |
|
"learning_rate": 1.6842105263157896e-05, |
|
"loss": 0.4977, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.17034105897870735, |
|
"grad_norm": 0.6392143364785348, |
|
"learning_rate": 1.699248120300752e-05, |
|
"loss": 0.4844, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.17184850197851895, |
|
"grad_norm": 0.7380625519153193, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 0.5063, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.1733559449783305, |
|
"grad_norm": 0.6999588028799495, |
|
"learning_rate": 1.729323308270677e-05, |
|
"loss": 0.5124, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.17486338797814208, |
|
"grad_norm": 0.7189938723035283, |
|
"learning_rate": 1.744360902255639e-05, |
|
"loss": 0.4723, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.17637083097795364, |
|
"grad_norm": 0.755925477740424, |
|
"learning_rate": 1.7593984962406015e-05, |
|
"loss": 0.5071, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.1778782739777652, |
|
"grad_norm": 0.6932179016486248, |
|
"learning_rate": 1.774436090225564e-05, |
|
"loss": 0.4813, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.1793857169775768, |
|
"grad_norm": 0.6803984999939205, |
|
"learning_rate": 1.7894736842105264e-05, |
|
"loss": 0.4537, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.18089315997738836, |
|
"grad_norm": 0.6943064601614549, |
|
"learning_rate": 1.8045112781954888e-05, |
|
"loss": 0.5302, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.18240060297719993, |
|
"grad_norm": 0.7024561418818766, |
|
"learning_rate": 1.8195488721804512e-05, |
|
"loss": 0.5292, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.1839080459770115, |
|
"grad_norm": 0.6516091971357849, |
|
"learning_rate": 1.8345864661654137e-05, |
|
"loss": 0.5192, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.18541548897682306, |
|
"grad_norm": 0.733121234769519, |
|
"learning_rate": 1.849624060150376e-05, |
|
"loss": 0.5137, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.18692293197663462, |
|
"grad_norm": 0.6379606914374305, |
|
"learning_rate": 1.8646616541353386e-05, |
|
"loss": 0.5004, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.1884303749764462, |
|
"grad_norm": 0.682116565034204, |
|
"learning_rate": 1.879699248120301e-05, |
|
"loss": 0.4932, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.18993781797625778, |
|
"grad_norm": 0.6586357032334851, |
|
"learning_rate": 1.894736842105263e-05, |
|
"loss": 0.4949, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.19144526097606934, |
|
"grad_norm": 0.6996866499647233, |
|
"learning_rate": 1.909774436090226e-05, |
|
"loss": 0.4926, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.1929527039758809, |
|
"grad_norm": 0.7344954131354208, |
|
"learning_rate": 1.924812030075188e-05, |
|
"loss": 0.477, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.19446014697569247, |
|
"grad_norm": 0.6945294612726404, |
|
"learning_rate": 1.9398496240601504e-05, |
|
"loss": 0.4872, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.19596758997550406, |
|
"grad_norm": 0.7624604146665339, |
|
"learning_rate": 1.954887218045113e-05, |
|
"loss": 0.5224, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.19747503297531563, |
|
"grad_norm": 0.685229042181731, |
|
"learning_rate": 1.9699248120300753e-05, |
|
"loss": 0.4964, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.1989824759751272, |
|
"grad_norm": 0.8306163709490333, |
|
"learning_rate": 1.9849624060150377e-05, |
|
"loss": 0.5021, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.20048991897493876, |
|
"grad_norm": 0.6752867411242717, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4946, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.20199736197475032, |
|
"grad_norm": 0.9473906923308808, |
|
"learning_rate": 1.99999653272242e-05, |
|
"loss": 0.5112, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.2035048049745619, |
|
"grad_norm": 0.6355233169612663, |
|
"learning_rate": 1.9999861309137232e-05, |
|
"loss": 0.5318, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.20501224797437348, |
|
"grad_norm": 0.8423903087733013, |
|
"learning_rate": 1.999968794646042e-05, |
|
"loss": 0.5148, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.20651969097418504, |
|
"grad_norm": 0.6660475408627802, |
|
"learning_rate": 1.9999445240395953e-05, |
|
"loss": 0.5178, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.2080271339739966, |
|
"grad_norm": 0.72967331295993, |
|
"learning_rate": 1.9999133192626893e-05, |
|
"loss": 0.5262, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.20953457697380817, |
|
"grad_norm": 0.7393548066200798, |
|
"learning_rate": 1.9998751805317152e-05, |
|
"loss": 0.5057, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.21104201997361974, |
|
"grad_norm": 0.687138877245702, |
|
"learning_rate": 1.999830108111148e-05, |
|
"loss": 0.4958, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.21254946297343133, |
|
"grad_norm": 0.7007673502124087, |
|
"learning_rate": 1.999778102313545e-05, |
|
"loss": 0.4948, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.2140569059732429, |
|
"grad_norm": 0.7183800488623966, |
|
"learning_rate": 1.999719163499543e-05, |
|
"loss": 0.5104, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.21556434897305446, |
|
"grad_norm": 0.6535866563135689, |
|
"learning_rate": 1.999653292077857e-05, |
|
"loss": 0.5145, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.21707179197286602, |
|
"grad_norm": 0.64107430044815, |
|
"learning_rate": 1.999580488505276e-05, |
|
"loss": 0.4659, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.2185792349726776, |
|
"grad_norm": 0.6779403955139097, |
|
"learning_rate": 1.9995007532866594e-05, |
|
"loss": 0.4964, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.22008667797248915, |
|
"grad_norm": 0.6539110005752458, |
|
"learning_rate": 1.9994140869749366e-05, |
|
"loss": 0.5092, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.22159412097230075, |
|
"grad_norm": 0.6565635872751927, |
|
"learning_rate": 1.9993204901710995e-05, |
|
"loss": 0.5185, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.2231015639721123, |
|
"grad_norm": 0.6372834025735034, |
|
"learning_rate": 1.9992199635241997e-05, |
|
"loss": 0.5152, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.22460900697192387, |
|
"grad_norm": 0.622264657968412, |
|
"learning_rate": 1.999112507731346e-05, |
|
"loss": 0.5, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.22611644997173544, |
|
"grad_norm": 0.6267246999704467, |
|
"learning_rate": 1.9989981235376956e-05, |
|
"loss": 0.4897, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.227623892971547, |
|
"grad_norm": 0.6551262788067906, |
|
"learning_rate": 1.9988768117364526e-05, |
|
"loss": 0.5165, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2291313359713586, |
|
"grad_norm": 0.6664514520384526, |
|
"learning_rate": 1.9987485731688595e-05, |
|
"loss": 0.5002, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.23063877897117016, |
|
"grad_norm": 0.6076256276502832, |
|
"learning_rate": 1.998613408724195e-05, |
|
"loss": 0.5084, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.23214622197098173, |
|
"grad_norm": 0.6373492462291207, |
|
"learning_rate": 1.998471319339763e-05, |
|
"loss": 0.5026, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.2336536649707933, |
|
"grad_norm": 1.745130262060046, |
|
"learning_rate": 1.9983223060008908e-05, |
|
"loss": 0.5034, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.23516110797060485, |
|
"grad_norm": 5.046536940100192, |
|
"learning_rate": 1.9981663697409203e-05, |
|
"loss": 0.5424, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.23666855097041642, |
|
"grad_norm": 1.5507957638980387, |
|
"learning_rate": 1.998003511641199e-05, |
|
"loss": 0.5301, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.238175993970228, |
|
"grad_norm": 0.9453551152302114, |
|
"learning_rate": 1.997833732831076e-05, |
|
"loss": 0.4793, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.23968343697003958, |
|
"grad_norm": 1.1864330727309345, |
|
"learning_rate": 1.9976570344878916e-05, |
|
"loss": 0.5125, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.24119087996985114, |
|
"grad_norm": 0.8095198214822489, |
|
"learning_rate": 1.9974734178369702e-05, |
|
"loss": 0.4904, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2426983229696627, |
|
"grad_norm": 0.620347427984293, |
|
"learning_rate": 1.997282884151612e-05, |
|
"loss": 0.4611, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.24420576596947427, |
|
"grad_norm": 0.7590913383659819, |
|
"learning_rate": 1.9970854347530828e-05, |
|
"loss": 0.5085, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.24571320896928586, |
|
"grad_norm": 0.5932187358928716, |
|
"learning_rate": 1.9968810710106065e-05, |
|
"loss": 0.49, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.24722065196909743, |
|
"grad_norm": 0.7421744580230403, |
|
"learning_rate": 1.9966697943413548e-05, |
|
"loss": 0.4789, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.248728094968909, |
|
"grad_norm": 0.634606964098851, |
|
"learning_rate": 1.9964516062104377e-05, |
|
"loss": 0.5008, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.25023553796872056, |
|
"grad_norm": 0.721486256547781, |
|
"learning_rate": 1.996226508130892e-05, |
|
"loss": 0.4546, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.25174298096853215, |
|
"grad_norm": 0.6785391250628956, |
|
"learning_rate": 1.995994501663674e-05, |
|
"loss": 0.4892, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.2532504239683437, |
|
"grad_norm": 0.5862796281463728, |
|
"learning_rate": 1.995755588417644e-05, |
|
"loss": 0.4736, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.2547578669681553, |
|
"grad_norm": 0.6656043784418029, |
|
"learning_rate": 1.99550977004956e-05, |
|
"loss": 0.4749, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.2562653099679668, |
|
"grad_norm": 0.6091440270236256, |
|
"learning_rate": 1.9952570482640628e-05, |
|
"loss": 0.4997, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2577727529677784, |
|
"grad_norm": 0.7671131220608588, |
|
"learning_rate": 1.9949974248136655e-05, |
|
"loss": 0.4741, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.25928019596759, |
|
"grad_norm": 0.6246356814602296, |
|
"learning_rate": 1.9947309014987414e-05, |
|
"loss": 0.4727, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.26078763896740154, |
|
"grad_norm": 0.7874820318511245, |
|
"learning_rate": 1.9944574801675106e-05, |
|
"loss": 0.4965, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.26229508196721313, |
|
"grad_norm": 0.6727323732082747, |
|
"learning_rate": 1.9941771627160287e-05, |
|
"loss": 0.5361, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.26380252496702467, |
|
"grad_norm": 0.6896058530733802, |
|
"learning_rate": 1.9938899510881732e-05, |
|
"loss": 0.4574, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.26530996796683626, |
|
"grad_norm": 0.6396080754362474, |
|
"learning_rate": 1.9935958472756283e-05, |
|
"loss": 0.4791, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.26681741096664785, |
|
"grad_norm": 0.6978820918898457, |
|
"learning_rate": 1.993294853317873e-05, |
|
"loss": 0.4936, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.2683248539664594, |
|
"grad_norm": 0.6200726182474722, |
|
"learning_rate": 1.9929869713021668e-05, |
|
"loss": 0.4809, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.269832296966271, |
|
"grad_norm": 0.6621164817055001, |
|
"learning_rate": 1.9926722033635343e-05, |
|
"loss": 0.4833, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.2713397399660825, |
|
"grad_norm": 0.6443066814567524, |
|
"learning_rate": 1.9923505516847514e-05, |
|
"loss": 0.452, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2728471829658941, |
|
"grad_norm": 0.6324303832157692, |
|
"learning_rate": 1.9920220184963296e-05, |
|
"loss": 0.4942, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.27435462596570565, |
|
"grad_norm": 0.7093590972609833, |
|
"learning_rate": 1.9916866060764994e-05, |
|
"loss": 0.4666, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.27586206896551724, |
|
"grad_norm": 0.6056554558199854, |
|
"learning_rate": 1.991344316751198e-05, |
|
"loss": 0.48, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.27736951196532883, |
|
"grad_norm": 0.6668247301054864, |
|
"learning_rate": 1.9909951528940485e-05, |
|
"loss": 0.4892, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.27887695496514037, |
|
"grad_norm": 0.6248269362388523, |
|
"learning_rate": 1.990639116926348e-05, |
|
"loss": 0.4601, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.28038439796495196, |
|
"grad_norm": 0.5988280290089756, |
|
"learning_rate": 1.9902762113170467e-05, |
|
"loss": 0.4671, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.2818918409647635, |
|
"grad_norm": 0.6183022850194487, |
|
"learning_rate": 1.989906438582734e-05, |
|
"loss": 0.5023, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.2833992839645751, |
|
"grad_norm": 0.6345731772578389, |
|
"learning_rate": 1.9895298012876192e-05, |
|
"loss": 0.4749, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.2849067269643867, |
|
"grad_norm": 0.6026738883514794, |
|
"learning_rate": 1.9891463020435144e-05, |
|
"loss": 0.4884, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.2864141699641982, |
|
"grad_norm": 0.6275566066201014, |
|
"learning_rate": 1.9887559435098162e-05, |
|
"loss": 0.4868, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2879216129640098, |
|
"grad_norm": 0.6830623512458401, |
|
"learning_rate": 1.9883587283934875e-05, |
|
"loss": 0.4797, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.28942905596382135, |
|
"grad_norm": 0.621100203862078, |
|
"learning_rate": 1.9879546594490383e-05, |
|
"loss": 0.4781, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.29093649896363294, |
|
"grad_norm": 0.7266845450092815, |
|
"learning_rate": 1.987543739478507e-05, |
|
"loss": 0.4838, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.29244394196344453, |
|
"grad_norm": 0.5998498411317879, |
|
"learning_rate": 1.987125971331441e-05, |
|
"loss": 0.4809, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.29395138496325607, |
|
"grad_norm": 0.7629414665635117, |
|
"learning_rate": 1.9867013579048765e-05, |
|
"loss": 0.4891, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.29545882796306766, |
|
"grad_norm": 0.6340989750127195, |
|
"learning_rate": 1.9862699021433186e-05, |
|
"loss": 0.4696, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.2969662709628792, |
|
"grad_norm": 0.6600966786500729, |
|
"learning_rate": 1.9858316070387208e-05, |
|
"loss": 0.4568, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.2984737139626908, |
|
"grad_norm": 0.6836572646612057, |
|
"learning_rate": 1.9853864756304654e-05, |
|
"loss": 0.4849, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.2999811569625024, |
|
"grad_norm": 0.5912116643865833, |
|
"learning_rate": 1.9849345110053405e-05, |
|
"loss": 0.4752, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.3014885999623139, |
|
"grad_norm": 0.6202584603281575, |
|
"learning_rate": 1.984475716297519e-05, |
|
"loss": 0.478, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3029960429621255, |
|
"grad_norm": 0.6021031729150327, |
|
"learning_rate": 1.984010094688539e-05, |
|
"loss": 0.4818, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.30450348596193705, |
|
"grad_norm": 0.6013263404823498, |
|
"learning_rate": 1.9835376494072788e-05, |
|
"loss": 0.4798, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.30601092896174864, |
|
"grad_norm": 0.6391395935977097, |
|
"learning_rate": 1.9830583837299363e-05, |
|
"loss": 0.5079, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.3075183719615602, |
|
"grad_norm": 0.6403126078695583, |
|
"learning_rate": 1.9825723009800058e-05, |
|
"loss": 0.4994, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.30902581496137177, |
|
"grad_norm": 0.6996650791864127, |
|
"learning_rate": 1.9820794045282553e-05, |
|
"loss": 0.458, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.31053325796118336, |
|
"grad_norm": 0.6120915229627125, |
|
"learning_rate": 1.9815796977927015e-05, |
|
"loss": 0.4837, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.3120407009609949, |
|
"grad_norm": 0.698625059347094, |
|
"learning_rate": 1.9810731842385892e-05, |
|
"loss": 0.4762, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.3135481439608065, |
|
"grad_norm": 0.6458152328354264, |
|
"learning_rate": 1.9805598673783644e-05, |
|
"loss": 0.4877, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.31505558696061803, |
|
"grad_norm": 0.6183456339468536, |
|
"learning_rate": 1.980039750771651e-05, |
|
"loss": 0.4555, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.3165630299604296, |
|
"grad_norm": 0.6625119162294268, |
|
"learning_rate": 1.9795128380252263e-05, |
|
"loss": 0.467, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3180704729602412, |
|
"grad_norm": 0.5634839413053515, |
|
"learning_rate": 1.978979132792996e-05, |
|
"loss": 0.4664, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.31957791596005275, |
|
"grad_norm": 0.6026219616185302, |
|
"learning_rate": 1.9784386387759684e-05, |
|
"loss": 0.4774, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.32108535895986434, |
|
"grad_norm": 0.6269218843440012, |
|
"learning_rate": 1.977891359722229e-05, |
|
"loss": 0.4432, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.3225928019596759, |
|
"grad_norm": 0.5910572611931473, |
|
"learning_rate": 1.9773372994269147e-05, |
|
"loss": 0.4699, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.32410024495948747, |
|
"grad_norm": 0.6536939644754692, |
|
"learning_rate": 1.976776461732187e-05, |
|
"loss": 0.4807, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.32560768795929906, |
|
"grad_norm": 0.6191726708771672, |
|
"learning_rate": 1.976208850527206e-05, |
|
"loss": 0.4944, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.3271151309591106, |
|
"grad_norm": 0.6298298802683915, |
|
"learning_rate": 1.9756344697481027e-05, |
|
"loss": 0.4862, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.3286225739589222, |
|
"grad_norm": 0.6539080215758202, |
|
"learning_rate": 1.975053323377952e-05, |
|
"loss": 0.4817, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.33013001695873373, |
|
"grad_norm": 0.6146092404035427, |
|
"learning_rate": 1.9744654154467468e-05, |
|
"loss": 0.5422, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.3316374599585453, |
|
"grad_norm": 0.6490399293285286, |
|
"learning_rate": 1.9738707500313655e-05, |
|
"loss": 0.4703, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3331449029583569, |
|
"grad_norm": 0.615019483124787, |
|
"learning_rate": 1.9732693312555492e-05, |
|
"loss": 0.4801, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.33465234595816845, |
|
"grad_norm": 0.5547372740595196, |
|
"learning_rate": 1.9726611632898693e-05, |
|
"loss": 0.4719, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.33615978895798004, |
|
"grad_norm": 0.6076625752065381, |
|
"learning_rate": 1.9720462503517e-05, |
|
"loss": 0.498, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.3376672319577916, |
|
"grad_norm": 0.6047398581844834, |
|
"learning_rate": 1.971424596705189e-05, |
|
"loss": 0.4643, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.3391746749576032, |
|
"grad_norm": 0.5727445152315086, |
|
"learning_rate": 1.9707962066612278e-05, |
|
"loss": 0.4515, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3406821179574147, |
|
"grad_norm": 0.5573272058322264, |
|
"learning_rate": 1.970161084577422e-05, |
|
"loss": 0.4524, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.3421895609572263, |
|
"grad_norm": 0.6257676922974255, |
|
"learning_rate": 1.9695192348580606e-05, |
|
"loss": 0.4815, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.3436970039570379, |
|
"grad_norm": 0.5745183403896584, |
|
"learning_rate": 1.9688706619540863e-05, |
|
"loss": 0.4717, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.34520444695684943, |
|
"grad_norm": 0.5964564340890054, |
|
"learning_rate": 1.968215370363063e-05, |
|
"loss": 0.4839, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.346711889956661, |
|
"grad_norm": 0.5672877352491237, |
|
"learning_rate": 1.9675533646291463e-05, |
|
"loss": 0.4914, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.34821933295647256, |
|
"grad_norm": 0.6672213227292868, |
|
"learning_rate": 1.9668846493430522e-05, |
|
"loss": 0.4718, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.34972677595628415, |
|
"grad_norm": 0.5546791014566226, |
|
"learning_rate": 1.9662092291420233e-05, |
|
"loss": 0.4392, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.35123421895609575, |
|
"grad_norm": 0.6546100852352986, |
|
"learning_rate": 1.965527108709798e-05, |
|
"loss": 0.4836, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.3527416619559073, |
|
"grad_norm": 0.5992362903479123, |
|
"learning_rate": 1.964838292776579e-05, |
|
"loss": 0.4464, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.3542491049557189, |
|
"grad_norm": 0.63523888294575, |
|
"learning_rate": 1.9641427861189973e-05, |
|
"loss": 0.4856, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.3557565479555304, |
|
"grad_norm": 0.6032723041133213, |
|
"learning_rate": 1.963440593560083e-05, |
|
"loss": 0.4966, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.357263990955342, |
|
"grad_norm": 0.6306498314236755, |
|
"learning_rate": 1.9627317199692287e-05, |
|
"loss": 0.4771, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.3587714339551536, |
|
"grad_norm": 0.5865071462782886, |
|
"learning_rate": 1.962016170262157e-05, |
|
"loss": 0.4573, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.36027887695496513, |
|
"grad_norm": 0.5665927327271444, |
|
"learning_rate": 1.961293949400888e-05, |
|
"loss": 0.4485, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.3617863199547767, |
|
"grad_norm": 0.554220781330076, |
|
"learning_rate": 1.960565062393701e-05, |
|
"loss": 0.4686, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.36329376295458826, |
|
"grad_norm": 0.6455923732389204, |
|
"learning_rate": 1.9598295142951035e-05, |
|
"loss": 0.4592, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.36480120595439985, |
|
"grad_norm": 0.596721778819204, |
|
"learning_rate": 1.9590873102057948e-05, |
|
"loss": 0.4907, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.36630864895421145, |
|
"grad_norm": 0.6716627212373145, |
|
"learning_rate": 1.9583384552726294e-05, |
|
"loss": 0.4799, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.367816091954023, |
|
"grad_norm": 0.6229194933798746, |
|
"learning_rate": 1.957582954688584e-05, |
|
"loss": 0.4652, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.3693235349538346, |
|
"grad_norm": 0.6298037085236075, |
|
"learning_rate": 1.9568208136927177e-05, |
|
"loss": 0.4717, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.3708309779536461, |
|
"grad_norm": 0.5787887978421966, |
|
"learning_rate": 1.9560520375701408e-05, |
|
"loss": 0.4845, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.3723384209534577, |
|
"grad_norm": 0.6451526421523999, |
|
"learning_rate": 1.9552766316519726e-05, |
|
"loss": 0.4516, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.37384586395326924, |
|
"grad_norm": 0.538692705705553, |
|
"learning_rate": 1.9544946013153093e-05, |
|
"loss": 0.4649, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.37535330695308083, |
|
"grad_norm": 0.6399780775437526, |
|
"learning_rate": 1.9537059519831822e-05, |
|
"loss": 0.4594, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.3768607499528924, |
|
"grad_norm": 0.6082935211607333, |
|
"learning_rate": 1.9529106891245244e-05, |
|
"loss": 0.4709, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.37836819295270396, |
|
"grad_norm": 0.6106738888512755, |
|
"learning_rate": 1.9521088182541298e-05, |
|
"loss": 0.492, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.37987563595251556, |
|
"grad_norm": 0.5803041737823633, |
|
"learning_rate": 1.951300344932616e-05, |
|
"loss": 0.4646, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.3813830789523271, |
|
"grad_norm": 0.5647638332240319, |
|
"learning_rate": 1.9504852747663862e-05, |
|
"loss": 0.4725, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.3828905219521387, |
|
"grad_norm": 0.664315669006426, |
|
"learning_rate": 1.9496636134075894e-05, |
|
"loss": 0.4689, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.3843979649519503, |
|
"grad_norm": 0.6019633789641826, |
|
"learning_rate": 1.9488353665540813e-05, |
|
"loss": 0.4613, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.3859054079517618, |
|
"grad_norm": 0.5805016640621002, |
|
"learning_rate": 1.9480005399493857e-05, |
|
"loss": 0.4613, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.3874128509515734, |
|
"grad_norm": 0.6053466035481387, |
|
"learning_rate": 1.9471591393826536e-05, |
|
"loss": 0.4877, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.38892029395138494, |
|
"grad_norm": 0.5443749204002357, |
|
"learning_rate": 1.9463111706886234e-05, |
|
"loss": 0.481, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.39042773695119654, |
|
"grad_norm": 0.6422687053592201, |
|
"learning_rate": 1.9454566397475813e-05, |
|
"loss": 0.464, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.39193517995100813, |
|
"grad_norm": 0.5911574213296809, |
|
"learning_rate": 1.944595552485319e-05, |
|
"loss": 0.4451, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.39344262295081966, |
|
"grad_norm": 0.6244696365384524, |
|
"learning_rate": 1.943727914873094e-05, |
|
"loss": 0.465, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.39495006595063126, |
|
"grad_norm": 0.6787496907794774, |
|
"learning_rate": 1.9428537329275862e-05, |
|
"loss": 0.4591, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.3964575089504428, |
|
"grad_norm": 0.584284155721975, |
|
"learning_rate": 1.941973012710859e-05, |
|
"loss": 0.4835, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.3979649519502544, |
|
"grad_norm": 0.6636147745329853, |
|
"learning_rate": 1.941085760330316e-05, |
|
"loss": 0.4558, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.3994723949500659, |
|
"grad_norm": 0.580002453326873, |
|
"learning_rate": 1.940191981938657e-05, |
|
"loss": 0.4848, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.4009798379498775, |
|
"grad_norm": 0.6067452479296194, |
|
"learning_rate": 1.9392916837338376e-05, |
|
"loss": 0.4783, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.4024872809496891, |
|
"grad_norm": 0.6517612748843483, |
|
"learning_rate": 1.9383848719590257e-05, |
|
"loss": 0.4849, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.40399472394950064, |
|
"grad_norm": 0.6355304966389256, |
|
"learning_rate": 1.9374715529025575e-05, |
|
"loss": 0.4312, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.40550216694931224, |
|
"grad_norm": 0.627744747765263, |
|
"learning_rate": 1.9365517328978943e-05, |
|
"loss": 0.4762, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.4070096099491238, |
|
"grad_norm": 0.6640367945419465, |
|
"learning_rate": 1.9356254183235785e-05, |
|
"loss": 0.432, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.40851705294893537, |
|
"grad_norm": 0.647008694411896, |
|
"learning_rate": 1.93469261560319e-05, |
|
"loss": 0.4795, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.41002449594874696, |
|
"grad_norm": 0.6742117075938286, |
|
"learning_rate": 1.9337533312053002e-05, |
|
"loss": 0.4573, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.4115319389485585, |
|
"grad_norm": 0.6000668524451142, |
|
"learning_rate": 1.9328075716434287e-05, |
|
"loss": 0.4474, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.4130393819483701, |
|
"grad_norm": 0.6027061587937567, |
|
"learning_rate": 1.931855343475998e-05, |
|
"loss": 0.4283, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.4145468249481816, |
|
"grad_norm": 0.56875377174764, |
|
"learning_rate": 1.930896653306286e-05, |
|
"loss": 0.4446, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.4160542679479932, |
|
"grad_norm": 0.6494800822344575, |
|
"learning_rate": 1.929931507782383e-05, |
|
"loss": 0.4504, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.4175617109478048, |
|
"grad_norm": 0.5925306999643124, |
|
"learning_rate": 1.9289599135971437e-05, |
|
"loss": 0.4993, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.41906915394761635, |
|
"grad_norm": 0.5812846521774916, |
|
"learning_rate": 1.9279818774881418e-05, |
|
"loss": 0.4574, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.42057659694742794, |
|
"grad_norm": 0.5625417674563119, |
|
"learning_rate": 1.9269974062376224e-05, |
|
"loss": 0.4325, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.4220840399472395, |
|
"grad_norm": 0.5839055838922522, |
|
"learning_rate": 1.926006506672456e-05, |
|
"loss": 0.4669, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.42359148294705107, |
|
"grad_norm": 0.6042605173402862, |
|
"learning_rate": 1.9250091856640895e-05, |
|
"loss": 0.4224, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.42509892594686266, |
|
"grad_norm": 0.5856982708883072, |
|
"learning_rate": 1.9240054501285015e-05, |
|
"loss": 0.4709, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.4266063689466742, |
|
"grad_norm": 0.5631263514578662, |
|
"learning_rate": 1.922995307026151e-05, |
|
"loss": 0.4614, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.4281138119464858, |
|
"grad_norm": 0.5583569731432177, |
|
"learning_rate": 1.921978763361931e-05, |
|
"loss": 0.4589, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.4296212549462973, |
|
"grad_norm": 0.6050421963625475, |
|
"learning_rate": 1.9209558261851194e-05, |
|
"loss": 0.4382, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.4311286979461089, |
|
"grad_norm": 0.533785762634786, |
|
"learning_rate": 1.919926502589331e-05, |
|
"loss": 0.4862, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.43263614094592046, |
|
"grad_norm": 0.5693448486944194, |
|
"learning_rate": 1.9188907997124666e-05, |
|
"loss": 0.4562, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.43414358394573205, |
|
"grad_norm": 0.5654990613672617, |
|
"learning_rate": 1.9178487247366652e-05, |
|
"loss": 0.4492, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.43565102694554364, |
|
"grad_norm": 0.5771432152665512, |
|
"learning_rate": 1.916800284888253e-05, |
|
"loss": 0.4478, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.4371584699453552, |
|
"grad_norm": 0.5734596310020046, |
|
"learning_rate": 1.915745487437694e-05, |
|
"loss": 0.4801, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.43866591294516677, |
|
"grad_norm": 0.5832753483996317, |
|
"learning_rate": 1.9146843396995396e-05, |
|
"loss": 0.4563, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.4401733559449783, |
|
"grad_norm": 0.5879841082366902, |
|
"learning_rate": 1.9136168490323772e-05, |
|
"loss": 0.4689, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.4416807989447899, |
|
"grad_norm": 0.5521570450782258, |
|
"learning_rate": 1.9125430228387794e-05, |
|
"loss": 0.4581, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.4431882419446015, |
|
"grad_norm": 0.5673604877581071, |
|
"learning_rate": 1.9114628685652535e-05, |
|
"loss": 0.4668, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.444695684944413, |
|
"grad_norm": 0.5866077006525799, |
|
"learning_rate": 1.9103763937021887e-05, |
|
"loss": 0.4588, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.4462031279442246, |
|
"grad_norm": 0.5731048741878798, |
|
"learning_rate": 1.909283605783805e-05, |
|
"loss": 0.4774, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.44771057094403616, |
|
"grad_norm": 0.6251177027508026, |
|
"learning_rate": 1.9081845123881002e-05, |
|
"loss": 0.4813, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.44921801394384775, |
|
"grad_norm": 0.5256954818277138, |
|
"learning_rate": 1.9070791211367984e-05, |
|
"loss": 0.4473, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.45072545694365934, |
|
"grad_norm": 0.6199874516009303, |
|
"learning_rate": 1.9059674396952963e-05, |
|
"loss": 0.4629, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.4522328999434709, |
|
"grad_norm": 0.5917017492987557, |
|
"learning_rate": 1.90484947577261e-05, |
|
"loss": 0.4979, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.45374034294328247, |
|
"grad_norm": 0.6120361922704654, |
|
"learning_rate": 1.903725237121322e-05, |
|
"loss": 0.4831, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.455247785943094, |
|
"grad_norm": 0.5514120347682593, |
|
"learning_rate": 1.902594731537527e-05, |
|
"loss": 0.4452, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.4567552289429056, |
|
"grad_norm": 0.5767336190747095, |
|
"learning_rate": 1.901457966860779e-05, |
|
"loss": 0.4435, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.4582626719427172, |
|
"grad_norm": 0.5868519118956824, |
|
"learning_rate": 1.9003149509740347e-05, |
|
"loss": 0.492, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.45977011494252873, |
|
"grad_norm": 0.6168191655686016, |
|
"learning_rate": 1.899165691803601e-05, |
|
"loss": 0.4512, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.4612775579423403, |
|
"grad_norm": 0.6053359821845329, |
|
"learning_rate": 1.8980101973190787e-05, |
|
"loss": 0.4749, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.46278500094215186, |
|
"grad_norm": 0.60634572707715, |
|
"learning_rate": 1.896848475533309e-05, |
|
"loss": 0.4682, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.46429244394196345, |
|
"grad_norm": 0.6339199718330278, |
|
"learning_rate": 1.8956805345023145e-05, |
|
"loss": 0.4643, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.465799886941775, |
|
"grad_norm": 0.6011561135843241, |
|
"learning_rate": 1.894506382325248e-05, |
|
"loss": 0.435, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.4673073299415866, |
|
"grad_norm": 0.6067579490346751, |
|
"learning_rate": 1.8933260271443313e-05, |
|
"loss": 0.4162, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4688147729413982, |
|
"grad_norm": 0.5747986536629459, |
|
"learning_rate": 1.8921394771448032e-05, |
|
"loss": 0.448, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.4703222159412097, |
|
"grad_norm": 0.605434367981348, |
|
"learning_rate": 1.89094674055486e-05, |
|
"loss": 0.4264, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.4718296589410213, |
|
"grad_norm": 0.6028982875539595, |
|
"learning_rate": 1.889747825645599e-05, |
|
"loss": 0.447, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.47333710194083284, |
|
"grad_norm": 0.6024460995063091, |
|
"learning_rate": 1.8885427407309627e-05, |
|
"loss": 0.4689, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.47484454494064443, |
|
"grad_norm": 0.6726949468749703, |
|
"learning_rate": 1.887331494167678e-05, |
|
"loss": 0.4562, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.476351987940456, |
|
"grad_norm": 0.6108367421924343, |
|
"learning_rate": 1.8861140943552014e-05, |
|
"loss": 0.4574, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.47785943094026756, |
|
"grad_norm": 0.6095993211515124, |
|
"learning_rate": 1.884890549735659e-05, |
|
"loss": 0.429, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.47936687394007915, |
|
"grad_norm": 0.5708366516060817, |
|
"learning_rate": 1.8836608687937883e-05, |
|
"loss": 0.4494, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.4808743169398907, |
|
"grad_norm": 0.6319148329863508, |
|
"learning_rate": 1.8824250600568798e-05, |
|
"loss": 0.4457, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.4823817599397023, |
|
"grad_norm": 0.5817901717334689, |
|
"learning_rate": 1.8811831320947177e-05, |
|
"loss": 0.4444, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4838892029395139, |
|
"grad_norm": 0.6167253992638152, |
|
"learning_rate": 1.879935093519519e-05, |
|
"loss": 0.4758, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.4853966459393254, |
|
"grad_norm": 0.5918299912550289, |
|
"learning_rate": 1.878680952985877e-05, |
|
"loss": 0.4586, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.486904088939137, |
|
"grad_norm": 0.5897988696893806, |
|
"learning_rate": 1.8774207191906976e-05, |
|
"loss": 0.4548, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.48841153193894854, |
|
"grad_norm": 0.5336492924439385, |
|
"learning_rate": 1.8761544008731426e-05, |
|
"loss": 0.4477, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.48991897493876013, |
|
"grad_norm": 0.5969332291879268, |
|
"learning_rate": 1.874882006814565e-05, |
|
"loss": 0.4423, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.4914264179385717, |
|
"grad_norm": 0.5894559630672119, |
|
"learning_rate": 1.8736035458384528e-05, |
|
"loss": 0.4681, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.49293386093838326, |
|
"grad_norm": 0.583381204713255, |
|
"learning_rate": 1.8723190268103634e-05, |
|
"loss": 0.431, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.49444130393819485, |
|
"grad_norm": 0.5501857874739489, |
|
"learning_rate": 1.8710284586378645e-05, |
|
"loss": 0.4501, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.4959487469380064, |
|
"grad_norm": 0.5807568427837185, |
|
"learning_rate": 1.8697318502704734e-05, |
|
"loss": 0.446, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.497456189937818, |
|
"grad_norm": 0.5344952874232914, |
|
"learning_rate": 1.8684292106995916e-05, |
|
"loss": 0.464, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.4989636329376295, |
|
"grad_norm": 0.5875400091192824, |
|
"learning_rate": 1.8671205489584453e-05, |
|
"loss": 0.462, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.5004710759374411, |
|
"grad_norm": 0.5898142606962845, |
|
"learning_rate": 1.865805874122021e-05, |
|
"loss": 0.4495, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.5019785189372526, |
|
"grad_norm": 0.5383180946864506, |
|
"learning_rate": 1.8644851953070045e-05, |
|
"loss": 0.474, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.5034859619370643, |
|
"grad_norm": 0.5701159430118912, |
|
"learning_rate": 1.863158521671716e-05, |
|
"loss": 0.4644, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.5049934049368758, |
|
"grad_norm": 0.5456550772582448, |
|
"learning_rate": 1.8618258624160465e-05, |
|
"loss": 0.4426, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.5065008479366874, |
|
"grad_norm": 0.5806062450133762, |
|
"learning_rate": 1.8604872267813954e-05, |
|
"loss": 0.4428, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.508008290936499, |
|
"grad_norm": 0.5723184224994758, |
|
"learning_rate": 1.859142624050605e-05, |
|
"loss": 0.427, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.5095157339363106, |
|
"grad_norm": 0.5503430826330011, |
|
"learning_rate": 1.8577920635478976e-05, |
|
"loss": 0.4863, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.5110231769361221, |
|
"grad_norm": 0.5922429005891785, |
|
"learning_rate": 1.8564355546388094e-05, |
|
"loss": 0.472, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.5125306199359336, |
|
"grad_norm": 0.5243816217609505, |
|
"learning_rate": 1.855073106730126e-05, |
|
"loss": 0.4563, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5140380629357453, |
|
"grad_norm": 0.571898057341335, |
|
"learning_rate": 1.8537047292698175e-05, |
|
"loss": 0.4686, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.5155455059355568, |
|
"grad_norm": 0.5389787797747003, |
|
"learning_rate": 1.852330431746973e-05, |
|
"loss": 0.4044, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.5170529489353684, |
|
"grad_norm": 0.5755069679771695, |
|
"learning_rate": 1.8509502236917353e-05, |
|
"loss": 0.4536, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.51856039193518, |
|
"grad_norm": 0.5386650306089089, |
|
"learning_rate": 1.8495641146752322e-05, |
|
"loss": 0.4285, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.5200678349349915, |
|
"grad_norm": 0.5775045065740545, |
|
"learning_rate": 1.848172114309513e-05, |
|
"loss": 0.4579, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.5215752779348031, |
|
"grad_norm": 0.6222104655446267, |
|
"learning_rate": 1.8467742322474822e-05, |
|
"loss": 0.4733, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.5230827209346146, |
|
"grad_norm": 0.5869893846228816, |
|
"learning_rate": 1.845370478182829e-05, |
|
"loss": 0.5073, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.5245901639344263, |
|
"grad_norm": 0.6007295355780623, |
|
"learning_rate": 1.8439608618499637e-05, |
|
"loss": 0.4859, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.5260976069342378, |
|
"grad_norm": 0.5715093886190423, |
|
"learning_rate": 1.842545393023949e-05, |
|
"loss": 0.436, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.5276050499340493, |
|
"grad_norm": 0.5370655215760771, |
|
"learning_rate": 1.841124081520431e-05, |
|
"loss": 0.4545, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.529112492933861, |
|
"grad_norm": 0.5468526752808022, |
|
"learning_rate": 1.8396969371955724e-05, |
|
"loss": 0.4412, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.5306199359336725, |
|
"grad_norm": 0.5386055180491347, |
|
"learning_rate": 1.838263969945985e-05, |
|
"loss": 0.455, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.532127378933484, |
|
"grad_norm": 0.5273830292324821, |
|
"learning_rate": 1.836825189708659e-05, |
|
"loss": 0.4208, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.5336348219332957, |
|
"grad_norm": 0.5324858057392972, |
|
"learning_rate": 1.8353806064608953e-05, |
|
"loss": 0.4259, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.5351422649331072, |
|
"grad_norm": 0.5185086851614243, |
|
"learning_rate": 1.833930230220236e-05, |
|
"loss": 0.4506, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.5366497079329188, |
|
"grad_norm": 0.5553133756097826, |
|
"learning_rate": 1.8324740710443955e-05, |
|
"loss": 0.4629, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.5381571509327303, |
|
"grad_norm": 0.5742120676044152, |
|
"learning_rate": 1.831012139031189e-05, |
|
"loss": 0.4357, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.539664593932542, |
|
"grad_norm": 0.5605121444976939, |
|
"learning_rate": 1.829544444318466e-05, |
|
"loss": 0.4606, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.5411720369323535, |
|
"grad_norm": 0.6092704764024721, |
|
"learning_rate": 1.8280709970840352e-05, |
|
"loss": 0.4589, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.542679479932165, |
|
"grad_norm": 0.5515104498699946, |
|
"learning_rate": 1.8265918075455985e-05, |
|
"loss": 0.4554, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5441869229319767, |
|
"grad_norm": 0.5517752011641777, |
|
"learning_rate": 1.8251068859606777e-05, |
|
"loss": 0.4446, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.5456943659317882, |
|
"grad_norm": 0.523313087940014, |
|
"learning_rate": 1.823616242626542e-05, |
|
"loss": 0.4453, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.5472018089315998, |
|
"grad_norm": 0.5555090795115328, |
|
"learning_rate": 1.8221198878801415e-05, |
|
"loss": 0.431, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.5487092519314113, |
|
"grad_norm": 0.5254077832278897, |
|
"learning_rate": 1.8206178320980295e-05, |
|
"loss": 0.4512, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.5502166949312229, |
|
"grad_norm": 0.5382752275452225, |
|
"learning_rate": 1.819110085696295e-05, |
|
"loss": 0.4489, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.5517241379310345, |
|
"grad_norm": 0.5752845306460045, |
|
"learning_rate": 1.817596659130489e-05, |
|
"loss": 0.4659, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.553231580930846, |
|
"grad_norm": 0.534082668899082, |
|
"learning_rate": 1.816077562895551e-05, |
|
"loss": 0.443, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.5547390239306577, |
|
"grad_norm": 0.4935673245960411, |
|
"learning_rate": 1.814552807525738e-05, |
|
"loss": 0.4265, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.5562464669304692, |
|
"grad_norm": 0.5587086828843211, |
|
"learning_rate": 1.81302240359455e-05, |
|
"loss": 0.4171, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.5577539099302807, |
|
"grad_norm": 0.5706799552715889, |
|
"learning_rate": 1.8114863617146576e-05, |
|
"loss": 0.4419, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5592613529300924, |
|
"grad_norm": 0.5559814423377313, |
|
"learning_rate": 1.8099446925378278e-05, |
|
"loss": 0.4646, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.5607687959299039, |
|
"grad_norm": 0.6399807563842037, |
|
"learning_rate": 1.8083974067548506e-05, |
|
"loss": 0.4662, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.5622762389297155, |
|
"grad_norm": 0.5499667823126643, |
|
"learning_rate": 1.806844515095465e-05, |
|
"loss": 0.4705, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.563783681929527, |
|
"grad_norm": 0.5802308318791667, |
|
"learning_rate": 1.8052860283282832e-05, |
|
"loss": 0.4285, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.5652911249293386, |
|
"grad_norm": 0.616061675009139, |
|
"learning_rate": 1.8037219572607177e-05, |
|
"loss": 0.4661, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.5667985679291502, |
|
"grad_norm": 0.5381388831653736, |
|
"learning_rate": 1.8021523127389066e-05, |
|
"loss": 0.442, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.5683060109289617, |
|
"grad_norm": 0.5427863037336617, |
|
"learning_rate": 1.800577105647635e-05, |
|
"loss": 0.4737, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.5698134539287734, |
|
"grad_norm": 0.647319829296571, |
|
"learning_rate": 1.7989963469102643e-05, |
|
"loss": 0.4597, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5713208969285849, |
|
"grad_norm": 0.5361993689265471, |
|
"learning_rate": 1.797410047488653e-05, |
|
"loss": 0.4515, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5728283399283964, |
|
"grad_norm": 0.5928443348297506, |
|
"learning_rate": 1.7958182183830816e-05, |
|
"loss": 0.4383, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.574335782928208, |
|
"grad_norm": 0.5525429424873411, |
|
"learning_rate": 1.794220870632177e-05, |
|
"loss": 0.4676, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5758432259280196, |
|
"grad_norm": 0.6048913598018805, |
|
"learning_rate": 1.7926180153128358e-05, |
|
"loss": 0.4803, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5773506689278312, |
|
"grad_norm": 0.6159208841600681, |
|
"learning_rate": 1.791009663540146e-05, |
|
"loss": 0.4446, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5788581119276427, |
|
"grad_norm": 0.604058916697408, |
|
"learning_rate": 1.789395826467312e-05, |
|
"loss": 0.4406, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.5803655549274543, |
|
"grad_norm": 0.6189321454832999, |
|
"learning_rate": 1.7877765152855757e-05, |
|
"loss": 0.4757, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.5818729979272659, |
|
"grad_norm": 0.5252310621840579, |
|
"learning_rate": 1.78615174122414e-05, |
|
"loss": 0.4226, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.5833804409270774, |
|
"grad_norm": 0.6058698433864601, |
|
"learning_rate": 1.78452151555009e-05, |
|
"loss": 0.4242, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.5848878839268891, |
|
"grad_norm": 0.5784597918661724, |
|
"learning_rate": 1.7828858495683162e-05, |
|
"loss": 0.4546, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.5863953269267006, |
|
"grad_norm": 0.5778733445604559, |
|
"learning_rate": 1.781244754621434e-05, |
|
"loss": 0.4474, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.5879027699265121, |
|
"grad_norm": 0.5574362195371769, |
|
"learning_rate": 1.779598242089707e-05, |
|
"loss": 0.4461, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5894102129263237, |
|
"grad_norm": 0.6035018906117913, |
|
"learning_rate": 1.7779463233909677e-05, |
|
"loss": 0.4647, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.5909176559261353, |
|
"grad_norm": 0.5783320653215531, |
|
"learning_rate": 1.7762890099805362e-05, |
|
"loss": 0.4509, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.5924250989259469, |
|
"grad_norm": 0.608063697903211, |
|
"learning_rate": 1.774626313351145e-05, |
|
"loss": 0.4496, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.5939325419257584, |
|
"grad_norm": 0.5637493289630973, |
|
"learning_rate": 1.7729582450328547e-05, |
|
"loss": 0.4548, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.59543998492557, |
|
"grad_norm": 0.5878505952019026, |
|
"learning_rate": 1.771284816592978e-05, |
|
"loss": 0.4025, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.5969474279253816, |
|
"grad_norm": 0.5732228081169485, |
|
"learning_rate": 1.7696060396359956e-05, |
|
"loss": 0.4155, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.5984548709251931, |
|
"grad_norm": 0.5275574748856542, |
|
"learning_rate": 1.7679219258034798e-05, |
|
"loss": 0.4668, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.5999623139250048, |
|
"grad_norm": 0.565193432089848, |
|
"learning_rate": 1.7662324867740102e-05, |
|
"loss": 0.464, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.6014697569248163, |
|
"grad_norm": 0.5276065053060457, |
|
"learning_rate": 1.7645377342630956e-05, |
|
"loss": 0.4641, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.6029771999246278, |
|
"grad_norm": 0.5504334109425478, |
|
"learning_rate": 1.76283768002309e-05, |
|
"loss": 0.4288, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6044846429244394, |
|
"grad_norm": 0.6059296820868759, |
|
"learning_rate": 1.7611323358431145e-05, |
|
"loss": 0.4961, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.605992085924251, |
|
"grad_norm": 0.5077017761738585, |
|
"learning_rate": 1.759421713548971e-05, |
|
"loss": 0.4706, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.6074995289240626, |
|
"grad_norm": 0.5590656170710925, |
|
"learning_rate": 1.757705825003065e-05, |
|
"loss": 0.4034, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.6090069719238741, |
|
"grad_norm": 0.525709220345065, |
|
"learning_rate": 1.7559846821043205e-05, |
|
"loss": 0.4379, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.6105144149236857, |
|
"grad_norm": 0.5538945207929713, |
|
"learning_rate": 1.754258296788097e-05, |
|
"loss": 0.445, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.6120218579234973, |
|
"grad_norm": 0.5517645766831191, |
|
"learning_rate": 1.7525266810261096e-05, |
|
"loss": 0.4469, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.6135293009233088, |
|
"grad_norm": 0.5594555749715797, |
|
"learning_rate": 1.7507898468263422e-05, |
|
"loss": 0.4343, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.6150367439231204, |
|
"grad_norm": 0.5530618540850076, |
|
"learning_rate": 1.7490478062329686e-05, |
|
"loss": 0.4625, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.616544186922932, |
|
"grad_norm": 0.5948076942836006, |
|
"learning_rate": 1.7473005713262644e-05, |
|
"loss": 0.4497, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.6180516299227435, |
|
"grad_norm": 0.5776155556563956, |
|
"learning_rate": 1.7455481542225272e-05, |
|
"loss": 0.3959, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6195590729225551, |
|
"grad_norm": 0.5391682645939875, |
|
"learning_rate": 1.7437905670739893e-05, |
|
"loss": 0.4337, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.6210665159223667, |
|
"grad_norm": 0.5918312025262793, |
|
"learning_rate": 1.7420278220687366e-05, |
|
"loss": 0.4749, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.6225739589221783, |
|
"grad_norm": 0.5169533403943937, |
|
"learning_rate": 1.7402599314306207e-05, |
|
"loss": 0.4361, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.6240814019219898, |
|
"grad_norm": 0.5757476582664114, |
|
"learning_rate": 1.7384869074191777e-05, |
|
"loss": 0.4423, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.6255888449218014, |
|
"grad_norm": 0.5789420594237762, |
|
"learning_rate": 1.7367087623295394e-05, |
|
"loss": 0.4493, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.627096287921613, |
|
"grad_norm": 0.5146689624027024, |
|
"learning_rate": 1.7349255084923517e-05, |
|
"loss": 0.4128, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.6286037309214245, |
|
"grad_norm": 0.5556214483108315, |
|
"learning_rate": 1.7331371582736864e-05, |
|
"loss": 0.4097, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.6301111739212361, |
|
"grad_norm": 0.5781033815860408, |
|
"learning_rate": 1.731343724074957e-05, |
|
"loss": 0.4755, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.6316186169210477, |
|
"grad_norm": 0.505299705771376, |
|
"learning_rate": 1.7295452183328317e-05, |
|
"loss": 0.423, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.6331260599208592, |
|
"grad_norm": 0.6019529322565086, |
|
"learning_rate": 1.7277416535191478e-05, |
|
"loss": 0.4467, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6346335029206708, |
|
"grad_norm": 0.5423258091864472, |
|
"learning_rate": 1.7259330421408247e-05, |
|
"loss": 0.4297, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.6361409459204824, |
|
"grad_norm": 0.550859799446333, |
|
"learning_rate": 1.7241193967397784e-05, |
|
"loss": 0.4334, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.637648388920294, |
|
"grad_norm": 0.5436505610454662, |
|
"learning_rate": 1.7223007298928322e-05, |
|
"loss": 0.4227, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.6391558319201055, |
|
"grad_norm": 0.5265015330498195, |
|
"learning_rate": 1.7204770542116326e-05, |
|
"loss": 0.4407, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.640663274919917, |
|
"grad_norm": 0.577557633955233, |
|
"learning_rate": 1.7186483823425582e-05, |
|
"loss": 0.4794, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.6421707179197287, |
|
"grad_norm": 0.5304780945155085, |
|
"learning_rate": 1.7168147269666357e-05, |
|
"loss": 0.4306, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.6436781609195402, |
|
"grad_norm": 0.5436263482054755, |
|
"learning_rate": 1.714976100799449e-05, |
|
"loss": 0.4505, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.6451856039193518, |
|
"grad_norm": 0.5239803005942689, |
|
"learning_rate": 1.713132516591053e-05, |
|
"loss": 0.4204, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.6466930469191634, |
|
"grad_norm": 0.5640485363783228, |
|
"learning_rate": 1.7112839871258838e-05, |
|
"loss": 0.4709, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.6482004899189749, |
|
"grad_norm": 0.5112413611963181, |
|
"learning_rate": 1.7094305252226713e-05, |
|
"loss": 0.4352, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.6497079329187865, |
|
"grad_norm": 0.5839208365283748, |
|
"learning_rate": 1.7075721437343488e-05, |
|
"loss": 0.467, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.6512153759185981, |
|
"grad_norm": 0.5264144807133015, |
|
"learning_rate": 1.705708855547966e-05, |
|
"loss": 0.4427, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.6527228189184097, |
|
"grad_norm": 0.503285177882026, |
|
"learning_rate": 1.7038406735845967e-05, |
|
"loss": 0.4206, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.6542302619182212, |
|
"grad_norm": 0.523921175908132, |
|
"learning_rate": 1.7019676107992523e-05, |
|
"loss": 0.4636, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 0.5213012549969936, |
|
"learning_rate": 1.70008968018079e-05, |
|
"loss": 0.4385, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.6572451479178444, |
|
"grad_norm": 0.5616975925596913, |
|
"learning_rate": 1.6982068947518235e-05, |
|
"loss": 0.4495, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.6587525909176559, |
|
"grad_norm": 0.5094741288290618, |
|
"learning_rate": 1.6963192675686312e-05, |
|
"loss": 0.4354, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.6602600339174675, |
|
"grad_norm": 0.5691859599654164, |
|
"learning_rate": 1.694426811721069e-05, |
|
"loss": 0.4121, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.6617674769172791, |
|
"grad_norm": 0.565755177059836, |
|
"learning_rate": 1.6925295403324758e-05, |
|
"loss": 0.4291, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.6632749199170906, |
|
"grad_norm": 0.5182694692522232, |
|
"learning_rate": 1.6906274665595854e-05, |
|
"loss": 0.4187, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6647823629169022, |
|
"grad_norm": 0.5442306033345655, |
|
"learning_rate": 1.688720603592432e-05, |
|
"loss": 0.4596, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.6662898059167138, |
|
"grad_norm": 0.508987211991653, |
|
"learning_rate": 1.6868089646542632e-05, |
|
"loss": 0.4218, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.6677972489165254, |
|
"grad_norm": 0.5409018441358341, |
|
"learning_rate": 1.6848925630014445e-05, |
|
"loss": 0.4422, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.6693046919163369, |
|
"grad_norm": 0.5332135170482968, |
|
"learning_rate": 1.6829714119233688e-05, |
|
"loss": 0.4742, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.6708121349161484, |
|
"grad_norm": 0.510365685539909, |
|
"learning_rate": 1.6810455247423634e-05, |
|
"loss": 0.4308, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.6723195779159601, |
|
"grad_norm": 0.5088383566851198, |
|
"learning_rate": 1.6791149148136003e-05, |
|
"loss": 0.4491, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.6738270209157716, |
|
"grad_norm": 0.5398522018308489, |
|
"learning_rate": 1.677179595525e-05, |
|
"loss": 0.465, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.6753344639155832, |
|
"grad_norm": 0.5312851766133058, |
|
"learning_rate": 1.675239580297141e-05, |
|
"loss": 0.4574, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.6768419069153948, |
|
"grad_norm": 0.5377924163432233, |
|
"learning_rate": 1.6732948825831657e-05, |
|
"loss": 0.4282, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.6783493499152063, |
|
"grad_norm": 0.5411515105207517, |
|
"learning_rate": 1.671345515868688e-05, |
|
"loss": 0.437, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6798567929150179, |
|
"grad_norm": 0.5061423487479686, |
|
"learning_rate": 1.6693914936716983e-05, |
|
"loss": 0.4244, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.6813642359148294, |
|
"grad_norm": 0.5390647508447596, |
|
"learning_rate": 1.6674328295424723e-05, |
|
"loss": 0.4395, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.6828716789146411, |
|
"grad_norm": 0.5706362763533134, |
|
"learning_rate": 1.6654695370634738e-05, |
|
"loss": 0.4421, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.6843791219144526, |
|
"grad_norm": 0.5330284685793139, |
|
"learning_rate": 1.6635016298492628e-05, |
|
"loss": 0.4303, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.6858865649142641, |
|
"grad_norm": 0.5267067326608682, |
|
"learning_rate": 1.6615291215464005e-05, |
|
"loss": 0.4245, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.6873940079140758, |
|
"grad_norm": 0.5726680200512305, |
|
"learning_rate": 1.6595520258333545e-05, |
|
"loss": 0.4752, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.6889014509138873, |
|
"grad_norm": 0.5183865668680759, |
|
"learning_rate": 1.657570356420404e-05, |
|
"loss": 0.4542, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.6904088939136989, |
|
"grad_norm": 0.553551099478117, |
|
"learning_rate": 1.6555841270495456e-05, |
|
"loss": 0.445, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.6919163369135105, |
|
"grad_norm": 0.5929224658029257, |
|
"learning_rate": 1.6535933514943955e-05, |
|
"loss": 0.4183, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.693423779913322, |
|
"grad_norm": 0.5010271872134405, |
|
"learning_rate": 1.6515980435600965e-05, |
|
"loss": 0.4169, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6949312229131336, |
|
"grad_norm": 0.49068598527278895, |
|
"learning_rate": 1.6495982170832224e-05, |
|
"loss": 0.4122, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.6964386659129451, |
|
"grad_norm": 0.5288472547252633, |
|
"learning_rate": 1.6475938859316795e-05, |
|
"loss": 0.4154, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.6979461089127568, |
|
"grad_norm": 0.5364001246117184, |
|
"learning_rate": 1.6455850640046134e-05, |
|
"loss": 0.4247, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.6994535519125683, |
|
"grad_norm": 0.5248089160285507, |
|
"learning_rate": 1.6435717652323097e-05, |
|
"loss": 0.4522, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.7009609949123798, |
|
"grad_norm": 0.5871578611838155, |
|
"learning_rate": 1.6415540035761008e-05, |
|
"loss": 0.4477, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.7024684379121915, |
|
"grad_norm": 0.531098674787926, |
|
"learning_rate": 1.639531793028265e-05, |
|
"loss": 0.43, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.703975880912003, |
|
"grad_norm": 0.6050322359617515, |
|
"learning_rate": 1.637505147611934e-05, |
|
"loss": 0.4533, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.7054833239118146, |
|
"grad_norm": 0.5045703819799817, |
|
"learning_rate": 1.6354740813809917e-05, |
|
"loss": 0.4021, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.7069907669116261, |
|
"grad_norm": 0.5129545738188582, |
|
"learning_rate": 1.6334386084199787e-05, |
|
"loss": 0.4517, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.7084982099114377, |
|
"grad_norm": 0.5736577274561188, |
|
"learning_rate": 1.631398742843995e-05, |
|
"loss": 0.418, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7100056529112493, |
|
"grad_norm": 0.5323460252829038, |
|
"learning_rate": 1.629354498798601e-05, |
|
"loss": 0.4251, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.7115130959110608, |
|
"grad_norm": 0.5747199097534378, |
|
"learning_rate": 1.627305890459719e-05, |
|
"loss": 0.4394, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.7130205389108725, |
|
"grad_norm": 0.5646262513047455, |
|
"learning_rate": 1.625252932033538e-05, |
|
"loss": 0.4297, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.714527981910684, |
|
"grad_norm": 0.49304427786239235, |
|
"learning_rate": 1.6231956377564095e-05, |
|
"loss": 0.4224, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.7160354249104955, |
|
"grad_norm": 0.5791416730858486, |
|
"learning_rate": 1.621134021894756e-05, |
|
"loss": 0.4388, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.7175428679103072, |
|
"grad_norm": 0.5186150019034591, |
|
"learning_rate": 1.619068098744965e-05, |
|
"loss": 0.4422, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.7190503109101187, |
|
"grad_norm": 0.5839335428128258, |
|
"learning_rate": 1.6169978826332955e-05, |
|
"loss": 0.458, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.7205577539099303, |
|
"grad_norm": 0.5613046419371709, |
|
"learning_rate": 1.6149233879157747e-05, |
|
"loss": 0.4669, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.7220651969097418, |
|
"grad_norm": 0.5154157204007299, |
|
"learning_rate": 1.6128446289781012e-05, |
|
"loss": 0.4372, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.7235726399095535, |
|
"grad_norm": 0.5677977726488427, |
|
"learning_rate": 1.610761620235543e-05, |
|
"loss": 0.4731, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.725080082909365, |
|
"grad_norm": 0.5375971717165063, |
|
"learning_rate": 1.60867437613284e-05, |
|
"loss": 0.4566, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.7265875259091765, |
|
"grad_norm": 0.49724342603457516, |
|
"learning_rate": 1.6065829111441e-05, |
|
"loss": 0.4507, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.7280949689089882, |
|
"grad_norm": 0.5827089081742053, |
|
"learning_rate": 1.6044872397727037e-05, |
|
"loss": 0.4564, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.7296024119087997, |
|
"grad_norm": 0.5474489228753104, |
|
"learning_rate": 1.6023873765511993e-05, |
|
"loss": 0.4309, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.7311098549086112, |
|
"grad_norm": 0.5319969584661621, |
|
"learning_rate": 1.6002833360412044e-05, |
|
"loss": 0.4394, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.7326172979084229, |
|
"grad_norm": 0.5521662619957021, |
|
"learning_rate": 1.5981751328333036e-05, |
|
"loss": 0.4568, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.7341247409082344, |
|
"grad_norm": 0.4814653766664411, |
|
"learning_rate": 1.5960627815469486e-05, |
|
"loss": 0.4066, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.735632183908046, |
|
"grad_norm": 0.5109256400558994, |
|
"learning_rate": 1.5939462968303554e-05, |
|
"loss": 0.4272, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.7371396269078575, |
|
"grad_norm": 0.5357957318401174, |
|
"learning_rate": 1.5918256933604047e-05, |
|
"loss": 0.4237, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.7386470699076692, |
|
"grad_norm": 0.5396229844011063, |
|
"learning_rate": 1.589700985842538e-05, |
|
"loss": 0.4205, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.7401545129074807, |
|
"grad_norm": 0.5056971418930007, |
|
"learning_rate": 1.5875721890106574e-05, |
|
"loss": 0.4558, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.7416619559072922, |
|
"grad_norm": 0.5466763607345122, |
|
"learning_rate": 1.5854393176270205e-05, |
|
"loss": 0.4262, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.7431693989071039, |
|
"grad_norm": 0.5318696480713733, |
|
"learning_rate": 1.5833023864821427e-05, |
|
"loss": 0.4222, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.7446768419069154, |
|
"grad_norm": 0.5577732122364522, |
|
"learning_rate": 1.5811614103946905e-05, |
|
"loss": 0.4643, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.746184284906727, |
|
"grad_norm": 0.5396811070945262, |
|
"learning_rate": 1.5790164042113805e-05, |
|
"loss": 0.4619, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.7476917279065385, |
|
"grad_norm": 0.5116348501037207, |
|
"learning_rate": 1.576867382806877e-05, |
|
"loss": 0.4257, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.7491991709063501, |
|
"grad_norm": 0.5376269628887883, |
|
"learning_rate": 1.5747143610836873e-05, |
|
"loss": 0.4431, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.7507066139061617, |
|
"grad_norm": 0.5552456121649234, |
|
"learning_rate": 1.5725573539720592e-05, |
|
"loss": 0.4345, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.7522140569059732, |
|
"grad_norm": 0.5525594597252514, |
|
"learning_rate": 1.570396376429877e-05, |
|
"loss": 0.4288, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.7537214999057849, |
|
"grad_norm": 0.5130914024917077, |
|
"learning_rate": 1.5682314434425593e-05, |
|
"loss": 0.4506, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7552289429055964, |
|
"grad_norm": 0.5438445066019086, |
|
"learning_rate": 1.5660625700229526e-05, |
|
"loss": 0.451, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.7567363859054079, |
|
"grad_norm": 0.5393532424898553, |
|
"learning_rate": 1.5638897712112303e-05, |
|
"loss": 0.4339, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.7582438289052196, |
|
"grad_norm": 0.5067131473915181, |
|
"learning_rate": 1.561713062074785e-05, |
|
"loss": 0.4452, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.7597512719050311, |
|
"grad_norm": 0.511705817056659, |
|
"learning_rate": 1.5595324577081265e-05, |
|
"loss": 0.4227, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.7612587149048426, |
|
"grad_norm": 0.5105016396695756, |
|
"learning_rate": 1.5573479732327758e-05, |
|
"loss": 0.4223, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.7627661579046542, |
|
"grad_norm": 0.508814615305124, |
|
"learning_rate": 1.555159623797161e-05, |
|
"loss": 0.4649, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.7642736009044658, |
|
"grad_norm": 0.5115538447430213, |
|
"learning_rate": 1.552967424576512e-05, |
|
"loss": 0.4257, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.7657810439042774, |
|
"grad_norm": 0.519809456344861, |
|
"learning_rate": 1.5507713907727557e-05, |
|
"loss": 0.4393, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.7672884869040889, |
|
"grad_norm": 0.5220982867467517, |
|
"learning_rate": 1.5485715376144087e-05, |
|
"loss": 0.4296, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.7687959299039006, |
|
"grad_norm": 0.4819994486336346, |
|
"learning_rate": 1.5463678803564753e-05, |
|
"loss": 0.4227, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7703033729037121, |
|
"grad_norm": 0.5721785385849657, |
|
"learning_rate": 1.5441604342803374e-05, |
|
"loss": 0.4446, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.7718108159035236, |
|
"grad_norm": 0.5203314012229143, |
|
"learning_rate": 1.5419492146936518e-05, |
|
"loss": 0.4205, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.7733182589033352, |
|
"grad_norm": 0.5359755271436466, |
|
"learning_rate": 1.5397342369302425e-05, |
|
"loss": 0.4402, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.7748257019031468, |
|
"grad_norm": 0.5233100133672925, |
|
"learning_rate": 1.5375155163499953e-05, |
|
"loss": 0.4177, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.7763331449029583, |
|
"grad_norm": 0.5349268255121612, |
|
"learning_rate": 1.5352930683387502e-05, |
|
"loss": 0.4586, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.7778405879027699, |
|
"grad_norm": 0.5815705753331589, |
|
"learning_rate": 1.5330669083081956e-05, |
|
"loss": 0.4427, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.7793480309025815, |
|
"grad_norm": 0.49665657788094364, |
|
"learning_rate": 1.5308370516957617e-05, |
|
"loss": 0.4201, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.7808554739023931, |
|
"grad_norm": 0.5160010880115449, |
|
"learning_rate": 1.528603513964511e-05, |
|
"loss": 0.4261, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.7823629169022046, |
|
"grad_norm": 0.5468406227400142, |
|
"learning_rate": 1.5263663106030347e-05, |
|
"loss": 0.4116, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.7838703599020163, |
|
"grad_norm": 0.5236112386795565, |
|
"learning_rate": 1.5241254571253433e-05, |
|
"loss": 0.4317, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7853778029018278, |
|
"grad_norm": 0.5715363020786929, |
|
"learning_rate": 1.5218809690707583e-05, |
|
"loss": 0.4288, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.7868852459016393, |
|
"grad_norm": 0.5191719390295657, |
|
"learning_rate": 1.5196328620038059e-05, |
|
"loss": 0.4126, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.7883926889014509, |
|
"grad_norm": 0.5236405890133281, |
|
"learning_rate": 1.5173811515141083e-05, |
|
"loss": 0.4024, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.7899001319012625, |
|
"grad_norm": 0.5689433953014548, |
|
"learning_rate": 1.5151258532162771e-05, |
|
"loss": 0.4377, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.791407574901074, |
|
"grad_norm": 0.5352416985872532, |
|
"learning_rate": 1.5128669827498024e-05, |
|
"loss": 0.4354, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.7929150179008856, |
|
"grad_norm": 0.5274897373659767, |
|
"learning_rate": 1.5106045557789453e-05, |
|
"loss": 0.4391, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.7944224609006972, |
|
"grad_norm": 0.5240353462138522, |
|
"learning_rate": 1.5083385879926309e-05, |
|
"loss": 0.4461, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.7959299039005088, |
|
"grad_norm": 0.5040339622037668, |
|
"learning_rate": 1.5060690951043385e-05, |
|
"loss": 0.428, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.7974373469003203, |
|
"grad_norm": 0.6056664440579997, |
|
"learning_rate": 1.5037960928519902e-05, |
|
"loss": 0.4667, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.7989447899001318, |
|
"grad_norm": 0.5064874652403102, |
|
"learning_rate": 1.501519596997847e-05, |
|
"loss": 0.4174, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8004522328999435, |
|
"grad_norm": 0.5178815992344113, |
|
"learning_rate": 1.499239623328394e-05, |
|
"loss": 0.4143, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.801959675899755, |
|
"grad_norm": 0.5433275328773004, |
|
"learning_rate": 1.4969561876542348e-05, |
|
"loss": 0.4308, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.8034671188995666, |
|
"grad_norm": 0.5067490187395532, |
|
"learning_rate": 1.4946693058099802e-05, |
|
"loss": 0.4383, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.8049745618993782, |
|
"grad_norm": 0.49712616081242367, |
|
"learning_rate": 1.4923789936541378e-05, |
|
"loss": 0.423, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.8064820048991898, |
|
"grad_norm": 0.5142222567824052, |
|
"learning_rate": 1.4900852670690044e-05, |
|
"loss": 0.4427, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.8079894478990013, |
|
"grad_norm": 0.5138167933634391, |
|
"learning_rate": 1.487788141960553e-05, |
|
"loss": 0.426, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.8094968908988129, |
|
"grad_norm": 0.49938679145962556, |
|
"learning_rate": 1.4854876342583246e-05, |
|
"loss": 0.4116, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.8110043338986245, |
|
"grad_norm": 0.5630302514996013, |
|
"learning_rate": 1.4831837599153165e-05, |
|
"loss": 0.4569, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.812511776898436, |
|
"grad_norm": 0.5068845911186761, |
|
"learning_rate": 1.4808765349078729e-05, |
|
"loss": 0.4174, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.8140192198982475, |
|
"grad_norm": 0.5402742918446363, |
|
"learning_rate": 1.4785659752355724e-05, |
|
"loss": 0.4046, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8155266628980592, |
|
"grad_norm": 0.5486844481668101, |
|
"learning_rate": 1.4762520969211186e-05, |
|
"loss": 0.4225, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.8170341058978707, |
|
"grad_norm": 0.5290035366810187, |
|
"learning_rate": 1.4739349160102285e-05, |
|
"loss": 0.4378, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.8185415488976823, |
|
"grad_norm": 0.5374079241254692, |
|
"learning_rate": 1.4716144485715209e-05, |
|
"loss": 0.4299, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.8200489918974939, |
|
"grad_norm": 0.4778906030205072, |
|
"learning_rate": 1.4692907106964051e-05, |
|
"loss": 0.3992, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.8215564348973055, |
|
"grad_norm": 0.49060078784195343, |
|
"learning_rate": 1.4669637184989696e-05, |
|
"loss": 0.4243, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.823063877897117, |
|
"grad_norm": 0.5253862030306666, |
|
"learning_rate": 1.4646334881158704e-05, |
|
"loss": 0.4236, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.8245713208969286, |
|
"grad_norm": 0.5215051723939326, |
|
"learning_rate": 1.4623000357062184e-05, |
|
"loss": 0.4274, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.8260787638967402, |
|
"grad_norm": 0.5071119070406966, |
|
"learning_rate": 1.459963377451468e-05, |
|
"loss": 0.4081, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.8275862068965517, |
|
"grad_norm": 0.5180772114309931, |
|
"learning_rate": 1.457623529555305e-05, |
|
"loss": 0.4228, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.8290936498963632, |
|
"grad_norm": 0.5198434876057629, |
|
"learning_rate": 1.4552805082435333e-05, |
|
"loss": 0.4328, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8306010928961749, |
|
"grad_norm": 0.53696356685593, |
|
"learning_rate": 1.4529343297639638e-05, |
|
"loss": 0.4311, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.8321085358959864, |
|
"grad_norm": 0.5522072703618133, |
|
"learning_rate": 1.4505850103863007e-05, |
|
"loss": 0.4441, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.833615978895798, |
|
"grad_norm": 0.5022303098504759, |
|
"learning_rate": 1.448232566402028e-05, |
|
"loss": 0.4233, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.8351234218956096, |
|
"grad_norm": 0.5522095422296431, |
|
"learning_rate": 1.4458770141242992e-05, |
|
"loss": 0.4333, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.8366308648954212, |
|
"grad_norm": 0.5232096502230357, |
|
"learning_rate": 1.4435183698878212e-05, |
|
"loss": 0.4286, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.8381383078952327, |
|
"grad_norm": 0.46986995612699417, |
|
"learning_rate": 1.4411566500487425e-05, |
|
"loss": 0.4049, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.8396457508950442, |
|
"grad_norm": 0.5077507527784849, |
|
"learning_rate": 1.4387918709845395e-05, |
|
"loss": 0.4144, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.8411531938948559, |
|
"grad_norm": 0.5253570052023816, |
|
"learning_rate": 1.4364240490939032e-05, |
|
"loss": 0.4547, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.8426606368946674, |
|
"grad_norm": 0.49921819408434215, |
|
"learning_rate": 1.4340532007966252e-05, |
|
"loss": 0.3949, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.844168079894479, |
|
"grad_norm": 0.5411234788441551, |
|
"learning_rate": 1.4316793425334836e-05, |
|
"loss": 0.4445, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.8456755228942906, |
|
"grad_norm": 0.5264546536830835, |
|
"learning_rate": 1.4293024907661295e-05, |
|
"loss": 0.4117, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.8471829658941021, |
|
"grad_norm": 0.518655972625287, |
|
"learning_rate": 1.4269226619769727e-05, |
|
"loss": 0.4159, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.8486904088939137, |
|
"grad_norm": 0.537382287002897, |
|
"learning_rate": 1.424539872669067e-05, |
|
"loss": 0.4395, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.8501978518937253, |
|
"grad_norm": 0.4871628601960703, |
|
"learning_rate": 1.4221541393659966e-05, |
|
"loss": 0.4244, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.8517052948935369, |
|
"grad_norm": 0.5323818502275258, |
|
"learning_rate": 1.4197654786117604e-05, |
|
"loss": 0.442, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.8532127378933484, |
|
"grad_norm": 0.49211277864065, |
|
"learning_rate": 1.4173739069706586e-05, |
|
"loss": 0.4333, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.8547201808931599, |
|
"grad_norm": 0.5016763716077036, |
|
"learning_rate": 1.414979441027176e-05, |
|
"loss": 0.4223, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.8562276238929716, |
|
"grad_norm": 0.5072197589397037, |
|
"learning_rate": 1.4125820973858693e-05, |
|
"loss": 0.4166, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.8577350668927831, |
|
"grad_norm": 0.5379841247223495, |
|
"learning_rate": 1.41018189267125e-05, |
|
"loss": 0.4457, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.8592425098925947, |
|
"grad_norm": 0.5156171430561991, |
|
"learning_rate": 1.4077788435276701e-05, |
|
"loss": 0.4154, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.8607499528924063, |
|
"grad_norm": 0.5377878469372074, |
|
"learning_rate": 1.4053729666192067e-05, |
|
"loss": 0.4437, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.8622573958922178, |
|
"grad_norm": 0.5606843337820052, |
|
"learning_rate": 1.4029642786295452e-05, |
|
"loss": 0.4479, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.8637648388920294, |
|
"grad_norm": 0.4989731388746451, |
|
"learning_rate": 1.400552796261866e-05, |
|
"loss": 0.407, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.8652722818918409, |
|
"grad_norm": 0.5136932503470173, |
|
"learning_rate": 1.3981385362387268e-05, |
|
"loss": 0.4211, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.8667797248916526, |
|
"grad_norm": 0.495625389098895, |
|
"learning_rate": 1.3957215153019463e-05, |
|
"loss": 0.4203, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.8682871678914641, |
|
"grad_norm": 0.49590492700182753, |
|
"learning_rate": 1.3933017502124897e-05, |
|
"loss": 0.4123, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.8697946108912756, |
|
"grad_norm": 0.5389299185456149, |
|
"learning_rate": 1.3908792577503514e-05, |
|
"loss": 0.4309, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.8713020538910873, |
|
"grad_norm": 0.5014871721652727, |
|
"learning_rate": 1.3884540547144393e-05, |
|
"loss": 0.4159, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.8728094968908988, |
|
"grad_norm": 0.49719473763201644, |
|
"learning_rate": 1.3860261579224574e-05, |
|
"loss": 0.4191, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.8743169398907104, |
|
"grad_norm": 0.5102002869995407, |
|
"learning_rate": 1.3835955842107897e-05, |
|
"loss": 0.418, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.875824382890522, |
|
"grad_norm": 0.497268362475834, |
|
"learning_rate": 1.3811623504343845e-05, |
|
"loss": 0.4092, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.8773318258903335, |
|
"grad_norm": 0.49466892349875324, |
|
"learning_rate": 1.378726473466635e-05, |
|
"loss": 0.4154, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.8788392688901451, |
|
"grad_norm": 0.5485556900908343, |
|
"learning_rate": 1.3762879701992642e-05, |
|
"loss": 0.4327, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.8803467118899566, |
|
"grad_norm": 0.49193915962474927, |
|
"learning_rate": 1.373846857542208e-05, |
|
"loss": 0.4233, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.8818541548897683, |
|
"grad_norm": 0.49157440442050665, |
|
"learning_rate": 1.3714031524234965e-05, |
|
"loss": 0.4255, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.8833615978895798, |
|
"grad_norm": 0.5153566919676954, |
|
"learning_rate": 1.3689568717891381e-05, |
|
"loss": 0.4433, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.8848690408893913, |
|
"grad_norm": 0.5151771531878016, |
|
"learning_rate": 1.3665080326029997e-05, |
|
"loss": 0.4313, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.886376483889203, |
|
"grad_norm": 0.5172038128981158, |
|
"learning_rate": 1.364056651846693e-05, |
|
"loss": 0.4025, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.8878839268890145, |
|
"grad_norm": 0.5197034910270297, |
|
"learning_rate": 1.3616027465194525e-05, |
|
"loss": 0.432, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.889391369888826, |
|
"grad_norm": 0.5280686849313844, |
|
"learning_rate": 1.35914633363802e-05, |
|
"loss": 0.4093, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8908988128886377, |
|
"grad_norm": 0.5192774851448931, |
|
"learning_rate": 1.356687430236526e-05, |
|
"loss": 0.426, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.8924062558884492, |
|
"grad_norm": 0.5407059497728999, |
|
"learning_rate": 1.3542260533663723e-05, |
|
"loss": 0.4408, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.8939136988882608, |
|
"grad_norm": 0.5029787366533781, |
|
"learning_rate": 1.351762220096112e-05, |
|
"loss": 0.4134, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.8954211418880723, |
|
"grad_norm": 0.5557133502339159, |
|
"learning_rate": 1.3492959475113332e-05, |
|
"loss": 0.4247, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.896928584887884, |
|
"grad_norm": 0.5446161829977666, |
|
"learning_rate": 1.3468272527145388e-05, |
|
"loss": 0.4133, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.8984360278876955, |
|
"grad_norm": 0.5055328441209378, |
|
"learning_rate": 1.3443561528250295e-05, |
|
"loss": 0.3916, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.899943470887507, |
|
"grad_norm": 0.5874519416857665, |
|
"learning_rate": 1.3418826649787834e-05, |
|
"loss": 0.4339, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.9014509138873187, |
|
"grad_norm": 0.5577170031704589, |
|
"learning_rate": 1.3394068063283387e-05, |
|
"loss": 0.458, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.9029583568871302, |
|
"grad_norm": 0.5332814444729285, |
|
"learning_rate": 1.3369285940426737e-05, |
|
"loss": 0.4206, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.9044657998869418, |
|
"grad_norm": 0.5654643143753597, |
|
"learning_rate": 1.334448045307088e-05, |
|
"loss": 0.4113, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9059732428867533, |
|
"grad_norm": 0.4979334800098818, |
|
"learning_rate": 1.331965177323084e-05, |
|
"loss": 0.4093, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.9074806858865649, |
|
"grad_norm": 0.5415874467915235, |
|
"learning_rate": 1.3294800073082464e-05, |
|
"loss": 0.4366, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.9089881288863765, |
|
"grad_norm": 0.5813207766062746, |
|
"learning_rate": 1.3269925524961237e-05, |
|
"loss": 0.4448, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.910495571886188, |
|
"grad_norm": 0.5078359282634053, |
|
"learning_rate": 1.3245028301361086e-05, |
|
"loss": 0.4161, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.9120030148859997, |
|
"grad_norm": 0.5539022471684321, |
|
"learning_rate": 1.3220108574933185e-05, |
|
"loss": 0.4056, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.9135104578858112, |
|
"grad_norm": 0.48460567118259956, |
|
"learning_rate": 1.3195166518484748e-05, |
|
"loss": 0.4009, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.9150179008856227, |
|
"grad_norm": 0.4843343744091719, |
|
"learning_rate": 1.317020230497784e-05, |
|
"loss": 0.4231, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.9165253438854344, |
|
"grad_norm": 0.5190197613843625, |
|
"learning_rate": 1.3145216107528178e-05, |
|
"loss": 0.4029, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.9180327868852459, |
|
"grad_norm": 0.4867573763184133, |
|
"learning_rate": 1.3120208099403926e-05, |
|
"loss": 0.3801, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.9195402298850575, |
|
"grad_norm": 0.48900894299598635, |
|
"learning_rate": 1.3095178454024496e-05, |
|
"loss": 0.4413, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.921047672884869, |
|
"grad_norm": 0.5555266761898254, |
|
"learning_rate": 1.3070127344959348e-05, |
|
"loss": 0.4144, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.9225551158846806, |
|
"grad_norm": 0.5128649153965221, |
|
"learning_rate": 1.3045054945926775e-05, |
|
"loss": 0.4616, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.9240625588844922, |
|
"grad_norm": 0.5194503259126656, |
|
"learning_rate": 1.3019961430792711e-05, |
|
"loss": 0.4229, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.9255700018843037, |
|
"grad_norm": 0.49285532678009114, |
|
"learning_rate": 1.2994846973569524e-05, |
|
"loss": 0.4165, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.9270774448841154, |
|
"grad_norm": 0.5197963588456296, |
|
"learning_rate": 1.2969711748414804e-05, |
|
"loss": 0.3947, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.9285848878839269, |
|
"grad_norm": 0.542725727252665, |
|
"learning_rate": 1.2944555929630152e-05, |
|
"loss": 0.4261, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.9300923308837384, |
|
"grad_norm": 0.5068570325444082, |
|
"learning_rate": 1.2919379691659979e-05, |
|
"loss": 0.453, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.93159977388355, |
|
"grad_norm": 0.5138431602453551, |
|
"learning_rate": 1.2894183209090304e-05, |
|
"loss": 0.4482, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.9331072168833616, |
|
"grad_norm": 0.5098264236378465, |
|
"learning_rate": 1.2868966656647522e-05, |
|
"loss": 0.4344, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.9346146598831732, |
|
"grad_norm": 0.4932368518544031, |
|
"learning_rate": 1.2843730209197203e-05, |
|
"loss": 0.4444, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.9361221028829847, |
|
"grad_norm": 0.48787838834596486, |
|
"learning_rate": 1.2818474041742885e-05, |
|
"loss": 0.3909, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.9376295458827963, |
|
"grad_norm": 0.5042148044417084, |
|
"learning_rate": 1.2793198329424858e-05, |
|
"loss": 0.4114, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.9391369888826079, |
|
"grad_norm": 0.5164275014163481, |
|
"learning_rate": 1.2767903247518945e-05, |
|
"loss": 0.4042, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.9406444318824194, |
|
"grad_norm": 0.4878553181808082, |
|
"learning_rate": 1.2742588971435276e-05, |
|
"loss": 0.4108, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.9421518748822311, |
|
"grad_norm": 0.4953872026297146, |
|
"learning_rate": 1.2717255676717106e-05, |
|
"loss": 0.4227, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.9436593178820426, |
|
"grad_norm": 0.5623597137703112, |
|
"learning_rate": 1.2691903539039563e-05, |
|
"loss": 0.4436, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.9451667608818541, |
|
"grad_norm": 0.539298059881258, |
|
"learning_rate": 1.2666532734208437e-05, |
|
"loss": 0.4384, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.9466742038816657, |
|
"grad_norm": 0.5443120200340641, |
|
"learning_rate": 1.264114343815898e-05, |
|
"loss": 0.4413, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.9481816468814773, |
|
"grad_norm": 0.5142650264217846, |
|
"learning_rate": 1.2615735826954664e-05, |
|
"loss": 0.4231, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.9496890898812889, |
|
"grad_norm": 0.5566560995617864, |
|
"learning_rate": 1.2590310076785974e-05, |
|
"loss": 0.4458, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.9511965328811004, |
|
"grad_norm": 0.484643722468428, |
|
"learning_rate": 1.256486636396917e-05, |
|
"loss": 0.3868, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.952703975880912, |
|
"grad_norm": 0.5278211197592041, |
|
"learning_rate": 1.2539404864945087e-05, |
|
"loss": 0.3956, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.9542114188807236, |
|
"grad_norm": 0.5339784329738423, |
|
"learning_rate": 1.2513925756277894e-05, |
|
"loss": 0.4065, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.9557188618805351, |
|
"grad_norm": 0.4808436521240299, |
|
"learning_rate": 1.2488429214653871e-05, |
|
"loss": 0.3733, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.9572263048803467, |
|
"grad_norm": 0.5245674565988473, |
|
"learning_rate": 1.24629154168802e-05, |
|
"loss": 0.4206, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.9587337478801583, |
|
"grad_norm": 0.5091922264135481, |
|
"learning_rate": 1.2437384539883715e-05, |
|
"loss": 0.4321, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.9602411908799698, |
|
"grad_norm": 0.48729820029525145, |
|
"learning_rate": 1.2411836760709686e-05, |
|
"loss": 0.3961, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.9617486338797814, |
|
"grad_norm": 0.5224677796102979, |
|
"learning_rate": 1.2386272256520606e-05, |
|
"loss": 0.4094, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.963256076879593, |
|
"grad_norm": 0.5291193644566966, |
|
"learning_rate": 1.2360691204594937e-05, |
|
"loss": 0.4202, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.9647635198794046, |
|
"grad_norm": 0.5090746283917961, |
|
"learning_rate": 1.2335093782325889e-05, |
|
"loss": 0.4115, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.9662709628792161, |
|
"grad_norm": 0.49844277614657384, |
|
"learning_rate": 1.2309480167220203e-05, |
|
"loss": 0.4138, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.9677784058790277, |
|
"grad_norm": 0.5085446797250271, |
|
"learning_rate": 1.2283850536896907e-05, |
|
"loss": 0.4403, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.9692858488788393, |
|
"grad_norm": 0.48811956114780947, |
|
"learning_rate": 1.2258205069086082e-05, |
|
"loss": 0.4132, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.9707932918786508, |
|
"grad_norm": 0.5804699645229868, |
|
"learning_rate": 1.2232543941627641e-05, |
|
"loss": 0.4145, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.9723007348784624, |
|
"grad_norm": 0.5223286630706884, |
|
"learning_rate": 1.2206867332470091e-05, |
|
"loss": 0.4451, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.973808177878274, |
|
"grad_norm": 0.5431240213202171, |
|
"learning_rate": 1.2181175419669293e-05, |
|
"loss": 0.4106, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.9753156208780855, |
|
"grad_norm": 0.4788749668502741, |
|
"learning_rate": 1.215546838138723e-05, |
|
"loss": 0.3947, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.9768230638778971, |
|
"grad_norm": 0.4823666614879542, |
|
"learning_rate": 1.212974639589078e-05, |
|
"loss": 0.3805, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.9783305068777087, |
|
"grad_norm": 0.5272835049687891, |
|
"learning_rate": 1.2104009641550472e-05, |
|
"loss": 0.4192, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.9798379498775203, |
|
"grad_norm": 0.4899435333806439, |
|
"learning_rate": 1.2078258296839245e-05, |
|
"loss": 0.4242, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9813453928773318, |
|
"grad_norm": 0.48267520902055755, |
|
"learning_rate": 1.2052492540331218e-05, |
|
"loss": 0.3819, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.9828528358771434, |
|
"grad_norm": 0.5208472855722491, |
|
"learning_rate": 1.2026712550700457e-05, |
|
"loss": 0.4268, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.984360278876955, |
|
"grad_norm": 0.5182048450359825, |
|
"learning_rate": 1.200091850671972e-05, |
|
"loss": 0.3833, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.9858677218767665, |
|
"grad_norm": 0.5524884939555313, |
|
"learning_rate": 1.1975110587259222e-05, |
|
"loss": 0.4099, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.9873751648765781, |
|
"grad_norm": 0.5724743146915252, |
|
"learning_rate": 1.1949288971285411e-05, |
|
"loss": 0.4451, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.9888826078763897, |
|
"grad_norm": 0.5560489536019798, |
|
"learning_rate": 1.1923453837859706e-05, |
|
"loss": 0.4245, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.9903900508762012, |
|
"grad_norm": 0.5241693566205756, |
|
"learning_rate": 1.1897605366137264e-05, |
|
"loss": 0.426, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.9918974938760128, |
|
"grad_norm": 0.5078011601273249, |
|
"learning_rate": 1.1871743735365735e-05, |
|
"loss": 0.4147, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.9934049368758244, |
|
"grad_norm": 0.5014207467428378, |
|
"learning_rate": 1.1845869124884027e-05, |
|
"loss": 0.4029, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.994912379875636, |
|
"grad_norm": 0.5184450473918536, |
|
"learning_rate": 1.1819981714121054e-05, |
|
"loss": 0.4338, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9964198228754475, |
|
"grad_norm": 0.5218529509897015, |
|
"learning_rate": 1.1794081682594491e-05, |
|
"loss": 0.4001, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.997927265875259, |
|
"grad_norm": 0.5277285874094648, |
|
"learning_rate": 1.176816920990954e-05, |
|
"loss": 0.4225, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.9994347088750707, |
|
"grad_norm": 0.506018413554039, |
|
"learning_rate": 1.174224447575767e-05, |
|
"loss": 0.4398, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.0009421518748822, |
|
"grad_norm": 0.6655724719416495, |
|
"learning_rate": 1.171630765991538e-05, |
|
"loss": 0.377, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.0024495948746939, |
|
"grad_norm": 0.5752880840432146, |
|
"learning_rate": 1.169035894224295e-05, |
|
"loss": 0.325, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.0039570378745053, |
|
"grad_norm": 0.547046172496627, |
|
"learning_rate": 1.1664398502683194e-05, |
|
"loss": 0.3422, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.005464480874317, |
|
"grad_norm": 0.6183956576461548, |
|
"learning_rate": 1.1638426521260211e-05, |
|
"loss": 0.3551, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.0069719238741286, |
|
"grad_norm": 0.6272202909322583, |
|
"learning_rate": 1.1612443178078138e-05, |
|
"loss": 0.3293, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.00847936687394, |
|
"grad_norm": 0.6733584649632783, |
|
"learning_rate": 1.1586448653319908e-05, |
|
"loss": 0.3495, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.0099868098737517, |
|
"grad_norm": 0.5974677434978642, |
|
"learning_rate": 1.156044312724598e-05, |
|
"loss": 0.3339, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.0114942528735633, |
|
"grad_norm": 0.5526640261136243, |
|
"learning_rate": 1.153442678019311e-05, |
|
"loss": 0.3629, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.0130016958733747, |
|
"grad_norm": 0.5666634240071511, |
|
"learning_rate": 1.1508399792573095e-05, |
|
"loss": 0.3361, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.0145091388731864, |
|
"grad_norm": 0.6224882966351409, |
|
"learning_rate": 1.1482362344871514e-05, |
|
"loss": 0.3545, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.0160165818729978, |
|
"grad_norm": 0.5615749930186623, |
|
"learning_rate": 1.1456314617646482e-05, |
|
"loss": 0.3208, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.0175240248728095, |
|
"grad_norm": 0.548490348465347, |
|
"learning_rate": 1.1430256791527406e-05, |
|
"loss": 0.3278, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.019031467872621, |
|
"grad_norm": 0.6137191595237155, |
|
"learning_rate": 1.1404189047213716e-05, |
|
"loss": 0.3684, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.0205389108724325, |
|
"grad_norm": 0.6128432091688398, |
|
"learning_rate": 1.137811156547362e-05, |
|
"loss": 0.3479, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.0220463538722442, |
|
"grad_norm": 0.5530398492501923, |
|
"learning_rate": 1.1352024527142855e-05, |
|
"loss": 0.3258, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.0235537968720558, |
|
"grad_norm": 0.5691801541559598, |
|
"learning_rate": 1.1325928113123431e-05, |
|
"loss": 0.3359, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.0250612398718673, |
|
"grad_norm": 0.5996898750429057, |
|
"learning_rate": 1.129982250438237e-05, |
|
"loss": 0.34, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.026568682871679, |
|
"grad_norm": 0.5203916917045198, |
|
"learning_rate": 1.1273707881950445e-05, |
|
"loss": 0.3194, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.0280761258714906, |
|
"grad_norm": 0.5706678991613441, |
|
"learning_rate": 1.1247584426920962e-05, |
|
"loss": 0.3394, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.029583568871302, |
|
"grad_norm": 0.558797423405198, |
|
"learning_rate": 1.1221452320448449e-05, |
|
"loss": 0.3476, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.0310910118711136, |
|
"grad_norm": 0.5491796357132722, |
|
"learning_rate": 1.1195311743747445e-05, |
|
"loss": 0.3287, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.0325984548709253, |
|
"grad_norm": 0.5423270097914835, |
|
"learning_rate": 1.116916287809122e-05, |
|
"loss": 0.3315, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.0341058978707367, |
|
"grad_norm": 0.5440784988767636, |
|
"learning_rate": 1.1143005904810527e-05, |
|
"loss": 0.3409, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.0356133408705483, |
|
"grad_norm": 0.5506460404964368, |
|
"learning_rate": 1.1116841005292339e-05, |
|
"loss": 0.3665, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.03712078387036, |
|
"grad_norm": 0.5271450898091751, |
|
"learning_rate": 1.1090668360978589e-05, |
|
"loss": 0.3354, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.0386282268701714, |
|
"grad_norm": 0.5116723363561022, |
|
"learning_rate": 1.106448815336493e-05, |
|
"loss": 0.3055, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.040135669869983, |
|
"grad_norm": 0.5261827472069973, |
|
"learning_rate": 1.1038300563999455e-05, |
|
"loss": 0.3141, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.0416431128697947, |
|
"grad_norm": 0.5675715863653521, |
|
"learning_rate": 1.1012105774481446e-05, |
|
"loss": 0.3576, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.0431505558696061, |
|
"grad_norm": 0.542765155631167, |
|
"learning_rate": 1.0985903966460115e-05, |
|
"loss": 0.337, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.0446579988694178, |
|
"grad_norm": 0.576467518182856, |
|
"learning_rate": 1.0959695321633346e-05, |
|
"loss": 0.3345, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.0461654418692292, |
|
"grad_norm": 0.5261227763098979, |
|
"learning_rate": 1.0933480021746432e-05, |
|
"loss": 0.3137, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.0476728848690409, |
|
"grad_norm": 0.5529375328569147, |
|
"learning_rate": 1.0907258248590816e-05, |
|
"loss": 0.332, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.0491803278688525, |
|
"grad_norm": 0.5136240223834705, |
|
"learning_rate": 1.0881030184002827e-05, |
|
"loss": 0.3276, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.050687770868664, |
|
"grad_norm": 0.5367848385477425, |
|
"learning_rate": 1.0854796009862434e-05, |
|
"loss": 0.3163, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.0521952138684756, |
|
"grad_norm": 0.544930166455388, |
|
"learning_rate": 1.0828555908091958e-05, |
|
"loss": 0.359, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.0537026568682872, |
|
"grad_norm": 0.5387564352002492, |
|
"learning_rate": 1.0802310060654832e-05, |
|
"loss": 0.339, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.0552100998680987, |
|
"grad_norm": 0.5496802508408758, |
|
"learning_rate": 1.0776058649554336e-05, |
|
"loss": 0.3535, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0567175428679103, |
|
"grad_norm": 0.5348548485090446, |
|
"learning_rate": 1.0749801856832325e-05, |
|
"loss": 0.3368, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.058224985867722, |
|
"grad_norm": 0.5794289951348468, |
|
"learning_rate": 1.0723539864567983e-05, |
|
"loss": 0.3596, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.0597324288675334, |
|
"grad_norm": 0.5365708234277743, |
|
"learning_rate": 1.0697272854876537e-05, |
|
"loss": 0.3421, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.061239871867345, |
|
"grad_norm": 0.5904399117303262, |
|
"learning_rate": 1.0671001009908015e-05, |
|
"loss": 0.3348, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.0627473148671567, |
|
"grad_norm": 0.5204976732643493, |
|
"learning_rate": 1.0644724511845976e-05, |
|
"loss": 0.3525, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.064254757866968, |
|
"grad_norm": 0.6120309774969117, |
|
"learning_rate": 1.0618443542906251e-05, |
|
"loss": 0.3727, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.0657622008667798, |
|
"grad_norm": 0.6091575812702822, |
|
"learning_rate": 1.059215828533566e-05, |
|
"loss": 0.3588, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.0672696438665912, |
|
"grad_norm": 0.591151755333861, |
|
"learning_rate": 1.0565868921410776e-05, |
|
"loss": 0.3363, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.0687770868664028, |
|
"grad_norm": 0.5984602535754296, |
|
"learning_rate": 1.0539575633436645e-05, |
|
"loss": 0.3616, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.0702845298662145, |
|
"grad_norm": 0.5604228857922577, |
|
"learning_rate": 1.0513278603745523e-05, |
|
"loss": 0.3398, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.071791972866026, |
|
"grad_norm": 0.5557030870304388, |
|
"learning_rate": 1.0486978014695606e-05, |
|
"loss": 0.338, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.0732994158658375, |
|
"grad_norm": 0.5730991612503363, |
|
"learning_rate": 1.0460674048669783e-05, |
|
"loss": 0.3219, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.0748068588656492, |
|
"grad_norm": 0.6009828081011681, |
|
"learning_rate": 1.0434366888074363e-05, |
|
"loss": 0.3237, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.0763143018654606, |
|
"grad_norm": 0.5386294130513889, |
|
"learning_rate": 1.0408056715337797e-05, |
|
"loss": 0.3391, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.0778217448652723, |
|
"grad_norm": 0.5345878263288965, |
|
"learning_rate": 1.0381743712909424e-05, |
|
"loss": 0.3384, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.079329187865084, |
|
"grad_norm": 0.6369538253688138, |
|
"learning_rate": 1.0355428063258224e-05, |
|
"loss": 0.35, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.0808366308648953, |
|
"grad_norm": 0.5615591275271141, |
|
"learning_rate": 1.0329109948871512e-05, |
|
"loss": 0.3467, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.082344073864707, |
|
"grad_norm": 0.6406352309238248, |
|
"learning_rate": 1.0302789552253702e-05, |
|
"loss": 0.3523, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.0838515168645186, |
|
"grad_norm": 0.5212977047595297, |
|
"learning_rate": 1.0276467055925044e-05, |
|
"loss": 0.3185, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.08535895986433, |
|
"grad_norm": 0.5443802073020193, |
|
"learning_rate": 1.0250142642420335e-05, |
|
"loss": 0.3396, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.0868664028641417, |
|
"grad_norm": 0.5516128695838226, |
|
"learning_rate": 1.0223816494287675e-05, |
|
"loss": 0.3199, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.0883738458639534, |
|
"grad_norm": 0.5459335385131995, |
|
"learning_rate": 1.0197488794087188e-05, |
|
"loss": 0.2979, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.0898812888637648, |
|
"grad_norm": 0.5660471338581954, |
|
"learning_rate": 1.0171159724389766e-05, |
|
"loss": 0.3578, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.0913887318635764, |
|
"grad_norm": 0.577383627814168, |
|
"learning_rate": 1.0144829467775794e-05, |
|
"loss": 0.3253, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.092896174863388, |
|
"grad_norm": 0.5656943231881854, |
|
"learning_rate": 1.0118498206833886e-05, |
|
"loss": 0.3559, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.0944036178631995, |
|
"grad_norm": 0.5427797556871369, |
|
"learning_rate": 1.0092166124159628e-05, |
|
"loss": 0.3299, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.0959110608630112, |
|
"grad_norm": 0.5582731085039236, |
|
"learning_rate": 1.0065833402354302e-05, |
|
"loss": 0.342, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.0974185038628228, |
|
"grad_norm": 0.5809252708008414, |
|
"learning_rate": 1.003950022402361e-05, |
|
"loss": 0.3553, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.0989259468626342, |
|
"grad_norm": 0.5400373499865376, |
|
"learning_rate": 1.0013166771776441e-05, |
|
"loss": 0.3283, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.1004333898624459, |
|
"grad_norm": 0.5280335723569519, |
|
"learning_rate": 9.986833228223562e-06, |
|
"loss": 0.3567, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.1019408328622573, |
|
"grad_norm": 0.5756207231701386, |
|
"learning_rate": 9.96049977597639e-06, |
|
"loss": 0.3422, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.103448275862069, |
|
"grad_norm": 0.5917844968799806, |
|
"learning_rate": 9.934166597645703e-06, |
|
"loss": 0.3803, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.1049557188618806, |
|
"grad_norm": 0.5375048549376539, |
|
"learning_rate": 9.907833875840374e-06, |
|
"loss": 0.3421, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.106463161861692, |
|
"grad_norm": 0.5685857382900585, |
|
"learning_rate": 9.881501793166117e-06, |
|
"loss": 0.3658, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.1079706048615037, |
|
"grad_norm": 0.6069823667279429, |
|
"learning_rate": 9.85517053222421e-06, |
|
"loss": 0.3273, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.1094780478613153, |
|
"grad_norm": 0.5196609920900314, |
|
"learning_rate": 9.82884027561024e-06, |
|
"loss": 0.3233, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.1109854908611267, |
|
"grad_norm": 0.5702922246540342, |
|
"learning_rate": 9.802511205912815e-06, |
|
"loss": 0.35, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.1124929338609384, |
|
"grad_norm": 0.563216447988931, |
|
"learning_rate": 9.776183505712327e-06, |
|
"loss": 0.3578, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.11400037686075, |
|
"grad_norm": 0.5835000476343075, |
|
"learning_rate": 9.749857357579667e-06, |
|
"loss": 0.3753, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.1155078198605615, |
|
"grad_norm": 0.5822012862085456, |
|
"learning_rate": 9.723532944074961e-06, |
|
"loss": 0.3035, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.1170152628603731, |
|
"grad_norm": 0.5625362231656639, |
|
"learning_rate": 9.6972104477463e-06, |
|
"loss": 0.3669, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.1185227058601848, |
|
"grad_norm": 0.5816421569187623, |
|
"learning_rate": 9.670890051128493e-06, |
|
"loss": 0.3264, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.1200301488599962, |
|
"grad_norm": 0.6076866614497781, |
|
"learning_rate": 9.644571936741778e-06, |
|
"loss": 0.3448, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.1215375918598078, |
|
"grad_norm": 0.5868211335333723, |
|
"learning_rate": 9.618256287090576e-06, |
|
"loss": 0.3453, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.1230450348596195, |
|
"grad_norm": 0.5784910781884745, |
|
"learning_rate": 9.591943284662206e-06, |
|
"loss": 0.3543, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.124552477859431, |
|
"grad_norm": 0.5577968039251089, |
|
"learning_rate": 9.56563311192564e-06, |
|
"loss": 0.356, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.1260599208592426, |
|
"grad_norm": 0.5624603535612774, |
|
"learning_rate": 9.53932595133022e-06, |
|
"loss": 0.322, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.127567363859054, |
|
"grad_norm": 0.5863194939952109, |
|
"learning_rate": 9.513021985304399e-06, |
|
"loss": 0.341, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.1290748068588656, |
|
"grad_norm": 0.5297072497497793, |
|
"learning_rate": 9.486721396254484e-06, |
|
"loss": 0.3263, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.1305822498586773, |
|
"grad_norm": 0.5597259831895821, |
|
"learning_rate": 9.460424366563355e-06, |
|
"loss": 0.3243, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.1320896928584887, |
|
"grad_norm": 0.5464179018975297, |
|
"learning_rate": 9.434131078589224e-06, |
|
"loss": 0.3206, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.1335971358583004, |
|
"grad_norm": 0.5464450895465798, |
|
"learning_rate": 9.407841714664343e-06, |
|
"loss": 0.3387, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.135104578858112, |
|
"grad_norm": 0.5546542012199714, |
|
"learning_rate": 9.381556457093752e-06, |
|
"loss": 0.337, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.1366120218579234, |
|
"grad_norm": 0.5753540187155672, |
|
"learning_rate": 9.355275488154025e-06, |
|
"loss": 0.3644, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.138119464857735, |
|
"grad_norm": 0.571883771055452, |
|
"learning_rate": 9.32899899009199e-06, |
|
"loss": 0.332, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.1396269078575467, |
|
"grad_norm": 0.5383157533846678, |
|
"learning_rate": 9.30272714512347e-06, |
|
"loss": 0.3397, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.1411343508573581, |
|
"grad_norm": 0.564086061412075, |
|
"learning_rate": 9.276460135432019e-06, |
|
"loss": 0.3592, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.1426417938571698, |
|
"grad_norm": 0.510671608636206, |
|
"learning_rate": 9.250198143167675e-06, |
|
"loss": 0.3301, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.1441492368569812, |
|
"grad_norm": 0.5323793942216957, |
|
"learning_rate": 9.223941350445666e-06, |
|
"loss": 0.3341, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.1456566798567929, |
|
"grad_norm": 0.5359098725485097, |
|
"learning_rate": 9.19768993934517e-06, |
|
"loss": 0.3214, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.1471641228566045, |
|
"grad_norm": 0.5451361788825891, |
|
"learning_rate": 9.171444091908046e-06, |
|
"loss": 0.3195, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.1486715658564162, |
|
"grad_norm": 0.5246946467273069, |
|
"learning_rate": 9.145203990137571e-06, |
|
"loss": 0.3417, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.1501790088562276, |
|
"grad_norm": 0.5919155354849388, |
|
"learning_rate": 9.118969815997174e-06, |
|
"loss": 0.3417, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.1516864518560392, |
|
"grad_norm": 0.549339616533448, |
|
"learning_rate": 9.092741751409186e-06, |
|
"loss": 0.3323, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.1531938948558507, |
|
"grad_norm": 0.5520952523067345, |
|
"learning_rate": 9.06651997825357e-06, |
|
"loss": 0.325, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.1547013378556623, |
|
"grad_norm": 0.5546050111571403, |
|
"learning_rate": 9.040304678366658e-06, |
|
"loss": 0.3798, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.156208780855474, |
|
"grad_norm": 0.5355016099382737, |
|
"learning_rate": 9.014096033539889e-06, |
|
"loss": 0.3324, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.1577162238552854, |
|
"grad_norm": 0.5063535090434689, |
|
"learning_rate": 8.987894225518556e-06, |
|
"loss": 0.3098, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.159223666855097, |
|
"grad_norm": 0.538083701203612, |
|
"learning_rate": 8.961699436000548e-06, |
|
"loss": 0.3378, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.1607311098549087, |
|
"grad_norm": 0.5611833134978637, |
|
"learning_rate": 8.93551184663507e-06, |
|
"loss": 0.3291, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.16223855285472, |
|
"grad_norm": 0.5334833426063799, |
|
"learning_rate": 8.909331639021414e-06, |
|
"loss": 0.3265, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.1637459958545318, |
|
"grad_norm": 0.5370028500892263, |
|
"learning_rate": 8.883158994707666e-06, |
|
"loss": 0.353, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.1652534388543434, |
|
"grad_norm": 0.5564851227581507, |
|
"learning_rate": 8.856994095189477e-06, |
|
"loss": 0.314, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.1667608818541548, |
|
"grad_norm": 0.5516816420442727, |
|
"learning_rate": 8.830837121908783e-06, |
|
"loss": 0.3459, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.1682683248539665, |
|
"grad_norm": 0.546010896691211, |
|
"learning_rate": 8.804688256252557e-06, |
|
"loss": 0.3564, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.1697757678537781, |
|
"grad_norm": 0.5204242216440147, |
|
"learning_rate": 8.778547679551555e-06, |
|
"loss": 0.3093, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.1712832108535896, |
|
"grad_norm": 0.5530932960461594, |
|
"learning_rate": 8.75241557307904e-06, |
|
"loss": 0.3169, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.1727906538534012, |
|
"grad_norm": 0.5433657189299205, |
|
"learning_rate": 8.726292118049555e-06, |
|
"loss": 0.3238, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.1742980968532128, |
|
"grad_norm": 0.536612091168906, |
|
"learning_rate": 8.700177495617635e-06, |
|
"loss": 0.3375, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.1758055398530243, |
|
"grad_norm": 0.5547355998217709, |
|
"learning_rate": 8.674071886876572e-06, |
|
"loss": 0.3285, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.177312982852836, |
|
"grad_norm": 0.6048276095962777, |
|
"learning_rate": 8.647975472857148e-06, |
|
"loss": 0.3704, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.1788204258526473, |
|
"grad_norm": 0.571295755561053, |
|
"learning_rate": 8.621888434526382e-06, |
|
"loss": 0.374, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.180327868852459, |
|
"grad_norm": 0.5189210618741348, |
|
"learning_rate": 8.595810952786289e-06, |
|
"loss": 0.3247, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.1818353118522706, |
|
"grad_norm": 0.5407807768349286, |
|
"learning_rate": 8.569743208472594e-06, |
|
"loss": 0.318, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.183342754852082, |
|
"grad_norm": 0.5555658150734397, |
|
"learning_rate": 8.543685382353518e-06, |
|
"loss": 0.342, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.1848501978518937, |
|
"grad_norm": 0.5478498420172522, |
|
"learning_rate": 8.51763765512849e-06, |
|
"loss": 0.3386, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.1863576408517054, |
|
"grad_norm": 0.5229096665922429, |
|
"learning_rate": 8.491600207426907e-06, |
|
"loss": 0.3218, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.1878650838515168, |
|
"grad_norm": 0.5706786350698708, |
|
"learning_rate": 8.465573219806893e-06, |
|
"loss": 0.3491, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.1893725268513284, |
|
"grad_norm": 0.5792169640912351, |
|
"learning_rate": 8.439556872754025e-06, |
|
"loss": 0.3482, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.19087996985114, |
|
"grad_norm": 0.5219487046954187, |
|
"learning_rate": 8.413551346680095e-06, |
|
"loss": 0.3183, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.1923874128509515, |
|
"grad_norm": 0.5680359320055756, |
|
"learning_rate": 8.38755682192186e-06, |
|
"loss": 0.3257, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.1938948558507632, |
|
"grad_norm": 0.54324153892485, |
|
"learning_rate": 8.36157347873979e-06, |
|
"loss": 0.3151, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.1954022988505748, |
|
"grad_norm": 0.5584145695371312, |
|
"learning_rate": 8.335601497316809e-06, |
|
"loss": 0.3474, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.1969097418503862, |
|
"grad_norm": 0.5414937178807059, |
|
"learning_rate": 8.309641057757052e-06, |
|
"loss": 0.3348, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.1984171848501979, |
|
"grad_norm": 0.5933495505366142, |
|
"learning_rate": 8.283692340084623e-06, |
|
"loss": 0.3743, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.1999246278500095, |
|
"grad_norm": 0.5730820400742883, |
|
"learning_rate": 8.257755524242333e-06, |
|
"loss": 0.3437, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.201432070849821, |
|
"grad_norm": 0.5154842086228131, |
|
"learning_rate": 8.231830790090461e-06, |
|
"loss": 0.3271, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.2029395138496326, |
|
"grad_norm": 0.5445619151521616, |
|
"learning_rate": 8.205918317405508e-06, |
|
"loss": 0.3229, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.204446956849444, |
|
"grad_norm": 0.6016710522110904, |
|
"learning_rate": 8.18001828587895e-06, |
|
"loss": 0.3609, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.2059543998492557, |
|
"grad_norm": 0.5457762036159068, |
|
"learning_rate": 8.154130875115978e-06, |
|
"loss": 0.318, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2074618428490673, |
|
"grad_norm": 0.5404902176604001, |
|
"learning_rate": 8.12825626463427e-06, |
|
"loss": 0.3323, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.2089692858488787, |
|
"grad_norm": 0.5722847292063646, |
|
"learning_rate": 8.102394633862743e-06, |
|
"loss": 0.3147, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.2104767288486904, |
|
"grad_norm": 0.5531842770730636, |
|
"learning_rate": 8.0765461621403e-06, |
|
"loss": 0.331, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.211984171848502, |
|
"grad_norm": 0.5256780853712785, |
|
"learning_rate": 8.050711028714589e-06, |
|
"loss": 0.3176, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.2134916148483135, |
|
"grad_norm": 0.6144603881477418, |
|
"learning_rate": 8.02488941274078e-06, |
|
"loss": 0.3383, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.2149990578481251, |
|
"grad_norm": 0.571788365434139, |
|
"learning_rate": 7.999081493280283e-06, |
|
"loss": 0.3258, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.2165065008479368, |
|
"grad_norm": 0.5982762464323738, |
|
"learning_rate": 7.973287449299545e-06, |
|
"loss": 0.3503, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.2180139438477482, |
|
"grad_norm": 0.5363356894959806, |
|
"learning_rate": 7.947507459668784e-06, |
|
"loss": 0.3436, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.2195213868475598, |
|
"grad_norm": 0.5730894211276505, |
|
"learning_rate": 7.921741703160758e-06, |
|
"loss": 0.3584, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.2210288298473715, |
|
"grad_norm": 0.563926690224309, |
|
"learning_rate": 7.895990358449533e-06, |
|
"loss": 0.3291, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.222536272847183, |
|
"grad_norm": 0.5254920217508706, |
|
"learning_rate": 7.87025360410922e-06, |
|
"loss": 0.316, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.2240437158469946, |
|
"grad_norm": 0.5313261676986573, |
|
"learning_rate": 7.844531618612772e-06, |
|
"loss": 0.3319, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.2255511588468062, |
|
"grad_norm": 0.5790168954324271, |
|
"learning_rate": 7.81882458033071e-06, |
|
"loss": 0.3202, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.2270586018466176, |
|
"grad_norm": 0.5385942852927429, |
|
"learning_rate": 7.79313266752991e-06, |
|
"loss": 0.3259, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.2285660448464293, |
|
"grad_norm": 0.5551972130449111, |
|
"learning_rate": 7.767456058372362e-06, |
|
"loss": 0.3385, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.2300734878462407, |
|
"grad_norm": 0.5322043372006761, |
|
"learning_rate": 7.741794930913922e-06, |
|
"loss": 0.3215, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.2315809308460524, |
|
"grad_norm": 0.5541120887430956, |
|
"learning_rate": 7.7161494631031e-06, |
|
"loss": 0.3428, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.233088373845864, |
|
"grad_norm": 0.5527885462222231, |
|
"learning_rate": 7.690519832779799e-06, |
|
"loss": 0.3389, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.2345958168456754, |
|
"grad_norm": 0.5406331170872595, |
|
"learning_rate": 7.664906217674115e-06, |
|
"loss": 0.3112, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.236103259845487, |
|
"grad_norm": 0.5055150883042695, |
|
"learning_rate": 7.639308795405066e-06, |
|
"loss": 0.3202, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.2376107028452987, |
|
"grad_norm": 0.5563269801825349, |
|
"learning_rate": 7.613727743479395e-06, |
|
"loss": 0.3571, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.2391181458451102, |
|
"grad_norm": 0.5792057709615847, |
|
"learning_rate": 7.588163239290316e-06, |
|
"loss": 0.3329, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.2406255888449218, |
|
"grad_norm": 0.5666249401867434, |
|
"learning_rate": 7.562615460116289e-06, |
|
"loss": 0.351, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.2421330318447334, |
|
"grad_norm": 0.5265355387938444, |
|
"learning_rate": 7.537084583119802e-06, |
|
"loss": 0.3701, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.2436404748445449, |
|
"grad_norm": 0.5495841716595921, |
|
"learning_rate": 7.511570785346129e-06, |
|
"loss": 0.329, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.2451479178443565, |
|
"grad_norm": 0.5587199026990006, |
|
"learning_rate": 7.486074243722109e-06, |
|
"loss": 0.3252, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.2466553608441682, |
|
"grad_norm": 0.5211341468152613, |
|
"learning_rate": 7.460595135054916e-06, |
|
"loss": 0.3311, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.2481628038439796, |
|
"grad_norm": 0.5364245497529563, |
|
"learning_rate": 7.435133636030831e-06, |
|
"loss": 0.3208, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.2496702468437912, |
|
"grad_norm": 0.5314247345107659, |
|
"learning_rate": 7.4096899232140295e-06, |
|
"loss": 0.3317, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.251177689843603, |
|
"grad_norm": 0.5630710895853528, |
|
"learning_rate": 7.384264173045339e-06, |
|
"loss": 0.3351, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.2526851328434143, |
|
"grad_norm": 0.5197283769421239, |
|
"learning_rate": 7.358856561841021e-06, |
|
"loss": 0.3065, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.254192575843226, |
|
"grad_norm": 0.5568788382198039, |
|
"learning_rate": 7.333467265791563e-06, |
|
"loss": 0.351, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.2557000188430374, |
|
"grad_norm": 0.5725220505007355, |
|
"learning_rate": 7.308096460960441e-06, |
|
"loss": 0.3439, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.257207461842849, |
|
"grad_norm": 0.5304098730159461, |
|
"learning_rate": 7.282744323282895e-06, |
|
"loss": 0.3188, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.2587149048426607, |
|
"grad_norm": 0.5263594760039901, |
|
"learning_rate": 7.2574110285647244e-06, |
|
"loss": 0.3209, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.2602223478424723, |
|
"grad_norm": 0.6039158733618325, |
|
"learning_rate": 7.232096752481061e-06, |
|
"loss": 0.3366, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.2617297908422838, |
|
"grad_norm": 0.5807414247418556, |
|
"learning_rate": 7.206801670575145e-06, |
|
"loss": 0.3446, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.2632372338420954, |
|
"grad_norm": 0.5398549865816707, |
|
"learning_rate": 7.181525958257116e-06, |
|
"loss": 0.2976, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.2647446768419068, |
|
"grad_norm": 0.5502842121004295, |
|
"learning_rate": 7.156269790802801e-06, |
|
"loss": 0.3308, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.2662521198417185, |
|
"grad_norm": 0.5520318040890088, |
|
"learning_rate": 7.131033343352483e-06, |
|
"loss": 0.3347, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.2677595628415301, |
|
"grad_norm": 0.5430821284421434, |
|
"learning_rate": 7.105816790909699e-06, |
|
"loss": 0.3199, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.2692670058413416, |
|
"grad_norm": 0.5268656785617308, |
|
"learning_rate": 7.080620308340024e-06, |
|
"loss": 0.3368, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.2707744488411532, |
|
"grad_norm": 0.5488558866283424, |
|
"learning_rate": 7.055444070369852e-06, |
|
"loss": 0.3185, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.2722818918409646, |
|
"grad_norm": 0.5234636191148432, |
|
"learning_rate": 7.0302882515852025e-06, |
|
"loss": 0.3156, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.2737893348407763, |
|
"grad_norm": 0.5571922620156962, |
|
"learning_rate": 7.005153026430476e-06, |
|
"loss": 0.3475, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.275296777840588, |
|
"grad_norm": 0.6043172841328527, |
|
"learning_rate": 6.980038569207291e-06, |
|
"loss": 0.3535, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.2768042208403996, |
|
"grad_norm": 0.5449462283830545, |
|
"learning_rate": 6.954945054073228e-06, |
|
"loss": 0.3485, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.278311663840211, |
|
"grad_norm": 0.553125976275942, |
|
"learning_rate": 6.929872655040655e-06, |
|
"loss": 0.3392, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.2798191068400226, |
|
"grad_norm": 0.5313032640250875, |
|
"learning_rate": 6.904821545975507e-06, |
|
"loss": 0.3533, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.281326549839834, |
|
"grad_norm": 0.5461530058972931, |
|
"learning_rate": 6.879791900596077e-06, |
|
"loss": 0.3082, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.2828339928396457, |
|
"grad_norm": 0.5268975792503748, |
|
"learning_rate": 6.854783892471823e-06, |
|
"loss": 0.3507, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.2843414358394574, |
|
"grad_norm": 0.525335909935522, |
|
"learning_rate": 6.829797695022163e-06, |
|
"loss": 0.3137, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.285848878839269, |
|
"grad_norm": 0.5439698304073414, |
|
"learning_rate": 6.804833481515256e-06, |
|
"loss": 0.3269, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.2873563218390804, |
|
"grad_norm": 0.5426503592650488, |
|
"learning_rate": 6.7798914250668154e-06, |
|
"loss": 0.3255, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.288863764838892, |
|
"grad_norm": 0.546578985401071, |
|
"learning_rate": 6.7549716986389146e-06, |
|
"loss": 0.3357, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.2903712078387035, |
|
"grad_norm": 0.5433998763126892, |
|
"learning_rate": 6.730074475038766e-06, |
|
"loss": 0.3316, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.2918786508385152, |
|
"grad_norm": 0.5364588967630985, |
|
"learning_rate": 6.7051999269175405e-06, |
|
"loss": 0.3305, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.2933860938383268, |
|
"grad_norm": 0.5658934841388523, |
|
"learning_rate": 6.680348226769162e-06, |
|
"loss": 0.329, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.2948935368381382, |
|
"grad_norm": 0.5643062239325746, |
|
"learning_rate": 6.655519546929121e-06, |
|
"loss": 0.3297, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.2964009798379499, |
|
"grad_norm": 0.5371342456598566, |
|
"learning_rate": 6.630714059573267e-06, |
|
"loss": 0.3411, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.2979084228377613, |
|
"grad_norm": 0.5429869820067992, |
|
"learning_rate": 6.6059319367166165e-06, |
|
"loss": 0.3162, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.299415865837573, |
|
"grad_norm": 0.6163498341710386, |
|
"learning_rate": 6.581173350212169e-06, |
|
"loss": 0.3346, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.3009233088373846, |
|
"grad_norm": 0.5249574401357171, |
|
"learning_rate": 6.55643847174971e-06, |
|
"loss": 0.3184, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.3024307518371963, |
|
"grad_norm": 0.5652427669527782, |
|
"learning_rate": 6.531727472854617e-06, |
|
"loss": 0.3277, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.3039381948370077, |
|
"grad_norm": 0.5499255875094143, |
|
"learning_rate": 6.507040524886672e-06, |
|
"loss": 0.3099, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.3054456378368193, |
|
"grad_norm": 0.5395982289283698, |
|
"learning_rate": 6.482377799038882e-06, |
|
"loss": 0.312, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.3069530808366308, |
|
"grad_norm": 0.5425266392409812, |
|
"learning_rate": 6.45773946633628e-06, |
|
"loss": 0.3288, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.3084605238364424, |
|
"grad_norm": 0.5289252666187554, |
|
"learning_rate": 6.4331256976347434e-06, |
|
"loss": 0.3143, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.309967966836254, |
|
"grad_norm": 0.5829209174715098, |
|
"learning_rate": 6.408536663619803e-06, |
|
"loss": 0.3215, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.3114754098360657, |
|
"grad_norm": 0.5955771972087047, |
|
"learning_rate": 6.383972534805478e-06, |
|
"loss": 0.3232, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.3129828528358771, |
|
"grad_norm": 0.5434757595303122, |
|
"learning_rate": 6.359433481533074e-06, |
|
"loss": 0.318, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.3144902958356888, |
|
"grad_norm": 0.5313303734643687, |
|
"learning_rate": 6.3349196739700024e-06, |
|
"loss": 0.3159, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.3159977388355002, |
|
"grad_norm": 0.5678985167703594, |
|
"learning_rate": 6.310431282108622e-06, |
|
"loss": 0.3757, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.3175051818353118, |
|
"grad_norm": 0.5648846998829979, |
|
"learning_rate": 6.2859684757650365e-06, |
|
"loss": 0.3493, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.3190126248351235, |
|
"grad_norm": 0.5488508237006199, |
|
"learning_rate": 6.261531424577923e-06, |
|
"loss": 0.3219, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.320520067834935, |
|
"grad_norm": 0.5438041497270804, |
|
"learning_rate": 6.2371202980073596e-06, |
|
"loss": 0.3417, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.3220275108347466, |
|
"grad_norm": 0.5400748419899576, |
|
"learning_rate": 6.212735265333655e-06, |
|
"loss": 0.3025, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.323534953834558, |
|
"grad_norm": 0.5299843425249701, |
|
"learning_rate": 6.188376495656156e-06, |
|
"loss": 0.3374, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.3250423968343696, |
|
"grad_norm": 0.5236709426443396, |
|
"learning_rate": 6.164044157892102e-06, |
|
"loss": 0.3221, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.3265498398341813, |
|
"grad_norm": 0.5699051867060005, |
|
"learning_rate": 6.13973842077543e-06, |
|
"loss": 0.3226, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.328057282833993, |
|
"grad_norm": 0.5655665319377791, |
|
"learning_rate": 6.11545945285561e-06, |
|
"loss": 0.3175, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.3295647258338044, |
|
"grad_norm": 0.5387118438674878, |
|
"learning_rate": 6.091207422496489e-06, |
|
"loss": 0.3243, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.331072168833616, |
|
"grad_norm": 0.5744706591584219, |
|
"learning_rate": 6.066982497875109e-06, |
|
"loss": 0.3286, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.3325796118334274, |
|
"grad_norm": 0.542466249812019, |
|
"learning_rate": 6.042784846980542e-06, |
|
"loss": 0.3225, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.334087054833239, |
|
"grad_norm": 0.5515488785701044, |
|
"learning_rate": 6.018614637612733e-06, |
|
"loss": 0.3238, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.3355944978330507, |
|
"grad_norm": 0.5349896204088196, |
|
"learning_rate": 5.99447203738134e-06, |
|
"loss": 0.324, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.3371019408328624, |
|
"grad_norm": 0.5371367792089301, |
|
"learning_rate": 5.9703572137045495e-06, |
|
"loss": 0.3369, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.3386093838326738, |
|
"grad_norm": 0.5615358147993731, |
|
"learning_rate": 5.946270333807937e-06, |
|
"loss": 0.3052, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.3401168268324855, |
|
"grad_norm": 0.5743727933679714, |
|
"learning_rate": 5.922211564723302e-06, |
|
"loss": 0.3455, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.3416242698322969, |
|
"grad_norm": 0.5336291605723125, |
|
"learning_rate": 5.898181073287504e-06, |
|
"loss": 0.3226, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.3431317128321085, |
|
"grad_norm": 0.5722163135210774, |
|
"learning_rate": 5.87417902614131e-06, |
|
"loss": 0.3646, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.3446391558319202, |
|
"grad_norm": 0.5524401803992677, |
|
"learning_rate": 5.850205589728239e-06, |
|
"loss": 0.3016, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.3461465988317316, |
|
"grad_norm": 0.5311906031167264, |
|
"learning_rate": 5.826260930293417e-06, |
|
"loss": 0.3174, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.3476540418315432, |
|
"grad_norm": 0.5385817256109608, |
|
"learning_rate": 5.802345213882396e-06, |
|
"loss": 0.3447, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.349161484831355, |
|
"grad_norm": 0.5443226270708521, |
|
"learning_rate": 5.778458606340037e-06, |
|
"loss": 0.3056, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.3506689278311663, |
|
"grad_norm": 0.5247659222065347, |
|
"learning_rate": 5.754601273309333e-06, |
|
"loss": 0.3045, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.352176370830978, |
|
"grad_norm": 0.5555702487315548, |
|
"learning_rate": 5.730773380230276e-06, |
|
"loss": 0.3186, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.3536838138307896, |
|
"grad_norm": 0.5661524854903914, |
|
"learning_rate": 5.70697509233871e-06, |
|
"loss": 0.3248, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.355191256830601, |
|
"grad_norm": 0.5284857763614461, |
|
"learning_rate": 5.683206574665165e-06, |
|
"loss": 0.3018, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.3566986998304127, |
|
"grad_norm": 0.5740850180912764, |
|
"learning_rate": 5.6594679920337514e-06, |
|
"loss": 0.3529, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.3582061428302241, |
|
"grad_norm": 0.5299086867590524, |
|
"learning_rate": 5.635759509060969e-06, |
|
"loss": 0.32, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.3597135858300358, |
|
"grad_norm": 0.5330329949433913, |
|
"learning_rate": 5.612081290154607e-06, |
|
"loss": 0.3156, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.3612210288298474, |
|
"grad_norm": 0.5347611903592508, |
|
"learning_rate": 5.58843349951258e-06, |
|
"loss": 0.3183, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.362728471829659, |
|
"grad_norm": 0.5409021340662399, |
|
"learning_rate": 5.564816301121792e-06, |
|
"loss": 0.3411, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.3642359148294705, |
|
"grad_norm": 0.5560565848550149, |
|
"learning_rate": 5.541229858757011e-06, |
|
"loss": 0.3508, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.3657433578292821, |
|
"grad_norm": 0.5040665273430834, |
|
"learning_rate": 5.517674335979721e-06, |
|
"loss": 0.3038, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.3672508008290936, |
|
"grad_norm": 0.5520505173652595, |
|
"learning_rate": 5.494149896136998e-06, |
|
"loss": 0.3342, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.3687582438289052, |
|
"grad_norm": 0.5286100688050495, |
|
"learning_rate": 5.470656702360367e-06, |
|
"loss": 0.3051, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.3702656868287169, |
|
"grad_norm": 0.5540464877346475, |
|
"learning_rate": 5.447194917564671e-06, |
|
"loss": 0.3327, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.3717731298285283, |
|
"grad_norm": 0.5401690086723988, |
|
"learning_rate": 5.423764704446954e-06, |
|
"loss": 0.332, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.37328057282834, |
|
"grad_norm": 0.5440262612621518, |
|
"learning_rate": 5.400366225485326e-06, |
|
"loss": 0.3326, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.3747880158281516, |
|
"grad_norm": 0.5291318028597245, |
|
"learning_rate": 5.376999642937817e-06, |
|
"loss": 0.3262, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.376295458827963, |
|
"grad_norm": 0.5361093139503608, |
|
"learning_rate": 5.353665118841296e-06, |
|
"loss": 0.3258, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.3778029018277747, |
|
"grad_norm": 0.5442991814951846, |
|
"learning_rate": 5.330362815010306e-06, |
|
"loss": 0.3162, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.3793103448275863, |
|
"grad_norm": 0.5384147655921361, |
|
"learning_rate": 5.307092893035951e-06, |
|
"loss": 0.3381, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.3808177878273977, |
|
"grad_norm": 0.539100490777508, |
|
"learning_rate": 5.2838555142847925e-06, |
|
"loss": 0.3423, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.3823252308272094, |
|
"grad_norm": 0.5427293981456651, |
|
"learning_rate": 5.260650839897719e-06, |
|
"loss": 0.3217, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.3838326738270208, |
|
"grad_norm": 0.5726046303707281, |
|
"learning_rate": 5.237479030788817e-06, |
|
"loss": 0.3507, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.3853401168268324, |
|
"grad_norm": 0.5527176893463295, |
|
"learning_rate": 5.214340247644278e-06, |
|
"loss": 0.331, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.386847559826644, |
|
"grad_norm": 0.5481340240469819, |
|
"learning_rate": 5.191234650921273e-06, |
|
"loss": 0.318, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.3883550028264557, |
|
"grad_norm": 0.5543962320620248, |
|
"learning_rate": 5.168162400846835e-06, |
|
"loss": 0.3155, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.3898624458262672, |
|
"grad_norm": 0.5367203136891187, |
|
"learning_rate": 5.145123657416759e-06, |
|
"loss": 0.3326, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.3913698888260788, |
|
"grad_norm": 0.5460167764137122, |
|
"learning_rate": 5.122118580394473e-06, |
|
"loss": 0.337, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.3928773318258902, |
|
"grad_norm": 0.5195547700814616, |
|
"learning_rate": 5.099147329309959e-06, |
|
"loss": 0.326, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.394384774825702, |
|
"grad_norm": 0.5456994955845843, |
|
"learning_rate": 5.076210063458622e-06, |
|
"loss": 0.3322, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.3958922178255135, |
|
"grad_norm": 0.5843461937914468, |
|
"learning_rate": 5.0533069419002e-06, |
|
"loss": 0.339, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.397399660825325, |
|
"grad_norm": 0.5150644731537505, |
|
"learning_rate": 5.030438123457655e-06, |
|
"loss": 0.2913, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.3989071038251366, |
|
"grad_norm": 0.5258159746479392, |
|
"learning_rate": 5.007603766716063e-06, |
|
"loss": 0.3107, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.4004145468249483, |
|
"grad_norm": 0.5748165660930256, |
|
"learning_rate": 4.984804030021533e-06, |
|
"loss": 0.3328, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.4019219898247597, |
|
"grad_norm": 0.5630833950584739, |
|
"learning_rate": 4.962039071480102e-06, |
|
"loss": 0.3233, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.4034294328245713, |
|
"grad_norm": 0.537736675931464, |
|
"learning_rate": 4.939309048956622e-06, |
|
"loss": 0.3451, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.404936875824383, |
|
"grad_norm": 0.5230851918523695, |
|
"learning_rate": 4.9166141200736885e-06, |
|
"loss": 0.3389, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.4064443188241944, |
|
"grad_norm": 0.5434274109432955, |
|
"learning_rate": 4.89395444221055e-06, |
|
"loss": 0.3189, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.407951761824006, |
|
"grad_norm": 0.5467058284642171, |
|
"learning_rate": 4.871330172501979e-06, |
|
"loss": 0.3218, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.4094592048238175, |
|
"grad_norm": 0.5643806859737526, |
|
"learning_rate": 4.848741467837228e-06, |
|
"loss": 0.339, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.4109666478236291, |
|
"grad_norm": 0.5210588389675654, |
|
"learning_rate": 4.826188484858918e-06, |
|
"loss": 0.2865, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.4124740908234408, |
|
"grad_norm": 0.5575173474168307, |
|
"learning_rate": 4.803671379961945e-06, |
|
"loss": 0.3448, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.4139815338232524, |
|
"grad_norm": 0.5553202634668, |
|
"learning_rate": 4.781190309292421e-06, |
|
"loss": 0.318, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.4154889768230638, |
|
"grad_norm": 0.5277473116633332, |
|
"learning_rate": 4.758745428746569e-06, |
|
"loss": 0.3047, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.4169964198228755, |
|
"grad_norm": 0.5308118737562897, |
|
"learning_rate": 4.736336893969652e-06, |
|
"loss": 0.3126, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.418503862822687, |
|
"grad_norm": 0.562016412444855, |
|
"learning_rate": 4.7139648603548925e-06, |
|
"loss": 0.3306, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.4200113058224986, |
|
"grad_norm": 0.5112809802949265, |
|
"learning_rate": 4.691629483042387e-06, |
|
"loss": 0.2877, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.4215187488223102, |
|
"grad_norm": 0.555811318693021, |
|
"learning_rate": 4.669330916918043e-06, |
|
"loss": 0.3346, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.4230261918221216, |
|
"grad_norm": 0.5388925234150407, |
|
"learning_rate": 4.647069316612502e-06, |
|
"loss": 0.3137, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.4245336348219333, |
|
"grad_norm": 0.5265475799399302, |
|
"learning_rate": 4.624844836500052e-06, |
|
"loss": 0.3162, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.426041077821745, |
|
"grad_norm": 0.5112485522585755, |
|
"learning_rate": 4.60265763069758e-06, |
|
"loss": 0.2914, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.4275485208215564, |
|
"grad_norm": 0.5285723749640436, |
|
"learning_rate": 4.580507853063487e-06, |
|
"loss": 0.3098, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.429055963821368, |
|
"grad_norm": 0.5408160656578395, |
|
"learning_rate": 4.5583956571966295e-06, |
|
"loss": 0.3365, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.4305634068211797, |
|
"grad_norm": 0.5598936258222863, |
|
"learning_rate": 4.5363211964352524e-06, |
|
"loss": 0.3292, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.432070849820991, |
|
"grad_norm": 0.5180426805197446, |
|
"learning_rate": 4.514284623855915e-06, |
|
"loss": 0.3174, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.4335782928208027, |
|
"grad_norm": 0.5639401953538313, |
|
"learning_rate": 4.4922860922724466e-06, |
|
"loss": 0.3617, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.4350857358206142, |
|
"grad_norm": 0.5482846937319309, |
|
"learning_rate": 4.470325754234881e-06, |
|
"loss": 0.3256, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.4365931788204258, |
|
"grad_norm": 0.530946653125974, |
|
"learning_rate": 4.448403762028391e-06, |
|
"loss": 0.3367, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.4381006218202375, |
|
"grad_norm": 0.5630491613208096, |
|
"learning_rate": 4.426520267672244e-06, |
|
"loss": 0.33, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.439608064820049, |
|
"grad_norm": 0.5281029541497921, |
|
"learning_rate": 4.40467542291874e-06, |
|
"loss": 0.3266, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.4411155078198605, |
|
"grad_norm": 0.5134408808419982, |
|
"learning_rate": 4.382869379252152e-06, |
|
"loss": 0.3002, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.4426229508196722, |
|
"grad_norm": 0.5379209815628555, |
|
"learning_rate": 4.361102287887698e-06, |
|
"loss": 0.3197, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.4441303938194836, |
|
"grad_norm": 0.5118973398445678, |
|
"learning_rate": 4.339374299770477e-06, |
|
"loss": 0.316, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.4456378368192953, |
|
"grad_norm": 0.5340927744773125, |
|
"learning_rate": 4.31768556557441e-06, |
|
"loss": 0.2995, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.447145279819107, |
|
"grad_norm": 0.5574015013189253, |
|
"learning_rate": 4.296036235701235e-06, |
|
"loss": 0.3214, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.4486527228189185, |
|
"grad_norm": 0.544283711827625, |
|
"learning_rate": 4.274426460279412e-06, |
|
"loss": 0.309, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.45016016581873, |
|
"grad_norm": 0.5456093505990249, |
|
"learning_rate": 4.252856389163128e-06, |
|
"loss": 0.3056, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.4516676088185416, |
|
"grad_norm": 0.5524390480774677, |
|
"learning_rate": 4.231326171931231e-06, |
|
"loss": 0.2988, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.453175051818353, |
|
"grad_norm": 0.5357243619653109, |
|
"learning_rate": 4.209835957886196e-06, |
|
"loss": 0.3051, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.4546824948181647, |
|
"grad_norm": 0.5567188365205857, |
|
"learning_rate": 4.188385896053098e-06, |
|
"loss": 0.3211, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.4561899378179763, |
|
"grad_norm": 0.5571208137663407, |
|
"learning_rate": 4.166976135178575e-06, |
|
"loss": 0.3212, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.4576973808177878, |
|
"grad_norm": 0.5288681574805124, |
|
"learning_rate": 4.1456068237297964e-06, |
|
"loss": 0.3247, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.4592048238175994, |
|
"grad_norm": 0.5362640814930834, |
|
"learning_rate": 4.124278109893432e-06, |
|
"loss": 0.3206, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.4607122668174108, |
|
"grad_norm": 0.5300069434968542, |
|
"learning_rate": 4.10299014157462e-06, |
|
"loss": 0.299, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.4622197098172225, |
|
"grad_norm": 0.5207197137299924, |
|
"learning_rate": 4.0817430663959536e-06, |
|
"loss": 0.2872, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.4637271528170341, |
|
"grad_norm": 0.5361880777046366, |
|
"learning_rate": 4.06053703169645e-06, |
|
"loss": 0.3432, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.4652345958168458, |
|
"grad_norm": 0.5390277129867954, |
|
"learning_rate": 4.039372184530521e-06, |
|
"loss": 0.3121, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.4667420388166572, |
|
"grad_norm": 0.5098624467494199, |
|
"learning_rate": 4.0182486716669656e-06, |
|
"loss": 0.3057, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.4682494818164689, |
|
"grad_norm": 0.5530264319623549, |
|
"learning_rate": 3.9971666395879605e-06, |
|
"loss": 0.316, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.4697569248162803, |
|
"grad_norm": 0.5648165554049958, |
|
"learning_rate": 3.9761262344880096e-06, |
|
"loss": 0.3456, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.471264367816092, |
|
"grad_norm": 0.5376597362402104, |
|
"learning_rate": 3.9551276022729644e-06, |
|
"loss": 0.3075, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.4727718108159036, |
|
"grad_norm": 0.5207214018679573, |
|
"learning_rate": 3.9341708885590034e-06, |
|
"loss": 0.3043, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.4742792538157152, |
|
"grad_norm": 0.5346717454580582, |
|
"learning_rate": 3.913256238671607e-06, |
|
"loss": 0.3187, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.4757866968155267, |
|
"grad_norm": 0.5474625757974195, |
|
"learning_rate": 3.89238379764457e-06, |
|
"loss": 0.3341, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.4772941398153383, |
|
"grad_norm": 0.553265646517597, |
|
"learning_rate": 3.871553710218988e-06, |
|
"loss": 0.3615, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.4788015828151497, |
|
"grad_norm": 0.5178190237398634, |
|
"learning_rate": 3.850766120842252e-06, |
|
"loss": 0.3087, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.4803090258149614, |
|
"grad_norm": 0.5676605070232937, |
|
"learning_rate": 3.830021173667048e-06, |
|
"loss": 0.3331, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.481816468814773, |
|
"grad_norm": 0.5366490741054173, |
|
"learning_rate": 3.809319012550352e-06, |
|
"loss": 0.3134, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.4833239118145845, |
|
"grad_norm": 0.5237338303143243, |
|
"learning_rate": 3.788659781052444e-06, |
|
"loss": 0.3426, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.484831354814396, |
|
"grad_norm": 0.5118568891202759, |
|
"learning_rate": 3.7680436224359084e-06, |
|
"loss": 0.3049, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.4863387978142075, |
|
"grad_norm": 0.5474188971913913, |
|
"learning_rate": 3.747470679664624e-06, |
|
"loss": 0.3177, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.4878462408140192, |
|
"grad_norm": 0.5623337896836218, |
|
"learning_rate": 3.7269410954028107e-06, |
|
"loss": 0.3268, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.4893536838138308, |
|
"grad_norm": 0.5320249898828978, |
|
"learning_rate": 3.706455012013994e-06, |
|
"loss": 0.3135, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.4908611268136425, |
|
"grad_norm": 0.5258630499196119, |
|
"learning_rate": 3.6860125715600513e-06, |
|
"loss": 0.2922, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.492368569813454, |
|
"grad_norm": 0.5488691155290143, |
|
"learning_rate": 3.665613915800217e-06, |
|
"loss": 0.3093, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.4938760128132655, |
|
"grad_norm": 0.534561267695021, |
|
"learning_rate": 3.6452591861900886e-06, |
|
"loss": 0.3201, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.495383455813077, |
|
"grad_norm": 0.5493934402188156, |
|
"learning_rate": 3.6249485238806637e-06, |
|
"loss": 0.3258, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.4968908988128886, |
|
"grad_norm": 0.5213745241077384, |
|
"learning_rate": 3.6046820697173514e-06, |
|
"loss": 0.3206, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.4983983418127003, |
|
"grad_norm": 0.5189896345789112, |
|
"learning_rate": 3.5844599642389965e-06, |
|
"loss": 0.3093, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.499905784812512, |
|
"grad_norm": 0.535438446636319, |
|
"learning_rate": 3.564282347676903e-06, |
|
"loss": 0.3449, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.5014132278123233, |
|
"grad_norm": 0.5151601964534807, |
|
"learning_rate": 3.54414935995387e-06, |
|
"loss": 0.3002, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.5029206708121348, |
|
"grad_norm": 0.5669837505751246, |
|
"learning_rate": 3.524061140683206e-06, |
|
"loss": 0.3367, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.5044281138119464, |
|
"grad_norm": 0.5376128901605735, |
|
"learning_rate": 3.5040178291677816e-06, |
|
"loss": 0.3362, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.505935556811758, |
|
"grad_norm": 0.5816182611830706, |
|
"learning_rate": 3.4840195643990383e-06, |
|
"loss": 0.316, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.5074429998115697, |
|
"grad_norm": 0.5333548248485912, |
|
"learning_rate": 3.464066485056048e-06, |
|
"loss": 0.3223, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5089504428113814, |
|
"grad_norm": 0.5574217586347463, |
|
"learning_rate": 3.444158729504549e-06, |
|
"loss": 0.2994, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.5104578858111928, |
|
"grad_norm": 0.5272699065186945, |
|
"learning_rate": 3.4242964357959597e-06, |
|
"loss": 0.3152, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.5119653288110042, |
|
"grad_norm": 0.5352889213452704, |
|
"learning_rate": 3.4044797416664564e-06, |
|
"loss": 0.3103, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 1.5134727718108159, |
|
"grad_norm": 0.5602956634920077, |
|
"learning_rate": 3.3847087845359996e-06, |
|
"loss": 0.334, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.5149802148106275, |
|
"grad_norm": 0.5402201680847467, |
|
"learning_rate": 3.364983701507376e-06, |
|
"loss": 0.3291, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.5164876578104391, |
|
"grad_norm": 0.5526297524617138, |
|
"learning_rate": 3.3453046293652657e-06, |
|
"loss": 0.3232, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.5179951008102506, |
|
"grad_norm": 0.5401106392320315, |
|
"learning_rate": 3.3256717045752794e-06, |
|
"loss": 0.3219, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 1.5195025438100622, |
|
"grad_norm": 0.5589978845369276, |
|
"learning_rate": 3.3060850632830167e-06, |
|
"loss": 0.3215, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.5210099868098736, |
|
"grad_norm": 0.5628427903081042, |
|
"learning_rate": 3.286544841313126e-06, |
|
"loss": 0.3042, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 1.5225174298096853, |
|
"grad_norm": 0.5291974603976658, |
|
"learning_rate": 3.2670511741683475e-06, |
|
"loss": 0.3039, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.524024872809497, |
|
"grad_norm": 0.5307771510625195, |
|
"learning_rate": 3.2476041970285945e-06, |
|
"loss": 0.3225, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 1.5255323158093086, |
|
"grad_norm": 0.5798408138665074, |
|
"learning_rate": 3.2282040447500063e-06, |
|
"loss": 0.3574, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.52703975880912, |
|
"grad_norm": 0.5262954379509106, |
|
"learning_rate": 3.208850851863998e-06, |
|
"loss": 0.3074, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 1.5285472018089314, |
|
"grad_norm": 0.5411329822808086, |
|
"learning_rate": 3.189544752576369e-06, |
|
"loss": 0.3291, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.530054644808743, |
|
"grad_norm": 0.512225504454937, |
|
"learning_rate": 3.1702858807663175e-06, |
|
"loss": 0.2967, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.5315620878085547, |
|
"grad_norm": 0.5358326980021074, |
|
"learning_rate": 3.151074369985556e-06, |
|
"loss": 0.3281, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.5330695308083664, |
|
"grad_norm": 0.5412115741377782, |
|
"learning_rate": 3.131910353457369e-06, |
|
"loss": 0.3321, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 1.534576973808178, |
|
"grad_norm": 0.5355770866583788, |
|
"learning_rate": 3.112793964075681e-06, |
|
"loss": 0.2999, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.5360844168079895, |
|
"grad_norm": 0.53044658803981, |
|
"learning_rate": 3.0937253344041507e-06, |
|
"loss": 0.3271, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 1.5375918598078009, |
|
"grad_norm": 0.5274519029189704, |
|
"learning_rate": 3.074704596675242e-06, |
|
"loss": 0.3174, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.5390993028076125, |
|
"grad_norm": 0.5410440028748313, |
|
"learning_rate": 3.055731882789311e-06, |
|
"loss": 0.3268, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 1.5406067458074242, |
|
"grad_norm": 0.5400373191606019, |
|
"learning_rate": 3.0368073243136874e-06, |
|
"loss": 0.325, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.5421141888072358, |
|
"grad_norm": 0.5520146881220487, |
|
"learning_rate": 3.0179310524817707e-06, |
|
"loss": 0.32, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 1.5436216318070473, |
|
"grad_norm": 0.5351759755594372, |
|
"learning_rate": 2.9991031981921026e-06, |
|
"loss": 0.3269, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.545129074806859, |
|
"grad_norm": 0.5327689357374978, |
|
"learning_rate": 2.9803238920074784e-06, |
|
"loss": 0.3082, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.5466365178066703, |
|
"grad_norm": 0.5326589519994432, |
|
"learning_rate": 2.961593264154038e-06, |
|
"loss": 0.3157, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 1.548143960806482, |
|
"grad_norm": 0.5249678984746798, |
|
"learning_rate": 2.9429114445203423e-06, |
|
"loss": 0.3119, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 1.5496514038062936, |
|
"grad_norm": 0.5606341770532942, |
|
"learning_rate": 2.924278562656514e-06, |
|
"loss": 0.3351, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.5511588468061053, |
|
"grad_norm": 0.5156779633424778, |
|
"learning_rate": 2.90569474777329e-06, |
|
"loss": 0.3256, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 1.5526662898059167, |
|
"grad_norm": 0.5374626464876353, |
|
"learning_rate": 2.8871601287411634e-06, |
|
"loss": 0.3303, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.5541737328057281, |
|
"grad_norm": 0.5262890780017794, |
|
"learning_rate": 2.8686748340894744e-06, |
|
"loss": 0.3114, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 1.5556811758055398, |
|
"grad_norm": 0.5260995538471516, |
|
"learning_rate": 2.850238992005514e-06, |
|
"loss": 0.2979, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.5571886188053514, |
|
"grad_norm": 0.5573768187241204, |
|
"learning_rate": 2.8318527303336465e-06, |
|
"loss": 0.3475, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 1.558696061805163, |
|
"grad_norm": 0.5542071850347167, |
|
"learning_rate": 2.81351617657442e-06, |
|
"loss": 0.3359, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 1.5602035048049747, |
|
"grad_norm": 0.5376949139111594, |
|
"learning_rate": 2.795229457883678e-06, |
|
"loss": 0.3299, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.5617109478047861, |
|
"grad_norm": 0.5213690515169962, |
|
"learning_rate": 2.7769927010716814e-06, |
|
"loss": 0.3187, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.5632183908045976, |
|
"grad_norm": 0.5282868034684867, |
|
"learning_rate": 2.7588060326022205e-06, |
|
"loss": 0.3124, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 1.5647258338044092, |
|
"grad_norm": 0.5629003594886647, |
|
"learning_rate": 2.740669578591755e-06, |
|
"loss": 0.3453, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 1.5662332768042209, |
|
"grad_norm": 0.5452741440289394, |
|
"learning_rate": 2.7225834648085282e-06, |
|
"loss": 0.3148, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 1.5677407198040325, |
|
"grad_norm": 0.5576848374307647, |
|
"learning_rate": 2.7045478166716843e-06, |
|
"loss": 0.3362, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.569248162803844, |
|
"grad_norm": 0.5230478868120295, |
|
"learning_rate": 2.6865627592504295e-06, |
|
"loss": 0.3074, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 1.5707556058036556, |
|
"grad_norm": 0.5476529275243367, |
|
"learning_rate": 2.668628417263137e-06, |
|
"loss": 0.314, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 1.572263048803467, |
|
"grad_norm": 0.5340674210452238, |
|
"learning_rate": 2.6507449150764852e-06, |
|
"loss": 0.3035, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 1.5737704918032787, |
|
"grad_norm": 0.5245422709481129, |
|
"learning_rate": 2.632912376704607e-06, |
|
"loss": 0.3344, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.5752779348030903, |
|
"grad_norm": 0.52661514981572, |
|
"learning_rate": 2.615130925808228e-06, |
|
"loss": 0.3054, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.576785377802902, |
|
"grad_norm": 0.5147590149467712, |
|
"learning_rate": 2.597400685693795e-06, |
|
"loss": 0.2879, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 1.5782928208027134, |
|
"grad_norm": 0.5476707768783776, |
|
"learning_rate": 2.5797217793126373e-06, |
|
"loss": 0.3395, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 1.5798002638025248, |
|
"grad_norm": 0.5338841668417198, |
|
"learning_rate": 2.5620943292601074e-06, |
|
"loss": 0.3211, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 1.5813077068023365, |
|
"grad_norm": 0.526860653464564, |
|
"learning_rate": 2.5445184577747305e-06, |
|
"loss": 0.3251, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 1.582815149802148, |
|
"grad_norm": 0.5250152267933532, |
|
"learning_rate": 2.52699428673736e-06, |
|
"loss": 0.3126, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.5843225928019598, |
|
"grad_norm": 0.5470636297967526, |
|
"learning_rate": 2.5095219376703183e-06, |
|
"loss": 0.3063, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 1.5858300358017714, |
|
"grad_norm": 0.5581707395933467, |
|
"learning_rate": 2.4921015317365794e-06, |
|
"loss": 0.3624, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 1.5873374788015828, |
|
"grad_norm": 0.5280009933911688, |
|
"learning_rate": 2.4747331897389103e-06, |
|
"loss": 0.3106, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 1.5888449218013942, |
|
"grad_norm": 0.5234869653748981, |
|
"learning_rate": 2.4574170321190305e-06, |
|
"loss": 0.2956, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 1.590352364801206, |
|
"grad_norm": 0.546217705596414, |
|
"learning_rate": 2.440153178956798e-06, |
|
"loss": 0.3215, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.5918598078010175, |
|
"grad_norm": 0.5556302525952723, |
|
"learning_rate": 2.42294174996935e-06, |
|
"loss": 0.3204, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 1.5933672508008292, |
|
"grad_norm": 0.5588880844097838, |
|
"learning_rate": 2.40578286451029e-06, |
|
"loss": 0.3282, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.5948746938006406, |
|
"grad_norm": 0.5241614280996468, |
|
"learning_rate": 2.38867664156886e-06, |
|
"loss": 0.3255, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 1.5963821368004523, |
|
"grad_norm": 0.5543274849783603, |
|
"learning_rate": 2.3716231997691007e-06, |
|
"loss": 0.3175, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 1.5978895798002637, |
|
"grad_norm": 0.5306578564545272, |
|
"learning_rate": 2.3546226573690444e-06, |
|
"loss": 0.3211, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.5993970228000753, |
|
"grad_norm": 0.5401209566379707, |
|
"learning_rate": 2.3376751322599e-06, |
|
"loss": 0.3117, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 1.600904465799887, |
|
"grad_norm": 0.5339229576030943, |
|
"learning_rate": 2.320780741965206e-06, |
|
"loss": 0.3064, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 1.6024119087996986, |
|
"grad_norm": 0.5291570037477905, |
|
"learning_rate": 2.3039396036400463e-06, |
|
"loss": 0.3001, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 1.60391935179951, |
|
"grad_norm": 0.5544131085966325, |
|
"learning_rate": 2.287151834070226e-06, |
|
"loss": 0.3173, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.6054267947993215, |
|
"grad_norm": 0.5042273491393638, |
|
"learning_rate": 2.2704175496714552e-06, |
|
"loss": 0.3035, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.6069342377991331, |
|
"grad_norm": 0.5164264296676705, |
|
"learning_rate": 2.2537368664885527e-06, |
|
"loss": 0.306, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 1.6084416807989448, |
|
"grad_norm": 0.540939444102417, |
|
"learning_rate": 2.2371099001946385e-06, |
|
"loss": 0.3417, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 1.6099491237987564, |
|
"grad_norm": 0.5349172500611197, |
|
"learning_rate": 2.2205367660903267e-06, |
|
"loss": 0.3155, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 1.611456566798568, |
|
"grad_norm": 0.5392150017492342, |
|
"learning_rate": 2.2040175791029305e-06, |
|
"loss": 0.334, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 1.6129640097983795, |
|
"grad_norm": 0.5420224175155496, |
|
"learning_rate": 2.187552453785662e-06, |
|
"loss": 0.2981, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.614471452798191, |
|
"grad_norm": 0.5385758816342323, |
|
"learning_rate": 2.1711415043168395e-06, |
|
"loss": 0.3313, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 1.6159788957980026, |
|
"grad_norm": 0.5437131207841849, |
|
"learning_rate": 2.1547848444991025e-06, |
|
"loss": 0.3352, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 1.6174863387978142, |
|
"grad_norm": 0.5395621366503963, |
|
"learning_rate": 2.138482587758605e-06, |
|
"loss": 0.308, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 1.6189937817976259, |
|
"grad_norm": 0.5255773037738669, |
|
"learning_rate": 2.1222348471442477e-06, |
|
"loss": 0.3014, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.6205012247974373, |
|
"grad_norm": 0.5226567653631905, |
|
"learning_rate": 2.1060417353268845e-06, |
|
"loss": 0.3143, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.622008667797249, |
|
"grad_norm": 0.5449482690998529, |
|
"learning_rate": 2.0899033645985423e-06, |
|
"loss": 0.3091, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.6235161107970604, |
|
"grad_norm": 0.5424238529202222, |
|
"learning_rate": 2.073819846871646e-06, |
|
"loss": 0.3185, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 1.625023553796872, |
|
"grad_norm": 0.5311225228497766, |
|
"learning_rate": 2.0577912936782317e-06, |
|
"loss": 0.2983, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.6265309967966837, |
|
"grad_norm": 0.5208053420833654, |
|
"learning_rate": 2.041817816169187e-06, |
|
"loss": 0.3295, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 1.6280384397964953, |
|
"grad_norm": 0.5269145283569221, |
|
"learning_rate": 2.025899525113474e-06, |
|
"loss": 0.3026, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.6295458827963067, |
|
"grad_norm": 0.5371643301644231, |
|
"learning_rate": 2.010036530897359e-06, |
|
"loss": 0.3196, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 1.6310533257961182, |
|
"grad_norm": 0.5349956576564423, |
|
"learning_rate": 1.9942289435236506e-06, |
|
"loss": 0.3304, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.6325607687959298, |
|
"grad_norm": 0.5604150538269126, |
|
"learning_rate": 1.978476872610939e-06, |
|
"loss": 0.3485, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 1.6340682117957415, |
|
"grad_norm": 0.5245207862200475, |
|
"learning_rate": 1.962780427392823e-06, |
|
"loss": 0.2882, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.6355756547955531, |
|
"grad_norm": 0.5195585896558206, |
|
"learning_rate": 1.9471397167171714e-06, |
|
"loss": 0.3051, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.6370830977953648, |
|
"grad_norm": 0.5147063039454572, |
|
"learning_rate": 1.931554849045355e-06, |
|
"loss": 0.3078, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.6385905407951762, |
|
"grad_norm": 0.5303520825987951, |
|
"learning_rate": 1.916025932451493e-06, |
|
"loss": 0.3141, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 1.6400979837949876, |
|
"grad_norm": 0.5154838630662848, |
|
"learning_rate": 1.9005530746217238e-06, |
|
"loss": 0.2971, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.6416054267947993, |
|
"grad_norm": 0.5537432078636199, |
|
"learning_rate": 1.8851363828534253e-06, |
|
"loss": 0.3124, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 1.643112869794611, |
|
"grad_norm": 0.5634336334894083, |
|
"learning_rate": 1.869775964054501e-06, |
|
"loss": 0.3271, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.6446203127944226, |
|
"grad_norm": 0.5433031560068617, |
|
"learning_rate": 1.8544719247426224e-06, |
|
"loss": 0.3191, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 1.646127755794234, |
|
"grad_norm": 0.5357448136347239, |
|
"learning_rate": 1.8392243710444911e-06, |
|
"loss": 0.2982, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.6476351987940456, |
|
"grad_norm": 0.5552897165798768, |
|
"learning_rate": 1.8240334086951117e-06, |
|
"loss": 0.3537, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 1.649142641793857, |
|
"grad_norm": 0.5318934621576651, |
|
"learning_rate": 1.8088991430370506e-06, |
|
"loss": 0.3005, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.6506500847936687, |
|
"grad_norm": 0.5465559179605479, |
|
"learning_rate": 1.7938216790197071e-06, |
|
"loss": 0.3207, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.6521575277934804, |
|
"grad_norm": 0.5641671337079456, |
|
"learning_rate": 1.77880112119859e-06, |
|
"loss": 0.3095, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.653664970793292, |
|
"grad_norm": 0.5270236586496325, |
|
"learning_rate": 1.7638375737345804e-06, |
|
"loss": 0.312, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 1.6551724137931034, |
|
"grad_norm": 0.5192997218773957, |
|
"learning_rate": 1.7489311403932274e-06, |
|
"loss": 0.2937, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 1.656679856792915, |
|
"grad_norm": 0.5620286825583494, |
|
"learning_rate": 1.7340819245440166e-06, |
|
"loss": 0.3186, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 1.6581872997927265, |
|
"grad_norm": 0.5445642066374056, |
|
"learning_rate": 1.7192900291596493e-06, |
|
"loss": 0.3222, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6596947427925381, |
|
"grad_norm": 0.5157869374514513, |
|
"learning_rate": 1.7045555568153415e-06, |
|
"loss": 0.306, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 1.6612021857923498, |
|
"grad_norm": 0.5138381277919514, |
|
"learning_rate": 1.6898786096881104e-06, |
|
"loss": 0.2715, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 1.6627096287921614, |
|
"grad_norm": 0.5431676779116132, |
|
"learning_rate": 1.6752592895560493e-06, |
|
"loss": 0.3202, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 1.6642170717919729, |
|
"grad_norm": 0.5396928068758252, |
|
"learning_rate": 1.6606976977976408e-06, |
|
"loss": 0.3122, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.6657245147917843, |
|
"grad_norm": 0.5332820215409003, |
|
"learning_rate": 1.6461939353910494e-06, |
|
"loss": 0.3083, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.667231957791596, |
|
"grad_norm": 0.5475766503326961, |
|
"learning_rate": 1.631748102913412e-06, |
|
"loss": 0.3166, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 1.6687394007914076, |
|
"grad_norm": 0.5416380330717204, |
|
"learning_rate": 1.6173603005401505e-06, |
|
"loss": 0.3158, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 1.6702468437912192, |
|
"grad_norm": 0.5416213794269614, |
|
"learning_rate": 1.6030306280442764e-06, |
|
"loss": 0.3077, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.6717542867910307, |
|
"grad_norm": 0.5153535761957841, |
|
"learning_rate": 1.588759184795694e-06, |
|
"loss": 0.3064, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 1.6732617297908423, |
|
"grad_norm": 0.5315610226872074, |
|
"learning_rate": 1.574546069760514e-06, |
|
"loss": 0.3241, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.6747691727906537, |
|
"grad_norm": 0.5684878986820251, |
|
"learning_rate": 1.5603913815003634e-06, |
|
"loss": 0.3403, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 1.6762766157904654, |
|
"grad_norm": 0.5361918937068931, |
|
"learning_rate": 1.5462952181717117e-06, |
|
"loss": 0.3157, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.677784058790277, |
|
"grad_norm": 0.5495570916809654, |
|
"learning_rate": 1.532257677525183e-06, |
|
"loss": 0.3224, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 1.6792915017900887, |
|
"grad_norm": 0.5281943000093583, |
|
"learning_rate": 1.5182788569048689e-06, |
|
"loss": 0.3209, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 1.6807989447899, |
|
"grad_norm": 0.5572787989841019, |
|
"learning_rate": 1.5043588532476827e-06, |
|
"loss": 0.3663, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.6823063877897118, |
|
"grad_norm": 0.5228968415248135, |
|
"learning_rate": 1.49049776308265e-06, |
|
"loss": 0.2889, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.6838138307895232, |
|
"grad_norm": 0.5220477714238411, |
|
"learning_rate": 1.476695682530268e-06, |
|
"loss": 0.3031, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 1.6853212737893348, |
|
"grad_norm": 0.5351071420566498, |
|
"learning_rate": 1.4629527073018267e-06, |
|
"loss": 0.3308, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 1.6868287167891465, |
|
"grad_norm": 0.5396208371722178, |
|
"learning_rate": 1.449268932698743e-06, |
|
"loss": 0.2971, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 1.6883361597889581, |
|
"grad_norm": 0.5416569763589452, |
|
"learning_rate": 1.4356444536119085e-06, |
|
"loss": 0.3024, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.6898436027887695, |
|
"grad_norm": 0.5370439069377987, |
|
"learning_rate": 1.422079364521024e-06, |
|
"loss": 0.3169, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 1.691351045788581, |
|
"grad_norm": 0.5450238281058462, |
|
"learning_rate": 1.4085737594939497e-06, |
|
"loss": 0.333, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 1.6928584887883926, |
|
"grad_norm": 0.5168406644621856, |
|
"learning_rate": 1.3951277321860468e-06, |
|
"loss": 0.3006, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 1.6943659317882043, |
|
"grad_norm": 0.5087028192552481, |
|
"learning_rate": 1.381741375839537e-06, |
|
"loss": 0.2664, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 1.695873374788016, |
|
"grad_norm": 0.5165999383002566, |
|
"learning_rate": 1.3684147832828409e-06, |
|
"loss": 0.281, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.6973808177878273, |
|
"grad_norm": 0.5150348541332692, |
|
"learning_rate": 1.355148046929956e-06, |
|
"loss": 0.307, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 1.698888260787639, |
|
"grad_norm": 0.5168141041103775, |
|
"learning_rate": 1.3419412587797908e-06, |
|
"loss": 0.293, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 1.7003957037874504, |
|
"grad_norm": 0.5133973098786774, |
|
"learning_rate": 1.3287945104155487e-06, |
|
"loss": 0.3015, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 1.701903146787262, |
|
"grad_norm": 0.5513676691974454, |
|
"learning_rate": 1.3157078930040856e-06, |
|
"loss": 0.3179, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 1.7034105897870737, |
|
"grad_norm": 0.546503387891844, |
|
"learning_rate": 1.3026814972952674e-06, |
|
"loss": 0.3043, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.7049180327868854, |
|
"grad_norm": 0.5133928571679112, |
|
"learning_rate": 1.2897154136213542e-06, |
|
"loss": 0.2938, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 1.7064254757866968, |
|
"grad_norm": 0.5441429881648664, |
|
"learning_rate": 1.2768097318963701e-06, |
|
"loss": 0.2978, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 1.7079329187865084, |
|
"grad_norm": 0.5430821498300733, |
|
"learning_rate": 1.2639645416154744e-06, |
|
"loss": 0.3204, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 1.7094403617863199, |
|
"grad_norm": 0.5536392631462185, |
|
"learning_rate": 1.2511799318543493e-06, |
|
"loss": 0.3196, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 1.7109478047861315, |
|
"grad_norm": 0.5403946840340975, |
|
"learning_rate": 1.2384559912685768e-06, |
|
"loss": 0.3156, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.7124552477859432, |
|
"grad_norm": 0.5360872549447121, |
|
"learning_rate": 1.2257928080930236e-06, |
|
"loss": 0.3275, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 1.7139626907857548, |
|
"grad_norm": 0.5482125645711281, |
|
"learning_rate": 1.2131904701412345e-06, |
|
"loss": 0.3041, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 1.7154701337855662, |
|
"grad_norm": 0.5497152879102077, |
|
"learning_rate": 1.2006490648048118e-06, |
|
"loss": 0.3148, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 1.7169775767853777, |
|
"grad_norm": 0.5141035384965538, |
|
"learning_rate": 1.1881686790528279e-06, |
|
"loss": 0.3068, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 1.7184850197851893, |
|
"grad_norm": 0.529010142033819, |
|
"learning_rate": 1.1757493994312052e-06, |
|
"loss": 0.3088, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.719992462785001, |
|
"grad_norm": 0.5495066854090749, |
|
"learning_rate": 1.1633913120621188e-06, |
|
"loss": 0.3236, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 1.7214999057848126, |
|
"grad_norm": 0.5492447937245492, |
|
"learning_rate": 1.151094502643414e-06, |
|
"loss": 0.3308, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 1.723007348784624, |
|
"grad_norm": 0.5562630282965828, |
|
"learning_rate": 1.1388590564479895e-06, |
|
"loss": 0.299, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 1.7245147917844357, |
|
"grad_norm": 0.5231751271166386, |
|
"learning_rate": 1.1266850583232224e-06, |
|
"loss": 0.3053, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 1.726022234784247, |
|
"grad_norm": 0.5201419013982067, |
|
"learning_rate": 1.1145725926903772e-06, |
|
"loss": 0.3023, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.7275296777840587, |
|
"grad_norm": 0.511856980805046, |
|
"learning_rate": 1.1025217435440116e-06, |
|
"loss": 0.2867, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 1.7290371207838704, |
|
"grad_norm": 0.565983997374927, |
|
"learning_rate": 1.0905325944514034e-06, |
|
"loss": 0.3232, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 1.730544563783682, |
|
"grad_norm": 0.5363045072212188, |
|
"learning_rate": 1.078605228551971e-06, |
|
"loss": 0.3182, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 1.7320520067834935, |
|
"grad_norm": 0.5329886787330583, |
|
"learning_rate": 1.0667397285566893e-06, |
|
"loss": 0.3061, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 1.7335594497833051, |
|
"grad_norm": 0.5397864180847504, |
|
"learning_rate": 1.0549361767475241e-06, |
|
"loss": 0.2873, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.7350668927831165, |
|
"grad_norm": 0.5436027401118747, |
|
"learning_rate": 1.0431946549768567e-06, |
|
"loss": 0.3213, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 1.7365743357829282, |
|
"grad_norm": 0.5304426225729307, |
|
"learning_rate": 1.0315152446669142e-06, |
|
"loss": 0.295, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 1.7380817787827398, |
|
"grad_norm": 0.5119724476906113, |
|
"learning_rate": 1.019898026809214e-06, |
|
"loss": 0.3009, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 1.7395892217825515, |
|
"grad_norm": 0.5399752438286287, |
|
"learning_rate": 1.0083430819639962e-06, |
|
"loss": 0.3097, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 1.741096664782363, |
|
"grad_norm": 0.5329124149971953, |
|
"learning_rate": 9.968504902596566e-07, |
|
"loss": 0.3094, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.7426041077821743, |
|
"grad_norm": 0.5265575018375785, |
|
"learning_rate": 9.85420331392214e-07, |
|
"loss": 0.3001, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 1.744111550781986, |
|
"grad_norm": 0.5415027063140824, |
|
"learning_rate": 9.74052684624731e-07, |
|
"loss": 0.3052, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 1.7456189937817976, |
|
"grad_norm": 0.5273083269054069, |
|
"learning_rate": 9.62747628786782e-07, |
|
"loss": 0.2918, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 1.7471264367816093, |
|
"grad_norm": 0.5467675396074031, |
|
"learning_rate": 9.515052422739035e-07, |
|
"loss": 0.3013, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 1.748633879781421, |
|
"grad_norm": 0.5012647001758278, |
|
"learning_rate": 9.403256030470386e-07, |
|
"loss": 0.2922, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.7501413227812324, |
|
"grad_norm": 0.5473662670334606, |
|
"learning_rate": 9.292087886320166e-07, |
|
"loss": 0.3275, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 1.7516487657810438, |
|
"grad_norm": 0.5119711645632107, |
|
"learning_rate": 9.181548761189996e-07, |
|
"loss": 0.2997, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 1.7531562087808554, |
|
"grad_norm": 0.5579153837729429, |
|
"learning_rate": 9.071639421619527e-07, |
|
"loss": 0.3373, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 1.754663651780667, |
|
"grad_norm": 0.5369066551498206, |
|
"learning_rate": 8.962360629781164e-07, |
|
"loss": 0.3013, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.7561710947804787, |
|
"grad_norm": 0.5299407293801213, |
|
"learning_rate": 8.853713143474685e-07, |
|
"loss": 0.2977, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.7576785377802902, |
|
"grad_norm": 0.5246411563266595, |
|
"learning_rate": 8.745697716122081e-07, |
|
"loss": 0.3192, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 1.7591859807801018, |
|
"grad_norm": 0.5450557723814945, |
|
"learning_rate": 8.638315096762318e-07, |
|
"loss": 0.3075, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 1.7606934237799132, |
|
"grad_norm": 0.5392032704205785, |
|
"learning_rate": 8.531566030046035e-07, |
|
"loss": 0.3231, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 1.7622008667797249, |
|
"grad_norm": 0.5370373229347402, |
|
"learning_rate": 8.425451256230588e-07, |
|
"loss": 0.3012, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 1.7637083097795365, |
|
"grad_norm": 0.5405109358545394, |
|
"learning_rate": 8.319971511174718e-07, |
|
"loss": 0.3165, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.7652157527793482, |
|
"grad_norm": 0.528010212076121, |
|
"learning_rate": 8.215127526333499e-07, |
|
"loss": 0.3236, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 1.7667231957791596, |
|
"grad_norm": 0.5226712118154457, |
|
"learning_rate": 8.110920028753355e-07, |
|
"loss": 0.3088, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 1.768230638778971, |
|
"grad_norm": 0.5574504221106463, |
|
"learning_rate": 8.007349741066939e-07, |
|
"loss": 0.3258, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 1.7697380817787827, |
|
"grad_norm": 0.5452638755092976, |
|
"learning_rate": 7.904417381488083e-07, |
|
"loss": 0.3167, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 1.7712455247785943, |
|
"grad_norm": 0.5517680932401637, |
|
"learning_rate": 7.802123663806938e-07, |
|
"loss": 0.3227, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.772752967778406, |
|
"grad_norm": 0.5480833770797733, |
|
"learning_rate": 7.700469297384927e-07, |
|
"loss": 0.3307, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 1.7742604107782176, |
|
"grad_norm": 0.5430614726412718, |
|
"learning_rate": 7.599454987149868e-07, |
|
"loss": 0.3463, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 1.775767853778029, |
|
"grad_norm": 0.549255722400039, |
|
"learning_rate": 7.499081433591071e-07, |
|
"loss": 0.3284, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 1.7772752967778405, |
|
"grad_norm": 0.5410909371721678, |
|
"learning_rate": 7.399349332754458e-07, |
|
"loss": 0.315, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 1.778782739777652, |
|
"grad_norm": 0.530772119547457, |
|
"learning_rate": 7.300259376237795e-07, |
|
"loss": 0.3101, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.7802901827774638, |
|
"grad_norm": 0.5484763677836378, |
|
"learning_rate": 7.201812251185869e-07, |
|
"loss": 0.3192, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 1.7817976257772754, |
|
"grad_norm": 0.5299690672941552, |
|
"learning_rate": 7.104008640285642e-07, |
|
"loss": 0.3115, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 1.7833050687770868, |
|
"grad_norm": 0.5424348226189657, |
|
"learning_rate": 7.006849221761736e-07, |
|
"loss": 0.3119, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 1.7848125117768985, |
|
"grad_norm": 0.5221149596372863, |
|
"learning_rate": 6.910334669371433e-07, |
|
"loss": 0.3078, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 1.78631995477671, |
|
"grad_norm": 0.5581110749980865, |
|
"learning_rate": 6.814465652400237e-07, |
|
"loss": 0.3364, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.7878273977765216, |
|
"grad_norm": 0.5227271898985753, |
|
"learning_rate": 6.719242835657147e-07, |
|
"loss": 0.3057, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 1.7893348407763332, |
|
"grad_norm": 0.5541663205023336, |
|
"learning_rate": 6.62466687947001e-07, |
|
"loss": 0.335, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 1.7908422837761448, |
|
"grad_norm": 0.5269336686543489, |
|
"learning_rate": 6.530738439681017e-07, |
|
"loss": 0.3151, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.7923497267759563, |
|
"grad_norm": 0.5786593133724774, |
|
"learning_rate": 6.437458167642164e-07, |
|
"loss": 0.3366, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 1.7938571697757677, |
|
"grad_norm": 0.5253461874134103, |
|
"learning_rate": 6.344826710210584e-07, |
|
"loss": 0.3127, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.7953646127755793, |
|
"grad_norm": 0.5299856089834871, |
|
"learning_rate": 6.252844709744255e-07, |
|
"loss": 0.3135, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 1.796872055775391, |
|
"grad_norm": 0.521456043233, |
|
"learning_rate": 6.161512804097436e-07, |
|
"loss": 0.2977, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 1.7983794987752026, |
|
"grad_norm": 0.5469215546867617, |
|
"learning_rate": 6.070831626616236e-07, |
|
"loss": 0.3255, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 1.7998869417750143, |
|
"grad_norm": 0.5228672904607026, |
|
"learning_rate": 5.980801806134318e-07, |
|
"loss": 0.2866, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 1.8013943847748257, |
|
"grad_norm": 0.5146912409802952, |
|
"learning_rate": 5.891423966968413e-07, |
|
"loss": 0.2881, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.8029018277746371, |
|
"grad_norm": 0.5474095770320819, |
|
"learning_rate": 5.80269872891408e-07, |
|
"loss": 0.3036, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 1.8044092707744488, |
|
"grad_norm": 0.5298374503272721, |
|
"learning_rate": 5.714626707241411e-07, |
|
"loss": 0.3032, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 1.8059167137742604, |
|
"grad_norm": 0.5577685295614593, |
|
"learning_rate": 5.627208512690641e-07, |
|
"loss": 0.3136, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 1.807424156774072, |
|
"grad_norm": 0.5641313668289314, |
|
"learning_rate": 5.5404447514681e-07, |
|
"loss": 0.3057, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 1.8089315997738835, |
|
"grad_norm": 0.5295651592847044, |
|
"learning_rate": 5.45433602524188e-07, |
|
"loss": 0.292, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8104390427736952, |
|
"grad_norm": 0.5164790354946905, |
|
"learning_rate": 5.368882931137675e-07, |
|
"loss": 0.3171, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 1.8119464857735066, |
|
"grad_norm": 0.5440394178727653, |
|
"learning_rate": 5.284086061734672e-07, |
|
"loss": 0.3389, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 1.8134539287733182, |
|
"grad_norm": 0.5379140129646219, |
|
"learning_rate": 5.199946005061462e-07, |
|
"loss": 0.3191, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 1.8149613717731299, |
|
"grad_norm": 0.5586596394798488, |
|
"learning_rate": 5.116463344591893e-07, |
|
"loss": 0.3297, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 1.8164688147729415, |
|
"grad_norm": 0.5460139307968361, |
|
"learning_rate": 5.033638659241102e-07, |
|
"loss": 0.3179, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.817976257772753, |
|
"grad_norm": 0.5099561737975997, |
|
"learning_rate": 4.951472523361401e-07, |
|
"loss": 0.2881, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 1.8194837007725644, |
|
"grad_norm": 0.5127038476010487, |
|
"learning_rate": 4.869965506738416e-07, |
|
"loss": 0.301, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 1.820991143772376, |
|
"grad_norm": 0.5160382306170839, |
|
"learning_rate": 4.789118174587071e-07, |
|
"loss": 0.2951, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 1.8224985867721877, |
|
"grad_norm": 0.5368235750006268, |
|
"learning_rate": 4.7089310875475856e-07, |
|
"loss": 0.3244, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 1.8240060297719993, |
|
"grad_norm": 0.5486310091200752, |
|
"learning_rate": 4.6294048016817917e-07, |
|
"loss": 0.3357, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.825513472771811, |
|
"grad_norm": 0.5531427959479509, |
|
"learning_rate": 4.550539868469106e-07, |
|
"loss": 0.3404, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 1.8270209157716224, |
|
"grad_norm": 0.5715387730686571, |
|
"learning_rate": 4.4723368348027375e-07, |
|
"loss": 0.3172, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 1.8285283587714338, |
|
"grad_norm": 0.5208566706980682, |
|
"learning_rate": 4.394796242985933e-07, |
|
"loss": 0.3334, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 1.8300358017712455, |
|
"grad_norm": 0.5088911051543478, |
|
"learning_rate": 4.317918630728235e-07, |
|
"loss": 0.3022, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 1.8315432447710571, |
|
"grad_norm": 0.5459330178466746, |
|
"learning_rate": 4.241704531141633e-07, |
|
"loss": 0.3192, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.8330506877708688, |
|
"grad_norm": 0.5256076032695434, |
|
"learning_rate": 4.166154472737061e-07, |
|
"loss": 0.2962, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 1.8345581307706802, |
|
"grad_norm": 0.525111212488327, |
|
"learning_rate": 4.091268979420537e-07, |
|
"loss": 0.3015, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 1.8360655737704918, |
|
"grad_norm": 0.5131054923837834, |
|
"learning_rate": 4.0170485704896453e-07, |
|
"loss": 0.2984, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 1.8375730167703033, |
|
"grad_norm": 0.5222731798701179, |
|
"learning_rate": 3.943493760629924e-07, |
|
"loss": 0.3007, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 1.839080459770115, |
|
"grad_norm": 0.526864271558084, |
|
"learning_rate": 3.8706050599112363e-07, |
|
"loss": 0.3265, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.8405879027699266, |
|
"grad_norm": 0.5429845817805197, |
|
"learning_rate": 3.798382973784298e-07, |
|
"loss": 0.3126, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 1.8420953457697382, |
|
"grad_norm": 0.5203043666384023, |
|
"learning_rate": 3.7268280030771655e-07, |
|
"loss": 0.3005, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 1.8436027887695496, |
|
"grad_norm": 0.5553304676785623, |
|
"learning_rate": 3.655940643991718e-07, |
|
"loss": 0.3033, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 1.845110231769361, |
|
"grad_norm": 0.5453329521030166, |
|
"learning_rate": 3.585721388100283e-07, |
|
"loss": 0.3197, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 1.8466176747691727, |
|
"grad_norm": 0.5358448339719871, |
|
"learning_rate": 3.516170722342127e-07, |
|
"loss": 0.3123, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.8481251177689844, |
|
"grad_norm": 0.5255670215605667, |
|
"learning_rate": 3.4472891290201927e-07, |
|
"loss": 0.3052, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 1.849632560768796, |
|
"grad_norm": 0.5558001652650641, |
|
"learning_rate": 3.3790770857976995e-07, |
|
"loss": 0.326, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 1.8511400037686077, |
|
"grad_norm": 0.5383925013665675, |
|
"learning_rate": 3.3115350656948043e-07, |
|
"loss": 0.3074, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 1.852647446768419, |
|
"grad_norm": 0.537705868540796, |
|
"learning_rate": 3.2446635370853686e-07, |
|
"loss": 0.3304, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 1.8541548897682305, |
|
"grad_norm": 0.5382781367285551, |
|
"learning_rate": 3.1784629636937404e-07, |
|
"loss": 0.2883, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.8556623327680422, |
|
"grad_norm": 0.5198656289929648, |
|
"learning_rate": 3.1129338045914004e-07, |
|
"loss": 0.3067, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 1.8571697757678538, |
|
"grad_norm": 0.5472844326917599, |
|
"learning_rate": 3.0480765141939316e-07, |
|
"loss": 0.2992, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 1.8586772187676655, |
|
"grad_norm": 0.5507075256404199, |
|
"learning_rate": 2.9838915422578e-07, |
|
"loss": 0.3179, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 1.8601846617674769, |
|
"grad_norm": 0.5510217424809797, |
|
"learning_rate": 2.920379333877221e-07, |
|
"loss": 0.2994, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 1.8616921047672885, |
|
"grad_norm": 0.5400374711049234, |
|
"learning_rate": 2.8575403294811123e-07, |
|
"loss": 0.32, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.8631995477671, |
|
"grad_norm": 0.5378450793044806, |
|
"learning_rate": 2.795374964830022e-07, |
|
"loss": 0.2982, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 1.8647069907669116, |
|
"grad_norm": 0.5537869321901812, |
|
"learning_rate": 2.733883671013082e-07, |
|
"loss": 0.3215, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 1.8662144337667232, |
|
"grad_norm": 0.5214658307160525, |
|
"learning_rate": 2.673066874445096e-07, |
|
"loss": 0.2967, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 1.867721876766535, |
|
"grad_norm": 0.5396057264477051, |
|
"learning_rate": 2.612924996863453e-07, |
|
"loss": 0.3323, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 1.8692293197663463, |
|
"grad_norm": 0.5029335509841266, |
|
"learning_rate": 2.5534584553253526e-07, |
|
"loss": 0.2874, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.8707367627661577, |
|
"grad_norm": 0.5258804019291271, |
|
"learning_rate": 2.494667662204797e-07, |
|
"loss": 0.2899, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 1.8722442057659694, |
|
"grad_norm": 0.5248481914254555, |
|
"learning_rate": 2.436553025189758e-07, |
|
"loss": 0.3024, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 1.873751648765781, |
|
"grad_norm": 0.5561247405618174, |
|
"learning_rate": 2.3791149472794373e-07, |
|
"loss": 0.3224, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 1.8752590917655927, |
|
"grad_norm": 0.5386355445772364, |
|
"learning_rate": 2.3223538267813317e-07, |
|
"loss": 0.3252, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 1.8767665347654043, |
|
"grad_norm": 0.5387316814949316, |
|
"learning_rate": 2.2662700573085505e-07, |
|
"loss": 0.3188, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.8782739777652158, |
|
"grad_norm": 0.5140491567851894, |
|
"learning_rate": 2.2108640277771153e-07, |
|
"loss": 0.3087, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 1.8797814207650272, |
|
"grad_norm": 0.5578574961077984, |
|
"learning_rate": 2.156136122403174e-07, |
|
"loss": 0.3339, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 1.8812888637648388, |
|
"grad_norm": 0.531923059262347, |
|
"learning_rate": 2.1020867207004026e-07, |
|
"loss": 0.302, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.8827963067646505, |
|
"grad_norm": 0.5199091046599704, |
|
"learning_rate": 2.048716197477374e-07, |
|
"loss": 0.3, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 1.8843037497644621, |
|
"grad_norm": 0.5292062225255757, |
|
"learning_rate": 1.996024922834905e-07, |
|
"loss": 0.3136, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.8858111927642736, |
|
"grad_norm": 0.5116718173644801, |
|
"learning_rate": 1.9440132621635687e-07, |
|
"loss": 0.3022, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 1.8873186357640852, |
|
"grad_norm": 0.5311851666913588, |
|
"learning_rate": 1.8926815761410867e-07, |
|
"loss": 0.3119, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 1.8888260787638966, |
|
"grad_norm": 0.5164961460809835, |
|
"learning_rate": 1.8420302207298623e-07, |
|
"loss": 0.3366, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 1.8903335217637083, |
|
"grad_norm": 0.504315215516738, |
|
"learning_rate": 1.792059547174507e-07, |
|
"loss": 0.2975, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 1.89184096476352, |
|
"grad_norm": 0.5345352832708335, |
|
"learning_rate": 1.7427699019994415e-07, |
|
"loss": 0.3236, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.8933484077633316, |
|
"grad_norm": 0.5381742517110331, |
|
"learning_rate": 1.6941616270063854e-07, |
|
"loss": 0.3279, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 1.894855850763143, |
|
"grad_norm": 0.5375566393713683, |
|
"learning_rate": 1.6462350592721498e-07, |
|
"loss": 0.3362, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 1.8963632937629544, |
|
"grad_norm": 0.5285514266127366, |
|
"learning_rate": 1.5989905311461274e-07, |
|
"loss": 0.3204, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 1.897870736762766, |
|
"grad_norm": 0.5414536639413304, |
|
"learning_rate": 1.5524283702481158e-07, |
|
"loss": 0.3335, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 1.8993781797625777, |
|
"grad_norm": 0.512538356667461, |
|
"learning_rate": 1.5065488994659983e-07, |
|
"loss": 0.3053, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.9008856227623894, |
|
"grad_norm": 0.5324212249495981, |
|
"learning_rate": 1.461352436953478e-07, |
|
"loss": 0.3072, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 1.902393065762201, |
|
"grad_norm": 0.5455697348576503, |
|
"learning_rate": 1.4168392961279254e-07, |
|
"loss": 0.3316, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 1.9039005087620124, |
|
"grad_norm": 0.5466375519251029, |
|
"learning_rate": 1.3730097856681668e-07, |
|
"loss": 0.3226, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 1.9054079517618239, |
|
"grad_norm": 0.5312632713929628, |
|
"learning_rate": 1.329864209512377e-07, |
|
"loss": 0.315, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 1.9069153947616355, |
|
"grad_norm": 0.5425648068314173, |
|
"learning_rate": 1.2874028668559247e-07, |
|
"loss": 0.3235, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.9084228377614472, |
|
"grad_norm": 0.5312642091039448, |
|
"learning_rate": 1.245626052149318e-07, |
|
"loss": 0.3203, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 1.9099302807612588, |
|
"grad_norm": 0.532495465640754, |
|
"learning_rate": 1.2045340550961958e-07, |
|
"loss": 0.3155, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 1.9114377237610702, |
|
"grad_norm": 0.5246778980321247, |
|
"learning_rate": 1.164127160651285e-07, |
|
"loss": 0.2926, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 1.9129451667608819, |
|
"grad_norm": 0.5339514500193528, |
|
"learning_rate": 1.1244056490184008e-07, |
|
"loss": 0.3029, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 1.9144526097606933, |
|
"grad_norm": 0.520828858822998, |
|
"learning_rate": 1.0853697956485942e-07, |
|
"loss": 0.3065, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.915960052760505, |
|
"grad_norm": 0.520817868672033, |
|
"learning_rate": 1.0470198712381086e-07, |
|
"loss": 0.307, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 1.9174674957603166, |
|
"grad_norm": 0.516414932582989, |
|
"learning_rate": 1.009356141726614e-07, |
|
"loss": 0.3101, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 1.9189749387601283, |
|
"grad_norm": 0.549210829131398, |
|
"learning_rate": 9.723788682953539e-08, |
|
"loss": 0.3562, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 1.9204823817599397, |
|
"grad_norm": 0.5457067373758283, |
|
"learning_rate": 9.360883073652238e-08, |
|
"loss": 0.3179, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 1.921989824759751, |
|
"grad_norm": 0.5418508804321499, |
|
"learning_rate": 9.004847105951509e-08, |
|
"loss": 0.3159, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.9234972677595628, |
|
"grad_norm": 0.5366441286826634, |
|
"learning_rate": 8.655683248802282e-08, |
|
"loss": 0.2996, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 1.9250047107593744, |
|
"grad_norm": 0.5442333602669928, |
|
"learning_rate": 8.313393923500613e-08, |
|
"loss": 0.3088, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 1.926512153759186, |
|
"grad_norm": 0.5391901662166373, |
|
"learning_rate": 7.977981503670795e-08, |
|
"loss": 0.3061, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 1.9280195967589977, |
|
"grad_norm": 0.5435340810409717, |
|
"learning_rate": 7.64944831524872e-08, |
|
"loss": 0.3285, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 1.9295270397588091, |
|
"grad_norm": 0.521864945549257, |
|
"learning_rate": 7.327796636465767e-08, |
|
"loss": 0.3076, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.9310344827586206, |
|
"grad_norm": 0.5514257916288331, |
|
"learning_rate": 7.01302869783338e-08, |
|
"loss": 0.3094, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 1.9325419257584322, |
|
"grad_norm": 0.5219787357249853, |
|
"learning_rate": 6.705146682127184e-08, |
|
"loss": 0.2995, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 1.9340493687582438, |
|
"grad_norm": 0.5276229371813537, |
|
"learning_rate": 6.404152724371892e-08, |
|
"loss": 0.3091, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 1.9355568117580555, |
|
"grad_norm": 0.5314226178684127, |
|
"learning_rate": 6.110048911826871e-08, |
|
"loss": 0.3158, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 1.937064254757867, |
|
"grad_norm": 0.5502887577785275, |
|
"learning_rate": 5.82283728397115e-08, |
|
"loss": 0.3215, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.9385716977576786, |
|
"grad_norm": 0.5486357521857442, |
|
"learning_rate": 5.542519832489546e-08, |
|
"loss": 0.3386, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 1.94007914075749, |
|
"grad_norm": 0.5249846416693436, |
|
"learning_rate": 5.269098501259007e-08, |
|
"loss": 0.3097, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 1.9415865837573016, |
|
"grad_norm": 0.5254444548298214, |
|
"learning_rate": 5.002575186334735e-08, |
|
"loss": 0.3357, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 1.9430940267571133, |
|
"grad_norm": 0.5333212942639225, |
|
"learning_rate": 4.742951735937418e-08, |
|
"loss": 0.3051, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 1.944601469756925, |
|
"grad_norm": 0.551663577132892, |
|
"learning_rate": 4.490229950440239e-08, |
|
"loss": 0.3004, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.9461089127567364, |
|
"grad_norm": 0.5255799475848183, |
|
"learning_rate": 4.2444115823562226e-08, |
|
"loss": 0.2978, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 1.9476163557565478, |
|
"grad_norm": 0.5122243774948546, |
|
"learning_rate": 4.005498336326463e-08, |
|
"loss": 0.2904, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 1.9491237987563594, |
|
"grad_norm": 0.5387853567764707, |
|
"learning_rate": 3.773491869108137e-08, |
|
"loss": 0.3186, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 1.950631241756171, |
|
"grad_norm": 0.5188343179014033, |
|
"learning_rate": 3.548393789562732e-08, |
|
"loss": 0.3009, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 1.9521386847559827, |
|
"grad_norm": 0.5377374320626865, |
|
"learning_rate": 3.3302056586453916e-08, |
|
"loss": 0.3054, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 1.9536461277557944, |
|
"grad_norm": 0.535561804417277, |
|
"learning_rate": 3.118928989393699e-08, |
|
"loss": 0.296, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 1.9551535707556058, |
|
"grad_norm": 0.5298311028053398, |
|
"learning_rate": 2.9145652469174666e-08, |
|
"loss": 0.3049, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 1.9566610137554172, |
|
"grad_norm": 0.5269215357647239, |
|
"learning_rate": 2.7171158483882963e-08, |
|
"loss": 0.2986, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 1.9581684567552289, |
|
"grad_norm": 0.5270798678914951, |
|
"learning_rate": 2.5265821630298116e-08, |
|
"loss": 0.3214, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 1.9596758997550405, |
|
"grad_norm": 0.5488284780441306, |
|
"learning_rate": 2.3429655121085525e-08, |
|
"loss": 0.3293, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.9611833427548522, |
|
"grad_norm": 0.5281296618472574, |
|
"learning_rate": 2.1662671689242076e-08, |
|
"loss": 0.3269, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 1.9626907857546636, |
|
"grad_norm": 0.5477047385786338, |
|
"learning_rate": 1.996488358801174e-08, |
|
"loss": 0.3116, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 1.9641982287544753, |
|
"grad_norm": 0.548270877454329, |
|
"learning_rate": 1.8336302590798992e-08, |
|
"loss": 0.3415, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 1.9657056717542867, |
|
"grad_norm": 0.5385366961987965, |
|
"learning_rate": 1.677693999109109e-08, |
|
"loss": 0.3036, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 1.9672131147540983, |
|
"grad_norm": 0.5125316134927453, |
|
"learning_rate": 1.5286806602372583e-08, |
|
"loss": 0.2899, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.96872055775391, |
|
"grad_norm": 0.5211226615097172, |
|
"learning_rate": 1.3865912758054267e-08, |
|
"loss": 0.3025, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 1.9702280007537216, |
|
"grad_norm": 0.5265304987884217, |
|
"learning_rate": 1.2514268311405452e-08, |
|
"loss": 0.3005, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 1.971735443753533, |
|
"grad_norm": 0.5410147654111483, |
|
"learning_rate": 1.1231882635477364e-08, |
|
"loss": 0.3119, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 1.9732428867533447, |
|
"grad_norm": 0.5170988890501786, |
|
"learning_rate": 1.0018764623045407e-08, |
|
"loss": 0.2958, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 1.9747503297531561, |
|
"grad_norm": 0.5184171784095163, |
|
"learning_rate": 8.874922686541442e-09, |
|
"loss": 0.2924, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.9762577727529678, |
|
"grad_norm": 0.5140179548472411, |
|
"learning_rate": 7.800364758002721e-09, |
|
"loss": 0.2935, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 1.9777652157527794, |
|
"grad_norm": 0.5486141459025122, |
|
"learning_rate": 6.795098289008595e-09, |
|
"loss": 0.315, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 1.979272658752591, |
|
"grad_norm": 0.5311267803536656, |
|
"learning_rate": 5.859130250636113e-09, |
|
"loss": 0.3115, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 1.9807801017524025, |
|
"grad_norm": 0.5127976418049099, |
|
"learning_rate": 4.992467133406731e-09, |
|
"loss": 0.2853, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 1.982287544752214, |
|
"grad_norm": 0.5328293684979241, |
|
"learning_rate": 4.195114947244117e-09, |
|
"loss": 0.3117, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 1.9837949877520256, |
|
"grad_norm": 0.5211491118658048, |
|
"learning_rate": 3.4670792214297476e-09, |
|
"loss": 0.3049, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 1.9853024307518372, |
|
"grad_norm": 0.532080710582646, |
|
"learning_rate": 2.808365004569602e-09, |
|
"loss": 0.3075, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 1.9868098737516489, |
|
"grad_norm": 0.530526147652671, |
|
"learning_rate": 2.2189768645519693e-09, |
|
"loss": 0.3158, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 1.9883173167514603, |
|
"grad_norm": 0.5335931657065038, |
|
"learning_rate": 1.6989188885219165e-09, |
|
"loss": 0.319, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 1.989824759751272, |
|
"grad_norm": 0.527218281586083, |
|
"learning_rate": 1.2481946828502011e-09, |
|
"loss": 0.2986, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.9913322027510834, |
|
"grad_norm": 0.5444631806162264, |
|
"learning_rate": 8.668073731088467e-10, |
|
"loss": 0.3414, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 1.992839645750895, |
|
"grad_norm": 0.5458452249259766, |
|
"learning_rate": 5.547596040489378e-10, |
|
"loss": 0.3312, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 1.9943470887507067, |
|
"grad_norm": 0.5337875506880636, |
|
"learning_rate": 3.1205353958285724e-10, |
|
"loss": 0.3065, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 1.9958545317505183, |
|
"grad_norm": 0.5477429410153635, |
|
"learning_rate": 1.3869086276985243e-10, |
|
"loss": 0.308, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 1.9973619747503297, |
|
"grad_norm": 0.5355633680169556, |
|
"learning_rate": 3.467277580271322e-11, |
|
"loss": 0.3114, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.9988694177501414, |
|
"grad_norm": 0.5487135118890082, |
|
"learning_rate": 0.0, |
|
"loss": 0.3367, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 1.9988694177501414, |
|
"step": 1326, |
|
"total_flos": 5.576345153511096e+17, |
|
"train_loss": 0.3973805017061363, |
|
"train_runtime": 5664.6789, |
|
"train_samples_per_second": 29.975, |
|
"train_steps_per_second": 0.234 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1326, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.576345153511096e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|