|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.352441613588111, |
|
"eval_steps": 500, |
|
"global_step": 4100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005307855626326964, |
|
"grad_norm": 0.8387855291366577, |
|
"learning_rate": 4.999986097031132e-05, |
|
"loss": 1.9588, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.010615711252653927, |
|
"grad_norm": 0.645732581615448, |
|
"learning_rate": 4.999944388279162e-05, |
|
"loss": 2.1093, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01592356687898089, |
|
"grad_norm": 0.7065261602401733, |
|
"learning_rate": 4.9998748742079904e-05, |
|
"loss": 1.8366, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.021231422505307854, |
|
"grad_norm": 2.4020400047302246, |
|
"learning_rate": 4.999777555590779e-05, |
|
"loss": 2.298, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02653927813163482, |
|
"grad_norm": 0.9261078834533691, |
|
"learning_rate": 4.99965243350994e-05, |
|
"loss": 1.854, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03184713375796178, |
|
"grad_norm": 4.6172943115234375, |
|
"learning_rate": 4.9994995093571314e-05, |
|
"loss": 1.8602, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.037154989384288746, |
|
"grad_norm": 0.7544531226158142, |
|
"learning_rate": 4.9993187848332315e-05, |
|
"loss": 1.7065, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04246284501061571, |
|
"grad_norm": 0.9400565028190613, |
|
"learning_rate": 4.9991102619483254e-05, |
|
"loss": 1.7744, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04777070063694268, |
|
"grad_norm": 0.8333051204681396, |
|
"learning_rate": 4.9988739430216834e-05, |
|
"loss": 1.6745, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.05307855626326964, |
|
"grad_norm": 0.9034268260002136, |
|
"learning_rate": 4.998609830681734e-05, |
|
"loss": 1.7685, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.058386411889596604, |
|
"grad_norm": 5.156125068664551, |
|
"learning_rate": 4.998317927866033e-05, |
|
"loss": 1.822, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.06369426751592357, |
|
"grad_norm": 3.3585586547851562, |
|
"learning_rate": 4.997998237821233e-05, |
|
"loss": 1.8087, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06900212314225053, |
|
"grad_norm": 1.1148409843444824, |
|
"learning_rate": 4.9976507641030466e-05, |
|
"loss": 1.5799, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.07430997876857749, |
|
"grad_norm": 2.8555004596710205, |
|
"learning_rate": 4.997275510576207e-05, |
|
"loss": 1.6364, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07961783439490445, |
|
"grad_norm": 1.1666771173477173, |
|
"learning_rate": 4.996872481414425e-05, |
|
"loss": 1.6141, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.08492569002123142, |
|
"grad_norm": 1.0667269229888916, |
|
"learning_rate": 4.9964416811003414e-05, |
|
"loss": 1.7928, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09023354564755838, |
|
"grad_norm": 1.2639814615249634, |
|
"learning_rate": 4.9959831144254794e-05, |
|
"loss": 1.4345, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.09554140127388536, |
|
"grad_norm": 1.1917731761932373, |
|
"learning_rate": 4.995496786490189e-05, |
|
"loss": 1.7151, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10084925690021232, |
|
"grad_norm": 1.2275704145431519, |
|
"learning_rate": 4.9949827027035924e-05, |
|
"loss": 1.8297, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.10615711252653928, |
|
"grad_norm": 1.3175028562545776, |
|
"learning_rate": 4.994440868783522e-05, |
|
"loss": 1.6928, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11146496815286625, |
|
"grad_norm": 1.2463750839233398, |
|
"learning_rate": 4.993871290756459e-05, |
|
"loss": 1.7687, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.11677282377919321, |
|
"grad_norm": 4.9171624183654785, |
|
"learning_rate": 4.993273974957463e-05, |
|
"loss": 1.6187, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12208067940552017, |
|
"grad_norm": 6.226306438446045, |
|
"learning_rate": 4.992648928030103e-05, |
|
"loss": 1.7059, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.12738853503184713, |
|
"grad_norm": 1.2201507091522217, |
|
"learning_rate": 4.991996156926387e-05, |
|
"loss": 1.6283, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1326963906581741, |
|
"grad_norm": 1.2521358728408813, |
|
"learning_rate": 4.9913156689066806e-05, |
|
"loss": 1.5449, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.13800424628450106, |
|
"grad_norm": 4.661998271942139, |
|
"learning_rate": 4.990607471539626e-05, |
|
"loss": 1.8635, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14331210191082802, |
|
"grad_norm": 2.3003995418548584, |
|
"learning_rate": 4.9898715727020594e-05, |
|
"loss": 1.6994, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.14861995753715498, |
|
"grad_norm": 1.2704275846481323, |
|
"learning_rate": 4.989107980578924e-05, |
|
"loss": 1.6886, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15392781316348195, |
|
"grad_norm": 5.455511093139648, |
|
"learning_rate": 4.988316703663179e-05, |
|
"loss": 1.7095, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1592356687898089, |
|
"grad_norm": 1.3058491945266724, |
|
"learning_rate": 4.987497750755702e-05, |
|
"loss": 1.6245, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16454352441613587, |
|
"grad_norm": 1.2686774730682373, |
|
"learning_rate": 4.986651130965194e-05, |
|
"loss": 1.7859, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.16985138004246284, |
|
"grad_norm": 1.4865705966949463, |
|
"learning_rate": 4.9857768537080784e-05, |
|
"loss": 1.6112, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1751592356687898, |
|
"grad_norm": 1.1640167236328125, |
|
"learning_rate": 4.9848749287083945e-05, |
|
"loss": 1.736, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.18046709129511676, |
|
"grad_norm": 1.2125452756881714, |
|
"learning_rate": 4.983945365997691e-05, |
|
"loss": 1.6853, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18577494692144372, |
|
"grad_norm": 1.4915114641189575, |
|
"learning_rate": 4.9829881759149135e-05, |
|
"loss": 1.6422, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1910828025477707, |
|
"grad_norm": 3.3147950172424316, |
|
"learning_rate": 4.982003369106287e-05, |
|
"loss": 1.5487, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19639065817409768, |
|
"grad_norm": 1.2265527248382568, |
|
"learning_rate": 4.980990956525205e-05, |
|
"loss": 1.6864, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.20169851380042464, |
|
"grad_norm": 1.448042631149292, |
|
"learning_rate": 4.979950949432098e-05, |
|
"loss": 1.5778, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2070063694267516, |
|
"grad_norm": 1.3541866540908813, |
|
"learning_rate": 4.9788833593943166e-05, |
|
"loss": 1.6342, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.21231422505307856, |
|
"grad_norm": 1.1465802192687988, |
|
"learning_rate": 4.977788198285995e-05, |
|
"loss": 1.6218, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21762208067940553, |
|
"grad_norm": 1.4200804233551025, |
|
"learning_rate": 4.976665478287929e-05, |
|
"loss": 1.6393, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.2229299363057325, |
|
"grad_norm": 3.8200623989105225, |
|
"learning_rate": 4.9755152118874294e-05, |
|
"loss": 1.7447, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.22823779193205945, |
|
"grad_norm": 1.822286605834961, |
|
"learning_rate": 4.974337411878191e-05, |
|
"loss": 1.4881, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.23354564755838642, |
|
"grad_norm": 1.3040127754211426, |
|
"learning_rate": 4.9731320913601474e-05, |
|
"loss": 1.6864, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.23885350318471338, |
|
"grad_norm": 1.3640131950378418, |
|
"learning_rate": 4.9718992637393256e-05, |
|
"loss": 1.5177, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.24416135881104034, |
|
"grad_norm": 1.1982786655426025, |
|
"learning_rate": 4.970638942727698e-05, |
|
"loss": 1.6818, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2494692144373673, |
|
"grad_norm": 1.3077235221862793, |
|
"learning_rate": 4.969351142343025e-05, |
|
"loss": 1.5376, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.25477707006369427, |
|
"grad_norm": 1.420879602432251, |
|
"learning_rate": 4.9680358769087076e-05, |
|
"loss": 1.5622, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26008492569002123, |
|
"grad_norm": 1.5823885202407837, |
|
"learning_rate": 4.966693161053621e-05, |
|
"loss": 1.6004, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.2653927813163482, |
|
"grad_norm": 1.6158925294876099, |
|
"learning_rate": 4.965323009711954e-05, |
|
"loss": 1.9304, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27070063694267515, |
|
"grad_norm": 1.4248143434524536, |
|
"learning_rate": 4.963925438123044e-05, |
|
"loss": 1.682, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.2760084925690021, |
|
"grad_norm": 1.3910802602767944, |
|
"learning_rate": 4.962500461831207e-05, |
|
"loss": 1.8035, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2813163481953291, |
|
"grad_norm": 1.4260281324386597, |
|
"learning_rate": 4.9610480966855625e-05, |
|
"loss": 1.5745, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.28662420382165604, |
|
"grad_norm": 9.649313926696777, |
|
"learning_rate": 4.959568358839861e-05, |
|
"loss": 1.5141, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.291932059447983, |
|
"grad_norm": 1.6886736154556274, |
|
"learning_rate": 4.958061264752303e-05, |
|
"loss": 1.6512, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.29723991507430997, |
|
"grad_norm": 1.3794057369232178, |
|
"learning_rate": 4.956526831185353e-05, |
|
"loss": 1.5219, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.30254777070063693, |
|
"grad_norm": 1.388073205947876, |
|
"learning_rate": 4.9549650752055564e-05, |
|
"loss": 1.6123, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.3078556263269639, |
|
"grad_norm": 1.3487626314163208, |
|
"learning_rate": 4.9533760141833506e-05, |
|
"loss": 1.5851, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31316348195329086, |
|
"grad_norm": 1.3812495470046997, |
|
"learning_rate": 4.9517596657928665e-05, |
|
"loss": 1.6599, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.3184713375796178, |
|
"grad_norm": 1.5697531700134277, |
|
"learning_rate": 4.950116048011739e-05, |
|
"loss": 1.5878, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3237791932059448, |
|
"grad_norm": 1.511542558670044, |
|
"learning_rate": 4.9484451791209e-05, |
|
"loss": 1.5842, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.32908704883227174, |
|
"grad_norm": 1.4298487901687622, |
|
"learning_rate": 4.9467470777043806e-05, |
|
"loss": 1.624, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3343949044585987, |
|
"grad_norm": 1.5230979919433594, |
|
"learning_rate": 4.9450217626491016e-05, |
|
"loss": 1.478, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.33970276008492567, |
|
"grad_norm": 1.4259607791900635, |
|
"learning_rate": 4.943269253144664e-05, |
|
"loss": 1.606, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34501061571125263, |
|
"grad_norm": 1.4630590677261353, |
|
"learning_rate": 4.9414895686831376e-05, |
|
"loss": 1.6398, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.3503184713375796, |
|
"grad_norm": 6.577169895172119, |
|
"learning_rate": 4.939682729058839e-05, |
|
"loss": 1.6031, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35562632696390656, |
|
"grad_norm": 1.5499671697616577, |
|
"learning_rate": 4.9378487543681154e-05, |
|
"loss": 1.5839, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.3609341825902335, |
|
"grad_norm": 1.4330203533172607, |
|
"learning_rate": 4.935987665009123e-05, |
|
"loss": 1.6147, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3662420382165605, |
|
"grad_norm": 1.4522225856781006, |
|
"learning_rate": 4.9340994816815946e-05, |
|
"loss": 1.5507, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.37154989384288745, |
|
"grad_norm": 1.4307374954223633, |
|
"learning_rate": 4.9321842253866136e-05, |
|
"loss": 1.617, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37685774946921446, |
|
"grad_norm": 1.288672685623169, |
|
"learning_rate": 4.930241917426379e-05, |
|
"loss": 1.5612, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.3821656050955414, |
|
"grad_norm": 4.4466938972473145, |
|
"learning_rate": 4.928272579403969e-05, |
|
"loss": 1.6811, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3874734607218684, |
|
"grad_norm": 1.278381586074829, |
|
"learning_rate": 4.9262762332230996e-05, |
|
"loss": 1.6635, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.39278131634819535, |
|
"grad_norm": 1.3399561643600464, |
|
"learning_rate": 4.924252901087881e-05, |
|
"loss": 1.5004, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3980891719745223, |
|
"grad_norm": 1.4491037130355835, |
|
"learning_rate": 4.922202605502573e-05, |
|
"loss": 1.5217, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.4033970276008493, |
|
"grad_norm": 1.376114845275879, |
|
"learning_rate": 4.920125369271332e-05, |
|
"loss": 1.5523, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.40870488322717624, |
|
"grad_norm": 1.5188900232315063, |
|
"learning_rate": 4.918021215497958e-05, |
|
"loss": 1.6177, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.4140127388535032, |
|
"grad_norm": 3.417870044708252, |
|
"learning_rate": 4.9158901675856395e-05, |
|
"loss": 1.6203, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41932059447983017, |
|
"grad_norm": 1.3470820188522339, |
|
"learning_rate": 4.913732249236689e-05, |
|
"loss": 1.4859, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.42462845010615713, |
|
"grad_norm": 1.4560948610305786, |
|
"learning_rate": 4.911547484452286e-05, |
|
"loss": 1.6273, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4299363057324841, |
|
"grad_norm": 1.5756402015686035, |
|
"learning_rate": 4.909335897532202e-05, |
|
"loss": 1.7351, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.43524416135881105, |
|
"grad_norm": 1.5693994760513306, |
|
"learning_rate": 4.9070975130745387e-05, |
|
"loss": 1.4738, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.440552016985138, |
|
"grad_norm": 1.669011116027832, |
|
"learning_rate": 4.904832355975445e-05, |
|
"loss": 1.6116, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.445859872611465, |
|
"grad_norm": 1.4707449674606323, |
|
"learning_rate": 4.902540451428849e-05, |
|
"loss": 1.5715, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.45116772823779194, |
|
"grad_norm": 1.2995195388793945, |
|
"learning_rate": 4.900221824926173e-05, |
|
"loss": 1.6486, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.4564755838641189, |
|
"grad_norm": 3.4517061710357666, |
|
"learning_rate": 4.89787650225605e-05, |
|
"loss": 1.6355, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46178343949044587, |
|
"grad_norm": 1.5337308645248413, |
|
"learning_rate": 4.895504509504039e-05, |
|
"loss": 1.6102, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.46709129511677283, |
|
"grad_norm": 3.0765092372894287, |
|
"learning_rate": 4.893105873052333e-05, |
|
"loss": 1.6678, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4723991507430998, |
|
"grad_norm": 1.5984159708023071, |
|
"learning_rate": 4.8906806195794655e-05, |
|
"loss": 1.6586, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.47770700636942676, |
|
"grad_norm": 1.6139107942581177, |
|
"learning_rate": 4.888228776060016e-05, |
|
"loss": 1.447, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4830148619957537, |
|
"grad_norm": 2.6926217079162598, |
|
"learning_rate": 4.8857503697643094e-05, |
|
"loss": 1.6684, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.4883227176220807, |
|
"grad_norm": 7.389099597930908, |
|
"learning_rate": 4.883245428258107e-05, |
|
"loss": 1.6146, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49363057324840764, |
|
"grad_norm": 1.4098495244979858, |
|
"learning_rate": 4.880713979402311e-05, |
|
"loss": 1.4861, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.4989384288747346, |
|
"grad_norm": 1.5015437602996826, |
|
"learning_rate": 4.8781560513526414e-05, |
|
"loss": 1.7288, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5042462845010616, |
|
"grad_norm": 1.5533912181854248, |
|
"learning_rate": 4.875571672559337e-05, |
|
"loss": 1.5165, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.5095541401273885, |
|
"grad_norm": 1.6178662776947021, |
|
"learning_rate": 4.8729608717668265e-05, |
|
"loss": 1.4429, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5148619957537155, |
|
"grad_norm": 1.4526007175445557, |
|
"learning_rate": 4.870323678013415e-05, |
|
"loss": 1.5218, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5201698513800425, |
|
"grad_norm": 1.5645301342010498, |
|
"learning_rate": 4.867660120630962e-05, |
|
"loss": 1.5621, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5254777070063694, |
|
"grad_norm": 1.4064879417419434, |
|
"learning_rate": 4.864970229244552e-05, |
|
"loss": 1.5439, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.5307855626326964, |
|
"grad_norm": 1.5387187004089355, |
|
"learning_rate": 4.862254033772164e-05, |
|
"loss": 1.5439, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5360934182590233, |
|
"grad_norm": 15.129183769226074, |
|
"learning_rate": 4.859511564424345e-05, |
|
"loss": 1.7019, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.5414012738853503, |
|
"grad_norm": 1.44954252243042, |
|
"learning_rate": 4.856742851703866e-05, |
|
"loss": 1.4983, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5467091295116773, |
|
"grad_norm": 1.40328049659729, |
|
"learning_rate": 4.8539479264053896e-05, |
|
"loss": 1.5446, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.5520169851380042, |
|
"grad_norm": 1.4867098331451416, |
|
"learning_rate": 4.8511268196151224e-05, |
|
"loss": 1.5093, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5573248407643312, |
|
"grad_norm": 1.2966865301132202, |
|
"learning_rate": 4.848279562710474e-05, |
|
"loss": 1.5193, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.5626326963906582, |
|
"grad_norm": 1.4985305070877075, |
|
"learning_rate": 4.845406187359701e-05, |
|
"loss": 1.5356, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5679405520169851, |
|
"grad_norm": 1.6481366157531738, |
|
"learning_rate": 4.842506725521565e-05, |
|
"loss": 1.5552, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.5732484076433121, |
|
"grad_norm": 1.7323246002197266, |
|
"learning_rate": 4.839581209444966e-05, |
|
"loss": 1.6082, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.578556263269639, |
|
"grad_norm": 1.4038372039794922, |
|
"learning_rate": 4.8366296716685914e-05, |
|
"loss": 1.7123, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.583864118895966, |
|
"grad_norm": 1.4740629196166992, |
|
"learning_rate": 4.833652145020551e-05, |
|
"loss": 1.5231, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.589171974522293, |
|
"grad_norm": 1.6231038570404053, |
|
"learning_rate": 4.830648662618015e-05, |
|
"loss": 1.3732, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5944798301486199, |
|
"grad_norm": 1.4000989198684692, |
|
"learning_rate": 4.827619257866839e-05, |
|
"loss": 1.7253, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5997876857749469, |
|
"grad_norm": 3.50241756439209, |
|
"learning_rate": 4.8245639644612006e-05, |
|
"loss": 1.4861, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.6050955414012739, |
|
"grad_norm": 2.493551731109619, |
|
"learning_rate": 4.821482816383218e-05, |
|
"loss": 1.5129, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6104033970276008, |
|
"grad_norm": 1.4700591564178467, |
|
"learning_rate": 4.818375847902577e-05, |
|
"loss": 1.4915, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.6157112526539278, |
|
"grad_norm": 1.4178653955459595, |
|
"learning_rate": 4.8152430935761456e-05, |
|
"loss": 1.5438, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6210191082802548, |
|
"grad_norm": 1.6205229759216309, |
|
"learning_rate": 4.812084588247592e-05, |
|
"loss": 1.666, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.6263269639065817, |
|
"grad_norm": 1.566666841506958, |
|
"learning_rate": 4.808900367046999e-05, |
|
"loss": 1.7644, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6316348195329087, |
|
"grad_norm": 1.8027448654174805, |
|
"learning_rate": 4.8056904653904666e-05, |
|
"loss": 1.6192, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.6369426751592356, |
|
"grad_norm": 1.7948691844940186, |
|
"learning_rate": 4.8024549189797276e-05, |
|
"loss": 1.5361, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6422505307855626, |
|
"grad_norm": 1.4708564281463623, |
|
"learning_rate": 4.7991937638017415e-05, |
|
"loss": 1.7171, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.6475583864118896, |
|
"grad_norm": 4.92915678024292, |
|
"learning_rate": 4.795907036128299e-05, |
|
"loss": 1.5913, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6528662420382165, |
|
"grad_norm": 1.3035740852355957, |
|
"learning_rate": 4.792594772515619e-05, |
|
"loss": 1.7267, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.6581740976645435, |
|
"grad_norm": 1.4440399408340454, |
|
"learning_rate": 4.78925700980394e-05, |
|
"loss": 1.6977, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6634819532908705, |
|
"grad_norm": 1.6491578817367554, |
|
"learning_rate": 4.78589378511711e-05, |
|
"loss": 1.6391, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6687898089171974, |
|
"grad_norm": 1.6024360656738281, |
|
"learning_rate": 4.782505135862176e-05, |
|
"loss": 1.6311, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6740976645435244, |
|
"grad_norm": 1.5361950397491455, |
|
"learning_rate": 4.7790910997289664e-05, |
|
"loss": 1.5929, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6794055201698513, |
|
"grad_norm": 1.4991101026535034, |
|
"learning_rate": 4.77565171468967e-05, |
|
"loss": 1.5092, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6847133757961783, |
|
"grad_norm": 4.35531759262085, |
|
"learning_rate": 4.77218701899842e-05, |
|
"loss": 1.6607, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6900212314225053, |
|
"grad_norm": 1.4146044254302979, |
|
"learning_rate": 4.7686970511908594e-05, |
|
"loss": 1.5518, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6953290870488322, |
|
"grad_norm": 1.5280144214630127, |
|
"learning_rate": 4.7651818500837184e-05, |
|
"loss": 1.7207, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.7006369426751592, |
|
"grad_norm": 1.5810437202453613, |
|
"learning_rate": 4.761641454774386e-05, |
|
"loss": 1.4195, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7059447983014862, |
|
"grad_norm": 1.454335331916809, |
|
"learning_rate": 4.758075904640463e-05, |
|
"loss": 1.4806, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.7112526539278131, |
|
"grad_norm": 1.6834053993225098, |
|
"learning_rate": 4.7544852393393375e-05, |
|
"loss": 1.4771, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7165605095541401, |
|
"grad_norm": 1.5010559558868408, |
|
"learning_rate": 4.750869498807735e-05, |
|
"loss": 1.5019, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.721868365180467, |
|
"grad_norm": 1.5334972143173218, |
|
"learning_rate": 4.747228723261278e-05, |
|
"loss": 1.4645, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.727176220806794, |
|
"grad_norm": 1.3904098272323608, |
|
"learning_rate": 4.743562953194039e-05, |
|
"loss": 1.4856, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.732484076433121, |
|
"grad_norm": 5.131705284118652, |
|
"learning_rate": 4.739872229378085e-05, |
|
"loss": 1.6691, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7377919320594479, |
|
"grad_norm": 1.4987908601760864, |
|
"learning_rate": 4.736156592863032e-05, |
|
"loss": 1.581, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.7430997876857749, |
|
"grad_norm": 1.5452443361282349, |
|
"learning_rate": 4.732416084975585e-05, |
|
"loss": 1.5531, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7484076433121019, |
|
"grad_norm": 1.5594438314437866, |
|
"learning_rate": 4.7286507473190736e-05, |
|
"loss": 1.5902, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.7537154989384289, |
|
"grad_norm": 1.5028551816940308, |
|
"learning_rate": 4.724860621772995e-05, |
|
"loss": 1.4885, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7590233545647559, |
|
"grad_norm": 3.062858819961548, |
|
"learning_rate": 4.721045750492549e-05, |
|
"loss": 1.5931, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.7643312101910829, |
|
"grad_norm": 1.6405315399169922, |
|
"learning_rate": 4.717206175908164e-05, |
|
"loss": 1.3859, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7696390658174098, |
|
"grad_norm": 1.4577491283416748, |
|
"learning_rate": 4.713341940725029e-05, |
|
"loss": 1.5765, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.7749469214437368, |
|
"grad_norm": 1.5505036115646362, |
|
"learning_rate": 4.7094530879226166e-05, |
|
"loss": 1.6068, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7802547770700637, |
|
"grad_norm": 1.4415700435638428, |
|
"learning_rate": 4.705539660754207e-05, |
|
"loss": 1.5555, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.7855626326963907, |
|
"grad_norm": 1.699349045753479, |
|
"learning_rate": 4.701601702746405e-05, |
|
"loss": 1.4237, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7908704883227177, |
|
"grad_norm": 1.6142672300338745, |
|
"learning_rate": 4.697639257698657e-05, |
|
"loss": 1.5193, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.7961783439490446, |
|
"grad_norm": 1.498228907585144, |
|
"learning_rate": 4.6936523696827615e-05, |
|
"loss": 1.548, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8014861995753716, |
|
"grad_norm": 1.5121357440948486, |
|
"learning_rate": 4.6896410830423845e-05, |
|
"loss": 1.546, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.8067940552016986, |
|
"grad_norm": 1.4195791482925415, |
|
"learning_rate": 4.685605442392559e-05, |
|
"loss": 1.5297, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8121019108280255, |
|
"grad_norm": 1.5095983743667603, |
|
"learning_rate": 4.681545492619195e-05, |
|
"loss": 1.6098, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.8174097664543525, |
|
"grad_norm": 1.597701072692871, |
|
"learning_rate": 4.677461278878577e-05, |
|
"loss": 1.7606, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8227176220806794, |
|
"grad_norm": 1.5142344236373901, |
|
"learning_rate": 4.673352846596861e-05, |
|
"loss": 1.4081, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.8280254777070064, |
|
"grad_norm": 1.5927125215530396, |
|
"learning_rate": 4.669220241469573e-05, |
|
"loss": 1.4579, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 1.527633547782898, |
|
"learning_rate": 4.665063509461097e-05, |
|
"loss": 1.5787, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.8386411889596603, |
|
"grad_norm": 1.558786153793335, |
|
"learning_rate": 4.660882696804165e-05, |
|
"loss": 1.5751, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8439490445859873, |
|
"grad_norm": 1.5467716455459595, |
|
"learning_rate": 4.656677849999345e-05, |
|
"loss": 1.4025, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.8492569002123143, |
|
"grad_norm": 1.6665401458740234, |
|
"learning_rate": 4.652449015814518e-05, |
|
"loss": 1.5634, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8545647558386412, |
|
"grad_norm": 1.736045479774475, |
|
"learning_rate": 4.648196241284367e-05, |
|
"loss": 1.5068, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.8598726114649682, |
|
"grad_norm": 14.421595573425293, |
|
"learning_rate": 4.643919573709843e-05, |
|
"loss": 1.5791, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8651804670912951, |
|
"grad_norm": 3.725691080093384, |
|
"learning_rate": 4.639619060657648e-05, |
|
"loss": 1.5196, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.8704883227176221, |
|
"grad_norm": 1.8308895826339722, |
|
"learning_rate": 4.6352947499597024e-05, |
|
"loss": 1.5593, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8757961783439491, |
|
"grad_norm": 1.6904733180999756, |
|
"learning_rate": 4.630946689712609e-05, |
|
"loss": 1.568, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.881104033970276, |
|
"grad_norm": 1.5767687559127808, |
|
"learning_rate": 4.626574928277127e-05, |
|
"loss": 1.5503, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.886411889596603, |
|
"grad_norm": 1.6126394271850586, |
|
"learning_rate": 4.622179514277626e-05, |
|
"loss": 1.6526, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.89171974522293, |
|
"grad_norm": 2.0911881923675537, |
|
"learning_rate": 4.618646186075468e-05, |
|
"loss": 1.6366, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8970276008492569, |
|
"grad_norm": 1.9342654943466187, |
|
"learning_rate": 4.614208320833528e-05, |
|
"loss": 1.5226, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.9023354564755839, |
|
"grad_norm": 2.9547078609466553, |
|
"learning_rate": 4.6097469405736174e-05, |
|
"loss": 1.5154, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9076433121019108, |
|
"grad_norm": 1.7412949800491333, |
|
"learning_rate": 4.605262094916878e-05, |
|
"loss": 1.4203, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.9129511677282378, |
|
"grad_norm": 1.4583709239959717, |
|
"learning_rate": 4.6007538337454464e-05, |
|
"loss": 1.4819, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9182590233545648, |
|
"grad_norm": 1.556915521621704, |
|
"learning_rate": 4.5962222072018955e-05, |
|
"loss": 1.4277, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.9235668789808917, |
|
"grad_norm": 1.6333413124084473, |
|
"learning_rate": 4.5916672656886746e-05, |
|
"loss": 1.48, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9288747346072187, |
|
"grad_norm": 1.5821317434310913, |
|
"learning_rate": 4.587089059867552e-05, |
|
"loss": 1.532, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.9341825902335457, |
|
"grad_norm": 1.4887222051620483, |
|
"learning_rate": 4.58248764065905e-05, |
|
"loss": 1.6305, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9394904458598726, |
|
"grad_norm": 1.8513277769088745, |
|
"learning_rate": 4.577863059241879e-05, |
|
"loss": 1.6394, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.9447983014861996, |
|
"grad_norm": 1.4932013750076294, |
|
"learning_rate": 4.573215367052369e-05, |
|
"loss": 1.7202, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9501061571125266, |
|
"grad_norm": 8.590271949768066, |
|
"learning_rate": 4.568544615783894e-05, |
|
"loss": 1.4357, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.9554140127388535, |
|
"grad_norm": 1.458540439605713, |
|
"learning_rate": 4.5638508573863035e-05, |
|
"loss": 1.6818, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9607218683651805, |
|
"grad_norm": 1.7310667037963867, |
|
"learning_rate": 4.559134144065338e-05, |
|
"loss": 1.6905, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.9660297239915074, |
|
"grad_norm": 1.4106065034866333, |
|
"learning_rate": 4.554394528282052e-05, |
|
"loss": 1.5248, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9713375796178344, |
|
"grad_norm": 1.5425328016281128, |
|
"learning_rate": 4.549632062752231e-05, |
|
"loss": 1.5851, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.9766454352441614, |
|
"grad_norm": 1.6904933452606201, |
|
"learning_rate": 4.5448468004458025e-05, |
|
"loss": 1.434, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9819532908704883, |
|
"grad_norm": 12.340048789978027, |
|
"learning_rate": 4.5400387945862486e-05, |
|
"loss": 1.567, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.9872611464968153, |
|
"grad_norm": 8.410961151123047, |
|
"learning_rate": 4.5352080986500135e-05, |
|
"loss": 1.5363, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9925690021231423, |
|
"grad_norm": 1.6052480936050415, |
|
"learning_rate": 4.530354766365911e-05, |
|
"loss": 1.6247, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.9978768577494692, |
|
"grad_norm": 1.6782705783843994, |
|
"learning_rate": 4.525478851714522e-05, |
|
"loss": 1.4887, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.0031847133757963, |
|
"grad_norm": 1.480660080909729, |
|
"learning_rate": 4.5205804089275976e-05, |
|
"loss": 1.441, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.0084925690021231, |
|
"grad_norm": 4.800995349884033, |
|
"learning_rate": 4.5156594924874575e-05, |
|
"loss": 1.5609, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.0138004246284502, |
|
"grad_norm": 1.552259087562561, |
|
"learning_rate": 4.510716157126379e-05, |
|
"loss": 1.5113, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.019108280254777, |
|
"grad_norm": 1.4873735904693604, |
|
"learning_rate": 4.5057504578259924e-05, |
|
"loss": 1.5546, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0244161358811041, |
|
"grad_norm": 1.6509064435958862, |
|
"learning_rate": 4.500762449816668e-05, |
|
"loss": 1.4914, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.029723991507431, |
|
"grad_norm": 2.9540882110595703, |
|
"learning_rate": 4.495752188576902e-05, |
|
"loss": 1.3561, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.035031847133758, |
|
"grad_norm": 1.5996639728546143, |
|
"learning_rate": 4.4907197298327e-05, |
|
"loss": 1.6173, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.040339702760085, |
|
"grad_norm": 1.5103893280029297, |
|
"learning_rate": 4.485665129556954e-05, |
|
"loss": 1.6103, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.045647558386412, |
|
"grad_norm": 1.7204993963241577, |
|
"learning_rate": 4.4805884439688244e-05, |
|
"loss": 1.5181, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.0509554140127388, |
|
"grad_norm": 1.6108498573303223, |
|
"learning_rate": 4.475489729533114e-05, |
|
"loss": 1.5974, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.056263269639066, |
|
"grad_norm": 1.7573896646499634, |
|
"learning_rate": 4.470369042959637e-05, |
|
"loss": 1.4313, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.0615711252653928, |
|
"grad_norm": 1.7552425861358643, |
|
"learning_rate": 4.465226441202589e-05, |
|
"loss": 1.4407, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0668789808917198, |
|
"grad_norm": 1.6480523347854614, |
|
"learning_rate": 4.460061981459917e-05, |
|
"loss": 1.597, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.0721868365180467, |
|
"grad_norm": 2.603396415710449, |
|
"learning_rate": 4.454875721172679e-05, |
|
"loss": 1.5327, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.0774946921443738, |
|
"grad_norm": 1.5323936939239502, |
|
"learning_rate": 4.4496677180244065e-05, |
|
"loss": 1.5541, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.0828025477707006, |
|
"grad_norm": 1.6930556297302246, |
|
"learning_rate": 4.444438029940465e-05, |
|
"loss": 1.5251, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.0881104033970277, |
|
"grad_norm": 1.7261557579040527, |
|
"learning_rate": 4.439186715087406e-05, |
|
"loss": 1.603, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.0934182590233545, |
|
"grad_norm": 3.680421829223633, |
|
"learning_rate": 4.4339138318723246e-05, |
|
"loss": 1.529, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.0987261146496816, |
|
"grad_norm": 1.6117990016937256, |
|
"learning_rate": 4.428619438942204e-05, |
|
"loss": 1.6533, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.1040339702760085, |
|
"grad_norm": 1.8256531953811646, |
|
"learning_rate": 4.42330359518327e-05, |
|
"loss": 1.5175, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.1093418259023355, |
|
"grad_norm": 1.751794457435608, |
|
"learning_rate": 4.417966359720329e-05, |
|
"loss": 1.5462, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.1146496815286624, |
|
"grad_norm": 1.6888757944107056, |
|
"learning_rate": 4.4126077919161165e-05, |
|
"loss": 1.5416, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.1199575371549895, |
|
"grad_norm": 1.6523631811141968, |
|
"learning_rate": 4.407227951370635e-05, |
|
"loss": 1.5035, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.1252653927813163, |
|
"grad_norm": 1.532172441482544, |
|
"learning_rate": 4.401826897920487e-05, |
|
"loss": 1.5502, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1305732484076434, |
|
"grad_norm": 1.8720592260360718, |
|
"learning_rate": 4.396404691638215e-05, |
|
"loss": 1.5217, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.1358811040339702, |
|
"grad_norm": 1.553623080253601, |
|
"learning_rate": 4.390961392831633e-05, |
|
"loss": 1.4841, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.1411889596602973, |
|
"grad_norm": 3.326525926589966, |
|
"learning_rate": 4.38549706204315e-05, |
|
"loss": 1.5746, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.1464968152866242, |
|
"grad_norm": 1.6932830810546875, |
|
"learning_rate": 4.380011760049104e-05, |
|
"loss": 1.4295, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.1518046709129512, |
|
"grad_norm": 1.6742303371429443, |
|
"learning_rate": 4.37450554785908e-05, |
|
"loss": 1.6131, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.157112526539278, |
|
"grad_norm": 1.4667819738388062, |
|
"learning_rate": 4.368978486715237e-05, |
|
"loss": 1.5901, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.1624203821656052, |
|
"grad_norm": 1.6580276489257812, |
|
"learning_rate": 4.363430638091621e-05, |
|
"loss": 1.4339, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.167728237791932, |
|
"grad_norm": 1.791914939880371, |
|
"learning_rate": 4.357862063693486e-05, |
|
"loss": 1.6448, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.173036093418259, |
|
"grad_norm": 1.6610525846481323, |
|
"learning_rate": 4.352272825456605e-05, |
|
"loss": 1.4427, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.178343949044586, |
|
"grad_norm": 1.6194666624069214, |
|
"learning_rate": 4.346662985546581e-05, |
|
"loss": 1.5659, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.183651804670913, |
|
"grad_norm": 1.7261152267456055, |
|
"learning_rate": 4.34103260635816e-05, |
|
"loss": 1.7018, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.1889596602972399, |
|
"grad_norm": 1.4662343263626099, |
|
"learning_rate": 4.335381750514529e-05, |
|
"loss": 1.3376, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.194267515923567, |
|
"grad_norm": 1.6291650533676147, |
|
"learning_rate": 4.329710480866627e-05, |
|
"loss": 1.5875, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.1995753715498938, |
|
"grad_norm": 1.7333427667617798, |
|
"learning_rate": 4.3240188604924436e-05, |
|
"loss": 1.6739, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.2048832271762209, |
|
"grad_norm": 1.6119394302368164, |
|
"learning_rate": 4.3183069526963135e-05, |
|
"loss": 1.5353, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.2101910828025477, |
|
"grad_norm": 1.6907188892364502, |
|
"learning_rate": 4.312574821008219e-05, |
|
"loss": 1.4782, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.2154989384288748, |
|
"grad_norm": 1.735899567604065, |
|
"learning_rate": 4.30682252918308e-05, |
|
"loss": 1.6236, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.2208067940552016, |
|
"grad_norm": 1.4847278594970703, |
|
"learning_rate": 4.301050141200041e-05, |
|
"loss": 1.5917, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2261146496815287, |
|
"grad_norm": 1.5689457654953003, |
|
"learning_rate": 4.295257721261768e-05, |
|
"loss": 1.4878, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.2314225053078556, |
|
"grad_norm": 1.6647305488586426, |
|
"learning_rate": 4.289445333793728e-05, |
|
"loss": 1.4494, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.2367303609341826, |
|
"grad_norm": 1.6868528127670288, |
|
"learning_rate": 4.283613043443474e-05, |
|
"loss": 1.4505, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.2420382165605095, |
|
"grad_norm": 1.5382746458053589, |
|
"learning_rate": 4.277760915079928e-05, |
|
"loss": 1.4367, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.2473460721868366, |
|
"grad_norm": 1.7457520961761475, |
|
"learning_rate": 4.271889013792656e-05, |
|
"loss": 1.5249, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.2526539278131634, |
|
"grad_norm": 1.7499550580978394, |
|
"learning_rate": 4.2659974048911474e-05, |
|
"loss": 1.4727, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.2579617834394905, |
|
"grad_norm": 1.7218068838119507, |
|
"learning_rate": 4.2600861539040845e-05, |
|
"loss": 1.47, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.2632696390658174, |
|
"grad_norm": 1.6986812353134155, |
|
"learning_rate": 4.254155326578621e-05, |
|
"loss": 1.4663, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.2685774946921444, |
|
"grad_norm": 1.8053547143936157, |
|
"learning_rate": 4.2482049888796406e-05, |
|
"loss": 1.5941, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.2738853503184713, |
|
"grad_norm": 1.7940459251403809, |
|
"learning_rate": 4.242235206989032e-05, |
|
"loss": 1.4495, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.2791932059447984, |
|
"grad_norm": 1.8331998586654663, |
|
"learning_rate": 4.236246047304949e-05, |
|
"loss": 1.4658, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.2845010615711252, |
|
"grad_norm": 1.748568058013916, |
|
"learning_rate": 4.2302375764410706e-05, |
|
"loss": 1.5562, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.2898089171974523, |
|
"grad_norm": 3.9075028896331787, |
|
"learning_rate": 4.224209861225865e-05, |
|
"loss": 1.6023, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.2951167728237791, |
|
"grad_norm": 1.7952510118484497, |
|
"learning_rate": 4.218162968701842e-05, |
|
"loss": 1.5116, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.3004246284501062, |
|
"grad_norm": 9.50228500366211, |
|
"learning_rate": 4.212096966124807e-05, |
|
"loss": 1.6317, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.305732484076433, |
|
"grad_norm": 1.3332840204238892, |
|
"learning_rate": 4.206011920963117e-05, |
|
"loss": 1.3332, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.3110403397027601, |
|
"grad_norm": 1.7881704568862915, |
|
"learning_rate": 4.1999079008969264e-05, |
|
"loss": 1.5414, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.316348195329087, |
|
"grad_norm": 1.5981987714767456, |
|
"learning_rate": 4.1937849738174364e-05, |
|
"loss": 1.2791, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.321656050955414, |
|
"grad_norm": 1.5826878547668457, |
|
"learning_rate": 4.187643207826137e-05, |
|
"loss": 1.5198, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.326963906581741, |
|
"grad_norm": 1.590627670288086, |
|
"learning_rate": 4.181482671234056e-05, |
|
"loss": 1.5467, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.332271762208068, |
|
"grad_norm": 2.814920425415039, |
|
"learning_rate": 4.17530343256099e-05, |
|
"loss": 1.4888, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.3375796178343948, |
|
"grad_norm": 1.7269675731658936, |
|
"learning_rate": 4.16910556053475e-05, |
|
"loss": 1.5527, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.342887473460722, |
|
"grad_norm": 1.7376699447631836, |
|
"learning_rate": 4.162889124090394e-05, |
|
"loss": 1.5451, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.3481953290870488, |
|
"grad_norm": 1.6660183668136597, |
|
"learning_rate": 4.1566541923694594e-05, |
|
"loss": 1.5799, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.3535031847133758, |
|
"grad_norm": 1.6107293367385864, |
|
"learning_rate": 4.150400834719195e-05, |
|
"loss": 1.5069, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.3588110403397027, |
|
"grad_norm": 1.662691593170166, |
|
"learning_rate": 4.144129120691791e-05, |
|
"loss": 1.5886, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.3641188959660298, |
|
"grad_norm": 1.9366472959518433, |
|
"learning_rate": 4.137839120043603e-05, |
|
"loss": 1.4109, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.3694267515923566, |
|
"grad_norm": 1.5821161270141602, |
|
"learning_rate": 4.1315309027343774e-05, |
|
"loss": 1.4114, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.3747346072186837, |
|
"grad_norm": 1.6319890022277832, |
|
"learning_rate": 4.125204538926474e-05, |
|
"loss": 1.5181, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 1.3800424628450108, |
|
"grad_norm": 5.780959129333496, |
|
"learning_rate": 4.118860098984083e-05, |
|
"loss": 1.5228, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.3853503184713376, |
|
"grad_norm": 1.6391093730926514, |
|
"learning_rate": 4.112497653472446e-05, |
|
"loss": 1.5428, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.3906581740976645, |
|
"grad_norm": 4.674883842468262, |
|
"learning_rate": 4.106117273157068e-05, |
|
"loss": 1.553, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.3959660297239915, |
|
"grad_norm": 1.5224674940109253, |
|
"learning_rate": 4.099719029002932e-05, |
|
"loss": 1.4488, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 1.4012738853503186, |
|
"grad_norm": 1.7352584600448608, |
|
"learning_rate": 4.09330299217371e-05, |
|
"loss": 1.4543, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.4065817409766455, |
|
"grad_norm": 1.31924569606781, |
|
"learning_rate": 4.086869234030969e-05, |
|
"loss": 1.488, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.4118895966029723, |
|
"grad_norm": 1.72556471824646, |
|
"learning_rate": 4.0804178261333826e-05, |
|
"loss": 1.5535, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.4171974522292994, |
|
"grad_norm": 1.706010341644287, |
|
"learning_rate": 4.073948840235928e-05, |
|
"loss": 1.6833, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 1.4225053078556265, |
|
"grad_norm": 1.762577772140503, |
|
"learning_rate": 4.067462348289092e-05, |
|
"loss": 1.4062, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.4278131634819533, |
|
"grad_norm": 1.6299678087234497, |
|
"learning_rate": 4.060958422438072e-05, |
|
"loss": 1.4231, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 1.4331210191082802, |
|
"grad_norm": 2.0553765296936035, |
|
"learning_rate": 4.0544371350219716e-05, |
|
"loss": 1.5546, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4384288747346072, |
|
"grad_norm": 1.8095347881317139, |
|
"learning_rate": 4.0478985585729946e-05, |
|
"loss": 1.3377, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 1.4437367303609343, |
|
"grad_norm": 1.8389967679977417, |
|
"learning_rate": 4.041342765815641e-05, |
|
"loss": 1.5391, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.4490445859872612, |
|
"grad_norm": 3.814575433731079, |
|
"learning_rate": 4.0347698296658966e-05, |
|
"loss": 1.6319, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 1.454352441613588, |
|
"grad_norm": 1.799367070198059, |
|
"learning_rate": 4.028179823230423e-05, |
|
"loss": 1.5563, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.459660297239915, |
|
"grad_norm": 1.7153081893920898, |
|
"learning_rate": 4.021572819805744e-05, |
|
"loss": 1.5521, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.4649681528662422, |
|
"grad_norm": 1.6066639423370361, |
|
"learning_rate": 4.014948892877429e-05, |
|
"loss": 1.4111, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.470276008492569, |
|
"grad_norm": 1.7587813138961792, |
|
"learning_rate": 4.008308116119279e-05, |
|
"loss": 1.5377, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 1.4755838641188959, |
|
"grad_norm": 1.7587300539016724, |
|
"learning_rate": 4.001650563392504e-05, |
|
"loss": 1.5002, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.480891719745223, |
|
"grad_norm": 1.636374592781067, |
|
"learning_rate": 3.994976308744901e-05, |
|
"loss": 1.5244, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 1.48619957537155, |
|
"grad_norm": 8.85874080657959, |
|
"learning_rate": 3.988285426410036e-05, |
|
"loss": 1.5959, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.4915074309978769, |
|
"grad_norm": 1.6643364429473877, |
|
"learning_rate": 3.98157799080641e-05, |
|
"loss": 1.4482, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 1.4968152866242037, |
|
"grad_norm": 1.6281925439834595, |
|
"learning_rate": 3.974854076536639e-05, |
|
"loss": 1.377, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.5021231422505308, |
|
"grad_norm": 1.9073539972305298, |
|
"learning_rate": 3.968113758386619e-05, |
|
"loss": 1.4558, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 1.5074309978768579, |
|
"grad_norm": 1.6521536111831665, |
|
"learning_rate": 3.9613571113246974e-05, |
|
"loss": 1.5093, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.5127388535031847, |
|
"grad_norm": 1.5043442249298096, |
|
"learning_rate": 3.954584210500837e-05, |
|
"loss": 1.3886, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.5180467091295116, |
|
"grad_norm": 1.8989366292953491, |
|
"learning_rate": 3.94779513124578e-05, |
|
"loss": 1.5509, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.5233545647558386, |
|
"grad_norm": 1.6416149139404297, |
|
"learning_rate": 3.940989949070214e-05, |
|
"loss": 1.4652, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 1.5286624203821657, |
|
"grad_norm": 2.7976372241973877, |
|
"learning_rate": 3.934168739663927e-05, |
|
"loss": 1.363, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5339702760084926, |
|
"grad_norm": 1.847293734550476, |
|
"learning_rate": 3.9273315788949686e-05, |
|
"loss": 1.4779, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 1.5392781316348194, |
|
"grad_norm": 1.6267356872558594, |
|
"learning_rate": 3.920478542808805e-05, |
|
"loss": 1.4931, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.5445859872611465, |
|
"grad_norm": 1.7588109970092773, |
|
"learning_rate": 3.913609707627476e-05, |
|
"loss": 1.4393, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 1.5498938428874736, |
|
"grad_norm": 1.7333145141601562, |
|
"learning_rate": 3.906725149748741e-05, |
|
"loss": 1.5746, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.5552016985138004, |
|
"grad_norm": 1.7868926525115967, |
|
"learning_rate": 3.899824945745236e-05, |
|
"loss": 1.4401, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 1.5605095541401273, |
|
"grad_norm": 1.4039024114608765, |
|
"learning_rate": 3.892909172363617e-05, |
|
"loss": 1.3735, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.5658174097664543, |
|
"grad_norm": 1.9331457614898682, |
|
"learning_rate": 3.8859779065237115e-05, |
|
"loss": 1.543, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.5711252653927814, |
|
"grad_norm": 1.744489312171936, |
|
"learning_rate": 3.879031225317656e-05, |
|
"loss": 1.5235, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.5764331210191083, |
|
"grad_norm": 1.8111132383346558, |
|
"learning_rate": 3.872069206009047e-05, |
|
"loss": 1.4448, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 1.5817409766454351, |
|
"grad_norm": 1.7649325132369995, |
|
"learning_rate": 3.865091926032072e-05, |
|
"loss": 1.4324, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.5870488322717622, |
|
"grad_norm": 1.977229118347168, |
|
"learning_rate": 3.858099462990658e-05, |
|
"loss": 1.458, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 1.5923566878980893, |
|
"grad_norm": 1.7111470699310303, |
|
"learning_rate": 3.851091894657601e-05, |
|
"loss": 1.5631, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5976645435244161, |
|
"grad_norm": 1.9638622999191284, |
|
"learning_rate": 3.8440692989737044e-05, |
|
"loss": 1.6272, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 1.602972399150743, |
|
"grad_norm": 1.7026537656784058, |
|
"learning_rate": 3.837031754046911e-05, |
|
"loss": 1.4667, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.60828025477707, |
|
"grad_norm": 1.6524882316589355, |
|
"learning_rate": 3.829979338151437e-05, |
|
"loss": 1.3998, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 1.6135881104033971, |
|
"grad_norm": 1.5562883615493774, |
|
"learning_rate": 3.822912129726896e-05, |
|
"loss": 1.5495, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.618895966029724, |
|
"grad_norm": 1.3875113725662231, |
|
"learning_rate": 3.815830207377431e-05, |
|
"loss": 1.4045, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.6242038216560508, |
|
"grad_norm": 2.9664599895477295, |
|
"learning_rate": 3.808733649870839e-05, |
|
"loss": 1.3617, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.629511677282378, |
|
"grad_norm": 1.9497058391571045, |
|
"learning_rate": 3.801622536137694e-05, |
|
"loss": 1.6036, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 1.634819532908705, |
|
"grad_norm": 1.8934732675552368, |
|
"learning_rate": 3.794496945270471e-05, |
|
"loss": 1.4382, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6401273885350318, |
|
"grad_norm": 2.006883382797241, |
|
"learning_rate": 3.787356956522665e-05, |
|
"loss": 1.4724, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 1.6454352441613587, |
|
"grad_norm": 1.51792573928833, |
|
"learning_rate": 3.780202649307907e-05, |
|
"loss": 1.3992, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.6507430997876857, |
|
"grad_norm": 1.7015622854232788, |
|
"learning_rate": 3.7730341031990875e-05, |
|
"loss": 1.5489, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.6560509554140128, |
|
"grad_norm": 1.560760259628296, |
|
"learning_rate": 3.765851397927463e-05, |
|
"loss": 1.4211, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.6613588110403397, |
|
"grad_norm": 1.7241127490997314, |
|
"learning_rate": 3.758654613381778e-05, |
|
"loss": 1.506, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 1.5885661840438843, |
|
"learning_rate": 3.751443829607368e-05, |
|
"loss": 1.4212, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.6719745222929936, |
|
"grad_norm": 1.660274624824524, |
|
"learning_rate": 3.744219126805276e-05, |
|
"loss": 1.2287, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.6772823779193207, |
|
"grad_norm": 1.8777093887329102, |
|
"learning_rate": 3.736980585331355e-05, |
|
"loss": 1.52, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.6825902335456475, |
|
"grad_norm": 1.9632158279418945, |
|
"learning_rate": 3.729728285695381e-05, |
|
"loss": 1.4532, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 1.6878980891719744, |
|
"grad_norm": 1.859124779701233, |
|
"learning_rate": 3.7224623085601474e-05, |
|
"loss": 1.6036, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.6932059447983014, |
|
"grad_norm": 3.9819421768188477, |
|
"learning_rate": 3.7151827347405806e-05, |
|
"loss": 1.7094, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 1.6985138004246285, |
|
"grad_norm": 1.9998877048492432, |
|
"learning_rate": 3.707889645202829e-05, |
|
"loss": 1.393, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.7038216560509554, |
|
"grad_norm": 1.7848412990570068, |
|
"learning_rate": 3.700583121063371e-05, |
|
"loss": 1.4604, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 1.7091295116772822, |
|
"grad_norm": 2.515498638153076, |
|
"learning_rate": 3.693263243588109e-05, |
|
"loss": 1.465, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.7144373673036093, |
|
"grad_norm": 1.8479849100112915, |
|
"learning_rate": 3.6859300941914645e-05, |
|
"loss": 1.6931, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 1.7197452229299364, |
|
"grad_norm": 1.7097549438476562, |
|
"learning_rate": 3.6785837544354774e-05, |
|
"loss": 1.547, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.7250530785562632, |
|
"grad_norm": 1.6838785409927368, |
|
"learning_rate": 3.671224306028893e-05, |
|
"loss": 1.3985, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.73036093418259, |
|
"grad_norm": 1.7739403247833252, |
|
"learning_rate": 3.6638518308262565e-05, |
|
"loss": 1.4027, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.7356687898089171, |
|
"grad_norm": 1.8597843647003174, |
|
"learning_rate": 3.656466410827004e-05, |
|
"loss": 1.492, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 1.7409766454352442, |
|
"grad_norm": 2.0825037956237793, |
|
"learning_rate": 3.649068128174546e-05, |
|
"loss": 1.5483, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.746284501061571, |
|
"grad_norm": 6.958364486694336, |
|
"learning_rate": 3.641657065155358e-05, |
|
"loss": 1.5487, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 1.7515923566878981, |
|
"grad_norm": 1.7793304920196533, |
|
"learning_rate": 3.634233304198061e-05, |
|
"loss": 1.3823, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.7569002123142252, |
|
"grad_norm": 1.587827444076538, |
|
"learning_rate": 3.626796927872511e-05, |
|
"loss": 1.506, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 1.762208067940552, |
|
"grad_norm": 1.9246413707733154, |
|
"learning_rate": 3.619348018888873e-05, |
|
"loss": 1.5549, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.767515923566879, |
|
"grad_norm": 1.54891836643219, |
|
"learning_rate": 3.611886660096709e-05, |
|
"loss": 1.5131, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 1.772823779193206, |
|
"grad_norm": 1.9341977834701538, |
|
"learning_rate": 3.604412934484048e-05, |
|
"loss": 1.584, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.778131634819533, |
|
"grad_norm": 1.5830014944076538, |
|
"learning_rate": 3.5969269251764704e-05, |
|
"loss": 1.5922, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.78343949044586, |
|
"grad_norm": 1.724741816520691, |
|
"learning_rate": 3.58942871543618e-05, |
|
"loss": 1.3407, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.7887473460721868, |
|
"grad_norm": 1.831621766090393, |
|
"learning_rate": 3.581918388661078e-05, |
|
"loss": 1.5302, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 1.7940552016985138, |
|
"grad_norm": 1.8564783334732056, |
|
"learning_rate": 3.5743960283838355e-05, |
|
"loss": 1.5634, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.799363057324841, |
|
"grad_norm": 1.8462448120117188, |
|
"learning_rate": 3.566861718270966e-05, |
|
"loss": 1.4205, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 1.8046709129511678, |
|
"grad_norm": 1.8261650800704956, |
|
"learning_rate": 3.5593155421218914e-05, |
|
"loss": 1.4333, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.8099787685774946, |
|
"grad_norm": 2.0608906745910645, |
|
"learning_rate": 3.5517575838680144e-05, |
|
"loss": 1.427, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 1.8152866242038217, |
|
"grad_norm": 1.8263474702835083, |
|
"learning_rate": 3.544187927571781e-05, |
|
"loss": 1.4824, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.8205944798301488, |
|
"grad_norm": 1.9386931657791138, |
|
"learning_rate": 3.5366066574257486e-05, |
|
"loss": 1.3078, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 1.8259023354564756, |
|
"grad_norm": 1.8082537651062012, |
|
"learning_rate": 3.5290138577516455e-05, |
|
"loss": 1.4363, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.8312101910828025, |
|
"grad_norm": 1.8778493404388428, |
|
"learning_rate": 3.52140961299944e-05, |
|
"loss": 1.3782, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.8365180467091295, |
|
"grad_norm": 3.402137279510498, |
|
"learning_rate": 3.513794007746394e-05, |
|
"loss": 1.5746, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8418259023354566, |
|
"grad_norm": 1.8941349983215332, |
|
"learning_rate": 3.506167126696125e-05, |
|
"loss": 1.4293, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 1.8471337579617835, |
|
"grad_norm": 1.9133306741714478, |
|
"learning_rate": 3.498529054677665e-05, |
|
"loss": 1.5387, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.8524416135881103, |
|
"grad_norm": 1.6595460176467896, |
|
"learning_rate": 3.4908798766445163e-05, |
|
"loss": 1.4309, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 1.8577494692144374, |
|
"grad_norm": 1.7001606225967407, |
|
"learning_rate": 3.483219677673706e-05, |
|
"loss": 1.444, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.8630573248407645, |
|
"grad_norm": 1.6574691534042358, |
|
"learning_rate": 3.4755485429648404e-05, |
|
"loss": 1.4694, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 1.8683651804670913, |
|
"grad_norm": 1.6714574098587036, |
|
"learning_rate": 3.467866557839157e-05, |
|
"loss": 1.5645, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.8736730360934182, |
|
"grad_norm": 1.8455125093460083, |
|
"learning_rate": 3.4601738077385765e-05, |
|
"loss": 1.3651, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 1.8789808917197452, |
|
"grad_norm": 1.527896761894226, |
|
"learning_rate": 3.452470378224749e-05, |
|
"loss": 1.3828, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.8842887473460723, |
|
"grad_norm": 1.6732441186904907, |
|
"learning_rate": 3.4447563549781104e-05, |
|
"loss": 1.422, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.8895966029723992, |
|
"grad_norm": 2.028780698776245, |
|
"learning_rate": 3.437031823796918e-05, |
|
"loss": 1.6961, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.894904458598726, |
|
"grad_norm": 1.666979432106018, |
|
"learning_rate": 3.4292968705963057e-05, |
|
"loss": 1.4066, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 1.900212314225053, |
|
"grad_norm": 1.9285733699798584, |
|
"learning_rate": 3.4215515814073254e-05, |
|
"loss": 1.3729, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.9055201698513802, |
|
"grad_norm": 1.876514196395874, |
|
"learning_rate": 3.413796042375987e-05, |
|
"loss": 1.5066, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 1.910828025477707, |
|
"grad_norm": 1.6777039766311646, |
|
"learning_rate": 3.4060303397623054e-05, |
|
"loss": 1.5205, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.9161358811040339, |
|
"grad_norm": 1.6733899116516113, |
|
"learning_rate": 3.398254559939339e-05, |
|
"loss": 1.4899, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 1.921443736730361, |
|
"grad_norm": 1.869210124015808, |
|
"learning_rate": 3.390468789392226e-05, |
|
"loss": 1.2822, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.926751592356688, |
|
"grad_norm": 1.5251469612121582, |
|
"learning_rate": 3.382673114717228e-05, |
|
"loss": 1.4774, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 1.9320594479830149, |
|
"grad_norm": 1.7720097303390503, |
|
"learning_rate": 3.3748676226207615e-05, |
|
"loss": 1.4899, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.9373673036093417, |
|
"grad_norm": 1.8252582550048828, |
|
"learning_rate": 3.367052399918439e-05, |
|
"loss": 1.5476, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.9426751592356688, |
|
"grad_norm": 1.5934362411499023, |
|
"learning_rate": 3.359227533534097e-05, |
|
"loss": 1.491, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9479830148619959, |
|
"grad_norm": 1.8928519487380981, |
|
"learning_rate": 3.3513931104988374e-05, |
|
"loss": 1.4503, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 1.9532908704883227, |
|
"grad_norm": 2.1186277866363525, |
|
"learning_rate": 3.3435492179500485e-05, |
|
"loss": 1.5802, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.9585987261146496, |
|
"grad_norm": 1.6814011335372925, |
|
"learning_rate": 3.3356959431304474e-05, |
|
"loss": 1.5618, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 1.9639065817409767, |
|
"grad_norm": 1.7327566146850586, |
|
"learning_rate": 3.327833373387101e-05, |
|
"loss": 1.5079, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.9692144373673037, |
|
"grad_norm": 1.7963391542434692, |
|
"learning_rate": 3.3199615961704614e-05, |
|
"loss": 1.4489, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 1.9745222929936306, |
|
"grad_norm": 1.9455621242523193, |
|
"learning_rate": 3.312080699033386e-05, |
|
"loss": 1.4823, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.9798301486199574, |
|
"grad_norm": 1.7423186302185059, |
|
"learning_rate": 3.304190769630169e-05, |
|
"loss": 1.51, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 1.9851380042462845, |
|
"grad_norm": 1.8353419303894043, |
|
"learning_rate": 3.2962918957155645e-05, |
|
"loss": 1.5076, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.9904458598726116, |
|
"grad_norm": 1.960813283920288, |
|
"learning_rate": 3.288384165143811e-05, |
|
"loss": 1.4509, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.9957537154989384, |
|
"grad_norm": 1.7254458665847778, |
|
"learning_rate": 3.280467665867654e-05, |
|
"loss": 1.4408, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.0010615711252653, |
|
"grad_norm": 1.7819510698318481, |
|
"learning_rate": 3.272542485937369e-05, |
|
"loss": 1.4863, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 2.0063694267515926, |
|
"grad_norm": 1.7719825506210327, |
|
"learning_rate": 3.2646087134997784e-05, |
|
"loss": 1.4181, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.0116772823779194, |
|
"grad_norm": 2.0865135192871094, |
|
"learning_rate": 3.256666436797276e-05, |
|
"loss": 1.5429, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 2.0169851380042463, |
|
"grad_norm": 3.6327757835388184, |
|
"learning_rate": 3.2487157441668415e-05, |
|
"loss": 1.5906, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.022292993630573, |
|
"grad_norm": 1.6347745656967163, |
|
"learning_rate": 3.240756724039062e-05, |
|
"loss": 1.4776, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 2.0276008492569004, |
|
"grad_norm": 1.8393852710723877, |
|
"learning_rate": 3.2327894649371435e-05, |
|
"loss": 1.3918, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.0329087048832273, |
|
"grad_norm": 1.8218520879745483, |
|
"learning_rate": 3.224814055475932e-05, |
|
"loss": 1.4173, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 2.038216560509554, |
|
"grad_norm": 1.8101989030838013, |
|
"learning_rate": 3.21683058436092e-05, |
|
"loss": 1.4378, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.043524416135881, |
|
"grad_norm": 1.9730515480041504, |
|
"learning_rate": 3.208839140387271e-05, |
|
"loss": 1.3769, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 2.0488322717622083, |
|
"grad_norm": 2.008378505706787, |
|
"learning_rate": 3.200839812438821e-05, |
|
"loss": 1.3861, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.054140127388535, |
|
"grad_norm": 2.0991263389587402, |
|
"learning_rate": 3.192832689487095e-05, |
|
"loss": 1.5276, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 2.059447983014862, |
|
"grad_norm": 1.616385579109192, |
|
"learning_rate": 3.184817860590319e-05, |
|
"loss": 1.4331, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.064755838641189, |
|
"grad_norm": 1.628811240196228, |
|
"learning_rate": 3.176795414892427e-05, |
|
"loss": 1.3525, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 2.070063694267516, |
|
"grad_norm": 2.066753625869751, |
|
"learning_rate": 3.1687654416220666e-05, |
|
"loss": 1.3573, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.075371549893843, |
|
"grad_norm": 1.9191819429397583, |
|
"learning_rate": 3.160728030091616e-05, |
|
"loss": 1.5621, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 2.08067940552017, |
|
"grad_norm": 2.1518616676330566, |
|
"learning_rate": 3.152683269696179e-05, |
|
"loss": 1.4343, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.0859872611464967, |
|
"grad_norm": 1.8996518850326538, |
|
"learning_rate": 3.1446312499125986e-05, |
|
"loss": 1.4251, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 2.091295116772824, |
|
"grad_norm": 1.9120466709136963, |
|
"learning_rate": 3.1365720602984586e-05, |
|
"loss": 1.4178, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.096602972399151, |
|
"grad_norm": 1.6327637434005737, |
|
"learning_rate": 3.12850579049109e-05, |
|
"loss": 1.4255, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 2.1019108280254777, |
|
"grad_norm": 1.9733164310455322, |
|
"learning_rate": 3.120432530206569e-05, |
|
"loss": 1.5019, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.1072186836518045, |
|
"grad_norm": 1.8080165386199951, |
|
"learning_rate": 3.112352369238728e-05, |
|
"loss": 1.4403, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 2.112526539278132, |
|
"grad_norm": 1.841697096824646, |
|
"learning_rate": 3.104265397458146e-05, |
|
"loss": 1.4666, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.1178343949044587, |
|
"grad_norm": 1.9103078842163086, |
|
"learning_rate": 3.096171704811156e-05, |
|
"loss": 1.3622, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 2.1231422505307855, |
|
"grad_norm": 2.059898614883423, |
|
"learning_rate": 3.088071381318845e-05, |
|
"loss": 1.4161, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.1284501061571124, |
|
"grad_norm": 2.0670325756073, |
|
"learning_rate": 3.0799645170760486e-05, |
|
"loss": 1.4092, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 2.1337579617834397, |
|
"grad_norm": 1.5905953645706177, |
|
"learning_rate": 3.071851202250352e-05, |
|
"loss": 1.4295, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.1390658174097665, |
|
"grad_norm": 1.9497631788253784, |
|
"learning_rate": 3.063731527081086e-05, |
|
"loss": 1.4194, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 2.1443736730360934, |
|
"grad_norm": 4.793402671813965, |
|
"learning_rate": 3.055605581878322e-05, |
|
"loss": 1.4232, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.1496815286624202, |
|
"grad_norm": 2.07564115524292, |
|
"learning_rate": 3.0474734570218732e-05, |
|
"loss": 1.4475, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 2.1549893842887475, |
|
"grad_norm": 1.9304168224334717, |
|
"learning_rate": 3.03933524296028e-05, |
|
"loss": 1.4494, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.1602972399150744, |
|
"grad_norm": 2.044576644897461, |
|
"learning_rate": 3.031191030209814e-05, |
|
"loss": 1.5126, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 2.1656050955414012, |
|
"grad_norm": 1.8235676288604736, |
|
"learning_rate": 3.0230409093534622e-05, |
|
"loss": 1.4385, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.170912951167728, |
|
"grad_norm": 1.870332956314087, |
|
"learning_rate": 3.0148849710399278e-05, |
|
"loss": 1.4701, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 2.1762208067940554, |
|
"grad_norm": 4.544968128204346, |
|
"learning_rate": 3.0067233059826143e-05, |
|
"loss": 1.5572, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.1815286624203822, |
|
"grad_norm": 1.9169080257415771, |
|
"learning_rate": 2.9985560049586237e-05, |
|
"loss": 1.4814, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 2.186836518046709, |
|
"grad_norm": 2.15110445022583, |
|
"learning_rate": 2.9903831588077392e-05, |
|
"loss": 1.6031, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.192144373673036, |
|
"grad_norm": 2.1074917316436768, |
|
"learning_rate": 2.9822048584314228e-05, |
|
"loss": 1.3167, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 2.1974522292993632, |
|
"grad_norm": 1.8479692935943604, |
|
"learning_rate": 2.9740211947917984e-05, |
|
"loss": 1.3893, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.20276008492569, |
|
"grad_norm": 1.86372971534729, |
|
"learning_rate": 2.965832258910643e-05, |
|
"loss": 1.5014, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 2.208067940552017, |
|
"grad_norm": 2.080585479736328, |
|
"learning_rate": 2.957638141868373e-05, |
|
"loss": 1.5324, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.213375796178344, |
|
"grad_norm": 2.2292301654815674, |
|
"learning_rate": 2.9494389348030317e-05, |
|
"loss": 1.2817, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 2.218683651804671, |
|
"grad_norm": 1.86923348903656, |
|
"learning_rate": 2.941234728909275e-05, |
|
"loss": 1.4919, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.223991507430998, |
|
"grad_norm": 1.9480481147766113, |
|
"learning_rate": 2.9330256154373593e-05, |
|
"loss": 1.3585, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 2.229299363057325, |
|
"grad_norm": 2.1231307983398438, |
|
"learning_rate": 2.9248116856921226e-05, |
|
"loss": 1.5803, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.2346072186836516, |
|
"grad_norm": 2.2283520698547363, |
|
"learning_rate": 2.9165930310319733e-05, |
|
"loss": 1.502, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 2.239915074309979, |
|
"grad_norm": 1.9217607975006104, |
|
"learning_rate": 2.9083697428678712e-05, |
|
"loss": 1.442, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.245222929936306, |
|
"grad_norm": 4.379934310913086, |
|
"learning_rate": 2.9001419126623113e-05, |
|
"loss": 1.5073, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 2.2505307855626326, |
|
"grad_norm": 2.02557635307312, |
|
"learning_rate": 2.8919096319283084e-05, |
|
"loss": 1.3755, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2558386411889595, |
|
"grad_norm": 2.0024921894073486, |
|
"learning_rate": 2.8836729922283755e-05, |
|
"loss": 1.5393, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 2.261146496815287, |
|
"grad_norm": 1.9053192138671875, |
|
"learning_rate": 2.8754320851735107e-05, |
|
"loss": 1.3441, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.2664543524416136, |
|
"grad_norm": 2.073275327682495, |
|
"learning_rate": 2.8671870024221707e-05, |
|
"loss": 1.3883, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 2.2717622080679405, |
|
"grad_norm": 1.8114712238311768, |
|
"learning_rate": 2.8589378356792606e-05, |
|
"loss": 1.3674, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.2770700636942673, |
|
"grad_norm": 12.008658409118652, |
|
"learning_rate": 2.8506846766951063e-05, |
|
"loss": 1.4504, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 2.2823779193205946, |
|
"grad_norm": 1.860350489616394, |
|
"learning_rate": 2.8424276172644382e-05, |
|
"loss": 1.4243, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.2876857749469215, |
|
"grad_norm": 2.3256890773773193, |
|
"learning_rate": 2.8341667492253675e-05, |
|
"loss": 1.4229, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 2.2929936305732483, |
|
"grad_norm": 2.0510356426239014, |
|
"learning_rate": 2.825902164458369e-05, |
|
"loss": 1.3562, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.298301486199575, |
|
"grad_norm": 1.8248239755630493, |
|
"learning_rate": 2.817633954885252e-05, |
|
"loss": 1.5125, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 2.3036093418259025, |
|
"grad_norm": 1.8396949768066406, |
|
"learning_rate": 2.8093622124681473e-05, |
|
"loss": 1.4886, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.3089171974522293, |
|
"grad_norm": 1.9355541467666626, |
|
"learning_rate": 2.8010870292084744e-05, |
|
"loss": 1.466, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 2.314225053078556, |
|
"grad_norm": 1.9316900968551636, |
|
"learning_rate": 2.7928084971459272e-05, |
|
"loss": 1.4423, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.319532908704883, |
|
"grad_norm": 2.041689395904541, |
|
"learning_rate": 2.7845267083574432e-05, |
|
"loss": 1.4992, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 2.3248407643312103, |
|
"grad_norm": 1.89895761013031, |
|
"learning_rate": 2.7762417549561858e-05, |
|
"loss": 1.3173, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.330148619957537, |
|
"grad_norm": 1.6639972925186157, |
|
"learning_rate": 2.7679537290905117e-05, |
|
"loss": 1.4519, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 2.335456475583864, |
|
"grad_norm": 1.8617371320724487, |
|
"learning_rate": 2.7596627229429556e-05, |
|
"loss": 1.2956, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.340764331210191, |
|
"grad_norm": 2.1502444744110107, |
|
"learning_rate": 2.751368828729196e-05, |
|
"loss": 1.5061, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 2.346072186836518, |
|
"grad_norm": 2.0051639080047607, |
|
"learning_rate": 2.7430721386970372e-05, |
|
"loss": 1.6049, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.351380042462845, |
|
"grad_norm": 2.1966779232025146, |
|
"learning_rate": 2.7347727451253763e-05, |
|
"loss": 1.5206, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 2.356687898089172, |
|
"grad_norm": 2.000822067260742, |
|
"learning_rate": 2.7264707403231826e-05, |
|
"loss": 1.533, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3619957537154987, |
|
"grad_norm": 2.175576686859131, |
|
"learning_rate": 2.718166216628466e-05, |
|
"loss": 1.5238, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 2.367303609341826, |
|
"grad_norm": 1.9840582609176636, |
|
"learning_rate": 2.7098592664072563e-05, |
|
"loss": 1.3994, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.372611464968153, |
|
"grad_norm": 1.7856370210647583, |
|
"learning_rate": 2.701549982052568e-05, |
|
"loss": 1.5091, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 2.3779193205944797, |
|
"grad_norm": 2.5901758670806885, |
|
"learning_rate": 2.6932384559833795e-05, |
|
"loss": 1.4364, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.3832271762208066, |
|
"grad_norm": 2.039409637451172, |
|
"learning_rate": 2.6849247806436002e-05, |
|
"loss": 1.6041, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 2.388535031847134, |
|
"grad_norm": 2.1011273860931396, |
|
"learning_rate": 2.676609048501047e-05, |
|
"loss": 1.3286, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.3938428874734607, |
|
"grad_norm": 1.7810633182525635, |
|
"learning_rate": 2.6682913520464104e-05, |
|
"loss": 1.4414, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 2.3991507430997876, |
|
"grad_norm": 2.1423192024230957, |
|
"learning_rate": 2.6599717837922324e-05, |
|
"loss": 1.3732, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.404458598726115, |
|
"grad_norm": 1.6819944381713867, |
|
"learning_rate": 2.6516504362718692e-05, |
|
"loss": 1.4819, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 2.4097664543524417, |
|
"grad_norm": 4.543319225311279, |
|
"learning_rate": 2.6433274020384717e-05, |
|
"loss": 1.3511, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.4150743099787686, |
|
"grad_norm": 2.027402639389038, |
|
"learning_rate": 2.6350027736639466e-05, |
|
"loss": 1.4949, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 2.4203821656050954, |
|
"grad_norm": 2.225890636444092, |
|
"learning_rate": 2.6266766437379348e-05, |
|
"loss": 1.5223, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.4256900212314223, |
|
"grad_norm": 1.709800362586975, |
|
"learning_rate": 2.6183491048667748e-05, |
|
"loss": 1.3139, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 2.4309978768577496, |
|
"grad_norm": 1.9229758977890015, |
|
"learning_rate": 2.610020249672479e-05, |
|
"loss": 1.4932, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.4363057324840764, |
|
"grad_norm": 1.7151269912719727, |
|
"learning_rate": 2.601690170791698e-05, |
|
"loss": 1.3308, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 2.4416135881104033, |
|
"grad_norm": 2.1811819076538086, |
|
"learning_rate": 2.5933589608746945e-05, |
|
"loss": 1.4028, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.4469214437367306, |
|
"grad_norm": 2.236459970474243, |
|
"learning_rate": 2.585026712584309e-05, |
|
"loss": 1.5397, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 2.4522292993630574, |
|
"grad_norm": 2.1036362648010254, |
|
"learning_rate": 2.576693518594934e-05, |
|
"loss": 1.4838, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.4575371549893843, |
|
"grad_norm": 2.25015926361084, |
|
"learning_rate": 2.568359471591477e-05, |
|
"loss": 1.4518, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 2.462845010615711, |
|
"grad_norm": 2.4356729984283447, |
|
"learning_rate": 2.5600246642683367e-05, |
|
"loss": 1.4599, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.468152866242038, |
|
"grad_norm": 1.9419898986816406, |
|
"learning_rate": 2.5516891893283645e-05, |
|
"loss": 1.4831, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 2.4734607218683653, |
|
"grad_norm": 1.9871810674667358, |
|
"learning_rate": 2.543353139481841e-05, |
|
"loss": 1.4965, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.478768577494692, |
|
"grad_norm": 2.024142026901245, |
|
"learning_rate": 2.535016607445438e-05, |
|
"loss": 1.5143, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 2.484076433121019, |
|
"grad_norm": 1.7300843000411987, |
|
"learning_rate": 2.526679685941193e-05, |
|
"loss": 1.4122, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.4893842887473463, |
|
"grad_norm": 1.9679033756256104, |
|
"learning_rate": 2.518342467695473e-05, |
|
"loss": 1.4179, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 2.494692144373673, |
|
"grad_norm": 2.0890605449676514, |
|
"learning_rate": 2.5100050454379475e-05, |
|
"loss": 1.4879, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 2.1922872066497803, |
|
"learning_rate": 2.501667511900554e-05, |
|
"loss": 1.252, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 2.505307855626327, |
|
"grad_norm": 2.26338529586792, |
|
"learning_rate": 2.4933299598164674e-05, |
|
"loss": 1.3662, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.5106157112526537, |
|
"grad_norm": 2.192429542541504, |
|
"learning_rate": 2.4849924819190696e-05, |
|
"loss": 1.4638, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 2.515923566878981, |
|
"grad_norm": 1.9536056518554688, |
|
"learning_rate": 2.4766551709409172e-05, |
|
"loss": 1.5399, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.521231422505308, |
|
"grad_norm": 3.6337201595306396, |
|
"learning_rate": 2.46831811961271e-05, |
|
"loss": 1.4925, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 2.5265392781316347, |
|
"grad_norm": 3.1741602420806885, |
|
"learning_rate": 2.4599814206622604e-05, |
|
"loss": 1.4498, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.531847133757962, |
|
"grad_norm": 1.8435138463974, |
|
"learning_rate": 2.451645166813461e-05, |
|
"loss": 1.2496, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 2.537154989384289, |
|
"grad_norm": 2.1668882369995117, |
|
"learning_rate": 2.4433094507852537e-05, |
|
"loss": 1.3713, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.5424628450106157, |
|
"grad_norm": 2.1070964336395264, |
|
"learning_rate": 2.434974365290599e-05, |
|
"loss": 1.481, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 2.5477707006369426, |
|
"grad_norm": 1.9351259469985962, |
|
"learning_rate": 2.4266400030354444e-05, |
|
"loss": 1.5247, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.5530785562632694, |
|
"grad_norm": 2.2230396270751953, |
|
"learning_rate": 2.4183064567176928e-05, |
|
"loss": 1.3355, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 2.5583864118895967, |
|
"grad_norm": 8.01842975616455, |
|
"learning_rate": 2.409973819026173e-05, |
|
"loss": 1.4554, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5636942675159236, |
|
"grad_norm": 2.270322322845459, |
|
"learning_rate": 2.401642182639605e-05, |
|
"loss": 1.4627, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 2.5690021231422504, |
|
"grad_norm": 2.0427427291870117, |
|
"learning_rate": 2.3933116402255764e-05, |
|
"loss": 1.4061, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.5743099787685777, |
|
"grad_norm": 2.2200124263763428, |
|
"learning_rate": 2.384982284439503e-05, |
|
"loss": 1.4439, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 2.5796178343949046, |
|
"grad_norm": 1.9936960935592651, |
|
"learning_rate": 2.3766542079236048e-05, |
|
"loss": 1.6219, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.5849256900212314, |
|
"grad_norm": 2.3527231216430664, |
|
"learning_rate": 2.368327503305872e-05, |
|
"loss": 1.5253, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 2.5902335456475583, |
|
"grad_norm": 1.7423584461212158, |
|
"learning_rate": 2.3600022631990372e-05, |
|
"loss": 1.299, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.595541401273885, |
|
"grad_norm": 1.7755167484283447, |
|
"learning_rate": 2.3516785801995433e-05, |
|
"loss": 1.4781, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 2.6008492569002124, |
|
"grad_norm": 6.36676549911499, |
|
"learning_rate": 2.3433565468865157e-05, |
|
"loss": 1.5042, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.6061571125265393, |
|
"grad_norm": 1.851580023765564, |
|
"learning_rate": 2.335036255820729e-05, |
|
"loss": 1.4587, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 2.611464968152866, |
|
"grad_norm": 2.0467379093170166, |
|
"learning_rate": 2.3267177995435824e-05, |
|
"loss": 1.6473, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.6167728237791934, |
|
"grad_norm": 2.056533098220825, |
|
"learning_rate": 2.3184012705760662e-05, |
|
"loss": 1.4673, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 2.6220806794055203, |
|
"grad_norm": 1.8680130243301392, |
|
"learning_rate": 2.3100867614177353e-05, |
|
"loss": 1.3721, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.627388535031847, |
|
"grad_norm": 2.404651641845703, |
|
"learning_rate": 2.3017743645456794e-05, |
|
"loss": 1.4524, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 2.632696390658174, |
|
"grad_norm": 2.0899250507354736, |
|
"learning_rate": 2.293464172413495e-05, |
|
"loss": 1.5181, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.638004246284501, |
|
"grad_norm": 2.262739658355713, |
|
"learning_rate": 2.2851562774502542e-05, |
|
"loss": 1.5211, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 2.643312101910828, |
|
"grad_norm": 4.4608941078186035, |
|
"learning_rate": 2.276850772059483e-05, |
|
"loss": 1.599, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.648619957537155, |
|
"grad_norm": 2.067124128341675, |
|
"learning_rate": 2.2685477486181267e-05, |
|
"loss": 1.37, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 2.653927813163482, |
|
"grad_norm": 2.236569404602051, |
|
"learning_rate": 2.2602472994755276e-05, |
|
"loss": 1.4943, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.659235668789809, |
|
"grad_norm": 7.286528587341309, |
|
"learning_rate": 2.2519495169523924e-05, |
|
"loss": 1.459, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 2.664543524416136, |
|
"grad_norm": 2.1355056762695312, |
|
"learning_rate": 2.243654493339773e-05, |
|
"loss": 1.4789, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.669851380042463, |
|
"grad_norm": 2.1914258003234863, |
|
"learning_rate": 2.2353623208980316e-05, |
|
"loss": 1.3678, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 2.6751592356687897, |
|
"grad_norm": 1.906111717224121, |
|
"learning_rate": 2.227073091855822e-05, |
|
"loss": 1.4229, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.6804670912951165, |
|
"grad_norm": 2.0272438526153564, |
|
"learning_rate": 2.2187868984090577e-05, |
|
"loss": 1.3161, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 2.685774946921444, |
|
"grad_norm": 2.4848039150238037, |
|
"learning_rate": 2.2105038327198914e-05, |
|
"loss": 1.3172, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.6910828025477707, |
|
"grad_norm": 1.991502285003662, |
|
"learning_rate": 2.202223986915685e-05, |
|
"loss": 1.4735, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 2.6963906581740975, |
|
"grad_norm": 2.0094003677368164, |
|
"learning_rate": 2.193947453087991e-05, |
|
"loss": 1.4223, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.701698513800425, |
|
"grad_norm": 3.0732345581054688, |
|
"learning_rate": 2.185674323291522e-05, |
|
"loss": 1.4143, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 2.7070063694267517, |
|
"grad_norm": 9.969552040100098, |
|
"learning_rate": 2.1774046895431317e-05, |
|
"loss": 1.465, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.7123142250530785, |
|
"grad_norm": 1.9031027555465698, |
|
"learning_rate": 2.1691386438207873e-05, |
|
"loss": 1.5055, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 2.7176220806794054, |
|
"grad_norm": 2.1032540798187256, |
|
"learning_rate": 2.160876278062551e-05, |
|
"loss": 1.4889, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.722929936305732, |
|
"grad_norm": 2.103361129760742, |
|
"learning_rate": 2.1526176841655533e-05, |
|
"loss": 1.5629, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 2.7282377919320595, |
|
"grad_norm": 1.7875380516052246, |
|
"learning_rate": 2.1443629539849735e-05, |
|
"loss": 1.438, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.7335456475583864, |
|
"grad_norm": 4.726676940917969, |
|
"learning_rate": 2.136112179333017e-05, |
|
"loss": 1.3722, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 2.738853503184713, |
|
"grad_norm": 2.1944401264190674, |
|
"learning_rate": 2.1278654519778947e-05, |
|
"loss": 1.4818, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.7441613588110405, |
|
"grad_norm": 2.1532351970672607, |
|
"learning_rate": 2.1196228636428002e-05, |
|
"loss": 1.5619, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 2.7494692144373674, |
|
"grad_norm": 1.9567017555236816, |
|
"learning_rate": 2.111384506004894e-05, |
|
"loss": 1.4255, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.754777070063694, |
|
"grad_norm": 2.1782784461975098, |
|
"learning_rate": 2.10315047069428e-05, |
|
"loss": 1.5677, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 2.7600849256900215, |
|
"grad_norm": 6.628244400024414, |
|
"learning_rate": 2.0949208492929866e-05, |
|
"loss": 1.4233, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.7653927813163484, |
|
"grad_norm": 2.323992967605591, |
|
"learning_rate": 2.08669573333395e-05, |
|
"loss": 1.7152, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 2.770700636942675, |
|
"grad_norm": 1.6532930135726929, |
|
"learning_rate": 2.078475214299996e-05, |
|
"loss": 1.1821, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.776008492569002, |
|
"grad_norm": 2.089218854904175, |
|
"learning_rate": 2.0702593836228196e-05, |
|
"loss": 1.3794, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 2.781316348195329, |
|
"grad_norm": 2.067755699157715, |
|
"learning_rate": 2.062048332681972e-05, |
|
"loss": 1.406, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.786624203821656, |
|
"grad_norm": 1.9614882469177246, |
|
"learning_rate": 2.053842152803842e-05, |
|
"loss": 1.5471, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 2.791932059447983, |
|
"grad_norm": 2.23738956451416, |
|
"learning_rate": 2.0456409352606396e-05, |
|
"loss": 1.5058, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.79723991507431, |
|
"grad_norm": 2.063555955886841, |
|
"learning_rate": 2.037444771269382e-05, |
|
"loss": 1.3531, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 2.802547770700637, |
|
"grad_norm": 2.0748682022094727, |
|
"learning_rate": 2.0292537519908817e-05, |
|
"loss": 1.2844, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.807855626326964, |
|
"grad_norm": 2.197343587875366, |
|
"learning_rate": 2.0210679685287248e-05, |
|
"loss": 1.5082, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 2.813163481953291, |
|
"grad_norm": 4.911831378936768, |
|
"learning_rate": 2.0128875119282674e-05, |
|
"loss": 1.5497, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.8184713375796178, |
|
"grad_norm": 1.9611250162124634, |
|
"learning_rate": 2.004712473175615e-05, |
|
"loss": 1.4158, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 2.8237791932059446, |
|
"grad_norm": 2.2171881198883057, |
|
"learning_rate": 1.996542943196616e-05, |
|
"loss": 1.3746, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.829087048832272, |
|
"grad_norm": 2.2292375564575195, |
|
"learning_rate": 1.9883790128558463e-05, |
|
"loss": 1.5202, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 2.8343949044585988, |
|
"grad_norm": 2.012502431869507, |
|
"learning_rate": 1.980220772955602e-05, |
|
"loss": 1.5072, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.8397027600849256, |
|
"grad_norm": 2.0411856174468994, |
|
"learning_rate": 1.9720683142348873e-05, |
|
"loss": 1.61, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 2.845010615711253, |
|
"grad_norm": 2.1242668628692627, |
|
"learning_rate": 1.963921727368406e-05, |
|
"loss": 1.4123, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.8503184713375798, |
|
"grad_norm": 1.9053332805633545, |
|
"learning_rate": 1.9557811029655522e-05, |
|
"loss": 1.4463, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 2.8556263269639066, |
|
"grad_norm": 2.0949225425720215, |
|
"learning_rate": 1.9476465315694055e-05, |
|
"loss": 1.5502, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.8609341825902335, |
|
"grad_norm": 2.2040441036224365, |
|
"learning_rate": 1.9395181036557188e-05, |
|
"loss": 1.4678, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 2.8662420382165603, |
|
"grad_norm": 2.0631675720214844, |
|
"learning_rate": 1.9313959096319175e-05, |
|
"loss": 1.3414, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8715498938428876, |
|
"grad_norm": 4.133503437042236, |
|
"learning_rate": 1.923280039836089e-05, |
|
"loss": 1.4198, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 2.8768577494692145, |
|
"grad_norm": 2.2318644523620605, |
|
"learning_rate": 1.9151705845359825e-05, |
|
"loss": 1.3251, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.8821656050955413, |
|
"grad_norm": 1.9430187940597534, |
|
"learning_rate": 1.9070676339280004e-05, |
|
"loss": 1.5425, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 2.8874734607218686, |
|
"grad_norm": 2.3634932041168213, |
|
"learning_rate": 1.8989712781361997e-05, |
|
"loss": 1.4142, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.8927813163481955, |
|
"grad_norm": 1.9859676361083984, |
|
"learning_rate": 1.8908816072112856e-05, |
|
"loss": 1.4577, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 2.8980891719745223, |
|
"grad_norm": 1.9253382682800293, |
|
"learning_rate": 1.882798711129613e-05, |
|
"loss": 1.4094, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.903397027600849, |
|
"grad_norm": 1.818198323249817, |
|
"learning_rate": 1.8747226797921845e-05, |
|
"loss": 1.4304, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 2.908704883227176, |
|
"grad_norm": 2.1580026149749756, |
|
"learning_rate": 1.866653603023649e-05, |
|
"loss": 1.1938, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.9140127388535033, |
|
"grad_norm": 2.2599189281463623, |
|
"learning_rate": 1.858591570571306e-05, |
|
"loss": 1.5586, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 2.91932059447983, |
|
"grad_norm": 2.2263216972351074, |
|
"learning_rate": 1.8505366721041033e-05, |
|
"loss": 1.5277, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.924628450106157, |
|
"grad_norm": 2.088515043258667, |
|
"learning_rate": 1.842488997211644e-05, |
|
"loss": 1.5115, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 2.9299363057324843, |
|
"grad_norm": 2.1377217769622803, |
|
"learning_rate": 1.834448635403186e-05, |
|
"loss": 1.5497, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.935244161358811, |
|
"grad_norm": 2.0695080757141113, |
|
"learning_rate": 1.82641567610665e-05, |
|
"loss": 1.4421, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 2.940552016985138, |
|
"grad_norm": 4.7147674560546875, |
|
"learning_rate": 1.8183902086676217e-05, |
|
"loss": 1.556, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.945859872611465, |
|
"grad_norm": 2.120251178741455, |
|
"learning_rate": 1.810372322348361e-05, |
|
"loss": 1.4685, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 2.9511677282377917, |
|
"grad_norm": 1.9467816352844238, |
|
"learning_rate": 1.8023621063268064e-05, |
|
"loss": 1.4662, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.956475583864119, |
|
"grad_norm": 2.197115659713745, |
|
"learning_rate": 1.7943596496955854e-05, |
|
"loss": 1.347, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 2.961783439490446, |
|
"grad_norm": 2.2167131900787354, |
|
"learning_rate": 1.7863650414610223e-05, |
|
"loss": 1.5102, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.9670912951167727, |
|
"grad_norm": 3.7643346786499023, |
|
"learning_rate": 1.7783783705421487e-05, |
|
"loss": 1.2618, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 2.9723991507431, |
|
"grad_norm": 2.3326447010040283, |
|
"learning_rate": 1.7703997257697137e-05, |
|
"loss": 1.3727, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.977707006369427, |
|
"grad_norm": 2.2277841567993164, |
|
"learning_rate": 1.762429195885198e-05, |
|
"loss": 1.447, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 2.9830148619957537, |
|
"grad_norm": 2.2077393531799316, |
|
"learning_rate": 1.754466869539824e-05, |
|
"loss": 1.4006, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.9883227176220806, |
|
"grad_norm": 2.2370176315307617, |
|
"learning_rate": 1.7465128352935732e-05, |
|
"loss": 1.4167, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 2.9936305732484074, |
|
"grad_norm": 2.0863046646118164, |
|
"learning_rate": 1.7385671816141963e-05, |
|
"loss": 1.5003, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.9989384288747347, |
|
"grad_norm": 2.1493258476257324, |
|
"learning_rate": 1.730629996876235e-05, |
|
"loss": 1.2646, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 3.0042462845010616, |
|
"grad_norm": 2.2664546966552734, |
|
"learning_rate": 1.7227013693600347e-05, |
|
"loss": 1.4217, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 3.0095541401273884, |
|
"grad_norm": 2.0606799125671387, |
|
"learning_rate": 1.7147813872507654e-05, |
|
"loss": 1.3851, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 3.0148619957537157, |
|
"grad_norm": 2.0941736698150635, |
|
"learning_rate": 1.7068701386374374e-05, |
|
"loss": 1.4804, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 3.0201698513800426, |
|
"grad_norm": 2.1622138023376465, |
|
"learning_rate": 1.6989677115119267e-05, |
|
"loss": 1.3979, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 3.0254777070063694, |
|
"grad_norm": 2.0956408977508545, |
|
"learning_rate": 1.691074193767991e-05, |
|
"loss": 1.4387, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.0307855626326963, |
|
"grad_norm": 2.141923666000366, |
|
"learning_rate": 1.683189673200296e-05, |
|
"loss": 1.2867, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 3.0360934182590236, |
|
"grad_norm": 2.4103381633758545, |
|
"learning_rate": 1.675314237503436e-05, |
|
"loss": 1.409, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.0414012738853504, |
|
"grad_norm": 2.0725698471069336, |
|
"learning_rate": 1.667447974270962e-05, |
|
"loss": 1.434, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 3.0467091295116773, |
|
"grad_norm": 2.1921072006225586, |
|
"learning_rate": 1.6595909709944035e-05, |
|
"loss": 1.278, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 3.052016985138004, |
|
"grad_norm": 2.077505588531494, |
|
"learning_rate": 1.651743315062299e-05, |
|
"loss": 1.4423, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 3.0573248407643314, |
|
"grad_norm": 2.065654993057251, |
|
"learning_rate": 1.64390509375922e-05, |
|
"loss": 1.4783, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.0626326963906583, |
|
"grad_norm": 2.1610610485076904, |
|
"learning_rate": 1.6360763942648056e-05, |
|
"loss": 1.4743, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 3.067940552016985, |
|
"grad_norm": 2.189526319503784, |
|
"learning_rate": 1.628257303652786e-05, |
|
"loss": 1.444, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.073248407643312, |
|
"grad_norm": 2.0830533504486084, |
|
"learning_rate": 1.620447908890022e-05, |
|
"loss": 1.3342, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 3.0785562632696393, |
|
"grad_norm": 2.0287106037139893, |
|
"learning_rate": 1.61264829683553e-05, |
|
"loss": 1.4212, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.083864118895966, |
|
"grad_norm": 1.9034909009933472, |
|
"learning_rate": 1.604858554239521e-05, |
|
"loss": 1.4566, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 3.089171974522293, |
|
"grad_norm": 2.098928451538086, |
|
"learning_rate": 1.597078767742434e-05, |
|
"loss": 1.4257, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 3.09447983014862, |
|
"grad_norm": 2.230280637741089, |
|
"learning_rate": 1.589309023873974e-05, |
|
"loss": 1.4142, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 3.099787685774947, |
|
"grad_norm": 2.3032426834106445, |
|
"learning_rate": 1.581549409052145e-05, |
|
"loss": 1.4048, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 3.105095541401274, |
|
"grad_norm": 2.5375571250915527, |
|
"learning_rate": 1.5738000095822948e-05, |
|
"loss": 1.3517, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 3.110403397027601, |
|
"grad_norm": 1.7900398969650269, |
|
"learning_rate": 1.5660609116561493e-05, |
|
"loss": 1.3255, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 3.1157112526539277, |
|
"grad_norm": 2.3134543895721436, |
|
"learning_rate": 1.5583322013508604e-05, |
|
"loss": 1.4345, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 3.121019108280255, |
|
"grad_norm": 2.1048059463500977, |
|
"learning_rate": 1.5506139646280427e-05, |
|
"loss": 1.1971, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 3.126326963906582, |
|
"grad_norm": 2.4140281677246094, |
|
"learning_rate": 1.5429062873328194e-05, |
|
"loss": 1.4395, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 3.1316348195329087, |
|
"grad_norm": 2.161916971206665, |
|
"learning_rate": 1.535209255192869e-05, |
|
"loss": 1.3569, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 3.1369426751592355, |
|
"grad_norm": 2.458399534225464, |
|
"learning_rate": 1.52752295381747e-05, |
|
"loss": 1.4479, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 3.142250530785563, |
|
"grad_norm": 2.1411702632904053, |
|
"learning_rate": 1.5198474686965495e-05, |
|
"loss": 1.4861, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 3.1475583864118897, |
|
"grad_norm": 2.147722005844116, |
|
"learning_rate": 1.5121828851997319e-05, |
|
"loss": 1.3144, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 3.1528662420382165, |
|
"grad_norm": 2.137479782104492, |
|
"learning_rate": 1.5045292885753894e-05, |
|
"loss": 1.4583, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 3.1581740976645434, |
|
"grad_norm": 2.3600449562072754, |
|
"learning_rate": 1.4968867639496956e-05, |
|
"loss": 1.4061, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 3.1634819532908707, |
|
"grad_norm": 2.391444206237793, |
|
"learning_rate": 1.4892553963256745e-05, |
|
"loss": 1.5864, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 3.1687898089171975, |
|
"grad_norm": 2.2082204818725586, |
|
"learning_rate": 1.4816352705822612e-05, |
|
"loss": 1.2608, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 3.1740976645435244, |
|
"grad_norm": 2.3388137817382812, |
|
"learning_rate": 1.4740264714733504e-05, |
|
"loss": 1.5217, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 3.1794055201698512, |
|
"grad_norm": 2.470644950866699, |
|
"learning_rate": 1.4664290836268613e-05, |
|
"loss": 1.471, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 3.1847133757961785, |
|
"grad_norm": 2.1400668621063232, |
|
"learning_rate": 1.4588431915437906e-05, |
|
"loss": 1.4109, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.1900212314225054, |
|
"grad_norm": 2.219452142715454, |
|
"learning_rate": 1.4512688795972756e-05, |
|
"loss": 1.5468, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 3.1953290870488322, |
|
"grad_norm": 2.131532907485962, |
|
"learning_rate": 1.4437062320316558e-05, |
|
"loss": 1.3587, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 3.200636942675159, |
|
"grad_norm": 2.2039008140563965, |
|
"learning_rate": 1.4361553329615324e-05, |
|
"loss": 1.4387, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 3.2059447983014864, |
|
"grad_norm": 2.1974165439605713, |
|
"learning_rate": 1.428616266370838e-05, |
|
"loss": 1.4334, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 3.2112526539278132, |
|
"grad_norm": 2.232050895690918, |
|
"learning_rate": 1.4210891161118992e-05, |
|
"loss": 1.4696, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 3.21656050955414, |
|
"grad_norm": 2.1611685752868652, |
|
"learning_rate": 1.4135739659045053e-05, |
|
"loss": 1.3681, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 3.221868365180467, |
|
"grad_norm": 2.1293656826019287, |
|
"learning_rate": 1.4060708993349738e-05, |
|
"loss": 1.3466, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 3.2271762208067942, |
|
"grad_norm": 2.123840570449829, |
|
"learning_rate": 1.3985799998552267e-05, |
|
"loss": 1.4368, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 3.232484076433121, |
|
"grad_norm": 2.4953863620758057, |
|
"learning_rate": 1.3911013507818581e-05, |
|
"loss": 1.3601, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 3.237791932059448, |
|
"grad_norm": 2.3729612827301025, |
|
"learning_rate": 1.3836350352952085e-05, |
|
"loss": 1.4593, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 3.243099787685775, |
|
"grad_norm": 2.4012491703033447, |
|
"learning_rate": 1.3761811364384378e-05, |
|
"loss": 1.4123, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 3.248407643312102, |
|
"grad_norm": 2.420816659927368, |
|
"learning_rate": 1.3687397371166055e-05, |
|
"loss": 1.426, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 3.253715498938429, |
|
"grad_norm": 2.3156495094299316, |
|
"learning_rate": 1.3613109200957469e-05, |
|
"loss": 1.396, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 3.259023354564756, |
|
"grad_norm": 2.5136337280273438, |
|
"learning_rate": 1.3538947680019514e-05, |
|
"loss": 1.3537, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 3.2643312101910826, |
|
"grad_norm": 2.1792850494384766, |
|
"learning_rate": 1.3464913633204434e-05, |
|
"loss": 1.4983, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 3.26963906581741, |
|
"grad_norm": 2.5874316692352295, |
|
"learning_rate": 1.3391007883946669e-05, |
|
"loss": 1.4165, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 3.274946921443737, |
|
"grad_norm": 2.1205999851226807, |
|
"learning_rate": 1.3317231254253687e-05, |
|
"loss": 1.3347, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 3.2802547770700636, |
|
"grad_norm": 2.6099026203155518, |
|
"learning_rate": 1.3243584564696848e-05, |
|
"loss": 1.4395, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 3.2855626326963905, |
|
"grad_norm": 2.8517844676971436, |
|
"learning_rate": 1.3170068634402236e-05, |
|
"loss": 1.4585, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 3.290870488322718, |
|
"grad_norm": 2.137800931930542, |
|
"learning_rate": 1.3096684281041613e-05, |
|
"loss": 1.4397, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.2961783439490446, |
|
"grad_norm": 2.1291749477386475, |
|
"learning_rate": 1.3023432320823287e-05, |
|
"loss": 1.4387, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 3.3014861995753715, |
|
"grad_norm": 1.9797881841659546, |
|
"learning_rate": 1.2950313568483036e-05, |
|
"loss": 1.266, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 3.3067940552016983, |
|
"grad_norm": 2.3177640438079834, |
|
"learning_rate": 1.2877328837275044e-05, |
|
"loss": 1.3107, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 3.3121019108280256, |
|
"grad_norm": 2.499016046524048, |
|
"learning_rate": 1.2804478938962867e-05, |
|
"loss": 1.397, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 3.3174097664543525, |
|
"grad_norm": 2.1002349853515625, |
|
"learning_rate": 1.2731764683810398e-05, |
|
"loss": 1.3943, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 3.3227176220806793, |
|
"grad_norm": 2.446040391921997, |
|
"learning_rate": 1.265918688057288e-05, |
|
"loss": 1.4008, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 3.328025477707006, |
|
"grad_norm": 2.4573974609375, |
|
"learning_rate": 1.2586746336487835e-05, |
|
"loss": 1.533, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 2.1954808235168457, |
|
"learning_rate": 1.2514443857266175e-05, |
|
"loss": 1.3354, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 3.3386411889596603, |
|
"grad_norm": 2.187451124191284, |
|
"learning_rate": 1.2442280247083198e-05, |
|
"loss": 1.3633, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 3.343949044585987, |
|
"grad_norm": 5.623300552368164, |
|
"learning_rate": 1.2370256308569656e-05, |
|
"loss": 1.4056, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 3.349256900212314, |
|
"grad_norm": 2.10553240776062, |
|
"learning_rate": 1.2298372842802786e-05, |
|
"loss": 1.4899, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 3.3545647558386413, |
|
"grad_norm": 2.105638265609741, |
|
"learning_rate": 1.2226630649297466e-05, |
|
"loss": 1.4447, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 3.359872611464968, |
|
"grad_norm": 3.875722885131836, |
|
"learning_rate": 1.2155030525997286e-05, |
|
"loss": 1.3026, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 3.365180467091295, |
|
"grad_norm": 2.115307092666626, |
|
"learning_rate": 1.208357326926568e-05, |
|
"loss": 1.3592, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 3.370488322717622, |
|
"grad_norm": 3.39106822013855, |
|
"learning_rate": 1.2012259673877046e-05, |
|
"loss": 1.2692, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 3.375796178343949, |
|
"grad_norm": 1.9469239711761475, |
|
"learning_rate": 1.1941090533007948e-05, |
|
"loss": 1.4078, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 3.381104033970276, |
|
"grad_norm": 2.1060116291046143, |
|
"learning_rate": 1.1870066638228264e-05, |
|
"loss": 1.3936, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 3.386411889596603, |
|
"grad_norm": 2.3175833225250244, |
|
"learning_rate": 1.1799188779492406e-05, |
|
"loss": 1.3777, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 3.3917197452229297, |
|
"grad_norm": 2.24416446685791, |
|
"learning_rate": 1.1728457745130483e-05, |
|
"loss": 1.4643, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 3.397027600849257, |
|
"grad_norm": 2.251556158065796, |
|
"learning_rate": 1.1657874321839602e-05, |
|
"loss": 1.434, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.402335456475584, |
|
"grad_norm": 2.4189612865448, |
|
"learning_rate": 1.1587439294675068e-05, |
|
"loss": 1.4088, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 3.4076433121019107, |
|
"grad_norm": 2.213326930999756, |
|
"learning_rate": 1.1517153447041687e-05, |
|
"loss": 1.4231, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 3.412951167728238, |
|
"grad_norm": 2.072181463241577, |
|
"learning_rate": 1.1447017560684996e-05, |
|
"loss": 1.4474, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 3.418259023354565, |
|
"grad_norm": 2.4454848766326904, |
|
"learning_rate": 1.1377032415682648e-05, |
|
"loss": 1.4199, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 3.4235668789808917, |
|
"grad_norm": 2.853790283203125, |
|
"learning_rate": 1.130719879043567e-05, |
|
"loss": 1.4094, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 3.4288747346072186, |
|
"grad_norm": 2.308567762374878, |
|
"learning_rate": 1.1237517461659846e-05, |
|
"loss": 1.3363, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 3.4341825902335454, |
|
"grad_norm": 2.467379093170166, |
|
"learning_rate": 1.1167989204377036e-05, |
|
"loss": 1.4102, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 3.4394904458598727, |
|
"grad_norm": 2.1049180030822754, |
|
"learning_rate": 1.1098614791906606e-05, |
|
"loss": 1.2906, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 3.4447983014861996, |
|
"grad_norm": 2.4943039417266846, |
|
"learning_rate": 1.1029394995856792e-05, |
|
"loss": 1.2913, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 3.4501061571125264, |
|
"grad_norm": 2.1266067028045654, |
|
"learning_rate": 1.0960330586116138e-05, |
|
"loss": 1.4101, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 3.4554140127388537, |
|
"grad_norm": 2.2753243446350098, |
|
"learning_rate": 1.08914223308449e-05, |
|
"loss": 1.4643, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 3.4607218683651806, |
|
"grad_norm": 2.324915885925293, |
|
"learning_rate": 1.0822670996466547e-05, |
|
"loss": 1.4543, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 3.4660297239915074, |
|
"grad_norm": 2.1122493743896484, |
|
"learning_rate": 1.0754077347659208e-05, |
|
"loss": 1.5137, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 3.4713375796178343, |
|
"grad_norm": 2.3270649909973145, |
|
"learning_rate": 1.0685642147347183e-05, |
|
"loss": 1.4333, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 3.476645435244161, |
|
"grad_norm": 2.105161190032959, |
|
"learning_rate": 1.0617366156692423e-05, |
|
"loss": 1.4143, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 3.4819532908704884, |
|
"grad_norm": 2.328491687774658, |
|
"learning_rate": 1.0549250135086114e-05, |
|
"loss": 1.4786, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 3.4872611464968153, |
|
"grad_norm": 2.6255910396575928, |
|
"learning_rate": 1.0481294840140199e-05, |
|
"loss": 1.3698, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 3.492569002123142, |
|
"grad_norm": 2.3305420875549316, |
|
"learning_rate": 1.0413501027678965e-05, |
|
"loss": 1.3544, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 3.4978768577494694, |
|
"grad_norm": 2.7587552070617676, |
|
"learning_rate": 1.0345869451730608e-05, |
|
"loss": 1.4469, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 3.5031847133757963, |
|
"grad_norm": 16.741992950439453, |
|
"learning_rate": 1.0278400864518892e-05, |
|
"loss": 1.5186, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.508492569002123, |
|
"grad_norm": 2.2464590072631836, |
|
"learning_rate": 1.0211096016454749e-05, |
|
"loss": 1.4908, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 3.51380042462845, |
|
"grad_norm": 2.071381092071533, |
|
"learning_rate": 1.0143955656127957e-05, |
|
"loss": 1.3258, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 3.519108280254777, |
|
"grad_norm": 2.1464314460754395, |
|
"learning_rate": 1.0076980530298769e-05, |
|
"loss": 1.4744, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 3.524416135881104, |
|
"grad_norm": 1.953969120979309, |
|
"learning_rate": 1.0010171383889664e-05, |
|
"loss": 1.4532, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 3.529723991507431, |
|
"grad_norm": 2.3309433460235596, |
|
"learning_rate": 9.943528959977027e-06, |
|
"loss": 1.4666, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 3.535031847133758, |
|
"grad_norm": 2.290804862976074, |
|
"learning_rate": 9.877053999782907e-06, |
|
"loss": 1.3826, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 3.540339702760085, |
|
"grad_norm": 2.3279714584350586, |
|
"learning_rate": 9.81074724266672e-06, |
|
"loss": 1.4758, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 3.545647558386412, |
|
"grad_norm": 2.5002360343933105, |
|
"learning_rate": 9.74460942611711e-06, |
|
"loss": 1.4526, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 3.550955414012739, |
|
"grad_norm": 2.25785756111145, |
|
"learning_rate": 9.678641285743673e-06, |
|
"loss": 1.3462, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 3.5562632696390657, |
|
"grad_norm": 2.0230438709259033, |
|
"learning_rate": 9.612843555268813e-06, |
|
"loss": 1.2952, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 3.5615711252653925, |
|
"grad_norm": 2.1412694454193115, |
|
"learning_rate": 9.547216966519577e-06, |
|
"loss": 1.3239, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 3.56687898089172, |
|
"grad_norm": 14.145221710205078, |
|
"learning_rate": 9.481762249419482e-06, |
|
"loss": 1.4509, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 3.5721868365180467, |
|
"grad_norm": 2.359635353088379, |
|
"learning_rate": 9.416480131980455e-06, |
|
"loss": 1.3237, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 3.5774946921443735, |
|
"grad_norm": 2.2716643810272217, |
|
"learning_rate": 9.35137134029469e-06, |
|
"loss": 1.3844, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 3.582802547770701, |
|
"grad_norm": 2.952258825302124, |
|
"learning_rate": 9.286436598526601e-06, |
|
"loss": 1.3404, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 3.5881104033970277, |
|
"grad_norm": 2.1978461742401123, |
|
"learning_rate": 9.221676628904724e-06, |
|
"loss": 1.4622, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 3.5934182590233545, |
|
"grad_norm": 2.3678669929504395, |
|
"learning_rate": 9.157092151713742e-06, |
|
"loss": 1.3749, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 3.5987261146496814, |
|
"grad_norm": 5.995536804199219, |
|
"learning_rate": 9.092683885286438e-06, |
|
"loss": 1.4892, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 3.6040339702760082, |
|
"grad_norm": 2.5272786617279053, |
|
"learning_rate": 9.028452545995714e-06, |
|
"loss": 1.4309, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 3.6093418259023355, |
|
"grad_norm": 2.5360288619995117, |
|
"learning_rate": 8.964398848246603e-06, |
|
"loss": 1.3416, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.6146496815286624, |
|
"grad_norm": 2.71073842048645, |
|
"learning_rate": 8.900523504468366e-06, |
|
"loss": 1.3541, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 3.6199575371549892, |
|
"grad_norm": 2.1415514945983887, |
|
"learning_rate": 8.836827225106536e-06, |
|
"loss": 1.4829, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 3.6252653927813165, |
|
"grad_norm": 2.409083604812622, |
|
"learning_rate": 8.773310718615036e-06, |
|
"loss": 1.4849, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 3.6305732484076434, |
|
"grad_norm": 2.1981987953186035, |
|
"learning_rate": 8.709974691448253e-06, |
|
"loss": 1.4821, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 3.6358811040339702, |
|
"grad_norm": 2.5032248497009277, |
|
"learning_rate": 8.64681984805325e-06, |
|
"loss": 1.3284, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 3.641188959660297, |
|
"grad_norm": 2.0410494804382324, |
|
"learning_rate": 8.583846890861886e-06, |
|
"loss": 1.4164, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 3.646496815286624, |
|
"grad_norm": 2.2673985958099365, |
|
"learning_rate": 8.521056520283017e-06, |
|
"loss": 1.4347, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 3.6518046709129512, |
|
"grad_norm": 2.1072731018066406, |
|
"learning_rate": 8.458449434694679e-06, |
|
"loss": 1.4396, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 3.657112526539278, |
|
"grad_norm": 2.3610174655914307, |
|
"learning_rate": 8.396026330436374e-06, |
|
"loss": 1.4165, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 3.662420382165605, |
|
"grad_norm": 2.143754482269287, |
|
"learning_rate": 8.333787901801279e-06, |
|
"loss": 1.3376, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 3.6677282377919322, |
|
"grad_norm": 2.567183017730713, |
|
"learning_rate": 8.271734841028553e-06, |
|
"loss": 1.3744, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 3.673036093418259, |
|
"grad_norm": 2.2236099243164062, |
|
"learning_rate": 8.209867838295596e-06, |
|
"loss": 1.3606, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 3.678343949044586, |
|
"grad_norm": 2.2553422451019287, |
|
"learning_rate": 8.148187581710423e-06, |
|
"loss": 1.355, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 3.683651804670913, |
|
"grad_norm": 2.2291765213012695, |
|
"learning_rate": 8.086694757303991e-06, |
|
"loss": 1.2048, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 3.6889596602972397, |
|
"grad_norm": 2.602382183074951, |
|
"learning_rate": 8.025390049022562e-06, |
|
"loss": 1.3158, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 3.694267515923567, |
|
"grad_norm": 2.460057258605957, |
|
"learning_rate": 7.964274138720081e-06, |
|
"loss": 1.4712, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 3.699575371549894, |
|
"grad_norm": 2.3745005130767822, |
|
"learning_rate": 7.903347706150636e-06, |
|
"loss": 1.3811, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 3.7048832271762207, |
|
"grad_norm": 2.286000967025757, |
|
"learning_rate": 7.842611428960861e-06, |
|
"loss": 1.573, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 3.710191082802548, |
|
"grad_norm": 2.2103700637817383, |
|
"learning_rate": 7.782065982682423e-06, |
|
"loss": 1.4452, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 3.715498938428875, |
|
"grad_norm": 2.0639681816101074, |
|
"learning_rate": 7.721712040724469e-06, |
|
"loss": 1.5869, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.7208067940552016, |
|
"grad_norm": 1.9713996648788452, |
|
"learning_rate": 7.661550274366189e-06, |
|
"loss": 1.4913, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 3.7261146496815285, |
|
"grad_norm": 2.4420719146728516, |
|
"learning_rate": 7.601581352749309e-06, |
|
"loss": 1.5407, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 3.7314225053078554, |
|
"grad_norm": 2.3371787071228027, |
|
"learning_rate": 7.5418059428706865e-06, |
|
"loss": 1.4896, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 3.7367303609341826, |
|
"grad_norm": 5.171130657196045, |
|
"learning_rate": 7.482224709574829e-06, |
|
"loss": 1.4157, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 3.7420382165605095, |
|
"grad_norm": 2.2224090099334717, |
|
"learning_rate": 7.4228383155465705e-06, |
|
"loss": 1.4068, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 3.7473460721868364, |
|
"grad_norm": 2.283038854598999, |
|
"learning_rate": 7.363647421303666e-06, |
|
"loss": 1.3288, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 3.7526539278131636, |
|
"grad_norm": 2.160877227783203, |
|
"learning_rate": 7.304652685189434e-06, |
|
"loss": 1.4058, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 3.7579617834394905, |
|
"grad_norm": 2.3264095783233643, |
|
"learning_rate": 7.2458547633654675e-06, |
|
"loss": 1.4104, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 3.7632696390658174, |
|
"grad_norm": 2.3588404655456543, |
|
"learning_rate": 7.1872543098043035e-06, |
|
"loss": 1.5082, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 3.7685774946921446, |
|
"grad_norm": 2.169647216796875, |
|
"learning_rate": 7.128851976282172e-06, |
|
"loss": 1.3996, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 3.7738853503184715, |
|
"grad_norm": 2.3636116981506348, |
|
"learning_rate": 7.070648412371725e-06, |
|
"loss": 1.3886, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 3.7791932059447984, |
|
"grad_norm": 2.253528118133545, |
|
"learning_rate": 7.012644265434834e-06, |
|
"loss": 1.4162, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 3.784501061571125, |
|
"grad_norm": 2.413527727127075, |
|
"learning_rate": 6.95484018061538e-06, |
|
"loss": 1.4416, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 3.789808917197452, |
|
"grad_norm": 2.313779354095459, |
|
"learning_rate": 6.897236800832082e-06, |
|
"loss": 1.4022, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 3.7951167728237793, |
|
"grad_norm": 2.22562575340271, |
|
"learning_rate": 6.8398347667713246e-06, |
|
"loss": 1.4004, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 3.800424628450106, |
|
"grad_norm": 2.1902172565460205, |
|
"learning_rate": 6.782634716880068e-06, |
|
"loss": 1.4268, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 3.805732484076433, |
|
"grad_norm": 2.4533374309539795, |
|
"learning_rate": 6.725637287358724e-06, |
|
"loss": 1.3675, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 3.8110403397027603, |
|
"grad_norm": 2.2463905811309814, |
|
"learning_rate": 6.668843112154088e-06, |
|
"loss": 1.3991, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 3.816348195329087, |
|
"grad_norm": 2.4668920040130615, |
|
"learning_rate": 6.612252822952267e-06, |
|
"loss": 1.4839, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 3.821656050955414, |
|
"grad_norm": 2.244965076446533, |
|
"learning_rate": 6.555867049171688e-06, |
|
"loss": 1.525, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.826963906581741, |
|
"grad_norm": 2.3175225257873535, |
|
"learning_rate": 6.499686417956083e-06, |
|
"loss": 1.4035, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 3.8322717622080678, |
|
"grad_norm": 2.565484046936035, |
|
"learning_rate": 6.443711554167506e-06, |
|
"loss": 1.4154, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 3.837579617834395, |
|
"grad_norm": 2.11671781539917, |
|
"learning_rate": 6.38794308037938e-06, |
|
"loss": 1.2861, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 3.842887473460722, |
|
"grad_norm": 2.795337438583374, |
|
"learning_rate": 6.332381616869593e-06, |
|
"loss": 1.4971, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 3.8481953290870488, |
|
"grad_norm": 2.443300247192383, |
|
"learning_rate": 6.2770277816135814e-06, |
|
"loss": 1.4387, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 3.853503184713376, |
|
"grad_norm": 2.138455629348755, |
|
"learning_rate": 6.221882190277472e-06, |
|
"loss": 1.3503, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 3.858811040339703, |
|
"grad_norm": 2.1262552738189697, |
|
"learning_rate": 6.166945456211204e-06, |
|
"loss": 1.4727, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 3.8641188959660298, |
|
"grad_norm": 2.3657939434051514, |
|
"learning_rate": 6.112218190441746e-06, |
|
"loss": 1.4265, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 3.8694267515923566, |
|
"grad_norm": 2.300050735473633, |
|
"learning_rate": 6.057701001666275e-06, |
|
"loss": 1.2813, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 3.8747346072186835, |
|
"grad_norm": 2.3793447017669678, |
|
"learning_rate": 6.0033944962454205e-06, |
|
"loss": 1.4436, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 3.8800424628450108, |
|
"grad_norm": 2.3621938228607178, |
|
"learning_rate": 5.949299278196493e-06, |
|
"loss": 1.2759, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 3.8853503184713376, |
|
"grad_norm": 2.4751360416412354, |
|
"learning_rate": 5.8954159491868085e-06, |
|
"loss": 1.2852, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 3.8906581740976645, |
|
"grad_norm": 2.046066999435425, |
|
"learning_rate": 5.841745108526958e-06, |
|
"loss": 1.3721, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 3.8959660297239918, |
|
"grad_norm": 2.4861631393432617, |
|
"learning_rate": 5.7882873531641705e-06, |
|
"loss": 1.4705, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 3.9012738853503186, |
|
"grad_norm": 2.280402421951294, |
|
"learning_rate": 5.735043277675642e-06, |
|
"loss": 1.5398, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 3.9065817409766455, |
|
"grad_norm": 9.118425369262695, |
|
"learning_rate": 5.682013474261957e-06, |
|
"loss": 1.5439, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 3.9118895966029723, |
|
"grad_norm": 2.5025949478149414, |
|
"learning_rate": 5.629198532740482e-06, |
|
"loss": 1.3915, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 3.917197452229299, |
|
"grad_norm": 2.5477778911590576, |
|
"learning_rate": 5.576599040538813e-06, |
|
"loss": 1.4886, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 3.9225053078556265, |
|
"grad_norm": 2.4313480854034424, |
|
"learning_rate": 5.524215582688216e-06, |
|
"loss": 1.4189, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 3.9278131634819533, |
|
"grad_norm": 2.327481269836426, |
|
"learning_rate": 5.472048741817165e-06, |
|
"loss": 1.4373, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 3.93312101910828, |
|
"grad_norm": 2.1773698329925537, |
|
"learning_rate": 5.4200990981448375e-06, |
|
"loss": 1.2943, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 3.9384288747346075, |
|
"grad_norm": 2.5786592960357666, |
|
"learning_rate": 5.368367229474655e-06, |
|
"loss": 1.5092, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 3.9437367303609343, |
|
"grad_norm": 2.2224104404449463, |
|
"learning_rate": 5.316853711187858e-06, |
|
"loss": 1.426, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 3.949044585987261, |
|
"grad_norm": 2.4788384437561035, |
|
"learning_rate": 5.265559116237123e-06, |
|
"loss": 1.2464, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 3.954352441613588, |
|
"grad_norm": 2.4742305278778076, |
|
"learning_rate": 5.214484015140178e-06, |
|
"loss": 1.4523, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 3.959660297239915, |
|
"grad_norm": 2.3287322521209717, |
|
"learning_rate": 5.163628975973458e-06, |
|
"loss": 1.5333, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 3.964968152866242, |
|
"grad_norm": 2.1568734645843506, |
|
"learning_rate": 5.112994564365775e-06, |
|
"loss": 1.3845, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 3.970276008492569, |
|
"grad_norm": 2.289116144180298, |
|
"learning_rate": 5.062581343492051e-06, |
|
"loss": 1.382, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 3.975583864118896, |
|
"grad_norm": 2.5056374073028564, |
|
"learning_rate": 5.012389874067039e-06, |
|
"loss": 1.3863, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 3.980891719745223, |
|
"grad_norm": 2.403012990951538, |
|
"learning_rate": 4.962420714339094e-06, |
|
"loss": 1.3269, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.98619957537155, |
|
"grad_norm": 5.482332229614258, |
|
"learning_rate": 4.9126744200839456e-06, |
|
"loss": 1.5098, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 3.991507430997877, |
|
"grad_norm": 2.4922893047332764, |
|
"learning_rate": 4.8631515445985404e-06, |
|
"loss": 1.4779, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 3.9968152866242037, |
|
"grad_norm": 2.413883686065674, |
|
"learning_rate": 4.813852638694874e-06, |
|
"loss": 1.4107, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 4.002123142250531, |
|
"grad_norm": 2.4241840839385986, |
|
"learning_rate": 4.76477825069388e-06, |
|
"loss": 1.5316, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 4.007430997876858, |
|
"grad_norm": 2.759788751602173, |
|
"learning_rate": 4.715928926419292e-06, |
|
"loss": 1.2973, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 4.012738853503185, |
|
"grad_norm": 2.396528959274292, |
|
"learning_rate": 4.6673052091916276e-06, |
|
"loss": 1.3054, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 4.018046709129512, |
|
"grad_norm": 2.296591281890869, |
|
"learning_rate": 4.618907639822107e-06, |
|
"loss": 1.3189, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 4.023354564755839, |
|
"grad_norm": 1.9780110120773315, |
|
"learning_rate": 4.570736756606659e-06, |
|
"loss": 1.4215, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 4.028662420382165, |
|
"grad_norm": 2.2614572048187256, |
|
"learning_rate": 4.522793095319899e-06, |
|
"loss": 1.4185, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 4.033970276008493, |
|
"grad_norm": 2.025022029876709, |
|
"learning_rate": 4.475077189209218e-06, |
|
"loss": 1.4099, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.03927813163482, |
|
"grad_norm": 2.2896909713745117, |
|
"learning_rate": 4.427589568988824e-06, |
|
"loss": 1.4216, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 4.044585987261146, |
|
"grad_norm": 2.432157516479492, |
|
"learning_rate": 4.380330762833848e-06, |
|
"loss": 1.3377, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 4.049893842887474, |
|
"grad_norm": 2.016071081161499, |
|
"learning_rate": 4.333301296374442e-06, |
|
"loss": 1.3515, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 4.055201698513801, |
|
"grad_norm": 1.8489809036254883, |
|
"learning_rate": 4.286501692689984e-06, |
|
"loss": 1.2477, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 4.060509554140127, |
|
"grad_norm": 2.454228162765503, |
|
"learning_rate": 4.239932472303215e-06, |
|
"loss": 1.4972, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 4.065817409766455, |
|
"grad_norm": 2.3903145790100098, |
|
"learning_rate": 4.193594153174485e-06, |
|
"loss": 1.2439, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 4.071125265392781, |
|
"grad_norm": 2.542217254638672, |
|
"learning_rate": 4.1474872506959416e-06, |
|
"loss": 1.406, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 4.076433121019108, |
|
"grad_norm": 2.2244298458099365, |
|
"learning_rate": 4.101612277685856e-06, |
|
"loss": 1.3496, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 4.081740976645436, |
|
"grad_norm": 2.2349693775177, |
|
"learning_rate": 4.0559697443828895e-06, |
|
"loss": 1.4276, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 4.087048832271762, |
|
"grad_norm": 2.4000797271728516, |
|
"learning_rate": 4.0105601584404214e-06, |
|
"loss": 1.4705, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 4.092356687898089, |
|
"grad_norm": 2.4222750663757324, |
|
"learning_rate": 3.965384024920885e-06, |
|
"loss": 1.3891, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 4.097664543524417, |
|
"grad_norm": 2.3922958374023438, |
|
"learning_rate": 3.920441846290193e-06, |
|
"loss": 1.4135, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 4.102972399150743, |
|
"grad_norm": 2.559807538986206, |
|
"learning_rate": 3.8757341224121085e-06, |
|
"loss": 1.4211, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 4.10828025477707, |
|
"grad_norm": 2.560068130493164, |
|
"learning_rate": 3.831261350542712e-06, |
|
"loss": 1.4407, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 4.113588110403397, |
|
"grad_norm": 2.4018898010253906, |
|
"learning_rate": 3.7870240253248563e-06, |
|
"loss": 1.3578, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 4.118895966029724, |
|
"grad_norm": 2.588036298751831, |
|
"learning_rate": 3.7430226387826535e-06, |
|
"loss": 1.5336, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 4.124203821656051, |
|
"grad_norm": 2.196746587753296, |
|
"learning_rate": 3.6992576803160374e-06, |
|
"loss": 1.2888, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 4.129511677282378, |
|
"grad_norm": 2.288543701171875, |
|
"learning_rate": 3.6557296366952878e-06, |
|
"loss": 1.3779, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 4.134819532908705, |
|
"grad_norm": 2.3535075187683105, |
|
"learning_rate": 3.6124389920556445e-06, |
|
"loss": 1.3858, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 4.140127388535032, |
|
"grad_norm": 2.0725769996643066, |
|
"learning_rate": 3.5693862278918797e-06, |
|
"loss": 1.5102, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 4.145435244161359, |
|
"grad_norm": 2.4607093334198, |
|
"learning_rate": 3.526571823052993e-06, |
|
"loss": 1.2617, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 4.150743099787686, |
|
"grad_norm": 2.3538119792938232, |
|
"learning_rate": 3.4839962537368516e-06, |
|
"loss": 1.3718, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 4.156050955414012, |
|
"grad_norm": 2.226527214050293, |
|
"learning_rate": 3.4416599934849162e-06, |
|
"loss": 1.4393, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 4.16135881104034, |
|
"grad_norm": 2.4096546173095703, |
|
"learning_rate": 3.3995635131769428e-06, |
|
"loss": 1.4414, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"grad_norm": 2.072619676589966, |
|
"learning_rate": 3.3577072810257766e-06, |
|
"loss": 1.4062, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 4.171974522292993, |
|
"grad_norm": 2.6206490993499756, |
|
"learning_rate": 3.3160917625721376e-06, |
|
"loss": 1.3057, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 4.177282377919321, |
|
"grad_norm": 2.2108707427978516, |
|
"learning_rate": 3.2747174206794295e-06, |
|
"loss": 1.5061, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 4.182590233545648, |
|
"grad_norm": 2.2947421073913574, |
|
"learning_rate": 3.233584715528601e-06, |
|
"loss": 1.3381, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 4.187898089171974, |
|
"grad_norm": 2.2240118980407715, |
|
"learning_rate": 3.1926941046130225e-06, |
|
"loss": 1.347, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 4.193205944798302, |
|
"grad_norm": 2.6407012939453125, |
|
"learning_rate": 3.152046042733414e-06, |
|
"loss": 1.3313, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 4.198513800424628, |
|
"grad_norm": 2.520167112350464, |
|
"learning_rate": 3.1116409819927695e-06, |
|
"loss": 1.409, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 4.203821656050955, |
|
"grad_norm": 2.168043851852417, |
|
"learning_rate": 3.071479371791322e-06, |
|
"loss": 1.3983, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 4.209129511677283, |
|
"grad_norm": 2.150791883468628, |
|
"learning_rate": 3.0315616588215635e-06, |
|
"loss": 1.376, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 4.214437367303609, |
|
"grad_norm": 2.6289355754852295, |
|
"learning_rate": 2.991888287063277e-06, |
|
"loss": 1.3597, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 4.219745222929936, |
|
"grad_norm": 2.2437193393707275, |
|
"learning_rate": 2.9524596977785867e-06, |
|
"loss": 1.3839, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 4.225053078556264, |
|
"grad_norm": 2.4536490440368652, |
|
"learning_rate": 2.913276329507042e-06, |
|
"loss": 1.403, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 4.23036093418259, |
|
"grad_norm": 2.498270034790039, |
|
"learning_rate": 2.874338618060765e-06, |
|
"loss": 1.3648, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 4.235668789808917, |
|
"grad_norm": 2.479475975036621, |
|
"learning_rate": 2.835646996519595e-06, |
|
"loss": 1.2893, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 4.240976645435244, |
|
"grad_norm": 2.214371681213379, |
|
"learning_rate": 2.7972018952262563e-06, |
|
"loss": 1.2914, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 4.246284501061571, |
|
"grad_norm": 2.644470691680908, |
|
"learning_rate": 2.7590037417815824e-06, |
|
"loss": 1.4778, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.251592356687898, |
|
"grad_norm": 2.93762469291687, |
|
"learning_rate": 2.721052961039766e-06, |
|
"loss": 1.3126, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 4.256900212314225, |
|
"grad_norm": 2.3123199939727783, |
|
"learning_rate": 2.6833499751036247e-06, |
|
"loss": 1.3483, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 4.262208067940552, |
|
"grad_norm": 2.590116262435913, |
|
"learning_rate": 2.6458952033199176e-06, |
|
"loss": 1.3834, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 4.267515923566879, |
|
"grad_norm": 2.3344857692718506, |
|
"learning_rate": 2.6086890622746526e-06, |
|
"loss": 1.3848, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 4.272823779193206, |
|
"grad_norm": 2.472461223602295, |
|
"learning_rate": 2.571731965788496e-06, |
|
"loss": 1.2809, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 4.278131634819533, |
|
"grad_norm": 2.2091104984283447, |
|
"learning_rate": 2.535024324912133e-06, |
|
"loss": 1.367, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 4.2834394904458595, |
|
"grad_norm": 2.2722952365875244, |
|
"learning_rate": 2.4985665479217213e-06, |
|
"loss": 1.4383, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 4.288747346072187, |
|
"grad_norm": 2.44112491607666, |
|
"learning_rate": 2.4623590403143187e-06, |
|
"loss": 1.3626, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 4.294055201698514, |
|
"grad_norm": 2.245461940765381, |
|
"learning_rate": 2.4264022048034155e-06, |
|
"loss": 1.3627, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 4.2993630573248405, |
|
"grad_norm": 2.2949862480163574, |
|
"learning_rate": 2.3906964413144215e-06, |
|
"loss": 1.3443, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 4.304670912951168, |
|
"grad_norm": 2.4080650806427, |
|
"learning_rate": 2.3552421469802354e-06, |
|
"loss": 1.3183, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 4.309978768577495, |
|
"grad_norm": 2.6764564514160156, |
|
"learning_rate": 2.320039716136807e-06, |
|
"loss": 1.5511, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 4.3152866242038215, |
|
"grad_norm": 2.4801034927368164, |
|
"learning_rate": 2.2850895403187856e-06, |
|
"loss": 1.5182, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 4.320594479830149, |
|
"grad_norm": 2.4023561477661133, |
|
"learning_rate": 2.250392008255131e-06, |
|
"loss": 1.3622, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 4.325902335456475, |
|
"grad_norm": 2.485903739929199, |
|
"learning_rate": 2.215947505864818e-06, |
|
"loss": 1.3739, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 4.3312101910828025, |
|
"grad_norm": 2.491157293319702, |
|
"learning_rate": 2.181756416252512e-06, |
|
"loss": 1.5562, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 4.33651804670913, |
|
"grad_norm": 2.497166395187378, |
|
"learning_rate": 2.147819119704339e-06, |
|
"loss": 1.3947, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 4.341825902335456, |
|
"grad_norm": 2.6698756217956543, |
|
"learning_rate": 2.1141359936836414e-06, |
|
"loss": 1.3789, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 4.3471337579617835, |
|
"grad_norm": 2.5180952548980713, |
|
"learning_rate": 2.0807074128267876e-06, |
|
"loss": 1.422, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 4.352441613588111, |
|
"grad_norm": 2.275834321975708, |
|
"learning_rate": 2.0475337489389846e-06, |
|
"loss": 1.3095, |
|
"step": 4100 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4710, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7323402312577434e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|