Genshin_Impact_Qwen_1_5_Chat_sharegpt_roleplay_chat_lora_small
/
checkpoint-1000
/trainer_state.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 3.250711093051605, | |
"eval_steps": 500, | |
"global_step": 1000, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.016253555465258026, | |
"grad_norm": 2.077186346054077, | |
"learning_rate": 4.9998691031433496e-05, | |
"loss": 2.8778, | |
"step": 5 | |
}, | |
{ | |
"epoch": 0.03250711093051605, | |
"grad_norm": 2.073504686355591, | |
"learning_rate": 4.999476426280588e-05, | |
"loss": 2.6432, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.048760666395774074, | |
"grad_norm": 2.1281931400299072, | |
"learning_rate": 4.998822010531848e-05, | |
"loss": 2.3167, | |
"step": 15 | |
}, | |
{ | |
"epoch": 0.0650142218610321, | |
"grad_norm": 1.090219497680664, | |
"learning_rate": 4.997905924425903e-05, | |
"loss": 2.1533, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.08126777732629012, | |
"grad_norm": 1.007896900177002, | |
"learning_rate": 4.996728263892985e-05, | |
"loss": 2.0267, | |
"step": 25 | |
}, | |
{ | |
"epoch": 0.09752133279154815, | |
"grad_norm": 0.9982665777206421, | |
"learning_rate": 4.995289152254744e-05, | |
"loss": 1.9352, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.11377488825680618, | |
"grad_norm": 0.8844298720359802, | |
"learning_rate": 4.9935887402113315e-05, | |
"loss": 1.9486, | |
"step": 35 | |
}, | |
{ | |
"epoch": 0.1300284437220642, | |
"grad_norm": 0.9337536692619324, | |
"learning_rate": 4.991627205825621e-05, | |
"loss": 1.9228, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.14628199918732224, | |
"grad_norm": 0.9377800822257996, | |
"learning_rate": 4.9894047545045605e-05, | |
"loss": 1.835, | |
"step": 45 | |
}, | |
{ | |
"epoch": 0.16253555465258024, | |
"grad_norm": 0.8525241017341614, | |
"learning_rate": 4.986921618977664e-05, | |
"loss": 1.8157, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.17878911011783827, | |
"grad_norm": 0.872871458530426, | |
"learning_rate": 4.984178059272638e-05, | |
"loss": 1.8811, | |
"step": 55 | |
}, | |
{ | |
"epoch": 0.1950426655830963, | |
"grad_norm": 0.9125804305076599, | |
"learning_rate": 4.981174362688158e-05, | |
"loss": 1.8242, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.21129622104835433, | |
"grad_norm": 0.7474733591079712, | |
"learning_rate": 4.977910843763777e-05, | |
"loss": 1.7808, | |
"step": 65 | |
}, | |
{ | |
"epoch": 0.22754977651361236, | |
"grad_norm": 1.0248199701309204, | |
"learning_rate": 4.974387844246987e-05, | |
"loss": 1.8512, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.24380333197887039, | |
"grad_norm": 0.9032835960388184, | |
"learning_rate": 4.970605733057441e-05, | |
"loss": 1.8172, | |
"step": 75 | |
}, | |
{ | |
"epoch": 0.2600568874441284, | |
"grad_norm": 0.8550340533256531, | |
"learning_rate": 4.9665649062483115e-05, | |
"loss": 1.7418, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.27631044290938644, | |
"grad_norm": 0.9429016709327698, | |
"learning_rate": 4.96226578696482e-05, | |
"loss": 1.8265, | |
"step": 85 | |
}, | |
{ | |
"epoch": 0.2925639983746445, | |
"grad_norm": 0.975885272026062, | |
"learning_rate": 4.957708825399927e-05, | |
"loss": 1.7943, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.3088175538399025, | |
"grad_norm": 0.827629804611206, | |
"learning_rate": 4.9528944987471884e-05, | |
"loss": 1.8067, | |
"step": 95 | |
}, | |
{ | |
"epoch": 0.3250711093051605, | |
"grad_norm": 1.0019093751907349, | |
"learning_rate": 4.9478233111507856e-05, | |
"loss": 1.7874, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.3413246647704185, | |
"grad_norm": 1.0811312198638916, | |
"learning_rate": 4.9424957936527295e-05, | |
"loss": 1.7395, | |
"step": 105 | |
}, | |
{ | |
"epoch": 0.35757822023567654, | |
"grad_norm": 0.9461565613746643, | |
"learning_rate": 4.936912504137257e-05, | |
"loss": 1.7833, | |
"step": 110 | |
}, | |
{ | |
"epoch": 0.37383177570093457, | |
"grad_norm": 1.0386009216308594, | |
"learning_rate": 4.9310740272724055e-05, | |
"loss": 1.7569, | |
"step": 115 | |
}, | |
{ | |
"epoch": 0.3900853311661926, | |
"grad_norm": 0.9916568994522095, | |
"learning_rate": 4.924980974448791e-05, | |
"loss": 1.7584, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.4063388866314506, | |
"grad_norm": 1.240400791168213, | |
"learning_rate": 4.918633983715581e-05, | |
"loss": 1.7438, | |
"step": 125 | |
}, | |
{ | |
"epoch": 0.42259244209670865, | |
"grad_norm": 1.1441287994384766, | |
"learning_rate": 4.912033719713687e-05, | |
"loss": 1.7046, | |
"step": 130 | |
}, | |
{ | |
"epoch": 0.4388459975619667, | |
"grad_norm": 1.1687984466552734, | |
"learning_rate": 4.905180873606157e-05, | |
"loss": 1.738, | |
"step": 135 | |
}, | |
{ | |
"epoch": 0.4550995530272247, | |
"grad_norm": 0.9126319885253906, | |
"learning_rate": 4.8980761630058014e-05, | |
"loss": 1.7738, | |
"step": 140 | |
}, | |
{ | |
"epoch": 0.47135310849248274, | |
"grad_norm": 1.172781229019165, | |
"learning_rate": 4.8907203319000495e-05, | |
"loss": 1.6642, | |
"step": 145 | |
}, | |
{ | |
"epoch": 0.48760666395774077, | |
"grad_norm": 1.1217776536941528, | |
"learning_rate": 4.883114150573037e-05, | |
"loss": 1.7359, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.5038602194229987, | |
"grad_norm": 1.1024829149246216, | |
"learning_rate": 4.8752584155249444e-05, | |
"loss": 1.6707, | |
"step": 155 | |
}, | |
{ | |
"epoch": 0.5201137748882568, | |
"grad_norm": 1.042017936706543, | |
"learning_rate": 4.8671539493885934e-05, | |
"loss": 1.709, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.5363673303535148, | |
"grad_norm": 1.1834771633148193, | |
"learning_rate": 4.8588016008432945e-05, | |
"loss": 1.7282, | |
"step": 165 | |
}, | |
{ | |
"epoch": 0.5526208858187729, | |
"grad_norm": 1.0311099290847778, | |
"learning_rate": 4.850202244525987e-05, | |
"loss": 1.774, | |
"step": 170 | |
}, | |
{ | |
"epoch": 0.5688744412840309, | |
"grad_norm": 1.2995566129684448, | |
"learning_rate": 4.8413567809396376e-05, | |
"loss": 1.714, | |
"step": 175 | |
}, | |
{ | |
"epoch": 0.585127996749289, | |
"grad_norm": 1.23491370677948, | |
"learning_rate": 4.8322661363589507e-05, | |
"loss": 1.7717, | |
"step": 180 | |
}, | |
{ | |
"epoch": 0.6013815522145469, | |
"grad_norm": 1.1809489727020264, | |
"learning_rate": 4.822931262733367e-05, | |
"loss": 1.722, | |
"step": 185 | |
}, | |
{ | |
"epoch": 0.617635107679805, | |
"grad_norm": 0.988572359085083, | |
"learning_rate": 4.813353137587377e-05, | |
"loss": 1.7007, | |
"step": 190 | |
}, | |
{ | |
"epoch": 0.633888663145063, | |
"grad_norm": 1.0803310871124268, | |
"learning_rate": 4.803532763918162e-05, | |
"loss": 1.6824, | |
"step": 195 | |
}, | |
{ | |
"epoch": 0.650142218610321, | |
"grad_norm": 1.02713143825531, | |
"learning_rate": 4.793471170090555e-05, | |
"loss": 1.6453, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.666395774075579, | |
"grad_norm": 1.2023831605911255, | |
"learning_rate": 4.783169409729362e-05, | |
"loss": 1.6974, | |
"step": 205 | |
}, | |
{ | |
"epoch": 0.682649329540837, | |
"grad_norm": 1.0009326934814453, | |
"learning_rate": 4.772628561609022e-05, | |
"loss": 1.7094, | |
"step": 210 | |
}, | |
{ | |
"epoch": 0.6989028850060951, | |
"grad_norm": 1.269020438194275, | |
"learning_rate": 4.761849729540643e-05, | |
"loss": 1.6832, | |
"step": 215 | |
}, | |
{ | |
"epoch": 0.7151564404713531, | |
"grad_norm": 1.3129128217697144, | |
"learning_rate": 4.7508340422564134e-05, | |
"loss": 1.6847, | |
"step": 220 | |
}, | |
{ | |
"epoch": 0.7314099959366112, | |
"grad_norm": 1.0837377309799194, | |
"learning_rate": 4.7395826532914054e-05, | |
"loss": 1.7215, | |
"step": 225 | |
}, | |
{ | |
"epoch": 0.7476635514018691, | |
"grad_norm": 1.1211832761764526, | |
"learning_rate": 4.728096740862778e-05, | |
"loss": 1.7658, | |
"step": 230 | |
}, | |
{ | |
"epoch": 0.7639171068671272, | |
"grad_norm": 1.1422042846679688, | |
"learning_rate": 4.716377507746397e-05, | |
"loss": 1.7098, | |
"step": 235 | |
}, | |
{ | |
"epoch": 0.7801706623323852, | |
"grad_norm": 1.2925424575805664, | |
"learning_rate": 4.704426181150884e-05, | |
"loss": 1.7504, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.7964242177976433, | |
"grad_norm": 1.282771348953247, | |
"learning_rate": 4.6922440125891064e-05, | |
"loss": 1.72, | |
"step": 245 | |
}, | |
{ | |
"epoch": 0.8126777732629012, | |
"grad_norm": 1.2494529485702515, | |
"learning_rate": 4.6798322777471216e-05, | |
"loss": 1.6581, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.8289313287281593, | |
"grad_norm": 1.2736073732376099, | |
"learning_rate": 4.667192276350591e-05, | |
"loss": 1.6825, | |
"step": 255 | |
}, | |
{ | |
"epoch": 0.8451848841934173, | |
"grad_norm": 1.1477563381195068, | |
"learning_rate": 4.654325332028676e-05, | |
"loss": 1.6889, | |
"step": 260 | |
}, | |
{ | |
"epoch": 0.8614384396586753, | |
"grad_norm": 1.2415803670883179, | |
"learning_rate": 4.641232792175428e-05, | |
"loss": 1.6586, | |
"step": 265 | |
}, | |
{ | |
"epoch": 0.8776919951239334, | |
"grad_norm": 1.3159024715423584, | |
"learning_rate": 4.6279160278086994e-05, | |
"loss": 1.7102, | |
"step": 270 | |
}, | |
{ | |
"epoch": 0.8939455505891913, | |
"grad_norm": 1.2742581367492676, | |
"learning_rate": 4.614376433426565e-05, | |
"loss": 1.7476, | |
"step": 275 | |
}, | |
{ | |
"epoch": 0.9101991060544494, | |
"grad_norm": 1.34221351146698, | |
"learning_rate": 4.6006154268613014e-05, | |
"loss": 1.7497, | |
"step": 280 | |
}, | |
{ | |
"epoch": 0.9264526615197074, | |
"grad_norm": 1.1669361591339111, | |
"learning_rate": 4.586634449130911e-05, | |
"loss": 1.6693, | |
"step": 285 | |
}, | |
{ | |
"epoch": 0.9427062169849655, | |
"grad_norm": 1.2765443325042725, | |
"learning_rate": 4.572434964288226e-05, | |
"loss": 1.5862, | |
"step": 290 | |
}, | |
{ | |
"epoch": 0.9589597724502235, | |
"grad_norm": 1.2429810762405396, | |
"learning_rate": 4.55801845926759e-05, | |
"loss": 1.6875, | |
"step": 295 | |
}, | |
{ | |
"epoch": 0.9752133279154815, | |
"grad_norm": 1.2103233337402344, | |
"learning_rate": 4.543386443729157e-05, | |
"loss": 1.7294, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.9914668833807395, | |
"grad_norm": 1.3512628078460693, | |
"learning_rate": 4.528540449900798e-05, | |
"loss": 1.709, | |
"step": 305 | |
}, | |
{ | |
"epoch": 1.0077204388459975, | |
"grad_norm": 1.2039848566055298, | |
"learning_rate": 4.513482032417656e-05, | |
"loss": 1.6632, | |
"step": 310 | |
}, | |
{ | |
"epoch": 1.0239739943112556, | |
"grad_norm": 1.3805475234985352, | |
"learning_rate": 4.498212768159341e-05, | |
"loss": 1.695, | |
"step": 315 | |
}, | |
{ | |
"epoch": 1.0402275497765137, | |
"grad_norm": 1.3265151977539062, | |
"learning_rate": 4.48273425608481e-05, | |
"loss": 1.6653, | |
"step": 320 | |
}, | |
{ | |
"epoch": 1.0564811052417715, | |
"grad_norm": 1.015641212463379, | |
"learning_rate": 4.4670481170649214e-05, | |
"loss": 1.652, | |
"step": 325 | |
}, | |
{ | |
"epoch": 1.0727346607070296, | |
"grad_norm": 1.2359689474105835, | |
"learning_rate": 4.451155993712711e-05, | |
"loss": 1.6457, | |
"step": 330 | |
}, | |
{ | |
"epoch": 1.0889882161722877, | |
"grad_norm": 1.3188092708587646, | |
"learning_rate": 4.435059550211371e-05, | |
"loss": 1.6512, | |
"step": 335 | |
}, | |
{ | |
"epoch": 1.1052417716375458, | |
"grad_norm": 1.323879361152649, | |
"learning_rate": 4.4187604721399877e-05, | |
"loss": 1.6338, | |
"step": 340 | |
}, | |
{ | |
"epoch": 1.1214953271028036, | |
"grad_norm": 1.263047218322754, | |
"learning_rate": 4.40226046629703e-05, | |
"loss": 1.6508, | |
"step": 345 | |
}, | |
{ | |
"epoch": 1.1377488825680617, | |
"grad_norm": 1.265649676322937, | |
"learning_rate": 4.3855612605216174e-05, | |
"loss": 1.6447, | |
"step": 350 | |
}, | |
{ | |
"epoch": 1.1540024380333198, | |
"grad_norm": 1.2379087209701538, | |
"learning_rate": 4.3686646035125855e-05, | |
"loss": 1.6522, | |
"step": 355 | |
}, | |
{ | |
"epoch": 1.170255993498578, | |
"grad_norm": 1.200422763824463, | |
"learning_rate": 4.351572264645366e-05, | |
"loss": 1.6935, | |
"step": 360 | |
}, | |
{ | |
"epoch": 1.1865095489638358, | |
"grad_norm": 1.3982837200164795, | |
"learning_rate": 4.334286033786704e-05, | |
"loss": 1.6792, | |
"step": 365 | |
}, | |
{ | |
"epoch": 1.2027631044290938, | |
"grad_norm": 1.3108696937561035, | |
"learning_rate": 4.3168077211072263e-05, | |
"loss": 1.6498, | |
"step": 370 | |
}, | |
{ | |
"epoch": 1.219016659894352, | |
"grad_norm": 1.3713555335998535, | |
"learning_rate": 4.2991391568918825e-05, | |
"loss": 1.6675, | |
"step": 375 | |
}, | |
{ | |
"epoch": 1.23527021535961, | |
"grad_norm": 1.406441330909729, | |
"learning_rate": 4.281282191348289e-05, | |
"loss": 1.6328, | |
"step": 380 | |
}, | |
{ | |
"epoch": 1.2515237708248679, | |
"grad_norm": 1.3209528923034668, | |
"learning_rate": 4.2632386944129707e-05, | |
"loss": 1.7064, | |
"step": 385 | |
}, | |
{ | |
"epoch": 1.267777326290126, | |
"grad_norm": 1.4870327711105347, | |
"learning_rate": 4.245010555555554e-05, | |
"loss": 1.7161, | |
"step": 390 | |
}, | |
{ | |
"epoch": 1.284030881755384, | |
"grad_norm": 1.4530566930770874, | |
"learning_rate": 4.2265996835809016e-05, | |
"loss": 1.6267, | |
"step": 395 | |
}, | |
{ | |
"epoch": 1.300284437220642, | |
"grad_norm": 1.4400850534439087, | |
"learning_rate": 4.2080080064292304e-05, | |
"loss": 1.6029, | |
"step": 400 | |
}, | |
{ | |
"epoch": 1.3165379926859, | |
"grad_norm": 1.389769434928894, | |
"learning_rate": 4.189237470974219e-05, | |
"loss": 1.6298, | |
"step": 405 | |
}, | |
{ | |
"epoch": 1.332791548151158, | |
"grad_norm": 1.2579468488693237, | |
"learning_rate": 4.1702900428191374e-05, | |
"loss": 1.6965, | |
"step": 410 | |
}, | |
{ | |
"epoch": 1.3490451036164162, | |
"grad_norm": 1.4243718385696411, | |
"learning_rate": 4.151167706091017e-05, | |
"loss": 1.5955, | |
"step": 415 | |
}, | |
{ | |
"epoch": 1.3652986590816742, | |
"grad_norm": 1.395219087600708, | |
"learning_rate": 4.131872463232872e-05, | |
"loss": 1.6369, | |
"step": 420 | |
}, | |
{ | |
"epoch": 1.3815522145469321, | |
"grad_norm": 1.4617631435394287, | |
"learning_rate": 4.1124063347940135e-05, | |
"loss": 1.6105, | |
"step": 425 | |
}, | |
{ | |
"epoch": 1.3978057700121902, | |
"grad_norm": 1.1881122589111328, | |
"learning_rate": 4.092771359218461e-05, | |
"loss": 1.6372, | |
"step": 430 | |
}, | |
{ | |
"epoch": 1.414059325477448, | |
"grad_norm": 1.330495834350586, | |
"learning_rate": 4.0729695926314815e-05, | |
"loss": 1.5817, | |
"step": 435 | |
}, | |
{ | |
"epoch": 1.4303128809427061, | |
"grad_norm": 1.3312103748321533, | |
"learning_rate": 4.053003108624276e-05, | |
"loss": 1.6602, | |
"step": 440 | |
}, | |
{ | |
"epoch": 1.4465664364079642, | |
"grad_norm": 1.3933234214782715, | |
"learning_rate": 4.03287399803684e-05, | |
"loss": 1.63, | |
"step": 445 | |
}, | |
{ | |
"epoch": 1.4628199918732223, | |
"grad_norm": 1.600183129310608, | |
"learning_rate": 4.0125843687390166e-05, | |
"loss": 1.6614, | |
"step": 450 | |
}, | |
{ | |
"epoch": 1.4790735473384804, | |
"grad_norm": 1.4344749450683594, | |
"learning_rate": 3.992136345409765e-05, | |
"loss": 1.6492, | |
"step": 455 | |
}, | |
{ | |
"epoch": 1.4953271028037383, | |
"grad_norm": 1.4546074867248535, | |
"learning_rate": 3.9715320693146655e-05, | |
"loss": 1.6063, | |
"step": 460 | |
}, | |
{ | |
"epoch": 1.5115806582689963, | |
"grad_norm": 1.2904654741287231, | |
"learning_rate": 3.9507736980817e-05, | |
"loss": 1.6618, | |
"step": 465 | |
}, | |
{ | |
"epoch": 1.5278342137342542, | |
"grad_norm": 1.8645012378692627, | |
"learning_rate": 3.9298634054753026e-05, | |
"loss": 1.697, | |
"step": 470 | |
}, | |
{ | |
"epoch": 1.5440877691995123, | |
"grad_norm": 1.4428379535675049, | |
"learning_rate": 3.908803381168732e-05, | |
"loss": 1.6741, | |
"step": 475 | |
}, | |
{ | |
"epoch": 1.5603413246647704, | |
"grad_norm": 1.4418410062789917, | |
"learning_rate": 3.887595830514775e-05, | |
"loss": 1.7026, | |
"step": 480 | |
}, | |
{ | |
"epoch": 1.5765948801300285, | |
"grad_norm": 1.3783613443374634, | |
"learning_rate": 3.8662429743148046e-05, | |
"loss": 1.6692, | |
"step": 485 | |
}, | |
{ | |
"epoch": 1.5928484355952865, | |
"grad_norm": 1.3248358964920044, | |
"learning_rate": 3.844747048586228e-05, | |
"loss": 1.6315, | |
"step": 490 | |
}, | |
{ | |
"epoch": 1.6091019910605446, | |
"grad_norm": 1.3751637935638428, | |
"learning_rate": 3.823110304328331e-05, | |
"loss": 1.6716, | |
"step": 495 | |
}, | |
{ | |
"epoch": 1.6253555465258025, | |
"grad_norm": 1.3097206354141235, | |
"learning_rate": 3.801335007286564e-05, | |
"loss": 1.5766, | |
"step": 500 | |
}, | |
{ | |
"epoch": 1.6416091019910606, | |
"grad_norm": 1.402269959449768, | |
"learning_rate": 3.779423437715274e-05, | |
"loss": 1.6405, | |
"step": 505 | |
}, | |
{ | |
"epoch": 1.6578626574563184, | |
"grad_norm": 1.6543242931365967, | |
"learning_rate": 3.757377890138927e-05, | |
"loss": 1.6307, | |
"step": 510 | |
}, | |
{ | |
"epoch": 1.6741162129215765, | |
"grad_norm": 1.4377750158309937, | |
"learning_rate": 3.7352006731118264e-05, | |
"loss": 1.588, | |
"step": 515 | |
}, | |
{ | |
"epoch": 1.6903697683868346, | |
"grad_norm": 1.578800916671753, | |
"learning_rate": 3.712894108976372e-05, | |
"loss": 1.6307, | |
"step": 520 | |
}, | |
{ | |
"epoch": 1.7066233238520927, | |
"grad_norm": 1.4415676593780518, | |
"learning_rate": 3.690460533619866e-05, | |
"loss": 1.6854, | |
"step": 525 | |
}, | |
{ | |
"epoch": 1.7228768793173508, | |
"grad_norm": 1.5168665647506714, | |
"learning_rate": 3.667902296229905e-05, | |
"loss": 1.599, | |
"step": 530 | |
}, | |
{ | |
"epoch": 1.7391304347826086, | |
"grad_norm": 1.3709524869918823, | |
"learning_rate": 3.645221759048384e-05, | |
"loss": 1.6427, | |
"step": 535 | |
}, | |
{ | |
"epoch": 1.7553839902478667, | |
"grad_norm": 1.4154709577560425, | |
"learning_rate": 3.622421297124122e-05, | |
"loss": 1.5486, | |
"step": 540 | |
}, | |
{ | |
"epoch": 1.7716375457131246, | |
"grad_norm": 1.5655745267868042, | |
"learning_rate": 3.599503298064154e-05, | |
"loss": 1.6065, | |
"step": 545 | |
}, | |
{ | |
"epoch": 1.7878911011783827, | |
"grad_norm": 1.5691167116165161, | |
"learning_rate": 3.576470161783712e-05, | |
"loss": 1.6194, | |
"step": 550 | |
}, | |
{ | |
"epoch": 1.8041446566436408, | |
"grad_norm": 1.5229332447052002, | |
"learning_rate": 3.5533243002549046e-05, | |
"loss": 1.6312, | |
"step": 555 | |
}, | |
{ | |
"epoch": 1.8203982121088988, | |
"grad_norm": 2.0803072452545166, | |
"learning_rate": 3.5300681372541476e-05, | |
"loss": 1.5872, | |
"step": 560 | |
}, | |
{ | |
"epoch": 1.836651767574157, | |
"grad_norm": 1.4445246458053589, | |
"learning_rate": 3.5067041081083496e-05, | |
"loss": 1.6226, | |
"step": 565 | |
}, | |
{ | |
"epoch": 1.852905323039415, | |
"grad_norm": 1.559645652770996, | |
"learning_rate": 3.483234659439889e-05, | |
"loss": 1.5703, | |
"step": 570 | |
}, | |
{ | |
"epoch": 1.8691588785046729, | |
"grad_norm": 1.3045098781585693, | |
"learning_rate": 3.459662248910411e-05, | |
"loss": 1.5574, | |
"step": 575 | |
}, | |
{ | |
"epoch": 1.885412433969931, | |
"grad_norm": 1.410343050956726, | |
"learning_rate": 3.435989344963471e-05, | |
"loss": 1.6836, | |
"step": 580 | |
}, | |
{ | |
"epoch": 1.9016659894351888, | |
"grad_norm": 1.5801265239715576, | |
"learning_rate": 3.41221842656604e-05, | |
"loss": 1.6366, | |
"step": 585 | |
}, | |
{ | |
"epoch": 1.917919544900447, | |
"grad_norm": 1.440772533416748, | |
"learning_rate": 3.3883519829489154e-05, | |
"loss": 1.6395, | |
"step": 590 | |
}, | |
{ | |
"epoch": 1.934173100365705, | |
"grad_norm": 1.3845847845077515, | |
"learning_rate": 3.3643925133460564e-05, | |
"loss": 1.6139, | |
"step": 595 | |
}, | |
{ | |
"epoch": 1.950426655830963, | |
"grad_norm": 1.3879539966583252, | |
"learning_rate": 3.3403425267328715e-05, | |
"loss": 1.5887, | |
"step": 600 | |
}, | |
{ | |
"epoch": 1.9666802112962212, | |
"grad_norm": 1.3762508630752563, | |
"learning_rate": 3.316204541563479e-05, | |
"loss": 1.636, | |
"step": 605 | |
}, | |
{ | |
"epoch": 1.982933766761479, | |
"grad_norm": 1.6764074563980103, | |
"learning_rate": 3.2919810855069864e-05, | |
"loss": 1.6699, | |
"step": 610 | |
}, | |
{ | |
"epoch": 1.9991873222267371, | |
"grad_norm": 1.5237112045288086, | |
"learning_rate": 3.267674695182798e-05, | |
"loss": 1.6194, | |
"step": 615 | |
}, | |
{ | |
"epoch": 2.015440877691995, | |
"grad_norm": 1.7229068279266357, | |
"learning_rate": 3.243287915894987e-05, | |
"loss": 1.6077, | |
"step": 620 | |
}, | |
{ | |
"epoch": 2.031694433157253, | |
"grad_norm": 1.5503779649734497, | |
"learning_rate": 3.218823301365754e-05, | |
"loss": 1.6024, | |
"step": 625 | |
}, | |
{ | |
"epoch": 2.047947988622511, | |
"grad_norm": 1.584635853767395, | |
"learning_rate": 3.1942834134680124e-05, | |
"loss": 1.5911, | |
"step": 630 | |
}, | |
{ | |
"epoch": 2.0642015440877692, | |
"grad_norm": 1.4763562679290771, | |
"learning_rate": 3.1696708219571126e-05, | |
"loss": 1.6143, | |
"step": 635 | |
}, | |
{ | |
"epoch": 2.0804550995530273, | |
"grad_norm": 1.5131165981292725, | |
"learning_rate": 3.144988104201745e-05, | |
"loss": 1.6069, | |
"step": 640 | |
}, | |
{ | |
"epoch": 2.0967086550182854, | |
"grad_norm": 1.595162272453308, | |
"learning_rate": 3.120237844914044e-05, | |
"loss": 1.6128, | |
"step": 645 | |
}, | |
{ | |
"epoch": 2.112962210483543, | |
"grad_norm": 1.9360445737838745, | |
"learning_rate": 3.095422635878923e-05, | |
"loss": 1.6004, | |
"step": 650 | |
}, | |
{ | |
"epoch": 2.129215765948801, | |
"grad_norm": 1.4424818754196167, | |
"learning_rate": 3.0705450756826706e-05, | |
"loss": 1.6084, | |
"step": 655 | |
}, | |
{ | |
"epoch": 2.145469321414059, | |
"grad_norm": 1.6683636903762817, | |
"learning_rate": 3.045607769440829e-05, | |
"loss": 1.6063, | |
"step": 660 | |
}, | |
{ | |
"epoch": 2.1617228768793173, | |
"grad_norm": 1.9396454095840454, | |
"learning_rate": 3.0206133285254017e-05, | |
"loss": 1.6374, | |
"step": 665 | |
}, | |
{ | |
"epoch": 2.1779764323445754, | |
"grad_norm": 1.9183346033096313, | |
"learning_rate": 2.9955643702913865e-05, | |
"loss": 1.529, | |
"step": 670 | |
}, | |
{ | |
"epoch": 2.1942299878098335, | |
"grad_norm": 1.61932373046875, | |
"learning_rate": 2.9704635178027012e-05, | |
"loss": 1.5607, | |
"step": 675 | |
}, | |
{ | |
"epoch": 2.2104835432750916, | |
"grad_norm": 1.7259231805801392, | |
"learning_rate": 2.9453133995574955e-05, | |
"loss": 1.5854, | |
"step": 680 | |
}, | |
{ | |
"epoch": 2.2267370987403496, | |
"grad_norm": 1.675529956817627, | |
"learning_rate": 2.9201166492129088e-05, | |
"loss": 1.5563, | |
"step": 685 | |
}, | |
{ | |
"epoch": 2.2429906542056073, | |
"grad_norm": 1.628509283065796, | |
"learning_rate": 2.8948759053092754e-05, | |
"loss": 1.5781, | |
"step": 690 | |
}, | |
{ | |
"epoch": 2.2592442096708654, | |
"grad_norm": 1.5093809366226196, | |
"learning_rate": 2.869593810993824e-05, | |
"loss": 1.5759, | |
"step": 695 | |
}, | |
{ | |
"epoch": 2.2754977651361235, | |
"grad_norm": 1.8209433555603027, | |
"learning_rate": 2.844273013743896e-05, | |
"loss": 1.6089, | |
"step": 700 | |
}, | |
{ | |
"epoch": 2.2917513206013815, | |
"grad_norm": 2.2926626205444336, | |
"learning_rate": 2.8189161650897045e-05, | |
"loss": 1.578, | |
"step": 705 | |
}, | |
{ | |
"epoch": 2.3080048760666396, | |
"grad_norm": 1.6229182481765747, | |
"learning_rate": 2.7935259203366777e-05, | |
"loss": 1.5563, | |
"step": 710 | |
}, | |
{ | |
"epoch": 2.3242584315318977, | |
"grad_norm": 1.8527780771255493, | |
"learning_rate": 2.7681049382873965e-05, | |
"loss": 1.5463, | |
"step": 715 | |
}, | |
{ | |
"epoch": 2.340511986997156, | |
"grad_norm": 1.877462387084961, | |
"learning_rate": 2.7426558809631748e-05, | |
"loss": 1.5227, | |
"step": 720 | |
}, | |
{ | |
"epoch": 2.356765542462414, | |
"grad_norm": 1.5895532369613647, | |
"learning_rate": 2.7171814133253015e-05, | |
"loss": 1.6244, | |
"step": 725 | |
}, | |
{ | |
"epoch": 2.3730190979276715, | |
"grad_norm": 1.5113598108291626, | |
"learning_rate": 2.691684202995966e-05, | |
"loss": 1.5977, | |
"step": 730 | |
}, | |
{ | |
"epoch": 2.3892726533929296, | |
"grad_norm": 1.582775592803955, | |
"learning_rate": 2.6661669199789174e-05, | |
"loss": 1.6314, | |
"step": 735 | |
}, | |
{ | |
"epoch": 2.4055262088581877, | |
"grad_norm": 1.6638239622116089, | |
"learning_rate": 2.6406322363798657e-05, | |
"loss": 1.5892, | |
"step": 740 | |
}, | |
{ | |
"epoch": 2.4217797643234458, | |
"grad_norm": 1.538442850112915, | |
"learning_rate": 2.6150828261266642e-05, | |
"loss": 1.5512, | |
"step": 745 | |
}, | |
{ | |
"epoch": 2.438033319788704, | |
"grad_norm": 1.6044487953186035, | |
"learning_rate": 2.589521364689308e-05, | |
"loss": 1.5288, | |
"step": 750 | |
}, | |
{ | |
"epoch": 2.454286875253962, | |
"grad_norm": 1.8566341400146484, | |
"learning_rate": 2.5639505287997583e-05, | |
"loss": 1.6311, | |
"step": 755 | |
}, | |
{ | |
"epoch": 2.47054043071922, | |
"grad_norm": 1.7828178405761719, | |
"learning_rate": 2.5383729961716483e-05, | |
"loss": 1.5705, | |
"step": 760 | |
}, | |
{ | |
"epoch": 2.4867939861844777, | |
"grad_norm": 1.8720645904541016, | |
"learning_rate": 2.512791445219876e-05, | |
"loss": 1.5955, | |
"step": 765 | |
}, | |
{ | |
"epoch": 2.5030475416497358, | |
"grad_norm": 1.5776010751724243, | |
"learning_rate": 2.487208554780125e-05, | |
"loss": 1.6158, | |
"step": 770 | |
}, | |
{ | |
"epoch": 2.519301097114994, | |
"grad_norm": 1.802860140800476, | |
"learning_rate": 2.461627003828352e-05, | |
"loss": 1.5844, | |
"step": 775 | |
}, | |
{ | |
"epoch": 2.535554652580252, | |
"grad_norm": 1.8349519968032837, | |
"learning_rate": 2.4360494712002423e-05, | |
"loss": 1.5486, | |
"step": 780 | |
}, | |
{ | |
"epoch": 2.55180820804551, | |
"grad_norm": 1.77409029006958, | |
"learning_rate": 2.4104786353106926e-05, | |
"loss": 1.5918, | |
"step": 785 | |
}, | |
{ | |
"epoch": 2.568061763510768, | |
"grad_norm": 1.7446224689483643, | |
"learning_rate": 2.384917173873336e-05, | |
"loss": 1.5554, | |
"step": 790 | |
}, | |
{ | |
"epoch": 2.584315318976026, | |
"grad_norm": 1.7830730676651, | |
"learning_rate": 2.359367763620135e-05, | |
"loss": 1.6061, | |
"step": 795 | |
}, | |
{ | |
"epoch": 2.600568874441284, | |
"grad_norm": 1.542494773864746, | |
"learning_rate": 2.3338330800210828e-05, | |
"loss": 1.6328, | |
"step": 800 | |
}, | |
{ | |
"epoch": 2.616822429906542, | |
"grad_norm": 1.7287977933883667, | |
"learning_rate": 2.3083157970040344e-05, | |
"loss": 1.5585, | |
"step": 805 | |
}, | |
{ | |
"epoch": 2.6330759853718, | |
"grad_norm": 1.6727949380874634, | |
"learning_rate": 2.282818586674699e-05, | |
"loss": 1.6686, | |
"step": 810 | |
}, | |
{ | |
"epoch": 2.649329540837058, | |
"grad_norm": 1.8130536079406738, | |
"learning_rate": 2.2573441190368255e-05, | |
"loss": 1.606, | |
"step": 815 | |
}, | |
{ | |
"epoch": 2.665583096302316, | |
"grad_norm": 1.8258434534072876, | |
"learning_rate": 2.2318950617126044e-05, | |
"loss": 1.5736, | |
"step": 820 | |
}, | |
{ | |
"epoch": 2.6818366517675742, | |
"grad_norm": 1.8970004320144653, | |
"learning_rate": 2.206474079663323e-05, | |
"loss": 1.5835, | |
"step": 825 | |
}, | |
{ | |
"epoch": 2.6980902072328323, | |
"grad_norm": 1.6587392091751099, | |
"learning_rate": 2.181083834910296e-05, | |
"loss": 1.6072, | |
"step": 830 | |
}, | |
{ | |
"epoch": 2.71434376269809, | |
"grad_norm": 2.0163655281066895, | |
"learning_rate": 2.1557269862561043e-05, | |
"loss": 1.6178, | |
"step": 835 | |
}, | |
{ | |
"epoch": 2.7305973181633485, | |
"grad_norm": 1.669689416885376, | |
"learning_rate": 2.1304061890061762e-05, | |
"loss": 1.5864, | |
"step": 840 | |
}, | |
{ | |
"epoch": 2.746850873628606, | |
"grad_norm": 1.7801822423934937, | |
"learning_rate": 2.105124094690725e-05, | |
"loss": 1.5186, | |
"step": 845 | |
}, | |
{ | |
"epoch": 2.7631044290938642, | |
"grad_norm": 1.7704806327819824, | |
"learning_rate": 2.0798833507870918e-05, | |
"loss": 1.5555, | |
"step": 850 | |
}, | |
{ | |
"epoch": 2.7793579845591223, | |
"grad_norm": 1.954834222793579, | |
"learning_rate": 2.0546866004425054e-05, | |
"loss": 1.6356, | |
"step": 855 | |
}, | |
{ | |
"epoch": 2.7956115400243804, | |
"grad_norm": 1.6739940643310547, | |
"learning_rate": 2.0295364821972997e-05, | |
"loss": 1.5781, | |
"step": 860 | |
}, | |
{ | |
"epoch": 2.8118650954896385, | |
"grad_norm": 1.758725881576538, | |
"learning_rate": 2.0044356297086134e-05, | |
"loss": 1.6308, | |
"step": 865 | |
}, | |
{ | |
"epoch": 2.828118650954896, | |
"grad_norm": 1.7557764053344727, | |
"learning_rate": 1.9793866714745978e-05, | |
"loss": 1.5366, | |
"step": 870 | |
}, | |
{ | |
"epoch": 2.8443722064201546, | |
"grad_norm": 1.6240283250808716, | |
"learning_rate": 1.9543922305591704e-05, | |
"loss": 1.6104, | |
"step": 875 | |
}, | |
{ | |
"epoch": 2.8606257618854123, | |
"grad_norm": 1.7991812229156494, | |
"learning_rate": 1.9294549243173303e-05, | |
"loss": 1.6576, | |
"step": 880 | |
}, | |
{ | |
"epoch": 2.8768793173506704, | |
"grad_norm": 1.8438798189163208, | |
"learning_rate": 1.904577364121077e-05, | |
"loss": 1.5634, | |
"step": 885 | |
}, | |
{ | |
"epoch": 2.8931328728159285, | |
"grad_norm": 2.0317084789276123, | |
"learning_rate": 1.879762155085956e-05, | |
"loss": 1.5538, | |
"step": 890 | |
}, | |
{ | |
"epoch": 2.9093864282811865, | |
"grad_norm": 1.6778262853622437, | |
"learning_rate": 1.8550118957982547e-05, | |
"loss": 1.6398, | |
"step": 895 | |
}, | |
{ | |
"epoch": 2.9256399837464446, | |
"grad_norm": 1.8134307861328125, | |
"learning_rate": 1.8303291780428876e-05, | |
"loss": 1.4889, | |
"step": 900 | |
}, | |
{ | |
"epoch": 2.9418935392117027, | |
"grad_norm": 1.9006396532058716, | |
"learning_rate": 1.8057165865319882e-05, | |
"loss": 1.582, | |
"step": 905 | |
}, | |
{ | |
"epoch": 2.958147094676961, | |
"grad_norm": 1.8433765172958374, | |
"learning_rate": 1.781176698634246e-05, | |
"loss": 1.5856, | |
"step": 910 | |
}, | |
{ | |
"epoch": 2.9744006501422184, | |
"grad_norm": 1.6938343048095703, | |
"learning_rate": 1.7567120841050135e-05, | |
"loss": 1.599, | |
"step": 915 | |
}, | |
{ | |
"epoch": 2.9906542056074765, | |
"grad_norm": 1.868025302886963, | |
"learning_rate": 1.7323253048172013e-05, | |
"loss": 1.5799, | |
"step": 920 | |
}, | |
{ | |
"epoch": 3.0069077610727346, | |
"grad_norm": 1.767774224281311, | |
"learning_rate": 1.7080189144930135e-05, | |
"loss": 1.6014, | |
"step": 925 | |
}, | |
{ | |
"epoch": 3.0231613165379927, | |
"grad_norm": 1.7357462644577026, | |
"learning_rate": 1.6837954584365216e-05, | |
"loss": 1.5911, | |
"step": 930 | |
}, | |
{ | |
"epoch": 3.039414872003251, | |
"grad_norm": 1.7004544734954834, | |
"learning_rate": 1.6596574732671287e-05, | |
"loss": 1.5304, | |
"step": 935 | |
}, | |
{ | |
"epoch": 3.055668427468509, | |
"grad_norm": 1.7053595781326294, | |
"learning_rate": 1.6356074866539435e-05, | |
"loss": 1.548, | |
"step": 940 | |
}, | |
{ | |
"epoch": 3.071921982933767, | |
"grad_norm": 1.7751458883285522, | |
"learning_rate": 1.6116480170510852e-05, | |
"loss": 1.6273, | |
"step": 945 | |
}, | |
{ | |
"epoch": 3.0881755383990246, | |
"grad_norm": 1.8386896848678589, | |
"learning_rate": 1.5877815734339608e-05, | |
"loss": 1.5394, | |
"step": 950 | |
}, | |
{ | |
"epoch": 3.1044290938642827, | |
"grad_norm": 1.6355414390563965, | |
"learning_rate": 1.5640106550365298e-05, | |
"loss": 1.5259, | |
"step": 955 | |
}, | |
{ | |
"epoch": 3.1206826493295408, | |
"grad_norm": 1.7519521713256836, | |
"learning_rate": 1.54033775108959e-05, | |
"loss": 1.6105, | |
"step": 960 | |
}, | |
{ | |
"epoch": 3.136936204794799, | |
"grad_norm": 1.690624713897705, | |
"learning_rate": 1.5167653405601124e-05, | |
"loss": 1.5488, | |
"step": 965 | |
}, | |
{ | |
"epoch": 3.153189760260057, | |
"grad_norm": 1.7758216857910156, | |
"learning_rate": 1.4932958918916513e-05, | |
"loss": 1.5906, | |
"step": 970 | |
}, | |
{ | |
"epoch": 3.169443315725315, | |
"grad_norm": 1.7740142345428467, | |
"learning_rate": 1.469931862745853e-05, | |
"loss": 1.5361, | |
"step": 975 | |
}, | |
{ | |
"epoch": 3.185696871190573, | |
"grad_norm": 1.8564815521240234, | |
"learning_rate": 1.446675699745097e-05, | |
"loss": 1.5407, | |
"step": 980 | |
}, | |
{ | |
"epoch": 3.2019504266558307, | |
"grad_norm": 1.829458236694336, | |
"learning_rate": 1.4235298382162899e-05, | |
"loss": 1.5962, | |
"step": 985 | |
}, | |
{ | |
"epoch": 3.218203982121089, | |
"grad_norm": 1.9702229499816895, | |
"learning_rate": 1.4004967019358469e-05, | |
"loss": 1.5905, | |
"step": 990 | |
}, | |
{ | |
"epoch": 3.234457537586347, | |
"grad_norm": 1.8121120929718018, | |
"learning_rate": 1.3775787028758799e-05, | |
"loss": 1.5726, | |
"step": 995 | |
}, | |
{ | |
"epoch": 3.250711093051605, | |
"grad_norm": 2.0297420024871826, | |
"learning_rate": 1.354778240951617e-05, | |
"loss": 1.5484, | |
"step": 1000 | |
} | |
], | |
"logging_steps": 5, | |
"max_steps": 1535, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 5, | |
"save_steps": 100, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 5.16003151926657e+17, | |
"train_batch_size": 2, | |
"trial_name": null, | |
"trial_params": null | |
} | |