|
{ |
|
"best_metric": 0.6174443364143372, |
|
"best_model_checkpoint": "ckpt/llama2_13b_other/fuze_28_balance_no_sys/checkpoint-12000", |
|
"epoch": 2.0, |
|
"eval_steps": 3000, |
|
"global_step": 13776, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0014518002322880372, |
|
"grad_norm": 0.5469616055488586, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.7374, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0029036004645760743, |
|
"grad_norm": 3.591161012649536, |
|
"learning_rate": 5e-05, |
|
"loss": 2.639, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.004355400696864111, |
|
"grad_norm": 4.15903902053833, |
|
"learning_rate": 4.9999934803356854e-05, |
|
"loss": 1.9973, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.005807200929152149, |
|
"grad_norm": 0.7227652072906494, |
|
"learning_rate": 4.999973921376744e-05, |
|
"loss": 1.0062, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.007259001161440186, |
|
"grad_norm": 2.8751795291900635, |
|
"learning_rate": 4.9999413232251924e-05, |
|
"loss": 0.8956, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.008710801393728223, |
|
"grad_norm": 1.205117106437683, |
|
"learning_rate": 4.9998956860510515e-05, |
|
"loss": 0.6662, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01016260162601626, |
|
"grad_norm": 2.018847703933716, |
|
"learning_rate": 4.9998370100923546e-05, |
|
"loss": 0.843, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.011614401858304297, |
|
"grad_norm": 1.5158370733261108, |
|
"learning_rate": 4.9997652956551386e-05, |
|
"loss": 0.6177, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.013066202090592335, |
|
"grad_norm": 2.3284151554107666, |
|
"learning_rate": 4.999680543113447e-05, |
|
"loss": 0.9499, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.014518002322880372, |
|
"grad_norm": 1.6479175090789795, |
|
"learning_rate": 4.999582752909326e-05, |
|
"loss": 0.94, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01596980255516841, |
|
"grad_norm": 2.4836409091949463, |
|
"learning_rate": 4.999471925552824e-05, |
|
"loss": 1.0131, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.017421602787456445, |
|
"grad_norm": 0.8880699872970581, |
|
"learning_rate": 4.999348061621986e-05, |
|
"loss": 0.7342, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.018873403019744484, |
|
"grad_norm": 1.1543691158294678, |
|
"learning_rate": 4.999211161762852e-05, |
|
"loss": 0.9561, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02032520325203252, |
|
"grad_norm": 1.358257532119751, |
|
"learning_rate": 4.9990612266894574e-05, |
|
"loss": 0.8214, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02177700348432056, |
|
"grad_norm": 1.9943785667419434, |
|
"learning_rate": 4.9988982571838214e-05, |
|
"loss": 0.8319, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.023228803716608595, |
|
"grad_norm": 6.3814520835876465, |
|
"learning_rate": 4.998722254095949e-05, |
|
"loss": 0.9133, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02468060394889663, |
|
"grad_norm": 5.650225639343262, |
|
"learning_rate": 4.998533218343826e-05, |
|
"loss": 0.8295, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02613240418118467, |
|
"grad_norm": 0.8774411082267761, |
|
"learning_rate": 4.998331150913412e-05, |
|
"loss": 0.7959, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.027584204413472705, |
|
"grad_norm": 1.1295948028564453, |
|
"learning_rate": 4.998116052858636e-05, |
|
"loss": 0.6883, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.029036004645760744, |
|
"grad_norm": 1.1888564825057983, |
|
"learning_rate": 4.9978879253013925e-05, |
|
"loss": 0.9037, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03048780487804878, |
|
"grad_norm": 2.754950761795044, |
|
"learning_rate": 4.997646769431532e-05, |
|
"loss": 0.9584, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.03193960511033682, |
|
"grad_norm": 0.6559054851531982, |
|
"learning_rate": 4.9973925865068604e-05, |
|
"loss": 0.9589, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.03339140534262486, |
|
"grad_norm": 0.49671778082847595, |
|
"learning_rate": 4.997125377853127e-05, |
|
"loss": 0.6646, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.03484320557491289, |
|
"grad_norm": 1.9896801710128784, |
|
"learning_rate": 4.996845144864021e-05, |
|
"loss": 0.9931, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.03629500580720093, |
|
"grad_norm": 5.797499179840088, |
|
"learning_rate": 4.9965518890011606e-05, |
|
"loss": 0.6066, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03774680603948897, |
|
"grad_norm": 1.675417423248291, |
|
"learning_rate": 4.996245611794091e-05, |
|
"loss": 0.8694, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.039198606271777, |
|
"grad_norm": 1.7982059717178345, |
|
"learning_rate": 4.9959263148402713e-05, |
|
"loss": 0.699, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.04065040650406504, |
|
"grad_norm": 2.3844377994537354, |
|
"learning_rate": 4.9955939998050696e-05, |
|
"loss": 0.9412, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.04210220673635308, |
|
"grad_norm": 1.0922398567199707, |
|
"learning_rate": 4.9952486684217516e-05, |
|
"loss": 0.7914, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.04355400696864112, |
|
"grad_norm": 0.8432398438453674, |
|
"learning_rate": 4.994890322491472e-05, |
|
"loss": 0.8929, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04500580720092915, |
|
"grad_norm": 0.7825923562049866, |
|
"learning_rate": 4.9945189638832676e-05, |
|
"loss": 0.9772, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.04645760743321719, |
|
"grad_norm": 3.1025567054748535, |
|
"learning_rate": 4.994134594534046e-05, |
|
"loss": 0.9957, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.04790940766550523, |
|
"grad_norm": 0.7623017430305481, |
|
"learning_rate": 4.993737216448573e-05, |
|
"loss": 0.5879, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.04936120789779326, |
|
"grad_norm": 0.5946126580238342, |
|
"learning_rate": 4.9933268316994665e-05, |
|
"loss": 0.7163, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.0508130081300813, |
|
"grad_norm": 2.404294490814209, |
|
"learning_rate": 4.992903442427184e-05, |
|
"loss": 0.7242, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.05226480836236934, |
|
"grad_norm": 2.895533323287964, |
|
"learning_rate": 4.9924670508400096e-05, |
|
"loss": 0.7379, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.05371660859465738, |
|
"grad_norm": 1.498542070388794, |
|
"learning_rate": 4.992017659214044e-05, |
|
"loss": 0.8413, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.05516840882694541, |
|
"grad_norm": 2.449810743331909, |
|
"learning_rate": 4.991555269893194e-05, |
|
"loss": 0.7442, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.05662020905923345, |
|
"grad_norm": 0.6948937773704529, |
|
"learning_rate": 4.991079885289159e-05, |
|
"loss": 0.7314, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.05807200929152149, |
|
"grad_norm": 1.1707820892333984, |
|
"learning_rate": 4.990591507881416e-05, |
|
"loss": 0.6596, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.05952380952380952, |
|
"grad_norm": 2.644362211227417, |
|
"learning_rate": 4.99009014021721e-05, |
|
"loss": 0.6841, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.06097560975609756, |
|
"grad_norm": 4.067262649536133, |
|
"learning_rate": 4.9895757849115415e-05, |
|
"loss": 0.9483, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.0624274099883856, |
|
"grad_norm": 0.9770334959030151, |
|
"learning_rate": 4.989048444647149e-05, |
|
"loss": 0.738, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.06387921022067364, |
|
"grad_norm": 0.6074944734573364, |
|
"learning_rate": 4.988508122174498e-05, |
|
"loss": 0.8884, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.06533101045296168, |
|
"grad_norm": 1.6675050258636475, |
|
"learning_rate": 4.9879548203117654e-05, |
|
"loss": 0.713, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.06678281068524972, |
|
"grad_norm": 2.338547945022583, |
|
"learning_rate": 4.987388541944824e-05, |
|
"loss": 1.0344, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.06823461091753774, |
|
"grad_norm": 2.637160301208496, |
|
"learning_rate": 4.986809290027231e-05, |
|
"loss": 0.6869, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.06968641114982578, |
|
"grad_norm": 1.666285753250122, |
|
"learning_rate": 4.986217067580209e-05, |
|
"loss": 0.7925, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.07113821138211382, |
|
"grad_norm": 3.2277042865753174, |
|
"learning_rate": 4.98561187769263e-05, |
|
"loss": 0.4641, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.07259001161440186, |
|
"grad_norm": 2.3560967445373535, |
|
"learning_rate": 4.984993723521003e-05, |
|
"loss": 0.5966, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0740418118466899, |
|
"grad_norm": 1.709749698638916, |
|
"learning_rate": 4.984362608289454e-05, |
|
"loss": 0.672, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.07549361207897794, |
|
"grad_norm": 0.9946199655532837, |
|
"learning_rate": 4.98371853528971e-05, |
|
"loss": 0.7569, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.07694541231126598, |
|
"grad_norm": 0.839363694190979, |
|
"learning_rate": 4.983061507881083e-05, |
|
"loss": 0.6376, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.078397212543554, |
|
"grad_norm": 1.063310146331787, |
|
"learning_rate": 4.982391529490452e-05, |
|
"loss": 0.9329, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.07984901277584204, |
|
"grad_norm": 1.1951221227645874, |
|
"learning_rate": 4.981708603612244e-05, |
|
"loss": 0.7935, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.08130081300813008, |
|
"grad_norm": 1.3913841247558594, |
|
"learning_rate": 4.981012733808417e-05, |
|
"loss": 0.7963, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.08275261324041812, |
|
"grad_norm": 1.2319680452346802, |
|
"learning_rate": 4.980303923708441e-05, |
|
"loss": 0.8177, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.08420441347270616, |
|
"grad_norm": 1.925199031829834, |
|
"learning_rate": 4.979582177009279e-05, |
|
"loss": 0.7387, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.0856562137049942, |
|
"grad_norm": 1.3763043880462646, |
|
"learning_rate": 4.9788474974753686e-05, |
|
"loss": 0.6866, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.08710801393728224, |
|
"grad_norm": 1.1121422052383423, |
|
"learning_rate": 4.9780998889386e-05, |
|
"loss": 1.0793, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.08855981416957026, |
|
"grad_norm": 4.042593479156494, |
|
"learning_rate": 4.9773393552982994e-05, |
|
"loss": 0.7474, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.0900116144018583, |
|
"grad_norm": 1.7447359561920166, |
|
"learning_rate": 4.976565900521205e-05, |
|
"loss": 0.6573, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.09146341463414634, |
|
"grad_norm": 2.484264373779297, |
|
"learning_rate": 4.975779528641451e-05, |
|
"loss": 0.7327, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.09291521486643438, |
|
"grad_norm": 1.6296310424804688, |
|
"learning_rate": 4.97498024376054e-05, |
|
"loss": 1.0403, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.09436701509872242, |
|
"grad_norm": 0.7024840712547302, |
|
"learning_rate": 4.9741680500473276e-05, |
|
"loss": 0.8121, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.09581881533101046, |
|
"grad_norm": 5.444680690765381, |
|
"learning_rate": 4.973342951737999e-05, |
|
"loss": 0.6586, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.0972706155632985, |
|
"grad_norm": 3.954261302947998, |
|
"learning_rate": 4.9725049531360454e-05, |
|
"loss": 1.0836, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.09872241579558652, |
|
"grad_norm": 0.8371906876564026, |
|
"learning_rate": 4.9716540586122425e-05, |
|
"loss": 0.7811, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.10017421602787456, |
|
"grad_norm": 1.8764899969100952, |
|
"learning_rate": 4.970790272604626e-05, |
|
"loss": 0.5407, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.1016260162601626, |
|
"grad_norm": 1.2408713102340698, |
|
"learning_rate": 4.9699135996184745e-05, |
|
"loss": 0.751, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.10307781649245064, |
|
"grad_norm": 1.968125343322754, |
|
"learning_rate": 4.969024044226276e-05, |
|
"loss": 0.7491, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.10452961672473868, |
|
"grad_norm": 2.832357168197632, |
|
"learning_rate": 4.9681216110677145e-05, |
|
"loss": 0.8841, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.10598141695702672, |
|
"grad_norm": 1.2367392778396606, |
|
"learning_rate": 4.9672063048496384e-05, |
|
"loss": 0.6756, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.10743321718931476, |
|
"grad_norm": 1.3740241527557373, |
|
"learning_rate": 4.9662781303460385e-05, |
|
"loss": 0.62, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.10888501742160278, |
|
"grad_norm": 1.3408523797988892, |
|
"learning_rate": 4.9653370923980245e-05, |
|
"loss": 1.1724, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.11033681765389082, |
|
"grad_norm": 0.5014351010322571, |
|
"learning_rate": 4.964383195913798e-05, |
|
"loss": 0.6388, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.11178861788617886, |
|
"grad_norm": 4.638858318328857, |
|
"learning_rate": 4.963416445868626e-05, |
|
"loss": 0.9585, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1132404181184669, |
|
"grad_norm": 1.0444010496139526, |
|
"learning_rate": 4.962436847304818e-05, |
|
"loss": 0.8725, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.11469221835075494, |
|
"grad_norm": 0.930262565612793, |
|
"learning_rate": 4.9614444053316954e-05, |
|
"loss": 0.6572, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.11614401858304298, |
|
"grad_norm": 2.152299642562866, |
|
"learning_rate": 4.960439125125571e-05, |
|
"loss": 0.9154, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.11759581881533102, |
|
"grad_norm": 0.5455219745635986, |
|
"learning_rate": 4.959421011929716e-05, |
|
"loss": 0.6822, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.11904761904761904, |
|
"grad_norm": 2.061328172683716, |
|
"learning_rate": 4.9583900710543344e-05, |
|
"loss": 0.6367, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.12049941927990708, |
|
"grad_norm": 0.8445961475372314, |
|
"learning_rate": 4.957346307876537e-05, |
|
"loss": 0.7663, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.12195121951219512, |
|
"grad_norm": 1.552341341972351, |
|
"learning_rate": 4.956289727840313e-05, |
|
"loss": 0.8241, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.12340301974448316, |
|
"grad_norm": 1.2934677600860596, |
|
"learning_rate": 4.9552203364565e-05, |
|
"loss": 0.8214, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1248548199767712, |
|
"grad_norm": 1.5896968841552734, |
|
"learning_rate": 4.9541381393027564e-05, |
|
"loss": 0.8306, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.12630662020905922, |
|
"grad_norm": 1.5255497694015503, |
|
"learning_rate": 4.953043142023531e-05, |
|
"loss": 1.0784, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.12775842044134728, |
|
"grad_norm": 1.1418253183364868, |
|
"learning_rate": 4.951935350330037e-05, |
|
"loss": 0.7049, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.1292102206736353, |
|
"grad_norm": 1.0762348175048828, |
|
"learning_rate": 4.950814770000217e-05, |
|
"loss": 0.7348, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.13066202090592335, |
|
"grad_norm": 2.7543962001800537, |
|
"learning_rate": 4.949681406878718e-05, |
|
"loss": 0.5945, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.13211382113821138, |
|
"grad_norm": 0.9333184361457825, |
|
"learning_rate": 4.948535266876857e-05, |
|
"loss": 0.5863, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.13356562137049943, |
|
"grad_norm": 0.7856501340866089, |
|
"learning_rate": 4.947376355972593e-05, |
|
"loss": 0.8137, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.13501742160278746, |
|
"grad_norm": 2.296924352645874, |
|
"learning_rate": 4.9462046802104945e-05, |
|
"loss": 0.6391, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.13646922183507548, |
|
"grad_norm": 1.829867959022522, |
|
"learning_rate": 4.9450202457017055e-05, |
|
"loss": 0.763, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.13792102206736354, |
|
"grad_norm": 0.624662458896637, |
|
"learning_rate": 4.9438230586239207e-05, |
|
"loss": 0.789, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.13937282229965156, |
|
"grad_norm": 1.0507394075393677, |
|
"learning_rate": 4.942613125221346e-05, |
|
"loss": 0.7278, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.14082462253193961, |
|
"grad_norm": 0.9353327751159668, |
|
"learning_rate": 4.9413904518046674e-05, |
|
"loss": 0.6717, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.14227642276422764, |
|
"grad_norm": 4.606626510620117, |
|
"learning_rate": 4.9401550447510235e-05, |
|
"loss": 0.6505, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.1437282229965157, |
|
"grad_norm": 0.5310667753219604, |
|
"learning_rate": 4.9389069105039634e-05, |
|
"loss": 0.6163, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.14518002322880372, |
|
"grad_norm": 1.1119409799575806, |
|
"learning_rate": 4.9376460555734225e-05, |
|
"loss": 0.6708, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14663182346109174, |
|
"grad_norm": 0.934678852558136, |
|
"learning_rate": 4.936372486535679e-05, |
|
"loss": 0.63, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.1480836236933798, |
|
"grad_norm": 0.9781250357627869, |
|
"learning_rate": 4.9350862100333294e-05, |
|
"loss": 0.7353, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.14953542392566782, |
|
"grad_norm": 0.732448935508728, |
|
"learning_rate": 4.9337872327752444e-05, |
|
"loss": 0.6336, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.15098722415795587, |
|
"grad_norm": 0.9106850624084473, |
|
"learning_rate": 4.932475561536542e-05, |
|
"loss": 0.5646, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.1524390243902439, |
|
"grad_norm": 3.07547926902771, |
|
"learning_rate": 4.931151203158547e-05, |
|
"loss": 0.5629, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.15389082462253195, |
|
"grad_norm": 2.424933433532715, |
|
"learning_rate": 4.929814164548756e-05, |
|
"loss": 0.9348, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.15534262485481998, |
|
"grad_norm": 0.5797663927078247, |
|
"learning_rate": 4.928464452680804e-05, |
|
"loss": 0.7293, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.156794425087108, |
|
"grad_norm": 4.099000453948975, |
|
"learning_rate": 4.9271020745944265e-05, |
|
"loss": 0.3943, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.15824622531939606, |
|
"grad_norm": 2.4443376064300537, |
|
"learning_rate": 4.92572703739542e-05, |
|
"loss": 0.503, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.15969802555168408, |
|
"grad_norm": 1.4358808994293213, |
|
"learning_rate": 4.924339348255611e-05, |
|
"loss": 0.7181, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.16114982578397213, |
|
"grad_norm": 1.4664112329483032, |
|
"learning_rate": 4.922939014412812e-05, |
|
"loss": 0.7096, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.16260162601626016, |
|
"grad_norm": 1.605031967163086, |
|
"learning_rate": 4.9215260431707885e-05, |
|
"loss": 0.7917, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.1640534262485482, |
|
"grad_norm": 2.435290813446045, |
|
"learning_rate": 4.92010044189922e-05, |
|
"loss": 0.7983, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.16550522648083624, |
|
"grad_norm": 3.4949209690093994, |
|
"learning_rate": 4.9186622180336595e-05, |
|
"loss": 0.811, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.16695702671312426, |
|
"grad_norm": 0.8932238221168518, |
|
"learning_rate": 4.917211379075496e-05, |
|
"loss": 0.5875, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.16840882694541232, |
|
"grad_norm": 1.7695764303207397, |
|
"learning_rate": 4.9157479325919156e-05, |
|
"loss": 0.8934, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.16986062717770034, |
|
"grad_norm": 5.012516975402832, |
|
"learning_rate": 4.9142718862158634e-05, |
|
"loss": 0.6394, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.1713124274099884, |
|
"grad_norm": 1.3308697938919067, |
|
"learning_rate": 4.912783247646e-05, |
|
"loss": 0.5884, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.17276422764227642, |
|
"grad_norm": 0.9371745586395264, |
|
"learning_rate": 4.911282024646664e-05, |
|
"loss": 0.8007, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.17421602787456447, |
|
"grad_norm": 3.8555784225463867, |
|
"learning_rate": 4.909768225047833e-05, |
|
"loss": 0.632, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1756678281068525, |
|
"grad_norm": 3.392313003540039, |
|
"learning_rate": 4.908241856745077e-05, |
|
"loss": 0.8346, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.17711962833914052, |
|
"grad_norm": 1.3405215740203857, |
|
"learning_rate": 4.906702927699525e-05, |
|
"loss": 0.7455, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.17857142857142858, |
|
"grad_norm": 1.5076225996017456, |
|
"learning_rate": 4.905151445937817e-05, |
|
"loss": 0.8539, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.1800232288037166, |
|
"grad_norm": 1.722262978553772, |
|
"learning_rate": 4.903587419552065e-05, |
|
"loss": 0.641, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.18147502903600465, |
|
"grad_norm": 0.5747788548469543, |
|
"learning_rate": 4.902010856699811e-05, |
|
"loss": 0.669, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.18292682926829268, |
|
"grad_norm": 3.8853611946105957, |
|
"learning_rate": 4.900421765603983e-05, |
|
"loss": 0.598, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.18437862950058073, |
|
"grad_norm": 0.7678889036178589, |
|
"learning_rate": 4.8988201545528536e-05, |
|
"loss": 0.57, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.18583042973286876, |
|
"grad_norm": 0.890204906463623, |
|
"learning_rate": 4.897206031899997e-05, |
|
"loss": 0.52, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.18728222996515678, |
|
"grad_norm": 1.0817334651947021, |
|
"learning_rate": 4.8955794060642416e-05, |
|
"loss": 0.5813, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.18873403019744484, |
|
"grad_norm": 0.7758299112319946, |
|
"learning_rate": 4.893940285529631e-05, |
|
"loss": 0.8182, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.19018583042973286, |
|
"grad_norm": 1.0594083070755005, |
|
"learning_rate": 4.8922886788453796e-05, |
|
"loss": 0.6143, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.1916376306620209, |
|
"grad_norm": 0.9304606914520264, |
|
"learning_rate": 4.8906245946258235e-05, |
|
"loss": 0.7401, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.19308943089430894, |
|
"grad_norm": 2.11362361907959, |
|
"learning_rate": 4.8889480415503785e-05, |
|
"loss": 0.4487, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.194541231126597, |
|
"grad_norm": 2.9040818214416504, |
|
"learning_rate": 4.8872590283634955e-05, |
|
"loss": 0.7218, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.19599303135888502, |
|
"grad_norm": 0.6021516919136047, |
|
"learning_rate": 4.8855575638746135e-05, |
|
"loss": 0.7179, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.19744483159117304, |
|
"grad_norm": 3.067187786102295, |
|
"learning_rate": 4.883843656958115e-05, |
|
"loss": 0.9561, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.1988966318234611, |
|
"grad_norm": 4.093753337860107, |
|
"learning_rate": 4.882117316553278e-05, |
|
"loss": 0.8025, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.20034843205574912, |
|
"grad_norm": 1.0853984355926514, |
|
"learning_rate": 4.88037855166423e-05, |
|
"loss": 0.7298, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.20180023228803717, |
|
"grad_norm": 1.4068083763122559, |
|
"learning_rate": 4.878627371359902e-05, |
|
"loss": 0.5038, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.2032520325203252, |
|
"grad_norm": 1.063698649406433, |
|
"learning_rate": 4.876863784773981e-05, |
|
"loss": 0.8824, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.20470383275261325, |
|
"grad_norm": 1.4493242502212524, |
|
"learning_rate": 4.875087801104859e-05, |
|
"loss": 0.8179, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.20615563298490128, |
|
"grad_norm": 1.8046404123306274, |
|
"learning_rate": 4.8732994296155915e-05, |
|
"loss": 0.7289, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.2076074332171893, |
|
"grad_norm": 1.531055212020874, |
|
"learning_rate": 4.871498679633844e-05, |
|
"loss": 0.9306, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.20905923344947736, |
|
"grad_norm": 1.2926791906356812, |
|
"learning_rate": 4.869685560551844e-05, |
|
"loss": 0.7812, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.21051103368176538, |
|
"grad_norm": 2.004673957824707, |
|
"learning_rate": 4.867860081826334e-05, |
|
"loss": 0.6344, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.21196283391405343, |
|
"grad_norm": 0.8372285962104797, |
|
"learning_rate": 4.866022252978521e-05, |
|
"loss": 0.9279, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.21341463414634146, |
|
"grad_norm": 3.9492061138153076, |
|
"learning_rate": 4.8641720835940265e-05, |
|
"loss": 0.6554, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.2148664343786295, |
|
"grad_norm": 1.1838141679763794, |
|
"learning_rate": 4.862309583322837e-05, |
|
"loss": 0.35, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.21631823461091754, |
|
"grad_norm": 0.5205928683280945, |
|
"learning_rate": 4.860434761879255e-05, |
|
"loss": 0.8758, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.21777003484320556, |
|
"grad_norm": 1.2075397968292236, |
|
"learning_rate": 4.858547629041844e-05, |
|
"loss": 0.8463, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.21922183507549362, |
|
"grad_norm": 0.9651175141334534, |
|
"learning_rate": 4.8566481946533824e-05, |
|
"loss": 0.5918, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.22067363530778164, |
|
"grad_norm": 1.0648430585861206, |
|
"learning_rate": 4.8547364686208106e-05, |
|
"loss": 0.7321, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.2221254355400697, |
|
"grad_norm": 1.3580704927444458, |
|
"learning_rate": 4.852812460915178e-05, |
|
"loss": 0.8827, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.22357723577235772, |
|
"grad_norm": 1.9950529336929321, |
|
"learning_rate": 4.850876181571592e-05, |
|
"loss": 0.8698, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.22502903600464577, |
|
"grad_norm": 0.6319971680641174, |
|
"learning_rate": 4.848927640689165e-05, |
|
"loss": 0.8824, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.2264808362369338, |
|
"grad_norm": 0.40468019247055054, |
|
"learning_rate": 4.846966848430964e-05, |
|
"loss": 0.454, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.22793263646922182, |
|
"grad_norm": 2.219438076019287, |
|
"learning_rate": 4.8449938150239544e-05, |
|
"loss": 0.7014, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.22938443670150988, |
|
"grad_norm": 0.6382218599319458, |
|
"learning_rate": 4.843008550758948e-05, |
|
"loss": 0.7618, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.2308362369337979, |
|
"grad_norm": 1.5169848203659058, |
|
"learning_rate": 4.8410110659905514e-05, |
|
"loss": 0.9599, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.23228803716608595, |
|
"grad_norm": 1.203534483909607, |
|
"learning_rate": 4.8390013711371085e-05, |
|
"loss": 0.4722, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.23373983739837398, |
|
"grad_norm": 1.456782341003418, |
|
"learning_rate": 4.836979476680647e-05, |
|
"loss": 0.8534, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.23519163763066203, |
|
"grad_norm": 0.9215080142021179, |
|
"learning_rate": 4.834945393166826e-05, |
|
"loss": 0.8088, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.23664343786295006, |
|
"grad_norm": 0.7815489768981934, |
|
"learning_rate": 4.832899131204879e-05, |
|
"loss": 0.8544, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 0.947912871837616, |
|
"learning_rate": 4.8308407014675577e-05, |
|
"loss": 0.6289, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.23954703832752614, |
|
"grad_norm": 0.6381635665893555, |
|
"learning_rate": 4.82877011469108e-05, |
|
"loss": 0.7655, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.24099883855981416, |
|
"grad_norm": 1.064013957977295, |
|
"learning_rate": 4.8266873816750716e-05, |
|
"loss": 0.5693, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.2424506387921022, |
|
"grad_norm": 2.0902225971221924, |
|
"learning_rate": 4.824592513282505e-05, |
|
"loss": 0.8012, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 6.462097644805908, |
|
"learning_rate": 4.8224855204396555e-05, |
|
"loss": 0.628, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.2453542392566783, |
|
"grad_norm": 1.036537766456604, |
|
"learning_rate": 4.820366414136028e-05, |
|
"loss": 0.7784, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.24680603948896632, |
|
"grad_norm": 1.232399582862854, |
|
"learning_rate": 4.818235205424315e-05, |
|
"loss": 0.7538, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.24825783972125434, |
|
"grad_norm": 1.105141282081604, |
|
"learning_rate": 4.816091905420327e-05, |
|
"loss": 0.9042, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.2497096399535424, |
|
"grad_norm": 0.6853220462799072, |
|
"learning_rate": 4.813936525302942e-05, |
|
"loss": 0.5936, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.2511614401858304, |
|
"grad_norm": 0.4660559892654419, |
|
"learning_rate": 4.811769076314044e-05, |
|
"loss": 0.7323, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.25261324041811845, |
|
"grad_norm": 1.0349425077438354, |
|
"learning_rate": 4.809589569758464e-05, |
|
"loss": 0.5865, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.2540650406504065, |
|
"grad_norm": 0.4405325651168823, |
|
"learning_rate": 4.8073980170039234e-05, |
|
"loss": 0.7297, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.25551684088269455, |
|
"grad_norm": 1.4432979822158813, |
|
"learning_rate": 4.805194429480972e-05, |
|
"loss": 0.6268, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.2569686411149826, |
|
"grad_norm": 0.7807000279426575, |
|
"learning_rate": 4.802978818682933e-05, |
|
"loss": 0.7536, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.2584204413472706, |
|
"grad_norm": 1.3717634677886963, |
|
"learning_rate": 4.800751196165835e-05, |
|
"loss": 0.908, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.2598722415795586, |
|
"grad_norm": 1.9359996318817139, |
|
"learning_rate": 4.79851157354836e-05, |
|
"loss": 0.4698, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.2613240418118467, |
|
"grad_norm": 2.113598346710205, |
|
"learning_rate": 4.7962599625117773e-05, |
|
"loss": 0.6629, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.26277584204413473, |
|
"grad_norm": 0.7605477571487427, |
|
"learning_rate": 4.7939963747998855e-05, |
|
"loss": 0.727, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.26422764227642276, |
|
"grad_norm": 0.6016331315040588, |
|
"learning_rate": 4.7917208222189506e-05, |
|
"loss": 0.8574, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.2656794425087108, |
|
"grad_norm": 0.8621135950088501, |
|
"learning_rate": 4.789433316637644e-05, |
|
"loss": 0.7995, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.26713124274099886, |
|
"grad_norm": 1.2249228954315186, |
|
"learning_rate": 4.7871338699869796e-05, |
|
"loss": 0.9538, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.2685830429732869, |
|
"grad_norm": 3.5839085578918457, |
|
"learning_rate": 4.784822494260255e-05, |
|
"loss": 0.602, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.2700348432055749, |
|
"grad_norm": 1.334702491760254, |
|
"learning_rate": 4.782499201512983e-05, |
|
"loss": 0.702, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.27148664343786294, |
|
"grad_norm": 0.8643277287483215, |
|
"learning_rate": 4.780164003862838e-05, |
|
"loss": 0.7837, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.27293844367015097, |
|
"grad_norm": 0.9091192483901978, |
|
"learning_rate": 4.777816913489581e-05, |
|
"loss": 0.658, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.27439024390243905, |
|
"grad_norm": 4.237992763519287, |
|
"learning_rate": 4.775457942635006e-05, |
|
"loss": 0.7956, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.27584204413472707, |
|
"grad_norm": 0.5401553511619568, |
|
"learning_rate": 4.773087103602871e-05, |
|
"loss": 0.6637, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.2772938443670151, |
|
"grad_norm": 2.7873334884643555, |
|
"learning_rate": 4.770704408758837e-05, |
|
"loss": 0.4589, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.2787456445993031, |
|
"grad_norm": 0.6120592355728149, |
|
"learning_rate": 4.7683098705304e-05, |
|
"loss": 0.6523, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.28019744483159115, |
|
"grad_norm": 0.4932442307472229, |
|
"learning_rate": 4.765903501406826e-05, |
|
"loss": 0.7068, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.28164924506387923, |
|
"grad_norm": 1.102984070777893, |
|
"learning_rate": 4.7634853139390945e-05, |
|
"loss": 0.7414, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.28310104529616725, |
|
"grad_norm": 0.7468515634536743, |
|
"learning_rate": 4.7610553207398185e-05, |
|
"loss": 0.8069, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.2845528455284553, |
|
"grad_norm": 1.3317950963974, |
|
"learning_rate": 4.758613534483191e-05, |
|
"loss": 0.8219, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.2860046457607433, |
|
"grad_norm": 1.7681723833084106, |
|
"learning_rate": 4.7561599679049135e-05, |
|
"loss": 0.5898, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.2874564459930314, |
|
"grad_norm": 1.0765740871429443, |
|
"learning_rate": 4.7536946338021306e-05, |
|
"loss": 0.552, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.2889082462253194, |
|
"grad_norm": 1.1886732578277588, |
|
"learning_rate": 4.751217545033362e-05, |
|
"loss": 0.5558, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"grad_norm": 1.0681451559066772, |
|
"learning_rate": 4.748728714518438e-05, |
|
"loss": 0.6335, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.29181184668989546, |
|
"grad_norm": 0.8771520256996155, |
|
"learning_rate": 4.7462281552384306e-05, |
|
"loss": 0.6354, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.2932636469221835, |
|
"grad_norm": 1.085581660270691, |
|
"learning_rate": 4.7437158802355854e-05, |
|
"loss": 0.4697, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.29471544715447157, |
|
"grad_norm": 1.2349504232406616, |
|
"learning_rate": 4.7411919026132536e-05, |
|
"loss": 0.5823, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.2961672473867596, |
|
"grad_norm": 0.8741536736488342, |
|
"learning_rate": 4.7386562355358254e-05, |
|
"loss": 0.7622, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.2976190476190476, |
|
"grad_norm": 3.957540273666382, |
|
"learning_rate": 4.736108892228658e-05, |
|
"loss": 0.696, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.29907084785133564, |
|
"grad_norm": 1.2028242349624634, |
|
"learning_rate": 4.733549885978012e-05, |
|
"loss": 0.5248, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.30052264808362367, |
|
"grad_norm": 2.623757839202881, |
|
"learning_rate": 4.7309792301309755e-05, |
|
"loss": 0.7899, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.30197444831591175, |
|
"grad_norm": 0.8219063878059387, |
|
"learning_rate": 4.728396938095399e-05, |
|
"loss": 0.8088, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.3034262485481998, |
|
"grad_norm": 2.02731990814209, |
|
"learning_rate": 4.7258030233398244e-05, |
|
"loss": 0.7673, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.3048780487804878, |
|
"grad_norm": 1.400942087173462, |
|
"learning_rate": 4.723197499393415e-05, |
|
"loss": 0.648, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.3063298490127758, |
|
"grad_norm": 2.6127829551696777, |
|
"learning_rate": 4.7205803798458836e-05, |
|
"loss": 0.7408, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.3077816492450639, |
|
"grad_norm": 2.252988338470459, |
|
"learning_rate": 4.7179516783474226e-05, |
|
"loss": 0.7625, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.30923344947735193, |
|
"grad_norm": 1.4618316888809204, |
|
"learning_rate": 4.7153114086086336e-05, |
|
"loss": 0.9155, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.31068524970963995, |
|
"grad_norm": 0.945075511932373, |
|
"learning_rate": 4.712659584400454e-05, |
|
"loss": 0.8939, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.312137049941928, |
|
"grad_norm": 1.9799119234085083, |
|
"learning_rate": 4.709996219554088e-05, |
|
"loss": 0.7928, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.313588850174216, |
|
"grad_norm": 3.0045998096466064, |
|
"learning_rate": 4.7073213279609293e-05, |
|
"loss": 0.7881, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.3150406504065041, |
|
"grad_norm": 1.4035004377365112, |
|
"learning_rate": 4.7046349235724964e-05, |
|
"loss": 0.8062, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.3164924506387921, |
|
"grad_norm": 1.9164339303970337, |
|
"learning_rate": 4.701937020400352e-05, |
|
"loss": 0.7617, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.31794425087108014, |
|
"grad_norm": 1.0605820417404175, |
|
"learning_rate": 4.699227632516034e-05, |
|
"loss": 0.7231, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.31939605110336816, |
|
"grad_norm": 0.9426791071891785, |
|
"learning_rate": 4.6965067740509825e-05, |
|
"loss": 0.6771, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3208478513356562, |
|
"grad_norm": 1.0823321342468262, |
|
"learning_rate": 4.693774459196465e-05, |
|
"loss": 0.8387, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.32229965156794427, |
|
"grad_norm": 1.703384518623352, |
|
"learning_rate": 4.691030702203502e-05, |
|
"loss": 0.4302, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.3237514518002323, |
|
"grad_norm": 1.2216838598251343, |
|
"learning_rate": 4.6882755173827933e-05, |
|
"loss": 0.5434, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.3252032520325203, |
|
"grad_norm": 0.8944600820541382, |
|
"learning_rate": 4.6855089191046406e-05, |
|
"loss": 0.7718, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.32665505226480834, |
|
"grad_norm": 1.4830057621002197, |
|
"learning_rate": 4.682730921798881e-05, |
|
"loss": 0.7067, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.3281068524970964, |
|
"grad_norm": 1.1373881101608276, |
|
"learning_rate": 4.679941539954801e-05, |
|
"loss": 0.6134, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.32955865272938445, |
|
"grad_norm": 3.311741352081299, |
|
"learning_rate": 4.677140788121067e-05, |
|
"loss": 0.5914, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.3310104529616725, |
|
"grad_norm": 1.3183683156967163, |
|
"learning_rate": 4.674328680905649e-05, |
|
"loss": 0.6412, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.3324622531939605, |
|
"grad_norm": 0.6239253282546997, |
|
"learning_rate": 4.671505232975741e-05, |
|
"loss": 0.8585, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.3339140534262485, |
|
"grad_norm": 0.6019532680511475, |
|
"learning_rate": 4.668670459057692e-05, |
|
"loss": 0.6322, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.3353658536585366, |
|
"grad_norm": 1.3409720659255981, |
|
"learning_rate": 4.665824373936921e-05, |
|
"loss": 0.8676, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.33681765389082463, |
|
"grad_norm": 1.3901034593582153, |
|
"learning_rate": 4.662966992457842e-05, |
|
"loss": 0.6381, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.33826945412311266, |
|
"grad_norm": 0.4752490818500519, |
|
"learning_rate": 4.660098329523791e-05, |
|
"loss": 0.7852, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.3397212543554007, |
|
"grad_norm": 0.8826183676719666, |
|
"learning_rate": 4.657218400096942e-05, |
|
"loss": 0.7941, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.3411730545876887, |
|
"grad_norm": 1.8894481658935547, |
|
"learning_rate": 4.654327219198235e-05, |
|
"loss": 0.554, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.3426248548199768, |
|
"grad_norm": 4.281989097595215, |
|
"learning_rate": 4.6514248019072926e-05, |
|
"loss": 0.6456, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.3440766550522648, |
|
"grad_norm": 1.1848098039627075, |
|
"learning_rate": 4.648511163362343e-05, |
|
"loss": 0.8237, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.34552845528455284, |
|
"grad_norm": 1.174756646156311, |
|
"learning_rate": 4.645586318760145e-05, |
|
"loss": 0.709, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.34698025551684086, |
|
"grad_norm": 2.8332509994506836, |
|
"learning_rate": 4.6426502833559e-05, |
|
"loss": 0.6055, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.34843205574912894, |
|
"grad_norm": 0.6192472577095032, |
|
"learning_rate": 4.639703072463181e-05, |
|
"loss": 0.8328, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.34988385598141697, |
|
"grad_norm": 0.6660485863685608, |
|
"learning_rate": 4.636744701453849e-05, |
|
"loss": 0.92, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.351335656213705, |
|
"grad_norm": 1.6284211874008179, |
|
"learning_rate": 4.633775185757973e-05, |
|
"loss": 0.7252, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.352787456445993, |
|
"grad_norm": 0.7274760007858276, |
|
"learning_rate": 4.630794540863747e-05, |
|
"loss": 0.6107, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.35423925667828104, |
|
"grad_norm": 2.6577463150024414, |
|
"learning_rate": 4.627802782317417e-05, |
|
"loss": 0.647, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.3556910569105691, |
|
"grad_norm": 1.4532408714294434, |
|
"learning_rate": 4.624799925723191e-05, |
|
"loss": 0.435, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 2.7971816062927246, |
|
"learning_rate": 4.621785986743163e-05, |
|
"loss": 0.5866, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.3585946573751452, |
|
"grad_norm": 1.4571512937545776, |
|
"learning_rate": 4.61876098109723e-05, |
|
"loss": 0.7796, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.3600464576074332, |
|
"grad_norm": 2.3864150047302246, |
|
"learning_rate": 4.6157249245630075e-05, |
|
"loss": 0.9921, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.3614982578397213, |
|
"grad_norm": 2.915992021560669, |
|
"learning_rate": 4.6126778329757516e-05, |
|
"loss": 0.7665, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.3629500580720093, |
|
"grad_norm": 2.1495201587677, |
|
"learning_rate": 4.609619722228274e-05, |
|
"loss": 0.6569, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.36440185830429733, |
|
"grad_norm": 2.9136157035827637, |
|
"learning_rate": 4.606550608270859e-05, |
|
"loss": 0.835, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.36585365853658536, |
|
"grad_norm": 0.8638590574264526, |
|
"learning_rate": 4.603470507111182e-05, |
|
"loss": 0.7063, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.3673054587688734, |
|
"grad_norm": 2.173835277557373, |
|
"learning_rate": 4.600379434814221e-05, |
|
"loss": 0.761, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.36875725900116146, |
|
"grad_norm": 2.0101635456085205, |
|
"learning_rate": 4.597277407502181e-05, |
|
"loss": 0.5618, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.3702090592334495, |
|
"grad_norm": 1.1493425369262695, |
|
"learning_rate": 4.5941644413544024e-05, |
|
"loss": 0.671, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.3716608594657375, |
|
"grad_norm": 1.129114031791687, |
|
"learning_rate": 4.591040552607281e-05, |
|
"loss": 0.601, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.37311265969802554, |
|
"grad_norm": 2.0701091289520264, |
|
"learning_rate": 4.587905757554182e-05, |
|
"loss": 0.8573, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.37456445993031356, |
|
"grad_norm": 1.2713189125061035, |
|
"learning_rate": 4.5847600725453536e-05, |
|
"loss": 0.6449, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.37601626016260165, |
|
"grad_norm": 1.8538284301757812, |
|
"learning_rate": 4.581603513987845e-05, |
|
"loss": 0.6038, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.37746806039488967, |
|
"grad_norm": 1.350251317024231, |
|
"learning_rate": 4.5784360983454175e-05, |
|
"loss": 0.5973, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.3789198606271777, |
|
"grad_norm": 0.7953972220420837, |
|
"learning_rate": 4.5752578421384606e-05, |
|
"loss": 0.9078, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.3803716608594657, |
|
"grad_norm": 0.8986756205558777, |
|
"learning_rate": 4.572068761943905e-05, |
|
"loss": 0.6951, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.3818234610917538, |
|
"grad_norm": 0.920846700668335, |
|
"learning_rate": 4.568868874395137e-05, |
|
"loss": 0.4939, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.3832752613240418, |
|
"grad_norm": 1.8228408098220825, |
|
"learning_rate": 4.565658196181909e-05, |
|
"loss": 0.8694, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.38472706155632985, |
|
"grad_norm": 1.1996351480484009, |
|
"learning_rate": 4.5624367440502594e-05, |
|
"loss": 0.6528, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.3861788617886179, |
|
"grad_norm": 1.8428452014923096, |
|
"learning_rate": 4.559204534802415e-05, |
|
"loss": 0.6755, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.3876306620209059, |
|
"grad_norm": 1.1987791061401367, |
|
"learning_rate": 4.555961585296712e-05, |
|
"loss": 0.5469, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.389082462253194, |
|
"grad_norm": 1.405840277671814, |
|
"learning_rate": 4.5527079124475045e-05, |
|
"loss": 0.7443, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.390534262485482, |
|
"grad_norm": 1.371089220046997, |
|
"learning_rate": 4.549443533225075e-05, |
|
"loss": 0.7145, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.39198606271777003, |
|
"grad_norm": 1.3392704725265503, |
|
"learning_rate": 4.546168464655551e-05, |
|
"loss": 0.6241, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.39343786295005806, |
|
"grad_norm": 1.8694888353347778, |
|
"learning_rate": 4.542882723820809e-05, |
|
"loss": 0.7412, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.3948896631823461, |
|
"grad_norm": 0.8335723876953125, |
|
"learning_rate": 4.5395863278583914e-05, |
|
"loss": 0.5457, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.39634146341463417, |
|
"grad_norm": 1.200954556465149, |
|
"learning_rate": 4.5362792939614126e-05, |
|
"loss": 0.8856, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.3977932636469222, |
|
"grad_norm": 0.7490825057029724, |
|
"learning_rate": 4.532961639378477e-05, |
|
"loss": 0.7058, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.3992450638792102, |
|
"grad_norm": 0.5736889839172363, |
|
"learning_rate": 4.529633381413577e-05, |
|
"loss": 0.8461, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.40069686411149824, |
|
"grad_norm": 3.038465976715088, |
|
"learning_rate": 4.526294537426013e-05, |
|
"loss": 0.9319, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.4021486643437863, |
|
"grad_norm": 3.3678839206695557, |
|
"learning_rate": 4.5229451248302996e-05, |
|
"loss": 0.7878, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.40360046457607435, |
|
"grad_norm": 0.9918755292892456, |
|
"learning_rate": 4.5195851610960716e-05, |
|
"loss": 0.5738, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.40505226480836237, |
|
"grad_norm": 0.45315515995025635, |
|
"learning_rate": 4.516214663747999e-05, |
|
"loss": 0.8513, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"grad_norm": 3.0047781467437744, |
|
"learning_rate": 4.512833650365691e-05, |
|
"loss": 0.494, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.4079558652729384, |
|
"grad_norm": 1.6291121244430542, |
|
"learning_rate": 4.509442138583604e-05, |
|
"loss": 0.4759, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.4094076655052265, |
|
"grad_norm": 1.279628038406372, |
|
"learning_rate": 4.506040146090953e-05, |
|
"loss": 0.75, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.41085946573751453, |
|
"grad_norm": 0.6952537894248962, |
|
"learning_rate": 4.502627690631618e-05, |
|
"loss": 0.6722, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.41231126596980255, |
|
"grad_norm": 6.771650791168213, |
|
"learning_rate": 4.499204790004051e-05, |
|
"loss": 0.6538, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.4137630662020906, |
|
"grad_norm": 1.1350947618484497, |
|
"learning_rate": 4.49577146206118e-05, |
|
"loss": 0.651, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.4152148664343786, |
|
"grad_norm": 1.379130482673645, |
|
"learning_rate": 4.492327724710324e-05, |
|
"loss": 0.8259, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 0.9285208582878113, |
|
"learning_rate": 4.488873595913091e-05, |
|
"loss": 0.5317, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.4181184668989547, |
|
"grad_norm": 1.3536639213562012, |
|
"learning_rate": 4.485409093685289e-05, |
|
"loss": 0.9471, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.41957026713124274, |
|
"grad_norm": 1.6582531929016113, |
|
"learning_rate": 4.4819342360968316e-05, |
|
"loss": 0.6531, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.42102206736353076, |
|
"grad_norm": 0.5296352505683899, |
|
"learning_rate": 4.478449041271644e-05, |
|
"loss": 0.8268, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.42247386759581884, |
|
"grad_norm": 1.2088879346847534, |
|
"learning_rate": 4.474953527387564e-05, |
|
"loss": 0.9049, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.42392566782810687, |
|
"grad_norm": 0.5331336855888367, |
|
"learning_rate": 4.471447712676256e-05, |
|
"loss": 1.1198, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.4253774680603949, |
|
"grad_norm": 1.4603538513183594, |
|
"learning_rate": 4.4679316154231054e-05, |
|
"loss": 0.5809, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.4268292682926829, |
|
"grad_norm": 0.9502357840538025, |
|
"learning_rate": 4.464405253967133e-05, |
|
"loss": 0.5471, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.42828106852497094, |
|
"grad_norm": 1.7284854650497437, |
|
"learning_rate": 4.4608686467008926e-05, |
|
"loss": 0.6076, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.429732868757259, |
|
"grad_norm": 0.7732632160186768, |
|
"learning_rate": 4.457321812070378e-05, |
|
"loss": 0.8251, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.43118466898954705, |
|
"grad_norm": 1.8970303535461426, |
|
"learning_rate": 4.453764768574926e-05, |
|
"loss": 0.6548, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.4326364692218351, |
|
"grad_norm": 0.50247722864151, |
|
"learning_rate": 4.450197534767121e-05, |
|
"loss": 0.6137, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.4340882694541231, |
|
"grad_norm": 1.2860316038131714, |
|
"learning_rate": 4.4466201292526956e-05, |
|
"loss": 0.5776, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.4355400696864111, |
|
"grad_norm": 1.1598414182662964, |
|
"learning_rate": 4.4430325706904366e-05, |
|
"loss": 0.9806, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4355400696864111, |
|
"eval_loss": 0.6489894390106201, |
|
"eval_runtime": 107.7493, |
|
"eval_samples_per_second": 13.457, |
|
"eval_steps_per_second": 3.369, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4369918699186992, |
|
"grad_norm": 0.9044310450553894, |
|
"learning_rate": 4.439434877792086e-05, |
|
"loss": 0.666, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.43844367015098723, |
|
"grad_norm": 0.6645646095275879, |
|
"learning_rate": 4.435827069322244e-05, |
|
"loss": 0.5448, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.43989547038327526, |
|
"grad_norm": 4.799647331237793, |
|
"learning_rate": 4.4322091640982705e-05, |
|
"loss": 0.6945, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.4413472706155633, |
|
"grad_norm": 1.6476815938949585, |
|
"learning_rate": 4.428581180990188e-05, |
|
"loss": 0.6551, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.44279907084785136, |
|
"grad_norm": 0.8809843063354492, |
|
"learning_rate": 4.424943138920581e-05, |
|
"loss": 0.9209, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.4442508710801394, |
|
"grad_norm": 2.0106568336486816, |
|
"learning_rate": 4.4212950568645007e-05, |
|
"loss": 0.6188, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.4457026713124274, |
|
"grad_norm": 0.4192439913749695, |
|
"learning_rate": 4.417636953849364e-05, |
|
"loss": 0.7886, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.44715447154471544, |
|
"grad_norm": 1.8189557790756226, |
|
"learning_rate": 4.4139688489548534e-05, |
|
"loss": 1.0636, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.44860627177700346, |
|
"grad_norm": 1.1486669778823853, |
|
"learning_rate": 4.410290761312818e-05, |
|
"loss": 0.7989, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.45005807200929154, |
|
"grad_norm": 0.6434163451194763, |
|
"learning_rate": 4.406602710107177e-05, |
|
"loss": 0.7368, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.45150987224157957, |
|
"grad_norm": 1.370603084564209, |
|
"learning_rate": 4.4029047145738134e-05, |
|
"loss": 0.6113, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.4529616724738676, |
|
"grad_norm": 1.5696393251419067, |
|
"learning_rate": 4.39919679400048e-05, |
|
"loss": 0.6274, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.4544134727061556, |
|
"grad_norm": 21.59466552734375, |
|
"learning_rate": 4.3954789677266936e-05, |
|
"loss": 0.7229, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.45586527293844364, |
|
"grad_norm": 0.975163459777832, |
|
"learning_rate": 4.391751255143639e-05, |
|
"loss": 0.7115, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.4573170731707317, |
|
"grad_norm": 0.6678398251533508, |
|
"learning_rate": 4.3880136756940624e-05, |
|
"loss": 0.6668, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.45876887340301975, |
|
"grad_norm": 0.9730459451675415, |
|
"learning_rate": 4.384266248872176e-05, |
|
"loss": 0.6139, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.4602206736353078, |
|
"grad_norm": 0.7275809049606323, |
|
"learning_rate": 4.380508994223551e-05, |
|
"loss": 0.9358, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.4616724738675958, |
|
"grad_norm": 4.506844520568848, |
|
"learning_rate": 4.376741931345019e-05, |
|
"loss": 0.5481, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.4631242740998839, |
|
"grad_norm": 0.5535733699798584, |
|
"learning_rate": 4.3729650798845676e-05, |
|
"loss": 0.7074, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.4645760743321719, |
|
"grad_norm": 0.7955453991889954, |
|
"learning_rate": 4.36917845954124e-05, |
|
"loss": 0.5912, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.46602787456445993, |
|
"grad_norm": 1.144351601600647, |
|
"learning_rate": 4.365382090065032e-05, |
|
"loss": 0.893, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.46747967479674796, |
|
"grad_norm": 2.5055947303771973, |
|
"learning_rate": 4.3615759912567864e-05, |
|
"loss": 0.7052, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.468931475029036, |
|
"grad_norm": 2.367400884628296, |
|
"learning_rate": 4.3577601829680925e-05, |
|
"loss": 0.5374, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.47038327526132406, |
|
"grad_norm": 2.6038706302642822, |
|
"learning_rate": 4.353934685101181e-05, |
|
"loss": 0.5551, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.4718350754936121, |
|
"grad_norm": 1.4026364088058472, |
|
"learning_rate": 4.350099517608823e-05, |
|
"loss": 0.7855, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.4732868757259001, |
|
"grad_norm": 1.1398979425430298, |
|
"learning_rate": 4.346254700494221e-05, |
|
"loss": 0.6862, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.47473867595818814, |
|
"grad_norm": 0.881351888179779, |
|
"learning_rate": 4.3424002538109096e-05, |
|
"loss": 0.7258, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 3.777125120162964, |
|
"learning_rate": 4.338536197662646e-05, |
|
"loss": 0.6882, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.47764227642276424, |
|
"grad_norm": 1.4731556177139282, |
|
"learning_rate": 4.3346625522033105e-05, |
|
"loss": 0.8303, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.47909407665505227, |
|
"grad_norm": 1.810880184173584, |
|
"learning_rate": 4.330779337636798e-05, |
|
"loss": 0.7837, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.4805458768873403, |
|
"grad_norm": 1.3891079425811768, |
|
"learning_rate": 4.326886574216911e-05, |
|
"loss": 0.4782, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.4819976771196283, |
|
"grad_norm": 3.7195885181427, |
|
"learning_rate": 4.32298428224726e-05, |
|
"loss": 0.6343, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.4834494773519164, |
|
"grad_norm": 1.837262511253357, |
|
"learning_rate": 4.319072482081151e-05, |
|
"loss": 0.4242, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.4849012775842044, |
|
"grad_norm": 0.9354246854782104, |
|
"learning_rate": 4.315151194121484e-05, |
|
"loss": 0.6616, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.48635307781649245, |
|
"grad_norm": 5.568230152130127, |
|
"learning_rate": 4.3112204388206436e-05, |
|
"loss": 0.5538, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 1.4984145164489746, |
|
"learning_rate": 4.307280236680393e-05, |
|
"loss": 0.6217, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.4892566782810685, |
|
"grad_norm": 1.278181552886963, |
|
"learning_rate": 4.303330608251769e-05, |
|
"loss": 0.6273, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.4907084785133566, |
|
"grad_norm": 0.48235225677490234, |
|
"learning_rate": 4.2993715741349726e-05, |
|
"loss": 0.5814, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.4921602787456446, |
|
"grad_norm": 0.9399949312210083, |
|
"learning_rate": 4.2954031549792634e-05, |
|
"loss": 0.869, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.49361207897793263, |
|
"grad_norm": 1.9232203960418701, |
|
"learning_rate": 4.291425371482849e-05, |
|
"loss": 0.8627, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.49506387921022066, |
|
"grad_norm": 0.5802033543586731, |
|
"learning_rate": 4.287438244392781e-05, |
|
"loss": 0.8384, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.4965156794425087, |
|
"grad_norm": 0.3144931197166443, |
|
"learning_rate": 4.283441794504842e-05, |
|
"loss": 0.6346, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.49796747967479676, |
|
"grad_norm": 5.040658473968506, |
|
"learning_rate": 4.279436042663443e-05, |
|
"loss": 0.6497, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.4994192799070848, |
|
"grad_norm": 0.7379769682884216, |
|
"learning_rate": 4.275421009761509e-05, |
|
"loss": 0.6061, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.5008710801393729, |
|
"grad_norm": 0.975500226020813, |
|
"learning_rate": 4.271396716740374e-05, |
|
"loss": 0.601, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.5023228803716608, |
|
"grad_norm": 1.0296087265014648, |
|
"learning_rate": 4.267363184589669e-05, |
|
"loss": 0.5649, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.5037746806039489, |
|
"grad_norm": 1.4896851778030396, |
|
"learning_rate": 4.2633204343472146e-05, |
|
"loss": 0.6021, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.5052264808362369, |
|
"grad_norm": 1.235889196395874, |
|
"learning_rate": 4.25926848709891e-05, |
|
"loss": 0.4451, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.506678281068525, |
|
"grad_norm": 0.9615374207496643, |
|
"learning_rate": 4.255207363978625e-05, |
|
"loss": 0.4711, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.508130081300813, |
|
"grad_norm": 1.6776018142700195, |
|
"learning_rate": 4.251137086168086e-05, |
|
"loss": 0.7406, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.509581881533101, |
|
"grad_norm": 1.5150796175003052, |
|
"learning_rate": 4.247057674896771e-05, |
|
"loss": 0.496, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.5110336817653891, |
|
"grad_norm": 1.1669261455535889, |
|
"learning_rate": 4.24296915144179e-05, |
|
"loss": 0.8257, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.5124854819976771, |
|
"grad_norm": 0.6701371073722839, |
|
"learning_rate": 4.2388715371277875e-05, |
|
"loss": 0.8408, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.5139372822299652, |
|
"grad_norm": 1.5670065879821777, |
|
"learning_rate": 4.234764853326817e-05, |
|
"loss": 0.9285, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.5153890824622532, |
|
"grad_norm": 0.589513897895813, |
|
"learning_rate": 4.230649121458239e-05, |
|
"loss": 0.7376, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.5168408826945412, |
|
"grad_norm": 0.7740994095802307, |
|
"learning_rate": 4.226524362988605e-05, |
|
"loss": 0.5336, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.5182926829268293, |
|
"grad_norm": 1.503607153892517, |
|
"learning_rate": 4.222390599431549e-05, |
|
"loss": 0.8121, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.5197444831591173, |
|
"grad_norm": 1.1378567218780518, |
|
"learning_rate": 4.21824785234767e-05, |
|
"loss": 1.0838, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.5211962833914053, |
|
"grad_norm": 0.8732675313949585, |
|
"learning_rate": 4.214096143344425e-05, |
|
"loss": 0.6242, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.5226480836236934, |
|
"grad_norm": 1.4234071969985962, |
|
"learning_rate": 4.2099354940760124e-05, |
|
"loss": 0.7382, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.5240998838559814, |
|
"grad_norm": 0.9399917721748352, |
|
"learning_rate": 4.205765926243264e-05, |
|
"loss": 0.6173, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.5255516840882695, |
|
"grad_norm": 1.9771159887313843, |
|
"learning_rate": 4.201587461593522e-05, |
|
"loss": 0.9029, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.5270034843205574, |
|
"grad_norm": 2.2527432441711426, |
|
"learning_rate": 4.197400121920539e-05, |
|
"loss": 0.624, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.5284552845528455, |
|
"grad_norm": 1.7543494701385498, |
|
"learning_rate": 4.193203929064353e-05, |
|
"loss": 0.5714, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.5299070847851336, |
|
"grad_norm": 0.9363800883293152, |
|
"learning_rate": 4.1889989049111794e-05, |
|
"loss": 0.5273, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.5313588850174216, |
|
"grad_norm": 0.6811619400978088, |
|
"learning_rate": 4.184785071393295e-05, |
|
"loss": 0.634, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.5328106852497096, |
|
"grad_norm": 1.3300182819366455, |
|
"learning_rate": 4.180562450488923e-05, |
|
"loss": 0.7374, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.5342624854819977, |
|
"grad_norm": 5.183244228363037, |
|
"learning_rate": 4.17633106422212e-05, |
|
"loss": 0.6945, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.5357142857142857, |
|
"grad_norm": 3.467090368270874, |
|
"learning_rate": 4.1720909346626624e-05, |
|
"loss": 0.52, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.5371660859465738, |
|
"grad_norm": 0.5636081099510193, |
|
"learning_rate": 4.167842083925926e-05, |
|
"loss": 0.7019, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.5386178861788617, |
|
"grad_norm": 0.8139100074768066, |
|
"learning_rate": 4.163584534172774e-05, |
|
"loss": 0.6844, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.5400696864111498, |
|
"grad_norm": 0.3868808150291443, |
|
"learning_rate": 4.1593183076094445e-05, |
|
"loss": 0.4764, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.5415214866434379, |
|
"grad_norm": 3.8870656490325928, |
|
"learning_rate": 4.155043426487429e-05, |
|
"loss": 0.6925, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.5429732868757259, |
|
"grad_norm": 1.7030867338180542, |
|
"learning_rate": 4.150759913103359e-05, |
|
"loss": 0.5368, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.544425087108014, |
|
"grad_norm": 1.52249276638031, |
|
"learning_rate": 4.1464677897988904e-05, |
|
"loss": 0.6469, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.5458768873403019, |
|
"grad_norm": 1.3640564680099487, |
|
"learning_rate": 4.1421670789605856e-05, |
|
"loss": 0.6186, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.54732868757259, |
|
"grad_norm": 0.9472920298576355, |
|
"learning_rate": 4.137857803019797e-05, |
|
"loss": 0.6701, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.5487804878048781, |
|
"grad_norm": 2.9700679779052734, |
|
"learning_rate": 4.1335399844525514e-05, |
|
"loss": 0.6616, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.5502322880371661, |
|
"grad_norm": 1.1544781923294067, |
|
"learning_rate": 4.129213645779431e-05, |
|
"loss": 0.6644, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.5516840882694541, |
|
"grad_norm": 2.1192784309387207, |
|
"learning_rate": 4.124878809565455e-05, |
|
"loss": 0.5912, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.5531358885017421, |
|
"grad_norm": 0.9204639196395874, |
|
"learning_rate": 4.1205354984199665e-05, |
|
"loss": 1.0158, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.5545876887340302, |
|
"grad_norm": 1.1523475646972656, |
|
"learning_rate": 4.116183734996509e-05, |
|
"loss": 0.5879, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.5560394889663183, |
|
"grad_norm": 1.5894629955291748, |
|
"learning_rate": 4.1118235419927125e-05, |
|
"loss": 0.5309, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.5574912891986062, |
|
"grad_norm": 1.463646650314331, |
|
"learning_rate": 4.107454942150173e-05, |
|
"loss": 0.5955, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.5589430894308943, |
|
"grad_norm": 0.8998947739601135, |
|
"learning_rate": 4.103077958254334e-05, |
|
"loss": 0.5999, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.5603948896631823, |
|
"grad_norm": 1.8093136548995972, |
|
"learning_rate": 4.098692613134367e-05, |
|
"loss": 0.7605, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.5618466898954704, |
|
"grad_norm": 1.070966124534607, |
|
"learning_rate": 4.0942989296630566e-05, |
|
"loss": 0.7076, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.5632984901277585, |
|
"grad_norm": 1.424028754234314, |
|
"learning_rate": 4.0898969307566734e-05, |
|
"loss": 0.553, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.5647502903600464, |
|
"grad_norm": 4.3886189460754395, |
|
"learning_rate": 4.0854866393748633e-05, |
|
"loss": 0.6369, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.5662020905923345, |
|
"grad_norm": 0.7212158441543579, |
|
"learning_rate": 4.081068078520521e-05, |
|
"loss": 0.5729, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.5676538908246226, |
|
"grad_norm": 1.5475590229034424, |
|
"learning_rate": 4.076641271239674e-05, |
|
"loss": 0.6781, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.5691056910569106, |
|
"grad_norm": 2.9124624729156494, |
|
"learning_rate": 4.072206240621359e-05, |
|
"loss": 0.3627, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.5705574912891986, |
|
"grad_norm": 3.567720651626587, |
|
"learning_rate": 4.067763009797506e-05, |
|
"loss": 0.6201, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.5720092915214866, |
|
"grad_norm": 1.0543193817138672, |
|
"learning_rate": 4.063311601942814e-05, |
|
"loss": 0.8288, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.5734610917537747, |
|
"grad_norm": 2.356640338897705, |
|
"learning_rate": 4.058852040274629e-05, |
|
"loss": 0.7107, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.5749128919860628, |
|
"grad_norm": 1.225469946861267, |
|
"learning_rate": 4.054384348052829e-05, |
|
"loss": 0.7114, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.5763646922183507, |
|
"grad_norm": 1.6612083911895752, |
|
"learning_rate": 4.049908548579695e-05, |
|
"loss": 0.6198, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.5778164924506388, |
|
"grad_norm": 0.8432019352912903, |
|
"learning_rate": 4.0454246651997976e-05, |
|
"loss": 0.641, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.5792682926829268, |
|
"grad_norm": 1.41001296043396, |
|
"learning_rate": 4.040932721299866e-05, |
|
"loss": 0.6773, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"grad_norm": 4.1915788650512695, |
|
"learning_rate": 4.036432740308675e-05, |
|
"loss": 0.708, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.582171893147503, |
|
"grad_norm": 1.1455175876617432, |
|
"learning_rate": 4.031924745696915e-05, |
|
"loss": 0.687, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.5836236933797909, |
|
"grad_norm": 0.27715983986854553, |
|
"learning_rate": 4.027408760977078e-05, |
|
"loss": 0.6192, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.585075493612079, |
|
"grad_norm": 0.9823393821716309, |
|
"learning_rate": 4.022884809703325e-05, |
|
"loss": 0.7417, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.586527293844367, |
|
"grad_norm": 1.0322932004928589, |
|
"learning_rate": 4.018352915471373e-05, |
|
"loss": 0.6031, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.587979094076655, |
|
"grad_norm": 1.231325387954712, |
|
"learning_rate": 4.0138131019183635e-05, |
|
"loss": 0.6654, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.5894308943089431, |
|
"grad_norm": 0.7293880581855774, |
|
"learning_rate": 4.009265392722745e-05, |
|
"loss": 0.7368, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.5908826945412311, |
|
"grad_norm": 1.2683119773864746, |
|
"learning_rate": 4.0047098116041494e-05, |
|
"loss": 0.7025, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.5923344947735192, |
|
"grad_norm": 3.7659318447113037, |
|
"learning_rate": 4.000146382323262e-05, |
|
"loss": 0.6851, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.5937862950058072, |
|
"grad_norm": 0.5533025860786438, |
|
"learning_rate": 3.995575128681706e-05, |
|
"loss": 0.7296, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.5952380952380952, |
|
"grad_norm": 1.3915671110153198, |
|
"learning_rate": 3.990996074521912e-05, |
|
"loss": 0.8556, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.5966898954703833, |
|
"grad_norm": 1.290931224822998, |
|
"learning_rate": 3.986409243726997e-05, |
|
"loss": 0.6936, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.5981416957026713, |
|
"grad_norm": 1.8250644207000732, |
|
"learning_rate": 3.981814660220639e-05, |
|
"loss": 0.48, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.5995934959349594, |
|
"grad_norm": 5.125851631164551, |
|
"learning_rate": 3.977212347966951e-05, |
|
"loss": 0.6769, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.6010452961672473, |
|
"grad_norm": 1.0293982028961182, |
|
"learning_rate": 3.9726023309703586e-05, |
|
"loss": 0.4873, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.6024970963995354, |
|
"grad_norm": 1.5232713222503662, |
|
"learning_rate": 3.9679846332754716e-05, |
|
"loss": 0.5796, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.6039488966318235, |
|
"grad_norm": 1.948309302330017, |
|
"learning_rate": 3.963359278966962e-05, |
|
"loss": 0.7975, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6054006968641115, |
|
"grad_norm": 4.971721649169922, |
|
"learning_rate": 3.9587262921694343e-05, |
|
"loss": 0.5604, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.6068524970963995, |
|
"grad_norm": 0.7850014567375183, |
|
"learning_rate": 3.954085697047305e-05, |
|
"loss": 0.6898, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.6083042973286876, |
|
"grad_norm": 0.5327876210212708, |
|
"learning_rate": 3.949437517804672e-05, |
|
"loss": 0.7244, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.6097560975609756, |
|
"grad_norm": 2.595165729522705, |
|
"learning_rate": 3.944781778685189e-05, |
|
"loss": 0.6537, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.6112078977932637, |
|
"grad_norm": 3.179577350616455, |
|
"learning_rate": 3.940118503971941e-05, |
|
"loss": 0.6315, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.6126596980255516, |
|
"grad_norm": 4.726830959320068, |
|
"learning_rate": 3.935447717987318e-05, |
|
"loss": 0.9359, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.6141114982578397, |
|
"grad_norm": 0.4002162516117096, |
|
"learning_rate": 3.930769445092883e-05, |
|
"loss": 0.7475, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.6155632984901278, |
|
"grad_norm": 1.5376918315887451, |
|
"learning_rate": 3.9260837096892536e-05, |
|
"loss": 0.8695, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.6170150987224158, |
|
"grad_norm": 1.1458797454833984, |
|
"learning_rate": 3.921390536215966e-05, |
|
"loss": 0.5302, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.6184668989547039, |
|
"grad_norm": 2.180319309234619, |
|
"learning_rate": 3.916689949151352e-05, |
|
"loss": 0.6508, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.6199186991869918, |
|
"grad_norm": 0.7947795391082764, |
|
"learning_rate": 3.911981973012413e-05, |
|
"loss": 0.5396, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.6213704994192799, |
|
"grad_norm": 2.065096616744995, |
|
"learning_rate": 3.907266632354687e-05, |
|
"loss": 0.6551, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.622822299651568, |
|
"grad_norm": 0.585402250289917, |
|
"learning_rate": 3.902543951772125e-05, |
|
"loss": 0.8218, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.624274099883856, |
|
"grad_norm": 0.9007218480110168, |
|
"learning_rate": 3.897813955896961e-05, |
|
"loss": 0.6261, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.625725900116144, |
|
"grad_norm": 1.722657322883606, |
|
"learning_rate": 3.8930766693995836e-05, |
|
"loss": 0.6373, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.627177700348432, |
|
"grad_norm": 2.8142952919006348, |
|
"learning_rate": 3.888332116988405e-05, |
|
"loss": 0.7586, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.6286295005807201, |
|
"grad_norm": 0.6167258620262146, |
|
"learning_rate": 3.883580323409739e-05, |
|
"loss": 0.6376, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.6300813008130082, |
|
"grad_norm": 1.2382534742355347, |
|
"learning_rate": 3.878821313447662e-05, |
|
"loss": 0.7507, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.6315331010452961, |
|
"grad_norm": 1.4185280799865723, |
|
"learning_rate": 3.874055111923895e-05, |
|
"loss": 0.8366, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.6329849012775842, |
|
"grad_norm": 1.5447771549224854, |
|
"learning_rate": 3.869281743697664e-05, |
|
"loss": 0.7417, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.6344367015098722, |
|
"grad_norm": 0.8044071793556213, |
|
"learning_rate": 3.864501233665574e-05, |
|
"loss": 0.6307, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.6358885017421603, |
|
"grad_norm": 1.0656015872955322, |
|
"learning_rate": 3.8597136067614834e-05, |
|
"loss": 0.8411, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.6373403019744484, |
|
"grad_norm": 1.03560471534729, |
|
"learning_rate": 3.854918887956369e-05, |
|
"loss": 0.4866, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.6387921022067363, |
|
"grad_norm": 3.3328843116760254, |
|
"learning_rate": 3.850117102258194e-05, |
|
"loss": 0.5966, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.6402439024390244, |
|
"grad_norm": 0.6904016733169556, |
|
"learning_rate": 3.8453082747117866e-05, |
|
"loss": 0.7452, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.6416957026713124, |
|
"grad_norm": 1.4979177713394165, |
|
"learning_rate": 3.8404924303986966e-05, |
|
"loss": 0.5983, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.6431475029036005, |
|
"grad_norm": 0.5199301838874817, |
|
"learning_rate": 3.8356695944370766e-05, |
|
"loss": 0.6088, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.6445993031358885, |
|
"grad_norm": 0.7011024355888367, |
|
"learning_rate": 3.8308397919815425e-05, |
|
"loss": 0.8235, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.6460511033681765, |
|
"grad_norm": 0.6176084280014038, |
|
"learning_rate": 3.826003048223048e-05, |
|
"loss": 0.5582, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.6475029036004646, |
|
"grad_norm": 0.8521440029144287, |
|
"learning_rate": 3.8211593883887486e-05, |
|
"loss": 0.608, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.6489547038327527, |
|
"grad_norm": 1.2053148746490479, |
|
"learning_rate": 3.816308837741875e-05, |
|
"loss": 0.6533, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.6504065040650406, |
|
"grad_norm": 1.701720952987671, |
|
"learning_rate": 3.811451421581595e-05, |
|
"loss": 0.6655, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.6518583042973287, |
|
"grad_norm": 1.8435336351394653, |
|
"learning_rate": 3.8065871652428874e-05, |
|
"loss": 0.6773, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.6533101045296167, |
|
"grad_norm": 3.5968480110168457, |
|
"learning_rate": 3.801716094096407e-05, |
|
"loss": 0.8139, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6547619047619048, |
|
"grad_norm": 0.776545524597168, |
|
"learning_rate": 3.796838233548353e-05, |
|
"loss": 0.758, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.6562137049941928, |
|
"grad_norm": 1.1160175800323486, |
|
"learning_rate": 3.7919536090403366e-05, |
|
"loss": 0.4703, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.6576655052264808, |
|
"grad_norm": 1.2551127672195435, |
|
"learning_rate": 3.787062246049245e-05, |
|
"loss": 0.8029, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.6591173054587689, |
|
"grad_norm": 1.130473256111145, |
|
"learning_rate": 3.7821641700871174e-05, |
|
"loss": 0.6633, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.6605691056910569, |
|
"grad_norm": 0.6870506405830383, |
|
"learning_rate": 3.7772594067010005e-05, |
|
"loss": 0.5136, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.662020905923345, |
|
"grad_norm": 1.1664706468582153, |
|
"learning_rate": 3.772347981472824e-05, |
|
"loss": 0.7384, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.663472706155633, |
|
"grad_norm": 1.849837303161621, |
|
"learning_rate": 3.767429920019261e-05, |
|
"loss": 0.6037, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.664924506387921, |
|
"grad_norm": 1.2257493734359741, |
|
"learning_rate": 3.7625052479916015e-05, |
|
"loss": 0.7564, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.6663763066202091, |
|
"grad_norm": 1.277335286140442, |
|
"learning_rate": 3.7575739910756124e-05, |
|
"loss": 0.6522, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.667828106852497, |
|
"grad_norm": 0.8080965280532837, |
|
"learning_rate": 3.752636174991403e-05, |
|
"loss": 0.8077, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.6692799070847851, |
|
"grad_norm": 1.9517686367034912, |
|
"learning_rate": 3.747691825493298e-05, |
|
"loss": 0.5579, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.6707317073170732, |
|
"grad_norm": 1.0174436569213867, |
|
"learning_rate": 3.742740968369697e-05, |
|
"loss": 0.8038, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.6721835075493612, |
|
"grad_norm": 0.6888383626937866, |
|
"learning_rate": 3.73778362944294e-05, |
|
"loss": 0.8365, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.6736353077816493, |
|
"grad_norm": 2.7746047973632812, |
|
"learning_rate": 3.732819834569176e-05, |
|
"loss": 0.5363, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.6750871080139372, |
|
"grad_norm": 0.43378978967666626, |
|
"learning_rate": 3.7278496096382254e-05, |
|
"loss": 0.5768, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.6765389082462253, |
|
"grad_norm": 1.7999366521835327, |
|
"learning_rate": 3.722872980573448e-05, |
|
"loss": 0.7168, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.6779907084785134, |
|
"grad_norm": 0.7228707075119019, |
|
"learning_rate": 3.717889973331603e-05, |
|
"loss": 0.8107, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.6794425087108014, |
|
"grad_norm": 1.048464059829712, |
|
"learning_rate": 3.7129006139027203e-05, |
|
"loss": 0.6335, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.6808943089430894, |
|
"grad_norm": 3.776031494140625, |
|
"learning_rate": 3.707904928309956e-05, |
|
"loss": 0.5367, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.6823461091753774, |
|
"grad_norm": 4.042102336883545, |
|
"learning_rate": 3.7029029426094666e-05, |
|
"loss": 0.5869, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.6837979094076655, |
|
"grad_norm": 2.6105918884277344, |
|
"learning_rate": 3.6978946828902646e-05, |
|
"loss": 0.4038, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.6852497096399536, |
|
"grad_norm": 0.17694531381130219, |
|
"learning_rate": 3.6928801752740895e-05, |
|
"loss": 0.6876, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.6867015098722415, |
|
"grad_norm": 1.4261376857757568, |
|
"learning_rate": 3.687859445915265e-05, |
|
"loss": 0.4988, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.6881533101045296, |
|
"grad_norm": 3.2906527519226074, |
|
"learning_rate": 3.682832521000568e-05, |
|
"loss": 0.6203, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.6896051103368177, |
|
"grad_norm": 0.8446171283721924, |
|
"learning_rate": 3.677799426749088e-05, |
|
"loss": 0.9472, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.6910569105691057, |
|
"grad_norm": 1.2324299812316895, |
|
"learning_rate": 3.6727601894120945e-05, |
|
"loss": 0.6428, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.6925087108013938, |
|
"grad_norm": 2.0250608921051025, |
|
"learning_rate": 3.667714835272895e-05, |
|
"loss": 0.55, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.6939605110336817, |
|
"grad_norm": 1.788245677947998, |
|
"learning_rate": 3.662663390646701e-05, |
|
"loss": 0.672, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.6954123112659698, |
|
"grad_norm": 2.5829572677612305, |
|
"learning_rate": 3.657605881880493e-05, |
|
"loss": 0.4385, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"grad_norm": 0.9620968699455261, |
|
"learning_rate": 3.652542335352878e-05, |
|
"loss": 0.8065, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.6983159117305459, |
|
"grad_norm": 1.38759183883667, |
|
"learning_rate": 3.647472777473954e-05, |
|
"loss": 0.7473, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.6997677119628339, |
|
"grad_norm": 1.4988477230072021, |
|
"learning_rate": 3.6423972346851744e-05, |
|
"loss": 0.6581, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.7012195121951219, |
|
"grad_norm": 1.095119595527649, |
|
"learning_rate": 3.637315733459207e-05, |
|
"loss": 0.5304, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.70267131242741, |
|
"grad_norm": 0.6751285791397095, |
|
"learning_rate": 3.6322283002997964e-05, |
|
"loss": 0.7912, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.7041231126596981, |
|
"grad_norm": 4.3074564933776855, |
|
"learning_rate": 3.62713496174163e-05, |
|
"loss": 0.545, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.705574912891986, |
|
"grad_norm": 1.85584557056427, |
|
"learning_rate": 3.622035744350192e-05, |
|
"loss": 0.9848, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.7070267131242741, |
|
"grad_norm": 1.2834818363189697, |
|
"learning_rate": 3.6169306747216324e-05, |
|
"loss": 0.7151, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.7084785133565621, |
|
"grad_norm": 2.248262882232666, |
|
"learning_rate": 3.611819779482623e-05, |
|
"loss": 0.5322, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.7099303135888502, |
|
"grad_norm": 2.055523633956909, |
|
"learning_rate": 3.606703085290221e-05, |
|
"loss": 0.6814, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.7113821138211383, |
|
"grad_norm": 1.6206103563308716, |
|
"learning_rate": 3.601580618831727e-05, |
|
"loss": 0.8505, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.7128339140534262, |
|
"grad_norm": 1.4901407957077026, |
|
"learning_rate": 3.5964524068245536e-05, |
|
"loss": 0.9409, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 1.2524491548538208, |
|
"learning_rate": 3.591318476016076e-05, |
|
"loss": 0.6961, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.7157375145180023, |
|
"grad_norm": 1.2523133754730225, |
|
"learning_rate": 3.586178853183498e-05, |
|
"loss": 0.7585, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.7171893147502904, |
|
"grad_norm": 1.0829603672027588, |
|
"learning_rate": 3.581033565133713e-05, |
|
"loss": 0.6737, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.7186411149825784, |
|
"grad_norm": 2.06748628616333, |
|
"learning_rate": 3.5758826387031626e-05, |
|
"loss": 0.7715, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.7200929152148664, |
|
"grad_norm": 0.8570627570152283, |
|
"learning_rate": 3.570726100757693e-05, |
|
"loss": 0.7153, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.7215447154471545, |
|
"grad_norm": 4.75230073928833, |
|
"learning_rate": 3.5655639781924247e-05, |
|
"loss": 0.447, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.7229965156794426, |
|
"grad_norm": 2.5116281509399414, |
|
"learning_rate": 3.5603962979315996e-05, |
|
"loss": 0.5853, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.7244483159117305, |
|
"grad_norm": 1.00091552734375, |
|
"learning_rate": 3.555223086928453e-05, |
|
"loss": 0.8609, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.7259001161440186, |
|
"grad_norm": 1.0202133655548096, |
|
"learning_rate": 3.550044372165062e-05, |
|
"loss": 0.879, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7273519163763066, |
|
"grad_norm": 1.4836984872817993, |
|
"learning_rate": 3.5448601806522134e-05, |
|
"loss": 0.3201, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.7288037166085947, |
|
"grad_norm": 1.1861945390701294, |
|
"learning_rate": 3.539670539429256e-05, |
|
"loss": 0.4413, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.7302555168408827, |
|
"grad_norm": 1.24436616897583, |
|
"learning_rate": 3.534475475563967e-05, |
|
"loss": 0.7143, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 1.162705421447754, |
|
"learning_rate": 3.5292750161524045e-05, |
|
"loss": 0.6185, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.7331591173054588, |
|
"grad_norm": 1.116911768913269, |
|
"learning_rate": 3.5240691883187666e-05, |
|
"loss": 0.6876, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.7346109175377468, |
|
"grad_norm": 0.6887683272361755, |
|
"learning_rate": 3.5188580192152544e-05, |
|
"loss": 0.5068, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.7360627177700348, |
|
"grad_norm": 0.8753703832626343, |
|
"learning_rate": 3.513641536021925e-05, |
|
"loss": 0.8465, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.7375145180023229, |
|
"grad_norm": 1.0913424491882324, |
|
"learning_rate": 3.5084197659465555e-05, |
|
"loss": 0.5948, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.7389663182346109, |
|
"grad_norm": 4.28510856628418, |
|
"learning_rate": 3.503192736224496e-05, |
|
"loss": 0.6233, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.740418118466899, |
|
"grad_norm": 1.444339632987976, |
|
"learning_rate": 3.49796047411853e-05, |
|
"loss": 0.4999, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.741869918699187, |
|
"grad_norm": 1.1212478876113892, |
|
"learning_rate": 3.4927230069187307e-05, |
|
"loss": 0.5284, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.743321718931475, |
|
"grad_norm": 0.00559547683224082, |
|
"learning_rate": 3.487480361942321e-05, |
|
"loss": 0.4229, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.7447735191637631, |
|
"grad_norm": 13.444393157958984, |
|
"learning_rate": 3.482232566533529e-05, |
|
"loss": 0.7992, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.7462253193960511, |
|
"grad_norm": 0.6348085403442383, |
|
"learning_rate": 3.4769796480634456e-05, |
|
"loss": 0.7238, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.7476771196283392, |
|
"grad_norm": 1.069054126739502, |
|
"learning_rate": 3.471721633929885e-05, |
|
"loss": 0.4417, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.7491289198606271, |
|
"grad_norm": 0.9457240104675293, |
|
"learning_rate": 3.466458551557235e-05, |
|
"loss": 0.7843, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.7505807200929152, |
|
"grad_norm": 5.595800399780273, |
|
"learning_rate": 3.4611904283963205e-05, |
|
"loss": 0.8307, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.7520325203252033, |
|
"grad_norm": 0.6603794693946838, |
|
"learning_rate": 3.455917291924256e-05, |
|
"loss": 0.5221, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.7534843205574913, |
|
"grad_norm": 1.487289309501648, |
|
"learning_rate": 3.450639169644308e-05, |
|
"loss": 0.6535, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.7549361207897793, |
|
"grad_norm": 0.9417099952697754, |
|
"learning_rate": 3.445356089085743e-05, |
|
"loss": 0.7801, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.7563879210220673, |
|
"grad_norm": 0.5838674306869507, |
|
"learning_rate": 3.4400680778036906e-05, |
|
"loss": 0.5079, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.7578397212543554, |
|
"grad_norm": 1.297662377357483, |
|
"learning_rate": 3.434775163378997e-05, |
|
"loss": 0.6784, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.7592915214866435, |
|
"grad_norm": 0.6394696235656738, |
|
"learning_rate": 3.4294773734180825e-05, |
|
"loss": 0.5856, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.7607433217189314, |
|
"grad_norm": 3.172327756881714, |
|
"learning_rate": 3.424174735552799e-05, |
|
"loss": 0.7602, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.7621951219512195, |
|
"grad_norm": 1.0046736001968384, |
|
"learning_rate": 3.418867277440278e-05, |
|
"loss": 0.8301, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.7636469221835076, |
|
"grad_norm": 5.960042953491211, |
|
"learning_rate": 3.413555026762799e-05, |
|
"loss": 0.745, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.7650987224157956, |
|
"grad_norm": 0.9394850730895996, |
|
"learning_rate": 3.408238011227635e-05, |
|
"loss": 0.7655, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.7665505226480837, |
|
"grad_norm": 1.9447022676467896, |
|
"learning_rate": 3.402916258566907e-05, |
|
"loss": 0.909, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.7680023228803716, |
|
"grad_norm": 1.3960545063018799, |
|
"learning_rate": 3.3975897965374515e-05, |
|
"loss": 1.0169, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.7694541231126597, |
|
"grad_norm": 1.291868805885315, |
|
"learning_rate": 3.392258652920664e-05, |
|
"loss": 0.8068, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.7709059233449478, |
|
"grad_norm": 0.8512223362922668, |
|
"learning_rate": 3.386922855522356e-05, |
|
"loss": 0.6296, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.7723577235772358, |
|
"grad_norm": 1.03252112865448, |
|
"learning_rate": 3.3815824321726154e-05, |
|
"loss": 0.7254, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.7738095238095238, |
|
"grad_norm": 0.5753119587898254, |
|
"learning_rate": 3.376237410725655e-05, |
|
"loss": 0.8159, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.7752613240418118, |
|
"grad_norm": 0.9350941181182861, |
|
"learning_rate": 3.370887819059672e-05, |
|
"loss": 0.6446, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.7767131242740999, |
|
"grad_norm": 1.6437619924545288, |
|
"learning_rate": 3.3655336850767e-05, |
|
"loss": 0.891, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.778164924506388, |
|
"grad_norm": 2.669983386993408, |
|
"learning_rate": 3.3601750367024645e-05, |
|
"loss": 0.8369, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.7796167247386759, |
|
"grad_norm": 1.661522388458252, |
|
"learning_rate": 3.354811901886234e-05, |
|
"loss": 0.7392, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.781068524970964, |
|
"grad_norm": 0.7996639609336853, |
|
"learning_rate": 3.3494443086006824e-05, |
|
"loss": 0.745, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.782520325203252, |
|
"grad_norm": 0.6470725536346436, |
|
"learning_rate": 3.344072284841734e-05, |
|
"loss": 0.7941, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.7839721254355401, |
|
"grad_norm": 1.523929476737976, |
|
"learning_rate": 3.3386958586284204e-05, |
|
"loss": 0.5812, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.7854239256678281, |
|
"grad_norm": 0.7597313523292542, |
|
"learning_rate": 3.333315058002739e-05, |
|
"loss": 0.4126, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.7868757259001161, |
|
"grad_norm": 2.064470052719116, |
|
"learning_rate": 3.3279299110295e-05, |
|
"loss": 0.7855, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.7883275261324042, |
|
"grad_norm": 0.6145796179771423, |
|
"learning_rate": 3.3225404457961834e-05, |
|
"loss": 0.6219, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.7897793263646922, |
|
"grad_norm": 3.158587694168091, |
|
"learning_rate": 3.317146690412793e-05, |
|
"loss": 0.7321, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.7912311265969802, |
|
"grad_norm": 4.978558540344238, |
|
"learning_rate": 3.311748673011709e-05, |
|
"loss": 0.5758, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.7926829268292683, |
|
"grad_norm": 1.3039811849594116, |
|
"learning_rate": 3.306346421747539e-05, |
|
"loss": 0.7172, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.7941347270615563, |
|
"grad_norm": 0.47538790106773376, |
|
"learning_rate": 3.300939964796977e-05, |
|
"loss": 0.5409, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.7955865272938444, |
|
"grad_norm": 1.0770827531814575, |
|
"learning_rate": 3.295529330358649e-05, |
|
"loss": 0.4414, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.7970383275261324, |
|
"grad_norm": 0.7383883595466614, |
|
"learning_rate": 3.290114546652971e-05, |
|
"loss": 0.5318, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.7984901277584204, |
|
"grad_norm": 0.9000987410545349, |
|
"learning_rate": 3.284695641922e-05, |
|
"loss": 0.5446, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7999419279907085, |
|
"grad_norm": 2.9022693634033203, |
|
"learning_rate": 3.279272644429291e-05, |
|
"loss": 0.725, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.8013937282229965, |
|
"grad_norm": 1.3384835720062256, |
|
"learning_rate": 3.2738455824597405e-05, |
|
"loss": 0.6995, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.8028455284552846, |
|
"grad_norm": 0.9091627597808838, |
|
"learning_rate": 3.268414484319445e-05, |
|
"loss": 0.5134, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.8042973286875726, |
|
"grad_norm": 3.8653523921966553, |
|
"learning_rate": 3.262979378335557e-05, |
|
"loss": 0.7161, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.8057491289198606, |
|
"grad_norm": 0.8096335530281067, |
|
"learning_rate": 3.257540292856126e-05, |
|
"loss": 0.5652, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.8072009291521487, |
|
"grad_norm": 1.397865653038025, |
|
"learning_rate": 3.252097256249965e-05, |
|
"loss": 0.6965, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.8086527293844367, |
|
"grad_norm": 2.277859926223755, |
|
"learning_rate": 3.246650296906489e-05, |
|
"loss": 0.6531, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.8101045296167247, |
|
"grad_norm": 2.0666253566741943, |
|
"learning_rate": 3.241199443235576e-05, |
|
"loss": 0.4249, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.8115563298490128, |
|
"grad_norm": 1.2161462306976318, |
|
"learning_rate": 3.2357447236674136e-05, |
|
"loss": 0.4259, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"grad_norm": 2.8734538555145264, |
|
"learning_rate": 3.2302861666523564e-05, |
|
"loss": 0.4658, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.8144599303135889, |
|
"grad_norm": 0.739331841468811, |
|
"learning_rate": 3.22482380066077e-05, |
|
"loss": 0.6863, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.8159117305458768, |
|
"grad_norm": 0.8823861479759216, |
|
"learning_rate": 3.2193576541828894e-05, |
|
"loss": 0.6399, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.8173635307781649, |
|
"grad_norm": 1.240403175354004, |
|
"learning_rate": 3.2138877557286675e-05, |
|
"loss": 0.8784, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.818815331010453, |
|
"grad_norm": 1.1647741794586182, |
|
"learning_rate": 3.208414133827623e-05, |
|
"loss": 0.9796, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.820267131242741, |
|
"grad_norm": 1.0195775032043457, |
|
"learning_rate": 3.2029368170287e-05, |
|
"loss": 0.4319, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.8217189314750291, |
|
"grad_norm": 1.4524924755096436, |
|
"learning_rate": 3.197455833900112e-05, |
|
"loss": 0.7408, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.823170731707317, |
|
"grad_norm": 0.5133039355278015, |
|
"learning_rate": 3.191971213029195e-05, |
|
"loss": 0.4198, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.8246225319396051, |
|
"grad_norm": 1.205497145652771, |
|
"learning_rate": 3.186482983022257e-05, |
|
"loss": 0.4425, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.8260743321718932, |
|
"grad_norm": 0.6108511090278625, |
|
"learning_rate": 3.180991172504434e-05, |
|
"loss": 0.6768, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.8275261324041812, |
|
"grad_norm": 1.1527341604232788, |
|
"learning_rate": 3.175495810119533e-05, |
|
"loss": 0.5248, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.8289779326364692, |
|
"grad_norm": 1.3975361585617065, |
|
"learning_rate": 3.16999692452989e-05, |
|
"loss": 0.8838, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.8304297328687572, |
|
"grad_norm": 4.7035603523254395, |
|
"learning_rate": 3.164494544416215e-05, |
|
"loss": 1.0907, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.8318815331010453, |
|
"grad_norm": 1.6571784019470215, |
|
"learning_rate": 3.158988698477445e-05, |
|
"loss": 0.732, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.6325092315673828, |
|
"learning_rate": 3.1534794154305935e-05, |
|
"loss": 0.8245, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.8347851335656213, |
|
"grad_norm": 0.9876767992973328, |
|
"learning_rate": 3.1479667240106016e-05, |
|
"loss": 0.7428, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.8362369337979094, |
|
"grad_norm": 0.37352874875068665, |
|
"learning_rate": 3.142450652970187e-05, |
|
"loss": 0.5489, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.8376887340301974, |
|
"grad_norm": 1.924856424331665, |
|
"learning_rate": 3.136931231079696e-05, |
|
"loss": 0.7834, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.8391405342624855, |
|
"grad_norm": 1.8147573471069336, |
|
"learning_rate": 3.1314084871269496e-05, |
|
"loss": 0.6688, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.8405923344947736, |
|
"grad_norm": 0.680001437664032, |
|
"learning_rate": 3.1258824499170975e-05, |
|
"loss": 0.6193, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.8420441347270615, |
|
"grad_norm": 1.3029786348342896, |
|
"learning_rate": 3.1203531482724665e-05, |
|
"loss": 0.694, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.8434959349593496, |
|
"grad_norm": 1.4556697607040405, |
|
"learning_rate": 3.114820611032408e-05, |
|
"loss": 0.7933, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.8449477351916377, |
|
"grad_norm": 1.7649941444396973, |
|
"learning_rate": 3.1092848670531514e-05, |
|
"loss": 0.4818, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.8463995354239257, |
|
"grad_norm": 0.8985478281974792, |
|
"learning_rate": 3.1037459452076504e-05, |
|
"loss": 0.6992, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.8478513356562137, |
|
"grad_norm": 1.0186079740524292, |
|
"learning_rate": 3.0982038743854346e-05, |
|
"loss": 0.2927, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.8493031358885017, |
|
"grad_norm": 1.724191427230835, |
|
"learning_rate": 3.0926586834924555e-05, |
|
"loss": 0.6936, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.8507549361207898, |
|
"grad_norm": 0.25378018617630005, |
|
"learning_rate": 3.087110401450941e-05, |
|
"loss": 0.6692, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.8522067363530779, |
|
"grad_norm": 0.5603824853897095, |
|
"learning_rate": 3.0815590571992394e-05, |
|
"loss": 0.4975, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.8536585365853658, |
|
"grad_norm": 0.4890209138393402, |
|
"learning_rate": 3.076004679691672e-05, |
|
"loss": 0.832, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.8551103368176539, |
|
"grad_norm": 0.6338871121406555, |
|
"learning_rate": 3.0704472978983795e-05, |
|
"loss": 0.6447, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.8565621370499419, |
|
"grad_norm": 0.9809471964836121, |
|
"learning_rate": 3.064886940805174e-05, |
|
"loss": 0.6176, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.85801393728223, |
|
"grad_norm": 0.7329034209251404, |
|
"learning_rate": 3.059323637413385e-05, |
|
"loss": 0.4022, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.859465737514518, |
|
"grad_norm": 1.2352603673934937, |
|
"learning_rate": 3.053757416739708e-05, |
|
"loss": 0.9392, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.860917537746806, |
|
"grad_norm": 1.056897759437561, |
|
"learning_rate": 3.0481883078160555e-05, |
|
"loss": 0.616, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.8623693379790941, |
|
"grad_norm": 0.6841446757316589, |
|
"learning_rate": 3.042616339689404e-05, |
|
"loss": 0.5995, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.8638211382113821, |
|
"grad_norm": 1.3766181468963623, |
|
"learning_rate": 3.0370415414216436e-05, |
|
"loss": 0.6945, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.8652729384436701, |
|
"grad_norm": 0.960422694683075, |
|
"learning_rate": 3.0314639420894242e-05, |
|
"loss": 0.6205, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.8667247386759582, |
|
"grad_norm": 2.2252063751220703, |
|
"learning_rate": 3.0258835707840062e-05, |
|
"loss": 0.67, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.8681765389082462, |
|
"grad_norm": 4.834002494812012, |
|
"learning_rate": 3.020300456611109e-05, |
|
"loss": 0.5169, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.8696283391405343, |
|
"grad_norm": 0.3208721876144409, |
|
"learning_rate": 3.0147146286907546e-05, |
|
"loss": 0.7802, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"grad_norm": 0.6140812039375305, |
|
"learning_rate": 3.0091261161571227e-05, |
|
"loss": 0.753, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"eval_loss": 0.6239650249481201, |
|
"eval_runtime": 107.7332, |
|
"eval_samples_per_second": 13.459, |
|
"eval_steps_per_second": 3.369, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.8725319396051103, |
|
"grad_norm": 0.7981186509132385, |
|
"learning_rate": 3.003534948158393e-05, |
|
"loss": 0.581, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.8739837398373984, |
|
"grad_norm": 1.1279065608978271, |
|
"learning_rate": 2.9979411538565977e-05, |
|
"loss": 0.5993, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.8754355400696864, |
|
"grad_norm": 0.7594296336174011, |
|
"learning_rate": 2.9923447624274647e-05, |
|
"loss": 0.7433, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.8768873403019745, |
|
"grad_norm": 4.225851058959961, |
|
"learning_rate": 2.9867458030602684e-05, |
|
"loss": 0.5974, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.8783391405342624, |
|
"grad_norm": 1.2313289642333984, |
|
"learning_rate": 2.9811443049576793e-05, |
|
"loss": 0.5609, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.8797909407665505, |
|
"grad_norm": 2.6386501789093018, |
|
"learning_rate": 2.9755402973356045e-05, |
|
"loss": 0.9846, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.8812427409988386, |
|
"grad_norm": 1.1028252840042114, |
|
"learning_rate": 2.969933809423045e-05, |
|
"loss": 0.5933, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.8826945412311266, |
|
"grad_norm": 1.0655920505523682, |
|
"learning_rate": 2.964324870461935e-05, |
|
"loss": 0.8486, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.8841463414634146, |
|
"grad_norm": 2.2200887203216553, |
|
"learning_rate": 2.9587135097069934e-05, |
|
"loss": 0.3357, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.8855981416957027, |
|
"grad_norm": 8.945457458496094, |
|
"learning_rate": 2.9530997564255725e-05, |
|
"loss": 0.7661, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.8870499419279907, |
|
"grad_norm": 0.8916497230529785, |
|
"learning_rate": 2.9474836398975005e-05, |
|
"loss": 0.3096, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.8885017421602788, |
|
"grad_norm": 1.2500933408737183, |
|
"learning_rate": 2.9418651894149334e-05, |
|
"loss": 0.7636, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.8899535423925667, |
|
"grad_norm": 1.3231313228607178, |
|
"learning_rate": 2.9362444342822015e-05, |
|
"loss": 0.8473, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.8914053426248548, |
|
"grad_norm": 1.0085506439208984, |
|
"learning_rate": 2.9306214038156516e-05, |
|
"loss": 0.6876, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 0.7650404572486877, |
|
"learning_rate": 2.924996127343502e-05, |
|
"loss": 0.4889, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.8943089430894309, |
|
"grad_norm": 0.7335465550422668, |
|
"learning_rate": 2.9193686342056847e-05, |
|
"loss": 0.6647, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.895760743321719, |
|
"grad_norm": 0.5137434005737305, |
|
"learning_rate": 2.9137389537536913e-05, |
|
"loss": 0.6737, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.8972125435540069, |
|
"grad_norm": 0.9400390386581421, |
|
"learning_rate": 2.9081071153504236e-05, |
|
"loss": 0.6747, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.898664343786295, |
|
"grad_norm": 0.660967230796814, |
|
"learning_rate": 2.9024731483700396e-05, |
|
"loss": 0.4432, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.9001161440185831, |
|
"grad_norm": 2.423039197921753, |
|
"learning_rate": 2.8968370821977963e-05, |
|
"loss": 0.6982, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.9015679442508711, |
|
"grad_norm": 3.0828261375427246, |
|
"learning_rate": 2.8911989462299016e-05, |
|
"loss": 0.5868, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.9030197444831591, |
|
"grad_norm": 2.1633851528167725, |
|
"learning_rate": 2.8855587698733595e-05, |
|
"loss": 0.5404, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.9044715447154471, |
|
"grad_norm": 5.27179479598999, |
|
"learning_rate": 2.8799165825458145e-05, |
|
"loss": 0.7313, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.9059233449477352, |
|
"grad_norm": 0.805304229259491, |
|
"learning_rate": 2.8742724136754005e-05, |
|
"loss": 0.5804, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.9073751451800233, |
|
"grad_norm": 2.6164822578430176, |
|
"learning_rate": 2.868626292700588e-05, |
|
"loss": 0.6612, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.9088269454123112, |
|
"grad_norm": 1.3976331949234009, |
|
"learning_rate": 2.8629782490700253e-05, |
|
"loss": 0.5746, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.9102787456445993, |
|
"grad_norm": 1.42573881149292, |
|
"learning_rate": 2.857328312242392e-05, |
|
"loss": 0.576, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.9117305458768873, |
|
"grad_norm": 2.0388023853302, |
|
"learning_rate": 2.851676511686243e-05, |
|
"loss": 0.7672, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.9131823461091754, |
|
"grad_norm": 1.3161983489990234, |
|
"learning_rate": 2.8460228768798506e-05, |
|
"loss": 0.6011, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.9146341463414634, |
|
"grad_norm": 1.2606275081634521, |
|
"learning_rate": 2.8403674373110562e-05, |
|
"loss": 0.6017, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.9160859465737514, |
|
"grad_norm": 2.2314658164978027, |
|
"learning_rate": 2.8347102224771144e-05, |
|
"loss": 0.6201, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.9175377468060395, |
|
"grad_norm": 1.990546703338623, |
|
"learning_rate": 2.8290512618845367e-05, |
|
"loss": 0.6775, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.9189895470383276, |
|
"grad_norm": 1.7261875867843628, |
|
"learning_rate": 2.823390585048943e-05, |
|
"loss": 0.6419, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.9204413472706156, |
|
"grad_norm": 2.2154932022094727, |
|
"learning_rate": 2.8177282214949047e-05, |
|
"loss": 0.8979, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.9218931475029036, |
|
"grad_norm": 6.259598731994629, |
|
"learning_rate": 2.8120642007557873e-05, |
|
"loss": 0.767, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.9233449477351916, |
|
"grad_norm": 1.4923880100250244, |
|
"learning_rate": 2.806398552373603e-05, |
|
"loss": 0.7091, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.9247967479674797, |
|
"grad_norm": 0.6974102258682251, |
|
"learning_rate": 2.8007313058988527e-05, |
|
"loss": 0.6863, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.9262485481997678, |
|
"grad_norm": 1.9238085746765137, |
|
"learning_rate": 2.7950624908903705e-05, |
|
"loss": 0.555, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.9277003484320557, |
|
"grad_norm": 0.496724933385849, |
|
"learning_rate": 2.789392136915175e-05, |
|
"loss": 0.9554, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.9291521486643438, |
|
"grad_norm": 1.349373459815979, |
|
"learning_rate": 2.7837202735483093e-05, |
|
"loss": 0.8156, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.9306039488966318, |
|
"grad_norm": 1.377130150794983, |
|
"learning_rate": 2.778046930372689e-05, |
|
"loss": 0.7222, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.9320557491289199, |
|
"grad_norm": 1.0762406587600708, |
|
"learning_rate": 2.7723721369789486e-05, |
|
"loss": 0.6956, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.9335075493612079, |
|
"grad_norm": 1.7975473403930664, |
|
"learning_rate": 2.7666959229652867e-05, |
|
"loss": 0.7824, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.9349593495934959, |
|
"grad_norm": 1.836282730102539, |
|
"learning_rate": 2.761018317937311e-05, |
|
"loss": 0.6559, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.936411149825784, |
|
"grad_norm": 1.9735631942749023, |
|
"learning_rate": 2.7553393515078852e-05, |
|
"loss": 0.578, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.937862950058072, |
|
"grad_norm": 1.7507141828536987, |
|
"learning_rate": 2.749659053296973e-05, |
|
"loss": 0.897, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.93931475029036, |
|
"grad_norm": 1.1130051612854004, |
|
"learning_rate": 2.743977452931484e-05, |
|
"loss": 0.5654, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.9407665505226481, |
|
"grad_norm": 0.851780354976654, |
|
"learning_rate": 2.738294580045119e-05, |
|
"loss": 0.5722, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.9422183507549361, |
|
"grad_norm": 0.6273514628410339, |
|
"learning_rate": 2.732610464278219e-05, |
|
"loss": 0.6938, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.9436701509872242, |
|
"grad_norm": 1.4148989915847778, |
|
"learning_rate": 2.7269251352776042e-05, |
|
"loss": 0.5636, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.9451219512195121, |
|
"grad_norm": 0.9783958792686462, |
|
"learning_rate": 2.7212386226964242e-05, |
|
"loss": 0.5425, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.9465737514518002, |
|
"grad_norm": 0.860564649105072, |
|
"learning_rate": 2.7155509561940017e-05, |
|
"loss": 0.6981, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.9480255516840883, |
|
"grad_norm": 1.0383031368255615, |
|
"learning_rate": 2.7098621654356766e-05, |
|
"loss": 0.7683, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.9494773519163763, |
|
"grad_norm": 0.6206135153770447, |
|
"learning_rate": 2.704172280092655e-05, |
|
"loss": 0.5571, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.9509291521486644, |
|
"grad_norm": 1.0526723861694336, |
|
"learning_rate": 2.698481329841851e-05, |
|
"loss": 0.9023, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 0.7944720983505249, |
|
"learning_rate": 2.6927893443657316e-05, |
|
"loss": 0.5719, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.9538327526132404, |
|
"grad_norm": 0.16957837343215942, |
|
"learning_rate": 2.6870963533521655e-05, |
|
"loss": 0.641, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.9552845528455285, |
|
"grad_norm": 0.8909958004951477, |
|
"learning_rate": 2.681402386494264e-05, |
|
"loss": 0.5357, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.9567363530778165, |
|
"grad_norm": 0.8061552047729492, |
|
"learning_rate": 2.6757074734902303e-05, |
|
"loss": 0.8705, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.9581881533101045, |
|
"grad_norm": 0.7766616940498352, |
|
"learning_rate": 2.6700116440432005e-05, |
|
"loss": 0.6641, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.9596399535423926, |
|
"grad_norm": 4.805869102478027, |
|
"learning_rate": 2.6643149278610925e-05, |
|
"loss": 0.4838, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.9610917537746806, |
|
"grad_norm": 1.1826244592666626, |
|
"learning_rate": 2.6586173546564465e-05, |
|
"loss": 0.8335, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.9625435540069687, |
|
"grad_norm": 4.609352111816406, |
|
"learning_rate": 2.6529189541462745e-05, |
|
"loss": 0.5172, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.9639953542392566, |
|
"grad_norm": 1.5737910270690918, |
|
"learning_rate": 2.647219756051904e-05, |
|
"loss": 0.4788, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.9654471544715447, |
|
"grad_norm": 4.146353244781494, |
|
"learning_rate": 2.6415197900988213e-05, |
|
"loss": 0.7194, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.9668989547038328, |
|
"grad_norm": 0.5611397624015808, |
|
"learning_rate": 2.6358190860165187e-05, |
|
"loss": 0.489, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.9683507549361208, |
|
"grad_norm": 2.0827231407165527, |
|
"learning_rate": 2.6301176735383382e-05, |
|
"loss": 0.5859, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.9698025551684089, |
|
"grad_norm": 2.0396342277526855, |
|
"learning_rate": 2.624415582401314e-05, |
|
"loss": 0.7885, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.9712543554006968, |
|
"grad_norm": 2.5447700023651123, |
|
"learning_rate": 2.6187128423460233e-05, |
|
"loss": 0.722, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.9727061556329849, |
|
"grad_norm": 4.586677551269531, |
|
"learning_rate": 2.6130094831164282e-05, |
|
"loss": 0.5383, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.974157955865273, |
|
"grad_norm": 2.4895076751708984, |
|
"learning_rate": 2.607305534459717e-05, |
|
"loss": 0.6993, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 0.8436042666435242, |
|
"learning_rate": 2.6016010261261546e-05, |
|
"loss": 0.6571, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.977061556329849, |
|
"grad_norm": 0.6883308291435242, |
|
"learning_rate": 2.5958959878689253e-05, |
|
"loss": 0.5514, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.978513356562137, |
|
"grad_norm": 2.935514211654663, |
|
"learning_rate": 2.590190449443975e-05, |
|
"loss": 0.6725, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.9799651567944251, |
|
"grad_norm": 2.491732597351074, |
|
"learning_rate": 2.584484440609861e-05, |
|
"loss": 0.6864, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.9814169570267132, |
|
"grad_norm": 2.6545393466949463, |
|
"learning_rate": 2.5787779911275937e-05, |
|
"loss": 0.6371, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.9828687572590011, |
|
"grad_norm": 0.4963870942592621, |
|
"learning_rate": 2.57307113076048e-05, |
|
"loss": 0.6246, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.9843205574912892, |
|
"grad_norm": 0.5385538339614868, |
|
"learning_rate": 2.567363889273971e-05, |
|
"loss": 0.8436, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.9857723577235772, |
|
"grad_norm": 7.346200466156006, |
|
"learning_rate": 2.561656296435506e-05, |
|
"loss": 0.65, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.9872241579558653, |
|
"grad_norm": 1.658962368965149, |
|
"learning_rate": 2.555948382014357e-05, |
|
"loss": 0.6879, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.9886759581881533, |
|
"grad_norm": 1.5802571773529053, |
|
"learning_rate": 2.5502401757814714e-05, |
|
"loss": 0.7704, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.9901277584204413, |
|
"grad_norm": 2.8722903728485107, |
|
"learning_rate": 2.5445317075093223e-05, |
|
"loss": 0.4583, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.9915795586527294, |
|
"grad_norm": 0.8566171526908875, |
|
"learning_rate": 2.5388230069717446e-05, |
|
"loss": 0.8975, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.9930313588850174, |
|
"grad_norm": 0.7352342009544373, |
|
"learning_rate": 2.5331141039437882e-05, |
|
"loss": 0.7039, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.9944831591173054, |
|
"grad_norm": 1.2166061401367188, |
|
"learning_rate": 2.5274050282015587e-05, |
|
"loss": 0.6728, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.9959349593495935, |
|
"grad_norm": 1.2508012056350708, |
|
"learning_rate": 2.521695809522061e-05, |
|
"loss": 0.7019, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.9973867595818815, |
|
"grad_norm": 1.4797356128692627, |
|
"learning_rate": 2.515986477683048e-05, |
|
"loss": 0.5035, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.9988385598141696, |
|
"grad_norm": 0.6893177628517151, |
|
"learning_rate": 2.510277062462861e-05, |
|
"loss": 0.6175, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.0002903600464577, |
|
"grad_norm": 3.4936206340789795, |
|
"learning_rate": 2.504567593640275e-05, |
|
"loss": 0.7674, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.0017421602787457, |
|
"grad_norm": 1.3632289171218872, |
|
"learning_rate": 2.4988581009943477e-05, |
|
"loss": 0.3736, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.0031939605110336, |
|
"grad_norm": 0.46680641174316406, |
|
"learning_rate": 2.4931486143042586e-05, |
|
"loss": 0.4425, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.0046457607433217, |
|
"grad_norm": 0.8818445801734924, |
|
"learning_rate": 2.4874391633491576e-05, |
|
"loss": 0.6905, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.0060975609756098, |
|
"grad_norm": 0.5474444031715393, |
|
"learning_rate": 2.4817297779080073e-05, |
|
"loss": 0.7923, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.0075493612078978, |
|
"grad_norm": 0.8076862096786499, |
|
"learning_rate": 2.4760204877594297e-05, |
|
"loss": 0.6344, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.009001161440186, |
|
"grad_norm": 1.0539401769638062, |
|
"learning_rate": 2.4703113226815474e-05, |
|
"loss": 0.7762, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.0104529616724738, |
|
"grad_norm": 1.6129015684127808, |
|
"learning_rate": 2.4646023124518336e-05, |
|
"loss": 0.5475, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.0119047619047619, |
|
"grad_norm": 3.270751476287842, |
|
"learning_rate": 2.4588934868469522e-05, |
|
"loss": 0.7106, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.01335656213705, |
|
"grad_norm": 1.1488885879516602, |
|
"learning_rate": 2.4531848756426032e-05, |
|
"loss": 0.6126, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.014808362369338, |
|
"grad_norm": 0.7339674234390259, |
|
"learning_rate": 2.447476508613372e-05, |
|
"loss": 0.5384, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.016260162601626, |
|
"grad_norm": 0.2865770161151886, |
|
"learning_rate": 2.4417684155325664e-05, |
|
"loss": 0.6378, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.017711962833914, |
|
"grad_norm": 1.421481728553772, |
|
"learning_rate": 2.4360606261720673e-05, |
|
"loss": 0.6757, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.019163763066202, |
|
"grad_norm": 0.846333384513855, |
|
"learning_rate": 2.430353170302172e-05, |
|
"loss": 0.517, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.0206155632984901, |
|
"grad_norm": 0.3524300158023834, |
|
"learning_rate": 2.4246460776914363e-05, |
|
"loss": 0.6129, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.0220673635307782, |
|
"grad_norm": 0.7928240299224854, |
|
"learning_rate": 2.4189393781065232e-05, |
|
"loss": 0.4327, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.0235191637630663, |
|
"grad_norm": 0.9376094341278076, |
|
"learning_rate": 2.4132331013120453e-05, |
|
"loss": 0.6137, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.0249709639953541, |
|
"grad_norm": 1.046407699584961, |
|
"learning_rate": 2.4075272770704104e-05, |
|
"loss": 0.6877, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.0264227642276422, |
|
"grad_norm": 2.0462183952331543, |
|
"learning_rate": 2.4018219351416645e-05, |
|
"loss": 0.4539, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.0278745644599303, |
|
"grad_norm": 0.4574951231479645, |
|
"learning_rate": 2.3961171052833386e-05, |
|
"loss": 0.9033, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.0293263646922184, |
|
"grad_norm": 3.518298864364624, |
|
"learning_rate": 2.3904128172502946e-05, |
|
"loss": 0.5817, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.0307781649245065, |
|
"grad_norm": 0.598048985004425, |
|
"learning_rate": 2.3847091007945667e-05, |
|
"loss": 0.4244, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.0322299651567943, |
|
"grad_norm": 1.5225111246109009, |
|
"learning_rate": 2.3790059856652083e-05, |
|
"loss": 0.9356, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.0336817653890824, |
|
"grad_norm": 0.9001873135566711, |
|
"learning_rate": 2.3733035016081355e-05, |
|
"loss": 0.4678, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.0351335656213705, |
|
"grad_norm": 2.5215003490448, |
|
"learning_rate": 2.367601678365974e-05, |
|
"loss": 0.5787, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.0365853658536586, |
|
"grad_norm": 0.9304032325744629, |
|
"learning_rate": 2.361900545677903e-05, |
|
"loss": 0.3138, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.0380371660859466, |
|
"grad_norm": 0.9305661916732788, |
|
"learning_rate": 2.3562001332795e-05, |
|
"loss": 0.5626, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.0394889663182345, |
|
"grad_norm": 1.5378453731536865, |
|
"learning_rate": 2.3505004709025842e-05, |
|
"loss": 0.7586, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.0409407665505226, |
|
"grad_norm": 0.8000249266624451, |
|
"learning_rate": 2.3448015882750647e-05, |
|
"loss": 0.4352, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.0423925667828107, |
|
"grad_norm": 0.8322232365608215, |
|
"learning_rate": 2.339103515120783e-05, |
|
"loss": 0.7357, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.0438443670150988, |
|
"grad_norm": 0.9948438405990601, |
|
"learning_rate": 2.3334062811593556e-05, |
|
"loss": 0.657, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.0452961672473868, |
|
"grad_norm": 1.071321725845337, |
|
"learning_rate": 2.3277099161060298e-05, |
|
"loss": 0.5158, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.0467479674796747, |
|
"grad_norm": 0.7249424457550049, |
|
"learning_rate": 2.3220144496715125e-05, |
|
"loss": 0.606, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.0481997677119628, |
|
"grad_norm": 1.2231613397598267, |
|
"learning_rate": 2.3163199115618282e-05, |
|
"loss": 0.4094, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.0496515679442509, |
|
"grad_norm": 1.2972086668014526, |
|
"learning_rate": 2.310626331478159e-05, |
|
"loss": 0.4112, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.051103368176539, |
|
"grad_norm": 1.0579259395599365, |
|
"learning_rate": 2.304933739116688e-05, |
|
"loss": 0.6859, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.052555168408827, |
|
"grad_norm": 1.3413074016571045, |
|
"learning_rate": 2.2992421641684494e-05, |
|
"loss": 0.4698, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.054006968641115, |
|
"grad_norm": 1.203018069267273, |
|
"learning_rate": 2.2935516363191693e-05, |
|
"loss": 0.4366, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.055458768873403, |
|
"grad_norm": 1.540850281715393, |
|
"learning_rate": 2.2878621852491135e-05, |
|
"loss": 0.5985, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.056910569105691, |
|
"grad_norm": 0.8544327616691589, |
|
"learning_rate": 2.28217384063293e-05, |
|
"loss": 0.6348, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.0583623693379791, |
|
"grad_norm": 0.9405458569526672, |
|
"learning_rate": 2.2764866321394963e-05, |
|
"loss": 0.5561, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.0598141695702672, |
|
"grad_norm": 0.6483383178710938, |
|
"learning_rate": 2.2708005894317657e-05, |
|
"loss": 0.6295, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.0612659698025553, |
|
"grad_norm": 1.3376249074935913, |
|
"learning_rate": 2.2651157421666096e-05, |
|
"loss": 0.6177, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.0627177700348431, |
|
"grad_norm": 2.9725067615509033, |
|
"learning_rate": 2.2594321199946656e-05, |
|
"loss": 0.4115, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.0641695702671312, |
|
"grad_norm": 1.1227383613586426, |
|
"learning_rate": 2.253749752560179e-05, |
|
"loss": 0.7575, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.0656213704994193, |
|
"grad_norm": 7.148159027099609, |
|
"learning_rate": 2.248068669500853e-05, |
|
"loss": 0.6736, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.0670731707317074, |
|
"grad_norm": 1.4029227495193481, |
|
"learning_rate": 2.2423889004476915e-05, |
|
"loss": 0.5547, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.0685249709639955, |
|
"grad_norm": 0.13588035106658936, |
|
"learning_rate": 2.2367104750248444e-05, |
|
"loss": 0.5272, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.0699767711962833, |
|
"grad_norm": 1.2609344720840454, |
|
"learning_rate": 2.2310334228494536e-05, |
|
"loss": 0.6262, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 1.735031008720398, |
|
"learning_rate": 2.2253577735314987e-05, |
|
"loss": 0.4278, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.0728803716608595, |
|
"grad_norm": 5.371007919311523, |
|
"learning_rate": 2.219683556673642e-05, |
|
"loss": 0.6081, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.0743321718931476, |
|
"grad_norm": 2.175072431564331, |
|
"learning_rate": 2.2140108018710758e-05, |
|
"loss": 0.7055, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.0757839721254356, |
|
"grad_norm": 0.49367207288742065, |
|
"learning_rate": 2.208339538711366e-05, |
|
"loss": 0.3842, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.0772357723577235, |
|
"grad_norm": 1.9475051164627075, |
|
"learning_rate": 2.2026697967742968e-05, |
|
"loss": 0.4956, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.0786875725900116, |
|
"grad_norm": 1.74053955078125, |
|
"learning_rate": 2.1970016056317203e-05, |
|
"loss": 0.6627, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.0801393728222997, |
|
"grad_norm": 1.1123576164245605, |
|
"learning_rate": 2.1913349948473996e-05, |
|
"loss": 0.4789, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.0815911730545877, |
|
"grad_norm": 1.6125507354736328, |
|
"learning_rate": 2.1856699939768545e-05, |
|
"loss": 0.4892, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.0830429732868758, |
|
"grad_norm": 1.4963864088058472, |
|
"learning_rate": 2.1800066325672074e-05, |
|
"loss": 0.4966, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.0844947735191637, |
|
"grad_norm": 1.2943956851959229, |
|
"learning_rate": 2.1743449401570324e-05, |
|
"loss": 0.7522, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.0859465737514518, |
|
"grad_norm": 0.6681497097015381, |
|
"learning_rate": 2.1686849462761947e-05, |
|
"loss": 0.5014, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.0873983739837398, |
|
"grad_norm": 1.1527822017669678, |
|
"learning_rate": 2.1630266804457035e-05, |
|
"loss": 0.4268, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.088850174216028, |
|
"grad_norm": 1.0493078231811523, |
|
"learning_rate": 2.157370172177553e-05, |
|
"loss": 0.6676, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.090301974448316, |
|
"grad_norm": 0.7843257784843445, |
|
"learning_rate": 2.1517154509745724e-05, |
|
"loss": 0.4035, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.0917537746806039, |
|
"grad_norm": 1.5716508626937866, |
|
"learning_rate": 2.1460625463302686e-05, |
|
"loss": 0.4774, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.093205574912892, |
|
"grad_norm": 0.881391704082489, |
|
"learning_rate": 2.1404114877286747e-05, |
|
"loss": 0.6217, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.09465737514518, |
|
"grad_norm": 0.4978386461734772, |
|
"learning_rate": 2.134762304644193e-05, |
|
"loss": 0.7448, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.096109175377468, |
|
"grad_norm": 1.047534465789795, |
|
"learning_rate": 2.129115026541447e-05, |
|
"loss": 0.7455, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.0975609756097562, |
|
"grad_norm": 3.125924825668335, |
|
"learning_rate": 2.1234696828751226e-05, |
|
"loss": 0.3793, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.099012775842044, |
|
"grad_norm": 4.937119960784912, |
|
"learning_rate": 2.1178263030898155e-05, |
|
"loss": 0.6671, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.1004645760743321, |
|
"grad_norm": 0.9988604187965393, |
|
"learning_rate": 2.1121849166198793e-05, |
|
"loss": 0.6868, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.1019163763066202, |
|
"grad_norm": 1.7846256494522095, |
|
"learning_rate": 2.106545552889272e-05, |
|
"loss": 0.7165, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.1033681765389083, |
|
"grad_norm": 1.7793424129486084, |
|
"learning_rate": 2.1009082413113973e-05, |
|
"loss": 0.6098, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.1048199767711964, |
|
"grad_norm": 0.6615446209907532, |
|
"learning_rate": 2.095273011288963e-05, |
|
"loss": 0.5701, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.1062717770034842, |
|
"grad_norm": 1.3341655731201172, |
|
"learning_rate": 2.0896398922138122e-05, |
|
"loss": 0.676, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.1077235772357723, |
|
"grad_norm": 1.0205527544021606, |
|
"learning_rate": 2.0840089134667824e-05, |
|
"loss": 0.5475, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.1091753774680604, |
|
"grad_norm": 1.5262418985366821, |
|
"learning_rate": 2.0783801044175467e-05, |
|
"loss": 0.582, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.1106271777003485, |
|
"grad_norm": 2.5063817501068115, |
|
"learning_rate": 2.0727534944244615e-05, |
|
"loss": 0.7552, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.1120789779326365, |
|
"grad_norm": 3.6351969242095947, |
|
"learning_rate": 2.067129112834413e-05, |
|
"loss": 0.6419, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.1135307781649244, |
|
"grad_norm": 0.8957704305648804, |
|
"learning_rate": 2.061506988982665e-05, |
|
"loss": 0.4333, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.1149825783972125, |
|
"grad_norm": 1.9803669452667236, |
|
"learning_rate": 2.0558871521927073e-05, |
|
"loss": 0.4656, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.1164343786295006, |
|
"grad_norm": 0.8719884157180786, |
|
"learning_rate": 2.0502696317760973e-05, |
|
"loss": 0.4252, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.1178861788617886, |
|
"grad_norm": 1.6916320323944092, |
|
"learning_rate": 2.044654457032314e-05, |
|
"loss": 0.7204, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.1193379790940767, |
|
"grad_norm": 1.6074903011322021, |
|
"learning_rate": 2.0390416572486e-05, |
|
"loss": 0.4984, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.1207897793263646, |
|
"grad_norm": 0.2988170087337494, |
|
"learning_rate": 2.033431261699813e-05, |
|
"loss": 0.4557, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.1222415795586527, |
|
"grad_norm": 15.167128562927246, |
|
"learning_rate": 2.0278232996482688e-05, |
|
"loss": 0.551, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.1236933797909407, |
|
"grad_norm": 0.8808531761169434, |
|
"learning_rate": 2.0222178003435926e-05, |
|
"loss": 0.434, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.1251451800232288, |
|
"grad_norm": 0.7921860814094543, |
|
"learning_rate": 2.0166147930225615e-05, |
|
"loss": 0.4803, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.126596980255517, |
|
"grad_norm": 1.9591280221939087, |
|
"learning_rate": 2.011014306908958e-05, |
|
"loss": 0.786, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.1280487804878048, |
|
"grad_norm": 1.479054570198059, |
|
"learning_rate": 2.0054163712134145e-05, |
|
"loss": 0.655, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.1295005807200929, |
|
"grad_norm": 3.091681480407715, |
|
"learning_rate": 1.9998210151332585e-05, |
|
"loss": 0.7444, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.130952380952381, |
|
"grad_norm": 2.064387321472168, |
|
"learning_rate": 1.994228267852366e-05, |
|
"loss": 0.4337, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.132404181184669, |
|
"grad_norm": 1.0761544704437256, |
|
"learning_rate": 1.9886381585410045e-05, |
|
"loss": 0.5395, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.133855981416957, |
|
"grad_norm": 1.1305792331695557, |
|
"learning_rate": 1.9830507163556816e-05, |
|
"loss": 0.6013, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.135307781649245, |
|
"grad_norm": 3.304077386856079, |
|
"learning_rate": 1.977465970438998e-05, |
|
"loss": 0.8103, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.136759581881533, |
|
"grad_norm": 0.8400141596794128, |
|
"learning_rate": 1.9718839499194868e-05, |
|
"loss": 0.5292, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.1382113821138211, |
|
"grad_norm": 5.679340839385986, |
|
"learning_rate": 1.9663046839114684e-05, |
|
"loss": 0.5317, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.1396631823461092, |
|
"grad_norm": 2.914165496826172, |
|
"learning_rate": 1.960728201514896e-05, |
|
"loss": 0.7501, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.1411149825783973, |
|
"grad_norm": 3.093472957611084, |
|
"learning_rate": 1.9551545318152047e-05, |
|
"loss": 0.5741, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.1425667828106851, |
|
"grad_norm": 1.7415759563446045, |
|
"learning_rate": 1.949583703883158e-05, |
|
"loss": 0.5044, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.1440185830429732, |
|
"grad_norm": 4.8877668380737305, |
|
"learning_rate": 1.9440157467746985e-05, |
|
"loss": 0.786, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.1454703832752613, |
|
"grad_norm": 1.9730969667434692, |
|
"learning_rate": 1.9384506895307964e-05, |
|
"loss": 0.7195, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.1469221835075494, |
|
"grad_norm": 12.92557430267334, |
|
"learning_rate": 1.932888561177294e-05, |
|
"loss": 0.5679, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.1483739837398375, |
|
"grad_norm": 2.283071517944336, |
|
"learning_rate": 1.92732939072476e-05, |
|
"loss": 0.5129, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 1.1498257839721253, |
|
"grad_norm": 0.8420314788818359, |
|
"learning_rate": 1.9217732071683343e-05, |
|
"loss": 0.6232, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.1512775842044134, |
|
"grad_norm": 1.523573637008667, |
|
"learning_rate": 1.9162200394875783e-05, |
|
"loss": 0.6329, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 1.1527293844367015, |
|
"grad_norm": 3.2268831729888916, |
|
"learning_rate": 1.9106699166463247e-05, |
|
"loss": 0.5248, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.1541811846689896, |
|
"grad_norm": 2.4383325576782227, |
|
"learning_rate": 1.905122867592522e-05, |
|
"loss": 0.725, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.1556329849012776, |
|
"grad_norm": 7.215484142303467, |
|
"learning_rate": 1.8995789212580884e-05, |
|
"loss": 0.4331, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.1570847851335655, |
|
"grad_norm": 1.5999699831008911, |
|
"learning_rate": 1.89403810655876e-05, |
|
"loss": 0.421, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 1.1585365853658536, |
|
"grad_norm": 0.6313633918762207, |
|
"learning_rate": 1.8885004523939386e-05, |
|
"loss": 0.3322, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.1599883855981417, |
|
"grad_norm": 1.2481117248535156, |
|
"learning_rate": 1.8829659876465406e-05, |
|
"loss": 0.4594, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"grad_norm": 1.2827537059783936, |
|
"learning_rate": 1.8774347411828472e-05, |
|
"loss": 0.603, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.1628919860627178, |
|
"grad_norm": 0.5014917254447937, |
|
"learning_rate": 1.871906741852356e-05, |
|
"loss": 0.3013, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 1.164343786295006, |
|
"grad_norm": 1.5885872840881348, |
|
"learning_rate": 1.8663820184876247e-05, |
|
"loss": 0.5299, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.1657955865272938, |
|
"grad_norm": 0.0035865483805537224, |
|
"learning_rate": 1.8608605999041297e-05, |
|
"loss": 0.5274, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 1.1672473867595818, |
|
"grad_norm": 1.4468114376068115, |
|
"learning_rate": 1.8553425149001057e-05, |
|
"loss": 0.4781, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.16869918699187, |
|
"grad_norm": 2.688275098800659, |
|
"learning_rate": 1.8498277922564026e-05, |
|
"loss": 0.4668, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.170150987224158, |
|
"grad_norm": 1.6105045080184937, |
|
"learning_rate": 1.8443164607363333e-05, |
|
"loss": 0.6738, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.171602787456446, |
|
"grad_norm": 1.46797513961792, |
|
"learning_rate": 1.8388085490855217e-05, |
|
"loss": 0.552, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 1.173054587688734, |
|
"grad_norm": 1.1859605312347412, |
|
"learning_rate": 1.833304086031757e-05, |
|
"loss": 0.4247, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.174506387921022, |
|
"grad_norm": 1.4056955575942993, |
|
"learning_rate": 1.8278031002848394e-05, |
|
"loss": 0.4875, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 1.17595818815331, |
|
"grad_norm": 1.6861822605133057, |
|
"learning_rate": 1.8223056205364342e-05, |
|
"loss": 0.5837, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.1774099883855982, |
|
"grad_norm": 1.9432148933410645, |
|
"learning_rate": 1.8168116754599186e-05, |
|
"loss": 0.6512, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 1.1788617886178863, |
|
"grad_norm": 1.438887119293213, |
|
"learning_rate": 1.811321293710235e-05, |
|
"loss": 0.5249, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.1803135888501741, |
|
"grad_norm": 4.159003734588623, |
|
"learning_rate": 1.8058345039237395e-05, |
|
"loss": 0.4055, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 1.1817653890824622, |
|
"grad_norm": 1.9116485118865967, |
|
"learning_rate": 1.8003513347180557e-05, |
|
"loss": 0.6732, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.1832171893147503, |
|
"grad_norm": 0.8615849614143372, |
|
"learning_rate": 1.7948718146919212e-05, |
|
"loss": 0.4732, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.1846689895470384, |
|
"grad_norm": 1.812454342842102, |
|
"learning_rate": 1.7893959724250402e-05, |
|
"loss": 0.4385, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.1861207897793264, |
|
"grad_norm": 1.0954737663269043, |
|
"learning_rate": 1.7839238364779358e-05, |
|
"loss": 0.4728, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 1.1875725900116145, |
|
"grad_norm": 3.3820154666900635, |
|
"learning_rate": 1.7784554353918002e-05, |
|
"loss": 0.4665, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.1890243902439024, |
|
"grad_norm": 1.3054349422454834, |
|
"learning_rate": 1.772990797688344e-05, |
|
"loss": 0.8027, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 0.9854569435119629, |
|
"learning_rate": 1.7675299518696503e-05, |
|
"loss": 0.6728, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.1919279907084785, |
|
"grad_norm": 1.3417751789093018, |
|
"learning_rate": 1.7620729264180244e-05, |
|
"loss": 0.6094, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 1.1933797909407666, |
|
"grad_norm": 1.8543522357940674, |
|
"learning_rate": 1.756619749795846e-05, |
|
"loss": 0.3593, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.1948315911730547, |
|
"grad_norm": 4.067511081695557, |
|
"learning_rate": 1.751170450445418e-05, |
|
"loss": 0.4437, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 1.1962833914053426, |
|
"grad_norm": 0.7072954773902893, |
|
"learning_rate": 1.7457250567888255e-05, |
|
"loss": 0.4523, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.1977351916376306, |
|
"grad_norm": 2.400019884109497, |
|
"learning_rate": 1.7402835972277774e-05, |
|
"loss": 0.7181, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.1991869918699187, |
|
"grad_norm": 0.7188956141471863, |
|
"learning_rate": 1.734846100143466e-05, |
|
"loss": 0.6106, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.2006387921022068, |
|
"grad_norm": 0.9549878835678101, |
|
"learning_rate": 1.7294125938964163e-05, |
|
"loss": 0.6636, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 1.202090592334495, |
|
"grad_norm": 1.26228928565979, |
|
"learning_rate": 1.7239831068263366e-05, |
|
"loss": 0.3134, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.2035423925667827, |
|
"grad_norm": 1.7492179870605469, |
|
"learning_rate": 1.718557667251974e-05, |
|
"loss": 0.7868, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 1.2049941927990708, |
|
"grad_norm": 5.789414405822754, |
|
"learning_rate": 1.7131363034709647e-05, |
|
"loss": 0.3828, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.206445993031359, |
|
"grad_norm": 5.111294746398926, |
|
"learning_rate": 1.7077190437596864e-05, |
|
"loss": 0.5902, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 1.207897793263647, |
|
"grad_norm": 1.8779693841934204, |
|
"learning_rate": 1.7023059163731097e-05, |
|
"loss": 0.5968, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.209349593495935, |
|
"grad_norm": 4.733475685119629, |
|
"learning_rate": 1.696896949544654e-05, |
|
"loss": 0.6245, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 1.210801393728223, |
|
"grad_norm": 0.9428911805152893, |
|
"learning_rate": 1.6914921714860378e-05, |
|
"loss": 0.537, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.212253193960511, |
|
"grad_norm": 0.8777297735214233, |
|
"learning_rate": 1.686091610387133e-05, |
|
"loss": 0.5012, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.213704994192799, |
|
"grad_norm": 4.631138801574707, |
|
"learning_rate": 1.680695294415815e-05, |
|
"loss": 0.6156, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.2151567944250872, |
|
"grad_norm": 0.6276788711547852, |
|
"learning_rate": 1.6753032517178187e-05, |
|
"loss": 0.6097, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 1.2166085946573753, |
|
"grad_norm": 0.7549428939819336, |
|
"learning_rate": 1.6699155104165904e-05, |
|
"loss": 0.7467, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.2180603948896631, |
|
"grad_norm": 0.9138199687004089, |
|
"learning_rate": 1.6645320986131433e-05, |
|
"loss": 0.5846, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"grad_norm": 1.1513859033584595, |
|
"learning_rate": 1.659153044385906e-05, |
|
"loss": 0.4798, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.2209639953542393, |
|
"grad_norm": 1.6771997213363647, |
|
"learning_rate": 1.6537783757905816e-05, |
|
"loss": 0.8278, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 1.2224157955865274, |
|
"grad_norm": 1.2027699947357178, |
|
"learning_rate": 1.648408120859998e-05, |
|
"loss": 0.7619, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.2238675958188154, |
|
"grad_norm": 2.257286310195923, |
|
"learning_rate": 1.643042307603964e-05, |
|
"loss": 0.7877, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 1.2253193960511033, |
|
"grad_norm": 0.687853217124939, |
|
"learning_rate": 1.6376809640091174e-05, |
|
"loss": 0.6319, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.2267711962833914, |
|
"grad_norm": 1.3753950595855713, |
|
"learning_rate": 1.63232411803879e-05, |
|
"loss": 0.5018, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.2282229965156795, |
|
"grad_norm": 3.117898464202881, |
|
"learning_rate": 1.6269717976328503e-05, |
|
"loss": 0.6428, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.2296747967479675, |
|
"grad_norm": 1.2253605127334595, |
|
"learning_rate": 1.6216240307075642e-05, |
|
"loss": 0.6265, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 1.2311265969802556, |
|
"grad_norm": 1.9370412826538086, |
|
"learning_rate": 1.6162808451554483e-05, |
|
"loss": 0.6584, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.2325783972125435, |
|
"grad_norm": 4.468973636627197, |
|
"learning_rate": 1.6109422688451224e-05, |
|
"loss": 0.6343, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 1.2340301974448316, |
|
"grad_norm": 6.738311290740967, |
|
"learning_rate": 1.605608329621168e-05, |
|
"loss": 0.6665, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.2354819976771196, |
|
"grad_norm": 1.266482949256897, |
|
"learning_rate": 1.6002790553039803e-05, |
|
"loss": 0.7137, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 1.2369337979094077, |
|
"grad_norm": 0.7233752012252808, |
|
"learning_rate": 1.594954473689621e-05, |
|
"loss": 0.5351, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.2383855981416958, |
|
"grad_norm": 3.379714012145996, |
|
"learning_rate": 1.5896346125496793e-05, |
|
"loss": 0.5488, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 1.2398373983739837, |
|
"grad_norm": 2.4713003635406494, |
|
"learning_rate": 1.5843194996311213e-05, |
|
"loss": 0.7367, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.2412891986062717, |
|
"grad_norm": 0.4656989574432373, |
|
"learning_rate": 1.5790091626561494e-05, |
|
"loss": 0.3323, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.2427409988385598, |
|
"grad_norm": 1.3530571460723877, |
|
"learning_rate": 1.5737036293220554e-05, |
|
"loss": 0.5089, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.244192799070848, |
|
"grad_norm": 1.5478246212005615, |
|
"learning_rate": 1.568402927301076e-05, |
|
"loss": 0.6737, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 1.245644599303136, |
|
"grad_norm": 1.6007646322250366, |
|
"learning_rate": 1.5631070842402494e-05, |
|
"loss": 0.5032, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.2470963995354238, |
|
"grad_norm": 1.9949185848236084, |
|
"learning_rate": 1.5578161277612707e-05, |
|
"loss": 0.746, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 1.248548199767712, |
|
"grad_norm": 1.552194595336914, |
|
"learning_rate": 1.5525300854603486e-05, |
|
"loss": 0.4807, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 6.406808376312256, |
|
"learning_rate": 1.547248984908059e-05, |
|
"loss": 0.5125, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 1.251451800232288, |
|
"grad_norm": 0.6398019790649414, |
|
"learning_rate": 1.5419728536492055e-05, |
|
"loss": 0.3386, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.2529036004645762, |
|
"grad_norm": 1.874664306640625, |
|
"learning_rate": 1.5367017192026713e-05, |
|
"loss": 0.5268, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 1.254355400696864, |
|
"grad_norm": 2.999232053756714, |
|
"learning_rate": 1.5314356090612776e-05, |
|
"loss": 0.5744, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.255807200929152, |
|
"grad_norm": 9.338212966918945, |
|
"learning_rate": 1.5261745506916408e-05, |
|
"loss": 0.6682, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.2572590011614402, |
|
"grad_norm": 3.1387779712677, |
|
"learning_rate": 1.5209185715340294e-05, |
|
"loss": 0.4691, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.2587108013937283, |
|
"grad_norm": 0.6614925861358643, |
|
"learning_rate": 1.5156676990022184e-05, |
|
"loss": 0.4255, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 1.2601626016260163, |
|
"grad_norm": 0.9042619466781616, |
|
"learning_rate": 1.5104219604833494e-05, |
|
"loss": 0.3958, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.2616144018583042, |
|
"grad_norm": 0.8313902020454407, |
|
"learning_rate": 1.5051813833377859e-05, |
|
"loss": 0.5207, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 1.2630662020905923, |
|
"grad_norm": 1.0558016300201416, |
|
"learning_rate": 1.4999459948989702e-05, |
|
"loss": 0.3235, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.2645180023228804, |
|
"grad_norm": 1.1987258195877075, |
|
"learning_rate": 1.4947158224732827e-05, |
|
"loss": 0.4936, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 1.2659698025551684, |
|
"grad_norm": 1.0946906805038452, |
|
"learning_rate": 1.4894908933398989e-05, |
|
"loss": 0.6256, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.2674216027874565, |
|
"grad_norm": 1.2409650087356567, |
|
"learning_rate": 1.4842712347506443e-05, |
|
"loss": 0.793, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 1.2688734030197444, |
|
"grad_norm": 0.7660655379295349, |
|
"learning_rate": 1.4790568739298582e-05, |
|
"loss": 0.5611, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.2703252032520325, |
|
"grad_norm": 0.7420207262039185, |
|
"learning_rate": 1.473847838074245e-05, |
|
"loss": 0.6045, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.2717770034843205, |
|
"grad_norm": 0.743302047252655, |
|
"learning_rate": 1.4686441543527374e-05, |
|
"loss": 0.7294, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.2732288037166086, |
|
"grad_norm": 1.441884160041809, |
|
"learning_rate": 1.4634458499063536e-05, |
|
"loss": 0.6125, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 1.2746806039488967, |
|
"grad_norm": 0.13829253613948822, |
|
"learning_rate": 1.458252951848051e-05, |
|
"loss": 0.4259, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.2761324041811846, |
|
"grad_norm": 1.4842077493667603, |
|
"learning_rate": 1.4530654872625935e-05, |
|
"loss": 0.5568, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 1.2775842044134726, |
|
"grad_norm": 1.0749858617782593, |
|
"learning_rate": 1.4478834832064026e-05, |
|
"loss": 0.5374, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.2790360046457607, |
|
"grad_norm": 8.395951271057129, |
|
"learning_rate": 1.4427069667074184e-05, |
|
"loss": 0.4693, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 1.2804878048780488, |
|
"grad_norm": 0.30601173639297485, |
|
"learning_rate": 1.4375359647649634e-05, |
|
"loss": 0.3597, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.2819396051103369, |
|
"grad_norm": 1.7369287014007568, |
|
"learning_rate": 1.4323705043495938e-05, |
|
"loss": 0.4448, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 1.2833914053426247, |
|
"grad_norm": 1.2835052013397217, |
|
"learning_rate": 1.4272106124029627e-05, |
|
"loss": 0.7685, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.2848432055749128, |
|
"grad_norm": 3.1556379795074463, |
|
"learning_rate": 1.4220563158376832e-05, |
|
"loss": 0.5719, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.286295005807201, |
|
"grad_norm": 2.188831090927124, |
|
"learning_rate": 1.4169076415371802e-05, |
|
"loss": 0.5663, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.287746806039489, |
|
"grad_norm": 2.645719051361084, |
|
"learning_rate": 1.4117646163555565e-05, |
|
"loss": 0.653, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 1.289198606271777, |
|
"grad_norm": 1.1469491720199585, |
|
"learning_rate": 1.4066272671174512e-05, |
|
"loss": 0.5314, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.290650406504065, |
|
"grad_norm": 1.8526806831359863, |
|
"learning_rate": 1.4014956206178987e-05, |
|
"loss": 0.4409, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 1.292102206736353, |
|
"grad_norm": 0.9696226716041565, |
|
"learning_rate": 1.3963697036221863e-05, |
|
"loss": 0.7264, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.293554006968641, |
|
"grad_norm": 2.522721529006958, |
|
"learning_rate": 1.3912495428657236e-05, |
|
"loss": 0.7832, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 1.2950058072009292, |
|
"grad_norm": 1.7248927354812622, |
|
"learning_rate": 1.3861351650538929e-05, |
|
"loss": 0.548, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.2964576074332173, |
|
"grad_norm": 0.5419870018959045, |
|
"learning_rate": 1.3810265968619141e-05, |
|
"loss": 0.6291, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 1.297909407665505, |
|
"grad_norm": 0.7840960025787354, |
|
"learning_rate": 1.3759238649347091e-05, |
|
"loss": 0.5772, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.2993612078977932, |
|
"grad_norm": 1.4585460424423218, |
|
"learning_rate": 1.3708269958867565e-05, |
|
"loss": 0.8735, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.3008130081300813, |
|
"grad_norm": 3.5455801486968994, |
|
"learning_rate": 1.3657360163019544e-05, |
|
"loss": 0.6392, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.3022648083623694, |
|
"grad_norm": 0.890296220779419, |
|
"learning_rate": 1.3606509527334894e-05, |
|
"loss": 0.853, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 1.3037166085946574, |
|
"grad_norm": 2.1235806941986084, |
|
"learning_rate": 1.3555718317036847e-05, |
|
"loss": 0.6268, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.3051684088269453, |
|
"grad_norm": 1.9171247482299805, |
|
"learning_rate": 1.3504986797038715e-05, |
|
"loss": 0.5688, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 1.3066202090592334, |
|
"grad_norm": 2.7999086380004883, |
|
"learning_rate": 1.3454315231942499e-05, |
|
"loss": 0.5062, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.3066202090592334, |
|
"eval_loss": 0.6196444034576416, |
|
"eval_runtime": 107.7639, |
|
"eval_samples_per_second": 13.455, |
|
"eval_steps_per_second": 3.368, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.3080720092915215, |
|
"grad_norm": 1.666410207748413, |
|
"learning_rate": 1.3403703886037466e-05, |
|
"loss": 0.7899, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 1.3095238095238095, |
|
"grad_norm": 1.47067129611969, |
|
"learning_rate": 1.3353153023298789e-05, |
|
"loss": 0.5773, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.3109756097560976, |
|
"grad_norm": 1.4645687341690063, |
|
"learning_rate": 1.3302662907386222e-05, |
|
"loss": 0.6352, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 1.3124274099883855, |
|
"grad_norm": 1.135907530784607, |
|
"learning_rate": 1.325223380164263e-05, |
|
"loss": 0.5388, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.3138792102206736, |
|
"grad_norm": 0.8413094282150269, |
|
"learning_rate": 1.3201865969092686e-05, |
|
"loss": 0.7493, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.3153310104529616, |
|
"grad_norm": 1.01530921459198, |
|
"learning_rate": 1.315155967244149e-05, |
|
"loss": 0.4492, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.3167828106852497, |
|
"grad_norm": 2.6221423149108887, |
|
"learning_rate": 1.3101315174073162e-05, |
|
"loss": 0.5208, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 1.3182346109175378, |
|
"grad_norm": 5.264577865600586, |
|
"learning_rate": 1.305113273604952e-05, |
|
"loss": 0.4573, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.3196864111498257, |
|
"grad_norm": 1.6519479751586914, |
|
"learning_rate": 1.3001012620108693e-05, |
|
"loss": 0.5216, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 1.321138211382114, |
|
"grad_norm": 1.1643894910812378, |
|
"learning_rate": 1.2950955087663741e-05, |
|
"loss": 0.4458, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.3225900116144018, |
|
"grad_norm": 1.967511534690857, |
|
"learning_rate": 1.2900960399801292e-05, |
|
"loss": 0.7898, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 1.32404181184669, |
|
"grad_norm": 1.269264578819275, |
|
"learning_rate": 1.2851028817280242e-05, |
|
"loss": 0.5747, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.325493612078978, |
|
"grad_norm": 1.0032755136489868, |
|
"learning_rate": 1.2801160600530299e-05, |
|
"loss": 0.5245, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 1.3269454123112658, |
|
"grad_norm": 4.5418925285339355, |
|
"learning_rate": 1.2751356009650681e-05, |
|
"loss": 0.6442, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.3283972125435541, |
|
"grad_norm": 1.1265850067138672, |
|
"learning_rate": 1.270161530440878e-05, |
|
"loss": 0.4234, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.329849012775842, |
|
"grad_norm": 0.029596175998449326, |
|
"learning_rate": 1.2651938744238745e-05, |
|
"loss": 0.4876, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.33130081300813, |
|
"grad_norm": 4.938312530517578, |
|
"learning_rate": 1.2602326588240168e-05, |
|
"loss": 0.5431, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 1.3327526132404182, |
|
"grad_norm": 1.1647799015045166, |
|
"learning_rate": 1.2552779095176737e-05, |
|
"loss": 0.5084, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.334204413472706, |
|
"grad_norm": 0.8059009313583374, |
|
"learning_rate": 1.2503296523474883e-05, |
|
"loss": 0.7431, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 1.3356562137049943, |
|
"grad_norm": 1.5592460632324219, |
|
"learning_rate": 1.245387913122239e-05, |
|
"loss": 0.312, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.3371080139372822, |
|
"grad_norm": 1.1873098611831665, |
|
"learning_rate": 1.2404527176167124e-05, |
|
"loss": 0.7229, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 1.3385598141695703, |
|
"grad_norm": 1.4901853799819946, |
|
"learning_rate": 1.2355240915715618e-05, |
|
"loss": 0.538, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.3400116144018583, |
|
"grad_norm": 1.5089656114578247, |
|
"learning_rate": 1.2306020606931767e-05, |
|
"loss": 0.5226, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 1.3414634146341464, |
|
"grad_norm": 0.9845458269119263, |
|
"learning_rate": 1.2256866506535497e-05, |
|
"loss": 0.61, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.3429152148664345, |
|
"grad_norm": 0.9404434561729431, |
|
"learning_rate": 1.220777887090139e-05, |
|
"loss": 0.5815, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.3443670150987224, |
|
"grad_norm": 1.297400712966919, |
|
"learning_rate": 1.2158757956057357e-05, |
|
"loss": 0.5703, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.3458188153310104, |
|
"grad_norm": 5.133298397064209, |
|
"learning_rate": 1.2109804017683349e-05, |
|
"loss": 0.3776, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 1.3472706155632985, |
|
"grad_norm": 0.2664077579975128, |
|
"learning_rate": 1.206091731110994e-05, |
|
"loss": 0.4978, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.3487224157955866, |
|
"grad_norm": 0.8112949132919312, |
|
"learning_rate": 1.2012098091317083e-05, |
|
"loss": 0.4887, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 1.3501742160278747, |
|
"grad_norm": 1.9871488809585571, |
|
"learning_rate": 1.1963346612932702e-05, |
|
"loss": 0.7117, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.3516260162601625, |
|
"grad_norm": 3.5719833374023438, |
|
"learning_rate": 1.191466313023143e-05, |
|
"loss": 0.568, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 1.3530778164924506, |
|
"grad_norm": 1.1161819696426392, |
|
"learning_rate": 1.1866047897133223e-05, |
|
"loss": 0.4455, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.3545296167247387, |
|
"grad_norm": 1.2592240571975708, |
|
"learning_rate": 1.1817501167202099e-05, |
|
"loss": 0.5396, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 1.3559814169570268, |
|
"grad_norm": 8.793890953063965, |
|
"learning_rate": 1.1769023193644757e-05, |
|
"loss": 0.5515, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.3574332171893149, |
|
"grad_norm": 1.0319164991378784, |
|
"learning_rate": 1.1720614229309277e-05, |
|
"loss": 0.62, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.3588850174216027, |
|
"grad_norm": 1.9891750812530518, |
|
"learning_rate": 1.1672274526683835e-05, |
|
"loss": 0.5769, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.3603368176538908, |
|
"grad_norm": 3.4943082332611084, |
|
"learning_rate": 1.162400433789533e-05, |
|
"loss": 0.6463, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 1.3617886178861789, |
|
"grad_norm": 1.8810696601867676, |
|
"learning_rate": 1.1575803914708096e-05, |
|
"loss": 0.7964, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.363240418118467, |
|
"grad_norm": 1.418583869934082, |
|
"learning_rate": 1.1527673508522604e-05, |
|
"loss": 0.428, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 1.364692218350755, |
|
"grad_norm": 4.40504264831543, |
|
"learning_rate": 1.1479613370374136e-05, |
|
"loss": 0.6119, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.366144018583043, |
|
"grad_norm": 2.4559905529022217, |
|
"learning_rate": 1.143162375093145e-05, |
|
"loss": 0.5134, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 1.367595818815331, |
|
"grad_norm": 3.8561477661132812, |
|
"learning_rate": 1.1383704900495529e-05, |
|
"loss": 0.4626, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.369047619047619, |
|
"grad_norm": 1.6356045007705688, |
|
"learning_rate": 1.1335857068998221e-05, |
|
"loss": 0.5223, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 1.3704994192799071, |
|
"grad_norm": 1.7519195079803467, |
|
"learning_rate": 1.1288080506000955e-05, |
|
"loss": 0.641, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.3719512195121952, |
|
"grad_norm": 0.4097733199596405, |
|
"learning_rate": 1.1240375460693475e-05, |
|
"loss": 0.5781, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.373403019744483, |
|
"grad_norm": 2.5884532928466797, |
|
"learning_rate": 1.119274218189247e-05, |
|
"loss": 0.5514, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.3748548199767712, |
|
"grad_norm": 1.4594874382019043, |
|
"learning_rate": 1.1145180918040332e-05, |
|
"loss": 0.7619, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 1.3763066202090593, |
|
"grad_norm": 7.807918548583984, |
|
"learning_rate": 1.109769191720384e-05, |
|
"loss": 0.3226, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.3777584204413473, |
|
"grad_norm": 0.6364027261734009, |
|
"learning_rate": 1.1050275427072884e-05, |
|
"loss": 0.5776, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 1.3792102206736354, |
|
"grad_norm": 0.4011842608451843, |
|
"learning_rate": 1.1002931694959131e-05, |
|
"loss": 0.4091, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.3806620209059233, |
|
"grad_norm": 5.032822132110596, |
|
"learning_rate": 1.0955660967794768e-05, |
|
"loss": 0.5523, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 1.3821138211382114, |
|
"grad_norm": 3.3209786415100098, |
|
"learning_rate": 1.0908463492131227e-05, |
|
"loss": 0.5782, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.3835656213704994, |
|
"grad_norm": 0.4670596718788147, |
|
"learning_rate": 1.086133951413785e-05, |
|
"loss": 0.6112, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 1.3850174216027875, |
|
"grad_norm": 6.041258335113525, |
|
"learning_rate": 1.081428927960067e-05, |
|
"loss": 0.6415, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.3864692218350756, |
|
"grad_norm": 2.76751446723938, |
|
"learning_rate": 1.0767313033921067e-05, |
|
"loss": 0.3524, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.3879210220673635, |
|
"grad_norm": 2.8424673080444336, |
|
"learning_rate": 1.0720411022114512e-05, |
|
"loss": 0.6496, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.3893728222996515, |
|
"grad_norm": 1.2790861129760742, |
|
"learning_rate": 1.0673583488809321e-05, |
|
"loss": 0.6281, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 1.3908246225319396, |
|
"grad_norm": 2.5029184818267822, |
|
"learning_rate": 1.0626830678245329e-05, |
|
"loss": 0.6078, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.3922764227642277, |
|
"grad_norm": 1.0946515798568726, |
|
"learning_rate": 1.0580152834272622e-05, |
|
"loss": 0.5256, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 1.3937282229965158, |
|
"grad_norm": 1.5489437580108643, |
|
"learning_rate": 1.0533550200350314e-05, |
|
"loss": 0.6867, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.3951800232288036, |
|
"grad_norm": 1.4204350709915161, |
|
"learning_rate": 1.0487023019545235e-05, |
|
"loss": 0.6683, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 1.3966318234610917, |
|
"grad_norm": 1.674791932106018, |
|
"learning_rate": 1.044057153453066e-05, |
|
"loss": 0.7691, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.3980836236933798, |
|
"grad_norm": 2.3372557163238525, |
|
"learning_rate": 1.039419598758505e-05, |
|
"loss": 0.5875, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 1.3995354239256679, |
|
"grad_norm": 4.951801300048828, |
|
"learning_rate": 1.0347896620590819e-05, |
|
"loss": 0.4327, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.400987224157956, |
|
"grad_norm": 1.4369560480117798, |
|
"learning_rate": 1.0301673675033017e-05, |
|
"loss": 0.4592, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.4024390243902438, |
|
"grad_norm": 1.2974849939346313, |
|
"learning_rate": 1.025552739199813e-05, |
|
"loss": 0.5833, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 1.403890824622532, |
|
"grad_norm": 1.037194848060608, |
|
"learning_rate": 1.0209458012172768e-05, |
|
"loss": 0.4698, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 1.40534262485482, |
|
"grad_norm": 2.5829808712005615, |
|
"learning_rate": 1.016346577584244e-05, |
|
"loss": 0.5585, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 1.406794425087108, |
|
"grad_norm": 2.260946273803711, |
|
"learning_rate": 1.0117550922890307e-05, |
|
"loss": 0.6017, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 1.4082462253193961, |
|
"grad_norm": 1.81033194065094, |
|
"learning_rate": 1.0071713692795918e-05, |
|
"loss": 0.6426, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.409698025551684, |
|
"grad_norm": 1.978293776512146, |
|
"learning_rate": 1.0025954324633948e-05, |
|
"loss": 0.4709, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 1.411149825783972, |
|
"grad_norm": 1.209401249885559, |
|
"learning_rate": 9.980273057072968e-06, |
|
"loss": 0.4459, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 1.4126016260162602, |
|
"grad_norm": 1.3207520246505737, |
|
"learning_rate": 9.934670128374212e-06, |
|
"loss": 0.3628, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 1.4140534262485482, |
|
"grad_norm": 0.9167854189872742, |
|
"learning_rate": 9.889145776390308e-06, |
|
"loss": 0.5037, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 1.4155052264808363, |
|
"grad_norm": 2.53662109375, |
|
"learning_rate": 9.843700238564035e-06, |
|
"loss": 0.4758, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.4169570267131242, |
|
"grad_norm": 2.7502434253692627, |
|
"learning_rate": 9.798333751927139e-06, |
|
"loss": 0.6707, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.4184088269454123, |
|
"grad_norm": 1.7120157480239868, |
|
"learning_rate": 9.753046553099007e-06, |
|
"loss": 0.7902, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 1.4198606271777003, |
|
"grad_norm": 2.2297070026397705, |
|
"learning_rate": 9.707838878285527e-06, |
|
"loss": 0.7242, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 1.4213124274099884, |
|
"grad_norm": 2.1308581829071045, |
|
"learning_rate": 9.662710963277783e-06, |
|
"loss": 0.5492, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 1.4227642276422765, |
|
"grad_norm": 7.506939888000488, |
|
"learning_rate": 9.617663043450847e-06, |
|
"loss": 0.469, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.4242160278745644, |
|
"grad_norm": 7.771796703338623, |
|
"learning_rate": 9.572695353762584e-06, |
|
"loss": 0.4342, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 1.4256678281068524, |
|
"grad_norm": 5.498221397399902, |
|
"learning_rate": 9.527808128752397e-06, |
|
"loss": 0.6446, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 1.4271196283391405, |
|
"grad_norm": 3.744574785232544, |
|
"learning_rate": 9.483001602539984e-06, |
|
"loss": 0.4798, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 1.2909547090530396, |
|
"learning_rate": 9.43827600882415e-06, |
|
"loss": 0.4513, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 1.4300232288037167, |
|
"grad_norm": 1.5674768686294556, |
|
"learning_rate": 9.393631580881596e-06, |
|
"loss": 0.3784, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.4314750290360045, |
|
"grad_norm": 0.9246693253517151, |
|
"learning_rate": 9.349068551565649e-06, |
|
"loss": 0.3879, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 1.4329268292682926, |
|
"grad_norm": 4.146523952484131, |
|
"learning_rate": 9.304587153305122e-06, |
|
"loss": 0.4375, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 1.4343786295005807, |
|
"grad_norm": 0.4749496579170227, |
|
"learning_rate": 9.260187618103036e-06, |
|
"loss": 0.6098, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 1.4358304297328688, |
|
"grad_norm": 0.9822236895561218, |
|
"learning_rate": 9.215870177535433e-06, |
|
"loss": 0.6339, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 1.4372822299651569, |
|
"grad_norm": 1.2814334630966187, |
|
"learning_rate": 9.171635062750189e-06, |
|
"loss": 0.8344, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.4387340301974447, |
|
"grad_norm": 2.548846483230591, |
|
"learning_rate": 9.127482504465792e-06, |
|
"loss": 0.5309, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 1.4401858304297328, |
|
"grad_norm": 2.3129475116729736, |
|
"learning_rate": 9.083412732970123e-06, |
|
"loss": 0.6082, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 1.4416376306620209, |
|
"grad_norm": 1.459028959274292, |
|
"learning_rate": 9.039425978119267e-06, |
|
"loss": 0.5144, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 1.443089430894309, |
|
"grad_norm": 2.7794339656829834, |
|
"learning_rate": 8.995522469336337e-06, |
|
"loss": 0.6107, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 1.444541231126597, |
|
"grad_norm": 1.3913737535476685, |
|
"learning_rate": 8.951702435610244e-06, |
|
"loss": 0.5444, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.445993031358885, |
|
"grad_norm": 1.145751714706421, |
|
"learning_rate": 8.907966105494498e-06, |
|
"loss": 0.5478, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 1.447444831591173, |
|
"grad_norm": 1.6435590982437134, |
|
"learning_rate": 8.864313707106075e-06, |
|
"loss": 0.5803, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 1.448896631823461, |
|
"grad_norm": 2.8100969791412354, |
|
"learning_rate": 8.820745468124144e-06, |
|
"loss": 0.6449, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 1.4503484320557491, |
|
"grad_norm": 0.4345937967300415, |
|
"learning_rate": 8.777261615788956e-06, |
|
"loss": 0.7335, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"grad_norm": 3.7761106491088867, |
|
"learning_rate": 8.733862376900597e-06, |
|
"loss": 0.4368, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.453252032520325, |
|
"grad_norm": 0.9145069718360901, |
|
"learning_rate": 8.690547977817839e-06, |
|
"loss": 0.6349, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 1.4547038327526132, |
|
"grad_norm": 0.9794019460678101, |
|
"learning_rate": 8.64731864445696e-06, |
|
"loss": 0.5048, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 1.4561556329849012, |
|
"grad_norm": 2.5523462295532227, |
|
"learning_rate": 8.604174602290563e-06, |
|
"loss": 0.5025, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 1.4576074332171893, |
|
"grad_norm": 1.2542840242385864, |
|
"learning_rate": 8.561116076346377e-06, |
|
"loss": 0.342, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 1.4590592334494774, |
|
"grad_norm": 4.584123611450195, |
|
"learning_rate": 8.518143291206099e-06, |
|
"loss": 0.5593, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 1.4605110336817653, |
|
"grad_norm": 2.7633087635040283, |
|
"learning_rate": 8.475256471004259e-06, |
|
"loss": 0.5616, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 1.4619628339140534, |
|
"grad_norm": 4.123738765716553, |
|
"learning_rate": 8.43245583942698e-06, |
|
"loss": 0.5572, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 1.4634146341463414, |
|
"grad_norm": 6.2447428703308105, |
|
"learning_rate": 8.389741619710855e-06, |
|
"loss": 0.3971, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 1.4648664343786295, |
|
"grad_norm": 1.1899082660675049, |
|
"learning_rate": 8.347114034641806e-06, |
|
"loss": 0.333, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 1.4663182346109176, |
|
"grad_norm": 5.325255393981934, |
|
"learning_rate": 8.304573306553846e-06, |
|
"loss": 0.4626, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.4677700348432055, |
|
"grad_norm": 2.715012788772583, |
|
"learning_rate": 8.262119657327996e-06, |
|
"loss": 0.4834, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 1.4692218350754935, |
|
"grad_norm": 1.1475021839141846, |
|
"learning_rate": 8.219753308391101e-06, |
|
"loss": 0.5551, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 1.4706736353077816, |
|
"grad_norm": 7.364482402801514, |
|
"learning_rate": 8.17747448071465e-06, |
|
"loss": 0.5282, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 1.4721254355400697, |
|
"grad_norm": 1.1067121028900146, |
|
"learning_rate": 8.135283394813651e-06, |
|
"loss": 0.569, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 1.4735772357723578, |
|
"grad_norm": 1.5818873643875122, |
|
"learning_rate": 8.093180270745485e-06, |
|
"loss": 0.5892, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 1.4750290360046456, |
|
"grad_norm": 2.0473148822784424, |
|
"learning_rate": 8.05116532810874e-06, |
|
"loss": 0.8704, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 1.476480836236934, |
|
"grad_norm": 1.3639038801193237, |
|
"learning_rate": 8.009238786042062e-06, |
|
"loss": 0.517, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 1.4779326364692218, |
|
"grad_norm": 1.7621372938156128, |
|
"learning_rate": 7.967400863223051e-06, |
|
"loss": 0.469, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 1.4793844367015099, |
|
"grad_norm": 1.9453188180923462, |
|
"learning_rate": 7.925651777867068e-06, |
|
"loss": 0.5911, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 1.480836236933798, |
|
"grad_norm": 1.6738969087600708, |
|
"learning_rate": 7.883991747726127e-06, |
|
"loss": 0.5271, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.4822880371660858, |
|
"grad_norm": 1.6197839975357056, |
|
"learning_rate": 7.842420990087774e-06, |
|
"loss": 0.5143, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 1.4837398373983741, |
|
"grad_norm": 1.3254222869873047, |
|
"learning_rate": 7.800939721773893e-06, |
|
"loss": 0.5526, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 1.485191637630662, |
|
"grad_norm": 2.3349244594573975, |
|
"learning_rate": 7.759548159139654e-06, |
|
"loss": 0.557, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 1.48664343786295, |
|
"grad_norm": 1.9867162704467773, |
|
"learning_rate": 7.718246518072341e-06, |
|
"loss": 0.4553, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 1.4880952380952381, |
|
"grad_norm": 0.5736078023910522, |
|
"learning_rate": 7.677035013990211e-06, |
|
"loss": 0.6118, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.489547038327526, |
|
"grad_norm": 2.2709176540374756, |
|
"learning_rate": 7.635913861841395e-06, |
|
"loss": 0.7102, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 1.4909988385598143, |
|
"grad_norm": 0.5769612789154053, |
|
"learning_rate": 7.594883276102799e-06, |
|
"loss": 0.639, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 1.4924506387921022, |
|
"grad_norm": 2.3885700702667236, |
|
"learning_rate": 7.5539434707789266e-06, |
|
"loss": 0.767, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 1.4939024390243902, |
|
"grad_norm": 2.5631144046783447, |
|
"learning_rate": 7.513094659400802e-06, |
|
"loss": 0.557, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 1.4953542392566783, |
|
"grad_norm": 1.6625310182571411, |
|
"learning_rate": 7.47233705502487e-06, |
|
"loss": 0.419, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.4968060394889664, |
|
"grad_norm": 3.3970789909362793, |
|
"learning_rate": 7.431670870231844e-06, |
|
"loss": 0.4773, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 1.4982578397212545, |
|
"grad_norm": 2.158837080001831, |
|
"learning_rate": 7.391096317125607e-06, |
|
"loss": 0.5095, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 1.4997096399535423, |
|
"grad_norm": 2.132723569869995, |
|
"learning_rate": 7.350613607332163e-06, |
|
"loss": 0.582, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 1.5011614401858304, |
|
"grad_norm": 3.694959878921509, |
|
"learning_rate": 7.310222951998438e-06, |
|
"loss": 0.3228, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 1.5026132404181185, |
|
"grad_norm": 3.7945165634155273, |
|
"learning_rate": 7.269924561791236e-06, |
|
"loss": 0.5246, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 1.5040650406504064, |
|
"grad_norm": 1.9424091577529907, |
|
"learning_rate": 7.2297186468961554e-06, |
|
"loss": 0.6539, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 1.5055168408826947, |
|
"grad_norm": 1.384211540222168, |
|
"learning_rate": 7.189605417016443e-06, |
|
"loss": 0.5089, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 1.5069686411149825, |
|
"grad_norm": 1.18372642993927, |
|
"learning_rate": 7.149585081371923e-06, |
|
"loss": 0.624, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 1.5084204413472706, |
|
"grad_norm": 2.478210926055908, |
|
"learning_rate": 7.109657848697937e-06, |
|
"loss": 0.5944, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 1.5098722415795587, |
|
"grad_norm": 1.7582294940948486, |
|
"learning_rate": 7.0698239272441985e-06, |
|
"loss": 0.3679, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.5113240418118465, |
|
"grad_norm": 3.0840678215026855, |
|
"learning_rate": 7.03008352477374e-06, |
|
"loss": 0.6691, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 1.5127758420441348, |
|
"grad_norm": 5.244002342224121, |
|
"learning_rate": 6.99043684856184e-06, |
|
"loss": 0.5773, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 1.5142276422764227, |
|
"grad_norm": 2.812211513519287, |
|
"learning_rate": 6.950884105394903e-06, |
|
"loss": 0.4341, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 1.5156794425087108, |
|
"grad_norm": 0.9920812845230103, |
|
"learning_rate": 6.911425501569418e-06, |
|
"loss": 0.5441, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 1.5171312427409989, |
|
"grad_norm": 0.8474797606468201, |
|
"learning_rate": 6.872061242890882e-06, |
|
"loss": 0.7427, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 1.5185830429732867, |
|
"grad_norm": 1.2484221458435059, |
|
"learning_rate": 6.8327915346726806e-06, |
|
"loss": 0.5319, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 1.520034843205575, |
|
"grad_norm": 2.2322065830230713, |
|
"learning_rate": 6.793616581735062e-06, |
|
"loss": 0.7047, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 1.5214866434378629, |
|
"grad_norm": 3.255192756652832, |
|
"learning_rate": 6.754536588404078e-06, |
|
"loss": 0.5605, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 1.522938443670151, |
|
"grad_norm": 2.065782308578491, |
|
"learning_rate": 6.715551758510469e-06, |
|
"loss": 0.609, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 1.524390243902439, |
|
"grad_norm": 1.5074211359024048, |
|
"learning_rate": 6.676662295388631e-06, |
|
"loss": 0.4149, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.525842044134727, |
|
"grad_norm": 1.3542487621307373, |
|
"learning_rate": 6.637868401875577e-06, |
|
"loss": 0.4952, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 1.5272938443670152, |
|
"grad_norm": 6.184685230255127, |
|
"learning_rate": 6.599170280309824e-06, |
|
"loss": 0.6942, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 1.528745644599303, |
|
"grad_norm": 1.49580979347229, |
|
"learning_rate": 6.560568132530376e-06, |
|
"loss": 0.5696, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 1.5301974448315911, |
|
"grad_norm": 1.4806469678878784, |
|
"learning_rate": 6.522062159875692e-06, |
|
"loss": 0.6504, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 1.5316492450638792, |
|
"grad_norm": 2.461064577102661, |
|
"learning_rate": 6.4836525631825714e-06, |
|
"loss": 0.5862, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 1.533101045296167, |
|
"grad_norm": 1.635206937789917, |
|
"learning_rate": 6.4453395427851475e-06, |
|
"loss": 0.5664, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 1.5345528455284554, |
|
"grad_norm": 2.978720188140869, |
|
"learning_rate": 6.407123298513865e-06, |
|
"loss": 0.6014, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 1.5360046457607432, |
|
"grad_norm": 3.055194854736328, |
|
"learning_rate": 6.369004029694378e-06, |
|
"loss": 0.5824, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 1.5374564459930313, |
|
"grad_norm": 2.0090768337249756, |
|
"learning_rate": 6.330981935146555e-06, |
|
"loss": 0.7431, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 1.5389082462253194, |
|
"grad_norm": 3.167921781539917, |
|
"learning_rate": 6.29305721318344e-06, |
|
"loss": 0.4873, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.5403600464576073, |
|
"grad_norm": 2.447772264480591, |
|
"learning_rate": 6.25523006161019e-06, |
|
"loss": 0.6174, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 1.5418118466898956, |
|
"grad_norm": 1.068217396736145, |
|
"learning_rate": 6.217500677723065e-06, |
|
"loss": 0.6131, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 1.5432636469221834, |
|
"grad_norm": 3.059321403503418, |
|
"learning_rate": 6.179869258308407e-06, |
|
"loss": 0.5651, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 1.5447154471544715, |
|
"grad_norm": 1.0179533958435059, |
|
"learning_rate": 6.142335999641599e-06, |
|
"loss": 0.8561, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 1.5461672473867596, |
|
"grad_norm": 3.0670573711395264, |
|
"learning_rate": 6.104901097486024e-06, |
|
"loss": 0.4205, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 1.5476190476190477, |
|
"grad_norm": 1.0629135370254517, |
|
"learning_rate": 6.067564747092094e-06, |
|
"loss": 0.7445, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 1.5490708478513358, |
|
"grad_norm": 1.5961717367172241, |
|
"learning_rate": 6.030327143196179e-06, |
|
"loss": 0.6035, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 1.5505226480836236, |
|
"grad_norm": 2.1358516216278076, |
|
"learning_rate": 5.993188480019615e-06, |
|
"loss": 0.3647, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 1.5519744483159117, |
|
"grad_norm": 3.7955398559570312, |
|
"learning_rate": 5.956148951267706e-06, |
|
"loss": 0.4885, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 1.5534262485481998, |
|
"grad_norm": 1.8019474744796753, |
|
"learning_rate": 5.919208750128685e-06, |
|
"loss": 0.4086, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.5548780487804879, |
|
"grad_norm": 0.8065319061279297, |
|
"learning_rate": 5.882368069272709e-06, |
|
"loss": 0.6092, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 1.556329849012776, |
|
"grad_norm": 1.8280988931655884, |
|
"learning_rate": 5.8456271008508955e-06, |
|
"loss": 0.583, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 1.5577816492450638, |
|
"grad_norm": 2.872685670852661, |
|
"learning_rate": 5.808986036494254e-06, |
|
"loss": 0.3497, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 1.5592334494773519, |
|
"grad_norm": 2.1516687870025635, |
|
"learning_rate": 5.772445067312729e-06, |
|
"loss": 0.4461, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 1.56068524970964, |
|
"grad_norm": 0.9675107598304749, |
|
"learning_rate": 5.736004383894231e-06, |
|
"loss": 0.8109, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 1.562137049941928, |
|
"grad_norm": 3.2056965827941895, |
|
"learning_rate": 5.69966417630356e-06, |
|
"loss": 0.7312, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 1.5635888501742161, |
|
"grad_norm": 1.4558873176574707, |
|
"learning_rate": 5.663424634081474e-06, |
|
"loss": 0.5516, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 1.565040650406504, |
|
"grad_norm": 1.376585602760315, |
|
"learning_rate": 5.62728594624371e-06, |
|
"loss": 0.3938, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 1.566492450638792, |
|
"grad_norm": 1.4354243278503418, |
|
"learning_rate": 5.59124830127995e-06, |
|
"loss": 0.6572, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 1.5679442508710801, |
|
"grad_norm": 2.6437528133392334, |
|
"learning_rate": 5.555311887152867e-06, |
|
"loss": 0.4434, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.5693960511033682, |
|
"grad_norm": 2.039637327194214, |
|
"learning_rate": 5.5194768912971565e-06, |
|
"loss": 0.4561, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 1.5708478513356563, |
|
"grad_norm": 1.7926547527313232, |
|
"learning_rate": 5.483743500618529e-06, |
|
"loss": 0.7296, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 1.5722996515679442, |
|
"grad_norm": 2.8525867462158203, |
|
"learning_rate": 5.448111901492747e-06, |
|
"loss": 0.5546, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 1.5737514518002322, |
|
"grad_norm": 1.2368862628936768, |
|
"learning_rate": 5.412582279764669e-06, |
|
"loss": 0.5491, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 1.5752032520325203, |
|
"grad_norm": 2.139909505844116, |
|
"learning_rate": 5.377154820747271e-06, |
|
"loss": 0.5243, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 1.5766550522648084, |
|
"grad_norm": 1.4064335823059082, |
|
"learning_rate": 5.341829709220647e-06, |
|
"loss": 0.9336, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 1.5781068524970965, |
|
"grad_norm": 3.3268957138061523, |
|
"learning_rate": 5.306607129431107e-06, |
|
"loss": 0.57, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 1.5795586527293843, |
|
"grad_norm": 5.089993000030518, |
|
"learning_rate": 5.271487265090163e-06, |
|
"loss": 0.5028, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 1.5810104529616724, |
|
"grad_norm": 1.5383248329162598, |
|
"learning_rate": 5.236470299373589e-06, |
|
"loss": 0.4664, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 1.5824622531939605, |
|
"grad_norm": 1.7498929500579834, |
|
"learning_rate": 5.201556414920486e-06, |
|
"loss": 0.8543, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.5839140534262486, |
|
"grad_norm": 1.0989433526992798, |
|
"learning_rate": 5.1667457938322925e-06, |
|
"loss": 0.4634, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 1.5853658536585367, |
|
"grad_norm": 1.1718672513961792, |
|
"learning_rate": 5.1320386176718555e-06, |
|
"loss": 0.2643, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 1.5868176538908245, |
|
"grad_norm": 2.5529532432556152, |
|
"learning_rate": 5.097435067462497e-06, |
|
"loss": 0.7085, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 1.5882694541231128, |
|
"grad_norm": 0.7183501124382019, |
|
"learning_rate": 5.0629353236870375e-06, |
|
"loss": 0.6274, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 1.5897212543554007, |
|
"grad_norm": 0.3472674787044525, |
|
"learning_rate": 5.02853956628686e-06, |
|
"loss": 0.6934, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 1.5911730545876888, |
|
"grad_norm": 1.4065948724746704, |
|
"learning_rate": 4.994247974661026e-06, |
|
"loss": 0.7115, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 1.5926248548199768, |
|
"grad_norm": 0.9510209560394287, |
|
"learning_rate": 4.960060727665255e-06, |
|
"loss": 0.5962, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 1.5940766550522647, |
|
"grad_norm": 3.3892316818237305, |
|
"learning_rate": 4.92597800361104e-06, |
|
"loss": 0.4962, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 1.595528455284553, |
|
"grad_norm": 3.7970123291015625, |
|
"learning_rate": 4.891999980264728e-06, |
|
"loss": 0.537, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 1.5969802555168409, |
|
"grad_norm": 1.8874022960662842, |
|
"learning_rate": 4.85812683484656e-06, |
|
"loss": 0.6591, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.598432055749129, |
|
"grad_norm": 3.3695411682128906, |
|
"learning_rate": 4.824358744029761e-06, |
|
"loss": 0.4808, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 1.599883855981417, |
|
"grad_norm": 4.303611755371094, |
|
"learning_rate": 4.790695883939633e-06, |
|
"loss": 0.4313, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 1.6013356562137049, |
|
"grad_norm": 2.4233243465423584, |
|
"learning_rate": 4.757138430152608e-06, |
|
"loss": 0.4927, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 1.6027874564459932, |
|
"grad_norm": 1.4356447458267212, |
|
"learning_rate": 4.72368655769535e-06, |
|
"loss": 0.4185, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 1.604239256678281, |
|
"grad_norm": 5.6396636962890625, |
|
"learning_rate": 4.690340441043847e-06, |
|
"loss": 0.5059, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 1.6056910569105691, |
|
"grad_norm": 0.8661177754402161, |
|
"learning_rate": 4.6571002541224955e-06, |
|
"loss": 0.6568, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 1.6071428571428572, |
|
"grad_norm": 8.649468421936035, |
|
"learning_rate": 4.623966170303171e-06, |
|
"loss": 0.4749, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 1.608594657375145, |
|
"grad_norm": 0.2540145516395569, |
|
"learning_rate": 4.590938362404368e-06, |
|
"loss": 0.6654, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 1.6100464576074334, |
|
"grad_norm": 2.6379213333129883, |
|
"learning_rate": 4.558017002690257e-06, |
|
"loss": 0.5673, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 1.6114982578397212, |
|
"grad_norm": 0.4164746403694153, |
|
"learning_rate": 4.525202262869804e-06, |
|
"loss": 0.4536, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.6129500580720093, |
|
"grad_norm": 0.745841920375824, |
|
"learning_rate": 4.492494314095891e-06, |
|
"loss": 0.5186, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 1.6144018583042974, |
|
"grad_norm": 2.1661605834960938, |
|
"learning_rate": 4.45989332696439e-06, |
|
"loss": 0.67, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 1.6158536585365852, |
|
"grad_norm": 1.8644750118255615, |
|
"learning_rate": 4.427399471513288e-06, |
|
"loss": 0.6665, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 1.6173054587688735, |
|
"grad_norm": 0.8033472299575806, |
|
"learning_rate": 4.395012917221825e-06, |
|
"loss": 0.6176, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 1.6187572590011614, |
|
"grad_norm": 1.7303358316421509, |
|
"learning_rate": 4.362733833009558e-06, |
|
"loss": 0.4351, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 1.6202090592334495, |
|
"grad_norm": 5.510407447814941, |
|
"learning_rate": 4.330562387235512e-06, |
|
"loss": 0.7516, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 1.6216608594657376, |
|
"grad_norm": 0.6413615942001343, |
|
"learning_rate": 4.298498747697335e-06, |
|
"loss": 0.3923, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 1.6231126596980254, |
|
"grad_norm": 1.7095063924789429, |
|
"learning_rate": 4.266543081630347e-06, |
|
"loss": 0.3482, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 1.6245644599303137, |
|
"grad_norm": 0.7411553859710693, |
|
"learning_rate": 4.234695555706714e-06, |
|
"loss": 0.3467, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"grad_norm": 6.189662456512451, |
|
"learning_rate": 4.202956336034591e-06, |
|
"loss": 0.6474, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.6274680603948897, |
|
"grad_norm": 0.5904057025909424, |
|
"learning_rate": 4.171325588157218e-06, |
|
"loss": 0.3935, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 1.6289198606271778, |
|
"grad_norm": 1.5648072957992554, |
|
"learning_rate": 4.139803477052076e-06, |
|
"loss": 0.6161, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 1.6303716608594656, |
|
"grad_norm": 4.8867597579956055, |
|
"learning_rate": 4.108390167130044e-06, |
|
"loss": 0.5963, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 1.631823461091754, |
|
"grad_norm": 0.865047037601471, |
|
"learning_rate": 4.077085822234503e-06, |
|
"loss": 0.4213, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 1.6332752613240418, |
|
"grad_norm": 1.2489089965820312, |
|
"learning_rate": 4.045890605640504e-06, |
|
"loss": 0.4975, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 1.6347270615563299, |
|
"grad_norm": 0.8895522952079773, |
|
"learning_rate": 4.0148046800539265e-06, |
|
"loss": 0.5152, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 1.636178861788618, |
|
"grad_norm": 7.4556121826171875, |
|
"learning_rate": 3.983828207610615e-06, |
|
"loss": 0.8086, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 1.6376306620209058, |
|
"grad_norm": 2.2906975746154785, |
|
"learning_rate": 3.9529613498755165e-06, |
|
"loss": 0.4963, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 1.639082462253194, |
|
"grad_norm": 1.934874415397644, |
|
"learning_rate": 3.922204267841889e-06, |
|
"loss": 0.4317, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 1.640534262485482, |
|
"grad_norm": 1.597822666168213, |
|
"learning_rate": 3.8915571219304055e-06, |
|
"loss": 0.5763, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.64198606271777, |
|
"grad_norm": 1.507169485092163, |
|
"learning_rate": 3.861020071988339e-06, |
|
"loss": 0.4695, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 1.6434378629500581, |
|
"grad_norm": 0.8798676133155823, |
|
"learning_rate": 3.830593277288757e-06, |
|
"loss": 0.4347, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 1.644889663182346, |
|
"grad_norm": 2.6623973846435547, |
|
"learning_rate": 3.800276896529642e-06, |
|
"loss": 0.4887, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 1.6463414634146343, |
|
"grad_norm": 1.0119774341583252, |
|
"learning_rate": 3.7700710878330907e-06, |
|
"loss": 0.4776, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 1.6477932636469221, |
|
"grad_norm": 1.744946002960205, |
|
"learning_rate": 3.7399760087444975e-06, |
|
"loss": 0.3542, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 1.6492450638792102, |
|
"grad_norm": 0.9083417057991028, |
|
"learning_rate": 3.7099918162317114e-06, |
|
"loss": 0.5441, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 1.6506968641114983, |
|
"grad_norm": 1.0866427421569824, |
|
"learning_rate": 3.680118666684218e-06, |
|
"loss": 0.6087, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 1.6521486643437862, |
|
"grad_norm": 1.4837919473648071, |
|
"learning_rate": 3.6503567159123536e-06, |
|
"loss": 0.5775, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 1.6536004645760745, |
|
"grad_norm": 1.312999963760376, |
|
"learning_rate": 3.6207061191464636e-06, |
|
"loss": 0.6444, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 1.6550522648083623, |
|
"grad_norm": 1.7816232442855835, |
|
"learning_rate": 3.5911670310360882e-06, |
|
"loss": 0.7579, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.6565040650406504, |
|
"grad_norm": 5.434678077697754, |
|
"learning_rate": 3.561739605649189e-06, |
|
"loss": 0.5099, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 1.6579558652729385, |
|
"grad_norm": 3.44694185256958, |
|
"learning_rate": 3.532423996471307e-06, |
|
"loss": 0.8014, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 1.6594076655052263, |
|
"grad_norm": 1.121071219444275, |
|
"learning_rate": 3.503220356404785e-06, |
|
"loss": 0.7484, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 1.6608594657375146, |
|
"grad_norm": 1.6010463237762451, |
|
"learning_rate": 3.4741288377679732e-06, |
|
"loss": 0.6689, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 1.6623112659698025, |
|
"grad_norm": 2.2549779415130615, |
|
"learning_rate": 3.4451495922944195e-06, |
|
"loss": 0.5535, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 1.6637630662020906, |
|
"grad_norm": 0.8929911255836487, |
|
"learning_rate": 3.4162827711320788e-06, |
|
"loss": 0.6548, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 1.6652148664343787, |
|
"grad_norm": 0.8602511286735535, |
|
"learning_rate": 3.3875285248425427e-06, |
|
"loss": 0.4342, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 2.5482707023620605, |
|
"learning_rate": 3.358887003400246e-06, |
|
"loss": 0.5578, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 1.6681184668989548, |
|
"grad_norm": 2.0349433422088623, |
|
"learning_rate": 3.3303583561916624e-06, |
|
"loss": 0.4982, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 1.6695702671312427, |
|
"grad_norm": 0.9225724339485168, |
|
"learning_rate": 3.3019427320145542e-06, |
|
"loss": 0.4313, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.6710220673635308, |
|
"grad_norm": 1.9111758470535278, |
|
"learning_rate": 3.2736402790771948e-06, |
|
"loss": 0.4434, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 1.6724738675958188, |
|
"grad_norm": 3.7338778972625732, |
|
"learning_rate": 3.245451144997569e-06, |
|
"loss": 0.6636, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 1.6739256678281067, |
|
"grad_norm": 1.3860654830932617, |
|
"learning_rate": 3.2173754768026394e-06, |
|
"loss": 0.5516, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 1.675377468060395, |
|
"grad_norm": 1.1442302465438843, |
|
"learning_rate": 3.189413420927545e-06, |
|
"loss": 0.3753, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 1.6768292682926829, |
|
"grad_norm": 1.216781497001648, |
|
"learning_rate": 3.1615651232148547e-06, |
|
"loss": 0.478, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 1.678281068524971, |
|
"grad_norm": 1.9530525207519531, |
|
"learning_rate": 3.1338307289138254e-06, |
|
"loss": 0.5791, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 1.679732868757259, |
|
"grad_norm": 1.5259203910827637, |
|
"learning_rate": 3.1062103826796e-06, |
|
"loss": 0.8218, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 1.6811846689895469, |
|
"grad_norm": 1.0574325323104858, |
|
"learning_rate": 3.078704228572485e-06, |
|
"loss": 0.4432, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 1.6826364692218352, |
|
"grad_norm": 1.1627357006072998, |
|
"learning_rate": 3.0513124100571944e-06, |
|
"loss": 0.4513, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 1.684088269454123, |
|
"grad_norm": 1.7360894680023193, |
|
"learning_rate": 3.0240350700021097e-06, |
|
"loss": 0.4008, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.6855400696864111, |
|
"grad_norm": 1.3201311826705933, |
|
"learning_rate": 2.9968723506784953e-06, |
|
"loss": 0.7546, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 1.6869918699186992, |
|
"grad_norm": 2.6173791885375977, |
|
"learning_rate": 2.9698243937598125e-06, |
|
"loss": 0.587, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 1.688443670150987, |
|
"grad_norm": 1.6192914247512817, |
|
"learning_rate": 2.942891340320936e-06, |
|
"loss": 0.5349, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 1.6898954703832754, |
|
"grad_norm": 1.5096668004989624, |
|
"learning_rate": 2.9160733308374347e-06, |
|
"loss": 0.5358, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 1.6913472706155632, |
|
"grad_norm": 2.530461072921753, |
|
"learning_rate": 2.8893705051848546e-06, |
|
"loss": 0.4036, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 1.6927990708478513, |
|
"grad_norm": 1.12082040309906, |
|
"learning_rate": 2.862783002637959e-06, |
|
"loss": 0.6056, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 1.6942508710801394, |
|
"grad_norm": 2.175119400024414, |
|
"learning_rate": 2.836310961870012e-06, |
|
"loss": 0.5726, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 1.6957026713124272, |
|
"grad_norm": 1.1249805688858032, |
|
"learning_rate": 2.8099545209520794e-06, |
|
"loss": 0.6046, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 1.6971544715447155, |
|
"grad_norm": 1.3748245239257812, |
|
"learning_rate": 2.783713817352282e-06, |
|
"loss": 0.5619, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 1.6986062717770034, |
|
"grad_norm": 1.3081672191619873, |
|
"learning_rate": 2.757588987935078e-06, |
|
"loss": 0.4904, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.7000580720092915, |
|
"grad_norm": 1.0695126056671143, |
|
"learning_rate": 2.731580168960557e-06, |
|
"loss": 0.4588, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 1.7015098722415796, |
|
"grad_norm": 0.9099944829940796, |
|
"learning_rate": 2.705687496083742e-06, |
|
"loss": 0.6389, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 1.7029616724738676, |
|
"grad_norm": 3.102823495864868, |
|
"learning_rate": 2.679911104353855e-06, |
|
"loss": 0.4546, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 1.7044134727061557, |
|
"grad_norm": 2.0113303661346436, |
|
"learning_rate": 2.654251128213642e-06, |
|
"loss": 0.5193, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 1.7058652729384436, |
|
"grad_norm": 5.120201110839844, |
|
"learning_rate": 2.6287077014986396e-06, |
|
"loss": 0.3837, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 1.7073170731707317, |
|
"grad_norm": 1.8609308004379272, |
|
"learning_rate": 2.603280957436499e-06, |
|
"loss": 0.6775, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 1.7087688734030198, |
|
"grad_norm": 0.5862835049629211, |
|
"learning_rate": 2.5779710286463006e-06, |
|
"loss": 0.4387, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 1.7102206736353078, |
|
"grad_norm": 1.4512196779251099, |
|
"learning_rate": 2.552778047137824e-06, |
|
"loss": 0.551, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 1.711672473867596, |
|
"grad_norm": 1.8097496032714844, |
|
"learning_rate": 2.527702144310909e-06, |
|
"loss": 0.4015, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 1.7131242740998838, |
|
"grad_norm": 1.6561768054962158, |
|
"learning_rate": 2.502743450954714e-06, |
|
"loss": 0.6328, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.7145760743321719, |
|
"grad_norm": 3.0371804237365723, |
|
"learning_rate": 2.477902097247095e-06, |
|
"loss": 0.5383, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 1.71602787456446, |
|
"grad_norm": 1.1968135833740234, |
|
"learning_rate": 2.453178212753876e-06, |
|
"loss": 0.4626, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 1.717479674796748, |
|
"grad_norm": 1.953162431716919, |
|
"learning_rate": 2.428571926428194e-06, |
|
"loss": 0.416, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 1.718931475029036, |
|
"grad_norm": 5.2054443359375, |
|
"learning_rate": 2.4040833666098413e-06, |
|
"loss": 0.5872, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 1.720383275261324, |
|
"grad_norm": 0.8436479568481445, |
|
"learning_rate": 2.3797126610245605e-06, |
|
"loss": 0.6168, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 1.721835075493612, |
|
"grad_norm": 1.6038563251495361, |
|
"learning_rate": 2.3554599367834137e-06, |
|
"loss": 0.4696, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 1.7232868757259001, |
|
"grad_norm": 1.6510204076766968, |
|
"learning_rate": 2.3313253203820965e-06, |
|
"loss": 0.5301, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 1.7247386759581882, |
|
"grad_norm": 1.8987841606140137, |
|
"learning_rate": 2.307308937700278e-06, |
|
"loss": 0.4072, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 1.7261904761904763, |
|
"grad_norm": 1.4687321186065674, |
|
"learning_rate": 2.283410914000969e-06, |
|
"loss": 0.6519, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 1.7276422764227641, |
|
"grad_norm": 1.3027409315109253, |
|
"learning_rate": 2.2596313739298462e-06, |
|
"loss": 0.5261, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.7290940766550522, |
|
"grad_norm": 1.6218777894973755, |
|
"learning_rate": 2.235970441514598e-06, |
|
"loss": 0.5705, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 1.7305458768873403, |
|
"grad_norm": 1.294359803199768, |
|
"learning_rate": 2.2124282401642936e-06, |
|
"loss": 0.5077, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 1.7319976771196284, |
|
"grad_norm": 3.3764312267303467, |
|
"learning_rate": 2.189004892668742e-06, |
|
"loss": 0.5721, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 1.7334494773519165, |
|
"grad_norm": 2.6626949310302734, |
|
"learning_rate": 2.165700521197825e-06, |
|
"loss": 0.4905, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 1.7349012775842043, |
|
"grad_norm": 1.1768718957901, |
|
"learning_rate": 2.1425152473008832e-06, |
|
"loss": 0.6437, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 1.7363530778164924, |
|
"grad_norm": 3.1630823612213135, |
|
"learning_rate": 2.119449191906089e-06, |
|
"loss": 0.5341, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 1.7378048780487805, |
|
"grad_norm": 6.051577568054199, |
|
"learning_rate": 2.096502475319781e-06, |
|
"loss": 0.4468, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 1.7392566782810686, |
|
"grad_norm": 1.5965017080307007, |
|
"learning_rate": 2.0736752172258846e-06, |
|
"loss": 0.8102, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 1.7407084785133566, |
|
"grad_norm": 1.8911795616149902, |
|
"learning_rate": 2.050967536685233e-06, |
|
"loss": 0.7823, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"grad_norm": 1.6226707696914673, |
|
"learning_rate": 2.0283795521350042e-06, |
|
"loss": 0.7913, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"eval_loss": 0.6174443364143372, |
|
"eval_runtime": 107.8262, |
|
"eval_samples_per_second": 13.448, |
|
"eval_steps_per_second": 3.367, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.7436120789779328, |
|
"grad_norm": 1.1205623149871826, |
|
"learning_rate": 2.005911381388048e-06, |
|
"loss": 0.6055, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 1.7450638792102207, |
|
"grad_norm": 1.6299880743026733, |
|
"learning_rate": 1.9835631416323164e-06, |
|
"loss": 0.599, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 1.7465156794425087, |
|
"grad_norm": 1.34322190284729, |
|
"learning_rate": 1.961334949430227e-06, |
|
"loss": 0.5602, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 1.7479674796747968, |
|
"grad_norm": 0.7588962912559509, |
|
"learning_rate": 1.9392269207180512e-06, |
|
"loss": 0.4644, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 1.7494192799070847, |
|
"grad_norm": 3.5957090854644775, |
|
"learning_rate": 1.9172391708053408e-06, |
|
"loss": 1.0411, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 1.750871080139373, |
|
"grad_norm": 1.6750432252883911, |
|
"learning_rate": 1.895371814374286e-06, |
|
"loss": 0.5805, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 1.7523228803716608, |
|
"grad_norm": 1.9335286617279053, |
|
"learning_rate": 1.8736249654791538e-06, |
|
"loss": 0.7541, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 1.753774680603949, |
|
"grad_norm": 1.4929413795471191, |
|
"learning_rate": 1.8519987375456654e-06, |
|
"loss": 0.5656, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 1.755226480836237, |
|
"grad_norm": 1.5122934579849243, |
|
"learning_rate": 1.8304932433704097e-06, |
|
"loss": 0.5031, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 1.7566782810685249, |
|
"grad_norm": 1.0562947988510132, |
|
"learning_rate": 1.809108595120279e-06, |
|
"loss": 0.5551, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.7581300813008132, |
|
"grad_norm": 1.0522669553756714, |
|
"learning_rate": 1.7878449043318534e-06, |
|
"loss": 0.4314, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 1.759581881533101, |
|
"grad_norm": 0.8575490117073059, |
|
"learning_rate": 1.766702281910837e-06, |
|
"loss": 0.4565, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 1.761033681765389, |
|
"grad_norm": 1.6525681018829346, |
|
"learning_rate": 1.7456808381314583e-06, |
|
"loss": 0.4301, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 1.7624854819976772, |
|
"grad_norm": 1.931264042854309, |
|
"learning_rate": 1.7247806826359375e-06, |
|
"loss": 0.4871, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 1.763937282229965, |
|
"grad_norm": 1.8501496315002441, |
|
"learning_rate": 1.704001924433865e-06, |
|
"loss": 0.657, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 1.7653890824622533, |
|
"grad_norm": 1.272760272026062, |
|
"learning_rate": 1.6833446719016627e-06, |
|
"loss": 0.606, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 1.7668408826945412, |
|
"grad_norm": 0.9544461369514465, |
|
"learning_rate": 1.6628090327820172e-06, |
|
"loss": 0.6067, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 1.7682926829268293, |
|
"grad_norm": 9.855928421020508, |
|
"learning_rate": 1.6423951141833011e-06, |
|
"loss": 0.3548, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 1.7697444831591174, |
|
"grad_norm": 3.282947540283203, |
|
"learning_rate": 1.6221030225790413e-06, |
|
"loss": 0.3999, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 1.7711962833914052, |
|
"grad_norm": 3.6573681831359863, |
|
"learning_rate": 1.6019328638073261e-06, |
|
"loss": 0.5574, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.7726480836236935, |
|
"grad_norm": 3.0135483741760254, |
|
"learning_rate": 1.581884743070297e-06, |
|
"loss": 0.7062, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 1.7740998838559814, |
|
"grad_norm": 1.4390877485275269, |
|
"learning_rate": 1.5619587649335605e-06, |
|
"loss": 0.5718, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 1.7755516840882695, |
|
"grad_norm": 0.5463725328445435, |
|
"learning_rate": 1.5421550333256734e-06, |
|
"loss": 0.609, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 1.7770034843205575, |
|
"grad_norm": 2.245574712753296, |
|
"learning_rate": 1.5224736515375814e-06, |
|
"loss": 0.5087, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 1.7784552845528454, |
|
"grad_norm": 1.2518837451934814, |
|
"learning_rate": 1.502914722222079e-06, |
|
"loss": 0.6448, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 1.7799070847851337, |
|
"grad_norm": 1.8152894973754883, |
|
"learning_rate": 1.4834783473932994e-06, |
|
"loss": 0.6077, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 1.7813588850174216, |
|
"grad_norm": 1.3485125303268433, |
|
"learning_rate": 1.4641646284261485e-06, |
|
"loss": 0.5192, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 1.7828106852497096, |
|
"grad_norm": 4.338469982147217, |
|
"learning_rate": 1.444973666055796e-06, |
|
"loss": 0.6732, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 1.7842624854819977, |
|
"grad_norm": 0.5736151933670044, |
|
"learning_rate": 1.4259055603771527e-06, |
|
"loss": 0.4268, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 2.7274398803710938, |
|
"learning_rate": 1.4069604108443296e-06, |
|
"loss": 0.6137, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.787166085946574, |
|
"grad_norm": 1.3027504682540894, |
|
"learning_rate": 1.3881383162701433e-06, |
|
"loss": 0.48, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 1.7886178861788617, |
|
"grad_norm": 1.979504942893982, |
|
"learning_rate": 1.3694393748255902e-06, |
|
"loss": 0.3862, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 1.7900696864111498, |
|
"grad_norm": 2.1074235439300537, |
|
"learning_rate": 1.3508636840393246e-06, |
|
"loss": 0.5215, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 1.791521486643438, |
|
"grad_norm": 2.5477986335754395, |
|
"learning_rate": 1.3324113407971516e-06, |
|
"loss": 0.4583, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 1.7929732868757258, |
|
"grad_norm": 0.5263664126396179, |
|
"learning_rate": 1.314082441341552e-06, |
|
"loss": 0.6051, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 1.794425087108014, |
|
"grad_norm": 1.8962557315826416, |
|
"learning_rate": 1.2958770812711352e-06, |
|
"loss": 0.6069, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 1.795876887340302, |
|
"grad_norm": 2.079145908355713, |
|
"learning_rate": 1.2777953555401678e-06, |
|
"loss": 0.7225, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 1.79732868757259, |
|
"grad_norm": 0.8144702315330505, |
|
"learning_rate": 1.2598373584580824e-06, |
|
"loss": 0.474, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 1.798780487804878, |
|
"grad_norm": 6.011617660522461, |
|
"learning_rate": 1.2420031836889668e-06, |
|
"loss": 0.4614, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 1.800232288037166, |
|
"grad_norm": 2.0123348236083984, |
|
"learning_rate": 1.224292924251083e-06, |
|
"loss": 0.4744, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.8016840882694543, |
|
"grad_norm": 1.1453293561935425, |
|
"learning_rate": 1.2067066725163946e-06, |
|
"loss": 0.7232, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 1.8031358885017421, |
|
"grad_norm": 3.011850357055664, |
|
"learning_rate": 1.1892445202100643e-06, |
|
"loss": 0.6242, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 1.8045876887340302, |
|
"grad_norm": 1.3641666173934937, |
|
"learning_rate": 1.1719065584099881e-06, |
|
"loss": 0.6855, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 1.8060394889663183, |
|
"grad_norm": 2.096034288406372, |
|
"learning_rate": 1.1546928775463234e-06, |
|
"loss": 0.5658, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 1.8074912891986061, |
|
"grad_norm": 1.173338770866394, |
|
"learning_rate": 1.137603567401005e-06, |
|
"loss": 0.6926, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 1.8089430894308944, |
|
"grad_norm": 7.650896072387695, |
|
"learning_rate": 1.1206387171072808e-06, |
|
"loss": 0.5958, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 1.8103948896631823, |
|
"grad_norm": 4.46699857711792, |
|
"learning_rate": 1.1037984151492624e-06, |
|
"loss": 0.4605, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 1.8118466898954704, |
|
"grad_norm": 2.164135217666626, |
|
"learning_rate": 1.0870827493614344e-06, |
|
"loss": 0.665, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 1.8132984901277585, |
|
"grad_norm": 1.531969428062439, |
|
"learning_rate": 1.0704918069282226e-06, |
|
"loss": 0.4462, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 1.8147502903600463, |
|
"grad_norm": 1.4021626710891724, |
|
"learning_rate": 1.0540256743835297e-06, |
|
"loss": 0.5399, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.8162020905923346, |
|
"grad_norm": 1.4886596202850342, |
|
"learning_rate": 1.0376844376102784e-06, |
|
"loss": 0.5748, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 1.8176538908246225, |
|
"grad_norm": 2.1300623416900635, |
|
"learning_rate": 1.0214681818399712e-06, |
|
"loss": 0.6041, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 1.8191056910569106, |
|
"grad_norm": 2.5872268676757812, |
|
"learning_rate": 1.0053769916522488e-06, |
|
"loss": 0.6594, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 1.8205574912891986, |
|
"grad_norm": 0.8167919516563416, |
|
"learning_rate": 9.894109509744342e-07, |
|
"loss": 0.5516, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 1.8220092915214865, |
|
"grad_norm": 1.7698092460632324, |
|
"learning_rate": 9.735701430811067e-07, |
|
"loss": 0.4946, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 1.8234610917537748, |
|
"grad_norm": 1.8498523235321045, |
|
"learning_rate": 9.578546505936676e-07, |
|
"loss": 0.6975, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 1.8249128919860627, |
|
"grad_norm": 2.3681557178497314, |
|
"learning_rate": 9.422645554799048e-07, |
|
"loss": 0.7246, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 1.8263646922183507, |
|
"grad_norm": 1.6103743314743042, |
|
"learning_rate": 9.267999390535659e-07, |
|
"loss": 0.597, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 1.8278164924506388, |
|
"grad_norm": 1.0685875415802002, |
|
"learning_rate": 9.11460881973944e-07, |
|
"loss": 0.4749, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 1.8292682926829267, |
|
"grad_norm": 1.7253142595291138, |
|
"learning_rate": 8.962474642454338e-07, |
|
"loss": 0.7401, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.830720092915215, |
|
"grad_norm": 3.835946798324585, |
|
"learning_rate": 8.811597652171377e-07, |
|
"loss": 0.5107, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 1.8321718931475028, |
|
"grad_norm": 3.366118907928467, |
|
"learning_rate": 8.661978635824464e-07, |
|
"loss": 0.5523, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 1.833623693379791, |
|
"grad_norm": 1.4780124425888062, |
|
"learning_rate": 8.513618373786198e-07, |
|
"loss": 0.4592, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 1.835075493612079, |
|
"grad_norm": 1.5837668180465698, |
|
"learning_rate": 8.366517639863819e-07, |
|
"loss": 0.5838, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 1.8365272938443669, |
|
"grad_norm": 5.799704551696777, |
|
"learning_rate": 8.220677201295296e-07, |
|
"loss": 0.7116, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 1.8379790940766552, |
|
"grad_norm": 1.668204665184021, |
|
"learning_rate": 8.076097818745188e-07, |
|
"loss": 0.5013, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 1.839430894308943, |
|
"grad_norm": 2.408761501312256, |
|
"learning_rate": 7.932780246300703e-07, |
|
"loss": 0.4475, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 1.840882694541231, |
|
"grad_norm": 1.6833887100219727, |
|
"learning_rate": 7.790725231467844e-07, |
|
"loss": 0.3637, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 1.8423344947735192, |
|
"grad_norm": 2.4150564670562744, |
|
"learning_rate": 7.649933515167407e-07, |
|
"loss": 0.5217, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 1.843786295005807, |
|
"grad_norm": 0.23005646467208862, |
|
"learning_rate": 7.510405831731155e-07, |
|
"loss": 0.7733, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.8452380952380953, |
|
"grad_norm": 5.178878307342529, |
|
"learning_rate": 7.372142908898038e-07, |
|
"loss": 0.5562, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 1.8466898954703832, |
|
"grad_norm": 1.1063512563705444, |
|
"learning_rate": 7.235145467810344e-07, |
|
"loss": 0.6543, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 1.8481416957026713, |
|
"grad_norm": 1.4870764017105103, |
|
"learning_rate": 7.099414223009859e-07, |
|
"loss": 0.5468, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 1.8495934959349594, |
|
"grad_norm": 0.8903436660766602, |
|
"learning_rate": 6.964949882434402e-07, |
|
"loss": 0.441, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 1.8510452961672472, |
|
"grad_norm": 2.586010694503784, |
|
"learning_rate": 6.831753147413827e-07, |
|
"loss": 0.7283, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 1.8524970963995355, |
|
"grad_norm": 7.29203987121582, |
|
"learning_rate": 6.699824712666503e-07, |
|
"loss": 0.5616, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 1.8539488966318234, |
|
"grad_norm": 2.853286027908325, |
|
"learning_rate": 6.569165266295779e-07, |
|
"loss": 0.6829, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 1.8554006968641115, |
|
"grad_norm": 1.3794944286346436, |
|
"learning_rate": 6.439775489786193e-07, |
|
"loss": 0.6023, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 1.8568524970963995, |
|
"grad_norm": 1.2747677564620972, |
|
"learning_rate": 6.311656058000076e-07, |
|
"loss": 0.5941, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 1.8583042973286876, |
|
"grad_norm": 1.2692632675170898, |
|
"learning_rate": 6.184807639173979e-07, |
|
"loss": 0.54, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.8597560975609757, |
|
"grad_norm": 3.867017984390259, |
|
"learning_rate": 6.059230894915224e-07, |
|
"loss": 0.4035, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 1.8612078977932636, |
|
"grad_norm": 5.028682708740234, |
|
"learning_rate": 5.934926480198333e-07, |
|
"loss": 0.6283, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 1.8626596980255516, |
|
"grad_norm": 1.321096658706665, |
|
"learning_rate": 5.811895043361742e-07, |
|
"loss": 0.3401, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 1.8641114982578397, |
|
"grad_norm": 1.8228753805160522, |
|
"learning_rate": 5.690137226104481e-07, |
|
"loss": 0.6275, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 1.8655632984901278, |
|
"grad_norm": 1.2849879264831543, |
|
"learning_rate": 5.569653663482527e-07, |
|
"loss": 0.5017, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 1.8670150987224159, |
|
"grad_norm": 1.4431346654891968, |
|
"learning_rate": 5.450444983905845e-07, |
|
"loss": 0.4334, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 1.8684668989547037, |
|
"grad_norm": 3.2771754264831543, |
|
"learning_rate": 5.332511809134883e-07, |
|
"loss": 0.5051, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 1.8699186991869918, |
|
"grad_norm": 0.42705005407333374, |
|
"learning_rate": 5.215854754277382e-07, |
|
"loss": 0.5255, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 1.87137049941928, |
|
"grad_norm": 1.7732504606246948, |
|
"learning_rate": 5.100474427785245e-07, |
|
"loss": 0.5235, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 1.872822299651568, |
|
"grad_norm": 1.3279632329940796, |
|
"learning_rate": 4.986371431451254e-07, |
|
"loss": 0.7319, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.874274099883856, |
|
"grad_norm": 3.910167694091797, |
|
"learning_rate": 4.87354636040599e-07, |
|
"loss": 0.4989, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 1.875725900116144, |
|
"grad_norm": 4.26170015335083, |
|
"learning_rate": 4.7619998031147304e-07, |
|
"loss": 0.3566, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 1.877177700348432, |
|
"grad_norm": 1.6784484386444092, |
|
"learning_rate": 4.651732341374365e-07, |
|
"loss": 0.4187, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 1.87862950058072, |
|
"grad_norm": 2.036226511001587, |
|
"learning_rate": 4.5427445503103684e-07, |
|
"loss": 0.504, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 1.8800813008130082, |
|
"grad_norm": 1.2957289218902588, |
|
"learning_rate": 4.435036998373776e-07, |
|
"loss": 0.419, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 1.8815331010452963, |
|
"grad_norm": 1.349066972732544, |
|
"learning_rate": 4.3286102473382994e-07, |
|
"loss": 0.3789, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 1.8829849012775841, |
|
"grad_norm": 8.302398681640625, |
|
"learning_rate": 4.2234648522972156e-07, |
|
"loss": 0.3884, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 1.8844367015098722, |
|
"grad_norm": 4.297000885009766, |
|
"learning_rate": 4.11960136166073e-07, |
|
"loss": 0.4167, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 1.8858885017421603, |
|
"grad_norm": 0.8533451557159424, |
|
"learning_rate": 4.0170203171528974e-07, |
|
"loss": 0.3217, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 1.8873403019744484, |
|
"grad_norm": 1.4826991558074951, |
|
"learning_rate": 3.9157222538088454e-07, |
|
"loss": 0.3191, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.8887921022067364, |
|
"grad_norm": 2.1450116634368896, |
|
"learning_rate": 3.815707699972165e-07, |
|
"loss": 0.3731, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 1.8902439024390243, |
|
"grad_norm": 1.522496223449707, |
|
"learning_rate": 3.716977177291886e-07, |
|
"loss": 0.5099, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 1.8916957026713124, |
|
"grad_norm": 0.7258571982383728, |
|
"learning_rate": 3.619531200719839e-07, |
|
"loss": 0.5385, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 1.8931475029036005, |
|
"grad_norm": 3.8711488246917725, |
|
"learning_rate": 3.5233702785081035e-07, |
|
"loss": 0.3283, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 1.8945993031358885, |
|
"grad_norm": 1.6500895023345947, |
|
"learning_rate": 3.428494912206259e-07, |
|
"loss": 0.2962, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 1.8960511033681766, |
|
"grad_norm": 1.1146255731582642, |
|
"learning_rate": 3.334905596658666e-07, |
|
"loss": 0.7563, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 1.8975029036004645, |
|
"grad_norm": 1.4949523210525513, |
|
"learning_rate": 3.242602820002161e-07, |
|
"loss": 0.4394, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 1.8989547038327528, |
|
"grad_norm": 3.016923189163208, |
|
"learning_rate": 3.1515870636631696e-07, |
|
"loss": 0.5093, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 1.9004065040650406, |
|
"grad_norm": 1.2064356803894043, |
|
"learning_rate": 3.061858802355433e-07, |
|
"loss": 0.5408, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 1.9018583042973287, |
|
"grad_norm": 2.9825875759124756, |
|
"learning_rate": 2.97341850407748e-07, |
|
"loss": 0.4946, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.9033101045296168, |
|
"grad_norm": 2.217625617980957, |
|
"learning_rate": 2.886266630110185e-07, |
|
"loss": 0.5713, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 1.9047619047619047, |
|
"grad_norm": 1.1467139720916748, |
|
"learning_rate": 2.8004036350142705e-07, |
|
"loss": 0.4261, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 1.906213704994193, |
|
"grad_norm": 2.285097599029541, |
|
"learning_rate": 2.7158299666280864e-07, |
|
"loss": 0.53, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 1.9076655052264808, |
|
"grad_norm": 3.382395029067993, |
|
"learning_rate": 2.6325460660651393e-07, |
|
"loss": 0.761, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 1.909117305458769, |
|
"grad_norm": 2.245380163192749, |
|
"learning_rate": 2.550552367711956e-07, |
|
"loss": 0.6742, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 1.910569105691057, |
|
"grad_norm": 2.25600528717041, |
|
"learning_rate": 2.469849299225585e-07, |
|
"loss": 0.5978, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 1.9120209059233448, |
|
"grad_norm": 1.8853719234466553, |
|
"learning_rate": 2.390437281531627e-07, |
|
"loss": 0.422, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 1.9134727061556331, |
|
"grad_norm": 1.3422927856445312, |
|
"learning_rate": 2.3123167288217618e-07, |
|
"loss": 0.6992, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 1.914924506387921, |
|
"grad_norm": 1.3293629884719849, |
|
"learning_rate": 2.2354880485518648e-07, |
|
"loss": 0.3887, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 1.916376306620209, |
|
"grad_norm": 1.4933987855911255, |
|
"learning_rate": 2.1599516414396726e-07, |
|
"loss": 0.7679, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.9178281068524972, |
|
"grad_norm": 2.126613140106201, |
|
"learning_rate": 2.0857079014628135e-07, |
|
"loss": 0.5869, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 1.919279907084785, |
|
"grad_norm": 1.1330955028533936, |
|
"learning_rate": 2.0127572158566976e-07, |
|
"loss": 0.6385, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 1.9207317073170733, |
|
"grad_norm": 3.0028634071350098, |
|
"learning_rate": 1.9410999651125196e-07, |
|
"loss": 0.3487, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 1.9221835075493612, |
|
"grad_norm": 3.6127331256866455, |
|
"learning_rate": 1.8707365229752306e-07, |
|
"loss": 0.3929, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 1.9236353077816493, |
|
"grad_norm": 3.5471410751342773, |
|
"learning_rate": 1.8016672564416526e-07, |
|
"loss": 0.4829, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 1.9250871080139373, |
|
"grad_norm": 2.57403564453125, |
|
"learning_rate": 1.7338925257585626e-07, |
|
"loss": 0.4579, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 1.9265389082462252, |
|
"grad_norm": 0.892292320728302, |
|
"learning_rate": 1.6674126844207215e-07, |
|
"loss": 0.6123, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 1.9279907084785135, |
|
"grad_norm": 5.541355609893799, |
|
"learning_rate": 1.6022280791691547e-07, |
|
"loss": 0.5871, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 1.9294425087108014, |
|
"grad_norm": 4.043185234069824, |
|
"learning_rate": 1.5383390499892625e-07, |
|
"loss": 0.7962, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 1.9308943089430894, |
|
"grad_norm": 3.4358088970184326, |
|
"learning_rate": 1.4757459301089904e-07, |
|
"loss": 0.8971, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.9323461091753775, |
|
"grad_norm": 2.0731537342071533, |
|
"learning_rate": 1.414449045997357e-07, |
|
"loss": 0.587, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 1.9337979094076654, |
|
"grad_norm": 2.296551465988159, |
|
"learning_rate": 1.3544487173623443e-07, |
|
"loss": 0.6924, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 1.9352497096399537, |
|
"grad_norm": 1.4577088356018066, |
|
"learning_rate": 1.295745257149622e-07, |
|
"loss": 0.607, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 1.9367015098722415, |
|
"grad_norm": 2.300415277481079, |
|
"learning_rate": 1.2383389715406592e-07, |
|
"loss": 0.584, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 1.9381533101045296, |
|
"grad_norm": 5.345014572143555, |
|
"learning_rate": 1.1822301599511976e-07, |
|
"loss": 0.5331, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 1.9396051103368177, |
|
"grad_norm": 1.9530677795410156, |
|
"learning_rate": 1.1274191150297542e-07, |
|
"loss": 0.4817, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 1.9410569105691056, |
|
"grad_norm": 2.270688533782959, |
|
"learning_rate": 1.0739061226560099e-07, |
|
"loss": 0.3805, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 1.9425087108013939, |
|
"grad_norm": 4.100897312164307, |
|
"learning_rate": 1.021691461939367e-07, |
|
"loss": 0.5722, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 1.9439605110336817, |
|
"grad_norm": 1.5650062561035156, |
|
"learning_rate": 9.707754052174777e-08, |
|
"loss": 0.4488, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 1.9454123112659698, |
|
"grad_norm": 3.1894681453704834, |
|
"learning_rate": 9.211582180548295e-08, |
|
"loss": 0.8613, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.9468641114982579, |
|
"grad_norm": 4.314057350158691, |
|
"learning_rate": 8.728401592413283e-08, |
|
"loss": 0.5325, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 1.9483159117305457, |
|
"grad_norm": 1.2972936630249023, |
|
"learning_rate": 8.258214807909947e-08, |
|
"loss": 0.3854, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 1.949767711962834, |
|
"grad_norm": 1.0964834690093994, |
|
"learning_rate": 7.801024279406599e-08, |
|
"loss": 0.5104, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 1.951219512195122, |
|
"grad_norm": 2.3164939880371094, |
|
"learning_rate": 7.356832391485769e-08, |
|
"loss": 0.5469, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 1.95267131242741, |
|
"grad_norm": 0.8079650402069092, |
|
"learning_rate": 6.925641460933107e-08, |
|
"loss": 0.5622, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 1.954123112659698, |
|
"grad_norm": 0.8280888795852661, |
|
"learning_rate": 6.5074537367249e-08, |
|
"loss": 0.4257, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 1.955574912891986, |
|
"grad_norm": 1.360197901725769, |
|
"learning_rate": 6.102271400016124e-08, |
|
"loss": 0.4915, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 1.9570267131242742, |
|
"grad_norm": 4.283596992492676, |
|
"learning_rate": 5.710096564128797e-08, |
|
"loss": 0.4378, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 1.958478513356562, |
|
"grad_norm": 1.3778785467147827, |
|
"learning_rate": 5.3309312745419835e-08, |
|
"loss": 0.5003, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 1.9599303135888502, |
|
"grad_norm": 2.252000093460083, |
|
"learning_rate": 4.9647775088793035e-08, |
|
"loss": 0.5867, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.9613821138211383, |
|
"grad_norm": 2.1115314960479736, |
|
"learning_rate": 4.611637176901162e-08, |
|
"loss": 0.6936, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 1.962833914053426, |
|
"grad_norm": 0.7023373246192932, |
|
"learning_rate": 4.2715121204922606e-08, |
|
"loss": 0.4358, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 1.9642857142857144, |
|
"grad_norm": 2.6030709743499756, |
|
"learning_rate": 3.944404113653544e-08, |
|
"loss": 0.6004, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 1.9657375145180023, |
|
"grad_norm": 1.5096830129623413, |
|
"learning_rate": 3.630314862492767e-08, |
|
"loss": 0.66, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 1.9671893147502904, |
|
"grad_norm": 2.45739483833313, |
|
"learning_rate": 3.3292460052147814e-08, |
|
"loss": 0.5751, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 1.9686411149825784, |
|
"grad_norm": 1.5947012901306152, |
|
"learning_rate": 3.0411991121143124e-08, |
|
"loss": 0.4873, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 1.9700929152148663, |
|
"grad_norm": 4.944673538208008, |
|
"learning_rate": 2.76617568556653e-08, |
|
"loss": 0.5386, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 1.9715447154471546, |
|
"grad_norm": 1.3134737014770508, |
|
"learning_rate": 2.5041771600195496e-08, |
|
"loss": 0.596, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 1.9729965156794425, |
|
"grad_norm": 1.0245544910430908, |
|
"learning_rate": 2.2552049019874955e-08, |
|
"loss": 0.7618, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 1.9744483159117305, |
|
"grad_norm": 1.1660908460617065, |
|
"learning_rate": 2.0192602100424507e-08, |
|
"loss": 0.5421, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.9759001161440186, |
|
"grad_norm": 2.883108377456665, |
|
"learning_rate": 1.796344314809184e-08, |
|
"loss": 0.7773, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 1.9773519163763065, |
|
"grad_norm": 1.4589025974273682, |
|
"learning_rate": 1.5864583789565457e-08, |
|
"loss": 0.7103, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 1.9788037166085948, |
|
"grad_norm": 0.33630794286727905, |
|
"learning_rate": 1.3896034971935812e-08, |
|
"loss": 0.5487, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 1.9802555168408826, |
|
"grad_norm": 8.537004470825195, |
|
"learning_rate": 1.2057806962625928e-08, |
|
"loss": 0.7031, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 1.9817073170731707, |
|
"grad_norm": 2.483400583267212, |
|
"learning_rate": 1.0349909349333109e-08, |
|
"loss": 0.5642, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 1.9831591173054588, |
|
"grad_norm": 3.4803388118743896, |
|
"learning_rate": 8.77235104000118e-09, |
|
"loss": 0.3511, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 1.9846109175377467, |
|
"grad_norm": 3.3463430404663086, |
|
"learning_rate": 7.3251402627427805e-09, |
|
"loss": 0.3859, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 1.986062717770035, |
|
"grad_norm": 2.710141897201538, |
|
"learning_rate": 6.008284565825473e-09, |
|
"loss": 0.5433, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 1.9875145180023228, |
|
"grad_norm": 1.5595346689224243, |
|
"learning_rate": 4.82179081761347e-09, |
|
"loss": 0.5447, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 1.988966318234611, |
|
"grad_norm": 2.3480842113494873, |
|
"learning_rate": 3.76566520653987e-09, |
|
"loss": 0.4713, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.990418118466899, |
|
"grad_norm": 0.828123152256012, |
|
"learning_rate": 2.8399132410733553e-09, |
|
"loss": 0.5635, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 1.9918699186991868, |
|
"grad_norm": 2.631500720977783, |
|
"learning_rate": 2.044539749684882e-09, |
|
"loss": 0.5184, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 1.9933217189314751, |
|
"grad_norm": 2.2084085941314697, |
|
"learning_rate": 1.3795488808310274e-09, |
|
"loss": 0.5075, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 1.994773519163763, |
|
"grad_norm": 2.0961720943450928, |
|
"learning_rate": 8.449441029234617e-10, |
|
"loss": 0.9044, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 1.996225319396051, |
|
"grad_norm": 1.8734853267669678, |
|
"learning_rate": 4.4072820432061733e-10, |
|
"loss": 0.4804, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.9976771196283392, |
|
"grad_norm": 1.1035456657409668, |
|
"learning_rate": 1.6690329330271147e-10, |
|
"loss": 0.4869, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 1.999128919860627, |
|
"grad_norm": 3.4342620372772217, |
|
"learning_rate": 2.3470798063418564e-11, |
|
"loss": 0.4943, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 13776, |
|
"total_flos": 2.442921933399982e+18, |
|
"train_loss": 0.63349506684712, |
|
"train_runtime": 13338.7431, |
|
"train_samples_per_second": 4.131, |
|
"train_steps_per_second": 1.033 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 13776, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 3000, |
|
"total_flos": 2.442921933399982e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|