|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 4155, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.007220216606498195, |
|
"grad_norm": 15.138864517211914, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.9322, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01444043321299639, |
|
"grad_norm": 11.833885192871094, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.7888, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021660649819494584, |
|
"grad_norm": 7.410687446594238, |
|
"learning_rate": 3e-06, |
|
"loss": 0.5394, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02888086642599278, |
|
"grad_norm": 3.045423746109009, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.3, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.036101083032490974, |
|
"grad_norm": 0.5977299809455872, |
|
"learning_rate": 5e-06, |
|
"loss": 0.158, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04332129963898917, |
|
"grad_norm": 0.23663052916526794, |
|
"learning_rate": 6e-06, |
|
"loss": 0.1049, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05054151624548736, |
|
"grad_norm": 0.24775876104831696, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.0929, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05776173285198556, |
|
"grad_norm": 0.250676691532135, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0818, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06498194945848375, |
|
"grad_norm": 0.2527087926864624, |
|
"learning_rate": 9e-06, |
|
"loss": 0.0812, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07220216606498195, |
|
"grad_norm": 0.24259184300899506, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0726, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07942238267148015, |
|
"grad_norm": 0.19805486500263214, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.0657, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08664259927797834, |
|
"grad_norm": 0.21339212357997894, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.0649, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09386281588447654, |
|
"grad_norm": 0.27472904324531555, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.0608, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10108303249097472, |
|
"grad_norm": 0.3953518867492676, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.055, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10830324909747292, |
|
"grad_norm": 0.19267311692237854, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0546, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11552346570397112, |
|
"grad_norm": 0.16826550662517548, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.0519, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12274368231046931, |
|
"grad_norm": 0.2655981183052063, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.0521, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1299638989169675, |
|
"grad_norm": 0.25509196519851685, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.0535, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1371841155234657, |
|
"grad_norm": 0.38085201382637024, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.0502, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1444043321299639, |
|
"grad_norm": 0.2515423893928528, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0502, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15162454873646208, |
|
"grad_norm": 0.2517891824245453, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.0514, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1588447653429603, |
|
"grad_norm": 0.21332941949367523, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.0463, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16606498194945848, |
|
"grad_norm": 0.2846091687679291, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.0526, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.17328519855595667, |
|
"grad_norm": 0.2511647045612335, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.0487, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18050541516245489, |
|
"grad_norm": 0.20305795967578888, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0437, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18772563176895307, |
|
"grad_norm": 0.27594617009162903, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.0455, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19494584837545126, |
|
"grad_norm": 0.4420526921749115, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.0507, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.20216606498194944, |
|
"grad_norm": 0.3529132604598999, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.0483, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20938628158844766, |
|
"grad_norm": 0.18353258073329926, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.0435, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21660649819494585, |
|
"grad_norm": 0.2105967402458191, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0489, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22382671480144403, |
|
"grad_norm": 0.21252156794071198, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.0434, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.23104693140794225, |
|
"grad_norm": 0.189384326338768, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.0391, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.23826714801444043, |
|
"grad_norm": 0.22159768640995026, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.0438, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24548736462093862, |
|
"grad_norm": 0.2974785268306732, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.0421, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2527075812274368, |
|
"grad_norm": 0.21233530342578888, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0419, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.259927797833935, |
|
"grad_norm": 0.198137104511261, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.0411, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.26714801444043323, |
|
"grad_norm": 0.29586824774742126, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.0451, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2743682310469314, |
|
"grad_norm": 0.1930268108844757, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.0401, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2815884476534296, |
|
"grad_norm": 0.21896091103553772, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.0393, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2888086642599278, |
|
"grad_norm": 0.21269367635250092, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0415, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.296028880866426, |
|
"grad_norm": 0.2300368845462799, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.0436, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.30324909747292417, |
|
"grad_norm": 0.17586210370063782, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.0417, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3104693140794224, |
|
"grad_norm": 0.20255301892757416, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.0373, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3176895306859206, |
|
"grad_norm": 0.29133227467536926, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.0427, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3249097472924188, |
|
"grad_norm": 0.14938658475875854, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.0402, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.33212996389891697, |
|
"grad_norm": 0.20491056144237518, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.0407, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33935018050541516, |
|
"grad_norm": 0.38727957010269165, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.0393, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.34657039711191334, |
|
"grad_norm": 0.2549976408481598, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.0381, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.35379061371841153, |
|
"grad_norm": 0.21853362023830414, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.0407, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.36101083032490977, |
|
"grad_norm": 0.2548893094062805, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0366, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36823104693140796, |
|
"grad_norm": 0.2966635525226593, |
|
"learning_rate": 4.986320109439125e-05, |
|
"loss": 0.0388, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.37545126353790614, |
|
"grad_norm": 0.2587813436985016, |
|
"learning_rate": 4.9726402188782486e-05, |
|
"loss": 0.0422, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.38267148014440433, |
|
"grad_norm": 0.20156943798065186, |
|
"learning_rate": 4.958960328317374e-05, |
|
"loss": 0.0379, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3898916967509025, |
|
"grad_norm": 0.23318979144096375, |
|
"learning_rate": 4.945280437756498e-05, |
|
"loss": 0.0387, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3971119133574007, |
|
"grad_norm": 0.21295292675495148, |
|
"learning_rate": 4.931600547195623e-05, |
|
"loss": 0.0363, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4043321299638989, |
|
"grad_norm": 0.23067612946033478, |
|
"learning_rate": 4.917920656634747e-05, |
|
"loss": 0.0396, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.41155234657039713, |
|
"grad_norm": 0.1751265823841095, |
|
"learning_rate": 4.904240766073871e-05, |
|
"loss": 0.0386, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4187725631768953, |
|
"grad_norm": 0.32408931851387024, |
|
"learning_rate": 4.8905608755129964e-05, |
|
"loss": 0.0369, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4259927797833935, |
|
"grad_norm": 0.1949748396873474, |
|
"learning_rate": 4.876880984952121e-05, |
|
"loss": 0.0387, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4332129963898917, |
|
"grad_norm": 0.21588054299354553, |
|
"learning_rate": 4.863201094391245e-05, |
|
"loss": 0.0365, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4404332129963899, |
|
"grad_norm": 0.19144290685653687, |
|
"learning_rate": 4.849521203830369e-05, |
|
"loss": 0.035, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.44765342960288806, |
|
"grad_norm": 0.25373053550720215, |
|
"learning_rate": 4.835841313269494e-05, |
|
"loss": 0.0391, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4548736462093863, |
|
"grad_norm": 0.2404279261827469, |
|
"learning_rate": 4.822161422708619e-05, |
|
"loss": 0.0364, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4620938628158845, |
|
"grad_norm": 0.1697109341621399, |
|
"learning_rate": 4.808481532147743e-05, |
|
"loss": 0.0322, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4693140794223827, |
|
"grad_norm": 0.17318709194660187, |
|
"learning_rate": 4.7948016415868674e-05, |
|
"loss": 0.0365, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.47653429602888087, |
|
"grad_norm": 0.18541967868804932, |
|
"learning_rate": 4.781121751025992e-05, |
|
"loss": 0.0394, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.48375451263537905, |
|
"grad_norm": 0.21878574788570404, |
|
"learning_rate": 4.7674418604651164e-05, |
|
"loss": 0.0336, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.49097472924187724, |
|
"grad_norm": 0.2381717413663864, |
|
"learning_rate": 4.753761969904241e-05, |
|
"loss": 0.0343, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4981949458483754, |
|
"grad_norm": 0.2559703290462494, |
|
"learning_rate": 4.7400820793433654e-05, |
|
"loss": 0.0356, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5054151624548736, |
|
"grad_norm": 0.25602829456329346, |
|
"learning_rate": 4.72640218878249e-05, |
|
"loss": 0.0341, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5126353790613718, |
|
"grad_norm": 0.30914345383644104, |
|
"learning_rate": 4.7127222982216145e-05, |
|
"loss": 0.0378, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.51985559566787, |
|
"grad_norm": 0.24148257076740265, |
|
"learning_rate": 4.699042407660739e-05, |
|
"loss": 0.0329, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5270758122743683, |
|
"grad_norm": 0.16448011994361877, |
|
"learning_rate": 4.6853625170998635e-05, |
|
"loss": 0.0373, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5342960288808665, |
|
"grad_norm": 0.20992177724838257, |
|
"learning_rate": 4.671682626538988e-05, |
|
"loss": 0.0358, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5415162454873647, |
|
"grad_norm": 0.1955261081457138, |
|
"learning_rate": 4.6580027359781126e-05, |
|
"loss": 0.0373, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5487364620938628, |
|
"grad_norm": 0.1674635410308838, |
|
"learning_rate": 4.6443228454172364e-05, |
|
"loss": 0.0332, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.555956678700361, |
|
"grad_norm": 0.22619013488292694, |
|
"learning_rate": 4.630642954856361e-05, |
|
"loss": 0.0375, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5631768953068592, |
|
"grad_norm": 0.20930354297161102, |
|
"learning_rate": 4.616963064295486e-05, |
|
"loss": 0.0385, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5703971119133574, |
|
"grad_norm": 0.23155862092971802, |
|
"learning_rate": 4.6032831737346106e-05, |
|
"loss": 0.0325, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5776173285198556, |
|
"grad_norm": 0.25036847591400146, |
|
"learning_rate": 4.5896032831737345e-05, |
|
"loss": 0.0326, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5848375451263538, |
|
"grad_norm": 0.17637784779071808, |
|
"learning_rate": 4.575923392612859e-05, |
|
"loss": 0.0352, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.592057761732852, |
|
"grad_norm": 0.2410997748374939, |
|
"learning_rate": 4.5622435020519835e-05, |
|
"loss": 0.0343, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5992779783393501, |
|
"grad_norm": 0.18689364194869995, |
|
"learning_rate": 4.548563611491109e-05, |
|
"loss": 0.0337, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6064981949458483, |
|
"grad_norm": 0.1366495043039322, |
|
"learning_rate": 4.5348837209302326e-05, |
|
"loss": 0.0332, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6137184115523465, |
|
"grad_norm": 0.1845828890800476, |
|
"learning_rate": 4.521203830369357e-05, |
|
"loss": 0.0337, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6209386281588448, |
|
"grad_norm": 0.17139053344726562, |
|
"learning_rate": 4.5075239398084816e-05, |
|
"loss": 0.0373, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.628158844765343, |
|
"grad_norm": 0.25084948539733887, |
|
"learning_rate": 4.493844049247606e-05, |
|
"loss": 0.0347, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6353790613718412, |
|
"grad_norm": 0.23243854939937592, |
|
"learning_rate": 4.4801641586867306e-05, |
|
"loss": 0.0298, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6425992779783394, |
|
"grad_norm": 0.16921032965183258, |
|
"learning_rate": 4.466484268125855e-05, |
|
"loss": 0.0335, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6498194945848376, |
|
"grad_norm": 0.21709440648555756, |
|
"learning_rate": 4.45280437756498e-05, |
|
"loss": 0.0294, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6570397111913358, |
|
"grad_norm": 0.25876304507255554, |
|
"learning_rate": 4.439124487004104e-05, |
|
"loss": 0.0355, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6642599277978339, |
|
"grad_norm": 0.23916779458522797, |
|
"learning_rate": 4.425444596443229e-05, |
|
"loss": 0.0301, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6714801444043321, |
|
"grad_norm": 0.19482851028442383, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 0.0331, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6787003610108303, |
|
"grad_norm": 0.1518426537513733, |
|
"learning_rate": 4.398084815321478e-05, |
|
"loss": 0.0313, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6859205776173285, |
|
"grad_norm": 0.17477025091648102, |
|
"learning_rate": 4.384404924760602e-05, |
|
"loss": 0.0284, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6931407942238267, |
|
"grad_norm": 0.2821789085865021, |
|
"learning_rate": 4.370725034199726e-05, |
|
"loss": 0.0294, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7003610108303249, |
|
"grad_norm": 0.19530260562896729, |
|
"learning_rate": 4.3570451436388506e-05, |
|
"loss": 0.0307, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7075812274368231, |
|
"grad_norm": 0.2189357876777649, |
|
"learning_rate": 4.343365253077976e-05, |
|
"loss": 0.0325, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7148014440433214, |
|
"grad_norm": 0.18680965900421143, |
|
"learning_rate": 4.3296853625171004e-05, |
|
"loss": 0.0298, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7220216606498195, |
|
"grad_norm": 0.20822104811668396, |
|
"learning_rate": 4.316005471956224e-05, |
|
"loss": 0.0318, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7292418772563177, |
|
"grad_norm": 0.17637012898921967, |
|
"learning_rate": 4.302325581395349e-05, |
|
"loss": 0.0267, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7364620938628159, |
|
"grad_norm": 0.16878972947597504, |
|
"learning_rate": 4.288645690834473e-05, |
|
"loss": 0.0285, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7436823104693141, |
|
"grad_norm": 0.3033148944377899, |
|
"learning_rate": 4.2749658002735984e-05, |
|
"loss": 0.0315, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7509025270758123, |
|
"grad_norm": 0.21088653802871704, |
|
"learning_rate": 4.261285909712722e-05, |
|
"loss": 0.0329, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7581227436823105, |
|
"grad_norm": 0.20485380291938782, |
|
"learning_rate": 4.247606019151847e-05, |
|
"loss": 0.0321, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7653429602888087, |
|
"grad_norm": 0.21797245740890503, |
|
"learning_rate": 4.233926128590971e-05, |
|
"loss": 0.0288, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7725631768953068, |
|
"grad_norm": 0.1585124433040619, |
|
"learning_rate": 4.220246238030096e-05, |
|
"loss": 0.0332, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.779783393501805, |
|
"grad_norm": 0.18445636332035065, |
|
"learning_rate": 4.2065663474692204e-05, |
|
"loss": 0.0317, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7870036101083032, |
|
"grad_norm": 0.2596589922904968, |
|
"learning_rate": 4.192886456908345e-05, |
|
"loss": 0.0283, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7942238267148014, |
|
"grad_norm": 0.15991349518299103, |
|
"learning_rate": 4.1792065663474694e-05, |
|
"loss": 0.027, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8014440433212996, |
|
"grad_norm": 0.2013750970363617, |
|
"learning_rate": 4.165526675786594e-05, |
|
"loss": 0.0285, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8086642599277978, |
|
"grad_norm": 0.16817732155323029, |
|
"learning_rate": 4.1518467852257184e-05, |
|
"loss": 0.0304, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8158844765342961, |
|
"grad_norm": 0.20662030577659607, |
|
"learning_rate": 4.138166894664843e-05, |
|
"loss": 0.0309, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8231046931407943, |
|
"grad_norm": 0.24165768921375275, |
|
"learning_rate": 4.1244870041039675e-05, |
|
"loss": 0.0305, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8303249097472925, |
|
"grad_norm": 0.15945856273174286, |
|
"learning_rate": 4.110807113543092e-05, |
|
"loss": 0.0331, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8375451263537906, |
|
"grad_norm": 0.23878367245197296, |
|
"learning_rate": 4.097127222982216e-05, |
|
"loss": 0.0287, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8447653429602888, |
|
"grad_norm": 0.20223484933376312, |
|
"learning_rate": 4.083447332421341e-05, |
|
"loss": 0.0283, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.851985559566787, |
|
"grad_norm": 0.23693232238292694, |
|
"learning_rate": 4.0697674418604655e-05, |
|
"loss": 0.0283, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8592057761732852, |
|
"grad_norm": 0.21208103001117706, |
|
"learning_rate": 4.05608755129959e-05, |
|
"loss": 0.0291, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8664259927797834, |
|
"grad_norm": 0.21978527307510376, |
|
"learning_rate": 4.042407660738714e-05, |
|
"loss": 0.031, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8736462093862816, |
|
"grad_norm": 0.18041111528873444, |
|
"learning_rate": 4.0287277701778384e-05, |
|
"loss": 0.0266, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8808664259927798, |
|
"grad_norm": 0.15536178648471832, |
|
"learning_rate": 4.015047879616963e-05, |
|
"loss": 0.0311, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8880866425992779, |
|
"grad_norm": 0.17821329832077026, |
|
"learning_rate": 4.001367989056088e-05, |
|
"loss": 0.0303, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8953068592057761, |
|
"grad_norm": 0.22445450723171234, |
|
"learning_rate": 3.987688098495212e-05, |
|
"loss": 0.0249, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9025270758122743, |
|
"grad_norm": 0.22271278500556946, |
|
"learning_rate": 3.9740082079343365e-05, |
|
"loss": 0.0287, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9097472924187726, |
|
"grad_norm": 0.23373781144618988, |
|
"learning_rate": 3.960328317373461e-05, |
|
"loss": 0.0301, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9169675090252708, |
|
"grad_norm": 0.1760292649269104, |
|
"learning_rate": 3.9466484268125855e-05, |
|
"loss": 0.0266, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.924187725631769, |
|
"grad_norm": 0.19504322111606598, |
|
"learning_rate": 3.93296853625171e-05, |
|
"loss": 0.027, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9314079422382672, |
|
"grad_norm": 0.23191095888614655, |
|
"learning_rate": 3.9192886456908346e-05, |
|
"loss": 0.0275, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9386281588447654, |
|
"grad_norm": 0.28094080090522766, |
|
"learning_rate": 3.905608755129959e-05, |
|
"loss": 0.0299, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9458483754512635, |
|
"grad_norm": 0.15457668900489807, |
|
"learning_rate": 3.8919288645690836e-05, |
|
"loss": 0.0281, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9530685920577617, |
|
"grad_norm": 0.20441775023937225, |
|
"learning_rate": 3.878248974008208e-05, |
|
"loss": 0.0314, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9602888086642599, |
|
"grad_norm": 0.2961263954639435, |
|
"learning_rate": 3.864569083447333e-05, |
|
"loss": 0.0285, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9675090252707581, |
|
"grad_norm": 0.16792945563793182, |
|
"learning_rate": 3.850889192886457e-05, |
|
"loss": 0.0277, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9747292418772563, |
|
"grad_norm": 0.3934960961341858, |
|
"learning_rate": 3.837209302325582e-05, |
|
"loss": 0.0303, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9819494584837545, |
|
"grad_norm": 0.24637605249881744, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 0.0272, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9891696750902527, |
|
"grad_norm": 0.21808254718780518, |
|
"learning_rate": 3.809849521203831e-05, |
|
"loss": 0.0266, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9963898916967509, |
|
"grad_norm": 0.2921408712863922, |
|
"learning_rate": 3.796169630642955e-05, |
|
"loss": 0.0274, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9881444816395965, |
|
"eval_f1": 0.8667477505754343, |
|
"eval_loss": 0.030792543664574623, |
|
"eval_precision": 0.8156923076923077, |
|
"eval_recall": 0.9246212226903653, |
|
"eval_runtime": 271.409, |
|
"eval_samples_per_second": 81.464, |
|
"eval_steps_per_second": 1.275, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 1.0036101083032491, |
|
"grad_norm": 0.18204669654369354, |
|
"learning_rate": 3.78248974008208e-05, |
|
"loss": 0.0265, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.0108303249097472, |
|
"grad_norm": 0.13540150225162506, |
|
"learning_rate": 3.7688098495212036e-05, |
|
"loss": 0.0239, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.0180505415162455, |
|
"grad_norm": 0.2114001214504242, |
|
"learning_rate": 3.755129958960328e-05, |
|
"loss": 0.0208, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0252707581227436, |
|
"grad_norm": 0.20667189359664917, |
|
"learning_rate": 3.741450068399453e-05, |
|
"loss": 0.0282, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.032490974729242, |
|
"grad_norm": 0.16794054210186005, |
|
"learning_rate": 3.727770177838578e-05, |
|
"loss": 0.0216, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.03971119133574, |
|
"grad_norm": 0.19682972133159637, |
|
"learning_rate": 3.714090287277702e-05, |
|
"loss": 0.0209, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0469314079422383, |
|
"grad_norm": 0.18295951187610626, |
|
"learning_rate": 3.700410396716826e-05, |
|
"loss": 0.0251, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0541516245487366, |
|
"grad_norm": 0.1491762399673462, |
|
"learning_rate": 3.686730506155951e-05, |
|
"loss": 0.0215, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.0613718411552346, |
|
"grad_norm": 0.3141365647315979, |
|
"learning_rate": 3.673050615595075e-05, |
|
"loss": 0.0235, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.068592057761733, |
|
"grad_norm": 0.18586914241313934, |
|
"learning_rate": 3.6593707250342e-05, |
|
"loss": 0.0226, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.075812274368231, |
|
"grad_norm": 0.26788514852523804, |
|
"learning_rate": 3.645690834473324e-05, |
|
"loss": 0.0214, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.0830324909747293, |
|
"grad_norm": 0.18625912070274353, |
|
"learning_rate": 3.632010943912449e-05, |
|
"loss": 0.0223, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0902527075812274, |
|
"grad_norm": 0.23848237097263336, |
|
"learning_rate": 3.6183310533515733e-05, |
|
"loss": 0.0222, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.0974729241877257, |
|
"grad_norm": 0.24550049006938934, |
|
"learning_rate": 3.604651162790698e-05, |
|
"loss": 0.0223, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.1046931407942238, |
|
"grad_norm": 0.33519259095191956, |
|
"learning_rate": 3.5909712722298224e-05, |
|
"loss": 0.0207, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.111913357400722, |
|
"grad_norm": 0.19318382441997528, |
|
"learning_rate": 3.577291381668947e-05, |
|
"loss": 0.0255, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.1191335740072201, |
|
"grad_norm": 0.23220829665660858, |
|
"learning_rate": 3.5636114911080714e-05, |
|
"loss": 0.0218, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.1263537906137184, |
|
"grad_norm": 0.1848268061876297, |
|
"learning_rate": 3.549931600547195e-05, |
|
"loss": 0.0252, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.1335740072202167, |
|
"grad_norm": 0.20493029057979584, |
|
"learning_rate": 3.5362517099863205e-05, |
|
"loss": 0.0233, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.1407942238267148, |
|
"grad_norm": 0.19206510484218597, |
|
"learning_rate": 3.522571819425445e-05, |
|
"loss": 0.0227, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.1480144404332129, |
|
"grad_norm": 0.25989973545074463, |
|
"learning_rate": 3.5088919288645695e-05, |
|
"loss": 0.0205, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.1552346570397112, |
|
"grad_norm": 0.20777900516986847, |
|
"learning_rate": 3.4952120383036933e-05, |
|
"loss": 0.026, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.1624548736462095, |
|
"grad_norm": 0.25129932165145874, |
|
"learning_rate": 3.481532147742818e-05, |
|
"loss": 0.0246, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.1696750902527075, |
|
"grad_norm": 0.245501309633255, |
|
"learning_rate": 3.467852257181943e-05, |
|
"loss": 0.0232, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1768953068592058, |
|
"grad_norm": 0.19470812380313873, |
|
"learning_rate": 3.4541723666210676e-05, |
|
"loss": 0.02, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.184115523465704, |
|
"grad_norm": 0.23183578252792358, |
|
"learning_rate": 3.4404924760601914e-05, |
|
"loss": 0.0257, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1913357400722022, |
|
"grad_norm": 0.2162778079509735, |
|
"learning_rate": 3.426812585499316e-05, |
|
"loss": 0.0242, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.1985559566787003, |
|
"grad_norm": 0.15980581939220428, |
|
"learning_rate": 3.4131326949384405e-05, |
|
"loss": 0.0242, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.2057761732851986, |
|
"grad_norm": 0.1984899491071701, |
|
"learning_rate": 3.399452804377565e-05, |
|
"loss": 0.0235, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.2129963898916967, |
|
"grad_norm": 0.19146302342414856, |
|
"learning_rate": 3.3857729138166895e-05, |
|
"loss": 0.0212, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.220216606498195, |
|
"grad_norm": 0.19686651229858398, |
|
"learning_rate": 3.372093023255814e-05, |
|
"loss": 0.0242, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.2274368231046933, |
|
"grad_norm": 0.17915378510951996, |
|
"learning_rate": 3.3584131326949385e-05, |
|
"loss": 0.0264, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2346570397111913, |
|
"grad_norm": 0.18991689383983612, |
|
"learning_rate": 3.344733242134063e-05, |
|
"loss": 0.0227, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.2418772563176894, |
|
"grad_norm": 0.16964809596538544, |
|
"learning_rate": 3.3310533515731876e-05, |
|
"loss": 0.0229, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.2490974729241877, |
|
"grad_norm": 0.33044272661209106, |
|
"learning_rate": 3.317373461012312e-05, |
|
"loss": 0.0233, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.256317689530686, |
|
"grad_norm": 0.27375954389572144, |
|
"learning_rate": 3.3036935704514366e-05, |
|
"loss": 0.023, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.263537906137184, |
|
"grad_norm": 0.19099849462509155, |
|
"learning_rate": 3.290013679890561e-05, |
|
"loss": 0.0199, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2707581227436824, |
|
"grad_norm": 0.21204090118408203, |
|
"learning_rate": 3.276333789329685e-05, |
|
"loss": 0.0228, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2779783393501805, |
|
"grad_norm": 0.17759856581687927, |
|
"learning_rate": 3.26265389876881e-05, |
|
"loss": 0.0209, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.2851985559566788, |
|
"grad_norm": 0.19070599973201752, |
|
"learning_rate": 3.248974008207935e-05, |
|
"loss": 0.0223, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.2924187725631768, |
|
"grad_norm": 0.18185602128505707, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 0.0201, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2996389891696751, |
|
"grad_norm": 0.20534738898277283, |
|
"learning_rate": 3.221614227086183e-05, |
|
"loss": 0.0202, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3068592057761732, |
|
"grad_norm": 0.2479701191186905, |
|
"learning_rate": 3.2079343365253076e-05, |
|
"loss": 0.019, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.3140794223826715, |
|
"grad_norm": 0.2699389159679413, |
|
"learning_rate": 3.194254445964433e-05, |
|
"loss": 0.023, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.3212996389891698, |
|
"grad_norm": 0.22228173911571503, |
|
"learning_rate": 3.180574555403557e-05, |
|
"loss": 0.0199, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.3285198555956679, |
|
"grad_norm": 0.1938995122909546, |
|
"learning_rate": 3.166894664842681e-05, |
|
"loss": 0.0235, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.335740072202166, |
|
"grad_norm": 0.17550387978553772, |
|
"learning_rate": 3.1532147742818057e-05, |
|
"loss": 0.02, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.3429602888086642, |
|
"grad_norm": 0.13713954389095306, |
|
"learning_rate": 3.13953488372093e-05, |
|
"loss": 0.0239, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.3501805054151625, |
|
"grad_norm": 0.19476300477981567, |
|
"learning_rate": 3.125854993160055e-05, |
|
"loss": 0.0197, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.3574007220216606, |
|
"grad_norm": 0.23414915800094604, |
|
"learning_rate": 3.112175102599179e-05, |
|
"loss": 0.0179, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.364620938628159, |
|
"grad_norm": 0.20327487587928772, |
|
"learning_rate": 3.098495212038304e-05, |
|
"loss": 0.0215, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.371841155234657, |
|
"grad_norm": 0.2250749170780182, |
|
"learning_rate": 3.084815321477428e-05, |
|
"loss": 0.0213, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3790613718411553, |
|
"grad_norm": 0.17825260758399963, |
|
"learning_rate": 3.071135430916553e-05, |
|
"loss": 0.0199, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.3862815884476534, |
|
"grad_norm": 0.22727660834789276, |
|
"learning_rate": 3.057455540355677e-05, |
|
"loss": 0.0223, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.3935018050541517, |
|
"grad_norm": 0.18458124995231628, |
|
"learning_rate": 3.0437756497948018e-05, |
|
"loss": 0.0223, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.4007220216606497, |
|
"grad_norm": 0.2251572608947754, |
|
"learning_rate": 3.0300957592339263e-05, |
|
"loss": 0.0227, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.407942238267148, |
|
"grad_norm": 0.2070099413394928, |
|
"learning_rate": 3.016415868673051e-05, |
|
"loss": 0.023, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.4151624548736463, |
|
"grad_norm": 0.1247980073094368, |
|
"learning_rate": 3.002735978112175e-05, |
|
"loss": 0.0201, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.4223826714801444, |
|
"grad_norm": 0.19043412804603577, |
|
"learning_rate": 2.9890560875512996e-05, |
|
"loss": 0.0186, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.4296028880866425, |
|
"grad_norm": 0.22574257850646973, |
|
"learning_rate": 2.9753761969904244e-05, |
|
"loss": 0.0189, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.4368231046931408, |
|
"grad_norm": 0.17224276065826416, |
|
"learning_rate": 2.961696306429549e-05, |
|
"loss": 0.0205, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.444043321299639, |
|
"grad_norm": 0.18050329387187958, |
|
"learning_rate": 2.9480164158686728e-05, |
|
"loss": 0.0215, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4512635379061372, |
|
"grad_norm": 0.23059330880641937, |
|
"learning_rate": 2.9343365253077976e-05, |
|
"loss": 0.0225, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.4584837545126355, |
|
"grad_norm": 0.24907854199409485, |
|
"learning_rate": 2.920656634746922e-05, |
|
"loss": 0.0204, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.4657039711191335, |
|
"grad_norm": 0.1699414849281311, |
|
"learning_rate": 2.9069767441860467e-05, |
|
"loss": 0.024, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.4729241877256318, |
|
"grad_norm": 0.16529108583927155, |
|
"learning_rate": 2.893296853625171e-05, |
|
"loss": 0.0196, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.48014440433213, |
|
"grad_norm": 0.18618591129779816, |
|
"learning_rate": 2.8796169630642954e-05, |
|
"loss": 0.0235, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.4873646209386282, |
|
"grad_norm": 0.15030409395694733, |
|
"learning_rate": 2.8659370725034202e-05, |
|
"loss": 0.0217, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.4945848375451263, |
|
"grad_norm": 0.28873658180236816, |
|
"learning_rate": 2.8522571819425448e-05, |
|
"loss": 0.0249, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.5018050541516246, |
|
"grad_norm": 0.19894906878471375, |
|
"learning_rate": 2.838577291381669e-05, |
|
"loss": 0.0217, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.5090252707581229, |
|
"grad_norm": 0.20735248923301697, |
|
"learning_rate": 2.8248974008207935e-05, |
|
"loss": 0.0223, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.516245487364621, |
|
"grad_norm": 0.13647274672985077, |
|
"learning_rate": 2.811217510259918e-05, |
|
"loss": 0.02, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.523465703971119, |
|
"grad_norm": 0.16693101823329926, |
|
"learning_rate": 2.797537619699043e-05, |
|
"loss": 0.0243, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.5306859205776173, |
|
"grad_norm": 0.21320907771587372, |
|
"learning_rate": 2.7838577291381667e-05, |
|
"loss": 0.0222, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.5379061371841156, |
|
"grad_norm": 0.2132214903831482, |
|
"learning_rate": 2.7701778385772915e-05, |
|
"loss": 0.0219, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.5451263537906137, |
|
"grad_norm": 0.1506602168083191, |
|
"learning_rate": 2.756497948016416e-05, |
|
"loss": 0.0224, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.5523465703971118, |
|
"grad_norm": 0.18722060322761536, |
|
"learning_rate": 2.7428180574555406e-05, |
|
"loss": 0.0222, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.55956678700361, |
|
"grad_norm": 0.22277644276618958, |
|
"learning_rate": 2.7291381668946648e-05, |
|
"loss": 0.0248, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.5667870036101084, |
|
"grad_norm": 0.16420182585716248, |
|
"learning_rate": 2.7154582763337893e-05, |
|
"loss": 0.0197, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.5740072202166067, |
|
"grad_norm": 0.23195837438106537, |
|
"learning_rate": 2.701778385772914e-05, |
|
"loss": 0.0213, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.5812274368231047, |
|
"grad_norm": 0.19835099577903748, |
|
"learning_rate": 2.6880984952120387e-05, |
|
"loss": 0.0224, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.5884476534296028, |
|
"grad_norm": 0.1974717378616333, |
|
"learning_rate": 2.674418604651163e-05, |
|
"loss": 0.0248, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.595667870036101, |
|
"grad_norm": 0.23695822060108185, |
|
"learning_rate": 2.6607387140902874e-05, |
|
"loss": 0.0199, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.6028880866425994, |
|
"grad_norm": 0.2396339625120163, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 0.0224, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.6101083032490975, |
|
"grad_norm": 0.20308814942836761, |
|
"learning_rate": 2.6333789329685364e-05, |
|
"loss": 0.0224, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.6173285198555956, |
|
"grad_norm": 0.2676025927066803, |
|
"learning_rate": 2.6196990424076606e-05, |
|
"loss": 0.0192, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.6245487364620939, |
|
"grad_norm": 0.1903916597366333, |
|
"learning_rate": 2.606019151846785e-05, |
|
"loss": 0.0234, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.6317689530685922, |
|
"grad_norm": 0.194184347987175, |
|
"learning_rate": 2.59233926128591e-05, |
|
"loss": 0.021, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.6389891696750902, |
|
"grad_norm": 0.22250430285930634, |
|
"learning_rate": 2.5786593707250345e-05, |
|
"loss": 0.0211, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.6462093862815883, |
|
"grad_norm": 0.18064111471176147, |
|
"learning_rate": 2.5649794801641587e-05, |
|
"loss": 0.0176, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.6534296028880866, |
|
"grad_norm": 0.22021658718585968, |
|
"learning_rate": 2.5512995896032832e-05, |
|
"loss": 0.0205, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.660649819494585, |
|
"grad_norm": 0.18112881481647491, |
|
"learning_rate": 2.5376196990424077e-05, |
|
"loss": 0.0198, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.6678700361010832, |
|
"grad_norm": 0.25381046533584595, |
|
"learning_rate": 2.5239398084815325e-05, |
|
"loss": 0.0209, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.6750902527075813, |
|
"grad_norm": 0.19742076098918915, |
|
"learning_rate": 2.5102599179206564e-05, |
|
"loss": 0.0214, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.6823104693140793, |
|
"grad_norm": 0.20728257298469543, |
|
"learning_rate": 2.4965800273597812e-05, |
|
"loss": 0.0218, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.6895306859205776, |
|
"grad_norm": 0.14151689410209656, |
|
"learning_rate": 2.4829001367989058e-05, |
|
"loss": 0.0211, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.696750902527076, |
|
"grad_norm": 0.2515600621700287, |
|
"learning_rate": 2.46922024623803e-05, |
|
"loss": 0.0215, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.703971119133574, |
|
"grad_norm": 0.21007981896400452, |
|
"learning_rate": 2.4555403556771548e-05, |
|
"loss": 0.0207, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.711191335740072, |
|
"grad_norm": 0.20441214740276337, |
|
"learning_rate": 2.441860465116279e-05, |
|
"loss": 0.0204, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.7184115523465704, |
|
"grad_norm": 0.21464481949806213, |
|
"learning_rate": 2.428180574555404e-05, |
|
"loss": 0.0187, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.7256317689530687, |
|
"grad_norm": 0.18914371728897095, |
|
"learning_rate": 2.414500683994528e-05, |
|
"loss": 0.0214, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.7328519855595668, |
|
"grad_norm": 0.16250531375408173, |
|
"learning_rate": 2.4008207934336525e-05, |
|
"loss": 0.0229, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.7400722021660648, |
|
"grad_norm": 0.14336325228214264, |
|
"learning_rate": 2.387140902872777e-05, |
|
"loss": 0.0211, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.7472924187725631, |
|
"grad_norm": 0.2290533185005188, |
|
"learning_rate": 2.3734610123119016e-05, |
|
"loss": 0.0216, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.7545126353790614, |
|
"grad_norm": 0.21229666471481323, |
|
"learning_rate": 2.359781121751026e-05, |
|
"loss": 0.023, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.7617328519855595, |
|
"grad_norm": 0.17029692232608795, |
|
"learning_rate": 2.3461012311901506e-05, |
|
"loss": 0.0226, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.7689530685920578, |
|
"grad_norm": 0.1786644607782364, |
|
"learning_rate": 2.3324213406292748e-05, |
|
"loss": 0.0201, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.7761732851985559, |
|
"grad_norm": 0.1429545283317566, |
|
"learning_rate": 2.3187414500683997e-05, |
|
"loss": 0.0201, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.7833935018050542, |
|
"grad_norm": 0.18863752484321594, |
|
"learning_rate": 2.305061559507524e-05, |
|
"loss": 0.0205, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.7906137184115525, |
|
"grad_norm": 0.16493657231330872, |
|
"learning_rate": 2.2913816689466487e-05, |
|
"loss": 0.0214, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.7978339350180506, |
|
"grad_norm": 0.23174484074115753, |
|
"learning_rate": 2.277701778385773e-05, |
|
"loss": 0.0178, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.8050541516245486, |
|
"grad_norm": 0.2224528193473816, |
|
"learning_rate": 2.2640218878248974e-05, |
|
"loss": 0.0217, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.812274368231047, |
|
"grad_norm": 0.19585539400577545, |
|
"learning_rate": 2.250341997264022e-05, |
|
"loss": 0.0218, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.8194945848375452, |
|
"grad_norm": 0.18590857088565826, |
|
"learning_rate": 2.2366621067031464e-05, |
|
"loss": 0.0211, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.8267148014440433, |
|
"grad_norm": 0.17392487823963165, |
|
"learning_rate": 2.222982216142271e-05, |
|
"loss": 0.0208, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.8339350180505414, |
|
"grad_norm": 0.33551010489463806, |
|
"learning_rate": 2.2093023255813955e-05, |
|
"loss": 0.0213, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.8411552346570397, |
|
"grad_norm": 0.16854000091552734, |
|
"learning_rate": 2.19562243502052e-05, |
|
"loss": 0.0189, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.848375451263538, |
|
"grad_norm": 0.20123836398124695, |
|
"learning_rate": 2.1819425444596445e-05, |
|
"loss": 0.0216, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.855595667870036, |
|
"grad_norm": 0.1919688880443573, |
|
"learning_rate": 2.1682626538987687e-05, |
|
"loss": 0.0208, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.8628158844765343, |
|
"grad_norm": 0.20486849546432495, |
|
"learning_rate": 2.1545827633378936e-05, |
|
"loss": 0.0204, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.8700361010830324, |
|
"grad_norm": 0.15359218418598175, |
|
"learning_rate": 2.1409028727770177e-05, |
|
"loss": 0.0187, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.8772563176895307, |
|
"grad_norm": 0.17525233328342438, |
|
"learning_rate": 2.1272229822161423e-05, |
|
"loss": 0.0256, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.884476534296029, |
|
"grad_norm": 0.2669832408428192, |
|
"learning_rate": 2.1135430916552668e-05, |
|
"loss": 0.02, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.891696750902527, |
|
"grad_norm": 0.2607351839542389, |
|
"learning_rate": 2.0998632010943913e-05, |
|
"loss": 0.021, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.8989169675090252, |
|
"grad_norm": 0.1423048973083496, |
|
"learning_rate": 2.0861833105335158e-05, |
|
"loss": 0.0186, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.9061371841155235, |
|
"grad_norm": 0.2874491810798645, |
|
"learning_rate": 2.0725034199726403e-05, |
|
"loss": 0.0225, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.9133574007220218, |
|
"grad_norm": 0.19905777275562286, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.0205, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.9205776173285198, |
|
"grad_norm": 0.18077979981899261, |
|
"learning_rate": 2.0451436388508894e-05, |
|
"loss": 0.0172, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.927797833935018, |
|
"grad_norm": 0.21182967722415924, |
|
"learning_rate": 2.0314637482900136e-05, |
|
"loss": 0.0208, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.9350180505415162, |
|
"grad_norm": 0.23693156242370605, |
|
"learning_rate": 2.0177838577291384e-05, |
|
"loss": 0.0208, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.9422382671480145, |
|
"grad_norm": 0.1655312329530716, |
|
"learning_rate": 2.0041039671682626e-05, |
|
"loss": 0.0194, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.9494584837545126, |
|
"grad_norm": 0.18168915808200836, |
|
"learning_rate": 1.990424076607387e-05, |
|
"loss": 0.0202, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.9566787003610109, |
|
"grad_norm": 0.16807179152965546, |
|
"learning_rate": 1.9767441860465116e-05, |
|
"loss": 0.0225, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.963898916967509, |
|
"grad_norm": 0.15267817676067352, |
|
"learning_rate": 1.963064295485636e-05, |
|
"loss": 0.0189, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.9711191335740073, |
|
"grad_norm": 0.35743266344070435, |
|
"learning_rate": 1.9493844049247607e-05, |
|
"loss": 0.0172, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.9783393501805056, |
|
"grad_norm": 0.14603158831596375, |
|
"learning_rate": 1.9357045143638852e-05, |
|
"loss": 0.0175, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.9855595667870036, |
|
"grad_norm": 0.1915268450975418, |
|
"learning_rate": 1.9220246238030097e-05, |
|
"loss": 0.0202, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9927797833935017, |
|
"grad_norm": 0.2167324721813202, |
|
"learning_rate": 1.9083447332421342e-05, |
|
"loss": 0.0191, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.22105805575847626, |
|
"learning_rate": 1.8946648426812584e-05, |
|
"loss": 0.0206, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9911191241647089, |
|
"eval_f1": 0.8963686600905629, |
|
"eval_loss": 0.024568406865000725, |
|
"eval_precision": 0.8729883765521071, |
|
"eval_recall": 0.9210357431847986, |
|
"eval_runtime": 271.5143, |
|
"eval_samples_per_second": 81.432, |
|
"eval_steps_per_second": 1.274, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.0072202166064983, |
|
"grad_norm": 0.15133358538150787, |
|
"learning_rate": 1.8809849521203833e-05, |
|
"loss": 0.0135, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.0144404332129966, |
|
"grad_norm": 0.23863369226455688, |
|
"learning_rate": 1.8673050615595075e-05, |
|
"loss": 0.0153, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.0216606498194944, |
|
"grad_norm": 0.16403010487556458, |
|
"learning_rate": 1.853625170998632e-05, |
|
"loss": 0.0161, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.0288808664259927, |
|
"grad_norm": 0.19030123949050903, |
|
"learning_rate": 1.8399452804377565e-05, |
|
"loss": 0.0137, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.036101083032491, |
|
"grad_norm": 0.17739665508270264, |
|
"learning_rate": 1.826265389876881e-05, |
|
"loss": 0.0149, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.0433212996389893, |
|
"grad_norm": 0.18450963497161865, |
|
"learning_rate": 1.8125854993160055e-05, |
|
"loss": 0.0151, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.050541516245487, |
|
"grad_norm": 0.1654953956604004, |
|
"learning_rate": 1.79890560875513e-05, |
|
"loss": 0.0152, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.0577617328519855, |
|
"grad_norm": 0.18726739287376404, |
|
"learning_rate": 1.7852257181942546e-05, |
|
"loss": 0.0136, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.064981949458484, |
|
"grad_norm": 0.15999707579612732, |
|
"learning_rate": 1.771545827633379e-05, |
|
"loss": 0.0167, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.072202166064982, |
|
"grad_norm": 0.20449534058570862, |
|
"learning_rate": 1.7578659370725033e-05, |
|
"loss": 0.0138, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.07942238267148, |
|
"grad_norm": 0.18105897307395935, |
|
"learning_rate": 1.744186046511628e-05, |
|
"loss": 0.0137, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.0866425992779782, |
|
"grad_norm": 0.2451828122138977, |
|
"learning_rate": 1.7305061559507523e-05, |
|
"loss": 0.0183, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.0938628158844765, |
|
"grad_norm": 0.24325129389762878, |
|
"learning_rate": 1.716826265389877e-05, |
|
"loss": 0.0142, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.101083032490975, |
|
"grad_norm": 0.21412310004234314, |
|
"learning_rate": 1.7031463748290014e-05, |
|
"loss": 0.0158, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.108303249097473, |
|
"grad_norm": 0.2084820568561554, |
|
"learning_rate": 1.689466484268126e-05, |
|
"loss": 0.0146, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.115523465703971, |
|
"grad_norm": 0.1751072257757187, |
|
"learning_rate": 1.6757865937072504e-05, |
|
"loss": 0.0153, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.1227436823104693, |
|
"grad_norm": 0.1661452054977417, |
|
"learning_rate": 1.662106703146375e-05, |
|
"loss": 0.0142, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.1299638989169676, |
|
"grad_norm": 0.15145418047904968, |
|
"learning_rate": 1.6484268125854994e-05, |
|
"loss": 0.0163, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.137184115523466, |
|
"grad_norm": 0.22948749363422394, |
|
"learning_rate": 1.634746922024624e-05, |
|
"loss": 0.0154, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.1444043321299637, |
|
"grad_norm": 0.20568566024303436, |
|
"learning_rate": 1.621067031463748e-05, |
|
"loss": 0.0152, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.151624548736462, |
|
"grad_norm": 0.1933920830488205, |
|
"learning_rate": 1.607387140902873e-05, |
|
"loss": 0.0163, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.1588447653429603, |
|
"grad_norm": 0.25813716650009155, |
|
"learning_rate": 1.5937072503419972e-05, |
|
"loss": 0.0128, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.1660649819494586, |
|
"grad_norm": 0.17636476457118988, |
|
"learning_rate": 1.580027359781122e-05, |
|
"loss": 0.0142, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.1732851985559565, |
|
"grad_norm": 0.2212180197238922, |
|
"learning_rate": 1.5663474692202462e-05, |
|
"loss": 0.0133, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.1805054151624548, |
|
"grad_norm": 0.23074433207511902, |
|
"learning_rate": 1.5526675786593707e-05, |
|
"loss": 0.0151, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.187725631768953, |
|
"grad_norm": 0.2936890423297882, |
|
"learning_rate": 1.5389876880984953e-05, |
|
"loss": 0.0132, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.1949458483754514, |
|
"grad_norm": 0.10483139753341675, |
|
"learning_rate": 1.5253077975376198e-05, |
|
"loss": 0.0146, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.2021660649819497, |
|
"grad_norm": 0.13914550840854645, |
|
"learning_rate": 1.5116279069767441e-05, |
|
"loss": 0.0178, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.2093862815884475, |
|
"grad_norm": 0.15463340282440186, |
|
"learning_rate": 1.4979480164158688e-05, |
|
"loss": 0.0126, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.216606498194946, |
|
"grad_norm": 0.18321174383163452, |
|
"learning_rate": 1.4842681258549932e-05, |
|
"loss": 0.0144, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.223826714801444, |
|
"grad_norm": 0.2047768235206604, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 0.0149, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.2310469314079424, |
|
"grad_norm": 0.15067626535892487, |
|
"learning_rate": 1.456908344733242e-05, |
|
"loss": 0.0134, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.2382671480144403, |
|
"grad_norm": 0.1758282482624054, |
|
"learning_rate": 1.4432284541723667e-05, |
|
"loss": 0.0133, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.2454873646209386, |
|
"grad_norm": 0.20649059116840363, |
|
"learning_rate": 1.429548563611491e-05, |
|
"loss": 0.0161, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.252707581227437, |
|
"grad_norm": 0.17980459332466125, |
|
"learning_rate": 1.4158686730506158e-05, |
|
"loss": 0.0135, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.259927797833935, |
|
"grad_norm": 0.22073961794376373, |
|
"learning_rate": 1.4021887824897401e-05, |
|
"loss": 0.0146, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.2671480144404335, |
|
"grad_norm": 0.20626509189605713, |
|
"learning_rate": 1.3885088919288646e-05, |
|
"loss": 0.0138, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.2743682310469313, |
|
"grad_norm": 0.16285601258277893, |
|
"learning_rate": 1.374829001367989e-05, |
|
"loss": 0.0136, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.2815884476534296, |
|
"grad_norm": 0.17455926537513733, |
|
"learning_rate": 1.3611491108071137e-05, |
|
"loss": 0.0138, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.288808664259928, |
|
"grad_norm": 0.18891878426074982, |
|
"learning_rate": 1.347469220246238e-05, |
|
"loss": 0.0151, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.2960288808664258, |
|
"grad_norm": 0.13757237792015076, |
|
"learning_rate": 1.3337893296853627e-05, |
|
"loss": 0.017, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.303249097472924, |
|
"grad_norm": 0.18987725675106049, |
|
"learning_rate": 1.3201094391244869e-05, |
|
"loss": 0.0147, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.3104693140794224, |
|
"grad_norm": 0.13427282869815826, |
|
"learning_rate": 1.3064295485636116e-05, |
|
"loss": 0.0142, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.3176895306859207, |
|
"grad_norm": 0.2868107557296753, |
|
"learning_rate": 1.292749658002736e-05, |
|
"loss": 0.0141, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.324909747292419, |
|
"grad_norm": 0.19995689392089844, |
|
"learning_rate": 1.2790697674418606e-05, |
|
"loss": 0.0145, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.332129963898917, |
|
"grad_norm": 0.15799254179000854, |
|
"learning_rate": 1.265389876880985e-05, |
|
"loss": 0.0149, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.339350180505415, |
|
"grad_norm": 0.2354062795639038, |
|
"learning_rate": 1.2517099863201095e-05, |
|
"loss": 0.0173, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.3465703971119134, |
|
"grad_norm": 0.2461709827184677, |
|
"learning_rate": 1.238030095759234e-05, |
|
"loss": 0.0165, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.3537906137184117, |
|
"grad_norm": 0.17912667989730835, |
|
"learning_rate": 1.2243502051983585e-05, |
|
"loss": 0.0168, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.3610108303249095, |
|
"grad_norm": 0.2090194672346115, |
|
"learning_rate": 1.210670314637483e-05, |
|
"loss": 0.0133, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.368231046931408, |
|
"grad_norm": 0.16298013925552368, |
|
"learning_rate": 1.1969904240766076e-05, |
|
"loss": 0.0164, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.375451263537906, |
|
"grad_norm": 0.14699536561965942, |
|
"learning_rate": 1.183310533515732e-05, |
|
"loss": 0.0165, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.3826714801444044, |
|
"grad_norm": 0.1909872591495514, |
|
"learning_rate": 1.1696306429548564e-05, |
|
"loss": 0.0146, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.3898916967509027, |
|
"grad_norm": 0.149457648396492, |
|
"learning_rate": 1.155950752393981e-05, |
|
"loss": 0.0154, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.3971119133574006, |
|
"grad_norm": 0.18534165620803833, |
|
"learning_rate": 1.1422708618331055e-05, |
|
"loss": 0.0147, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.404332129963899, |
|
"grad_norm": 0.17388072609901428, |
|
"learning_rate": 1.12859097127223e-05, |
|
"loss": 0.0132, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.411552346570397, |
|
"grad_norm": 0.2680976390838623, |
|
"learning_rate": 1.1149110807113544e-05, |
|
"loss": 0.0148, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.4187725631768955, |
|
"grad_norm": 0.2023518681526184, |
|
"learning_rate": 1.1012311901504789e-05, |
|
"loss": 0.0133, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.4259927797833933, |
|
"grad_norm": 0.19814668595790863, |
|
"learning_rate": 1.0875512995896034e-05, |
|
"loss": 0.0139, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.4332129963898916, |
|
"grad_norm": 0.1586819589138031, |
|
"learning_rate": 1.0738714090287279e-05, |
|
"loss": 0.0139, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.44043321299639, |
|
"grad_norm": 0.12945076823234558, |
|
"learning_rate": 1.0601915184678524e-05, |
|
"loss": 0.0136, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.4476534296028882, |
|
"grad_norm": 0.22156469523906708, |
|
"learning_rate": 1.0465116279069768e-05, |
|
"loss": 0.017, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.4548736462093865, |
|
"grad_norm": 0.24720166623592377, |
|
"learning_rate": 1.0328317373461013e-05, |
|
"loss": 0.0131, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.4620938628158844, |
|
"grad_norm": 0.15608453750610352, |
|
"learning_rate": 1.0191518467852258e-05, |
|
"loss": 0.0155, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.4693140794223827, |
|
"grad_norm": 0.34953010082244873, |
|
"learning_rate": 1.0054719562243503e-05, |
|
"loss": 0.017, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.476534296028881, |
|
"grad_norm": 0.20051924884319305, |
|
"learning_rate": 9.917920656634749e-06, |
|
"loss": 0.0136, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.483754512635379, |
|
"grad_norm": 0.20402652025222778, |
|
"learning_rate": 9.781121751025992e-06, |
|
"loss": 0.0133, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.490974729241877, |
|
"grad_norm": 0.1991117298603058, |
|
"learning_rate": 9.644322845417237e-06, |
|
"loss": 0.0153, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.4981949458483754, |
|
"grad_norm": 0.1154688373208046, |
|
"learning_rate": 9.507523939808483e-06, |
|
"loss": 0.0126, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.5054151624548737, |
|
"grad_norm": 0.1297905594110489, |
|
"learning_rate": 9.370725034199728e-06, |
|
"loss": 0.0146, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.512635379061372, |
|
"grad_norm": 0.23860248923301697, |
|
"learning_rate": 9.233926128590973e-06, |
|
"loss": 0.0159, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.51985559566787, |
|
"grad_norm": 0.20767924189567566, |
|
"learning_rate": 9.097127222982216e-06, |
|
"loss": 0.0133, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.527075812274368, |
|
"grad_norm": 0.22992978990077972, |
|
"learning_rate": 8.960328317373462e-06, |
|
"loss": 0.016, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.5342960288808665, |
|
"grad_norm": 0.13888780772686005, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.0129, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.5415162454873648, |
|
"grad_norm": 0.17588818073272705, |
|
"learning_rate": 8.686730506155952e-06, |
|
"loss": 0.0155, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.5487364620938626, |
|
"grad_norm": 0.17710047960281372, |
|
"learning_rate": 8.549931600547197e-06, |
|
"loss": 0.0147, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.555956678700361, |
|
"grad_norm": 0.2465488165616989, |
|
"learning_rate": 8.41313269493844e-06, |
|
"loss": 0.0159, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.563176895306859, |
|
"grad_norm": 0.211311474442482, |
|
"learning_rate": 8.276333789329686e-06, |
|
"loss": 0.0142, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.5703971119133575, |
|
"grad_norm": 0.19003704190254211, |
|
"learning_rate": 8.139534883720931e-06, |
|
"loss": 0.0156, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.577617328519856, |
|
"grad_norm": 0.20325423777103424, |
|
"learning_rate": 8.002735978112176e-06, |
|
"loss": 0.0138, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.5848375451263537, |
|
"grad_norm": 0.20516729354858398, |
|
"learning_rate": 7.865937072503421e-06, |
|
"loss": 0.0146, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.592057761732852, |
|
"grad_norm": 0.15101560950279236, |
|
"learning_rate": 7.729138166894665e-06, |
|
"loss": 0.0144, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.5992779783393503, |
|
"grad_norm": 0.20914088189601898, |
|
"learning_rate": 7.592339261285911e-06, |
|
"loss": 0.0147, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.606498194945848, |
|
"grad_norm": 0.21813435852527618, |
|
"learning_rate": 7.455540355677155e-06, |
|
"loss": 0.0148, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.6137184115523464, |
|
"grad_norm": 0.23971576988697052, |
|
"learning_rate": 7.318741450068401e-06, |
|
"loss": 0.0131, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.6209386281588447, |
|
"grad_norm": 0.17804710566997528, |
|
"learning_rate": 7.181942544459645e-06, |
|
"loss": 0.0136, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.628158844765343, |
|
"grad_norm": 0.15980184078216553, |
|
"learning_rate": 7.04514363885089e-06, |
|
"loss": 0.0118, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.6353790613718413, |
|
"grad_norm": 0.23447448015213013, |
|
"learning_rate": 6.908344733242135e-06, |
|
"loss": 0.0119, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.6425992779783396, |
|
"grad_norm": 0.2265952080488205, |
|
"learning_rate": 6.77154582763338e-06, |
|
"loss": 0.0135, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.6498194945848375, |
|
"grad_norm": 0.27709242701530457, |
|
"learning_rate": 6.634746922024625e-06, |
|
"loss": 0.0139, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.6570397111913358, |
|
"grad_norm": 0.189510315656662, |
|
"learning_rate": 6.497948016415869e-06, |
|
"loss": 0.0128, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.664259927797834, |
|
"grad_norm": 0.09412135928869247, |
|
"learning_rate": 6.3611491108071144e-06, |
|
"loss": 0.0155, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.671480144404332, |
|
"grad_norm": 0.21313029527664185, |
|
"learning_rate": 6.224350205198359e-06, |
|
"loss": 0.0144, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.67870036101083, |
|
"grad_norm": 0.35182449221611023, |
|
"learning_rate": 6.087551299589603e-06, |
|
"loss": 0.0158, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.6859205776173285, |
|
"grad_norm": 0.20150898396968842, |
|
"learning_rate": 5.950752393980848e-06, |
|
"loss": 0.0139, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.693140794223827, |
|
"grad_norm": 0.1621181070804596, |
|
"learning_rate": 5.8139534883720935e-06, |
|
"loss": 0.0131, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.700361010830325, |
|
"grad_norm": 0.20502717792987823, |
|
"learning_rate": 5.677154582763338e-06, |
|
"loss": 0.0132, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.707581227436823, |
|
"grad_norm": 0.14961372315883636, |
|
"learning_rate": 5.540355677154583e-06, |
|
"loss": 0.013, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.7148014440433212, |
|
"grad_norm": 0.2430783361196518, |
|
"learning_rate": 5.403556771545827e-06, |
|
"loss": 0.014, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.7220216606498195, |
|
"grad_norm": 0.2312983125448227, |
|
"learning_rate": 5.266757865937073e-06, |
|
"loss": 0.0151, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.729241877256318, |
|
"grad_norm": 0.23572318255901337, |
|
"learning_rate": 5.129958960328318e-06, |
|
"loss": 0.0146, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.7364620938628157, |
|
"grad_norm": 0.16730424761772156, |
|
"learning_rate": 4.993160054719562e-06, |
|
"loss": 0.014, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.743682310469314, |
|
"grad_norm": 0.26095303893089294, |
|
"learning_rate": 4.856361149110807e-06, |
|
"loss": 0.0153, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.7509025270758123, |
|
"grad_norm": 0.18118229508399963, |
|
"learning_rate": 4.7195622435020526e-06, |
|
"loss": 0.0148, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.7581227436823106, |
|
"grad_norm": 0.19273220002651215, |
|
"learning_rate": 4.582763337893297e-06, |
|
"loss": 0.0132, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.765342960288809, |
|
"grad_norm": 0.17820940911769867, |
|
"learning_rate": 4.445964432284542e-06, |
|
"loss": 0.0129, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.7725631768953067, |
|
"grad_norm": 0.255801260471344, |
|
"learning_rate": 4.3091655266757865e-06, |
|
"loss": 0.0138, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.779783393501805, |
|
"grad_norm": 0.2841118276119232, |
|
"learning_rate": 4.172366621067032e-06, |
|
"loss": 0.0165, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.7870036101083033, |
|
"grad_norm": 0.19371943175792694, |
|
"learning_rate": 4.035567715458277e-06, |
|
"loss": 0.0121, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.794223826714801, |
|
"grad_norm": 0.2262773960828781, |
|
"learning_rate": 3.898768809849521e-06, |
|
"loss": 0.0136, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.8014440433212995, |
|
"grad_norm": 0.17181913554668427, |
|
"learning_rate": 3.7619699042407664e-06, |
|
"loss": 0.0133, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.808664259927798, |
|
"grad_norm": 0.1269243359565735, |
|
"learning_rate": 3.625170998632011e-06, |
|
"loss": 0.0129, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.815884476534296, |
|
"grad_norm": 0.24962696433067322, |
|
"learning_rate": 3.488372093023256e-06, |
|
"loss": 0.0156, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.8231046931407944, |
|
"grad_norm": 0.21303677558898926, |
|
"learning_rate": 3.3515731874145007e-06, |
|
"loss": 0.016, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.8303249097472927, |
|
"grad_norm": 0.2242385894060135, |
|
"learning_rate": 3.2147742818057455e-06, |
|
"loss": 0.0147, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.8375451263537905, |
|
"grad_norm": 0.1730586290359497, |
|
"learning_rate": 3.0779753761969907e-06, |
|
"loss": 0.0125, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.844765342960289, |
|
"grad_norm": 0.17158852517604828, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.0141, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.851985559566787, |
|
"grad_norm": 0.21147002279758453, |
|
"learning_rate": 2.8043775649794802e-06, |
|
"loss": 0.0142, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.859205776173285, |
|
"grad_norm": 0.22795933485031128, |
|
"learning_rate": 2.667578659370725e-06, |
|
"loss": 0.0133, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.8664259927797833, |
|
"grad_norm": 0.19155286252498627, |
|
"learning_rate": 2.5307797537619698e-06, |
|
"loss": 0.014, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.8736462093862816, |
|
"grad_norm": 0.1995808482170105, |
|
"learning_rate": 2.393980848153215e-06, |
|
"loss": 0.0131, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.88086642599278, |
|
"grad_norm": 0.20883041620254517, |
|
"learning_rate": 2.2571819425444598e-06, |
|
"loss": 0.0125, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.888086642599278, |
|
"grad_norm": 0.22262053191661835, |
|
"learning_rate": 2.1203830369357045e-06, |
|
"loss": 0.0151, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.895306859205776, |
|
"grad_norm": 0.17650048434734344, |
|
"learning_rate": 1.9835841313269493e-06, |
|
"loss": 0.0116, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.9025270758122743, |
|
"grad_norm": 0.2514309883117676, |
|
"learning_rate": 1.8467852257181943e-06, |
|
"loss": 0.0149, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.9097472924187726, |
|
"grad_norm": 0.13858270645141602, |
|
"learning_rate": 1.7099863201094393e-06, |
|
"loss": 0.0122, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.916967509025271, |
|
"grad_norm": 0.20119492709636688, |
|
"learning_rate": 1.573187414500684e-06, |
|
"loss": 0.0148, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.9241877256317688, |
|
"grad_norm": 0.2429434359073639, |
|
"learning_rate": 1.4363885088919288e-06, |
|
"loss": 0.0138, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.931407942238267, |
|
"grad_norm": 0.158556267619133, |
|
"learning_rate": 1.2995896032831738e-06, |
|
"loss": 0.0131, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.9386281588447654, |
|
"grad_norm": 0.15908589959144592, |
|
"learning_rate": 1.1627906976744186e-06, |
|
"loss": 0.0122, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.9458483754512637, |
|
"grad_norm": 0.20639561116695404, |
|
"learning_rate": 1.0259917920656636e-06, |
|
"loss": 0.0145, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.953068592057762, |
|
"grad_norm": 0.18711957335472107, |
|
"learning_rate": 8.891928864569083e-07, |
|
"loss": 0.0138, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.96028880866426, |
|
"grad_norm": 0.2037838101387024, |
|
"learning_rate": 7.523939808481532e-07, |
|
"loss": 0.0133, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.967509025270758, |
|
"grad_norm": 0.18255527317523956, |
|
"learning_rate": 6.155950752393981e-07, |
|
"loss": 0.0119, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.9747292418772564, |
|
"grad_norm": 0.21082955598831177, |
|
"learning_rate": 4.78796169630643e-07, |
|
"loss": 0.0145, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.9819494584837543, |
|
"grad_norm": 0.16639681160449982, |
|
"learning_rate": 3.4199726402188785e-07, |
|
"loss": 0.0116, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.9891696750902526, |
|
"grad_norm": 0.19572538137435913, |
|
"learning_rate": 2.051983584131327e-07, |
|
"loss": 0.0137, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.996389891696751, |
|
"grad_norm": 0.2220822423696518, |
|
"learning_rate": 6.839945280437757e-08, |
|
"loss": 0.0129, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9919225627188221, |
|
"eval_f1": 0.9047710499602184, |
|
"eval_loss": 0.024072397500276566, |
|
"eval_precision": 0.8898783021668151, |
|
"eval_recall": 0.9201707636931834, |
|
"eval_runtime": 271.2138, |
|
"eval_samples_per_second": 81.522, |
|
"eval_steps_per_second": 1.276, |
|
"step": 4155 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4155, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.9767875544127224e+16, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|