{
"best_metric": 0.10594170403587444,
"best_model_checkpoint": "d:\\whisper-medium-pt-cv18-fleurs2-lr\\checkpoint-5000",
"epoch": 11.502185415228894,
"eval_steps": 5000,
"global_step": 25000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011502185415228893,
"grad_norm": 15.794218063354492,
"learning_rate": 2.875e-08,
"loss": 0.7429,
"step": 25
},
{
"epoch": 0.023004370830457786,
"grad_norm": 37.62531661987305,
"learning_rate": 5.8750000000000007e-08,
"loss": 1.3461,
"step": 50
},
{
"epoch": 0.03450655624568668,
"grad_norm": 13.57304573059082,
"learning_rate": 9e-08,
"loss": 0.7409,
"step": 75
},
{
"epoch": 0.04600874166091557,
"grad_norm": 37.35552215576172,
"learning_rate": 1.2125e-07,
"loss": 1.2656,
"step": 100
},
{
"epoch": 0.05751092707614447,
"grad_norm": 13.972356796264648,
"learning_rate": 1.5250000000000002e-07,
"loss": 0.6919,
"step": 125
},
{
"epoch": 0.06901311249137336,
"grad_norm": 28.50897789001465,
"learning_rate": 1.8375000000000001e-07,
"loss": 1.131,
"step": 150
},
{
"epoch": 0.08051529790660225,
"grad_norm": 12.919734954833984,
"learning_rate": 2.15e-07,
"loss": 0.5826,
"step": 175
},
{
"epoch": 0.09201748332183114,
"grad_norm": 28.706079483032227,
"learning_rate": 2.4624999999999997e-07,
"loss": 0.7895,
"step": 200
},
{
"epoch": 0.10351966873706005,
"grad_norm": 8.280942916870117,
"learning_rate": 2.7750000000000004e-07,
"loss": 0.3197,
"step": 225
},
{
"epoch": 0.11502185415228894,
"grad_norm": 23.930252075195312,
"learning_rate": 3.0875e-07,
"loss": 0.3855,
"step": 250
},
{
"epoch": 0.12652403956751782,
"grad_norm": 6.460832595825195,
"learning_rate": 3.4e-07,
"loss": 0.2569,
"step": 275
},
{
"epoch": 0.13802622498274672,
"grad_norm": 22.88783836364746,
"learning_rate": 3.7125000000000005e-07,
"loss": 0.3504,
"step": 300
},
{
"epoch": 0.14952841039797563,
"grad_norm": 7.947082042694092,
"learning_rate": 4.025e-07,
"loss": 0.2123,
"step": 325
},
{
"epoch": 0.1610305958132045,
"grad_norm": 17.616596221923828,
"learning_rate": 4.3375000000000003e-07,
"loss": 0.2902,
"step": 350
},
{
"epoch": 0.1725327812284334,
"grad_norm": 4.91463565826416,
"learning_rate": 4.65e-07,
"loss": 0.1828,
"step": 375
},
{
"epoch": 0.18403496664366228,
"grad_norm": 18.81287956237793,
"learning_rate": 4.9625e-07,
"loss": 0.2847,
"step": 400
},
{
"epoch": 0.1955371520588912,
"grad_norm": 6.390377044677734,
"learning_rate": 5.275e-07,
"loss": 0.2107,
"step": 425
},
{
"epoch": 0.2070393374741201,
"grad_norm": 18.839292526245117,
"learning_rate": 5.587499999999999e-07,
"loss": 0.234,
"step": 450
},
{
"epoch": 0.21854152288934897,
"grad_norm": 6.151549816131592,
"learning_rate": 5.9e-07,
"loss": 0.2198,
"step": 475
},
{
"epoch": 0.23004370830457788,
"grad_norm": 13.628652572631836,
"learning_rate": 6.212500000000001e-07,
"loss": 0.2575,
"step": 500
},
{
"epoch": 0.24154589371980675,
"grad_norm": 5.531817436218262,
"learning_rate": 6.525000000000001e-07,
"loss": 0.1854,
"step": 525
},
{
"epoch": 0.25304807913503563,
"grad_norm": 14.468728065490723,
"learning_rate": 6.8375e-07,
"loss": 0.244,
"step": 550
},
{
"epoch": 0.26455026455026454,
"grad_norm": 6.999868869781494,
"learning_rate": 7.15e-07,
"loss": 0.1865,
"step": 575
},
{
"epoch": 0.27605244996549344,
"grad_norm": 17.996356964111328,
"learning_rate": 7.462500000000001e-07,
"loss": 0.2358,
"step": 600
},
{
"epoch": 0.28755463538072235,
"grad_norm": 8.172720909118652,
"learning_rate": 7.775e-07,
"loss": 0.168,
"step": 625
},
{
"epoch": 0.29905682079595125,
"grad_norm": 16.850492477416992,
"learning_rate": 8.0875e-07,
"loss": 0.2229,
"step": 650
},
{
"epoch": 0.3105590062111801,
"grad_norm": 6.139720916748047,
"learning_rate": 8.4e-07,
"loss": 0.1813,
"step": 675
},
{
"epoch": 0.322061191626409,
"grad_norm": 13.691137313842773,
"learning_rate": 8.7125e-07,
"loss": 0.2124,
"step": 700
},
{
"epoch": 0.3335633770416379,
"grad_norm": 6.144876956939697,
"learning_rate": 9.025e-07,
"loss": 0.1865,
"step": 725
},
{
"epoch": 0.3450655624568668,
"grad_norm": 19.069873809814453,
"learning_rate": 9.337500000000001e-07,
"loss": 0.1917,
"step": 750
},
{
"epoch": 0.3565677478720957,
"grad_norm": 8.921222686767578,
"learning_rate": 9.65e-07,
"loss": 0.2001,
"step": 775
},
{
"epoch": 0.36806993328732457,
"grad_norm": 16.66543197631836,
"learning_rate": 9.9625e-07,
"loss": 0.2174,
"step": 800
},
{
"epoch": 0.3795721187025535,
"grad_norm": 6.839846134185791,
"learning_rate": 1.0275e-06,
"loss": 0.1836,
"step": 825
},
{
"epoch": 0.3910743041177824,
"grad_norm": 18.923572540283203,
"learning_rate": 1.05875e-06,
"loss": 0.2126,
"step": 850
},
{
"epoch": 0.4025764895330113,
"grad_norm": 4.898512840270996,
"learning_rate": 1.0900000000000002e-06,
"loss": 0.1661,
"step": 875
},
{
"epoch": 0.4140786749482402,
"grad_norm": 14.121219635009766,
"learning_rate": 1.12125e-06,
"loss": 0.1825,
"step": 900
},
{
"epoch": 0.42558086036346904,
"grad_norm": 7.532533645629883,
"learning_rate": 1.1525000000000002e-06,
"loss": 0.1767,
"step": 925
},
{
"epoch": 0.43708304577869794,
"grad_norm": 12.991471290588379,
"learning_rate": 1.18375e-06,
"loss": 0.1993,
"step": 950
},
{
"epoch": 0.44858523119392685,
"grad_norm": 6.528143882751465,
"learning_rate": 1.215e-06,
"loss": 0.1772,
"step": 975
},
{
"epoch": 0.46008741660915575,
"grad_norm": 15.690600395202637,
"learning_rate": 1.24625e-06,
"loss": 0.184,
"step": 1000
},
{
"epoch": 0.47158960202438466,
"grad_norm": 6.040137767791748,
"learning_rate": 1.2775e-06,
"loss": 0.1684,
"step": 1025
},
{
"epoch": 0.4830917874396135,
"grad_norm": 12.872380256652832,
"learning_rate": 1.3087500000000002e-06,
"loss": 0.1934,
"step": 1050
},
{
"epoch": 0.4945939728548424,
"grad_norm": 6.453334331512451,
"learning_rate": 1.34e-06,
"loss": 0.1671,
"step": 1075
},
{
"epoch": 0.5060961582700713,
"grad_norm": 12.546272277832031,
"learning_rate": 1.3712500000000002e-06,
"loss": 0.1901,
"step": 1100
},
{
"epoch": 0.5175983436853002,
"grad_norm": 6.841800689697266,
"learning_rate": 1.4025e-06,
"loss": 0.1726,
"step": 1125
},
{
"epoch": 0.5291005291005291,
"grad_norm": 16.96964454650879,
"learning_rate": 1.43375e-06,
"loss": 0.2027,
"step": 1150
},
{
"epoch": 0.540602714515758,
"grad_norm": 5.311614036560059,
"learning_rate": 1.465e-06,
"loss": 0.152,
"step": 1175
},
{
"epoch": 0.5521048999309869,
"grad_norm": 15.524170875549316,
"learning_rate": 1.49625e-06,
"loss": 0.1986,
"step": 1200
},
{
"epoch": 0.5636070853462157,
"grad_norm": 9.683732986450195,
"learning_rate": 1.5275000000000002e-06,
"loss": 0.1624,
"step": 1225
},
{
"epoch": 0.5751092707614447,
"grad_norm": 11.873454093933105,
"learning_rate": 1.5587500000000001e-06,
"loss": 0.1875,
"step": 1250
},
{
"epoch": 0.5866114561766735,
"grad_norm": 5.891846179962158,
"learning_rate": 1.5900000000000002e-06,
"loss": 0.1603,
"step": 1275
},
{
"epoch": 0.5981136415919025,
"grad_norm": 14.153804779052734,
"learning_rate": 1.6212500000000001e-06,
"loss": 0.1641,
"step": 1300
},
{
"epoch": 0.6096158270071314,
"grad_norm": 7.2772955894470215,
"learning_rate": 1.6525000000000003e-06,
"loss": 0.154,
"step": 1325
},
{
"epoch": 0.6211180124223602,
"grad_norm": 14.763301849365234,
"learning_rate": 1.68375e-06,
"loss": 0.1745,
"step": 1350
},
{
"epoch": 0.6326201978375892,
"grad_norm": 8.715161323547363,
"learning_rate": 1.7149999999999999e-06,
"loss": 0.1667,
"step": 1375
},
{
"epoch": 0.644122383252818,
"grad_norm": 11.507594108581543,
"learning_rate": 1.74625e-06,
"loss": 0.17,
"step": 1400
},
{
"epoch": 0.655624568668047,
"grad_norm": 5.30320405960083,
"learning_rate": 1.7775e-06,
"loss": 0.1735,
"step": 1425
},
{
"epoch": 0.6671267540832758,
"grad_norm": 12.242377281188965,
"learning_rate": 1.80875e-06,
"loss": 0.1834,
"step": 1450
},
{
"epoch": 0.6786289394985047,
"grad_norm": 7.7493462562561035,
"learning_rate": 1.84e-06,
"loss": 0.1575,
"step": 1475
},
{
"epoch": 0.6901311249137336,
"grad_norm": 14.08462142944336,
"learning_rate": 1.87125e-06,
"loss": 0.1818,
"step": 1500
},
{
"epoch": 0.7016333103289625,
"grad_norm": 5.448755264282227,
"learning_rate": 1.9025000000000002e-06,
"loss": 0.1483,
"step": 1525
},
{
"epoch": 0.7131354957441914,
"grad_norm": 14.599383354187012,
"learning_rate": 1.9337500000000003e-06,
"loss": 0.1877,
"step": 1550
},
{
"epoch": 0.7246376811594203,
"grad_norm": 6.832576274871826,
"learning_rate": 1.9650000000000002e-06,
"loss": 0.1639,
"step": 1575
},
{
"epoch": 0.7361398665746491,
"grad_norm": 11.764741897583008,
"learning_rate": 1.99625e-06,
"loss": 0.1887,
"step": 1600
},
{
"epoch": 0.7476420519898781,
"grad_norm": 5.073885917663574,
"learning_rate": 2.0275e-06,
"loss": 0.1449,
"step": 1625
},
{
"epoch": 0.759144237405107,
"grad_norm": 11.470847129821777,
"learning_rate": 2.0587500000000004e-06,
"loss": 0.1776,
"step": 1650
},
{
"epoch": 0.7706464228203359,
"grad_norm": 8.333059310913086,
"learning_rate": 2.09e-06,
"loss": 0.1572,
"step": 1675
},
{
"epoch": 0.7821486082355648,
"grad_norm": 10.23344612121582,
"learning_rate": 2.12125e-06,
"loss": 0.2047,
"step": 1700
},
{
"epoch": 0.7936507936507936,
"grad_norm": 7.107337951660156,
"learning_rate": 2.1525e-06,
"loss": 0.1635,
"step": 1725
},
{
"epoch": 0.8051529790660226,
"grad_norm": 12.670259475708008,
"learning_rate": 2.18375e-06,
"loss": 0.1784,
"step": 1750
},
{
"epoch": 0.8166551644812514,
"grad_norm": 6.77697229385376,
"learning_rate": 2.215e-06,
"loss": 0.1526,
"step": 1775
},
{
"epoch": 0.8281573498964804,
"grad_norm": 13.887433052062988,
"learning_rate": 2.24625e-06,
"loss": 0.1897,
"step": 1800
},
{
"epoch": 0.8396595353117092,
"grad_norm": 4.762125492095947,
"learning_rate": 2.2775000000000002e-06,
"loss": 0.1464,
"step": 1825
},
{
"epoch": 0.8511617207269381,
"grad_norm": 19.292552947998047,
"learning_rate": 2.30875e-06,
"loss": 0.174,
"step": 1850
},
{
"epoch": 0.862663906142167,
"grad_norm": 7.12981653213501,
"learning_rate": 2.34e-06,
"loss": 0.1602,
"step": 1875
},
{
"epoch": 0.8741660915573959,
"grad_norm": 9.92496395111084,
"learning_rate": 2.3712500000000004e-06,
"loss": 0.1571,
"step": 1900
},
{
"epoch": 0.8856682769726248,
"grad_norm": 6.081151008605957,
"learning_rate": 2.4025000000000003e-06,
"loss": 0.1608,
"step": 1925
},
{
"epoch": 0.8971704623878537,
"grad_norm": 15.237141609191895,
"learning_rate": 2.43375e-06,
"loss": 0.1634,
"step": 1950
},
{
"epoch": 0.9086726478030825,
"grad_norm": 6.994187831878662,
"learning_rate": 2.465e-06,
"loss": 0.1531,
"step": 1975
},
{
"epoch": 0.9201748332183115,
"grad_norm": 16.033370971679688,
"learning_rate": 2.49625e-06,
"loss": 0.1583,
"step": 2000
},
{
"epoch": 0.9316770186335404,
"grad_norm": 6.178311824798584,
"learning_rate": 2.5275e-06,
"loss": 0.1502,
"step": 2025
},
{
"epoch": 0.9431792040487693,
"grad_norm": 10.050224304199219,
"learning_rate": 2.55875e-06,
"loss": 0.1763,
"step": 2050
},
{
"epoch": 0.9546813894639982,
"grad_norm": 5.254738807678223,
"learning_rate": 2.59e-06,
"loss": 0.1453,
"step": 2075
},
{
"epoch": 0.966183574879227,
"grad_norm": 13.425460815429688,
"learning_rate": 2.62125e-06,
"loss": 0.1536,
"step": 2100
},
{
"epoch": 0.977685760294456,
"grad_norm": 4.7515459060668945,
"learning_rate": 2.6525e-06,
"loss": 0.1579,
"step": 2125
},
{
"epoch": 0.9891879457096848,
"grad_norm": 19.656898498535156,
"learning_rate": 2.6837500000000004e-06,
"loss": 0.1984,
"step": 2150
},
{
"epoch": 1.0006901311249137,
"grad_norm": 2.677222728729248,
"learning_rate": 2.7150000000000003e-06,
"loss": 0.1498,
"step": 2175
},
{
"epoch": 1.0121923165401425,
"grad_norm": 5.08892297744751,
"learning_rate": 2.74625e-06,
"loss": 0.1184,
"step": 2200
},
{
"epoch": 1.0236945019553716,
"grad_norm": 3.9588983058929443,
"learning_rate": 2.7775e-06,
"loss": 0.1467,
"step": 2225
},
{
"epoch": 1.0351966873706004,
"grad_norm": 11.414639472961426,
"learning_rate": 2.8087500000000004e-06,
"loss": 0.1113,
"step": 2250
},
{
"epoch": 1.0466988727858293,
"grad_norm": 2.633138418197632,
"learning_rate": 2.8400000000000003e-06,
"loss": 0.1353,
"step": 2275
},
{
"epoch": 1.0582010582010581,
"grad_norm": 5.479538917541504,
"learning_rate": 2.87125e-06,
"loss": 0.1261,
"step": 2300
},
{
"epoch": 1.069703243616287,
"grad_norm": 3.630627393722534,
"learning_rate": 2.9025e-06,
"loss": 0.1227,
"step": 2325
},
{
"epoch": 1.081205429031516,
"grad_norm": 8.805641174316406,
"learning_rate": 2.93375e-06,
"loss": 0.1229,
"step": 2350
},
{
"epoch": 1.092707614446745,
"grad_norm": 3.6392369270324707,
"learning_rate": 2.965e-06,
"loss": 0.1476,
"step": 2375
},
{
"epoch": 1.1042097998619738,
"grad_norm": 4.040417194366455,
"learning_rate": 2.99625e-06,
"loss": 0.1146,
"step": 2400
},
{
"epoch": 1.1157119852772026,
"grad_norm": 2.933556318283081,
"learning_rate": 3.0275000000000002e-06,
"loss": 0.1226,
"step": 2425
},
{
"epoch": 1.1272141706924317,
"grad_norm": 3.0352232456207275,
"learning_rate": 3.05875e-06,
"loss": 0.1224,
"step": 2450
},
{
"epoch": 1.1387163561076605,
"grad_norm": 3.9333884716033936,
"learning_rate": 3.09e-06,
"loss": 0.116,
"step": 2475
},
{
"epoch": 1.1502185415228894,
"grad_norm": 4.144917011260986,
"learning_rate": 3.1212500000000004e-06,
"loss": 0.1101,
"step": 2500
},
{
"epoch": 1.1617207269381182,
"grad_norm": 3.215536117553711,
"learning_rate": 3.1525e-06,
"loss": 0.1464,
"step": 2525
},
{
"epoch": 1.173222912353347,
"grad_norm": 10.1427640914917,
"learning_rate": 3.18375e-06,
"loss": 0.122,
"step": 2550
},
{
"epoch": 1.1847250977685762,
"grad_norm": 2.707651138305664,
"learning_rate": 3.215e-06,
"loss": 0.129,
"step": 2575
},
{
"epoch": 1.196227283183805,
"grad_norm": 4.589003562927246,
"learning_rate": 3.24625e-06,
"loss": 0.1083,
"step": 2600
},
{
"epoch": 1.2077294685990339,
"grad_norm": 2.6789445877075195,
"learning_rate": 3.2775e-06,
"loss": 0.1446,
"step": 2625
},
{
"epoch": 1.2192316540142627,
"grad_norm": 7.148416519165039,
"learning_rate": 3.30875e-06,
"loss": 0.1153,
"step": 2650
},
{
"epoch": 1.2307338394294915,
"grad_norm": 2.8945469856262207,
"learning_rate": 3.34e-06,
"loss": 0.1267,
"step": 2675
},
{
"epoch": 1.2422360248447206,
"grad_norm": 7.540188312530518,
"learning_rate": 3.37125e-06,
"loss": 0.1106,
"step": 2700
},
{
"epoch": 1.2537382102599495,
"grad_norm": 3.4765818119049072,
"learning_rate": 3.4025e-06,
"loss": 0.1377,
"step": 2725
},
{
"epoch": 1.2652403956751783,
"grad_norm": 5.349803447723389,
"learning_rate": 3.4337500000000004e-06,
"loss": 0.0954,
"step": 2750
},
{
"epoch": 1.2767425810904072,
"grad_norm": 2.526627779006958,
"learning_rate": 3.4650000000000003e-06,
"loss": 0.1224,
"step": 2775
},
{
"epoch": 1.288244766505636,
"grad_norm": 6.571626663208008,
"learning_rate": 3.49625e-06,
"loss": 0.118,
"step": 2800
},
{
"epoch": 1.299746951920865,
"grad_norm": 2.319915533065796,
"learning_rate": 3.5275e-06,
"loss": 0.1581,
"step": 2825
},
{
"epoch": 1.311249137336094,
"grad_norm": 3.2540760040283203,
"learning_rate": 3.5587500000000004e-06,
"loss": 0.1196,
"step": 2850
},
{
"epoch": 1.3227513227513228,
"grad_norm": 3.912529706954956,
"learning_rate": 3.5900000000000004e-06,
"loss": 0.146,
"step": 2875
},
{
"epoch": 1.3342535081665516,
"grad_norm": 3.1499977111816406,
"learning_rate": 3.6212500000000003e-06,
"loss": 0.1158,
"step": 2900
},
{
"epoch": 1.3457556935817805,
"grad_norm": 3.1882896423339844,
"learning_rate": 3.6525e-06,
"loss": 0.1517,
"step": 2925
},
{
"epoch": 1.3572578789970096,
"grad_norm": 5.0317888259887695,
"learning_rate": 3.6837500000000005e-06,
"loss": 0.1129,
"step": 2950
},
{
"epoch": 1.3687600644122382,
"grad_norm": 3.961643695831299,
"learning_rate": 3.7150000000000004e-06,
"loss": 0.1233,
"step": 2975
},
{
"epoch": 1.3802622498274673,
"grad_norm": 5.409346580505371,
"learning_rate": 3.7462500000000003e-06,
"loss": 0.1302,
"step": 3000
},
{
"epoch": 1.391764435242696,
"grad_norm": 3.978931427001953,
"learning_rate": 3.7775000000000007e-06,
"loss": 0.1361,
"step": 3025
},
{
"epoch": 1.403266620657925,
"grad_norm": 6.479454040527344,
"learning_rate": 3.8087500000000006e-06,
"loss": 0.1167,
"step": 3050
},
{
"epoch": 1.414768806073154,
"grad_norm": 3.1381306648254395,
"learning_rate": 3.84e-06,
"loss": 0.1199,
"step": 3075
},
{
"epoch": 1.4262709914883827,
"grad_norm": 5.639588832855225,
"learning_rate": 3.8712499999999996e-06,
"loss": 0.0967,
"step": 3100
},
{
"epoch": 1.4377731769036117,
"grad_norm": 3.615877866744995,
"learning_rate": 3.9025e-06,
"loss": 0.1334,
"step": 3125
},
{
"epoch": 1.4492753623188406,
"grad_norm": 5.772467136383057,
"learning_rate": 3.93375e-06,
"loss": 0.114,
"step": 3150
},
{
"epoch": 1.4607775477340694,
"grad_norm": 3.489830255508423,
"learning_rate": 3.965e-06,
"loss": 0.1421,
"step": 3175
},
{
"epoch": 1.4722797331492985,
"grad_norm": 6.027266502380371,
"learning_rate": 3.99625e-06,
"loss": 0.1212,
"step": 3200
},
{
"epoch": 1.4837819185645271,
"grad_norm": 3.047349452972412,
"learning_rate": 4.0275e-06,
"loss": 0.122,
"step": 3225
},
{
"epoch": 1.4952841039797562,
"grad_norm": 7.183162689208984,
"learning_rate": 4.05875e-06,
"loss": 0.1265,
"step": 3250
},
{
"epoch": 1.506786289394985,
"grad_norm": 3.0059525966644287,
"learning_rate": 4.09e-06,
"loss": 0.1468,
"step": 3275
},
{
"epoch": 1.518288474810214,
"grad_norm": 7.402144908905029,
"learning_rate": 4.12125e-06,
"loss": 0.1156,
"step": 3300
},
{
"epoch": 1.529790660225443,
"grad_norm": 3.278670072555542,
"learning_rate": 4.1525000000000005e-06,
"loss": 0.1201,
"step": 3325
},
{
"epoch": 1.5412928456406716,
"grad_norm": 5.25584602355957,
"learning_rate": 4.18375e-06,
"loss": 0.0967,
"step": 3350
},
{
"epoch": 1.5527950310559007,
"grad_norm": 4.171654224395752,
"learning_rate": 4.215e-06,
"loss": 0.1219,
"step": 3375
},
{
"epoch": 1.5642972164711295,
"grad_norm": 4.809912204742432,
"learning_rate": 4.24625e-06,
"loss": 0.1082,
"step": 3400
},
{
"epoch": 1.5757994018863584,
"grad_norm": 4.222274303436279,
"learning_rate": 4.2775e-06,
"loss": 0.1219,
"step": 3425
},
{
"epoch": 1.5873015873015874,
"grad_norm": 7.9530792236328125,
"learning_rate": 4.30875e-06,
"loss": 0.1228,
"step": 3450
},
{
"epoch": 1.598803772716816,
"grad_norm": 3.8543384075164795,
"learning_rate": 4.34e-06,
"loss": 0.1422,
"step": 3475
},
{
"epoch": 1.6103059581320451,
"grad_norm": 7.819809436798096,
"learning_rate": 4.371250000000001e-06,
"loss": 0.1306,
"step": 3500
},
{
"epoch": 1.621808143547274,
"grad_norm": 3.5284693241119385,
"learning_rate": 4.402500000000001e-06,
"loss": 0.1196,
"step": 3525
},
{
"epoch": 1.6333103289625028,
"grad_norm": 8.282682418823242,
"learning_rate": 4.4337500000000005e-06,
"loss": 0.102,
"step": 3550
},
{
"epoch": 1.644812514377732,
"grad_norm": 4.162339210510254,
"learning_rate": 4.4650000000000004e-06,
"loss": 0.1474,
"step": 3575
},
{
"epoch": 1.6563146997929605,
"grad_norm": 6.563460350036621,
"learning_rate": 4.49625e-06,
"loss": 0.1218,
"step": 3600
},
{
"epoch": 1.6678168852081896,
"grad_norm": 3.1366829872131348,
"learning_rate": 4.5275e-06,
"loss": 0.1332,
"step": 3625
},
{
"epoch": 1.6793190706234185,
"grad_norm": 6.515697956085205,
"learning_rate": 4.55875e-06,
"loss": 0.1108,
"step": 3650
},
{
"epoch": 1.6908212560386473,
"grad_norm": 3.636465072631836,
"learning_rate": 4.590000000000001e-06,
"loss": 0.1126,
"step": 3675
},
{
"epoch": 1.7023234414538764,
"grad_norm": 5.450216770172119,
"learning_rate": 4.62125e-06,
"loss": 0.1014,
"step": 3700
},
{
"epoch": 1.713825626869105,
"grad_norm": 3.8343234062194824,
"learning_rate": 4.6525e-06,
"loss": 0.147,
"step": 3725
},
{
"epoch": 1.725327812284334,
"grad_norm": 6.672384738922119,
"learning_rate": 4.68375e-06,
"loss": 0.1196,
"step": 3750
},
{
"epoch": 1.736829997699563,
"grad_norm": 3.536450147628784,
"learning_rate": 4.715e-06,
"loss": 0.1546,
"step": 3775
},
{
"epoch": 1.7483321831147918,
"grad_norm": 4.106471538543701,
"learning_rate": 4.74625e-06,
"loss": 0.1016,
"step": 3800
},
{
"epoch": 1.7598343685300208,
"grad_norm": 3.1923904418945312,
"learning_rate": 4.7775e-06,
"loss": 0.1414,
"step": 3825
},
{
"epoch": 1.7713365539452495,
"grad_norm": 5.6106157302856445,
"learning_rate": 4.80875e-06,
"loss": 0.1113,
"step": 3850
},
{
"epoch": 1.7828387393604785,
"grad_norm": 3.33258056640625,
"learning_rate": 4.84e-06,
"loss": 0.1228,
"step": 3875
},
{
"epoch": 1.7943409247757074,
"grad_norm": 4.954050064086914,
"learning_rate": 4.87125e-06,
"loss": 0.1204,
"step": 3900
},
{
"epoch": 1.8058431101909362,
"grad_norm": 3.758305072784424,
"learning_rate": 4.9025e-06,
"loss": 0.1226,
"step": 3925
},
{
"epoch": 1.8173452956061653,
"grad_norm": 5.375064373016357,
"learning_rate": 4.93375e-06,
"loss": 0.1344,
"step": 3950
},
{
"epoch": 1.828847481021394,
"grad_norm": 3.2198784351348877,
"learning_rate": 4.965e-06,
"loss": 0.1352,
"step": 3975
},
{
"epoch": 1.840349666436623,
"grad_norm": 6.347688674926758,
"learning_rate": 4.996250000000001e-06,
"loss": 0.1218,
"step": 4000
},
{
"epoch": 1.8518518518518519,
"grad_norm": 2.7024085521698,
"learning_rate": 5.0275000000000006e-06,
"loss": 0.1361,
"step": 4025
},
{
"epoch": 1.8633540372670807,
"grad_norm": 4.011370658874512,
"learning_rate": 5.0587500000000005e-06,
"loss": 0.1191,
"step": 4050
},
{
"epoch": 1.8748562226823098,
"grad_norm": 3.9904325008392334,
"learning_rate": 5.09e-06,
"loss": 0.1446,
"step": 4075
},
{
"epoch": 1.8863584080975384,
"grad_norm": 9.331755638122559,
"learning_rate": 5.12125e-06,
"loss": 0.1054,
"step": 4100
},
{
"epoch": 1.8978605935127675,
"grad_norm": 3.661421060562134,
"learning_rate": 5.151250000000001e-06,
"loss": 0.1424,
"step": 4125
},
{
"epoch": 1.9093627789279963,
"grad_norm": 5.570228576660156,
"learning_rate": 5.182500000000001e-06,
"loss": 0.1106,
"step": 4150
},
{
"epoch": 1.9208649643432252,
"grad_norm": 2.8392717838287354,
"learning_rate": 5.213750000000001e-06,
"loss": 0.1464,
"step": 4175
},
{
"epoch": 1.9323671497584543,
"grad_norm": 3.853571891784668,
"learning_rate": 5.245e-06,
"loss": 0.1287,
"step": 4200
},
{
"epoch": 1.9438693351736829,
"grad_norm": 3.817902088165283,
"learning_rate": 5.27625e-06,
"loss": 0.1274,
"step": 4225
},
{
"epoch": 1.955371520588912,
"grad_norm": 7.2849297523498535,
"learning_rate": 5.3075e-06,
"loss": 0.1192,
"step": 4250
},
{
"epoch": 1.9668737060041408,
"grad_norm": 3.0766196250915527,
"learning_rate": 5.33875e-06,
"loss": 0.1546,
"step": 4275
},
{
"epoch": 1.9783758914193696,
"grad_norm": 4.272324085235596,
"learning_rate": 5.37e-06,
"loss": 0.1152,
"step": 4300
},
{
"epoch": 1.9898780768345987,
"grad_norm": 3.574113607406616,
"learning_rate": 5.40125e-06,
"loss": 0.1537,
"step": 4325
},
{
"epoch": 2.0013802622498273,
"grad_norm": 3.3172659873962402,
"learning_rate": 5.4325e-06,
"loss": 0.1174,
"step": 4350
},
{
"epoch": 2.0128824476650564,
"grad_norm": 9.897499084472656,
"learning_rate": 5.46375e-06,
"loss": 0.0623,
"step": 4375
},
{
"epoch": 2.024384633080285,
"grad_norm": 2.7083864212036133,
"learning_rate": 5.495e-06,
"loss": 0.0892,
"step": 4400
},
{
"epoch": 2.035886818495514,
"grad_norm": 4.11065149307251,
"learning_rate": 5.52625e-06,
"loss": 0.0634,
"step": 4425
},
{
"epoch": 2.047389003910743,
"grad_norm": 2.104963779449463,
"learning_rate": 5.557500000000001e-06,
"loss": 0.0661,
"step": 4450
},
{
"epoch": 2.058891189325972,
"grad_norm": 3.848796844482422,
"learning_rate": 5.5887500000000005e-06,
"loss": 0.0586,
"step": 4475
},
{
"epoch": 2.070393374741201,
"grad_norm": 2.1717464923858643,
"learning_rate": 5.62e-06,
"loss": 0.0773,
"step": 4500
},
{
"epoch": 2.0818955601564295,
"grad_norm": 4.033133506774902,
"learning_rate": 5.65125e-06,
"loss": 0.0584,
"step": 4525
},
{
"epoch": 2.0933977455716586,
"grad_norm": 2.5643622875213623,
"learning_rate": 5.6825e-06,
"loss": 0.0776,
"step": 4550
},
{
"epoch": 2.1048999309868877,
"grad_norm": 6.559327602386475,
"learning_rate": 5.71375e-06,
"loss": 0.0643,
"step": 4575
},
{
"epoch": 2.1164021164021163,
"grad_norm": 2.1577560901641846,
"learning_rate": 5.745e-06,
"loss": 0.0687,
"step": 4600
},
{
"epoch": 2.1279043018173454,
"grad_norm": 8.438713073730469,
"learning_rate": 5.776250000000001e-06,
"loss": 0.072,
"step": 4625
},
{
"epoch": 2.139406487232574,
"grad_norm": 2.4797635078430176,
"learning_rate": 5.807500000000001e-06,
"loss": 0.087,
"step": 4650
},
{
"epoch": 2.150908672647803,
"grad_norm": 2.543196201324463,
"learning_rate": 5.838750000000001e-06,
"loss": 0.0586,
"step": 4675
},
{
"epoch": 2.162410858063032,
"grad_norm": 2.4707229137420654,
"learning_rate": 5.8700000000000005e-06,
"loss": 0.0723,
"step": 4700
},
{
"epoch": 2.1739130434782608,
"grad_norm": 5.645440101623535,
"learning_rate": 5.9012500000000005e-06,
"loss": 0.0654,
"step": 4725
},
{
"epoch": 2.18541522889349,
"grad_norm": 2.701606512069702,
"learning_rate": 5.9325e-06,
"loss": 0.0846,
"step": 4750
},
{
"epoch": 2.1969174143087185,
"grad_norm": 3.8023571968078613,
"learning_rate": 5.96375e-06,
"loss": 0.0581,
"step": 4775
},
{
"epoch": 2.2084195997239475,
"grad_norm": 1.9498426914215088,
"learning_rate": 5.995e-06,
"loss": 0.0911,
"step": 4800
},
{
"epoch": 2.2199217851391766,
"grad_norm": 2.360180139541626,
"learning_rate": 6.02625e-06,
"loss": 0.0728,
"step": 4825
},
{
"epoch": 2.2314239705544052,
"grad_norm": 2.91253924369812,
"learning_rate": 6.0575e-06,
"loss": 0.086,
"step": 4850
},
{
"epoch": 2.2429261559696343,
"grad_norm": 4.982974052429199,
"learning_rate": 6.08875e-06,
"loss": 0.071,
"step": 4875
},
{
"epoch": 2.2544283413848634,
"grad_norm": 2.393528461456299,
"learning_rate": 6.12e-06,
"loss": 0.0889,
"step": 4900
},
{
"epoch": 2.265930526800092,
"grad_norm": 3.294156312942505,
"learning_rate": 6.15125e-06,
"loss": 0.0659,
"step": 4925
},
{
"epoch": 2.277432712215321,
"grad_norm": 2.5716331005096436,
"learning_rate": 6.1825e-06,
"loss": 0.072,
"step": 4950
},
{
"epoch": 2.2889348976305497,
"grad_norm": 5.017734527587891,
"learning_rate": 6.2137500000000004e-06,
"loss": 0.0742,
"step": 4975
},
{
"epoch": 2.3004370830457788,
"grad_norm": 2.866231679916382,
"learning_rate": 6.245e-06,
"loss": 0.0876,
"step": 5000
},
{
"epoch": 2.3004370830457788,
"eval_loss": 0.16616719961166382,
"eval_runtime": 5341.8191,
"eval_samples_per_second": 1.777,
"eval_steps_per_second": 0.222,
"eval_wer": 0.10594170403587444,
"step": 5000
},
{
"epoch": 2.311939268461008,
"grad_norm": 6.034395694732666,
"learning_rate": 6.2434375e-06,
"loss": 0.0644,
"step": 5025
},
{
"epoch": 2.3234414538762365,
"grad_norm": 2.2458648681640625,
"learning_rate": 6.235625e-06,
"loss": 0.0776,
"step": 5050
},
{
"epoch": 2.3349436392914655,
"grad_norm": 4.230370998382568,
"learning_rate": 6.2278125e-06,
"loss": 0.0663,
"step": 5075
},
{
"epoch": 2.346445824706694,
"grad_norm": 2.9568865299224854,
"learning_rate": 6.22e-06,
"loss": 0.0908,
"step": 5100
},
{
"epoch": 2.3579480101219232,
"grad_norm": 3.1515731811523438,
"learning_rate": 6.2121875e-06,
"loss": 0.0767,
"step": 5125
},
{
"epoch": 2.3694501955371523,
"grad_norm": 3.660957098007202,
"learning_rate": 6.204375e-06,
"loss": 0.08,
"step": 5150
},
{
"epoch": 2.380952380952381,
"grad_norm": 3.4517126083374023,
"learning_rate": 6.196562500000001e-06,
"loss": 0.0715,
"step": 5175
},
{
"epoch": 2.39245456636761,
"grad_norm": 3.5403709411621094,
"learning_rate": 6.18875e-06,
"loss": 0.0749,
"step": 5200
},
{
"epoch": 2.4039567517828386,
"grad_norm": 4.9767866134643555,
"learning_rate": 6.1809375000000005e-06,
"loss": 0.0757,
"step": 5225
},
{
"epoch": 2.4154589371980677,
"grad_norm": 3.120891809463501,
"learning_rate": 6.173125e-06,
"loss": 0.0968,
"step": 5250
},
{
"epoch": 2.4269611226132968,
"grad_norm": 3.43932843208313,
"learning_rate": 6.165312500000001e-06,
"loss": 0.0724,
"step": 5275
},
{
"epoch": 2.4384633080285254,
"grad_norm": 3.4927871227264404,
"learning_rate": 6.1575e-06,
"loss": 0.0831,
"step": 5300
},
{
"epoch": 2.4499654934437545,
"grad_norm": 3.009047746658325,
"learning_rate": 6.1496875000000006e-06,
"loss": 0.0695,
"step": 5325
},
{
"epoch": 2.461467678858983,
"grad_norm": 3.073551654815674,
"learning_rate": 6.141875e-06,
"loss": 0.0893,
"step": 5350
},
{
"epoch": 2.472969864274212,
"grad_norm": 5.566808223724365,
"learning_rate": 6.1340625e-06,
"loss": 0.0743,
"step": 5375
},
{
"epoch": 2.4844720496894412,
"grad_norm": 2.416825771331787,
"learning_rate": 6.12625e-06,
"loss": 0.079,
"step": 5400
},
{
"epoch": 2.49597423510467,
"grad_norm": 4.4972357749938965,
"learning_rate": 6.1184375e-06,
"loss": 0.0712,
"step": 5425
},
{
"epoch": 2.507476420519899,
"grad_norm": 3.5067849159240723,
"learning_rate": 6.1106250000000005e-06,
"loss": 0.0883,
"step": 5450
},
{
"epoch": 2.5189786059351276,
"grad_norm": 4.83007287979126,
"learning_rate": 6.1028125e-06,
"loss": 0.0775,
"step": 5475
},
{
"epoch": 2.5304807913503566,
"grad_norm": 3.0138561725616455,
"learning_rate": 6.095e-06,
"loss": 0.0814,
"step": 5500
},
{
"epoch": 2.5419829767655857,
"grad_norm": 5.821829795837402,
"learning_rate": 6.0871875e-06,
"loss": 0.0632,
"step": 5525
},
{
"epoch": 2.5534851621808143,
"grad_norm": 2.7620084285736084,
"learning_rate": 6.0793750000000006e-06,
"loss": 0.0789,
"step": 5550
},
{
"epoch": 2.5649873475960434,
"grad_norm": 5.065167427062988,
"learning_rate": 6.0715625e-06,
"loss": 0.0758,
"step": 5575
},
{
"epoch": 2.576489533011272,
"grad_norm": 1.777954339981079,
"learning_rate": 6.06375e-06,
"loss": 0.0881,
"step": 5600
},
{
"epoch": 2.587991718426501,
"grad_norm": 4.819468975067139,
"learning_rate": 6.0559375e-06,
"loss": 0.0715,
"step": 5625
},
{
"epoch": 2.59949390384173,
"grad_norm": 2.716526985168457,
"learning_rate": 6.048125000000001e-06,
"loss": 0.0912,
"step": 5650
},
{
"epoch": 2.610996089256959,
"grad_norm": 4.544143199920654,
"learning_rate": 6.0403125000000005e-06,
"loss": 0.0689,
"step": 5675
},
{
"epoch": 2.622498274672188,
"grad_norm": 2.0918431282043457,
"learning_rate": 6.0325e-06,
"loss": 0.0832,
"step": 5700
},
{
"epoch": 2.6340004600874165,
"grad_norm": 4.814356803894043,
"learning_rate": 6.0246875e-06,
"loss": 0.073,
"step": 5725
},
{
"epoch": 2.6455026455026456,
"grad_norm": 3.759373664855957,
"learning_rate": 6.016875e-06,
"loss": 0.0803,
"step": 5750
},
{
"epoch": 2.6570048309178746,
"grad_norm": 3.2967991828918457,
"learning_rate": 6.0090625000000005e-06,
"loss": 0.0767,
"step": 5775
},
{
"epoch": 2.6685070163331033,
"grad_norm": 4.567154884338379,
"learning_rate": 6.00125e-06,
"loss": 0.079,
"step": 5800
},
{
"epoch": 2.6800092017483323,
"grad_norm": 3.424586534500122,
"learning_rate": 5.9934375e-06,
"loss": 0.0761,
"step": 5825
},
{
"epoch": 2.691511387163561,
"grad_norm": 2.420856475830078,
"learning_rate": 5.985625e-06,
"loss": 0.0848,
"step": 5850
},
{
"epoch": 2.70301357257879,
"grad_norm": 6.956820487976074,
"learning_rate": 5.977812500000001e-06,
"loss": 0.0702,
"step": 5875
},
{
"epoch": 2.714515757994019,
"grad_norm": 2.5272533893585205,
"learning_rate": 5.9700000000000004e-06,
"loss": 0.0842,
"step": 5900
},
{
"epoch": 2.7260179434092477,
"grad_norm": 5.917661190032959,
"learning_rate": 5.9621875e-06,
"loss": 0.081,
"step": 5925
},
{
"epoch": 2.7375201288244764,
"grad_norm": 2.5169830322265625,
"learning_rate": 5.954375e-06,
"loss": 0.0929,
"step": 5950
},
{
"epoch": 2.7490223142397054,
"grad_norm": 8.81894588470459,
"learning_rate": 5.946562500000001e-06,
"loss": 0.0764,
"step": 5975
},
{
"epoch": 2.7605244996549345,
"grad_norm": 3.4220263957977295,
"learning_rate": 5.9387500000000005e-06,
"loss": 0.0821,
"step": 6000
},
{
"epoch": 2.7720266850701636,
"grad_norm": 3.72196626663208,
"learning_rate": 5.9309375e-06,
"loss": 0.0773,
"step": 6025
},
{
"epoch": 2.783528870485392,
"grad_norm": 3.5996947288513184,
"learning_rate": 5.923125e-06,
"loss": 0.0952,
"step": 6050
},
{
"epoch": 2.795031055900621,
"grad_norm": 3.508704423904419,
"learning_rate": 5.9153125e-06,
"loss": 0.0677,
"step": 6075
},
{
"epoch": 2.80653324131585,
"grad_norm": 3.768465042114258,
"learning_rate": 5.907500000000001e-06,
"loss": 0.0903,
"step": 6100
},
{
"epoch": 2.818035426731079,
"grad_norm": 7.676156997680664,
"learning_rate": 5.8996875000000004e-06,
"loss": 0.0889,
"step": 6125
},
{
"epoch": 2.829537612146308,
"grad_norm": 4.185784816741943,
"learning_rate": 5.891875e-06,
"loss": 0.0901,
"step": 6150
},
{
"epoch": 2.8410397975615367,
"grad_norm": 5.07861328125,
"learning_rate": 5.8840625e-06,
"loss": 0.0736,
"step": 6175
},
{
"epoch": 2.8525419829767653,
"grad_norm": 2.5589280128479004,
"learning_rate": 5.876250000000001e-06,
"loss": 0.0726,
"step": 6200
},
{
"epoch": 2.8640441683919944,
"grad_norm": 5.522654056549072,
"learning_rate": 5.8684375e-06,
"loss": 0.081,
"step": 6225
},
{
"epoch": 2.8755463538072235,
"grad_norm": 2.873734474182129,
"learning_rate": 5.860625e-06,
"loss": 0.0934,
"step": 6250
},
{
"epoch": 2.8870485392224525,
"grad_norm": 4.135101318359375,
"learning_rate": 5.8528125e-06,
"loss": 0.0691,
"step": 6275
},
{
"epoch": 2.898550724637681,
"grad_norm": 3.2022476196289062,
"learning_rate": 5.845312500000001e-06,
"loss": 0.0876,
"step": 6300
},
{
"epoch": 2.91005291005291,
"grad_norm": 4.878911018371582,
"learning_rate": 5.8375000000000004e-06,
"loss": 0.0784,
"step": 6325
},
{
"epoch": 2.921555095468139,
"grad_norm": 2.7555994987487793,
"learning_rate": 5.8296875e-06,
"loss": 0.0932,
"step": 6350
},
{
"epoch": 2.933057280883368,
"grad_norm": 4.3659257888793945,
"learning_rate": 5.821875e-06,
"loss": 0.0678,
"step": 6375
},
{
"epoch": 2.944559466298597,
"grad_norm": 2.2182586193084717,
"learning_rate": 5.814062500000001e-06,
"loss": 0.0832,
"step": 6400
},
{
"epoch": 2.9560616517138256,
"grad_norm": 4.891880989074707,
"learning_rate": 5.8062500000000005e-06,
"loss": 0.077,
"step": 6425
},
{
"epoch": 2.9675638371290542,
"grad_norm": 3.2281267642974854,
"learning_rate": 5.7984375e-06,
"loss": 0.0862,
"step": 6450
},
{
"epoch": 2.9790660225442833,
"grad_norm": 4.891918659210205,
"learning_rate": 5.790625e-06,
"loss": 0.0713,
"step": 6475
},
{
"epoch": 2.9905682079595124,
"grad_norm": 3.6200342178344727,
"learning_rate": 5.782812500000001e-06,
"loss": 0.1053,
"step": 6500
},
{
"epoch": 3.002070393374741,
"grad_norm": 2.0583813190460205,
"learning_rate": 5.775000000000001e-06,
"loss": 0.0679,
"step": 6525
},
{
"epoch": 3.01357257878997,
"grad_norm": 4.742440700531006,
"learning_rate": 5.7671875e-06,
"loss": 0.0332,
"step": 6550
},
{
"epoch": 3.025074764205199,
"grad_norm": 1.5032464265823364,
"learning_rate": 5.759375e-06,
"loss": 0.0537,
"step": 6575
},
{
"epoch": 3.036576949620428,
"grad_norm": 2.877703905105591,
"learning_rate": 5.7515625e-06,
"loss": 0.0373,
"step": 6600
},
{
"epoch": 3.048079135035657,
"grad_norm": 2.60418963432312,
"learning_rate": 5.743750000000001e-06,
"loss": 0.0441,
"step": 6625
},
{
"epoch": 3.0595813204508855,
"grad_norm": 1.8395668268203735,
"learning_rate": 5.7359375e-06,
"loss": 0.0348,
"step": 6650
},
{
"epoch": 3.0710835058661146,
"grad_norm": 2.9202868938446045,
"learning_rate": 5.728125e-06,
"loss": 0.0534,
"step": 6675
},
{
"epoch": 3.0825856912813436,
"grad_norm": 2.7743561267852783,
"learning_rate": 5.7203125e-06,
"loss": 0.0353,
"step": 6700
},
{
"epoch": 3.0940878766965723,
"grad_norm": 3.1167683601379395,
"learning_rate": 5.712500000000001e-06,
"loss": 0.0505,
"step": 6725
},
{
"epoch": 3.1055900621118013,
"grad_norm": 4.696991920471191,
"learning_rate": 5.7046875e-06,
"loss": 0.0321,
"step": 6750
},
{
"epoch": 3.11709224752703,
"grad_norm": 1.5438411235809326,
"learning_rate": 5.696875e-06,
"loss": 0.0499,
"step": 6775
},
{
"epoch": 3.128594432942259,
"grad_norm": 6.239833831787109,
"learning_rate": 5.6890625e-06,
"loss": 0.0369,
"step": 6800
},
{
"epoch": 3.140096618357488,
"grad_norm": 4.79664421081543,
"learning_rate": 5.681250000000001e-06,
"loss": 0.0541,
"step": 6825
},
{
"epoch": 3.1515988037727167,
"grad_norm": 3.2525932788848877,
"learning_rate": 5.6734375e-06,
"loss": 0.0359,
"step": 6850
},
{
"epoch": 3.163100989187946,
"grad_norm": 2.3709487915039062,
"learning_rate": 5.6656250000000005e-06,
"loss": 0.0464,
"step": 6875
},
{
"epoch": 3.1746031746031744,
"grad_norm": 3.340402126312256,
"learning_rate": 5.6578125e-06,
"loss": 0.0331,
"step": 6900
},
{
"epoch": 3.1861053600184035,
"grad_norm": 2.4604740142822266,
"learning_rate": 5.65e-06,
"loss": 0.0578,
"step": 6925
},
{
"epoch": 3.1976075454336326,
"grad_norm": 4.909114837646484,
"learning_rate": 5.642187500000001e-06,
"loss": 0.0382,
"step": 6950
},
{
"epoch": 3.209109730848861,
"grad_norm": 4.222381591796875,
"learning_rate": 5.634375e-06,
"loss": 0.0573,
"step": 6975
},
{
"epoch": 3.2206119162640903,
"grad_norm": 3.5466387271881104,
"learning_rate": 5.6265625e-06,
"loss": 0.0355,
"step": 7000
},
{
"epoch": 3.232114101679319,
"grad_norm": 4.068739891052246,
"learning_rate": 5.61875e-06,
"loss": 0.0524,
"step": 7025
},
{
"epoch": 3.243616287094548,
"grad_norm": 3.5331459045410156,
"learning_rate": 5.610937500000001e-06,
"loss": 0.0388,
"step": 7050
},
{
"epoch": 3.255118472509777,
"grad_norm": 5.634138107299805,
"learning_rate": 5.603125e-06,
"loss": 0.0476,
"step": 7075
},
{
"epoch": 3.2666206579250057,
"grad_norm": 4.637297630310059,
"learning_rate": 5.5953125000000005e-06,
"loss": 0.038,
"step": 7100
},
{
"epoch": 3.2781228433402347,
"grad_norm": 2.1430771350860596,
"learning_rate": 5.5875e-06,
"loss": 0.0536,
"step": 7125
},
{
"epoch": 3.2896250287554634,
"grad_norm": 2.6287930011749268,
"learning_rate": 5.579687500000001e-06,
"loss": 0.0393,
"step": 7150
},
{
"epoch": 3.3011272141706924,
"grad_norm": 4.748372554779053,
"learning_rate": 5.571875e-06,
"loss": 0.0424,
"step": 7175
},
{
"epoch": 3.3126293995859215,
"grad_norm": 3.630303382873535,
"learning_rate": 5.5640625000000006e-06,
"loss": 0.0387,
"step": 7200
},
{
"epoch": 3.32413158500115,
"grad_norm": 4.786473751068115,
"learning_rate": 5.55625e-06,
"loss": 0.0561,
"step": 7225
},
{
"epoch": 3.335633770416379,
"grad_norm": 2.6991186141967773,
"learning_rate": 5.5484375e-06,
"loss": 0.0441,
"step": 7250
},
{
"epoch": 3.347135955831608,
"grad_norm": 2.1756906509399414,
"learning_rate": 5.540625e-06,
"loss": 0.0537,
"step": 7275
},
{
"epoch": 3.358638141246837,
"grad_norm": 1.9589180946350098,
"learning_rate": 5.5328125e-06,
"loss": 0.0341,
"step": 7300
},
{
"epoch": 3.370140326662066,
"grad_norm": 3.504366159439087,
"learning_rate": 5.5250000000000005e-06,
"loss": 0.0556,
"step": 7325
},
{
"epoch": 3.3816425120772946,
"grad_norm": 2.558767557144165,
"learning_rate": 5.5171875e-06,
"loss": 0.0411,
"step": 7350
},
{
"epoch": 3.3931446974925237,
"grad_norm": 2.466121196746826,
"learning_rate": 5.509375e-06,
"loss": 0.0488,
"step": 7375
},
{
"epoch": 3.4046468829077523,
"grad_norm": 4.102237224578857,
"learning_rate": 5.5015625e-06,
"loss": 0.0399,
"step": 7400
},
{
"epoch": 3.4161490683229814,
"grad_norm": 2.008484125137329,
"learning_rate": 5.4937500000000006e-06,
"loss": 0.0612,
"step": 7425
},
{
"epoch": 3.4276512537382104,
"grad_norm": 4.083756446838379,
"learning_rate": 5.4859375e-06,
"loss": 0.0417,
"step": 7450
},
{
"epoch": 3.439153439153439,
"grad_norm": 2.526481866836548,
"learning_rate": 5.478125e-06,
"loss": 0.0572,
"step": 7475
},
{
"epoch": 3.450655624568668,
"grad_norm": 5.448037624359131,
"learning_rate": 5.4703125e-06,
"loss": 0.0487,
"step": 7500
},
{
"epoch": 3.4621578099838968,
"grad_norm": 2.4314920902252197,
"learning_rate": 5.462500000000001e-06,
"loss": 0.0465,
"step": 7525
},
{
"epoch": 3.473659995399126,
"grad_norm": 2.6774253845214844,
"learning_rate": 5.4546875000000004e-06,
"loss": 0.0398,
"step": 7550
},
{
"epoch": 3.485162180814355,
"grad_norm": 2.9031014442443848,
"learning_rate": 5.446875e-06,
"loss": 0.0629,
"step": 7575
},
{
"epoch": 3.4966643662295835,
"grad_norm": 3.0640227794647217,
"learning_rate": 5.4390625e-06,
"loss": 0.0344,
"step": 7600
},
{
"epoch": 3.5081665516448126,
"grad_norm": 3.262568950653076,
"learning_rate": 5.43125e-06,
"loss": 0.0538,
"step": 7625
},
{
"epoch": 3.5196687370600412,
"grad_norm": 3.67341685295105,
"learning_rate": 5.4234375000000005e-06,
"loss": 0.0409,
"step": 7650
},
{
"epoch": 3.5311709224752703,
"grad_norm": 2.3461291790008545,
"learning_rate": 5.415625e-06,
"loss": 0.0506,
"step": 7675
},
{
"epoch": 3.542673107890499,
"grad_norm": 7.1874589920043945,
"learning_rate": 5.4078125e-06,
"loss": 0.0462,
"step": 7700
},
{
"epoch": 3.554175293305728,
"grad_norm": 2.4068679809570312,
"learning_rate": 5.4e-06,
"loss": 0.0517,
"step": 7725
},
{
"epoch": 3.565677478720957,
"grad_norm": 10.364015579223633,
"learning_rate": 5.392187500000001e-06,
"loss": 0.0459,
"step": 7750
},
{
"epoch": 3.5771796641361857,
"grad_norm": 1.969916582107544,
"learning_rate": 5.3843750000000004e-06,
"loss": 0.0395,
"step": 7775
},
{
"epoch": 3.588681849551415,
"grad_norm": 2.2081713676452637,
"learning_rate": 5.3765625e-06,
"loss": 0.0333,
"step": 7800
},
{
"epoch": 3.6001840349666434,
"grad_norm": 2.534886598587036,
"learning_rate": 5.36875e-06,
"loss": 0.0628,
"step": 7825
},
{
"epoch": 3.6116862203818725,
"grad_norm": 4.844212532043457,
"learning_rate": 5.360937500000001e-06,
"loss": 0.0399,
"step": 7850
},
{
"epoch": 3.6231884057971016,
"grad_norm": 2.0327091217041016,
"learning_rate": 5.3531250000000005e-06,
"loss": 0.0479,
"step": 7875
},
{
"epoch": 3.63469059121233,
"grad_norm": 5.35178804397583,
"learning_rate": 5.3453125e-06,
"loss": 0.0449,
"step": 7900
},
{
"epoch": 3.6461927766275593,
"grad_norm": 1.815317153930664,
"learning_rate": 5.3375e-06,
"loss": 0.061,
"step": 7925
},
{
"epoch": 3.657694962042788,
"grad_norm": 4.710953235626221,
"learning_rate": 5.3296875e-06,
"loss": 0.0366,
"step": 7950
},
{
"epoch": 3.669197147458017,
"grad_norm": 2.8497252464294434,
"learning_rate": 5.321875000000001e-06,
"loss": 0.0523,
"step": 7975
},
{
"epoch": 3.680699332873246,
"grad_norm": 3.102057695388794,
"learning_rate": 5.3140624999999996e-06,
"loss": 0.0424,
"step": 8000
},
{
"epoch": 3.6922015182884746,
"grad_norm": 2.153207540512085,
"learning_rate": 5.30625e-06,
"loss": 0.0524,
"step": 8025
},
{
"epoch": 3.7037037037037037,
"grad_norm": 2.5647189617156982,
"learning_rate": 5.2984375e-06,
"loss": 0.041,
"step": 8050
},
{
"epoch": 3.7152058891189323,
"grad_norm": 3.3755810260772705,
"learning_rate": 5.290625000000001e-06,
"loss": 0.0574,
"step": 8075
},
{
"epoch": 3.7267080745341614,
"grad_norm": 3.370281457901001,
"learning_rate": 5.2828125e-06,
"loss": 0.0427,
"step": 8100
},
{
"epoch": 3.7382102599493905,
"grad_norm": 3.2038800716400146,
"learning_rate": 5.275e-06,
"loss": 0.0544,
"step": 8125
},
{
"epoch": 3.749712445364619,
"grad_norm": 1.8960700035095215,
"learning_rate": 5.2671875e-06,
"loss": 0.0436,
"step": 8150
},
{
"epoch": 3.761214630779848,
"grad_norm": 3.4356021881103516,
"learning_rate": 5.259375000000001e-06,
"loss": 0.0593,
"step": 8175
},
{
"epoch": 3.772716816195077,
"grad_norm": 3.514998197555542,
"learning_rate": 5.251562500000001e-06,
"loss": 0.0361,
"step": 8200
},
{
"epoch": 3.784219001610306,
"grad_norm": 1.705615758895874,
"learning_rate": 5.24375e-06,
"loss": 0.0551,
"step": 8225
},
{
"epoch": 3.795721187025535,
"grad_norm": 4.432028770446777,
"learning_rate": 5.2359375e-06,
"loss": 0.0409,
"step": 8250
},
{
"epoch": 3.8072233724407636,
"grad_norm": 1.7736481428146362,
"learning_rate": 5.228125e-06,
"loss": 0.0431,
"step": 8275
},
{
"epoch": 3.8187255578559927,
"grad_norm": 6.601363658905029,
"learning_rate": 5.220312500000001e-06,
"loss": 0.0385,
"step": 8300
},
{
"epoch": 3.8302277432712213,
"grad_norm": 1.307138204574585,
"learning_rate": 5.2128125e-06,
"loss": 0.0551,
"step": 8325
},
{
"epoch": 3.8417299286864504,
"grad_norm": 4.539569854736328,
"learning_rate": 5.205e-06,
"loss": 0.0414,
"step": 8350
},
{
"epoch": 3.8532321141016794,
"grad_norm": 1.4391796588897705,
"learning_rate": 5.1971875e-06,
"loss": 0.0468,
"step": 8375
},
{
"epoch": 3.864734299516908,
"grad_norm": 6.971019268035889,
"learning_rate": 5.189375000000001e-06,
"loss": 0.0399,
"step": 8400
},
{
"epoch": 3.876236484932137,
"grad_norm": 3.1996214389801025,
"learning_rate": 5.1815624999999996e-06,
"loss": 0.0586,
"step": 8425
},
{
"epoch": 3.8877386703473658,
"grad_norm": 2.8706111907958984,
"learning_rate": 5.17375e-06,
"loss": 0.0372,
"step": 8450
},
{
"epoch": 3.899240855762595,
"grad_norm": 2.5482466220855713,
"learning_rate": 5.1659375e-06,
"loss": 0.0588,
"step": 8475
},
{
"epoch": 3.910743041177824,
"grad_norm": 3.1475095748901367,
"learning_rate": 5.158125000000001e-06,
"loss": 0.0357,
"step": 8500
},
{
"epoch": 3.9222452265930525,
"grad_norm": 3.1267690658569336,
"learning_rate": 5.1503125e-06,
"loss": 0.063,
"step": 8525
},
{
"epoch": 3.9337474120082816,
"grad_norm": 1.667446255683899,
"learning_rate": 5.1425e-06,
"loss": 0.041,
"step": 8550
},
{
"epoch": 3.9452495974235102,
"grad_norm": 1.526341438293457,
"learning_rate": 5.1346875e-06,
"loss": 0.0447,
"step": 8575
},
{
"epoch": 3.9567517828387393,
"grad_norm": 5.230031490325928,
"learning_rate": 5.126875000000001e-06,
"loss": 0.0429,
"step": 8600
},
{
"epoch": 3.9682539682539684,
"grad_norm": 2.7826850414276123,
"learning_rate": 5.1190625e-06,
"loss": 0.0492,
"step": 8625
},
{
"epoch": 3.979756153669197,
"grad_norm": 3.674015760421753,
"learning_rate": 5.11125e-06,
"loss": 0.0391,
"step": 8650
},
{
"epoch": 3.991258339084426,
"grad_norm": 3.0181193351745605,
"learning_rate": 5.1034375e-06,
"loss": 0.0538,
"step": 8675
},
{
"epoch": 4.002760524499655,
"grad_norm": 0.9257192015647888,
"learning_rate": 5.095625e-06,
"loss": 0.0428,
"step": 8700
},
{
"epoch": 4.014262709914884,
"grad_norm": 3.171478033065796,
"learning_rate": 5.087812500000001e-06,
"loss": 0.0175,
"step": 8725
},
{
"epoch": 4.025764895330113,
"grad_norm": 1.8162753582000732,
"learning_rate": 5.08e-06,
"loss": 0.0279,
"step": 8750
},
{
"epoch": 4.037267080745342,
"grad_norm": 0.8982828855514526,
"learning_rate": 5.0721875e-06,
"loss": 0.0195,
"step": 8775
},
{
"epoch": 4.04876926616057,
"grad_norm": 0.8609257936477661,
"learning_rate": 5.064375e-06,
"loss": 0.0386,
"step": 8800
},
{
"epoch": 4.060271451575799,
"grad_norm": 2.8455467224121094,
"learning_rate": 5.056562500000001e-06,
"loss": 0.0224,
"step": 8825
},
{
"epoch": 4.071773636991028,
"grad_norm": 4.009946346282959,
"learning_rate": 5.04875e-06,
"loss": 0.0332,
"step": 8850
},
{
"epoch": 4.083275822406257,
"grad_norm": 6.834461688995361,
"learning_rate": 5.0409375e-06,
"loss": 0.0243,
"step": 8875
},
{
"epoch": 4.094778007821486,
"grad_norm": 2.6603477001190186,
"learning_rate": 5.033125e-06,
"loss": 0.0337,
"step": 8900
},
{
"epoch": 4.106280193236715,
"grad_norm": 0.7769069075584412,
"learning_rate": 5.025312500000001e-06,
"loss": 0.0227,
"step": 8925
},
{
"epoch": 4.117782378651944,
"grad_norm": 2.2610023021698,
"learning_rate": 5.0175e-06,
"loss": 0.0316,
"step": 8950
},
{
"epoch": 4.129284564067173,
"grad_norm": 0.38040891289711,
"learning_rate": 5.0096875000000005e-06,
"loss": 0.0222,
"step": 8975
},
{
"epoch": 4.140786749482402,
"grad_norm": 3.0205907821655273,
"learning_rate": 5.001875e-06,
"loss": 0.04,
"step": 9000
},
{
"epoch": 4.152288934897631,
"grad_norm": 2.703075647354126,
"learning_rate": 4.9940625e-06,
"loss": 0.022,
"step": 9025
},
{
"epoch": 4.163791120312859,
"grad_norm": 1.172072410583496,
"learning_rate": 4.98625e-06,
"loss": 0.0325,
"step": 9050
},
{
"epoch": 4.175293305728088,
"grad_norm": 2.555593967437744,
"learning_rate": 4.9784375e-06,
"loss": 0.0222,
"step": 9075
},
{
"epoch": 4.186795491143317,
"grad_norm": 1.3756543397903442,
"learning_rate": 4.970625e-06,
"loss": 0.035,
"step": 9100
},
{
"epoch": 4.198297676558546,
"grad_norm": 3.3377280235290527,
"learning_rate": 4.9628125e-06,
"loss": 0.0136,
"step": 9125
},
{
"epoch": 4.209799861973775,
"grad_norm": 3.4057676792144775,
"learning_rate": 4.955e-06,
"loss": 0.0284,
"step": 9150
},
{
"epoch": 4.2213020473890035,
"grad_norm": 0.6658844351768494,
"learning_rate": 4.9471875e-06,
"loss": 0.0191,
"step": 9175
},
{
"epoch": 4.232804232804233,
"grad_norm": 3.073923349380493,
"learning_rate": 4.9393750000000005e-06,
"loss": 0.0365,
"step": 9200
},
{
"epoch": 4.244306418219462,
"grad_norm": 1.391367793083191,
"learning_rate": 4.9315625e-06,
"loss": 0.0206,
"step": 9225
},
{
"epoch": 4.255808603634691,
"grad_norm": 2.199260711669922,
"learning_rate": 4.92375e-06,
"loss": 0.0325,
"step": 9250
},
{
"epoch": 4.26731078904992,
"grad_norm": 0.31522706151008606,
"learning_rate": 4.9159375e-06,
"loss": 0.0191,
"step": 9275
},
{
"epoch": 4.278812974465148,
"grad_norm": 1.3394752740859985,
"learning_rate": 4.9081250000000005e-06,
"loss": 0.031,
"step": 9300
},
{
"epoch": 4.290315159880377,
"grad_norm": 3.397002696990967,
"learning_rate": 4.9003125e-06,
"loss": 0.0283,
"step": 9325
},
{
"epoch": 4.301817345295606,
"grad_norm": 1.9605752229690552,
"learning_rate": 4.8925e-06,
"loss": 0.0334,
"step": 9350
},
{
"epoch": 4.313319530710835,
"grad_norm": 3.3331549167633057,
"learning_rate": 4.8846875e-06,
"loss": 0.0173,
"step": 9375
},
{
"epoch": 4.324821716126064,
"grad_norm": 1.6721982955932617,
"learning_rate": 4.876875e-06,
"loss": 0.0335,
"step": 9400
},
{
"epoch": 4.336323901541292,
"grad_norm": 2.483747720718384,
"learning_rate": 4.8690625000000004e-06,
"loss": 0.0161,
"step": 9425
},
{
"epoch": 4.3478260869565215,
"grad_norm": 2.7441227436065674,
"learning_rate": 4.86125e-06,
"loss": 0.0299,
"step": 9450
},
{
"epoch": 4.359328272371751,
"grad_norm": 6.056209087371826,
"learning_rate": 4.8534375e-06,
"loss": 0.0191,
"step": 9475
},
{
"epoch": 4.37083045778698,
"grad_norm": 3.665620803833008,
"learning_rate": 4.845625e-06,
"loss": 0.0413,
"step": 9500
},
{
|
|
"epoch": 4.382332643202209,
|
|
"grad_norm": 0.7313005924224854,
|
|
"learning_rate": 4.8378125000000005e-06,
|
|
"loss": 0.0279,
|
|
"step": 9525
|
|
},
|
|
{
|
|
"epoch": 4.393834828617437,
|
|
"grad_norm": 6.102551460266113,
|
|
"learning_rate": 4.83e-06,
|
|
"loss": 0.0365,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 4.405337014032666,
|
|
"grad_norm": 3.0939571857452393,
|
|
"learning_rate": 4.8221875e-06,
|
|
"loss": 0.0235,
|
|
"step": 9575
|
|
},
|
|
{
|
|
"epoch": 4.416839199447895,
|
|
"grad_norm": 6.218958854675293,
|
|
"learning_rate": 4.814375e-06,
|
|
"loss": 0.04,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 4.428341384863124,
|
|
"grad_norm": 3.746630907058716,
|
|
"learning_rate": 4.806562500000001e-06,
|
|
"loss": 0.0252,
|
|
"step": 9625
|
|
},
|
|
{
|
|
"epoch": 4.439843570278353,
|
|
"grad_norm": 2.458735466003418,
|
|
"learning_rate": 4.7987500000000004e-06,
|
|
"loss": 0.0407,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 4.451345755693581,
|
|
"grad_norm": 0.9115240573883057,
|
|
"learning_rate": 4.7909375e-06,
|
|
"loss": 0.0216,
|
|
"step": 9675
|
|
},
|
|
{
|
|
"epoch": 4.4628479411088104,
|
|
"grad_norm": 2.0379064083099365,
|
|
"learning_rate": 4.783125e-06,
|
|
"loss": 0.0326,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 4.4743501265240395,
|
|
"grad_norm": 3.2523980140686035,
|
|
"learning_rate": 4.7753125e-06,
|
|
"loss": 0.0216,
|
|
"step": 9725
|
|
},
|
|
{
|
|
"epoch": 4.485852311939269,
|
|
"grad_norm": 0.8420194387435913,
|
|
"learning_rate": 4.7675000000000005e-06,
|
|
"loss": 0.0337,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 4.497354497354498,
|
|
"grad_norm": 4.6802167892456055,
|
|
"learning_rate": 4.7596875e-06,
|
|
"loss": 0.0245,
|
|
"step": 9775
|
|
},
|
|
{
|
|
"epoch": 4.508856682769727,
|
|
"grad_norm": 1.313185214996338,
|
|
"learning_rate": 4.751875e-06,
|
|
"loss": 0.0261,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 4.520358868184955,
|
|
"grad_norm": 0.6525618433952332,
|
|
"learning_rate": 4.7440625e-06,
|
|
"loss": 0.0222,
|
|
"step": 9825
|
|
},
|
|
{
|
|
"epoch": 4.531861053600184,
|
|
"grad_norm": 1.9755631685256958,
|
|
"learning_rate": 4.736250000000001e-06,
|
|
"loss": 0.0328,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 4.543363239015413,
|
|
"grad_norm": 5.463117599487305,
|
|
"learning_rate": 4.7284374999999996e-06,
|
|
"loss": 0.0243,
|
|
"step": 9875
|
|
},
|
|
{
|
|
"epoch": 4.554865424430642,
|
|
"grad_norm": 2.689207077026367,
|
|
"learning_rate": 4.720625e-06,
|
|
"loss": 0.0294,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 4.566367609845871,
|
|
"grad_norm": 2.54421067237854,
|
|
"learning_rate": 4.7128125e-06,
|
|
"loss": 0.0249,
|
|
"step": 9925
|
|
},
|
|
{
|
|
"epoch": 4.577869795261099,
|
|
"grad_norm": 9.304972648620605,
|
|
"learning_rate": 4.705000000000001e-06,
|
|
"loss": 0.0413,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 4.5893719806763285,
|
|
"grad_norm": 1.8700443506240845,
|
|
"learning_rate": 4.6971875000000005e-06,
|
|
"loss": 0.0199,
|
|
"step": 9975
|
|
},
|
|
{
|
|
"epoch": 4.6008741660915575,
|
|
"grad_norm": 1.8287808895111084,
|
|
"learning_rate": 4.689375e-06,
|
|
"loss": 0.0371,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 4.6008741660915575,
|
|
"eval_loss": 0.18389073014259338,
|
|
"eval_runtime": 5170.7691,
|
|
"eval_samples_per_second": 1.836,
|
|
"eval_steps_per_second": 0.23,
|
|
"eval_wer": 0.09987187700192185,
|
|
"step": 10000
|
|
},
|
|
{
"epoch": 4.612376351506787,
"grad_norm": 4.324032306671143,
"learning_rate": 4.6815625e-06,
"loss": 0.0254,
"step": 10025
},
{
"epoch": 4.623878536922016,
"grad_norm": 1.790111780166626,
"learning_rate": 4.67375e-06,
"loss": 0.0317,
"step": 10050
},
{
"epoch": 4.635380722337244,
"grad_norm": 3.236027717590332,
"learning_rate": 4.665937500000001e-06,
"loss": 0.0169,
"step": 10075
},
{
"epoch": 4.646882907752473,
"grad_norm": 2.1184232234954834,
"learning_rate": 4.658125e-06,
"loss": 0.0272,
"step": 10100
},
{
"epoch": 4.658385093167702,
"grad_norm": 2.2520174980163574,
"learning_rate": 4.6503125e-06,
"loss": 0.0195,
"step": 10125
},
{
"epoch": 4.669887278582931,
"grad_norm": 3.1109213829040527,
"learning_rate": 4.6425e-06,
"loss": 0.0377,
"step": 10150
},
{
"epoch": 4.68138946399816,
"grad_norm": 0.9269886612892151,
"learning_rate": 4.634687500000001e-06,
"loss": 0.0245,
"step": 10175
},
{
"epoch": 4.692891649413388,
"grad_norm": 2.5646307468414307,
"learning_rate": 4.626875e-06,
"loss": 0.0335,
"step": 10200
},
{
"epoch": 4.704393834828617,
"grad_norm": 2.609894037246704,
"learning_rate": 4.6190625e-06,
"loss": 0.0286,
"step": 10225
},
{
"epoch": 4.7158960202438465,
"grad_norm": 4.488738059997559,
"learning_rate": 4.61125e-06,
"loss": 0.0311,
"step": 10250
},
{
"epoch": 4.7273982056590755,
"grad_norm": 1.5146970748901367,
"learning_rate": 4.603437500000001e-06,
"loss": 0.0193,
"step": 10275
},
{
"epoch": 4.738900391074305,
"grad_norm": 0.9895784258842468,
"learning_rate": 4.595625e-06,
"loss": 0.0263,
"step": 10300
},
{
"epoch": 4.750402576489533,
"grad_norm": 5.285503387451172,
"learning_rate": 4.5878125e-06,
"loss": 0.0262,
"step": 10325
},
{
"epoch": 4.761904761904762,
"grad_norm": 0.3710523843765259,
"learning_rate": 4.58e-06,
"loss": 0.0414,
"step": 10350
},
{
"epoch": 4.773406947319991,
"grad_norm": 1.5297555923461914,
"learning_rate": 4.572187500000001e-06,
"loss": 0.0226,
"step": 10375
},
{
"epoch": 4.78490913273522,
"grad_norm": 2.7512269020080566,
"learning_rate": 4.564375e-06,
"loss": 0.0296,
"step": 10400
},
{
"epoch": 4.796411318150449,
"grad_norm": 7.514578342437744,
"learning_rate": 4.5565625000000005e-06,
"loss": 0.0252,
"step": 10425
},
{
"epoch": 4.807913503565677,
"grad_norm": 2.6303036212921143,
"learning_rate": 4.54875e-06,
"loss": 0.0281,
"step": 10450
},
{
"epoch": 4.819415688980906,
"grad_norm": 2.5647971630096436,
"learning_rate": 4.5409375e-06,
"loss": 0.0236,
"step": 10475
},
{
"epoch": 4.830917874396135,
"grad_norm": 1.5822607278823853,
"learning_rate": 4.533125e-06,
"loss": 0.03,
"step": 10500
},
{
"epoch": 4.8424200598113645,
"grad_norm": 3.3824303150177,
"learning_rate": 4.5253125e-06,
"loss": 0.0277,
"step": 10525
},
{
"epoch": 4.8539222452265935,
"grad_norm": 2.4250597953796387,
"learning_rate": 4.5175e-06,
"loss": 0.0288,
"step": 10550
},
{
"epoch": 4.865424430641822,
"grad_norm": 5.316204071044922,
"learning_rate": 4.5096875e-06,
"loss": 0.0248,
"step": 10575
},
{
"epoch": 4.876926616057051,
"grad_norm": 2.7238681316375732,
"learning_rate": 4.501875000000001e-06,
"loss": 0.0332,
"step": 10600
},
{
"epoch": 4.88842880147228,
"grad_norm": 3.1399307250976562,
"learning_rate": 4.4940625e-06,
"loss": 0.0222,
"step": 10625
},
{
"epoch": 4.899930986887509,
"grad_norm": 2.6083264350891113,
"learning_rate": 4.4862500000000005e-06,
"loss": 0.0325,
"step": 10650
},
{
"epoch": 4.911433172302738,
"grad_norm": 1.1990541219711304,
"learning_rate": 4.4784375e-06,
"loss": 0.0218,
"step": 10675
},
{
"epoch": 4.922935357717966,
"grad_norm": 2.0461394786834717,
"learning_rate": 4.470625000000001e-06,
"loss": 0.0312,
"step": 10700
},
{
"epoch": 4.934437543133195,
"grad_norm": 0.7773350477218628,
"learning_rate": 4.4628125e-06,
"loss": 0.0256,
"step": 10725
},
{
"epoch": 4.945939728548424,
"grad_norm": 2.645052671432495,
"learning_rate": 4.4550000000000005e-06,
"loss": 0.0334,
"step": 10750
},
{
"epoch": 4.957441913963653,
"grad_norm": 3.0270519256591797,
"learning_rate": 4.4471875e-06,
"loss": 0.0221,
"step": 10775
},
{
"epoch": 4.9689440993788825,
"grad_norm": 2.553375720977783,
"learning_rate": 4.439375e-06,
"loss": 0.0337,
"step": 10800
},
{
"epoch": 4.980446284794111,
"grad_norm": 5.496345520019531,
"learning_rate": 4.4315625e-06,
"loss": 0.0197,
"step": 10825
},
{
"epoch": 4.99194847020934,
"grad_norm": 2.0996806621551514,
"learning_rate": 4.4240625000000005e-06,
"loss": 0.0389,
"step": 10850
},
{
"epoch": 5.003450655624569,
"grad_norm": 1.9347798824310303,
"learning_rate": 4.41625e-06,
"loss": 0.0229,
"step": 10875
},
{
"epoch": 5.014952841039798,
"grad_norm": 3.568023443222046,
"learning_rate": 4.4084375e-06,
"loss": 0.0135,
"step": 10900
},
{
"epoch": 5.026455026455026,
"grad_norm": 0.6959520578384399,
"learning_rate": 4.400625e-06,
"loss": 0.0216,
"step": 10925
},
{
"epoch": 5.037957211870255,
"grad_norm": 0.2912887930870056,
"learning_rate": 4.3928125e-06,
"loss": 0.0157,
"step": 10950
},
{
"epoch": 5.049459397285484,
"grad_norm": 2.004884719848633,
"learning_rate": 4.385e-06,
"loss": 0.0208,
"step": 10975
},
{
"epoch": 5.060961582700713,
"grad_norm": 4.345211505889893,
"learning_rate": 4.3771875e-06,
"loss": 0.0163,
"step": 11000
},
{
"epoch": 5.072463768115942,
"grad_norm": 1.2300424575805664,
"learning_rate": 4.369375000000001e-06,
"loss": 0.0261,
"step": 11025
},
{
"epoch": 5.0839659535311705,
"grad_norm": 2.4417171478271484,
"learning_rate": 4.3615625e-06,
"loss": 0.0177,
"step": 11050
},
{
"epoch": 5.0954681389464,
"grad_norm": 0.4104786217212677,
"learning_rate": 4.3537500000000005e-06,
"loss": 0.0221,
"step": 11075
},
{
"epoch": 5.106970324361629,
"grad_norm": 1.5912240743637085,
"learning_rate": 4.3459375e-06,
"loss": 0.0108,
"step": 11100
},
{
"epoch": 5.118472509776858,
"grad_norm": 2.6581289768218994,
"learning_rate": 4.338125000000001e-06,
"loss": 0.0196,
"step": 11125
},
{
"epoch": 5.129974695192087,
"grad_norm": 0.41683322191238403,
"learning_rate": 4.3303125e-06,
"loss": 0.012,
"step": 11150
},
{
"epoch": 5.141476880607315,
"grad_norm": 0.7937358021736145,
"learning_rate": 4.3225000000000005e-06,
"loss": 0.0198,
"step": 11175
},
{
"epoch": 5.152979066022544,
"grad_norm": 2.042461633682251,
"learning_rate": 4.3146875e-06,
"loss": 0.0157,
"step": 11200
},
{
"epoch": 5.164481251437773,
"grad_norm": 1.6334706544876099,
"learning_rate": 4.306875e-06,
"loss": 0.0185,
"step": 11225
},
{
"epoch": 5.175983436853002,
"grad_norm": 2.2739877700805664,
"learning_rate": 4.2990625e-06,
"loss": 0.0178,
"step": 11250
},
{
"epoch": 5.187485622268231,
"grad_norm": 1.6258018016815186,
"learning_rate": 4.29125e-06,
"loss": 0.0173,
"step": 11275
},
{
"epoch": 5.1989878076834595,
"grad_norm": 4.973881244659424,
"learning_rate": 4.2834375000000004e-06,
"loss": 0.0129,
"step": 11300
},
{
"epoch": 5.2104899930986885,
"grad_norm": 0.9437686204910278,
"learning_rate": 4.275625e-06,
"loss": 0.0237,
"step": 11325
},
{
"epoch": 5.221992178513918,
"grad_norm": 3.4761786460876465,
"learning_rate": 4.2678125e-06,
"loss": 0.0147,
"step": 11350
},
{
"epoch": 5.233494363929147,
"grad_norm": 2.692365884780884,
"learning_rate": 4.26e-06,
"loss": 0.0189,
"step": 11375
},
{
"epoch": 5.244996549344376,
"grad_norm": 2.9584712982177734,
"learning_rate": 4.2521875000000005e-06,
"loss": 0.0174,
"step": 11400
},
{
"epoch": 5.256498734759604,
"grad_norm": 9.468619346618652,
"learning_rate": 4.244375e-06,
"loss": 0.0176,
"step": 11425
},
{
"epoch": 5.268000920174833,
"grad_norm": 0.9750581383705139,
"learning_rate": 4.2365625e-06,
"loss": 0.021,
"step": 11450
},
{
"epoch": 5.279503105590062,
"grad_norm": 3.0626797676086426,
"learning_rate": 4.22875e-06,
"loss": 0.0298,
"step": 11475
},
{
"epoch": 5.291005291005291,
"grad_norm": 2.2305479049682617,
"learning_rate": 4.220937500000001e-06,
"loss": 0.0161,
"step": 11500
},
{
"epoch": 5.30250747642052,
"grad_norm": 0.5742190480232239,
"learning_rate": 4.2131250000000004e-06,
"loss": 0.0216,
"step": 11525
},
{
"epoch": 5.314009661835748,
"grad_norm": 2.7238235473632812,
"learning_rate": 4.2053125e-06,
"loss": 0.0178,
"step": 11550
},
{
"epoch": 5.3255118472509775,
"grad_norm": 1.7739872932434082,
"learning_rate": 4.1975e-06,
"loss": 0.0203,
"step": 11575
},
{
"epoch": 5.3370140326662066,
"grad_norm": 0.9364586472511292,
"learning_rate": 4.1896875e-06,
"loss": 0.0185,
"step": 11600
},
{
"epoch": 5.348516218081436,
"grad_norm": 3.462063789367676,
"learning_rate": 4.1818750000000005e-06,
"loss": 0.027,
"step": 11625
},
{
"epoch": 5.360018403496665,
"grad_norm": 4.4536051750183105,
"learning_rate": 4.1740625e-06,
"loss": 0.0137,
"step": 11650
},
{
"epoch": 5.371520588911893,
"grad_norm": 1.4285831451416016,
"learning_rate": 4.16625e-06,
"loss": 0.022,
"step": 11675
},
{
"epoch": 5.383022774327122,
"grad_norm": 0.1919490098953247,
"learning_rate": 4.1584375e-06,
"loss": 0.0127,
"step": 11700
},
{
"epoch": 5.394524959742351,
"grad_norm": 1.8607268333435059,
"learning_rate": 4.150625000000001e-06,
"loss": 0.0221,
"step": 11725
},
{
"epoch": 5.40602714515758,
"grad_norm": 2.7783443927764893,
"learning_rate": 4.1428125e-06,
"loss": 0.0114,
"step": 11750
},
{
"epoch": 5.417529330572809,
"grad_norm": 0.5502150058746338,
"learning_rate": 4.135e-06,
"loss": 0.0283,
"step": 11775
},
{
"epoch": 5.429031515988037,
"grad_norm": 4.172156810760498,
"learning_rate": 4.1271875e-06,
"loss": 0.0148,
"step": 11800
},
{
"epoch": 5.440533701403266,
"grad_norm": 4.028002738952637,
"learning_rate": 4.119375000000001e-06,
"loss": 0.0248,
"step": 11825
},
{
"epoch": 5.4520358868184955,
"grad_norm": 2.3138980865478516,
"learning_rate": 4.1115625000000005e-06,
"loss": 0.0132,
"step": 11850
},
{
"epoch": 5.463538072233725,
"grad_norm": 1.1849713325500488,
"learning_rate": 4.10375e-06,
"loss": 0.0226,
"step": 11875
},
{
"epoch": 5.475040257648954,
"grad_norm": 0.587064266204834,
"learning_rate": 4.0959375e-06,
"loss": 0.019,
"step": 11900
},
{
"epoch": 5.486542443064182,
"grad_norm": 2.445380449295044,
"learning_rate": 4.088125e-06,
"loss": 0.0294,
"step": 11925
},
{
"epoch": 5.498044628479411,
"grad_norm": 0.9706999659538269,
"learning_rate": 4.080312500000001e-06,
"loss": 0.0243,
"step": 11950
},
{
"epoch": 5.50954681389464,
"grad_norm": 3.1857030391693115,
"learning_rate": 4.0724999999999995e-06,
"loss": 0.0285,
"step": 11975
},
{
"epoch": 5.521048999309869,
"grad_norm": 2.901566982269287,
"learning_rate": 4.0646875e-06,
"loss": 0.0144,
"step": 12000
},
{
"epoch": 5.532551184725098,
"grad_norm": 1.5645906925201416,
"learning_rate": 4.056875e-06,
"loss": 0.0209,
"step": 12025
},
{
"epoch": 5.544053370140326,
"grad_norm": 0.3500732481479645,
"learning_rate": 4.049062500000001e-06,
"loss": 0.0154,
"step": 12050
},
{
"epoch": 5.555555555555555,
"grad_norm": 1.8092036247253418,
"learning_rate": 4.04125e-06,
"loss": 0.0244,
"step": 12075
},
{
"epoch": 5.567057740970784,
"grad_norm": 0.872996985912323,
"learning_rate": 4.0334375e-06,
"loss": 0.0128,
"step": 12100
},
{
"epoch": 5.5785599263860135,
"grad_norm": 1.7378208637237549,
"learning_rate": 4.025625e-06,
"loss": 0.0173,
"step": 12125
},
{
"epoch": 5.590062111801243,
"grad_norm": 2.589615821838379,
"learning_rate": 4.017812500000001e-06,
"loss": 0.019,
"step": 12150
},
{
"epoch": 5.601564297216471,
"grad_norm": 1.2749011516571045,
"learning_rate": 4.01e-06,
"loss": 0.0219,
"step": 12175
},
{
"epoch": 5.6130664826317,
"grad_norm": 3.331284761428833,
"learning_rate": 4.0021875e-06,
"loss": 0.0187,
"step": 12200
},
{
"epoch": 5.624568668046929,
"grad_norm": 3.3452606201171875,
"learning_rate": 3.994375e-06,
"loss": 0.0251,
"step": 12225
},
{
"epoch": 5.636070853462158,
"grad_norm": 3.171482563018799,
"learning_rate": 3.9865625e-06,
"loss": 0.0111,
"step": 12250
},
{
"epoch": 5.647573038877387,
"grad_norm": 1.3494371175765991,
"learning_rate": 3.978750000000001e-06,
"loss": 0.0252,
"step": 12275
},
{
"epoch": 5.659075224292615,
"grad_norm": 5.425341606140137,
"learning_rate": 3.9709375e-06,
"loss": 0.0154,
"step": 12300
},
{
"epoch": 5.670577409707844,
"grad_norm": 3.904033899307251,
"learning_rate": 3.963125e-06,
"loss": 0.0257,
"step": 12325
},
{
"epoch": 5.682079595123073,
"grad_norm": 6.220149993896484,
"learning_rate": 3.9553125e-06,
"loss": 0.0197,
"step": 12350
},
{
"epoch": 5.693581780538302,
"grad_norm": 2.4043924808502197,
"learning_rate": 3.947500000000001e-06,
"loss": 0.0206,
"step": 12375
},
{
"epoch": 5.7050839659535315,
"grad_norm": 1.716847538948059,
"learning_rate": 3.9396875e-06,
"loss": 0.0128,
"step": 12400
},
{
"epoch": 5.71658615136876,
"grad_norm": 2.778402805328369,
"learning_rate": 3.931875e-06,
"loss": 0.025,
"step": 12425
},
{
"epoch": 5.728088336783989,
"grad_norm": 1.2575827836990356,
"learning_rate": 3.9240625e-06,
"loss": 0.0165,
"step": 12450
},
{
"epoch": 5.739590522199218,
"grad_norm": 1.911150574684143,
"learning_rate": 3.916250000000001e-06,
"loss": 0.0274,
"step": 12475
},
{
"epoch": 5.751092707614447,
"grad_norm": 4.2152099609375,
"learning_rate": 3.9084375e-06,
"loss": 0.0205,
"step": 12500
},
{
"epoch": 5.762594893029676,
"grad_norm": 2.5663771629333496,
"learning_rate": 3.9006250000000005e-06,
"loss": 0.0233,
"step": 12525
},
{
"epoch": 5.774097078444904,
"grad_norm": 2.787692070007324,
"learning_rate": 3.8928125e-06,
"loss": 0.0168,
"step": 12550
},
{
"epoch": 5.785599263860133,
"grad_norm": 1.5613856315612793,
"learning_rate": 3.885e-06,
"loss": 0.0208,
"step": 12575
},
{
"epoch": 5.797101449275362,
"grad_norm": 4.623484134674072,
"learning_rate": 3.8771875e-06,
"loss": 0.0154,
"step": 12600
},
{
"epoch": 5.808603634690591,
"grad_norm": 1.0770273208618164,
"learning_rate": 3.869375e-06,
"loss": 0.0251,
"step": 12625
},
{
"epoch": 5.8201058201058205,
"grad_norm": 1.7411080598831177,
"learning_rate": 3.8615625e-06,
"loss": 0.0187,
"step": 12650
},
{
"epoch": 5.831608005521049,
"grad_norm": 1.9490394592285156,
"learning_rate": 3.85375e-06,
"loss": 0.0255,
"step": 12675
},
{
"epoch": 5.843110190936278,
"grad_norm": 3.750220537185669,
"learning_rate": 3.8459375e-06,
"loss": 0.015,
"step": 12700
},
{
"epoch": 5.854612376351507,
"grad_norm": 4.523986339569092,
"learning_rate": 3.838125e-06,
"loss": 0.0251,
"step": 12725
},
{
"epoch": 5.866114561766736,
"grad_norm": 0.7033637762069702,
"learning_rate": 3.8303125000000004e-06,
"loss": 0.0109,
"step": 12750
},
{
"epoch": 5.877616747181965,
"grad_norm": 1.5173693895339966,
"learning_rate": 3.8225e-06,
"loss": 0.026,
"step": 12775
},
{
"epoch": 5.889118932597193,
"grad_norm": 1.4995800256729126,
"learning_rate": 3.8146875e-06,
"loss": 0.0194,
"step": 12800
},
{
"epoch": 5.900621118012422,
"grad_norm": 2.203517436981201,
"learning_rate": 3.806875e-06,
"loss": 0.0292,
"step": 12825
},
{
"epoch": 5.912123303427651,
"grad_norm": 2.84173321723938,
"learning_rate": 3.7990625e-06,
"loss": 0.0159,
"step": 12850
},
{
"epoch": 5.92362548884288,
"grad_norm": 0.5612061619758606,
"learning_rate": 3.7912500000000003e-06,
"loss": 0.0288,
"step": 12875
},
{
"epoch": 5.935127674258109,
"grad_norm": 0.5369181632995605,
"learning_rate": 3.7834375000000006e-06,
"loss": 0.015,
"step": 12900
},
{
"epoch": 5.946629859673338,
"grad_norm": 2.2607927322387695,
"learning_rate": 3.775625e-06,
"loss": 0.0286,
"step": 12925
},
{
"epoch": 5.958132045088567,
"grad_norm": 2.4500582218170166,
"learning_rate": 3.7678125e-06,
"loss": 0.0204,
"step": 12950
},
{
"epoch": 5.969634230503796,
"grad_norm": 1.60666024684906,
"learning_rate": 3.7600000000000004e-06,
"loss": 0.0199,
"step": 12975
},
{
"epoch": 5.981136415919025,
"grad_norm": 0.704494059085846,
"learning_rate": 3.7521875000000007e-06,
"loss": 0.0117,
"step": 13000
},
{
"epoch": 5.992638601334254,
"grad_norm": 0.9249849319458008,
"learning_rate": 3.744375e-06,
"loss": 0.0235,
"step": 13025
},
{
"epoch": 6.004140786749482,
"grad_norm": 3.008746862411499,
"learning_rate": 3.7365625000000003e-06,
"loss": 0.0129,
"step": 13050
},
{
"epoch": 6.015642972164711,
"grad_norm": 1.7184109687805176,
"learning_rate": 3.7287500000000005e-06,
"loss": 0.0127,
"step": 13075
},
{
"epoch": 6.02714515757994,
"grad_norm": 0.9730533957481384,
"learning_rate": 3.7209375000000003e-06,
"loss": 0.0175,
"step": 13100
},
{
"epoch": 6.038647342995169,
"grad_norm": 1.0653347969055176,
"learning_rate": 3.713125e-06,
"loss": 0.0088,
"step": 13125
},
{
"epoch": 6.050149528410398,
"grad_norm": 2.9114506244659424,
"learning_rate": 3.7053125e-06,
"loss": 0.0137,
"step": 13150
},
{
"epoch": 6.0616517138256265,
"grad_norm": 0.32335150241851807,
"learning_rate": 3.6975e-06,
"loss": 0.0077,
"step": 13175
},
{
"epoch": 6.073153899240856,
"grad_norm": 0.48962631821632385,
"learning_rate": 3.6896875000000004e-06,
"loss": 0.0198,
"step": 13200
},
{
"epoch": 6.084656084656085,
"grad_norm": 0.669650137424469,
"learning_rate": 3.681875e-06,
"loss": 0.0121,
"step": 13225
},
{
"epoch": 6.096158270071314,
"grad_norm": 3.413156270980835,
"learning_rate": 3.6740625e-06,
"loss": 0.0128,
"step": 13250
},
{
"epoch": 6.107660455486543,
"grad_norm": 2.7622175216674805,
"learning_rate": 3.6662500000000003e-06,
"loss": 0.0132,
"step": 13275
},
{
"epoch": 6.119162640901771,
"grad_norm": 2.454317808151245,
"learning_rate": 3.6584375000000005e-06,
"loss": 0.0189,
"step": 13300
},
{
"epoch": 6.130664826317,
"grad_norm": 1.0187458992004395,
"learning_rate": 3.650625e-06,
"loss": 0.0099,
"step": 13325
},
{
"epoch": 6.142167011732229,
"grad_norm": 1.0523874759674072,
"learning_rate": 3.6428125e-06,
"loss": 0.0186,
"step": 13350
},
{
"epoch": 6.153669197147458,
"grad_norm": 2.4889376163482666,
"learning_rate": 3.6350000000000003e-06,
"loss": 0.0102,
"step": 13375
},
{
"epoch": 6.165171382562687,
"grad_norm": 1.4157731533050537,
"learning_rate": 3.6275000000000004e-06,
"loss": 0.0232,
"step": 13400
},
{
"epoch": 6.1766735679779154,
"grad_norm": 2.285750389099121,
"learning_rate": 3.6196875000000007e-06,
"loss": 0.0112,
"step": 13425
},
{
"epoch": 6.1881757533931445,
"grad_norm": 3.0357449054718018,
"learning_rate": 3.611875e-06,
"loss": 0.0151,
"step": 13450
},
{
"epoch": 6.199677938808374,
"grad_norm": 1.9853347539901733,
"learning_rate": 3.6040625000000003e-06,
"loss": 0.0119,
"step": 13475
},
{
"epoch": 6.211180124223603,
"grad_norm": 3.489882469177246,
"learning_rate": 3.5962500000000005e-06,
"loss": 0.0156,
"step": 13500
},
{
"epoch": 6.222682309638832,
"grad_norm": 7.6776299476623535,
"learning_rate": 3.5884375000000003e-06,
"loss": 0.0107,
"step": 13525
},
{
"epoch": 6.23418449505406,
"grad_norm": 1.0730276107788086,
"learning_rate": 3.580625e-06,
"loss": 0.0194,
"step": 13550
},
{
"epoch": 6.245686680469289,
"grad_norm": 41.845558166503906,
"learning_rate": 3.5728125e-06,
"loss": 0.0115,
"step": 13575
},
{
"epoch": 6.257188865884518,
"grad_norm": 2.866692543029785,
"learning_rate": 3.565e-06,
"loss": 0.0159,
"step": 13600
},
{
"epoch": 6.268691051299747,
"grad_norm": 3.9908344745635986,
"learning_rate": 3.5571875000000004e-06,
"loss": 0.0099,
"step": 13625
},
{
"epoch": 6.280193236714976,
"grad_norm": 2.3821098804473877,
"learning_rate": 3.549375e-06,
"loss": 0.0191,
"step": 13650
},
{
"epoch": 6.291695422130204,
"grad_norm": 0.6831459403038025,
"learning_rate": 3.5415625e-06,
"loss": 0.0123,
"step": 13675
},
{
"epoch": 6.3031976075454335,
"grad_norm": 1.3033053874969482,
"learning_rate": 3.5337500000000003e-06,
"loss": 0.0183,
"step": 13700
},
{
"epoch": 6.3146997929606625,
"grad_norm": 0.3791348934173584,
"learning_rate": 3.5259375000000005e-06,
"loss": 0.0078,
"step": 13725
},
{
"epoch": 6.326201978375892,
"grad_norm": 0.43763110041618347,
"learning_rate": 3.518125e-06,
"loss": 0.0131,
"step": 13750
},
{
"epoch": 6.337704163791121,
"grad_norm": 2.630368232727051,
"learning_rate": 3.5103125e-06,
"loss": 0.0071,
"step": 13775
},
{
"epoch": 6.349206349206349,
"grad_norm": 0.5663381814956665,
"learning_rate": 3.5025000000000003e-06,
"loss": 0.0212,
"step": 13800
},
{
"epoch": 6.360708534621578,
"grad_norm": 2.189028739929199,
"learning_rate": 3.4946875000000006e-06,
"loss": 0.0137,
"step": 13825
},
{
"epoch": 6.372210720036807,
"grad_norm": 1.2615488767623901,
"learning_rate": 3.486875e-06,
"loss": 0.0231,
"step": 13850
},
{
"epoch": 6.383712905452036,
"grad_norm": 1.6485449075698853,
"learning_rate": 3.4790625e-06,
"loss": 0.0116,
"step": 13875
},
{
"epoch": 6.395215090867265,
"grad_norm": 3.103294610977173,
"learning_rate": 3.47125e-06,
"loss": 0.0198,
"step": 13900
},
{
"epoch": 6.406717276282493,
"grad_norm": 6.216635704040527,
"learning_rate": 3.4634375000000002e-06,
"loss": 0.0137,
"step": 13925
},
{
"epoch": 6.418219461697722,
"grad_norm": 3.772575855255127,
"learning_rate": 3.4556249999999996e-06,
"loss": 0.0207,
"step": 13950
},
{
"epoch": 6.4297216471129515,
"grad_norm": 0.9796826243400574,
"learning_rate": 3.4478125e-06,
"loss": 0.0094,
"step": 13975
},
{
"epoch": 6.4412238325281805,
"grad_norm": 4.275996208190918,
"learning_rate": 3.44e-06,
"loss": 0.019,
"step": 14000
},
{
"epoch": 6.45272601794341,
"grad_norm": 5.775335311889648,
"learning_rate": 3.4321875000000003e-06,
"loss": 0.0099,
"step": 14025
},
{
"epoch": 6.464228203358638,
"grad_norm": 2.5531623363494873,
"learning_rate": 3.4243750000000006e-06,
"loss": 0.0213,
"step": 14050
},
{
"epoch": 6.475730388773867,
"grad_norm": 0.7260667085647583,
"learning_rate": 3.4165625e-06,
"loss": 0.009,
"step": 14075
},
{
"epoch": 6.487232574189096,
"grad_norm": 1.1557809114456177,
"learning_rate": 3.40875e-06,
"loss": 0.0218,
"step": 14100
},
{
"epoch": 6.498734759604325,
"grad_norm": 1.5365861654281616,
"learning_rate": 3.4009375000000004e-06,
"loss": 0.0049,
"step": 14125
},
{
"epoch": 6.510236945019554,
"grad_norm": 4.833078861236572,
"learning_rate": 3.3931250000000007e-06,
"loss": 0.0196,
"step": 14150
},
{
"epoch": 6.521739130434782,
"grad_norm": 3.0063588619232178,
"learning_rate": 3.3853125e-06,
"loss": 0.0102,
"step": 14175
},
{
"epoch": 6.533241315850011,
"grad_norm": 3.6960723400115967,
"learning_rate": 3.3775000000000003e-06,
"loss": 0.0182,
"step": 14200
},
{
"epoch": 6.54474350126524,
"grad_norm": 0.6360275149345398,
"learning_rate": 3.3696875e-06,
"loss": 0.0093,
"step": 14225
},
{
"epoch": 6.5562456866804695,
"grad_norm": 2.3226568698883057,
"learning_rate": 3.3618750000000003e-06,
"loss": 0.0211,
"step": 14250
},
{
"epoch": 6.5677478720956985,
"grad_norm": 2.793957471847534,
"learning_rate": 3.3540624999999997e-06,
"loss": 0.0118,
"step": 14275
},
{
"epoch": 6.579250057510927,
"grad_norm": 0.3393898904323578,
"learning_rate": 3.34625e-06,
"loss": 0.0115,
"step": 14300
},
{
"epoch": 6.590752242926156,
"grad_norm": 0.8301447033882141,
"learning_rate": 3.3384375e-06,
"loss": 0.0069,
"step": 14325
},
{
"epoch": 6.602254428341385,
"grad_norm": 0.6139010190963745,
"learning_rate": 3.3306250000000004e-06,
"loss": 0.0112,
"step": 14350
},
{
"epoch": 6.613756613756614,
"grad_norm": 0.24870969355106354,
"learning_rate": 3.3228125e-06,
"loss": 0.0116,
"step": 14375
},
{
"epoch": 6.625258799171843,
"grad_norm": 1.3729124069213867,
"learning_rate": 3.315e-06,
"loss": 0.0115,
"step": 14400
},
{
"epoch": 6.636760984587071,
"grad_norm": 2.537313938140869,
"learning_rate": 3.3071875000000003e-06,
"loss": 0.0081,
"step": 14425
},
{
"epoch": 6.6482631700023,
"grad_norm": 2.8953866958618164,
"learning_rate": 3.2993750000000005e-06,
"loss": 0.0152,
"step": 14450
},
{
"epoch": 6.659765355417529,
"grad_norm": 0.47888821363449097,
"learning_rate": 3.2915625e-06,
"loss": 0.0098,
"step": 14475
},
{
"epoch": 6.671267540832758,
"grad_norm": 0.4152248799800873,
"learning_rate": 3.28375e-06,
"loss": 0.0285,
"step": 14500
},
{
"epoch": 6.6827697262479875,
"grad_norm": 0.7531673312187195,
"learning_rate": 3.2759375000000003e-06,
"loss": 0.0063,
"step": 14525
},
{
"epoch": 6.694271911663216,
"grad_norm": 2.4068517684936523,
"learning_rate": 3.268125e-06,
"loss": 0.0213,
"step": 14550
},
{
"epoch": 6.705774097078445,
"grad_norm": 2.0701165199279785,
"learning_rate": 3.2603125e-06,
"loss": 0.0096,
"step": 14575
},
{
"epoch": 6.717276282493674,
"grad_norm": 3.544454336166382,
"learning_rate": 3.2525e-06,
"loss": 0.0234,
"step": 14600
},
{
"epoch": 6.728778467908903,
"grad_norm": 0.8146782517433167,
"learning_rate": 3.2446875e-06,
"loss": 0.0066,
"step": 14625
},
{
"epoch": 6.740280653324132,
"grad_norm": 2.016157627105713,
"learning_rate": 3.2368750000000002e-06,
"loss": 0.0139,
"step": 14650
},
{
"epoch": 6.75178283873936,
"grad_norm": 3.558530807495117,
"learning_rate": 3.2290625000000005e-06,
"loss": 0.016,
"step": 14675
},
{
"epoch": 6.763285024154589,
"grad_norm": 2.2227861881256104,
"learning_rate": 3.22125e-06,
"loss": 0.0197,
"step": 14700
},
{
"epoch": 6.774787209569818,
"grad_norm": 1.3837552070617676,
"learning_rate": 3.2134375e-06,
"loss": 0.0096,
"step": 14725
},
{
"epoch": 6.786289394985047,
"grad_norm": 7.47805643081665,
"learning_rate": 3.2056250000000003e-06,
"loss": 0.0136,
"step": 14750
},
{
"epoch": 6.797791580400276,
"grad_norm": 2.1846354007720947,
"learning_rate": 3.1978125000000006e-06,
"loss": 0.0142,
"step": 14775
},
{
"epoch": 6.809293765815505,
"grad_norm": 0.4281105101108551,
"learning_rate": 3.19e-06,
"loss": 0.0168,
"step": 14800
},
{
"epoch": 6.820795951230734,
"grad_norm": 3.293972969055176,
"learning_rate": 3.1821875e-06,
"loss": 0.0125,
"step": 14825
},
{
"epoch": 6.832298136645963,
"grad_norm": 1.7543948888778687,
"learning_rate": 3.1743750000000004e-06,
"loss": 0.0125,
"step": 14850
},
{
"epoch": 6.843800322061192,
"grad_norm": 0.2645922005176544,
"learning_rate": 3.1665625000000002e-06,
"loss": 0.0094,
"step": 14875
},
{
"epoch": 6.855302507476421,
"grad_norm": 0.2636635899543762,
"learning_rate": 3.15875e-06,
"loss": 0.012,
"step": 14900
},
{
"epoch": 6.866804692891649,
"grad_norm": 7.451193332672119,
"learning_rate": 3.1509375000000003e-06,
"loss": 0.0078,
"step": 14925
},
{
"epoch": 6.878306878306878,
"grad_norm": 3.3548383712768555,
"learning_rate": 3.143125e-06,
"loss": 0.0155,
"step": 14950
},
{
"epoch": 6.889809063722107,
"grad_norm": 4.6555023193359375,
"learning_rate": 3.1353125000000003e-06,
"loss": 0.0108,
"step": 14975
},
{
"epoch": 6.901311249137336,
"grad_norm": 1.744326114654541,
"learning_rate": 3.1274999999999997e-06,
"loss": 0.0246,
"step": 15000
},
{
"epoch": 6.901311249137336,
"eval_loss": 0.20269618928432465,
"eval_runtime": 5635.5955,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"eval_wer": 0.09967969250480462,
"step": 15000
},
{
"epoch": 6.912813434552565,
"grad_norm": 0.898334801197052,
"learning_rate": 3.1196875e-06,
"loss": 0.0087,
"step": 15025
},
{
"epoch": 6.9243156199677935,
"grad_norm": 1.3468067646026611,
"learning_rate": 3.111875e-06,
"loss": 0.0212,
"step": 15050
},
{
"epoch": 6.935817805383023,
"grad_norm": 0.397684782743454,
"learning_rate": 3.1040625e-06,
"loss": 0.0127,
"step": 15075
},
{
"epoch": 6.947319990798252,
"grad_norm": 0.8051169514656067,
"learning_rate": 3.0962500000000002e-06,
"loss": 0.0186,
"step": 15100
},
{
"epoch": 6.958822176213481,
"grad_norm": 0.3554774224758148,
"learning_rate": 3.0884375e-06,
"loss": 0.0083,
"step": 15125
},
{
"epoch": 6.97032436162871,
"grad_norm": 2.511303424835205,
"learning_rate": 3.0806250000000003e-06,
"loss": 0.0144,
"step": 15150
},
{
"epoch": 6.981826547043938,
"grad_norm": 0.17072099447250366,
"learning_rate": 3.0728125e-06,
"loss": 0.0097,
"step": 15175
},
{
"epoch": 6.993328732459167,
"grad_norm": 0.16506649553775787,
"learning_rate": 3.0650000000000003e-06,
"loss": 0.0163,
"step": 15200
},
{
"epoch": 7.004830917874396,
"grad_norm": 0.12069711089134216,
"learning_rate": 3.0571875e-06,
"loss": 0.0079,
"step": 15225
},
{
"epoch": 7.016333103289625,
"grad_norm": 0.12159192562103271,
"learning_rate": 3.0493750000000003e-06,
"loss": 0.007,
"step": 15250
},
{
"epoch": 7.027835288704854,
"grad_norm": 0.15364721417427063,
"learning_rate": 3.0415625e-06,
"loss": 0.0155,
"step": 15275
},
{
"epoch": 7.0393374741200825,
"grad_norm": 0.516640841960907,
"learning_rate": 3.03375e-06,
"loss": 0.0092,
"step": 15300
},
{
"epoch": 7.0508396595353116,
"grad_norm": 0.5874799489974976,
"learning_rate": 3.0259375e-06,
"loss": 0.0079,
"step": 15325
},
{
"epoch": 7.062341844950541,
"grad_norm": 1.2351374626159668,
"learning_rate": 3.018125e-06,
"loss": 0.0148,
"step": 15350
},
{
"epoch": 7.07384403036577,
"grad_norm": 0.7071799635887146,
"learning_rate": 3.0103125000000002e-06,
"loss": 0.015,
"step": 15375
},
{
"epoch": 7.085346215780999,
"grad_norm": 7.619936943054199,
"learning_rate": 3.0025e-06,
"loss": 0.0063,
"step": 15400
},
{
"epoch": 7.096848401196227,
"grad_norm": 0.20653395354747772,
"learning_rate": 2.9946875000000003e-06,
"loss": 0.0088,
"step": 15425
},
{
"epoch": 7.108350586611456,
"grad_norm": 0.3233853280544281,
"learning_rate": 2.986875e-06,
"loss": 0.0058,
"step": 15450
},
{
"epoch": 7.119852772026685,
"grad_norm": 1.1703747510910034,
"learning_rate": 2.9790625000000003e-06,
"loss": 0.0149,
"step": 15475
},
{
"epoch": 7.131354957441914,
"grad_norm": 0.07214687764644623,
"learning_rate": 2.97125e-06,
"loss": 0.0041,
"step": 15500
},
{
"epoch": 7.142857142857143,
"grad_norm": 1.454972505569458,
"learning_rate": 2.9634375000000004e-06,
"loss": 0.0121,
"step": 15525
},
{
"epoch": 7.154359328272371,
"grad_norm": 0.7644615769386292,
"learning_rate": 2.955625e-06,
"loss": 0.0086,
"step": 15550
},
{
"epoch": 7.1658615136876,
"grad_norm": 3.402597188949585,
"learning_rate": 2.9478125000000004e-06,
"loss": 0.0095,
"step": 15575
},
{
"epoch": 7.17736369910283,
"grad_norm": 3.2097392082214355,
"learning_rate": 2.9400000000000002e-06,
"loss": 0.0104,
"step": 15600
},
{
"epoch": 7.188865884518059,
"grad_norm": 1.831574559211731,
"learning_rate": 2.9325000000000003e-06,
"loss": 0.0181,
"step": 15625
},
{
"epoch": 7.200368069933288,
"grad_norm": 2.661018133163452,
"learning_rate": 2.9246875e-06,
"loss": 0.0048,
"step": 15650
},
{
"epoch": 7.211870255348516,
"grad_norm": 1.9798295497894287,
"learning_rate": 2.9168750000000003e-06,
"loss": 0.0109,
"step": 15675
},
{
"epoch": 7.223372440763745,
"grad_norm": 1.1260945796966553,
"learning_rate": 2.9090625e-06,
"loss": 0.0076,
"step": 15700
},
{
"epoch": 7.234874626178974,
"grad_norm": 0.178892120718956,
"learning_rate": 2.90125e-06,
"loss": 0.0118,
"step": 15725
},
{
"epoch": 7.246376811594203,
"grad_norm": 2.5324740409851074,
"learning_rate": 2.8934374999999998e-06,
"loss": 0.0068,
"step": 15750
},
{
"epoch": 7.257878997009432,
"grad_norm": 0.655042290687561,
"learning_rate": 2.885625e-06,
"loss": 0.0075,
"step": 15775
},
{
"epoch": 7.26938118242466,
"grad_norm": 5.1071624755859375,
"learning_rate": 2.8778125000000002e-06,
"loss": 0.0065,
"step": 15800
},
{
"epoch": 7.280883367839889,
"grad_norm": 0.35025542974472046,
"learning_rate": 2.87e-06,
"loss": 0.0171,
"step": 15825
},
{
"epoch": 7.2923855532551185,
"grad_norm": 0.16820687055587769,
"learning_rate": 2.8621875000000003e-06,
"loss": 0.0077,
"step": 15850
},
{
"epoch": 7.303887738670348,
"grad_norm": 1.0210137367248535,
"learning_rate": 2.854375e-06,
"loss": 0.0088,
"step": 15875
},
{
"epoch": 7.315389924085577,
"grad_norm": 0.3880836069583893,
"learning_rate": 2.8465625000000003e-06,
"loss": 0.0155,
"step": 15900
},
{
"epoch": 7.326892109500805,
"grad_norm": 0.7286210060119629,
"learning_rate": 2.83875e-06,
"loss": 0.0153,
"step": 15925
},
{
"epoch": 7.338394294916034,
"grad_norm": 1.0173991918563843,
"learning_rate": 2.8309375000000004e-06,
"loss": 0.0082,
"step": 15950
},
{
"epoch": 7.349896480331263,
"grad_norm": 0.4859057664871216,
"learning_rate": 2.823125e-06,
"loss": 0.0104,
"step": 15975
},
{
"epoch": 7.361398665746492,
"grad_norm": 0.1217811331152916,
"learning_rate": 2.8153125000000004e-06,
"loss": 0.0075,
"step": 16000
},
{
"epoch": 7.372900851161721,
"grad_norm": 1.074440360069275,
"learning_rate": 2.8075000000000002e-06,
"loss": 0.0123,
"step": 16025
},
{
"epoch": 7.384403036576949,
"grad_norm": 5.824402332305908,
"learning_rate": 2.7996875e-06,
"loss": 0.0057,
"step": 16050
},
{
"epoch": 7.395905221992178,
"grad_norm": 2.5496740341186523,
"learning_rate": 2.791875e-06,
"loss": 0.0098,
"step": 16075
},
{
"epoch": 7.407407407407407,
"grad_norm": 5.917174339294434,
"learning_rate": 2.7840625e-06,
"loss": 0.0057,
"step": 16100
},
{
"epoch": 7.4189095928226365,
"grad_norm": 0.3634544909000397,
"learning_rate": 2.77625e-06,
"loss": 0.0128,
"step": 16125
},
{
"epoch": 7.430411778237866,
"grad_norm": 3.414944887161255,
"learning_rate": 2.7684375e-06,
"loss": 0.0086,
"step": 16150
},
{
"epoch": 7.441913963653094,
"grad_norm": 4.660429000854492,
"learning_rate": 2.760625e-06,
"loss": 0.0131,
"step": 16175
},
{
"epoch": 7.453416149068323,
"grad_norm": 2.309429407119751,
"learning_rate": 2.7528125e-06,
"loss": 0.0058,
"step": 16200
},
{
"epoch": 7.464918334483552,
"grad_norm": 5.828795909881592,
"learning_rate": 2.745e-06,
"loss": 0.0101,
"step": 16225
},
{
"epoch": 7.476420519898781,
"grad_norm": 0.5273516774177551,
"learning_rate": 2.7371875e-06,
"loss": 0.0059,
"step": 16250
},
{
"epoch": 7.48792270531401,
"grad_norm": 1.4716130495071411,
"learning_rate": 2.729375e-06,
"loss": 0.014,
"step": 16275
},
{
"epoch": 7.499424890729238,
"grad_norm": 1.8553239107131958,
"learning_rate": 2.7215625000000003e-06,
"loss": 0.0068,
"step": 16300
},
{
"epoch": 7.510927076144467,
"grad_norm": 2.6758711338043213,
"learning_rate": 2.71375e-06,
"loss": 0.0112,
"step": 16325
},
{
"epoch": 7.522429261559696,
"grad_norm": 0.19957537949085236,
"learning_rate": 2.7059375000000003e-06,
"loss": 0.0095,
"step": 16350
},
{
"epoch": 7.5339314469749254,
"grad_norm": 5.1007161140441895,
"learning_rate": 2.698125e-06,
"loss": 0.0109,
"step": 16375
},
{
"epoch": 7.545433632390154,
"grad_norm": 8.458159446716309,
"learning_rate": 2.6903125e-06,
"loss": 0.0071,
"step": 16400
},
{
"epoch": 7.556935817805383,
"grad_norm": 0.6693940758705139,
"learning_rate": 2.6825e-06,
"loss": 0.0187,
"step": 16425
},
{
"epoch": 7.568438003220612,
"grad_norm": 1.4511332511901855,
"learning_rate": 2.6746875e-06,
"loss": 0.0079,
"step": 16450
},
{
"epoch": 7.579940188635841,
"grad_norm": 3.054114818572998,
"learning_rate": 2.666875e-06,
"loss": 0.0185,
"step": 16475
},
{
"epoch": 7.59144237405107,
"grad_norm": 6.097264766693115,
"learning_rate": 2.6590625e-06,
"loss": 0.01,
"step": 16500
},
{
"epoch": 7.602944559466298,
"grad_norm": 1.9860618114471436,
"learning_rate": 2.6512500000000002e-06,
"loss": 0.0188,
"step": 16525
},
{
"epoch": 7.614446744881527,
"grad_norm": 0.1648847460746765,
"learning_rate": 2.6434375e-06,
"loss": 0.0114,
"step": 16550
},
{
"epoch": 7.625948930296756,
"grad_norm": 2.5525155067443848,
"learning_rate": 2.6356250000000003e-06,
"loss": 0.0158,
"step": 16575
},
{
"epoch": 7.637451115711985,
"grad_norm": 2.294396162033081,
"learning_rate": 2.6278125e-06,
"loss": 0.0093,
"step": 16600
},
{
"epoch": 7.648953301127214,
"grad_norm": 3.0494441986083984,
"learning_rate": 2.6200000000000003e-06,
"loss": 0.0233,
"step": 16625
},
{
"epoch": 7.660455486542443,
"grad_norm": 0.1415322870016098,
"learning_rate": 2.6121875e-06,
"loss": 0.0087,
"step": 16650
},
{
"epoch": 7.671957671957672,
"grad_norm": 2.2960498332977295,
"learning_rate": 2.6043750000000004e-06,
"loss": 0.0139,
"step": 16675
},
{
"epoch": 7.683459857372901,
"grad_norm": 0.5837172269821167,
"learning_rate": 2.5965625e-06,
"loss": 0.0055,
"step": 16700
},
{
"epoch": 7.69496204278813,
"grad_norm": 1.8971115350723267,
"learning_rate": 2.5887500000000004e-06,
"loss": 0.0115,
"step": 16725
},
{
"epoch": 7.706464228203359,
"grad_norm": 4.95072603225708,
"learning_rate": 2.5809375000000002e-06,
"loss": 0.0077,
"step": 16750
},
{
"epoch": 7.717966413618587,
"grad_norm": 2.0752921104431152,
"learning_rate": 2.573125e-06,
"loss": 0.0132,
"step": 16775
},
{
"epoch": 7.729468599033816,
"grad_norm": 0.07725714892148972,
"learning_rate": 2.5653125e-06,
"loss": 0.0089,
"step": 16800
},
{
"epoch": 7.740970784449045,
"grad_norm": 0.4947813153266907,
"learning_rate": 2.5575e-06,
"loss": 0.0089,
"step": 16825
},
{
"epoch": 7.752472969864274,
"grad_norm": 3.427701234817505,
"learning_rate": 2.5496875e-06,
"loss": 0.0087,
"step": 16850
},
{
"epoch": 7.763975155279503,
"grad_norm": 2.016444683074951,
"learning_rate": 2.541875e-06,
"loss": 0.0099,
"step": 16875
},
{
"epoch": 7.7754773406947315,
"grad_norm": 0.25170424580574036,
"learning_rate": 2.5340625e-06,
"loss": 0.007,
"step": 16900
},
{
"epoch": 7.786979526109961,
"grad_norm": 0.6936759948730469,
"learning_rate": 2.52625e-06,
"loss": 0.0069,
"step": 16925
},
{
"epoch": 7.79848171152519,
"grad_norm": 1.1857188940048218,
"learning_rate": 2.5184375e-06,
"loss": 0.008,
"step": 16950
},
{
"epoch": 7.809983896940419,
"grad_norm": 3.1954314708709717,
"learning_rate": 2.510625e-06,
"loss": 0.0147,
"step": 16975
},
{
"epoch": 7.821486082355648,
"grad_norm": 1.4031758308410645,
"learning_rate": 2.5028125e-06,
"loss": 0.0081,
"step": 17000
},
{
"epoch": 7.832988267770876,
"grad_norm": 0.52959805727005,
"learning_rate": 2.4950000000000003e-06,
"loss": 0.0123,
"step": 17025
},
{
"epoch": 7.844490453186105,
"grad_norm": 4.232771396636963,
"learning_rate": 2.4871875000000005e-06,
"loss": 0.0054,
"step": 17050
},
{
"epoch": 7.855992638601334,
"grad_norm": 0.2118764966726303,
"learning_rate": 2.4793750000000003e-06,
"loss": 0.0149,
"step": 17075
},
{
"epoch": 7.867494824016563,
"grad_norm": 0.4932823181152344,
"learning_rate": 2.4715625e-06,
"loss": 0.0098,
"step": 17100
},
{
"epoch": 7.878997009431792,
"grad_norm": 3.5741822719573975,
"learning_rate": 2.46375e-06,
"loss": 0.0128,
"step": 17125
},
{
"epoch": 7.8904991948470204,
"grad_norm": 1.6678423881530762,
"learning_rate": 2.4559375e-06,
"loss": 0.0071,
"step": 17150
},
{
"epoch": 7.9020013802622495,
"grad_norm": 3.0652177333831787,
"learning_rate": 2.448125e-06,
"loss": 0.0136,
"step": 17175
},
{
"epoch": 7.913503565677479,
"grad_norm": 1.4408318996429443,
"learning_rate": 2.4403125e-06,
"loss": 0.0045,
"step": 17200
},
{
"epoch": 7.925005751092708,
"grad_norm": 0.2646098732948303,
"learning_rate": 2.4325e-06,
"loss": 0.0155,
"step": 17225
},
{
"epoch": 7.936507936507937,
"grad_norm": 2.7373244762420654,
"learning_rate": 2.4246875000000002e-06,
"loss": 0.0067,
"step": 17250
},
{
"epoch": 7.948010121923165,
"grad_norm": 6.320342540740967,
"learning_rate": 2.416875e-06,
"loss": 0.0095,
"step": 17275
},
{
"epoch": 7.959512307338394,
"grad_norm": 0.8445401191711426,
"learning_rate": 2.4090625000000003e-06,
"loss": 0.0061,
"step": 17300
},
{
"epoch": 7.971014492753623,
"grad_norm": 1.2060354948043823,
"learning_rate": 2.40125e-06,
"loss": 0.0203,
"step": 17325
},
{
"epoch": 7.982516678168852,
"grad_norm": 1.851037621498108,
"learning_rate": 2.3934375000000003e-06,
"loss": 0.007,
"step": 17350
},
{
"epoch": 7.994018863584081,
"grad_norm": 0.901386022567749,
"learning_rate": 2.385625e-06,
"loss": 0.0088,
"step": 17375
},
{
"epoch": 8.00552104899931,
"grad_norm": 0.07420093566179276,
"learning_rate": 2.3778125000000004e-06,
"loss": 0.013,
"step": 17400
},
{
"epoch": 8.01702323441454,
"grad_norm": 0.05671022832393646,
"learning_rate": 2.37e-06,
"loss": 0.007,
"step": 17425
},
{
"epoch": 8.028525419829768,
"grad_norm": 0.5261373519897461,
"learning_rate": 2.3621875e-06,
"loss": 0.0071,
"step": 17450
},
{
"epoch": 8.040027605244996,
"grad_norm": 0.4346860349178314,
"learning_rate": 2.354375e-06,
"loss": 0.0067,
"step": 17475
},
{
"epoch": 8.051529790660226,
"grad_norm": 0.08124396204948425,
"learning_rate": 2.3465625e-06,
"loss": 0.0113,
"step": 17500
},
{
"epoch": 8.063031976075454,
"grad_norm": 2.8863022327423096,
"learning_rate": 2.33875e-06,
"loss": 0.0096,
"step": 17525
},
{
"epoch": 8.074534161490684,
"grad_norm": 0.4603404700756073,
"learning_rate": 2.3309375e-06,
"loss": 0.0111,
"step": 17550
},
{
"epoch": 8.086036346905912,
"grad_norm": 2.8856966495513916,
"learning_rate": 2.323125e-06,
"loss": 0.0059,
"step": 17575
},
{
"epoch": 8.09753853232114,
"grad_norm": 0.10746220499277115,
"learning_rate": 2.3153125e-06,
"loss": 0.0095,
"step": 17600
},
{
"epoch": 8.10904071773637,
"grad_norm": 0.5317927002906799,
"learning_rate": 2.3075e-06,
"loss": 0.006,
"step": 17625
},
{
"epoch": 8.120542903151598,
"grad_norm": 0.14040638506412506,
"learning_rate": 2.2996875e-06,
"loss": 0.0177,
"step": 17650
},
{
"epoch": 8.132045088566828,
"grad_norm": 0.5447073578834534,
"learning_rate": 2.2918750000000004e-06,
"loss": 0.0088,
"step": 17675
},
{
"epoch": 8.143547273982056,
"grad_norm": 2.9328360557556152,
"learning_rate": 2.2840625e-06,
"loss": 0.018,
"step": 17700
},
{
"epoch": 8.155049459397285,
"grad_norm": 0.19200760126113892,
"learning_rate": 2.2762500000000004e-06,
"loss": 0.0087,
"step": 17725
},
{
"epoch": 8.166551644812515,
"grad_norm": 0.07997579872608185,
"learning_rate": 2.2684375000000003e-06,
"loss": 0.0087,
"step": 17750
},
{
"epoch": 8.178053830227743,
"grad_norm": 2.795677900314331,
"learning_rate": 2.260625e-06,
"loss": 0.0095,
"step": 17775
},
{
"epoch": 8.189556015642973,
"grad_norm": 0.10675506293773651,
"learning_rate": 2.2528125e-06,
"loss": 0.0103,
"step": 17800
},
{
"epoch": 8.201058201058201,
"grad_norm": 1.484521746635437,
"learning_rate": 2.245e-06,
"loss": 0.0077,
"step": 17825
},
{
"epoch": 8.21256038647343,
"grad_norm": 1.530125379562378,
"learning_rate": 2.2371875e-06,
"loss": 0.0136,
"step": 17850
},
{
"epoch": 8.224062571888659,
"grad_norm": 8.444382667541504,
"learning_rate": 2.229375e-06,
"loss": 0.0061,
"step": 17875
},
{
"epoch": 8.235564757303887,
"grad_norm": 0.18373289704322815,
"learning_rate": 2.2215625e-06,
"loss": 0.0103,
"step": 17900
},
{
"epoch": 8.247066942719117,
"grad_norm": 8.391464233398438,
"learning_rate": 2.21375e-06,
"loss": 0.007,
"step": 17925
},
{
"epoch": 8.258569128134345,
"grad_norm": 0.1739552766084671,
"learning_rate": 2.2059375e-06,
"loss": 0.0041,
"step": 17950
},
{
"epoch": 8.270071313549574,
"grad_norm": 2.3926053047180176,
"learning_rate": 2.1981250000000002e-06,
"loss": 0.0049,
"step": 17975
},
{
"epoch": 8.281573498964804,
"grad_norm": 0.11882667243480682,
"learning_rate": 2.1903125e-06,
"loss": 0.0137,
"step": 18000
},
{
"epoch": 8.293075684380032,
"grad_norm": 0.7215703129768372,
"learning_rate": 2.1825000000000003e-06,
"loss": 0.0125,
"step": 18025
},
{
"epoch": 8.304577869795262,
"grad_norm": 0.07454714179039001,
"learning_rate": 2.1746875e-06,
"loss": 0.0048,
"step": 18050
},
{
"epoch": 8.31608005521049,
"grad_norm": 0.11876709014177322,
"learning_rate": 2.1668750000000003e-06,
"loss": 0.0083,
"step": 18075
},
{
"epoch": 8.327582240625718,
"grad_norm": 2.6528899669647217,
"learning_rate": 2.1590625e-06,
"loss": 0.0211,
"step": 18100
},
{
"epoch": 8.339084426040948,
"grad_norm": 0.046709995716810226,
"learning_rate": 2.15125e-06,
"loss": 0.0039,
"step": 18125
},
{
"epoch": 8.350586611456176,
"grad_norm": 0.5029109716415405,
"learning_rate": 2.1434374999999998e-06,
"loss": 0.0137,
"step": 18150
},
{
"epoch": 8.362088796871406,
"grad_norm": 8.609116554260254,
"learning_rate": 2.135625e-06,
"loss": 0.0081,
"step": 18175
},
{
"epoch": 8.373590982286634,
"grad_norm": 0.24618186056613922,
"learning_rate": 2.1278125e-06,
"loss": 0.0127,
"step": 18200
},
{
"epoch": 8.385093167701863,
"grad_norm": 0.26581060886383057,
"learning_rate": 2.12e-06,
"loss": 0.0058,
"step": 18225
},
{
"epoch": 8.396595353117092,
"grad_norm": 1.805894374847412,
"learning_rate": 2.1125e-06,
"loss": 0.0067,
"step": 18250
},
{
"epoch": 8.40809753853232,
"grad_norm": 0.11837717145681381,
"learning_rate": 2.1046875e-06,
"loss": 0.0085,
"step": 18275
},
{
"epoch": 8.41959972394755,
"grad_norm": 1.3716498613357544,
"learning_rate": 2.096875e-06,
"loss": 0.01,
"step": 18300
},
{
"epoch": 8.431101909362779,
"grad_norm": 0.13939572870731354,
"learning_rate": 2.0890625e-06,
"loss": 0.0042,
"step": 18325
},
{
"epoch": 8.442604094778007,
"grad_norm": 0.18078701198101044,
"learning_rate": 2.08125e-06,
"loss": 0.0062,
"step": 18350
},
{
"epoch": 8.454106280193237,
"grad_norm": 0.35175594687461853,
"learning_rate": 2.0734375e-06,
"loss": 0.0032,
"step": 18375
},
{
"epoch": 8.465608465608465,
"grad_norm": 1.0184096097946167,
"learning_rate": 2.0656250000000002e-06,
"loss": 0.0135,
"step": 18400
},
{
"epoch": 8.477110651023695,
"grad_norm": 0.35224124789237976,
"learning_rate": 2.0578125e-06,
"loss": 0.0061,
"step": 18425
},
{
"epoch": 8.488612836438923,
"grad_norm": 4.74711275100708,
"learning_rate": 2.0500000000000003e-06,
"loss": 0.0144,
"step": 18450
},
{
"epoch": 8.500115021854151,
"grad_norm": 0.630029022693634,
"learning_rate": 2.0421875e-06,
"loss": 0.0049,
"step": 18475
},
{
"epoch": 8.511617207269381,
"grad_norm": 3.0329389572143555,
"learning_rate": 2.0343750000000003e-06,
"loss": 0.0107,
"step": 18500
},
{
"epoch": 8.52311939268461,
|
|
"grad_norm": 1.9780049324035645,
|
|
"learning_rate": 2.0265625e-06,
|
|
"loss": 0.0048,
|
|
"step": 18525
|
|
},
|
|
{
|
|
"epoch": 8.53462157809984,
|
|
"grad_norm": 2.6098361015319824,
|
|
"learning_rate": 2.0187500000000004e-06,
|
|
"loss": 0.0155,
|
|
"step": 18550
|
|
},
|
|
{
|
|
"epoch": 8.546123763515068,
|
|
"grad_norm": 0.2777227461338043,
|
|
"learning_rate": 2.0109375e-06,
|
|
"loss": 0.0056,
|
|
"step": 18575
|
|
},
|
|
{
|
|
"epoch": 8.557625948930296,
|
|
"grad_norm": 2.4276092052459717,
|
|
"learning_rate": 2.003125e-06,
|
|
"loss": 0.0126,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 8.569128134345526,
|
|
"grad_norm": 0.2835908532142639,
|
|
"learning_rate": 1.9953125e-06,
|
|
"loss": 0.0059,
|
|
"step": 18625
|
|
},
|
|
{
|
|
"epoch": 8.580630319760754,
|
|
"grad_norm": 2.6612679958343506,
|
|
"learning_rate": 1.9875e-06,
|
|
"loss": 0.0107,
|
|
"step": 18650
|
|
},
|
|
{
|
|
"epoch": 8.592132505175984,
|
|
"grad_norm": 1.2350229024887085,
|
|
"learning_rate": 1.9796875e-06,
|
|
"loss": 0.0093,
|
|
"step": 18675
|
|
},
|
|
{
|
|
"epoch": 8.603634690591212,
|
|
"grad_norm": 0.15607894957065582,
|
|
"learning_rate": 1.971875e-06,
|
|
"loss": 0.0107,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 8.61513687600644,
|
|
"grad_norm": 0.07845500856637955,
|
|
"learning_rate": 1.9640625e-06,
|
|
"loss": 0.0073,
|
|
"step": 18725
|
|
},
|
|
{
|
|
"epoch": 8.62663906142167,
|
|
"grad_norm": 1.0883415937423706,
|
|
"learning_rate": 1.95625e-06,
|
|
"loss": 0.0118,
|
|
"step": 18750
|
|
},
|
|
{
|
|
"epoch": 8.638141246836899,
|
|
"grad_norm": 0.17504793405532837,
|
|
"learning_rate": 1.9484375000000004e-06,
|
|
"loss": 0.0067,
|
|
"step": 18775
|
|
},
|
|
{
|
|
"epoch": 8.649643432252129,
|
|
"grad_norm": 1.5559245347976685,
|
|
"learning_rate": 1.940625e-06,
|
|
"loss": 0.0167,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 8.661145617667357,
|
|
"grad_norm": 0.06407686322927475,
|
|
"learning_rate": 1.9328125000000004e-06,
|
|
"loss": 0.0036,
|
|
"step": 18825
|
|
},
|
|
{
|
|
"epoch": 8.672647803082585,
|
|
"grad_norm": 1.3386480808258057,
|
|
"learning_rate": 1.925e-06,
|
|
"loss": 0.011,
|
|
"step": 18850
|
|
},
|
|
{
|
|
"epoch": 8.684149988497815,
|
|
"grad_norm": 0.0743364468216896,
|
|
"learning_rate": 1.9171875000000004e-06,
|
|
"loss": 0.0081,
|
|
"step": 18875
|
|
},
|
|
{
|
|
"epoch": 8.695652173913043,
|
|
"grad_norm": 2.949956178665161,
|
|
"learning_rate": 1.9093750000000002e-06,
|
|
"loss": 0.0072,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 8.707154359328273,
|
|
"grad_norm": 0.7283833026885986,
|
|
"learning_rate": 1.9015625000000003e-06,
|
|
"loss": 0.0049,
|
|
"step": 18925
|
|
},
|
|
{
|
|
"epoch": 8.718656544743501,
|
|
"grad_norm": 0.5794746279716492,
|
|
"learning_rate": 1.89375e-06,
|
|
"loss": 0.0101,
|
|
"step": 18950
|
|
},
|
|
{
|
|
"epoch": 8.73015873015873,
|
|
"grad_norm": 1.550223708152771,
|
|
"learning_rate": 1.8859375e-06,
|
|
"loss": 0.0068,
|
|
"step": 18975
|
|
},
|
|
{
|
|
"epoch": 8.74166091557396,
|
|
"grad_norm": 0.12086187303066254,
|
|
"learning_rate": 1.8781250000000001e-06,
|
|
"loss": 0.0096,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 8.753163100989187,
|
|
"grad_norm": 2.682513475418091,
|
|
"learning_rate": 1.8703125000000001e-06,
|
|
"loss": 0.0048,
|
|
"step": 19025
|
|
},
|
|
{
|
|
"epoch": 8.764665286404417,
|
|
"grad_norm": 0.8820897340774536,
|
|
"learning_rate": 1.8625e-06,
|
|
"loss": 0.0067,
|
|
"step": 19050
|
|
},
|
|
{
|
|
"epoch": 8.776167471819646,
|
|
"grad_norm": 1.6323509216308594,
|
|
"learning_rate": 1.8546875000000002e-06,
|
|
"loss": 0.0066,
|
|
"step": 19075
|
|
},
|
|
{
|
|
"epoch": 8.787669657234874,
|
|
"grad_norm": 0.3018206059932709,
|
|
"learning_rate": 1.846875e-06,
|
|
"loss": 0.0123,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 8.799171842650104,
|
|
"grad_norm": 0.10852475464344025,
|
|
"learning_rate": 1.8390625000000002e-06,
|
|
"loss": 0.0059,
|
|
"step": 19125
|
|
},
|
|
{
|
|
"epoch": 8.810674028065332,
|
|
"grad_norm": 3.6190342903137207,
|
|
"learning_rate": 1.83125e-06,
|
|
"loss": 0.01,
|
|
"step": 19150
|
|
},
|
|
{
|
|
"epoch": 8.822176213480562,
|
|
"grad_norm": 4.896969795227051,
|
|
"learning_rate": 1.8234375e-06,
|
|
"loss": 0.007,
|
|
"step": 19175
|
|
},
|
|
{
|
|
"epoch": 8.83367839889579,
|
|
"grad_norm": 0.1353834569454193,
|
|
"learning_rate": 1.8156249999999999e-06,
|
|
"loss": 0.0082,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 8.845180584311018,
|
|
"grad_norm": 0.18897338211536407,
|
|
"learning_rate": 1.8078125000000001e-06,
|
|
"loss": 0.0034,
|
|
"step": 19225
|
|
},
|
|
{
|
|
"epoch": 8.856682769726248,
|
|
"grad_norm": 1.5457974672317505,
|
|
"learning_rate": 1.8e-06,
|
|
"loss": 0.0066,
|
|
"step": 19250
|
|
},
|
|
{
|
|
"epoch": 8.868184955141476,
|
|
"grad_norm": 0.9404191374778748,
|
|
"learning_rate": 1.7921875000000002e-06,
|
|
"loss": 0.0096,
|
|
"step": 19275
|
|
},
|
|
{
|
|
"epoch": 8.879687140556706,
|
|
"grad_norm": 0.1098804920911789,
|
|
"learning_rate": 1.784375e-06,
|
|
"loss": 0.0073,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 8.891189325971935,
|
|
"grad_norm": 1.549627661705017,
|
|
"learning_rate": 1.7765625000000002e-06,
|
|
"loss": 0.0043,
|
|
"step": 19325
|
|
},
|
|
{
|
|
"epoch": 8.902691511387163,
|
|
"grad_norm": 0.5851069092750549,
|
|
"learning_rate": 1.76875e-06,
|
|
"loss": 0.007,
|
|
"step": 19350
|
|
},
|
|
{
|
|
"epoch": 8.914193696802393,
|
|
"grad_norm": 0.2933562994003296,
|
|
"learning_rate": 1.7609375e-06,
|
|
"loss": 0.0059,
|
|
"step": 19375
|
|
},
|
|
{
|
|
"epoch": 8.925695882217621,
|
|
"grad_norm": 0.9020884037017822,
|
|
"learning_rate": 1.7531250000000003e-06,
|
|
"loss": 0.0108,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 8.93719806763285,
|
|
"grad_norm": 0.11604174226522446,
|
|
"learning_rate": 1.7453125e-06,
|
|
"loss": 0.0078,
|
|
"step": 19425
|
|
},
|
|
{
|
|
"epoch": 8.948700253048079,
|
|
"grad_norm": 0.2992897033691406,
|
|
"learning_rate": 1.7375000000000003e-06,
|
|
"loss": 0.0159,
|
|
"step": 19450
|
|
},
|
|
{
|
|
"epoch": 8.960202438463307,
|
|
"grad_norm": 6.1458306312561035,
|
|
"learning_rate": 1.7296875000000001e-06,
|
|
"loss": 0.0083,
|
|
"step": 19475
|
|
},
|
|
{
|
|
"epoch": 8.971704623878537,
|
|
"grad_norm": 0.6744798421859741,
|
|
"learning_rate": 1.7218750000000001e-06,
|
|
"loss": 0.0126,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 8.983206809293765,
|
|
"grad_norm": 0.13921253383159637,
|
|
"learning_rate": 1.7140625e-06,
|
|
"loss": 0.007,
|
|
"step": 19525
|
|
},
|
|
{
|
|
"epoch": 8.994708994708995,
|
|
"grad_norm": 0.5519439578056335,
|
|
"learning_rate": 1.7062500000000002e-06,
|
|
"loss": 0.0081,
|
|
"step": 19550
|
|
},
|
|
{
|
|
"epoch": 9.006211180124224,
|
|
"grad_norm": 0.20477654039859772,
|
|
"learning_rate": 1.6984375e-06,
|
|
"loss": 0.0056,
|
|
"step": 19575
|
|
},
|
|
{
|
|
"epoch": 9.017713365539452,
|
|
"grad_norm": 0.7848691940307617,
|
|
"learning_rate": 1.6906250000000002e-06,
|
|
"loss": 0.0097,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 9.029215550954682,
|
|
"grad_norm": 0.11725778132677078,
|
|
"learning_rate": 1.6828125e-06,
|
|
"loss": 0.0098,
|
|
"step": 19625
|
|
},
|
|
{
|
|
"epoch": 9.04071773636991,
|
|
"grad_norm": 0.863402247428894,
|
|
"learning_rate": 1.6750000000000003e-06,
|
|
"loss": 0.0066,
|
|
"step": 19650
|
|
},
|
|
{
|
|
"epoch": 9.05221992178514,
|
|
"grad_norm": 0.6256903409957886,
|
|
"learning_rate": 1.6671875e-06,
|
|
"loss": 0.0051,
|
|
"step": 19675
|
|
},
|
|
{
|
|
"epoch": 9.063722107200368,
|
|
"grad_norm": 0.27045947313308716,
|
|
"learning_rate": 1.659375e-06,
|
|
"loss": 0.0062,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 9.075224292615596,
|
|
"grad_norm": 1.267508864402771,
|
|
"learning_rate": 1.6515625e-06,
|
|
"loss": 0.0096,
|
|
"step": 19725
|
|
},
|
|
{
|
|
"epoch": 9.086726478030826,
|
|
"grad_norm": 0.2194228619337082,
|
|
"learning_rate": 1.6437500000000001e-06,
|
|
"loss": 0.0073,
|
|
"step": 19750
|
|
},
|
|
{
|
|
"epoch": 9.098228663446054,
|
|
"grad_norm": 0.7281507849693298,
|
|
"learning_rate": 1.6359375e-06,
|
|
"loss": 0.0109,
|
|
"step": 19775
|
|
},
|
|
{
|
|
"epoch": 9.109730848861284,
|
|
"grad_norm": 0.041582778096199036,
|
|
"learning_rate": 1.6281250000000002e-06,
|
|
"loss": 0.0053,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 9.121233034276512,
|
|
"grad_norm": 1.166810393333435,
|
|
"learning_rate": 1.6203125e-06,
|
|
"loss": 0.005,
|
|
"step": 19825
|
|
},
|
|
{
|
|
"epoch": 9.13273521969174,
|
|
"grad_norm": 1.6084290742874146,
|
|
"learning_rate": 1.6125e-06,
|
|
"loss": 0.0039,
|
|
"step": 19850
|
|
},
|
|
{
|
|
"epoch": 9.14423740510697,
|
|
"grad_norm": 0.07307042181491852,
|
|
"learning_rate": 1.6046875e-06,
|
|
"loss": 0.0078,
|
|
"step": 19875
|
|
},
|
|
{
|
|
"epoch": 9.155739590522199,
|
|
"grad_norm": 0.04875970631837845,
|
|
"learning_rate": 1.596875e-06,
|
|
"loss": 0.0052,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 9.167241775937429,
|
|
"grad_norm": 0.23526020348072052,
|
|
"learning_rate": 1.5890624999999999e-06,
|
|
"loss": 0.0129,
|
|
"step": 19925
|
|
},
|
|
{
|
|
"epoch": 9.178743961352657,
|
|
"grad_norm": 0.24947020411491394,
|
|
"learning_rate": 1.5812500000000001e-06,
|
|
"loss": 0.0061,
|
|
"step": 19950
|
|
},
|
|
{
|
|
"epoch": 9.190246146767885,
|
|
"grad_norm": 0.8459863066673279,
|
|
"learning_rate": 1.5734375e-06,
|
|
"loss": 0.008,
|
|
"step": 19975
|
|
},
|
|
{
|
|
"epoch": 9.201748332183115,
|
|
"grad_norm": 0.057964421808719635,
|
|
"learning_rate": 1.5656250000000002e-06,
|
|
"loss": 0.0072,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 9.201748332183115,
|
|
"eval_loss": 0.21524043381214142,
|
|
"eval_runtime": 5582.0879,
|
|
"eval_samples_per_second": 1.701,
|
|
"eval_steps_per_second": 0.213,
|
|
"eval_wer": 0.0967008327994875,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 9.213250517598343,
|
|
"grad_norm": 0.24494586884975433,
|
|
"learning_rate": 1.5578125000000002e-06,
|
|
"loss": 0.0123,
|
|
"step": 20025
|
|
},
|
|
{
|
|
"epoch": 9.224752703013573,
|
|
"grad_norm": 0.16685990989208221,
|
|
"learning_rate": 1.55e-06,
|
|
"loss": 0.0032,
|
|
"step": 20050
|
|
},
|
|
{
|
|
"epoch": 9.236254888428801,
|
|
"grad_norm": 1.2150516510009766,
|
|
"learning_rate": 1.5421875e-06,
|
|
"loss": 0.0076,
|
|
"step": 20075
|
|
},
|
|
{
|
|
"epoch": 9.24775707384403,
|
|
"grad_norm": 3.1026265621185303,
|
|
"learning_rate": 1.534375e-06,
|
|
"loss": 0.003,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 9.25925925925926,
|
|
"grad_norm": 1.430039882659912,
|
|
"learning_rate": 1.5265625e-06,
|
|
"loss": 0.0064,
|
|
"step": 20125
|
|
},
|
|
{
|
|
"epoch": 9.270761444674488,
|
|
"grad_norm": 0.9484136700630188,
|
|
"learning_rate": 1.51875e-06,
|
|
"loss": 0.0067,
|
|
"step": 20150
|
|
},
|
|
{
|
|
"epoch": 9.282263630089718,
|
|
"grad_norm": 2.5765624046325684,
|
|
"learning_rate": 1.5109375e-06,
|
|
"loss": 0.0079,
|
|
"step": 20175
|
|
},
|
|
{
|
|
"epoch": 9.293765815504946,
|
|
"grad_norm": 0.821998119354248,
|
|
"learning_rate": 1.5031250000000001e-06,
|
|
"loss": 0.0047,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 9.305268000920174,
|
|
"grad_norm": 0.3511562645435333,
|
|
"learning_rate": 1.4953125e-06,
|
|
"loss": 0.0103,
|
|
"step": 20225
|
|
},
|
|
{
|
|
"epoch": 9.316770186335404,
|
|
"grad_norm": 0.04969533905386925,
|
|
"learning_rate": 1.4875e-06,
|
|
"loss": 0.0034,
|
|
"step": 20250
|
|
},
|
|
{
|
|
"epoch": 9.328272371750632,
|
|
"grad_norm": 0.08082418143749237,
|
|
"learning_rate": 1.4796875e-06,
|
|
"loss": 0.0072,
|
|
"step": 20275
|
|
},
|
|
{
|
|
"epoch": 9.339774557165862,
|
|
"grad_norm": 0.06818880885839462,
|
|
"learning_rate": 1.471875e-06,
|
|
"loss": 0.0067,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 9.35127674258109,
|
|
"grad_norm": 1.4060240983963013,
|
|
"learning_rate": 1.4640625000000002e-06,
|
|
"loss": 0.0071,
|
|
"step": 20325
|
|
},
|
|
{
|
|
"epoch": 9.362778927996319,
|
|
"grad_norm": 0.20024247467517853,
|
|
"learning_rate": 1.4562500000000002e-06,
|
|
"loss": 0.0044,
|
|
"step": 20350
|
|
},
|
|
{
|
|
"epoch": 9.374281113411548,
|
|
"grad_norm": 1.157453179359436,
|
|
"learning_rate": 1.4484375e-06,
|
|
"loss": 0.0094,
|
|
"step": 20375
|
|
},
|
|
{
|
|
"epoch": 9.385783298826777,
|
|
"grad_norm": 3.1247355937957764,
|
|
"learning_rate": 1.440625e-06,
|
|
"loss": 0.0088,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 9.397285484242007,
|
|
"grad_norm": 0.12344136089086533,
|
|
"learning_rate": 1.4328125e-06,
|
|
"loss": 0.0114,
|
|
"step": 20425
|
|
},
|
|
{
|
|
"epoch": 9.408787669657235,
|
|
"grad_norm": 0.16506707668304443,
|
|
"learning_rate": 1.4250000000000001e-06,
|
|
"loss": 0.007,
|
|
"step": 20450
|
|
},
|
|
{
|
|
"epoch": 9.420289855072463,
|
|
"grad_norm": 0.12009504437446594,
|
|
"learning_rate": 1.4171875000000001e-06,
|
|
"loss": 0.0062,
|
|
"step": 20475
|
|
},
|
|
{
|
|
"epoch": 9.431792040487693,
|
|
"grad_norm": 0.33941954374313354,
|
|
"learning_rate": 1.4093750000000002e-06,
|
|
"loss": 0.0049,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 9.443294225902921,
|
|
"grad_norm": 2.774742603302002,
|
|
"learning_rate": 1.4015625000000002e-06,
|
|
"loss": 0.015,
|
|
"step": 20525
|
|
},
|
|
{
|
|
"epoch": 9.454796411318151,
|
|
"grad_norm": 2.181804656982422,
|
|
"learning_rate": 1.39375e-06,
|
|
"loss": 0.0068,
|
|
"step": 20550
|
|
},
|
|
{
|
|
"epoch": 9.46629859673338,
|
|
"grad_norm": 0.1634143590927124,
|
|
"learning_rate": 1.3859375e-06,
|
|
"loss": 0.0131,
|
|
"step": 20575
|
|
},
|
|
{
|
|
"epoch": 9.477800782148607,
|
|
"grad_norm": 0.28276145458221436,
|
|
"learning_rate": 1.378125e-06,
|
|
"loss": 0.0033,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 9.489302967563837,
|
|
"grad_norm": 0.23474666476249695,
|
|
"learning_rate": 1.3703125e-06,
|
|
"loss": 0.0076,
|
|
"step": 20625
|
|
},
|
|
{
|
|
"epoch": 9.500805152979066,
|
|
"grad_norm": 0.23035980761051178,
|
|
"learning_rate": 1.3625e-06,
|
|
"loss": 0.0081,
|
|
"step": 20650
|
|
},
|
|
{
|
|
"epoch": 9.512307338394296,
|
|
"grad_norm": 0.15259630978107452,
|
|
"learning_rate": 1.3546875e-06,
|
|
"loss": 0.0078,
|
|
"step": 20675
|
|
},
|
|
{
|
|
"epoch": 9.523809523809524,
|
|
"grad_norm": 1.3978540897369385,
|
|
"learning_rate": 1.3468750000000001e-06,
|
|
"loss": 0.0039,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 9.535311709224752,
|
|
"grad_norm": 1.555802345275879,
|
|
"learning_rate": 1.3390625e-06,
|
|
"loss": 0.007,
|
|
"step": 20725
|
|
},
|
|
{
|
|
"epoch": 9.546813894639982,
|
|
"grad_norm": 0.11839170753955841,
|
|
"learning_rate": 1.33125e-06,
|
|
"loss": 0.0045,
|
|
"step": 20750
|
|
},
|
|
{
|
|
"epoch": 9.55831608005521,
|
|
"grad_norm": 1.8819234371185303,
|
|
"learning_rate": 1.3234375e-06,
|
|
"loss": 0.0079,
|
|
"step": 20775
|
|
},
|
|
{
|
|
"epoch": 9.56981826547044,
|
|
"grad_norm": 2.1347029209136963,
|
|
"learning_rate": 1.315625e-06,
|
|
"loss": 0.005,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 9.581320450885668,
|
|
"grad_norm": 0.05001327767968178,
|
|
"learning_rate": 1.3078125e-06,
|
|
"loss": 0.0094,
|
|
"step": 20825
|
|
},
|
|
{
|
|
"epoch": 9.592822636300896,
|
|
"grad_norm": 4.695704460144043,
|
|
"learning_rate": 1.3e-06,
|
|
"loss": 0.0041,
|
|
"step": 20850
|
|
},
|
|
{
|
|
"epoch": 9.604324821716126,
|
|
"grad_norm": 0.27146437764167786,
|
|
"learning_rate": 1.2921875e-06,
|
|
"loss": 0.005,
|
|
"step": 20875
|
|
},
|
|
{
|
|
"epoch": 9.615827007131355,
|
|
"grad_norm": 2.277533769607544,
|
|
"learning_rate": 1.284375e-06,
|
|
"loss": 0.0064,
|
|
"step": 20900
|
|
},
|
|
{
|
|
"epoch": 9.627329192546584,
|
|
"grad_norm": 0.10806547850370407,
|
|
"learning_rate": 1.2765625e-06,
|
|
"loss": 0.0063,
|
|
"step": 20925
|
|
},
|
|
{
|
|
"epoch": 9.638831377961813,
|
|
"grad_norm": 1.3636436462402344,
|
|
"learning_rate": 1.2690625000000002e-06,
|
|
"loss": 0.0103,
|
|
"step": 20950
|
|
},
|
|
{
|
|
"epoch": 9.65033356337704,
|
|
"grad_norm": 2.6159441471099854,
|
|
"learning_rate": 1.2612500000000002e-06,
|
|
"loss": 0.0052,
|
|
"step": 20975
|
|
},
|
|
{
|
|
"epoch": 9.66183574879227,
|
|
"grad_norm": 0.5515117645263672,
|
|
"learning_rate": 1.2534375e-06,
|
|
"loss": 0.0077,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 9.673337934207499,
|
|
"grad_norm": 0.8103013038635254,
|
|
"learning_rate": 1.245625e-06,
|
|
"loss": 0.0091,
|
|
"step": 21025
|
|
},
|
|
{
|
|
"epoch": 9.684840119622729,
|
|
"grad_norm": 2.336944103240967,
|
|
"learning_rate": 1.2378125e-06,
|
|
"loss": 0.0048,
|
|
"step": 21050
|
|
},
|
|
{
|
|
"epoch": 9.696342305037957,
|
|
"grad_norm": 3.5888826847076416,
|
|
"learning_rate": 1.23e-06,
|
|
"loss": 0.0059,
|
|
"step": 21075
|
|
},
|
|
{
|
|
"epoch": 9.707844490453185,
|
|
"grad_norm": 2.42950439453125,
|
|
"learning_rate": 1.2221875e-06,
|
|
"loss": 0.0071,
|
|
"step": 21100
|
|
},
|
|
{
|
|
"epoch": 9.719346675868415,
|
|
"grad_norm": 1.5883959531784058,
|
|
"learning_rate": 1.2143750000000001e-06,
|
|
"loss": 0.0103,
|
|
"step": 21125
|
|
},
|
|
{
|
|
"epoch": 9.730848861283643,
|
|
"grad_norm": 0.9261289238929749,
|
|
"learning_rate": 1.2065625000000001e-06,
|
|
"loss": 0.0101,
|
|
"step": 21150
|
|
},
|
|
{
|
|
"epoch": 9.742351046698873,
|
|
"grad_norm": 2.5712246894836426,
|
|
"learning_rate": 1.19875e-06,
|
|
"loss": 0.0071,
|
|
"step": 21175
|
|
},
|
|
{
|
|
"epoch": 9.753853232114102,
|
|
"grad_norm": 6.325390815734863,
|
|
"learning_rate": 1.1909375e-06,
|
|
"loss": 0.0042,
|
|
"step": 21200
|
|
},
|
|
{
|
|
"epoch": 9.76535541752933,
|
|
"grad_norm": 0.3912803828716278,
|
|
"learning_rate": 1.183125e-06,
|
|
"loss": 0.0064,
|
|
"step": 21225
|
|
},
|
|
{
|
|
"epoch": 9.77685760294456,
|
|
"grad_norm": 4.111480712890625,
|
|
"learning_rate": 1.1753125e-06,
|
|
"loss": 0.0056,
|
|
"step": 21250
|
|
},
|
|
{
|
|
"epoch": 9.788359788359788,
|
|
"grad_norm": 1.7652671337127686,
|
|
"learning_rate": 1.1675e-06,
|
|
"loss": 0.0061,
|
|
"step": 21275
|
|
},
|
|
{
|
|
"epoch": 9.799861973775018,
|
|
"grad_norm": 1.497182011604309,
|
|
"learning_rate": 1.1596875e-06,
|
|
"loss": 0.0045,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 9.811364159190246,
|
|
"grad_norm": 0.23965400457382202,
|
|
"learning_rate": 1.151875e-06,
|
|
"loss": 0.0117,
|
|
"step": 21325
|
|
},
|
|
{
|
|
"epoch": 9.822866344605474,
|
|
"grad_norm": 1.4831360578536987,
|
|
"learning_rate": 1.1440625e-06,
|
|
"loss": 0.0067,
|
|
"step": 21350
|
|
},
|
|
{
|
|
"epoch": 9.834368530020704,
|
|
"grad_norm": 0.11675738543272018,
|
|
"learning_rate": 1.13625e-06,
|
|
"loss": 0.0047,
|
|
"step": 21375
|
|
},
|
|
{
|
|
"epoch": 9.845870715435932,
|
|
"grad_norm": 0.06517274677753448,
|
|
"learning_rate": 1.1284375e-06,
|
|
"loss": 0.0076,
|
|
"step": 21400
|
|
},
|
|
{
|
|
"epoch": 9.857372900851162,
|
|
"grad_norm": 0.10633418709039688,
|
|
"learning_rate": 1.120625e-06,
|
|
"loss": 0.0061,
|
|
"step": 21425
|
|
},
|
|
{
|
|
"epoch": 9.86887508626639,
|
|
"grad_norm": 0.047500479966402054,
|
|
"learning_rate": 1.1128125000000002e-06,
|
|
"loss": 0.0058,
|
|
"step": 21450
|
|
},
|
|
{
|
|
"epoch": 9.880377271681619,
|
|
"grad_norm": 1.3911513090133667,
|
|
"learning_rate": 1.1050000000000002e-06,
|
|
"loss": 0.0097,
|
|
"step": 21475
|
|
},
|
|
{
|
|
"epoch": 9.891879457096849,
|
|
"grad_norm": 0.05551273375749588,
|
|
"learning_rate": 1.0971875e-06,
|
|
"loss": 0.0071,
|
|
"step": 21500
|
|
},
|
|
{
|
|
"epoch": 9.903381642512077,
|
|
"grad_norm": 0.4141998291015625,
|
|
"learning_rate": 1.089375e-06,
|
|
"loss": 0.0068,
|
|
"step": 21525
|
|
},
|
|
{
|
|
"epoch": 9.914883827927307,
|
|
"grad_norm": 0.11964666098356247,
|
|
"learning_rate": 1.0815625e-06,
|
|
"loss": 0.0058,
|
|
"step": 21550
|
|
},
|
|
{
|
|
"epoch": 9.926386013342535,
|
|
"grad_norm": 0.20705200731754303,
|
|
"learning_rate": 1.07375e-06,
|
|
"loss": 0.0082,
|
|
"step": 21575
|
|
},
|
|
{
|
|
"epoch": 9.937888198757763,
|
|
"grad_norm": 0.06934256851673126,
|
|
"learning_rate": 1.0659375000000001e-06,
|
|
"loss": 0.006,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 9.949390384172993,
|
|
"grad_norm": 0.9905650615692139,
|
|
"learning_rate": 1.0581250000000001e-06,
|
|
"loss": 0.0082,
|
|
"step": 21625
|
|
},
|
|
{
|
|
"epoch": 9.960892569588221,
|
|
"grad_norm": 6.720580101013184,
|
|
"learning_rate": 1.0503125000000002e-06,
|
|
"loss": 0.0087,
|
|
"step": 21650
|
|
},
|
|
{
|
|
"epoch": 9.972394755003451,
|
|
"grad_norm": 2.596421480178833,
|
|
"learning_rate": 1.0425e-06,
|
|
"loss": 0.0103,
|
|
"step": 21675
|
|
},
|
|
{
|
|
"epoch": 9.98389694041868,
|
|
"grad_norm": 0.18345965445041656,
|
|
"learning_rate": 1.0346875e-06,
|
|
"loss": 0.006,
|
|
"step": 21700
|
|
},
|
|
{
|
|
"epoch": 9.995399125833908,
|
|
"grad_norm": 0.20510388910770416,
|
|
"learning_rate": 1.026875e-06,
|
|
"loss": 0.01,
|
|
"step": 21725
|
|
},
|
|
{
|
|
"epoch": 10.006901311249138,
|
|
"grad_norm": 1.5538175106048584,
|
|
"learning_rate": 1.0190625e-06,
|
|
"loss": 0.0051,
|
|
"step": 21750
|
|
},
|
|
{
|
|
"epoch": 10.018403496664366,
|
|
"grad_norm": 4.992893695831299,
|
|
"learning_rate": 1.01125e-06,
|
|
"loss": 0.007,
|
|
"step": 21775
|
|
},
|
|
{
|
|
"epoch": 10.029905682079596,
|
|
"grad_norm": 2.5293116569519043,
|
|
"learning_rate": 1.0034375e-06,
|
|
"loss": 0.0097,
|
|
"step": 21800
|
|
},
|
|
{
|
|
"epoch": 10.041407867494824,
|
|
"grad_norm": 2.987748861312866,
|
|
"learning_rate": 9.95625e-07,
|
|
"loss": 0.0039,
|
|
"step": 21825
|
|
},
|
|
{
|
|
"epoch": 10.052910052910052,
|
|
"grad_norm": 0.8729166984558105,
|
|
"learning_rate": 9.878125000000001e-07,
|
|
"loss": 0.0102,
|
|
"step": 21850
|
|
},
|
|
{
|
|
"epoch": 10.064412238325282,
|
|
"grad_norm": 4.021528244018555,
|
|
"learning_rate": 9.8e-07,
|
|
"loss": 0.0032,
|
|
"step": 21875
|
|
},
|
|
{
|
|
"epoch": 10.07591442374051,
|
|
"grad_norm": 0.037765491753816605,
|
|
"learning_rate": 9.721875e-07,
|
|
"loss": 0.0061,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 10.08741660915574,
|
|
"grad_norm": 0.4817400574684143,
|
|
"learning_rate": 9.64375e-07,
|
|
"loss": 0.0052,
|
|
"step": 21925
|
|
},
|
|
{
|
|
"epoch": 10.098918794570968,
|
|
"grad_norm": 0.18971405923366547,
|
|
"learning_rate": 9.565625e-07,
|
|
"loss": 0.0065,
|
|
"step": 21950
|
|
},
|
|
{
|
|
"epoch": 10.110420979986197,
|
|
"grad_norm": 2.408292770385742,
|
|
"learning_rate": 9.4875e-07,
|
|
"loss": 0.0049,
|
|
"step": 21975
|
|
},
|
|
{
|
|
"epoch": 10.121923165401427,
|
|
"grad_norm": 1.3557521104812622,
|
|
"learning_rate": 9.409374999999999e-07,
|
|
"loss": 0.0069,
|
|
"step": 22000
|
|
},
|
|
{
|
|
"epoch": 10.133425350816655,
|
|
"grad_norm": 4.988788604736328,
|
|
"learning_rate": 9.33125e-07,
|
|
"loss": 0.0037,
|
|
"step": 22025
|
|
},
|
|
{
|
|
"epoch": 10.144927536231885,
|
|
"grad_norm": 1.5118516683578491,
|
|
"learning_rate": 9.253125e-07,
|
|
"loss": 0.0041,
|
|
"step": 22050
|
|
},
|
|
{
|
|
"epoch": 10.156429721647113,
|
|
"grad_norm": 0.13832196593284607,
|
|
"learning_rate": 9.175000000000001e-07,
|
|
"loss": 0.0058,
|
|
"step": 22075
|
|
},
|
|
{
|
|
"epoch": 10.167931907062341,
|
|
"grad_norm": 2.6763222217559814,
|
|
"learning_rate": 9.096875000000001e-07,
|
|
"loss": 0.0073,
|
|
"step": 22100
|
|
},
|
|
{
|
|
"epoch": 10.179434092477571,
|
|
"grad_norm": 0.1248018741607666,
|
|
"learning_rate": 9.018750000000002e-07,
|
|
"loss": 0.01,
|
|
"step": 22125
|
|
},
|
|
{
|
|
"epoch": 10.1909362778928,
|
|
"grad_norm": 0.22059573233127594,
|
|
"learning_rate": 8.940625000000001e-07,
|
|
"loss": 0.0099,
|
|
"step": 22150
|
|
},
|
|
{
|
|
"epoch": 10.20243846330803,
|
|
"grad_norm": 0.35290199518203735,
|
|
"learning_rate": 8.862500000000001e-07,
|
|
"loss": 0.0043,
|
|
"step": 22175
|
|
},
|
|
{
|
|
"epoch": 10.213940648723257,
|
|
"grad_norm": 0.038650188595056534,
|
|
"learning_rate": 8.784375000000001e-07,
|
|
"loss": 0.0073,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 10.225442834138486,
|
|
"grad_norm": 1.4717687368392944,
|
|
"learning_rate": 8.706250000000001e-07,
|
|
"loss": 0.0084,
|
|
"step": 22225
|
|
},
|
|
{
|
|
"epoch": 10.236945019553715,
|
|
"grad_norm": 0.5546708703041077,
|
|
"learning_rate": 8.628125e-07,
|
|
"loss": 0.0067,
|
|
"step": 22250
|
|
},
|
|
{
|
|
"epoch": 10.248447204968944,
|
|
"grad_norm": 0.07813633978366852,
|
|
"learning_rate": 8.550000000000001e-07,
|
|
"loss": 0.0034,
|
|
"step": 22275
|
|
},
|
|
{
|
|
"epoch": 10.259949390384174,
|
|
"grad_norm": 0.40856441855430603,
|
|
"learning_rate": 8.471875000000001e-07,
|
|
"loss": 0.0091,
|
|
"step": 22300
|
|
},
|
|
{
|
|
"epoch": 10.271451575799402,
|
|
"grad_norm": 0.05716840550303459,
|
|
"learning_rate": 8.39375e-07,
|
|
"loss": 0.0069,
|
|
"step": 22325
|
|
},
|
|
{
|
|
"epoch": 10.28295376121463,
|
|
"grad_norm": 0.07827286422252655,
|
|
"learning_rate": 8.315625e-07,
|
|
"loss": 0.0066,
|
|
"step": 22350
|
|
},
|
|
{
|
|
"epoch": 10.29445594662986,
|
|
"grad_norm": 8.593811988830566,
|
|
"learning_rate": 8.237500000000001e-07,
|
|
"loss": 0.0088,
|
|
"step": 22375
|
|
},
|
|
{
|
|
"epoch": 10.305958132045088,
|
|
"grad_norm": 1.3823190927505493,
|
|
"learning_rate": 8.159375000000001e-07,
|
|
"loss": 0.0085,
|
|
"step": 22400
|
|
},
|
|
{
|
|
"epoch": 10.317460317460318,
|
|
"grad_norm": 5.355988025665283,
|
|
"learning_rate": 8.08125e-07,
|
|
"loss": 0.0031,
|
|
"step": 22425
|
|
},
|
|
{
|
|
"epoch": 10.328962502875546,
|
|
"grad_norm": 0.39019063115119934,
|
|
"learning_rate": 8.003125e-07,
|
|
"loss": 0.006,
|
|
"step": 22450
|
|
},
|
|
{
|
|
"epoch": 10.340464688290774,
|
|
"grad_norm": 5.189722537994385,
|
|
"learning_rate": 7.925e-07,
|
|
"loss": 0.0064,
|
|
"step": 22475
|
|
},
|
|
{
|
|
"epoch": 10.351966873706004,
|
|
"grad_norm": 1.724393606185913,
|
|
"learning_rate": 7.846875000000001e-07,
|
|
"loss": 0.0067,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 10.363469059121233,
|
|
"grad_norm": 5.20131778717041,
|
|
"learning_rate": 7.76875e-07,
|
|
"loss": 0.0077,
|
|
"step": 22525
|
|
},
|
|
{
|
|
"epoch": 10.374971244536463,
|
|
"grad_norm": 0.0890192911028862,
|
|
"learning_rate": 7.690625000000001e-07,
|
|
"loss": 0.0036,
|
|
"step": 22550
|
|
},
|
|
{
|
|
"epoch": 10.38647342995169,
|
|
"grad_norm": 0.6779229640960693,
|
|
"learning_rate": 7.612500000000001e-07,
|
|
"loss": 0.0048,
|
|
"step": 22575
|
|
},
|
|
{
|
|
"epoch": 10.397975615366919,
|
|
"grad_norm": 0.616266667842865,
|
|
"learning_rate": 7.534375e-07,
|
|
"loss": 0.0097,
|
|
"step": 22600
|
|
},
|
|
{
|
|
"epoch": 10.409477800782149,
|
|
"grad_norm": 0.03939608484506607,
|
|
"learning_rate": 7.456250000000001e-07,
|
|
"loss": 0.0047,
|
|
"step": 22625
|
|
},
|
|
{
|
|
"epoch": 10.420979986197377,
|
|
"grad_norm": 0.14820168912410736,
|
|
"learning_rate": 7.378125000000001e-07,
|
|
"loss": 0.0096,
|
|
"step": 22650
|
|
},
|
|
{
|
|
"epoch": 10.432482171612607,
|
|
"grad_norm": 0.12704187631607056,
|
|
"learning_rate": 7.3e-07,
|
|
"loss": 0.0066,
|
|
"step": 22675
|
|
},
|
|
{
|
|
"epoch": 10.443984357027835,
|
|
"grad_norm": 0.037611182779073715,
|
|
"learning_rate": 7.221875e-07,
|
|
"loss": 0.0061,
|
|
"step": 22700
|
|
},
|
|
{
|
|
"epoch": 10.455486542443063,
|
|
"grad_norm": 4.043584823608398,
|
|
"learning_rate": 7.14375e-07,
|
|
"loss": 0.0085,
|
|
"step": 22725
|
|
},
|
|
{
|
|
"epoch": 10.466988727858293,
|
|
"grad_norm": 2.1938695907592773,
|
|
"learning_rate": 7.065625000000001e-07,
|
|
"loss": 0.0123,
|
|
"step": 22750
|
|
},
|
|
{
|
|
"epoch": 10.478490913273522,
|
|
"grad_norm": 0.1777833104133606,
|
|
"learning_rate": 6.9875e-07,
|
|
"loss": 0.0041,
|
|
"step": 22775
|
|
},
|
|
{
|
|
"epoch": 10.489993098688752,
|
|
"grad_norm": 0.21180225908756256,
|
|
"learning_rate": 6.909375e-07,
|
|
"loss": 0.0089,
|
|
"step": 22800
|
|
},
|
|
{
|
|
"epoch": 10.50149528410398,
|
|
"grad_norm": 0.3625020682811737,
|
|
"learning_rate": 6.83125e-07,
|
|
"loss": 0.0052,
|
|
"step": 22825
|
|
},
|
|
{
|
|
"epoch": 10.512997469519208,
|
|
"grad_norm": 1.3957737684249878,
|
|
"learning_rate": 6.753124999999999e-07,
|
|
"loss": 0.0105,
|
|
"step": 22850
|
|
},
|
|
{
|
|
"epoch": 10.524499654934438,
|
|
"grad_norm": 3.243558168411255,
|
|
"learning_rate": 6.675000000000001e-07,
|
|
"loss": 0.005,
|
|
"step": 22875
|
|
},
|
|
{
|
|
"epoch": 10.536001840349666,
|
|
"grad_norm": 2.227569341659546,
|
|
"learning_rate": 6.596875000000001e-07,
|
|
"loss": 0.0081,
|
|
"step": 22900
|
|
},
|
|
{
|
|
"epoch": 10.547504025764896,
|
|
"grad_norm": 2.815009593963623,
|
|
"learning_rate": 6.51875e-07,
|
|
"loss": 0.0078,
|
|
"step": 22925
|
|
},
|
|
{
|
|
"epoch": 10.559006211180124,
|
|
"grad_norm": 0.16958042979240417,
|
|
"learning_rate": 6.440625e-07,
|
|
"loss": 0.0127,
|
|
"step": 22950
|
|
},
|
|
{
|
|
"epoch": 10.570508396595352,
|
|
"grad_norm": 4.25639009475708,
|
|
"learning_rate": 6.3625e-07,
|
|
"loss": 0.0088,
|
|
"step": 22975
|
|
},
|
|
{
|
|
"epoch": 10.582010582010582,
|
|
"grad_norm": 0.4477657675743103,
|
|
"learning_rate": 6.284375000000001e-07,
|
|
"loss": 0.0043,
|
|
"step": 23000
|
|
},
|
|
{
|
|
"epoch": 10.59351276742581,
|
|
"grad_norm": 2.4832139015197754,
|
|
"learning_rate": 6.20625e-07,
|
|
"loss": 0.0045,
|
|
"step": 23025
|
|
},
|
|
{
|
|
"epoch": 10.60501495284104,
|
|
"grad_norm": 0.2865842282772064,
|
|
"learning_rate": 6.128125e-07,
|
|
"loss": 0.007,
|
|
"step": 23050
|
|
},
|
|
{
|
|
"epoch": 10.616517138256269,
|
|
"grad_norm": 0.3463385999202728,
|
|
"learning_rate": 6.05e-07,
|
|
"loss": 0.0068,
|
|
"step": 23075
|
|
},
|
|
{
|
|
"epoch": 10.628019323671497,
|
|
"grad_norm": 0.08812834322452545,
|
|
"learning_rate": 5.971875e-07,
|
|
"loss": 0.005,
|
|
"step": 23100
|
|
},
|
|
{
|
|
"epoch": 10.639521509086727,
|
|
"grad_norm": 0.11067871749401093,
|
|
"learning_rate": 5.89375e-07,
|
|
"loss": 0.0047,
|
|
"step": 23125
|
|
},
|
|
{
|
|
"epoch": 10.651023694501955,
|
|
"grad_norm": 1.1273283958435059,
|
|
"learning_rate": 5.815625e-07,
|
|
"loss": 0.0049,
|
|
"step": 23150
|
|
},
|
|
{
|
|
"epoch": 10.662525879917185,
|
|
"grad_norm": 0.0541173480451107,
|
|
"learning_rate": 5.737500000000001e-07,
|
|
"loss": 0.0042,
|
|
"step": 23175
|
|
},
|
|
{
|
|
"epoch": 10.674028065332413,
|
|
"grad_norm": 0.04831864312291145,
|
|
"learning_rate": 5.659375e-07,
|
|
"loss": 0.0075,
|
|
"step": 23200
|
|
},
|
|
{
|
|
"epoch": 10.685530250747641,
|
|
"grad_norm": 0.07917584478855133,
|
|
"learning_rate": 5.581250000000001e-07,
|
|
"loss": 0.0044,
|
|
"step": 23225
|
|
},
|
|
{
|
|
"epoch": 10.697032436162871,
|
|
"grad_norm": 2.2876992225646973,
|
|
"learning_rate": 5.503125000000001e-07,
|
|
"loss": 0.0073,
|
|
"step": 23250
|
|
},
|
|
{
|
|
"epoch": 10.7085346215781,
|
|
"grad_norm": 0.03435774892568588,
|
|
"learning_rate": 5.425e-07,
|
|
"loss": 0.0031,
|
|
"step": 23275
|
|
},
|
|
{
|
|
"epoch": 10.72003680699333,
|
|
"grad_norm": 1.8223545551300049,
|
|
"learning_rate": 5.346875e-07,
|
|
"loss": 0.0072,
|
|
"step": 23300
|
|
},
|
|
{
|
|
"epoch": 10.731538992408558,
|
|
"grad_norm": 12.397570610046387,
|
|
"learning_rate": 5.26875e-07,
|
|
"loss": 0.0035,
|
|
"step": 23325
|
|
},
|
|
{
|
|
"epoch": 10.743041177823786,
|
|
"grad_norm": 0.5610597133636475,
|
|
"learning_rate": 5.19375e-07,
|
|
"loss": 0.0088,
|
|
"step": 23350
|
|
},
|
|
{
|
|
"epoch": 10.754543363239016,
|
|
"grad_norm": 0.4081664979457855,
|
|
"learning_rate": 5.115625e-07,
|
|
"loss": 0.0051,
|
|
"step": 23375
|
|
},
|
|
{
|
|
"epoch": 10.766045548654244,
|
|
"grad_norm": 0.123787522315979,
|
|
"learning_rate": 5.0375e-07,
|
|
"loss": 0.0069,
|
|
"step": 23400
|
|
},
|
|
{
|
|
"epoch": 10.777547734069474,
|
|
"grad_norm": 2.293886423110962,
|
|
"learning_rate": 4.959375000000001e-07,
|
|
"loss": 0.0057,
|
|
"step": 23425
|
|
},
|
|
{
|
|
"epoch": 10.789049919484702,
|
|
"grad_norm": 6.182718753814697,
|
|
"learning_rate": 4.881250000000001e-07,
|
|
"loss": 0.0082,
|
|
"step": 23450
|
|
},
|
|
{
|
|
"epoch": 10.80055210489993,
|
|
"grad_norm": 0.033033497631549835,
|
|
"learning_rate": 4.803125e-07,
|
|
"loss": 0.0064,
|
|
"step": 23475
|
|
},
|
|
{
|
|
"epoch": 10.81205429031516,
|
|
"grad_norm": 0.5063899755477905,
|
|
"learning_rate": 4.7250000000000003e-07,
|
|
"loss": 0.0107,
|
|
"step": 23500
|
|
},
|
|
{
|
|
"epoch": 10.823556475730388,
|
|
"grad_norm": 1.6246389150619507,
|
|
"learning_rate": 4.646875e-07,
|
|
"loss": 0.0066,
|
|
"step": 23525
|
|
},
|
|
{
|
|
"epoch": 10.835058661145618,
|
|
"grad_norm": 0.16620787978172302,
|
|
"learning_rate": 4.56875e-07,
|
|
"loss": 0.0059,
|
|
"step": 23550
|
|
},
|
|
{
|
|
"epoch": 10.846560846560847,
|
|
"grad_norm": 0.7030823826789856,
|
|
"learning_rate": 4.490625e-07,
|
|
"loss": 0.0022,
|
|
"step": 23575
|
|
},
|
|
{
|
|
"epoch": 10.858063031976075,
|
|
"grad_norm": 0.8228742480278015,
|
|
"learning_rate": 4.4125e-07,
|
|
"loss": 0.0125,
|
|
"step": 23600
|
|
},
|
|
{
|
|
"epoch": 10.869565217391305,
|
|
"grad_norm": 4.570178031921387,
|
|
"learning_rate": 4.334375e-07,
|
|
"loss": 0.0052,
|
|
"step": 23625
|
|
},
|
|
{
|
|
"epoch": 10.881067402806533,
|
|
"grad_norm": 2.8716719150543213,
|
|
"learning_rate": 4.25625e-07,
|
|
"loss": 0.005,
|
|
"step": 23650
|
|
},
|
|
{
|
|
"epoch": 10.892569588221763,
|
|
"grad_norm": 5.523809909820557,
|
|
"learning_rate": 4.178125000000001e-07,
|
|
"loss": 0.0063,
|
|
"step": 23675
|
|
},
|
|
{
|
|
"epoch": 10.904071773636991,
|
|
"grad_norm": 1.16761314868927,
|
|
"learning_rate": 4.1000000000000004e-07,
|
|
"loss": 0.0095,
|
|
"step": 23700
|
|
},
|
|
{
|
|
"epoch": 10.91557395905222,
|
|
"grad_norm": 0.024232987314462662,
|
|
"learning_rate": 4.0218750000000007e-07,
|
|
"loss": 0.0036,
|
|
"step": 23725
|
|
},
|
|
{
|
|
"epoch": 10.92707614446745,
|
|
"grad_norm": 4.76410436630249,
|
|
"learning_rate": 3.9437500000000004e-07,
|
|
"loss": 0.0097,
|
|
"step": 23750
|
|
},
|
|
{
|
|
"epoch": 10.938578329882677,
|
|
"grad_norm": 5.289037704467773,
|
|
"learning_rate": 3.8656250000000006e-07,
|
|
"loss": 0.006,
|
|
"step": 23775
|
|
},
|
|
{
|
|
"epoch": 10.950080515297907,
|
|
"grad_norm": 0.05810718610882759,
|
|
"learning_rate": 3.7875000000000003e-07,
|
|
"loss": 0.0063,
|
|
"step": 23800
|
|
},
|
|
{
|
|
"epoch": 10.961582700713135,
|
|
"grad_norm": 2.577594757080078,
|
|
"learning_rate": 3.709375e-07,
|
|
"loss": 0.0041,
|
|
"step": 23825
|
|
},
|
|
{
|
|
"epoch": 10.973084886128364,
|
|
"grad_norm": 0.039299797266721725,
|
|
"learning_rate": 3.63125e-07,
|
|
"loss": 0.0062,
|
|
"step": 23850
|
|
},
|
|
{
|
|
"epoch": 10.984587071543594,
|
|
"grad_norm": 1.8657886981964111,
|
|
"learning_rate": 3.553125e-07,
|
|
"loss": 0.0029,
|
|
"step": 23875
|
|
},
|
|
{
|
|
"epoch": 10.996089256958822,
|
|
"grad_norm": 0.037355002015829086,
|
|
"learning_rate": 3.475e-07,
|
|
"loss": 0.0068,
|
|
"step": 23900
|
|
},
|
|
{
|
|
"epoch": 11.007591442374052,
|
|
"grad_norm": 0.03586237132549286,
|
|
"learning_rate": 3.3968750000000003e-07,
|
|
"loss": 0.0055,
|
|
"step": 23925
|
|
},
|
|
{
|
|
"epoch": 11.01909362778928,
|
|
"grad_norm": 1.9938002824783325,
|
|
"learning_rate": 3.31875e-07,
|
|
"loss": 0.0054,
|
|
"step": 23950
|
|
},
|
|
{
|
|
"epoch": 11.030595813204508,
|
|
"grad_norm": 0.05722161382436752,
|
|
"learning_rate": 3.240625e-07,
|
|
"loss": 0.0022,
|
|
"step": 23975
|
|
},
|
|
{
|
|
"epoch": 11.042097998619738,
|
|
"grad_norm": 0.16819870471954346,
|
|
"learning_rate": 3.1625e-07,
|
|
"loss": 0.0077,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 11.053600184034966,
|
|
"grad_norm": 1.404698371887207,
|
|
"learning_rate": 3.084375e-07,
|
|
"loss": 0.0078,
|
|
"step": 24025
|
|
},
|
|
{
|
|
"epoch": 11.065102369450196,
|
|
"grad_norm": 1.955828309059143,
|
|
"learning_rate": 3.00625e-07,
|
|
"loss": 0.0056,
|
|
"step": 24050
|
|
},
|
|
{
|
|
"epoch": 11.076604554865424,
|
|
"grad_norm": 0.1421104073524475,
|
|
"learning_rate": 2.9281250000000006e-07,
|
|
"loss": 0.0044,
|
|
"step": 24075
|
|
},
|
|
{
|
|
"epoch": 11.088106740280653,
|
|
"grad_norm": 0.2362927496433258,
|
|
"learning_rate": 2.85e-07,
|
|
"loss": 0.0046,
|
|
"step": 24100
|
|
},
|
|
{
|
|
"epoch": 11.099608925695883,
|
|
"grad_norm": 0.3808918297290802,
|
|
"learning_rate": 2.771875e-07,
|
|
"loss": 0.0041,
|
|
"step": 24125
|
|
},
|
|
{
|
|
"epoch": 11.11111111111111,
|
|
"grad_norm": 0.596932590007782,
|
|
"learning_rate": 2.69375e-07,
|
|
"loss": 0.0036,
|
|
"step": 24150
|
|
},
|
|
{
|
|
"epoch": 11.12261329652634,
|
|
"grad_norm": 2.1200876235961914,
|
|
"learning_rate": 2.615625e-07,
|
|
"loss": 0.0055,
|
|
"step": 24175
|
|
},
|
|
{
|
|
"epoch": 11.134115481941569,
|
|
"grad_norm": 0.17938555777072906,
|
|
"learning_rate": 2.5375e-07,
|
|
"loss": 0.0035,
|
|
"step": 24200
|
|
},
|
|
{
|
|
"epoch": 11.145617667356797,
|
|
"grad_norm": 0.06144664064049721,
|
|
"learning_rate": 2.4593750000000003e-07,
|
|
"loss": 0.0066,
|
|
"step": 24225
|
|
},
|
|
{
|
|
"epoch": 11.157119852772027,
|
|
"grad_norm": 0.05789117515087128,
|
|
"learning_rate": 2.3812500000000002e-07,
|
|
"loss": 0.0033,
|
|
"step": 24250
|
|
},
|
|
{
|
|
"epoch": 11.168622038187255,
|
|
"grad_norm": 1.0644292831420898,
|
|
"learning_rate": 2.3031250000000002e-07,
|
|
"loss": 0.0055,
|
|
"step": 24275
|
|
},
|
|
{
|
|
"epoch": 11.180124223602485,
|
|
"grad_norm": 0.05568142980337143,
|
|
"learning_rate": 2.2250000000000001e-07,
|
|
"loss": 0.0096,
|
|
"step": 24300
|
|
},
|
|
{
|
|
"epoch": 11.191626409017713,
|
|
"grad_norm": 3.572922468185425,
|
|
"learning_rate": 2.146875e-07,
|
|
"loss": 0.0058,
|
|
"step": 24325
|
|
},
|
|
{
|
|
"epoch": 11.203128594432942,
|
|
"grad_norm": 0.3286091089248657,
|
|
"learning_rate": 2.06875e-07,
|
|
"loss": 0.0035,
|
|
"step": 24350
|
|
},
|
|
{
|
|
"epoch": 11.214630779848171,
|
|
"grad_norm": 3.0645318031311035,
|
|
"learning_rate": 1.9906250000000003e-07,
|
|
"loss": 0.007,
|
|
"step": 24375
|
|
},
|
|
{
|
|
"epoch": 11.2261329652634,
|
|
"grad_norm": 9.076870918273926,
|
|
"learning_rate": 1.9125e-07,
|
|
"loss": 0.0038,
|
|
"step": 24400
|
|
},
|
|
{
|
|
"epoch": 11.23763515067863,
|
|
"grad_norm": 0.054851166903972626,
|
|
"learning_rate": 1.8343750000000002e-07,
|
|
"loss": 0.0049,
|
|
"step": 24425
|
|
},
|
|
{
|
|
"epoch": 11.249137336093858,
|
|
"grad_norm": 0.7788383364677429,
|
|
"learning_rate": 1.75625e-07,
|
|
"loss": 0.0044,
|
|
"step": 24450
|
|
},
|
|
{
|
|
"epoch": 11.260639521509086,
|
|
"grad_norm": 0.06403613835573196,
|
|
"learning_rate": 1.678125e-07,
|
|
"loss": 0.0055,
|
|
"step": 24475
|
|
},
|
|
{
|
|
"epoch": 11.272141706924316,
|
|
"grad_norm": 0.1481279581785202,
|
|
"learning_rate": 1.6e-07,
|
|
"loss": 0.0043,
|
|
"step": 24500
|
|
},
|
|
{
|
|
"epoch": 11.283643892339544,
|
|
"grad_norm": 1.9446523189544678,
|
|
"learning_rate": 1.521875e-07,
|
|
"loss": 0.0065,
|
|
"step": 24525
|
|
},
|
|
{
|
|
"epoch": 11.295146077754774,
|
|
"grad_norm": 0.01817043498158455,
|
|
"learning_rate": 1.44375e-07,
|
|
"loss": 0.0043,
|
|
"step": 24550
|
|
},
|
|
{
|
|
"epoch": 11.306648263170002,
|
|
"grad_norm": 2.417325258255005,
|
|
"learning_rate": 1.3656250000000002e-07,
|
|
"loss": 0.0037,
|
|
"step": 24575
|
|
},
|
|
{
|
|
"epoch": 11.31815044858523,
|
|
"grad_norm": 0.08347784727811813,
|
|
"learning_rate": 1.2875e-07,
|
|
"loss": 0.0025,
|
|
"step": 24600
|
|
},
|
|
{
|
|
"epoch": 11.32965263400046,
|
|
"grad_norm": 3.372894763946533,
|
|
"learning_rate": 1.209375e-07,
|
|
"loss": 0.0099,
|
|
"step": 24625
|
|
},
|
|
{
|
|
"epoch": 11.341154819415689,
|
|
"grad_norm": 0.43756258487701416,
|
|
"learning_rate": 1.1312500000000002e-07,
|
|
"loss": 0.0058,
|
|
"step": 24650
|
|
},
|
|
{
|
|
"epoch": 11.352657004830919,
|
|
"grad_norm": 0.4029462933540344,
|
|
"learning_rate": 1.0531250000000001e-07,
|
|
"loss": 0.0061,
|
|
"step": 24675
|
|
},
|
|
{
|
|
"epoch": 11.364159190246147,
|
|
"grad_norm": 4.701152324676514,
|
|
"learning_rate": 9.75e-08,
|
|
"loss": 0.0051,
|
|
"step": 24700
|
|
},
|
|
{
|
|
"epoch": 11.375661375661375,
|
|
"grad_norm": 0.5707642436027527,
|
|
"learning_rate": 8.96875e-08,
|
|
"loss": 0.0054,
|
|
"step": 24725
|
|
},
|
|
{
|
|
"epoch": 11.387163561076605,
|
|
"grad_norm": 0.0709519162774086,
|
|
"learning_rate": 8.187500000000001e-08,
|
|
"loss": 0.0031,
|
|
"step": 24750
|
|
},
|
|
{
|
|
"epoch": 11.398665746491833,
|
|
"grad_norm": 1.804284930229187,
|
|
"learning_rate": 7.40625e-08,
|
|
"loss": 0.0052,
|
|
"step": 24775
|
|
},
|
|
{
|
|
"epoch": 11.410167931907063,
|
|
"grad_norm": 0.04206651449203491,
|
|
"learning_rate": 6.625e-08,
|
|
"loss": 0.0038,
|
|
"step": 24800
|
|
},
|
|
{
|
|
"epoch": 11.421670117322291,
|
|
"grad_norm": 0.5502020716667175,
|
|
"learning_rate": 5.843750000000001e-08,
|
|
"loss": 0.0087,
|
|
"step": 24825
|
|
},
|
|
{
|
|
"epoch": 11.43317230273752,
|
|
"grad_norm": 0.21491751074790955,
|
|
"learning_rate": 5.0625e-08,
|
|
"loss": 0.003,
|
|
"step": 24850
|
|
},
|
|
{
|
|
"epoch": 11.44467448815275,
|
|
"grad_norm": 2.5923168659210205,
|
|
"learning_rate": 4.28125e-08,
|
|
"loss": 0.0059,
|
|
"step": 24875
|
|
},
|
|
{
|
|
"epoch": 11.456176673567978,
|
|
"grad_norm": 0.21680384874343872,
|
|
"learning_rate": 3.5e-08,
|
|
"loss": 0.0063,
|
|
"step": 24900
|
|
},
|
|
{
|
|
"epoch": 11.467678858983207,
|
|
"grad_norm": 0.0767444297671318,
|
|
"learning_rate": 2.7187499999999998e-08,
|
|
"loss": 0.0054,
|
|
"step": 24925
|
|
},
|
|
{
|
|
"epoch": 11.479181044398436,
|
|
"grad_norm": 0.42297929525375366,
|
|
"learning_rate": 1.9375e-08,
|
|
"loss": 0.0025,
|
|
"step": 24950
|
|
},
|
|
{
|
|
"epoch": 11.490683229813664,
|
|
"grad_norm": 0.13791832327842712,
|
|
"learning_rate": 1.1562500000000002e-08,
|
|
"loss": 0.0088,
|
|
"step": 24975
|
|
},
|
|
{
|
|
"epoch": 11.502185415228894,
|
|
"grad_norm": 5.369208812713623,
|
|
"learning_rate": 3.75e-09,
|
|
"loss": 0.0074,
|
|
"step": 25000
|
|
},
|
|
{
|
|
"epoch": 11.502185415228894,
|
|
"eval_loss": 0.2163197249174118,
|
|
"eval_runtime": 5391.0097,
|
|
"eval_samples_per_second": 1.761,
|
|
"eval_steps_per_second": 0.22,
|
|
"eval_wer": 0.09290518898142217,
|
|
"step": 25000
|
|
}
|
|
],
|
|
"logging_steps": 25,
|
|
"max_steps": 25000,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 12,
|
|
"save_steps": 5000,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 4.082307363540173e+20,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|