HNet_Ori-BPT3 / checkpoint-6000 /trainer_state.json
andyjzhao's picture
Upload folder using huggingface_hub
d607722 verified
{
"best_global_step": 6000,
"best_metric": 2.7438295521464737,
"best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/HNet_Ori-BPT3/checkpoint-6000",
"epoch": 1.7017941989929792,
"eval_steps": 500,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.0028366782497695198,
"grad_norm": 590.2120361328125,
"loss": 144.5784,
"loss_ce": 170.91241455078125,
"loss_region": 0.030412333086133003,
"loss_total": 170.9428253173828,
"lr": 2.20454076850486e-05,
"router/selected_tokens_s0": 1.0,
"step": 10,
"tokens_trained": 0.03276544
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.0056733564995390395,
"grad_norm": 565.2921142578125,
"loss": 52.047,
"loss_ce": 28.61202049255371,
"loss_region": 0.03181665763258934,
"loss_total": 28.643836975097656,
"lr": 4.654030511288038e-05,
"router/selected_tokens_s0": 1.0,
"step": 20,
"tokens_trained": 0.06553088
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.00851003474930856,
"grad_norm": 361.24432373046875,
"loss": 18.4265,
"loss_ce": 16.737817764282227,
"loss_region": 0.03595759719610214,
"loss_total": 16.773775100708008,
"lr": 7.103520254071216e-05,
"router/selected_tokens_s0": 1.0,
"step": 30,
"tokens_trained": 0.09829632
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.011346712999078079,
"grad_norm": 649.0695190429688,
"loss": 8.0445,
"loss_ce": 11.410881996154785,
"loss_region": 0.03821098059415817,
"loss_total": 11.449092864990234,
"lr": 9.553009996854394e-05,
"router/selected_tokens_s0": 1.0,
"step": 40,
"tokens_trained": 0.13106176
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.014183391248847599,
"grad_norm": 534.2383422851562,
"loss": 9.3219,
"loss_ce": 9.884474754333496,
"loss_region": 0.040100596845149994,
"loss_total": 9.924575805664062,
"lr": 0.00012002499739637572,
"router/selected_tokens_s0": 1.0,
"step": 50,
"tokens_trained": 0.1638272
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.01702006949861712,
"grad_norm": 273.8401184082031,
"loss": 14.1755,
"loss_ce": 12.677406311035156,
"loss_region": 0.041250791400671005,
"loss_total": 12.718657493591309,
"lr": 0.00014451989482420748,
"router/selected_tokens_s0": 1.0,
"step": 60,
"tokens_trained": 0.19659264
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.01985674774838664,
"grad_norm": 544.6290893554688,
"loss": 14.1136,
"loss_ce": 14.262775421142578,
"loss_region": 0.042144227772951126,
"loss_total": 14.304919242858887,
"lr": 0.00016901479225203927,
"router/selected_tokens_s0": 1.0,
"step": 70,
"tokens_trained": 0.22935808
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.022693425998156158,
"grad_norm": 527.1918334960938,
"loss": 15.2492,
"loss_ce": 11.932450294494629,
"loss_region": 0.04246167093515396,
"loss_total": 11.9749116897583,
"lr": 0.00019350968967987104,
"router/selected_tokens_s0": 1.0,
"step": 80,
"tokens_trained": 0.26212192
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.025530104247925678,
"grad_norm": 343.09454345703125,
"loss": 12.0101,
"loss_ce": 6.092933177947998,
"loss_region": 0.04214272275567055,
"loss_total": 6.13507604598999,
"lr": 0.0002180045871077028,
"router/selected_tokens_s0": 1.0,
"step": 90,
"tokens_trained": 0.29488736
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.028366782497695198,
"grad_norm": 309.79541015625,
"loss": 9.8843,
"loss_ce": 5.214886665344238,
"loss_region": 0.041769951581954956,
"loss_total": 5.256656646728516,
"lr": 0.00024249948453553463,
"router/selected_tokens_s0": 1.0,
"step": 100,
"tokens_trained": 0.3276528
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.031203460747464717,
"grad_norm": 251.26068115234375,
"loss": 8.5835,
"loss_ce": 12.269608497619629,
"loss_region": 0.04041137546300888,
"loss_total": 12.310019493103027,
"lr": 0.00026699438196336637,
"router/selected_tokens_s0": 1.0,
"step": 110,
"tokens_trained": 0.36041744
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.03404013899723424,
"grad_norm": 148.94601440429688,
"loss": 6.4366,
"loss_ce": 3.2050940990448,
"loss_region": 0.03642381727695465,
"loss_total": 3.241518020629883,
"lr": 0.00029148927939119814,
"router/selected_tokens_s0": 1.0,
"step": 120,
"tokens_trained": 0.39318128
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.03687681724700376,
"grad_norm": 187.2681427001953,
"loss": 10.4928,
"loss_ce": 6.001107215881348,
"loss_region": 0.030254848301410675,
"loss_total": 6.031362056732178,
"lr": 0.00031598417681902996,
"router/selected_tokens_s0": 4752.0,
"step": 130,
"tokens_trained": 0.42594672
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.03971349549677328,
"grad_norm": 218.34559631347656,
"loss": 8.5742,
"loss_ce": 3.848691701889038,
"loss_region": 0.03400004655122757,
"loss_total": 3.8826918601989746,
"lr": 0.00034047907424686173,
"router/selected_tokens_s0": 7042.125,
"step": 140,
"tokens_trained": 0.458709112
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.042550173746542796,
"grad_norm": 215.60699462890625,
"loss": 4.5762,
"loss_ce": 5.0876851081848145,
"loss_region": 0.03198177367448807,
"loss_total": 5.119667053222656,
"lr": 0.0003649739716746935,
"router/selected_tokens_s0": 424.5,
"step": 150,
"tokens_trained": 0.491469992
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.045386851996312316,
"grad_norm": 147.6339111328125,
"loss": 5.8047,
"loss_ce": 8.435795783996582,
"loss_region": 0.03364315256476402,
"loss_total": 8.469438552856445,
"lr": 0.00038946886910252526,
"router/selected_tokens_s0": 536.875,
"step": 160,
"tokens_trained": 0.524234632
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.048223530246081836,
"grad_norm": 218.0553741455078,
"loss": 5.7968,
"loss_ce": 6.644444942474365,
"loss_region": 0.031727153807878494,
"loss_total": 6.676172256469727,
"lr": 0.0004139637665303571,
"router/selected_tokens_s0": 1833.5,
"step": 170,
"tokens_trained": 0.556999272
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.051060208495851356,
"grad_norm": 100.56309509277344,
"loss": 6.7503,
"loss_ce": 8.332029342651367,
"loss_region": 0.03232778236269951,
"loss_total": 8.364356994628906,
"lr": 0.0004384586639581888,
"router/selected_tokens_s0": 1649.75,
"step": 180,
"tokens_trained": 0.589762952
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.053896886745620876,
"grad_norm": 157.10765075683594,
"loss": 6.4449,
"loss_ce": 4.925128936767578,
"loss_region": 0.031663134694099426,
"loss_total": 4.956791877746582,
"lr": 0.0004629535613860206,
"router/selected_tokens_s0": 1687.375,
"step": 190,
"tokens_trained": 0.622527592
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.056733564995390395,
"grad_norm": 83.81340026855469,
"loss": 3.7524,
"loss_ce": 5.0940961837768555,
"loss_region": 0.02894311398267746,
"loss_total": 5.123039245605469,
"lr": 0.00048744845881385244,
"router/selected_tokens_s0": 3074.125,
"step": 200,
"tokens_trained": 0.655293032
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.059570243245159915,
"grad_norm": 169.4013671875,
"loss": 5.9481,
"loss_ce": 9.220865249633789,
"loss_region": 0.02949724718928337,
"loss_total": 9.250362396240234,
"lr": 0.0005119433562416841,
"router/selected_tokens_s0": 3610.375,
"step": 210,
"tokens_trained": 0.688057672
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.062406921494929435,
"grad_norm": 80.7753677368164,
"loss": 5.1122,
"loss_ce": 3.287958860397339,
"loss_region": 0.029488109052181244,
"loss_total": 3.3174469470977783,
"lr": 0.0005364382536695159,
"router/selected_tokens_s0": 2584.75,
"step": 220,
"tokens_trained": 0.720823112
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.06524359974469895,
"grad_norm": 89.39635467529297,
"loss": 3.3047,
"loss_ce": 2.1086361408233643,
"loss_region": 0.029821382835507393,
"loss_total": 2.1384575366973877,
"lr": 0.0005609331510973477,
"router/selected_tokens_s0": 3991.5,
"step": 230,
"tokens_trained": 0.753588552
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.06808027799446847,
"grad_norm": 90.892333984375,
"loss": 4.2563,
"loss_ce": 2.7003867626190186,
"loss_region": 0.030828693881630898,
"loss_total": 2.731215476989746,
"lr": 0.0005854280485251795,
"router/selected_tokens_s0": 4964.125,
"step": 240,
"tokens_trained": 0.786353992
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.070916956244238,
"grad_norm": 86.70359802246094,
"loss": 2.8849,
"loss_ce": 3.55375599861145,
"loss_region": 0.029162542894482613,
"loss_total": 3.582918643951416,
"lr": 0.0006099229459530113,
"router/selected_tokens_s0": 2891.75,
"step": 250,
"tokens_trained": 0.819119432
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.07375363449400751,
"grad_norm": 106.11075592041016,
"loss": 4.4058,
"loss_ce": 5.333348751068115,
"loss_region": 0.029971925541758537,
"loss_total": 5.363320827484131,
"lr": 0.0006344178433808431,
"router/selected_tokens_s0": 4181.375,
"step": 260,
"tokens_trained": 0.851884072
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.07659031274377703,
"grad_norm": 75.7653579711914,
"loss": 3.6076,
"loss_ce": 2.3445212841033936,
"loss_region": 0.029431568458676338,
"loss_total": 2.373952865600586,
"lr": 0.0006589127408086749,
"router/selected_tokens_s0": 3440.0,
"step": 270,
"tokens_trained": 0.884649512
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.07942699099354655,
"grad_norm": 95.4271469116211,
"loss": 2.8447,
"loss_ce": 3.030097007751465,
"loss_region": 0.030556708574295044,
"loss_total": 3.0606536865234375,
"lr": 0.0006834076382365066,
"router/selected_tokens_s0": 4730.5,
"step": 280,
"tokens_trained": 0.917414936
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.08226366924331607,
"grad_norm": 74.673828125,
"loss": 2.0288,
"loss_ce": 2.1509435176849365,
"loss_region": 0.028712084516882896,
"loss_total": 2.1796555519104004,
"lr": 0.0007079025356643384,
"router/selected_tokens_s0": 2658.625,
"step": 290,
"tokens_trained": 0.950180376
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.08510034749308559,
"grad_norm": 77.71709442138672,
"loss": 2.0227,
"loss_ce": 2.286048650741577,
"loss_region": 0.03060404770076275,
"loss_total": 2.316652774810791,
"lr": 0.0007323974330921702,
"router/selected_tokens_s0": 4752.0,
"step": 300,
"tokens_trained": 0.982945816
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.08793702574285511,
"grad_norm": 55.31558609008789,
"loss": 2.1281,
"loss_ce": 2.0437748432159424,
"loss_region": 0.030610591173171997,
"loss_total": 2.074385404586792,
"lr": 0.000756892330520002,
"router/selected_tokens_s0": 4748.625,
"step": 310,
"tokens_trained": 1.015711256
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.09077370399262463,
"grad_norm": 77.07698059082031,
"loss": 2.5761,
"loss_ce": 2.7218589782714844,
"loss_region": 0.03093603625893593,
"loss_total": 2.7527949810028076,
"lr": 0.0007813872279478337,
"router/selected_tokens_s0": 4946.625,
"step": 320,
"tokens_trained": 1.048476696
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.09361038224239415,
"grad_norm": 47.57994842529297,
"loss": 2.239,
"loss_ce": 1.9163914918899536,
"loss_region": 0.029897142201662064,
"loss_total": 1.9462885856628418,
"lr": 0.0008058821253756655,
"router/selected_tokens_s0": 4135.875,
"step": 330,
"tokens_trained": 1.081242136
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.09644706049216367,
"grad_norm": 58.62579345703125,
"loss": 2.8423,
"loss_ce": 3.2828376293182373,
"loss_region": 0.03434763103723526,
"loss_total": 3.317185163497925,
"lr": 0.0008303770228034974,
"router/selected_tokens_s0": 6686.5,
"step": 340,
"tokens_trained": 1.114007576
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.09928373874193319,
"grad_norm": 34.5246696472168,
"loss": 2.5891,
"loss_ce": 1.537825345993042,
"loss_region": 0.02885586954653263,
"loss_total": 1.5666812658309937,
"lr": 0.0008548719202313291,
"router/selected_tokens_s0": 154.125,
"step": 350,
"tokens_trained": 1.146773016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.10212041699170271,
"grad_norm": 37.228973388671875,
"loss": 2.7756,
"loss_ce": 1.9871504306793213,
"loss_region": 0.029301652684807777,
"loss_total": 2.0164520740509033,
"lr": 0.0008793668176591608,
"router/selected_tokens_s0": 3631.75,
"step": 360,
"tokens_trained": 1.179538456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.10495709524147223,
"grad_norm": 30.546344757080078,
"loss": 2.4884,
"loss_ce": 1.4886701107025146,
"loss_region": 0.031588103622198105,
"loss_total": 1.5202581882476807,
"lr": 0.0009038617150869926,
"router/selected_tokens_s0": 5236.625,
"step": 370,
"tokens_trained": 1.212303896
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.10779377349124175,
"grad_norm": 45.68803405761719,
"loss": 2.8937,
"loss_ce": 2.285705804824829,
"loss_region": 0.030362222343683243,
"loss_total": 2.316067934036255,
"lr": 0.0009283566125148244,
"router/selected_tokens_s0": 4493.625,
"step": 380,
"tokens_trained": 1.245068536
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.11063045174101127,
"grad_norm": 32.428009033203125,
"loss": 1.9186,
"loss_ce": 1.5672893524169922,
"loss_region": 0.03746495023369789,
"loss_total": 1.6047543287277222,
"lr": 0.0009528515099426562,
"router/selected_tokens_s0": 8134.375,
"step": 390,
"tokens_trained": 1.277833176
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.11346712999078079,
"grad_norm": 35.54498291015625,
"loss": 1.6959,
"loss_ce": 1.6413251161575317,
"loss_region": 0.026098042726516724,
"loss_total": 1.667423129081726,
"lr": 0.000977346407370488,
"router/selected_tokens_s0": 625.5,
"step": 400,
"tokens_trained": 1.310598616
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.11630380824055031,
"grad_norm": 8.186758041381836,
"loss": 1.671,
"loss_ce": 1.324172019958496,
"loss_region": 0.03537043184041977,
"loss_total": 1.3595424890518188,
"lr": 0.0010018413047983197,
"router/selected_tokens_s0": 7117.75,
"step": 410,
"tokens_trained": 1.343364056
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.11914048649031983,
"grad_norm": 32.364845275878906,
"loss": 1.7487,
"loss_ce": 1.6946724653244019,
"loss_region": 0.030674295499920845,
"loss_total": 1.7253468036651611,
"lr": 0.0010263362022261515,
"router/selected_tokens_s0": 4591.75,
"step": 420,
"tokens_trained": 1.376129496
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.12197716474008935,
"grad_norm": 51.924861907958984,
"loss": 1.6652,
"loss_ce": 1.7081111669540405,
"loss_region": 0.029956262558698654,
"loss_total": 1.738067388534546,
"lr": 0.0010508310996539833,
"router/selected_tokens_s0": 4165.25,
"step": 430,
"tokens_trained": 1.408889864
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.12481384298985887,
"grad_norm": 31.08187484741211,
"loss": 1.6269,
"loss_ce": 1.688795804977417,
"loss_region": 0.030442532151937485,
"loss_total": 1.71923828125,
"lr": 0.0010753259970818151,
"router/selected_tokens_s0": 4528.875,
"step": 440,
"tokens_trained": 1.441655304
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.1276505212396284,
"grad_norm": 9.750688552856445,
"loss": 1.646,
"loss_ce": 1.342025637626648,
"loss_region": 0.0289932768791914,
"loss_total": 1.371018886566162,
"lr": 0.001099820894509647,
"router/selected_tokens_s0": 3472.375,
"step": 450,
"tokens_trained": 1.474420744
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.1304871994893979,
"grad_norm": 69.62458038330078,
"loss": 2.646,
"loss_ce": 2.835515022277832,
"loss_region": 0.03730851039290428,
"loss_total": 2.872823476791382,
"lr": 0.0011243157919374788,
"router/selected_tokens_s0": 7822.125,
"step": 460,
"tokens_trained": 1.507186184
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.13332387773916743,
"grad_norm": 62.241451263427734,
"loss": 2.2121,
"loss_ce": 1.9173500537872314,
"loss_region": 0.033008284866809845,
"loss_total": 1.9503583908081055,
"lr": 0.0011488106893653104,
"router/selected_tokens_s0": 5854.125,
"step": 470,
"tokens_trained": 1.539950832
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.13616055598893695,
"grad_norm": 36.45135498046875,
"loss": 1.8122,
"loss_ce": 1.579708456993103,
"loss_region": 0.030225276947021484,
"loss_total": 1.6099337339401245,
"lr": 0.0011733055867931422,
"router/selected_tokens_s0": 4330.5,
"step": 480,
"tokens_trained": 1.572715472
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.13899723423870647,
"grad_norm": 13.028325080871582,
"loss": 1.5027,
"loss_ce": 1.357754111289978,
"loss_region": 0.03526536747813225,
"loss_total": 1.393019437789917,
"lr": 0.001197800484220974,
"router/selected_tokens_s0": 7119.25,
"step": 490,
"tokens_trained": 1.605480912
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.141833912488476,
"grad_norm": 24.705984115600586,
"loss": 1.6096,
"loss_ce": 1.6097279787063599,
"loss_region": 0.02911153808236122,
"loss_total": 1.6388394832611084,
"lr": 0.0012222953816488059,
"router/selected_tokens_s0": 3648.75,
"step": 500,
"tokens_trained": 1.638244216
},
{
"epoch": 0.141833912488476,
"eval_ppl": 4.8348835473380465,
"eval_runtime": 2.9238,
"step": 500,
"tokens_trained": 1.638244216
},
{
"epoch": 0.141833912488476,
"eval_F": 0.3934690889573574,
"eval_F_cds": 0.29905151571508276,
"eval_F_dig": 0.4478214443836758,
"eval_F_exon": 0.39103450221457386,
"eval_F_intron": 0.40873021991492037,
"eval_F_nig": 0.4262229153142855,
"eval_F_promoter": 0.30306008909923465,
"eval_F_utr": 0.3906123042448191,
"eval_G": 0.49025372407568035,
"eval_G_cds": 0.48331595902636837,
"eval_G_dig": 0.49727705981261555,
"eval_G_exon": 0.4909996295084916,
"eval_G_intron": 0.4915825135015993,
"eval_G_nig": 0.49304083637658525,
"eval_G_promoter": 0.48200754687828323,
"eval_G_utr": 0.4901697268782234,
"eval_avg_bp_per_token": 2.541495705926663,
"eval_bp_per_token/cds": 3.343905472636816,
"eval_bp_per_token/dig": 2.2330328583890666,
"eval_bp_per_token/exon": 2.5573190967462667,
"eval_bp_per_token/intron": 2.4466015755041455,
"eval_bp_per_token/nig": 2.346190136826938,
"eval_bp_per_token/promoter": 3.299675661589863,
"eval_bp_per_token/utr": 2.56008320560543,
"eval_ppl_cds": 5.567015659246301,
"eval_ppl_dig": 4.898425899350941,
"eval_ppl_exon": 4.9662705320329295,
"eval_ppl_intron": 4.767518067357663,
"eval_ppl_nig": 4.6987085494689405,
"eval_ppl_promoter": 5.216405144788708,
"eval_ppl_utr": 4.913846632347962,
"step": 500,
"tokens_trained": 1.638244216
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.1446705907382455,
"grad_norm": 37.46432113647461,
"loss": 1.5582,
"loss_ce": 1.5548115968704224,
"loss_region": 0.02565930411219597,
"loss_total": 1.5804709196090698,
"lr": 0.0012243786686061229,
"router/selected_tokens_s0": 1004.25,
"step": 510,
"tokens_trained": 1.671005424
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.14750726898801503,
"grad_norm": 27.722349166870117,
"loss": 1.5672,
"loss_ce": 1.478359341621399,
"loss_region": 0.031882915645837784,
"loss_total": 1.510242223739624,
"lr": 0.0012239717766222718,
"router/selected_tokens_s0": 5380.75,
"step": 520,
"tokens_trained": 1.703770864
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.15034394723778455,
"grad_norm": 26.949983596801758,
"loss": 1.6157,
"loss_ce": 1.4986213445663452,
"loss_region": 0.03651271015405655,
"loss_total": 1.5351340770721436,
"lr": 0.001223564884638421,
"router/selected_tokens_s0": 7781.0,
"step": 530,
"tokens_trained": 1.736536304
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.15318062548755407,
"grad_norm": 28.232316970825195,
"loss": 1.6637,
"loss_ce": 1.4607714414596558,
"loss_region": 0.025137916207313538,
"loss_total": 1.485909342765808,
"lr": 0.00122315799265457,
"router/selected_tokens_s0": 612.875,
"step": 540,
"tokens_trained": 1.769301744
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.1560173037373236,
"grad_norm": 23.33485221862793,
"loss": 1.4993,
"loss_ce": 1.4412897825241089,
"loss_region": 0.035474810749292374,
"loss_total": 1.4767645597457886,
"lr": 0.001222751100670719,
"router/selected_tokens_s0": 7357.5,
"step": 550,
"tokens_trained": 1.802067184
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.1588539819870931,
"grad_norm": 21.005512237548828,
"loss": 1.4335,
"loss_ce": 1.3612841367721558,
"loss_region": 0.029854778200387955,
"loss_total": 1.3911389112472534,
"lr": 0.001222344208686868,
"router/selected_tokens_s0": 4172.125,
"step": 560,
"tokens_trained": 1.834832624
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.16169066023686263,
"grad_norm": 19.53492546081543,
"loss": 1.4383,
"loss_ce": 1.4045627117156982,
"loss_region": 0.02937491238117218,
"loss_total": 1.433937668800354,
"lr": 0.0012219373167030169,
"router/selected_tokens_s0": 3881.875,
"step": 570,
"tokens_trained": 1.867598064
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.16452733848663215,
"grad_norm": 25.31780242919922,
"loss": 1.7004,
"loss_ce": 1.591187834739685,
"loss_region": 0.03149839863181114,
"loss_total": 1.6226862668991089,
"lr": 0.0012215304247191658,
"router/selected_tokens_s0": 5153.875,
"step": 580,
"tokens_trained": 1.900363504
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.16736401673640167,
"grad_norm": 16.421045303344727,
"loss": 1.5092,
"loss_ce": 1.2439810037612915,
"loss_region": 0.02931862138211727,
"loss_total": 1.2732995748519897,
"lr": 0.0012211235327353148,
"router/selected_tokens_s0": 3840.5,
"step": 590,
"tokens_trained": 1.933128944
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.17020069498617119,
"grad_norm": 25.38547706604004,
"loss": 1.5893,
"loss_ce": 1.5482516288757324,
"loss_region": 0.025499241426587105,
"loss_total": 1.5737508535385132,
"lr": 0.0012207166407514638,
"router/selected_tokens_s0": 1237.25,
"step": 600,
"tokens_trained": 1.96589048
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.1730373732359407,
"grad_norm": 14.48205852508545,
"loss": 1.3098,
"loss_ce": 1.2969579696655273,
"loss_region": 0.03318855166435242,
"loss_total": 1.3301465511322021,
"lr": 0.0012203097487676127,
"router/selected_tokens_s0": 6087.625,
"step": 610,
"tokens_trained": 1.99865592
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.17587405148571023,
"grad_norm": 10.29987907409668,
"loss": 1.2844,
"loss_ce": 1.2728289365768433,
"loss_region": 0.03153729811310768,
"loss_total": 1.3043662309646606,
"lr": 0.0012199028567837617,
"router/selected_tokens_s0": 5177.0,
"step": 620,
"tokens_trained": 2.03142136
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.17871072973547975,
"grad_norm": 14.114507675170898,
"loss": 1.2792,
"loss_ce": 1.2729930877685547,
"loss_region": 0.03177153319120407,
"loss_total": 1.3047646284103394,
"lr": 0.0012194959647999107,
"router/selected_tokens_s0": 5318.5,
"step": 630,
"tokens_trained": 2.0641868
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.18154740798524927,
"grad_norm": 35.086570739746094,
"loss": 1.327,
"loss_ce": 1.4959396123886108,
"loss_region": 0.031267955899238586,
"loss_total": 1.527207612991333,
"lr": 0.0012190890728160596,
"router/selected_tokens_s0": 5018.625,
"step": 640,
"tokens_trained": 2.09695224
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.18438408623501878,
"grad_norm": 12.891855239868164,
"loss": 1.3231,
"loss_ce": 1.251932978630066,
"loss_region": 0.030069500207901,
"loss_total": 1.2820024490356445,
"lr": 0.0012186821808322086,
"router/selected_tokens_s0": 4308.125,
"step": 650,
"tokens_trained": 2.12971768
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.1872207644847883,
"grad_norm": 16.94170570373535,
"loss": 1.273,
"loss_ce": 1.303807258605957,
"loss_region": 0.030183279886841774,
"loss_total": 1.3339905738830566,
"lr": 0.0012182752888483576,
"router/selected_tokens_s0": 4374.375,
"step": 660,
"tokens_trained": 2.16248312
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.19005744273455782,
"grad_norm": 8.820389747619629,
"loss": 1.291,
"loss_ce": 1.2488102912902832,
"loss_region": 0.030493643134832382,
"loss_total": 1.2793039083480835,
"lr": 0.0012178683968645065,
"router/selected_tokens_s0": 4566.875,
"step": 670,
"tokens_trained": 2.19524856
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.19289412098432734,
"grad_norm": 12.072690963745117,
"loss": 1.2551,
"loss_ce": 1.257431149482727,
"loss_region": 0.02906171977519989,
"loss_total": 1.2864928245544434,
"lr": 0.0012174615048806555,
"router/selected_tokens_s0": 3676.75,
"step": 680,
"tokens_trained": 2.228014
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.19573079923409686,
"grad_norm": 3.4100522994995117,
"loss": 1.2685,
"loss_ce": 1.217279314994812,
"loss_region": 0.03290281072258949,
"loss_total": 1.2501821517944336,
"lr": 0.0012170546128968045,
"router/selected_tokens_s0": 5992.0,
"step": 690,
"tokens_trained": 2.26077944
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.19856747748386638,
"grad_norm": 6.675322532653809,
"loss": 1.2504,
"loss_ce": 1.1835153102874756,
"loss_region": 0.031250134110450745,
"loss_total": 1.2147654294967651,
"lr": 0.0012166477209129534,
"router/selected_tokens_s0": 5040.625,
"step": 700,
"tokens_trained": 2.29354488
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2014041557336359,
"grad_norm": 21.388051986694336,
"loss": 1.267,
"loss_ce": 1.3746044635772705,
"loss_region": 0.027913136407732964,
"loss_total": 1.402517557144165,
"lr": 0.0012162408289291026,
"router/selected_tokens_s0": 2922.75,
"step": 710,
"tokens_trained": 2.32631032
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.20424083398340542,
"grad_norm": 12.917130470275879,
"loss": 1.3025,
"loss_ce": 1.2145620584487915,
"loss_region": 0.031132886186242104,
"loss_total": 1.2456949949264526,
"lr": 0.0012158339369452516,
"router/selected_tokens_s0": 4968.875,
"step": 720,
"tokens_trained": 2.35907576
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.20707751223317494,
"grad_norm": 8.888051986694336,
"loss": 1.2457,
"loss_ce": 1.185524821281433,
"loss_region": 0.03197301924228668,
"loss_total": 1.2174978256225586,
"lr": 0.0012154270449614005,
"router/selected_tokens_s0": 5463.0,
"step": 730,
"tokens_trained": 2.3918396
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.20991419048294446,
"grad_norm": 13.051305770874023,
"loss": 1.2446,
"loss_ce": 1.1078685522079468,
"loss_region": 0.0308807585388422,
"loss_total": 1.138749361038208,
"lr": 0.0012150201529775495,
"router/selected_tokens_s0": 4844.0,
"step": 740,
"tokens_trained": 2.424600048
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.21275086873271398,
"grad_norm": 6.593105316162109,
"loss": 1.2851,
"loss_ce": 1.255039930343628,
"loss_region": 0.029710784554481506,
"loss_total": 1.2847506999969482,
"lr": 0.0012146132609936982,
"router/selected_tokens_s0": 4083.875,
"step": 750,
"tokens_trained": 2.457364688
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2155875469824835,
"grad_norm": 3.900451183319092,
"loss": 1.2291,
"loss_ce": 1.1926592588424683,
"loss_region": 0.030736476182937622,
"loss_total": 1.2233957052230835,
"lr": 0.0012142063690098472,
"router/selected_tokens_s0": 4719.25,
"step": 760,
"tokens_trained": 2.490130128
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.21842422523225302,
"grad_norm": 8.001019477844238,
"loss": 1.2285,
"loss_ce": 1.1942657232284546,
"loss_region": 0.03041156381368637,
"loss_total": 1.224677324295044,
"lr": 0.0012137994770259962,
"router/selected_tokens_s0": 4525.75,
"step": 770,
"tokens_trained": 2.522895568
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.22126090348202254,
"grad_norm": 5.169371128082275,
"loss": 1.2072,
"loss_ce": 1.2079213857650757,
"loss_region": 0.031087037175893784,
"loss_total": 1.2390084266662598,
"lr": 0.0012133925850421454,
"router/selected_tokens_s0": 4938.25,
"step": 780,
"tokens_trained": 2.555659392
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.22409758173179206,
"grad_norm": 8.434707641601562,
"loss": 1.2079,
"loss_ce": 1.2038490772247314,
"loss_region": 0.02821769006550312,
"loss_total": 1.2320667505264282,
"lr": 0.0012129856930582943,
"router/selected_tokens_s0": 3119.875,
"step": 790,
"tokens_trained": 2.588422136
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.22693425998156158,
"grad_norm": 8.451072692871094,
"loss": 1.2072,
"loss_ce": 1.2617510557174683,
"loss_region": 0.0316130593419075,
"loss_total": 1.29336416721344,
"lr": 0.0012125788010744433,
"router/selected_tokens_s0": 5238.75,
"step": 800,
"tokens_trained": 2.621187576
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2297709382313311,
"grad_norm": 12.750673294067383,
"loss": 1.2283,
"loss_ce": 1.2528263330459595,
"loss_region": 0.03109751269221306,
"loss_total": 1.283923864364624,
"lr": 0.0012121719090905923,
"router/selected_tokens_s0": 4940.75,
"step": 810,
"tokens_trained": 2.653953016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.23260761648110062,
"grad_norm": 10.307655334472656,
"loss": 1.2544,
"loss_ce": 1.2496147155761719,
"loss_region": 0.02913491614162922,
"loss_total": 1.2787495851516724,
"lr": 0.0012117650171067412,
"router/selected_tokens_s0": 3717.75,
"step": 820,
"tokens_trained": 2.686718456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.23544429473087014,
"grad_norm": 0.6592714190483093,
"loss": 1.2022,
"loss_ce": 1.0889158248901367,
"loss_region": 0.031184613704681396,
"loss_total": 1.120100498199463,
"lr": 0.0012113581251228902,
"router/selected_tokens_s0": 5037.375,
"step": 830,
"tokens_trained": 2.71948036
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.23828097298063966,
"grad_norm": 3.0865817070007324,
"loss": 1.1958,
"loss_ce": 1.267112374305725,
"loss_region": 0.02916303649544716,
"loss_total": 1.2962753772735596,
"lr": 0.0012109512331390391,
"router/selected_tokens_s0": 3734.375,
"step": 840,
"tokens_trained": 2.7522458
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.24111765123040918,
"grad_norm": 2.513849973678589,
"loss": 1.2014,
"loss_ce": 1.108485221862793,
"loss_region": 0.0302209984511137,
"loss_total": 1.1387062072753906,
"lr": 0.0012105443411551881,
"router/selected_tokens_s0": 4417.125,
"step": 850,
"tokens_trained": 2.78501124
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2439543294801787,
"grad_norm": 5.594594478607178,
"loss": 1.206,
"loss_ce": 1.1815146207809448,
"loss_region": 0.031508028507232666,
"loss_total": 1.2130227088928223,
"lr": 0.001210137449171337,
"router/selected_tokens_s0": 5212.875,
"step": 860,
"tokens_trained": 2.81777668
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.24679100772994822,
"grad_norm": 2.2655980587005615,
"loss": 1.1897,
"loss_ce": 1.2304372787475586,
"loss_region": 0.031548820436000824,
"loss_total": 1.2619861364364624,
"lr": 0.001209730557187486,
"router/selected_tokens_s0": 5213.25,
"step": 870,
"tokens_trained": 2.85054212
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.24962768597971774,
"grad_norm": 4.335860252380371,
"loss": 1.1897,
"loss_ce": 1.2337130308151245,
"loss_region": 0.02997858263552189,
"loss_total": 1.2636916637420654,
"lr": 0.001209323665203635,
"router/selected_tokens_s0": 4252.5,
"step": 880,
"tokens_trained": 2.88330756
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.25246436422948726,
"grad_norm": 12.377155303955078,
"loss": 1.1966,
"loss_ce": 1.1369762420654297,
"loss_region": 0.029613491147756577,
"loss_total": 1.1665897369384766,
"lr": 0.001208916773219784,
"router/selected_tokens_s0": 4027.75,
"step": 890,
"tokens_trained": 2.916073
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2553010424792568,
"grad_norm": 7.238094806671143,
"loss": 1.2143,
"loss_ce": 1.1700671911239624,
"loss_region": 0.029774101451039314,
"loss_total": 1.1998412609100342,
"lr": 0.001208509881235933,
"router/selected_tokens_s0": 4116.875,
"step": 900,
"tokens_trained": 2.94883828
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2581377207290263,
"grad_norm": 3.2694191932678223,
"loss": 1.1892,
"loss_ce": 1.1454379558563232,
"loss_region": 0.029824109748005867,
"loss_total": 1.1752620935440063,
"lr": 0.001208102989252082,
"router/selected_tokens_s0": 4152.625,
"step": 910,
"tokens_trained": 2.981597288
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2609743989787958,
"grad_norm": 9.457625389099121,
"loss": 1.2038,
"loss_ce": 1.3160332441329956,
"loss_region": 0.030873605981469154,
"loss_total": 1.3469069004058838,
"lr": 0.0012076960972682309,
"router/selected_tokens_s0": 4797.5,
"step": 920,
"tokens_trained": 3.014362456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.26381107722856534,
"grad_norm": 4.293655872344971,
"loss": 1.1978,
"loss_ce": 1.1440948247909546,
"loss_region": 0.02935035713016987,
"loss_total": 1.173445224761963,
"lr": 0.0012072892052843798,
"router/selected_tokens_s0": 3829.5,
"step": 930,
"tokens_trained": 3.047127096
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.26664775547833486,
"grad_norm": 1.7136532068252563,
"loss": 1.1906,
"loss_ce": 1.1432236433029175,
"loss_region": 0.028851088136434555,
"loss_total": 1.1720746755599976,
"lr": 0.0012068823133005288,
"router/selected_tokens_s0": 3479.125,
"step": 940,
"tokens_trained": 3.079892536
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2694844337281044,
"grad_norm": 4.0433244705200195,
"loss": 1.1868,
"loss_ce": 1.168936014175415,
"loss_region": 0.02876598760485649,
"loss_total": 1.1977020502090454,
"lr": 0.0012064754213166778,
"router/selected_tokens_s0": 3396.25,
"step": 950,
"tokens_trained": 3.11265336
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2723211119778739,
"grad_norm": 6.829047203063965,
"loss": 1.1828,
"loss_ce": 1.2480430603027344,
"loss_region": 0.02934931591153145,
"loss_total": 1.2773923873901367,
"lr": 0.001206068529332827,
"router/selected_tokens_s0": 3843.75,
"step": 960,
"tokens_trained": 3.1454188
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2751577902276434,
"grad_norm": 5.5668439865112305,
"loss": 1.1882,
"loss_ce": 1.1349202394485474,
"loss_region": 0.0297370757907629,
"loss_total": 1.1646573543548584,
"lr": 0.001205661637348976,
"router/selected_tokens_s0": 4102.5,
"step": 970,
"tokens_trained": 3.17818424
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.27799446847741294,
"grad_norm": 3.729381561279297,
"loss": 1.1839,
"loss_ce": 1.1995916366577148,
"loss_region": 0.03041483648121357,
"loss_total": 1.230006456375122,
"lr": 0.0012052547453651249,
"router/selected_tokens_s0": 4537.125,
"step": 980,
"tokens_trained": 3.21094968
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.28083114672718246,
"grad_norm": 2.7978885173797607,
"loss": 1.1739,
"loss_ce": 1.1886447668075562,
"loss_region": 0.030223874375224113,
"loss_total": 1.218868613243103,
"lr": 0.0012048478533812738,
"router/selected_tokens_s0": 4418.875,
"step": 990,
"tokens_trained": 3.24371512
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.283667824976952,
"grad_norm": 2.7768421173095703,
"loss": 1.1695,
"loss_ce": 1.1791244745254517,
"loss_region": 0.03016069531440735,
"loss_total": 1.2092851400375366,
"lr": 0.0012044409613974226,
"router/selected_tokens_s0": 4373.0,
"step": 1000,
"tokens_trained": 3.27648056
},
{
"epoch": 0.283667824976952,
"eval_ppl": 3.1457362037176693,
"eval_runtime": 2.5704,
"step": 1000,
"tokens_trained": 3.27648056
},
{
"epoch": 0.283667824976952,
"eval_F": 0.35905403615092213,
"eval_F_cds": 0.3614752043728926,
"eval_F_dig": 0.36203349219991143,
"eval_F_exon": 0.3609332242502892,
"eval_F_intron": 0.3608845011093654,
"eval_F_nig": 0.36360427639485304,
"eval_F_promoter": 0.3446594753609168,
"eval_F_utr": 0.35993294503032014,
"eval_G": 0.4747950002316863,
"eval_G_cds": 0.4875693056072159,
"eval_G_dig": 0.4165539971384483,
"eval_G_exon": 0.4825983323253731,
"eval_G_intron": 0.4746974505122046,
"eval_G_nig": 0.4719204972271849,
"eval_G_promoter": 0.47860970096474814,
"eval_G_utr": 0.4806883865646302,
"eval_avg_bp_per_token": 2.785096111772066,
"eval_bp_per_token/cds": 2.7664414817466,
"eval_bp_per_token/dig": 2.7621753830659665,
"eval_bp_per_token/exon": 2.770595591683602,
"eval_bp_per_token/intron": 2.7709696507497057,
"eval_bp_per_token/nig": 2.7502426811780905,
"eval_bp_per_token/promoter": 2.90141450181467,
"eval_bp_per_token/utr": 2.77829527362593,
"eval_ppl_cds": 3.7937951600140427,
"eval_ppl_dig": 1.292568207392483,
"eval_ppl_exon": 3.5063285971819904,
"eval_ppl_intron": 3.1623742022954864,
"eval_ppl_nig": 3.03123217862896,
"eval_ppl_promoter": 3.420873133996253,
"eval_ppl_utr": 3.4079030610184535,
"step": 1000,
"tokens_trained": 3.27648056
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2865045032267215,
"grad_norm": 1.750190258026123,
"loss": 1.1681,
"loss_ce": 1.1951100826263428,
"loss_region": 0.029561972245573997,
"loss_total": 1.2246720790863037,
"lr": 0.0012040340694135716,
"router/selected_tokens_s0": 3974.5,
"step": 1010,
"tokens_trained": 3.309246
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.289341181476491,
"grad_norm": 5.037286758422852,
"loss": 1.1855,
"loss_ce": 1.1606330871582031,
"loss_region": 0.030172061175107956,
"loss_total": 1.190805196762085,
"lr": 0.0012036271774297205,
"router/selected_tokens_s0": 4388.375,
"step": 1020,
"tokens_trained": 3.34201144
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.29217785972626054,
"grad_norm": 5.963747024536133,
"loss": 1.1794,
"loss_ce": 1.116599678993225,
"loss_region": 0.030543407425284386,
"loss_total": 1.1471431255340576,
"lr": 0.0012032202854458697,
"router/selected_tokens_s0": 4640.0,
"step": 1030,
"tokens_trained": 3.37477688
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.29501453797603006,
"grad_norm": 4.626336574554443,
"loss": 1.1934,
"loss_ce": 1.094927430152893,
"loss_region": 0.02999301068484783,
"loss_total": 1.1249204874038696,
"lr": 0.0012028133934620187,
"router/selected_tokens_s0": 4248.5,
"step": 1040,
"tokens_trained": 3.40754232
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.2978512162257996,
"grad_norm": 4.208251476287842,
"loss": 1.1843,
"loss_ce": 1.1818771362304688,
"loss_region": 0.030715491622686386,
"loss_total": 1.212592601776123,
"lr": 0.0012024065014781676,
"router/selected_tokens_s0": 4729.75,
"step": 1050,
"tokens_trained": 3.44030696
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3006878944755691,
"grad_norm": 2.3673582077026367,
"loss": 1.1726,
"loss_ce": 1.1216882467269897,
"loss_region": 0.030366381630301476,
"loss_total": 1.1520546674728394,
"lr": 0.0012019996094943166,
"router/selected_tokens_s0": 4503.625,
"step": 1060,
"tokens_trained": 3.4730724
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3035245727253386,
"grad_norm": 2.6513352394104004,
"loss": 1.1707,
"loss_ce": 1.1285063028335571,
"loss_region": 0.02974226139485836,
"loss_total": 1.1582485437393188,
"lr": 0.0012015927175104656,
"router/selected_tokens_s0": 4085.375,
"step": 1070,
"tokens_trained": 3.50583784
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.30636125097510813,
"grad_norm": 1.0276976823806763,
"loss": 1.165,
"loss_ce": 1.1330546140670776,
"loss_region": 0.029834387823939323,
"loss_total": 1.162889003753662,
"lr": 0.0012011858255266145,
"router/selected_tokens_s0": 4155.5,
"step": 1080,
"tokens_trained": 3.53860328
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.30919792922487765,
"grad_norm": 3.4352457523345947,
"loss": 1.1759,
"loss_ce": 1.153834581375122,
"loss_region": 0.030001970008015633,
"loss_total": 1.183836579322815,
"lr": 0.0012007789335427635,
"router/selected_tokens_s0": 4271.375,
"step": 1090,
"tokens_trained": 3.57136872
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3120346074746472,
"grad_norm": 3.4334914684295654,
"loss": 1.1668,
"loss_ce": 1.0656555891036987,
"loss_region": 0.03014238551259041,
"loss_total": 1.0957980155944824,
"lr": 0.0012003720415589125,
"router/selected_tokens_s0": 4376.625,
"step": 1100,
"tokens_trained": 3.60413416
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3148712857244167,
"grad_norm": 7.573620796203613,
"loss": 1.1737,
"loss_ce": 1.1206940412521362,
"loss_region": 0.030071774497628212,
"loss_total": 1.1507657766342163,
"lr": 0.0011999651495750614,
"router/selected_tokens_s0": 4325.0,
"step": 1110,
"tokens_trained": 3.6368996
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3177079639741862,
"grad_norm": 4.200015544891357,
"loss": 1.1705,
"loss_ce": 1.1700469255447388,
"loss_region": 0.02990192547440529,
"loss_total": 1.1999489068984985,
"lr": 0.0011995582575912104,
"router/selected_tokens_s0": 4194.25,
"step": 1120,
"tokens_trained": 3.669661712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.32054464222395573,
"grad_norm": 5.207011699676514,
"loss": 1.1668,
"loss_ce": 1.1708717346191406,
"loss_region": 0.029880443587899208,
"loss_total": 1.2007521390914917,
"lr": 0.0011991513656073594,
"router/selected_tokens_s0": 4177.25,
"step": 1130,
"tokens_trained": 3.702426352
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.32338132047372525,
"grad_norm": 4.160227298736572,
"loss": 1.1671,
"loss_ce": 1.1502091884613037,
"loss_region": 0.030087152495980263,
"loss_total": 1.1802963018417358,
"lr": 0.0011987444736235083,
"router/selected_tokens_s0": 4325.25,
"step": 1140,
"tokens_trained": 3.735191792
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3262179987234948,
"grad_norm": 2.3496572971343994,
"loss": 1.1578,
"loss_ce": 1.0942906141281128,
"loss_region": 0.02960728108882904,
"loss_total": 1.123897910118103,
"lr": 0.0011983375816396573,
"router/selected_tokens_s0": 3976.25,
"step": 1150,
"tokens_trained": 3.767957232
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3290546769732643,
"grad_norm": 3.0820891857147217,
"loss": 1.158,
"loss_ce": 1.2191810607910156,
"loss_region": 0.030029961839318275,
"loss_total": 1.249211072921753,
"lr": 0.0011979306896558062,
"router/selected_tokens_s0": 4285.125,
"step": 1160,
"tokens_trained": 3.800722672
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3318913552230338,
"grad_norm": 1.7340823411941528,
"loss": 1.1537,
"loss_ce": 1.0748310089111328,
"loss_region": 0.030402792617678642,
"loss_total": 1.1052337884902954,
"lr": 0.0011975237976719552,
"router/selected_tokens_s0": 4566.375,
"step": 1170,
"tokens_trained": 3.833488112
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.33472803347280333,
"grad_norm": 1.6883597373962402,
"loss": 1.1524,
"loss_ce": 1.15337073802948,
"loss_region": 0.029628688469529152,
"loss_total": 1.1829993724822998,
"lr": 0.0011971169056881042,
"router/selected_tokens_s0": 3994.125,
"step": 1180,
"tokens_trained": 3.866252752
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.33756471172257285,
"grad_norm": 1.3079456090927124,
"loss": 1.155,
"loss_ce": 1.147839903831482,
"loss_region": 0.029972558841109276,
"loss_total": 1.1778124570846558,
"lr": 0.0011967100137042531,
"router/selected_tokens_s0": 4250.125,
"step": 1190,
"tokens_trained": 3.899018184
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.34040138997234237,
"grad_norm": 2.042187452316284,
"loss": 1.1551,
"loss_ce": 1.1045622825622559,
"loss_region": 0.030126892030239105,
"loss_total": 1.134689211845398,
"lr": 0.0011963031217204021,
"router/selected_tokens_s0": 4366.25,
"step": 1200,
"tokens_trained": 3.931783624
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3432380682221119,
"grad_norm": 0.5720299482345581,
"loss": 1.1514,
"loss_ce": 1.1252881288528442,
"loss_region": 0.02972925268113613,
"loss_total": 1.155017375946045,
"lr": 0.0011958962297365513,
"router/selected_tokens_s0": 4055.0,
"step": 1210,
"tokens_trained": 3.964549064
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3460747464718814,
"grad_norm": 2.726912498474121,
"loss": 1.1481,
"loss_ce": 1.0980409383773804,
"loss_region": 0.030369114130735397,
"loss_total": 1.1284101009368896,
"lr": 0.0011954893377527003,
"router/selected_tokens_s0": 4549.75,
"step": 1220,
"tokens_trained": 3.997311912
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.34891142472165093,
"grad_norm": 1.576530933380127,
"loss": 1.1547,
"loss_ce": 1.1488255262374878,
"loss_region": 0.03008064441382885,
"loss_total": 1.1789062023162842,
"lr": 0.0011950824457688492,
"router/selected_tokens_s0": 4327.125,
"step": 1230,
"tokens_trained": 4.030077352
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.35174810297142045,
"grad_norm": 1.7633917331695557,
"loss": 1.1491,
"loss_ce": 1.0437774658203125,
"loss_region": 0.03009728156030178,
"loss_total": 1.0738747119903564,
"lr": 0.0011946755537849982,
"router/selected_tokens_s0": 4352.5,
"step": 1240,
"tokens_trained": 4.062842792
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.35458478122118997,
"grad_norm": 0.8599131107330322,
"loss": 1.1502,
"loss_ce": 1.1635342836380005,
"loss_region": 0.030227093026041985,
"loss_total": 1.1937613487243652,
"lr": 0.001194268661801147,
"router/selected_tokens_s0": 4437.875,
"step": 1250,
"tokens_trained": 4.095608232
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3574214594709595,
"grad_norm": 2.0207033157348633,
"loss": 1.1525,
"loss_ce": 1.161281943321228,
"loss_region": 0.02980414777994156,
"loss_total": 1.1910860538482666,
"lr": 0.001193861769817296,
"router/selected_tokens_s0": 4113.375,
"step": 1260,
"tokens_trained": 4.128373672
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.360258137720729,
"grad_norm": 1.6762081384658813,
"loss": 1.1549,
"loss_ce": 1.176638126373291,
"loss_region": 0.02979988045990467,
"loss_total": 1.2064380645751953,
"lr": 0.0011934548778334449,
"router/selected_tokens_s0": 4110.375,
"step": 1270,
"tokens_trained": 4.161136768
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.36309481597049853,
"grad_norm": 1.5674160718917847,
"loss": 1.1538,
"loss_ce": 1.1160061359405518,
"loss_region": 0.029819507151842117,
"loss_total": 1.1458256244659424,
"lr": 0.001193047985849594,
"router/selected_tokens_s0": 4122.75,
"step": 1280,
"tokens_trained": 4.193902208
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.36593149422026805,
"grad_norm": 1.232892394065857,
"loss": 1.1499,
"loss_ce": 1.192215085029602,
"loss_region": 0.030095556750893593,
"loss_total": 1.2223106622695923,
"lr": 0.001192641093865743,
"router/selected_tokens_s0": 4337.75,
"step": 1290,
"tokens_trained": 4.226667648
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.36876817247003757,
"grad_norm": 1.280081033706665,
"loss": 1.1625,
"loss_ce": 1.0769988298416138,
"loss_region": 0.030076846480369568,
"loss_total": 1.1070756912231445,
"lr": 0.001192234201881892,
"router/selected_tokens_s0": 4330.625,
"step": 1300,
"tokens_trained": 4.259424272
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3716048507198071,
"grad_norm": 0.7819789052009583,
"loss": 1.1516,
"loss_ce": 1.0531295537948608,
"loss_region": 0.029812535271048546,
"loss_total": 1.0829421281814575,
"lr": 0.001191827309898041,
"router/selected_tokens_s0": 4107.75,
"step": 1310,
"tokens_trained": 4.292189712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3744415289695766,
"grad_norm": 4.3887505531311035,
"loss": 1.1524,
"loss_ce": 1.0992565155029297,
"loss_region": 0.030015140771865845,
"loss_total": 1.1292716264724731,
"lr": 0.00119142041791419,
"router/selected_tokens_s0": 4279.625,
"step": 1320,
"tokens_trained": 4.32495164
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.37727820721934613,
"grad_norm": 2.5429630279541016,
"loss": 1.1622,
"loss_ce": 0.9915607571601868,
"loss_region": 0.02960185892879963,
"loss_total": 1.0211626291275024,
"lr": 0.0011910135259303389,
"router/selected_tokens_s0": 3922.75,
"step": 1330,
"tokens_trained": 4.35771708
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.38011488546911565,
"grad_norm": 1.3790112733840942,
"loss": 1.1526,
"loss_ce": 1.2076722383499146,
"loss_region": 0.029480615630745888,
"loss_total": 1.2371528148651123,
"lr": 0.0011906066339464878,
"router/selected_tokens_s0": 3831.5,
"step": 1340,
"tokens_trained": 4.39048252
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.38295156371888517,
"grad_norm": 3.28352427482605,
"loss": 1.1523,
"loss_ce": 0.9999480247497559,
"loss_region": 0.02995798923075199,
"loss_total": 1.0299060344696045,
"lr": 0.0011901997419626368,
"router/selected_tokens_s0": 4236.0,
"step": 1350,
"tokens_trained": 4.42324796
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3857882419686547,
"grad_norm": 2.173388719558716,
"loss": 1.1469,
"loss_ce": 1.1173208951950073,
"loss_region": 0.030063528567552567,
"loss_total": 1.1473844051361084,
"lr": 0.0011897928499787858,
"router/selected_tokens_s0": 4322.25,
"step": 1360,
"tokens_trained": 4.4560134
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3886249202184242,
"grad_norm": 1.3337340354919434,
"loss": 1.1514,
"loss_ce": 1.097347617149353,
"loss_region": 0.030277268961071968,
"loss_total": 1.1276248693466187,
"lr": 0.0011893859579949347,
"router/selected_tokens_s0": 4490.375,
"step": 1370,
"tokens_trained": 4.48877884
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3914615984681937,
"grad_norm": 1.5072178840637207,
"loss": 1.1454,
"loss_ce": 1.1354695558547974,
"loss_region": 0.0300710741430521,
"loss_total": 1.1655405759811401,
"lr": 0.0011889790660110837,
"router/selected_tokens_s0": 4323.125,
"step": 1380,
"tokens_trained": 4.52154428
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.39429827671796325,
"grad_norm": 1.4634846448898315,
"loss": 1.1434,
"loss_ce": 1.1472464799880981,
"loss_region": 0.029943954199552536,
"loss_total": 1.1771904230117798,
"lr": 0.0011885721740272327,
"router/selected_tokens_s0": 4222.625,
"step": 1390,
"tokens_trained": 4.55430972
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.39713495496773277,
"grad_norm": 1.1301681995391846,
"loss": 1.1491,
"loss_ce": 0.932141900062561,
"loss_region": 0.03013395331799984,
"loss_total": 0.9622758626937866,
"lr": 0.0011881652820433816,
"router/selected_tokens_s0": 4389.125,
"step": 1400,
"tokens_trained": 4.58707516
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.3999716332175023,
"grad_norm": 1.153057336807251,
"loss": 1.1483,
"loss_ce": 1.0930418968200684,
"loss_region": 0.029886895790696144,
"loss_total": 1.1229287385940552,
"lr": 0.0011877583900595306,
"router/selected_tokens_s0": 4177.875,
"step": 1410,
"tokens_trained": 4.6198406
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4028083114672718,
"grad_norm": 2.0346107482910156,
"loss": 1.1355,
"loss_ce": 1.130191683769226,
"loss_region": 0.030217666178941727,
"loss_total": 1.1604093313217163,
"lr": 0.0011873514980756796,
"router/selected_tokens_s0": 4435.0,
"step": 1420,
"tokens_trained": 4.652606024
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4056449897170413,
"grad_norm": 1.2362136840820312,
"loss": 1.1461,
"loss_ce": 1.1180355548858643,
"loss_region": 0.029944026842713356,
"loss_total": 1.1479796171188354,
"lr": 0.0011869446060918285,
"router/selected_tokens_s0": 4219.25,
"step": 1430,
"tokens_trained": 4.685371464
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.40848166796681085,
"grad_norm": 1.6414567232131958,
"loss": 1.1476,
"loss_ce": 1.1310675144195557,
"loss_region": 0.030178584158420563,
"loss_total": 1.1612460613250732,
"lr": 0.0011865377141079775,
"router/selected_tokens_s0": 4406.125,
"step": 1440,
"tokens_trained": 4.718136904
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.41131834621658037,
"grad_norm": 0.8733806014060974,
"loss": 1.1452,
"loss_ce": 1.1529111862182617,
"loss_region": 0.029908571392297745,
"loss_total": 1.1828197240829468,
"lr": 0.0011861308221241265,
"router/selected_tokens_s0": 4186.5,
"step": 1450,
"tokens_trained": 4.750902344
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4141550244663499,
"grad_norm": 2.170149087905884,
"loss": 1.1364,
"loss_ce": 1.1446956396102905,
"loss_region": 0.030016543343663216,
"loss_total": 1.1747121810913086,
"lr": 0.0011857239301402756,
"router/selected_tokens_s0": 4279.125,
"step": 1460,
"tokens_trained": 4.783666984
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4169917027161194,
"grad_norm": 1.5901942253112793,
"loss": 1.1418,
"loss_ce": 1.1736469268798828,
"loss_region": 0.02991572767496109,
"loss_total": 1.203562617301941,
"lr": 0.0011853170381564246,
"router/selected_tokens_s0": 4190.375,
"step": 1470,
"tokens_trained": 4.816432424
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4198283809658889,
"grad_norm": 0.7801039814949036,
"loss": 1.1359,
"loss_ce": 1.0415936708450317,
"loss_region": 0.030063536018133163,
"loss_total": 1.0716571807861328,
"lr": 0.0011849101461725736,
"router/selected_tokens_s0": 4323.5,
"step": 1480,
"tokens_trained": 4.849197864
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.42266505921565845,
"grad_norm": 1.1225630044937134,
"loss": 1.1387,
"loss_ce": 1.1764026880264282,
"loss_region": 0.02989169955253601,
"loss_total": 1.2062944173812866,
"lr": 0.0011845032541887225,
"router/selected_tokens_s0": 4166.375,
"step": 1490,
"tokens_trained": 4.881963248
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.42550173746542796,
"grad_norm": 1.3516196012496948,
"loss": 1.1445,
"loss_ce": 1.1119225025177002,
"loss_region": 0.03007156029343605,
"loss_total": 1.1419941186904907,
"lr": 0.0011840963622048713,
"router/selected_tokens_s0": 4332.625,
"step": 1500,
"tokens_trained": 4.914728608
},
{
"epoch": 0.42550173746542796,
"eval_ppl": 3.0476700462359805,
"eval_runtime": 2.5167,
"step": 1500,
"tokens_trained": 4.914728608
},
{
"epoch": 0.42550173746542796,
"eval_F": 0.3395766737890528,
"eval_F_cds": 0.33560010026602843,
"eval_F_dig": 0.34591244107612573,
"eval_F_exon": 0.33732050667193275,
"eval_F_intron": 0.340589821591843,
"eval_F_nig": 0.3449097161371641,
"eval_F_promoter": 0.3287406377406758,
"eval_F_utr": 0.33810586816514,
"eval_G": 0.4388793285567115,
"eval_G_cds": 0.4465895620992391,
"eval_G_dig": 0.39567722372516084,
"eval_G_exon": 0.44327135296181625,
"eval_G_intron": 0.4386635275964277,
"eval_G_nig": 0.4373593879668909,
"eval_G_promoter": 0.44171817290159177,
"eval_G_utr": 0.44355779628952524,
"eval_avg_bp_per_token": 2.944843027178028,
"eval_bp_per_token/cds": 2.9797368928296066,
"eval_bp_per_token/dig": 2.8909049841891283,
"eval_bp_per_token/exon": 2.9645396002341724,
"eval_bp_per_token/intron": 2.93608304360423,
"eval_bp_per_token/nig": 2.8993094517590188,
"eval_bp_per_token/promoter": 3.0419117237000717,
"eval_bp_per_token/utr": 2.9576534871366778,
"eval_ppl_cds": 3.7328596405663,
"eval_ppl_dig": 1.1534605141350962,
"eval_ppl_exon": 3.4439528933373436,
"eval_ppl_intron": 3.0653985302604827,
"eval_ppl_nig": 2.904936687189015,
"eval_ppl_promoter": 3.3618258190318606,
"eval_ppl_utr": 3.3512748939063846,
"step": 1500,
"tokens_trained": 4.914728608
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4283384157151975,
"grad_norm": 1.0354516506195068,
"loss": 1.1407,
"loss_ce": 1.2179700136184692,
"loss_region": 0.029973506927490234,
"loss_total": 1.2479435205459595,
"lr": 0.0011836894702210202,
"router/selected_tokens_s0": 4242.375,
"step": 1510,
"tokens_trained": 4.947494048
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.431175093964967,
"grad_norm": 0.9974690675735474,
"loss": 1.1361,
"loss_ce": 1.1464780569076538,
"loss_region": 0.03020160086452961,
"loss_total": 1.1766796112060547,
"lr": 0.0011832825782371692,
"router/selected_tokens_s0": 4443.875,
"step": 1520,
"tokens_trained": 4.980259488
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4340117722147365,
"grad_norm": 1.61404550075531,
"loss": 1.1383,
"loss_ce": 1.1023921966552734,
"loss_region": 0.029910210520029068,
"loss_total": 1.1323024034500122,
"lr": 0.0011828756862533184,
"router/selected_tokens_s0": 4174.25,
"step": 1530,
"tokens_trained": 5.013024928
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.43684845046450604,
"grad_norm": 1.551711082458496,
"loss": 1.1369,
"loss_ce": 1.085469365119934,
"loss_region": 0.02990012802183628,
"loss_total": 1.115369439125061,
"lr": 0.0011824687942694674,
"router/selected_tokens_s0": 4162.25,
"step": 1540,
"tokens_trained": 5.04578704
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.43968512871427556,
"grad_norm": 1.3328109979629517,
"loss": 1.1358,
"loss_ce": 1.1522539854049683,
"loss_region": 0.02980169840157032,
"loss_total": 1.1820557117462158,
"lr": 0.0011820619022856163,
"router/selected_tokens_s0": 4050.75,
"step": 1550,
"tokens_trained": 5.078551904
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4425218069640451,
"grad_norm": 2.2517945766448975,
"loss": 1.1398,
"loss_ce": 1.0304194688796997,
"loss_region": 0.030139248818159103,
"loss_total": 1.0605586767196655,
"lr": 0.0011816550103017653,
"router/selected_tokens_s0": 4399.625,
"step": 1560,
"tokens_trained": 5.111317344
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4453584852138146,
"grad_norm": 1.0419440269470215,
"loss": 1.1423,
"loss_ce": 1.2029235363006592,
"loss_region": 0.029878782108426094,
"loss_total": 1.2328022718429565,
"lr": 0.0011812481183179143,
"router/selected_tokens_s0": 4131.75,
"step": 1570,
"tokens_trained": 5.144082784
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4481951634635841,
"grad_norm": 0.8405026197433472,
"loss": 1.1357,
"loss_ce": 1.1085268259048462,
"loss_region": 0.02992934361100197,
"loss_total": 1.1384562253952026,
"lr": 0.0011808412263340632,
"router/selected_tokens_s0": 4185.75,
"step": 1580,
"tokens_trained": 5.176848224
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.45103184171335364,
"grad_norm": 1.8782676458358765,
"loss": 1.1447,
"loss_ce": 1.0933234691619873,
"loss_region": 0.030135583132505417,
"loss_total": 1.1234591007232666,
"lr": 0.0011804343343502122,
"router/selected_tokens_s0": 4400.5,
"step": 1590,
"tokens_trained": 5.209613664
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.45386851996312316,
"grad_norm": 1.116540551185608,
"loss": 1.1417,
"loss_ce": 1.1890523433685303,
"loss_region": 0.0303688682615757,
"loss_total": 1.2194212675094604,
"lr": 0.0011800274423663611,
"router/selected_tokens_s0": 4597.375,
"step": 1600,
"tokens_trained": 5.242378304
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4567051982128927,
"grad_norm": 0.9224187135696411,
"loss": 1.1352,
"loss_ce": 1.0753121376037598,
"loss_region": 0.030113881453871727,
"loss_total": 1.1054260730743408,
"lr": 0.0011796205503825101,
"router/selected_tokens_s0": 4381.5,
"step": 1610,
"tokens_trained": 5.275142944
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4595418764626622,
"grad_norm": 1.250409483909607,
"loss": 1.1423,
"loss_ce": 1.1405887603759766,
"loss_region": 0.030090278014540672,
"loss_total": 1.1706790924072266,
"lr": 0.001179213658398659,
"router/selected_tokens_s0": 4360.875,
"step": 1620,
"tokens_trained": 5.307906784
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4623785547124317,
"grad_norm": 0.6683188080787659,
"loss": 1.1358,
"loss_ce": 1.0137219429016113,
"loss_region": 0.0301409512758255,
"loss_total": 1.0438629388809204,
"lr": 0.001178806766414808,
"router/selected_tokens_s0": 4420.25,
"step": 1630,
"tokens_trained": 5.340672224
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.46521523296220124,
"grad_norm": 1.3055206537246704,
"loss": 1.1378,
"loss_ce": 1.120367407798767,
"loss_region": 0.029992438852787018,
"loss_total": 1.150359869003296,
"lr": 0.001178399874430957,
"router/selected_tokens_s0": 4256.375,
"step": 1640,
"tokens_trained": 5.373436896
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.46805191121197076,
"grad_norm": 1.2817225456237793,
"loss": 1.1365,
"loss_ce": 1.159173607826233,
"loss_region": 0.030014952644705772,
"loss_total": 1.1891885995864868,
"lr": 0.001177992982447106,
"router/selected_tokens_s0": 4277.875,
"step": 1650,
"tokens_trained": 5.406202336
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4708885894617403,
"grad_norm": 1.2652041912078857,
"loss": 1.1303,
"loss_ce": 1.1445159912109375,
"loss_region": 0.03000623546540737,
"loss_total": 1.1745222806930542,
"lr": 0.001177586090463255,
"router/selected_tokens_s0": 4274.375,
"step": 1660,
"tokens_trained": 5.438967776
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4737252677115098,
"grad_norm": 1.7784186601638794,
"loss": 1.1334,
"loss_ce": 1.1069244146347046,
"loss_region": 0.030016450211405754,
"loss_total": 1.136940836906433,
"lr": 0.001177179198479404,
"router/selected_tokens_s0": 4287.625,
"step": 1670,
"tokens_trained": 5.471733216
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4765619459612793,
"grad_norm": 1.0779353380203247,
"loss": 1.1315,
"loss_ce": 1.1237202882766724,
"loss_region": 0.029916411265730858,
"loss_total": 1.1536366939544678,
"lr": 0.0011767723064955529,
"router/selected_tokens_s0": 4156.75,
"step": 1680,
"tokens_trained": 5.504498656
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.47939862421104884,
"grad_norm": 0.7689351439476013,
"loss": 1.1324,
"loss_ce": 1.0980726480484009,
"loss_region": 0.030096061527729034,
"loss_total": 1.1281687021255493,
"lr": 0.0011763654145117018,
"router/selected_tokens_s0": 4377.5,
"step": 1690,
"tokens_trained": 5.537264096
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.48223530246081836,
"grad_norm": 0.6869276165962219,
"loss": 1.1332,
"loss_ce": 1.0792652368545532,
"loss_region": 0.030072998255491257,
"loss_total": 1.1093382835388184,
"lr": 0.0011759585225278508,
"router/selected_tokens_s0": 4349.625,
"step": 1700,
"tokens_trained": 5.570029536
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4850719807105879,
"grad_norm": 0.9587815403938293,
"loss": 1.1361,
"loss_ce": 1.0378434658050537,
"loss_region": 0.03009817562997341,
"loss_total": 1.067941665649414,
"lr": 0.001175551630544,
"router/selected_tokens_s0": 4384.25,
"step": 1710,
"tokens_trained": 5.602794976
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4879086589603574,
"grad_norm": 1.1542259454727173,
"loss": 1.1294,
"loss_ce": 1.074008584022522,
"loss_region": 0.030034121125936508,
"loss_total": 1.104042649269104,
"lr": 0.001175144738560149,
"router/selected_tokens_s0": 4306.0,
"step": 1720,
"tokens_trained": 5.635560416
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4907453372101269,
"grad_norm": 1.0194206237792969,
"loss": 1.1296,
"loss_ce": 1.1548231840133667,
"loss_region": 0.03011094592511654,
"loss_total": 1.184934139251709,
"lr": 0.001174737846576298,
"router/selected_tokens_s0": 4395.625,
"step": 1730,
"tokens_trained": 5.668325856
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.49358201545989644,
"grad_norm": 1.108144998550415,
"loss": 1.1351,
"loss_ce": 1.0953419208526611,
"loss_region": 0.03002314455807209,
"loss_total": 1.1253650188446045,
"lr": 0.0011743309545924469,
"router/selected_tokens_s0": 4292.125,
"step": 1740,
"tokens_trained": 5.701091296
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.49641869370966596,
"grad_norm": 1.075562834739685,
"loss": 1.1347,
"loss_ce": 1.1154391765594482,
"loss_region": 0.029949212446808815,
"loss_total": 1.1453883647918701,
"lr": 0.0011739240626085956,
"router/selected_tokens_s0": 4188.625,
"step": 1750,
"tokens_trained": 5.733856736
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.4992553719594355,
"grad_norm": 1.3173739910125732,
"loss": 1.1325,
"loss_ce": 1.0855435132980347,
"loss_region": 0.02994917891919613,
"loss_total": 1.1154927015304565,
"lr": 0.0011735171706247446,
"router/selected_tokens_s0": 4183.625,
"step": 1760,
"tokens_trained": 5.766622176
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.502092050209205,
"grad_norm": 0.8734815716743469,
"loss": 1.1316,
"loss_ce": 1.190360188484192,
"loss_region": 0.03002040646970272,
"loss_total": 1.2203805446624756,
"lr": 0.0011731102786408936,
"router/selected_tokens_s0": 4294.5,
"step": 1770,
"tokens_trained": 5.799387616
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5049287284589745,
"grad_norm": 2.5296459197998047,
"loss": 1.1361,
"loss_ce": 0.9863566756248474,
"loss_region": 0.02998475357890129,
"loss_total": 1.0163414478302002,
"lr": 0.0011727033866570427,
"router/selected_tokens_s0": 4235.875,
"step": 1780,
"tokens_trained": 5.832153056
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.507765406708744,
"grad_norm": 0.7834669947624207,
"loss": 1.1297,
"loss_ce": 0.9555173516273499,
"loss_region": 0.0301660243421793,
"loss_total": 0.9856833815574646,
"lr": 0.0011722964946731917,
"router/selected_tokens_s0": 4416.375,
"step": 1790,
"tokens_trained": 5.864918496
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5106020849585136,
"grad_norm": 0.9466329216957092,
"loss": 1.1295,
"loss_ce": 1.0096023082733154,
"loss_region": 0.030076030641794205,
"loss_total": 1.0396783351898193,
"lr": 0.0011718896026893407,
"router/selected_tokens_s0": 4354.625,
"step": 1800,
"tokens_trained": 5.897683936
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5134387632082831,
"grad_norm": 1.151943325996399,
"loss": 1.1267,
"loss_ce": 1.0721287727355957,
"loss_region": 0.029984835535287857,
"loss_total": 1.1021136045455933,
"lr": 0.0011714827107054896,
"router/selected_tokens_s0": 4239.75,
"step": 1810,
"tokens_trained": 5.930449376
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5162754414580526,
"grad_norm": 0.5502280592918396,
"loss": 1.1249,
"loss_ce": 1.0287433862686157,
"loss_region": 0.029946208000183105,
"loss_total": 1.0586895942687988,
"lr": 0.0011710758187216386,
"router/selected_tokens_s0": 4179.375,
"step": 1820,
"tokens_trained": 5.96321104
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5191121197078221,
"grad_norm": 1.5447858572006226,
"loss": 1.1319,
"loss_ce": 1.1280238628387451,
"loss_region": 0.030087478458881378,
"loss_total": 1.158111333847046,
"lr": 0.0011706689267377876,
"router/selected_tokens_s0": 4389.75,
"step": 1830,
"tokens_trained": 5.99597648
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5219487979575916,
"grad_norm": 0.9524003863334656,
"loss": 1.1274,
"loss_ce": 1.0977569818496704,
"loss_region": 0.030062809586524963,
"loss_total": 1.1278197765350342,
"lr": 0.0011702620347539365,
"router/selected_tokens_s0": 4354.0,
"step": 1840,
"tokens_trained": 6.028741744
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5247854762073612,
"grad_norm": 0.6106662750244141,
"loss": 1.1264,
"loss_ce": 1.06783926486969,
"loss_region": 0.029942721128463745,
"loss_total": 1.097782015800476,
"lr": 0.0011698551427700855,
"router/selected_tokens_s0": 4162.625,
"step": 1850,
"tokens_trained": 6.061507184
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5276221544571307,
"grad_norm": 1.2853341102600098,
"loss": 1.1329,
"loss_ce": 1.0429413318634033,
"loss_region": 0.02999758906662464,
"loss_total": 1.0729389190673828,
"lr": 0.0011694482507862345,
"router/selected_tokens_s0": 4247.5,
"step": 1860,
"tokens_trained": 6.094268624
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5304588327069002,
"grad_norm": 2.993485927581787,
"loss": 1.1236,
"loss_ce": 1.0583568811416626,
"loss_region": 0.030023684725165367,
"loss_total": 1.0883805751800537,
"lr": 0.0011690413588023834,
"router/selected_tokens_s0": 4302.0,
"step": 1870,
"tokens_trained": 6.127034064
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5332955109566697,
"grad_norm": 0.7363700866699219,
"loss": 1.1308,
"loss_ce": 1.1353397369384766,
"loss_region": 0.029933562502264977,
"loss_total": 1.1652733087539673,
"lr": 0.0011686344668185324,
"router/selected_tokens_s0": 4149.375,
"step": 1880,
"tokens_trained": 6.159799504
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5361321892064392,
"grad_norm": 0.8693296313285828,
"loss": 1.1274,
"loss_ce": 1.0827381610870361,
"loss_region": 0.030024589970707893,
"loss_total": 1.1127628087997437,
"lr": 0.0011682275748346814,
"router/selected_tokens_s0": 4302.25,
"step": 1890,
"tokens_trained": 6.192561072
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5389688674562088,
"grad_norm": 0.4028984606266022,
"loss": 1.1162,
"loss_ce": 1.1056593656539917,
"loss_region": 0.030071411281824112,
"loss_total": 1.1357307434082031,
"lr": 0.0011678206828508303,
"router/selected_tokens_s0": 4372.625,
"step": 1900,
"tokens_trained": 6.225326512
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5418055457059783,
"grad_norm": 1.1904973983764648,
"loss": 1.1294,
"loss_ce": 1.0976545810699463,
"loss_region": 0.030053725466132164,
"loss_total": 1.1277083158493042,
"lr": 0.0011674137908669793,
"router/selected_tokens_s0": 4348.125,
"step": 1910,
"tokens_trained": 6.258091952
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5446422239557478,
"grad_norm": 1.018221378326416,
"loss": 1.1277,
"loss_ce": 1.1479384899139404,
"loss_region": 0.030054787173867226,
"loss_total": 1.1779932975769043,
"lr": 0.0011670068988831283,
"router/selected_tokens_s0": 4353.25,
"step": 1920,
"tokens_trained": 6.290857392
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5474789022055173,
"grad_norm": 0.4506734013557434,
"loss": 1.1235,
"loss_ce": 1.1137655973434448,
"loss_region": 0.03005811758339405,
"loss_total": 1.1438237428665161,
"lr": 0.0011666000068992772,
"router/selected_tokens_s0": 4341.5,
"step": 1930,
"tokens_trained": 6.323622832
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5503155804552868,
"grad_norm": 1.5671348571777344,
"loss": 1.1318,
"loss_ce": 1.1652703285217285,
"loss_region": 0.030141720548272133,
"loss_total": 1.195412039756775,
"lr": 0.0011661931149154262,
"router/selected_tokens_s0": 4458.125,
"step": 1940,
"tokens_trained": 6.356388272
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5531522587050564,
"grad_norm": 1.2511063814163208,
"loss": 1.1246,
"loss_ce": 1.2078148126602173,
"loss_region": 0.03000708669424057,
"loss_total": 1.2378219366073608,
"lr": 0.0011657862229315751,
"router/selected_tokens_s0": 4275.625,
"step": 1950,
"tokens_trained": 6.389153712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5559889369548259,
"grad_norm": 1.1278033256530762,
"loss": 1.1253,
"loss_ce": 1.1528972387313843,
"loss_region": 0.029990505427122116,
"loss_total": 1.1828877925872803,
"lr": 0.0011653793309477243,
"router/selected_tokens_s0": 4247.625,
"step": 1960,
"tokens_trained": 6.421919152
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5588256152045954,
"grad_norm": 0.7347070574760437,
"loss": 1.1292,
"loss_ce": 1.1609221696853638,
"loss_region": 0.03007410652935505,
"loss_total": 1.1909962892532349,
"lr": 0.0011649724389638733,
"router/selected_tokens_s0": 4377.0,
"step": 1970,
"tokens_trained": 6.454684592
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5616622934543649,
"grad_norm": 0.8754347562789917,
"loss": 1.1321,
"loss_ce": 1.1314905881881714,
"loss_region": 0.030018918216228485,
"loss_total": 1.1615095138549805,
"lr": 0.0011645655469800223,
"router/selected_tokens_s0": 4292.375,
"step": 1980,
"tokens_trained": 6.487450032
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5644989717041344,
"grad_norm": 1.4375395774841309,
"loss": 1.1251,
"loss_ce": 1.15834641456604,
"loss_region": 0.030011450871825218,
"loss_total": 1.1883578300476074,
"lr": 0.0011641586549961712,
"router/selected_tokens_s0": 4281.875,
"step": 1990,
"tokens_trained": 6.520215472
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.567335649953904,
"grad_norm": 1.3012388944625854,
"loss": 1.1244,
"loss_ce": 1.1547801494598389,
"loss_region": 0.03002019412815571,
"loss_total": 1.184800386428833,
"lr": 0.00116375176301232,
"router/selected_tokens_s0": 4298.375,
"step": 2000,
"tokens_trained": 6.552980912
},
{
"epoch": 0.567335649953904,
"eval_ppl": 2.997264738752139,
"eval_runtime": 2.4974,
"step": 2000,
"tokens_trained": 6.552980912
},
{
"epoch": 0.567335649953904,
"eval_F": 0.33877094677913017,
"eval_F_cds": 0.3354545528054273,
"eval_F_dig": 0.3349740865171758,
"eval_F_exon": 0.33771546252151097,
"eval_F_intron": 0.3394511609404705,
"eval_F_nig": 0.33961248247030124,
"eval_F_promoter": 0.33587224314868064,
"eval_F_utr": 0.3390466904438115,
"eval_G": 0.3927095408069945,
"eval_G_cds": 0.38760326352277413,
"eval_G_dig": 0.38993240031773313,
"eval_G_exon": 0.3922000848097159,
"eval_G_intron": 0.39271919880055167,
"eval_G_nig": 0.3935918508753731,
"eval_G_promoter": 0.3926971556782782,
"eval_G_utr": 0.3912176578977754,
"eval_avg_bp_per_token": 2.9518469913300267,
"eval_bp_per_token/cds": 2.981029744974208,
"eval_bp_per_token/dig": 2.9853055512361997,
"eval_bp_per_token/exon": 2.9610725920975693,
"eval_bp_per_token/intron": 2.9459318896698954,
"eval_bp_per_token/nig": 2.9445325234400035,
"eval_bp_per_token/promoter": 2.977322539741189,
"eval_bp_per_token/utr": 2.9494462803662875,
"eval_ppl_cds": 3.6941119312579422,
"eval_ppl_dig": 1.1218375588220217,
"eval_ppl_exon": 3.4074634485917565,
"eval_ppl_intron": 3.014504389955456,
"eval_ppl_nig": 2.843623870937302,
"eval_ppl_promoter": 3.3305259507076883,
"eval_ppl_utr": 3.322006494837333,
"step": 2000,
"tokens_trained": 6.552980912
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5701723282036735,
"grad_norm": 1.7854270935058594,
"loss": 1.1275,
"loss_ce": 1.1118180751800537,
"loss_region": 0.030034875497221947,
"loss_total": 1.1418529748916626,
"lr": 0.001163344871028469,
"router/selected_tokens_s0": 4323.625,
"step": 2010,
"tokens_trained": 6.585746352
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.573009006453443,
"grad_norm": 1.2741203308105469,
"loss": 1.1297,
"loss_ce": 1.1596630811691284,
"loss_region": 0.030020276084542274,
"loss_total": 1.1896833181381226,
"lr": 0.001162937979044618,
"router/selected_tokens_s0": 4296.625,
"step": 2020,
"tokens_trained": 6.618511792
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5758456847032125,
"grad_norm": 1.3113727569580078,
"loss": 1.1274,
"loss_ce": 1.130359411239624,
"loss_region": 0.030052313581109047,
"loss_total": 1.1604117155075073,
"lr": 0.001162531087060767,
"router/selected_tokens_s0": 4347.25,
"step": 2030,
"tokens_trained": 6.651277232
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.578682362952982,
"grad_norm": 1.585740089416504,
"loss": 1.1242,
"loss_ce": 1.113228440284729,
"loss_region": 0.029946262016892433,
"loss_total": 1.143174648284912,
"lr": 0.001162124195076916,
"router/selected_tokens_s0": 4151.5,
"step": 2040,
"tokens_trained": 6.684041872
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5815190412027516,
"grad_norm": 1.4227651357650757,
"loss": 1.1227,
"loss_ce": 1.1707289218902588,
"loss_region": 0.03000037930905819,
"loss_total": 1.200729250907898,
"lr": 0.001161717303093065,
"router/selected_tokens_s0": 4264.25,
"step": 2050,
"tokens_trained": 6.716806512
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5843557194525211,
"grad_norm": 1.4349584579467773,
"loss": 1.126,
"loss_ce": 1.123897910118103,
"loss_region": 0.029999535530805588,
"loss_total": 1.1538974046707153,
"lr": 0.001161310411109214,
"router/selected_tokens_s0": 4258.5,
"step": 2060,
"tokens_trained": 6.749571952
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5871923977022906,
"grad_norm": 1.525637149810791,
"loss": 1.1223,
"loss_ce": 1.0622094869613647,
"loss_region": 0.03016025200486183,
"loss_total": 1.092369794845581,
"lr": 0.001160903519125363,
"router/selected_tokens_s0": 4409.25,
"step": 2070,
"tokens_trained": 6.782337392
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5900290759520601,
"grad_norm": 0.31481412053108215,
"loss": 1.1308,
"loss_ce": 1.1158243417739868,
"loss_region": 0.030056282877922058,
"loss_total": 1.1458805799484253,
"lr": 0.001160496627141512,
"router/selected_tokens_s0": 4358.875,
"step": 2080,
"tokens_trained": 6.815102832
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5928657542018296,
"grad_norm": 1.4279309511184692,
"loss": 1.1212,
"loss_ce": 1.1024186611175537,
"loss_region": 0.03000911884009838,
"loss_total": 1.1324278116226196,
"lr": 0.0011600897351576609,
"router/selected_tokens_s0": 4277.25,
"step": 2090,
"tokens_trained": 6.847868272
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5957024324515992,
"grad_norm": 1.3502033948898315,
"loss": 1.1243,
"loss_ce": 1.215091347694397,
"loss_region": 0.03004975989460945,
"loss_total": 1.2451411485671997,
"lr": 0.0011596828431738098,
"router/selected_tokens_s0": 4345.25,
"step": 2100,
"tokens_trained": 6.880633712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.5985391107013687,
"grad_norm": 0.30469629168510437,
"loss": 1.1227,
"loss_ce": 1.0989904403686523,
"loss_region": 0.03004642389714718,
"loss_total": 1.1290369033813477,
"lr": 0.0011592759511899588,
"router/selected_tokens_s0": 4339.125,
"step": 2110,
"tokens_trained": 6.913397016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6013757889511382,
"grad_norm": 3.0106451511383057,
"loss": 1.1271,
"loss_ce": 1.0580655336380005,
"loss_region": 0.03005184419453144,
"loss_total": 1.0881173610687256,
"lr": 0.0011588690592061078,
"router/selected_tokens_s0": 4347.75,
"step": 2120,
"tokens_trained": 6.946162296
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6042124672009077,
"grad_norm": 1.4084529876708984,
"loss": 1.1261,
"loss_ce": 0.9337919354438782,
"loss_region": 0.029956450685858727,
"loss_total": 0.9637483954429626,
"lr": 0.0011584621672222567,
"router/selected_tokens_s0": 4181.25,
"step": 2130,
"tokens_trained": 6.978927736
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6070491454506772,
"grad_norm": 0.7794283032417297,
"loss": 1.1287,
"loss_ce": 1.0321320295333862,
"loss_region": 0.030011894181370735,
"loss_total": 1.0621439218521118,
"lr": 0.0011580552752384057,
"router/selected_tokens_s0": 4285.875,
"step": 2140,
"tokens_trained": 7.011693176
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6098858237004467,
"grad_norm": 0.7242727279663086,
"loss": 1.1314,
"loss_ce": 1.1077067852020264,
"loss_region": 0.030075622722506523,
"loss_total": 1.1377824544906616,
"lr": 0.0011576483832545547,
"router/selected_tokens_s0": 4383.25,
"step": 2150,
"tokens_trained": 7.044458616
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6127225019502163,
"grad_norm": 0.8703320622444153,
"loss": 1.1255,
"loss_ce": 1.042706847190857,
"loss_region": 0.030024481937289238,
"loss_total": 1.072731375694275,
"lr": 0.0011572414912707036,
"router/selected_tokens_s0": 4306.0,
"step": 2160,
"tokens_trained": 7.077224056
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6155591801999858,
"grad_norm": 2.464707374572754,
"loss": 1.12,
"loss_ce": 1.0845450162887573,
"loss_region": 0.029988931491971016,
"loss_total": 1.1145339012145996,
"lr": 0.0011568345992868526,
"router/selected_tokens_s0": 4238.875,
"step": 2170,
"tokens_trained": 7.109989496
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6183958584497553,
"grad_norm": 2.0766637325286865,
"loss": 1.1266,
"loss_ce": 1.1240020990371704,
"loss_region": 0.030013838782906532,
"loss_total": 1.1540158987045288,
"lr": 0.0011564277073030016,
"router/selected_tokens_s0": 4291.875,
"step": 2180,
"tokens_trained": 7.142754936
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6212325366995248,
"grad_norm": 1.402709722518921,
"loss": 1.1265,
"loss_ce": 1.1370148658752441,
"loss_region": 0.03003770112991333,
"loss_total": 1.1670525074005127,
"lr": 0.0011560208153191505,
"router/selected_tokens_s0": 4328.625,
"step": 2190,
"tokens_trained": 7.175520376
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6240692149492943,
"grad_norm": 0.7657859325408936,
"loss": 1.1259,
"loss_ce": 1.116765022277832,
"loss_region": 0.030005475506186485,
"loss_total": 1.1467704772949219,
"lr": 0.0011556139233352995,
"router/selected_tokens_s0": 4272.125,
"step": 2200,
"tokens_trained": 7.208285816
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6269058931990639,
"grad_norm": 3.5244100093841553,
"loss": 1.1305,
"loss_ce": 1.1446946859359741,
"loss_region": 0.030087754130363464,
"loss_total": 1.174782395362854,
"lr": 0.0011552070313514487,
"router/selected_tokens_s0": 4414.25,
"step": 2210,
"tokens_trained": 7.241051256
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6297425714488334,
"grad_norm": 0.599822998046875,
"loss": 1.1324,
"loss_ce": 1.0551592111587524,
"loss_region": 0.030122289434075356,
"loss_total": 1.085281491279602,
"lr": 0.0011548001393675976,
"router/selected_tokens_s0": 4453.875,
"step": 2220,
"tokens_trained": 7.273816696
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6325792496986029,
"grad_norm": 2.314722776412964,
"loss": 1.1277,
"loss_ce": 1.1485532522201538,
"loss_region": 0.030024103820323944,
"loss_total": 1.1785773038864136,
"lr": 0.0011543932473837466,
"router/selected_tokens_s0": 4313.5,
"step": 2230,
"tokens_trained": 7.306582136
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6354159279483724,
"grad_norm": 2.072960615158081,
"loss": 1.131,
"loss_ce": 1.0349353551864624,
"loss_region": 0.030028166249394417,
"loss_total": 1.0649635791778564,
"lr": 0.0011539863553998956,
"router/selected_tokens_s0": 4319.625,
"step": 2240,
"tokens_trained": 7.339347576
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.638252606198142,
"grad_norm": 1.371410846710205,
"loss": 1.1226,
"loss_ce": 1.0738561153411865,
"loss_region": 0.030064314603805542,
"loss_total": 1.1039204597473145,
"lr": 0.0011535794634160443,
"router/selected_tokens_s0": 4378.375,
"step": 2250,
"tokens_trained": 7.372113016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6410892844479115,
"grad_norm": 3.474445343017578,
"loss": 1.1284,
"loss_ce": 1.0069116353988647,
"loss_region": 0.030036170035600662,
"loss_total": 1.0369478464126587,
"lr": 0.0011531725714321933,
"router/selected_tokens_s0": 4332.625,
"step": 2260,
"tokens_trained": 7.404878456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.643925962697681,
"grad_norm": 0.5796771049499512,
"loss": 1.1245,
"loss_ce": 1.138779640197754,
"loss_region": 0.030022747814655304,
"loss_total": 1.1688023805618286,
"lr": 0.0011527656794483422,
"router/selected_tokens_s0": 4308.875,
"step": 2270,
"tokens_trained": 7.437643896
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6467626409474505,
"grad_norm": 1.155604362487793,
"loss": 1.1216,
"loss_ce": 0.9782689809799194,
"loss_region": 0.030030813068151474,
"loss_total": 1.0082998275756836,
"lr": 0.0011523587874644914,
"router/selected_tokens_s0": 4321.625,
"step": 2280,
"tokens_trained": 7.470409336
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.64959931919722,
"grad_norm": 1.8259997367858887,
"loss": 1.1318,
"loss_ce": 1.055479884147644,
"loss_region": 0.030021535232663155,
"loss_total": 1.0855014324188232,
"lr": 0.0011519518954806404,
"router/selected_tokens_s0": 4307.375,
"step": 2290,
"tokens_trained": 7.503173472
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6524359974469895,
"grad_norm": 1.2909961938858032,
"loss": 1.1216,
"loss_ce": 1.1016438007354736,
"loss_region": 0.030030114576220512,
"loss_total": 1.1316739320755005,
"lr": 0.0011515450034967894,
"router/selected_tokens_s0": 4321.625,
"step": 2300,
"tokens_trained": 7.535938912
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6552726756967591,
"grad_norm": 3.855242967605591,
"loss": 1.1332,
"loss_ce": 1.1084688901901245,
"loss_region": 0.030001208186149597,
"loss_total": 1.1384700536727905,
"lr": 0.0011511381115129383,
"router/selected_tokens_s0": 4267.625,
"step": 2310,
"tokens_trained": 7.568704352
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6581093539465286,
"grad_norm": 0.6401855945587158,
"loss": 1.1235,
"loss_ce": 1.068629503250122,
"loss_region": 0.030046915635466576,
"loss_total": 1.0986764430999756,
"lr": 0.0011507312195290873,
"router/selected_tokens_s0": 4353.5,
"step": 2320,
"tokens_trained": 7.601469792
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6609460321962981,
"grad_norm": 2.758415460586548,
"loss": 1.1224,
"loss_ce": 1.1197397708892822,
"loss_region": 0.030033273622393608,
"loss_total": 1.1497730016708374,
"lr": 0.0011503243275452363,
"router/selected_tokens_s0": 4317.625,
"step": 2330,
"tokens_trained": 7.634233608
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6637827104460676,
"grad_norm": 3.6356966495513916,
"loss": 1.1258,
"loss_ce": 1.192346453666687,
"loss_region": 0.030019540339708328,
"loss_total": 1.2223659753799438,
"lr": 0.0011499174355613852,
"router/selected_tokens_s0": 4307.0,
"step": 2340,
"tokens_trained": 7.666998248
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6666193886958371,
"grad_norm": 0.5084363222122192,
"loss": 1.1211,
"loss_ce": 1.0241565704345703,
"loss_region": 0.030024418607354164,
"loss_total": 1.0541809797286987,
"lr": 0.0011495105435775342,
"router/selected_tokens_s0": 4311.75,
"step": 2350,
"tokens_trained": 7.699763688
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6694560669456067,
"grad_norm": 2.6118147373199463,
"loss": 1.1205,
"loss_ce": 1.094053864479065,
"loss_region": 0.030054572969675064,
"loss_total": 1.1241084337234497,
"lr": 0.0011491036515936831,
"router/selected_tokens_s0": 4375.625,
"step": 2360,
"tokens_trained": 7.732529128
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6722927451953762,
"grad_norm": 1.5716001987457275,
"loss": 1.1174,
"loss_ce": 1.0806825160980225,
"loss_region": 0.02999335154891014,
"loss_total": 1.1106758117675781,
"lr": 0.0011486967596098321,
"router/selected_tokens_s0": 4245.125,
"step": 2370,
"tokens_trained": 7.765294568
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6751294234451457,
"grad_norm": 1.6855603456497192,
"loss": 1.1248,
"loss_ce": 1.0957375764846802,
"loss_region": 0.030019070953130722,
"loss_total": 1.1257566213607788,
"lr": 0.001148289867625981,
"router/selected_tokens_s0": 4306.25,
"step": 2380,
"tokens_trained": 7.798060008
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6779661016949152,
"grad_norm": 1.7085551023483276,
"loss": 1.1219,
"loss_ce": 1.0849840641021729,
"loss_region": 0.029990842565894127,
"loss_total": 1.114974856376648,
"lr": 0.00114788297564213,
"router/selected_tokens_s0": 4250.875,
"step": 2390,
"tokens_trained": 7.830825448
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6808027799446847,
"grad_norm": 2.7529702186584473,
"loss": 1.1278,
"loss_ce": 1.1395268440246582,
"loss_region": 0.030015477910637856,
"loss_total": 1.1695423126220703,
"lr": 0.001147476083658279,
"router/selected_tokens_s0": 4305.125,
"step": 2400,
"tokens_trained": 7.863590888
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6836394581944543,
"grad_norm": 1.855435848236084,
"loss": 1.1225,
"loss_ce": 1.055867075920105,
"loss_region": 0.030039696022868156,
"loss_total": 1.085906744003296,
"lr": 0.001147069191674428,
"router/selected_tokens_s0": 4357.375,
"step": 2410,
"tokens_trained": 7.896356328
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6864761364442238,
"grad_norm": 1.9066152572631836,
"loss": 1.1243,
"loss_ce": 0.9804560542106628,
"loss_region": 0.03004065528512001,
"loss_total": 1.010496735572815,
"lr": 0.001146662299690577,
"router/selected_tokens_s0": 4339.375,
"step": 2420,
"tokens_trained": 7.929121768
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6893128146939933,
"grad_norm": 1.6631957292556763,
"loss": 1.1181,
"loss_ce": 1.1269235610961914,
"loss_region": 0.030016232281923294,
"loss_total": 1.1569397449493408,
"lr": 0.001146255407706726,
"router/selected_tokens_s0": 4304.375,
"step": 2430,
"tokens_trained": 7.961887208
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6921494929437628,
"grad_norm": 1.932186245918274,
"loss": 1.1318,
"loss_ce": 1.1084073781967163,
"loss_region": 0.030037561431527138,
"loss_total": 1.1384449005126953,
"lr": 0.0011458485157228749,
"router/selected_tokens_s0": 4342.375,
"step": 2440,
"tokens_trained": 7.994651848
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6949861711935323,
"grad_norm": 2.0729987621307373,
"loss": 1.1219,
"loss_ce": 1.0754549503326416,
"loss_region": 0.030010342597961426,
"loss_total": 1.105465292930603,
"lr": 0.0011454416237390238,
"router/selected_tokens_s0": 4284.25,
"step": 2450,
"tokens_trained": 8.027417288
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.6978228494433019,
"grad_norm": 2.743365526199341,
"loss": 1.1183,
"loss_ce": 1.1507514715194702,
"loss_region": 0.030012760311365128,
"loss_total": 1.1807641983032227,
"lr": 0.001145034731755173,
"router/selected_tokens_s0": 4299.125,
"step": 2460,
"tokens_trained": 8.060182704
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7006595276930714,
"grad_norm": 1.968074083328247,
"loss": 1.1248,
"loss_ce": 1.1554365158081055,
"loss_region": 0.03006228432059288,
"loss_total": 1.185498833656311,
"lr": 0.001144627839771322,
"router/selected_tokens_s0": 4397.5,
"step": 2470,
"tokens_trained": 8.092948144
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7034962059428409,
"grad_norm": 0.6022619605064392,
"loss": 1.1233,
"loss_ce": 1.0739916563034058,
"loss_region": 0.030015716329216957,
"loss_total": 1.104007363319397,
"lr": 0.001144220947787471,
"router/selected_tokens_s0": 4304.25,
"step": 2480,
"tokens_trained": 8.125713584
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7063328841926104,
"grad_norm": 2.9086802005767822,
"loss": 1.1155,
"loss_ce": 1.1227823495864868,
"loss_region": 0.030057305470108986,
"loss_total": 1.1528396606445312,
"lr": 0.00114381405580362,
"router/selected_tokens_s0": 4393.875,
"step": 2490,
"tokens_trained": 8.158479016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7091695624423799,
"grad_norm": 1.8187512159347534,
"loss": 1.1248,
"loss_ce": 1.0580413341522217,
"loss_region": 0.030032671988010406,
"loss_total": 1.088073968887329,
"lr": 0.0011434071638197687,
"router/selected_tokens_s0": 4340.0,
"step": 2500,
"tokens_trained": 8.191244456
},
{
"epoch": 0.7091695624423799,
"eval_ppl": 2.9815305929864326,
"eval_runtime": 2.4796,
"step": 2500,
"tokens_trained": 8.191244456
},
{
"epoch": 0.7091695624423799,
"eval_F": 0.34048558481131336,
"eval_F_cds": 0.3413653968998391,
"eval_F_dig": 0.3326561970987317,
"eval_F_exon": 0.34301915535870453,
"eval_F_intron": 0.3409895477582185,
"eval_F_nig": 0.34018024599300895,
"eval_F_promoter": 0.3386885010090298,
"eval_F_utr": 0.34306656745268094,
"eval_G": 0.37360140820500265,
"eval_G_cds": 0.37391617995023085,
"eval_G_dig": 0.39410936238508215,
"eval_G_exon": 0.37318875715857475,
"eval_G_intron": 0.3727733807645177,
"eval_G_nig": 0.3734594960312147,
"eval_G_promoter": 0.37594098275253596,
"eval_G_utr": 0.3722500326080449,
"eval_avg_bp_per_token": 2.9369819005822793,
"eval_bp_per_token/cds": 2.929412322050359,
"eval_bp_per_token/dig": 3.006106631175135,
"eval_bp_per_token/exon": 2.915289086273542,
"eval_bp_per_token/intron": 2.932641210190579,
"eval_bp_per_token/nig": 2.9396180753557073,
"eval_bp_per_token/promoter": 2.952565549231147,
"eval_bp_per_token/utr": 2.9148861908204733,
"eval_ppl_cds": 3.7211953918524787,
"eval_ppl_dig": 1.1071312956552213,
"eval_ppl_exon": 3.408594147596357,
"eval_ppl_intron": 2.996762231969892,
"eval_ppl_nig": 2.8097869859130795,
"eval_ppl_promoter": 3.341004188366384,
"eval_ppl_utr": 3.3285188682998834,
"step": 2500,
"tokens_trained": 8.191244456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7120062406921495,
"grad_norm": 1.3883668184280396,
"loss": 1.1168,
"loss_ce": 1.0345538854599,
"loss_region": 0.030011983588337898,
"loss_total": 1.064565896987915,
"lr": 0.0011430002718359176,
"router/selected_tokens_s0": 4293.25,
"step": 2510,
"tokens_trained": 8.224009896
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.714842918941919,
"grad_norm": 0.5920007228851318,
"loss": 1.1128,
"loss_ce": 1.1446270942687988,
"loss_region": 0.030029037967324257,
"loss_total": 1.1746561527252197,
"lr": 0.0011425933798520666,
"router/selected_tokens_s0": 4338.125,
"step": 2520,
"tokens_trained": 8.256775336
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7176795971916885,
"grad_norm": 2.293912410736084,
"loss": 1.119,
"loss_ce": 1.1278671026229858,
"loss_region": 0.030034860596060753,
"loss_total": 1.1579020023345947,
"lr": 0.0011421864878682158,
"router/selected_tokens_s0": 4356.25,
"step": 2530,
"tokens_trained": 8.289540776
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.720516275441458,
"grad_norm": 1.4504122734069824,
"loss": 1.1161,
"loss_ce": 0.9545093774795532,
"loss_region": 0.030026227235794067,
"loss_total": 0.9845355749130249,
"lr": 0.0011417795958843647,
"router/selected_tokens_s0": 4322.375,
"step": 2540,
"tokens_trained": 8.322306216
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7233529536912275,
"grad_norm": 1.777256727218628,
"loss": 1.1177,
"loss_ce": 1.0747570991516113,
"loss_region": 0.030009755864739418,
"loss_total": 1.104766845703125,
"lr": 0.0011413727039005137,
"router/selected_tokens_s0": 4293.375,
"step": 2550,
"tokens_trained": 8.355071656
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7261896319409971,
"grad_norm": 1.637231707572937,
"loss": 1.1121,
"loss_ce": 1.1526259183883667,
"loss_region": 0.030018767341971397,
"loss_total": 1.1826447248458862,
"lr": 0.0011409658119166627,
"router/selected_tokens_s0": 4318.5,
"step": 2560,
"tokens_trained": 8.387835072
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7290263101907666,
"grad_norm": 1.0746310949325562,
"loss": 1.1151,
"loss_ce": 1.1064670085906982,
"loss_region": 0.03001333586871624,
"loss_total": 1.1364803314208984,
"lr": 0.0011405589199328116,
"router/selected_tokens_s0": 4294.375,
"step": 2570,
"tokens_trained": 8.420600512
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7318629884405361,
"grad_norm": 1.3798960447311401,
"loss": 1.1198,
"loss_ce": 1.073905110359192,
"loss_region": 0.030032221227884293,
"loss_total": 1.1039373874664307,
"lr": 0.0011401520279489606,
"router/selected_tokens_s0": 4356.375,
"step": 2580,
"tokens_trained": 8.453365928
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7346996666903056,
"grad_norm": 1.8040990829467773,
"loss": 1.1175,
"loss_ce": 1.0255845785140991,
"loss_region": 0.03001689724624157,
"loss_total": 1.0556014776229858,
"lr": 0.0011397451359651096,
"router/selected_tokens_s0": 4312.5,
"step": 2590,
"tokens_trained": 8.486131368
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7375363449400751,
"grad_norm": 2.420259952545166,
"loss": 1.1193,
"loss_ce": 1.0581092834472656,
"loss_region": 0.030017009004950523,
"loss_total": 1.088126301765442,
"lr": 0.0011393382439812585,
"router/selected_tokens_s0": 4316.25,
"step": 2600,
"tokens_trained": 8.518896808
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7403730231898447,
"grad_norm": 2.068054437637329,
"loss": 1.1114,
"loss_ce": 1.0681673288345337,
"loss_region": 0.030040811747312546,
"loss_total": 1.0982081890106201,
"lr": 0.0011389313519974075,
"router/selected_tokens_s0": 4369.0,
"step": 2610,
"tokens_trained": 8.551662248
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7432097014396142,
"grad_norm": 1.7490754127502441,
"loss": 1.1182,
"loss_ce": 1.0639960765838623,
"loss_region": 0.030034611001610756,
"loss_total": 1.094030737876892,
"lr": 0.0011385244600135565,
"router/selected_tokens_s0": 4363.5,
"step": 2620,
"tokens_trained": 8.584426888
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7460463796893837,
"grad_norm": 1.4811182022094727,
"loss": 1.1131,
"loss_ce": 1.0907317399978638,
"loss_region": 0.03001326695084572,
"loss_total": 1.120745062828064,
"lr": 0.0011381175680297054,
"router/selected_tokens_s0": 4307.875,
"step": 2630,
"tokens_trained": 8.617192328
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7488830579391532,
"grad_norm": 2.1497602462768555,
"loss": 1.1096,
"loss_ce": 1.123599886894226,
"loss_region": 0.030037013813853264,
"loss_total": 1.1536369323730469,
"lr": 0.0011377106760458544,
"router/selected_tokens_s0": 4368.5,
"step": 2640,
"tokens_trained": 8.649951656
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7517197361889227,
"grad_norm": 2.179588556289673,
"loss": 1.1129,
"loss_ce": 0.9365400671958923,
"loss_region": 0.030036216601729393,
"loss_total": 0.9665762782096863,
"lr": 0.0011373037840620034,
"router/selected_tokens_s0": 4350.375,
"step": 2650,
"tokens_trained": 8.682717096
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7545564144386923,
"grad_norm": 1.6021926403045654,
"loss": 1.1095,
"loss_ce": 1.1449388265609741,
"loss_region": 0.030037803575396538,
"loss_total": 1.1749765872955322,
"lr": 0.0011368968920781523,
"router/selected_tokens_s0": 4373.0,
"step": 2660,
"tokens_trained": 8.715482536
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7573930926884618,
"grad_norm": 1.2494678497314453,
"loss": 1.1097,
"loss_ce": 1.0806819200515747,
"loss_region": 0.03000866435468197,
"loss_total": 1.1106905937194824,
"lr": 0.0011364900000943013,
"router/selected_tokens_s0": 4295.25,
"step": 2670,
"tokens_trained": 8.748247976
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7602297709382313,
"grad_norm": 1.3196409940719604,
"loss": 1.1136,
"loss_ce": 1.069360375404358,
"loss_region": 0.030009115114808083,
"loss_total": 1.0993695259094238,
"lr": 0.0011360831081104503,
"router/selected_tokens_s0": 4291.75,
"step": 2680,
"tokens_trained": 8.781013416
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7630664491880008,
"grad_norm": 2.674771308898926,
"loss": 1.1188,
"loss_ce": 1.1771190166473389,
"loss_region": 0.030019955709576607,
"loss_total": 1.207139015197754,
"lr": 0.0011356762161265992,
"router/selected_tokens_s0": 4330.125,
"step": 2690,
"tokens_trained": 8.813778696
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7659031274377703,
"grad_norm": 1.6932164430618286,
"loss": 1.1031,
"loss_ce": 1.0857900381088257,
"loss_region": 0.0300260242074728,
"loss_total": 1.1158161163330078,
"lr": 0.0011352693241427482,
"router/selected_tokens_s0": 4347.5,
"step": 2700,
"tokens_trained": 8.846544136
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7687398056875399,
"grad_norm": 1.5329583883285522,
"loss": 1.1098,
"loss_ce": 1.0980348587036133,
"loss_region": 0.030030502006411552,
"loss_total": 1.1280653476715088,
"lr": 0.0011348624321588974,
"router/selected_tokens_s0": 4366.375,
"step": 2710,
"tokens_trained": 8.879309576
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7715764839373094,
"grad_norm": 1.829464077949524,
"loss": 1.1093,
"loss_ce": 1.1128755807876587,
"loss_region": 0.03000422567129135,
"loss_total": 1.142879843711853,
"lr": 0.0011344555401750463,
"router/selected_tokens_s0": 4282.0,
"step": 2720,
"tokens_trained": 8.912075016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7744131621870789,
"grad_norm": 2.8766870498657227,
"loss": 1.1187,
"loss_ce": 1.12075674533844,
"loss_region": 0.030019240453839302,
"loss_total": 1.1507760286331177,
"lr": 0.0011340486481911953,
"router/selected_tokens_s0": 4327.25,
"step": 2730,
"tokens_trained": 8.944840456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7772498404368484,
"grad_norm": 2.2969014644622803,
"loss": 1.1166,
"loss_ce": 1.0795077085494995,
"loss_region": 0.030028002336621284,
"loss_total": 1.1095356941223145,
"lr": 0.001133641756207344,
"router/selected_tokens_s0": 4352.75,
"step": 2740,
"tokens_trained": 8.977605896
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7800865186866179,
"grad_norm": 1.7521798610687256,
"loss": 1.1139,
"loss_ce": 1.1274807453155518,
"loss_region": 0.030016858130693436,
"loss_total": 1.1574976444244385,
"lr": 0.001133234864223493,
"router/selected_tokens_s0": 4320.0,
"step": 2750,
"tokens_trained": 9.010371336
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7829231969363875,
"grad_norm": 2.6245367527008057,
"loss": 1.1075,
"loss_ce": 1.1328058242797852,
"loss_region": 0.03003484010696411,
"loss_total": 1.1628406047821045,
"lr": 0.001132827972239642,
"router/selected_tokens_s0": 4367.625,
"step": 2760,
"tokens_trained": 9.043136776
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.785759875186157,
"grad_norm": 1.162583351135254,
"loss": 1.1181,
"loss_ce": 1.151093602180481,
"loss_region": 0.030036624521017075,
"loss_total": 1.1811301708221436,
"lr": 0.001132421080255791,
"router/selected_tokens_s0": 4392.5,
"step": 2770,
"tokens_trained": 9.075902216
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7885965534359265,
"grad_norm": 1.4981096982955933,
"loss": 1.1104,
"loss_ce": 1.0844680070877075,
"loss_region": 0.030015481635928154,
"loss_total": 1.1144834756851196,
"lr": 0.0011320141882719401,
"router/selected_tokens_s0": 4314.625,
"step": 2780,
"tokens_trained": 9.108667656
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.791433231685696,
"grad_norm": 1.8612878322601318,
"loss": 1.1073,
"loss_ce": 1.0089409351348877,
"loss_region": 0.029995379969477654,
"loss_total": 1.0389362573623657,
"lr": 0.001131607296288089,
"router/selected_tokens_s0": 4257.875,
"step": 2790,
"tokens_trained": 9.14143004
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7942699099354655,
"grad_norm": 0.6861640810966492,
"loss": 1.1058,
"loss_ce": 0.9385975003242493,
"loss_region": 0.029996687546372414,
"loss_total": 0.9685941934585571,
"lr": 0.001131200404304238,
"router/selected_tokens_s0": 4290.625,
"step": 2800,
"tokens_trained": 9.17419548
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.797106588185235,
"grad_norm": 2.205390214920044,
"loss": 1.108,
"loss_ce": 1.0670945644378662,
"loss_region": 0.030025651678442955,
"loss_total": 1.0971201658248901,
"lr": 0.001130793512320387,
"router/selected_tokens_s0": 4342.875,
"step": 2810,
"tokens_trained": 9.20696092
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.7999432664350046,
"grad_norm": 2.068150520324707,
"loss": 1.106,
"loss_ce": 1.0238165855407715,
"loss_region": 0.03002651408314705,
"loss_total": 1.0538431406021118,
"lr": 0.001130386620336536,
"router/selected_tokens_s0": 4359.75,
"step": 2820,
"tokens_trained": 9.23972636
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8027799446847741,
"grad_norm": 1.1060576438903809,
"loss": 1.1065,
"loss_ce": 1.0474674701690674,
"loss_region": 0.03000919334590435,
"loss_total": 1.0774766206741333,
"lr": 0.001129979728352685,
"router/selected_tokens_s0": 4301.75,
"step": 2830,
"tokens_trained": 9.2724918
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8056166229345436,
"grad_norm": 1.369165301322937,
"loss": 1.1081,
"loss_ce": 1.0370676517486572,
"loss_region": 0.030027758330106735,
"loss_total": 1.067095398902893,
"lr": 0.001129572836368834,
"router/selected_tokens_s0": 4375.625,
"step": 2840,
"tokens_trained": 9.30525692
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8084533011843131,
"grad_norm": 2.285675525665283,
"loss": 1.109,
"loss_ce": 1.0967503786087036,
"loss_region": 0.0300269927829504,
"loss_total": 1.1267774105072021,
"lr": 0.0011291659443849829,
"router/selected_tokens_s0": 4370.0,
"step": 2850,
"tokens_trained": 9.33802236
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8112899794340827,
"grad_norm": 0.8950642943382263,
"loss": 1.1015,
"loss_ce": 1.091797947883606,
"loss_region": 0.03003113530576229,
"loss_total": 1.1218290328979492,
"lr": 0.0011287590524011318,
"router/selected_tokens_s0": 4384.125,
"step": 2860,
"tokens_trained": 9.3707878
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8141266576838522,
"grad_norm": 2.1465282440185547,
"loss": 1.1012,
"loss_ce": 0.9929934144020081,
"loss_region": 0.030015377327799797,
"loss_total": 1.0230088233947754,
"lr": 0.0011283521604172808,
"router/selected_tokens_s0": 4333.0,
"step": 2870,
"tokens_trained": 9.403548272
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8169633359336217,
"grad_norm": 2.1108782291412354,
"loss": 1.1029,
"loss_ce": 1.0729644298553467,
"loss_region": 0.03002534806728363,
"loss_total": 1.1029897928237915,
"lr": 0.0011279452684334298,
"router/selected_tokens_s0": 4357.25,
"step": 2880,
"tokens_trained": 9.436313712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8198000141833912,
"grad_norm": 1.7104750871658325,
"loss": 1.1041,
"loss_ce": 1.116651177406311,
"loss_region": 0.030016740784049034,
"loss_total": 1.1466679573059082,
"lr": 0.0011275383764495787,
"router/selected_tokens_s0": 4340.875,
"step": 2890,
"tokens_trained": 9.469079152
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8226366924331607,
"grad_norm": 1.7549395561218262,
"loss": 1.1098,
"loss_ce": 0.977597713470459,
"loss_region": 0.030016236007213593,
"loss_total": 1.0076138973236084,
"lr": 0.0011271314844657277,
"router/selected_tokens_s0": 4328.875,
"step": 2900,
"tokens_trained": 9.50184356
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8254733706829303,
"grad_norm": 2.076667547225952,
"loss": 1.1041,
"loss_ce": 0.9882082343101501,
"loss_region": 0.03001856803894043,
"loss_total": 1.0182268619537354,
"lr": 0.0011267245924818767,
"router/selected_tokens_s0": 4341.75,
"step": 2910,
"tokens_trained": 9.534608992
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8283100489326998,
"grad_norm": 1.930834412574768,
"loss": 1.1031,
"loss_ce": 1.1864138841629028,
"loss_region": 0.03002503328025341,
"loss_total": 1.216438889503479,
"lr": 0.0011263177004980256,
"router/selected_tokens_s0": 4379.125,
"step": 2920,
"tokens_trained": 9.567373632
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8311467271824693,
"grad_norm": 0.7202333807945251,
"loss": 1.103,
"loss_ce": 1.0883651971817017,
"loss_region": 0.030032740905880928,
"loss_total": 1.1183979511260986,
"lr": 0.0011259108085141746,
"router/selected_tokens_s0": 4386.375,
"step": 2930,
"tokens_trained": 9.600139072
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8339834054322388,
"grad_norm": 1.0626195669174194,
"loss": 1.1043,
"loss_ce": 1.0197147130966187,
"loss_region": 0.03001200221478939,
"loss_total": 1.0497267246246338,
"lr": 0.0011255039165303236,
"router/selected_tokens_s0": 4317.625,
"step": 2940,
"tokens_trained": 9.632904512
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8368200836820083,
"grad_norm": 2.428861379623413,
"loss": 1.1036,
"loss_ce": 0.9022196531295776,
"loss_region": 0.030008511617779732,
"loss_total": 0.932228147983551,
"lr": 0.0011250970245464725,
"router/selected_tokens_s0": 4322.125,
"step": 2950,
"tokens_trained": 9.665669952
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8396567619317779,
"grad_norm": 0.9146430492401123,
"loss": 1.1015,
"loss_ce": 1.1206673383712769,
"loss_region": 0.030019836500287056,
"loss_total": 1.1506872177124023,
"lr": 0.0011246901325626217,
"router/selected_tokens_s0": 4355.75,
"step": 2960,
"tokens_trained": 9.698432616
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8424934401815474,
"grad_norm": 1.3183574676513672,
"loss": 1.0992,
"loss_ce": 1.088549256324768,
"loss_region": 0.030013682320713997,
"loss_total": 1.118562936782837,
"lr": 0.0011242832405787707,
"router/selected_tokens_s0": 4324.875,
"step": 2970,
"tokens_trained": 9.731196464
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8453301184313169,
"grad_norm": 1.7237669229507446,
"loss": 1.1016,
"loss_ce": 1.1303554773330688,
"loss_region": 0.030019812285900116,
"loss_total": 1.1603752374649048,
"lr": 0.0011238763485949196,
"router/selected_tokens_s0": 4352.75,
"step": 2980,
"tokens_trained": 9.763955848
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8481667966810864,
"grad_norm": 2.353868246078491,
"loss": 1.097,
"loss_ce": 1.1438567638397217,
"loss_region": 0.03001641482114792,
"loss_total": 1.1738731861114502,
"lr": 0.0011234694566110684,
"router/selected_tokens_s0": 4346.25,
"step": 2990,
"tokens_trained": 9.796721288
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8510034749308559,
"grad_norm": 2.239737033843994,
"loss": 1.099,
"loss_ce": 1.137770414352417,
"loss_region": 0.03001844510436058,
"loss_total": 1.1677888631820679,
"lr": 0.0011230625646272174,
"router/selected_tokens_s0": 4345.625,
"step": 3000,
"tokens_trained": 9.829486728
},
{
"epoch": 0.8510034749308559,
"eval_ppl": 2.91798250107805,
"eval_runtime": 2.489,
"step": 3000,
"tokens_trained": 9.829486728
},
{
"epoch": 0.8510034749308559,
"eval_F": 0.34119725725854944,
"eval_F_cds": 0.339909922293828,
"eval_F_dig": 0.3374221944422741,
"eval_F_exon": 0.3444720286625102,
"eval_F_intron": 0.3423051363848719,
"eval_F_nig": 0.3420074982635899,
"eval_F_promoter": 0.33568609090152685,
"eval_F_utr": 0.3433317082766702,
"eval_G": 0.35626090599344656,
"eval_G_cds": 0.3533774528284723,
"eval_G_dig": 0.39929882420827145,
"eval_G_exon": 0.35481589922102014,
"eval_G_intron": 0.3559872186522367,
"eval_G_nig": 0.35704285773301014,
"eval_G_promoter": 0.354736183175574,
"eval_G_utr": 0.3543053844969594,
"eval_avg_bp_per_token": 2.930855916119598,
"eval_bp_per_token/cds": 2.9419558959963843,
"eval_bp_per_token/dig": 2.9636461870947826,
"eval_bp_per_token/exon": 2.9029933254166496,
"eval_bp_per_token/intron": 2.921370127720335,
"eval_bp_per_token/nig": 2.923912502144284,
"eval_bp_per_token/promoter": 2.9789735920078644,
"eval_bp_per_token/utr": 2.9126351452344177,
"eval_ppl_cds": 3.5636364626812047,
"eval_ppl_dig": 1.0968188962634289,
"eval_ppl_exon": 3.3285669782872387,
"eval_ppl_intron": 2.935885553210843,
"eval_ppl_nig": 2.7347129604188645,
"eval_ppl_promoter": 3.292230226733986,
"eval_ppl_utr": 3.2942767869833,
"step": 3000,
"tokens_trained": 9.829486728
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8538401531806254,
"grad_norm": 1.0114418268203735,
"loss": 1.092,
"loss_ce": 1.0748389959335327,
"loss_region": 0.030020562931895256,
"loss_total": 1.1048595905303955,
"lr": 0.0011226556726433663,
"router/selected_tokens_s0": 4357.25,
"step": 3010,
"tokens_trained": 9.862252168
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.856676831430395,
"grad_norm": 2.267549753189087,
"loss": 1.0971,
"loss_ce": 1.088890552520752,
"loss_region": 0.03001115657389164,
"loss_total": 1.1189017295837402,
"lr": 0.0011222487806595153,
"router/selected_tokens_s0": 4332.125,
"step": 3020,
"tokens_trained": 9.895016808
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8595135096801645,
"grad_norm": 1.3197458982467651,
"loss": 1.0948,
"loss_ce": 0.9036920070648193,
"loss_region": 0.029985321685671806,
"loss_total": 0.9336773157119751,
"lr": 0.0011218418886756645,
"router/selected_tokens_s0": 4263.875,
"step": 3030,
"tokens_trained": 9.927782248
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.862350187929934,
"grad_norm": 1.6852810382843018,
"loss": 1.1014,
"loss_ce": 1.1086361408233643,
"loss_region": 0.030025212094187737,
"loss_total": 1.1386613845825195,
"lr": 0.0011214349966918134,
"router/selected_tokens_s0": 4387.0,
"step": 3040,
"tokens_trained": 9.960547688
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8651868661797035,
"grad_norm": 1.753929853439331,
"loss": 1.1021,
"loss_ce": 1.020461082458496,
"loss_region": 0.03000788949429989,
"loss_total": 1.050468921661377,
"lr": 0.0011210281047079624,
"router/selected_tokens_s0": 4309.625,
"step": 3050,
"tokens_trained": 9.993313128
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.868023544429473,
"grad_norm": 1.005724310874939,
"loss": 1.1004,
"loss_ce": 1.0221396684646606,
"loss_region": 0.030008656904101372,
"loss_total": 1.0521483421325684,
"lr": 0.0011206212127241114,
"router/selected_tokens_s0": 4308.25,
"step": 3060,
"tokens_trained": 10.026078568
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8708602226792426,
"grad_norm": 1.1538729667663574,
"loss": 1.0968,
"loss_ce": 1.045743465423584,
"loss_region": 0.03000555746257305,
"loss_total": 1.0757490396499634,
"lr": 0.0011202143207402603,
"router/selected_tokens_s0": 4306.375,
"step": 3070,
"tokens_trained": 10.058844008
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8736969009290121,
"grad_norm": 1.8730782270431519,
"loss": 1.1067,
"loss_ce": 1.0756598711013794,
"loss_region": 0.03001215122640133,
"loss_total": 1.105672001838684,
"lr": 0.0011198074287564093,
"router/selected_tokens_s0": 4321.25,
"step": 3080,
"tokens_trained": 10.091604144
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8765335791787816,
"grad_norm": 2.151102066040039,
"loss": 1.1026,
"loss_ce": 1.1392779350280762,
"loss_region": 0.030021771788597107,
"loss_total": 1.1692997217178345,
"lr": 0.0011194005367725583,
"router/selected_tokens_s0": 4390.625,
"step": 3090,
"tokens_trained": 10.124368784
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8793702574285511,
"grad_norm": 2.1336331367492676,
"loss": 1.0989,
"loss_ce": 1.0690431594848633,
"loss_region": 0.030014000833034515,
"loss_total": 1.0990571975708008,
"lr": 0.0011189936447887072,
"router/selected_tokens_s0": 4350.0,
"step": 3100,
"tokens_trained": 10.157134224
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8822069356783206,
"grad_norm": 2.0660271644592285,
"loss": 1.0952,
"loss_ce": 1.130868911743164,
"loss_region": 0.030013030394911766,
"loss_total": 1.1608819961547852,
"lr": 0.0011185867528048562,
"router/selected_tokens_s0": 4349.5,
"step": 3110,
"tokens_trained": 10.189899664
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8850436139280902,
"grad_norm": 0.5485074520111084,
"loss": 1.0989,
"loss_ce": 1.0510705709457397,
"loss_region": 0.030014289543032646,
"loss_total": 1.0810848474502563,
"lr": 0.0011181798608210052,
"router/selected_tokens_s0": 4352.875,
"step": 3120,
"tokens_trained": 10.222665104
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8878802921778597,
"grad_norm": 1.4256670475006104,
"loss": 1.098,
"loss_ce": 1.012781023979187,
"loss_region": 0.030008379369974136,
"loss_total": 1.0427894592285156,
"lr": 0.0011177729688371541,
"router/selected_tokens_s0": 4316.25,
"step": 3130,
"tokens_trained": 10.255429744
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8907169704276292,
"grad_norm": 1.5442920923233032,
"loss": 1.0946,
"loss_ce": 0.9727160334587097,
"loss_region": 0.030018145218491554,
"loss_total": 1.0027341842651367,
"lr": 0.001117366076853303,
"router/selected_tokens_s0": 4410.125,
"step": 3140,
"tokens_trained": 10.288194384
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8935536486773987,
"grad_norm": 1.4367228746414185,
"loss": 1.0967,
"loss_ce": 1.1256974935531616,
"loss_region": 0.03001110814511776,
"loss_total": 1.1557085514068604,
"lr": 0.001116959184869452,
"router/selected_tokens_s0": 4353.0,
"step": 3150,
"tokens_trained": 10.320959664
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8963903269271682,
"grad_norm": 1.1446796655654907,
"loss": 1.0908,
"loss_ce": 1.0701184272766113,
"loss_region": 0.03002384677529335,
"loss_total": 1.100142240524292,
"lr": 0.001116552292885601,
"router/selected_tokens_s0": 4393.625,
"step": 3160,
"tokens_trained": 10.353725104
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.8992270051769378,
"grad_norm": 1.2145036458969116,
"loss": 1.0973,
"loss_ce": 1.064907193183899,
"loss_region": 0.030015455558896065,
"loss_total": 1.094922661781311,
"lr": 0.00111614540090175,
"router/selected_tokens_s0": 4343.5,
"step": 3170,
"tokens_trained": 10.386490544
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9020636834267073,
"grad_norm": 1.4071613550186157,
"loss": 1.1022,
"loss_ce": 1.0602645874023438,
"loss_region": 0.030010098591446877,
"loss_total": 1.090274691581726,
"lr": 0.001115738508917899,
"router/selected_tokens_s0": 4317.875,
"step": 3180,
"tokens_trained": 10.419255984
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9049003616764768,
"grad_norm": 1.6725516319274902,
"loss": 1.0985,
"loss_ce": 1.0676279067993164,
"loss_region": 0.030014997348189354,
"loss_total": 1.0976428985595703,
"lr": 0.001115331616934048,
"router/selected_tokens_s0": 4344.375,
"step": 3190,
"tokens_trained": 10.452021424
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9077370399262463,
"grad_norm": 1.2930175065994263,
"loss": 1.0925,
"loss_ce": 1.0800068378448486,
"loss_region": 0.030022740364074707,
"loss_total": 1.1100295782089233,
"lr": 0.0011149247249501969,
"router/selected_tokens_s0": 4410.0,
"step": 3200,
"tokens_trained": 10.484786064
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9105737181760158,
"grad_norm": 1.8128279447555542,
"loss": 1.0904,
"loss_ce": 0.978069007396698,
"loss_region": 0.03001835197210312,
"loss_total": 1.008087396621704,
"lr": 0.001114517832966346,
"router/selected_tokens_s0": 4359.875,
"step": 3210,
"tokens_trained": 10.517551504
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9134103964257854,
"grad_norm": 2.7744452953338623,
"loss": 1.0922,
"loss_ce": 1.0043200254440308,
"loss_region": 0.030023187398910522,
"loss_total": 1.0343432426452637,
"lr": 0.001114110940982495,
"router/selected_tokens_s0": 4378.0,
"step": 3220,
"tokens_trained": 10.550316944
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9162470746755549,
"grad_norm": 1.3683607578277588,
"loss": 1.1049,
"loss_ce": 1.1094452142715454,
"loss_region": 0.03001292422413826,
"loss_total": 1.139458179473877,
"lr": 0.001113704048998644,
"router/selected_tokens_s0": 4363.5,
"step": 3230,
"tokens_trained": 10.583082384
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9190837529253244,
"grad_norm": 0.2690750062465668,
"loss": 1.0941,
"loss_ce": 1.1386804580688477,
"loss_region": 0.030020570382475853,
"loss_total": 1.1687010526657104,
"lr": 0.0011132971570147927,
"router/selected_tokens_s0": 4375.5,
"step": 3240,
"tokens_trained": 10.615847824
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9219204311750939,
"grad_norm": 3.4670774936676025,
"loss": 1.1104,
"loss_ce": 1.0814176797866821,
"loss_region": 0.030020495876669884,
"loss_total": 1.1114381551742554,
"lr": 0.0011128902650309417,
"router/selected_tokens_s0": 4377.625,
"step": 3250,
"tokens_trained": 10.648613248
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9247571094248634,
"grad_norm": 0.8661336302757263,
"loss": 1.0912,
"loss_ce": 0.8863070011138916,
"loss_region": 0.030015867203474045,
"loss_total": 0.9163228869438171,
"lr": 0.0011124833730470907,
"router/selected_tokens_s0": 4347.25,
"step": 3260,
"tokens_trained": 10.681378688
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.927593787674633,
"grad_norm": 1.5195131301879883,
"loss": 1.0884,
"loss_ce": 0.9159345626831055,
"loss_region": 0.03002041205763817,
"loss_total": 0.9459549784660339,
"lr": 0.0011120764810632396,
"router/selected_tokens_s0": 4345.625,
"step": 3270,
"tokens_trained": 10.714144128
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9304304659244025,
"grad_norm": 1.2169824838638306,
"loss": 1.0911,
"loss_ce": 1.0144977569580078,
"loss_region": 0.03001958690583706,
"loss_total": 1.0445173978805542,
"lr": 0.0011116695890793888,
"router/selected_tokens_s0": 4394.125,
"step": 3280,
"tokens_trained": 10.746909568
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.933267144174172,
"grad_norm": 0.5433168411254883,
"loss": 1.0815,
"loss_ce": 1.0072896480560303,
"loss_region": 0.03001921810209751,
"loss_total": 1.0373088121414185,
"lr": 0.0011112626970955378,
"router/selected_tokens_s0": 4374.375,
"step": 3290,
"tokens_trained": 10.779675008
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9361038224239415,
"grad_norm": 1.009089469909668,
"loss": 1.0887,
"loss_ce": 1.1322096586227417,
"loss_region": 0.030020419508218765,
"loss_total": 1.162230134010315,
"lr": 0.0011108558051116867,
"router/selected_tokens_s0": 4387.875,
"step": 3300,
"tokens_trained": 10.812440448
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.938940500673711,
"grad_norm": 1.1186658143997192,
"loss": 1.0913,
"loss_ce": 1.0417039394378662,
"loss_region": 0.030010957270860672,
"loss_total": 1.0717148780822754,
"lr": 0.0011104489131278357,
"router/selected_tokens_s0": 4330.5,
"step": 3310,
"tokens_trained": 10.845202832
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9417771789234806,
"grad_norm": 1.406549334526062,
"loss": 1.0884,
"loss_ce": 1.1175626516342163,
"loss_region": 0.030026914551854134,
"loss_total": 1.1475895643234253,
"lr": 0.0011100420211439847,
"router/selected_tokens_s0": 4413.375,
"step": 3320,
"tokens_trained": 10.877968272
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9446138571732501,
"grad_norm": 1.7244771718978882,
"loss": 1.0946,
"loss_ce": 1.044142246246338,
"loss_region": 0.03001406043767929,
"loss_total": 1.0741562843322754,
"lr": 0.0011096351291601336,
"router/selected_tokens_s0": 4364.875,
"step": 3330,
"tokens_trained": 10.910733712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9474505354230196,
"grad_norm": 1.0954854488372803,
"loss": 1.0904,
"loss_ce": 1.0164037942886353,
"loss_region": 0.030010055750608444,
"loss_total": 1.0464138984680176,
"lr": 0.0011092282371762826,
"router/selected_tokens_s0": 4342.75,
"step": 3340,
"tokens_trained": 10.943499152
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9502872136727891,
"grad_norm": 2.0860025882720947,
"loss": 1.0943,
"loss_ce": 1.155411958694458,
"loss_region": 0.030009519308805466,
"loss_total": 1.1854214668273926,
"lr": 0.0011088213451924316,
"router/selected_tokens_s0": 4352.25,
"step": 3350,
"tokens_trained": 10.976264592
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9531238919225586,
"grad_norm": 1.1401242017745972,
"loss": 1.0872,
"loss_ce": 1.0502551794052124,
"loss_region": 0.030008839443325996,
"loss_total": 1.0802639722824097,
"lr": 0.0011084144532085805,
"router/selected_tokens_s0": 4341.5,
"step": 3360,
"tokens_trained": 11.009030032
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9559605701723282,
"grad_norm": 1.704352617263794,
"loss": 1.0874,
"loss_ce": 0.9294300079345703,
"loss_region": 0.030014311894774437,
"loss_total": 0.9594443440437317,
"lr": 0.0011080075612247295,
"router/selected_tokens_s0": 4352.125,
"step": 3370,
"tokens_trained": 11.041794728
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9587972484220977,
"grad_norm": 0.9261600971221924,
"loss": 1.0935,
"loss_ce": 1.0589406490325928,
"loss_region": 0.030018316581845284,
"loss_total": 1.088958978652954,
"lr": 0.0011076006692408785,
"router/selected_tokens_s0": 4379.125,
"step": 3380,
"tokens_trained": 11.074559368
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9616339266718672,
"grad_norm": 0.7537907958030701,
"loss": 1.0865,
"loss_ce": 1.0465339422225952,
"loss_region": 0.030020853504538536,
"loss_total": 1.076554775238037,
"lr": 0.0011071937772570274,
"router/selected_tokens_s0": 4404.75,
"step": 3390,
"tokens_trained": 11.107324808
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9644706049216367,
"grad_norm": 1.0815021991729736,
"loss": 1.0952,
"loss_ce": 1.0314733982086182,
"loss_region": 0.030017558485269547,
"loss_total": 1.0614910125732422,
"lr": 0.0011067868852731764,
"router/selected_tokens_s0": 4383.625,
"step": 3400,
"tokens_trained": 11.140090248
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9673072831714062,
"grad_norm": 1.633634090423584,
"loss": 1.088,
"loss_ce": 0.9958590269088745,
"loss_region": 0.030007656663656235,
"loss_total": 1.0258666276931763,
"lr": 0.0011063799932893254,
"router/selected_tokens_s0": 4323.5,
"step": 3410,
"tokens_trained": 11.172855688
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9701439614211758,
"grad_norm": 1.0511754751205444,
"loss": 1.0871,
"loss_ce": 1.0179994106292725,
"loss_region": 0.030009476467967033,
"loss_total": 1.048008918762207,
"lr": 0.0011059731013054743,
"router/selected_tokens_s0": 4384.875,
"step": 3420,
"tokens_trained": 11.205621096
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9729806396709453,
"grad_norm": 1.6246494054794312,
"loss": 1.0822,
"loss_ce": 0.9318454265594482,
"loss_region": 0.029994873329997063,
"loss_total": 0.9618402719497681,
"lr": 0.0011055662093216233,
"router/selected_tokens_s0": 4316.375,
"step": 3430,
"tokens_trained": 11.238384136
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9758173179207148,
"grad_norm": 1.043264627456665,
"loss": 1.0927,
"loss_ce": 1.0697022676467896,
"loss_region": 0.03001013770699501,
"loss_total": 1.0997123718261719,
"lr": 0.0011051593173377723,
"router/selected_tokens_s0": 4351.5,
"step": 3440,
"tokens_trained": 11.271149576
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9786539961704843,
"grad_norm": 0.5135401487350464,
"loss": 1.0883,
"loss_ce": 1.0403201580047607,
"loss_region": 0.03001248463988304,
"loss_total": 1.070332646369934,
"lr": 0.0011047524253539212,
"router/selected_tokens_s0": 4380.0,
"step": 3450,
"tokens_trained": 11.303915016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9814906744202538,
"grad_norm": 0.6191660165786743,
"loss": 1.0821,
"loss_ce": 0.9997903108596802,
"loss_region": 0.030010463669896126,
"loss_total": 1.0298007726669312,
"lr": 0.0011043455333700704,
"router/selected_tokens_s0": 4325.0,
"step": 3460,
"tokens_trained": 11.336680456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9843273526700234,
"grad_norm": 2.0896031856536865,
"loss": 1.0852,
"loss_ce": 0.9407132863998413,
"loss_region": 0.030007831752300262,
"loss_total": 0.9707211256027222,
"lr": 0.0011039386413862194,
"router/selected_tokens_s0": 4340.0,
"step": 3470,
"tokens_trained": 11.369444288
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9871640309197929,
"grad_norm": 1.5842080116271973,
"loss": 1.0773,
"loss_ce": 1.0703927278518677,
"loss_region": 0.030027827247977257,
"loss_total": 1.100420594215393,
"lr": 0.0011035317494023683,
"router/selected_tokens_s0": 4422.875,
"step": 3480,
"tokens_trained": 11.402209728
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9900007091695624,
"grad_norm": 1.6284222602844238,
"loss": 1.0867,
"loss_ce": 1.0688589811325073,
"loss_region": 0.03001173585653305,
"loss_total": 1.0988707542419434,
"lr": 0.001103124857418517,
"router/selected_tokens_s0": 4370.375,
"step": 3490,
"tokens_trained": 11.434975168
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9928373874193319,
"grad_norm": 1.9294172525405884,
"loss": 1.0776,
"loss_ce": 1.0885505676269531,
"loss_region": 0.03002048470079899,
"loss_total": 1.1185710430145264,
"lr": 0.001102717965434666,
"router/selected_tokens_s0": 4402.375,
"step": 3500,
"tokens_trained": 11.467740608
},
{
"epoch": 0.9928373874193319,
"eval_ppl": 2.8891544142739582,
"eval_runtime": 2.4909,
"step": 3500,
"tokens_trained": 11.467740608
},
{
"epoch": 0.9928373874193319,
"eval_F": 0.3418065949445779,
"eval_F_cds": 0.34451083924282977,
"eval_F_dig": 0.33807328697554495,
"eval_F_exon": 0.34726493074167064,
"eval_F_intron": 0.34224230575312725,
"eval_F_nig": 0.3415901920743997,
"eval_F_promoter": 0.339251188483381,
"eval_F_utr": 0.34464885946681034,
"eval_G": 0.34547800502934484,
"eval_G_cds": 0.3442593687333732,
"eval_G_dig": 0.3991668762370758,
"eval_G_exon": 0.3453487502027675,
"eval_G_intron": 0.34490806786026235,
"eval_G_nig": 0.3455515495124094,
"eval_G_promoter": 0.34497689050299185,
"eval_G_utr": 0.3442341927156835,
"eval_avg_bp_per_token": 2.9256310872589943,
"eval_bp_per_token/cds": 2.902666291132704,
"eval_bp_per_token/dig": 2.957938525537324,
"eval_bp_per_token/exon": 2.8796458020228277,
"eval_bp_per_token/intron": 2.921906448121405,
"eval_bp_per_token/nig": 2.9274845215175147,
"eval_bp_per_token/promoter": 2.947668376551575,
"eval_bp_per_token/utr": 2.9015038713519954,
"eval_ppl_cds": 3.5389763754938555,
"eval_ppl_dig": 1.091459889456152,
"eval_ppl_exon": 3.306826954534152,
"eval_ppl_intron": 2.9106190204474447,
"eval_ppl_nig": 2.694991382732784,
"eval_ppl_promoter": 3.283923741138257,
"eval_ppl_utr": 3.295742249982149,
"step": 3500,
"tokens_trained": 11.467740608
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.9956740656691014,
"grad_norm": 1.324675440788269,
"loss": 1.0868,
"loss_ce": 1.0776382684707642,
"loss_region": 0.03000813163816929,
"loss_total": 1.1076463460922241,
"lr": 0.001102311073450815,
"router/selected_tokens_s0": 4340.25,
"step": 3510,
"tokens_trained": 11.500506048
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 0.998510743918871,
"grad_norm": 1.6382735967636108,
"loss": 1.0901,
"loss_ce": 1.0050663948059082,
"loss_region": 0.030013620853424072,
"loss_total": 1.0350799560546875,
"lr": 0.001101904181466964,
"router/selected_tokens_s0": 4364.0,
"step": 3520,
"tokens_trained": 11.533271488
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0011346712999079,
"grad_norm": 1.1215876340866089,
"loss": 1.0781,
"loss_ce": 1.0582538843154907,
"loss_region": 0.030011579394340515,
"loss_total": 1.0882654190063477,
"lr": 0.0011014972894831132,
"router/selected_tokens_s0": 4343.125,
"step": 3530,
"tokens_trained": 11.56357952
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0039713495496774,
"grad_norm": 1.4025973081588745,
"loss": 1.0809,
"loss_ce": 1.037977695465088,
"loss_region": 0.02999720722436905,
"loss_total": 1.0679749250411987,
"lr": 0.0011010903974992621,
"router/selected_tokens_s0": 4358.5,
"step": 3540,
"tokens_trained": 11.59634496
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.006808027799447,
"grad_norm": 0.7676182985305786,
"loss": 1.0854,
"loss_ce": 0.96112459897995,
"loss_region": 0.030009040609002113,
"loss_total": 0.9911336302757263,
"lr": 0.001100683505515411,
"router/selected_tokens_s0": 4343.25,
"step": 3550,
"tokens_trained": 11.6291104
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0096447060492164,
"grad_norm": 0.8700928688049316,
"loss": 1.0844,
"loss_ce": 1.12131667137146,
"loss_region": 0.030012287199497223,
"loss_total": 1.1513289213180542,
"lr": 0.00110027661353156,
"router/selected_tokens_s0": 4377.75,
"step": 3560,
"tokens_trained": 11.66187584
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.012481384298986,
"grad_norm": 0.3621160089969635,
"loss": 1.0866,
"loss_ce": 0.9774419665336609,
"loss_region": 0.030013367533683777,
"loss_total": 1.0074553489685059,
"lr": 0.001099869721547709,
"router/selected_tokens_s0": 4358.375,
"step": 3570,
"tokens_trained": 11.69464128
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0153180625487555,
"grad_norm": 0.8118414878845215,
"loss": 1.089,
"loss_ce": 1.0349894762039185,
"loss_region": 0.03000425547361374,
"loss_total": 1.0649937391281128,
"lr": 0.001099462829563858,
"router/selected_tokens_s0": 4320.625,
"step": 3580,
"tokens_trained": 11.72740592
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.018154740798525,
"grad_norm": 1.3667856454849243,
"loss": 1.0864,
"loss_ce": 1.1101551055908203,
"loss_region": 0.030023517087101936,
"loss_total": 1.1401786804199219,
"lr": 0.001099055937580007,
"router/selected_tokens_s0": 4445.875,
"step": 3590,
"tokens_trained": 11.76017136
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0209914190482945,
"grad_norm": 1.459873914718628,
"loss": 1.0875,
"loss_ce": 1.038351058959961,
"loss_region": 0.030004626139998436,
"loss_total": 1.068355679512024,
"lr": 0.001098649045596156,
"router/selected_tokens_s0": 4320.75,
"step": 3600,
"tokens_trained": 11.7929352
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.023828097298064,
"grad_norm": 0.9393401145935059,
"loss": 1.0852,
"loss_ce": 1.040799856185913,
"loss_region": 0.030013561248779297,
"loss_total": 1.0708134174346924,
"lr": 0.0010982421536123049,
"router/selected_tokens_s0": 4366.625,
"step": 3610,
"tokens_trained": 11.82570064
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0266647755478335,
"grad_norm": 1.4277124404907227,
"loss": 1.0821,
"loss_ce": 0.9711215496063232,
"loss_region": 0.03001675009727478,
"loss_total": 1.0011383295059204,
"lr": 0.0010978352616284538,
"router/selected_tokens_s0": 4369.5,
"step": 3620,
"tokens_trained": 11.85846448
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.029501453797603,
"grad_norm": 0.8825812935829163,
"loss": 1.0782,
"loss_ce": 1.0676993131637573,
"loss_region": 0.030013611540198326,
"loss_total": 1.0977128744125366,
"lr": 0.0010974283696446028,
"router/selected_tokens_s0": 4372.375,
"step": 3630,
"tokens_trained": 11.89122992
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0323381320473726,
"grad_norm": 0.9022896885871887,
"loss": 1.0733,
"loss_ce": 1.0407861471176147,
"loss_region": 0.030012723058462143,
"loss_total": 1.0707988739013672,
"lr": 0.0010970214776607518,
"router/selected_tokens_s0": 4355.75,
"step": 3640,
"tokens_trained": 11.92399536
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.035174810297142,
"grad_norm": 0.8870510458946228,
"loss": 1.0749,
"loss_ce": 1.1323661804199219,
"loss_region": 0.03002365306019783,
"loss_total": 1.162389874458313,
"lr": 0.0010966145856769007,
"router/selected_tokens_s0": 4435.75,
"step": 3650,
"tokens_trained": 11.9567608
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0380114885469116,
"grad_norm": 1.4656965732574463,
"loss": 1.0832,
"loss_ce": 1.0585097074508667,
"loss_region": 0.030004626139998436,
"loss_total": 1.0885143280029297,
"lr": 0.0010962076936930497,
"router/selected_tokens_s0": 4329.25,
"step": 3660,
"tokens_trained": 11.98952624
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0408481667966811,
"grad_norm": 0.9163527488708496,
"loss": 1.0781,
"loss_ce": 1.0935940742492676,
"loss_region": 0.030009469017386436,
"loss_total": 1.1236035823822021,
"lr": 0.0010958008017091987,
"router/selected_tokens_s0": 4359.375,
"step": 3670,
"tokens_trained": 12.02229168
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0436848450464506,
"grad_norm": 1.3013805150985718,
"loss": 1.0876,
"loss_ce": 1.0595567226409912,
"loss_region": 0.030006933957338333,
"loss_total": 1.0895636081695557,
"lr": 0.0010953939097253476,
"router/selected_tokens_s0": 4343.625,
"step": 3680,
"tokens_trained": 12.05505712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0465215232962202,
"grad_norm": 0.39981648325920105,
"loss": 1.0707,
"loss_ce": 1.1020740270614624,
"loss_region": 0.030012022703886032,
"loss_total": 1.1320860385894775,
"lr": 0.0010949870177414966,
"router/selected_tokens_s0": 4384.75,
"step": 3690,
"tokens_trained": 12.08782256
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0493582015459897,
"grad_norm": 0.9564698338508606,
"loss": 1.0787,
"loss_ce": 1.0650237798690796,
"loss_region": 0.03000781685113907,
"loss_total": 1.0950316190719604,
"lr": 0.0010945801257576456,
"router/selected_tokens_s0": 4363.75,
"step": 3700,
"tokens_trained": 12.1205864
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0521948797957592,
"grad_norm": 0.9331677556037903,
"loss": 1.08,
"loss_ce": 1.0856927633285522,
"loss_region": 0.030012279748916626,
"loss_total": 1.1157050132751465,
"lr": 0.0010941732337737947,
"router/selected_tokens_s0": 4404.5,
"step": 3710,
"tokens_trained": 12.15335184
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0550315580455287,
"grad_norm": 1.2002500295639038,
"loss": 1.0818,
"loss_ce": 1.058534026145935,
"loss_region": 0.030018793419003487,
"loss_total": 1.0885528326034546,
"lr": 0.0010937663417899437,
"router/selected_tokens_s0": 4392.125,
"step": 3720,
"tokens_trained": 12.18611712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0578682362952982,
"grad_norm": 1.4924200773239136,
"loss": 1.0778,
"loss_ce": 1.020750880241394,
"loss_region": 0.030010871589183807,
"loss_total": 1.0507616996765137,
"lr": 0.0010933594498060927,
"router/selected_tokens_s0": 4356.625,
"step": 3730,
"tokens_trained": 12.21888256
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0607049145450678,
"grad_norm": 0.6684730648994446,
"loss": 1.0769,
"loss_ce": 1.0789055824279785,
"loss_region": 0.030011769384145737,
"loss_total": 1.1089173555374146,
"lr": 0.0010929525578222414,
"router/selected_tokens_s0": 4387.875,
"step": 3740,
"tokens_trained": 12.251648
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0635415927948373,
"grad_norm": 0.6117927432060242,
"loss": 1.0797,
"loss_ce": 1.0758237838745117,
"loss_region": 0.030013950541615486,
"loss_total": 1.1058377027511597,
"lr": 0.0010925456658383904,
"router/selected_tokens_s0": 4407.75,
"step": 3750,
"tokens_trained": 12.284409608
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0663782710446068,
"grad_norm": 0.6212737560272217,
"loss": 1.08,
"loss_ce": 1.074630618095398,
"loss_region": 0.03000750206410885,
"loss_total": 1.1046380996704102,
"lr": 0.0010921387738545394,
"router/selected_tokens_s0": 4371.375,
"step": 3760,
"tokens_trained": 12.317175048
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0692149492943763,
"grad_norm": 1.4000393152236938,
"loss": 1.0721,
"loss_ce": 0.9761142134666443,
"loss_region": 0.030007638037204742,
"loss_total": 1.0061218738555908,
"lr": 0.0010917318818706883,
"router/selected_tokens_s0": 4287.125,
"step": 3770,
"tokens_trained": 12.349940488
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0720516275441458,
"grad_norm": 0.7589385509490967,
"loss": 1.0796,
"loss_ce": 1.0433313846588135,
"loss_region": 0.030011288821697235,
"loss_total": 1.0733426809310913,
"lr": 0.0010913249898868375,
"router/selected_tokens_s0": 4376.375,
"step": 3780,
"tokens_trained": 12.382705928
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0748883057939154,
"grad_norm": 0.9254264235496521,
"loss": 1.0757,
"loss_ce": 1.0672318935394287,
"loss_region": 0.030009465292096138,
"loss_total": 1.0972414016723633,
"lr": 0.0010909180979029865,
"router/selected_tokens_s0": 4369.625,
"step": 3790,
"tokens_trained": 12.415470568
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0777249840436849,
"grad_norm": 0.8547407984733582,
"loss": 1.0803,
"loss_ce": 1.086848497390747,
"loss_region": 0.03000779263675213,
"loss_total": 1.116856336593628,
"lr": 0.0010905112059191354,
"router/selected_tokens_s0": 4377.5,
"step": 3800,
"tokens_trained": 12.448236008
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0805616622934544,
"grad_norm": 2.435622215270996,
"loss": 1.0783,
"loss_ce": 1.0909473896026611,
"loss_region": 0.03001653589308262,
"loss_total": 1.1209639310836792,
"lr": 0.0010901043139352844,
"router/selected_tokens_s0": 4412.5,
"step": 3810,
"tokens_trained": 12.481001448
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.083398340543224,
"grad_norm": 0.8429534435272217,
"loss": 1.0679,
"loss_ce": 1.0455752611160278,
"loss_region": 0.030013732612133026,
"loss_total": 1.0755889415740967,
"lr": 0.0010896974219514334,
"router/selected_tokens_s0": 4386.125,
"step": 3820,
"tokens_trained": 12.513766888
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0862350187929934,
"grad_norm": 1.3827040195465088,
"loss": 1.0802,
"loss_ce": 1.1371749639511108,
"loss_region": 0.030014697462320328,
"loss_total": 1.1671897172927856,
"lr": 0.0010892905299675823,
"router/selected_tokens_s0": 4394.5,
"step": 3830,
"tokens_trained": 12.546532328
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.089071697042763,
"grad_norm": 1.121778130531311,
"loss": 1.0882,
"loss_ce": 1.0060161352157593,
"loss_region": 0.030006043612957,
"loss_total": 1.0360221862792969,
"lr": 0.0010888836379837313,
"router/selected_tokens_s0": 4312.375,
"step": 3840,
"tokens_trained": 12.57929404
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0919083752925325,
"grad_norm": 1.114713430404663,
"loss": 1.0791,
"loss_ce": 1.0474870204925537,
"loss_region": 0.03002369962632656,
"loss_total": 1.0775107145309448,
"lr": 0.0010884767459998803,
"router/selected_tokens_s0": 4427.0,
"step": 3850,
"tokens_trained": 12.61205788
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.094745053542302,
"grad_norm": 0.42109477519989014,
"loss": 1.0759,
"loss_ce": 1.0544071197509766,
"loss_region": 0.030009722337126732,
"loss_total": 1.0844168663024902,
"lr": 0.0010880698540160292,
"router/selected_tokens_s0": 4372.875,
"step": 3860,
"tokens_trained": 12.64482332
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.0975817317920715,
"grad_norm": 1.0385088920593262,
"loss": 1.0656,
"loss_ce": 1.1451784372329712,
"loss_region": 0.030008675530552864,
"loss_total": 1.175187110900879,
"lr": 0.0010876629620321782,
"router/selected_tokens_s0": 4386.0,
"step": 3870,
"tokens_trained": 12.677587992
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.100418410041841,
"grad_norm": 1.0264872312545776,
"loss": 1.0732,
"loss_ce": 1.0522844791412354,
"loss_region": 0.030011439695954323,
"loss_total": 1.0822958946228027,
"lr": 0.0010872560700483272,
"router/selected_tokens_s0": 4350.75,
"step": 3880,
"tokens_trained": 12.710352632
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1032550882916106,
"grad_norm": 0.6505580544471741,
"loss": 1.077,
"loss_ce": 1.0118770599365234,
"loss_region": 0.03001365438103676,
"loss_total": 1.0418907403945923,
"lr": 0.0010868491780644761,
"router/selected_tokens_s0": 4405.125,
"step": 3890,
"tokens_trained": 12.743118072
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.10609176654138,
"grad_norm": 1.2206717729568481,
"loss": 1.0648,
"loss_ce": 1.009666919708252,
"loss_region": 0.030009053647518158,
"loss_total": 1.0396759510040283,
"lr": 0.001086442286080625,
"router/selected_tokens_s0": 4362.0,
"step": 3900,
"tokens_trained": 12.775882712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1089284447911496,
"grad_norm": 0.9762550592422485,
"loss": 1.0833,
"loss_ce": 1.1374930143356323,
"loss_region": 0.03000687249004841,
"loss_total": 1.1674998998641968,
"lr": 0.001086035394096774,
"router/selected_tokens_s0": 4368.875,
"step": 3910,
"tokens_trained": 12.808648152
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1117651230409191,
"grad_norm": 0.6860953569412231,
"loss": 1.0783,
"loss_ce": 1.1070737838745117,
"loss_region": 0.030013803392648697,
"loss_total": 1.1370875835418701,
"lr": 0.001085628502112923,
"router/selected_tokens_s0": 4440.25,
"step": 3920,
"tokens_trained": 12.841413592
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1146018012906886,
"grad_norm": 0.5641375184059143,
"loss": 1.0779,
"loss_ce": 0.9596564173698425,
"loss_region": 0.030009876936674118,
"loss_total": 0.9896662831306458,
"lr": 0.001085221610129072,
"router/selected_tokens_s0": 4372.25,
"step": 3930,
"tokens_trained": 12.874179032
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1174384795404582,
"grad_norm": 1.3612422943115234,
"loss": 1.0745,
"loss_ce": 1.0073390007019043,
"loss_region": 0.030006812885403633,
"loss_total": 1.0373457670211792,
"lr": 0.001084814718145221,
"router/selected_tokens_s0": 4349.25,
"step": 3940,
"tokens_trained": 12.906944472
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1202751577902277,
"grad_norm": 0.9040305614471436,
"loss": 1.0713,
"loss_ce": 0.9823886156082153,
"loss_region": 0.03001086413860321,
"loss_total": 1.012399435043335,
"lr": 0.00108440782616137,
"router/selected_tokens_s0": 4368.375,
"step": 3950,
"tokens_trained": 12.939709912
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1231118360399972,
"grad_norm": 0.6158255934715271,
"loss": 1.0631,
"loss_ce": 1.072802186012268,
"loss_region": 0.030010098591446877,
"loss_total": 1.1028122901916504,
"lr": 0.001084000934177519,
"router/selected_tokens_s0": 4384.5,
"step": 3960,
"tokens_trained": 12.972475352
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1259485142897667,
"grad_norm": 0.8492525815963745,
"loss": 1.0761,
"loss_ce": 1.1295671463012695,
"loss_region": 0.03000839613378048,
"loss_total": 1.1595755815505981,
"lr": 0.001083594042193668,
"router/selected_tokens_s0": 4394.5,
"step": 3970,
"tokens_trained": 13.005240792
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1287851925395362,
"grad_norm": 1.4454373121261597,
"loss": 1.0671,
"loss_ce": 1.059134840965271,
"loss_region": 0.030011136084794998,
"loss_total": 1.0891460180282593,
"lr": 0.001083187150209817,
"router/selected_tokens_s0": 4354.625,
"step": 3980,
"tokens_trained": 13.038005432
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1316218707893058,
"grad_norm": 1.1143423318862915,
"loss": 1.0746,
"loss_ce": 1.058432698249817,
"loss_region": 0.030015287920832634,
"loss_total": 1.08844792842865,
"lr": 0.0010827802582259658,
"router/selected_tokens_s0": 4413.25,
"step": 3990,
"tokens_trained": 13.070770872
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1344585490390753,
"grad_norm": 0.926952600479126,
"loss": 1.0726,
"loss_ce": 1.0535566806793213,
"loss_region": 0.03000652976334095,
"loss_total": 1.083563208580017,
"lr": 0.0010823733662421147,
"router/selected_tokens_s0": 4332.125,
"step": 4000,
"tokens_trained": 13.103536312
},
{
"epoch": 1.1344585490390753,
"eval_ppl": 2.858632587615727,
"eval_runtime": 2.4962,
"step": 4000,
"tokens_trained": 13.103536312
},
{
"epoch": 1.1344585490390753,
"eval_F": 0.34138791413731373,
"eval_F_cds": 0.3448908798343993,
"eval_F_dig": 0.3374221944422741,
"eval_F_exon": 0.34516320139927215,
"eval_F_intron": 0.34168107017140814,
"eval_F_nig": 0.3411899187908908,
"eval_F_promoter": 0.33952618612122176,
"eval_F_utr": 0.34327183776802744,
"eval_G": 0.34184569864652725,
"eval_G_cds": 0.34168474533742754,
"eval_G_dig": 0.39810788440503164,
"eval_G_exon": 0.34140616184021216,
"eval_G_intron": 0.3411866290260269,
"eval_G_nig": 0.34169810595567374,
"eval_G_promoter": 0.3417757543881514,
"eval_G_utr": 0.3401876713258752,
"eval_avg_bp_per_token": 2.9292191041003814,
"eval_bp_per_token/cds": 2.899467798279136,
"eval_bp_per_token/dig": 2.9636461870947826,
"eval_bp_per_token/exon": 2.897180220678382,
"eval_bp_per_token/intron": 2.9267058883254458,
"eval_bp_per_token/nig": 2.930918954299122,
"eval_bp_per_token/promoter": 2.945280926411278,
"eval_bp_per_token/utr": 2.9131431418961005,
"eval_ppl_cds": 3.4450720333639553,
"eval_ppl_dig": 1.088491176901866,
"eval_ppl_exon": 3.2953068260471907,
"eval_ppl_intron": 2.887916254694354,
"eval_ppl_nig": 2.65992247163589,
"eval_ppl_promoter": 3.249167345940797,
"eval_ppl_utr": 3.267379860704035,
"step": 4000,
"tokens_trained": 13.103536312
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1372952272888448,
"grad_norm": 1.135546088218689,
"loss": 1.0709,
"loss_ce": 1.05037522315979,
"loss_region": 0.030016878619790077,
"loss_total": 1.0803921222686768,
"lr": 0.0010819664742582637,
"router/selected_tokens_s0": 4442.875,
"step": 4010,
"tokens_trained": 13.136300952
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1401319055386143,
"grad_norm": 0.9313811659812927,
"loss": 1.0706,
"loss_ce": 1.0053969621658325,
"loss_region": 0.029991673305630684,
"loss_total": 1.0353885889053345,
"lr": 0.0010815595822744127,
"router/selected_tokens_s0": 4338.625,
"step": 4020,
"tokens_trained": 13.169065592
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1429685837883838,
"grad_norm": 1.150964617729187,
"loss": 1.0746,
"loss_ce": 1.0354498624801636,
"loss_region": 0.03000836819410324,
"loss_total": 1.0654581785202026,
"lr": 0.0010811526902905618,
"router/selected_tokens_s0": 4357.125,
"step": 4030,
"tokens_trained": 13.201831032
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1458052620381534,
"grad_norm": 0.3598765432834625,
"loss": 1.069,
"loss_ce": 0.9737571477890015,
"loss_region": 0.030010921880602837,
"loss_total": 1.0037680864334106,
"lr": 0.0010807457983067108,
"router/selected_tokens_s0": 4364.125,
"step": 4040,
"tokens_trained": 13.234596472
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1486419402879229,
"grad_norm": 1.6098700761795044,
"loss": 1.0724,
"loss_ce": 1.090259075164795,
"loss_region": 0.030013924464583397,
"loss_total": 1.1202729940414429,
"lr": 0.0010803389063228598,
"router/selected_tokens_s0": 4396.5,
"step": 4050,
"tokens_trained": 13.267361888
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1514786185376924,
"grad_norm": 1.2312268018722534,
"loss": 1.0755,
"loss_ce": 1.0816667079925537,
"loss_region": 0.0300059225410223,
"loss_total": 1.1116726398468018,
"lr": 0.0010799320143390087,
"router/selected_tokens_s0": 4383.875,
"step": 4060,
"tokens_trained": 13.300127328
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.154315296787462,
"grad_norm": 0.8974295854568481,
"loss": 1.074,
"loss_ce": 0.999101459980011,
"loss_region": 0.030007481575012207,
"loss_total": 1.029109001159668,
"lr": 0.0010795251223551577,
"router/selected_tokens_s0": 4339.875,
"step": 4070,
"tokens_trained": 13.332892768
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1571519750372314,
"grad_norm": 0.9909172654151917,
"loss": 1.0664,
"loss_ce": 1.1255804300308228,
"loss_region": 0.030011769384145737,
"loss_total": 1.1555922031402588,
"lr": 0.0010791182303713067,
"router/selected_tokens_s0": 4367.375,
"step": 4080,
"tokens_trained": 13.365658208
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.159988653287001,
"grad_norm": 2.2824649810791016,
"loss": 1.0724,
"loss_ce": 0.9207800626754761,
"loss_region": 0.030002696439623833,
"loss_total": 0.9507827758789062,
"lr": 0.0010787113383874556,
"router/selected_tokens_s0": 4278.5,
"step": 4090,
"tokens_trained": 13.398422568
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1628253315367705,
"grad_norm": 0.4956927001476288,
"loss": 1.0733,
"loss_ce": 1.1247563362121582,
"loss_region": 0.03000745177268982,
"loss_total": 1.1547638177871704,
"lr": 0.0010783044464036046,
"router/selected_tokens_s0": 4366.5,
"step": 4100,
"tokens_trained": 13.431185432
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.16566200978654,
"grad_norm": 0.6976671814918518,
"loss": 1.0697,
"loss_ce": 1.0819003582000732,
"loss_region": 0.03002307377755642,
"loss_total": 1.1119234561920166,
"lr": 0.0010778975544197536,
"router/selected_tokens_s0": 4439.5,
"step": 4110,
"tokens_trained": 13.463950872
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1684986880363095,
"grad_norm": 1.2541862726211548,
"loss": 1.0653,
"loss_ce": 0.7924370169639587,
"loss_region": 0.029992103576660156,
"loss_total": 0.8224291205406189,
"lr": 0.0010774906624359025,
"router/selected_tokens_s0": 4297.25,
"step": 4120,
"tokens_trained": 13.496713976
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.171335366286079,
"grad_norm": 1.4140042066574097,
"loss": 1.0685,
"loss_ce": 1.0910414457321167,
"loss_region": 0.03000573255121708,
"loss_total": 1.1210471391677856,
"lr": 0.0010770837704520515,
"router/selected_tokens_s0": 4412.5,
"step": 4130,
"tokens_trained": 13.529479416
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1741720445358486,
"grad_norm": 0.5698431134223938,
"loss": 1.0775,
"loss_ce": 0.8972399830818176,
"loss_region": 0.029999306425452232,
"loss_total": 0.9272392988204956,
"lr": 0.0010766768784682005,
"router/selected_tokens_s0": 4358.875,
"step": 4140,
"tokens_trained": 13.562244856
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.177008722785618,
"grad_norm": 1.3367623090744019,
"loss": 1.0704,
"loss_ce": 1.1016747951507568,
"loss_region": 0.03001498058438301,
"loss_total": 1.1316897869110107,
"lr": 0.0010762699864843494,
"router/selected_tokens_s0": 4404.625,
"step": 4150,
"tokens_trained": 13.595010296
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1798454010353876,
"grad_norm": 0.7532950043678284,
"loss": 1.0571,
"loss_ce": 0.9316068887710571,
"loss_region": 0.03000483848154545,
"loss_total": 0.9616117477416992,
"lr": 0.0010758630945004984,
"router/selected_tokens_s0": 4305.625,
"step": 4160,
"tokens_trained": 13.627775736
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1826820792851571,
"grad_norm": 0.8210463523864746,
"loss": 1.066,
"loss_ce": 1.0508811473846436,
"loss_region": 0.030011793598532677,
"loss_total": 1.0808929204940796,
"lr": 0.0010754562025166474,
"router/selected_tokens_s0": 4387.75,
"step": 4170,
"tokens_trained": 13.660541176
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1855187575349266,
"grad_norm": 1.2525079250335693,
"loss": 1.0643,
"loss_ce": 1.0516717433929443,
"loss_region": 0.030006369575858116,
"loss_total": 1.0816781520843506,
"lr": 0.0010750493105327963,
"router/selected_tokens_s0": 4361.0,
"step": 4180,
"tokens_trained": 13.693306616
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1883554357846962,
"grad_norm": 1.0099766254425049,
"loss": 1.0655,
"loss_ce": 0.9692405462265015,
"loss_region": 0.0300027746707201,
"loss_total": 0.9992433190345764,
"lr": 0.0010746424185489453,
"router/selected_tokens_s0": 4389.0,
"step": 4190,
"tokens_trained": 13.726072056
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1911921140344657,
"grad_norm": 0.7821201682090759,
"loss": 1.0697,
"loss_ce": 1.018595814704895,
"loss_region": 0.030003640800714493,
"loss_total": 1.0485994815826416,
"lr": 0.0010742355265650943,
"router/selected_tokens_s0": 4325.0,
"step": 4200,
"tokens_trained": 13.758837496
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1940287922842352,
"grad_norm": 1.1446911096572876,
"loss": 1.0679,
"loss_ce": 1.0561493635177612,
"loss_region": 0.030004194006323814,
"loss_total": 1.086153507232666,
"lr": 0.0010738286345812434,
"router/selected_tokens_s0": 4347.875,
"step": 4210,
"tokens_trained": 13.791602936
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1968654705340047,
"grad_norm": 0.8190633654594421,
"loss": 1.079,
"loss_ce": 1.0492353439331055,
"loss_region": 0.030007485300302505,
"loss_total": 1.0792428255081177,
"lr": 0.0010734217425973924,
"router/selected_tokens_s0": 4382.25,
"step": 4220,
"tokens_trained": 13.824368376
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.1997021487837742,
"grad_norm": 1.038085699081421,
"loss": 1.066,
"loss_ce": 0.9230837821960449,
"loss_region": 0.030007129535079002,
"loss_total": 0.9530909061431885,
"lr": 0.0010730148506135414,
"router/selected_tokens_s0": 4315.875,
"step": 4230,
"tokens_trained": 13.857133016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2025388270335438,
"grad_norm": 1.4383383989334106,
"loss": 1.0598,
"loss_ce": 1.0923779010772705,
"loss_region": 0.03000866062939167,
"loss_total": 1.1223865747451782,
"lr": 0.0010726079586296901,
"router/selected_tokens_s0": 4404.625,
"step": 4240,
"tokens_trained": 13.889896856
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2053755052833133,
"grad_norm": 0.6213952898979187,
"loss": 1.0635,
"loss_ce": 1.0380363464355469,
"loss_region": 0.030008360743522644,
"loss_total": 1.068044662475586,
"lr": 0.001072201066645839,
"router/selected_tokens_s0": 4405.25,
"step": 4250,
"tokens_trained": 13.922662296
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2082121835330828,
"grad_norm": 0.4920593202114105,
"loss": 1.0628,
"loss_ce": 1.0617847442626953,
"loss_region": 0.030003665015101433,
"loss_total": 1.091788411140442,
"lr": 0.001071794174661988,
"router/selected_tokens_s0": 4345.0,
"step": 4260,
"tokens_trained": 13.955427736
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2110488617828523,
"grad_norm": 1.1883982419967651,
"loss": 1.0624,
"loss_ce": 1.0569326877593994,
"loss_region": 0.03000705875456333,
"loss_total": 1.0869396924972534,
"lr": 0.0010713872826781372,
"router/selected_tokens_s0": 4376.875,
"step": 4270,
"tokens_trained": 13.988193176
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2138855400326218,
"grad_norm": 0.7268418669700623,
"loss": 1.0621,
"loss_ce": 1.1230334043502808,
"loss_region": 0.030010607093572617,
"loss_total": 1.1530439853668213,
"lr": 0.0010709803906942862,
"router/selected_tokens_s0": 4399.0,
"step": 4280,
"tokens_trained": 14.020958616
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2167222182823914,
"grad_norm": 0.8508139848709106,
"loss": 1.0648,
"loss_ce": 1.012586236000061,
"loss_region": 0.030012402683496475,
"loss_total": 1.0425986051559448,
"lr": 0.0010705734987104352,
"router/selected_tokens_s0": 4399.25,
"step": 4290,
"tokens_trained": 14.053724056
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2195588965321609,
"grad_norm": 1.003320574760437,
"loss": 1.072,
"loss_ce": 1.092397928237915,
"loss_region": 0.030008139088749886,
"loss_total": 1.1224061250686646,
"lr": 0.0010701666067265841,
"router/selected_tokens_s0": 4384.5,
"step": 4300,
"tokens_trained": 14.086488728
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2223955747819304,
"grad_norm": 0.6508564949035645,
"loss": 1.0592,
"loss_ce": 0.9947494864463806,
"loss_region": 0.030006522312760353,
"loss_total": 1.0247559547424316,
"lr": 0.001069759714742733,
"router/selected_tokens_s0": 4323.125,
"step": 4310,
"tokens_trained": 14.119254168
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2252322530317,
"grad_norm": 0.6111332774162292,
"loss": 1.0612,
"loss_ce": 1.0148944854736328,
"loss_region": 0.030005091801285744,
"loss_total": 1.044899582862854,
"lr": 0.001069352822758882,
"router/selected_tokens_s0": 4324.25,
"step": 4320,
"tokens_trained": 14.152019608
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2280689312814694,
"grad_norm": 1.111618161201477,
"loss": 1.0653,
"loss_ce": 0.9681676030158997,
"loss_region": 0.03000504896044731,
"loss_total": 0.9981726408004761,
"lr": 0.001068945930775031,
"router/selected_tokens_s0": 4334.0,
"step": 4330,
"tokens_trained": 14.184785048
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.230905609531239,
"grad_norm": 0.6468565464019775,
"loss": 1.0612,
"loss_ce": 1.0180631875991821,
"loss_region": 0.030008560046553612,
"loss_total": 1.0480717420578003,
"lr": 0.00106853903879118,
"router/selected_tokens_s0": 4372.0,
"step": 4340,
"tokens_trained": 14.217550488
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2337422877810085,
"grad_norm": 0.4824322760105133,
"loss": 1.0606,
"loss_ce": 1.054766058921814,
"loss_region": 0.030008021742105484,
"loss_total": 1.084774136543274,
"lr": 0.001068132146807329,
"router/selected_tokens_s0": 4369.5,
"step": 4350,
"tokens_trained": 14.250315928
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.236578966030778,
"grad_norm": 0.8620288968086243,
"loss": 1.0604,
"loss_ce": 1.075032114982605,
"loss_region": 0.030014168471097946,
"loss_total": 1.105046272277832,
"lr": 0.001067725254823478,
"router/selected_tokens_s0": 4397.375,
"step": 4360,
"tokens_trained": 14.283081368
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2394156442805475,
"grad_norm": 1.4154425859451294,
"loss": 1.0614,
"loss_ce": 1.0848474502563477,
"loss_region": 0.030006200075149536,
"loss_total": 1.1148536205291748,
"lr": 0.0010673183628396269,
"router/selected_tokens_s0": 4343.25,
"step": 4370,
"tokens_trained": 14.315846808
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.242252322530317,
"grad_norm": 0.7003890872001648,
"loss": 1.0594,
"loss_ce": 1.1462643146514893,
"loss_region": 0.030004924163222313,
"loss_total": 1.176269292831421,
"lr": 0.0010669114708557758,
"router/selected_tokens_s0": 4383.25,
"step": 4380,
"tokens_trained": 14.348612232
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2450890007800866,
"grad_norm": 0.9333593845367432,
"loss": 1.0628,
"loss_ce": 1.1088054180145264,
"loss_region": 0.0300068948417902,
"loss_total": 1.1388123035430908,
"lr": 0.0010665045788719248,
"router/selected_tokens_s0": 4374.875,
"step": 4390,
"tokens_trained": 14.381377672
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.247925679029856,
"grad_norm": 0.7894501686096191,
"loss": 1.0632,
"loss_ce": 0.9245185256004333,
"loss_region": 0.030007462948560715,
"loss_total": 0.9545260071754456,
"lr": 0.0010660976868880738,
"router/selected_tokens_s0": 4363.625,
"step": 4400,
"tokens_trained": 14.414143112
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2507623572796256,
"grad_norm": 1.051579236984253,
"loss": 1.0676,
"loss_ce": 1.0265341997146606,
"loss_region": 0.03001078963279724,
"loss_total": 1.0565450191497803,
"lr": 0.0010656907949042227,
"router/selected_tokens_s0": 4416.375,
"step": 4410,
"tokens_trained": 14.446908552
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2535990355293951,
"grad_norm": 1.4166457653045654,
"loss": 1.0661,
"loss_ce": 1.1268078088760376,
"loss_region": 0.0300078634172678,
"loss_total": 1.1568156480789185,
"lr": 0.0010652839029203717,
"router/selected_tokens_s0": 4376.375,
"step": 4420,
"tokens_trained": 14.479673992
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2564357137791646,
"grad_norm": 0.9460220336914062,
"loss": 1.0627,
"loss_ce": 1.0467621088027954,
"loss_region": 0.030009562149643898,
"loss_total": 1.07677161693573,
"lr": 0.0010648770109365207,
"router/selected_tokens_s0": 4361.5,
"step": 4430,
"tokens_trained": 14.512439432
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2592723920289342,
"grad_norm": 0.726325273513794,
"loss": 1.0611,
"loss_ce": 1.060929298400879,
"loss_region": 0.030005550011992455,
"loss_total": 1.0909348726272583,
"lr": 0.0010644701189526696,
"router/selected_tokens_s0": 4371.625,
"step": 4440,
"tokens_trained": 14.545204872
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2621090702787037,
"grad_norm": 0.8391557931900024,
"loss": 1.0602,
"loss_ce": 0.9280992150306702,
"loss_region": 0.030011408030986786,
"loss_total": 0.9581106305122375,
"lr": 0.0010640632269688186,
"router/selected_tokens_s0": 4365.625,
"step": 4450,
"tokens_trained": 14.577970312
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2649457485284732,
"grad_norm": 0.5766838788986206,
"loss": 1.0606,
"loss_ce": 1.0834920406341553,
"loss_region": 0.03000706620514393,
"loss_total": 1.1134991645812988,
"lr": 0.0010636563349849678,
"router/selected_tokens_s0": 4382.625,
"step": 4460,
"tokens_trained": 14.610735752
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2677824267782427,
"grad_norm": 0.7650503516197205,
"loss": 1.0655,
"loss_ce": 1.0730332136154175,
"loss_region": 0.030004587024450302,
"loss_total": 1.1030378341674805,
"lr": 0.0010632494430011167,
"router/selected_tokens_s0": 4357.375,
"step": 4470,
"tokens_trained": 14.643501192
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2706191050280122,
"grad_norm": 0.9502279758453369,
"loss": 1.065,
"loss_ce": 0.9979308247566223,
"loss_region": 0.030005743727087975,
"loss_total": 1.027936577796936,
"lr": 0.0010628425510172657,
"router/selected_tokens_s0": 4355.125,
"step": 4480,
"tokens_trained": 14.676266632
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2734557832777817,
"grad_norm": 0.42353641986846924,
"loss": 1.0697,
"loss_ce": 0.9458868503570557,
"loss_region": 0.03001011349260807,
"loss_total": 0.975896954536438,
"lr": 0.0010624356590334145,
"router/selected_tokens_s0": 4327.25,
"step": 4490,
"tokens_trained": 14.709032072
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2762924615275513,
"grad_norm": 0.8221932649612427,
"loss": 1.0532,
"loss_ce": 1.0401586294174194,
"loss_region": 0.03000483103096485,
"loss_total": 1.0701634883880615,
"lr": 0.0010620287670495634,
"router/selected_tokens_s0": 4364.625,
"step": 4500,
"tokens_trained": 14.741797512
},
{
"epoch": 1.2762924615275513,
"eval_ppl": 2.8214683274593275,
"eval_runtime": 2.514,
"step": 4500,
"tokens_trained": 14.741797512
},
{
"epoch": 1.2762924615275513,
"eval_F": 0.3404783661767054,
"eval_F_cds": 0.34415505656136036,
"eval_F_dig": 0.34039117639398914,
"eval_F_exon": 0.3441758117753265,
"eval_F_intron": 0.3404660876483458,
"eval_F_nig": 0.34068460925568095,
"eval_F_promoter": 0.3389634986468706,
"eval_F_utr": 0.3419717924374995,
"eval_G": 0.33991637341184733,
"eval_G_cds": 0.33970658857472646,
"eval_G_dig": 0.40259630268966323,
"eval_G_exon": 0.3395000370271109,
"eval_G_intron": 0.33899745701570494,
"eval_G_nig": 0.3395800911177655,
"eval_G_promoter": 0.34070296612716594,
"eval_G_utr": 0.33846465688852967,
"eval_avg_bp_per_token": 2.9370441688533258,
"eval_bp_per_token/cds": 2.9056670269254266,
"eval_bp_per_token/dig": 2.937796480489671,
"eval_bp_per_token/exon": 2.9054918032786885,
"eval_bp_per_token/intron": 2.937150090063775,
"eval_bp_per_token/nig": 2.9352661459664247,
"eval_bp_per_token/promoter": 2.9501701628404295,
"eval_bp_per_token/utr": 2.924217792561839,
"eval_ppl_cds": 3.362117196422307,
"eval_ppl_dig": 1.084496515952716,
"eval_ppl_exon": 3.2638291915167845,
"eval_ppl_intron": 2.863014968465922,
"eval_ppl_nig": 2.628156788503626,
"eval_ppl_promoter": 3.186888990118295,
"eval_ppl_utr": 3.231163925316475,
"step": 4500,
"tokens_trained": 14.741797512
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2791291397773208,
"grad_norm": 0.574324905872345,
"loss": 1.0626,
"loss_ce": 1.1369348764419556,
"loss_region": 0.030004041269421577,
"loss_total": 1.1669389009475708,
"lr": 0.0010616218750657124,
"router/selected_tokens_s0": 4390.0,
"step": 4510,
"tokens_trained": 14.774562152
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2819658180270903,
"grad_norm": 1.4291346073150635,
"loss": 1.0542,
"loss_ce": 1.0772862434387207,
"loss_region": 0.030009262263774872,
"loss_total": 1.1072955131530762,
"lr": 0.0010612149830818616,
"router/selected_tokens_s0": 4374.125,
"step": 4520,
"tokens_trained": 14.807327592
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2848024962768598,
"grad_norm": 1.110178828239441,
"loss": 1.0581,
"loss_ce": 1.0101850032806396,
"loss_region": 0.030011240392923355,
"loss_total": 1.0401962995529175,
"lr": 0.0010608080910980105,
"router/selected_tokens_s0": 4400.875,
"step": 4530,
"tokens_trained": 14.840092232
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2876391745266293,
"grad_norm": 0.6965222358703613,
"loss": 1.0677,
"loss_ce": 1.0458329916000366,
"loss_region": 0.030003167688846588,
"loss_total": 1.075836181640625,
"lr": 0.0010604011991141595,
"router/selected_tokens_s0": 4285.875,
"step": 4540,
"tokens_trained": 14.872854344
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2904758527763989,
"grad_norm": 0.7366101145744324,
"loss": 1.0673,
"loss_ce": 1.127131700515747,
"loss_region": 0.030009111389517784,
"loss_total": 1.157140851020813,
"lr": 0.0010599943071303085,
"router/selected_tokens_s0": 4368.75,
"step": 4550,
"tokens_trained": 14.905619784
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2933125310261684,
"grad_norm": 0.5127747058868408,
"loss": 1.0583,
"loss_ce": 1.0789624452590942,
"loss_region": 0.030002892017364502,
"loss_total": 1.1089653968811035,
"lr": 0.0010595874151464574,
"router/selected_tokens_s0": 4366.5,
"step": 4560,
"tokens_trained": 14.938385224
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.296149209275938,
"grad_norm": 0.8208303451538086,
"loss": 1.0564,
"loss_ce": 1.0156883001327515,
"loss_region": 0.03000694513320923,
"loss_total": 1.0456953048706055,
"lr": 0.0010591805231626064,
"router/selected_tokens_s0": 4368.5,
"step": 4570,
"tokens_trained": 14.971149864
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.2989858875257074,
"grad_norm": 0.9243417978286743,
"loss": 1.0546,
"loss_ce": 1.1268659830093384,
"loss_region": 0.030008889734745026,
"loss_total": 1.1568748950958252,
"lr": 0.0010587736311787554,
"router/selected_tokens_s0": 4378.75,
"step": 4580,
"tokens_trained": 15.003915304
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.301822565775477,
"grad_norm": 1.2765685319900513,
"loss": 1.0589,
"loss_ce": 1.0031052827835083,
"loss_region": 0.03000444732606411,
"loss_total": 1.0331097841262817,
"lr": 0.0010583667391949043,
"router/selected_tokens_s0": 4328.75,
"step": 4590,
"tokens_trained": 15.036680744
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3046592440252465,
"grad_norm": 0.7742276787757874,
"loss": 1.06,
"loss_ce": 1.079167127609253,
"loss_region": 0.030005935579538345,
"loss_total": 1.109173059463501,
"lr": 0.0010579598472110533,
"router/selected_tokens_s0": 4353.25,
"step": 4600,
"tokens_trained": 15.069442824
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.307495922275016,
"grad_norm": 0.793449878692627,
"loss": 1.0558,
"loss_ce": 1.1816250085830688,
"loss_region": 0.030013153329491615,
"loss_total": 1.2116382122039795,
"lr": 0.0010575529552272023,
"router/selected_tokens_s0": 4410.125,
"step": 4610,
"tokens_trained": 15.102208264
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3103326005247855,
"grad_norm": 0.45884019136428833,
"loss": 1.0581,
"loss_ce": 1.03169584274292,
"loss_region": 0.030012015253305435,
"loss_total": 1.061707854270935,
"lr": 0.0010571460632433512,
"router/selected_tokens_s0": 4406.125,
"step": 4620,
"tokens_trained": 15.134972904
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.313169278774555,
"grad_norm": 0.297644704580307,
"loss": 1.0543,
"loss_ce": 0.9691150188446045,
"loss_region": 0.030010992661118507,
"loss_total": 0.9991260170936584,
"lr": 0.0010567391712595002,
"router/selected_tokens_s0": 4378.125,
"step": 4630,
"tokens_trained": 15.167735632
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3160059570243245,
"grad_norm": 1.0193889141082764,
"loss": 1.0631,
"loss_ce": 0.9981553554534912,
"loss_region": 0.030008038505911827,
"loss_total": 1.0281634330749512,
"lr": 0.0010563322792756492,
"router/selected_tokens_s0": 4370.875,
"step": 4640,
"tokens_trained": 15.200501072
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.318842635274094,
"grad_norm": 0.5435932278633118,
"loss": 1.0567,
"loss_ce": 1.0280530452728271,
"loss_region": 0.030006207525730133,
"loss_total": 1.0580592155456543,
"lr": 0.0010559253872917981,
"router/selected_tokens_s0": 4404.625,
"step": 4650,
"tokens_trained": 15.233266512
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3216793135238636,
"grad_norm": 1.5278434753417969,
"loss": 1.0548,
"loss_ce": 1.136357307434082,
"loss_region": 0.03000808134675026,
"loss_total": 1.166365385055542,
"lr": 0.001055518495307947,
"router/selected_tokens_s0": 4371.0,
"step": 4660,
"tokens_trained": 15.266028416
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.324515991773633,
"grad_norm": 1.0518913269042969,
"loss": 1.0663,
"loss_ce": 1.0637515783309937,
"loss_region": 0.030026618391275406,
"loss_total": 1.0937782526016235,
"lr": 0.001055111603324096,
"router/selected_tokens_s0": 4432.5,
"step": 4670,
"tokens_trained": 15.298793832
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3273526700234026,
"grad_norm": 0.41353392601013184,
"loss": 1.0573,
"loss_ce": 1.1546579599380493,
"loss_region": 0.030011750757694244,
"loss_total": 1.1846697330474854,
"lr": 0.001054704711340245,
"router/selected_tokens_s0": 4418.375,
"step": 4680,
"tokens_trained": 15.331559256
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3301893482731721,
"grad_norm": 1.145396113395691,
"loss": 1.0615,
"loss_ce": 0.9925062656402588,
"loss_region": 0.030006494373083115,
"loss_total": 1.0225127935409546,
"lr": 0.001054297819356394,
"router/selected_tokens_s0": 4337.125,
"step": 4690,
"tokens_trained": 15.364323896
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3330260265229417,
"grad_norm": 0.48764264583587646,
"loss": 1.0592,
"loss_ce": 1.013685941696167,
"loss_region": 0.030003532767295837,
"loss_total": 1.043689489364624,
"lr": 0.0010538909273725432,
"router/selected_tokens_s0": 4342.125,
"step": 4700,
"tokens_trained": 15.397089336
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3358627047727112,
"grad_norm": 0.8164799809455872,
"loss": 1.0516,
"loss_ce": 0.9586069583892822,
"loss_region": 0.030013196170330048,
"loss_total": 0.9886201620101929,
"lr": 0.0010534840353886921,
"router/selected_tokens_s0": 4396.625,
"step": 4710,
"tokens_trained": 15.42985476
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3386993830224807,
"grad_norm": 1.13369619846344,
"loss": 1.0594,
"loss_ce": 1.0693820714950562,
"loss_region": 0.030003618448972702,
"loss_total": 1.0993857383728027,
"lr": 0.001053077143404841,
"router/selected_tokens_s0": 4341.625,
"step": 4720,
"tokens_trained": 15.4626202
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3415360612722502,
"grad_norm": 0.8559716939926147,
"loss": 1.052,
"loss_ce": 0.9566583633422852,
"loss_region": 0.029995476827025414,
"loss_total": 0.9866538643836975,
"lr": 0.0010526702514209898,
"router/selected_tokens_s0": 4327.0,
"step": 4730,
"tokens_trained": 15.49538484
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3443727395220197,
"grad_norm": 0.642441987991333,
"loss": 1.0486,
"loss_ce": 0.9989664554595947,
"loss_region": 0.030002903193235397,
"loss_total": 1.028969407081604,
"lr": 0.0010522633594371388,
"router/selected_tokens_s0": 4368.75,
"step": 4740,
"tokens_trained": 15.52815028
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3472094177717893,
"grad_norm": 1.2081654071807861,
"loss": 1.0543,
"loss_ce": 1.0263653993606567,
"loss_region": 0.030002884566783905,
"loss_total": 1.0563682317733765,
"lr": 0.0010518564674532878,
"router/selected_tokens_s0": 4350.625,
"step": 4750,
"tokens_trained": 15.56091572
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3500460960215588,
"grad_norm": 0.8961039781570435,
"loss": 1.0605,
"loss_ce": 0.8918865919113159,
"loss_region": 0.02998742088675499,
"loss_total": 0.9218739867210388,
"lr": 0.0010514495754694367,
"router/selected_tokens_s0": 4227.875,
"step": 4760,
"tokens_trained": 15.593678736
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3528827742713283,
"grad_norm": 0.9029963612556458,
"loss": 1.054,
"loss_ce": 1.041927695274353,
"loss_region": 0.030009398236870766,
"loss_total": 1.071937084197998,
"lr": 0.001051042683485586,
"router/selected_tokens_s0": 4384.125,
"step": 4770,
"tokens_trained": 15.626444176
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3557194525210978,
"grad_norm": 0.9543034434318542,
"loss": 1.0544,
"loss_ce": 1.0488133430480957,
"loss_region": 0.030006732791662216,
"loss_total": 1.0788201093673706,
"lr": 0.0010506357915017349,
"router/selected_tokens_s0": 4367.625,
"step": 4780,
"tokens_trained": 15.659209616
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3585561307708673,
"grad_norm": 1.4998373985290527,
"loss": 1.0569,
"loss_ce": 1.1177537441253662,
"loss_region": 0.030007855966687202,
"loss_total": 1.147761583328247,
"lr": 0.0010502288995178838,
"router/selected_tokens_s0": 4367.375,
"step": 4790,
"tokens_trained": 15.691975056
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3613928090206369,
"grad_norm": 0.9493989944458008,
"loss": 1.0632,
"loss_ce": 1.1187384128570557,
"loss_region": 0.030011937022209167,
"loss_total": 1.1487503051757812,
"lr": 0.0010498220075340328,
"router/selected_tokens_s0": 4409.0,
"step": 4800,
"tokens_trained": 15.724740496
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3642294872704064,
"grad_norm": 0.8666090965270996,
"loss": 1.0504,
"loss_ce": 1.0977500677108765,
"loss_region": 0.030009282752871513,
"loss_total": 1.127759337425232,
"lr": 0.0010494151155501818,
"router/selected_tokens_s0": 4413.0,
"step": 4810,
"tokens_trained": 15.757505936
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.367066165520176,
"grad_norm": 1.7307463884353638,
"loss": 1.0497,
"loss_ce": 0.9964741468429565,
"loss_region": 0.03000612184405327,
"loss_total": 1.0264803171157837,
"lr": 0.0010490082235663307,
"router/selected_tokens_s0": 4340.625,
"step": 4820,
"tokens_trained": 15.790271376
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3699028437699454,
"grad_norm": 1.00801420211792,
"loss": 1.0593,
"loss_ce": 1.056219458580017,
"loss_region": 0.030001841485500336,
"loss_total": 1.0862213373184204,
"lr": 0.0010486013315824797,
"router/selected_tokens_s0": 4315.75,
"step": 4830,
"tokens_trained": 15.823036016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.372739522019715,
"grad_norm": 0.6336276531219482,
"loss": 1.0541,
"loss_ce": 0.939272940158844,
"loss_region": 0.030005156993865967,
"loss_total": 0.96927809715271,
"lr": 0.0010481944395986287,
"router/selected_tokens_s0": 4368.625,
"step": 4840,
"tokens_trained": 15.855801456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3755762002694845,
"grad_norm": 1.0217934846878052,
"loss": 1.052,
"loss_ce": 0.9233169555664062,
"loss_region": 0.02999945543706417,
"loss_total": 0.9533163905143738,
"lr": 0.0010477875476147776,
"router/selected_tokens_s0": 4278.125,
"step": 4850,
"tokens_trained": 15.888566096
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.378412878519254,
"grad_norm": 0.8734573125839233,
"loss": 1.0512,
"loss_ce": 1.0704128742218018,
"loss_region": 0.030010642483830452,
"loss_total": 1.1004235744476318,
"lr": 0.0010473806556309266,
"router/selected_tokens_s0": 4385.25,
"step": 4860,
"tokens_trained": 15.921331536
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3812495567690235,
"grad_norm": 0.9410074949264526,
"loss": 1.0473,
"loss_ce": 1.0614757537841797,
"loss_region": 0.03000745177268982,
"loss_total": 1.091483235359192,
"lr": 0.0010469737636470756,
"router/selected_tokens_s0": 4373.375,
"step": 4870,
"tokens_trained": 15.954096176
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.384086235018793,
"grad_norm": 0.9506546854972839,
"loss": 1.0553,
"loss_ce": 1.0681748390197754,
"loss_region": 0.03000558167695999,
"loss_total": 1.0981804132461548,
"lr": 0.0010465668716632245,
"router/selected_tokens_s0": 4355.875,
"step": 4880,
"tokens_trained": 15.9868564
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3869229132685625,
"grad_norm": 0.6107691526412964,
"loss": 1.0465,
"loss_ce": 1.0117448568344116,
"loss_region": 0.030008085072040558,
"loss_total": 1.0417529344558716,
"lr": 0.0010461599796793735,
"router/selected_tokens_s0": 4373.0,
"step": 4890,
"tokens_trained": 16.019621784
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.389759591518332,
"grad_norm": 0.8904739618301392,
"loss": 1.0524,
"loss_ce": 1.0931122303009033,
"loss_region": 0.030003776773810387,
"loss_total": 1.1231160163879395,
"lr": 0.0010457530876955225,
"router/selected_tokens_s0": 4370.75,
"step": 4900,
"tokens_trained": 16.052387224
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3925962697681016,
"grad_norm": 0.8106483817100525,
"loss": 1.0501,
"loss_ce": 0.8826806545257568,
"loss_region": 0.030005743727087975,
"loss_total": 0.9126864075660706,
"lr": 0.0010453461957116714,
"router/selected_tokens_s0": 4362.25,
"step": 4910,
"tokens_trained": 16.085152664
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.395432948017871,
"grad_norm": 0.8432952761650085,
"loss": 1.0554,
"loss_ce": 0.9541028738021851,
"loss_region": 0.03000727668404579,
"loss_total": 0.9841101765632629,
"lr": 0.0010449393037278204,
"router/selected_tokens_s0": 4347.75,
"step": 4920,
"tokens_trained": 16.117918104
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.3982696262676406,
"grad_norm": 0.7111901640892029,
"loss": 1.0514,
"loss_ce": 0.9792753458023071,
"loss_region": 0.030003240332007408,
"loss_total": 1.0092785358428955,
"lr": 0.0010445324117439694,
"router/selected_tokens_s0": 4354.5,
"step": 4930,
"tokens_trained": 16.150683544
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4011063045174101,
"grad_norm": 0.623307466506958,
"loss": 1.0486,
"loss_ce": 0.8407849669456482,
"loss_region": 0.030006825923919678,
"loss_total": 0.8707917928695679,
"lr": 0.0010441255197601183,
"router/selected_tokens_s0": 4359.375,
"step": 4940,
"tokens_trained": 16.183448184
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4039429827671797,
"grad_norm": 0.7381167411804199,
"loss": 1.0515,
"loss_ce": 0.9730595350265503,
"loss_region": 0.030005378648638725,
"loss_total": 1.0030648708343506,
"lr": 0.0010437186277762675,
"router/selected_tokens_s0": 4344.625,
"step": 4950,
"tokens_trained": 16.216210576
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4067796610169492,
"grad_norm": 1.5306568145751953,
"loss": 1.0493,
"loss_ce": 1.1129964590072632,
"loss_region": 0.03000766597688198,
"loss_total": 1.1430041790008545,
"lr": 0.0010433117357924165,
"router/selected_tokens_s0": 4405.875,
"step": 4960,
"tokens_trained": 16.248975216
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4096163392667187,
"grad_norm": 1.0243196487426758,
"loss": 1.053,
"loss_ce": 1.0323237180709839,
"loss_region": 0.0300030205398798,
"loss_total": 1.0623267889022827,
"lr": 0.0010429048438085654,
"router/selected_tokens_s0": 4350.75,
"step": 4970,
"tokens_trained": 16.281739056
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4124530175164882,
"grad_norm": 0.41617700457572937,
"loss": 1.0515,
"loss_ce": 1.0560728311538696,
"loss_region": 0.030008800327777863,
"loss_total": 1.086081624031067,
"lr": 0.0010424979518247142,
"router/selected_tokens_s0": 4361.875,
"step": 4980,
"tokens_trained": 16.314504496
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4152896957662577,
"grad_norm": 1.0856021642684937,
"loss": 1.0525,
"loss_ce": 0.9173005223274231,
"loss_region": 0.030003489926457405,
"loss_total": 0.9473040103912354,
"lr": 0.0010420910598408631,
"router/selected_tokens_s0": 4334.375,
"step": 4990,
"tokens_trained": 16.347269936
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4181263740160273,
"grad_norm": 0.7011216878890991,
"loss": 1.0519,
"loss_ce": 1.0040265321731567,
"loss_region": 0.030011465772986412,
"loss_total": 1.0340379476547241,
"lr": 0.0010416841678570121,
"router/selected_tokens_s0": 4393.0,
"step": 5000,
"tokens_trained": 16.380034576
},
{
"epoch": 1.4181263740160273,
"eval_ppl": 2.788169093074203,
"eval_runtime": 2.4902,
"step": 5000,
"tokens_trained": 16.380034576
},
{
"epoch": 1.4181263740160273,
"eval_F": 0.3413229464258417,
"eval_F_cds": 0.34488279386436593,
"eval_F_dig": 0.3365887959996875,
"eval_F_exon": 0.34454255649279203,
"eval_F_intron": 0.34125204874115167,
"eval_F_nig": 0.34140188383936837,
"eval_F_promoter": 0.3405175878617453,
"eval_F_utr": 0.3429040617863649,
"eval_G": 0.3415226863108361,
"eval_G_cds": 0.34280297813856925,
"eval_G_dig": 0.39681383799515585,
"eval_G_exon": 0.3413263496822721,
"eval_G_intron": 0.3403742930108439,
"eval_G_nig": 0.34037531674994653,
"eval_G_promoter": 0.34437466936381766,
"eval_G_utr": 0.34068450968191655,
"eval_avg_bp_per_token": 2.929776654255114,
"eval_bp_per_token/cds": 2.8995357779236612,
"eval_bp_per_token/dig": 2.9709842154131847,
"eval_bp_per_token/exon": 2.9023990829444033,
"eval_bp_per_token/intron": 2.930385337432876,
"eval_bp_per_token/nig": 2.9290992444274444,
"eval_bp_per_token/promoter": 2.9367058726082993,
"eval_bp_per_token/utr": 2.916267584555522,
"eval_ppl_cds": 3.2638592963627073,
"eval_ppl_dig": 1.0816082686781683,
"eval_ppl_exon": 3.2486368312535303,
"eval_ppl_intron": 2.8401016704114306,
"eval_ppl_nig": 2.6066140907483235,
"eval_ppl_promoter": 3.109201262145088,
"eval_ppl_utr": 3.1537355648599172,
"step": 5000,
"tokens_trained": 16.380034576
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4209630522657968,
"grad_norm": 0.9108154773712158,
"loss": 1.0451,
"loss_ce": 1.066657543182373,
"loss_region": 0.03000144474208355,
"loss_total": 1.0966589450836182,
"lr": 0.001041277275873161,
"router/selected_tokens_s0": 4315.875,
"step": 5010,
"tokens_trained": 16.412800016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4237997305155663,
"grad_norm": 0.8278728127479553,
"loss": 1.0582,
"loss_ce": 1.0547834634780884,
"loss_region": 0.03000921569764614,
"loss_total": 1.0847927331924438,
"lr": 0.0010408703838893103,
"router/selected_tokens_s0": 4401.25,
"step": 5020,
"tokens_trained": 16.445565456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4266364087653358,
"grad_norm": 0.3643961250782013,
"loss": 1.0453,
"loss_ce": 1.0456041097640991,
"loss_region": 0.030006922781467438,
"loss_total": 1.0756109952926636,
"lr": 0.0010404634919054592,
"router/selected_tokens_s0": 4321.5,
"step": 5030,
"tokens_trained": 16.478329936
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4294730870151053,
"grad_norm": 0.8244170546531677,
"loss": 1.0413,
"loss_ce": 1.1286065578460693,
"loss_region": 0.03001227229833603,
"loss_total": 1.1586188077926636,
"lr": 0.0010400565999216082,
"router/selected_tokens_s0": 4414.625,
"step": 5040,
"tokens_trained": 16.511090344
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4323097652648749,
"grad_norm": 0.7646901607513428,
"loss": 1.0397,
"loss_ce": 0.9538711905479431,
"loss_region": 0.030008982867002487,
"loss_total": 0.9838801622390747,
"lr": 0.0010396497079377572,
"router/selected_tokens_s0": 4366.25,
"step": 5050,
"tokens_trained": 16.543855784
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4351464435146444,
"grad_norm": 0.5411663055419922,
"loss": 1.0546,
"loss_ce": 1.10159432888031,
"loss_region": 0.030010610818862915,
"loss_total": 1.1316049098968506,
"lr": 0.0010392428159539061,
"router/selected_tokens_s0": 4370.5,
"step": 5060,
"tokens_trained": 16.576621224
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.437983121764414,
"grad_norm": 0.4553099274635315,
"loss": 1.053,
"loss_ce": 0.924439549446106,
"loss_region": 0.030009716749191284,
"loss_total": 0.9544492959976196,
"lr": 0.001038835923970055,
"router/selected_tokens_s0": 4413.125,
"step": 5070,
"tokens_trained": 16.609386664
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4408198000141834,
"grad_norm": 0.7976288795471191,
"loss": 1.0524,
"loss_ce": 0.8370477557182312,
"loss_region": 0.03001226671040058,
"loss_total": 0.8670600056648254,
"lr": 0.001038429031986204,
"router/selected_tokens_s0": 4350.125,
"step": 5080,
"tokens_trained": 16.642152104
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.443656478263953,
"grad_norm": 0.7374417781829834,
"loss": 1.0481,
"loss_ce": 1.087965726852417,
"loss_region": 0.030008574947714806,
"loss_total": 1.1179742813110352,
"lr": 0.001038022140002353,
"router/selected_tokens_s0": 4363.5,
"step": 5090,
"tokens_trained": 16.674914848
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4464931565137225,
"grad_norm": 0.8701497912406921,
"loss": 1.0423,
"loss_ce": 1.102858066558838,
"loss_region": 0.030004970729351044,
"loss_total": 1.1328630447387695,
"lr": 0.001037615248018502,
"router/selected_tokens_s0": 4363.125,
"step": 5100,
"tokens_trained": 16.707680128
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.449329834763492,
"grad_norm": 0.7448126077651978,
"loss": 1.0436,
"loss_ce": 1.1202232837677002,
"loss_region": 0.030008699744939804,
"loss_total": 1.150231957435608,
"lr": 0.001037208356034651,
"router/selected_tokens_s0": 4389.375,
"step": 5110,
"tokens_trained": 16.740445568
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4521665130132615,
"grad_norm": 0.6302592754364014,
"loss": 1.0486,
"loss_ce": 1.0902258157730103,
"loss_region": 0.03000813163816929,
"loss_total": 1.1202338933944702,
"lr": 0.0010368014640508,
"router/selected_tokens_s0": 4353.375,
"step": 5120,
"tokens_trained": 16.773211008
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.455003191263031,
"grad_norm": 1.019002079963684,
"loss": 1.0513,
"loss_ce": 1.0098958015441895,
"loss_region": 0.02999117411673069,
"loss_total": 1.0398869514465332,
"lr": 0.0010363945720669489,
"router/selected_tokens_s0": 4293.875,
"step": 5130,
"tokens_trained": 16.805971784
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4578398695128005,
"grad_norm": 0.9714931845664978,
"loss": 1.0518,
"loss_ce": 0.9985664486885071,
"loss_region": 0.030006328597664833,
"loss_total": 1.0285727977752686,
"lr": 0.0010359876800830978,
"router/selected_tokens_s0": 4355.5,
"step": 5140,
"tokens_trained": 16.838737224
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.46067654776257,
"grad_norm": 0.8966345191001892,
"loss": 1.0456,
"loss_ce": 1.061452031135559,
"loss_region": 0.030002159997820854,
"loss_total": 1.0914541482925415,
"lr": 0.0010355807880992468,
"router/selected_tokens_s0": 4324.375,
"step": 5150,
"tokens_trained": 16.871502664
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4635132260123396,
"grad_norm": 0.6720635294914246,
"loss": 1.0403,
"loss_ce": 1.0887715816497803,
"loss_region": 0.030005764216184616,
"loss_total": 1.1187773942947388,
"lr": 0.0010351738961153958,
"router/selected_tokens_s0": 4345.375,
"step": 5160,
"tokens_trained": 16.904268104
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.466349904262109,
"grad_norm": 0.7350347638130188,
"loss": 1.0505,
"loss_ce": 1.0535961389541626,
"loss_region": 0.030005428940057755,
"loss_total": 1.0836015939712524,
"lr": 0.0010347670041315447,
"router/selected_tokens_s0": 4381.75,
"step": 5170,
"tokens_trained": 16.937030128
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4691865825118786,
"grad_norm": 0.44304972887039185,
"loss": 1.0432,
"loss_ce": 1.0507234334945679,
"loss_region": 0.030006472021341324,
"loss_total": 1.0807299613952637,
"lr": 0.0010343601121476937,
"router/selected_tokens_s0": 4387.25,
"step": 5180,
"tokens_trained": 16.969795184
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4720232607616481,
"grad_norm": 0.441457062959671,
"loss": 1.0491,
"loss_ce": 1.0120573043823242,
"loss_region": 0.030006378889083862,
"loss_total": 1.0420637130737305,
"lr": 0.0010339532201638427,
"router/selected_tokens_s0": 4363.625,
"step": 5190,
"tokens_trained": 17.002560624
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4748599390114177,
"grad_norm": 0.8241252303123474,
"loss": 1.0567,
"loss_ce": 1.0892800092697144,
"loss_region": 0.03000868298113346,
"loss_total": 1.119288682937622,
"lr": 0.0010335463281799918,
"router/selected_tokens_s0": 4367.125,
"step": 5200,
"tokens_trained": 17.035326064
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4776966172611872,
"grad_norm": 0.5182619690895081,
"loss": 1.0408,
"loss_ce": 0.9733250737190247,
"loss_region": 0.030004369094967842,
"loss_total": 1.0033293962478638,
"lr": 0.0010331394361961408,
"router/selected_tokens_s0": 4326.25,
"step": 5210,
"tokens_trained": 17.068091504
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4805332955109567,
"grad_norm": 0.7119126319885254,
"loss": 1.0389,
"loss_ce": 0.9868582487106323,
"loss_region": 0.03000757470726967,
"loss_total": 1.016865849494934,
"lr": 0.0010327325442122898,
"router/selected_tokens_s0": 4388.0,
"step": 5220,
"tokens_trained": 17.100856944
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4833699737607262,
"grad_norm": 1.0945305824279785,
"loss": 1.0462,
"loss_ce": 0.9747534990310669,
"loss_region": 0.030006079003214836,
"loss_total": 1.0047595500946045,
"lr": 0.0010323256522284385,
"router/selected_tokens_s0": 4341.25,
"step": 5230,
"tokens_trained": 17.133622384
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4862066520104957,
"grad_norm": 0.8298206329345703,
"loss": 1.0351,
"loss_ce": 1.05121648311615,
"loss_region": 0.030005764216184616,
"loss_total": 1.0812222957611084,
"lr": 0.0010319187602445875,
"router/selected_tokens_s0": 4326.375,
"step": 5240,
"tokens_trained": 17.166385424
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4890433302602653,
"grad_norm": 0.4219936728477478,
"loss": 1.0484,
"loss_ce": 0.8292503952980042,
"loss_region": 0.030046647414565086,
"loss_total": 0.8592970371246338,
"lr": 0.0010315118682607365,
"router/selected_tokens_s0": 4365.125,
"step": 5250,
"tokens_trained": 17.199150864
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4918800085100348,
"grad_norm": 0.19638904929161072,
"loss": 1.0493,
"loss_ce": 0.9696671962738037,
"loss_region": 0.030005378648638725,
"loss_total": 0.9996725916862488,
"lr": 0.0010311049762768854,
"router/selected_tokens_s0": 4325.25,
"step": 5260,
"tokens_trained": 17.231916304
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4947166867598043,
"grad_norm": 0.786198079586029,
"loss": 1.0451,
"loss_ce": 1.0354949235916138,
"loss_region": 0.03000979870557785,
"loss_total": 1.0655046701431274,
"lr": 0.0010306980842930346,
"router/selected_tokens_s0": 4347.125,
"step": 5270,
"tokens_trained": 17.264676432
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.4975533650095738,
"grad_norm": 0.7419178485870361,
"loss": 1.041,
"loss_ce": 0.928809404373169,
"loss_region": 0.030001329258084297,
"loss_total": 0.9588107466697693,
"lr": 0.0010302911923091836,
"router/selected_tokens_s0": 4279.625,
"step": 5280,
"tokens_trained": 17.297440272
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5003900432593433,
"grad_norm": 0.8321080803871155,
"loss": 1.0508,
"loss_ce": 1.090352177619934,
"loss_region": 0.030001569539308548,
"loss_total": 1.1203536987304688,
"lr": 0.0010298843003253325,
"router/selected_tokens_s0": 4361.625,
"step": 5290,
"tokens_trained": 17.330205712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5032267215091129,
"grad_norm": 0.43943366408348083,
"loss": 1.0423,
"loss_ce": 1.054518461227417,
"loss_region": 0.03000991977751255,
"loss_total": 1.0845283269882202,
"lr": 0.0010294774083414815,
"router/selected_tokens_s0": 4375.625,
"step": 5300,
"tokens_trained": 17.362971152
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5060633997588824,
"grad_norm": 0.9542965888977051,
"loss": 1.0469,
"loss_ce": 1.113122820854187,
"loss_region": 0.030009111389517784,
"loss_total": 1.143131971359253,
"lr": 0.0010290705163576305,
"router/selected_tokens_s0": 4369.0,
"step": 5310,
"tokens_trained": 17.395736592
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.508900078008652,
"grad_norm": 0.6456644535064697,
"loss": 1.0438,
"loss_ce": 1.0606231689453125,
"loss_region": 0.030006930232048035,
"loss_total": 1.090630054473877,
"lr": 0.0010286636243737794,
"router/selected_tokens_s0": 4360.125,
"step": 5320,
"tokens_trained": 17.428502032
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5117367562584214,
"grad_norm": 1.505149006843567,
"loss": 1.0426,
"loss_ce": 1.065946340560913,
"loss_region": 0.030012134462594986,
"loss_total": 1.0959584712982178,
"lr": 0.0010282567323899284,
"router/selected_tokens_s0": 4342.875,
"step": 5330,
"tokens_trained": 17.461262816
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.514573434508191,
"grad_norm": 0.5819237232208252,
"loss": 1.0424,
"loss_ce": 0.9712111353874207,
"loss_region": 0.030006036162376404,
"loss_total": 1.0012171268463135,
"lr": 0.0010278498404060774,
"router/selected_tokens_s0": 4354.625,
"step": 5340,
"tokens_trained": 17.494028256
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5174101127579604,
"grad_norm": 0.5115887522697449,
"loss": 1.0468,
"loss_ce": 1.0570096969604492,
"loss_region": 0.0300076175481081,
"loss_total": 1.087017297744751,
"lr": 0.0010274429484222263,
"router/selected_tokens_s0": 4346.5,
"step": 5350,
"tokens_trained": 17.526793696
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.52024679100773,
"grad_norm": 0.5018046498298645,
"loss": 1.0437,
"loss_ce": 0.9743192195892334,
"loss_region": 0.030005570501089096,
"loss_total": 1.0043247938156128,
"lr": 0.0010270360564383753,
"router/selected_tokens_s0": 4360.75,
"step": 5360,
"tokens_trained": 17.559559136
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5230834692574995,
"grad_norm": 0.7800183892250061,
"loss": 1.0429,
"loss_ce": 1.0386371612548828,
"loss_region": 0.030008897185325623,
"loss_total": 1.0686460733413696,
"lr": 0.0010266291644545243,
"router/selected_tokens_s0": 4364.125,
"step": 5370,
"tokens_trained": 17.59232456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.525920147507269,
"grad_norm": 0.6966549754142761,
"loss": 1.0489,
"loss_ce": 0.8995506167411804,
"loss_region": 0.03000815026462078,
"loss_total": 0.9295587539672852,
"lr": 0.0010262222724706732,
"router/selected_tokens_s0": 4385.75,
"step": 5380,
"tokens_trained": 17.6250892
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5287568257570385,
"grad_norm": 0.5771371722221375,
"loss": 1.0467,
"loss_ce": 0.9319908022880554,
"loss_region": 0.0300018098205328,
"loss_total": 0.961992621421814,
"lr": 0.0010258153804868222,
"router/selected_tokens_s0": 4344.5,
"step": 5390,
"tokens_trained": 17.65785464
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.531593504006808,
"grad_norm": 0.553460955619812,
"loss": 1.0509,
"loss_ce": 1.0958824157714844,
"loss_region": 0.030008256435394287,
"loss_total": 1.1258907318115234,
"lr": 0.0010254084885029712,
"router/selected_tokens_s0": 4395.875,
"step": 5400,
"tokens_trained": 17.69062008
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5344301822565776,
"grad_norm": 0.7295851111412048,
"loss": 1.0446,
"loss_ce": 1.0735763311386108,
"loss_region": 0.030002327635884285,
"loss_total": 1.1035786867141724,
"lr": 0.0010250015965191201,
"router/selected_tokens_s0": 4300.625,
"step": 5410,
"tokens_trained": 17.72338552
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.537266860506347,
"grad_norm": 0.4493541419506073,
"loss": 1.041,
"loss_ce": 1.1575278043746948,
"loss_region": 0.030008507892489433,
"loss_total": 1.187536358833313,
"lr": 0.001024594704535269,
"router/selected_tokens_s0": 4385.875,
"step": 5420,
"tokens_trained": 17.75615016
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5401035387561166,
"grad_norm": 1.0313796997070312,
"loss": 1.0498,
"loss_ce": 1.0423924922943115,
"loss_region": 0.030005216598510742,
"loss_total": 1.0723977088928223,
"lr": 0.001024187812551418,
"router/selected_tokens_s0": 4385.5,
"step": 5430,
"tokens_trained": 17.78891464
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5429402170058861,
"grad_norm": 0.6560305953025818,
"loss": 1.0418,
"loss_ce": 0.9702669978141785,
"loss_region": 0.030001483857631683,
"loss_total": 1.0002684593200684,
"lr": 0.001023780920567567,
"router/selected_tokens_s0": 4346.375,
"step": 5440,
"tokens_trained": 17.821677712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5457768952556556,
"grad_norm": 0.2240542620420456,
"loss": 1.038,
"loss_ce": 1.117678165435791,
"loss_region": 0.03000263124704361,
"loss_total": 1.1476807594299316,
"lr": 0.0010233740285837162,
"router/selected_tokens_s0": 4357.25,
"step": 5450,
"tokens_trained": 17.854443152
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5486135735054252,
"grad_norm": 0.3824736475944519,
"loss": 1.0438,
"loss_ce": 1.013928771018982,
"loss_region": 0.030003132298588753,
"loss_total": 1.0439319610595703,
"lr": 0.0010229671365998652,
"router/selected_tokens_s0": 4288.0,
"step": 5460,
"tokens_trained": 17.88720588
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5514502517551947,
"grad_norm": 1.2875090837478638,
"loss": 1.0417,
"loss_ce": 1.0048933029174805,
"loss_region": 0.03000313974916935,
"loss_total": 1.0348964929580688,
"lr": 0.0010225602446160141,
"router/selected_tokens_s0": 4326.0,
"step": 5470,
"tokens_trained": 17.91997132
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5542869300049642,
"grad_norm": 0.32429569959640503,
"loss": 1.0345,
"loss_ce": 0.8651331067085266,
"loss_region": 0.029996881261467934,
"loss_total": 0.8951299786567688,
"lr": 0.0010221533526321629,
"router/selected_tokens_s0": 4298.625,
"step": 5480,
"tokens_trained": 17.952736472
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5571236082547337,
"grad_norm": 0.7797481417655945,
"loss": 1.0423,
"loss_ce": 1.0778536796569824,
"loss_region": 0.03001037798821926,
"loss_total": 1.1078640222549438,
"lr": 0.0010217464606483118,
"router/selected_tokens_s0": 4417.75,
"step": 5490,
"tokens_trained": 17.985498936
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5599602865045032,
"grad_norm": 0.6121678352355957,
"loss": 1.0487,
"loss_ce": 1.0614382028579712,
"loss_region": 0.030010351911187172,
"loss_total": 1.0914485454559326,
"lr": 0.0010213395686644608,
"router/selected_tokens_s0": 4413.5,
"step": 5500,
"tokens_trained": 18.018264376
},
{
"epoch": 1.5599602865045032,
"eval_ppl": 2.76765789476978,
"eval_runtime": 2.4835,
"step": 5500,
"tokens_trained": 18.018264376
},
{
"epoch": 1.5599602865045032,
"eval_F": 0.3403231655326333,
"eval_F_cds": 0.34277235568564984,
"eval_F_dig": 0.3267703205979634,
"eval_F_exon": 0.34366801139729736,
"eval_F_intron": 0.3408284238068733,
"eval_F_nig": 0.34099593292063235,
"eval_F_promoter": 0.33725428255936774,
"eval_F_utr": 0.34147572250874536,
"eval_G": 0.3415901383306685,
"eval_G_cds": 0.3420066364335616,
"eval_G_dig": 0.39036181619006693,
"eval_G_exon": 0.3413997151557254,
"eval_G_intron": 0.34085139204059245,
"eval_G_nig": 0.3408783641023706,
"eval_G_promoter": 0.34279264794696634,
"eval_G_utr": 0.34078024569894544,
"eval_avg_bp_per_token": 2.9383835756079635,
"eval_bp_per_token/cds": 2.9173881248378195,
"eval_bp_per_token/dig": 3.0602534470391327,
"eval_bp_per_token/exon": 2.9097849285831554,
"eval_bp_per_token/intron": 2.934027593211061,
"eval_bp_per_token/nig": 2.9325862963672136,
"eval_bp_per_token/promoter": 2.9651217248040944,
"eval_bp_per_token/utr": 2.9284658735128364,
"eval_ppl_cds": 3.18907175811622,
"eval_ppl_dig": 1.081375085654356,
"eval_ppl_exon": 3.2359904440638463,
"eval_ppl_intron": 2.8314949801099587,
"eval_ppl_nig": 2.600385066779234,
"eval_ppl_promoter": 3.03807385354442,
"eval_ppl_utr": 3.1444852350411727,
"step": 5500,
"tokens_trained": 18.018264376
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5627969647542728,
"grad_norm": 0.4803471267223358,
"loss": 1.0374,
"loss_ce": 0.9833582043647766,
"loss_region": 0.030004315078258514,
"loss_total": 1.0133625268936157,
"lr": 0.0010209326766806098,
"router/selected_tokens_s0": 4338.875,
"step": 5510,
"tokens_trained": 18.051029816
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5656336430040423,
"grad_norm": 1.0378037691116333,
"loss": 1.0417,
"loss_ce": 1.0130213499069214,
"loss_region": 0.030004722997546196,
"loss_total": 1.043026089668274,
"lr": 0.001020525784696759,
"router/selected_tokens_s0": 4365.25,
"step": 5520,
"tokens_trained": 18.083794456
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5684703212538118,
"grad_norm": 0.2039332240819931,
"loss": 1.0337,
"loss_ce": 0.972812831401825,
"loss_region": 0.030001500621438026,
"loss_total": 1.0028142929077148,
"lr": 0.001020118892712908,
"router/selected_tokens_s0": 4273.375,
"step": 5530,
"tokens_trained": 18.116559896
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5713069995035813,
"grad_norm": 0.9828659296035767,
"loss": 1.0435,
"loss_ce": 1.0882686376571655,
"loss_region": 0.030004315078258514,
"loss_total": 1.1182729005813599,
"lr": 0.0010197120007290569,
"router/selected_tokens_s0": 4338.375,
"step": 5540,
"tokens_trained": 18.149325336
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5741436777533508,
"grad_norm": 0.4906889498233795,
"loss": 1.0405,
"loss_ce": 0.9475066661834717,
"loss_region": 0.030002078041434288,
"loss_total": 0.9775087237358093,
"lr": 0.0010193051087452058,
"router/selected_tokens_s0": 4325.25,
"step": 5550,
"tokens_trained": 18.182090776
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5769803560031204,
"grad_norm": 0.6963337659835815,
"loss": 1.0379,
"loss_ce": 0.7684432864189148,
"loss_region": 0.029997603967785835,
"loss_total": 0.7984408736228943,
"lr": 0.0010188982167613548,
"router/selected_tokens_s0": 4256.625,
"step": 5560,
"tokens_trained": 18.214852704
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5798170342528899,
"grad_norm": 0.6726390719413757,
"loss": 1.0506,
"loss_ce": 1.0656613111495972,
"loss_region": 0.030005795881152153,
"loss_total": 1.0956671237945557,
"lr": 0.0010184913247775038,
"router/selected_tokens_s0": 4383.75,
"step": 5570,
"tokens_trained": 18.247618144
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5826537125026594,
"grad_norm": 0.5460783839225769,
"loss": 1.0391,
"loss_ce": 0.9652643203735352,
"loss_region": 0.030002180486917496,
"loss_total": 0.9952664971351624,
"lr": 0.0010180844327936527,
"router/selected_tokens_s0": 4303.875,
"step": 5580,
"tokens_trained": 18.280383584
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.585490390752429,
"grad_norm": 0.35639381408691406,
"loss": 1.0356,
"loss_ce": 0.88677579164505,
"loss_region": 0.030002212151885033,
"loss_total": 0.916778028011322,
"lr": 0.0010176775408098017,
"router/selected_tokens_s0": 4362.625,
"step": 5590,
"tokens_trained": 18.313149024
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5883270690021984,
"grad_norm": 0.5016544461250305,
"loss": 1.0311,
"loss_ce": 1.0120795965194702,
"loss_region": 0.030004823580384254,
"loss_total": 1.0420844554901123,
"lr": 0.0010172706488259507,
"router/selected_tokens_s0": 4349.75,
"step": 5600,
"tokens_trained": 18.345914464
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.591163747251968,
"grad_norm": 0.687404990196228,
"loss": 1.0409,
"loss_ce": 1.033144235610962,
"loss_region": 0.03000274859368801,
"loss_total": 1.063146948814392,
"lr": 0.0010168637568420996,
"router/selected_tokens_s0": 4313.5,
"step": 5610,
"tokens_trained": 18.378679104
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5940004255017375,
"grad_norm": 0.3557313084602356,
"loss": 1.0342,
"loss_ce": 0.9033691883087158,
"loss_region": 0.030007191002368927,
"loss_total": 0.9333763718605042,
"lr": 0.0010164568648582486,
"router/selected_tokens_s0": 4394.375,
"step": 5620,
"tokens_trained": 18.411444544
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.596837103751507,
"grad_norm": 0.6189426183700562,
"loss": 1.04,
"loss_ce": 0.8717077970504761,
"loss_region": 0.03000614605844021,
"loss_total": 0.9017139673233032,
"lr": 0.0010160499728743976,
"router/selected_tokens_s0": 4337.5,
"step": 5630,
"tokens_trained": 18.444209984
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.5996737820012765,
"grad_norm": 0.6988716721534729,
"loss": 1.0305,
"loss_ce": 0.975788950920105,
"loss_region": 0.029997356235980988,
"loss_total": 1.0057862997055054,
"lr": 0.0010156430808905465,
"router/selected_tokens_s0": 4335.875,
"step": 5640,
"tokens_trained": 18.476975424
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.602510460251046,
"grad_norm": 0.6643272042274475,
"loss": 1.0403,
"loss_ce": 1.1351817846298218,
"loss_region": 0.030008381232619286,
"loss_total": 1.1651902198791504,
"lr": 0.0010152361889066955,
"router/selected_tokens_s0": 4362.25,
"step": 5650,
"tokens_trained": 18.509739264
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6053471385008156,
"grad_norm": 0.8286615014076233,
"loss": 1.0373,
"loss_ce": 0.8297767043113708,
"loss_region": 0.03000292181968689,
"loss_total": 0.8597795963287354,
"lr": 0.0010148292969228445,
"router/selected_tokens_s0": 4327.25,
"step": 5660,
"tokens_trained": 18.542503904
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.608183816750585,
"grad_norm": 0.22749805450439453,
"loss": 1.0352,
"loss_ce": 0.9598128795623779,
"loss_region": 0.03000240959227085,
"loss_total": 0.9898152947425842,
"lr": 0.0010144224049389934,
"router/selected_tokens_s0": 4316.75,
"step": 5670,
"tokens_trained": 18.575269344
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6110204950003546,
"grad_norm": 0.25335147976875305,
"loss": 1.0285,
"loss_ce": 0.975443422794342,
"loss_region": 0.030008111149072647,
"loss_total": 1.0054515600204468,
"lr": 0.0010140155129551424,
"router/selected_tokens_s0": 4387.0,
"step": 5680,
"tokens_trained": 18.608031832
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6138571732501241,
"grad_norm": 0.8008378148078918,
"loss": 1.0393,
"loss_ce": 1.088114619255066,
"loss_region": 0.030002346262335777,
"loss_total": 1.1181169748306274,
"lr": 0.0010136086209712914,
"router/selected_tokens_s0": 4368.875,
"step": 5690,
"tokens_trained": 18.640797272
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6166938514998936,
"grad_norm": 0.6417054533958435,
"loss": 1.0381,
"loss_ce": 0.9379876255989075,
"loss_region": 0.03000989928841591,
"loss_total": 0.9679975509643555,
"lr": 0.0010132017289874405,
"router/selected_tokens_s0": 4368.875,
"step": 5700,
"tokens_trained": 18.673562712
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6195305297496632,
"grad_norm": 1.3401010036468506,
"loss": 1.0339,
"loss_ce": 1.040677785873413,
"loss_region": 0.030000442638993263,
"loss_total": 1.0706782341003418,
"lr": 0.0010127948370035895,
"router/selected_tokens_s0": 4332.25,
"step": 5710,
"tokens_trained": 18.706328152
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6223672079994327,
"grad_norm": 0.9140957593917847,
"loss": 1.0432,
"loss_ce": 1.1209052801132202,
"loss_region": 0.03001645766198635,
"loss_total": 1.1509217023849487,
"lr": 0.0010123879450197385,
"router/selected_tokens_s0": 4408.625,
"step": 5720,
"tokens_trained": 18.739093568
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6252038862492022,
"grad_norm": 0.38389793038368225,
"loss": 1.0393,
"loss_ce": 0.9129772782325745,
"loss_region": 0.02999955601990223,
"loss_total": 0.9429768323898315,
"lr": 0.0010119810530358872,
"router/selected_tokens_s0": 4306.5,
"step": 5730,
"tokens_trained": 18.771858208
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6280405644989717,
"grad_norm": 1.761724829673767,
"loss": 1.0455,
"loss_ce": 1.0788276195526123,
"loss_region": 0.030005717650055885,
"loss_total": 1.1088333129882812,
"lr": 0.0010115741610520362,
"router/selected_tokens_s0": 4344.625,
"step": 5740,
"tokens_trained": 18.804619848
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6308772427487412,
"grad_norm": 0.514764666557312,
"loss": 1.0331,
"loss_ce": 1.0706536769866943,
"loss_region": 0.030006183311343193,
"loss_total": 1.1006598472595215,
"lr": 0.0010111672690681851,
"router/selected_tokens_s0": 4372.25,
"step": 5750,
"tokens_trained": 18.837385288
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6337139209985108,
"grad_norm": 0.40612781047821045,
"loss": 1.036,
"loss_ce": 1.0906848907470703,
"loss_region": 0.030003517866134644,
"loss_total": 1.1206884384155273,
"lr": 0.0010107603770843341,
"router/selected_tokens_s0": 4322.25,
"step": 5760,
"tokens_trained": 18.870150648
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6365505992482803,
"grad_norm": 0.26753610372543335,
"loss": 1.0316,
"loss_ce": 1.00560462474823,
"loss_region": 0.030005289241671562,
"loss_total": 1.0356099605560303,
"lr": 0.0010103534851004833,
"router/selected_tokens_s0": 4323.5,
"step": 5770,
"tokens_trained": 18.902916088
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6393872774980498,
"grad_norm": 0.41570785641670227,
"loss": 1.0386,
"loss_ce": 0.9916934370994568,
"loss_region": 0.03000512719154358,
"loss_total": 1.0216985940933228,
"lr": 0.0010099465931166323,
"router/selected_tokens_s0": 4347.375,
"step": 5780,
"tokens_trained": 18.935681528
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6422239557478193,
"grad_norm": 0.5421174168586731,
"loss": 1.0318,
"loss_ce": 0.9400946497917175,
"loss_region": 0.030001504346728325,
"loss_total": 0.9700961709022522,
"lr": 0.0010095397011327812,
"router/selected_tokens_s0": 4331.0,
"step": 5790,
"tokens_trained": 18.968446968
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6450606339975888,
"grad_norm": 0.4832181930541992,
"loss": 1.0337,
"loss_ce": 0.9629077911376953,
"loss_region": 0.030001387000083923,
"loss_total": 0.9929091930389404,
"lr": 0.0010091328091489302,
"router/selected_tokens_s0": 4295.375,
"step": 5800,
"tokens_trained": 19.001210808
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6478973122473584,
"grad_norm": 1.0204321146011353,
"loss": 1.0304,
"loss_ce": 1.013646125793457,
"loss_region": 0.03000660054385662,
"loss_total": 1.0436527729034424,
"lr": 0.0010087259171650792,
"router/selected_tokens_s0": 4419.5,
"step": 5810,
"tokens_trained": 19.033976248
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6507339904971279,
"grad_norm": 0.5032446384429932,
"loss": 1.0334,
"loss_ce": 1.0649542808532715,
"loss_region": 0.030000925064086914,
"loss_total": 1.0949552059173584,
"lr": 0.0010083190251812281,
"router/selected_tokens_s0": 4287.125,
"step": 5820,
"tokens_trained": 19.066741688
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6535706687468974,
"grad_norm": 0.5891702175140381,
"loss": 1.0297,
"loss_ce": 1.0153957605361938,
"loss_region": 0.030004706233739853,
"loss_total": 1.0454005002975464,
"lr": 0.001007912133197377,
"router/selected_tokens_s0": 4354.75,
"step": 5830,
"tokens_trained": 19.099507128
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.656407346996667,
"grad_norm": 0.6594350934028625,
"loss": 1.0346,
"loss_ce": 1.0509767532348633,
"loss_region": 0.03000517748296261,
"loss_total": 1.080981969833374,
"lr": 0.001007505241213526,
"router/selected_tokens_s0": 4363.25,
"step": 5840,
"tokens_trained": 19.132272568
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6592440252464364,
"grad_norm": 0.5906273126602173,
"loss": 1.0337,
"loss_ce": 1.0429621934890747,
"loss_region": 0.030009938403964043,
"loss_total": 1.0729721784591675,
"lr": 0.001007098349229675,
"router/selected_tokens_s0": 4366.125,
"step": 5850,
"tokens_trained": 19.165038008
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.662080703496206,
"grad_norm": 0.47190093994140625,
"loss": 1.0335,
"loss_ce": 0.950088381767273,
"loss_region": 0.030002925544977188,
"loss_total": 0.9800913333892822,
"lr": 0.001006691457245824,
"router/selected_tokens_s0": 4327.5,
"step": 5860,
"tokens_trained": 19.197803448
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6649173817459755,
"grad_norm": 0.5748708844184875,
"loss": 1.0324,
"loss_ce": 1.0693488121032715,
"loss_region": 0.030013523995876312,
"loss_total": 1.0993623733520508,
"lr": 0.001006284565261973,
"router/selected_tokens_s0": 4386.25,
"step": 5870,
"tokens_trained": 19.230568888
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.667754059995745,
"grad_norm": 0.5576515793800354,
"loss": 1.0347,
"loss_ce": 1.090317964553833,
"loss_region": 0.030007001012563705,
"loss_total": 1.120324969291687,
"lr": 0.001005877673278122,
"router/selected_tokens_s0": 4399.0,
"step": 5880,
"tokens_trained": 19.263333528
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6705907382455145,
"grad_norm": 0.4692791998386383,
"loss": 1.0231,
"loss_ce": 0.9621900320053101,
"loss_region": 0.02999553643167019,
"loss_total": 0.9921855926513672,
"lr": 0.0010054707812942709,
"router/selected_tokens_s0": 4265.375,
"step": 5890,
"tokens_trained": 19.296098168
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.673427416495284,
"grad_norm": 0.5106430649757385,
"loss": 1.0364,
"loss_ce": 0.8931739330291748,
"loss_region": 0.030010921880602837,
"loss_total": 0.923184871673584,
"lr": 0.0010050638893104198,
"router/selected_tokens_s0": 4364.125,
"step": 5900,
"tokens_trained": 19.328862688
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6762640947450536,
"grad_norm": 0.5919066667556763,
"loss": 1.0405,
"loss_ce": 1.1164112091064453,
"loss_region": 0.03000679798424244,
"loss_total": 1.1464179754257202,
"lr": 0.0010046569973265688,
"router/selected_tokens_s0": 4368.0,
"step": 5910,
"tokens_trained": 19.361627328
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.679100772994823,
"grad_norm": 0.5985324382781982,
"loss": 1.029,
"loss_ce": 0.8035845160484314,
"loss_region": 0.030008496716618538,
"loss_total": 0.8335930109024048,
"lr": 0.0010042501053427178,
"router/selected_tokens_s0": 4344.0,
"step": 5920,
"tokens_trained": 19.394392768
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6819374512445926,
"grad_norm": 0.46029677987098694,
"loss": 1.0369,
"loss_ce": 1.1152499914169312,
"loss_region": 0.030002374202013016,
"loss_total": 1.1452523469924927,
"lr": 0.0010038432133588667,
"router/selected_tokens_s0": 4378.875,
"step": 5930,
"tokens_trained": 19.427158208
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6847741294943621,
"grad_norm": 0.5811964273452759,
"loss": 1.0304,
"loss_ce": 1.0038641691207886,
"loss_region": 0.03000630810856819,
"loss_total": 1.0338704586029053,
"lr": 0.0010034363213750157,
"router/selected_tokens_s0": 4316.0,
"step": 5940,
"tokens_trained": 19.459922848
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6876108077441316,
"grad_norm": 0.2964920401573181,
"loss": 1.0347,
"loss_ce": 1.0238761901855469,
"loss_region": 0.03000667691230774,
"loss_total": 1.0538828372955322,
"lr": 0.0010030294293911649,
"router/selected_tokens_s0": 4331.875,
"step": 5950,
"tokens_trained": 19.492688288
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6904474859939012,
"grad_norm": 0.20688390731811523,
"loss": 1.031,
"loss_ce": 1.0256707668304443,
"loss_region": 0.030003707855939865,
"loss_total": 1.055674433708191,
"lr": 0.0010026225374073139,
"router/selected_tokens_s0": 4327.875,
"step": 5960,
"tokens_trained": 19.525453728
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6932841642436707,
"grad_norm": 0.4191875755786896,
"loss": 1.0357,
"loss_ce": 0.9435751438140869,
"loss_region": 0.03000720962882042,
"loss_total": 0.9735823273658752,
"lr": 0.0010022156454234628,
"router/selected_tokens_s0": 4370.375,
"step": 5970,
"tokens_trained": 19.558219168
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6961208424934402,
"grad_norm": 0.6510814428329468,
"loss": 1.0246,
"loss_ce": 0.9768120050430298,
"loss_region": 0.030007587745785713,
"loss_total": 1.0068196058273315,
"lr": 0.0010018087534396116,
"router/selected_tokens_s0": 4360.875,
"step": 5980,
"tokens_trained": 19.590983808
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.6989575207432097,
"grad_norm": 0.477987676858902,
"loss": 1.0252,
"loss_ce": 1.0579345226287842,
"loss_region": 0.030001208186149597,
"loss_total": 1.0879356861114502,
"lr": 0.0010014018614557605,
"router/selected_tokens_s0": 4310.375,
"step": 5990,
"tokens_trained": 19.623749248
},
{
"comp/rl_weight": 0.03,
"comp/strictness": 0.0,
"epoch": 1.7017941989929792,
"grad_norm": 0.5178934335708618,
"loss": 1.0325,
"loss_ce": 1.01963472366333,
"loss_region": 0.03001089207828045,
"loss_total": 1.0496456623077393,
"lr": 0.0010009949694719095,
"router/selected_tokens_s0": 4379.5,
"step": 6000,
"tokens_trained": 19.65651468
},
{
"epoch": 1.7017941989929792,
"eval_ppl": 2.7438295521464737,
"eval_runtime": 2.524,
"step": 6000,
"tokens_trained": 19.65651468
},
{
"epoch": 1.7017941989929792,
"eval_F": 0.34151615105799704,
"eval_F_cds": 0.3443491198421619,
"eval_F_dig": 0.3249212178034742,
"eval_F_exon": 0.3452196236634976,
"eval_F_intron": 0.3418051895789037,
"eval_F_nig": 0.3420160147164305,
"eval_F_promoter": 0.33950926318966235,
"eval_F_utr": 0.34296393229500766,
"eval_G": 0.3440260132521393,
"eval_G_cds": 0.34685659178384587,
"eval_G_dig": 0.3855714028765268,
"eval_G_exon": 0.3442092298449093,
"eval_G_intron": 0.3428874806376615,
"eval_G_nig": 0.34218012092239336,
"eval_G_promoter": 0.34825656510586706,
"eval_G_utr": 0.3435660953042277,
"eval_avg_bp_per_token": 2.9281192028607097,
"eval_bp_per_token/cds": 2.904029493260696,
"eval_bp_per_token/dig": 3.077669124719461,
"eval_bp_per_token/exon": 2.896706709160742,
"eval_bp_per_token/intron": 2.9256431162791223,
"eval_bp_per_token/nig": 2.9238396945508875,
"eval_bp_per_token/promoter": 2.9454277347400777,
"eval_bp_per_token/utr": 2.9157584977181474,
"eval_ppl_cds": 3.1473755860930703,
"eval_ppl_dig": 1.0781771784423138,
"eval_ppl_exon": 3.215945780217024,
"eval_ppl_intron": 2.8130272235017966,
"eval_ppl_nig": 2.58073245677091,
"eval_ppl_promoter": 2.9110875223569272,
"eval_ppl_utr": 3.090760374244644,
"step": 6000,
"tokens_trained": 19.65651468
}
],
"logging_steps": 10,
"max_steps": 30600,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 3000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}