| { |
| "best_global_step": 6000, |
| "best_metric": 2.7438295521464737, |
| "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/HNet_Ori-BPT3/checkpoint-6000", |
| "epoch": 1.7017941989929792, |
| "eval_steps": 500, |
| "global_step": 6000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.0028366782497695198, |
| "grad_norm": 590.2120361328125, |
| "loss": 144.5784, |
| "loss_ce": 170.91241455078125, |
| "loss_region": 0.030412333086133003, |
| "loss_total": 170.9428253173828, |
| "lr": 2.20454076850486e-05, |
| "router/selected_tokens_s0": 1.0, |
| "step": 10, |
| "tokens_trained": 0.03276544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.0056733564995390395, |
| "grad_norm": 565.2921142578125, |
| "loss": 52.047, |
| "loss_ce": 28.61202049255371, |
| "loss_region": 0.03181665763258934, |
| "loss_total": 28.643836975097656, |
| "lr": 4.654030511288038e-05, |
| "router/selected_tokens_s0": 1.0, |
| "step": 20, |
| "tokens_trained": 0.06553088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.00851003474930856, |
| "grad_norm": 361.24432373046875, |
| "loss": 18.4265, |
| "loss_ce": 16.737817764282227, |
| "loss_region": 0.03595759719610214, |
| "loss_total": 16.773775100708008, |
| "lr": 7.103520254071216e-05, |
| "router/selected_tokens_s0": 1.0, |
| "step": 30, |
| "tokens_trained": 0.09829632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.011346712999078079, |
| "grad_norm": 649.0695190429688, |
| "loss": 8.0445, |
| "loss_ce": 11.410881996154785, |
| "loss_region": 0.03821098059415817, |
| "loss_total": 11.449092864990234, |
| "lr": 9.553009996854394e-05, |
| "router/selected_tokens_s0": 1.0, |
| "step": 40, |
| "tokens_trained": 0.13106176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.014183391248847599, |
| "grad_norm": 534.2383422851562, |
| "loss": 9.3219, |
| "loss_ce": 9.884474754333496, |
| "loss_region": 0.040100596845149994, |
| "loss_total": 9.924575805664062, |
| "lr": 0.00012002499739637572, |
| "router/selected_tokens_s0": 1.0, |
| "step": 50, |
| "tokens_trained": 0.1638272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.01702006949861712, |
| "grad_norm": 273.8401184082031, |
| "loss": 14.1755, |
| "loss_ce": 12.677406311035156, |
| "loss_region": 0.041250791400671005, |
| "loss_total": 12.718657493591309, |
| "lr": 0.00014451989482420748, |
| "router/selected_tokens_s0": 1.0, |
| "step": 60, |
| "tokens_trained": 0.19659264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.01985674774838664, |
| "grad_norm": 544.6290893554688, |
| "loss": 14.1136, |
| "loss_ce": 14.262775421142578, |
| "loss_region": 0.042144227772951126, |
| "loss_total": 14.304919242858887, |
| "lr": 0.00016901479225203927, |
| "router/selected_tokens_s0": 1.0, |
| "step": 70, |
| "tokens_trained": 0.22935808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.022693425998156158, |
| "grad_norm": 527.1918334960938, |
| "loss": 15.2492, |
| "loss_ce": 11.932450294494629, |
| "loss_region": 0.04246167093515396, |
| "loss_total": 11.9749116897583, |
| "lr": 0.00019350968967987104, |
| "router/selected_tokens_s0": 1.0, |
| "step": 80, |
| "tokens_trained": 0.26212192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.025530104247925678, |
| "grad_norm": 343.09454345703125, |
| "loss": 12.0101, |
| "loss_ce": 6.092933177947998, |
| "loss_region": 0.04214272275567055, |
| "loss_total": 6.13507604598999, |
| "lr": 0.0002180045871077028, |
| "router/selected_tokens_s0": 1.0, |
| "step": 90, |
| "tokens_trained": 0.29488736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.028366782497695198, |
| "grad_norm": 309.79541015625, |
| "loss": 9.8843, |
| "loss_ce": 5.214886665344238, |
| "loss_region": 0.041769951581954956, |
| "loss_total": 5.256656646728516, |
| "lr": 0.00024249948453553463, |
| "router/selected_tokens_s0": 1.0, |
| "step": 100, |
| "tokens_trained": 0.3276528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.031203460747464717, |
| "grad_norm": 251.26068115234375, |
| "loss": 8.5835, |
| "loss_ce": 12.269608497619629, |
| "loss_region": 0.04041137546300888, |
| "loss_total": 12.310019493103027, |
| "lr": 0.00026699438196336637, |
| "router/selected_tokens_s0": 1.0, |
| "step": 110, |
| "tokens_trained": 0.36041744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03404013899723424, |
| "grad_norm": 148.94601440429688, |
| "loss": 6.4366, |
| "loss_ce": 3.2050940990448, |
| "loss_region": 0.03642381727695465, |
| "loss_total": 3.241518020629883, |
| "lr": 0.00029148927939119814, |
| "router/selected_tokens_s0": 1.0, |
| "step": 120, |
| "tokens_trained": 0.39318128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03687681724700376, |
| "grad_norm": 187.2681427001953, |
| "loss": 10.4928, |
| "loss_ce": 6.001107215881348, |
| "loss_region": 0.030254848301410675, |
| "loss_total": 6.031362056732178, |
| "lr": 0.00031598417681902996, |
| "router/selected_tokens_s0": 4752.0, |
| "step": 130, |
| "tokens_trained": 0.42594672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.03971349549677328, |
| "grad_norm": 218.34559631347656, |
| "loss": 8.5742, |
| "loss_ce": 3.848691701889038, |
| "loss_region": 0.03400004655122757, |
| "loss_total": 3.8826918601989746, |
| "lr": 0.00034047907424686173, |
| "router/selected_tokens_s0": 7042.125, |
| "step": 140, |
| "tokens_trained": 0.458709112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.042550173746542796, |
| "grad_norm": 215.60699462890625, |
| "loss": 4.5762, |
| "loss_ce": 5.0876851081848145, |
| "loss_region": 0.03198177367448807, |
| "loss_total": 5.119667053222656, |
| "lr": 0.0003649739716746935, |
| "router/selected_tokens_s0": 424.5, |
| "step": 150, |
| "tokens_trained": 0.491469992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.045386851996312316, |
| "grad_norm": 147.6339111328125, |
| "loss": 5.8047, |
| "loss_ce": 8.435795783996582, |
| "loss_region": 0.03364315256476402, |
| "loss_total": 8.469438552856445, |
| "lr": 0.00038946886910252526, |
| "router/selected_tokens_s0": 536.875, |
| "step": 160, |
| "tokens_trained": 0.524234632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.048223530246081836, |
| "grad_norm": 218.0553741455078, |
| "loss": 5.7968, |
| "loss_ce": 6.644444942474365, |
| "loss_region": 0.031727153807878494, |
| "loss_total": 6.676172256469727, |
| "lr": 0.0004139637665303571, |
| "router/selected_tokens_s0": 1833.5, |
| "step": 170, |
| "tokens_trained": 0.556999272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.051060208495851356, |
| "grad_norm": 100.56309509277344, |
| "loss": 6.7503, |
| "loss_ce": 8.332029342651367, |
| "loss_region": 0.03232778236269951, |
| "loss_total": 8.364356994628906, |
| "lr": 0.0004384586639581888, |
| "router/selected_tokens_s0": 1649.75, |
| "step": 180, |
| "tokens_trained": 0.589762952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.053896886745620876, |
| "grad_norm": 157.10765075683594, |
| "loss": 6.4449, |
| "loss_ce": 4.925128936767578, |
| "loss_region": 0.031663134694099426, |
| "loss_total": 4.956791877746582, |
| "lr": 0.0004629535613860206, |
| "router/selected_tokens_s0": 1687.375, |
| "step": 190, |
| "tokens_trained": 0.622527592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.056733564995390395, |
| "grad_norm": 83.81340026855469, |
| "loss": 3.7524, |
| "loss_ce": 5.0940961837768555, |
| "loss_region": 0.02894311398267746, |
| "loss_total": 5.123039245605469, |
| "lr": 0.00048744845881385244, |
| "router/selected_tokens_s0": 3074.125, |
| "step": 200, |
| "tokens_trained": 0.655293032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.059570243245159915, |
| "grad_norm": 169.4013671875, |
| "loss": 5.9481, |
| "loss_ce": 9.220865249633789, |
| "loss_region": 0.02949724718928337, |
| "loss_total": 9.250362396240234, |
| "lr": 0.0005119433562416841, |
| "router/selected_tokens_s0": 3610.375, |
| "step": 210, |
| "tokens_trained": 0.688057672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.062406921494929435, |
| "grad_norm": 80.7753677368164, |
| "loss": 5.1122, |
| "loss_ce": 3.287958860397339, |
| "loss_region": 0.029488109052181244, |
| "loss_total": 3.3174469470977783, |
| "lr": 0.0005364382536695159, |
| "router/selected_tokens_s0": 2584.75, |
| "step": 220, |
| "tokens_trained": 0.720823112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.06524359974469895, |
| "grad_norm": 89.39635467529297, |
| "loss": 3.3047, |
| "loss_ce": 2.1086361408233643, |
| "loss_region": 0.029821382835507393, |
| "loss_total": 2.1384575366973877, |
| "lr": 0.0005609331510973477, |
| "router/selected_tokens_s0": 3991.5, |
| "step": 230, |
| "tokens_trained": 0.753588552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.06808027799446847, |
| "grad_norm": 90.892333984375, |
| "loss": 4.2563, |
| "loss_ce": 2.7003867626190186, |
| "loss_region": 0.030828693881630898, |
| "loss_total": 2.731215476989746, |
| "lr": 0.0005854280485251795, |
| "router/selected_tokens_s0": 4964.125, |
| "step": 240, |
| "tokens_trained": 0.786353992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.070916956244238, |
| "grad_norm": 86.70359802246094, |
| "loss": 2.8849, |
| "loss_ce": 3.55375599861145, |
| "loss_region": 0.029162542894482613, |
| "loss_total": 3.582918643951416, |
| "lr": 0.0006099229459530113, |
| "router/selected_tokens_s0": 2891.75, |
| "step": 250, |
| "tokens_trained": 0.819119432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07375363449400751, |
| "grad_norm": 106.11075592041016, |
| "loss": 4.4058, |
| "loss_ce": 5.333348751068115, |
| "loss_region": 0.029971925541758537, |
| "loss_total": 5.363320827484131, |
| "lr": 0.0006344178433808431, |
| "router/selected_tokens_s0": 4181.375, |
| "step": 260, |
| "tokens_trained": 0.851884072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07659031274377703, |
| "grad_norm": 75.7653579711914, |
| "loss": 3.6076, |
| "loss_ce": 2.3445212841033936, |
| "loss_region": 0.029431568458676338, |
| "loss_total": 2.373952865600586, |
| "lr": 0.0006589127408086749, |
| "router/selected_tokens_s0": 3440.0, |
| "step": 270, |
| "tokens_trained": 0.884649512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.07942699099354655, |
| "grad_norm": 95.4271469116211, |
| "loss": 2.8447, |
| "loss_ce": 3.030097007751465, |
| "loss_region": 0.030556708574295044, |
| "loss_total": 3.0606536865234375, |
| "lr": 0.0006834076382365066, |
| "router/selected_tokens_s0": 4730.5, |
| "step": 280, |
| "tokens_trained": 0.917414936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.08226366924331607, |
| "grad_norm": 74.673828125, |
| "loss": 2.0288, |
| "loss_ce": 2.1509435176849365, |
| "loss_region": 0.028712084516882896, |
| "loss_total": 2.1796555519104004, |
| "lr": 0.0007079025356643384, |
| "router/selected_tokens_s0": 2658.625, |
| "step": 290, |
| "tokens_trained": 0.950180376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.08510034749308559, |
| "grad_norm": 77.71709442138672, |
| "loss": 2.0227, |
| "loss_ce": 2.286048650741577, |
| "loss_region": 0.03060404770076275, |
| "loss_total": 2.316652774810791, |
| "lr": 0.0007323974330921702, |
| "router/selected_tokens_s0": 4752.0, |
| "step": 300, |
| "tokens_trained": 0.982945816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.08793702574285511, |
| "grad_norm": 55.31558609008789, |
| "loss": 2.1281, |
| "loss_ce": 2.0437748432159424, |
| "loss_region": 0.030610591173171997, |
| "loss_total": 2.074385404586792, |
| "lr": 0.000756892330520002, |
| "router/selected_tokens_s0": 4748.625, |
| "step": 310, |
| "tokens_trained": 1.015711256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09077370399262463, |
| "grad_norm": 77.07698059082031, |
| "loss": 2.5761, |
| "loss_ce": 2.7218589782714844, |
| "loss_region": 0.03093603625893593, |
| "loss_total": 2.7527949810028076, |
| "lr": 0.0007813872279478337, |
| "router/selected_tokens_s0": 4946.625, |
| "step": 320, |
| "tokens_trained": 1.048476696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09361038224239415, |
| "grad_norm": 47.57994842529297, |
| "loss": 2.239, |
| "loss_ce": 1.9163914918899536, |
| "loss_region": 0.029897142201662064, |
| "loss_total": 1.9462885856628418, |
| "lr": 0.0008058821253756655, |
| "router/selected_tokens_s0": 4135.875, |
| "step": 330, |
| "tokens_trained": 1.081242136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09644706049216367, |
| "grad_norm": 58.62579345703125, |
| "loss": 2.8423, |
| "loss_ce": 3.2828376293182373, |
| "loss_region": 0.03434763103723526, |
| "loss_total": 3.317185163497925, |
| "lr": 0.0008303770228034974, |
| "router/selected_tokens_s0": 6686.5, |
| "step": 340, |
| "tokens_trained": 1.114007576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.09928373874193319, |
| "grad_norm": 34.5246696472168, |
| "loss": 2.5891, |
| "loss_ce": 1.537825345993042, |
| "loss_region": 0.02885586954653263, |
| "loss_total": 1.5666812658309937, |
| "lr": 0.0008548719202313291, |
| "router/selected_tokens_s0": 154.125, |
| "step": 350, |
| "tokens_trained": 1.146773016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.10212041699170271, |
| "grad_norm": 37.228973388671875, |
| "loss": 2.7756, |
| "loss_ce": 1.9871504306793213, |
| "loss_region": 0.029301652684807777, |
| "loss_total": 2.0164520740509033, |
| "lr": 0.0008793668176591608, |
| "router/selected_tokens_s0": 3631.75, |
| "step": 360, |
| "tokens_trained": 1.179538456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.10495709524147223, |
| "grad_norm": 30.546344757080078, |
| "loss": 2.4884, |
| "loss_ce": 1.4886701107025146, |
| "loss_region": 0.031588103622198105, |
| "loss_total": 1.5202581882476807, |
| "lr": 0.0009038617150869926, |
| "router/selected_tokens_s0": 5236.625, |
| "step": 370, |
| "tokens_trained": 1.212303896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.10779377349124175, |
| "grad_norm": 45.68803405761719, |
| "loss": 2.8937, |
| "loss_ce": 2.285705804824829, |
| "loss_region": 0.030362222343683243, |
| "loss_total": 2.316067934036255, |
| "lr": 0.0009283566125148244, |
| "router/selected_tokens_s0": 4493.625, |
| "step": 380, |
| "tokens_trained": 1.245068536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11063045174101127, |
| "grad_norm": 32.428009033203125, |
| "loss": 1.9186, |
| "loss_ce": 1.5672893524169922, |
| "loss_region": 0.03746495023369789, |
| "loss_total": 1.6047543287277222, |
| "lr": 0.0009528515099426562, |
| "router/selected_tokens_s0": 8134.375, |
| "step": 390, |
| "tokens_trained": 1.277833176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11346712999078079, |
| "grad_norm": 35.54498291015625, |
| "loss": 1.6959, |
| "loss_ce": 1.6413251161575317, |
| "loss_region": 0.026098042726516724, |
| "loss_total": 1.667423129081726, |
| "lr": 0.000977346407370488, |
| "router/selected_tokens_s0": 625.5, |
| "step": 400, |
| "tokens_trained": 1.310598616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11630380824055031, |
| "grad_norm": 8.186758041381836, |
| "loss": 1.671, |
| "loss_ce": 1.324172019958496, |
| "loss_region": 0.03537043184041977, |
| "loss_total": 1.3595424890518188, |
| "lr": 0.0010018413047983197, |
| "router/selected_tokens_s0": 7117.75, |
| "step": 410, |
| "tokens_trained": 1.343364056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.11914048649031983, |
| "grad_norm": 32.364845275878906, |
| "loss": 1.7487, |
| "loss_ce": 1.6946724653244019, |
| "loss_region": 0.030674295499920845, |
| "loss_total": 1.7253468036651611, |
| "lr": 0.0010263362022261515, |
| "router/selected_tokens_s0": 4591.75, |
| "step": 420, |
| "tokens_trained": 1.376129496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.12197716474008935, |
| "grad_norm": 51.924861907958984, |
| "loss": 1.6652, |
| "loss_ce": 1.7081111669540405, |
| "loss_region": 0.029956262558698654, |
| "loss_total": 1.738067388534546, |
| "lr": 0.0010508310996539833, |
| "router/selected_tokens_s0": 4165.25, |
| "step": 430, |
| "tokens_trained": 1.408889864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.12481384298985887, |
| "grad_norm": 31.08187484741211, |
| "loss": 1.6269, |
| "loss_ce": 1.688795804977417, |
| "loss_region": 0.030442532151937485, |
| "loss_total": 1.71923828125, |
| "lr": 0.0010753259970818151, |
| "router/selected_tokens_s0": 4528.875, |
| "step": 440, |
| "tokens_trained": 1.441655304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1276505212396284, |
| "grad_norm": 9.750688552856445, |
| "loss": 1.646, |
| "loss_ce": 1.342025637626648, |
| "loss_region": 0.0289932768791914, |
| "loss_total": 1.371018886566162, |
| "lr": 0.001099820894509647, |
| "router/selected_tokens_s0": 3472.375, |
| "step": 450, |
| "tokens_trained": 1.474420744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1304871994893979, |
| "grad_norm": 69.62458038330078, |
| "loss": 2.646, |
| "loss_ce": 2.835515022277832, |
| "loss_region": 0.03730851039290428, |
| "loss_total": 2.872823476791382, |
| "lr": 0.0011243157919374788, |
| "router/selected_tokens_s0": 7822.125, |
| "step": 460, |
| "tokens_trained": 1.507186184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13332387773916743, |
| "grad_norm": 62.241451263427734, |
| "loss": 2.2121, |
| "loss_ce": 1.9173500537872314, |
| "loss_region": 0.033008284866809845, |
| "loss_total": 1.9503583908081055, |
| "lr": 0.0011488106893653104, |
| "router/selected_tokens_s0": 5854.125, |
| "step": 470, |
| "tokens_trained": 1.539950832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13616055598893695, |
| "grad_norm": 36.45135498046875, |
| "loss": 1.8122, |
| "loss_ce": 1.579708456993103, |
| "loss_region": 0.030225276947021484, |
| "loss_total": 1.6099337339401245, |
| "lr": 0.0011733055867931422, |
| "router/selected_tokens_s0": 4330.5, |
| "step": 480, |
| "tokens_trained": 1.572715472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.13899723423870647, |
| "grad_norm": 13.028325080871582, |
| "loss": 1.5027, |
| "loss_ce": 1.357754111289978, |
| "loss_region": 0.03526536747813225, |
| "loss_total": 1.393019437789917, |
| "lr": 0.001197800484220974, |
| "router/selected_tokens_s0": 7119.25, |
| "step": 490, |
| "tokens_trained": 1.605480912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.141833912488476, |
| "grad_norm": 24.705984115600586, |
| "loss": 1.6096, |
| "loss_ce": 1.6097279787063599, |
| "loss_region": 0.02911153808236122, |
| "loss_total": 1.6388394832611084, |
| "lr": 0.0012222953816488059, |
| "router/selected_tokens_s0": 3648.75, |
| "step": 500, |
| "tokens_trained": 1.638244216 |
| }, |
| { |
| "epoch": 0.141833912488476, |
| "eval_ppl": 4.8348835473380465, |
| "eval_runtime": 2.9238, |
| "step": 500, |
| "tokens_trained": 1.638244216 |
| }, |
| { |
| "epoch": 0.141833912488476, |
| "eval_F": 0.3934690889573574, |
| "eval_F_cds": 0.29905151571508276, |
| "eval_F_dig": 0.4478214443836758, |
| "eval_F_exon": 0.39103450221457386, |
| "eval_F_intron": 0.40873021991492037, |
| "eval_F_nig": 0.4262229153142855, |
| "eval_F_promoter": 0.30306008909923465, |
| "eval_F_utr": 0.3906123042448191, |
| "eval_G": 0.49025372407568035, |
| "eval_G_cds": 0.48331595902636837, |
| "eval_G_dig": 0.49727705981261555, |
| "eval_G_exon": 0.4909996295084916, |
| "eval_G_intron": 0.4915825135015993, |
| "eval_G_nig": 0.49304083637658525, |
| "eval_G_promoter": 0.48200754687828323, |
| "eval_G_utr": 0.4901697268782234, |
| "eval_avg_bp_per_token": 2.541495705926663, |
| "eval_bp_per_token/cds": 3.343905472636816, |
| "eval_bp_per_token/dig": 2.2330328583890666, |
| "eval_bp_per_token/exon": 2.5573190967462667, |
| "eval_bp_per_token/intron": 2.4466015755041455, |
| "eval_bp_per_token/nig": 2.346190136826938, |
| "eval_bp_per_token/promoter": 3.299675661589863, |
| "eval_bp_per_token/utr": 2.56008320560543, |
| "eval_ppl_cds": 5.567015659246301, |
| "eval_ppl_dig": 4.898425899350941, |
| "eval_ppl_exon": 4.9662705320329295, |
| "eval_ppl_intron": 4.767518067357663, |
| "eval_ppl_nig": 4.6987085494689405, |
| "eval_ppl_promoter": 5.216405144788708, |
| "eval_ppl_utr": 4.913846632347962, |
| "step": 500, |
| "tokens_trained": 1.638244216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1446705907382455, |
| "grad_norm": 37.46432113647461, |
| "loss": 1.5582, |
| "loss_ce": 1.5548115968704224, |
| "loss_region": 0.02565930411219597, |
| "loss_total": 1.5804709196090698, |
| "lr": 0.0012243786686061229, |
| "router/selected_tokens_s0": 1004.25, |
| "step": 510, |
| "tokens_trained": 1.671005424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.14750726898801503, |
| "grad_norm": 27.722349166870117, |
| "loss": 1.5672, |
| "loss_ce": 1.478359341621399, |
| "loss_region": 0.031882915645837784, |
| "loss_total": 1.510242223739624, |
| "lr": 0.0012239717766222718, |
| "router/selected_tokens_s0": 5380.75, |
| "step": 520, |
| "tokens_trained": 1.703770864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.15034394723778455, |
| "grad_norm": 26.949983596801758, |
| "loss": 1.6157, |
| "loss_ce": 1.4986213445663452, |
| "loss_region": 0.03651271015405655, |
| "loss_total": 1.5351340770721436, |
| "lr": 0.001223564884638421, |
| "router/selected_tokens_s0": 7781.0, |
| "step": 530, |
| "tokens_trained": 1.736536304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.15318062548755407, |
| "grad_norm": 28.232316970825195, |
| "loss": 1.6637, |
| "loss_ce": 1.4607714414596558, |
| "loss_region": 0.025137916207313538, |
| "loss_total": 1.485909342765808, |
| "lr": 0.00122315799265457, |
| "router/selected_tokens_s0": 612.875, |
| "step": 540, |
| "tokens_trained": 1.769301744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1560173037373236, |
| "grad_norm": 23.33485221862793, |
| "loss": 1.4993, |
| "loss_ce": 1.4412897825241089, |
| "loss_region": 0.035474810749292374, |
| "loss_total": 1.4767645597457886, |
| "lr": 0.001222751100670719, |
| "router/selected_tokens_s0": 7357.5, |
| "step": 550, |
| "tokens_trained": 1.802067184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1588539819870931, |
| "grad_norm": 21.005512237548828, |
| "loss": 1.4335, |
| "loss_ce": 1.3612841367721558, |
| "loss_region": 0.029854778200387955, |
| "loss_total": 1.3911389112472534, |
| "lr": 0.001222344208686868, |
| "router/selected_tokens_s0": 4172.125, |
| "step": 560, |
| "tokens_trained": 1.834832624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.16169066023686263, |
| "grad_norm": 19.53492546081543, |
| "loss": 1.4383, |
| "loss_ce": 1.4045627117156982, |
| "loss_region": 0.02937491238117218, |
| "loss_total": 1.433937668800354, |
| "lr": 0.0012219373167030169, |
| "router/selected_tokens_s0": 3881.875, |
| "step": 570, |
| "tokens_trained": 1.867598064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.16452733848663215, |
| "grad_norm": 25.31780242919922, |
| "loss": 1.7004, |
| "loss_ce": 1.591187834739685, |
| "loss_region": 0.03149839863181114, |
| "loss_total": 1.6226862668991089, |
| "lr": 0.0012215304247191658, |
| "router/selected_tokens_s0": 5153.875, |
| "step": 580, |
| "tokens_trained": 1.900363504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.16736401673640167, |
| "grad_norm": 16.421045303344727, |
| "loss": 1.5092, |
| "loss_ce": 1.2439810037612915, |
| "loss_region": 0.02931862138211727, |
| "loss_total": 1.2732995748519897, |
| "lr": 0.0012211235327353148, |
| "router/selected_tokens_s0": 3840.5, |
| "step": 590, |
| "tokens_trained": 1.933128944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17020069498617119, |
| "grad_norm": 25.38547706604004, |
| "loss": 1.5893, |
| "loss_ce": 1.5482516288757324, |
| "loss_region": 0.025499241426587105, |
| "loss_total": 1.5737508535385132, |
| "lr": 0.0012207166407514638, |
| "router/selected_tokens_s0": 1237.25, |
| "step": 600, |
| "tokens_trained": 1.96589048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1730373732359407, |
| "grad_norm": 14.48205852508545, |
| "loss": 1.3098, |
| "loss_ce": 1.2969579696655273, |
| "loss_region": 0.03318855166435242, |
| "loss_total": 1.3301465511322021, |
| "lr": 0.0012203097487676127, |
| "router/selected_tokens_s0": 6087.625, |
| "step": 610, |
| "tokens_trained": 1.99865592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17587405148571023, |
| "grad_norm": 10.29987907409668, |
| "loss": 1.2844, |
| "loss_ce": 1.2728289365768433, |
| "loss_region": 0.03153729811310768, |
| "loss_total": 1.3043662309646606, |
| "lr": 0.0012199028567837617, |
| "router/selected_tokens_s0": 5177.0, |
| "step": 620, |
| "tokens_trained": 2.03142136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.17871072973547975, |
| "grad_norm": 14.114507675170898, |
| "loss": 1.2792, |
| "loss_ce": 1.2729930877685547, |
| "loss_region": 0.03177153319120407, |
| "loss_total": 1.3047646284103394, |
| "lr": 0.0012194959647999107, |
| "router/selected_tokens_s0": 5318.5, |
| "step": 630, |
| "tokens_trained": 2.0641868 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.18154740798524927, |
| "grad_norm": 35.086570739746094, |
| "loss": 1.327, |
| "loss_ce": 1.4959396123886108, |
| "loss_region": 0.031267955899238586, |
| "loss_total": 1.527207612991333, |
| "lr": 0.0012190890728160596, |
| "router/selected_tokens_s0": 5018.625, |
| "step": 640, |
| "tokens_trained": 2.09695224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.18438408623501878, |
| "grad_norm": 12.891855239868164, |
| "loss": 1.3231, |
| "loss_ce": 1.251932978630066, |
| "loss_region": 0.030069500207901, |
| "loss_total": 1.2820024490356445, |
| "lr": 0.0012186821808322086, |
| "router/selected_tokens_s0": 4308.125, |
| "step": 650, |
| "tokens_trained": 2.12971768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.1872207644847883, |
| "grad_norm": 16.94170570373535, |
| "loss": 1.273, |
| "loss_ce": 1.303807258605957, |
| "loss_region": 0.030183279886841774, |
| "loss_total": 1.3339905738830566, |
| "lr": 0.0012182752888483576, |
| "router/selected_tokens_s0": 4374.375, |
| "step": 660, |
| "tokens_trained": 2.16248312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19005744273455782, |
| "grad_norm": 8.820389747619629, |
| "loss": 1.291, |
| "loss_ce": 1.2488102912902832, |
| "loss_region": 0.030493643134832382, |
| "loss_total": 1.2793039083480835, |
| "lr": 0.0012178683968645065, |
| "router/selected_tokens_s0": 4566.875, |
| "step": 670, |
| "tokens_trained": 2.19524856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19289412098432734, |
| "grad_norm": 12.072690963745117, |
| "loss": 1.2551, |
| "loss_ce": 1.257431149482727, |
| "loss_region": 0.02906171977519989, |
| "loss_total": 1.2864928245544434, |
| "lr": 0.0012174615048806555, |
| "router/selected_tokens_s0": 3676.75, |
| "step": 680, |
| "tokens_trained": 2.228014 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19573079923409686, |
| "grad_norm": 3.4100522994995117, |
| "loss": 1.2685, |
| "loss_ce": 1.217279314994812, |
| "loss_region": 0.03290281072258949, |
| "loss_total": 1.2501821517944336, |
| "lr": 0.0012170546128968045, |
| "router/selected_tokens_s0": 5992.0, |
| "step": 690, |
| "tokens_trained": 2.26077944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.19856747748386638, |
| "grad_norm": 6.675322532653809, |
| "loss": 1.2504, |
| "loss_ce": 1.1835153102874756, |
| "loss_region": 0.031250134110450745, |
| "loss_total": 1.2147654294967651, |
| "lr": 0.0012166477209129534, |
| "router/selected_tokens_s0": 5040.625, |
| "step": 700, |
| "tokens_trained": 2.29354488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2014041557336359, |
| "grad_norm": 21.388051986694336, |
| "loss": 1.267, |
| "loss_ce": 1.3746044635772705, |
| "loss_region": 0.027913136407732964, |
| "loss_total": 1.402517557144165, |
| "lr": 0.0012162408289291026, |
| "router/selected_tokens_s0": 2922.75, |
| "step": 710, |
| "tokens_trained": 2.32631032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20424083398340542, |
| "grad_norm": 12.917130470275879, |
| "loss": 1.3025, |
| "loss_ce": 1.2145620584487915, |
| "loss_region": 0.031132886186242104, |
| "loss_total": 1.2456949949264526, |
| "lr": 0.0012158339369452516, |
| "router/selected_tokens_s0": 4968.875, |
| "step": 720, |
| "tokens_trained": 2.35907576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20707751223317494, |
| "grad_norm": 8.888051986694336, |
| "loss": 1.2457, |
| "loss_ce": 1.185524821281433, |
| "loss_region": 0.03197301924228668, |
| "loss_total": 1.2174978256225586, |
| "lr": 0.0012154270449614005, |
| "router/selected_tokens_s0": 5463.0, |
| "step": 730, |
| "tokens_trained": 2.3918396 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.20991419048294446, |
| "grad_norm": 13.051305770874023, |
| "loss": 1.2446, |
| "loss_ce": 1.1078685522079468, |
| "loss_region": 0.0308807585388422, |
| "loss_total": 1.138749361038208, |
| "lr": 0.0012150201529775495, |
| "router/selected_tokens_s0": 4844.0, |
| "step": 740, |
| "tokens_trained": 2.424600048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.21275086873271398, |
| "grad_norm": 6.593105316162109, |
| "loss": 1.2851, |
| "loss_ce": 1.255039930343628, |
| "loss_region": 0.029710784554481506, |
| "loss_total": 1.2847506999969482, |
| "lr": 0.0012146132609936982, |
| "router/selected_tokens_s0": 4083.875, |
| "step": 750, |
| "tokens_trained": 2.457364688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2155875469824835, |
| "grad_norm": 3.900451183319092, |
| "loss": 1.2291, |
| "loss_ce": 1.1926592588424683, |
| "loss_region": 0.030736476182937622, |
| "loss_total": 1.2233957052230835, |
| "lr": 0.0012142063690098472, |
| "router/selected_tokens_s0": 4719.25, |
| "step": 760, |
| "tokens_trained": 2.490130128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.21842422523225302, |
| "grad_norm": 8.001019477844238, |
| "loss": 1.2285, |
| "loss_ce": 1.1942657232284546, |
| "loss_region": 0.03041156381368637, |
| "loss_total": 1.224677324295044, |
| "lr": 0.0012137994770259962, |
| "router/selected_tokens_s0": 4525.75, |
| "step": 770, |
| "tokens_trained": 2.522895568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22126090348202254, |
| "grad_norm": 5.169371128082275, |
| "loss": 1.2072, |
| "loss_ce": 1.2079213857650757, |
| "loss_region": 0.031087037175893784, |
| "loss_total": 1.2390084266662598, |
| "lr": 0.0012133925850421454, |
| "router/selected_tokens_s0": 4938.25, |
| "step": 780, |
| "tokens_trained": 2.555659392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22409758173179206, |
| "grad_norm": 8.434707641601562, |
| "loss": 1.2079, |
| "loss_ce": 1.2038490772247314, |
| "loss_region": 0.02821769006550312, |
| "loss_total": 1.2320667505264282, |
| "lr": 0.0012129856930582943, |
| "router/selected_tokens_s0": 3119.875, |
| "step": 790, |
| "tokens_trained": 2.588422136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.22693425998156158, |
| "grad_norm": 8.451072692871094, |
| "loss": 1.2072, |
| "loss_ce": 1.2617510557174683, |
| "loss_region": 0.0316130593419075, |
| "loss_total": 1.29336416721344, |
| "lr": 0.0012125788010744433, |
| "router/selected_tokens_s0": 5238.75, |
| "step": 800, |
| "tokens_trained": 2.621187576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2297709382313311, |
| "grad_norm": 12.750673294067383, |
| "loss": 1.2283, |
| "loss_ce": 1.2528263330459595, |
| "loss_region": 0.03109751269221306, |
| "loss_total": 1.283923864364624, |
| "lr": 0.0012121719090905923, |
| "router/selected_tokens_s0": 4940.75, |
| "step": 810, |
| "tokens_trained": 2.653953016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.23260761648110062, |
| "grad_norm": 10.307655334472656, |
| "loss": 1.2544, |
| "loss_ce": 1.2496147155761719, |
| "loss_region": 0.02913491614162922, |
| "loss_total": 1.2787495851516724, |
| "lr": 0.0012117650171067412, |
| "router/selected_tokens_s0": 3717.75, |
| "step": 820, |
| "tokens_trained": 2.686718456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.23544429473087014, |
| "grad_norm": 0.6592714190483093, |
| "loss": 1.2022, |
| "loss_ce": 1.0889158248901367, |
| "loss_region": 0.031184613704681396, |
| "loss_total": 1.120100498199463, |
| "lr": 0.0012113581251228902, |
| "router/selected_tokens_s0": 5037.375, |
| "step": 830, |
| "tokens_trained": 2.71948036 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.23828097298063966, |
| "grad_norm": 3.0865817070007324, |
| "loss": 1.1958, |
| "loss_ce": 1.267112374305725, |
| "loss_region": 0.02916303649544716, |
| "loss_total": 1.2962753772735596, |
| "lr": 0.0012109512331390391, |
| "router/selected_tokens_s0": 3734.375, |
| "step": 840, |
| "tokens_trained": 2.7522458 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24111765123040918, |
| "grad_norm": 2.513849973678589, |
| "loss": 1.2014, |
| "loss_ce": 1.108485221862793, |
| "loss_region": 0.0302209984511137, |
| "loss_total": 1.1387062072753906, |
| "lr": 0.0012105443411551881, |
| "router/selected_tokens_s0": 4417.125, |
| "step": 850, |
| "tokens_trained": 2.78501124 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2439543294801787, |
| "grad_norm": 5.594594478607178, |
| "loss": 1.206, |
| "loss_ce": 1.1815146207809448, |
| "loss_region": 0.031508028507232666, |
| "loss_total": 1.2130227088928223, |
| "lr": 0.001210137449171337, |
| "router/selected_tokens_s0": 5212.875, |
| "step": 860, |
| "tokens_trained": 2.81777668 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24679100772994822, |
| "grad_norm": 2.2655980587005615, |
| "loss": 1.1897, |
| "loss_ce": 1.2304372787475586, |
| "loss_region": 0.031548820436000824, |
| "loss_total": 1.2619861364364624, |
| "lr": 0.001209730557187486, |
| "router/selected_tokens_s0": 5213.25, |
| "step": 870, |
| "tokens_trained": 2.85054212 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.24962768597971774, |
| "grad_norm": 4.335860252380371, |
| "loss": 1.1897, |
| "loss_ce": 1.2337130308151245, |
| "loss_region": 0.02997858263552189, |
| "loss_total": 1.2636916637420654, |
| "lr": 0.001209323665203635, |
| "router/selected_tokens_s0": 4252.5, |
| "step": 880, |
| "tokens_trained": 2.88330756 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.25246436422948726, |
| "grad_norm": 12.377155303955078, |
| "loss": 1.1966, |
| "loss_ce": 1.1369762420654297, |
| "loss_region": 0.029613491147756577, |
| "loss_total": 1.1665897369384766, |
| "lr": 0.001208916773219784, |
| "router/selected_tokens_s0": 4027.75, |
| "step": 890, |
| "tokens_trained": 2.916073 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2553010424792568, |
| "grad_norm": 7.238094806671143, |
| "loss": 1.2143, |
| "loss_ce": 1.1700671911239624, |
| "loss_region": 0.029774101451039314, |
| "loss_total": 1.1998412609100342, |
| "lr": 0.001208509881235933, |
| "router/selected_tokens_s0": 4116.875, |
| "step": 900, |
| "tokens_trained": 2.94883828 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2581377207290263, |
| "grad_norm": 3.2694191932678223, |
| "loss": 1.1892, |
| "loss_ce": 1.1454379558563232, |
| "loss_region": 0.029824109748005867, |
| "loss_total": 1.1752620935440063, |
| "lr": 0.001208102989252082, |
| "router/selected_tokens_s0": 4152.625, |
| "step": 910, |
| "tokens_trained": 2.981597288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2609743989787958, |
| "grad_norm": 9.457625389099121, |
| "loss": 1.2038, |
| "loss_ce": 1.3160332441329956, |
| "loss_region": 0.030873605981469154, |
| "loss_total": 1.3469069004058838, |
| "lr": 0.0012076960972682309, |
| "router/selected_tokens_s0": 4797.5, |
| "step": 920, |
| "tokens_trained": 3.014362456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.26381107722856534, |
| "grad_norm": 4.293655872344971, |
| "loss": 1.1978, |
| "loss_ce": 1.1440948247909546, |
| "loss_region": 0.02935035713016987, |
| "loss_total": 1.173445224761963, |
| "lr": 0.0012072892052843798, |
| "router/selected_tokens_s0": 3829.5, |
| "step": 930, |
| "tokens_trained": 3.047127096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.26664775547833486, |
| "grad_norm": 1.7136532068252563, |
| "loss": 1.1906, |
| "loss_ce": 1.1432236433029175, |
| "loss_region": 0.028851088136434555, |
| "loss_total": 1.1720746755599976, |
| "lr": 0.0012068823133005288, |
| "router/selected_tokens_s0": 3479.125, |
| "step": 940, |
| "tokens_trained": 3.079892536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2694844337281044, |
| "grad_norm": 4.0433244705200195, |
| "loss": 1.1868, |
| "loss_ce": 1.168936014175415, |
| "loss_region": 0.02876598760485649, |
| "loss_total": 1.1977020502090454, |
| "lr": 0.0012064754213166778, |
| "router/selected_tokens_s0": 3396.25, |
| "step": 950, |
| "tokens_trained": 3.11265336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2723211119778739, |
| "grad_norm": 6.829047203063965, |
| "loss": 1.1828, |
| "loss_ce": 1.2480430603027344, |
| "loss_region": 0.02934931591153145, |
| "loss_total": 1.2773923873901367, |
| "lr": 0.001206068529332827, |
| "router/selected_tokens_s0": 3843.75, |
| "step": 960, |
| "tokens_trained": 3.1454188 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2751577902276434, |
| "grad_norm": 5.5668439865112305, |
| "loss": 1.1882, |
| "loss_ce": 1.1349202394485474, |
| "loss_region": 0.0297370757907629, |
| "loss_total": 1.1646573543548584, |
| "lr": 0.001205661637348976, |
| "router/selected_tokens_s0": 4102.5, |
| "step": 970, |
| "tokens_trained": 3.17818424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.27799446847741294, |
| "grad_norm": 3.729381561279297, |
| "loss": 1.1839, |
| "loss_ce": 1.1995916366577148, |
| "loss_region": 0.03041483648121357, |
| "loss_total": 1.230006456375122, |
| "lr": 0.0012052547453651249, |
| "router/selected_tokens_s0": 4537.125, |
| "step": 980, |
| "tokens_trained": 3.21094968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.28083114672718246, |
| "grad_norm": 2.7978885173797607, |
| "loss": 1.1739, |
| "loss_ce": 1.1886447668075562, |
| "loss_region": 0.030223874375224113, |
| "loss_total": 1.218868613243103, |
| "lr": 0.0012048478533812738, |
| "router/selected_tokens_s0": 4418.875, |
| "step": 990, |
| "tokens_trained": 3.24371512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.283667824976952, |
| "grad_norm": 2.7768421173095703, |
| "loss": 1.1695, |
| "loss_ce": 1.1791244745254517, |
| "loss_region": 0.03016069531440735, |
| "loss_total": 1.2092851400375366, |
| "lr": 0.0012044409613974226, |
| "router/selected_tokens_s0": 4373.0, |
| "step": 1000, |
| "tokens_trained": 3.27648056 |
| }, |
| { |
| "epoch": 0.283667824976952, |
| "eval_ppl": 3.1457362037176693, |
| "eval_runtime": 2.5704, |
| "step": 1000, |
| "tokens_trained": 3.27648056 |
| }, |
| { |
| "epoch": 0.283667824976952, |
| "eval_F": 0.35905403615092213, |
| "eval_F_cds": 0.3614752043728926, |
| "eval_F_dig": 0.36203349219991143, |
| "eval_F_exon": 0.3609332242502892, |
| "eval_F_intron": 0.3608845011093654, |
| "eval_F_nig": 0.36360427639485304, |
| "eval_F_promoter": 0.3446594753609168, |
| "eval_F_utr": 0.35993294503032014, |
| "eval_G": 0.4747950002316863, |
| "eval_G_cds": 0.4875693056072159, |
| "eval_G_dig": 0.4165539971384483, |
| "eval_G_exon": 0.4825983323253731, |
| "eval_G_intron": 0.4746974505122046, |
| "eval_G_nig": 0.4719204972271849, |
| "eval_G_promoter": 0.47860970096474814, |
| "eval_G_utr": 0.4806883865646302, |
| "eval_avg_bp_per_token": 2.785096111772066, |
| "eval_bp_per_token/cds": 2.7664414817466, |
| "eval_bp_per_token/dig": 2.7621753830659665, |
| "eval_bp_per_token/exon": 2.770595591683602, |
| "eval_bp_per_token/intron": 2.7709696507497057, |
| "eval_bp_per_token/nig": 2.7502426811780905, |
| "eval_bp_per_token/promoter": 2.90141450181467, |
| "eval_bp_per_token/utr": 2.77829527362593, |
| "eval_ppl_cds": 3.7937951600140427, |
| "eval_ppl_dig": 1.292568207392483, |
| "eval_ppl_exon": 3.5063285971819904, |
| "eval_ppl_intron": 3.1623742022954864, |
| "eval_ppl_nig": 3.03123217862896, |
| "eval_ppl_promoter": 3.420873133996253, |
| "eval_ppl_utr": 3.4079030610184535, |
| "step": 1000, |
| "tokens_trained": 3.27648056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2865045032267215, |
| "grad_norm": 1.750190258026123, |
| "loss": 1.1681, |
| "loss_ce": 1.1951100826263428, |
| "loss_region": 0.029561972245573997, |
| "loss_total": 1.2246720790863037, |
| "lr": 0.0012040340694135716, |
| "router/selected_tokens_s0": 3974.5, |
| "step": 1010, |
| "tokens_trained": 3.309246 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.289341181476491, |
| "grad_norm": 5.037286758422852, |
| "loss": 1.1855, |
| "loss_ce": 1.1606330871582031, |
| "loss_region": 0.030172061175107956, |
| "loss_total": 1.190805196762085, |
| "lr": 0.0012036271774297205, |
| "router/selected_tokens_s0": 4388.375, |
| "step": 1020, |
| "tokens_trained": 3.34201144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.29217785972626054, |
| "grad_norm": 5.963747024536133, |
| "loss": 1.1794, |
| "loss_ce": 1.116599678993225, |
| "loss_region": 0.030543407425284386, |
| "loss_total": 1.1471431255340576, |
| "lr": 0.0012032202854458697, |
| "router/selected_tokens_s0": 4640.0, |
| "step": 1030, |
| "tokens_trained": 3.37477688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.29501453797603006, |
| "grad_norm": 4.626336574554443, |
| "loss": 1.1934, |
| "loss_ce": 1.094927430152893, |
| "loss_region": 0.02999301068484783, |
| "loss_total": 1.1249204874038696, |
| "lr": 0.0012028133934620187, |
| "router/selected_tokens_s0": 4248.5, |
| "step": 1040, |
| "tokens_trained": 3.40754232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.2978512162257996, |
| "grad_norm": 4.208251476287842, |
| "loss": 1.1843, |
| "loss_ce": 1.1818771362304688, |
| "loss_region": 0.030715491622686386, |
| "loss_total": 1.212592601776123, |
| "lr": 0.0012024065014781676, |
| "router/selected_tokens_s0": 4729.75, |
| "step": 1050, |
| "tokens_trained": 3.44030696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3006878944755691, |
| "grad_norm": 2.3673582077026367, |
| "loss": 1.1726, |
| "loss_ce": 1.1216882467269897, |
| "loss_region": 0.030366381630301476, |
| "loss_total": 1.1520546674728394, |
| "lr": 0.0012019996094943166, |
| "router/selected_tokens_s0": 4503.625, |
| "step": 1060, |
| "tokens_trained": 3.4730724 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3035245727253386, |
| "grad_norm": 2.6513352394104004, |
| "loss": 1.1707, |
| "loss_ce": 1.1285063028335571, |
| "loss_region": 0.02974226139485836, |
| "loss_total": 1.1582485437393188, |
| "lr": 0.0012015927175104656, |
| "router/selected_tokens_s0": 4085.375, |
| "step": 1070, |
| "tokens_trained": 3.50583784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.30636125097510813, |
| "grad_norm": 1.0276976823806763, |
| "loss": 1.165, |
| "loss_ce": 1.1330546140670776, |
| "loss_region": 0.029834387823939323, |
| "loss_total": 1.162889003753662, |
| "lr": 0.0012011858255266145, |
| "router/selected_tokens_s0": 4155.5, |
| "step": 1080, |
| "tokens_trained": 3.53860328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.30919792922487765, |
| "grad_norm": 3.4352457523345947, |
| "loss": 1.1759, |
| "loss_ce": 1.153834581375122, |
| "loss_region": 0.030001970008015633, |
| "loss_total": 1.183836579322815, |
| "lr": 0.0012007789335427635, |
| "router/selected_tokens_s0": 4271.375, |
| "step": 1090, |
| "tokens_trained": 3.57136872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3120346074746472, |
| "grad_norm": 3.4334914684295654, |
| "loss": 1.1668, |
| "loss_ce": 1.0656555891036987, |
| "loss_region": 0.03014238551259041, |
| "loss_total": 1.0957980155944824, |
| "lr": 0.0012003720415589125, |
| "router/selected_tokens_s0": 4376.625, |
| "step": 1100, |
| "tokens_trained": 3.60413416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3148712857244167, |
| "grad_norm": 7.573620796203613, |
| "loss": 1.1737, |
| "loss_ce": 1.1206940412521362, |
| "loss_region": 0.030071774497628212, |
| "loss_total": 1.1507657766342163, |
| "lr": 0.0011999651495750614, |
| "router/selected_tokens_s0": 4325.0, |
| "step": 1110, |
| "tokens_trained": 3.6368996 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3177079639741862, |
| "grad_norm": 4.200015544891357, |
| "loss": 1.1705, |
| "loss_ce": 1.1700469255447388, |
| "loss_region": 0.02990192547440529, |
| "loss_total": 1.1999489068984985, |
| "lr": 0.0011995582575912104, |
| "router/selected_tokens_s0": 4194.25, |
| "step": 1120, |
| "tokens_trained": 3.669661712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.32054464222395573, |
| "grad_norm": 5.207011699676514, |
| "loss": 1.1668, |
| "loss_ce": 1.1708717346191406, |
| "loss_region": 0.029880443587899208, |
| "loss_total": 1.2007521390914917, |
| "lr": 0.0011991513656073594, |
| "router/selected_tokens_s0": 4177.25, |
| "step": 1130, |
| "tokens_trained": 3.702426352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.32338132047372525, |
| "grad_norm": 4.160227298736572, |
| "loss": 1.1671, |
| "loss_ce": 1.1502091884613037, |
| "loss_region": 0.030087152495980263, |
| "loss_total": 1.1802963018417358, |
| "lr": 0.0011987444736235083, |
| "router/selected_tokens_s0": 4325.25, |
| "step": 1140, |
| "tokens_trained": 3.735191792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3262179987234948, |
| "grad_norm": 2.3496572971343994, |
| "loss": 1.1578, |
| "loss_ce": 1.0942906141281128, |
| "loss_region": 0.02960728108882904, |
| "loss_total": 1.123897910118103, |
| "lr": 0.0011983375816396573, |
| "router/selected_tokens_s0": 3976.25, |
| "step": 1150, |
| "tokens_trained": 3.767957232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3290546769732643, |
| "grad_norm": 3.0820891857147217, |
| "loss": 1.158, |
| "loss_ce": 1.2191810607910156, |
| "loss_region": 0.030029961839318275, |
| "loss_total": 1.249211072921753, |
| "lr": 0.0011979306896558062, |
| "router/selected_tokens_s0": 4285.125, |
| "step": 1160, |
| "tokens_trained": 3.800722672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3318913552230338, |
| "grad_norm": 1.7340823411941528, |
| "loss": 1.1537, |
| "loss_ce": 1.0748310089111328, |
| "loss_region": 0.030402792617678642, |
| "loss_total": 1.1052337884902954, |
| "lr": 0.0011975237976719552, |
| "router/selected_tokens_s0": 4566.375, |
| "step": 1170, |
| "tokens_trained": 3.833488112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.33472803347280333, |
| "grad_norm": 1.6883597373962402, |
| "loss": 1.1524, |
| "loss_ce": 1.15337073802948, |
| "loss_region": 0.029628688469529152, |
| "loss_total": 1.1829993724822998, |
| "lr": 0.0011971169056881042, |
| "router/selected_tokens_s0": 3994.125, |
| "step": 1180, |
| "tokens_trained": 3.866252752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.33756471172257285, |
| "grad_norm": 1.3079456090927124, |
| "loss": 1.155, |
| "loss_ce": 1.147839903831482, |
| "loss_region": 0.029972558841109276, |
| "loss_total": 1.1778124570846558, |
| "lr": 0.0011967100137042531, |
| "router/selected_tokens_s0": 4250.125, |
| "step": 1190, |
| "tokens_trained": 3.899018184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.34040138997234237, |
| "grad_norm": 2.042187452316284, |
| "loss": 1.1551, |
| "loss_ce": 1.1045622825622559, |
| "loss_region": 0.030126892030239105, |
| "loss_total": 1.134689211845398, |
| "lr": 0.0011963031217204021, |
| "router/selected_tokens_s0": 4366.25, |
| "step": 1200, |
| "tokens_trained": 3.931783624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3432380682221119, |
| "grad_norm": 0.5720299482345581, |
| "loss": 1.1514, |
| "loss_ce": 1.1252881288528442, |
| "loss_region": 0.02972925268113613, |
| "loss_total": 1.155017375946045, |
| "lr": 0.0011958962297365513, |
| "router/selected_tokens_s0": 4055.0, |
| "step": 1210, |
| "tokens_trained": 3.964549064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3460747464718814, |
| "grad_norm": 2.726912498474121, |
| "loss": 1.1481, |
| "loss_ce": 1.0980409383773804, |
| "loss_region": 0.030369114130735397, |
| "loss_total": 1.1284101009368896, |
| "lr": 0.0011954893377527003, |
| "router/selected_tokens_s0": 4549.75, |
| "step": 1220, |
| "tokens_trained": 3.997311912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.34891142472165093, |
| "grad_norm": 1.576530933380127, |
| "loss": 1.1547, |
| "loss_ce": 1.1488255262374878, |
| "loss_region": 0.03008064441382885, |
| "loss_total": 1.1789062023162842, |
| "lr": 0.0011950824457688492, |
| "router/selected_tokens_s0": 4327.125, |
| "step": 1230, |
| "tokens_trained": 4.030077352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.35174810297142045, |
| "grad_norm": 1.7633917331695557, |
| "loss": 1.1491, |
| "loss_ce": 1.0437774658203125, |
| "loss_region": 0.03009728156030178, |
| "loss_total": 1.0738747119903564, |
| "lr": 0.0011946755537849982, |
| "router/selected_tokens_s0": 4352.5, |
| "step": 1240, |
| "tokens_trained": 4.062842792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.35458478122118997, |
| "grad_norm": 0.8599131107330322, |
| "loss": 1.1502, |
| "loss_ce": 1.1635342836380005, |
| "loss_region": 0.030227093026041985, |
| "loss_total": 1.1937613487243652, |
| "lr": 0.001194268661801147, |
| "router/selected_tokens_s0": 4437.875, |
| "step": 1250, |
| "tokens_trained": 4.095608232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3574214594709595, |
| "grad_norm": 2.0207033157348633, |
| "loss": 1.1525, |
| "loss_ce": 1.161281943321228, |
| "loss_region": 0.02980414777994156, |
| "loss_total": 1.1910860538482666, |
| "lr": 0.001193861769817296, |
| "router/selected_tokens_s0": 4113.375, |
| "step": 1260, |
| "tokens_trained": 4.128373672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.360258137720729, |
| "grad_norm": 1.6762081384658813, |
| "loss": 1.1549, |
| "loss_ce": 1.176638126373291, |
| "loss_region": 0.02979988045990467, |
| "loss_total": 1.2064380645751953, |
| "lr": 0.0011934548778334449, |
| "router/selected_tokens_s0": 4110.375, |
| "step": 1270, |
| "tokens_trained": 4.161136768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36309481597049853, |
| "grad_norm": 1.5674160718917847, |
| "loss": 1.1538, |
| "loss_ce": 1.1160061359405518, |
| "loss_region": 0.029819507151842117, |
| "loss_total": 1.1458256244659424, |
| "lr": 0.001193047985849594, |
| "router/selected_tokens_s0": 4122.75, |
| "step": 1280, |
| "tokens_trained": 4.193902208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36593149422026805, |
| "grad_norm": 1.232892394065857, |
| "loss": 1.1499, |
| "loss_ce": 1.192215085029602, |
| "loss_region": 0.030095556750893593, |
| "loss_total": 1.2223106622695923, |
| "lr": 0.001192641093865743, |
| "router/selected_tokens_s0": 4337.75, |
| "step": 1290, |
| "tokens_trained": 4.226667648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.36876817247003757, |
| "grad_norm": 1.280081033706665, |
| "loss": 1.1625, |
| "loss_ce": 1.0769988298416138, |
| "loss_region": 0.030076846480369568, |
| "loss_total": 1.1070756912231445, |
| "lr": 0.001192234201881892, |
| "router/selected_tokens_s0": 4330.625, |
| "step": 1300, |
| "tokens_trained": 4.259424272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3716048507198071, |
| "grad_norm": 0.7819789052009583, |
| "loss": 1.1516, |
| "loss_ce": 1.0531295537948608, |
| "loss_region": 0.029812535271048546, |
| "loss_total": 1.0829421281814575, |
| "lr": 0.001191827309898041, |
| "router/selected_tokens_s0": 4107.75, |
| "step": 1310, |
| "tokens_trained": 4.292189712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3744415289695766, |
| "grad_norm": 4.3887505531311035, |
| "loss": 1.1524, |
| "loss_ce": 1.0992565155029297, |
| "loss_region": 0.030015140771865845, |
| "loss_total": 1.1292716264724731, |
| "lr": 0.00119142041791419, |
| "router/selected_tokens_s0": 4279.625, |
| "step": 1320, |
| "tokens_trained": 4.32495164 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.37727820721934613, |
| "grad_norm": 2.5429630279541016, |
| "loss": 1.1622, |
| "loss_ce": 0.9915607571601868, |
| "loss_region": 0.02960185892879963, |
| "loss_total": 1.0211626291275024, |
| "lr": 0.0011910135259303389, |
| "router/selected_tokens_s0": 3922.75, |
| "step": 1330, |
| "tokens_trained": 4.35771708 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.38011488546911565, |
| "grad_norm": 1.3790112733840942, |
| "loss": 1.1526, |
| "loss_ce": 1.2076722383499146, |
| "loss_region": 0.029480615630745888, |
| "loss_total": 1.2371528148651123, |
| "lr": 0.0011906066339464878, |
| "router/selected_tokens_s0": 3831.5, |
| "step": 1340, |
| "tokens_trained": 4.39048252 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.38295156371888517, |
| "grad_norm": 3.28352427482605, |
| "loss": 1.1523, |
| "loss_ce": 0.9999480247497559, |
| "loss_region": 0.02995798923075199, |
| "loss_total": 1.0299060344696045, |
| "lr": 0.0011901997419626368, |
| "router/selected_tokens_s0": 4236.0, |
| "step": 1350, |
| "tokens_trained": 4.42324796 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3857882419686547, |
| "grad_norm": 2.173388719558716, |
| "loss": 1.1469, |
| "loss_ce": 1.1173208951950073, |
| "loss_region": 0.030063528567552567, |
| "loss_total": 1.1473844051361084, |
| "lr": 0.0011897928499787858, |
| "router/selected_tokens_s0": 4322.25, |
| "step": 1360, |
| "tokens_trained": 4.4560134 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3886249202184242, |
| "grad_norm": 1.3337340354919434, |
| "loss": 1.1514, |
| "loss_ce": 1.097347617149353, |
| "loss_region": 0.030277268961071968, |
| "loss_total": 1.1276248693466187, |
| "lr": 0.0011893859579949347, |
| "router/selected_tokens_s0": 4490.375, |
| "step": 1370, |
| "tokens_trained": 4.48877884 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3914615984681937, |
| "grad_norm": 1.5072178840637207, |
| "loss": 1.1454, |
| "loss_ce": 1.1354695558547974, |
| "loss_region": 0.0300710741430521, |
| "loss_total": 1.1655405759811401, |
| "lr": 0.0011889790660110837, |
| "router/selected_tokens_s0": 4323.125, |
| "step": 1380, |
| "tokens_trained": 4.52154428 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.39429827671796325, |
| "grad_norm": 1.4634846448898315, |
| "loss": 1.1434, |
| "loss_ce": 1.1472464799880981, |
| "loss_region": 0.029943954199552536, |
| "loss_total": 1.1771904230117798, |
| "lr": 0.0011885721740272327, |
| "router/selected_tokens_s0": 4222.625, |
| "step": 1390, |
| "tokens_trained": 4.55430972 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.39713495496773277, |
| "grad_norm": 1.1301681995391846, |
| "loss": 1.1491, |
| "loss_ce": 0.932141900062561, |
| "loss_region": 0.03013395331799984, |
| "loss_total": 0.9622758626937866, |
| "lr": 0.0011881652820433816, |
| "router/selected_tokens_s0": 4389.125, |
| "step": 1400, |
| "tokens_trained": 4.58707516 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.3999716332175023, |
| "grad_norm": 1.153057336807251, |
| "loss": 1.1483, |
| "loss_ce": 1.0930418968200684, |
| "loss_region": 0.029886895790696144, |
| "loss_total": 1.1229287385940552, |
| "lr": 0.0011877583900595306, |
| "router/selected_tokens_s0": 4177.875, |
| "step": 1410, |
| "tokens_trained": 4.6198406 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4028083114672718, |
| "grad_norm": 2.0346107482910156, |
| "loss": 1.1355, |
| "loss_ce": 1.130191683769226, |
| "loss_region": 0.030217666178941727, |
| "loss_total": 1.1604093313217163, |
| "lr": 0.0011873514980756796, |
| "router/selected_tokens_s0": 4435.0, |
| "step": 1420, |
| "tokens_trained": 4.652606024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4056449897170413, |
| "grad_norm": 1.2362136840820312, |
| "loss": 1.1461, |
| "loss_ce": 1.1180355548858643, |
| "loss_region": 0.029944026842713356, |
| "loss_total": 1.1479796171188354, |
| "lr": 0.0011869446060918285, |
| "router/selected_tokens_s0": 4219.25, |
| "step": 1430, |
| "tokens_trained": 4.685371464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.40848166796681085, |
| "grad_norm": 1.6414567232131958, |
| "loss": 1.1476, |
| "loss_ce": 1.1310675144195557, |
| "loss_region": 0.030178584158420563, |
| "loss_total": 1.1612460613250732, |
| "lr": 0.0011865377141079775, |
| "router/selected_tokens_s0": 4406.125, |
| "step": 1440, |
| "tokens_trained": 4.718136904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.41131834621658037, |
| "grad_norm": 0.8733806014060974, |
| "loss": 1.1452, |
| "loss_ce": 1.1529111862182617, |
| "loss_region": 0.029908571392297745, |
| "loss_total": 1.1828197240829468, |
| "lr": 0.0011861308221241265, |
| "router/selected_tokens_s0": 4186.5, |
| "step": 1450, |
| "tokens_trained": 4.750902344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4141550244663499, |
| "grad_norm": 2.170149087905884, |
| "loss": 1.1364, |
| "loss_ce": 1.1446956396102905, |
| "loss_region": 0.030016543343663216, |
| "loss_total": 1.1747121810913086, |
| "lr": 0.0011857239301402756, |
| "router/selected_tokens_s0": 4279.125, |
| "step": 1460, |
| "tokens_trained": 4.783666984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4169917027161194, |
| "grad_norm": 1.5901942253112793, |
| "loss": 1.1418, |
| "loss_ce": 1.1736469268798828, |
| "loss_region": 0.02991572767496109, |
| "loss_total": 1.203562617301941, |
| "lr": 0.0011853170381564246, |
| "router/selected_tokens_s0": 4190.375, |
| "step": 1470, |
| "tokens_trained": 4.816432424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4198283809658889, |
| "grad_norm": 0.7801039814949036, |
| "loss": 1.1359, |
| "loss_ce": 1.0415936708450317, |
| "loss_region": 0.030063536018133163, |
| "loss_total": 1.0716571807861328, |
| "lr": 0.0011849101461725736, |
| "router/selected_tokens_s0": 4323.5, |
| "step": 1480, |
| "tokens_trained": 4.849197864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.42266505921565845, |
| "grad_norm": 1.1225630044937134, |
| "loss": 1.1387, |
| "loss_ce": 1.1764026880264282, |
| "loss_region": 0.02989169955253601, |
| "loss_total": 1.2062944173812866, |
| "lr": 0.0011845032541887225, |
| "router/selected_tokens_s0": 4166.375, |
| "step": 1490, |
| "tokens_trained": 4.881963248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.42550173746542796, |
| "grad_norm": 1.3516196012496948, |
| "loss": 1.1445, |
| "loss_ce": 1.1119225025177002, |
| "loss_region": 0.03007156029343605, |
| "loss_total": 1.1419941186904907, |
| "lr": 0.0011840963622048713, |
| "router/selected_tokens_s0": 4332.625, |
| "step": 1500, |
| "tokens_trained": 4.914728608 |
| }, |
| { |
| "epoch": 0.42550173746542796, |
| "eval_ppl": 3.0476700462359805, |
| "eval_runtime": 2.5167, |
| "step": 1500, |
| "tokens_trained": 4.914728608 |
| }, |
| { |
| "epoch": 0.42550173746542796, |
| "eval_F": 0.3395766737890528, |
| "eval_F_cds": 0.33560010026602843, |
| "eval_F_dig": 0.34591244107612573, |
| "eval_F_exon": 0.33732050667193275, |
| "eval_F_intron": 0.340589821591843, |
| "eval_F_nig": 0.3449097161371641, |
| "eval_F_promoter": 0.3287406377406758, |
| "eval_F_utr": 0.33810586816514, |
| "eval_G": 0.4388793285567115, |
| "eval_G_cds": 0.4465895620992391, |
| "eval_G_dig": 0.39567722372516084, |
| "eval_G_exon": 0.44327135296181625, |
| "eval_G_intron": 0.4386635275964277, |
| "eval_G_nig": 0.4373593879668909, |
| "eval_G_promoter": 0.44171817290159177, |
| "eval_G_utr": 0.44355779628952524, |
| "eval_avg_bp_per_token": 2.944843027178028, |
| "eval_bp_per_token/cds": 2.9797368928296066, |
| "eval_bp_per_token/dig": 2.8909049841891283, |
| "eval_bp_per_token/exon": 2.9645396002341724, |
| "eval_bp_per_token/intron": 2.93608304360423, |
| "eval_bp_per_token/nig": 2.8993094517590188, |
| "eval_bp_per_token/promoter": 3.0419117237000717, |
| "eval_bp_per_token/utr": 2.9576534871366778, |
| "eval_ppl_cds": 3.7328596405663, |
| "eval_ppl_dig": 1.1534605141350962, |
| "eval_ppl_exon": 3.4439528933373436, |
| "eval_ppl_intron": 3.0653985302604827, |
| "eval_ppl_nig": 2.904936687189015, |
| "eval_ppl_promoter": 3.3618258190318606, |
| "eval_ppl_utr": 3.3512748939063846, |
| "step": 1500, |
| "tokens_trained": 4.914728608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4283384157151975, |
| "grad_norm": 1.0354516506195068, |
| "loss": 1.1407, |
| "loss_ce": 1.2179700136184692, |
| "loss_region": 0.029973506927490234, |
| "loss_total": 1.2479435205459595, |
| "lr": 0.0011836894702210202, |
| "router/selected_tokens_s0": 4242.375, |
| "step": 1510, |
| "tokens_trained": 4.947494048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.431175093964967, |
| "grad_norm": 0.9974690675735474, |
| "loss": 1.1361, |
| "loss_ce": 1.1464780569076538, |
| "loss_region": 0.03020160086452961, |
| "loss_total": 1.1766796112060547, |
| "lr": 0.0011832825782371692, |
| "router/selected_tokens_s0": 4443.875, |
| "step": 1520, |
| "tokens_trained": 4.980259488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4340117722147365, |
| "grad_norm": 1.61404550075531, |
| "loss": 1.1383, |
| "loss_ce": 1.1023921966552734, |
| "loss_region": 0.029910210520029068, |
| "loss_total": 1.1323024034500122, |
| "lr": 0.0011828756862533184, |
| "router/selected_tokens_s0": 4174.25, |
| "step": 1530, |
| "tokens_trained": 5.013024928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.43684845046450604, |
| "grad_norm": 1.551711082458496, |
| "loss": 1.1369, |
| "loss_ce": 1.085469365119934, |
| "loss_region": 0.02990012802183628, |
| "loss_total": 1.115369439125061, |
| "lr": 0.0011824687942694674, |
| "router/selected_tokens_s0": 4162.25, |
| "step": 1540, |
| "tokens_trained": 5.04578704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.43968512871427556, |
| "grad_norm": 1.3328109979629517, |
| "loss": 1.1358, |
| "loss_ce": 1.1522539854049683, |
| "loss_region": 0.02980169840157032, |
| "loss_total": 1.1820557117462158, |
| "lr": 0.0011820619022856163, |
| "router/selected_tokens_s0": 4050.75, |
| "step": 1550, |
| "tokens_trained": 5.078551904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4425218069640451, |
| "grad_norm": 2.2517945766448975, |
| "loss": 1.1398, |
| "loss_ce": 1.0304194688796997, |
| "loss_region": 0.030139248818159103, |
| "loss_total": 1.0605586767196655, |
| "lr": 0.0011816550103017653, |
| "router/selected_tokens_s0": 4399.625, |
| "step": 1560, |
| "tokens_trained": 5.111317344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4453584852138146, |
| "grad_norm": 1.0419440269470215, |
| "loss": 1.1423, |
| "loss_ce": 1.2029235363006592, |
| "loss_region": 0.029878782108426094, |
| "loss_total": 1.2328022718429565, |
| "lr": 0.0011812481183179143, |
| "router/selected_tokens_s0": 4131.75, |
| "step": 1570, |
| "tokens_trained": 5.144082784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4481951634635841, |
| "grad_norm": 0.8405026197433472, |
| "loss": 1.1357, |
| "loss_ce": 1.1085268259048462, |
| "loss_region": 0.02992934361100197, |
| "loss_total": 1.1384562253952026, |
| "lr": 0.0011808412263340632, |
| "router/selected_tokens_s0": 4185.75, |
| "step": 1580, |
| "tokens_trained": 5.176848224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.45103184171335364, |
| "grad_norm": 1.8782676458358765, |
| "loss": 1.1447, |
| "loss_ce": 1.0933234691619873, |
| "loss_region": 0.030135583132505417, |
| "loss_total": 1.1234591007232666, |
| "lr": 0.0011804343343502122, |
| "router/selected_tokens_s0": 4400.5, |
| "step": 1590, |
| "tokens_trained": 5.209613664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.45386851996312316, |
| "grad_norm": 1.116540551185608, |
| "loss": 1.1417, |
| "loss_ce": 1.1890523433685303, |
| "loss_region": 0.0303688682615757, |
| "loss_total": 1.2194212675094604, |
| "lr": 0.0011800274423663611, |
| "router/selected_tokens_s0": 4597.375, |
| "step": 1600, |
| "tokens_trained": 5.242378304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4567051982128927, |
| "grad_norm": 0.9224187135696411, |
| "loss": 1.1352, |
| "loss_ce": 1.0753121376037598, |
| "loss_region": 0.030113881453871727, |
| "loss_total": 1.1054260730743408, |
| "lr": 0.0011796205503825101, |
| "router/selected_tokens_s0": 4381.5, |
| "step": 1610, |
| "tokens_trained": 5.275142944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4595418764626622, |
| "grad_norm": 1.250409483909607, |
| "loss": 1.1423, |
| "loss_ce": 1.1405887603759766, |
| "loss_region": 0.030090278014540672, |
| "loss_total": 1.1706790924072266, |
| "lr": 0.001179213658398659, |
| "router/selected_tokens_s0": 4360.875, |
| "step": 1620, |
| "tokens_trained": 5.307906784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4623785547124317, |
| "grad_norm": 0.6683188080787659, |
| "loss": 1.1358, |
| "loss_ce": 1.0137219429016113, |
| "loss_region": 0.0301409512758255, |
| "loss_total": 1.0438629388809204, |
| "lr": 0.001178806766414808, |
| "router/selected_tokens_s0": 4420.25, |
| "step": 1630, |
| "tokens_trained": 5.340672224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.46521523296220124, |
| "grad_norm": 1.3055206537246704, |
| "loss": 1.1378, |
| "loss_ce": 1.120367407798767, |
| "loss_region": 0.029992438852787018, |
| "loss_total": 1.150359869003296, |
| "lr": 0.001178399874430957, |
| "router/selected_tokens_s0": 4256.375, |
| "step": 1640, |
| "tokens_trained": 5.373436896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.46805191121197076, |
| "grad_norm": 1.2817225456237793, |
| "loss": 1.1365, |
| "loss_ce": 1.159173607826233, |
| "loss_region": 0.030014952644705772, |
| "loss_total": 1.1891885995864868, |
| "lr": 0.001177992982447106, |
| "router/selected_tokens_s0": 4277.875, |
| "step": 1650, |
| "tokens_trained": 5.406202336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4708885894617403, |
| "grad_norm": 1.2652041912078857, |
| "loss": 1.1303, |
| "loss_ce": 1.1445159912109375, |
| "loss_region": 0.03000623546540737, |
| "loss_total": 1.1745222806930542, |
| "lr": 0.001177586090463255, |
| "router/selected_tokens_s0": 4274.375, |
| "step": 1660, |
| "tokens_trained": 5.438967776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4737252677115098, |
| "grad_norm": 1.7784186601638794, |
| "loss": 1.1334, |
| "loss_ce": 1.1069244146347046, |
| "loss_region": 0.030016450211405754, |
| "loss_total": 1.136940836906433, |
| "lr": 0.001177179198479404, |
| "router/selected_tokens_s0": 4287.625, |
| "step": 1670, |
| "tokens_trained": 5.471733216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4765619459612793, |
| "grad_norm": 1.0779353380203247, |
| "loss": 1.1315, |
| "loss_ce": 1.1237202882766724, |
| "loss_region": 0.029916411265730858, |
| "loss_total": 1.1536366939544678, |
| "lr": 0.0011767723064955529, |
| "router/selected_tokens_s0": 4156.75, |
| "step": 1680, |
| "tokens_trained": 5.504498656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.47939862421104884, |
| "grad_norm": 0.7689351439476013, |
| "loss": 1.1324, |
| "loss_ce": 1.0980726480484009, |
| "loss_region": 0.030096061527729034, |
| "loss_total": 1.1281687021255493, |
| "lr": 0.0011763654145117018, |
| "router/selected_tokens_s0": 4377.5, |
| "step": 1690, |
| "tokens_trained": 5.537264096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.48223530246081836, |
| "grad_norm": 0.6869276165962219, |
| "loss": 1.1332, |
| "loss_ce": 1.0792652368545532, |
| "loss_region": 0.030072998255491257, |
| "loss_total": 1.1093382835388184, |
| "lr": 0.0011759585225278508, |
| "router/selected_tokens_s0": 4349.625, |
| "step": 1700, |
| "tokens_trained": 5.570029536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4850719807105879, |
| "grad_norm": 0.9587815403938293, |
| "loss": 1.1361, |
| "loss_ce": 1.0378434658050537, |
| "loss_region": 0.03009817562997341, |
| "loss_total": 1.067941665649414, |
| "lr": 0.001175551630544, |
| "router/selected_tokens_s0": 4384.25, |
| "step": 1710, |
| "tokens_trained": 5.602794976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4879086589603574, |
| "grad_norm": 1.1542259454727173, |
| "loss": 1.1294, |
| "loss_ce": 1.074008584022522, |
| "loss_region": 0.030034121125936508, |
| "loss_total": 1.104042649269104, |
| "lr": 0.001175144738560149, |
| "router/selected_tokens_s0": 4306.0, |
| "step": 1720, |
| "tokens_trained": 5.635560416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4907453372101269, |
| "grad_norm": 1.0194206237792969, |
| "loss": 1.1296, |
| "loss_ce": 1.1548231840133667, |
| "loss_region": 0.03011094592511654, |
| "loss_total": 1.184934139251709, |
| "lr": 0.001174737846576298, |
| "router/selected_tokens_s0": 4395.625, |
| "step": 1730, |
| "tokens_trained": 5.668325856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.49358201545989644, |
| "grad_norm": 1.108144998550415, |
| "loss": 1.1351, |
| "loss_ce": 1.0953419208526611, |
| "loss_region": 0.03002314455807209, |
| "loss_total": 1.1253650188446045, |
| "lr": 0.0011743309545924469, |
| "router/selected_tokens_s0": 4292.125, |
| "step": 1740, |
| "tokens_trained": 5.701091296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.49641869370966596, |
| "grad_norm": 1.075562834739685, |
| "loss": 1.1347, |
| "loss_ce": 1.1154391765594482, |
| "loss_region": 0.029949212446808815, |
| "loss_total": 1.1453883647918701, |
| "lr": 0.0011739240626085956, |
| "router/selected_tokens_s0": 4188.625, |
| "step": 1750, |
| "tokens_trained": 5.733856736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.4992553719594355, |
| "grad_norm": 1.3173739910125732, |
| "loss": 1.1325, |
| "loss_ce": 1.0855435132980347, |
| "loss_region": 0.02994917891919613, |
| "loss_total": 1.1154927015304565, |
| "lr": 0.0011735171706247446, |
| "router/selected_tokens_s0": 4183.625, |
| "step": 1760, |
| "tokens_trained": 5.766622176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.502092050209205, |
| "grad_norm": 0.8734815716743469, |
| "loss": 1.1316, |
| "loss_ce": 1.190360188484192, |
| "loss_region": 0.03002040646970272, |
| "loss_total": 1.2203805446624756, |
| "lr": 0.0011731102786408936, |
| "router/selected_tokens_s0": 4294.5, |
| "step": 1770, |
| "tokens_trained": 5.799387616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5049287284589745, |
| "grad_norm": 2.5296459197998047, |
| "loss": 1.1361, |
| "loss_ce": 0.9863566756248474, |
| "loss_region": 0.02998475357890129, |
| "loss_total": 1.0163414478302002, |
| "lr": 0.0011727033866570427, |
| "router/selected_tokens_s0": 4235.875, |
| "step": 1780, |
| "tokens_trained": 5.832153056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.507765406708744, |
| "grad_norm": 0.7834669947624207, |
| "loss": 1.1297, |
| "loss_ce": 0.9555173516273499, |
| "loss_region": 0.0301660243421793, |
| "loss_total": 0.9856833815574646, |
| "lr": 0.0011722964946731917, |
| "router/selected_tokens_s0": 4416.375, |
| "step": 1790, |
| "tokens_trained": 5.864918496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5106020849585136, |
| "grad_norm": 0.9466329216957092, |
| "loss": 1.1295, |
| "loss_ce": 1.0096023082733154, |
| "loss_region": 0.030076030641794205, |
| "loss_total": 1.0396783351898193, |
| "lr": 0.0011718896026893407, |
| "router/selected_tokens_s0": 4354.625, |
| "step": 1800, |
| "tokens_trained": 5.897683936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5134387632082831, |
| "grad_norm": 1.151943325996399, |
| "loss": 1.1267, |
| "loss_ce": 1.0721287727355957, |
| "loss_region": 0.029984835535287857, |
| "loss_total": 1.1021136045455933, |
| "lr": 0.0011714827107054896, |
| "router/selected_tokens_s0": 4239.75, |
| "step": 1810, |
| "tokens_trained": 5.930449376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5162754414580526, |
| "grad_norm": 0.5502280592918396, |
| "loss": 1.1249, |
| "loss_ce": 1.0287433862686157, |
| "loss_region": 0.029946208000183105, |
| "loss_total": 1.0586895942687988, |
| "lr": 0.0011710758187216386, |
| "router/selected_tokens_s0": 4179.375, |
| "step": 1820, |
| "tokens_trained": 5.96321104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5191121197078221, |
| "grad_norm": 1.5447858572006226, |
| "loss": 1.1319, |
| "loss_ce": 1.1280238628387451, |
| "loss_region": 0.030087478458881378, |
| "loss_total": 1.158111333847046, |
| "lr": 0.0011706689267377876, |
| "router/selected_tokens_s0": 4389.75, |
| "step": 1830, |
| "tokens_trained": 5.99597648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5219487979575916, |
| "grad_norm": 0.9524003863334656, |
| "loss": 1.1274, |
| "loss_ce": 1.0977569818496704, |
| "loss_region": 0.030062809586524963, |
| "loss_total": 1.1278197765350342, |
| "lr": 0.0011702620347539365, |
| "router/selected_tokens_s0": 4354.0, |
| "step": 1840, |
| "tokens_trained": 6.028741744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5247854762073612, |
| "grad_norm": 0.6106662750244141, |
| "loss": 1.1264, |
| "loss_ce": 1.06783926486969, |
| "loss_region": 0.029942721128463745, |
| "loss_total": 1.097782015800476, |
| "lr": 0.0011698551427700855, |
| "router/selected_tokens_s0": 4162.625, |
| "step": 1850, |
| "tokens_trained": 6.061507184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5276221544571307, |
| "grad_norm": 1.2853341102600098, |
| "loss": 1.1329, |
| "loss_ce": 1.0429413318634033, |
| "loss_region": 0.02999758906662464, |
| "loss_total": 1.0729389190673828, |
| "lr": 0.0011694482507862345, |
| "router/selected_tokens_s0": 4247.5, |
| "step": 1860, |
| "tokens_trained": 6.094268624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5304588327069002, |
| "grad_norm": 2.993485927581787, |
| "loss": 1.1236, |
| "loss_ce": 1.0583568811416626, |
| "loss_region": 0.030023684725165367, |
| "loss_total": 1.0883805751800537, |
| "lr": 0.0011690413588023834, |
| "router/selected_tokens_s0": 4302.0, |
| "step": 1870, |
| "tokens_trained": 6.127034064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5332955109566697, |
| "grad_norm": 0.7363700866699219, |
| "loss": 1.1308, |
| "loss_ce": 1.1353397369384766, |
| "loss_region": 0.029933562502264977, |
| "loss_total": 1.1652733087539673, |
| "lr": 0.0011686344668185324, |
| "router/selected_tokens_s0": 4149.375, |
| "step": 1880, |
| "tokens_trained": 6.159799504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5361321892064392, |
| "grad_norm": 0.8693296313285828, |
| "loss": 1.1274, |
| "loss_ce": 1.0827381610870361, |
| "loss_region": 0.030024589970707893, |
| "loss_total": 1.1127628087997437, |
| "lr": 0.0011682275748346814, |
| "router/selected_tokens_s0": 4302.25, |
| "step": 1890, |
| "tokens_trained": 6.192561072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5389688674562088, |
| "grad_norm": 0.4028984606266022, |
| "loss": 1.1162, |
| "loss_ce": 1.1056593656539917, |
| "loss_region": 0.030071411281824112, |
| "loss_total": 1.1357307434082031, |
| "lr": 0.0011678206828508303, |
| "router/selected_tokens_s0": 4372.625, |
| "step": 1900, |
| "tokens_trained": 6.225326512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5418055457059783, |
| "grad_norm": 1.1904973983764648, |
| "loss": 1.1294, |
| "loss_ce": 1.0976545810699463, |
| "loss_region": 0.030053725466132164, |
| "loss_total": 1.1277083158493042, |
| "lr": 0.0011674137908669793, |
| "router/selected_tokens_s0": 4348.125, |
| "step": 1910, |
| "tokens_trained": 6.258091952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5446422239557478, |
| "grad_norm": 1.018221378326416, |
| "loss": 1.1277, |
| "loss_ce": 1.1479384899139404, |
| "loss_region": 0.030054787173867226, |
| "loss_total": 1.1779932975769043, |
| "lr": 0.0011670068988831283, |
| "router/selected_tokens_s0": 4353.25, |
| "step": 1920, |
| "tokens_trained": 6.290857392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5474789022055173, |
| "grad_norm": 0.4506734013557434, |
| "loss": 1.1235, |
| "loss_ce": 1.1137655973434448, |
| "loss_region": 0.03005811758339405, |
| "loss_total": 1.1438237428665161, |
| "lr": 0.0011666000068992772, |
| "router/selected_tokens_s0": 4341.5, |
| "step": 1930, |
| "tokens_trained": 6.323622832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5503155804552868, |
| "grad_norm": 1.5671348571777344, |
| "loss": 1.1318, |
| "loss_ce": 1.1652703285217285, |
| "loss_region": 0.030141720548272133, |
| "loss_total": 1.195412039756775, |
| "lr": 0.0011661931149154262, |
| "router/selected_tokens_s0": 4458.125, |
| "step": 1940, |
| "tokens_trained": 6.356388272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5531522587050564, |
| "grad_norm": 1.2511063814163208, |
| "loss": 1.1246, |
| "loss_ce": 1.2078148126602173, |
| "loss_region": 0.03000708669424057, |
| "loss_total": 1.2378219366073608, |
| "lr": 0.0011657862229315751, |
| "router/selected_tokens_s0": 4275.625, |
| "step": 1950, |
| "tokens_trained": 6.389153712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5559889369548259, |
| "grad_norm": 1.1278033256530762, |
| "loss": 1.1253, |
| "loss_ce": 1.1528972387313843, |
| "loss_region": 0.029990505427122116, |
| "loss_total": 1.1828877925872803, |
| "lr": 0.0011653793309477243, |
| "router/selected_tokens_s0": 4247.625, |
| "step": 1960, |
| "tokens_trained": 6.421919152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5588256152045954, |
| "grad_norm": 0.7347070574760437, |
| "loss": 1.1292, |
| "loss_ce": 1.1609221696853638, |
| "loss_region": 0.03007410652935505, |
| "loss_total": 1.1909962892532349, |
| "lr": 0.0011649724389638733, |
| "router/selected_tokens_s0": 4377.0, |
| "step": 1970, |
| "tokens_trained": 6.454684592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5616622934543649, |
| "grad_norm": 0.8754347562789917, |
| "loss": 1.1321, |
| "loss_ce": 1.1314905881881714, |
| "loss_region": 0.030018918216228485, |
| "loss_total": 1.1615095138549805, |
| "lr": 0.0011645655469800223, |
| "router/selected_tokens_s0": 4292.375, |
| "step": 1980, |
| "tokens_trained": 6.487450032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5644989717041344, |
| "grad_norm": 1.4375395774841309, |
| "loss": 1.1251, |
| "loss_ce": 1.15834641456604, |
| "loss_region": 0.030011450871825218, |
| "loss_total": 1.1883578300476074, |
| "lr": 0.0011641586549961712, |
| "router/selected_tokens_s0": 4281.875, |
| "step": 1990, |
| "tokens_trained": 6.520215472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.567335649953904, |
| "grad_norm": 1.3012388944625854, |
| "loss": 1.1244, |
| "loss_ce": 1.1547801494598389, |
| "loss_region": 0.03002019412815571, |
| "loss_total": 1.184800386428833, |
| "lr": 0.00116375176301232, |
| "router/selected_tokens_s0": 4298.375, |
| "step": 2000, |
| "tokens_trained": 6.552980912 |
| }, |
| { |
| "epoch": 0.567335649953904, |
| "eval_ppl": 2.997264738752139, |
| "eval_runtime": 2.4974, |
| "step": 2000, |
| "tokens_trained": 6.552980912 |
| }, |
| { |
| "epoch": 0.567335649953904, |
| "eval_F": 0.33877094677913017, |
| "eval_F_cds": 0.3354545528054273, |
| "eval_F_dig": 0.3349740865171758, |
| "eval_F_exon": 0.33771546252151097, |
| "eval_F_intron": 0.3394511609404705, |
| "eval_F_nig": 0.33961248247030124, |
| "eval_F_promoter": 0.33587224314868064, |
| "eval_F_utr": 0.3390466904438115, |
| "eval_G": 0.3927095408069945, |
| "eval_G_cds": 0.38760326352277413, |
| "eval_G_dig": 0.38993240031773313, |
| "eval_G_exon": 0.3922000848097159, |
| "eval_G_intron": 0.39271919880055167, |
| "eval_G_nig": 0.3935918508753731, |
| "eval_G_promoter": 0.3926971556782782, |
| "eval_G_utr": 0.3912176578977754, |
| "eval_avg_bp_per_token": 2.9518469913300267, |
| "eval_bp_per_token/cds": 2.981029744974208, |
| "eval_bp_per_token/dig": 2.9853055512361997, |
| "eval_bp_per_token/exon": 2.9610725920975693, |
| "eval_bp_per_token/intron": 2.9459318896698954, |
| "eval_bp_per_token/nig": 2.9445325234400035, |
| "eval_bp_per_token/promoter": 2.977322539741189, |
| "eval_bp_per_token/utr": 2.9494462803662875, |
| "eval_ppl_cds": 3.6941119312579422, |
| "eval_ppl_dig": 1.1218375588220217, |
| "eval_ppl_exon": 3.4074634485917565, |
| "eval_ppl_intron": 3.014504389955456, |
| "eval_ppl_nig": 2.843623870937302, |
| "eval_ppl_promoter": 3.3305259507076883, |
| "eval_ppl_utr": 3.322006494837333, |
| "step": 2000, |
| "tokens_trained": 6.552980912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5701723282036735, |
| "grad_norm": 1.7854270935058594, |
| "loss": 1.1275, |
| "loss_ce": 1.1118180751800537, |
| "loss_region": 0.030034875497221947, |
| "loss_total": 1.1418529748916626, |
| "lr": 0.001163344871028469, |
| "router/selected_tokens_s0": 4323.625, |
| "step": 2010, |
| "tokens_trained": 6.585746352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.573009006453443, |
| "grad_norm": 1.2741203308105469, |
| "loss": 1.1297, |
| "loss_ce": 1.1596630811691284, |
| "loss_region": 0.030020276084542274, |
| "loss_total": 1.1896833181381226, |
| "lr": 0.001162937979044618, |
| "router/selected_tokens_s0": 4296.625, |
| "step": 2020, |
| "tokens_trained": 6.618511792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5758456847032125, |
| "grad_norm": 1.3113727569580078, |
| "loss": 1.1274, |
| "loss_ce": 1.130359411239624, |
| "loss_region": 0.030052313581109047, |
| "loss_total": 1.1604117155075073, |
| "lr": 0.001162531087060767, |
| "router/selected_tokens_s0": 4347.25, |
| "step": 2030, |
| "tokens_trained": 6.651277232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.578682362952982, |
| "grad_norm": 1.585740089416504, |
| "loss": 1.1242, |
| "loss_ce": 1.113228440284729, |
| "loss_region": 0.029946262016892433, |
| "loss_total": 1.143174648284912, |
| "lr": 0.001162124195076916, |
| "router/selected_tokens_s0": 4151.5, |
| "step": 2040, |
| "tokens_trained": 6.684041872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5815190412027516, |
| "grad_norm": 1.4227651357650757, |
| "loss": 1.1227, |
| "loss_ce": 1.1707289218902588, |
| "loss_region": 0.03000037930905819, |
| "loss_total": 1.200729250907898, |
| "lr": 0.001161717303093065, |
| "router/selected_tokens_s0": 4264.25, |
| "step": 2050, |
| "tokens_trained": 6.716806512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5843557194525211, |
| "grad_norm": 1.4349584579467773, |
| "loss": 1.126, |
| "loss_ce": 1.123897910118103, |
| "loss_region": 0.029999535530805588, |
| "loss_total": 1.1538974046707153, |
| "lr": 0.001161310411109214, |
| "router/selected_tokens_s0": 4258.5, |
| "step": 2060, |
| "tokens_trained": 6.749571952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5871923977022906, |
| "grad_norm": 1.525637149810791, |
| "loss": 1.1223, |
| "loss_ce": 1.0622094869613647, |
| "loss_region": 0.03016025200486183, |
| "loss_total": 1.092369794845581, |
| "lr": 0.001160903519125363, |
| "router/selected_tokens_s0": 4409.25, |
| "step": 2070, |
| "tokens_trained": 6.782337392 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5900290759520601, |
| "grad_norm": 0.31481412053108215, |
| "loss": 1.1308, |
| "loss_ce": 1.1158243417739868, |
| "loss_region": 0.030056282877922058, |
| "loss_total": 1.1458805799484253, |
| "lr": 0.001160496627141512, |
| "router/selected_tokens_s0": 4358.875, |
| "step": 2080, |
| "tokens_trained": 6.815102832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5928657542018296, |
| "grad_norm": 1.4279309511184692, |
| "loss": 1.1212, |
| "loss_ce": 1.1024186611175537, |
| "loss_region": 0.03000911884009838, |
| "loss_total": 1.1324278116226196, |
| "lr": 0.0011600897351576609, |
| "router/selected_tokens_s0": 4277.25, |
| "step": 2090, |
| "tokens_trained": 6.847868272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5957024324515992, |
| "grad_norm": 1.3502033948898315, |
| "loss": 1.1243, |
| "loss_ce": 1.215091347694397, |
| "loss_region": 0.03004975989460945, |
| "loss_total": 1.2451411485671997, |
| "lr": 0.0011596828431738098, |
| "router/selected_tokens_s0": 4345.25, |
| "step": 2100, |
| "tokens_trained": 6.880633712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.5985391107013687, |
| "grad_norm": 0.30469629168510437, |
| "loss": 1.1227, |
| "loss_ce": 1.0989904403686523, |
| "loss_region": 0.03004642389714718, |
| "loss_total": 1.1290369033813477, |
| "lr": 0.0011592759511899588, |
| "router/selected_tokens_s0": 4339.125, |
| "step": 2110, |
| "tokens_trained": 6.913397016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6013757889511382, |
| "grad_norm": 3.0106451511383057, |
| "loss": 1.1271, |
| "loss_ce": 1.0580655336380005, |
| "loss_region": 0.03005184419453144, |
| "loss_total": 1.0881173610687256, |
| "lr": 0.0011588690592061078, |
| "router/selected_tokens_s0": 4347.75, |
| "step": 2120, |
| "tokens_trained": 6.946162296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6042124672009077, |
| "grad_norm": 1.4084529876708984, |
| "loss": 1.1261, |
| "loss_ce": 0.9337919354438782, |
| "loss_region": 0.029956450685858727, |
| "loss_total": 0.9637483954429626, |
| "lr": 0.0011584621672222567, |
| "router/selected_tokens_s0": 4181.25, |
| "step": 2130, |
| "tokens_trained": 6.978927736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6070491454506772, |
| "grad_norm": 0.7794283032417297, |
| "loss": 1.1287, |
| "loss_ce": 1.0321320295333862, |
| "loss_region": 0.030011894181370735, |
| "loss_total": 1.0621439218521118, |
| "lr": 0.0011580552752384057, |
| "router/selected_tokens_s0": 4285.875, |
| "step": 2140, |
| "tokens_trained": 7.011693176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6098858237004467, |
| "grad_norm": 0.7242727279663086, |
| "loss": 1.1314, |
| "loss_ce": 1.1077067852020264, |
| "loss_region": 0.030075622722506523, |
| "loss_total": 1.1377824544906616, |
| "lr": 0.0011576483832545547, |
| "router/selected_tokens_s0": 4383.25, |
| "step": 2150, |
| "tokens_trained": 7.044458616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6127225019502163, |
| "grad_norm": 0.8703320622444153, |
| "loss": 1.1255, |
| "loss_ce": 1.042706847190857, |
| "loss_region": 0.030024481937289238, |
| "loss_total": 1.072731375694275, |
| "lr": 0.0011572414912707036, |
| "router/selected_tokens_s0": 4306.0, |
| "step": 2160, |
| "tokens_trained": 7.077224056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6155591801999858, |
| "grad_norm": 2.464707374572754, |
| "loss": 1.12, |
| "loss_ce": 1.0845450162887573, |
| "loss_region": 0.029988931491971016, |
| "loss_total": 1.1145339012145996, |
| "lr": 0.0011568345992868526, |
| "router/selected_tokens_s0": 4238.875, |
| "step": 2170, |
| "tokens_trained": 7.109989496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6183958584497553, |
| "grad_norm": 2.0766637325286865, |
| "loss": 1.1266, |
| "loss_ce": 1.1240020990371704, |
| "loss_region": 0.030013838782906532, |
| "loss_total": 1.1540158987045288, |
| "lr": 0.0011564277073030016, |
| "router/selected_tokens_s0": 4291.875, |
| "step": 2180, |
| "tokens_trained": 7.142754936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6212325366995248, |
| "grad_norm": 1.402709722518921, |
| "loss": 1.1265, |
| "loss_ce": 1.1370148658752441, |
| "loss_region": 0.03003770112991333, |
| "loss_total": 1.1670525074005127, |
| "lr": 0.0011560208153191505, |
| "router/selected_tokens_s0": 4328.625, |
| "step": 2190, |
| "tokens_trained": 7.175520376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6240692149492943, |
| "grad_norm": 0.7657859325408936, |
| "loss": 1.1259, |
| "loss_ce": 1.116765022277832, |
| "loss_region": 0.030005475506186485, |
| "loss_total": 1.1467704772949219, |
| "lr": 0.0011556139233352995, |
| "router/selected_tokens_s0": 4272.125, |
| "step": 2200, |
| "tokens_trained": 7.208285816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6269058931990639, |
| "grad_norm": 3.5244100093841553, |
| "loss": 1.1305, |
| "loss_ce": 1.1446946859359741, |
| "loss_region": 0.030087754130363464, |
| "loss_total": 1.174782395362854, |
| "lr": 0.0011552070313514487, |
| "router/selected_tokens_s0": 4414.25, |
| "step": 2210, |
| "tokens_trained": 7.241051256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6297425714488334, |
| "grad_norm": 0.599822998046875, |
| "loss": 1.1324, |
| "loss_ce": 1.0551592111587524, |
| "loss_region": 0.030122289434075356, |
| "loss_total": 1.085281491279602, |
| "lr": 0.0011548001393675976, |
| "router/selected_tokens_s0": 4453.875, |
| "step": 2220, |
| "tokens_trained": 7.273816696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6325792496986029, |
| "grad_norm": 2.314722776412964, |
| "loss": 1.1277, |
| "loss_ce": 1.1485532522201538, |
| "loss_region": 0.030024103820323944, |
| "loss_total": 1.1785773038864136, |
| "lr": 0.0011543932473837466, |
| "router/selected_tokens_s0": 4313.5, |
| "step": 2230, |
| "tokens_trained": 7.306582136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6354159279483724, |
| "grad_norm": 2.072960615158081, |
| "loss": 1.131, |
| "loss_ce": 1.0349353551864624, |
| "loss_region": 0.030028166249394417, |
| "loss_total": 1.0649635791778564, |
| "lr": 0.0011539863553998956, |
| "router/selected_tokens_s0": 4319.625, |
| "step": 2240, |
| "tokens_trained": 7.339347576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.638252606198142, |
| "grad_norm": 1.371410846710205, |
| "loss": 1.1226, |
| "loss_ce": 1.0738561153411865, |
| "loss_region": 0.030064314603805542, |
| "loss_total": 1.1039204597473145, |
| "lr": 0.0011535794634160443, |
| "router/selected_tokens_s0": 4378.375, |
| "step": 2250, |
| "tokens_trained": 7.372113016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6410892844479115, |
| "grad_norm": 3.474445343017578, |
| "loss": 1.1284, |
| "loss_ce": 1.0069116353988647, |
| "loss_region": 0.030036170035600662, |
| "loss_total": 1.0369478464126587, |
| "lr": 0.0011531725714321933, |
| "router/selected_tokens_s0": 4332.625, |
| "step": 2260, |
| "tokens_trained": 7.404878456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.643925962697681, |
| "grad_norm": 0.5796771049499512, |
| "loss": 1.1245, |
| "loss_ce": 1.138779640197754, |
| "loss_region": 0.030022747814655304, |
| "loss_total": 1.1688023805618286, |
| "lr": 0.0011527656794483422, |
| "router/selected_tokens_s0": 4308.875, |
| "step": 2270, |
| "tokens_trained": 7.437643896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6467626409474505, |
| "grad_norm": 1.155604362487793, |
| "loss": 1.1216, |
| "loss_ce": 0.9782689809799194, |
| "loss_region": 0.030030813068151474, |
| "loss_total": 1.0082998275756836, |
| "lr": 0.0011523587874644914, |
| "router/selected_tokens_s0": 4321.625, |
| "step": 2280, |
| "tokens_trained": 7.470409336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.64959931919722, |
| "grad_norm": 1.8259997367858887, |
| "loss": 1.1318, |
| "loss_ce": 1.055479884147644, |
| "loss_region": 0.030021535232663155, |
| "loss_total": 1.0855014324188232, |
| "lr": 0.0011519518954806404, |
| "router/selected_tokens_s0": 4307.375, |
| "step": 2290, |
| "tokens_trained": 7.503173472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6524359974469895, |
| "grad_norm": 1.2909961938858032, |
| "loss": 1.1216, |
| "loss_ce": 1.1016438007354736, |
| "loss_region": 0.030030114576220512, |
| "loss_total": 1.1316739320755005, |
| "lr": 0.0011515450034967894, |
| "router/selected_tokens_s0": 4321.625, |
| "step": 2300, |
| "tokens_trained": 7.535938912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6552726756967591, |
| "grad_norm": 3.855242967605591, |
| "loss": 1.1332, |
| "loss_ce": 1.1084688901901245, |
| "loss_region": 0.030001208186149597, |
| "loss_total": 1.1384700536727905, |
| "lr": 0.0011511381115129383, |
| "router/selected_tokens_s0": 4267.625, |
| "step": 2310, |
| "tokens_trained": 7.568704352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6581093539465286, |
| "grad_norm": 0.6401855945587158, |
| "loss": 1.1235, |
| "loss_ce": 1.068629503250122, |
| "loss_region": 0.030046915635466576, |
| "loss_total": 1.0986764430999756, |
| "lr": 0.0011507312195290873, |
| "router/selected_tokens_s0": 4353.5, |
| "step": 2320, |
| "tokens_trained": 7.601469792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6609460321962981, |
| "grad_norm": 2.758415460586548, |
| "loss": 1.1224, |
| "loss_ce": 1.1197397708892822, |
| "loss_region": 0.030033273622393608, |
| "loss_total": 1.1497730016708374, |
| "lr": 0.0011503243275452363, |
| "router/selected_tokens_s0": 4317.625, |
| "step": 2330, |
| "tokens_trained": 7.634233608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6637827104460676, |
| "grad_norm": 3.6356966495513916, |
| "loss": 1.1258, |
| "loss_ce": 1.192346453666687, |
| "loss_region": 0.030019540339708328, |
| "loss_total": 1.2223659753799438, |
| "lr": 0.0011499174355613852, |
| "router/selected_tokens_s0": 4307.0, |
| "step": 2340, |
| "tokens_trained": 7.666998248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6666193886958371, |
| "grad_norm": 0.5084363222122192, |
| "loss": 1.1211, |
| "loss_ce": 1.0241565704345703, |
| "loss_region": 0.030024418607354164, |
| "loss_total": 1.0541809797286987, |
| "lr": 0.0011495105435775342, |
| "router/selected_tokens_s0": 4311.75, |
| "step": 2350, |
| "tokens_trained": 7.699763688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6694560669456067, |
| "grad_norm": 2.6118147373199463, |
| "loss": 1.1205, |
| "loss_ce": 1.094053864479065, |
| "loss_region": 0.030054572969675064, |
| "loss_total": 1.1241084337234497, |
| "lr": 0.0011491036515936831, |
| "router/selected_tokens_s0": 4375.625, |
| "step": 2360, |
| "tokens_trained": 7.732529128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6722927451953762, |
| "grad_norm": 1.5716001987457275, |
| "loss": 1.1174, |
| "loss_ce": 1.0806825160980225, |
| "loss_region": 0.02999335154891014, |
| "loss_total": 1.1106758117675781, |
| "lr": 0.0011486967596098321, |
| "router/selected_tokens_s0": 4245.125, |
| "step": 2370, |
| "tokens_trained": 7.765294568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6751294234451457, |
| "grad_norm": 1.6855603456497192, |
| "loss": 1.1248, |
| "loss_ce": 1.0957375764846802, |
| "loss_region": 0.030019070953130722, |
| "loss_total": 1.1257566213607788, |
| "lr": 0.001148289867625981, |
| "router/selected_tokens_s0": 4306.25, |
| "step": 2380, |
| "tokens_trained": 7.798060008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6779661016949152, |
| "grad_norm": 1.7085551023483276, |
| "loss": 1.1219, |
| "loss_ce": 1.0849840641021729, |
| "loss_region": 0.029990842565894127, |
| "loss_total": 1.114974856376648, |
| "lr": 0.00114788297564213, |
| "router/selected_tokens_s0": 4250.875, |
| "step": 2390, |
| "tokens_trained": 7.830825448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6808027799446847, |
| "grad_norm": 2.7529702186584473, |
| "loss": 1.1278, |
| "loss_ce": 1.1395268440246582, |
| "loss_region": 0.030015477910637856, |
| "loss_total": 1.1695423126220703, |
| "lr": 0.001147476083658279, |
| "router/selected_tokens_s0": 4305.125, |
| "step": 2400, |
| "tokens_trained": 7.863590888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6836394581944543, |
| "grad_norm": 1.855435848236084, |
| "loss": 1.1225, |
| "loss_ce": 1.055867075920105, |
| "loss_region": 0.030039696022868156, |
| "loss_total": 1.085906744003296, |
| "lr": 0.001147069191674428, |
| "router/selected_tokens_s0": 4357.375, |
| "step": 2410, |
| "tokens_trained": 7.896356328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6864761364442238, |
| "grad_norm": 1.9066152572631836, |
| "loss": 1.1243, |
| "loss_ce": 0.9804560542106628, |
| "loss_region": 0.03004065528512001, |
| "loss_total": 1.010496735572815, |
| "lr": 0.001146662299690577, |
| "router/selected_tokens_s0": 4339.375, |
| "step": 2420, |
| "tokens_trained": 7.929121768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6893128146939933, |
| "grad_norm": 1.6631957292556763, |
| "loss": 1.1181, |
| "loss_ce": 1.1269235610961914, |
| "loss_region": 0.030016232281923294, |
| "loss_total": 1.1569397449493408, |
| "lr": 0.001146255407706726, |
| "router/selected_tokens_s0": 4304.375, |
| "step": 2430, |
| "tokens_trained": 7.961887208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6921494929437628, |
| "grad_norm": 1.932186245918274, |
| "loss": 1.1318, |
| "loss_ce": 1.1084073781967163, |
| "loss_region": 0.030037561431527138, |
| "loss_total": 1.1384449005126953, |
| "lr": 0.0011458485157228749, |
| "router/selected_tokens_s0": 4342.375, |
| "step": 2440, |
| "tokens_trained": 7.994651848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6949861711935323, |
| "grad_norm": 2.0729987621307373, |
| "loss": 1.1219, |
| "loss_ce": 1.0754549503326416, |
| "loss_region": 0.030010342597961426, |
| "loss_total": 1.105465292930603, |
| "lr": 0.0011454416237390238, |
| "router/selected_tokens_s0": 4284.25, |
| "step": 2450, |
| "tokens_trained": 8.027417288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.6978228494433019, |
| "grad_norm": 2.743365526199341, |
| "loss": 1.1183, |
| "loss_ce": 1.1507514715194702, |
| "loss_region": 0.030012760311365128, |
| "loss_total": 1.1807641983032227, |
| "lr": 0.001145034731755173, |
| "router/selected_tokens_s0": 4299.125, |
| "step": 2460, |
| "tokens_trained": 8.060182704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7006595276930714, |
| "grad_norm": 1.968074083328247, |
| "loss": 1.1248, |
| "loss_ce": 1.1554365158081055, |
| "loss_region": 0.03006228432059288, |
| "loss_total": 1.185498833656311, |
| "lr": 0.001144627839771322, |
| "router/selected_tokens_s0": 4397.5, |
| "step": 2470, |
| "tokens_trained": 8.092948144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7034962059428409, |
| "grad_norm": 0.6022619605064392, |
| "loss": 1.1233, |
| "loss_ce": 1.0739916563034058, |
| "loss_region": 0.030015716329216957, |
| "loss_total": 1.104007363319397, |
| "lr": 0.001144220947787471, |
| "router/selected_tokens_s0": 4304.25, |
| "step": 2480, |
| "tokens_trained": 8.125713584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7063328841926104, |
| "grad_norm": 2.9086802005767822, |
| "loss": 1.1155, |
| "loss_ce": 1.1227823495864868, |
| "loss_region": 0.030057305470108986, |
| "loss_total": 1.1528396606445312, |
| "lr": 0.00114381405580362, |
| "router/selected_tokens_s0": 4393.875, |
| "step": 2490, |
| "tokens_trained": 8.158479016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7091695624423799, |
| "grad_norm": 1.8187512159347534, |
| "loss": 1.1248, |
| "loss_ce": 1.0580413341522217, |
| "loss_region": 0.030032671988010406, |
| "loss_total": 1.088073968887329, |
| "lr": 0.0011434071638197687, |
| "router/selected_tokens_s0": 4340.0, |
| "step": 2500, |
| "tokens_trained": 8.191244456 |
| }, |
| { |
| "epoch": 0.7091695624423799, |
| "eval_ppl": 2.9815305929864326, |
| "eval_runtime": 2.4796, |
| "step": 2500, |
| "tokens_trained": 8.191244456 |
| }, |
| { |
| "epoch": 0.7091695624423799, |
| "eval_F": 0.34048558481131336, |
| "eval_F_cds": 0.3413653968998391, |
| "eval_F_dig": 0.3326561970987317, |
| "eval_F_exon": 0.34301915535870453, |
| "eval_F_intron": 0.3409895477582185, |
| "eval_F_nig": 0.34018024599300895, |
| "eval_F_promoter": 0.3386885010090298, |
| "eval_F_utr": 0.34306656745268094, |
| "eval_G": 0.37360140820500265, |
| "eval_G_cds": 0.37391617995023085, |
| "eval_G_dig": 0.39410936238508215, |
| "eval_G_exon": 0.37318875715857475, |
| "eval_G_intron": 0.3727733807645177, |
| "eval_G_nig": 0.3734594960312147, |
| "eval_G_promoter": 0.37594098275253596, |
| "eval_G_utr": 0.3722500326080449, |
| "eval_avg_bp_per_token": 2.9369819005822793, |
| "eval_bp_per_token/cds": 2.929412322050359, |
| "eval_bp_per_token/dig": 3.006106631175135, |
| "eval_bp_per_token/exon": 2.915289086273542, |
| "eval_bp_per_token/intron": 2.932641210190579, |
| "eval_bp_per_token/nig": 2.9396180753557073, |
| "eval_bp_per_token/promoter": 2.952565549231147, |
| "eval_bp_per_token/utr": 2.9148861908204733, |
| "eval_ppl_cds": 3.7211953918524787, |
| "eval_ppl_dig": 1.1071312956552213, |
| "eval_ppl_exon": 3.408594147596357, |
| "eval_ppl_intron": 2.996762231969892, |
| "eval_ppl_nig": 2.8097869859130795, |
| "eval_ppl_promoter": 3.341004188366384, |
| "eval_ppl_utr": 3.3285188682998834, |
| "step": 2500, |
| "tokens_trained": 8.191244456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7120062406921495, |
| "grad_norm": 1.3883668184280396, |
| "loss": 1.1168, |
| "loss_ce": 1.0345538854599, |
| "loss_region": 0.030011983588337898, |
| "loss_total": 1.064565896987915, |
| "lr": 0.0011430002718359176, |
| "router/selected_tokens_s0": 4293.25, |
| "step": 2510, |
| "tokens_trained": 8.224009896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.714842918941919, |
| "grad_norm": 0.5920007228851318, |
| "loss": 1.1128, |
| "loss_ce": 1.1446270942687988, |
| "loss_region": 0.030029037967324257, |
| "loss_total": 1.1746561527252197, |
| "lr": 0.0011425933798520666, |
| "router/selected_tokens_s0": 4338.125, |
| "step": 2520, |
| "tokens_trained": 8.256775336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7176795971916885, |
| "grad_norm": 2.293912410736084, |
| "loss": 1.119, |
| "loss_ce": 1.1278671026229858, |
| "loss_region": 0.030034860596060753, |
| "loss_total": 1.1579020023345947, |
| "lr": 0.0011421864878682158, |
| "router/selected_tokens_s0": 4356.25, |
| "step": 2530, |
| "tokens_trained": 8.289540776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.720516275441458, |
| "grad_norm": 1.4504122734069824, |
| "loss": 1.1161, |
| "loss_ce": 0.9545093774795532, |
| "loss_region": 0.030026227235794067, |
| "loss_total": 0.9845355749130249, |
| "lr": 0.0011417795958843647, |
| "router/selected_tokens_s0": 4322.375, |
| "step": 2540, |
| "tokens_trained": 8.322306216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7233529536912275, |
| "grad_norm": 1.777256727218628, |
| "loss": 1.1177, |
| "loss_ce": 1.0747570991516113, |
| "loss_region": 0.030009755864739418, |
| "loss_total": 1.104766845703125, |
| "lr": 0.0011413727039005137, |
| "router/selected_tokens_s0": 4293.375, |
| "step": 2550, |
| "tokens_trained": 8.355071656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7261896319409971, |
| "grad_norm": 1.637231707572937, |
| "loss": 1.1121, |
| "loss_ce": 1.1526259183883667, |
| "loss_region": 0.030018767341971397, |
| "loss_total": 1.1826447248458862, |
| "lr": 0.0011409658119166627, |
| "router/selected_tokens_s0": 4318.5, |
| "step": 2560, |
| "tokens_trained": 8.387835072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7290263101907666, |
| "grad_norm": 1.0746310949325562, |
| "loss": 1.1151, |
| "loss_ce": 1.1064670085906982, |
| "loss_region": 0.03001333586871624, |
| "loss_total": 1.1364803314208984, |
| "lr": 0.0011405589199328116, |
| "router/selected_tokens_s0": 4294.375, |
| "step": 2570, |
| "tokens_trained": 8.420600512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7318629884405361, |
| "grad_norm": 1.3798960447311401, |
| "loss": 1.1198, |
| "loss_ce": 1.073905110359192, |
| "loss_region": 0.030032221227884293, |
| "loss_total": 1.1039373874664307, |
| "lr": 0.0011401520279489606, |
| "router/selected_tokens_s0": 4356.375, |
| "step": 2580, |
| "tokens_trained": 8.453365928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7346996666903056, |
| "grad_norm": 1.8040990829467773, |
| "loss": 1.1175, |
| "loss_ce": 1.0255845785140991, |
| "loss_region": 0.03001689724624157, |
| "loss_total": 1.0556014776229858, |
| "lr": 0.0011397451359651096, |
| "router/selected_tokens_s0": 4312.5, |
| "step": 2590, |
| "tokens_trained": 8.486131368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7375363449400751, |
| "grad_norm": 2.420259952545166, |
| "loss": 1.1193, |
| "loss_ce": 1.0581092834472656, |
| "loss_region": 0.030017009004950523, |
| "loss_total": 1.088126301765442, |
| "lr": 0.0011393382439812585, |
| "router/selected_tokens_s0": 4316.25, |
| "step": 2600, |
| "tokens_trained": 8.518896808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7403730231898447, |
| "grad_norm": 2.068054437637329, |
| "loss": 1.1114, |
| "loss_ce": 1.0681673288345337, |
| "loss_region": 0.030040811747312546, |
| "loss_total": 1.0982081890106201, |
| "lr": 0.0011389313519974075, |
| "router/selected_tokens_s0": 4369.0, |
| "step": 2610, |
| "tokens_trained": 8.551662248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7432097014396142, |
| "grad_norm": 1.7490754127502441, |
| "loss": 1.1182, |
| "loss_ce": 1.0639960765838623, |
| "loss_region": 0.030034611001610756, |
| "loss_total": 1.094030737876892, |
| "lr": 0.0011385244600135565, |
| "router/selected_tokens_s0": 4363.5, |
| "step": 2620, |
| "tokens_trained": 8.584426888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7460463796893837, |
| "grad_norm": 1.4811182022094727, |
| "loss": 1.1131, |
| "loss_ce": 1.0907317399978638, |
| "loss_region": 0.03001326695084572, |
| "loss_total": 1.120745062828064, |
| "lr": 0.0011381175680297054, |
| "router/selected_tokens_s0": 4307.875, |
| "step": 2630, |
| "tokens_trained": 8.617192328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7488830579391532, |
| "grad_norm": 2.1497602462768555, |
| "loss": 1.1096, |
| "loss_ce": 1.123599886894226, |
| "loss_region": 0.030037013813853264, |
| "loss_total": 1.1536369323730469, |
| "lr": 0.0011377106760458544, |
| "router/selected_tokens_s0": 4368.5, |
| "step": 2640, |
| "tokens_trained": 8.649951656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7517197361889227, |
| "grad_norm": 2.179588556289673, |
| "loss": 1.1129, |
| "loss_ce": 0.9365400671958923, |
| "loss_region": 0.030036216601729393, |
| "loss_total": 0.9665762782096863, |
| "lr": 0.0011373037840620034, |
| "router/selected_tokens_s0": 4350.375, |
| "step": 2650, |
| "tokens_trained": 8.682717096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7545564144386923, |
| "grad_norm": 1.6021926403045654, |
| "loss": 1.1095, |
| "loss_ce": 1.1449388265609741, |
| "loss_region": 0.030037803575396538, |
| "loss_total": 1.1749765872955322, |
| "lr": 0.0011368968920781523, |
| "router/selected_tokens_s0": 4373.0, |
| "step": 2660, |
| "tokens_trained": 8.715482536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7573930926884618, |
| "grad_norm": 1.2494678497314453, |
| "loss": 1.1097, |
| "loss_ce": 1.0806819200515747, |
| "loss_region": 0.03000866435468197, |
| "loss_total": 1.1106905937194824, |
| "lr": 0.0011364900000943013, |
| "router/selected_tokens_s0": 4295.25, |
| "step": 2670, |
| "tokens_trained": 8.748247976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7602297709382313, |
| "grad_norm": 1.3196409940719604, |
| "loss": 1.1136, |
| "loss_ce": 1.069360375404358, |
| "loss_region": 0.030009115114808083, |
| "loss_total": 1.0993695259094238, |
| "lr": 0.0011360831081104503, |
| "router/selected_tokens_s0": 4291.75, |
| "step": 2680, |
| "tokens_trained": 8.781013416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7630664491880008, |
| "grad_norm": 2.674771308898926, |
| "loss": 1.1188, |
| "loss_ce": 1.1771190166473389, |
| "loss_region": 0.030019955709576607, |
| "loss_total": 1.207139015197754, |
| "lr": 0.0011356762161265992, |
| "router/selected_tokens_s0": 4330.125, |
| "step": 2690, |
| "tokens_trained": 8.813778696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7659031274377703, |
| "grad_norm": 1.6932164430618286, |
| "loss": 1.1031, |
| "loss_ce": 1.0857900381088257, |
| "loss_region": 0.0300260242074728, |
| "loss_total": 1.1158161163330078, |
| "lr": 0.0011352693241427482, |
| "router/selected_tokens_s0": 4347.5, |
| "step": 2700, |
| "tokens_trained": 8.846544136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7687398056875399, |
| "grad_norm": 1.5329583883285522, |
| "loss": 1.1098, |
| "loss_ce": 1.0980348587036133, |
| "loss_region": 0.030030502006411552, |
| "loss_total": 1.1280653476715088, |
| "lr": 0.0011348624321588974, |
| "router/selected_tokens_s0": 4366.375, |
| "step": 2710, |
| "tokens_trained": 8.879309576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7715764839373094, |
| "grad_norm": 1.829464077949524, |
| "loss": 1.1093, |
| "loss_ce": 1.1128755807876587, |
| "loss_region": 0.03000422567129135, |
| "loss_total": 1.142879843711853, |
| "lr": 0.0011344555401750463, |
| "router/selected_tokens_s0": 4282.0, |
| "step": 2720, |
| "tokens_trained": 8.912075016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7744131621870789, |
| "grad_norm": 2.8766870498657227, |
| "loss": 1.1187, |
| "loss_ce": 1.12075674533844, |
| "loss_region": 0.030019240453839302, |
| "loss_total": 1.1507760286331177, |
| "lr": 0.0011340486481911953, |
| "router/selected_tokens_s0": 4327.25, |
| "step": 2730, |
| "tokens_trained": 8.944840456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7772498404368484, |
| "grad_norm": 2.2969014644622803, |
| "loss": 1.1166, |
| "loss_ce": 1.0795077085494995, |
| "loss_region": 0.030028002336621284, |
| "loss_total": 1.1095356941223145, |
| "lr": 0.001133641756207344, |
| "router/selected_tokens_s0": 4352.75, |
| "step": 2740, |
| "tokens_trained": 8.977605896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7800865186866179, |
| "grad_norm": 1.7521798610687256, |
| "loss": 1.1139, |
| "loss_ce": 1.1274807453155518, |
| "loss_region": 0.030016858130693436, |
| "loss_total": 1.1574976444244385, |
| "lr": 0.001133234864223493, |
| "router/selected_tokens_s0": 4320.0, |
| "step": 2750, |
| "tokens_trained": 9.010371336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7829231969363875, |
| "grad_norm": 2.6245367527008057, |
| "loss": 1.1075, |
| "loss_ce": 1.1328058242797852, |
| "loss_region": 0.03003484010696411, |
| "loss_total": 1.1628406047821045, |
| "lr": 0.001132827972239642, |
| "router/selected_tokens_s0": 4367.625, |
| "step": 2760, |
| "tokens_trained": 9.043136776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.785759875186157, |
| "grad_norm": 1.162583351135254, |
| "loss": 1.1181, |
| "loss_ce": 1.151093602180481, |
| "loss_region": 0.030036624521017075, |
| "loss_total": 1.1811301708221436, |
| "lr": 0.001132421080255791, |
| "router/selected_tokens_s0": 4392.5, |
| "step": 2770, |
| "tokens_trained": 9.075902216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7885965534359265, |
| "grad_norm": 1.4981096982955933, |
| "loss": 1.1104, |
| "loss_ce": 1.0844680070877075, |
| "loss_region": 0.030015481635928154, |
| "loss_total": 1.1144834756851196, |
| "lr": 0.0011320141882719401, |
| "router/selected_tokens_s0": 4314.625, |
| "step": 2780, |
| "tokens_trained": 9.108667656 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.791433231685696, |
| "grad_norm": 1.8612878322601318, |
| "loss": 1.1073, |
| "loss_ce": 1.0089409351348877, |
| "loss_region": 0.029995379969477654, |
| "loss_total": 1.0389362573623657, |
| "lr": 0.001131607296288089, |
| "router/selected_tokens_s0": 4257.875, |
| "step": 2790, |
| "tokens_trained": 9.14143004 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7942699099354655, |
| "grad_norm": 0.6861640810966492, |
| "loss": 1.1058, |
| "loss_ce": 0.9385975003242493, |
| "loss_region": 0.029996687546372414, |
| "loss_total": 0.9685941934585571, |
| "lr": 0.001131200404304238, |
| "router/selected_tokens_s0": 4290.625, |
| "step": 2800, |
| "tokens_trained": 9.17419548 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.797106588185235, |
| "grad_norm": 2.205390214920044, |
| "loss": 1.108, |
| "loss_ce": 1.0670945644378662, |
| "loss_region": 0.030025651678442955, |
| "loss_total": 1.0971201658248901, |
| "lr": 0.001130793512320387, |
| "router/selected_tokens_s0": 4342.875, |
| "step": 2810, |
| "tokens_trained": 9.20696092 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.7999432664350046, |
| "grad_norm": 2.068150520324707, |
| "loss": 1.106, |
| "loss_ce": 1.0238165855407715, |
| "loss_region": 0.03002651408314705, |
| "loss_total": 1.0538431406021118, |
| "lr": 0.001130386620336536, |
| "router/selected_tokens_s0": 4359.75, |
| "step": 2820, |
| "tokens_trained": 9.23972636 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8027799446847741, |
| "grad_norm": 1.1060576438903809, |
| "loss": 1.1065, |
| "loss_ce": 1.0474674701690674, |
| "loss_region": 0.03000919334590435, |
| "loss_total": 1.0774766206741333, |
| "lr": 0.001129979728352685, |
| "router/selected_tokens_s0": 4301.75, |
| "step": 2830, |
| "tokens_trained": 9.2724918 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8056166229345436, |
| "grad_norm": 1.369165301322937, |
| "loss": 1.1081, |
| "loss_ce": 1.0370676517486572, |
| "loss_region": 0.030027758330106735, |
| "loss_total": 1.067095398902893, |
| "lr": 0.001129572836368834, |
| "router/selected_tokens_s0": 4375.625, |
| "step": 2840, |
| "tokens_trained": 9.30525692 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8084533011843131, |
| "grad_norm": 2.285675525665283, |
| "loss": 1.109, |
| "loss_ce": 1.0967503786087036, |
| "loss_region": 0.0300269927829504, |
| "loss_total": 1.1267774105072021, |
| "lr": 0.0011291659443849829, |
| "router/selected_tokens_s0": 4370.0, |
| "step": 2850, |
| "tokens_trained": 9.33802236 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8112899794340827, |
| "grad_norm": 0.8950642943382263, |
| "loss": 1.1015, |
| "loss_ce": 1.091797947883606, |
| "loss_region": 0.03003113530576229, |
| "loss_total": 1.1218290328979492, |
| "lr": 0.0011287590524011318, |
| "router/selected_tokens_s0": 4384.125, |
| "step": 2860, |
| "tokens_trained": 9.3707878 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8141266576838522, |
| "grad_norm": 2.1465282440185547, |
| "loss": 1.1012, |
| "loss_ce": 0.9929934144020081, |
| "loss_region": 0.030015377327799797, |
| "loss_total": 1.0230088233947754, |
| "lr": 0.0011283521604172808, |
| "router/selected_tokens_s0": 4333.0, |
| "step": 2870, |
| "tokens_trained": 9.403548272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8169633359336217, |
| "grad_norm": 2.1108782291412354, |
| "loss": 1.1029, |
| "loss_ce": 1.0729644298553467, |
| "loss_region": 0.03002534806728363, |
| "loss_total": 1.1029897928237915, |
| "lr": 0.0011279452684334298, |
| "router/selected_tokens_s0": 4357.25, |
| "step": 2880, |
| "tokens_trained": 9.436313712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8198000141833912, |
| "grad_norm": 1.7104750871658325, |
| "loss": 1.1041, |
| "loss_ce": 1.116651177406311, |
| "loss_region": 0.030016740784049034, |
| "loss_total": 1.1466679573059082, |
| "lr": 0.0011275383764495787, |
| "router/selected_tokens_s0": 4340.875, |
| "step": 2890, |
| "tokens_trained": 9.469079152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8226366924331607, |
| "grad_norm": 1.7549395561218262, |
| "loss": 1.1098, |
| "loss_ce": 0.977597713470459, |
| "loss_region": 0.030016236007213593, |
| "loss_total": 1.0076138973236084, |
| "lr": 0.0011271314844657277, |
| "router/selected_tokens_s0": 4328.875, |
| "step": 2900, |
| "tokens_trained": 9.50184356 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8254733706829303, |
| "grad_norm": 2.076667547225952, |
| "loss": 1.1041, |
| "loss_ce": 0.9882082343101501, |
| "loss_region": 0.03001856803894043, |
| "loss_total": 1.0182268619537354, |
| "lr": 0.0011267245924818767, |
| "router/selected_tokens_s0": 4341.75, |
| "step": 2910, |
| "tokens_trained": 9.534608992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8283100489326998, |
| "grad_norm": 1.930834412574768, |
| "loss": 1.1031, |
| "loss_ce": 1.1864138841629028, |
| "loss_region": 0.03002503328025341, |
| "loss_total": 1.216438889503479, |
| "lr": 0.0011263177004980256, |
| "router/selected_tokens_s0": 4379.125, |
| "step": 2920, |
| "tokens_trained": 9.567373632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8311467271824693, |
| "grad_norm": 0.7202333807945251, |
| "loss": 1.103, |
| "loss_ce": 1.0883651971817017, |
| "loss_region": 0.030032740905880928, |
| "loss_total": 1.1183979511260986, |
| "lr": 0.0011259108085141746, |
| "router/selected_tokens_s0": 4386.375, |
| "step": 2930, |
| "tokens_trained": 9.600139072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8339834054322388, |
| "grad_norm": 1.0626195669174194, |
| "loss": 1.1043, |
| "loss_ce": 1.0197147130966187, |
| "loss_region": 0.03001200221478939, |
| "loss_total": 1.0497267246246338, |
| "lr": 0.0011255039165303236, |
| "router/selected_tokens_s0": 4317.625, |
| "step": 2940, |
| "tokens_trained": 9.632904512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8368200836820083, |
| "grad_norm": 2.428861379623413, |
| "loss": 1.1036, |
| "loss_ce": 0.9022196531295776, |
| "loss_region": 0.030008511617779732, |
| "loss_total": 0.932228147983551, |
| "lr": 0.0011250970245464725, |
| "router/selected_tokens_s0": 4322.125, |
| "step": 2950, |
| "tokens_trained": 9.665669952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8396567619317779, |
| "grad_norm": 0.9146430492401123, |
| "loss": 1.1015, |
| "loss_ce": 1.1206673383712769, |
| "loss_region": 0.030019836500287056, |
| "loss_total": 1.1506872177124023, |
| "lr": 0.0011246901325626217, |
| "router/selected_tokens_s0": 4355.75, |
| "step": 2960, |
| "tokens_trained": 9.698432616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8424934401815474, |
| "grad_norm": 1.3183574676513672, |
| "loss": 1.0992, |
| "loss_ce": 1.088549256324768, |
| "loss_region": 0.030013682320713997, |
| "loss_total": 1.118562936782837, |
| "lr": 0.0011242832405787707, |
| "router/selected_tokens_s0": 4324.875, |
| "step": 2970, |
| "tokens_trained": 9.731196464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8453301184313169, |
| "grad_norm": 1.7237669229507446, |
| "loss": 1.1016, |
| "loss_ce": 1.1303554773330688, |
| "loss_region": 0.030019812285900116, |
| "loss_total": 1.1603752374649048, |
| "lr": 0.0011238763485949196, |
| "router/selected_tokens_s0": 4352.75, |
| "step": 2980, |
| "tokens_trained": 9.763955848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8481667966810864, |
| "grad_norm": 2.353868246078491, |
| "loss": 1.097, |
| "loss_ce": 1.1438567638397217, |
| "loss_region": 0.03001641482114792, |
| "loss_total": 1.1738731861114502, |
| "lr": 0.0011234694566110684, |
| "router/selected_tokens_s0": 4346.25, |
| "step": 2990, |
| "tokens_trained": 9.796721288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8510034749308559, |
| "grad_norm": 2.239737033843994, |
| "loss": 1.099, |
| "loss_ce": 1.137770414352417, |
| "loss_region": 0.03001844510436058, |
| "loss_total": 1.1677888631820679, |
| "lr": 0.0011230625646272174, |
| "router/selected_tokens_s0": 4345.625, |
| "step": 3000, |
| "tokens_trained": 9.829486728 |
| }, |
| { |
| "epoch": 0.8510034749308559, |
| "eval_ppl": 2.91798250107805, |
| "eval_runtime": 2.489, |
| "step": 3000, |
| "tokens_trained": 9.829486728 |
| }, |
| { |
| "epoch": 0.8510034749308559, |
| "eval_F": 0.34119725725854944, |
| "eval_F_cds": 0.339909922293828, |
| "eval_F_dig": 0.3374221944422741, |
| "eval_F_exon": 0.3444720286625102, |
| "eval_F_intron": 0.3423051363848719, |
| "eval_F_nig": 0.3420074982635899, |
| "eval_F_promoter": 0.33568609090152685, |
| "eval_F_utr": 0.3433317082766702, |
| "eval_G": 0.35626090599344656, |
| "eval_G_cds": 0.3533774528284723, |
| "eval_G_dig": 0.39929882420827145, |
| "eval_G_exon": 0.35481589922102014, |
| "eval_G_intron": 0.3559872186522367, |
| "eval_G_nig": 0.35704285773301014, |
| "eval_G_promoter": 0.354736183175574, |
| "eval_G_utr": 0.3543053844969594, |
| "eval_avg_bp_per_token": 2.930855916119598, |
| "eval_bp_per_token/cds": 2.9419558959963843, |
| "eval_bp_per_token/dig": 2.9636461870947826, |
| "eval_bp_per_token/exon": 2.9029933254166496, |
| "eval_bp_per_token/intron": 2.921370127720335, |
| "eval_bp_per_token/nig": 2.923912502144284, |
| "eval_bp_per_token/promoter": 2.9789735920078644, |
| "eval_bp_per_token/utr": 2.9126351452344177, |
| "eval_ppl_cds": 3.5636364626812047, |
| "eval_ppl_dig": 1.0968188962634289, |
| "eval_ppl_exon": 3.3285669782872387, |
| "eval_ppl_intron": 2.935885553210843, |
| "eval_ppl_nig": 2.7347129604188645, |
| "eval_ppl_promoter": 3.292230226733986, |
| "eval_ppl_utr": 3.2942767869833, |
| "step": 3000, |
| "tokens_trained": 9.829486728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8538401531806254, |
| "grad_norm": 1.0114418268203735, |
| "loss": 1.092, |
| "loss_ce": 1.0748389959335327, |
| "loss_region": 0.030020562931895256, |
| "loss_total": 1.1048595905303955, |
| "lr": 0.0011226556726433663, |
| "router/selected_tokens_s0": 4357.25, |
| "step": 3010, |
| "tokens_trained": 9.862252168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.856676831430395, |
| "grad_norm": 2.267549753189087, |
| "loss": 1.0971, |
| "loss_ce": 1.088890552520752, |
| "loss_region": 0.03001115657389164, |
| "loss_total": 1.1189017295837402, |
| "lr": 0.0011222487806595153, |
| "router/selected_tokens_s0": 4332.125, |
| "step": 3020, |
| "tokens_trained": 9.895016808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8595135096801645, |
| "grad_norm": 1.3197458982467651, |
| "loss": 1.0948, |
| "loss_ce": 0.9036920070648193, |
| "loss_region": 0.029985321685671806, |
| "loss_total": 0.9336773157119751, |
| "lr": 0.0011218418886756645, |
| "router/selected_tokens_s0": 4263.875, |
| "step": 3030, |
| "tokens_trained": 9.927782248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.862350187929934, |
| "grad_norm": 1.6852810382843018, |
| "loss": 1.1014, |
| "loss_ce": 1.1086361408233643, |
| "loss_region": 0.030025212094187737, |
| "loss_total": 1.1386613845825195, |
| "lr": 0.0011214349966918134, |
| "router/selected_tokens_s0": 4387.0, |
| "step": 3040, |
| "tokens_trained": 9.960547688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8651868661797035, |
| "grad_norm": 1.753929853439331, |
| "loss": 1.1021, |
| "loss_ce": 1.020461082458496, |
| "loss_region": 0.03000788949429989, |
| "loss_total": 1.050468921661377, |
| "lr": 0.0011210281047079624, |
| "router/selected_tokens_s0": 4309.625, |
| "step": 3050, |
| "tokens_trained": 9.993313128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.868023544429473, |
| "grad_norm": 1.005724310874939, |
| "loss": 1.1004, |
| "loss_ce": 1.0221396684646606, |
| "loss_region": 0.030008656904101372, |
| "loss_total": 1.0521483421325684, |
| "lr": 0.0011206212127241114, |
| "router/selected_tokens_s0": 4308.25, |
| "step": 3060, |
| "tokens_trained": 10.026078568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8708602226792426, |
| "grad_norm": 1.1538729667663574, |
| "loss": 1.0968, |
| "loss_ce": 1.045743465423584, |
| "loss_region": 0.03000555746257305, |
| "loss_total": 1.0757490396499634, |
| "lr": 0.0011202143207402603, |
| "router/selected_tokens_s0": 4306.375, |
| "step": 3070, |
| "tokens_trained": 10.058844008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8736969009290121, |
| "grad_norm": 1.8730782270431519, |
| "loss": 1.1067, |
| "loss_ce": 1.0756598711013794, |
| "loss_region": 0.03001215122640133, |
| "loss_total": 1.105672001838684, |
| "lr": 0.0011198074287564093, |
| "router/selected_tokens_s0": 4321.25, |
| "step": 3080, |
| "tokens_trained": 10.091604144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8765335791787816, |
| "grad_norm": 2.151102066040039, |
| "loss": 1.1026, |
| "loss_ce": 1.1392779350280762, |
| "loss_region": 0.030021771788597107, |
| "loss_total": 1.1692997217178345, |
| "lr": 0.0011194005367725583, |
| "router/selected_tokens_s0": 4390.625, |
| "step": 3090, |
| "tokens_trained": 10.124368784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8793702574285511, |
| "grad_norm": 2.1336331367492676, |
| "loss": 1.0989, |
| "loss_ce": 1.0690431594848633, |
| "loss_region": 0.030014000833034515, |
| "loss_total": 1.0990571975708008, |
| "lr": 0.0011189936447887072, |
| "router/selected_tokens_s0": 4350.0, |
| "step": 3100, |
| "tokens_trained": 10.157134224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8822069356783206, |
| "grad_norm": 2.0660271644592285, |
| "loss": 1.0952, |
| "loss_ce": 1.130868911743164, |
| "loss_region": 0.030013030394911766, |
| "loss_total": 1.1608819961547852, |
| "lr": 0.0011185867528048562, |
| "router/selected_tokens_s0": 4349.5, |
| "step": 3110, |
| "tokens_trained": 10.189899664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8850436139280902, |
| "grad_norm": 0.5485074520111084, |
| "loss": 1.0989, |
| "loss_ce": 1.0510705709457397, |
| "loss_region": 0.030014289543032646, |
| "loss_total": 1.0810848474502563, |
| "lr": 0.0011181798608210052, |
| "router/selected_tokens_s0": 4352.875, |
| "step": 3120, |
| "tokens_trained": 10.222665104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8878802921778597, |
| "grad_norm": 1.4256670475006104, |
| "loss": 1.098, |
| "loss_ce": 1.012781023979187, |
| "loss_region": 0.030008379369974136, |
| "loss_total": 1.0427894592285156, |
| "lr": 0.0011177729688371541, |
| "router/selected_tokens_s0": 4316.25, |
| "step": 3130, |
| "tokens_trained": 10.255429744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8907169704276292, |
| "grad_norm": 1.5442920923233032, |
| "loss": 1.0946, |
| "loss_ce": 0.9727160334587097, |
| "loss_region": 0.030018145218491554, |
| "loss_total": 1.0027341842651367, |
| "lr": 0.001117366076853303, |
| "router/selected_tokens_s0": 4410.125, |
| "step": 3140, |
| "tokens_trained": 10.288194384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8935536486773987, |
| "grad_norm": 1.4367228746414185, |
| "loss": 1.0967, |
| "loss_ce": 1.1256974935531616, |
| "loss_region": 0.03001110814511776, |
| "loss_total": 1.1557085514068604, |
| "lr": 0.001116959184869452, |
| "router/selected_tokens_s0": 4353.0, |
| "step": 3150, |
| "tokens_trained": 10.320959664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8963903269271682, |
| "grad_norm": 1.1446796655654907, |
| "loss": 1.0908, |
| "loss_ce": 1.0701184272766113, |
| "loss_region": 0.03002384677529335, |
| "loss_total": 1.100142240524292, |
| "lr": 0.001116552292885601, |
| "router/selected_tokens_s0": 4393.625, |
| "step": 3160, |
| "tokens_trained": 10.353725104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.8992270051769378, |
| "grad_norm": 1.2145036458969116, |
| "loss": 1.0973, |
| "loss_ce": 1.064907193183899, |
| "loss_region": 0.030015455558896065, |
| "loss_total": 1.094922661781311, |
| "lr": 0.00111614540090175, |
| "router/selected_tokens_s0": 4343.5, |
| "step": 3170, |
| "tokens_trained": 10.386490544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9020636834267073, |
| "grad_norm": 1.4071613550186157, |
| "loss": 1.1022, |
| "loss_ce": 1.0602645874023438, |
| "loss_region": 0.030010098591446877, |
| "loss_total": 1.090274691581726, |
| "lr": 0.001115738508917899, |
| "router/selected_tokens_s0": 4317.875, |
| "step": 3180, |
| "tokens_trained": 10.419255984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9049003616764768, |
| "grad_norm": 1.6725516319274902, |
| "loss": 1.0985, |
| "loss_ce": 1.0676279067993164, |
| "loss_region": 0.030014997348189354, |
| "loss_total": 1.0976428985595703, |
| "lr": 0.001115331616934048, |
| "router/selected_tokens_s0": 4344.375, |
| "step": 3190, |
| "tokens_trained": 10.452021424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9077370399262463, |
| "grad_norm": 1.2930175065994263, |
| "loss": 1.0925, |
| "loss_ce": 1.0800068378448486, |
| "loss_region": 0.030022740364074707, |
| "loss_total": 1.1100295782089233, |
| "lr": 0.0011149247249501969, |
| "router/selected_tokens_s0": 4410.0, |
| "step": 3200, |
| "tokens_trained": 10.484786064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9105737181760158, |
| "grad_norm": 1.8128279447555542, |
| "loss": 1.0904, |
| "loss_ce": 0.978069007396698, |
| "loss_region": 0.03001835197210312, |
| "loss_total": 1.008087396621704, |
| "lr": 0.001114517832966346, |
| "router/selected_tokens_s0": 4359.875, |
| "step": 3210, |
| "tokens_trained": 10.517551504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9134103964257854, |
| "grad_norm": 2.7744452953338623, |
| "loss": 1.0922, |
| "loss_ce": 1.0043200254440308, |
| "loss_region": 0.030023187398910522, |
| "loss_total": 1.0343432426452637, |
| "lr": 0.001114110940982495, |
| "router/selected_tokens_s0": 4378.0, |
| "step": 3220, |
| "tokens_trained": 10.550316944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9162470746755549, |
| "grad_norm": 1.3683607578277588, |
| "loss": 1.1049, |
| "loss_ce": 1.1094452142715454, |
| "loss_region": 0.03001292422413826, |
| "loss_total": 1.139458179473877, |
| "lr": 0.001113704048998644, |
| "router/selected_tokens_s0": 4363.5, |
| "step": 3230, |
| "tokens_trained": 10.583082384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9190837529253244, |
| "grad_norm": 0.2690750062465668, |
| "loss": 1.0941, |
| "loss_ce": 1.1386804580688477, |
| "loss_region": 0.030020570382475853, |
| "loss_total": 1.1687010526657104, |
| "lr": 0.0011132971570147927, |
| "router/selected_tokens_s0": 4375.5, |
| "step": 3240, |
| "tokens_trained": 10.615847824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9219204311750939, |
| "grad_norm": 3.4670774936676025, |
| "loss": 1.1104, |
| "loss_ce": 1.0814176797866821, |
| "loss_region": 0.030020495876669884, |
| "loss_total": 1.1114381551742554, |
| "lr": 0.0011128902650309417, |
| "router/selected_tokens_s0": 4377.625, |
| "step": 3250, |
| "tokens_trained": 10.648613248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9247571094248634, |
| "grad_norm": 0.8661336302757263, |
| "loss": 1.0912, |
| "loss_ce": 0.8863070011138916, |
| "loss_region": 0.030015867203474045, |
| "loss_total": 0.9163228869438171, |
| "lr": 0.0011124833730470907, |
| "router/selected_tokens_s0": 4347.25, |
| "step": 3260, |
| "tokens_trained": 10.681378688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.927593787674633, |
| "grad_norm": 1.5195131301879883, |
| "loss": 1.0884, |
| "loss_ce": 0.9159345626831055, |
| "loss_region": 0.03002041205763817, |
| "loss_total": 0.9459549784660339, |
| "lr": 0.0011120764810632396, |
| "router/selected_tokens_s0": 4345.625, |
| "step": 3270, |
| "tokens_trained": 10.714144128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9304304659244025, |
| "grad_norm": 1.2169824838638306, |
| "loss": 1.0911, |
| "loss_ce": 1.0144977569580078, |
| "loss_region": 0.03001958690583706, |
| "loss_total": 1.0445173978805542, |
| "lr": 0.0011116695890793888, |
| "router/selected_tokens_s0": 4394.125, |
| "step": 3280, |
| "tokens_trained": 10.746909568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.933267144174172, |
| "grad_norm": 0.5433168411254883, |
| "loss": 1.0815, |
| "loss_ce": 1.0072896480560303, |
| "loss_region": 0.03001921810209751, |
| "loss_total": 1.0373088121414185, |
| "lr": 0.0011112626970955378, |
| "router/selected_tokens_s0": 4374.375, |
| "step": 3290, |
| "tokens_trained": 10.779675008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9361038224239415, |
| "grad_norm": 1.009089469909668, |
| "loss": 1.0887, |
| "loss_ce": 1.1322096586227417, |
| "loss_region": 0.030020419508218765, |
| "loss_total": 1.162230134010315, |
| "lr": 0.0011108558051116867, |
| "router/selected_tokens_s0": 4387.875, |
| "step": 3300, |
| "tokens_trained": 10.812440448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.938940500673711, |
| "grad_norm": 1.1186658143997192, |
| "loss": 1.0913, |
| "loss_ce": 1.0417039394378662, |
| "loss_region": 0.030010957270860672, |
| "loss_total": 1.0717148780822754, |
| "lr": 0.0011104489131278357, |
| "router/selected_tokens_s0": 4330.5, |
| "step": 3310, |
| "tokens_trained": 10.845202832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9417771789234806, |
| "grad_norm": 1.406549334526062, |
| "loss": 1.0884, |
| "loss_ce": 1.1175626516342163, |
| "loss_region": 0.030026914551854134, |
| "loss_total": 1.1475895643234253, |
| "lr": 0.0011100420211439847, |
| "router/selected_tokens_s0": 4413.375, |
| "step": 3320, |
| "tokens_trained": 10.877968272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9446138571732501, |
| "grad_norm": 1.7244771718978882, |
| "loss": 1.0946, |
| "loss_ce": 1.044142246246338, |
| "loss_region": 0.03001406043767929, |
| "loss_total": 1.0741562843322754, |
| "lr": 0.0011096351291601336, |
| "router/selected_tokens_s0": 4364.875, |
| "step": 3330, |
| "tokens_trained": 10.910733712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9474505354230196, |
| "grad_norm": 1.0954854488372803, |
| "loss": 1.0904, |
| "loss_ce": 1.0164037942886353, |
| "loss_region": 0.030010055750608444, |
| "loss_total": 1.0464138984680176, |
| "lr": 0.0011092282371762826, |
| "router/selected_tokens_s0": 4342.75, |
| "step": 3340, |
| "tokens_trained": 10.943499152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9502872136727891, |
| "grad_norm": 2.0860025882720947, |
| "loss": 1.0943, |
| "loss_ce": 1.155411958694458, |
| "loss_region": 0.030009519308805466, |
| "loss_total": 1.1854214668273926, |
| "lr": 0.0011088213451924316, |
| "router/selected_tokens_s0": 4352.25, |
| "step": 3350, |
| "tokens_trained": 10.976264592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9531238919225586, |
| "grad_norm": 1.1401242017745972, |
| "loss": 1.0872, |
| "loss_ce": 1.0502551794052124, |
| "loss_region": 0.030008839443325996, |
| "loss_total": 1.0802639722824097, |
| "lr": 0.0011084144532085805, |
| "router/selected_tokens_s0": 4341.5, |
| "step": 3360, |
| "tokens_trained": 11.009030032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9559605701723282, |
| "grad_norm": 1.704352617263794, |
| "loss": 1.0874, |
| "loss_ce": 0.9294300079345703, |
| "loss_region": 0.030014311894774437, |
| "loss_total": 0.9594443440437317, |
| "lr": 0.0011080075612247295, |
| "router/selected_tokens_s0": 4352.125, |
| "step": 3370, |
| "tokens_trained": 11.041794728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9587972484220977, |
| "grad_norm": 0.9261600971221924, |
| "loss": 1.0935, |
| "loss_ce": 1.0589406490325928, |
| "loss_region": 0.030018316581845284, |
| "loss_total": 1.088958978652954, |
| "lr": 0.0011076006692408785, |
| "router/selected_tokens_s0": 4379.125, |
| "step": 3380, |
| "tokens_trained": 11.074559368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9616339266718672, |
| "grad_norm": 0.7537907958030701, |
| "loss": 1.0865, |
| "loss_ce": 1.0465339422225952, |
| "loss_region": 0.030020853504538536, |
| "loss_total": 1.076554775238037, |
| "lr": 0.0011071937772570274, |
| "router/selected_tokens_s0": 4404.75, |
| "step": 3390, |
| "tokens_trained": 11.107324808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9644706049216367, |
| "grad_norm": 1.0815021991729736, |
| "loss": 1.0952, |
| "loss_ce": 1.0314733982086182, |
| "loss_region": 0.030017558485269547, |
| "loss_total": 1.0614910125732422, |
| "lr": 0.0011067868852731764, |
| "router/selected_tokens_s0": 4383.625, |
| "step": 3400, |
| "tokens_trained": 11.140090248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9673072831714062, |
| "grad_norm": 1.633634090423584, |
| "loss": 1.088, |
| "loss_ce": 0.9958590269088745, |
| "loss_region": 0.030007656663656235, |
| "loss_total": 1.0258666276931763, |
| "lr": 0.0011063799932893254, |
| "router/selected_tokens_s0": 4323.5, |
| "step": 3410, |
| "tokens_trained": 11.172855688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9701439614211758, |
| "grad_norm": 1.0511754751205444, |
| "loss": 1.0871, |
| "loss_ce": 1.0179994106292725, |
| "loss_region": 0.030009476467967033, |
| "loss_total": 1.048008918762207, |
| "lr": 0.0011059731013054743, |
| "router/selected_tokens_s0": 4384.875, |
| "step": 3420, |
| "tokens_trained": 11.205621096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9729806396709453, |
| "grad_norm": 1.6246494054794312, |
| "loss": 1.0822, |
| "loss_ce": 0.9318454265594482, |
| "loss_region": 0.029994873329997063, |
| "loss_total": 0.9618402719497681, |
| "lr": 0.0011055662093216233, |
| "router/selected_tokens_s0": 4316.375, |
| "step": 3430, |
| "tokens_trained": 11.238384136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9758173179207148, |
| "grad_norm": 1.043264627456665, |
| "loss": 1.0927, |
| "loss_ce": 1.0697022676467896, |
| "loss_region": 0.03001013770699501, |
| "loss_total": 1.0997123718261719, |
| "lr": 0.0011051593173377723, |
| "router/selected_tokens_s0": 4351.5, |
| "step": 3440, |
| "tokens_trained": 11.271149576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9786539961704843, |
| "grad_norm": 0.5135401487350464, |
| "loss": 1.0883, |
| "loss_ce": 1.0403201580047607, |
| "loss_region": 0.03001248463988304, |
| "loss_total": 1.070332646369934, |
| "lr": 0.0011047524253539212, |
| "router/selected_tokens_s0": 4380.0, |
| "step": 3450, |
| "tokens_trained": 11.303915016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9814906744202538, |
| "grad_norm": 0.6191660165786743, |
| "loss": 1.0821, |
| "loss_ce": 0.9997903108596802, |
| "loss_region": 0.030010463669896126, |
| "loss_total": 1.0298007726669312, |
| "lr": 0.0011043455333700704, |
| "router/selected_tokens_s0": 4325.0, |
| "step": 3460, |
| "tokens_trained": 11.336680456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9843273526700234, |
| "grad_norm": 2.0896031856536865, |
| "loss": 1.0852, |
| "loss_ce": 0.9407132863998413, |
| "loss_region": 0.030007831752300262, |
| "loss_total": 0.9707211256027222, |
| "lr": 0.0011039386413862194, |
| "router/selected_tokens_s0": 4340.0, |
| "step": 3470, |
| "tokens_trained": 11.369444288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9871640309197929, |
| "grad_norm": 1.5842080116271973, |
| "loss": 1.0773, |
| "loss_ce": 1.0703927278518677, |
| "loss_region": 0.030027827247977257, |
| "loss_total": 1.100420594215393, |
| "lr": 0.0011035317494023683, |
| "router/selected_tokens_s0": 4422.875, |
| "step": 3480, |
| "tokens_trained": 11.402209728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9900007091695624, |
| "grad_norm": 1.6284222602844238, |
| "loss": 1.0867, |
| "loss_ce": 1.0688589811325073, |
| "loss_region": 0.03001173585653305, |
| "loss_total": 1.0988707542419434, |
| "lr": 0.001103124857418517, |
| "router/selected_tokens_s0": 4370.375, |
| "step": 3490, |
| "tokens_trained": 11.434975168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9928373874193319, |
| "grad_norm": 1.9294172525405884, |
| "loss": 1.0776, |
| "loss_ce": 1.0885505676269531, |
| "loss_region": 0.03002048470079899, |
| "loss_total": 1.1185710430145264, |
| "lr": 0.001102717965434666, |
| "router/selected_tokens_s0": 4402.375, |
| "step": 3500, |
| "tokens_trained": 11.467740608 |
| }, |
| { |
| "epoch": 0.9928373874193319, |
| "eval_ppl": 2.8891544142739582, |
| "eval_runtime": 2.4909, |
| "step": 3500, |
| "tokens_trained": 11.467740608 |
| }, |
| { |
| "epoch": 0.9928373874193319, |
| "eval_F": 0.3418065949445779, |
| "eval_F_cds": 0.34451083924282977, |
| "eval_F_dig": 0.33807328697554495, |
| "eval_F_exon": 0.34726493074167064, |
| "eval_F_intron": 0.34224230575312725, |
| "eval_F_nig": 0.3415901920743997, |
| "eval_F_promoter": 0.339251188483381, |
| "eval_F_utr": 0.34464885946681034, |
| "eval_G": 0.34547800502934484, |
| "eval_G_cds": 0.3442593687333732, |
| "eval_G_dig": 0.3991668762370758, |
| "eval_G_exon": 0.3453487502027675, |
| "eval_G_intron": 0.34490806786026235, |
| "eval_G_nig": 0.3455515495124094, |
| "eval_G_promoter": 0.34497689050299185, |
| "eval_G_utr": 0.3442341927156835, |
| "eval_avg_bp_per_token": 2.9256310872589943, |
| "eval_bp_per_token/cds": 2.902666291132704, |
| "eval_bp_per_token/dig": 2.957938525537324, |
| "eval_bp_per_token/exon": 2.8796458020228277, |
| "eval_bp_per_token/intron": 2.921906448121405, |
| "eval_bp_per_token/nig": 2.9274845215175147, |
| "eval_bp_per_token/promoter": 2.947668376551575, |
| "eval_bp_per_token/utr": 2.9015038713519954, |
| "eval_ppl_cds": 3.5389763754938555, |
| "eval_ppl_dig": 1.091459889456152, |
| "eval_ppl_exon": 3.306826954534152, |
| "eval_ppl_intron": 2.9106190204474447, |
| "eval_ppl_nig": 2.694991382732784, |
| "eval_ppl_promoter": 3.283923741138257, |
| "eval_ppl_utr": 3.295742249982149, |
| "step": 3500, |
| "tokens_trained": 11.467740608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.9956740656691014, |
| "grad_norm": 1.324675440788269, |
| "loss": 1.0868, |
| "loss_ce": 1.0776382684707642, |
| "loss_region": 0.03000813163816929, |
| "loss_total": 1.1076463460922241, |
| "lr": 0.001102311073450815, |
| "router/selected_tokens_s0": 4340.25, |
| "step": 3510, |
| "tokens_trained": 11.500506048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 0.998510743918871, |
| "grad_norm": 1.6382735967636108, |
| "loss": 1.0901, |
| "loss_ce": 1.0050663948059082, |
| "loss_region": 0.030013620853424072, |
| "loss_total": 1.0350799560546875, |
| "lr": 0.001101904181466964, |
| "router/selected_tokens_s0": 4364.0, |
| "step": 3520, |
| "tokens_trained": 11.533271488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0011346712999079, |
| "grad_norm": 1.1215876340866089, |
| "loss": 1.0781, |
| "loss_ce": 1.0582538843154907, |
| "loss_region": 0.030011579394340515, |
| "loss_total": 1.0882654190063477, |
| "lr": 0.0011014972894831132, |
| "router/selected_tokens_s0": 4343.125, |
| "step": 3530, |
| "tokens_trained": 11.56357952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0039713495496774, |
| "grad_norm": 1.4025973081588745, |
| "loss": 1.0809, |
| "loss_ce": 1.037977695465088, |
| "loss_region": 0.02999720722436905, |
| "loss_total": 1.0679749250411987, |
| "lr": 0.0011010903974992621, |
| "router/selected_tokens_s0": 4358.5, |
| "step": 3540, |
| "tokens_trained": 11.59634496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.006808027799447, |
| "grad_norm": 0.7676182985305786, |
| "loss": 1.0854, |
| "loss_ce": 0.96112459897995, |
| "loss_region": 0.030009040609002113, |
| "loss_total": 0.9911336302757263, |
| "lr": 0.001100683505515411, |
| "router/selected_tokens_s0": 4343.25, |
| "step": 3550, |
| "tokens_trained": 11.6291104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0096447060492164, |
| "grad_norm": 0.8700928688049316, |
| "loss": 1.0844, |
| "loss_ce": 1.12131667137146, |
| "loss_region": 0.030012287199497223, |
| "loss_total": 1.1513289213180542, |
| "lr": 0.00110027661353156, |
| "router/selected_tokens_s0": 4377.75, |
| "step": 3560, |
| "tokens_trained": 11.66187584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.012481384298986, |
| "grad_norm": 0.3621160089969635, |
| "loss": 1.0866, |
| "loss_ce": 0.9774419665336609, |
| "loss_region": 0.030013367533683777, |
| "loss_total": 1.0074553489685059, |
| "lr": 0.001099869721547709, |
| "router/selected_tokens_s0": 4358.375, |
| "step": 3570, |
| "tokens_trained": 11.69464128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0153180625487555, |
| "grad_norm": 0.8118414878845215, |
| "loss": 1.089, |
| "loss_ce": 1.0349894762039185, |
| "loss_region": 0.03000425547361374, |
| "loss_total": 1.0649937391281128, |
| "lr": 0.001099462829563858, |
| "router/selected_tokens_s0": 4320.625, |
| "step": 3580, |
| "tokens_trained": 11.72740592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.018154740798525, |
| "grad_norm": 1.3667856454849243, |
| "loss": 1.0864, |
| "loss_ce": 1.1101551055908203, |
| "loss_region": 0.030023517087101936, |
| "loss_total": 1.1401786804199219, |
| "lr": 0.001099055937580007, |
| "router/selected_tokens_s0": 4445.875, |
| "step": 3590, |
| "tokens_trained": 11.76017136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0209914190482945, |
| "grad_norm": 1.459873914718628, |
| "loss": 1.0875, |
| "loss_ce": 1.038351058959961, |
| "loss_region": 0.030004626139998436, |
| "loss_total": 1.068355679512024, |
| "lr": 0.001098649045596156, |
| "router/selected_tokens_s0": 4320.75, |
| "step": 3600, |
| "tokens_trained": 11.7929352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.023828097298064, |
| "grad_norm": 0.9393401145935059, |
| "loss": 1.0852, |
| "loss_ce": 1.040799856185913, |
| "loss_region": 0.030013561248779297, |
| "loss_total": 1.0708134174346924, |
| "lr": 0.0010982421536123049, |
| "router/selected_tokens_s0": 4366.625, |
| "step": 3610, |
| "tokens_trained": 11.82570064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0266647755478335, |
| "grad_norm": 1.4277124404907227, |
| "loss": 1.0821, |
| "loss_ce": 0.9711215496063232, |
| "loss_region": 0.03001675009727478, |
| "loss_total": 1.0011383295059204, |
| "lr": 0.0010978352616284538, |
| "router/selected_tokens_s0": 4369.5, |
| "step": 3620, |
| "tokens_trained": 11.85846448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.029501453797603, |
| "grad_norm": 0.8825812935829163, |
| "loss": 1.0782, |
| "loss_ce": 1.0676993131637573, |
| "loss_region": 0.030013611540198326, |
| "loss_total": 1.0977128744125366, |
| "lr": 0.0010974283696446028, |
| "router/selected_tokens_s0": 4372.375, |
| "step": 3630, |
| "tokens_trained": 11.89122992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0323381320473726, |
| "grad_norm": 0.9022896885871887, |
| "loss": 1.0733, |
| "loss_ce": 1.0407861471176147, |
| "loss_region": 0.030012723058462143, |
| "loss_total": 1.0707988739013672, |
| "lr": 0.0010970214776607518, |
| "router/selected_tokens_s0": 4355.75, |
| "step": 3640, |
| "tokens_trained": 11.92399536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.035174810297142, |
| "grad_norm": 0.8870510458946228, |
| "loss": 1.0749, |
| "loss_ce": 1.1323661804199219, |
| "loss_region": 0.03002365306019783, |
| "loss_total": 1.162389874458313, |
| "lr": 0.0010966145856769007, |
| "router/selected_tokens_s0": 4435.75, |
| "step": 3650, |
| "tokens_trained": 11.9567608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0380114885469116, |
| "grad_norm": 1.4656965732574463, |
| "loss": 1.0832, |
| "loss_ce": 1.0585097074508667, |
| "loss_region": 0.030004626139998436, |
| "loss_total": 1.0885143280029297, |
| "lr": 0.0010962076936930497, |
| "router/selected_tokens_s0": 4329.25, |
| "step": 3660, |
| "tokens_trained": 11.98952624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0408481667966811, |
| "grad_norm": 0.9163527488708496, |
| "loss": 1.0781, |
| "loss_ce": 1.0935940742492676, |
| "loss_region": 0.030009469017386436, |
| "loss_total": 1.1236035823822021, |
| "lr": 0.0010958008017091987, |
| "router/selected_tokens_s0": 4359.375, |
| "step": 3670, |
| "tokens_trained": 12.02229168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0436848450464506, |
| "grad_norm": 1.3013805150985718, |
| "loss": 1.0876, |
| "loss_ce": 1.0595567226409912, |
| "loss_region": 0.030006933957338333, |
| "loss_total": 1.0895636081695557, |
| "lr": 0.0010953939097253476, |
| "router/selected_tokens_s0": 4343.625, |
| "step": 3680, |
| "tokens_trained": 12.05505712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0465215232962202, |
| "grad_norm": 0.39981648325920105, |
| "loss": 1.0707, |
| "loss_ce": 1.1020740270614624, |
| "loss_region": 0.030012022703886032, |
| "loss_total": 1.1320860385894775, |
| "lr": 0.0010949870177414966, |
| "router/selected_tokens_s0": 4384.75, |
| "step": 3690, |
| "tokens_trained": 12.08782256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0493582015459897, |
| "grad_norm": 0.9564698338508606, |
| "loss": 1.0787, |
| "loss_ce": 1.0650237798690796, |
| "loss_region": 0.03000781685113907, |
| "loss_total": 1.0950316190719604, |
| "lr": 0.0010945801257576456, |
| "router/selected_tokens_s0": 4363.75, |
| "step": 3700, |
| "tokens_trained": 12.1205864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0521948797957592, |
| "grad_norm": 0.9331677556037903, |
| "loss": 1.08, |
| "loss_ce": 1.0856927633285522, |
| "loss_region": 0.030012279748916626, |
| "loss_total": 1.1157050132751465, |
| "lr": 0.0010941732337737947, |
| "router/selected_tokens_s0": 4404.5, |
| "step": 3710, |
| "tokens_trained": 12.15335184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0550315580455287, |
| "grad_norm": 1.2002500295639038, |
| "loss": 1.0818, |
| "loss_ce": 1.058534026145935, |
| "loss_region": 0.030018793419003487, |
| "loss_total": 1.0885528326034546, |
| "lr": 0.0010937663417899437, |
| "router/selected_tokens_s0": 4392.125, |
| "step": 3720, |
| "tokens_trained": 12.18611712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0578682362952982, |
| "grad_norm": 1.4924200773239136, |
| "loss": 1.0778, |
| "loss_ce": 1.020750880241394, |
| "loss_region": 0.030010871589183807, |
| "loss_total": 1.0507616996765137, |
| "lr": 0.0010933594498060927, |
| "router/selected_tokens_s0": 4356.625, |
| "step": 3730, |
| "tokens_trained": 12.21888256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0607049145450678, |
| "grad_norm": 0.6684730648994446, |
| "loss": 1.0769, |
| "loss_ce": 1.0789055824279785, |
| "loss_region": 0.030011769384145737, |
| "loss_total": 1.1089173555374146, |
| "lr": 0.0010929525578222414, |
| "router/selected_tokens_s0": 4387.875, |
| "step": 3740, |
| "tokens_trained": 12.251648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0635415927948373, |
| "grad_norm": 0.6117927432060242, |
| "loss": 1.0797, |
| "loss_ce": 1.0758237838745117, |
| "loss_region": 0.030013950541615486, |
| "loss_total": 1.1058377027511597, |
| "lr": 0.0010925456658383904, |
| "router/selected_tokens_s0": 4407.75, |
| "step": 3750, |
| "tokens_trained": 12.284409608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0663782710446068, |
| "grad_norm": 0.6212737560272217, |
| "loss": 1.08, |
| "loss_ce": 1.074630618095398, |
| "loss_region": 0.03000750206410885, |
| "loss_total": 1.1046380996704102, |
| "lr": 0.0010921387738545394, |
| "router/selected_tokens_s0": 4371.375, |
| "step": 3760, |
| "tokens_trained": 12.317175048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0692149492943763, |
| "grad_norm": 1.4000393152236938, |
| "loss": 1.0721, |
| "loss_ce": 0.9761142134666443, |
| "loss_region": 0.030007638037204742, |
| "loss_total": 1.0061218738555908, |
| "lr": 0.0010917318818706883, |
| "router/selected_tokens_s0": 4287.125, |
| "step": 3770, |
| "tokens_trained": 12.349940488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0720516275441458, |
| "grad_norm": 0.7589385509490967, |
| "loss": 1.0796, |
| "loss_ce": 1.0433313846588135, |
| "loss_region": 0.030011288821697235, |
| "loss_total": 1.0733426809310913, |
| "lr": 0.0010913249898868375, |
| "router/selected_tokens_s0": 4376.375, |
| "step": 3780, |
| "tokens_trained": 12.382705928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0748883057939154, |
| "grad_norm": 0.9254264235496521, |
| "loss": 1.0757, |
| "loss_ce": 1.0672318935394287, |
| "loss_region": 0.030009465292096138, |
| "loss_total": 1.0972414016723633, |
| "lr": 0.0010909180979029865, |
| "router/selected_tokens_s0": 4369.625, |
| "step": 3790, |
| "tokens_trained": 12.415470568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0777249840436849, |
| "grad_norm": 0.8547407984733582, |
| "loss": 1.0803, |
| "loss_ce": 1.086848497390747, |
| "loss_region": 0.03000779263675213, |
| "loss_total": 1.116856336593628, |
| "lr": 0.0010905112059191354, |
| "router/selected_tokens_s0": 4377.5, |
| "step": 3800, |
| "tokens_trained": 12.448236008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0805616622934544, |
| "grad_norm": 2.435622215270996, |
| "loss": 1.0783, |
| "loss_ce": 1.0909473896026611, |
| "loss_region": 0.03001653589308262, |
| "loss_total": 1.1209639310836792, |
| "lr": 0.0010901043139352844, |
| "router/selected_tokens_s0": 4412.5, |
| "step": 3810, |
| "tokens_trained": 12.481001448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.083398340543224, |
| "grad_norm": 0.8429534435272217, |
| "loss": 1.0679, |
| "loss_ce": 1.0455752611160278, |
| "loss_region": 0.030013732612133026, |
| "loss_total": 1.0755889415740967, |
| "lr": 0.0010896974219514334, |
| "router/selected_tokens_s0": 4386.125, |
| "step": 3820, |
| "tokens_trained": 12.513766888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0862350187929934, |
| "grad_norm": 1.3827040195465088, |
| "loss": 1.0802, |
| "loss_ce": 1.1371749639511108, |
| "loss_region": 0.030014697462320328, |
| "loss_total": 1.1671897172927856, |
| "lr": 0.0010892905299675823, |
| "router/selected_tokens_s0": 4394.5, |
| "step": 3830, |
| "tokens_trained": 12.546532328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.089071697042763, |
| "grad_norm": 1.121778130531311, |
| "loss": 1.0882, |
| "loss_ce": 1.0060161352157593, |
| "loss_region": 0.030006043612957, |
| "loss_total": 1.0360221862792969, |
| "lr": 0.0010888836379837313, |
| "router/selected_tokens_s0": 4312.375, |
| "step": 3840, |
| "tokens_trained": 12.57929404 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0919083752925325, |
| "grad_norm": 1.114713430404663, |
| "loss": 1.0791, |
| "loss_ce": 1.0474870204925537, |
| "loss_region": 0.03002369962632656, |
| "loss_total": 1.0775107145309448, |
| "lr": 0.0010884767459998803, |
| "router/selected_tokens_s0": 4427.0, |
| "step": 3850, |
| "tokens_trained": 12.61205788 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.094745053542302, |
| "grad_norm": 0.42109477519989014, |
| "loss": 1.0759, |
| "loss_ce": 1.0544071197509766, |
| "loss_region": 0.030009722337126732, |
| "loss_total": 1.0844168663024902, |
| "lr": 0.0010880698540160292, |
| "router/selected_tokens_s0": 4372.875, |
| "step": 3860, |
| "tokens_trained": 12.64482332 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.0975817317920715, |
| "grad_norm": 1.0385088920593262, |
| "loss": 1.0656, |
| "loss_ce": 1.1451784372329712, |
| "loss_region": 0.030008675530552864, |
| "loss_total": 1.175187110900879, |
| "lr": 0.0010876629620321782, |
| "router/selected_tokens_s0": 4386.0, |
| "step": 3870, |
| "tokens_trained": 12.677587992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.100418410041841, |
| "grad_norm": 1.0264872312545776, |
| "loss": 1.0732, |
| "loss_ce": 1.0522844791412354, |
| "loss_region": 0.030011439695954323, |
| "loss_total": 1.0822958946228027, |
| "lr": 0.0010872560700483272, |
| "router/selected_tokens_s0": 4350.75, |
| "step": 3880, |
| "tokens_trained": 12.710352632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1032550882916106, |
| "grad_norm": 0.6505580544471741, |
| "loss": 1.077, |
| "loss_ce": 1.0118770599365234, |
| "loss_region": 0.03001365438103676, |
| "loss_total": 1.0418907403945923, |
| "lr": 0.0010868491780644761, |
| "router/selected_tokens_s0": 4405.125, |
| "step": 3890, |
| "tokens_trained": 12.743118072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.10609176654138, |
| "grad_norm": 1.2206717729568481, |
| "loss": 1.0648, |
| "loss_ce": 1.009666919708252, |
| "loss_region": 0.030009053647518158, |
| "loss_total": 1.0396759510040283, |
| "lr": 0.001086442286080625, |
| "router/selected_tokens_s0": 4362.0, |
| "step": 3900, |
| "tokens_trained": 12.775882712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1089284447911496, |
| "grad_norm": 0.9762550592422485, |
| "loss": 1.0833, |
| "loss_ce": 1.1374930143356323, |
| "loss_region": 0.03000687249004841, |
| "loss_total": 1.1674998998641968, |
| "lr": 0.001086035394096774, |
| "router/selected_tokens_s0": 4368.875, |
| "step": 3910, |
| "tokens_trained": 12.808648152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1117651230409191, |
| "grad_norm": 0.6860953569412231, |
| "loss": 1.0783, |
| "loss_ce": 1.1070737838745117, |
| "loss_region": 0.030013803392648697, |
| "loss_total": 1.1370875835418701, |
| "lr": 0.001085628502112923, |
| "router/selected_tokens_s0": 4440.25, |
| "step": 3920, |
| "tokens_trained": 12.841413592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1146018012906886, |
| "grad_norm": 0.5641375184059143, |
| "loss": 1.0779, |
| "loss_ce": 0.9596564173698425, |
| "loss_region": 0.030009876936674118, |
| "loss_total": 0.9896662831306458, |
| "lr": 0.001085221610129072, |
| "router/selected_tokens_s0": 4372.25, |
| "step": 3930, |
| "tokens_trained": 12.874179032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1174384795404582, |
| "grad_norm": 1.3612422943115234, |
| "loss": 1.0745, |
| "loss_ce": 1.0073390007019043, |
| "loss_region": 0.030006812885403633, |
| "loss_total": 1.0373457670211792, |
| "lr": 0.001084814718145221, |
| "router/selected_tokens_s0": 4349.25, |
| "step": 3940, |
| "tokens_trained": 12.906944472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1202751577902277, |
| "grad_norm": 0.9040305614471436, |
| "loss": 1.0713, |
| "loss_ce": 0.9823886156082153, |
| "loss_region": 0.03001086413860321, |
| "loss_total": 1.012399435043335, |
| "lr": 0.00108440782616137, |
| "router/selected_tokens_s0": 4368.375, |
| "step": 3950, |
| "tokens_trained": 12.939709912 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1231118360399972, |
| "grad_norm": 0.6158255934715271, |
| "loss": 1.0631, |
| "loss_ce": 1.072802186012268, |
| "loss_region": 0.030010098591446877, |
| "loss_total": 1.1028122901916504, |
| "lr": 0.001084000934177519, |
| "router/selected_tokens_s0": 4384.5, |
| "step": 3960, |
| "tokens_trained": 12.972475352 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1259485142897667, |
| "grad_norm": 0.8492525815963745, |
| "loss": 1.0761, |
| "loss_ce": 1.1295671463012695, |
| "loss_region": 0.03000839613378048, |
| "loss_total": 1.1595755815505981, |
| "lr": 0.001083594042193668, |
| "router/selected_tokens_s0": 4394.5, |
| "step": 3970, |
| "tokens_trained": 13.005240792 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1287851925395362, |
| "grad_norm": 1.4454373121261597, |
| "loss": 1.0671, |
| "loss_ce": 1.059134840965271, |
| "loss_region": 0.030011136084794998, |
| "loss_total": 1.0891460180282593, |
| "lr": 0.001083187150209817, |
| "router/selected_tokens_s0": 4354.625, |
| "step": 3980, |
| "tokens_trained": 13.038005432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1316218707893058, |
| "grad_norm": 1.1143423318862915, |
| "loss": 1.0746, |
| "loss_ce": 1.058432698249817, |
| "loss_region": 0.030015287920832634, |
| "loss_total": 1.08844792842865, |
| "lr": 0.0010827802582259658, |
| "router/selected_tokens_s0": 4413.25, |
| "step": 3990, |
| "tokens_trained": 13.070770872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1344585490390753, |
| "grad_norm": 0.926952600479126, |
| "loss": 1.0726, |
| "loss_ce": 1.0535566806793213, |
| "loss_region": 0.03000652976334095, |
| "loss_total": 1.083563208580017, |
| "lr": 0.0010823733662421147, |
| "router/selected_tokens_s0": 4332.125, |
| "step": 4000, |
| "tokens_trained": 13.103536312 |
| }, |
| { |
| "epoch": 1.1344585490390753, |
| "eval_ppl": 2.858632587615727, |
| "eval_runtime": 2.4962, |
| "step": 4000, |
| "tokens_trained": 13.103536312 |
| }, |
| { |
| "epoch": 1.1344585490390753, |
| "eval_F": 0.34138791413731373, |
| "eval_F_cds": 0.3448908798343993, |
| "eval_F_dig": 0.3374221944422741, |
| "eval_F_exon": 0.34516320139927215, |
| "eval_F_intron": 0.34168107017140814, |
| "eval_F_nig": 0.3411899187908908, |
| "eval_F_promoter": 0.33952618612122176, |
| "eval_F_utr": 0.34327183776802744, |
| "eval_G": 0.34184569864652725, |
| "eval_G_cds": 0.34168474533742754, |
| "eval_G_dig": 0.39810788440503164, |
| "eval_G_exon": 0.34140616184021216, |
| "eval_G_intron": 0.3411866290260269, |
| "eval_G_nig": 0.34169810595567374, |
| "eval_G_promoter": 0.3417757543881514, |
| "eval_G_utr": 0.3401876713258752, |
| "eval_avg_bp_per_token": 2.9292191041003814, |
| "eval_bp_per_token/cds": 2.899467798279136, |
| "eval_bp_per_token/dig": 2.9636461870947826, |
| "eval_bp_per_token/exon": 2.897180220678382, |
| "eval_bp_per_token/intron": 2.9267058883254458, |
| "eval_bp_per_token/nig": 2.930918954299122, |
| "eval_bp_per_token/promoter": 2.945280926411278, |
| "eval_bp_per_token/utr": 2.9131431418961005, |
| "eval_ppl_cds": 3.4450720333639553, |
| "eval_ppl_dig": 1.088491176901866, |
| "eval_ppl_exon": 3.2953068260471907, |
| "eval_ppl_intron": 2.887916254694354, |
| "eval_ppl_nig": 2.65992247163589, |
| "eval_ppl_promoter": 3.249167345940797, |
| "eval_ppl_utr": 3.267379860704035, |
| "step": 4000, |
| "tokens_trained": 13.103536312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1372952272888448, |
| "grad_norm": 1.135546088218689, |
| "loss": 1.0709, |
| "loss_ce": 1.05037522315979, |
| "loss_region": 0.030016878619790077, |
| "loss_total": 1.0803921222686768, |
| "lr": 0.0010819664742582637, |
| "router/selected_tokens_s0": 4442.875, |
| "step": 4010, |
| "tokens_trained": 13.136300952 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1401319055386143, |
| "grad_norm": 0.9313811659812927, |
| "loss": 1.0706, |
| "loss_ce": 1.0053969621658325, |
| "loss_region": 0.029991673305630684, |
| "loss_total": 1.0353885889053345, |
| "lr": 0.0010815595822744127, |
| "router/selected_tokens_s0": 4338.625, |
| "step": 4020, |
| "tokens_trained": 13.169065592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1429685837883838, |
| "grad_norm": 1.150964617729187, |
| "loss": 1.0746, |
| "loss_ce": 1.0354498624801636, |
| "loss_region": 0.03000836819410324, |
| "loss_total": 1.0654581785202026, |
| "lr": 0.0010811526902905618, |
| "router/selected_tokens_s0": 4357.125, |
| "step": 4030, |
| "tokens_trained": 13.201831032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1458052620381534, |
| "grad_norm": 0.3598765432834625, |
| "loss": 1.069, |
| "loss_ce": 0.9737571477890015, |
| "loss_region": 0.030010921880602837, |
| "loss_total": 1.0037680864334106, |
| "lr": 0.0010807457983067108, |
| "router/selected_tokens_s0": 4364.125, |
| "step": 4040, |
| "tokens_trained": 13.234596472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1486419402879229, |
| "grad_norm": 1.6098700761795044, |
| "loss": 1.0724, |
| "loss_ce": 1.090259075164795, |
| "loss_region": 0.030013924464583397, |
| "loss_total": 1.1202729940414429, |
| "lr": 0.0010803389063228598, |
| "router/selected_tokens_s0": 4396.5, |
| "step": 4050, |
| "tokens_trained": 13.267361888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1514786185376924, |
| "grad_norm": 1.2312268018722534, |
| "loss": 1.0755, |
| "loss_ce": 1.0816667079925537, |
| "loss_region": 0.0300059225410223, |
| "loss_total": 1.1116726398468018, |
| "lr": 0.0010799320143390087, |
| "router/selected_tokens_s0": 4383.875, |
| "step": 4060, |
| "tokens_trained": 13.300127328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.154315296787462, |
| "grad_norm": 0.8974295854568481, |
| "loss": 1.074, |
| "loss_ce": 0.999101459980011, |
| "loss_region": 0.030007481575012207, |
| "loss_total": 1.029109001159668, |
| "lr": 0.0010795251223551577, |
| "router/selected_tokens_s0": 4339.875, |
| "step": 4070, |
| "tokens_trained": 13.332892768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1571519750372314, |
| "grad_norm": 0.9909172654151917, |
| "loss": 1.0664, |
| "loss_ce": 1.1255804300308228, |
| "loss_region": 0.030011769384145737, |
| "loss_total": 1.1555922031402588, |
| "lr": 0.0010791182303713067, |
| "router/selected_tokens_s0": 4367.375, |
| "step": 4080, |
| "tokens_trained": 13.365658208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.159988653287001, |
| "grad_norm": 2.2824649810791016, |
| "loss": 1.0724, |
| "loss_ce": 0.9207800626754761, |
| "loss_region": 0.030002696439623833, |
| "loss_total": 0.9507827758789062, |
| "lr": 0.0010787113383874556, |
| "router/selected_tokens_s0": 4278.5, |
| "step": 4090, |
| "tokens_trained": 13.398422568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1628253315367705, |
| "grad_norm": 0.4956927001476288, |
| "loss": 1.0733, |
| "loss_ce": 1.1247563362121582, |
| "loss_region": 0.03000745177268982, |
| "loss_total": 1.1547638177871704, |
| "lr": 0.0010783044464036046, |
| "router/selected_tokens_s0": 4366.5, |
| "step": 4100, |
| "tokens_trained": 13.431185432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.16566200978654, |
| "grad_norm": 0.6976671814918518, |
| "loss": 1.0697, |
| "loss_ce": 1.0819003582000732, |
| "loss_region": 0.03002307377755642, |
| "loss_total": 1.1119234561920166, |
| "lr": 0.0010778975544197536, |
| "router/selected_tokens_s0": 4439.5, |
| "step": 4110, |
| "tokens_trained": 13.463950872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1684986880363095, |
| "grad_norm": 1.2541862726211548, |
| "loss": 1.0653, |
| "loss_ce": 0.7924370169639587, |
| "loss_region": 0.029992103576660156, |
| "loss_total": 0.8224291205406189, |
| "lr": 0.0010774906624359025, |
| "router/selected_tokens_s0": 4297.25, |
| "step": 4120, |
| "tokens_trained": 13.496713976 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.171335366286079, |
| "grad_norm": 1.4140042066574097, |
| "loss": 1.0685, |
| "loss_ce": 1.0910414457321167, |
| "loss_region": 0.03000573255121708, |
| "loss_total": 1.1210471391677856, |
| "lr": 0.0010770837704520515, |
| "router/selected_tokens_s0": 4412.5, |
| "step": 4130, |
| "tokens_trained": 13.529479416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1741720445358486, |
| "grad_norm": 0.5698431134223938, |
| "loss": 1.0775, |
| "loss_ce": 0.8972399830818176, |
| "loss_region": 0.029999306425452232, |
| "loss_total": 0.9272392988204956, |
| "lr": 0.0010766768784682005, |
| "router/selected_tokens_s0": 4358.875, |
| "step": 4140, |
| "tokens_trained": 13.562244856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.177008722785618, |
| "grad_norm": 1.3367623090744019, |
| "loss": 1.0704, |
| "loss_ce": 1.1016747951507568, |
| "loss_region": 0.03001498058438301, |
| "loss_total": 1.1316897869110107, |
| "lr": 0.0010762699864843494, |
| "router/selected_tokens_s0": 4404.625, |
| "step": 4150, |
| "tokens_trained": 13.595010296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1798454010353876, |
| "grad_norm": 0.7532950043678284, |
| "loss": 1.0571, |
| "loss_ce": 0.9316068887710571, |
| "loss_region": 0.03000483848154545, |
| "loss_total": 0.9616117477416992, |
| "lr": 0.0010758630945004984, |
| "router/selected_tokens_s0": 4305.625, |
| "step": 4160, |
| "tokens_trained": 13.627775736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1826820792851571, |
| "grad_norm": 0.8210463523864746, |
| "loss": 1.066, |
| "loss_ce": 1.0508811473846436, |
| "loss_region": 0.030011793598532677, |
| "loss_total": 1.0808929204940796, |
| "lr": 0.0010754562025166474, |
| "router/selected_tokens_s0": 4387.75, |
| "step": 4170, |
| "tokens_trained": 13.660541176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1855187575349266, |
| "grad_norm": 1.2525079250335693, |
| "loss": 1.0643, |
| "loss_ce": 1.0516717433929443, |
| "loss_region": 0.030006369575858116, |
| "loss_total": 1.0816781520843506, |
| "lr": 0.0010750493105327963, |
| "router/selected_tokens_s0": 4361.0, |
| "step": 4180, |
| "tokens_trained": 13.693306616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1883554357846962, |
| "grad_norm": 1.0099766254425049, |
| "loss": 1.0655, |
| "loss_ce": 0.9692405462265015, |
| "loss_region": 0.0300027746707201, |
| "loss_total": 0.9992433190345764, |
| "lr": 0.0010746424185489453, |
| "router/selected_tokens_s0": 4389.0, |
| "step": 4190, |
| "tokens_trained": 13.726072056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1911921140344657, |
| "grad_norm": 0.7821201682090759, |
| "loss": 1.0697, |
| "loss_ce": 1.018595814704895, |
| "loss_region": 0.030003640800714493, |
| "loss_total": 1.0485994815826416, |
| "lr": 0.0010742355265650943, |
| "router/selected_tokens_s0": 4325.0, |
| "step": 4200, |
| "tokens_trained": 13.758837496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1940287922842352, |
| "grad_norm": 1.1446911096572876, |
| "loss": 1.0679, |
| "loss_ce": 1.0561493635177612, |
| "loss_region": 0.030004194006323814, |
| "loss_total": 1.086153507232666, |
| "lr": 0.0010738286345812434, |
| "router/selected_tokens_s0": 4347.875, |
| "step": 4210, |
| "tokens_trained": 13.791602936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1968654705340047, |
| "grad_norm": 0.8190633654594421, |
| "loss": 1.079, |
| "loss_ce": 1.0492353439331055, |
| "loss_region": 0.030007485300302505, |
| "loss_total": 1.0792428255081177, |
| "lr": 0.0010734217425973924, |
| "router/selected_tokens_s0": 4382.25, |
| "step": 4220, |
| "tokens_trained": 13.824368376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.1997021487837742, |
| "grad_norm": 1.038085699081421, |
| "loss": 1.066, |
| "loss_ce": 0.9230837821960449, |
| "loss_region": 0.030007129535079002, |
| "loss_total": 0.9530909061431885, |
| "lr": 0.0010730148506135414, |
| "router/selected_tokens_s0": 4315.875, |
| "step": 4230, |
| "tokens_trained": 13.857133016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2025388270335438, |
| "grad_norm": 1.4383383989334106, |
| "loss": 1.0598, |
| "loss_ce": 1.0923779010772705, |
| "loss_region": 0.03000866062939167, |
| "loss_total": 1.1223865747451782, |
| "lr": 0.0010726079586296901, |
| "router/selected_tokens_s0": 4404.625, |
| "step": 4240, |
| "tokens_trained": 13.889896856 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2053755052833133, |
| "grad_norm": 0.6213952898979187, |
| "loss": 1.0635, |
| "loss_ce": 1.0380363464355469, |
| "loss_region": 0.030008360743522644, |
| "loss_total": 1.068044662475586, |
| "lr": 0.001072201066645839, |
| "router/selected_tokens_s0": 4405.25, |
| "step": 4250, |
| "tokens_trained": 13.922662296 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2082121835330828, |
| "grad_norm": 0.4920593202114105, |
| "loss": 1.0628, |
| "loss_ce": 1.0617847442626953, |
| "loss_region": 0.030003665015101433, |
| "loss_total": 1.091788411140442, |
| "lr": 0.001071794174661988, |
| "router/selected_tokens_s0": 4345.0, |
| "step": 4260, |
| "tokens_trained": 13.955427736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2110488617828523, |
| "grad_norm": 1.1883982419967651, |
| "loss": 1.0624, |
| "loss_ce": 1.0569326877593994, |
| "loss_region": 0.03000705875456333, |
| "loss_total": 1.0869396924972534, |
| "lr": 0.0010713872826781372, |
| "router/selected_tokens_s0": 4376.875, |
| "step": 4270, |
| "tokens_trained": 13.988193176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2138855400326218, |
| "grad_norm": 0.7268418669700623, |
| "loss": 1.0621, |
| "loss_ce": 1.1230334043502808, |
| "loss_region": 0.030010607093572617, |
| "loss_total": 1.1530439853668213, |
| "lr": 0.0010709803906942862, |
| "router/selected_tokens_s0": 4399.0, |
| "step": 4280, |
| "tokens_trained": 14.020958616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2167222182823914, |
| "grad_norm": 0.8508139848709106, |
| "loss": 1.0648, |
| "loss_ce": 1.012586236000061, |
| "loss_region": 0.030012402683496475, |
| "loss_total": 1.0425986051559448, |
| "lr": 0.0010705734987104352, |
| "router/selected_tokens_s0": 4399.25, |
| "step": 4290, |
| "tokens_trained": 14.053724056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2195588965321609, |
| "grad_norm": 1.003320574760437, |
| "loss": 1.072, |
| "loss_ce": 1.092397928237915, |
| "loss_region": 0.030008139088749886, |
| "loss_total": 1.1224061250686646, |
| "lr": 0.0010701666067265841, |
| "router/selected_tokens_s0": 4384.5, |
| "step": 4300, |
| "tokens_trained": 14.086488728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2223955747819304, |
| "grad_norm": 0.6508564949035645, |
| "loss": 1.0592, |
| "loss_ce": 0.9947494864463806, |
| "loss_region": 0.030006522312760353, |
| "loss_total": 1.0247559547424316, |
| "lr": 0.001069759714742733, |
| "router/selected_tokens_s0": 4323.125, |
| "step": 4310, |
| "tokens_trained": 14.119254168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2252322530317, |
| "grad_norm": 0.6111332774162292, |
| "loss": 1.0612, |
| "loss_ce": 1.0148944854736328, |
| "loss_region": 0.030005091801285744, |
| "loss_total": 1.044899582862854, |
| "lr": 0.001069352822758882, |
| "router/selected_tokens_s0": 4324.25, |
| "step": 4320, |
| "tokens_trained": 14.152019608 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2280689312814694, |
| "grad_norm": 1.111618161201477, |
| "loss": 1.0653, |
| "loss_ce": 0.9681676030158997, |
| "loss_region": 0.03000504896044731, |
| "loss_total": 0.9981726408004761, |
| "lr": 0.001068945930775031, |
| "router/selected_tokens_s0": 4334.0, |
| "step": 4330, |
| "tokens_trained": 14.184785048 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.230905609531239, |
| "grad_norm": 0.6468565464019775, |
| "loss": 1.0612, |
| "loss_ce": 1.0180631875991821, |
| "loss_region": 0.030008560046553612, |
| "loss_total": 1.0480717420578003, |
| "lr": 0.00106853903879118, |
| "router/selected_tokens_s0": 4372.0, |
| "step": 4340, |
| "tokens_trained": 14.217550488 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2337422877810085, |
| "grad_norm": 0.4824322760105133, |
| "loss": 1.0606, |
| "loss_ce": 1.054766058921814, |
| "loss_region": 0.030008021742105484, |
| "loss_total": 1.084774136543274, |
| "lr": 0.001068132146807329, |
| "router/selected_tokens_s0": 4369.5, |
| "step": 4350, |
| "tokens_trained": 14.250315928 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.236578966030778, |
| "grad_norm": 0.8620288968086243, |
| "loss": 1.0604, |
| "loss_ce": 1.075032114982605, |
| "loss_region": 0.030014168471097946, |
| "loss_total": 1.105046272277832, |
| "lr": 0.001067725254823478, |
| "router/selected_tokens_s0": 4397.375, |
| "step": 4360, |
| "tokens_trained": 14.283081368 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2394156442805475, |
| "grad_norm": 1.4154425859451294, |
| "loss": 1.0614, |
| "loss_ce": 1.0848474502563477, |
| "loss_region": 0.030006200075149536, |
| "loss_total": 1.1148536205291748, |
| "lr": 0.0010673183628396269, |
| "router/selected_tokens_s0": 4343.25, |
| "step": 4370, |
| "tokens_trained": 14.315846808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.242252322530317, |
| "grad_norm": 0.7003890872001648, |
| "loss": 1.0594, |
| "loss_ce": 1.1462643146514893, |
| "loss_region": 0.030004924163222313, |
| "loss_total": 1.176269292831421, |
| "lr": 0.0010669114708557758, |
| "router/selected_tokens_s0": 4383.25, |
| "step": 4380, |
| "tokens_trained": 14.348612232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2450890007800866, |
| "grad_norm": 0.9333593845367432, |
| "loss": 1.0628, |
| "loss_ce": 1.1088054180145264, |
| "loss_region": 0.0300068948417902, |
| "loss_total": 1.1388123035430908, |
| "lr": 0.0010665045788719248, |
| "router/selected_tokens_s0": 4374.875, |
| "step": 4390, |
| "tokens_trained": 14.381377672 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.247925679029856, |
| "grad_norm": 0.7894501686096191, |
| "loss": 1.0632, |
| "loss_ce": 0.9245185256004333, |
| "loss_region": 0.030007462948560715, |
| "loss_total": 0.9545260071754456, |
| "lr": 0.0010660976868880738, |
| "router/selected_tokens_s0": 4363.625, |
| "step": 4400, |
| "tokens_trained": 14.414143112 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2507623572796256, |
| "grad_norm": 1.051579236984253, |
| "loss": 1.0676, |
| "loss_ce": 1.0265341997146606, |
| "loss_region": 0.03001078963279724, |
| "loss_total": 1.0565450191497803, |
| "lr": 0.0010656907949042227, |
| "router/selected_tokens_s0": 4416.375, |
| "step": 4410, |
| "tokens_trained": 14.446908552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2535990355293951, |
| "grad_norm": 1.4166457653045654, |
| "loss": 1.0661, |
| "loss_ce": 1.1268078088760376, |
| "loss_region": 0.0300078634172678, |
| "loss_total": 1.1568156480789185, |
| "lr": 0.0010652839029203717, |
| "router/selected_tokens_s0": 4376.375, |
| "step": 4420, |
| "tokens_trained": 14.479673992 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2564357137791646, |
| "grad_norm": 0.9460220336914062, |
| "loss": 1.0627, |
| "loss_ce": 1.0467621088027954, |
| "loss_region": 0.030009562149643898, |
| "loss_total": 1.07677161693573, |
| "lr": 0.0010648770109365207, |
| "router/selected_tokens_s0": 4361.5, |
| "step": 4430, |
| "tokens_trained": 14.512439432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2592723920289342, |
| "grad_norm": 0.726325273513794, |
| "loss": 1.0611, |
| "loss_ce": 1.060929298400879, |
| "loss_region": 0.030005550011992455, |
| "loss_total": 1.0909348726272583, |
| "lr": 0.0010644701189526696, |
| "router/selected_tokens_s0": 4371.625, |
| "step": 4440, |
| "tokens_trained": 14.545204872 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2621090702787037, |
| "grad_norm": 0.8391557931900024, |
| "loss": 1.0602, |
| "loss_ce": 0.9280992150306702, |
| "loss_region": 0.030011408030986786, |
| "loss_total": 0.9581106305122375, |
| "lr": 0.0010640632269688186, |
| "router/selected_tokens_s0": 4365.625, |
| "step": 4450, |
| "tokens_trained": 14.577970312 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2649457485284732, |
| "grad_norm": 0.5766838788986206, |
| "loss": 1.0606, |
| "loss_ce": 1.0834920406341553, |
| "loss_region": 0.03000706620514393, |
| "loss_total": 1.1134991645812988, |
| "lr": 0.0010636563349849678, |
| "router/selected_tokens_s0": 4382.625, |
| "step": 4460, |
| "tokens_trained": 14.610735752 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2677824267782427, |
| "grad_norm": 0.7650503516197205, |
| "loss": 1.0655, |
| "loss_ce": 1.0730332136154175, |
| "loss_region": 0.030004587024450302, |
| "loss_total": 1.1030378341674805, |
| "lr": 0.0010632494430011167, |
| "router/selected_tokens_s0": 4357.375, |
| "step": 4470, |
| "tokens_trained": 14.643501192 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2706191050280122, |
| "grad_norm": 0.9502279758453369, |
| "loss": 1.065, |
| "loss_ce": 0.9979308247566223, |
| "loss_region": 0.030005743727087975, |
| "loss_total": 1.027936577796936, |
| "lr": 0.0010628425510172657, |
| "router/selected_tokens_s0": 4355.125, |
| "step": 4480, |
| "tokens_trained": 14.676266632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2734557832777817, |
| "grad_norm": 0.42353641986846924, |
| "loss": 1.0697, |
| "loss_ce": 0.9458868503570557, |
| "loss_region": 0.03001011349260807, |
| "loss_total": 0.975896954536438, |
| "lr": 0.0010624356590334145, |
| "router/selected_tokens_s0": 4327.25, |
| "step": 4490, |
| "tokens_trained": 14.709032072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2762924615275513, |
| "grad_norm": 0.8221932649612427, |
| "loss": 1.0532, |
| "loss_ce": 1.0401586294174194, |
| "loss_region": 0.03000483103096485, |
| "loss_total": 1.0701634883880615, |
| "lr": 0.0010620287670495634, |
| "router/selected_tokens_s0": 4364.625, |
| "step": 4500, |
| "tokens_trained": 14.741797512 |
| }, |
| { |
| "epoch": 1.2762924615275513, |
| "eval_ppl": 2.8214683274593275, |
| "eval_runtime": 2.514, |
| "step": 4500, |
| "tokens_trained": 14.741797512 |
| }, |
| { |
| "epoch": 1.2762924615275513, |
| "eval_F": 0.3404783661767054, |
| "eval_F_cds": 0.34415505656136036, |
| "eval_F_dig": 0.34039117639398914, |
| "eval_F_exon": 0.3441758117753265, |
| "eval_F_intron": 0.3404660876483458, |
| "eval_F_nig": 0.34068460925568095, |
| "eval_F_promoter": 0.3389634986468706, |
| "eval_F_utr": 0.3419717924374995, |
| "eval_G": 0.33991637341184733, |
| "eval_G_cds": 0.33970658857472646, |
| "eval_G_dig": 0.40259630268966323, |
| "eval_G_exon": 0.3395000370271109, |
| "eval_G_intron": 0.33899745701570494, |
| "eval_G_nig": 0.3395800911177655, |
| "eval_G_promoter": 0.34070296612716594, |
| "eval_G_utr": 0.33846465688852967, |
| "eval_avg_bp_per_token": 2.9370441688533258, |
| "eval_bp_per_token/cds": 2.9056670269254266, |
| "eval_bp_per_token/dig": 2.937796480489671, |
| "eval_bp_per_token/exon": 2.9054918032786885, |
| "eval_bp_per_token/intron": 2.937150090063775, |
| "eval_bp_per_token/nig": 2.9352661459664247, |
| "eval_bp_per_token/promoter": 2.9501701628404295, |
| "eval_bp_per_token/utr": 2.924217792561839, |
| "eval_ppl_cds": 3.362117196422307, |
| "eval_ppl_dig": 1.084496515952716, |
| "eval_ppl_exon": 3.2638291915167845, |
| "eval_ppl_intron": 2.863014968465922, |
| "eval_ppl_nig": 2.628156788503626, |
| "eval_ppl_promoter": 3.186888990118295, |
| "eval_ppl_utr": 3.231163925316475, |
| "step": 4500, |
| "tokens_trained": 14.741797512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2791291397773208, |
| "grad_norm": 0.574324905872345, |
| "loss": 1.0626, |
| "loss_ce": 1.1369348764419556, |
| "loss_region": 0.030004041269421577, |
| "loss_total": 1.1669389009475708, |
| "lr": 0.0010616218750657124, |
| "router/selected_tokens_s0": 4390.0, |
| "step": 4510, |
| "tokens_trained": 14.774562152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2819658180270903, |
| "grad_norm": 1.4291346073150635, |
| "loss": 1.0542, |
| "loss_ce": 1.0772862434387207, |
| "loss_region": 0.030009262263774872, |
| "loss_total": 1.1072955131530762, |
| "lr": 0.0010612149830818616, |
| "router/selected_tokens_s0": 4374.125, |
| "step": 4520, |
| "tokens_trained": 14.807327592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2848024962768598, |
| "grad_norm": 1.110178828239441, |
| "loss": 1.0581, |
| "loss_ce": 1.0101850032806396, |
| "loss_region": 0.030011240392923355, |
| "loss_total": 1.0401962995529175, |
| "lr": 0.0010608080910980105, |
| "router/selected_tokens_s0": 4400.875, |
| "step": 4530, |
| "tokens_trained": 14.840092232 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2876391745266293, |
| "grad_norm": 0.6965222358703613, |
| "loss": 1.0677, |
| "loss_ce": 1.0458329916000366, |
| "loss_region": 0.030003167688846588, |
| "loss_total": 1.075836181640625, |
| "lr": 0.0010604011991141595, |
| "router/selected_tokens_s0": 4285.875, |
| "step": 4540, |
| "tokens_trained": 14.872854344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2904758527763989, |
| "grad_norm": 0.7366101145744324, |
| "loss": 1.0673, |
| "loss_ce": 1.127131700515747, |
| "loss_region": 0.030009111389517784, |
| "loss_total": 1.157140851020813, |
| "lr": 0.0010599943071303085, |
| "router/selected_tokens_s0": 4368.75, |
| "step": 4550, |
| "tokens_trained": 14.905619784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2933125310261684, |
| "grad_norm": 0.5127747058868408, |
| "loss": 1.0583, |
| "loss_ce": 1.0789624452590942, |
| "loss_region": 0.030002892017364502, |
| "loss_total": 1.1089653968811035, |
| "lr": 0.0010595874151464574, |
| "router/selected_tokens_s0": 4366.5, |
| "step": 4560, |
| "tokens_trained": 14.938385224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.296149209275938, |
| "grad_norm": 0.8208303451538086, |
| "loss": 1.0564, |
| "loss_ce": 1.0156883001327515, |
| "loss_region": 0.03000694513320923, |
| "loss_total": 1.0456953048706055, |
| "lr": 0.0010591805231626064, |
| "router/selected_tokens_s0": 4368.5, |
| "step": 4570, |
| "tokens_trained": 14.971149864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.2989858875257074, |
| "grad_norm": 0.9243417978286743, |
| "loss": 1.0546, |
| "loss_ce": 1.1268659830093384, |
| "loss_region": 0.030008889734745026, |
| "loss_total": 1.1568748950958252, |
| "lr": 0.0010587736311787554, |
| "router/selected_tokens_s0": 4378.75, |
| "step": 4580, |
| "tokens_trained": 15.003915304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.301822565775477, |
| "grad_norm": 1.2765685319900513, |
| "loss": 1.0589, |
| "loss_ce": 1.0031052827835083, |
| "loss_region": 0.03000444732606411, |
| "loss_total": 1.0331097841262817, |
| "lr": 0.0010583667391949043, |
| "router/selected_tokens_s0": 4328.75, |
| "step": 4590, |
| "tokens_trained": 15.036680744 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3046592440252465, |
| "grad_norm": 0.7742276787757874, |
| "loss": 1.06, |
| "loss_ce": 1.079167127609253, |
| "loss_region": 0.030005935579538345, |
| "loss_total": 1.109173059463501, |
| "lr": 0.0010579598472110533, |
| "router/selected_tokens_s0": 4353.25, |
| "step": 4600, |
| "tokens_trained": 15.069442824 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.307495922275016, |
| "grad_norm": 0.793449878692627, |
| "loss": 1.0558, |
| "loss_ce": 1.1816250085830688, |
| "loss_region": 0.030013153329491615, |
| "loss_total": 1.2116382122039795, |
| "lr": 0.0010575529552272023, |
| "router/selected_tokens_s0": 4410.125, |
| "step": 4610, |
| "tokens_trained": 15.102208264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3103326005247855, |
| "grad_norm": 0.45884019136428833, |
| "loss": 1.0581, |
| "loss_ce": 1.03169584274292, |
| "loss_region": 0.030012015253305435, |
| "loss_total": 1.061707854270935, |
| "lr": 0.0010571460632433512, |
| "router/selected_tokens_s0": 4406.125, |
| "step": 4620, |
| "tokens_trained": 15.134972904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.313169278774555, |
| "grad_norm": 0.297644704580307, |
| "loss": 1.0543, |
| "loss_ce": 0.9691150188446045, |
| "loss_region": 0.030010992661118507, |
| "loss_total": 0.9991260170936584, |
| "lr": 0.0010567391712595002, |
| "router/selected_tokens_s0": 4378.125, |
| "step": 4630, |
| "tokens_trained": 15.167735632 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3160059570243245, |
| "grad_norm": 1.0193889141082764, |
| "loss": 1.0631, |
| "loss_ce": 0.9981553554534912, |
| "loss_region": 0.030008038505911827, |
| "loss_total": 1.0281634330749512, |
| "lr": 0.0010563322792756492, |
| "router/selected_tokens_s0": 4370.875, |
| "step": 4640, |
| "tokens_trained": 15.200501072 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.318842635274094, |
| "grad_norm": 0.5435932278633118, |
| "loss": 1.0567, |
| "loss_ce": 1.0280530452728271, |
| "loss_region": 0.030006207525730133, |
| "loss_total": 1.0580592155456543, |
| "lr": 0.0010559253872917981, |
| "router/selected_tokens_s0": 4404.625, |
| "step": 4650, |
| "tokens_trained": 15.233266512 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3216793135238636, |
| "grad_norm": 1.5278434753417969, |
| "loss": 1.0548, |
| "loss_ce": 1.136357307434082, |
| "loss_region": 0.03000808134675026, |
| "loss_total": 1.166365385055542, |
| "lr": 0.001055518495307947, |
| "router/selected_tokens_s0": 4371.0, |
| "step": 4660, |
| "tokens_trained": 15.266028416 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.324515991773633, |
| "grad_norm": 1.0518913269042969, |
| "loss": 1.0663, |
| "loss_ce": 1.0637515783309937, |
| "loss_region": 0.030026618391275406, |
| "loss_total": 1.0937782526016235, |
| "lr": 0.001055111603324096, |
| "router/selected_tokens_s0": 4432.5, |
| "step": 4670, |
| "tokens_trained": 15.298793832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3273526700234026, |
| "grad_norm": 0.41353392601013184, |
| "loss": 1.0573, |
| "loss_ce": 1.1546579599380493, |
| "loss_region": 0.030011750757694244, |
| "loss_total": 1.1846697330474854, |
| "lr": 0.001054704711340245, |
| "router/selected_tokens_s0": 4418.375, |
| "step": 4680, |
| "tokens_trained": 15.331559256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3301893482731721, |
| "grad_norm": 1.145396113395691, |
| "loss": 1.0615, |
| "loss_ce": 0.9925062656402588, |
| "loss_region": 0.030006494373083115, |
| "loss_total": 1.0225127935409546, |
| "lr": 0.001054297819356394, |
| "router/selected_tokens_s0": 4337.125, |
| "step": 4690, |
| "tokens_trained": 15.364323896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3330260265229417, |
| "grad_norm": 0.48764264583587646, |
| "loss": 1.0592, |
| "loss_ce": 1.013685941696167, |
| "loss_region": 0.030003532767295837, |
| "loss_total": 1.043689489364624, |
| "lr": 0.0010538909273725432, |
| "router/selected_tokens_s0": 4342.125, |
| "step": 4700, |
| "tokens_trained": 15.397089336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3358627047727112, |
| "grad_norm": 0.8164799809455872, |
| "loss": 1.0516, |
| "loss_ce": 0.9586069583892822, |
| "loss_region": 0.030013196170330048, |
| "loss_total": 0.9886201620101929, |
| "lr": 0.0010534840353886921, |
| "router/selected_tokens_s0": 4396.625, |
| "step": 4710, |
| "tokens_trained": 15.42985476 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3386993830224807, |
| "grad_norm": 1.13369619846344, |
| "loss": 1.0594, |
| "loss_ce": 1.0693820714950562, |
| "loss_region": 0.030003618448972702, |
| "loss_total": 1.0993857383728027, |
| "lr": 0.001053077143404841, |
| "router/selected_tokens_s0": 4341.625, |
| "step": 4720, |
| "tokens_trained": 15.4626202 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3415360612722502, |
| "grad_norm": 0.8559716939926147, |
| "loss": 1.052, |
| "loss_ce": 0.9566583633422852, |
| "loss_region": 0.029995476827025414, |
| "loss_total": 0.9866538643836975, |
| "lr": 0.0010526702514209898, |
| "router/selected_tokens_s0": 4327.0, |
| "step": 4730, |
| "tokens_trained": 15.49538484 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3443727395220197, |
| "grad_norm": 0.642441987991333, |
| "loss": 1.0486, |
| "loss_ce": 0.9989664554595947, |
| "loss_region": 0.030002903193235397, |
| "loss_total": 1.028969407081604, |
| "lr": 0.0010522633594371388, |
| "router/selected_tokens_s0": 4368.75, |
| "step": 4740, |
| "tokens_trained": 15.52815028 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3472094177717893, |
| "grad_norm": 1.2081654071807861, |
| "loss": 1.0543, |
| "loss_ce": 1.0263653993606567, |
| "loss_region": 0.030002884566783905, |
| "loss_total": 1.0563682317733765, |
| "lr": 0.0010518564674532878, |
| "router/selected_tokens_s0": 4350.625, |
| "step": 4750, |
| "tokens_trained": 15.56091572 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3500460960215588, |
| "grad_norm": 0.8961039781570435, |
| "loss": 1.0605, |
| "loss_ce": 0.8918865919113159, |
| "loss_region": 0.02998742088675499, |
| "loss_total": 0.9218739867210388, |
| "lr": 0.0010514495754694367, |
| "router/selected_tokens_s0": 4227.875, |
| "step": 4760, |
| "tokens_trained": 15.593678736 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3528827742713283, |
| "grad_norm": 0.9029963612556458, |
| "loss": 1.054, |
| "loss_ce": 1.041927695274353, |
| "loss_region": 0.030009398236870766, |
| "loss_total": 1.071937084197998, |
| "lr": 0.001051042683485586, |
| "router/selected_tokens_s0": 4384.125, |
| "step": 4770, |
| "tokens_trained": 15.626444176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3557194525210978, |
| "grad_norm": 0.9543034434318542, |
| "loss": 1.0544, |
| "loss_ce": 1.0488133430480957, |
| "loss_region": 0.030006732791662216, |
| "loss_total": 1.0788201093673706, |
| "lr": 0.0010506357915017349, |
| "router/selected_tokens_s0": 4367.625, |
| "step": 4780, |
| "tokens_trained": 15.659209616 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3585561307708673, |
| "grad_norm": 1.4998373985290527, |
| "loss": 1.0569, |
| "loss_ce": 1.1177537441253662, |
| "loss_region": 0.030007855966687202, |
| "loss_total": 1.147761583328247, |
| "lr": 0.0010502288995178838, |
| "router/selected_tokens_s0": 4367.375, |
| "step": 4790, |
| "tokens_trained": 15.691975056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3613928090206369, |
| "grad_norm": 0.9493989944458008, |
| "loss": 1.0632, |
| "loss_ce": 1.1187384128570557, |
| "loss_region": 0.030011937022209167, |
| "loss_total": 1.1487503051757812, |
| "lr": 0.0010498220075340328, |
| "router/selected_tokens_s0": 4409.0, |
| "step": 4800, |
| "tokens_trained": 15.724740496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3642294872704064, |
| "grad_norm": 0.8666090965270996, |
| "loss": 1.0504, |
| "loss_ce": 1.0977500677108765, |
| "loss_region": 0.030009282752871513, |
| "loss_total": 1.127759337425232, |
| "lr": 0.0010494151155501818, |
| "router/selected_tokens_s0": 4413.0, |
| "step": 4810, |
| "tokens_trained": 15.757505936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.367066165520176, |
| "grad_norm": 1.7307463884353638, |
| "loss": 1.0497, |
| "loss_ce": 0.9964741468429565, |
| "loss_region": 0.03000612184405327, |
| "loss_total": 1.0264803171157837, |
| "lr": 0.0010490082235663307, |
| "router/selected_tokens_s0": 4340.625, |
| "step": 4820, |
| "tokens_trained": 15.790271376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3699028437699454, |
| "grad_norm": 1.00801420211792, |
| "loss": 1.0593, |
| "loss_ce": 1.056219458580017, |
| "loss_region": 0.030001841485500336, |
| "loss_total": 1.0862213373184204, |
| "lr": 0.0010486013315824797, |
| "router/selected_tokens_s0": 4315.75, |
| "step": 4830, |
| "tokens_trained": 15.823036016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.372739522019715, |
| "grad_norm": 0.6336276531219482, |
| "loss": 1.0541, |
| "loss_ce": 0.939272940158844, |
| "loss_region": 0.030005156993865967, |
| "loss_total": 0.96927809715271, |
| "lr": 0.0010481944395986287, |
| "router/selected_tokens_s0": 4368.625, |
| "step": 4840, |
| "tokens_trained": 15.855801456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3755762002694845, |
| "grad_norm": 1.0217934846878052, |
| "loss": 1.052, |
| "loss_ce": 0.9233169555664062, |
| "loss_region": 0.02999945543706417, |
| "loss_total": 0.9533163905143738, |
| "lr": 0.0010477875476147776, |
| "router/selected_tokens_s0": 4278.125, |
| "step": 4850, |
| "tokens_trained": 15.888566096 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.378412878519254, |
| "grad_norm": 0.8734573125839233, |
| "loss": 1.0512, |
| "loss_ce": 1.0704128742218018, |
| "loss_region": 0.030010642483830452, |
| "loss_total": 1.1004235744476318, |
| "lr": 0.0010473806556309266, |
| "router/selected_tokens_s0": 4385.25, |
| "step": 4860, |
| "tokens_trained": 15.921331536 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3812495567690235, |
| "grad_norm": 0.9410074949264526, |
| "loss": 1.0473, |
| "loss_ce": 1.0614757537841797, |
| "loss_region": 0.03000745177268982, |
| "loss_total": 1.091483235359192, |
| "lr": 0.0010469737636470756, |
| "router/selected_tokens_s0": 4373.375, |
| "step": 4870, |
| "tokens_trained": 15.954096176 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.384086235018793, |
| "grad_norm": 0.9506546854972839, |
| "loss": 1.0553, |
| "loss_ce": 1.0681748390197754, |
| "loss_region": 0.03000558167695999, |
| "loss_total": 1.0981804132461548, |
| "lr": 0.0010465668716632245, |
| "router/selected_tokens_s0": 4355.875, |
| "step": 4880, |
| "tokens_trained": 15.9868564 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3869229132685625, |
| "grad_norm": 0.6107691526412964, |
| "loss": 1.0465, |
| "loss_ce": 1.0117448568344116, |
| "loss_region": 0.030008085072040558, |
| "loss_total": 1.0417529344558716, |
| "lr": 0.0010461599796793735, |
| "router/selected_tokens_s0": 4373.0, |
| "step": 4890, |
| "tokens_trained": 16.019621784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.389759591518332, |
| "grad_norm": 0.8904739618301392, |
| "loss": 1.0524, |
| "loss_ce": 1.0931122303009033, |
| "loss_region": 0.030003776773810387, |
| "loss_total": 1.1231160163879395, |
| "lr": 0.0010457530876955225, |
| "router/selected_tokens_s0": 4370.75, |
| "step": 4900, |
| "tokens_trained": 16.052387224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3925962697681016, |
| "grad_norm": 0.8106483817100525, |
| "loss": 1.0501, |
| "loss_ce": 0.8826806545257568, |
| "loss_region": 0.030005743727087975, |
| "loss_total": 0.9126864075660706, |
| "lr": 0.0010453461957116714, |
| "router/selected_tokens_s0": 4362.25, |
| "step": 4910, |
| "tokens_trained": 16.085152664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.395432948017871, |
| "grad_norm": 0.8432952761650085, |
| "loss": 1.0554, |
| "loss_ce": 0.9541028738021851, |
| "loss_region": 0.03000727668404579, |
| "loss_total": 0.9841101765632629, |
| "lr": 0.0010449393037278204, |
| "router/selected_tokens_s0": 4347.75, |
| "step": 4920, |
| "tokens_trained": 16.117918104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.3982696262676406, |
| "grad_norm": 0.7111901640892029, |
| "loss": 1.0514, |
| "loss_ce": 0.9792753458023071, |
| "loss_region": 0.030003240332007408, |
| "loss_total": 1.0092785358428955, |
| "lr": 0.0010445324117439694, |
| "router/selected_tokens_s0": 4354.5, |
| "step": 4930, |
| "tokens_trained": 16.150683544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4011063045174101, |
| "grad_norm": 0.623307466506958, |
| "loss": 1.0486, |
| "loss_ce": 0.8407849669456482, |
| "loss_region": 0.030006825923919678, |
| "loss_total": 0.8707917928695679, |
| "lr": 0.0010441255197601183, |
| "router/selected_tokens_s0": 4359.375, |
| "step": 4940, |
| "tokens_trained": 16.183448184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4039429827671797, |
| "grad_norm": 0.7381167411804199, |
| "loss": 1.0515, |
| "loss_ce": 0.9730595350265503, |
| "loss_region": 0.030005378648638725, |
| "loss_total": 1.0030648708343506, |
| "lr": 0.0010437186277762675, |
| "router/selected_tokens_s0": 4344.625, |
| "step": 4950, |
| "tokens_trained": 16.216210576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4067796610169492, |
| "grad_norm": 1.5306568145751953, |
| "loss": 1.0493, |
| "loss_ce": 1.1129964590072632, |
| "loss_region": 0.03000766597688198, |
| "loss_total": 1.1430041790008545, |
| "lr": 0.0010433117357924165, |
| "router/selected_tokens_s0": 4405.875, |
| "step": 4960, |
| "tokens_trained": 16.248975216 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4096163392667187, |
| "grad_norm": 1.0243196487426758, |
| "loss": 1.053, |
| "loss_ce": 1.0323237180709839, |
| "loss_region": 0.0300030205398798, |
| "loss_total": 1.0623267889022827, |
| "lr": 0.0010429048438085654, |
| "router/selected_tokens_s0": 4350.75, |
| "step": 4970, |
| "tokens_trained": 16.281739056 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4124530175164882, |
| "grad_norm": 0.41617700457572937, |
| "loss": 1.0515, |
| "loss_ce": 1.0560728311538696, |
| "loss_region": 0.030008800327777863, |
| "loss_total": 1.086081624031067, |
| "lr": 0.0010424979518247142, |
| "router/selected_tokens_s0": 4361.875, |
| "step": 4980, |
| "tokens_trained": 16.314504496 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4152896957662577, |
| "grad_norm": 1.0856021642684937, |
| "loss": 1.0525, |
| "loss_ce": 0.9173005223274231, |
| "loss_region": 0.030003489926457405, |
| "loss_total": 0.9473040103912354, |
| "lr": 0.0010420910598408631, |
| "router/selected_tokens_s0": 4334.375, |
| "step": 4990, |
| "tokens_trained": 16.347269936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4181263740160273, |
| "grad_norm": 0.7011216878890991, |
| "loss": 1.0519, |
| "loss_ce": 1.0040265321731567, |
| "loss_region": 0.030011465772986412, |
| "loss_total": 1.0340379476547241, |
| "lr": 0.0010416841678570121, |
| "router/selected_tokens_s0": 4393.0, |
| "step": 5000, |
| "tokens_trained": 16.380034576 |
| }, |
| { |
| "epoch": 1.4181263740160273, |
| "eval_ppl": 2.788169093074203, |
| "eval_runtime": 2.4902, |
| "step": 5000, |
| "tokens_trained": 16.380034576 |
| }, |
| { |
| "epoch": 1.4181263740160273, |
| "eval_F": 0.3413229464258417, |
| "eval_F_cds": 0.34488279386436593, |
| "eval_F_dig": 0.3365887959996875, |
| "eval_F_exon": 0.34454255649279203, |
| "eval_F_intron": 0.34125204874115167, |
| "eval_F_nig": 0.34140188383936837, |
| "eval_F_promoter": 0.3405175878617453, |
| "eval_F_utr": 0.3429040617863649, |
| "eval_G": 0.3415226863108361, |
| "eval_G_cds": 0.34280297813856925, |
| "eval_G_dig": 0.39681383799515585, |
| "eval_G_exon": 0.3413263496822721, |
| "eval_G_intron": 0.3403742930108439, |
| "eval_G_nig": 0.34037531674994653, |
| "eval_G_promoter": 0.34437466936381766, |
| "eval_G_utr": 0.34068450968191655, |
| "eval_avg_bp_per_token": 2.929776654255114, |
| "eval_bp_per_token/cds": 2.8995357779236612, |
| "eval_bp_per_token/dig": 2.9709842154131847, |
| "eval_bp_per_token/exon": 2.9023990829444033, |
| "eval_bp_per_token/intron": 2.930385337432876, |
| "eval_bp_per_token/nig": 2.9290992444274444, |
| "eval_bp_per_token/promoter": 2.9367058726082993, |
| "eval_bp_per_token/utr": 2.916267584555522, |
| "eval_ppl_cds": 3.2638592963627073, |
| "eval_ppl_dig": 1.0816082686781683, |
| "eval_ppl_exon": 3.2486368312535303, |
| "eval_ppl_intron": 2.8401016704114306, |
| "eval_ppl_nig": 2.6066140907483235, |
| "eval_ppl_promoter": 3.109201262145088, |
| "eval_ppl_utr": 3.1537355648599172, |
| "step": 5000, |
| "tokens_trained": 16.380034576 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4209630522657968, |
| "grad_norm": 0.9108154773712158, |
| "loss": 1.0451, |
| "loss_ce": 1.066657543182373, |
| "loss_region": 0.03000144474208355, |
| "loss_total": 1.0966589450836182, |
| "lr": 0.001041277275873161, |
| "router/selected_tokens_s0": 4315.875, |
| "step": 5010, |
| "tokens_trained": 16.412800016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4237997305155663, |
| "grad_norm": 0.8278728127479553, |
| "loss": 1.0582, |
| "loss_ce": 1.0547834634780884, |
| "loss_region": 0.03000921569764614, |
| "loss_total": 1.0847927331924438, |
| "lr": 0.0010408703838893103, |
| "router/selected_tokens_s0": 4401.25, |
| "step": 5020, |
| "tokens_trained": 16.445565456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4266364087653358, |
| "grad_norm": 0.3643961250782013, |
| "loss": 1.0453, |
| "loss_ce": 1.0456041097640991, |
| "loss_region": 0.030006922781467438, |
| "loss_total": 1.0756109952926636, |
| "lr": 0.0010404634919054592, |
| "router/selected_tokens_s0": 4321.5, |
| "step": 5030, |
| "tokens_trained": 16.478329936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4294730870151053, |
| "grad_norm": 0.8244170546531677, |
| "loss": 1.0413, |
| "loss_ce": 1.1286065578460693, |
| "loss_region": 0.03001227229833603, |
| "loss_total": 1.1586188077926636, |
| "lr": 0.0010400565999216082, |
| "router/selected_tokens_s0": 4414.625, |
| "step": 5040, |
| "tokens_trained": 16.511090344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4323097652648749, |
| "grad_norm": 0.7646901607513428, |
| "loss": 1.0397, |
| "loss_ce": 0.9538711905479431, |
| "loss_region": 0.030008982867002487, |
| "loss_total": 0.9838801622390747, |
| "lr": 0.0010396497079377572, |
| "router/selected_tokens_s0": 4366.25, |
| "step": 5050, |
| "tokens_trained": 16.543855784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4351464435146444, |
| "grad_norm": 0.5411663055419922, |
| "loss": 1.0546, |
| "loss_ce": 1.10159432888031, |
| "loss_region": 0.030010610818862915, |
| "loss_total": 1.1316049098968506, |
| "lr": 0.0010392428159539061, |
| "router/selected_tokens_s0": 4370.5, |
| "step": 5060, |
| "tokens_trained": 16.576621224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.437983121764414, |
| "grad_norm": 0.4553099274635315, |
| "loss": 1.053, |
| "loss_ce": 0.924439549446106, |
| "loss_region": 0.030009716749191284, |
| "loss_total": 0.9544492959976196, |
| "lr": 0.001038835923970055, |
| "router/selected_tokens_s0": 4413.125, |
| "step": 5070, |
| "tokens_trained": 16.609386664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4408198000141834, |
| "grad_norm": 0.7976288795471191, |
| "loss": 1.0524, |
| "loss_ce": 0.8370477557182312, |
| "loss_region": 0.03001226671040058, |
| "loss_total": 0.8670600056648254, |
| "lr": 0.001038429031986204, |
| "router/selected_tokens_s0": 4350.125, |
| "step": 5080, |
| "tokens_trained": 16.642152104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.443656478263953, |
| "grad_norm": 0.7374417781829834, |
| "loss": 1.0481, |
| "loss_ce": 1.087965726852417, |
| "loss_region": 0.030008574947714806, |
| "loss_total": 1.1179742813110352, |
| "lr": 0.001038022140002353, |
| "router/selected_tokens_s0": 4363.5, |
| "step": 5090, |
| "tokens_trained": 16.674914848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4464931565137225, |
| "grad_norm": 0.8701497912406921, |
| "loss": 1.0423, |
| "loss_ce": 1.102858066558838, |
| "loss_region": 0.030004970729351044, |
| "loss_total": 1.1328630447387695, |
| "lr": 0.001037615248018502, |
| "router/selected_tokens_s0": 4363.125, |
| "step": 5100, |
| "tokens_trained": 16.707680128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.449329834763492, |
| "grad_norm": 0.7448126077651978, |
| "loss": 1.0436, |
| "loss_ce": 1.1202232837677002, |
| "loss_region": 0.030008699744939804, |
| "loss_total": 1.150231957435608, |
| "lr": 0.001037208356034651, |
| "router/selected_tokens_s0": 4389.375, |
| "step": 5110, |
| "tokens_trained": 16.740445568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4521665130132615, |
| "grad_norm": 0.6302592754364014, |
| "loss": 1.0486, |
| "loss_ce": 1.0902258157730103, |
| "loss_region": 0.03000813163816929, |
| "loss_total": 1.1202338933944702, |
| "lr": 0.0010368014640508, |
| "router/selected_tokens_s0": 4353.375, |
| "step": 5120, |
| "tokens_trained": 16.773211008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.455003191263031, |
| "grad_norm": 1.019002079963684, |
| "loss": 1.0513, |
| "loss_ce": 1.0098958015441895, |
| "loss_region": 0.02999117411673069, |
| "loss_total": 1.0398869514465332, |
| "lr": 0.0010363945720669489, |
| "router/selected_tokens_s0": 4293.875, |
| "step": 5130, |
| "tokens_trained": 16.805971784 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4578398695128005, |
| "grad_norm": 0.9714931845664978, |
| "loss": 1.0518, |
| "loss_ce": 0.9985664486885071, |
| "loss_region": 0.030006328597664833, |
| "loss_total": 1.0285727977752686, |
| "lr": 0.0010359876800830978, |
| "router/selected_tokens_s0": 4355.5, |
| "step": 5140, |
| "tokens_trained": 16.838737224 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.46067654776257, |
| "grad_norm": 0.8966345191001892, |
| "loss": 1.0456, |
| "loss_ce": 1.061452031135559, |
| "loss_region": 0.030002159997820854, |
| "loss_total": 1.0914541482925415, |
| "lr": 0.0010355807880992468, |
| "router/selected_tokens_s0": 4324.375, |
| "step": 5150, |
| "tokens_trained": 16.871502664 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4635132260123396, |
| "grad_norm": 0.6720635294914246, |
| "loss": 1.0403, |
| "loss_ce": 1.0887715816497803, |
| "loss_region": 0.030005764216184616, |
| "loss_total": 1.1187773942947388, |
| "lr": 0.0010351738961153958, |
| "router/selected_tokens_s0": 4345.375, |
| "step": 5160, |
| "tokens_trained": 16.904268104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.466349904262109, |
| "grad_norm": 0.7350347638130188, |
| "loss": 1.0505, |
| "loss_ce": 1.0535961389541626, |
| "loss_region": 0.030005428940057755, |
| "loss_total": 1.0836015939712524, |
| "lr": 0.0010347670041315447, |
| "router/selected_tokens_s0": 4381.75, |
| "step": 5170, |
| "tokens_trained": 16.937030128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4691865825118786, |
| "grad_norm": 0.44304972887039185, |
| "loss": 1.0432, |
| "loss_ce": 1.0507234334945679, |
| "loss_region": 0.030006472021341324, |
| "loss_total": 1.0807299613952637, |
| "lr": 0.0010343601121476937, |
| "router/selected_tokens_s0": 4387.25, |
| "step": 5180, |
| "tokens_trained": 16.969795184 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4720232607616481, |
| "grad_norm": 0.441457062959671, |
| "loss": 1.0491, |
| "loss_ce": 1.0120573043823242, |
| "loss_region": 0.030006378889083862, |
| "loss_total": 1.0420637130737305, |
| "lr": 0.0010339532201638427, |
| "router/selected_tokens_s0": 4363.625, |
| "step": 5190, |
| "tokens_trained": 17.002560624 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4748599390114177, |
| "grad_norm": 0.8241252303123474, |
| "loss": 1.0567, |
| "loss_ce": 1.0892800092697144, |
| "loss_region": 0.03000868298113346, |
| "loss_total": 1.119288682937622, |
| "lr": 0.0010335463281799918, |
| "router/selected_tokens_s0": 4367.125, |
| "step": 5200, |
| "tokens_trained": 17.035326064 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4776966172611872, |
| "grad_norm": 0.5182619690895081, |
| "loss": 1.0408, |
| "loss_ce": 0.9733250737190247, |
| "loss_region": 0.030004369094967842, |
| "loss_total": 1.0033293962478638, |
| "lr": 0.0010331394361961408, |
| "router/selected_tokens_s0": 4326.25, |
| "step": 5210, |
| "tokens_trained": 17.068091504 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4805332955109567, |
| "grad_norm": 0.7119126319885254, |
| "loss": 1.0389, |
| "loss_ce": 0.9868582487106323, |
| "loss_region": 0.03000757470726967, |
| "loss_total": 1.016865849494934, |
| "lr": 0.0010327325442122898, |
| "router/selected_tokens_s0": 4388.0, |
| "step": 5220, |
| "tokens_trained": 17.100856944 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4833699737607262, |
| "grad_norm": 1.0945305824279785, |
| "loss": 1.0462, |
| "loss_ce": 0.9747534990310669, |
| "loss_region": 0.030006079003214836, |
| "loss_total": 1.0047595500946045, |
| "lr": 0.0010323256522284385, |
| "router/selected_tokens_s0": 4341.25, |
| "step": 5230, |
| "tokens_trained": 17.133622384 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4862066520104957, |
| "grad_norm": 0.8298206329345703, |
| "loss": 1.0351, |
| "loss_ce": 1.05121648311615, |
| "loss_region": 0.030005764216184616, |
| "loss_total": 1.0812222957611084, |
| "lr": 0.0010319187602445875, |
| "router/selected_tokens_s0": 4326.375, |
| "step": 5240, |
| "tokens_trained": 17.166385424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4890433302602653, |
| "grad_norm": 0.4219936728477478, |
| "loss": 1.0484, |
| "loss_ce": 0.8292503952980042, |
| "loss_region": 0.030046647414565086, |
| "loss_total": 0.8592970371246338, |
| "lr": 0.0010315118682607365, |
| "router/selected_tokens_s0": 4365.125, |
| "step": 5250, |
| "tokens_trained": 17.199150864 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4918800085100348, |
| "grad_norm": 0.19638904929161072, |
| "loss": 1.0493, |
| "loss_ce": 0.9696671962738037, |
| "loss_region": 0.030005378648638725, |
| "loss_total": 0.9996725916862488, |
| "lr": 0.0010311049762768854, |
| "router/selected_tokens_s0": 4325.25, |
| "step": 5260, |
| "tokens_trained": 17.231916304 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4947166867598043, |
| "grad_norm": 0.786198079586029, |
| "loss": 1.0451, |
| "loss_ce": 1.0354949235916138, |
| "loss_region": 0.03000979870557785, |
| "loss_total": 1.0655046701431274, |
| "lr": 0.0010306980842930346, |
| "router/selected_tokens_s0": 4347.125, |
| "step": 5270, |
| "tokens_trained": 17.264676432 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.4975533650095738, |
| "grad_norm": 0.7419178485870361, |
| "loss": 1.041, |
| "loss_ce": 0.928809404373169, |
| "loss_region": 0.030001329258084297, |
| "loss_total": 0.9588107466697693, |
| "lr": 0.0010302911923091836, |
| "router/selected_tokens_s0": 4279.625, |
| "step": 5280, |
| "tokens_trained": 17.297440272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5003900432593433, |
| "grad_norm": 0.8321080803871155, |
| "loss": 1.0508, |
| "loss_ce": 1.090352177619934, |
| "loss_region": 0.030001569539308548, |
| "loss_total": 1.1203536987304688, |
| "lr": 0.0010298843003253325, |
| "router/selected_tokens_s0": 4361.625, |
| "step": 5290, |
| "tokens_trained": 17.330205712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5032267215091129, |
| "grad_norm": 0.43943366408348083, |
| "loss": 1.0423, |
| "loss_ce": 1.054518461227417, |
| "loss_region": 0.03000991977751255, |
| "loss_total": 1.0845283269882202, |
| "lr": 0.0010294774083414815, |
| "router/selected_tokens_s0": 4375.625, |
| "step": 5300, |
| "tokens_trained": 17.362971152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5060633997588824, |
| "grad_norm": 0.9542965888977051, |
| "loss": 1.0469, |
| "loss_ce": 1.113122820854187, |
| "loss_region": 0.030009111389517784, |
| "loss_total": 1.143131971359253, |
| "lr": 0.0010290705163576305, |
| "router/selected_tokens_s0": 4369.0, |
| "step": 5310, |
| "tokens_trained": 17.395736592 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.508900078008652, |
| "grad_norm": 0.6456644535064697, |
| "loss": 1.0438, |
| "loss_ce": 1.0606231689453125, |
| "loss_region": 0.030006930232048035, |
| "loss_total": 1.090630054473877, |
| "lr": 0.0010286636243737794, |
| "router/selected_tokens_s0": 4360.125, |
| "step": 5320, |
| "tokens_trained": 17.428502032 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5117367562584214, |
| "grad_norm": 1.505149006843567, |
| "loss": 1.0426, |
| "loss_ce": 1.065946340560913, |
| "loss_region": 0.030012134462594986, |
| "loss_total": 1.0959584712982178, |
| "lr": 0.0010282567323899284, |
| "router/selected_tokens_s0": 4342.875, |
| "step": 5330, |
| "tokens_trained": 17.461262816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.514573434508191, |
| "grad_norm": 0.5819237232208252, |
| "loss": 1.0424, |
| "loss_ce": 0.9712111353874207, |
| "loss_region": 0.030006036162376404, |
| "loss_total": 1.0012171268463135, |
| "lr": 0.0010278498404060774, |
| "router/selected_tokens_s0": 4354.625, |
| "step": 5340, |
| "tokens_trained": 17.494028256 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5174101127579604, |
| "grad_norm": 0.5115887522697449, |
| "loss": 1.0468, |
| "loss_ce": 1.0570096969604492, |
| "loss_region": 0.0300076175481081, |
| "loss_total": 1.087017297744751, |
| "lr": 0.0010274429484222263, |
| "router/selected_tokens_s0": 4346.5, |
| "step": 5350, |
| "tokens_trained": 17.526793696 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.52024679100773, |
| "grad_norm": 0.5018046498298645, |
| "loss": 1.0437, |
| "loss_ce": 0.9743192195892334, |
| "loss_region": 0.030005570501089096, |
| "loss_total": 1.0043247938156128, |
| "lr": 0.0010270360564383753, |
| "router/selected_tokens_s0": 4360.75, |
| "step": 5360, |
| "tokens_trained": 17.559559136 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5230834692574995, |
| "grad_norm": 0.7800183892250061, |
| "loss": 1.0429, |
| "loss_ce": 1.0386371612548828, |
| "loss_region": 0.030008897185325623, |
| "loss_total": 1.0686460733413696, |
| "lr": 0.0010266291644545243, |
| "router/selected_tokens_s0": 4364.125, |
| "step": 5370, |
| "tokens_trained": 17.59232456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.525920147507269, |
| "grad_norm": 0.6966549754142761, |
| "loss": 1.0489, |
| "loss_ce": 0.8995506167411804, |
| "loss_region": 0.03000815026462078, |
| "loss_total": 0.9295587539672852, |
| "lr": 0.0010262222724706732, |
| "router/selected_tokens_s0": 4385.75, |
| "step": 5380, |
| "tokens_trained": 17.6250892 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5287568257570385, |
| "grad_norm": 0.5771371722221375, |
| "loss": 1.0467, |
| "loss_ce": 0.9319908022880554, |
| "loss_region": 0.0300018098205328, |
| "loss_total": 0.961992621421814, |
| "lr": 0.0010258153804868222, |
| "router/selected_tokens_s0": 4344.5, |
| "step": 5390, |
| "tokens_trained": 17.65785464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.531593504006808, |
| "grad_norm": 0.553460955619812, |
| "loss": 1.0509, |
| "loss_ce": 1.0958824157714844, |
| "loss_region": 0.030008256435394287, |
| "loss_total": 1.1258907318115234, |
| "lr": 0.0010254084885029712, |
| "router/selected_tokens_s0": 4395.875, |
| "step": 5400, |
| "tokens_trained": 17.69062008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5344301822565776, |
| "grad_norm": 0.7295851111412048, |
| "loss": 1.0446, |
| "loss_ce": 1.0735763311386108, |
| "loss_region": 0.030002327635884285, |
| "loss_total": 1.1035786867141724, |
| "lr": 0.0010250015965191201, |
| "router/selected_tokens_s0": 4300.625, |
| "step": 5410, |
| "tokens_trained": 17.72338552 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.537266860506347, |
| "grad_norm": 0.4493541419506073, |
| "loss": 1.041, |
| "loss_ce": 1.1575278043746948, |
| "loss_region": 0.030008507892489433, |
| "loss_total": 1.187536358833313, |
| "lr": 0.001024594704535269, |
| "router/selected_tokens_s0": 4385.875, |
| "step": 5420, |
| "tokens_trained": 17.75615016 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5401035387561166, |
| "grad_norm": 1.0313796997070312, |
| "loss": 1.0498, |
| "loss_ce": 1.0423924922943115, |
| "loss_region": 0.030005216598510742, |
| "loss_total": 1.0723977088928223, |
| "lr": 0.001024187812551418, |
| "router/selected_tokens_s0": 4385.5, |
| "step": 5430, |
| "tokens_trained": 17.78891464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5429402170058861, |
| "grad_norm": 0.6560305953025818, |
| "loss": 1.0418, |
| "loss_ce": 0.9702669978141785, |
| "loss_region": 0.030001483857631683, |
| "loss_total": 1.0002684593200684, |
| "lr": 0.001023780920567567, |
| "router/selected_tokens_s0": 4346.375, |
| "step": 5440, |
| "tokens_trained": 17.821677712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5457768952556556, |
| "grad_norm": 0.2240542620420456, |
| "loss": 1.038, |
| "loss_ce": 1.117678165435791, |
| "loss_region": 0.03000263124704361, |
| "loss_total": 1.1476807594299316, |
| "lr": 0.0010233740285837162, |
| "router/selected_tokens_s0": 4357.25, |
| "step": 5450, |
| "tokens_trained": 17.854443152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5486135735054252, |
| "grad_norm": 0.3824736475944519, |
| "loss": 1.0438, |
| "loss_ce": 1.013928771018982, |
| "loss_region": 0.030003132298588753, |
| "loss_total": 1.0439319610595703, |
| "lr": 0.0010229671365998652, |
| "router/selected_tokens_s0": 4288.0, |
| "step": 5460, |
| "tokens_trained": 17.88720588 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5514502517551947, |
| "grad_norm": 1.2875090837478638, |
| "loss": 1.0417, |
| "loss_ce": 1.0048933029174805, |
| "loss_region": 0.03000313974916935, |
| "loss_total": 1.0348964929580688, |
| "lr": 0.0010225602446160141, |
| "router/selected_tokens_s0": 4326.0, |
| "step": 5470, |
| "tokens_trained": 17.91997132 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5542869300049642, |
| "grad_norm": 0.32429569959640503, |
| "loss": 1.0345, |
| "loss_ce": 0.8651331067085266, |
| "loss_region": 0.029996881261467934, |
| "loss_total": 0.8951299786567688, |
| "lr": 0.0010221533526321629, |
| "router/selected_tokens_s0": 4298.625, |
| "step": 5480, |
| "tokens_trained": 17.952736472 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5571236082547337, |
| "grad_norm": 0.7797481417655945, |
| "loss": 1.0423, |
| "loss_ce": 1.0778536796569824, |
| "loss_region": 0.03001037798821926, |
| "loss_total": 1.1078640222549438, |
| "lr": 0.0010217464606483118, |
| "router/selected_tokens_s0": 4417.75, |
| "step": 5490, |
| "tokens_trained": 17.985498936 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5599602865045032, |
| "grad_norm": 0.6121678352355957, |
| "loss": 1.0487, |
| "loss_ce": 1.0614382028579712, |
| "loss_region": 0.030010351911187172, |
| "loss_total": 1.0914485454559326, |
| "lr": 0.0010213395686644608, |
| "router/selected_tokens_s0": 4413.5, |
| "step": 5500, |
| "tokens_trained": 18.018264376 |
| }, |
| { |
| "epoch": 1.5599602865045032, |
| "eval_ppl": 2.76765789476978, |
| "eval_runtime": 2.4835, |
| "step": 5500, |
| "tokens_trained": 18.018264376 |
| }, |
| { |
| "epoch": 1.5599602865045032, |
| "eval_F": 0.3403231655326333, |
| "eval_F_cds": 0.34277235568564984, |
| "eval_F_dig": 0.3267703205979634, |
| "eval_F_exon": 0.34366801139729736, |
| "eval_F_intron": 0.3408284238068733, |
| "eval_F_nig": 0.34099593292063235, |
| "eval_F_promoter": 0.33725428255936774, |
| "eval_F_utr": 0.34147572250874536, |
| "eval_G": 0.3415901383306685, |
| "eval_G_cds": 0.3420066364335616, |
| "eval_G_dig": 0.39036181619006693, |
| "eval_G_exon": 0.3413997151557254, |
| "eval_G_intron": 0.34085139204059245, |
| "eval_G_nig": 0.3408783641023706, |
| "eval_G_promoter": 0.34279264794696634, |
| "eval_G_utr": 0.34078024569894544, |
| "eval_avg_bp_per_token": 2.9383835756079635, |
| "eval_bp_per_token/cds": 2.9173881248378195, |
| "eval_bp_per_token/dig": 3.0602534470391327, |
| "eval_bp_per_token/exon": 2.9097849285831554, |
| "eval_bp_per_token/intron": 2.934027593211061, |
| "eval_bp_per_token/nig": 2.9325862963672136, |
| "eval_bp_per_token/promoter": 2.9651217248040944, |
| "eval_bp_per_token/utr": 2.9284658735128364, |
| "eval_ppl_cds": 3.18907175811622, |
| "eval_ppl_dig": 1.081375085654356, |
| "eval_ppl_exon": 3.2359904440638463, |
| "eval_ppl_intron": 2.8314949801099587, |
| "eval_ppl_nig": 2.600385066779234, |
| "eval_ppl_promoter": 3.03807385354442, |
| "eval_ppl_utr": 3.1444852350411727, |
| "step": 5500, |
| "tokens_trained": 18.018264376 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5627969647542728, |
| "grad_norm": 0.4803471267223358, |
| "loss": 1.0374, |
| "loss_ce": 0.9833582043647766, |
| "loss_region": 0.030004315078258514, |
| "loss_total": 1.0133625268936157, |
| "lr": 0.0010209326766806098, |
| "router/selected_tokens_s0": 4338.875, |
| "step": 5510, |
| "tokens_trained": 18.051029816 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5656336430040423, |
| "grad_norm": 1.0378037691116333, |
| "loss": 1.0417, |
| "loss_ce": 1.0130213499069214, |
| "loss_region": 0.030004722997546196, |
| "loss_total": 1.043026089668274, |
| "lr": 0.001020525784696759, |
| "router/selected_tokens_s0": 4365.25, |
| "step": 5520, |
| "tokens_trained": 18.083794456 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5684703212538118, |
| "grad_norm": 0.2039332240819931, |
| "loss": 1.0337, |
| "loss_ce": 0.972812831401825, |
| "loss_region": 0.030001500621438026, |
| "loss_total": 1.0028142929077148, |
| "lr": 0.001020118892712908, |
| "router/selected_tokens_s0": 4273.375, |
| "step": 5530, |
| "tokens_trained": 18.116559896 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5713069995035813, |
| "grad_norm": 0.9828659296035767, |
| "loss": 1.0435, |
| "loss_ce": 1.0882686376571655, |
| "loss_region": 0.030004315078258514, |
| "loss_total": 1.1182729005813599, |
| "lr": 0.0010197120007290569, |
| "router/selected_tokens_s0": 4338.375, |
| "step": 5540, |
| "tokens_trained": 18.149325336 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5741436777533508, |
| "grad_norm": 0.4906889498233795, |
| "loss": 1.0405, |
| "loss_ce": 0.9475066661834717, |
| "loss_region": 0.030002078041434288, |
| "loss_total": 0.9775087237358093, |
| "lr": 0.0010193051087452058, |
| "router/selected_tokens_s0": 4325.25, |
| "step": 5550, |
| "tokens_trained": 18.182090776 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5769803560031204, |
| "grad_norm": 0.6963337659835815, |
| "loss": 1.0379, |
| "loss_ce": 0.7684432864189148, |
| "loss_region": 0.029997603967785835, |
| "loss_total": 0.7984408736228943, |
| "lr": 0.0010188982167613548, |
| "router/selected_tokens_s0": 4256.625, |
| "step": 5560, |
| "tokens_trained": 18.214852704 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5798170342528899, |
| "grad_norm": 0.6726390719413757, |
| "loss": 1.0506, |
| "loss_ce": 1.0656613111495972, |
| "loss_region": 0.030005795881152153, |
| "loss_total": 1.0956671237945557, |
| "lr": 0.0010184913247775038, |
| "router/selected_tokens_s0": 4383.75, |
| "step": 5570, |
| "tokens_trained": 18.247618144 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5826537125026594, |
| "grad_norm": 0.5460783839225769, |
| "loss": 1.0391, |
| "loss_ce": 0.9652643203735352, |
| "loss_region": 0.030002180486917496, |
| "loss_total": 0.9952664971351624, |
| "lr": 0.0010180844327936527, |
| "router/selected_tokens_s0": 4303.875, |
| "step": 5580, |
| "tokens_trained": 18.280383584 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.585490390752429, |
| "grad_norm": 0.35639381408691406, |
| "loss": 1.0356, |
| "loss_ce": 0.88677579164505, |
| "loss_region": 0.030002212151885033, |
| "loss_total": 0.916778028011322, |
| "lr": 0.0010176775408098017, |
| "router/selected_tokens_s0": 4362.625, |
| "step": 5590, |
| "tokens_trained": 18.313149024 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5883270690021984, |
| "grad_norm": 0.5016544461250305, |
| "loss": 1.0311, |
| "loss_ce": 1.0120795965194702, |
| "loss_region": 0.030004823580384254, |
| "loss_total": 1.0420844554901123, |
| "lr": 0.0010172706488259507, |
| "router/selected_tokens_s0": 4349.75, |
| "step": 5600, |
| "tokens_trained": 18.345914464 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.591163747251968, |
| "grad_norm": 0.687404990196228, |
| "loss": 1.0409, |
| "loss_ce": 1.033144235610962, |
| "loss_region": 0.03000274859368801, |
| "loss_total": 1.063146948814392, |
| "lr": 0.0010168637568420996, |
| "router/selected_tokens_s0": 4313.5, |
| "step": 5610, |
| "tokens_trained": 18.378679104 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5940004255017375, |
| "grad_norm": 0.3557313084602356, |
| "loss": 1.0342, |
| "loss_ce": 0.9033691883087158, |
| "loss_region": 0.030007191002368927, |
| "loss_total": 0.9333763718605042, |
| "lr": 0.0010164568648582486, |
| "router/selected_tokens_s0": 4394.375, |
| "step": 5620, |
| "tokens_trained": 18.411444544 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.596837103751507, |
| "grad_norm": 0.6189426183700562, |
| "loss": 1.04, |
| "loss_ce": 0.8717077970504761, |
| "loss_region": 0.03000614605844021, |
| "loss_total": 0.9017139673233032, |
| "lr": 0.0010160499728743976, |
| "router/selected_tokens_s0": 4337.5, |
| "step": 5630, |
| "tokens_trained": 18.444209984 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.5996737820012765, |
| "grad_norm": 0.6988716721534729, |
| "loss": 1.0305, |
| "loss_ce": 0.975788950920105, |
| "loss_region": 0.029997356235980988, |
| "loss_total": 1.0057862997055054, |
| "lr": 0.0010156430808905465, |
| "router/selected_tokens_s0": 4335.875, |
| "step": 5640, |
| "tokens_trained": 18.476975424 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.602510460251046, |
| "grad_norm": 0.6643272042274475, |
| "loss": 1.0403, |
| "loss_ce": 1.1351817846298218, |
| "loss_region": 0.030008381232619286, |
| "loss_total": 1.1651902198791504, |
| "lr": 0.0010152361889066955, |
| "router/selected_tokens_s0": 4362.25, |
| "step": 5650, |
| "tokens_trained": 18.509739264 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6053471385008156, |
| "grad_norm": 0.8286615014076233, |
| "loss": 1.0373, |
| "loss_ce": 0.8297767043113708, |
| "loss_region": 0.03000292181968689, |
| "loss_total": 0.8597795963287354, |
| "lr": 0.0010148292969228445, |
| "router/selected_tokens_s0": 4327.25, |
| "step": 5660, |
| "tokens_trained": 18.542503904 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.608183816750585, |
| "grad_norm": 0.22749805450439453, |
| "loss": 1.0352, |
| "loss_ce": 0.9598128795623779, |
| "loss_region": 0.03000240959227085, |
| "loss_total": 0.9898152947425842, |
| "lr": 0.0010144224049389934, |
| "router/selected_tokens_s0": 4316.75, |
| "step": 5670, |
| "tokens_trained": 18.575269344 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6110204950003546, |
| "grad_norm": 0.25335147976875305, |
| "loss": 1.0285, |
| "loss_ce": 0.975443422794342, |
| "loss_region": 0.030008111149072647, |
| "loss_total": 1.0054515600204468, |
| "lr": 0.0010140155129551424, |
| "router/selected_tokens_s0": 4387.0, |
| "step": 5680, |
| "tokens_trained": 18.608031832 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6138571732501241, |
| "grad_norm": 0.8008378148078918, |
| "loss": 1.0393, |
| "loss_ce": 1.088114619255066, |
| "loss_region": 0.030002346262335777, |
| "loss_total": 1.1181169748306274, |
| "lr": 0.0010136086209712914, |
| "router/selected_tokens_s0": 4368.875, |
| "step": 5690, |
| "tokens_trained": 18.640797272 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6166938514998936, |
| "grad_norm": 0.6417054533958435, |
| "loss": 1.0381, |
| "loss_ce": 0.9379876255989075, |
| "loss_region": 0.03000989928841591, |
| "loss_total": 0.9679975509643555, |
| "lr": 0.0010132017289874405, |
| "router/selected_tokens_s0": 4368.875, |
| "step": 5700, |
| "tokens_trained": 18.673562712 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6195305297496632, |
| "grad_norm": 1.3401010036468506, |
| "loss": 1.0339, |
| "loss_ce": 1.040677785873413, |
| "loss_region": 0.030000442638993263, |
| "loss_total": 1.0706782341003418, |
| "lr": 0.0010127948370035895, |
| "router/selected_tokens_s0": 4332.25, |
| "step": 5710, |
| "tokens_trained": 18.706328152 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6223672079994327, |
| "grad_norm": 0.9140957593917847, |
| "loss": 1.0432, |
| "loss_ce": 1.1209052801132202, |
| "loss_region": 0.03001645766198635, |
| "loss_total": 1.1509217023849487, |
| "lr": 0.0010123879450197385, |
| "router/selected_tokens_s0": 4408.625, |
| "step": 5720, |
| "tokens_trained": 18.739093568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6252038862492022, |
| "grad_norm": 0.38389793038368225, |
| "loss": 1.0393, |
| "loss_ce": 0.9129772782325745, |
| "loss_region": 0.02999955601990223, |
| "loss_total": 0.9429768323898315, |
| "lr": 0.0010119810530358872, |
| "router/selected_tokens_s0": 4306.5, |
| "step": 5730, |
| "tokens_trained": 18.771858208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6280405644989717, |
| "grad_norm": 1.761724829673767, |
| "loss": 1.0455, |
| "loss_ce": 1.0788276195526123, |
| "loss_region": 0.030005717650055885, |
| "loss_total": 1.1088333129882812, |
| "lr": 0.0010115741610520362, |
| "router/selected_tokens_s0": 4344.625, |
| "step": 5740, |
| "tokens_trained": 18.804619848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6308772427487412, |
| "grad_norm": 0.514764666557312, |
| "loss": 1.0331, |
| "loss_ce": 1.0706536769866943, |
| "loss_region": 0.030006183311343193, |
| "loss_total": 1.1006598472595215, |
| "lr": 0.0010111672690681851, |
| "router/selected_tokens_s0": 4372.25, |
| "step": 5750, |
| "tokens_trained": 18.837385288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6337139209985108, |
| "grad_norm": 0.40612781047821045, |
| "loss": 1.036, |
| "loss_ce": 1.0906848907470703, |
| "loss_region": 0.030003517866134644, |
| "loss_total": 1.1206884384155273, |
| "lr": 0.0010107603770843341, |
| "router/selected_tokens_s0": 4322.25, |
| "step": 5760, |
| "tokens_trained": 18.870150648 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6365505992482803, |
| "grad_norm": 0.26753610372543335, |
| "loss": 1.0316, |
| "loss_ce": 1.00560462474823, |
| "loss_region": 0.030005289241671562, |
| "loss_total": 1.0356099605560303, |
| "lr": 0.0010103534851004833, |
| "router/selected_tokens_s0": 4323.5, |
| "step": 5770, |
| "tokens_trained": 18.902916088 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6393872774980498, |
| "grad_norm": 0.41570785641670227, |
| "loss": 1.0386, |
| "loss_ce": 0.9916934370994568, |
| "loss_region": 0.03000512719154358, |
| "loss_total": 1.0216985940933228, |
| "lr": 0.0010099465931166323, |
| "router/selected_tokens_s0": 4347.375, |
| "step": 5780, |
| "tokens_trained": 18.935681528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6422239557478193, |
| "grad_norm": 0.5421174168586731, |
| "loss": 1.0318, |
| "loss_ce": 0.9400946497917175, |
| "loss_region": 0.030001504346728325, |
| "loss_total": 0.9700961709022522, |
| "lr": 0.0010095397011327812, |
| "router/selected_tokens_s0": 4331.0, |
| "step": 5790, |
| "tokens_trained": 18.968446968 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6450606339975888, |
| "grad_norm": 0.4832181930541992, |
| "loss": 1.0337, |
| "loss_ce": 0.9629077911376953, |
| "loss_region": 0.030001387000083923, |
| "loss_total": 0.9929091930389404, |
| "lr": 0.0010091328091489302, |
| "router/selected_tokens_s0": 4295.375, |
| "step": 5800, |
| "tokens_trained": 19.001210808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6478973122473584, |
| "grad_norm": 1.0204321146011353, |
| "loss": 1.0304, |
| "loss_ce": 1.013646125793457, |
| "loss_region": 0.03000660054385662, |
| "loss_total": 1.0436527729034424, |
| "lr": 0.0010087259171650792, |
| "router/selected_tokens_s0": 4419.5, |
| "step": 5810, |
| "tokens_trained": 19.033976248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6507339904971279, |
| "grad_norm": 0.5032446384429932, |
| "loss": 1.0334, |
| "loss_ce": 1.0649542808532715, |
| "loss_region": 0.030000925064086914, |
| "loss_total": 1.0949552059173584, |
| "lr": 0.0010083190251812281, |
| "router/selected_tokens_s0": 4287.125, |
| "step": 5820, |
| "tokens_trained": 19.066741688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6535706687468974, |
| "grad_norm": 0.5891702175140381, |
| "loss": 1.0297, |
| "loss_ce": 1.0153957605361938, |
| "loss_region": 0.030004706233739853, |
| "loss_total": 1.0454005002975464, |
| "lr": 0.001007912133197377, |
| "router/selected_tokens_s0": 4354.75, |
| "step": 5830, |
| "tokens_trained": 19.099507128 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.656407346996667, |
| "grad_norm": 0.6594350934028625, |
| "loss": 1.0346, |
| "loss_ce": 1.0509767532348633, |
| "loss_region": 0.03000517748296261, |
| "loss_total": 1.080981969833374, |
| "lr": 0.001007505241213526, |
| "router/selected_tokens_s0": 4363.25, |
| "step": 5840, |
| "tokens_trained": 19.132272568 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6592440252464364, |
| "grad_norm": 0.5906273126602173, |
| "loss": 1.0337, |
| "loss_ce": 1.0429621934890747, |
| "loss_region": 0.030009938403964043, |
| "loss_total": 1.0729721784591675, |
| "lr": 0.001007098349229675, |
| "router/selected_tokens_s0": 4366.125, |
| "step": 5850, |
| "tokens_trained": 19.165038008 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.662080703496206, |
| "grad_norm": 0.47190093994140625, |
| "loss": 1.0335, |
| "loss_ce": 0.950088381767273, |
| "loss_region": 0.030002925544977188, |
| "loss_total": 0.9800913333892822, |
| "lr": 0.001006691457245824, |
| "router/selected_tokens_s0": 4327.5, |
| "step": 5860, |
| "tokens_trained": 19.197803448 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6649173817459755, |
| "grad_norm": 0.5748708844184875, |
| "loss": 1.0324, |
| "loss_ce": 1.0693488121032715, |
| "loss_region": 0.030013523995876312, |
| "loss_total": 1.0993623733520508, |
| "lr": 0.001006284565261973, |
| "router/selected_tokens_s0": 4386.25, |
| "step": 5870, |
| "tokens_trained": 19.230568888 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.667754059995745, |
| "grad_norm": 0.5576515793800354, |
| "loss": 1.0347, |
| "loss_ce": 1.090317964553833, |
| "loss_region": 0.030007001012563705, |
| "loss_total": 1.120324969291687, |
| "lr": 0.001005877673278122, |
| "router/selected_tokens_s0": 4399.0, |
| "step": 5880, |
| "tokens_trained": 19.263333528 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6705907382455145, |
| "grad_norm": 0.4692791998386383, |
| "loss": 1.0231, |
| "loss_ce": 0.9621900320053101, |
| "loss_region": 0.02999553643167019, |
| "loss_total": 0.9921855926513672, |
| "lr": 0.0010054707812942709, |
| "router/selected_tokens_s0": 4265.375, |
| "step": 5890, |
| "tokens_trained": 19.296098168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.673427416495284, |
| "grad_norm": 0.5106430649757385, |
| "loss": 1.0364, |
| "loss_ce": 0.8931739330291748, |
| "loss_region": 0.030010921880602837, |
| "loss_total": 0.923184871673584, |
| "lr": 0.0010050638893104198, |
| "router/selected_tokens_s0": 4364.125, |
| "step": 5900, |
| "tokens_trained": 19.328862688 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6762640947450536, |
| "grad_norm": 0.5919066667556763, |
| "loss": 1.0405, |
| "loss_ce": 1.1164112091064453, |
| "loss_region": 0.03000679798424244, |
| "loss_total": 1.1464179754257202, |
| "lr": 0.0010046569973265688, |
| "router/selected_tokens_s0": 4368.0, |
| "step": 5910, |
| "tokens_trained": 19.361627328 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.679100772994823, |
| "grad_norm": 0.5985324382781982, |
| "loss": 1.029, |
| "loss_ce": 0.8035845160484314, |
| "loss_region": 0.030008496716618538, |
| "loss_total": 0.8335930109024048, |
| "lr": 0.0010042501053427178, |
| "router/selected_tokens_s0": 4344.0, |
| "step": 5920, |
| "tokens_trained": 19.394392768 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6819374512445926, |
| "grad_norm": 0.46029677987098694, |
| "loss": 1.0369, |
| "loss_ce": 1.1152499914169312, |
| "loss_region": 0.030002374202013016, |
| "loss_total": 1.1452523469924927, |
| "lr": 0.0010038432133588667, |
| "router/selected_tokens_s0": 4378.875, |
| "step": 5930, |
| "tokens_trained": 19.427158208 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6847741294943621, |
| "grad_norm": 0.5811964273452759, |
| "loss": 1.0304, |
| "loss_ce": 1.0038641691207886, |
| "loss_region": 0.03000630810856819, |
| "loss_total": 1.0338704586029053, |
| "lr": 0.0010034363213750157, |
| "router/selected_tokens_s0": 4316.0, |
| "step": 5940, |
| "tokens_trained": 19.459922848 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6876108077441316, |
| "grad_norm": 0.2964920401573181, |
| "loss": 1.0347, |
| "loss_ce": 1.0238761901855469, |
| "loss_region": 0.03000667691230774, |
| "loss_total": 1.0538828372955322, |
| "lr": 0.0010030294293911649, |
| "router/selected_tokens_s0": 4331.875, |
| "step": 5950, |
| "tokens_trained": 19.492688288 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6904474859939012, |
| "grad_norm": 0.20688390731811523, |
| "loss": 1.031, |
| "loss_ce": 1.0256707668304443, |
| "loss_region": 0.030003707855939865, |
| "loss_total": 1.055674433708191, |
| "lr": 0.0010026225374073139, |
| "router/selected_tokens_s0": 4327.875, |
| "step": 5960, |
| "tokens_trained": 19.525453728 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6932841642436707, |
| "grad_norm": 0.4191875755786896, |
| "loss": 1.0357, |
| "loss_ce": 0.9435751438140869, |
| "loss_region": 0.03000720962882042, |
| "loss_total": 0.9735823273658752, |
| "lr": 0.0010022156454234628, |
| "router/selected_tokens_s0": 4370.375, |
| "step": 5970, |
| "tokens_trained": 19.558219168 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6961208424934402, |
| "grad_norm": 0.6510814428329468, |
| "loss": 1.0246, |
| "loss_ce": 0.9768120050430298, |
| "loss_region": 0.030007587745785713, |
| "loss_total": 1.0068196058273315, |
| "lr": 0.0010018087534396116, |
| "router/selected_tokens_s0": 4360.875, |
| "step": 5980, |
| "tokens_trained": 19.590983808 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.6989575207432097, |
| "grad_norm": 0.477987676858902, |
| "loss": 1.0252, |
| "loss_ce": 1.0579345226287842, |
| "loss_region": 0.030001208186149597, |
| "loss_total": 1.0879356861114502, |
| "lr": 0.0010014018614557605, |
| "router/selected_tokens_s0": 4310.375, |
| "step": 5990, |
| "tokens_trained": 19.623749248 |
| }, |
| { |
| "comp/rl_weight": 0.03, |
| "comp/strictness": 0.0, |
| "epoch": 1.7017941989929792, |
| "grad_norm": 0.5178934335708618, |
| "loss": 1.0325, |
| "loss_ce": 1.01963472366333, |
| "loss_region": 0.03001089207828045, |
| "loss_total": 1.0496456623077393, |
| "lr": 0.0010009949694719095, |
| "router/selected_tokens_s0": 4379.5, |
| "step": 6000, |
| "tokens_trained": 19.65651468 |
| }, |
| { |
| "epoch": 1.7017941989929792, |
| "eval_ppl": 2.7438295521464737, |
| "eval_runtime": 2.524, |
| "step": 6000, |
| "tokens_trained": 19.65651468 |
| }, |
| { |
| "epoch": 1.7017941989929792, |
| "eval_F": 0.34151615105799704, |
| "eval_F_cds": 0.3443491198421619, |
| "eval_F_dig": 0.3249212178034742, |
| "eval_F_exon": 0.3452196236634976, |
| "eval_F_intron": 0.3418051895789037, |
| "eval_F_nig": 0.3420160147164305, |
| "eval_F_promoter": 0.33950926318966235, |
| "eval_F_utr": 0.34296393229500766, |
| "eval_G": 0.3440260132521393, |
| "eval_G_cds": 0.34685659178384587, |
| "eval_G_dig": 0.3855714028765268, |
| "eval_G_exon": 0.3442092298449093, |
| "eval_G_intron": 0.3428874806376615, |
| "eval_G_nig": 0.34218012092239336, |
| "eval_G_promoter": 0.34825656510586706, |
| "eval_G_utr": 0.3435660953042277, |
| "eval_avg_bp_per_token": 2.9281192028607097, |
| "eval_bp_per_token/cds": 2.904029493260696, |
| "eval_bp_per_token/dig": 3.077669124719461, |
| "eval_bp_per_token/exon": 2.896706709160742, |
| "eval_bp_per_token/intron": 2.9256431162791223, |
| "eval_bp_per_token/nig": 2.9238396945508875, |
| "eval_bp_per_token/promoter": 2.9454277347400777, |
| "eval_bp_per_token/utr": 2.9157584977181474, |
| "eval_ppl_cds": 3.1473755860930703, |
| "eval_ppl_dig": 1.0781771784423138, |
| "eval_ppl_exon": 3.215945780217024, |
| "eval_ppl_intron": 2.8130272235017966, |
| "eval_ppl_nig": 2.58073245677091, |
| "eval_ppl_promoter": 2.9110875223569272, |
| "eval_ppl_utr": 3.090760374244644, |
| "step": 6000, |
| "tokens_trained": 19.65651468 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 30600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9, |
| "save_steps": 3000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|