{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9992509363295881, "global_step": 2668, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.8518518518518518e-05, "loss": 11.3968, "theoretical_loss": 10.87642657795271, "tokens_seen": 1048576 }, { "epoch": 0.0, "learning_rate": 3.7037037037037037e-05, "loss": 11.3956, "theoretical_loss": 9.382678282406216, "tokens_seen": 2097152 }, { "epoch": 0.0, "learning_rate": 5.555555555555555e-05, "loss": 10.5571, "theoretical_loss": 8.634232609231233, "tokens_seen": 3145728 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.4819035232067108, "objective/train/docs_used": 13098, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 9.65994930267334, "objective/train/original_loss": 9.659950256347656, "objective/train/theoretical_loss": 8.563481156106828, "objective/train/tokens_used": 23736800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2415286898612976, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494179725646973, "objective/train/weighted_lm_loss": 10.137272834777832, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9525310397148132, "theoretical_loss": 8.563481156106828, "tokens_seen": 3276800 }, { "epoch": 0.0, "learning_rate": 7.407407407407407e-05, "loss": 10.1633, "theoretical_loss": 8.152440604135377, "tokens_seen": 4194304 }, { "epoch": 0.0, "learning_rate": 9.259259259259259e-05, "loss": 10.086, "theoretical_loss": 7.804563746449924, "tokens_seen": 5242880 }, { "epoch": 0.0, "learning_rate": 0.0001111111111111111, "loss": 9.6408, "theoretical_loss": 7.536027470795679, "tokens_seen": 6291456 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.45301443338394165, "objective/train/docs_used": 15224, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 9.799736976623535, "objective/train/original_loss": 9.799737930297852, "objective/train/theoretical_loss": 7.477757209543791, "objective/train/tokens_used": 27013600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23901385068893433, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.046515703201294, "objective/train/weighted_lm_loss": 10.25350570678711, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9514407515525818, "theoretical_loss": 7.477757209543791, "tokens_seen": 6553600 }, { "epoch": 0.0, "learning_rate": 0.00012962962962962963, "loss": 9.8744, "theoretical_loss": 7.319437165569436, "tokens_seen": 7340032 }, { "epoch": 0.0, "learning_rate": 0.00014814814814814815, "loss": 9.604, "theoretical_loss": 7.139227903207399, "tokens_seen": 8388608 }, { "epoch": 0.0, "learning_rate": 0.00016666666666666666, "loss": 9.5326, "theoretical_loss": 6.985769514638539, "tokens_seen": 9437184 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.47840288281440735, "objective/train/docs_used": 16449, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 9.037789344787598, "objective/train/original_loss": 9.037790298461914, "objective/train/theoretical_loss": 6.9337529803906595, "objective/train/tokens_used": 30290400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23556047677993774, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490374565124512, "objective/train/weighted_lm_loss": 9.48002815246582, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9567553997039795, "theoretical_loss": 6.9337529803906595, "tokens_seen": 9830400 }, { "epoch": 0.0, "learning_rate": 0.00018518518518518518, "loss": 9.4931, "theoretical_loss": 6.85271964810239, "tokens_seen": 10485760 }, { "epoch": 0.0, "learning_rate": 0.0002037037037037037, "loss": 9.2078, "theoretical_loss": 6.735696451044834, "tokens_seen": 11534336 }, { "epoch": 0.0, "learning_rate": 0.0002222222222222222, "loss": 9.1417, "theoretical_loss": 6.631555583663063, "tokens_seen": 12582912 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.48848089575767517, "objective/train/docs_used": 18197, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 8.314473152160645, "objective/train/original_loss": 8.314474105834961, "objective/train/theoretical_loss": 6.583564719922174, "objective/train/tokens_used": 33567200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2417714148759842, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500770807266235, "objective/train/weighted_lm_loss": 8.729887962341309, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9551335573196411, "theoretical_loss": 6.583564719922174, "tokens_seen": 13107200 }, { "epoch": 0.0, "learning_rate": 0.00024074074074074072, "loss": 8.9637, "theoretical_loss": 6.537970167599786, "tokens_seen": 13631488 }, { "epoch": 0.01, "learning_rate": 0.00025925925925925926, "loss": 8.8478, "theoretical_loss": 6.453173753851489, "tokens_seen": 14680064 }, { "epoch": 0.01, "learning_rate": 0.0002777777777777778, "loss": 8.6147, "theoretical_loss": 6.3757961938167265, "tokens_seen": 15728640 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.48041555285453796, "objective/train/docs_used": 19928, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 7.850920677185059, "objective/train/original_loss": 7.850921154022217, "objective/train/theoretical_loss": 6.330712056611843, "objective/train/tokens_used": 36844000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23706857860088348, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492464303970337, "objective/train/weighted_lm_loss": 8.236246109008789, "objective/train/weights_max": 1.0512189865112305, "objective/train/weights_min": 0.9536896347999573, "theoretical_loss": 6.330712056611843, "tokens_seen": 16384000 }, { "epoch": 0.01, "learning_rate": 0.0002962962962962963, "loss": 8.4719, "theoretical_loss": 6.304755024141699, "tokens_seen": 16777216 }, { "epoch": 0.01, "learning_rate": 0.0003148148148148148, "loss": 8.2341, "theoretical_loss": 6.239181372596676, "tokens_seen": 17825792 }, { "epoch": 0.01, "learning_rate": 0.0003333333333333333, "loss": 8.0206, "theoretical_loss": 6.17836807379203, "tokens_seen": 18874368 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.4757058024406433, "objective/train/docs_used": 21444, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 7.8217854499816895, "objective/train/original_loss": 7.821784496307373, "objective/train/theoretical_loss": 6.135527723436086, "objective/train/tokens_used": 40120800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23544126749038696, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487672090530396, "objective/train/weighted_lm_loss": 8.202380180358887, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9515462517738342, "theoretical_loss": 6.135527723436086, "tokens_seen": 19660800 }, { "epoch": 0.01, "learning_rate": 0.0003518518518518519, "loss": 7.9621, "theoretical_loss": 6.121732500517281, "tokens_seen": 19922944 }, { "epoch": 0.01, "learning_rate": 0.00037037037037037035, "loss": 7.7465, "theoretical_loss": 6.068789398864208, "tokens_seen": 20971520 }, { "epoch": 0.01, "learning_rate": 0.0003888888888888889, "loss": 7.6596, "theoretical_loss": 6.019130679617959, "tokens_seen": 22020096 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.4673197567462921, "objective/train/docs_used": 23342, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.7955641746521, "objective/train/original_loss": 6.795564651489258, "objective/train/theoretical_loss": 5.978100075365368, "objective/train/tokens_used": 43397600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22965674102306366, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0478991270065308, "objective/train/weighted_lm_loss": 7.11885404586792, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9708001017570496, "theoretical_loss": 5.978100075365368, "tokens_seen": 22937600 }, { "epoch": 0.01, "learning_rate": 0.0004074074074074074, "loss": 7.534, "theoretical_loss": 5.972410144955672, "tokens_seen": 23068672 }, { "epoch": 0.01, "learning_rate": 0.00042592592592592595, "loss": 7.4855, "theoretical_loss": 5.928331780102846, "tokens_seen": 24117248 }, { "epoch": 0.01, "learning_rate": 0.0004444444444444444, "loss": 7.5487, "theoretical_loss": 5.886640662049593, "tokens_seen": 25165824 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.47823473811149597, "objective/train/docs_used": 25475, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 7.412741184234619, "objective/train/original_loss": 7.412740707397461, "objective/train/theoretical_loss": 5.847115817761683, "objective/train/tokens_used": 46674400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23657572269439697, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490257740020752, "objective/train/weighted_lm_loss": 7.774959564208984, "objective/train/weights_max": 1.0512189865112305, "objective/train/weights_min": 0.9553595781326294, "theoretical_loss": 5.847115817761683, "tokens_seen": 26214400 }, { "epoch": 0.01, "learning_rate": 0.000462962962962963, "loss": 7.4963, "theoretical_loss": 5.847115817761683, "tokens_seen": 26214400 }, { "epoch": 0.01, "learning_rate": 0.00048148148148148144, "loss": 7.3715, "theoretical_loss": 5.809564554032628, "tokens_seen": 27262976 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 7.5494, "theoretical_loss": 5.773817911814618, "tokens_seen": 28311552 }, { "epoch": 0.01, "learning_rate": 0.0004998108210367007, "loss": 7.3456, "theoretical_loss": 5.739726989373027, "tokens_seen": 29360128 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.4513802230358124, "objective/train/docs_used": 27675, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.641174793243408, "objective/train/original_loss": 6.641175270080566, "objective/train/theoretical_loss": 5.735575307377884, "objective/train/tokens_used": 49951200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2184389978647232, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0462480783462524, "objective/train/weighted_lm_loss": 6.950745105743408, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9532368183135986, "theoretical_loss": 5.735575307377884, "tokens_seen": 29491200 }, { "epoch": 0.01, "learning_rate": 0.0004996216420734014, "loss": 7.4153, "theoretical_loss": 5.707159943639638, "tokens_seen": 30408704 }, { "epoch": 0.01, "learning_rate": 0.0004994324631101022, "loss": 7.4387, "theoretical_loss": 5.6759995259903135, "tokens_seen": 31457280 }, { "epoch": 0.01, "learning_rate": 0.0004992432841468029, "loss": 7.3125, "theoretical_loss": 5.646141042853927, "tokens_seen": 32505856 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.4938547611236572, "objective/train/docs_used": 29692, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.995054244995117, "objective/train/original_loss": 6.995054244995117, "objective/train/theoretical_loss": 5.638868635567113, "objective/train/tokens_used": 53228000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24502605199813843, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506311655044556, "objective/train/weighted_lm_loss": 7.349433898925781, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9657517075538635, "theoretical_loss": 5.638868635567113, "tokens_seen": 32768000 }, { "epoch": 0.01, "learning_rate": 0.0004990541051835036, "loss": 7.3289, "theoretical_loss": 5.61749065678978, "tokens_seen": 33554432 }, { "epoch": 0.01, "learning_rate": 0.0004988649262202043, "loss": 7.2839, "theoretical_loss": 5.589963962496837, "tokens_seen": 34603008 }, { "epoch": 0.01, "learning_rate": 0.0004986757472569051, "loss": 7.2524, "theoretical_loss": 5.563484786407864, "tokens_seen": 35651584 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.48527762293815613, "objective/train/docs_used": 31846, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 7.310849666595459, "objective/train/original_loss": 7.310849189758301, "objective/train/theoretical_loss": 5.553810873340668, "objective/train/tokens_used": 56504800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24185144901275635, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497572422027588, "objective/train/weighted_lm_loss": 7.675657272338867, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9533452987670898, "theoretical_loss": 5.553810873340668, "tokens_seen": 36044800 }, { "epoch": 0.01, "learning_rate": 0.0004984865682936058, "loss": 7.1968, "theoretical_loss": 5.537984169317745, "tokens_seen": 36700160 }, { "epoch": 0.01, "learning_rate": 0.0004982973893303065, "loss": 7.129, "theoretical_loss": 5.513399499781949, "tokens_seen": 37748736 }, { "epoch": 0.01, "learning_rate": 0.0004981082103670072, "loss": 7.1462, "theoretical_loss": 5.4896737724343065, "tokens_seen": 38797312 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.4853717088699341, "objective/train/docs_used": 33055, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.262423038482666, "objective/train/original_loss": 6.262422561645508, "objective/train/theoretical_loss": 5.478116572052198, "objective/train/tokens_used": 59781600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24034540355205536, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497589111328125, "objective/train/weighted_lm_loss": 6.57367467880249, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9534735083580017, "theoretical_loss": 5.478116572052198, "tokens_seen": 39321600 }, { "epoch": 0.01, "learning_rate": 0.000497919031403708, "loss": 7.0583, "theoretical_loss": 5.466754950375066, "tokens_seen": 39845888 }, { "epoch": 0.01, "learning_rate": 0.0004977298524404087, "loss": 6.9816, "theoretical_loss": 5.444595414709691, "tokens_seen": 40894464 }, { "epoch": 0.01, "learning_rate": 0.0004975406734771094, "loss": 7.0311, "theoretical_loss": 5.423151487427312, "tokens_seen": 41943040 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.48702144622802734, "objective/train/docs_used": 34814, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.590912818908691, "objective/train/original_loss": 6.590912818908691, "objective/train/theoretical_loss": 5.410094451075121, "objective/train/tokens_used": 63058400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24035364389419556, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499237775802612, "objective/train/weighted_lm_loss": 6.920047760009766, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.9549703001976013, "theoretical_loss": 5.410094451075121, "tokens_seen": 42598400 }, { "epoch": 0.02, "learning_rate": 0.00049735149451381, "loss": 7.0719, "theoretical_loss": 5.402383016282556, "tokens_seen": 42991616 }, { "epoch": 0.02, "learning_rate": 0.0004971623155505107, "loss": 6.9474, "theoretical_loss": 5.382253012327088, "tokens_seen": 44040192 }, { "epoch": 0.02, "learning_rate": 0.0004969731365872115, "loss": 6.9685, "theoretical_loss": 5.362727332334602, "tokens_seen": 45088768 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.49159982800483704, "objective/train/docs_used": 36830, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.507728099822998, "objective/train/original_loss": 6.50772762298584, "objective/train/theoretical_loss": 5.348460575231594, "objective/train/tokens_used": 66335200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2427477091550827, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503939390182495, "objective/train/weighted_lm_loss": 6.835729122161865, "objective/train/weights_max": 1.0512152910232544, "objective/train/weights_min": 0.9736581444740295, "theoretical_loss": 5.348460575231594, "tokens_seen": 45875200 }, { "epoch": 0.02, "learning_rate": 0.0004967839576239122, "loss": 6.8813, "theoretical_loss": 5.343774399657292, "tokens_seen": 46137344 }, { "epoch": 0.02, "learning_rate": 0.0004965947786606129, "loss": 6.7701, "theoretical_loss": 5.325364958105951, "tokens_seen": 47185920 }, { "epoch": 0.02, "learning_rate": 0.0004964055996973137, "loss": 6.8432, "theoretical_loss": 5.307471854308661, "tokens_seen": 48234496 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.4919637143611908, "objective/train/docs_used": 38907, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.092850685119629, "objective/train/original_loss": 6.092850208282471, "objective/train/theoretical_loss": 5.292219058433327, "objective/train/tokens_used": 69612000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2443958818912506, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504387617111206, "objective/train/weighted_lm_loss": 6.401121616363525, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9526038765907288, "theoretical_loss": 5.292219058433327, "tokens_seen": 49152000 }, { "epoch": 0.02, "learning_rate": 0.0004962164207340144, "loss": 6.7243, "theoretical_loss": 5.290069844712654, "tokens_seen": 49283072 }, { "epoch": 0.02, "learning_rate": 0.0004960272417707151, "loss": 6.8188, "theoretical_loss": 5.273135423980159, "tokens_seen": 50331648 }, { "epoch": 0.02, "learning_rate": 0.0004958380628074158, "loss": 6.7243, "theoretical_loss": 5.256646672015468, "tokens_seen": 51380224 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.4699273407459259, "objective/train/docs_used": 40119, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.565382480621338, "objective/train/original_loss": 6.56538200378418, "objective/train/theoretical_loss": 5.240583117265738, "objective/train/tokens_used": 72888800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2322714626789093, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481733083724976, "objective/train/weighted_lm_loss": 6.87912654876709, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9521334767341614, "theoretical_loss": 5.240583117265738, "tokens_seen": 52428800 }, { "epoch": 0.02, "learning_rate": 0.0004956488838441165, "loss": 6.7436, "theoretical_loss": 5.240583117265738, "tokens_seen": 52428800 }, { "epoch": 0.02, "learning_rate": 0.0004954597048808173, "loss": 6.7854, "theoretical_loss": 5.224925614276991, "tokens_seen": 53477376 }, { "epoch": 0.02, "learning_rate": 0.000495270525917518, "loss": 6.7126, "theoretical_loss": 5.209656233771442, "tokens_seen": 54525952 }, { "epoch": 0.02, "learning_rate": 0.0004950813469542187, "loss": 6.7657, "theoretical_loss": 5.194758163752068, "tokens_seen": 55574528 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.48516738414764404, "objective/train/docs_used": 42175, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.86571741104126, "objective/train/original_loss": 5.865716934204102, "objective/train/theoretical_loss": 5.192921216021549, "objective/train/tokens_used": 76165600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24054694175720215, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497394800186157, "objective/train/weighted_lm_loss": 6.157922744750977, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9579119682312012, "theoretical_loss": 5.192921216021549, "tokens_seen": 55705600 }, { "epoch": 0.02, "learning_rate": 0.0004948921679909194, "loss": 6.667, "theoretical_loss": 5.180215620343211, "tokens_seen": 56623104 }, { "epoch": 0.02, "learning_rate": 0.0004947029890276201, "loss": 6.726, "theoretical_loss": 5.166013767248007, "tokens_seen": 57671680 }, { "epoch": 0.02, "learning_rate": 0.0004945138100643209, "loss": 6.6426, "theoretical_loss": 5.152138642849951, "tokens_seen": 58720256 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.45703786611557007, "objective/train/docs_used": 44036, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.240392208099365, "objective/train/original_loss": 6.240392684936523, "objective/train/theoretical_loss": 5.148719354852201, "objective/train/tokens_used": 79442400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24004234373569489, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0469226837158203, "objective/train/weighted_lm_loss": 6.5345845222473145, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9527196884155273, "theoretical_loss": 5.148719354852201, "tokens_seen": 58982400 }, { "epoch": 0.02, "learning_rate": 0.0004943246311010216, "loss": 6.6952, "theoretical_loss": 5.138577094110906, "tokens_seen": 59768832 }, { "epoch": 0.02, "learning_rate": 0.0004941354521377223, "loss": 6.6242, "theoretical_loss": 5.12531671652499, "tokens_seen": 60817408 }, { "epoch": 0.02, "learning_rate": 0.0004939462731744231, "loss": 6.6385, "theoretical_loss": 5.112345799479678, "tokens_seen": 61865984 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.4917111396789551, "objective/train/docs_used": 45869, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.257774353027344, "objective/train/original_loss": 6.257774353027344, "objective/train/theoretical_loss": 5.107554053900861, "objective/train/tokens_used": 82719200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2443944662809372, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504134893417358, "objective/train/weighted_lm_loss": 6.572638511657715, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.964363157749176, "theoretical_loss": 5.107554053900861, "tokens_seen": 62259200 }, { "epoch": 0.02, "learning_rate": 0.0004937570942111238, "loss": 6.5263, "theoretical_loss": 5.0996532764547, "tokens_seen": 62914560 }, { "epoch": 0.02, "learning_rate": 0.0004935679152478245, "loss": 6.5435, "theoretical_loss": 5.087228679557634, "tokens_seen": 63963136 }, { "epoch": 0.02, "learning_rate": 0.0004933787362845251, "loss": 6.5413, "theoretical_loss": 5.075062097954335, "tokens_seen": 65011712 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.47832614183425903, "objective/train/docs_used": 47778, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.022909641265869, "objective/train/original_loss": 6.022909164428711, "objective/train/theoretical_loss": 5.069072608639006, "objective/train/tokens_used": 85996000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2356715351343155, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490304231643677, "objective/train/weighted_lm_loss": 6.317652702331543, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.952083170413971, "theoretical_loss": 5.069072608639006, "tokens_seen": 65536000 }, { "epoch": 0.02, "learning_rate": 0.0004931895573212258, "loss": 6.4814, "theoretical_loss": 5.063144139803664, "tokens_seen": 66060288 }, { "epoch": 0.02, "learning_rate": 0.0004930003783579266, "loss": 6.5092, "theoretical_loss": 5.051465897350656, "tokens_seen": 67108864 }, { "epoch": 0.02, "learning_rate": 0.0004928111993946273, "loss": 6.4835, "theoretical_loss": 5.040018914871285, "tokens_seen": 68157440 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.4900747239589691, "objective/train/docs_used": 49496, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.184676170349121, "objective/train/original_loss": 6.184676647186279, "objective/train/theoretical_loss": 5.032978401333766, "objective/train/tokens_used": 89272800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24198560416698456, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502376556396484, "objective/train/weighted_lm_loss": 6.495528697967529, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9738534092903137, "theoretical_loss": 5.032978401333766, "tokens_seen": 68812800 }, { "epoch": 0.02, "learning_rate": 0.000492622020431328, "loss": 6.5187, "theoretical_loss": 5.028795159195919, "tokens_seen": 69206016 }, { "epoch": 0.03, "learning_rate": 0.0004924328414680287, "loss": 6.431, "theoretical_loss": 5.01778699256848, "tokens_seen": 70254592 }, { "epoch": 0.03, "learning_rate": 0.0004922436625047294, "loss": 6.4495, "theoretical_loss": 5.006987147624395, "tokens_seen": 71303168 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.48679879307746887, "objective/train/docs_used": 51476, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.056151390075684, "objective/train/original_loss": 6.056151390075684, "objective/train/theoretical_loss": 4.999019799720424, "objective/train/tokens_used": 92549600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23905406892299652, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498950481414795, "objective/train/weighted_lm_loss": 6.357154846191406, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9834248423576355, "theoretical_loss": 4.999019799720424, "tokens_seen": 72089600 }, { "epoch": 0.03, "learning_rate": 0.0004920544835414302, "loss": 6.4097, "theoretical_loss": 4.996388704293487, "tokens_seen": 72351744 }, { "epoch": 0.03, "learning_rate": 0.0004918653045781309, "loss": 6.3944, "theoretical_loss": 4.985985068454193, "tokens_seen": 73400320 }, { "epoch": 0.03, "learning_rate": 0.0004916761256148316, "loss": 6.3967, "theoretical_loss": 4.9757699521834, "tokens_seen": 74448896 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.48649123311042786, "objective/train/docs_used": 53412, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.1873979568481445, "objective/train/original_loss": 6.187397480010986, "objective/train/theoretical_loss": 4.966981646847723, "objective/train/tokens_used": 95826400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23994770646095276, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498689413070679, "objective/train/weighted_lm_loss": 6.4954962730407715, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9554786682128906, "theoretical_loss": 4.966981646847723, "tokens_seen": 75366400 }, { "epoch": 0.03, "learning_rate": 0.0004914869466515324, "loss": 6.4082, "theoretical_loss": 4.96573735546202, "tokens_seen": 75497472 }, { "epoch": 0.03, "learning_rate": 0.0004912977676882331, "loss": 6.3538, "theoretical_loss": 4.955881549210428, "tokens_seen": 76546048 }, { "epoch": 0.03, "learning_rate": 0.0004911085887249338, "loss": 6.2587, "theoretical_loss": 4.946197059540362, "tokens_seen": 77594624 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.4663962125778198, "objective/train/docs_used": 55300, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.6434221267700195, "objective/train/original_loss": 5.6434221267700195, "objective/train/theoretical_loss": 4.936678653120895, "objective/train/tokens_used": 99103200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23964135348796844, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0478569269180298, "objective/train/weighted_lm_loss": 5.911611557006836, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.951373279094696, "theoretical_loss": 4.936678653120895, "tokens_seen": 78643200 }, { "epoch": 0.03, "learning_rate": 0.0004909194097616345, "loss": 6.2099, "theoretical_loss": 4.936678653120895, "tokens_seen": 78643200 }, { "epoch": 0.03, "learning_rate": 0.0004907302307983352, "loss": 6.3581, "theoretical_loss": 4.927321323566017, "tokens_seen": 79691776 }, { "epoch": 0.03, "learning_rate": 0.000490541051835036, "loss": 6.2595, "theoretical_loss": 4.918120278760069, "tokens_seen": 80740352 }, { "epoch": 0.03, "learning_rate": 0.0004903518728717367, "loss": 6.2955, "theoretical_loss": 4.909070929045194, "tokens_seen": 81788928 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.46273210644721985, "objective/train/docs_used": 57273, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.127193450927734, "objective/train/original_loss": 6.127194404602051, "objective/train/theoretical_loss": 4.907950205325841, "objective/train/tokens_used": 102380000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23514951765537262, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0474683046340942, "objective/train/weighted_lm_loss": 6.427692413330078, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9521082639694214, "theoretical_loss": 4.907950205325841, "tokens_seen": 81920000 }, { "epoch": 0.03, "learning_rate": 0.0004901626939084374, "loss": 6.2293, "theoretical_loss": 4.90016887620194, "tokens_seen": 82837504 }, { "epoch": 0.03, "learning_rate": 0.0004899735149451381, "loss": 6.2417, "theoretical_loss": 4.891409903160486, "tokens_seen": 83886080 }, { "epoch": 0.03, "learning_rate": 0.0004897843359818388, "loss": 6.1855, "theoretical_loss": 4.882789964385566, "tokens_seen": 84934656 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.47956690192222595, "objective/train/docs_used": 58950, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.768993377685547, "objective/train/original_loss": 5.768993377685547, "objective/train/theoretical_loss": 4.880656245308686, "objective/train/tokens_used": 105656800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23697242140769958, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491610765457153, "objective/train/weighted_lm_loss": 6.05217981338501, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9515382647514343, "theoretical_loss": 4.880656245308686, "tokens_seen": 85196800 }, { "epoch": 0.03, "learning_rate": 0.0004895951570185396, "loss": 6.2568, "theoretical_loss": 4.874305176883285, "tokens_seen": 85983232 }, { "epoch": 0.03, "learning_rate": 0.0004894059780552403, "loss": 6.1378, "theoretical_loss": 4.865951811782555, "tokens_seen": 87031808 }, { "epoch": 0.03, "learning_rate": 0.0004892167990919411, "loss": 6.1333, "theoretical_loss": 4.857726286448001, "tokens_seen": 88080384 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.4823387563228607, "objective/train/docs_used": 60915, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.946232795715332, "objective/train/original_loss": 5.946232795715332, "objective/train/theoretical_loss": 4.854673965977539, "objective/train/tokens_used": 108933600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23662100732326508, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494364500045776, "objective/train/weighted_lm_loss": 6.240575313568115, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9716301560401917, "theoretical_loss": 4.854673965977539, "tokens_seen": 88473600 }, { "epoch": 0.03, "learning_rate": 0.0004890276201286417, "loss": 6.0885, "theoretical_loss": 4.849625157084915, "tokens_seen": 89128960 }, { "epoch": 0.03, "learning_rate": 0.0004888384411653424, "loss": 6.0157, "theoretical_loss": 4.8416451118001484, "tokens_seen": 90177536 }, { "epoch": 0.03, "learning_rate": 0.0004886492622020431, "loss": 6.0435, "theoretical_loss": 4.833782964085925, "tokens_seen": 91226112 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.4862677752971649, "objective/train/docs_used": 62413, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.789903163909912, "objective/train/original_loss": 5.789902687072754, "objective/train/theoretical_loss": 4.829895138804573, "objective/train/tokens_used": 112210400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24461686611175537, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498701333999634, "objective/train/weighted_lm_loss": 6.078371524810791, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9515182375907898, "theoretical_loss": 4.829895138804573, "tokens_seen": 91750400 }, { "epoch": 0.03, "learning_rate": 0.0004884600832387438, "loss": 6.0701, "theoretical_loss": 4.826035646696238, "tokens_seen": 92274688 }, { "epoch": 0.03, "learning_rate": 0.0004882709042754446, "loss": 6.0581, "theoretical_loss": 4.8184002058880395, "tokens_seen": 93323264 }, { "epoch": 0.03, "learning_rate": 0.00048808172531214527, "loss": 6.0027, "theoretical_loss": 4.810873796001641, "tokens_seen": 94371840 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.4866400361061096, "objective/train/docs_used": 64216, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 6.136915683746338, "objective/train/original_loss": 6.136915683746338, "objective/train/theoretical_loss": 4.80622393427368, "objective/train/tokens_used": 115487200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24255988001823425, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498968362808228, "objective/train/weighted_lm_loss": 6.4431023597717285, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.951479434967041, "theoretical_loss": 4.80622393427368, "tokens_seen": 95027200 }, { "epoch": 0.03, "learning_rate": 0.000487892546348846, "loss": 6.0778, "theoretical_loss": 4.803453674356847, "tokens_seen": 95420416 }, { "epoch": 0.03, "learning_rate": 0.0004877033673855467, "loss": 6.023, "theoretical_loss": 4.796137196443157, "tokens_seen": 96468992 }, { "epoch": 0.03, "learning_rate": 0.0004875141884222474, "loss": 6.0011, "theoretical_loss": 4.788921811384128, "tokens_seen": 97517568 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.48324474692344666, "objective/train/docs_used": 66562, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.525176525115967, "objective/train/original_loss": 5.525176525115967, "objective/train/theoretical_loss": 4.783575130772016, "objective/train/tokens_used": 118764000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2386062890291214, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495374202728271, "objective/train/weighted_lm_loss": 5.799572944641113, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9542084336280823, "theoretical_loss": 4.783575130772016, "tokens_seen": 98304000 }, { "epoch": 0.04, "learning_rate": 0.0004873250094589482, "loss": 5.9655, "theoretical_loss": 4.781805057657483, "tokens_seen": 98566144 }, { "epoch": 0.04, "learning_rate": 0.0004871358304956489, "loss": 5.9567, "theoretical_loss": 4.774784559054009, "tokens_seen": 99614720 }, { "epoch": 0.04, "learning_rate": 0.00048694665153234965, "loss": 6.0355, "theoretical_loss": 4.76785802085957, "tokens_seen": 100663296 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.466911256313324, "objective/train/docs_used": 67858, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.060521125793457, "objective/train/original_loss": 5.060521125793457, "objective/train/theoretical_loss": 4.761872632268167, "objective/train/tokens_used": 122040800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2336144745349884, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0478785037994385, "objective/train/weighted_lm_loss": 5.305319786071777, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9513893127441406, "theoretical_loss": 4.761872632268167, "tokens_seen": 101580800 }, { "epoch": 0.04, "learning_rate": 0.00048675747256905034, "loss": 5.966, "theoretical_loss": 4.761023226245718, "tokens_seen": 101711872 }, { "epoch": 0.04, "learning_rate": 0.00048656829360575103, "loss": 5.9966, "theoretical_loss": 4.754278032855497, "tokens_seen": 102760448 }, { "epoch": 0.04, "learning_rate": 0.0004863791146424518, "loss": 6.0478, "theoretical_loss": 4.747620369572012, "tokens_seen": 103809024 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.4873834550380707, "objective/train/docs_used": 69339, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.778015613555908, "objective/train/original_loss": 5.778016090393066, "objective/train/theoretical_loss": 4.741048233458233, "objective/train/tokens_used": 125317600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23935331404209137, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499550104141235, "objective/train/weighted_lm_loss": 6.066779613494873, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9623346328735352, "theoretical_loss": 4.741048233458233, "tokens_seen": 104857600 }, { "epoch": 0.04, "learning_rate": 0.0004861899356791525, "loss": 5.9699, "theoretical_loss": 4.741048233458233, "tokens_seen": 104857600 }, { "epoch": 0.04, "learning_rate": 0.0004860007567158532, "loss": 5.9896, "theoretical_loss": 4.734559686857368, "tokens_seen": 105906176 }, { "epoch": 0.04, "learning_rate": 0.0004858115777525539, "loss": 6.0698, "theoretical_loss": 4.728152854643862, "tokens_seen": 106954752 }, { "epoch": 0.04, "learning_rate": 0.0004856223987892546, "loss": 6.0114, "theoretical_loss": 4.721825921615813, "tokens_seen": 108003328 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.48563089966773987, "objective/train/docs_used": 71265, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.9209771156311035, "objective/train/original_loss": 5.920976638793945, "objective/train/theoretical_loss": 4.721040584744811, "objective/train/tokens_used": 128594400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2397887110710144, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497820377349854, "objective/train/weighted_lm_loss": 6.215074062347412, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9557585120201111, "theoretical_loss": 4.721040584744811, "tokens_seen": 108134400 }, { "epoch": 0.04, "learning_rate": 0.00048543321982595536, "loss": 5.974, "theoretical_loss": 4.71557713002025, "tokens_seen": 109051904 }, { "epoch": 0.04, "learning_rate": 0.00048524404086265605, "loss": 5.9541, "theoretical_loss": 4.709404777203279, "tokens_seen": 110100480 }, { "epoch": 0.04, "learning_rate": 0.0004850548618993568, "loss": 5.9167, "theoretical_loss": 4.703307213377671, "tokens_seen": 111149056 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.48972105979919434, "objective/train/docs_used": 72747, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.520245552062988, "objective/train/original_loss": 5.520245552062988, "objective/train/theoretical_loss": 4.701794319727625, "objective/train/tokens_used": 131871200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24246089160442352, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502045154571533, "objective/train/weighted_lm_loss": 5.797506809234619, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9541246891021729, "theoretical_loss": 4.701794319727625, "tokens_seen": 111411200 }, { "epoch": 0.04, "learning_rate": 0.00048486568293605755, "loss": 5.947, "theoretical_loss": 4.697282839501012, "tokens_seen": 112197632 }, { "epoch": 0.04, "learning_rate": 0.00048467650397275824, "loss": 5.9188, "theoretical_loss": 4.691330105257913, "tokens_seen": 113246208 }, { "epoch": 0.04, "learning_rate": 0.000484487325009459, "loss": 5.8738, "theoretical_loss": 4.685447507140298, "tokens_seen": 114294784 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.4893937110900879, "objective/train/docs_used": 74963, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.727111339569092, "objective/train/original_loss": 5.727111339569092, "objective/train/theoretical_loss": 4.683259315731689, "objective/train/tokens_used": 135148000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24246010184288025, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050171971321106, "objective/train/weighted_lm_loss": 6.014029026031494, "objective/train/weights_max": 1.0512195825576782, "objective/train/weights_min": 0.9718445539474487, "theoretical_loss": 4.683259315731689, "tokens_seen": 114688000 }, { "epoch": 0.04, "learning_rate": 0.0004842981460461597, "loss": 5.8587, "theoretical_loss": 4.679633586620149, "tokens_seen": 115343360 }, { "epoch": 0.04, "learning_rate": 0.0004841089670828604, "loss": 5.857, "theoretical_loss": 4.673886928409454, "tokens_seen": 116391936 }, { "epoch": 0.04, "learning_rate": 0.0004839197881195611, "loss": 5.8442, "theoretical_loss": 4.668206158802439, "tokens_seen": 117440512 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.48216262459754944, "objective/train/docs_used": 77191, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.769596099853516, "objective/train/original_loss": 5.769596099853516, "objective/train/theoretical_loss": 4.665390063922041, "objective/train/tokens_used": 138424800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23990829288959503, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494357347488403, "objective/train/weighted_lm_loss": 6.053895950317383, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9527034759521484, "theoretical_loss": 4.665390063922041, "tokens_seen": 117964800 }, { "epoch": 0.04, "learning_rate": 0.0004837306091562618, "loss": 5.8264, "theoretical_loss": 4.662589944095533, "tokens_seen": 118489088 }, { "epoch": 0.04, "learning_rate": 0.00048354143019296256, "loss": 5.8773, "theoretical_loss": 4.657036989080726, "tokens_seen": 119537664 }, { "epoch": 0.04, "learning_rate": 0.00048335225122966326, "loss": 5.8006, "theoretical_loss": 4.651546035608336, "tokens_seen": 120586240 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.4687286913394928, "objective/train/docs_used": 78964, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.389880657196045, "objective/train/original_loss": 5.389880657196045, "objective/train/theoretical_loss": 4.648145130215498, "objective/train/tokens_used": 141701600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23362566530704498, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0480601787567139, "objective/train/weighted_lm_loss": 5.650588035583496, "objective/train/weights_max": 1.0512198209762573, "objective/train/weights_min": 0.9520321488380432, "theoretical_loss": 4.648145130215498, "tokens_seen": 121241600 }, { "epoch": 0.04, "learning_rate": 0.00048316307226636395, "loss": 5.7878, "theoretical_loss": 4.646115861215389, "tokens_seen": 121634816 }, { "epoch": 0.04, "learning_rate": 0.0004829738933030647, "loss": 5.7865, "theoretical_loss": 4.640745277816107, "tokens_seen": 122683392 }, { "epoch": 0.04, "learning_rate": 0.00048278471433976544, "loss": 5.8501, "theoretical_loss": 4.635433130451148, "tokens_seen": 123731968 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.4775085151195526, "objective/train/docs_used": 81017, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.654141426086426, "objective/train/original_loss": 5.654140472412109, "objective/train/theoretical_loss": 4.631486691835402, "objective/train/tokens_used": 144978400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23175130784511566, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489286184310913, "objective/train/weighted_lm_loss": 5.932117938995361, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9547845721244812, "theoretical_loss": 4.631486691835402, "tokens_seen": 124518400 }, { "epoch": 0.04, "learning_rate": 0.0004825955353764662, "loss": 5.8575, "theoretical_loss": 4.630178296092535, "tokens_seen": 124780544 }, { "epoch": 0.04, "learning_rate": 0.0004824063564131669, "loss": 5.8373, "theoretical_loss": 4.624979682501314, "tokens_seen": 125829120 }, { "epoch": 0.05, "learning_rate": 0.0004822171774498676, "loss": 5.7121, "theoretical_loss": 4.619836227135212, "tokens_seen": 126877696 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.4863889813423157, "objective/train/docs_used": 82924, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.5550537109375, "objective/train/original_loss": 5.5550537109375, "objective/train/theoretical_loss": 4.615380137211477, "objective/train/tokens_used": 148255200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23805738985538483, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498487949371338, "objective/train/weighted_lm_loss": 5.832069396972656, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 1.0026726722717285, "theoretical_loss": 4.615380137211477, "tokens_seen": 127795200 }, { "epoch": 0.05, "learning_rate": 0.0004820279984865683, "loss": 5.8039, "theoretical_loss": 4.6147468961037195, "tokens_seen": 127926272 }, { "epoch": 0.05, "learning_rate": 0.000481838819523269, "loss": 5.8314, "theoretical_loss": 4.609710683168146, "tokens_seen": 128974848 }, { "epoch": 0.05, "learning_rate": 0.0004816496405599697, "loss": 5.7879, "theoretical_loss": 4.604726608784391, "tokens_seen": 130023424 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.48750680685043335, "objective/train/docs_used": 84906, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.394794940948486, "objective/train/original_loss": 5.39479398727417, "objective/train/theoretical_loss": 4.599793719186264, "objective/train/tokens_used": 151532000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24220028519630432, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049981713294983, "objective/train/weighted_lm_loss": 5.664059162139893, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.952854573726654, "theoretical_loss": 4.599793719186264, "tokens_seen": 131072000 }, { "epoch": 0.05, "learning_rate": 0.00048146046159667046, "loss": 5.6343, "theoretical_loss": 4.599793719186264, "tokens_seen": 131072000 }, { "epoch": 0.05, "learning_rate": 0.00048127128263337115, "loss": 5.8198, "theoretical_loss": 4.594911085507325, "tokens_seen": 132120576 }, { "epoch": 0.05, "learning_rate": 0.0004810821036700719, "loss": 5.7462, "theoretical_loss": 4.5900778029393425, "tokens_seen": 133169152 }, { "epoch": 0.05, "learning_rate": 0.0004808929247067726, "loss": 5.7185, "theoretical_loss": 4.585292989925557, "tokens_seen": 134217728 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.4873538017272949, "objective/train/docs_used": 86934, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.74575138092041, "objective/train/original_loss": 5.74575138092041, "objective/train/theoretical_loss": 4.584698253288433, "objective/train/tokens_used": 154808800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24090476334095, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499600172042847, "objective/train/weighted_lm_loss": 6.0322418212890625, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9532478451728821, "theoretical_loss": 4.584698253288433, "tokens_seen": 134348800 }, { "epoch": 0.05, "learning_rate": 0.0004807037457434733, "loss": 5.7085, "theoretical_loss": 4.580555787387068, "tokens_seen": 135266304 }, { "epoch": 0.05, "learning_rate": 0.0004805145667801741, "loss": 5.6594, "theoretical_loss": 4.575865357980726, "tokens_seen": 136314880 }, { "epoch": 0.05, "learning_rate": 0.0004803253878168748, "loss": 5.6742, "theoretical_loss": 4.571220885387013, "tokens_seen": 137363456 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.4891941249370575, "objective/train/docs_used": 89023, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.547139644622803, "objective/train/original_loss": 5.5471391677856445, "objective/train/theoretical_loss": 4.570066854274275, "objective/train/tokens_used": 158085600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.241221621632576, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501456260681152, "objective/train/weighted_lm_loss": 5.825089931488037, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9656258821487427, "theoretical_loss": 4.570066854274275, "tokens_seen": 137625600 }, { "epoch": 0.05, "learning_rate": 0.00048013620885357553, "loss": 5.6366, "theoretical_loss": 4.566621573626489, "tokens_seen": 138412032 }, { "epoch": 0.05, "learning_rate": 0.0004799470298902762, "loss": 5.6921, "theoretical_loss": 4.562066646403457, "tokens_seen": 139460608 }, { "epoch": 0.05, "learning_rate": 0.0004797578509269769, "loss": 5.7071, "theoretical_loss": 4.557555346475546, "tokens_seen": 140509184 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.4865155816078186, "objective/train/docs_used": 91007, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.0053534507751465, "objective/train/original_loss": 5.005353927612305, "objective/train/theoretical_loss": 4.5558747052997965, "objective/train/tokens_used": 161362400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24105681478977203, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498768091201782, "objective/train/weighted_lm_loss": 5.255599498748779, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9540678262710571, "theoretical_loss": 4.5558747052997965, "tokens_seen": 140902400 }, { "epoch": 0.05, "learning_rate": 0.00047956867196367767, "loss": 5.6781, "theoretical_loss": 4.553086935048029, "tokens_seen": 141557760 }, { "epoch": 0.05, "learning_rate": 0.00047937949300037836, "loss": 5.7415, "theoretical_loss": 4.5486606911917145, "tokens_seen": 142606336 }, { "epoch": 0.05, "learning_rate": 0.00047919031403707905, "loss": 5.6154, "theoretical_loss": 4.544275911283326, "tokens_seen": 143654912 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.4804496467113495, "objective/train/docs_used": 93093, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.170466899871826, "objective/train/original_loss": 5.170466423034668, "objective/train/theoretical_loss": 4.542098855026559, "objective/train/tokens_used": 164639200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2387777715921402, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492587089538574, "objective/train/weighted_lm_loss": 5.424218654632568, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9516282677650452, "theoretical_loss": 4.542098855026559, "tokens_seen": 144179200 }, { "epoch": 0.05, "learning_rate": 0.0004790011350737798, "loss": 5.6278, "theoretical_loss": 4.539931908467359, "tokens_seen": 144703488 }, { "epoch": 0.05, "learning_rate": 0.0004788119561104805, "loss": 5.656, "theoretical_loss": 4.53562801213843, "tokens_seen": 145752064 }, { "epoch": 0.05, "learning_rate": 0.00047862277714718124, "loss": 5.6628, "theoretical_loss": 4.531363567443194, "tokens_seen": 146800640 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.4876106083393097, "objective/train/docs_used": 95084, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.640732765197754, "objective/train/original_loss": 5.640732288360596, "objective/train/theoretical_loss": 4.5287180387305765, "objective/train/tokens_used": 167916000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2436685711145401, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049999713897705, "objective/train/weighted_lm_loss": 5.9231648445129395, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9515953063964844, "theoretical_loss": 4.5287180387305765, "tokens_seen": 147456000 }, { "epoch": 0.05, "learning_rate": 0.00047843359818388194, "loss": 5.642, "theoretical_loss": 4.527137934800969, "tokens_seen": 147849216 }, { "epoch": 0.05, "learning_rate": 0.0004782444192205827, "loss": 5.5955, "theoretical_loss": 4.522950489442225, "tokens_seen": 148897792 }, { "epoch": 0.05, "learning_rate": 0.00047805524025728343, "loss": 5.5843, "theoretical_loss": 4.518800620964164, "tokens_seen": 149946368 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.4914400279521942, "objective/train/docs_used": 96518, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.291038513183594, "objective/train/original_loss": 5.291038513183594, "objective/train/theoretical_loss": 4.515712520110756, "objective/train/tokens_used": 171192800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24376289546489716, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.05038321018219, "objective/train/weighted_lm_loss": 5.557369709014893, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9825595021247864, "theoretical_loss": 4.515712520110756, "tokens_seen": 150732800 }, { "epoch": 0.05, "learning_rate": 0.0004778660612939841, "loss": 5.5918, "theoretical_loss": 4.514687732902628, "tokens_seen": 150994944 }, { "epoch": 0.05, "learning_rate": 0.00047767688233068487, "loss": 5.6056, "theoretical_loss": 4.510611242319637, "tokens_seen": 152043520 }, { "epoch": 0.05, "learning_rate": 0.00047748770336738557, "loss": 5.6134, "theoretical_loss": 4.506570579405888, "tokens_seen": 153092096 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.4901806712150574, "objective/train/docs_used": 98118, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.289200782775879, "objective/train/original_loss": 5.289201736450195, "objective/train/theoretical_loss": 4.503063951009098, "objective/train/tokens_used": 174469600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24192149937152863, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502479076385498, "objective/train/weighted_lm_loss": 5.555331707000732, "objective/train/weights_max": 1.0512194633483887, "objective/train/weights_min": 0.9558950066566467, "theoretical_loss": 4.503063951009098, "tokens_seen": 154009600 }, { "epoch": 0.06, "learning_rate": 0.00047729852440408626, "loss": 5.6166, "theoretical_loss": 4.502565187097554, "tokens_seen": 154140672 }, { "epoch": 0.06, "learning_rate": 0.000477109345440787, "loss": 5.5655, "theoretical_loss": 4.498594520706801, "tokens_seen": 155189248 }, { "epoch": 0.06, "learning_rate": 0.0004769201664774877, "loss": 5.5982, "theoretical_loss": 4.494658047565416, "tokens_seen": 156237824 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.4884991943836212, "objective/train/docs_used": 100657, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.095714569091797, "objective/train/original_loss": 5.095714092254639, "objective/train/theoretical_loss": 4.490755246681026, "objective/train/tokens_used": 177746400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2406940460205078, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500733852386475, "objective/train/weighted_lm_loss": 5.351465225219727, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9665477275848389, "theoretical_loss": 4.490755246681026, "tokens_seen": 157286400 }, { "epoch": 0.06, "learning_rate": 0.0004767309875141884, "loss": 5.6162, "theoretical_loss": 4.490755246681026, "tokens_seen": 157286400 }, { "epoch": 0.06, "learning_rate": 0.00047654180855088914, "loss": 5.5682, "theoretical_loss": 4.48688560840535, "tokens_seen": 158334976 }, { "epoch": 0.06, "learning_rate": 0.00047635262958758983, "loss": 5.5178, "theoretical_loss": 4.483048634114016, "tokens_seen": 159383552 }, { "epoch": 0.06, "learning_rate": 0.0004761634506242906, "loss": 5.5363, "theoretical_loss": 4.479243835897444, "tokens_seen": 160432128 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.48170924186706543, "objective/train/docs_used": 101862, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.535853862762451, "objective/train/original_loss": 5.535854339599609, "objective/train/theoretical_loss": 4.478770474607726, "objective/train/tokens_used": 181023200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23878274857997894, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049384593963623, "objective/train/weighted_lm_loss": 5.808737754821777, "objective/train/weights_max": 1.0512193441390991, "objective/train/weights_min": 0.9529873728752136, "theoretical_loss": 4.478770474607726, "tokens_seen": 160563200 }, { "epoch": 0.06, "learning_rate": 0.00047597427166099133, "loss": 5.5977, "theoretical_loss": 4.475470736262361, "tokens_seen": 161480704 }, { "epoch": 0.06, "learning_rate": 0.000475785092697692, "loss": 5.5844, "theoretical_loss": 4.471728867843497, "tokens_seen": 162529280 }, { "epoch": 0.06, "learning_rate": 0.00047559591373439277, "loss": 5.5151, "theoretical_loss": 4.4680177731250765, "tokens_seen": 163577856 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.47214475274086, "objective/train/docs_used": 104000, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.165049076080322, "objective/train/original_loss": 5.165048599243164, "objective/train/theoretical_loss": 4.467094755136979, "objective/train/tokens_used": 184300000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22914351522922516, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0483790636062622, "objective/train/weighted_lm_loss": 5.412636756896973, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9813255667686462, "theoretical_loss": 4.467094755136979, "tokens_seen": 163840000 }, { "epoch": 0.06, "learning_rate": 0.00047540673477109346, "loss": 5.4748, "theoretical_loss": 4.464337004171679, "tokens_seen": 164626432 }, { "epoch": 0.06, "learning_rate": 0.0004752175558077942, "loss": 5.4863, "theoretical_loss": 4.460686122368132, "tokens_seen": 165675008 }, { "epoch": 0.06, "learning_rate": 0.0004750283768444949, "loss": 5.5302, "theoretical_loss": 4.457064698168051, "tokens_seen": 166723584 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.4795248210430145, "objective/train/docs_used": 106106, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.443030834197998, "objective/train/original_loss": 5.443031311035156, "objective/train/theoretical_loss": 4.455714172485305, "objective/train/tokens_used": 187576800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23740240931510925, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491594076156616, "objective/train/weighted_lm_loss": 5.712173938751221, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9545544385910034, "theoretical_loss": 4.455714172485305, "tokens_seen": 167116800 }, { "epoch": 0.06, "learning_rate": 0.0004748391978811956, "loss": 5.4637, "theoretical_loss": 4.453472310850701, "tokens_seen": 167772160 }, { "epoch": 0.06, "learning_rate": 0.00047465001891789635, "loss": 5.4558, "theoretical_loss": 4.449908548285846, "tokens_seen": 168820736 }, { "epoch": 0.06, "learning_rate": 0.00047446083995459704, "loss": 5.4025, "theoretical_loss": 4.446373006706281, "tokens_seen": 169869312 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.4749578535556793, "objective/train/docs_used": 107805, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.134139060974121, "objective/train/original_loss": 5.134139537811279, "objective/train/theoretical_loss": 4.44461569484119, "objective/train/tokens_used": 190853600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2378183901309967, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487042665481567, "objective/train/weighted_lm_loss": 5.386655330657959, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9524388313293457, "theoretical_loss": 4.44461569484119, "tokens_seen": 170393600 }, { "epoch": 0.06, "learning_rate": 0.00047427166099129773, "loss": 5.4333, "theoretical_loss": 4.442865290487752, "tokens_seen": 170917888 }, { "epoch": 0.06, "learning_rate": 0.0004740824820279985, "loss": 5.3058, "theoretical_loss": 4.439385011935977, "tokens_seen": 171966464 }, { "epoch": 0.06, "learning_rate": 0.0004738933030646992, "loss": 5.412, "theoretical_loss": 4.435931791080489, "tokens_seen": 173015040 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.4863138794898987, "objective/train/docs_used": 109422, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.005147457122803, "objective/train/original_loss": 5.005147933959961, "objective/train/theoretical_loss": 4.433787102483406, "objective/train/tokens_used": 194130400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24057146906852722, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049854040145874, "objective/train/weighted_lm_loss": 5.255906105041504, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9543647170066833, "theoretical_loss": 4.433787102483406, "tokens_seen": 173670400 }, { "epoch": 0.06, "learning_rate": 0.0004737041241014, "loss": 5.3846, "theoretical_loss": 4.43250525547506, "tokens_seen": 174063616 }, { "epoch": 0.06, "learning_rate": 0.00047351494513810067, "loss": 5.4827, "theoretical_loss": 4.429105040004445, "tokens_seen": 175112192 }, { "epoch": 0.06, "learning_rate": 0.00047332576617480136, "loss": 5.4385, "theoretical_loss": 4.4257307866972155, "tokens_seen": 176160768 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.474359929561615, "objective/train/docs_used": 111276, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.441802024841309, "objective/train/original_loss": 5.441802024841309, "objective/train/theoretical_loss": 4.42321692297592, "objective/train/tokens_used": 197407200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23385858535766602, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0486245155334473, "objective/train/weighted_lm_loss": 5.70626163482666, "objective/train/weights_max": 1.051215410232544, "objective/train/weights_min": 0.9550078511238098, "theoretical_loss": 4.42321692297592, "tokens_seen": 176947200 }, { "epoch": 0.06, "learning_rate": 0.0004731365872115021, "loss": 5.5213, "theoretical_loss": 4.422382144544446, "tokens_seen": 177209344 }, { "epoch": 0.06, "learning_rate": 0.0004729474082482028, "loss": 5.426, "theoretical_loss": 4.419058769324055, "tokens_seen": 178257920 }, { "epoch": 0.06, "learning_rate": 0.00047275822928490355, "loss": 5.4245, "theoretical_loss": 4.415760323430568, "tokens_seen": 179306496 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.49394339323043823, "objective/train/docs_used": 113383, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.963852882385254, "objective/train/original_loss": 4.963852882385254, "objective/train/theoretical_loss": 4.412894372625901, "objective/train/tokens_used": 200684000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24598737061023712, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506449937820435, "objective/train/weighted_lm_loss": 5.2148284912109375, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9575862288475037, "theoretical_loss": 4.412894372625901, "tokens_seen": 180224000 }, { "epoch": 0.06, "learning_rate": 0.00047256905032160425, "loss": 5.4082, "theoretical_loss": 4.412486475710132, "tokens_seen": 180355072 }, { "epoch": 0.06, "learning_rate": 0.00047237987135830494, "loss": 5.3975, "theoretical_loss": 4.409236901300563, "tokens_seen": 181403648 }, { "epoch": 0.07, "learning_rate": 0.0004721906923950057, "loss": 5.3623, "theoretical_loss": 4.406011281476267, "tokens_seen": 182452224 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.48171699047088623, "objective/train/docs_used": 115260, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.099734306335449, "objective/train/original_loss": 5.099733829498291, "objective/train/theoretical_loss": 4.40280930349784, "objective/train/tokens_used": 203960800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2392299622297287, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049387812614441, "objective/train/weighted_lm_loss": 5.351738929748535, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9590397477149963, "theoretical_loss": 4.40280930349784, "tokens_seen": 183500800 }, { "epoch": 0.07, "learning_rate": 0.0004720015134317064, "loss": 5.4224, "theoretical_loss": 4.40280930349784, "tokens_seen": 183500800 }, { "epoch": 0.07, "learning_rate": 0.0004718123344684071, "loss": 5.4357, "theoretical_loss": 4.3996306604662, "tokens_seen": 184549376 }, { "epoch": 0.07, "learning_rate": 0.0004716231555051078, "loss": 5.431, "theoretical_loss": 4.396475051181074, "tokens_seen": 185597952 }, { "epoch": 0.07, "learning_rate": 0.00047143397654180857, "loss": 5.3839, "theoretical_loss": 4.393342180003689, "tokens_seen": 186646528 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.4751421809196472, "objective/train/docs_used": 117211, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.888144493103027, "objective/train/original_loss": 4.888144493103027, "objective/train/theoretical_loss": 4.392952155367621, "objective/train/tokens_used": 207237600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24074752628803253, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487377643585205, "objective/train/weighted_lm_loss": 5.126180648803711, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9516778588294983, "theoretical_loss": 4.392952155367621, "tokens_seen": 186777600 }, { "epoch": 0.07, "learning_rate": 0.0004712447975785093, "loss": 5.3899, "theoretical_loss": 4.390231756723523, "tokens_seen": 187695104 }, { "epoch": 0.07, "learning_rate": 0.00047105561861521, "loss": 5.3971, "theoretical_loss": 4.387143496428978, "tokens_seen": 188743680 }, { "epoch": 0.07, "learning_rate": 0.0004708664396519107, "loss": 5.4104, "theoretical_loss": 4.384077119381821, "tokens_seen": 189792256 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.4631696939468384, "objective/train/docs_used": 119241, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.791223526000977, "objective/train/original_loss": 4.791223526000977, "objective/train/theoretical_loss": 4.383313912078293, "objective/train/tokens_used": 210514400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23190495371818542, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0474956035614014, "objective/train/weighted_lm_loss": 5.014393329620361, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9541125297546387, "theoretical_loss": 4.383313912078293, "tokens_seen": 190054400 }, { "epoch": 0.07, "learning_rate": 0.00047067726068861145, "loss": 5.4451, "theoretical_loss": 4.381032350895292, "tokens_seen": 190840832 }, { "epoch": 0.07, "learning_rate": 0.00047048808172531214, "loss": 5.3872, "theoretical_loss": 4.378008921215717, "tokens_seen": 191889408 }, { "epoch": 0.07, "learning_rate": 0.0004702989027620129, "loss": 5.3645, "theoretical_loss": 4.375006565407541, "tokens_seen": 192937984 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.4861815869808197, "objective/train/docs_used": 121008, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.046594619750977, "objective/train/original_loss": 5.046595096588135, "objective/train/theoretical_loss": 4.373886061826036, "objective/train/tokens_used": 213791200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2395814061164856, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498359203338623, "objective/train/weighted_lm_loss": 5.298506259918213, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9604629278182983, "theoretical_loss": 4.373886061826036, "tokens_seen": 193331200 }, { "epoch": 0.07, "learning_rate": 0.0004701097237987136, "loss": 5.3806, "theoretical_loss": 4.372025023241637, "tokens_seen": 193986560 }, { "epoch": 0.07, "learning_rate": 0.0004699205448354143, "loss": 5.4043, "theoretical_loss": 4.3690640390867985, "tokens_seen": 195035136 }, { "epoch": 0.07, "learning_rate": 0.000469731365872115, "loss": 5.2706, "theoretical_loss": 4.366123361804301, "tokens_seen": 196083712 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.49126356840133667, "objective/train/docs_used": 122985, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.289262771606445, "objective/train/original_loss": 5.289262294769287, "objective/train/theoretical_loss": 4.364660560962464, "objective/train/tokens_used": 217068000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2440873384475708, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503672361373901, "objective/train/weighted_lm_loss": 5.555089473724365, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9654108881950378, "theoretical_loss": 4.364660560962464, "tokens_seen": 196608000 }, { "epoch": 0.07, "learning_rate": 0.0004695421869088157, "loss": 5.2876, "theoretical_loss": 4.363202744645427, "tokens_seen": 197132288 }, { "epoch": 0.07, "learning_rate": 0.00046935300794551647, "loss": 5.3211, "theoretical_loss": 4.360301945151863, "tokens_seen": 198180864 }, { "epoch": 0.07, "learning_rate": 0.0004691638289822172, "loss": 5.2849, "theoretical_loss": 4.357420725058867, "tokens_seen": 199229440 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.4830451011657715, "objective/train/docs_used": 124865, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.945188522338867, "objective/train/original_loss": 4.945188522338867, "objective/train/theoretical_loss": 4.355629800949043, "objective/train/tokens_used": 220344800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23967084288597107, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495227575302124, "objective/train/weighted_lm_loss": 5.192099094390869, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.952029287815094, "theoretical_loss": 4.355629800949043, "tokens_seen": 199884800 }, { "epoch": 0.07, "learning_rate": 0.0004689746500189179, "loss": 5.3328, "theoretical_loss": 4.354558850201118, "tokens_seen": 200278016 }, { "epoch": 0.07, "learning_rate": 0.00046878547105561866, "loss": 5.2951, "theoretical_loss": 4.351716090421165, "tokens_seen": 201326592 }, { "epoch": 0.07, "learning_rate": 0.00046859629209231935, "loss": 5.2508, "theoretical_loss": 4.348892219480378, "tokens_seen": 202375168 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.47592583298683167, "objective/train/docs_used": 126478, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.113393783569336, "objective/train/original_loss": 5.113393783569336, "objective/train/theoretical_loss": 4.3467865781424315, "objective/train/tokens_used": 223621600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24106524884700775, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04881751537323, "objective/train/weighted_lm_loss": 5.362763404846191, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9515713453292847, "theoretical_loss": 4.3467865781424315, "tokens_seen": 203161600 }, { "epoch": 0.07, "learning_rate": 0.00046840711312902004, "loss": 5.2435, "theoretical_loss": 4.346087014972328, "tokens_seen": 203423744 }, { "epoch": 0.07, "learning_rate": 0.0004682179341657208, "loss": 5.3514, "theoretical_loss": 4.343300258238523, "tokens_seen": 204472320 }, { "epoch": 0.07, "learning_rate": 0.0004680287552024215, "loss": 5.286, "theoretical_loss": 4.34053173428641, "tokens_seen": 205520896 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.4761151969432831, "objective/train/docs_used": 128257, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.176480770111084, "objective/train/original_loss": 5.176480770111084, "objective/train/theoretical_loss": 4.33812406612692, "objective/train/tokens_used": 226898400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2329566329717636, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048795461654663, "objective/train/weighted_lm_loss": 5.43222713470459, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9516546130180359, "theoretical_loss": 4.33812406612692, "tokens_seen": 206438400 }, { "epoch": 0.07, "learning_rate": 0.00046783957623912223, "loss": 5.3401, "theoretical_loss": 4.337781231709587, "tokens_seen": 206569472 }, { "epoch": 0.07, "learning_rate": 0.0004676503972758229, "loss": 5.3552, "theoretical_loss": 4.3350485426101395, "tokens_seen": 207618048 }, { "epoch": 0.07, "learning_rate": 0.0004674612183125236, "loss": 5.2876, "theoretical_loss": 4.332333462523044, "tokens_seen": 208666624 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.48480576276779175, "objective/train/docs_used": 130307, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.197201728820801, "objective/train/original_loss": 5.197201728820801, "objective/train/theoretical_loss": 4.3296357903425715, "objective/train/tokens_used": 230175200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.240909144282341, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049705147743225, "objective/train/weighted_lm_loss": 5.455105781555176, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.9520548582077026, "theoretical_loss": 4.3296357903425715, "tokens_seen": 209715200 }, { "epoch": 0.07, "learning_rate": 0.00046727203934922437, "loss": 5.273, "theoretical_loss": 4.3296357903425715, "tokens_seen": 209715200 }, { "epoch": 0.08, "learning_rate": 0.00046708286038592506, "loss": 5.3009, "theoretical_loss": 4.326955328250631, "tokens_seen": 210763776 }, { "epoch": 0.08, "learning_rate": 0.00046689368142262586, "loss": 5.1924, "theoretical_loss": 4.324291881646978, "tokens_seen": 211812352 }, { "epoch": 0.08, "learning_rate": 0.00046670450245932656, "loss": 5.2265, "theoretical_loss": 4.321645259081256, "tokens_seen": 212860928 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.48363375663757324, "objective/train/docs_used": 132385, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.02983283996582, "objective/train/original_loss": 5.02983283996582, "objective/train/theoretical_loss": 4.321315604786012, "objective/train/tokens_used": 233452000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23789982497692108, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495727062225342, "objective/train/weighted_lm_loss": 5.278566360473633, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9722732901573181, "theoretical_loss": 4.321315604786012, "tokens_seen": 212992000 }, { "epoch": 0.08, "learning_rate": 0.00046651532349602725, "loss": 5.2079, "theoretical_loss": 4.3190152721867925, "tokens_seen": 213909504 }, { "epoch": 0.08, "learning_rate": 0.000466326144532728, "loss": 5.2469, "theoretical_loss": 4.3164017356160995, "tokens_seen": 214958080 }, { "epoch": 0.08, "learning_rate": 0.0004661369655694287, "loss": 5.1772, "theoretical_loss": 4.313804466978039, "tokens_seen": 216006656 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.48313969373703003, "objective/train/docs_used": 133646, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.160606861114502, "objective/train/original_loss": 5.160606384277344, "objective/train/theoretical_loss": 4.313157670585552, "objective/train/tokens_used": 236728800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2380112111568451, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495237112045288, "objective/train/weighted_lm_loss": 5.417488098144531, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9516239166259766, "theoretical_loss": 4.313157670585552, "tokens_seen": 216268800 }, { "epoch": 0.08, "learning_rate": 0.0004659477866061294, "loss": 5.2124, "theoretical_loss": 4.311223286776586, "tokens_seen": 217055232 }, { "epoch": 0.08, "learning_rate": 0.00046575860764283013, "loss": 5.292, "theoretical_loss": 4.3086580183511565, "tokens_seen": 218103808 }, { "epoch": 0.08, "learning_rate": 0.0004655694286795308, "loss": 5.2061, "theoretical_loss": 4.306108487818438, "tokens_seen": 219152384 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.48529815673828125, "objective/train/docs_used": 136163, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.18554162979126, "objective/train/original_loss": 5.18554162979126, "objective/train/theoretical_loss": 4.305156436273988, "objective/train/tokens_used": 240005600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24352312088012695, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497677326202393, "objective/train/weighted_lm_loss": 5.44387149810791, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9514811038970947, "theoretical_loss": 4.305156436273988, "tokens_seen": 219545600 }, { "epoch": 0.08, "learning_rate": 0.00046538024971623157, "loss": 5.2285, "theoretical_loss": 4.3035745240156915, "tokens_seen": 220200960 }, { "epoch": 0.08, "learning_rate": 0.00046519107075293227, "loss": 5.2309, "theoretical_loss": 4.301055958445467, "tokens_seen": 221249536 }, { "epoch": 0.08, "learning_rate": 0.00046500189178963296, "loss": 5.2038, "theoretical_loss": 4.2985526252217054, "tokens_seen": 222298112 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.4832799434661865, "objective/train/docs_used": 137875, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.737259864807129, "objective/train/original_loss": 4.737259387969971, "objective/train/theoretical_loss": 4.297306619601446, "objective/train/tokens_used": 243282400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23784860968589783, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049536943435669, "objective/train/weighted_lm_loss": 4.9707770347595215, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9518852829933167, "theoretical_loss": 4.297306619601446, "tokens_seen": 222822400 }, { "epoch": 0.08, "learning_rate": 0.0004648127128263337, "loss": 5.2808, "theoretical_loss": 4.296064361017181, "tokens_seen": 223346688 }, { "epoch": 0.08, "learning_rate": 0.00046462353386303445, "loss": 5.2182, "theoretical_loss": 4.293591005012228, "tokens_seen": 224395264 }, { "epoch": 0.08, "learning_rate": 0.0004644343548997352, "loss": 5.2337, "theoretical_loss": 4.291132398844749, "tokens_seen": 225443840 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.4908119738101959, "objective/train/docs_used": 139547, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.02741003036499, "objective/train/original_loss": 5.027409553527832, "objective/train/theoretical_loss": 4.289603190747359, "objective/train/tokens_used": 246559200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24388805031776428, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503208637237549, "objective/train/weighted_lm_loss": 5.2804741859436035, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.951794445514679, "theoretical_loss": 4.289603190747359, "tokens_seen": 226099200 }, { "epoch": 0.08, "learning_rate": 0.0004642451759364359, "loss": 5.2597, "theoretical_loss": 4.2886883865614305, "tokens_seen": 226492416 }, { "epoch": 0.08, "learning_rate": 0.0004640559969731366, "loss": 5.2596, "theoretical_loss": 4.286258814570154, "tokens_seen": 227540992 }, { "epoch": 0.08, "learning_rate": 0.00046386681800983734, "loss": 5.287, "theoretical_loss": 4.283843531593567, "tokens_seen": 228589568 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.4877687394618988, "objective/train/docs_used": 141330, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.088724136352539, "objective/train/original_loss": 5.088723182678223, "objective/train/theoretical_loss": 4.282041356805376, "objective/train/tokens_used": 249836000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24049794673919678, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049999475479126, "objective/train/weighted_lm_loss": 5.342431545257568, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9596543908119202, "theoretical_loss": 4.282041356805376, "tokens_seen": 229376000 }, { "epoch": 0.08, "learning_rate": 0.00046367763904653803, "loss": 5.2102, "theoretical_loss": 4.281442388623764, "tokens_seen": 229638144 }, { "epoch": 0.08, "learning_rate": 0.0004634884600832387, "loss": 5.1973, "theoretical_loss": 4.279055238878065, "tokens_seen": 230686720 }, { "epoch": 0.08, "learning_rate": 0.00046329928111993947, "loss": 5.1969, "theoretical_loss": 4.276681937755853, "tokens_seen": 231735296 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.4821692407131195, "objective/train/docs_used": 143449, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.025097370147705, "objective/train/original_loss": 5.025097846984863, "objective/train/theoretical_loss": 4.274616547428058, "objective/train/tokens_used": 253112800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23833133280277252, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494284629821777, "objective/train/weighted_lm_loss": 5.27445125579834, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9557837247848511, "theoretical_loss": 4.274616547428058, "tokens_seen": 232652800 }, { "epoch": 0.08, "learning_rate": 0.00046311010215664016, "loss": 5.242, "theoretical_loss": 4.274322342796429, "tokens_seen": 232783872 }, { "epoch": 0.08, "learning_rate": 0.0004629209231933409, "loss": 5.2641, "theoretical_loss": 4.271976313637885, "tokens_seen": 233832448 }, { "epoch": 0.08, "learning_rate": 0.0004627317442300416, "loss": 5.1358, "theoretical_loss": 4.269643711976926, "tokens_seen": 234881024 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.46489331126213074, "objective/train/docs_used": 144727, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.503349781036377, "objective/train/original_loss": 4.503350257873535, "objective/train/theoretical_loss": 4.267324401529657, "objective/train/tokens_used": 256389600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22985953092575073, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0476576089859009, "objective/train/weighted_lm_loss": 4.7209906578063965, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9633262157440186, "theoretical_loss": 4.267324401529657, "tokens_seen": 235929600 }, { "epoch": 0.08, "learning_rate": 0.0004625425652667423, "loss": 5.1814, "theoretical_loss": 4.267324401529657, "tokens_seen": 235929600 }, { "epoch": 0.08, "learning_rate": 0.0004623533863034431, "loss": 5.1582, "theoretical_loss": 4.265018247993272, "tokens_seen": 236978176 }, { "epoch": 0.09, "learning_rate": 0.0004621642073401438, "loss": 5.133, "theoretical_loss": 4.262725119008646, "tokens_seen": 238026752 }, { "epoch": 0.09, "learning_rate": 0.00046197502837684454, "loss": 5.17, "theoretical_loss": 4.260444884123785, "tokens_seen": 239075328 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.48313987255096436, "objective/train/docs_used": 146731, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.778027534484863, "objective/train/original_loss": 4.778027534484863, "objective/train/theoretical_loss": 4.260160754955504, "objective/train/tokens_used": 259666400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24219833314418793, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495449304580688, "objective/train/weighted_lm_loss": 5.0156965255737305, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9516287446022034, "theoretical_loss": 4.260160754955504, "tokens_seen": 239206400 }, { "epoch": 0.09, "learning_rate": 0.00046178584941354524, "loss": 5.1438, "theoretical_loss": 4.258177414758135, "tokens_seen": 240123904 }, { "epoch": 0.09, "learning_rate": 0.00046159667045024593, "loss": 5.2066, "theoretical_loss": 4.25592258416769, "tokens_seen": 241172480 }, { "epoch": 0.09, "learning_rate": 0.0004614074914869467, "loss": 5.1412, "theoretical_loss": 4.253680267410921, "tokens_seen": 242221056 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.48830899596214294, "objective/train/docs_used": 148718, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.019001483917236, "objective/train/original_loss": 5.019001007080078, "objective/train/theoretical_loss": 4.253121629035574, "objective/train/tokens_used": 262943200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2425418645143509, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500637292861938, "objective/train/weighted_lm_loss": 5.270970344543457, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9595874547958374, "theoretical_loss": 4.253121629035574, "tokens_seen": 242483200 }, { "epoch": 0.09, "learning_rate": 0.00046121831252364737, "loss": 5.2042, "theoretical_loss": 4.251450341315464, "tokens_seen": 243269632 }, { "epoch": 0.09, "learning_rate": 0.00046102913356034806, "loss": 5.1482, "theoretical_loss": 4.249232684445579, "tokens_seen": 244318208 }, { "epoch": 0.09, "learning_rate": 0.0004608399545970488, "loss": 5.1712, "theoretical_loss": 4.247027177070329, "tokens_seen": 245366784 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.4830223023891449, "objective/train/docs_used": 150524, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.851802825927734, "objective/train/original_loss": 4.851802825927734, "objective/train/theoretical_loss": 4.246203219947814, "objective/train/tokens_used": 266220000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24083028733730316, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049526333808899, "objective/train/weighted_lm_loss": 5.092092990875244, "objective/train/weights_max": 1.0512189865112305, "objective/train/weights_min": 0.9516807198524475, "theoretical_loss": 4.246203219947814, "tokens_seen": 245760000 }, { "epoch": 0.09, "learning_rate": 0.0004606507756337495, "loss": 5.1937, "theoretical_loss": 4.24483370113249, "tokens_seen": 246415360 }, { "epoch": 0.09, "learning_rate": 0.00046046159667045025, "loss": 5.1309, "theoretical_loss": 4.242652140218147, "tokens_seen": 247463936 }, { "epoch": 0.09, "learning_rate": 0.00046027241770715095, "loss": 5.1674, "theoretical_loss": 4.240482379526973, "tokens_seen": 248512512 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.48063239455223083, "objective/train/docs_used": 152460, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.242099761962891, "objective/train/original_loss": 5.242099285125732, "objective/train/theoretical_loss": 4.2394018888240215, "objective/train/tokens_used": 269496800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23866912722587585, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492764711380005, "objective/train/weighted_lm_loss": 5.502182483673096, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.9536293148994446, "theoretical_loss": 4.2394018888240215, "tokens_seen": 249036800 }, { "epoch": 0.09, "learning_rate": 0.0004600832387438517, "loss": 5.155, "theoretical_loss": 4.2383243058431646, "tokens_seen": 249561088 }, { "epoch": 0.09, "learning_rate": 0.00045989405978055244, "loss": 5.1044, "theoretical_loss": 4.23617780750703, "tokens_seen": 250609664 }, { "epoch": 0.09, "learning_rate": 0.00045970488081725313, "loss": 5.1227, "theoretical_loss": 4.23404277438719, "tokens_seen": 251658240 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.4906507432460785, "objective/train/docs_used": 154232, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.827680587768555, "objective/train/original_loss": 4.827680587768555, "objective/train/theoretical_loss": 4.232714152537391, "objective/train/tokens_used": 272773600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24195529520511627, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502949953079224, "objective/train/weighted_lm_loss": 5.070379257202148, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9697064161300659, "theoretical_loss": 4.232714152537391, "tokens_seen": 252313600 }, { "epoch": 0.09, "learning_rate": 0.0004595157018539539, "loss": 5.0412, "theoretical_loss": 4.231919097853398, "tokens_seen": 252706816 }, { "epoch": 0.09, "learning_rate": 0.0004593265228906546, "loss": 5.0234, "theoretical_loss": 4.2298066707499515, "tokens_seen": 253755392 }, { "epoch": 0.09, "learning_rate": 0.00045913734392735527, "loss": 5.1059, "theoretical_loss": 4.227705387369683, "tokens_seen": 254803968 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.48569464683532715, "objective/train/docs_used": 156109, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.89478063583374, "objective/train/original_loss": 4.894780158996582, "objective/train/theoretical_loss": 4.226136675116626, "objective/train/tokens_used": 276050400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24085675179958344, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497938394546509, "objective/train/weighted_lm_loss": 5.138565540313721, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9536014795303345, "theoretical_loss": 4.226136675116626, "tokens_seen": 255590400 }, { "epoch": 0.09, "learning_rate": 0.000458948164964056, "loss": 5.1322, "theoretical_loss": 4.225615143428513, "tokens_seen": 255852544 }, { "epoch": 0.09, "learning_rate": 0.0004587589860007567, "loss": 5.0152, "theoretical_loss": 4.223535836040548, "tokens_seen": 256901120 }, { "epoch": 0.09, "learning_rate": 0.00045856980703745746, "loss": 5.0449, "theoretical_loss": 4.221467363693727, "tokens_seen": 257949696 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.4772140085697174, "objective/train/docs_used": 158136, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.774804592132568, "objective/train/original_loss": 4.774805068969727, "objective/train/theoretical_loss": 4.219666259736535, "objective/train/tokens_used": 279327200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2351672351360321, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489166975021362, "objective/train/weighted_lm_loss": 5.007704257965088, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9616609215736389, "theoretical_loss": 4.219666259736535, "tokens_seen": 258867200 }, { "epoch": 0.09, "learning_rate": 0.00045838062807415815, "loss": 4.8683, "theoretical_loss": 4.219409626225975, "tokens_seen": 258998272 }, { "epoch": 0.09, "learning_rate": 0.00045819144911085884, "loss": 5.0815, "theoretical_loss": 4.217362524801874, "tokens_seen": 260046848 }, { "epoch": 0.09, "learning_rate": 0.0004580022701475596, "loss": 5.0467, "theoretical_loss": 4.215325961889821, "tokens_seen": 261095424 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.47997015714645386, "objective/train/docs_used": 160243, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.758581161499023, "objective/train/original_loss": 4.758580684661865, "objective/train/theoretical_loss": 4.213299841239684, "objective/train/tokens_used": 282604000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2388996034860611, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492112636566162, "objective/train/weighted_lm_loss": 4.9929280281066895, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9515440464019775, "theoretical_loss": 4.213299841239684, "tokens_seen": 262144000 }, { "epoch": 0.09, "learning_rate": 0.00045781309118426034, "loss": 4.9857, "theoretical_loss": 4.213299841239684, "tokens_seen": 262144000 }, { "epoch": 0.09, "learning_rate": 0.00045762391222096103, "loss": 4.9844, "theoretical_loss": 4.211284067860909, "tokens_seen": 263192576 }, { "epoch": 0.09, "learning_rate": 0.0004574347332576618, "loss": 4.9032, "theoretical_loss": 4.209278548001103, "tokens_seen": 264241152 }, { "epoch": 0.09, "learning_rate": 0.0004572455542943625, "loss": 4.9459, "theoretical_loss": 4.207283189125054, "tokens_seen": 265289728 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.4564594626426697, "objective/train/docs_used": 161560, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.229607582092285, "objective/train/original_loss": 4.229607582092285, "objective/train/theoretical_loss": 4.20703447914773, "objective/train/tokens_used": 285880800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24206334352493286, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.046875238418579, "objective/train/weighted_lm_loss": 4.431016445159912, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9516088366508484, "theoretical_loss": 4.20703447914773, "tokens_seen": 265420800 }, { "epoch": 0.1, "learning_rate": 0.0004570563753310632, "loss": 4.9257, "theoretical_loss": 4.2052978998941954, "tokens_seen": 266338304 }, { "epoch": 0.1, "learning_rate": 0.0004568671963677639, "loss": 4.9745, "theoretical_loss": 4.203322590146491, "tokens_seen": 267386880 }, { "epoch": 0.1, "learning_rate": 0.0004566780174044646, "loss": 4.8664, "theoretical_loss": 4.2013571708767365, "tokens_seen": 268435456 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.47331157326698303, "objective/train/docs_used": 163521, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.584531307220459, "objective/train/original_loss": 4.584530830383301, "objective/train/theoretical_loss": 4.200867351124762, "objective/train/tokens_used": 289157600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23217462003231049, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485113859176636, "objective/train/weighted_lm_loss": 4.808891773223877, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9518424868583679, "theoretical_loss": 4.200867351124762, "tokens_seen": 268697600 }, { "epoch": 0.1, "learning_rate": 0.00045648883844116536, "loss": 4.873, "theoretical_loss": 4.199401554217266, "tokens_seen": 269484032 }, { "epoch": 0.1, "learning_rate": 0.00045629965947786605, "loss": 4.9196, "theoretical_loss": 4.19745565341906, "tokens_seen": 270532608 }, { "epoch": 0.1, "learning_rate": 0.0004561104805145668, "loss": 4.8289, "theoretical_loss": 4.195519382833226, "tokens_seen": 271581184 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.47363853454589844, "objective/train/docs_used": 165284, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.5816330909729, "objective/train/original_loss": 4.5816330909729, "objective/train/theoretical_loss": 4.194795746858309, "objective/train/tokens_used": 292434400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2301483005285263, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485334396362305, "objective/train/weighted_lm_loss": 4.80542516708374, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.955842137336731, "theoretical_loss": 4.194795746858309, "tokens_seen": 271974400 }, { "epoch": 0.1, "learning_rate": 0.0004559213015512675, "loss": 4.8413, "theoretical_loss": 4.193592657892869, "tokens_seen": 272629760 }, { "epoch": 0.1, "learning_rate": 0.0004557321225879682, "loss": 4.9962, "theoretical_loss": 4.191675395095324, "tokens_seen": 273678336 }, { "epoch": 0.1, "learning_rate": 0.000455542943624669, "loss": 4.9632, "theoretical_loss": 4.189767511984741, "tokens_seen": 274726912 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.4624802768230438, "objective/train/docs_used": 166979, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.361681938171387, "objective/train/original_loss": 4.36168098449707, "objective/train/theoretical_loss": 4.188817062326644, "objective/train/tokens_used": 295711200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.244588240981102, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0474902391433716, "objective/train/weighted_lm_loss": 4.574804306030273, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9531933069229126, "theoretical_loss": 4.188817062326644, "tokens_seen": 275251200 }, { "epoch": 0.1, "learning_rate": 0.0004553537646613697, "loss": 5.0148, "theoretical_loss": 4.187868927135035, "tokens_seen": 275775488 }, { "epoch": 0.1, "learning_rate": 0.0004551645856980704, "loss": 4.896, "theoretical_loss": 4.185979560133161, "tokens_seen": 276824064 }, { "epoch": 0.1, "learning_rate": 0.0004549754067347711, "loss": 5.0797, "theoretical_loss": 4.184099331562732, "tokens_seen": 277872640 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.480182021856308, "objective/train/docs_used": 168865, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 5.012778282165527, "objective/train/original_loss": 5.012779235839844, "objective/train/theoretical_loss": 4.182928794423724, "objective/train/tokens_used": 298988000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23854205012321472, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049230694770813, "objective/train/weighted_lm_loss": 5.2581329345703125, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9522960782051086, "theoretical_loss": 4.182928794423724, "tokens_seen": 278528000 }, { "epoch": 0.1, "learning_rate": 0.0004547862277714718, "loss": 4.9065, "theoretical_loss": 4.182228162987963, "tokens_seen": 278921216 }, { "epoch": 0.1, "learning_rate": 0.00045459704880817256, "loss": 4.902, "theoretical_loss": 4.18036597693793, "tokens_seen": 279969792 }, { "epoch": 0.1, "learning_rate": 0.00045440786984487326, "loss": 4.9617, "theoretical_loss": 4.178512696891136, "tokens_seen": 281018368 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.4830773174762726, "objective/train/docs_used": 170536, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.846111297607422, "objective/train/original_loss": 4.846111297607422, "objective/train/theoretical_loss": 4.177128535915539, "objective/train/tokens_used": 302264800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24183543026447296, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049536943435669, "objective/train/weighted_lm_loss": 5.087429523468018, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9517684578895569, "theoretical_loss": 4.177128535915539, "tokens_seen": 281804800 }, { "epoch": 0.1, "learning_rate": 0.00045421869088157395, "loss": 4.9619, "theoretical_loss": 4.176668247260391, "tokens_seen": 282066944 }, { "epoch": 0.1, "learning_rate": 0.0004540295119182747, "loss": 4.9715, "theoretical_loss": 4.174832553377978, "tokens_seen": 283115520 }, { "epoch": 0.1, "learning_rate": 0.0004538403329549754, "loss": 5.0139, "theoretical_loss": 4.173005541481111, "tokens_seen": 284164096 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.4926310181617737, "objective/train/docs_used": 172212, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.892210483551025, "objective/train/original_loss": 4.892210006713867, "objective/train/theoretical_loss": 4.171413970703851, "objective/train/tokens_used": 305541600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24494759738445282, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505083799362183, "objective/train/weighted_lm_loss": 5.139614105224609, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9525986909866333, "theoretical_loss": 4.171413970703851, "tokens_seen": 285081600 }, { "epoch": 0.1, "learning_rate": 0.00045365115399167614, "loss": 4.9594, "theoretical_loss": 4.1711871386976815, "tokens_seen": 285212672 }, { "epoch": 0.1, "learning_rate": 0.00045346197502837683, "loss": 4.9551, "theoretical_loss": 4.16937727303227, "tokens_seen": 286261248 }, { "epoch": 0.1, "learning_rate": 0.0004532727960650776, "loss": 4.9784, "theoretical_loss": 4.167575873352437, "tokens_seen": 287309824 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.49215155839920044, "objective/train/docs_used": 173965, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.988898754119873, "objective/train/original_loss": 4.988898277282715, "objective/train/theoretical_loss": 4.165782869375278, "objective/train/tokens_used": 308818400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.244205504655838, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504565238952637, "objective/train/weighted_lm_loss": 5.240926265716553, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.956809401512146, "theoretical_loss": 4.165782869375278, "tokens_seen": 288358400 }, { "epoch": 0.1, "learning_rate": 0.0004530836171017783, "loss": 4.9721, "theoretical_loss": 4.165782869375278, "tokens_seen": 288358400 }, { "epoch": 0.1, "learning_rate": 0.000452894438138479, "loss": 4.9523, "theoretical_loss": 4.163998191654223, "tokens_seen": 289406976 }, { "epoch": 0.1, "learning_rate": 0.0004527052591751797, "loss": 4.9486, "theoretical_loss": 4.162221771566105, "tokens_seen": 290455552 }, { "epoch": 0.1, "learning_rate": 0.00045251608021188046, "loss": 4.9565, "theoretical_loss": 4.160453541298465, "tokens_seen": 291504128 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.48595374822616577, "objective/train/docs_used": 176091, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.911673069000244, "objective/train/original_loss": 4.911673069000244, "objective/train/theoretical_loss": 4.160233085015529, "objective/train/tokens_used": 312095200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2406431883573532, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498186349868774, "objective/train/weighted_lm_loss": 5.156929969787598, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9550408124923706, "theoretical_loss": 4.160233085015529, "tokens_seen": 291635200 }, { "epoch": 0.1, "learning_rate": 0.00045232690124858115, "loss": 4.9944, "theoretical_loss": 4.158693433837098, "tokens_seen": 292552704 }, { "epoch": 0.1, "learning_rate": 0.0004521377222852819, "loss": 4.9663, "theoretical_loss": 4.156941382953835, "tokens_seen": 293601280 }, { "epoch": 0.11, "learning_rate": 0.0004519485433219826, "loss": 4.899, "theoretical_loss": 4.155197323194555, "tokens_seen": 294649856 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.4910581409931183, "objective/train/docs_used": 178137, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.6035237312316895, "objective/train/original_loss": 4.603524208068848, "objective/train/theoretical_loss": 4.154762549270199, "objective/train/tokens_used": 315372000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24331238865852356, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503426790237427, "objective/train/weighted_lm_loss": 4.835586071014404, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9553537964820862, "theoretical_loss": 4.154762549270199, "tokens_seen": 294912000 }, { "epoch": 0.11, "learning_rate": 0.0004517593643586833, "loss": 4.9008, "theoretical_loss": 4.153461189867425, "tokens_seen": 295698432 }, { "epoch": 0.11, "learning_rate": 0.00045157018539538404, "loss": 4.9441, "theoretical_loss": 4.151732919031354, "tokens_seen": 296747008 }, { "epoch": 0.11, "learning_rate": 0.00045138100643208473, "loss": 4.9052, "theoretical_loss": 4.150012447484665, "tokens_seen": 297795584 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.4841580390930176, "objective/train/docs_used": 180121, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.756556034088135, "objective/train/original_loss": 4.756556510925293, "objective/train/theoretical_loss": 4.149369268635046, "objective/train/tokens_used": 318648800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24232666194438934, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496474504470825, "objective/train/weighted_lm_loss": 4.992458820343018, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9520720839500427, "theoretical_loss": 4.149369268635046, "tokens_seen": 298188800 }, { "epoch": 0.11, "learning_rate": 0.0004511918274687855, "loss": 4.8953, "theoretical_loss": 4.148299712753977, "tokens_seen": 298844160 }, { "epoch": 0.11, "learning_rate": 0.0004510026485054862, "loss": 4.8506, "theoretical_loss": 4.146594653083293, "tokens_seen": 299892736 }, { "epoch": 0.11, "learning_rate": 0.0004508134695421869, "loss": 4.9196, "theoretical_loss": 4.144897207423284, "tokens_seen": 300941312 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.4888658821582794, "objective/train/docs_used": 182298, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.590531349182129, "objective/train/original_loss": 4.590531349182129, "objective/train/theoretical_loss": 4.144051320960009, "objective/train/tokens_used": 321925600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2443903088569641, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501290559768677, "objective/train/weighted_lm_loss": 4.820474624633789, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9515647888183594, "theoretical_loss": 4.144051320960009, "tokens_seen": 301465600 }, { "epoch": 0.11, "learning_rate": 0.00045062429057888767, "loss": 4.8532, "theoretical_loss": 4.143207315420783, "tokens_seen": 301989888 }, { "epoch": 0.11, "learning_rate": 0.00045043511161558836, "loss": 4.7713, "theoretical_loss": 4.141524917408454, "tokens_seen": 303038464 }, { "epoch": 0.11, "learning_rate": 0.0004502459326522891, "loss": 4.8744, "theoretical_loss": 4.1398499543946565, "tokens_seen": 304087040 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.48735523223876953, "objective/train/docs_used": 183436, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.56964635848999, "objective/train/original_loss": 4.569646835327148, "objective/train/theoretical_loss": 4.138806852152502, "objective/train/tokens_used": 325202400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2419733703136444, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049965500831604, "objective/train/weighted_lm_loss": 4.798412799835205, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9514796137809753, "theoretical_loss": 4.138806852152502, "tokens_seen": 304742400 }, { "epoch": 0.11, "learning_rate": 0.0004500567536889898, "loss": 4.8162, "theoretical_loss": 4.138182368053505, "tokens_seen": 305135616 }, { "epoch": 0.11, "learning_rate": 0.0004498675747256905, "loss": 4.774, "theoretical_loss": 4.136522100715087, "tokens_seen": 306184192 }, { "epoch": 0.11, "learning_rate": 0.00044967839576239124, "loss": 4.7775, "theoretical_loss": 4.134869095355876, "tokens_seen": 307232768 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.4952714741230011, "objective/train/docs_used": 185431, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.745488166809082, "objective/train/original_loss": 4.745488166809082, "objective/train/theoretical_loss": 4.133634073066595, "objective/train/tokens_used": 328479200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2461199015378952, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0507783889770508, "objective/train/weighted_lm_loss": 4.98633337020874, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9608399868011475, "theoretical_loss": 4.133634073066595, "tokens_seen": 308019200 }, { "epoch": 0.11, "learning_rate": 0.00044948921679909194, "loss": 4.852, "theoretical_loss": 4.1332232955893105, "tokens_seen": 308281344 }, { "epoch": 0.11, "learning_rate": 0.00044930003783579263, "loss": 4.8169, "theoretical_loss": 4.131584645656535, "tokens_seen": 309329920 }, { "epoch": 0.11, "learning_rate": 0.0004491108588724934, "loss": 4.7574, "theoretical_loss": 4.129953090417319, "tokens_seen": 310378496 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.4933873116970062, "objective/train/docs_used": 187155, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.723814010620117, "objective/train/original_loss": 4.723814010620117, "objective/train/theoretical_loss": 4.128531256565763, "objective/train/tokens_used": 331756000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24465975165367126, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505824089050293, "objective/train/weighted_lm_loss": 4.962899684906006, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9555627107620239, "theoretical_loss": 4.128531256565763, "tokens_seen": 311296000 }, { "epoch": 0.11, "learning_rate": 0.00044892167990919407, "loss": 4.7497, "theoretical_loss": 4.128328575341129, "tokens_seen": 311427072 }, { "epoch": 0.11, "learning_rate": 0.00044873250094589487, "loss": 4.7832, "theoretical_loss": 4.12671104649836, "tokens_seen": 312475648 }, { "epoch": 0.11, "learning_rate": 0.00044854332198259557, "loss": 4.7443, "theoretical_loss": 4.125100450551725, "tokens_seen": 313524224 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.4791731834411621, "objective/train/docs_used": 189293, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.741052150726318, "objective/train/original_loss": 4.741052150726318, "objective/train/theoretical_loss": 4.123496734747793, "objective/train/tokens_used": 335032800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24330000579357147, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491536855697632, "objective/train/weighted_lm_loss": 4.974308967590332, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9513964653015137, "theoretical_loss": 4.123496734747793, "tokens_seen": 314572800 }, { "epoch": 0.11, "learning_rate": 0.00044835414301929626, "loss": 4.7864, "theoretical_loss": 4.123496734747793, "tokens_seen": 314572800 }, { "epoch": 0.11, "learning_rate": 0.000448164964055997, "loss": 4.766, "theoretical_loss": 4.121899846908677, "tokens_seen": 315621376 }, { "epoch": 0.11, "learning_rate": 0.0004479757850926977, "loss": 4.7524, "theoretical_loss": 4.120309735423871, "tokens_seen": 316669952 }, { "epoch": 0.11, "learning_rate": 0.00044778660612939845, "loss": 4.7824, "theoretical_loss": 4.118726349242221, "tokens_seen": 317718528 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.48897671699523926, "objective/train/docs_used": 191279, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.168673992156982, "objective/train/original_loss": 4.168674468994141, "objective/train/theoretical_loss": 4.118528896321316, "objective/train/tokens_used": 338309600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24297229945659637, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501326322555542, "objective/train/weighted_lm_loss": 4.377741813659668, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9516782164573669, "theoretical_loss": 4.118528896321316, "tokens_seen": 317849600 }, { "epoch": 0.11, "learning_rate": 0.00044759742716609914, "loss": 4.7589, "theoretical_loss": 4.117149637864041, "tokens_seen": 318767104 }, { "epoch": 0.11, "learning_rate": 0.00044740824820279983, "loss": 4.7901, "theoretical_loss": 4.115579551333372, "tokens_seen": 319815680 }, { "epoch": 0.11, "learning_rate": 0.0004472190692395006, "loss": 4.8469, "theoretical_loss": 4.114016040230357, "tokens_seen": 320864256 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.4896620512008667, "objective/train/docs_used": 193378, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.6810662746429443, "objective/train/original_loss": 3.6810660362243652, "objective/train/theoretical_loss": 4.113626184124224, "objective/train/tokens_used": 341586400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2405911087989807, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501891374588013, "objective/train/weighted_lm_loss": 3.866628646850586, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9788739681243896, "theoretical_loss": 4.113626184124224, "tokens_seen": 321126400 }, { "epoch": 0.11, "learning_rate": 0.0004470298902762013, "loss": 4.7207, "theoretical_loss": 4.112459055663768, "tokens_seen": 321912832 }, { "epoch": 0.12, "learning_rate": 0.00044684071131290197, "loss": 4.8422, "theoretical_loss": 4.110908549263647, "tokens_seen": 322961408 }, { "epoch": 0.12, "learning_rate": 0.0004466515323496027, "loss": 4.7674, "theoretical_loss": 4.109364473174075, "tokens_seen": 324009984 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.4944951832294464, "objective/train/docs_used": 195243, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.5325846672058105, "objective/train/original_loss": 4.532584190368652, "objective/train/theoretical_loss": 4.108787092774909, "objective/train/tokens_used": 344863200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.246404230594635, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0507020950317383, "objective/train/weighted_lm_loss": 4.762205600738525, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9522090554237366, "theoretical_loss": 4.108787092774909, "tokens_seen": 324403200 }, { "epoch": 0.12, "learning_rate": 0.00044646235338630346, "loss": 4.8064, "theoretical_loss": 4.107826780046074, "tokens_seen": 325058560 }, { "epoch": 0.12, "learning_rate": 0.0004462731744230042, "loss": 4.8592, "theoretical_loss": 4.106295423030614, "tokens_seen": 326107136 }, { "epoch": 0.12, "learning_rate": 0.0004460839954597049, "loss": 4.8731, "theoretical_loss": 4.104770355771754, "tokens_seen": 327155712 }, { "debugging/Self-BLEU-5": 0.5365128506817183, "debugging/distinct-1-grams": 0.7612814402327299, "debugging/distinct-2-grams": 0.9694583753853511, "debugging/entropy-1-grams": 6.003629944255698, "debugging/entropy-2-grams": 7.054987089269872, "debugging/length": 495.25, "debugging/num_segments": 16, "debugging/raw_token_scores_avg": 0.04385810345411301, "debugging/raw_token_scores_std": 0.15687797963619232, "epoch": 0.12, "objective/train/advantage_avg": 0.45616579055786133, "objective/train/docs_used": 197327, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.436026573181152, "objective/train/original_loss": 4.436026573181152, "objective/train/theoretical_loss": 4.10401016644798, "objective/train/tokens_used": 348140000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23268143832683563, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.046798825263977, "objective/train/weighted_lm_loss": 4.649384498596191, "objective/train/weights_max": 1.0512185096740723, "objective/train/weights_min": 0.9514419436454773, "theoretical_loss": 4.10401016644798, "tokens_seen": 327680000 }, { "epoch": 0.12, "learning_rate": 0.0004458948164964056, "loss": 4.7522, "theoretical_loss": 4.103251532399884, "tokens_seen": 328204288 }, { "epoch": 0.12, "learning_rate": 0.00044570563753310635, "loss": 4.8371, "theoretical_loss": 4.101738907525098, "tokens_seen": 329252864 }, { "epoch": 0.12, "learning_rate": 0.00044551645856980704, "loss": 4.8124, "theoretical_loss": 4.100232436230659, "tokens_seen": 330301440 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.4855183959007263, "objective/train/docs_used": 199093, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.6984477043151855, "objective/train/original_loss": 4.698448181152344, "objective/train/theoretical_loss": 4.099293996766681, "objective/train/tokens_used": 351416800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24312277138233185, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497876405715942, "objective/train/weighted_lm_loss": 4.9305100440979, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9514390826225281, "theoretical_loss": 4.099293996766681, "tokens_seen": 330956800 }, { "epoch": 0.12, "learning_rate": 0.0004453272796065078, "loss": 4.7747, "theoretical_loss": 4.098732074066591, "tokens_seen": 331350016 }, { "epoch": 0.12, "learning_rate": 0.0004451381006432085, "loss": 4.7442, "theoretical_loss": 4.097237777043363, "tokens_seen": 332398592 }, { "epoch": 0.12, "learning_rate": 0.0004449489216799092, "loss": 4.7144, "theoretical_loss": 4.095749501625689, "tokens_seen": 333447168 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.4659195840358734, "objective/train/docs_used": 200392, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.83096170425415, "objective/train/original_loss": 4.830961227416992, "objective/train/theoretical_loss": 4.09463722080479, "objective/train/tokens_used": 354693600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2294236719608307, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0477579832077026, "objective/train/weighted_lm_loss": 5.058286190032959, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9520472288131714, "theoretical_loss": 4.09463722080479, "tokens_seen": 334233600 }, { "epoch": 0.12, "learning_rate": 0.0004447597427166099, "loss": 4.719, "theoretical_loss": 4.094267204726426, "tokens_seen": 334495744 }, { "epoch": 0.12, "learning_rate": 0.0004445705637533106, "loss": 4.822, "theoretical_loss": 4.092790843700574, "tokens_seen": 335544320 }, { "epoch": 0.12, "learning_rate": 0.0004443813847900113, "loss": 4.7045, "theoretical_loss": 4.091320376339368, "tokens_seen": 336592896 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.48759955167770386, "objective/train/docs_used": 202247, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.562262535095215, "objective/train/original_loss": 4.562261581420898, "objective/train/theoretical_loss": 4.0900385191913164, "objective/train/tokens_used": 357970400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2414817214012146, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499873161315918, "objective/train/weighted_lm_loss": 4.790578365325928, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.959564745426178, "theoretical_loss": 4.0900385191913164, "tokens_seen": 337510400 }, { "epoch": 0.12, "learning_rate": 0.0004441922058267121, "loss": 4.7015, "theoretical_loss": 4.089855760864484, "tokens_seen": 337641472 }, { "epoch": 0.12, "learning_rate": 0.0004440030268634128, "loss": 4.6724, "theoretical_loss": 4.0883969559223186, "tokens_seen": 338690048 }, { "epoch": 0.12, "learning_rate": 0.00044381384790011355, "loss": 4.6783, "theoretical_loss": 4.086943920578378, "tokens_seen": 339738624 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.48911985754966736, "objective/train/docs_used": 204260, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.2867255210876465, "objective/train/original_loss": 4.286725997924805, "objective/train/theoretical_loss": 4.085496614311752, "objective/train/tokens_used": 361247200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2409982979297638, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501370429992676, "objective/train/weighted_lm_loss": 4.502540111541748, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9587234258651733, "theoretical_loss": 4.085496614311752, "tokens_seen": 340787200 }, { "epoch": 0.12, "learning_rate": 0.00044362466893681425, "loss": 4.7008, "theoretical_loss": 4.085496614311752, "tokens_seen": 340787200 }, { "epoch": 0.12, "learning_rate": 0.00044343548997351494, "loss": 4.6856, "theoretical_loss": 4.084054997009675, "tokens_seen": 341835776 }, { "epoch": 0.12, "learning_rate": 0.0004432463110102157, "loss": 4.707, "theoretical_loss": 4.082619028962182, "tokens_seen": 342884352 }, { "epoch": 0.12, "learning_rate": 0.0004430571320469164, "loss": 4.73, "theoretical_loss": 4.081188670856844, "tokens_seen": 343932928 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.4761353135108948, "objective/train/docs_used": 206338, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.324362754821777, "objective/train/original_loss": 4.3243632316589355, "objective/train/theoretical_loss": 4.081010268600093, "objective/train/tokens_used": 364524000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23528918623924255, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048809289932251, "objective/train/weighted_lm_loss": 4.536293983459473, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.951631486415863, "theoretical_loss": 4.081010268600093, "tokens_seen": 344064000 }, { "epoch": 0.12, "learning_rate": 0.00044286795308361713, "loss": 4.7097, "theoretical_loss": 4.079763883773593, "tokens_seen": 344981504 }, { "epoch": 0.12, "learning_rate": 0.0004426787741203178, "loss": 4.7134, "theoretical_loss": 4.078344629179623, "tokens_seen": 346030080 }, { "epoch": 0.12, "learning_rate": 0.0004424895951570185, "loss": 4.7597, "theoretical_loss": 4.076930868924384, "tokens_seen": 347078656 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.466165155172348, "objective/train/docs_used": 208654, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.435503005981445, "objective/train/original_loss": 4.435503959655762, "objective/train/theoretical_loss": 4.076578282916229, "objective/train/tokens_used": 367800800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23283718526363373, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0477997064590454, "objective/train/weighted_lm_loss": 4.647334098815918, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9514264464378357, "theoretical_loss": 4.076578282916229, "tokens_seen": 347340800 }, { "epoch": 0.12, "learning_rate": 0.00044230041619371926, "loss": 4.791, "theoretical_loss": 4.075522565234643, "tokens_seen": 348127232 }, { "epoch": 0.12, "learning_rate": 0.00044211123723041996, "loss": 4.7082, "theoretical_loss": 4.074119680709633, "tokens_seen": 349175808 }, { "epoch": 0.13, "learning_rate": 0.0004419220582671207, "loss": 4.6896, "theoretical_loss": 4.072722178316271, "tokens_seen": 350224384 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.4762899577617645, "objective/train/docs_used": 210524, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.206195831298828, "objective/train/original_loss": 4.206194877624512, "objective/train/theoretical_loss": 4.072199495003675, "objective/train/tokens_used": 371077600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24041838943958282, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488507747650146, "objective/train/weighted_lm_loss": 4.409471035003662, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9514844417572021, "theoretical_loss": 4.072199495003675, "tokens_seen": 350617600 }, { "epoch": 0.13, "learning_rate": 0.00044173287930382145, "loss": 4.7272, "theoretical_loss": 4.071330021384458, "tokens_seen": 351272960 }, { "epoch": 0.13, "learning_rate": 0.00044154370034052214, "loss": 4.7532, "theoretical_loss": 4.069943173602451, "tokens_seen": 352321536 }, { "epoch": 0.13, "learning_rate": 0.0004413545213772229, "loss": 4.661, "theoretical_loss": 4.068561599012304, "tokens_seen": 353370112 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.4837128520011902, "objective/train/docs_used": 211739, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.511666774749756, "objective/train/original_loss": 4.511666297912598, "objective/train/theoretical_loss": 4.0678727780229575, "objective/train/tokens_used": 374354400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23984749615192413, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495903491973877, "objective/train/weighted_lm_loss": 4.7349019050598145, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9533966183662415, "theoretical_loss": 4.0678727780229575, "tokens_seen": 353894400 }, { "epoch": 0.13, "learning_rate": 0.0004411653424139236, "loss": 4.7159, "theoretical_loss": 4.0671852620053865, "tokens_seen": 354418688 }, { "epoch": 0.13, "learning_rate": 0.0004409761634506243, "loss": 4.6523, "theoretical_loss": 4.0658141273179655, "tokens_seen": 355467264 }, { "epoch": 0.13, "learning_rate": 0.000440786984487325, "loss": 4.6922, "theoretical_loss": 4.0644481600268625, "tokens_seen": 356515840 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.4922027587890625, "objective/train/docs_used": 213839, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.124780654907227, "objective/train/original_loss": 4.124780178070068, "objective/train/theoretical_loss": 4.06359703915628, "objective/train/tokens_used": 377631200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24517571926116943, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050466537475586, "objective/train/weighted_lm_loss": 4.333748817443848, "objective/train/weights_max": 1.0512195825576782, "objective/train/weights_min": 0.958421528339386, "theoretical_loss": 4.06359703915628, "tokens_seen": 357171200 }, { "epoch": 0.13, "learning_rate": 0.0004405978055240257, "loss": 4.6883, "theoretical_loss": 4.06308732554517, "tokens_seen": 357564416 }, { "epoch": 0.13, "learning_rate": 0.00044040862656072647, "loss": 4.7466, "theoretical_loss": 4.061731589618044, "tokens_seen": 358612992 }, { "epoch": 0.13, "learning_rate": 0.00044021944759742716, "loss": 4.7125, "theoretical_loss": 4.060380918318552, "tokens_seen": 359661568 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.4845718443393707, "objective/train/docs_used": 215849, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.4106268882751465, "objective/train/original_loss": 4.410626411437988, "objective/train/theoretical_loss": 4.05937121827939, "objective/train/tokens_used": 380908000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2417263388633728, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496858358383179, "objective/train/weighted_lm_loss": 4.631262302398682, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9544422030448914, "theoretical_loss": 4.05937121827939, "tokens_seen": 360448000 }, { "epoch": 0.13, "learning_rate": 0.00044003026863412785, "loss": 4.687, "theoretical_loss": 4.059035278043591, "tokens_seen": 360710144 }, { "epoch": 0.13, "learning_rate": 0.0004398410896708286, "loss": 4.7187, "theoretical_loss": 4.057694635509866, "tokens_seen": 361758720 }, { "epoch": 0.13, "learning_rate": 0.00043965191070752935, "loss": 4.7245, "theoretical_loss": 4.056358957749928, "tokens_seen": 362807296 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.4675551652908325, "objective/train/docs_used": 217416, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.687634468078613, "objective/train/original_loss": 4.687634468078613, "objective/train/theoretical_loss": 4.055194286696828, "objective/train/tokens_used": 384184800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23009935021400452, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0479251146316528, "objective/train/weighted_lm_loss": 4.910377502441406, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9614465832710266, "theoretical_loss": 4.055194286696828, "tokens_seen": 363724800 }, { "epoch": 0.13, "learning_rate": 0.0004394627317442301, "loss": 4.6763, "theoretical_loss": 4.055028212108276, "tokens_seen": 363855872 }, { "epoch": 0.13, "learning_rate": 0.0004392735527809308, "loss": 4.6411, "theoretical_loss": 4.053702366237517, "tokens_seen": 364904448 }, { "epoch": 0.13, "learning_rate": 0.0004390843738176315, "loss": 4.6192, "theoretical_loss": 4.05238138809458, "tokens_seen": 365953024 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.48896151781082153, "objective/train/docs_used": 219277, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.551604270935059, "objective/train/original_loss": 4.551604270935059, "objective/train/theoretical_loss": 4.051065245936996, "objective/train/tokens_used": 387461600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24276012182235718, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501302480697632, "objective/train/weighted_lm_loss": 4.779492378234863, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9522210359573364, "theoretical_loss": 4.051065245936996, "tokens_seen": 367001600 }, { "epoch": 0.13, "learning_rate": 0.00043889519485433223, "loss": 4.5711, "theoretical_loss": 4.051065245936996, "tokens_seen": 367001600 }, { "epoch": 0.13, "learning_rate": 0.0004387060158910329, "loss": 4.6239, "theoretical_loss": 4.049753908319223, "tokens_seen": 368050176 }, { "epoch": 0.13, "learning_rate": 0.0004385168369277336, "loss": 4.6925, "theoretical_loss": 4.048447344089038, "tokens_seen": 369098752 }, { "epoch": 0.13, "learning_rate": 0.00043832765796443437, "loss": 4.6265, "theoretical_loss": 4.04714552238397, "tokens_seen": 370147328 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.48688071966171265, "objective/train/docs_used": 221307, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.563080310821533, "objective/train/original_loss": 4.563079833984375, "objective/train/theoretical_loss": 4.0469831266037, "objective/train/tokens_used": 390738400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24148398637771606, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499156713485718, "objective/train/weighted_lm_loss": 4.790269374847412, "objective/train/weights_max": 1.0512185096740723, "objective/train/weights_min": 0.9574251174926758, "theoretical_loss": 4.0469831266037, "tokens_seen": 370278400 }, { "epoch": 0.13, "learning_rate": 0.00043813847900113506, "loss": 4.6133, "theoretical_loss": 4.045848412627802, "tokens_seen": 371195904 }, { "epoch": 0.13, "learning_rate": 0.0004379493000378358, "loss": 4.6978, "theoretical_loss": 4.044555984527107, "tokens_seen": 372244480 }, { "epoch": 0.13, "learning_rate": 0.0004377601210745365, "loss": 4.6412, "theoretical_loss": 4.04326820806785, "tokens_seen": 373293056 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.4824051558971405, "objective/train/docs_used": 223312, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.324045658111572, "objective/train/original_loss": 4.324045658111572, "objective/train/theoretical_loss": 4.042946987281072, "objective/train/tokens_used": 394015200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2413923442363739, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494674444198608, "objective/train/weighted_lm_loss": 4.537526607513428, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.951562762260437, "theoretical_loss": 4.042946987281072, "tokens_seen": 373555200 }, { "epoch": 0.13, "learning_rate": 0.0004375709421112372, "loss": 4.6788, "theoretical_loss": 4.041985053512038, "tokens_seen": 374341632 }, { "epoch": 0.13, "learning_rate": 0.000437381763147938, "loss": 4.5914, "theoretical_loss": 4.040706491394406, "tokens_seen": 375390208 }, { "epoch": 0.13, "learning_rate": 0.0004371925841846387, "loss": 4.5749, "theoretical_loss": 4.0394324925191745, "tokens_seen": 376438784 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.4723070561885834, "objective/train/docs_used": 225129, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.304555892944336, "objective/train/original_loss": 4.304555892944336, "objective/train/theoretical_loss": 4.038955913488913, "objective/train/tokens_used": 397292000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23399347066879272, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484198331832886, "objective/train/weighted_lm_loss": 4.51221227645874, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9514410495758057, "theoretical_loss": 4.038955913488913, "tokens_seen": 376832000 }, { "epoch": 0.13, "learning_rate": 0.00043700340522133944, "loss": 4.5628, "theoretical_loss": 4.038163027956834, "tokens_seen": 377487360 }, { "epoch": 0.14, "learning_rate": 0.00043681422625804013, "loss": 4.5568, "theoretical_loss": 4.036898069040989, "tokens_seen": 378535936 }, { "epoch": 0.14, "learning_rate": 0.0004366250472947408, "loss": 4.5313, "theoretical_loss": 4.03563758736524, "tokens_seen": 379584512 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.4890446662902832, "objective/train/docs_used": 226852, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.509145259857178, "objective/train/original_loss": 4.5091447830200195, "objective/train/theoretical_loss": 4.035009016685741, "objective/train/tokens_used": 400568800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24364469945430756, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501431226730347, "objective/train/weighted_lm_loss": 4.735697269439697, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9592508673667908, "theoretical_loss": 4.035009016685741, "tokens_seen": 380108800 }, { "epoch": 0.14, "learning_rate": 0.00043643586833144157, "loss": 4.6097, "theoretical_loss": 4.034381554780124, "tokens_seen": 380633088 }, { "epoch": 0.14, "learning_rate": 0.00043624668936814227, "loss": 4.5698, "theoretical_loss": 4.033129943390076, "tokens_seen": 381681664 }, { "epoch": 0.14, "learning_rate": 0.00043605751040484296, "loss": 4.6048, "theoretical_loss": 4.031882725550463, "tokens_seen": 382730240 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.47916504740715027, "objective/train/docs_used": 228664, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.19334602355957, "objective/train/original_loss": 4.19334602355957, "objective/train/theoretical_loss": 4.031105433316977, "objective/train/tokens_used": 403845600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23511439561843872, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491114854812622, "objective/train/weighted_lm_loss": 4.397870063781738, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9654620289802551, "theoretical_loss": 4.031105433316977, "tokens_seen": 383385600 }, { "epoch": 0.14, "learning_rate": 0.0004358683314415437, "loss": 4.6059, "theoretical_loss": 4.030639873864638, "tokens_seen": 383778816 }, { "epoch": 0.14, "learning_rate": 0.0004356791524782444, "loss": 4.4924, "theoretical_loss": 4.029401361181049, "tokens_seen": 384827392 }, { "epoch": 0.14, "learning_rate": 0.00043548997351494515, "loss": 4.5844, "theoretical_loss": 4.028167160590383, "tokens_seen": 385875968 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.48542872071266174, "objective/train/docs_used": 230356, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.9225900173187256, "objective/train/original_loss": 3.922590732574463, "objective/train/theoretical_loss": 4.027244323905839, "objective/train/tokens_used": 407122400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24344097077846527, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497803688049316, "objective/train/weighted_lm_loss": 4.117582321166992, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9526301622390747, "theoretical_loss": 4.027244323905839, "tokens_seen": 386662400 }, { "epoch": 0.14, "learning_rate": 0.00043530079455164584, "loss": 4.5285, "theoretical_loss": 4.026937245422756, "tokens_seen": 386924544 }, { "epoch": 0.14, "learning_rate": 0.0004351116155883466, "loss": 4.4802, "theoretical_loss": 4.025711589244939, "tokens_seen": 387973120 }, { "epoch": 0.14, "learning_rate": 0.00043492243662504734, "loss": 4.5097, "theoretical_loss": 4.024490165857627, "tokens_seen": 389021696 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.4711974859237671, "objective/train/docs_used": 231398, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.248833656311035, "objective/train/original_loss": 4.248834133148193, "objective/train/theoretical_loss": 4.0234248721847035, "objective/train/tokens_used": 410399200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23104798793792725, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048293948173523, "objective/train/weighted_lm_loss": 4.452199935913086, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9518858790397644, "theoretical_loss": 4.0234248721847035, "tokens_seen": 389939200 }, { "epoch": 0.14, "learning_rate": 0.00043473325766174803, "loss": 4.5464, "theoretical_loss": 4.023272949292743, "tokens_seen": 390070272 }, { "epoch": 0.14, "learning_rate": 0.0004345440786984488, "loss": 4.5371, "theoretical_loss": 4.022059913810782, "tokens_seen": 391118848 }, { "epoch": 0.14, "learning_rate": 0.00043435489973514947, "loss": 4.5544, "theoretical_loss": 4.020851033898196, "tokens_seen": 392167424 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.47954094409942627, "objective/train/docs_used": 233309, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.41207218170166, "objective/train/original_loss": 4.412071228027344, "objective/train/theoretical_loss": 4.019646284264807, "objective/train/tokens_used": 413676000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23941875994205475, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491708517074585, "objective/train/weighted_lm_loss": 4.62913703918457, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9515848755836487, "theoretical_loss": 4.019646284264807, "tokens_seen": 393216000 }, { "epoch": 0.14, "learning_rate": 0.00043416572077185016, "loss": 4.6125, "theoretical_loss": 4.019646284264807, "tokens_seen": 393216000 }, { "epoch": 0.14, "learning_rate": 0.0004339765418085509, "loss": 4.6338, "theoretical_loss": 4.01844563984127, "tokens_seen": 394264576 }, { "epoch": 0.14, "learning_rate": 0.0004337873628452516, "loss": 4.6127, "theoretical_loss": 4.0172490757765535, "tokens_seen": 395313152 }, { "epoch": 0.14, "learning_rate": 0.0004335981838819523, "loss": 4.632, "theoretical_loss": 4.016056567435475, "tokens_seen": 396361728 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.48889124393463135, "objective/train/docs_used": 234734, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.166934967041016, "objective/train/original_loss": 4.166934967041016, "objective/train/theoretical_loss": 4.0159077878422815, "objective/train/tokens_used": 416952800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2409810870885849, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501141548156738, "objective/train/weighted_lm_loss": 4.375463962554932, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9956961274147034, "theoretical_loss": 4.0159077878422815, "tokens_seen": 396492800 }, { "epoch": 0.14, "learning_rate": 0.00043340900491865305, "loss": 4.5567, "theoretical_loss": 4.014868090396256, "tokens_seen": 397410304 }, { "epoch": 0.14, "learning_rate": 0.00043321982595535374, "loss": 4.4485, "theoretical_loss": 4.013683620448113, "tokens_seen": 398458880 }, { "epoch": 0.14, "learning_rate": 0.0004330306469920545, "loss": 4.5947, "theoretical_loss": 4.0125031335888925, "tokens_seen": 399507456 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.476698637008667, "objective/train/docs_used": 236668, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.221770286560059, "objective/train/original_loss": 4.221770763397217, "objective/train/theoretical_loss": 4.0122086314386545, "objective/train/tokens_used": 420229600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23127447068691254, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488451719284058, "objective/train/weighted_lm_loss": 4.427043914794922, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9582767486572266, "theoretical_loss": 4.0122086314386545, "tokens_seen": 399769600 }, { "epoch": 0.14, "learning_rate": 0.00043284146802875524, "loss": 4.554, "theoretical_loss": 4.0113266060227275, "tokens_seen": 400556032 }, { "epoch": 0.14, "learning_rate": 0.00043265228906545593, "loss": 4.5778, "theoretical_loss": 4.010154014157727, "tokens_seen": 401604608 }, { "epoch": 0.14, "learning_rate": 0.0004324631101021567, "loss": 4.5306, "theoretical_loss": 4.008985334603709, "tokens_seen": 402653184 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.4728894829750061, "objective/train/docs_used": 238677, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.1277618408203125, "objective/train/original_loss": 4.1277618408203125, "objective/train/theoretical_loss": 4.00854808367405, "objective/train/tokens_used": 423506400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23286591470241547, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048472285270691, "objective/train/weighted_lm_loss": 4.3288140296936035, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9518867135047913, "theoretical_loss": 4.00854808367405, "tokens_seen": 403046400 }, { "epoch": 0.14, "learning_rate": 0.00043227393113885737, "loss": 4.541, "theoretical_loss": 4.007820544169944, "tokens_seen": 403701760 }, { "epoch": 0.14, "learning_rate": 0.0004320847521755581, "loss": 4.5066, "theoretical_loss": 4.006659619862954, "tokens_seen": 404750336 }, { "epoch": 0.14, "learning_rate": 0.0004318955732122588, "loss": 4.5251, "theoretical_loss": 4.0055025388843175, "tokens_seen": 405798912 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.4813375473022461, "objective/train/docs_used": 240483, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.646761417388916, "objective/train/original_loss": 4.646761894226074, "objective/train/theoretical_loss": 4.004925432571433, "objective/train/tokens_used": 426783200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23505516350269318, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493284463882446, "objective/train/weighted_lm_loss": 4.875973224639893, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9840477705001831, "theoretical_loss": 4.004925432571433, "tokens_seen": 406323200 }, { "epoch": 0.15, "learning_rate": 0.0004317063942489595, "loss": 4.5157, "theoretical_loss": 4.004349278628525, "tokens_seen": 406847488 }, { "epoch": 0.15, "learning_rate": 0.00043151721528566025, "loss": 4.6079, "theoretical_loss": 4.00319981668085, "tokens_seen": 407896064 }, { "epoch": 0.15, "learning_rate": 0.00043132803632236095, "loss": 4.617, "theoretical_loss": 4.002054130815253, "tokens_seen": 408944640 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.4804536998271942, "objective/train/docs_used": 242341, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.573429107666016, "objective/train/original_loss": 4.573429107666016, "objective/train/theoretical_loss": 4.0013399848903175, "objective/train/tokens_used": 430060000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23860077559947968, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492582321166992, "objective/train/weighted_lm_loss": 4.798383712768555, "objective/train/weights_max": 1.0512185096740723, "objective/train/weights_min": 0.9523076415061951, "theoretical_loss": 4.0013399848903175, "tokens_seen": 409600000 }, { "epoch": 0.15, "learning_rate": 0.00043113885735906164, "loss": 4.6468, "theoretical_loss": 4.000912198992316, "tokens_seen": 409993216 }, { "epoch": 0.15, "learning_rate": 0.0004309496783957624, "loss": 4.5594, "theoretical_loss": 3.9997739993572035, "tokens_seen": 411041792 }, { "epoch": 0.15, "learning_rate": 0.0004307604994324631, "loss": 4.6211, "theoretical_loss": 3.9986395102376453, "tokens_seen": 412090368 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.480744332075119, "objective/train/docs_used": 244062, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.4469990730285645, "objective/train/original_loss": 4.446999549865723, "objective/train/theoretical_loss": 3.997791065488486, "objective/train/tokens_used": 433336800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2378513216972351, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492832660675049, "objective/train/weighted_lm_loss": 4.665361404418945, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9535585641860962, "theoretical_loss": 3.997791065488486, "tokens_seen": 412876800 }, { "epoch": 0.15, "learning_rate": 0.0004305713204691639, "loss": 4.6416, "theoretical_loss": 3.99750871014196, "tokens_seen": 413138944 }, { "epoch": 0.15, "learning_rate": 0.0004303821415058646, "loss": 4.5362, "theoretical_loss": 3.9963815777570897, "tokens_seen": 414187520 }, { "epoch": 0.15, "learning_rate": 0.00043019296254256527, "loss": 4.5415, "theoretical_loss": 3.99525809194667, "tokens_seen": 415236096 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.48422208428382874, "objective/train/docs_used": 245710, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.429144382476807, "objective/train/original_loss": 4.429144859313965, "objective/train/theoretical_loss": 3.9942780167103145, "objective/train/tokens_used": 436613600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23887021839618683, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496364831924438, "objective/train/weighted_lm_loss": 4.65024995803833, "objective/train/weights_max": 1.0512161254882812, "objective/train/weights_min": 0.9516666531562805, "theoretical_loss": 3.9942780167103145, "tokens_seen": 416153600 }, { "epoch": 0.15, "learning_rate": 0.000430003783579266, "loss": 4.4764, "theoretical_loss": 3.9941382317491225, "tokens_seen": 416284672 }, { "epoch": 0.15, "learning_rate": 0.0004298146046159667, "loss": 4.5263, "theoretical_loss": 3.9930219763757755, "tokens_seen": 417333248 }, { "epoch": 0.15, "learning_rate": 0.00042962542565266746, "loss": 4.5673, "theoretical_loss": 3.9919093052090058, "tokens_seen": 418381824 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.4735833406448364, "objective/train/docs_used": 247828, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.22092866897583, "objective/train/original_loss": 4.220929145812988, "objective/train/theoretical_loss": 3.9908001978004064, "objective/train/tokens_used": 439890400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23547500371932983, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485553741455078, "objective/train/weighted_lm_loss": 4.430475234985352, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9541728496551514, "theoretical_loss": 3.9908001978004064, "tokens_seen": 419430400 }, { "epoch": 0.15, "learning_rate": 0.00042943624668936815, "loss": 4.5733, "theoretical_loss": 3.9908001978004064, "tokens_seen": 419430400 }, { "epoch": 0.15, "learning_rate": 0.00042924706772606884, "loss": 4.5431, "theoretical_loss": 3.989694633868981, "tokens_seen": 420478976 }, { "epoch": 0.15, "learning_rate": 0.0004290578887627696, "loss": 4.5784, "theoretical_loss": 3.988592593299358, "tokens_seen": 421527552 }, { "epoch": 0.15, "learning_rate": 0.0004288687097994703, "loss": 4.4724, "theoretical_loss": 3.9874940561400294, "tokens_seen": 422576128 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.47702404856681824, "objective/train/docs_used": 249572, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.135619163513184, "objective/train/original_loss": 4.135619163513184, "objective/train/theoretical_loss": 3.9873569843412913, "objective/train/tokens_used": 443167200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2411937266588211, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048928141593933, "objective/train/weighted_lm_loss": 4.336599826812744, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9514898061752319, "theoretical_loss": 3.9873569843412913, "tokens_seen": 422707200 }, { "epoch": 0.15, "learning_rate": 0.000428679530836171, "loss": 4.6252, "theoretical_loss": 3.986399002601617, "tokens_seen": 423624704 }, { "epoch": 0.15, "learning_rate": 0.0004284903518728717, "loss": 4.5332, "theoretical_loss": 3.9853074130551542, "tokens_seen": 424673280 }, { "epoch": 0.15, "learning_rate": 0.0004283011729095725, "loss": 4.4783, "theoretical_loss": 3.984219268030392, "tokens_seen": 425721856 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.4593813717365265, "objective/train/docs_used": 251147, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.276764392852783, "objective/train/original_loss": 4.276763916015625, "objective/train/theoretical_loss": 3.9839477677140245, "objective/train/tokens_used": 446444000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23159852623939514, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0471152067184448, "objective/train/weighted_lm_loss": 4.481888771057129, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9540482759475708, "theoretical_loss": 3.9839477677140245, "tokens_seen": 425984000 }, { "epoch": 0.15, "learning_rate": 0.0004281119939462732, "loss": 4.5639, "theoretical_loss": 3.983134548214133, "tokens_seen": 426770432 }, { "epoch": 0.15, "learning_rate": 0.0004279228149829739, "loss": 4.5079, "theoretical_loss": 3.98205323444858, "tokens_seen": 427819008 }, { "epoch": 0.15, "learning_rate": 0.0004277336360196746, "loss": 4.4224, "theoretical_loss": 3.9809753077297074, "tokens_seen": 428867584 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.47926458716392517, "objective/train/docs_used": 253081, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.053443431854248, "objective/train/original_loss": 4.053443908691406, "objective/train/theoretical_loss": 3.98057195458059, "objective/train/tokens_used": 449720800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23603300750255585, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049126148223877, "objective/train/weighted_lm_loss": 4.253237247467041, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9541853070259094, "theoretical_loss": 3.98057195458059, "tokens_seen": 429260800 }, { "epoch": 0.15, "learning_rate": 0.00042754445705637536, "loss": 4.5223, "theoretical_loss": 3.979900749205657, "tokens_seen": 429916160 }, { "epoch": 0.15, "learning_rate": 0.00042735527809307605, "loss": 4.4297, "theoretical_loss": 3.9788295401751483, "tokens_seen": 430964736 }, { "epoch": 0.15, "learning_rate": 0.0004271660991297768, "loss": 4.4719, "theoretical_loss": 3.9777616620859186, "tokens_seen": 432013312 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.4837666153907776, "objective/train/docs_used": 255033, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.001176357269287, "objective/train/original_loss": 4.001176834106445, "objective/train/theoretical_loss": 3.9772289663870657, "objective/train/tokens_used": 452997600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23786011338233948, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495855808258057, "objective/train/weighted_lm_loss": 4.199849605560303, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9561724066734314, "theoretical_loss": 3.9772289663870657, "tokens_seen": 432537600 }, { "epoch": 0.15, "learning_rate": 0.0004269769201664775, "loss": 4.4716, "theoretical_loss": 3.976697096533171, "tokens_seen": 433061888 }, { "epoch": 0.16, "learning_rate": 0.0004267877412031782, "loss": 4.4514, "theoretical_loss": 3.975635825258053, "tokens_seen": 434110464 }, { "epoch": 0.16, "learning_rate": 0.00042659856223987893, "loss": 4.448, "theoretical_loss": 3.9745778301461483, "tokens_seen": 435159040 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.4698527157306671, "objective/train/docs_used": 256972, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.00714635848999, "objective/train/original_loss": 4.007145881652832, "objective/train/theoretical_loss": 3.9739182388865606, "objective/train/tokens_used": 456274400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22894169390201569, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481488704681396, "objective/train/weighted_lm_loss": 4.203254699707031, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9517961740493774, "theoretical_loss": 3.9739182388865606, "tokens_seen": 435814400 }, { "epoch": 0.16, "learning_rate": 0.0004264093832765796, "loss": 4.4264, "theoretical_loss": 3.9735230932259893, "tokens_seen": 436207616 }, { "epoch": 0.16, "learning_rate": 0.0004262202043132803, "loss": 4.5008, "theoretical_loss": 3.9724715966675896, "tokens_seen": 437256192 }, { "epoch": 0.16, "learning_rate": 0.0004260310253499811, "loss": 4.4519, "theoretical_loss": 3.9714233227809936, "tokens_seen": 438304768 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.4772937297821045, "objective/train/docs_used": 259026, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.386899948120117, "objective/train/original_loss": 4.386899948120117, "objective/train/theoretical_loss": 3.9706392216810085, "objective/train/tokens_used": 459551200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24028657376766205, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048950433731079, "objective/train/weighted_lm_loss": 4.601118564605713, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9514262080192566, "theoretical_loss": 3.9706392216810085, "tokens_seen": 439091200 }, { "epoch": 0.16, "learning_rate": 0.0004258418463866818, "loss": 4.4169, "theoretical_loss": 3.970378254014844, "tokens_seen": 439353344 }, { "epoch": 0.16, "learning_rate": 0.00042565266742338256, "loss": 4.4168, "theoretical_loss": 3.96933637295497, "tokens_seen": 440401920 }, { "epoch": 0.16, "learning_rate": 0.00042546348846008326, "loss": 4.4585, "theoretical_loss": 3.96829766232299, "tokens_seen": 441450496 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.4840179681777954, "objective/train/docs_used": 261126, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.244239330291748, "objective/train/original_loss": 4.244239330291748, "objective/train/theoretical_loss": 3.9673913777809253, "objective/train/tokens_used": 462828000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24010293185710907, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496221780776978, "objective/train/weighted_lm_loss": 4.457010746002197, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9591333270072937, "theoretical_loss": 3.9673913777809253, "tokens_seen": 442368000 }, { "epoch": 0.16, "learning_rate": 0.00042527430949678395, "loss": 4.4311, "theoretical_loss": 3.9672621049749335, "tokens_seen": 442499072 }, { "epoch": 0.16, "learning_rate": 0.0004250851305334847, "loss": 4.4316, "theoretical_loss": 3.96622968389988, "tokens_seen": 443547648 }, { "epoch": 0.16, "learning_rate": 0.0004248959515701854, "loss": 4.3984, "theoretical_loss": 3.9652003822186166, "tokens_seen": 444596224 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.4815636873245239, "objective/train/docs_used": 262792, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.1629133224487305, "objective/train/original_loss": 4.1629133224487305, "objective/train/theoretical_loss": 3.9641741831823065, "objective/train/tokens_used": 466104800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24196134507656097, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493861436843872, "objective/train/weighted_lm_loss": 4.367969989776611, "objective/train/weights_max": 1.0512193441390991, "objective/train/weights_min": 0.9516691565513611, "theoretical_loss": 3.9641741831823065, "tokens_seen": 445644800 }, { "epoch": 0.16, "learning_rate": 0.00042470677260688614, "loss": 4.48, "theoretical_loss": 3.9641741831823065, "tokens_seen": 445644800 }, { "epoch": 0.16, "learning_rate": 0.00042451759364358683, "loss": 4.4281, "theoretical_loss": 3.9631510701711816, "tokens_seen": 446693376 }, { "epoch": 0.16, "learning_rate": 0.0004243284146802875, "loss": 4.4256, "theoretical_loss": 3.9621310266932457, "tokens_seen": 447741952 }, { "epoch": 0.16, "learning_rate": 0.00042413923571698827, "loss": 4.3659, "theoretical_loss": 3.9611140363829977, "tokens_seen": 448790528 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.481901079416275, "objective/train/docs_used": 264102, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.1385955810546875, "objective/train/original_loss": 4.138594627380371, "objective/train/theoretical_loss": 3.960987126459872, "objective/train/tokens_used": 469381600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2416316270828247, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494182109832764, "objective/train/weighted_lm_loss": 4.344489097595215, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9545145630836487, "theoretical_loss": 3.960987126459872, "tokens_seen": 448921600 }, { "epoch": 0.16, "learning_rate": 0.00042395005675368897, "loss": 4.4184, "theoretical_loss": 3.9601000830001665, "tokens_seen": 449839104 }, { "epoch": 0.16, "learning_rate": 0.00042376087779038977, "loss": 4.3922, "theoretical_loss": 3.9590891504284635, "tokens_seen": 450887680 }, { "epoch": 0.16, "learning_rate": 0.00042357169882709046, "loss": 4.2944, "theoretical_loss": 3.9580812226743523, "tokens_seen": 451936256 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.490369588136673, "objective/train/docs_used": 265980, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.172577381134033, "objective/train/original_loss": 4.172577857971191, "objective/train/theoretical_loss": 3.9578297083759195, "objective/train/tokens_used": 472658400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2438024878501892, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050276279449463, "objective/train/weighted_lm_loss": 4.382692337036133, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9516772627830505, "theoretical_loss": 3.9578297083759195, "tokens_seen": 452198400 }, { "epoch": 0.16, "learning_rate": 0.00042338251986379115, "loss": 4.3378, "theoretical_loss": 3.95707628386583, "tokens_seen": 452984832 }, { "epoch": 0.16, "learning_rate": 0.0004231933409004919, "loss": 4.4524, "theoretical_loss": 3.9560743182512255, "tokens_seen": 454033408 }, { "epoch": 0.16, "learning_rate": 0.0004230041619371926, "loss": 4.3017, "theoretical_loss": 3.9550753101980103, "tokens_seen": 455081984 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.47568219900131226, "objective/train/docs_used": 267725, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.0070085525512695, "objective/train/original_loss": 4.0070085525512695, "objective/train/theoretical_loss": 3.954701441504068, "objective/train/tokens_used": 475935200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23760542273521423, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487757921218872, "objective/train/weighted_lm_loss": 4.202053546905518, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9515478014945984, "theoretical_loss": 3.954701441504068, "tokens_seen": 455475200 }, { "epoch": 0.16, "learning_rate": 0.0004228149829738933, "loss": 4.3259, "theoretical_loss": 3.954079244191628, "tokens_seen": 456130560 }, { "epoch": 0.16, "learning_rate": 0.00042262580401059404, "loss": 4.3595, "theoretical_loss": 3.953086104834334, "tokens_seen": 457179136 }, { "epoch": 0.16, "learning_rate": 0.00042243662504729473, "loss": 4.3055, "theoretical_loss": 3.9520958768440484, "tokens_seen": 458227712 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.47381123900413513, "objective/train/docs_used": 269663, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.464710235595703, "objective/train/original_loss": 4.464710235595703, "objective/train/theoretical_loss": 3.951601849867233, "objective/train/tokens_used": 479212000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23545365035533905, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485777854919434, "objective/train/weighted_lm_loss": 4.680891036987305, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.9522637724876404, "theoretical_loss": 3.951601849867233, "tokens_seen": 458752000 }, { "epoch": 0.16, "learning_rate": 0.0004222474460839955, "loss": 4.2918, "theoretical_loss": 3.951108545053229, "tokens_seen": 459276288 }, { "epoch": 0.16, "learning_rate": 0.00042205826712069617, "loss": 4.3763, "theoretical_loss": 3.9501240944077494, "tokens_seen": 460324864 }, { "epoch": 0.16, "learning_rate": 0.00042186908815739686, "loss": 4.3612, "theoretical_loss": 3.949142509965799, "tokens_seen": 461373440 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.48686516284942627, "objective/train/docs_used": 271706, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.096804141998291, "objective/train/original_loss": 4.096804618835449, "objective/train/theoretical_loss": 3.948530468589195, "objective/train/tokens_used": 482488800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23930074274539948, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499029159545898, "objective/train/weighted_lm_loss": 4.302978515625, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9692782759666443, "theoretical_loss": 3.948530468589195, "tokens_seen": 462028800 }, { "epoch": 0.17, "learning_rate": 0.0004216799091940976, "loss": 4.3302, "theoretical_loss": 3.9481637768967883, "tokens_seen": 462422016 }, { "epoch": 0.17, "learning_rate": 0.00042149073023079836, "loss": 4.3089, "theoretical_loss": 3.9471878804802736, "tokens_seen": 463470592 }, { "epoch": 0.17, "learning_rate": 0.0004213015512674991, "loss": 4.3075, "theoretical_loss": 3.9462148061048907, "tokens_seen": 464519168 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.48215600848197937, "objective/train/docs_used": 273436, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.740396738052368, "objective/train/original_loss": 3.740396499633789, "objective/train/theoretical_loss": 3.9454868435591504, "objective/train/tokens_used": 485765600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2377597689628601, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494240522384644, "objective/train/weighted_lm_loss": 3.9267635345458984, "objective/train/weights_max": 1.0512155294418335, "objective/train/weights_min": 0.951538622379303, "theoretical_loss": 3.9454868435591504, "tokens_seen": 465305600 }, { "epoch": 0.17, "learning_rate": 0.0004211123723041998, "loss": 4.3019, "theoretical_loss": 3.945244539267303, "tokens_seen": 465567744 }, { "epoch": 0.17, "learning_rate": 0.0004209231933409005, "loss": 4.2432, "theoretical_loss": 3.9442770655711614, "tokens_seen": 466616320 }, { "epoch": 0.17, "learning_rate": 0.00042073401437760124, "loss": 4.2614, "theoretical_loss": 3.9433123707260775, "tokens_seen": 467664896 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.4793383777141571, "objective/train/docs_used": 275552, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.101151943206787, "objective/train/original_loss": 4.101151943206787, "objective/train/theoretical_loss": 3.9424705311086856, "objective/train/tokens_used": 489042400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2382468432188034, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491446256637573, "objective/train/weighted_lm_loss": 4.304407119750977, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9532943367958069, "theoretical_loss": 3.9424705311086856, "tokens_seen": 468582400 }, { "epoch": 0.17, "learning_rate": 0.00042054483541430194, "loss": 4.2677, "theoretical_loss": 3.9423504405466074, "tokens_seen": 468713472 }, { "epoch": 0.17, "learning_rate": 0.00042035565645100263, "loss": 4.1909, "theoretical_loss": 3.9413912609512485, "tokens_seen": 469762048 }, { "epoch": 0.17, "learning_rate": 0.0004201664774877034, "loss": 4.3726, "theoretical_loss": 3.940434817961448, "tokens_seen": 470810624 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.4858047068119049, "objective/train/docs_used": 277578, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.086038112640381, "objective/train/original_loss": 4.086038112640381, "objective/train/theoretical_loss": 3.939481097700623, "objective/train/tokens_used": 492319200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2407042235136032, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498039722442627, "objective/train/weighted_lm_loss": 4.289696216583252, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9598610401153564, "theoretical_loss": 3.939481097700623, "tokens_seen": 471859200 }, { "epoch": 0.17, "learning_rate": 0.00041997729852440407, "loss": 4.3418, "theoretical_loss": 3.939481097700623, "tokens_seen": 471859200 }, { "epoch": 0.17, "learning_rate": 0.0004197881195611048, "loss": 4.2716, "theoretical_loss": 3.9385300863931914, "tokens_seen": 472907776 }, { "epoch": 0.17, "learning_rate": 0.0004195989405978055, "loss": 4.3335, "theoretical_loss": 3.9375817703636167, "tokens_seen": 473956352 }, { "epoch": 0.17, "learning_rate": 0.0004194097616345062, "loss": 4.2623, "theoretical_loss": 3.9366361360354585, "tokens_seen": 475004928 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.4816089868545532, "objective/train/docs_used": 279357, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.476291656494141, "objective/train/original_loss": 4.476291656494141, "objective/train/theoretical_loss": 3.936518119629225, "objective/train/tokens_used": 495596000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2425791174173355, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049393892288208, "objective/train/weighted_lm_loss": 4.69778299331665, "objective/train/weights_max": 1.0512197017669678, "objective/train/weights_min": 0.9523156881332397, "theoretical_loss": 3.936518119629225, "tokens_seen": 475136000 }, { "epoch": 0.17, "learning_rate": 0.000419220582671207, "loss": 4.2778, "theoretical_loss": 3.9356931699304427, "tokens_seen": 476053504 }, { "epoch": 0.17, "learning_rate": 0.0004190314037079077, "loss": 4.3016, "theoretical_loss": 3.9347528586675304, "tokens_seen": 477102080 }, { "epoch": 0.17, "learning_rate": 0.00041884222474460845, "loss": 4.3464, "theoretical_loss": 3.9338151889620114, "tokens_seen": 478150656 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.4777883291244507, "objective/train/docs_used": 280385, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.414139270782471, "objective/train/original_loss": 4.4141387939453125, "objective/train/theoretical_loss": 3.933581182731271, "objective/train/tokens_used": 498872800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2336207926273346, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489662885665894, "objective/train/weighted_lm_loss": 4.631285190582275, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9555152058601379, "theoretical_loss": 3.933581182731271, "tokens_seen": 478412800 }, { "epoch": 0.17, "learning_rate": 0.00041865304578130914, "loss": 4.3909, "theoretical_loss": 3.9328801476245987, "tokens_seen": 479199232 }, { "epoch": 0.17, "learning_rate": 0.00041846386681800983, "loss": 4.3807, "theoretical_loss": 3.9319477215605323, "tokens_seen": 480247808 }, { "epoch": 0.17, "learning_rate": 0.0004182746878547106, "loss": 4.3472, "theoretical_loss": 3.9310178977687045, "tokens_seen": 481296384 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.48086094856262207, "objective/train/docs_used": 282096, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.888185977935791, "objective/train/original_loss": 3.888186454772949, "objective/train/theoretical_loss": 3.930669882107529, "objective/train/tokens_used": 502149600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23717832565307617, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049291729927063, "objective/train/weighted_lm_loss": 4.0802998542785645, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9538412094116211, "theoretical_loss": 3.930669882107529, "tokens_seen": 481689600 }, { "epoch": 0.17, "learning_rate": 0.0004180855088914113, "loss": 4.2477, "theoretical_loss": 3.930090663340782, "tokens_seen": 482344960 }, { "epoch": 0.17, "learning_rate": 0.00041789632992811197, "loss": 4.267, "theoretical_loss": 3.9291660054603454, "tokens_seen": 483393536 }, { "epoch": 0.17, "learning_rate": 0.0004177071509648127, "loss": 4.2292, "theoretical_loss": 3.9282439114020375, "tokens_seen": 484442112 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.48390454053878784, "objective/train/docs_used": 284179, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.326482772827148, "objective/train/original_loss": 4.326481819152832, "objective/train/theoretical_loss": 3.927783821854201, "objective/train/tokens_used": 505426400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2400244176387787, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496103763580322, "objective/train/weighted_lm_loss": 4.54107666015625, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9513835310935974, "theoretical_loss": 3.927783821854201, "tokens_seen": 484966400 }, { "epoch": 0.17, "learning_rate": 0.0004175179720015134, "loss": 4.2245, "theoretical_loss": 3.927324368530723, "tokens_seen": 485490688 }, { "epoch": 0.17, "learning_rate": 0.00041732879303821416, "loss": 4.1996, "theoretical_loss": 3.926407364300649, "tokens_seen": 486539264 }, { "epoch": 0.17, "learning_rate": 0.00041713961407491485, "loss": 4.2332, "theoretical_loss": 3.9254928862546303, "tokens_seen": 487587840 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.4841022193431854, "objective/train/docs_used": 285838, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.9023804664611816, "objective/train/original_loss": 3.90238094329834, "objective/train/theoretical_loss": 3.9249226148038927, "objective/train/tokens_used": 508703200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2402951419353485, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496317148208618, "objective/train/weighted_lm_loss": 4.096041202545166, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9679876565933228, "theoretical_loss": 3.9249226148038927, "tokens_seen": 488243200 }, { "epoch": 0.17, "learning_rate": 0.0004169504351116156, "loss": 4.2304, "theoretical_loss": 3.9245809220232295, "tokens_seen": 488636416 }, { "epoch": 0.17, "learning_rate": 0.00041676125614831635, "loss": 4.2336, "theoretical_loss": 3.923671459323953, "tokens_seen": 489684992 }, { "epoch": 0.18, "learning_rate": 0.00041657207718501704, "loss": 4.1907, "theoretical_loss": 3.9227644859604562, "tokens_seen": 490733568 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.49535855650901794, "objective/train/docs_used": 287192, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.264978885650635, "objective/train/original_loss": 4.264978408813477, "objective/train/theoretical_loss": 3.9220858822757396, "objective/train/tokens_used": 511980000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24591341614723206, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050786018371582, "objective/train/weighted_lm_loss": 4.481633186340332, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9592586159706116, "theoretical_loss": 3.9220858822757396, "tokens_seen": 491520000 }, { "epoch": 0.18, "learning_rate": 0.0004163828982217178, "loss": 4.234, "theoretical_loss": 3.9218599898217583, "tokens_seen": 491782144 }, { "epoch": 0.18, "learning_rate": 0.0004161937192584185, "loss": 4.2348, "theoretical_loss": 3.92095795888146, "tokens_seen": 492830720 }, { "epoch": 0.18, "learning_rate": 0.0004160045402951192, "loss": 4.2251, "theoretical_loss": 3.9200583811969785, "tokens_seen": 493879296 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.4876641631126404, "objective/train/docs_used": 289259, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.9813899993896484, "objective/train/original_loss": 3.9813899993896484, "objective/train/theoretical_loss": 3.9192732538342785, "objective/train/tokens_used": 515256800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24164560437202454, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049994945526123, "objective/train/weighted_lm_loss": 4.1802239418029785, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9596173763275146, "theoretical_loss": 3.9192732538342785, "tokens_seen": 494796800 }, { "epoch": 0.18, "learning_rate": 0.0004158153613318199, "loss": 4.2636, "theoretical_loss": 3.919161244908785, "tokens_seen": 494927872 }, { "epoch": 0.18, "learning_rate": 0.0004156261823685206, "loss": 4.2658, "theoretical_loss": 3.918266538239653, "tokens_seen": 495976448 }, { "epoch": 0.18, "learning_rate": 0.00041543700340522136, "loss": 4.2883, "theoretical_loss": 3.917374249493913, "tokens_seen": 497025024 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.4695127606391907, "objective/train/docs_used": 291398, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.011775970458984, "objective/train/original_loss": 4.011776447296143, "objective/train/theoretical_loss": 3.9164843670567215, "objective/train/tokens_used": 518533600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23077881336212158, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048124074935913, "objective/train/weighted_lm_loss": 4.204472064971924, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9517326354980469, "theoretical_loss": 3.9164843670567215, "tokens_seen": 498073600 }, { "epoch": 0.18, "learning_rate": 0.00041524782444192206, "loss": 4.2508, "theoretical_loss": 3.9164843670567215, "tokens_seen": 498073600 }, { "epoch": 0.18, "learning_rate": 0.00041505864547862275, "loss": 4.255, "theoretical_loss": 3.9155968793933273, "tokens_seen": 499122176 }, { "epoch": 0.18, "learning_rate": 0.0004148694665153235, "loss": 4.2992, "theoretical_loss": 3.9147117750483584, "tokens_seen": 500170752 }, { "epoch": 0.18, "learning_rate": 0.00041468028755202425, "loss": 4.2444, "theoretical_loss": 3.913829042645107, "tokens_seen": 501219328 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.46639207005500793, "objective/train/docs_used": 293236, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.7727935314178467, "objective/train/original_loss": 3.772793769836426, "objective/train/theoretical_loss": 3.913718867308278, "objective/train/tokens_used": 521810400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23303526639938354, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.047823190689087, "objective/train/weighted_lm_loss": 3.950061559677124, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9522370100021362, "theoretical_loss": 3.913718867308278, "tokens_seen": 501350400 }, { "epoch": 0.18, "learning_rate": 0.00041449110858872494, "loss": 4.1459, "theoretical_loss": 3.912948670884827, "tokens_seen": 502267904 }, { "epoch": 0.18, "learning_rate": 0.0004143019296254257, "loss": 4.1465, "theoretical_loss": 3.912070648546038, "tokens_seen": 503316480 }, { "epoch": 0.18, "learning_rate": 0.0004141127506621264, "loss": 4.1508, "theoretical_loss": 3.9111949644838386, "tokens_seen": 504365056 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.4696812927722931, "objective/train/docs_used": 295292, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.771225690841675, "objective/train/original_loss": 3.7712254524230957, "objective/train/theoretical_loss": 3.910976407525199, "objective/train/tokens_used": 525087200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23411457240581512, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481582880020142, "objective/train/weighted_lm_loss": 3.9577152729034424, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.95467609167099, "theoretical_loss": 3.910976407525199, "tokens_seen": 504627200 }, { "epoch": 0.18, "learning_rate": 0.00041392357169882713, "loss": 4.1547, "theoretical_loss": 3.910321607629225, "tokens_seen": 505413632 }, { "epoch": 0.18, "learning_rate": 0.0004137343927355278, "loss": 4.1705, "theoretical_loss": 3.9094505669884168, "tokens_seen": 506462208 }, { "epoch": 0.18, "learning_rate": 0.0004135452137722285, "loss": 4.1505, "theoretical_loss": 3.9085818316421945, "tokens_seen": 507510784 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.4871479272842407, "objective/train/docs_used": 297338, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.9234955310821533, "objective/train/original_loss": 3.923495292663574, "objective/train/theoretical_loss": 3.9082566480052314, "objective/train/tokens_used": 528364000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23970387876033783, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499333143234253, "objective/train/weighted_lm_loss": 4.120626449584961, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9618850350379944, "theoretical_loss": 3.9082566480052314, "tokens_seen": 507904000 }, { "epoch": 0.18, "learning_rate": 0.00041335603480892926, "loss": 4.1253, "theoretical_loss": 3.9077153907452367, "tokens_seen": 508559360 }, { "epoch": 0.18, "learning_rate": 0.00041316685584562996, "loss": 4.1998, "theoretical_loss": 3.9068512335254724, "tokens_seen": 509607936 }, { "epoch": 0.18, "learning_rate": 0.0004129776768823307, "loss": 4.145, "theoretical_loss": 3.905989349283435, "tokens_seen": 510656512 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.46928656101226807, "objective/train/docs_used": 299351, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.7585744857788086, "objective/train/original_loss": 3.7585747241973877, "objective/train/theoretical_loss": 3.9055592562051764, "objective/train/tokens_used": 531640800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23103711009025574, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481030941009521, "objective/train/weighted_lm_loss": 3.9365973472595215, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9520386457443237, "theoretical_loss": 3.9055592562051764, "tokens_seen": 511180800 }, { "epoch": 0.18, "learning_rate": 0.0004127884979190314, "loss": 4.1397, "theoretical_loss": 3.9051297273916257, "tokens_seen": 511705088 }, { "epoch": 0.18, "learning_rate": 0.0004125993189557321, "loss": 4.0981, "theoretical_loss": 3.9042723572938836, "tokens_seen": 512753664 }, { "epoch": 0.18, "learning_rate": 0.0004124101399924329, "loss": 4.0203, "theoretical_loss": 3.9034172285047597, "tokens_seen": 513802240 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.4936114549636841, "objective/train/docs_used": 301377, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.8155317306518555, "objective/train/original_loss": 3.8155317306518555, "objective/train/theoretical_loss": 3.9028839065452745, "objective/train/tokens_used": 534917600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24442671239376068, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506037473678589, "objective/train/weighted_lm_loss": 4.008247375488281, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9937735795974731, "theoretical_loss": 3.9028839065452745, "tokens_seen": 514457600 }, { "epoch": 0.18, "learning_rate": 0.0004122209610291336, "loss": 4.0773, "theoretical_loss": 3.902564330608904, "tokens_seen": 514850816 }, { "epoch": 0.18, "learning_rate": 0.0004120317820658343, "loss": 4.0716, "theoretical_loss": 3.901713653260452, "tokens_seen": 515899392 }, { "epoch": 0.18, "learning_rate": 0.000411842603102535, "loss": 4.114, "theoretical_loss": 3.900865186182421, "tokens_seen": 516947968 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.4887354075908661, "objective/train/docs_used": 303002, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.991806983947754, "objective/train/original_loss": 3.991806983947754, "objective/train/theoretical_loss": 3.9002302802201427, "objective/train/tokens_used": 538194400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.243209108710289, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501097440719604, "objective/train/weighted_lm_loss": 4.191769123077393, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9525777697563171, "theoretical_loss": 3.9002302802201427, "tokens_seen": 517734400 }, { "epoch": 0.19, "learning_rate": 0.0004116534241392357, "loss": 4.1111, "theoretical_loss": 3.9000189191661163, "tokens_seen": 517996544 }, { "epoch": 0.19, "learning_rate": 0.00041146424517593647, "loss": 4.0989, "theoretical_loss": 3.8991748420705363, "tokens_seen": 519045120 }, { "epoch": 0.19, "learning_rate": 0.00041127506621263716, "loss": 4.112, "theoretical_loss": 3.8983329448217905, "tokens_seen": 520093696 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.4678727686405182, "objective/train/docs_used": 304925, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.8752501010894775, "objective/train/original_loss": 3.8752501010894775, "objective/train/theoretical_loss": 3.8975980650160067, "objective/train/tokens_used": 541471200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22843264043331146, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0479482412338257, "objective/train/weighted_lm_loss": 4.060460567474365, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.9528185129165649, "theoretical_loss": 3.8975980650160067, "tokens_seen": 521011200 }, { "epoch": 0.19, "learning_rate": 0.00041108588724933785, "loss": 4.0669, "theoretical_loss": 3.8974932174125194, "tokens_seen": 521142272 }, { "epoch": 0.19, "learning_rate": 0.0004108967082860386, "loss": 4.147, "theoretical_loss": 3.896655649901324, "tokens_seen": 522190848 }, { "epoch": 0.19, "learning_rate": 0.0004107075293227393, "loss": 4.117, "theoretical_loss": 3.8958202324121984, "tokens_seen": 523239424 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.48702916502952576, "objective/train/docs_used": 306825, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.5543291568756104, "objective/train/original_loss": 3.5543293952941895, "objective/train/theoretical_loss": 3.8949869551339704, "objective/train/tokens_used": 544748000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23956939578056335, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049920678138733, "objective/train/weighted_lm_loss": 3.730886936187744, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9572703242301941, "theoretical_loss": 3.8949869551339704, "tokens_seen": 524288000 }, { "epoch": 0.19, "learning_rate": 0.00041051835035944004, "loss": 4.0828, "theoretical_loss": 3.8949869551339704, "tokens_seen": 524288000 }, { "epoch": 0.19, "learning_rate": 0.00041032917139614074, "loss": 4.0647, "theoretical_loss": 3.8941558083197467, "tokens_seen": 525336576 }, { "epoch": 0.19, "learning_rate": 0.0004101399924328415, "loss": 4.0718, "theoretical_loss": 3.8933267822863646, "tokens_seen": 526385152 }, { "epoch": 0.19, "learning_rate": 0.00040995081346954223, "loss": 4.046, "theoretical_loss": 3.8924998674138487, "tokens_seen": 527433728 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.4876173734664917, "objective/train/docs_used": 308042, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.9242730140686035, "objective/train/original_loss": 3.9242730140686035, "objective/train/theoretical_loss": 3.892396651019104, "objective/train/tokens_used": 548024800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24128563702106476, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499881505966187, "objective/train/weighted_lm_loss": 4.1212382316589355, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9571343064308167, "theoretical_loss": 3.892396651019104, "tokens_seen": 527564800 }, { "epoch": 0.19, "learning_rate": 0.0004097616345062429, "loss": 4.1247, "theoretical_loss": 3.8916750541448764, "tokens_seen": 528482304 }, { "epoch": 0.19, "learning_rate": 0.0004095724555429436, "loss": 4.1589, "theoretical_loss": 3.890852332984242, "tokens_seen": 529530880 }, { "epoch": 0.19, "learning_rate": 0.00040938327657964437, "loss": 4.1626, "theoretical_loss": 3.890031694498337, "tokens_seen": 530579456 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.47788065671920776, "objective/train/docs_used": 310103, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.8699586391448975, "objective/train/original_loss": 3.8699588775634766, "objective/train/theoretical_loss": 3.889826859195108, "objective/train/tokens_used": 551301600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23559047281742096, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048985481262207, "objective/train/weighted_lm_loss": 4.060449600219727, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9523392915725708, "theoretical_loss": 3.889826859195108, "tokens_seen": 530841600 }, { "epoch": 0.19, "learning_rate": 0.00040919409761634506, "loss": 4.0974, "theoretical_loss": 3.8892131293146237, "tokens_seen": 531628032 }, { "epoch": 0.19, "learning_rate": 0.0004090049186530458, "loss": 3.9809, "theoretical_loss": 3.888396628121124, "tokens_seen": 532676608 }, { "epoch": 0.19, "learning_rate": 0.0004088157396897465, "loss": 4.0358, "theoretical_loss": 3.887582181665909, "tokens_seen": 533725184 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.4851924479007721, "objective/train/docs_used": 311341, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.86273193359375, "objective/train/original_loss": 3.862731695175171, "objective/train/theoretical_loss": 3.887277292104349, "objective/train/tokens_used": 554578400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24329021573066711, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497558116912842, "objective/train/weighted_lm_loss": 4.054152965545654, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9514743685722351, "theoretical_loss": 3.887277292104349, "tokens_seen": 534118400 }, { "epoch": 0.19, "learning_rate": 0.0004086265607264472, "loss": 4.0574, "theoretical_loss": 3.8867697807565937, "tokens_seen": 534773760 }, { "epoch": 0.19, "learning_rate": 0.00040843738176314794, "loss": 4.0136, "theoretical_loss": 3.8859594162598396, "tokens_seen": 535822336 }, { "epoch": 0.19, "learning_rate": 0.00040824820279984864, "loss": 4.1343, "theoretical_loss": 3.8851510791008588, "tokens_seen": 536870912 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.47740688920021057, "objective/train/docs_used": 313016, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.9105546474456787, "objective/train/original_loss": 3.910554885864258, "objective/train/theoretical_loss": 3.884747667953053, "objective/train/tokens_used": 557855200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2320510745048523, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489201545715332, "objective/train/weighted_lm_loss": 4.102197170257568, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9539920091629028, "theoretical_loss": 3.884747667953053, "tokens_seen": 537395200 }, { "epoch": 0.19, "learning_rate": 0.0004080590238365494, "loss": 4.1085, "theoretical_loss": 3.8843447602629277, "tokens_seen": 537919488 }, { "epoch": 0.19, "learning_rate": 0.00040786984487325013, "loss": 4.0926, "theoretical_loss": 3.883540450786901, "tokens_seen": 538968064 }, { "epoch": 0.19, "learning_rate": 0.0004076806659099508, "loss": 4.0265, "theoretical_loss": 3.8827381417707327, "tokens_seen": 540016640 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.4817216992378235, "objective/train/docs_used": 314826, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.573957681655884, "objective/train/original_loss": 3.573957920074463, "objective/train/theoretical_loss": 3.8822377105614674, "objective/train/tokens_used": 561132000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2370116412639618, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493767261505127, "objective/train/weighted_lm_loss": 3.7496209144592285, "objective/train/weights_max": 1.0512157678604126, "objective/train/weights_min": 0.9515175819396973, "theoretical_loss": 3.8822377105614674, "tokens_seen": 540672000 }, { "epoch": 0.19, "learning_rate": 0.00040749148694665157, "loss": 4.0104, "theoretical_loss": 3.8819378243690044, "tokens_seen": 541065216 }, { "epoch": 0.19, "learning_rate": 0.00040730230798335227, "loss": 4.0048, "theoretical_loss": 3.881139489792454, "tokens_seen": 542113792 }, { "epoch": 0.19, "learning_rate": 0.00040711312902005296, "loss": 4.0798, "theoretical_loss": 3.880343129307512, "tokens_seen": 543162368 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.47069650888442993, "objective/train/docs_used": 316612, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.242428302764893, "objective/train/original_loss": 4.242428779602051, "objective/train/theoretical_loss": 3.8797471492187987, "objective/train/tokens_used": 564408800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22920586168766022, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0482345819473267, "objective/train/weighted_lm_loss": 4.447694301605225, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.962023138999939, "theoretical_loss": 3.8797471492187987, "tokens_seen": 543948800 }, { "epoch": 0.19, "learning_rate": 0.0004069239500567537, "loss": 4.1018, "theoretical_loss": 3.879548734235843, "tokens_seen": 544210944 }, { "epoch": 0.19, "learning_rate": 0.0004067347710934544, "loss": 4.0261, "theoretical_loss": 3.878756295953889, "tokens_seen": 545259520 }, { "epoch": 0.2, "learning_rate": 0.00040654559213015515, "loss": 4.1229, "theoretical_loss": 3.87796580589242, "tokens_seen": 546308096 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.4922097623348236, "objective/train/docs_used": 318614, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.817476749420166, "objective/train/original_loss": 3.817476511001587, "objective/train/theoretical_loss": 3.877275718542742, "objective/train/tokens_used": 567685600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2433522641658783, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504580736160278, "objective/train/weighted_lm_loss": 4.011016368865967, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9557735323905945, "theoretical_loss": 3.877275718542742, "tokens_seen": 547225600 }, { "epoch": 0.2, "learning_rate": 0.00040635641316685584, "loss": 4.0408, "theoretical_loss": 3.877177255536089, "tokens_seen": 547356672 }, { "epoch": 0.2, "learning_rate": 0.00040616723420355653, "loss": 4.0115, "theoretical_loss": 3.8763906364229888, "tokens_seen": 548405248 }, { "epoch": 0.2, "learning_rate": 0.0004059780552402573, "loss": 3.9969, "theoretical_loss": 3.875605940144217, "tokens_seen": 549453824 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.4792472720146179, "objective/train/docs_used": 320433, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.018838405609131, "objective/train/original_loss": 4.018837928771973, "objective/train/theoretical_loss": 3.8748231583434425, "objective/train/tokens_used": 570962400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24296848475933075, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491595268249512, "objective/train/weighted_lm_loss": 4.21675968170166, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9513953924179077, "theoretical_loss": 3.8748231583434425, "tokens_seen": 550502400 }, { "epoch": 0.2, "learning_rate": 0.000405788876276958, "loss": 3.9935, "theoretical_loss": 3.8748231583434425, "tokens_seen": 550502400 }, { "epoch": 0.2, "learning_rate": 0.0004055996973136588, "loss": 3.9662, "theoretical_loss": 3.8740422827164784, "tokens_seen": 551550976 }, { "epoch": 0.2, "learning_rate": 0.00040541051835035947, "loss": 4.0177, "theoretical_loss": 3.873263305010858, "tokens_seen": 552599552 }, { "epoch": 0.2, "learning_rate": 0.00040522133938706016, "loss": 4.0032, "theoretical_loss": 3.872486217025413, "tokens_seen": 553648128 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.46256640553474426, "objective/train/docs_used": 322451, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.843613386154175, "objective/train/original_loss": 3.843613624572754, "objective/train/theoretical_loss": 3.872389213491709, "objective/train/tokens_used": 574239200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23105382919311523, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0474309921264648, "objective/train/weighted_lm_loss": 4.021306037902832, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9536635279655457, "theoretical_loss": 3.872389213491709, "tokens_seen": 553779200 }, { "epoch": 0.2, "learning_rate": 0.0004050321604237609, "loss": 3.9792, "theoretical_loss": 3.8717110106098627, "tokens_seen": 554696704 }, { "epoch": 0.2, "learning_rate": 0.0004048429814604616, "loss": 4.0535, "theoretical_loss": 3.870937677664398, "tokens_seen": 555745280 }, { "epoch": 0.2, "learning_rate": 0.00040465380249716235, "loss": 4.0048, "theoretical_loss": 3.870166210139278, "tokens_seen": 556793856 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.4924386441707611, "objective/train/docs_used": 324068, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.743295431137085, "objective/train/original_loss": 3.743295431137085, "objective/train/theoretical_loss": 3.869973633791332, "objective/train/tokens_used": 577516000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24441003799438477, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504863262176514, "objective/train/weighted_lm_loss": 3.932000160217285, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9550192356109619, "theoretical_loss": 3.869973633791332, "tokens_seen": 557056000 }, { "epoch": 0.2, "learning_rate": 0.00040446462353386305, "loss": 4.0371, "theoretical_loss": 3.8693966000344253, "tokens_seen": 557842432 }, { "epoch": 0.2, "learning_rate": 0.00040427544457056374, "loss": 4.0484, "theoretical_loss": 3.868628839399026, "tokens_seen": 558891008 }, { "epoch": 0.2, "learning_rate": 0.0004040862656072645, "loss": 4.0689, "theoretical_loss": 3.8678629203311368, "tokens_seen": 559939584 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.48858821392059326, "objective/train/docs_used": 325860, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.9442625045776367, "objective/train/original_loss": 3.9442625045776367, "objective/train/theoretical_loss": 3.8675761738553596, "objective/train/tokens_used": 580792800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2418859302997589, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500881671905518, "objective/train/weighted_lm_loss": 4.1417741775512695, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9529458284378052, "theoretical_loss": 3.8675761738553596, "tokens_seen": 560332800 }, { "epoch": 0.2, "learning_rate": 0.0004038970866439652, "loss": 4.0605, "theoretical_loss": 3.8670988349772912, "tokens_seen": 560988160 }, { "epoch": 0.2, "learning_rate": 0.0004037079076806659, "loss": 3.9893, "theoretical_loss": 3.8663365755321157, "tokens_seen": 562036736 }, { "epoch": 0.2, "learning_rate": 0.0004035187287173666, "loss": 4.0399, "theoretical_loss": 3.865576134237943, "tokens_seen": 563085312 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.4872007668018341, "objective/train/docs_used": 327873, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.9615707397460938, "objective/train/original_loss": 3.9615705013275146, "objective/train/theoretical_loss": 3.8651965929861625, "objective/train/tokens_used": 584069600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23878945410251617, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499337911605835, "objective/train/weighted_lm_loss": 4.16065788269043, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.954364538192749, "theoretical_loss": 3.8651965929861625, "tokens_seen": 563609600 }, { "epoch": 0.2, "learning_rate": 0.00040332954975406737, "loss": 3.9192, "theoretical_loss": 3.8648175033844323, "tokens_seen": 564133888 }, { "epoch": 0.2, "learning_rate": 0.0004031403707907681, "loss": 3.9902, "theoretical_loss": 3.8640606753081954, "tokens_seen": 565182464 }, { "epoch": 0.2, "learning_rate": 0.0004029511918274688, "loss": 4.0045, "theoretical_loss": 3.8633056423924232, "tokens_seen": 566231040 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.4880378842353821, "objective/train/docs_used": 329813, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.8092806339263916, "objective/train/original_loss": 3.8092806339263916, "objective/train/theoretical_loss": 3.8628346550591868, "objective/train/tokens_used": 587346400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24268953502178192, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500373840332031, "objective/train/weighted_lm_loss": 3.999559164047241, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9522239565849304, "theoretical_loss": 3.8628346550591868, "tokens_seen": 566886400 }, { "epoch": 0.2, "learning_rate": 0.0004027620128641695, "loss": 3.9948, "theoretical_loss": 3.8625523970665174, "tokens_seen": 567279616 }, { "epoch": 0.2, "learning_rate": 0.00040257283390087025, "loss": 3.9634, "theoretical_loss": 3.8618009318057234, "tokens_seen": 568328192 }, { "epoch": 0.2, "learning_rate": 0.00040238365493757095, "loss": 3.984, "theoretical_loss": 3.861051239130771, "tokens_seen": 569376768 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.4942159652709961, "objective/train/docs_used": 331022, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.885915756225586, "objective/train/original_loss": 3.885915517807007, "objective/train/theoretical_loss": 3.8604901284102264, "objective/train/tokens_used": 590623200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24478492140769958, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506659746170044, "objective/train/weighted_lm_loss": 4.08283805847168, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 1.01108717918396, "theoretical_loss": 3.8604901284102264, "tokens_seen": 570163200 }, { "epoch": 0.2, "learning_rate": 0.0004021944759742717, "loss": 3.956, "theoretical_loss": 3.860303311607516, "tokens_seen": 570425344 }, { "epoch": 0.2, "learning_rate": 0.0004020052970109724, "loss": 3.9688, "theoretical_loss": 3.859557141846584, "tokens_seen": 571473920 }, { "epoch": 0.2, "learning_rate": 0.0004018161180476731, "loss": 3.9664, "theoretical_loss": 3.858812722503022, "tokens_seen": 572522496 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.48668184876441956, "objective/train/docs_used": 332875, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.9940710067749023, "objective/train/original_loss": 3.994070529937744, "objective/train/theoretical_loss": 3.8581627857261136, "objective/train/tokens_used": 593900000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23973610997200012, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498868227005005, "objective/train/weighted_lm_loss": 4.192169189453125, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9601668119430542, "theoretical_loss": 3.8581627857261136, "tokens_seen": 573440000 }, { "epoch": 0.2, "learning_rate": 0.00040162693908437383, "loss": 3.9147, "theoretical_loss": 3.8580700462759463, "tokens_seen": 573571072 }, { "epoch": 0.21, "learning_rate": 0.0004014377601210745, "loss": 3.8744, "theoretical_loss": 3.857329105908203, "tokens_seen": 574619648 }, { "epoch": 0.21, "learning_rate": 0.0004012485811577752, "loss": 3.8891, "theoretical_loss": 3.8565898941860244, "tokens_seen": 575668224 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.48827463388442993, "objective/train/docs_used": 334850, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.9289157390594482, "objective/train/original_loss": 3.928915500640869, "objective/train/theoretical_loss": 3.855852403938689, "objective/train/tokens_used": 597176800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24086636304855347, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500519275665283, "objective/train/weighted_lm_loss": 4.12490701675415, "objective/train/weights_max": 1.0512205362319946, "objective/train/weights_min": 0.9660784602165222, "theoretical_loss": 3.855852403938689, "tokens_seen": 576716800 }, { "epoch": 0.21, "learning_rate": 0.000401059402194476, "loss": 3.9544, "theoretical_loss": 3.855852403938689, "tokens_seen": 576716800 }, { "epoch": 0.21, "learning_rate": 0.0004008702232311767, "loss": 3.8938, "theoretical_loss": 3.8551166280381928, "tokens_seen": 577765376 }, { "epoch": 0.21, "learning_rate": 0.00040068104426787746, "loss": 3.9262, "theoretical_loss": 3.854382559398911, "tokens_seen": 578813952 }, { "epoch": 0.21, "learning_rate": 0.00040049186530457815, "loss": 3.9064, "theoretical_loss": 3.8536501909772745, "tokens_seen": 579862528 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.4799477756023407, "objective/train/docs_used": 336483, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.157769680023193, "objective/train/original_loss": 4.157769203186035, "objective/train/theoretical_loss": 3.8535587641219466, "objective/train/tokens_used": 600453600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23627838492393494, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049195647239685, "objective/train/weighted_lm_loss": 4.361729621887207, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9601474404335022, "theoretical_loss": 3.8535587641219466, "tokens_seen": 579993600 }, { "epoch": 0.21, "learning_rate": 0.00040030268634127884, "loss": 3.9204, "theoretical_loss": 3.852919515771444, "tokens_seen": 580911104 }, { "epoch": 0.21, "learning_rate": 0.0004001135073779796, "loss": 3.8705, "theoretical_loss": 3.8521905268209857, "tokens_seen": 581959680 }, { "epoch": 0.21, "learning_rate": 0.0003999243284146803, "loss": 3.8418, "theoretical_loss": 3.851463217206555, "tokens_seen": 583008256 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.4861145317554474, "objective/train/docs_used": 338617, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.5503835678100586, "objective/train/original_loss": 3.5503835678100586, "objective/train/theoretical_loss": 3.8512816513922274, "objective/train/tokens_used": 603730400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23959481716156006, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498292446136475, "objective/train/weighted_lm_loss": 3.7280282974243164, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9553146362304688, "theoretical_loss": 3.8512816513922274, "tokens_seen": 583270400 }, { "epoch": 0.21, "learning_rate": 0.00039973514945138103, "loss": 3.9243, "theoretical_loss": 3.85073758004958, "tokens_seen": 584056832 }, { "epoch": 0.21, "learning_rate": 0.0003995459704880817, "loss": 3.8881, "theoretical_loss": 3.850013608511947, "tokens_seen": 585105408 }, { "epoch": 0.21, "learning_rate": 0.0003993567915247824, "loss": 3.8762, "theoretical_loss": 3.8492912957956933, "tokens_seen": 586153984 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.49152064323425293, "objective/train/docs_used": 340897, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.6887147426605225, "objective/train/original_loss": 3.6887147426605225, "objective/train/theoretical_loss": 3.849020854811377, "objective/train/tokens_used": 607007200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24309617280960083, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503878593444824, "objective/train/weighted_lm_loss": 3.874969959259033, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.955743134021759, "theoretical_loss": 3.849020854811377, "tokens_seen": 586547200 }, { "epoch": 0.21, "learning_rate": 0.00039916761256148317, "loss": 3.8016, "theoretical_loss": 3.848570635142696, "tokens_seen": 587202560 }, { "epoch": 0.21, "learning_rate": 0.00039897843359818386, "loss": 3.868, "theoretical_loss": 3.8478516198343717, "tokens_seen": 588251136 }, { "epoch": 0.21, "learning_rate": 0.0003987892546348846, "loss": 3.8427, "theoretical_loss": 3.847134243191375, "tokens_seen": 589299712 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.4828517436981201, "objective/train/docs_used": 342928, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4539072513580322, "objective/train/original_loss": 3.4539074897766113, "objective/train/theoretical_loss": 3.8467761672927336, "objective/train/tokens_used": 610284000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23826996982097626, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494961738586426, "objective/train/weighted_lm_loss": 3.62439227104187, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9543166756629944, "theoretical_loss": 3.8467761672927336, "tokens_seen": 589824000 }, { "epoch": 0.21, "learning_rate": 0.00039860007567158536, "loss": 3.8673, "theoretical_loss": 3.8464184985732968, "tokens_seen": 590348288 }, { "epoch": 0.21, "learning_rate": 0.00039841089670828605, "loss": 3.8051, "theoretical_loss": 3.845704379378372, "tokens_seen": 591396864 }, { "epoch": 0.21, "learning_rate": 0.0003982217177449868, "loss": 3.8316, "theoretical_loss": 3.8449918790431843, "tokens_seen": 592445440 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.4931422472000122, "objective/train/docs_used": 344591, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.7263171672821045, "objective/train/original_loss": 3.7263169288635254, "objective/train/theoretical_loss": 3.844547385509876, "objective/train/tokens_used": 613560800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24458102881908417, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505576133728027, "objective/train/weighted_lm_loss": 3.9147326946258545, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.958858847618103, "theoretical_loss": 3.844547385509876, "tokens_seen": 593100800 }, { "epoch": 0.21, "learning_rate": 0.0003980325387816875, "loss": 3.7537, "theoretical_loss": 3.8442809910423783, "tokens_seen": 593494016 }, { "epoch": 0.21, "learning_rate": 0.0003978433598183882, "loss": 3.8097, "theoretical_loss": 3.8435717088883696, "tokens_seen": 594542592 }, { "epoch": 0.21, "learning_rate": 0.00039765418085508893, "loss": 3.8132, "theoretical_loss": 3.842864026131061, "tokens_seen": 595591168 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.4828213155269623, "objective/train/docs_used": 346610, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.7849457263946533, "objective/train/original_loss": 3.784945487976074, "objective/train/theoretical_loss": 3.8423343098080185, "objective/train/tokens_used": 616837600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23697614669799805, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494866371154785, "objective/train/weighted_lm_loss": 3.9723143577575684, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9641447067260742, "theoretical_loss": 3.8423343098080185, "tokens_seen": 596377600 }, { "epoch": 0.21, "learning_rate": 0.0003974650018917896, "loss": 3.8631, "theoretical_loss": 3.8421579363575615, "tokens_seen": 596639744 }, { "epoch": 0.21, "learning_rate": 0.0003972758229284904, "loss": 3.8146, "theoretical_loss": 3.841453433191904, "tokens_seen": 597688320 }, { "epoch": 0.21, "learning_rate": 0.00039708664396519107, "loss": 3.6873, "theoretical_loss": 3.8407505102947725, "tokens_seen": 598736896 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.4851977527141571, "objective/train/docs_used": 348552, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.527412176132202, "objective/train/original_loss": 3.527411937713623, "objective/train/theoretical_loss": 3.8401367441179683, "objective/train/tokens_used": 620114400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23921814560890198, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04973566532135, "objective/train/weighted_lm_loss": 3.7025134563446045, "objective/train/weights_max": 1.0512197017669678, "objective/train/weights_min": 0.9565510153770447, "theoretical_loss": 3.8401367441179683, "tokens_seen": 599654400 }, { "epoch": 0.21, "learning_rate": 0.00039689746500189176, "loss": 3.8363, "theoretical_loss": 3.840049161363223, "tokens_seen": 599785472 }, { "epoch": 0.21, "learning_rate": 0.0003967082860385925, "loss": 3.8554, "theoretical_loss": 3.839349380130415, "tokens_seen": 600834048 }, { "epoch": 0.21, "learning_rate": 0.00039651910707529326, "loss": 3.8245, "theoretical_loss": 3.838651160365341, "tokens_seen": 601882624 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.4847736060619354, "objective/train/docs_used": 349996, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.6133832931518555, "objective/train/original_loss": 3.6133837699890137, "objective/train/theoretical_loss": 3.837954495872559, "objective/train/tokens_used": 623391200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24165962636470795, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497056245803833, "objective/train/weighted_lm_loss": 3.7934508323669434, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9520853757858276, "theoretical_loss": 3.837954495872559, "tokens_seen": 602931200 }, { "epoch": 0.22, "learning_rate": 0.00039632992811199395, "loss": 3.8619, "theoretical_loss": 3.837954495872559, "tokens_seen": 602931200 }, { "epoch": 0.22, "learning_rate": 0.0003961407491486947, "loss": 3.7791, "theoretical_loss": 3.837259380491929, "tokens_seen": 603979776 }, { "epoch": 0.22, "learning_rate": 0.0003959515701853954, "loss": 3.8418, "theoretical_loss": 3.836565808098351, "tokens_seen": 605028352 }, { "epoch": 0.22, "learning_rate": 0.00039576239122209614, "loss": 3.7307, "theoretical_loss": 3.835873772601505, "tokens_seen": 606076928 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.4784153997898102, "objective/train/docs_used": 351917, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.869309663772583, "objective/train/original_loss": 3.869309902191162, "objective/train/theoretical_loss": 3.8357873759254693, "objective/train/tokens_used": 626668000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23981763422489166, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490604639053345, "objective/train/weighted_lm_loss": 4.058266639709473, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9513825178146362, "theoretical_loss": 3.8357873759254693, "tokens_seen": 606208000 }, { "epoch": 0.22, "learning_rate": 0.00039557321225879683, "loss": 3.8473, "theoretical_loss": 3.8351832679455935, "tokens_seen": 607125504 }, { "epoch": 0.22, "learning_rate": 0.0003953840332954975, "loss": 3.8616, "theoretical_loss": 3.834494288109086, "tokens_seen": 608174080 }, { "epoch": 0.22, "learning_rate": 0.00039519485433219827, "loss": 3.7887, "theoretical_loss": 3.8338068271044703, "tokens_seen": 609222656 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.4771646559238434, "objective/train/docs_used": 353838, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.676715850830078, "objective/train/original_loss": 3.6767160892486572, "objective/train/theoretical_loss": 3.833635198472356, "objective/train/tokens_used": 629944800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24118247628211975, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489422082901, "objective/train/weighted_lm_loss": 3.8588476181030273, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9538320899009705, "theoretical_loss": 3.833635198472356, "tokens_seen": 609484800 }, { "epoch": 0.22, "learning_rate": 0.00039500567536889897, "loss": 3.734, "theoretical_loss": 3.8331208789779954, "tokens_seen": 610271232 }, { "epoch": 0.22, "learning_rate": 0.0003948164964055997, "loss": 3.7623, "theoretical_loss": 3.83243643780943, "tokens_seen": 611319808 }, { "epoch": 0.22, "learning_rate": 0.0003946273174423004, "loss": 3.7609, "theoretical_loss": 3.8317534977118117, "tokens_seen": 612368384 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.48455891013145447, "objective/train/docs_used": 355739, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.41206693649292, "objective/train/original_loss": 3.4120664596557617, "objective/train/theoretical_loss": 3.831497780974214, "objective/train/tokens_used": 633221600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24124765396118164, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496820211410522, "objective/train/weighted_lm_loss": 3.581446647644043, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9515277147293091, "theoretical_loss": 3.831497780974214, "tokens_seen": 612761600 }, { "epoch": 0.22, "learning_rate": 0.0003944381384790011, "loss": 3.712, "theoretical_loss": 3.8310720528312077, "tokens_seen": 613416960 }, { "epoch": 0.22, "learning_rate": 0.0003942489595157019, "loss": 3.8369, "theoretical_loss": 3.830392097346471, "tokens_seen": 614465536 }, { "epoch": 0.22, "learning_rate": 0.0003940597805524026, "loss": 3.783, "theoretical_loss": 3.8297136254690005, "tokens_seen": 615514112 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.4844302237033844, "objective/train/docs_used": 357525, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.7821826934814453, "objective/train/original_loss": 3.782182216644287, "objective/train/theoretical_loss": 3.829374944082894, "objective/train/tokens_used": 636498400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2419605404138565, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496729612350464, "objective/train/weighted_lm_loss": 3.9696102142333984, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9518074989318848, "theoretical_loss": 3.829374944082894, "tokens_seen": 616038400 }, { "epoch": 0.22, "learning_rate": 0.00039387060158910334, "loss": 3.7717, "theoretical_loss": 3.829036631442506, "tokens_seen": 616562688 }, { "epoch": 0.22, "learning_rate": 0.00039368142262580404, "loss": 3.8207, "theoretical_loss": 3.8283611095427723, "tokens_seen": 617611264 }, { "epoch": 0.22, "learning_rate": 0.00039349224366250473, "loss": 3.8517, "theoretical_loss": 3.827687054077426, "tokens_seen": 618659840 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.48121803998947144, "objective/train/docs_used": 359292, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.7164201736450195, "objective/train/original_loss": 3.7164201736450195, "objective/train/theoretical_loss": 3.8272665115687077, "objective/train/tokens_used": 639775200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23925188183784485, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493377447128296, "objective/train/weighted_lm_loss": 3.8987090587615967, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9516848921775818, "theoretical_loss": 3.8272665115687077, "tokens_seen": 619315200 }, { "epoch": 0.22, "learning_rate": 0.0003933030646992055, "loss": 3.7766, "theoretical_loss": 3.8270144593857056, "tokens_seen": 619708416 }, { "epoch": 0.22, "learning_rate": 0.00039311388573590617, "loss": 3.8282, "theoretical_loss": 3.8263433198382324, "tokens_seen": 620756992 }, { "epoch": 0.22, "learning_rate": 0.00039292470677260686, "loss": 3.8165, "theoretical_loss": 3.825673629836783, "tokens_seen": 621805568 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.4897088408470154, "objective/train/docs_used": 360813, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4187488555908203, "objective/train/original_loss": 3.4187488555908203, "objective/train/theoretical_loss": 3.8251723102500437, "objective/train/tokens_used": 643052000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24293170869350433, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502057075500488, "objective/train/weighted_lm_loss": 3.590527057647705, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9520784020423889, "theoretical_loss": 3.8251723102500437, "tokens_seen": 622592000 }, { "epoch": 0.22, "learning_rate": 0.0003927355278093076, "loss": 3.8059, "theoretical_loss": 3.8250053838140663, "tokens_seen": 622854144 }, { "epoch": 0.22, "learning_rate": 0.0003925463488460083, "loss": 3.8233, "theoretical_loss": 3.8243385762335, "tokens_seen": 623902720 }, { "epoch": 0.22, "learning_rate": 0.00039235716988270905, "loss": 3.7707, "theoretical_loss": 3.8236732015889903, "tokens_seen": 624951296 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.4751550257205963, "objective/train/docs_used": 362724, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.419285535812378, "objective/train/original_loss": 3.419285774230957, "objective/train/theoretical_loss": 3.823092169924938, "objective/train/tokens_used": 646328800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2330601066350937, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048699975013733, "objective/train/weighted_lm_loss": 3.5884759426116943, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9552221894264221, "theoretical_loss": 3.823092169924938, "tokens_seen": 625868800 }, { "epoch": 0.22, "learning_rate": 0.00039216799091940975, "loss": 3.8137, "theoretical_loss": 3.8230092544047123, "tokens_seen": 625999872 }, { "epoch": 0.22, "learning_rate": 0.0003919788119561105, "loss": 3.8197, "theoretical_loss": 3.8223467292348943, "tokens_seen": 627048448 }, { "epoch": 0.22, "learning_rate": 0.00039178963299281124, "loss": 3.8388, "theoretical_loss": 3.8216856206636014, "tokens_seen": 628097024 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.47925102710723877, "objective/train/docs_used": 364484, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.616079568862915, "objective/train/original_loss": 3.6160800457000732, "objective/train/theoretical_loss": 3.8210259233045254, "objective/train/tokens_used": 649605600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23632030189037323, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049126148223877, "objective/train/weighted_lm_loss": 3.7934699058532715, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.956553041934967, "theoretical_loss": 3.8210259233045254, "tokens_seen": 629145600 }, { "epoch": 0.22, "learning_rate": 0.00039160045402951194, "loss": 3.8423, "theoretical_loss": 3.8210259233045254, "tokens_seen": 629145600 }, { "epoch": 0.23, "learning_rate": 0.0003914112750662127, "loss": 3.7631, "theoretical_loss": 3.8203676318007704, "tokens_seen": 630194176 }, { "epoch": 0.23, "learning_rate": 0.0003912220961029134, "loss": 3.7999, "theoretical_loss": 3.819710740824646, "tokens_seen": 631242752 }, { "epoch": 0.23, "learning_rate": 0.00039103291713961407, "loss": 3.7643, "theoretical_loss": 3.8190552450774584, "tokens_seen": 632291328 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.49162372946739197, "objective/train/docs_used": 365542, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.556671619415283, "objective/train/original_loss": 3.556671142578125, "objective/train/theoretical_loss": 3.8189734059483165, "objective/train/tokens_used": 652882400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2439402937889099, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504025220870972, "objective/train/weighted_lm_loss": 3.735517740249634, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9563419818878174, "theoretical_loss": 3.8189734059483165, "tokens_seen": 632422400 }, { "epoch": 0.23, "learning_rate": 0.0003908437381763148, "loss": 3.7508, "theoretical_loss": 3.818401139289306, "tokens_seen": 633339904 }, { "epoch": 0.23, "learning_rate": 0.0003906545592130155, "loss": 3.7896, "theoretical_loss": 3.8177484182188737, "tokens_seen": 634388480 }, { "epoch": 0.23, "learning_rate": 0.0003904653802497162, "loss": 3.7664, "theoretical_loss": 3.8170970766532326, "tokens_seen": 635437056 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.4842968285083771, "objective/train/docs_used": 367606, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4450738430023193, "objective/train/original_loss": 3.4450740814208984, "objective/train/theoretical_loss": 3.816934456201243, "objective/train/tokens_used": 656159200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23944607377052307, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496467351913452, "objective/train/weighted_lm_loss": 3.6150896549224854, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9515578150749207, "theoretical_loss": 3.816934456201243, "tokens_seen": 635699200 }, { "epoch": 0.23, "learning_rate": 0.00039027620128641695, "loss": 3.7711, "theoretical_loss": 3.816447109407641, "tokens_seen": 636485632 }, { "epoch": 0.23, "learning_rate": 0.00039008702232311765, "loss": 3.7285, "theoretical_loss": 3.815798511325341, "tokens_seen": 637534208 }, { "epoch": 0.23, "learning_rate": 0.0003898978433598184, "loss": 3.7098, "theoretical_loss": 3.8151512772773675, "tokens_seen": 638582784 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.4804910719394684, "objective/train/docs_used": 369550, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.8657212257385254, "objective/train/original_loss": 3.8657212257385254, "objective/train/theoretical_loss": 3.8149089151324036, "objective/train/tokens_used": 659436000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2408989667892456, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492734909057617, "objective/train/weighted_lm_loss": 4.05525541305542, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9521989822387695, "theoretical_loss": 3.8149089151324036, "tokens_seen": 638976000 }, { "epoch": 0.23, "learning_rate": 0.00038970866439651914, "loss": 3.7187, "theoretical_loss": 3.814505402162349, "tokens_seen": 639631360 }, { "epoch": 0.23, "learning_rate": 0.00038951948543321983, "loss": 3.7472, "theoretical_loss": 3.813860880906316, "tokens_seen": 640679936 }, { "epoch": 0.23, "learning_rate": 0.0003893303064699206, "loss": 3.6962, "theoretical_loss": 3.813217708462508, "tokens_seen": 641728512 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.4754175543785095, "objective/train/docs_used": 371294, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.896268606185913, "objective/train/original_loss": 3.896268606185913, "objective/train/theoretical_loss": 3.81289662647547, "objective/train/tokens_used": 662712800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23610548675060272, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487416982650757, "objective/train/weighted_lm_loss": 4.083958625793457, "objective/train/weights_max": 1.0512161254882812, "objective/train/weights_min": 0.9514708518981934, "theoretical_loss": 3.81289662647547, "tokens_seen": 642252800 }, { "epoch": 0.23, "learning_rate": 0.0003891411275066213, "loss": 3.7512, "theoretical_loss": 3.8125758798111864, "tokens_seen": 642777088 }, { "epoch": 0.23, "learning_rate": 0.000388951948543322, "loss": 3.7466, "theoretical_loss": 3.8119353899594413, "tokens_seen": 643825664 }, { "epoch": 0.23, "learning_rate": 0.0003887627695800227, "loss": 3.7158, "theoretical_loss": 3.8112962339410092, "tokens_seen": 644874240 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.48057374358177185, "objective/train/docs_used": 373458, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.5815200805664062, "objective/train/original_loss": 3.5815200805664062, "objective/train/theoretical_loss": 3.8108974365706887, "objective/train/tokens_used": 665989600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23741313815116882, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492641925811768, "objective/train/weighted_lm_loss": 3.756553888320923, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.96083664894104, "theoretical_loss": 3.8108974365706887, "tokens_seen": 645529600 }, { "epoch": 0.23, "learning_rate": 0.0003885735906167234, "loss": 3.761, "theoretical_loss": 3.810658406816085, "tokens_seen": 645922816 }, { "epoch": 0.23, "learning_rate": 0.00038838441165342416, "loss": 3.7984, "theoretical_loss": 3.8100219036711396, "tokens_seen": 646971392 }, { "epoch": 0.23, "learning_rate": 0.00038819523269012485, "loss": 3.6906, "theoretical_loss": 3.809386719618737, "tokens_seen": 648019968 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.48978403210639954, "objective/train/docs_used": 375609, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.641045331954956, "objective/train/original_loss": 3.641045331954956, "objective/train/theoretical_loss": 3.808911194308436, "objective/train/tokens_used": 669266400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24249985814094543, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502111911773682, "objective/train/weighted_lm_loss": 3.823545217514038, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9532389640808105, "theoretical_loss": 3.808911194308436, "tokens_seen": 648806400 }, { "epoch": 0.23, "learning_rate": 0.00038800605372682554, "loss": 3.6938, "theoretical_loss": 3.808752849797353, "tokens_seen": 649068544 }, { "epoch": 0.23, "learning_rate": 0.0003878168747635263, "loss": 3.7689, "theoretical_loss": 3.8081202893712005, "tokens_seen": 650117120 }, { "epoch": 0.23, "learning_rate": 0.000387627695800227, "loss": 3.7645, "theoretical_loss": 3.807489033530046, "tokens_seen": 651165696 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.4892757833003998, "objective/train/docs_used": 377261, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.345855712890625, "objective/train/original_loss": 3.345855712890625, "objective/train/theoretical_loss": 3.806937751074268, "objective/train/tokens_used": 672543200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24105043709278107, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501528978347778, "objective/train/weighted_lm_loss": 3.5135161876678467, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 1.0038224458694458, "theoretical_loss": 3.806937751074268, "tokens_seen": 652083200 }, { "epoch": 0.23, "learning_rate": 0.0003874385168369278, "loss": 3.7125, "theoretical_loss": 3.806859077489038, "tokens_seen": 652214272 }, { "epoch": 0.23, "learning_rate": 0.0003872493378736285, "loss": 3.6562, "theoretical_loss": 3.806230416488531, "tokens_seen": 653262848 }, { "epoch": 0.23, "learning_rate": 0.0003870601589103292, "loss": 3.7719, "theoretical_loss": 3.8056030457939114, "tokens_seen": 654311424 }, { "debugging/Self-BLEU-5": 0.5265375629586004, "debugging/distinct-1-grams": 0.7435820408094715, "debugging/distinct-2-grams": 0.9558103821233092, "debugging/entropy-1-grams": 5.931434510687563, "debugging/entropy-2-grams": 6.886416755326388, "debugging/length": 521.9230769230769, "debugging/num_segments": 13, "debugging/raw_token_scores_avg": 0.022742915898561478, "debugging/raw_token_scores_std": 0.07841178774833679, "epoch": 0.23, "objective/train/advantage_avg": 0.4772515594959259, "objective/train/docs_used": 379091, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.803166627883911, "objective/train/original_loss": 3.803165912628174, "objective/train/theoretical_loss": 3.804976960695429, "objective/train/tokens_used": 675820000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23392102122306824, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489141941070557, "objective/train/weighted_lm_loss": 3.988487958908081, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9569684863090515, "theoretical_loss": 3.804976960695429, "tokens_seen": 655360000 }, { "epoch": 0.23, "learning_rate": 0.0003868709799470299, "loss": 3.7882, "theoretical_loss": 3.804976960695429, "tokens_seen": 655360000 }, { "epoch": 0.23, "learning_rate": 0.0003866818009837306, "loss": 3.7545, "theoretical_loss": 3.8043521565080236, "tokens_seen": 656408576 }, { "epoch": 0.23, "learning_rate": 0.00038649262202043136, "loss": 3.8025, "theoretical_loss": 3.803728628571159, "tokens_seen": 657457152 }, { "epoch": 0.24, "learning_rate": 0.00038630344305713206, "loss": 3.6804, "theoretical_loss": 3.803106372248654, "tokens_seen": 658505728 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.4905237853527069, "objective/train/docs_used": 380902, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.74001145362854, "objective/train/original_loss": 3.740011692047119, "objective/train/theoretical_loss": 3.8030286793887647, "objective/train/tokens_used": 679096800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24173638224601746, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502811670303345, "objective/train/weighted_lm_loss": 3.927738666534424, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.984406590461731, "theoretical_loss": 3.8030286793887647, "tokens_seen": 658636800 }, { "epoch": 0.24, "learning_rate": 0.00038611426409383275, "loss": 3.7401, "theoretical_loss": 3.8024853829285172, "tokens_seen": 659554304 }, { "epoch": 0.24, "learning_rate": 0.0003859250851305335, "loss": 3.755, "theoretical_loss": 3.801865656022783, "tokens_seen": 660602880 }, { "epoch": 0.24, "learning_rate": 0.0003857359061672342, "loss": 3.731, "theoretical_loss": 3.801247186967348, "tokens_seen": 661651456 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.4816935658454895, "objective/train/docs_used": 382599, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.499560832977295, "objective/train/original_loss": 3.499560832977295, "objective/train/theoretical_loss": 3.8010927657100013, "objective/train/tokens_used": 682373600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24318966269493103, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494053363800049, "objective/train/weighted_lm_loss": 3.671086072921753, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9515833854675293, "theoretical_loss": 3.8010927657100013, "tokens_seen": 661913600 }, { "epoch": 0.24, "learning_rate": 0.0003855467272039349, "loss": 3.7489, "theoretical_loss": 3.8006299712218086, "tokens_seen": 662700032 }, { "epoch": 0.24, "learning_rate": 0.00038535754824063563, "loss": 3.8024, "theoretical_loss": 3.8000140042693022, "tokens_seen": 663748608 }, { "epoch": 0.24, "learning_rate": 0.0003851683692773364, "loss": 3.7713, "theoretical_loss": 3.799399281616348, "tokens_seen": 664797184 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.4905191957950592, "objective/train/docs_used": 384593, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 4.0451765060424805, "objective/train/original_loss": 4.0451765060424805, "objective/train/theoretical_loss": 3.7991690805043445, "objective/train/tokens_used": 685650400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24210280179977417, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050282597541809, "objective/train/weighted_lm_loss": 4.24802303314209, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9715428352355957, "theoretical_loss": 3.7991690805043445, "tokens_seen": 665190400 }, { "epoch": 0.24, "learning_rate": 0.00038497919031403713, "loss": 3.7883, "theoretical_loss": 3.798785798792688, "tokens_seen": 665845760 }, { "epoch": 0.24, "learning_rate": 0.0003847900113507378, "loss": 3.7983, "theoretical_loss": 3.798173551351132, "tokens_seen": 666894336 }, { "epoch": 0.24, "learning_rate": 0.0003846008323874385, "loss": 3.7106, "theoretical_loss": 3.797562534867401, "tokens_seen": 667942912 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.47869381308555603, "objective/train/docs_used": 386337, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.6440792083740234, "objective/train/original_loss": 3.6440794467926025, "objective/train/theoretical_loss": 3.797257486858361, "objective/train/tokens_used": 688927200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2353726178407669, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490657091140747, "objective/train/weighted_lm_loss": 3.8236608505249023, "objective/train/weights_max": 1.0512152910232544, "objective/train/weights_min": 0.9515171647071838, "theoretical_loss": 3.797257486858361, "tokens_seen": 668467200 }, { "epoch": 0.24, "learning_rate": 0.00038441165342413926, "loss": 3.7644, "theoretical_loss": 3.796952744939976, "tokens_seen": 668991488 }, { "epoch": 0.24, "learning_rate": 0.00038422247446083996, "loss": 3.7476, "theoretical_loss": 3.7963441771899418, "tokens_seen": 670040064 }, { "epoch": 0.24, "learning_rate": 0.0003840332954975407, "loss": 3.7805, "theoretical_loss": 3.795736827260839, "tokens_seen": 671088640 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.4898402690887451, "objective/train/docs_used": 388157, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.500314474105835, "objective/train/original_loss": 3.500314235687256, "objective/train/theoretical_loss": 3.795357850053097, "objective/train/tokens_used": 692204000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24193520843982697, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502138137817383, "objective/train/weighted_lm_loss": 3.6766164302825928, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9680190682411194, "theoretical_loss": 3.795357850053097, "tokens_seen": 671744000 }, { "epoch": 0.24, "learning_rate": 0.0003838441165342414, "loss": 3.7275, "theoretical_loss": 3.795130690818514, "tokens_seen": 672137216 }, { "epoch": 0.24, "learning_rate": 0.0003836549375709421, "loss": 3.6728, "theoretical_loss": 3.7945257635509657, "tokens_seen": 673185792 }, { "epoch": 0.24, "learning_rate": 0.00038346575860764284, "loss": 3.6878, "theoretical_loss": 3.793922041168204, "tokens_seen": 674234368 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.48224732279777527, "objective/train/docs_used": 390150, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4588170051574707, "objective/train/original_loss": 3.4588167667388916, "objective/train/theoretical_loss": 3.79347003751841, "objective/train/tokens_used": 695480800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23753906786441803, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494320392608643, "objective/train/weighted_lm_loss": 3.6302719116210938, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9518483877182007, "theoretical_loss": 3.79347003751841, "tokens_seen": 675020800 }, { "epoch": 0.24, "learning_rate": 0.00038327657964434353, "loss": 3.6997, "theoretical_loss": 3.7933195194020994, "tokens_seen": 675282944 }, { "epoch": 0.24, "learning_rate": 0.0003830874006810442, "loss": 3.7589, "theoretical_loss": 3.7927181940062407, "tokens_seen": 676331520 }, { "epoch": 0.24, "learning_rate": 0.000382898221717745, "loss": 3.6792, "theoretical_loss": 3.792118060755787, "tokens_seen": 677380096 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.4679737389087677, "objective/train/docs_used": 392039, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2993521690368652, "objective/train/original_loss": 3.299351930618286, "objective/train/theoretical_loss": 3.7915939187884558, "objective/train/tokens_used": 698757600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23201338946819305, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0479761362075806, "objective/train/weighted_lm_loss": 3.4587182998657227, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9521719217300415, "theoretical_loss": 3.7915939187884558, "tokens_seen": 678297600 }, { "epoch": 0.24, "learning_rate": 0.0003827090427544457, "loss": 3.7225, "theoretical_loss": 3.7915191154473287, "tokens_seen": 678428672 }, { "epoch": 0.24, "learning_rate": 0.00038251986379114647, "loss": 3.668, "theoretical_loss": 3.790921353898745, "tokens_seen": 679477248 }, { "epoch": 0.24, "learning_rate": 0.00038233068482784716, "loss": 3.6388, "theoretical_loss": 3.790324771949063, "tokens_seen": 680525824 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.4891808032989502, "objective/train/docs_used": 393961, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.5404582023620605, "objective/train/original_loss": 3.5404579639434814, "objective/train/theoretical_loss": 3.7897293654583164, "objective/train/tokens_used": 702034400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24321125447750092, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501543283462524, "objective/train/weighted_lm_loss": 3.718618869781494, "objective/train/weights_max": 1.0512158870697021, "objective/train/weights_min": 0.9517624974250793, "theoretical_loss": 3.7897293654583164, "tokens_seen": 681574400 }, { "epoch": 0.24, "learning_rate": 0.00038214150586454785, "loss": 3.6814, "theoretical_loss": 3.7897293654583164, "tokens_seen": 681574400 }, { "epoch": 0.24, "learning_rate": 0.0003819523269012486, "loss": 3.5899, "theoretical_loss": 3.7891351303074123, "tokens_seen": 682622976 }, { "epoch": 0.24, "learning_rate": 0.0003817631479379493, "loss": 3.5557, "theoretical_loss": 3.7885420623979886, "tokens_seen": 683671552 }, { "epoch": 0.24, "learning_rate": 0.00038157396897465004, "loss": 3.6581, "theoretical_loss": 3.787950157652282, "tokens_seen": 684720128 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.47339844703674316, "objective/train/docs_used": 395905, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.22224497795105, "objective/train/original_loss": 3.222245216369629, "objective/train/theoretical_loss": 3.7878762511417223, "objective/train/tokens_used": 705311200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23162895441055298, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048516869544983, "objective/train/weighted_lm_loss": 3.378674268722534, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9526793956756592, "theoretical_loss": 3.7878762511417223, "tokens_seen": 684851200 }, { "epoch": 0.24, "learning_rate": 0.00038138479001135074, "loss": 3.6211, "theoretical_loss": 3.78735941201299, "tokens_seen": 685768704 }, { "epoch": 0.25, "learning_rate": 0.00038119561104805143, "loss": 3.6447, "theoretical_loss": 3.786769821443141, "tokens_seen": 686817280 }, { "epoch": 0.25, "learning_rate": 0.0003810064320847522, "loss": 3.6458, "theoretical_loss": 3.7861813819259575, "tokens_seen": 687865856 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.46620890498161316, "objective/train/docs_used": 397858, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2505011558532715, "objective/train/original_loss": 3.2505016326904297, "objective/train/theoretical_loss": 3.7860344514298374, "objective/train/tokens_used": 708588000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23270408809185028, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0478036403656006, "objective/train/weighted_lm_loss": 3.4039294719696045, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9517877697944641, "theoretical_loss": 3.7860344514298374, "tokens_seen": 688128000 }, { "epoch": 0.25, "learning_rate": 0.00038081725312145287, "loss": 3.6586, "theoretical_loss": 3.7855940894647278, "tokens_seen": 688914432 }, { "epoch": 0.25, "learning_rate": 0.0003806280741581537, "loss": 3.6271, "theoretical_loss": 3.785007940082673, "tokens_seen": 689963008 }, { "epoch": 0.25, "learning_rate": 0.00038043889519485437, "loss": 3.6519, "theoretical_loss": 3.7844229298228176, "tokens_seen": 691011584 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.4676517844200134, "objective/train/docs_used": 398758, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.232473850250244, "objective/train/original_loss": 3.2324740886688232, "objective/train/theoretical_loss": 3.7842038438510803, "objective/train/tokens_used": 711864800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22986283898353577, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0479334592819214, "objective/train/weighted_lm_loss": 3.3902587890625, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9528036713600159, "theoretical_loss": 3.7842038438510803, "tokens_seen": 691404800 }, { "epoch": 0.25, "learning_rate": 0.00038024971623155506, "loss": 3.76, "theoretical_loss": 3.7838390547478635, "tokens_seen": 692060160 }, { "epoch": 0.25, "learning_rate": 0.0003800605372682558, "loss": 3.6894, "theoretical_loss": 3.78325631094006, "tokens_seen": 693108736 }, { "epoch": 0.25, "learning_rate": 0.0003798713583049565, "loss": 3.6554, "theoretical_loss": 3.782674694501079, "tokens_seen": 694157312 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.4899442195892334, "objective/train/docs_used": 400726, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.5794906616210938, "objective/train/original_loss": 3.5794901847839355, "objective/train/theoretical_loss": 3.782384307831949, "objective/train/tokens_used": 715141600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2438521683216095, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050234079360962, "objective/train/weighted_lm_loss": 3.758875846862793, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9520761966705322, "theoretical_loss": 3.782384307831949, "tokens_seen": 694681600 }, { "epoch": 0.25, "learning_rate": 0.0003796821793416572, "loss": 3.7036, "theoretical_loss": 3.782094201551887, "tokens_seen": 695205888 }, { "epoch": 0.25, "learning_rate": 0.00037949300037835794, "loss": 3.704, "theoretical_loss": 3.7815148282326243, "tokens_seen": 696254464 }, { "epoch": 0.25, "learning_rate": 0.00037930382141505864, "loss": 3.6473, "theoretical_loss": 3.780936570702478, "tokens_seen": 697303040 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.48783382773399353, "objective/train/docs_used": 402482, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4494855403900146, "objective/train/original_loss": 3.4494853019714355, "objective/train/theoretical_loss": 3.780575724658811, "objective/train/tokens_used": 718418400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24229206144809723, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500150918960571, "objective/train/weighted_lm_loss": 3.621917963027954, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9659635424613953, "theoretical_loss": 3.780575724658811, "tokens_seen": 697958400 }, { "epoch": 0.25, "learning_rate": 0.0003791146424517594, "loss": 3.6276, "theoretical_loss": 3.780359425139562, "tokens_seen": 698351616 }, { "epoch": 0.25, "learning_rate": 0.0003789254634884601, "loss": 3.6618, "theoretical_loss": 3.7797833877407947, "tokens_seen": 699400192 }, { "epoch": 0.25, "learning_rate": 0.00037873628452516077, "loss": 3.5677, "theoretical_loss": 3.779208454721779, "tokens_seen": 700448768 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.4900723993778229, "objective/train/docs_used": 404817, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.499330759048462, "objective/train/original_loss": 3.499330997467041, "objective/train/theoretical_loss": 3.778777977440649, "objective/train/tokens_used": 721695200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24242709577083588, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502395629882812, "objective/train/weighted_lm_loss": 3.675553321838379, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9576201438903809, "theoretical_loss": 3.778777977440649, "tokens_seen": 701235200 }, { "epoch": 0.25, "learning_rate": 0.0003785471055618615, "loss": 3.6181, "theoretical_loss": 3.7786346223166802, "tokens_seen": 701497344 }, { "epoch": 0.25, "learning_rate": 0.00037835792659856227, "loss": 3.5818, "theoretical_loss": 3.778061886778111, "tokens_seen": 702545920 }, { "epoch": 0.25, "learning_rate": 0.000378168747635263, "loss": 3.6, "theoretical_loss": 3.7774902443770113, "tokens_seen": 703594496 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.4579163193702698, "objective/train/docs_used": 406758, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1486806869506836, "objective/train/original_loss": 3.1486809253692627, "objective/train/theoretical_loss": 3.7769909510727144, "objective/train/tokens_used": 724972000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23327672481536865, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.046976923942566, "objective/train/weighted_lm_loss": 3.3008174896240234, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9533183574676514, "theoretical_loss": 3.7769909510727144, "tokens_seen": 704512000 }, { "epoch": 0.25, "learning_rate": 0.0003779795686719637, "loss": 3.6163, "theoretical_loss": 3.776919691402532, "tokens_seen": 704643072 }, { "epoch": 0.25, "learning_rate": 0.0003777903897086644, "loss": 3.6061, "theoretical_loss": 3.7763502241619205, "tokens_seen": 705691648 }, { "epoch": 0.25, "learning_rate": 0.00037760121074536515, "loss": 3.6111, "theoretical_loss": 3.7757818389804023, "tokens_seen": 706740224 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.48214995861053467, "objective/train/docs_used": 408602, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.046311140060425, "objective/train/original_loss": 3.046311140060425, "objective/train/theoretical_loss": 3.775214532201071, "objective/train/tokens_used": 728248800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23837196826934814, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049426555633545, "objective/train/weighted_lm_loss": 3.1969616413116455, "objective/train/weights_max": 1.0512155294418335, "objective/train/weights_min": 0.953264057636261, "theoretical_loss": 3.775214532201071, "tokens_seen": 707788800 }, { "epoch": 0.25, "learning_rate": 0.00037741203178206584, "loss": 3.4984, "theoretical_loss": 3.775214532201071, "tokens_seen": 707788800 }, { "epoch": 0.25, "learning_rate": 0.00037722285281876653, "loss": 3.5725, "theoretical_loss": 3.774648300184772, "tokens_seen": 708837376 }, { "epoch": 0.25, "learning_rate": 0.0003770336738554673, "loss": 3.5147, "theoretical_loss": 3.774083139309993, "tokens_seen": 709885952 }, { "epoch": 0.25, "learning_rate": 0.000376844494892168, "loss": 3.5496, "theoretical_loss": 3.7735190459727486, "tokens_seen": 710934528 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.489047646522522, "objective/train/docs_used": 410204, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3076088428497314, "objective/train/original_loss": 3.3076090812683105, "objective/train/theoretical_loss": 3.7734486091880095, "objective/train/tokens_used": 731525600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24238334596157074, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050136923789978, "objective/train/weighted_lm_loss": 3.473520517349243, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9621482491493225, "theoretical_loss": 3.7734486091880095, "tokens_seen": 711065600 }, { "epoch": 0.25, "learning_rate": 0.0003766553159288687, "loss": 3.5332, "theoretical_loss": 3.7729560165864746, "tokens_seen": 711983104 }, { "epoch": 0.25, "learning_rate": 0.0003764661369655694, "loss": 3.6379, "theoretical_loss": 3.7723940475819147, "tokens_seen": 713031680 }, { "epoch": 0.26, "learning_rate": 0.0003762769580022701, "loss": 3.569, "theoretical_loss": 3.7718331354070127, "tokens_seen": 714080256 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.477344810962677, "objective/train/docs_used": 412084, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.31864595413208, "objective/train/original_loss": 3.31864595413208, "objective/train/theoretical_loss": 3.7716930720782935, "objective/train/tokens_used": 734802400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24045908451080322, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489565134048462, "objective/train/weighted_lm_loss": 3.480764627456665, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.952606737613678, "theoretical_loss": 3.7716930720782935, "tokens_seen": 714342400 }, { "epoch": 0.26, "learning_rate": 0.0003760877790389709, "loss": 3.6623, "theoretical_loss": 3.771273276526805, "tokens_seen": 715128832 }, { "epoch": 0.26, "learning_rate": 0.0003758986000756716, "loss": 3.6001, "theoretical_loss": 3.770714467423313, "tokens_seen": 716177408 }, { "epoch": 0.26, "learning_rate": 0.00037570942111237235, "loss": 3.6465, "theoretical_loss": 3.7701567045954367, "tokens_seen": 717225984 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.47307994961738586, "objective/train/docs_used": 413912, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0646326541900635, "objective/train/original_loss": 3.0646326541900635, "objective/train/theoretical_loss": 3.769947812566226, "objective/train/tokens_used": 738079200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23451007902622223, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484997034072876, "objective/train/weighted_lm_loss": 3.2135608196258545, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9514219760894775, "theoretical_loss": 3.769947812566226, "tokens_seen": 717619200 }, { "epoch": 0.26, "learning_rate": 0.00037552024214907305, "loss": 3.6583, "theoretical_loss": 3.76959998455885, "tokens_seen": 718274560 }, { "epoch": 0.26, "learning_rate": 0.00037533106318577374, "loss": 3.6294, "theoretical_loss": 3.7690443038458943, "tokens_seen": 719323136 }, { "epoch": 0.26, "learning_rate": 0.0003751418842224745, "loss": 3.5343, "theoretical_loss": 3.7684896590054757, "tokens_seen": 720371712 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.48498988151550293, "objective/train/docs_used": 415834, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3692052364349365, "objective/train/original_loss": 3.3692054748535156, "objective/train/theoretical_loss": 3.7682127239635053, "objective/train/tokens_used": 741356000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24069416522979736, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497223138809204, "objective/train/weighted_lm_loss": 3.5365824699401855, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9525206089019775, "theoretical_loss": 3.7682127239635053, "tokens_seen": 720896000 }, { "epoch": 0.26, "learning_rate": 0.0003749527052591752, "loss": 3.646, "theoretical_loss": 3.767936046602963, "tokens_seen": 721420288 }, { "epoch": 0.26, "learning_rate": 0.0003747635262958759, "loss": 3.6593, "theoretical_loss": 3.7673834632200824, "tokens_seen": 722468864 }, { "epoch": 0.26, "learning_rate": 0.0003745743473325766, "loss": 3.675, "theoretical_loss": 3.76683190545482, "tokens_seen": 723517440 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.4824288487434387, "objective/train/docs_used": 417692, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.7827279567718506, "objective/train/original_loss": 3.7827279567718506, "objective/train/theoretical_loss": 3.7664877011678484, "objective/train/tokens_used": 744632800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2396303415298462, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049461007118225, "objective/train/weighted_lm_loss": 3.9698286056518555, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9542454481124878, "theoretical_loss": 3.7664877011678484, "tokens_seen": 724172800 }, { "epoch": 0.26, "learning_rate": 0.0003743851683692773, "loss": 3.6335, "theoretical_loss": 3.766281369921316, "tokens_seen": 724566016 }, { "epoch": 0.26, "learning_rate": 0.00037419598940597806, "loss": 3.6526, "theoretical_loss": 3.765731853249771, "tokens_seen": 725614592 }, { "epoch": 0.26, "learning_rate": 0.00037400681044267876, "loss": 3.524, "theoretical_loss": 3.7651833520863396, "tokens_seen": 726663168 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.48516714572906494, "objective/train/docs_used": 419961, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.715956211090088, "objective/train/original_loss": 3.715956211090088, "objective/train/theoretical_loss": 3.7647726406323665, "objective/train/tokens_used": 747909600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24163493514060974, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049744963645935, "objective/train/weighted_lm_loss": 3.8999452590942383, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9520505666732788, "theoretical_loss": 3.7647726406323665, "tokens_seen": 727449600 }, { "epoch": 0.26, "learning_rate": 0.0003738176314793795, "loss": 3.6154, "theoretical_loss": 3.7646358630930385, "tokens_seen": 727711744 }, { "epoch": 0.26, "learning_rate": 0.00037362845251608025, "loss": 3.5866, "theoretical_loss": 3.7640893829476445, "tokens_seen": 728760320 }, { "epoch": 0.26, "learning_rate": 0.00037343927355278095, "loss": 3.6157, "theoretical_loss": 3.7635439083435998, "tokens_seen": 729808896 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.47344446182250977, "objective/train/docs_used": 421797, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.6491940021514893, "objective/train/original_loss": 3.6491942405700684, "objective/train/theoretical_loss": 3.7630674403356625, "objective/train/tokens_used": 751186400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23705269396305084, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485491752624512, "objective/train/weighted_lm_loss": 3.8254892826080322, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9523929953575134, "theoretical_loss": 3.7630674403356625, "tokens_seen": 730726400 }, { "epoch": 0.26, "learning_rate": 0.0003732500945894817, "loss": 3.6271, "theoretical_loss": 3.762999435989914, "tokens_seen": 730857472 }, { "epoch": 0.26, "learning_rate": 0.0003730609156261824, "loss": 3.4846, "theoretical_loss": 3.76245596261107, "tokens_seen": 731906048 }, { "epoch": 0.26, "learning_rate": 0.0003728717366628831, "loss": 3.5969, "theoretical_loss": 3.7619134849469296, "tokens_seen": 732954624 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.47209614515304565, "objective/train/docs_used": 423825, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4763729572296143, "objective/train/original_loss": 3.4763731956481934, "objective/train/theoretical_loss": 3.7613719997526367, "objective/train/tokens_used": 754463200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23541226983070374, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484060049057007, "objective/train/weighted_lm_loss": 3.644237756729126, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9522672295570374, "theoretical_loss": 3.7613719997526367, "tokens_seen": 734003200 }, { "epoch": 0.26, "learning_rate": 0.00037268255769958383, "loss": 3.6206, "theoretical_loss": 3.7613719997526367, "tokens_seen": 734003200 }, { "epoch": 0.26, "learning_rate": 0.0003724933787362845, "loss": 3.6467, "theoretical_loss": 3.760831503798527, "tokens_seen": 735051776 }, { "epoch": 0.26, "learning_rate": 0.0003723041997729852, "loss": 3.6167, "theoretical_loss": 3.760291993870034, "tokens_seen": 736100352 }, { "epoch": 0.26, "learning_rate": 0.00037211502080968596, "loss": 3.5772, "theoretical_loss": 3.759753466767597, "tokens_seen": 737148928 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.48783084750175476, "objective/train/docs_used": 425209, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.385236978530884, "objective/train/original_loss": 3.3852367401123047, "objective/train/theoretical_loss": 3.7596862198259773, "objective/train/tokens_used": 757740000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24238182604312897, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500152111053467, "objective/train/weighted_lm_loss": 3.554072380065918, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.951784074306488, "theoretical_loss": 3.7596862198259773, "tokens_seen": 737280000 }, { "epoch": 0.26, "learning_rate": 0.00037192584184638666, "loss": 3.5612, "theoretical_loss": 3.7592159193065697, "tokens_seen": 738197504 }, { "epoch": 0.26, "learning_rate": 0.0003717366628830874, "loss": 3.6211, "theoretical_loss": 3.758679348317131, "tokens_seen": 739246080 }, { "epoch": 0.26, "learning_rate": 0.00037154748391978815, "loss": 3.6074, "theoretical_loss": 3.7581437506441926, "tokens_seen": 740294656 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.4911670982837677, "objective/train/docs_used": 428014, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.293811559677124, "objective/train/original_loss": 3.293811559677124, "objective/train/theoretical_loss": 3.75801000293832, "objective/train/tokens_used": 761016800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2433631718158722, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050353765487671, "objective/train/weighted_lm_loss": 3.4594292640686035, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9623913764953613, "theoretical_loss": 3.75801000293832, "tokens_seen": 740556800 }, { "epoch": 0.26, "learning_rate": 0.00037135830495648884, "loss": 3.6303, "theoretical_loss": 3.7576091231473114, "tokens_seen": 741343232 }, { "epoch": 0.27, "learning_rate": 0.0003711691259931896, "loss": 3.5686, "theoretical_loss": 3.7570754627006018, "tokens_seen": 742391808 }, { "epoch": 0.27, "learning_rate": 0.0003709799470298903, "loss": 3.5571, "theoretical_loss": 3.756542766192646, "tokens_seen": 743440384 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.4925093352794647, "objective/train/docs_used": 429315, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3616669178009033, "objective/train/original_loss": 3.3616676330566406, "objective/train/theoretical_loss": 3.756343252885055, "objective/train/tokens_used": 764293600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24342967569828033, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504884719848633, "objective/train/weighted_lm_loss": 3.531308650970459, "objective/train/weights_max": 1.0512197017669678, "objective/train/weights_min": 0.9594744443893433, "theoretical_loss": 3.756343252885055, "tokens_seen": 743833600 }, { "epoch": 0.27, "learning_rate": 0.00037079076806659103, "loss": 3.5356, "theoretical_loss": 3.7560110305264054, "tokens_seen": 744488960 }, { "epoch": 0.27, "learning_rate": 0.0003706015891032917, "loss": 3.5672, "theoretical_loss": 3.7554802526191393, "tokens_seen": 745537536 }, { "epoch": 0.27, "learning_rate": 0.0003704124101399924, "loss": 3.4796, "theoretical_loss": 3.7549504294023137, "tokens_seen": 746586112 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.49480339884757996, "objective/train/docs_used": 431156, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.175428628921509, "objective/train/original_loss": 3.175428867340088, "objective/train/theoretical_loss": 3.7546858748477634, "objective/train/tokens_used": 767570400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2462376207113266, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050732135772705, "objective/train/weighted_lm_loss": 3.3367059230804443, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9542807936668396, "theoretical_loss": 3.7546858748477634, "tokens_seen": 747110400 }, { "epoch": 0.27, "learning_rate": 0.00037022323117669317, "loss": 3.5415, "theoretical_loss": 3.7544215578215177, "tokens_seen": 747634688 }, { "epoch": 0.27, "learning_rate": 0.00037003405221339386, "loss": 3.5867, "theoretical_loss": 3.75389363483638, "tokens_seen": 748683264 }, { "epoch": 0.27, "learning_rate": 0.0003698448732500946, "loss": 3.4893, "theoretical_loss": 3.753366657420483, "tokens_seen": 749731840 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.48980623483657837, "objective/train/docs_used": 433094, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1637654304504395, "objective/train/original_loss": 3.1637656688690186, "objective/train/theoretical_loss": 3.7530377753682695, "objective/train/tokens_used": 770847200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2416664958000183, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502090454101562, "objective/train/weighted_lm_loss": 3.3235599994659424, "objective/train/weights_max": 1.0512202978134155, "objective/train/weights_min": 0.95468670129776, "theoretical_loss": 3.7530377753682695, "tokens_seen": 750387200 }, { "epoch": 0.27, "learning_rate": 0.0003696556942867953, "loss": 3.555, "theoretical_loss": 3.75284062256128, "tokens_seen": 750780416 }, { "epoch": 0.27, "learning_rate": 0.000369466515323496, "loss": 3.5191, "theoretical_loss": 3.7523155272600137, "tokens_seen": 751828992 }, { "epoch": 0.27, "learning_rate": 0.0003692773363601968, "loss": 3.6015, "theoretical_loss": 3.751791368531631, "tokens_seen": 752877568 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.48759615421295166, "objective/train/docs_used": 434283, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.427870988845825, "objective/train/original_loss": 3.4278712272644043, "objective/train/theoretical_loss": 3.7513988623232883, "objective/train/tokens_used": 774124000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2424042820930481, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499919652938843, "objective/train/weighted_lm_loss": 3.5984747409820557, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9514016509056091, "theoretical_loss": 3.7513988623232883, "tokens_seen": 753664000 }, { "epoch": 0.27, "learning_rate": 0.0003690881573968975, "loss": 3.6279, "theoretical_loss": 3.7512681434047033, "tokens_seen": 753926144 }, { "epoch": 0.27, "learning_rate": 0.0003688989784335982, "loss": 3.6342, "theoretical_loss": 3.7507458489213477, "tokens_seen": 754974720 }, { "epoch": 0.27, "learning_rate": 0.00036870979947029893, "loss": 3.5992, "theoretical_loss": 3.7502244821371407, "tokens_seen": 756023296 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.486615926027298, "objective/train/docs_used": 435567, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.396214723587036, "objective/train/original_loss": 3.396214485168457, "objective/train/theoretical_loss": 3.7497690448996552, "objective/train/tokens_used": 777400800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24091613292694092, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498863458633423, "objective/train/weighted_lm_loss": 3.564488410949707, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9564319849014282, "theoretical_loss": 3.7497690448996552, "tokens_seen": 756940800 }, { "epoch": 0.27, "learning_rate": 0.0003685206205069996, "loss": 3.5721, "theoretical_loss": 3.7497040401210446, "tokens_seen": 757071872 }, { "epoch": 0.27, "learning_rate": 0.0003683314415437004, "loss": 3.6079, "theoretical_loss": 3.7491845199553238, "tokens_seen": 758120448 }, { "epoch": 0.27, "learning_rate": 0.00036814226258040107, "loss": 3.6028, "theoretical_loss": 3.748665918735468, "tokens_seen": 759169024 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.4809083640575409, "objective/train/docs_used": 437507, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2098705768585205, "objective/train/original_loss": 3.2098708152770996, "objective/train/theoretical_loss": 3.748148233570115, "objective/train/tokens_used": 780677600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23865434527397156, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493038892745972, "objective/train/weighted_lm_loss": 3.3677401542663574, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9517086148262024, "theoretical_loss": 3.748148233570115, "tokens_seen": 760217600 }, { "epoch": 0.27, "learning_rate": 0.00036795308361710176, "loss": 3.5545, "theoretical_loss": 3.748148233570115, "tokens_seen": 760217600 }, { "epoch": 0.27, "learning_rate": 0.0003677639046538025, "loss": 3.6647, "theoretical_loss": 3.74763146158097, "tokens_seen": 761266176 }, { "epoch": 0.27, "learning_rate": 0.0003675747256905032, "loss": 3.6744, "theoretical_loss": 3.747115599902733, "tokens_seen": 762314752 }, { "epoch": 0.27, "learning_rate": 0.00036738554672720395, "loss": 3.5857, "theoretical_loss": 3.746600645683017, "tokens_seen": 763363328 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.4819650948047638, "objective/train/docs_used": 439068, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3648343086242676, "objective/train/original_loss": 3.364834785461426, "objective/train/theoretical_loss": 3.7465363400696683, "objective/train/tokens_used": 783954400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23789268732070923, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494056940078735, "objective/train/weighted_lm_loss": 3.5312013626098633, "objective/train/weights_max": 1.0512161254882812, "objective/train/weights_min": 0.9530982971191406, "theoretical_loss": 3.7465363400696683, "tokens_seen": 763494400 }, { "epoch": 0.27, "learning_rate": 0.00036719636776390464, "loss": 3.6457, "theoretical_loss": 3.7460865960822782, "tokens_seen": 764411904 }, { "epoch": 0.27, "learning_rate": 0.0003670071888006054, "loss": 3.6536, "theoretical_loss": 3.745573448273736, "tokens_seen": 765460480 }, { "epoch": 0.27, "learning_rate": 0.00036681800983730614, "loss": 3.623, "theoretical_loss": 3.7450611994433, "tokens_seen": 766509056 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.4673716723918915, "objective/train/docs_used": 441051, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.6296682357788086, "objective/train/original_loss": 3.6296682357788086, "objective/train/theoretical_loss": 3.7449332773724455, "objective/train/tokens_used": 787231200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22804482281208038, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.047896146774292, "objective/train/weighted_lm_loss": 3.8018686771392822, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9860634207725525, "theoretical_loss": 3.7449332773724455, "tokens_seen": 766771200 }, { "epoch": 0.27, "learning_rate": 0.00036662883087400683, "loss": 3.6236, "theoretical_loss": 3.7445498467894947, "tokens_seen": 767557632 }, { "epoch": 0.27, "learning_rate": 0.0003664396519107075, "loss": 3.6475, "theoretical_loss": 3.7440393875233893, "tokens_seen": 768606208 }, { "epoch": 0.27, "learning_rate": 0.00036625047294740827, "loss": 3.5919, "theoretical_loss": 3.7435298188685184, "tokens_seen": 769654784 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.4872644245624542, "objective/train/docs_used": 442749, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4281868934631348, "objective/train/original_loss": 3.4281868934631348, "objective/train/theoretical_loss": 3.7433389596691073, "objective/train/tokens_used": 790508000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24073950946331024, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499500036239624, "objective/train/weighted_lm_loss": 3.5999956130981445, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9553505778312683, "theoretical_loss": 3.7433389596691073, "tokens_seen": 770048000 }, { "epoch": 0.28, "learning_rate": 0.00036606129398410897, "loss": 3.6758, "theoretical_loss": 3.7430211380608167, "tokens_seen": 770703360 }, { "epoch": 0.28, "learning_rate": 0.0003658721150208097, "loss": 3.617, "theoretical_loss": 3.74251334234854, "tokens_seen": 771751936 }, { "epoch": 0.28, "learning_rate": 0.0003656829360575104, "loss": 3.6863, "theoretical_loss": 3.742006428992198, "tokens_seen": 772800512 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.46843963861465454, "objective/train/docs_used": 444718, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.53401780128479, "objective/train/original_loss": 3.534018039703369, "objective/train/theoretical_loss": 3.7417533023447445, "objective/train/tokens_used": 793784800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23737366497516632, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0480501651763916, "objective/train/weighted_lm_loss": 3.7014524936676025, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9519520401954651, "theoretical_loss": 3.7417533023447445, "tokens_seen": 773324800 }, { "epoch": 0.28, "learning_rate": 0.0003654937570942111, "loss": 3.4944, "theoretical_loss": 3.741500395264481, "tokens_seen": 773849088 }, { "epoch": 0.28, "learning_rate": 0.00036530457813091185, "loss": 3.5637, "theoretical_loss": 3.7409952384501923, "tokens_seen": 774897664 }, { "epoch": 0.28, "learning_rate": 0.00036511539916761254, "loss": 3.651, "theoretical_loss": 3.740490955846173, "tokens_seen": 775946240 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.48415154218673706, "objective/train/docs_used": 446660, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4447832107543945, "objective/train/original_loss": 3.4447832107543945, "objective/train/theoretical_loss": 3.7401762219572765, "objective/train/tokens_used": 797061600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2410360723733902, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049640417098999, "objective/train/weighted_lm_loss": 3.616288661956787, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9529085755348206, "theoretical_loss": 3.7401762219572765, "tokens_seen": 776601600 }, { "epoch": 0.28, "learning_rate": 0.0003649262202043133, "loss": 3.6152, "theoretical_loss": 3.739987544761238, "tokens_seen": 776994816 }, { "epoch": 0.28, "learning_rate": 0.00036473704124101404, "loss": 3.6166, "theoretical_loss": 3.7394850025161026, "tokens_seen": 778043392 }, { "epoch": 0.28, "learning_rate": 0.00036454786227771473, "loss": 3.6012, "theoretical_loss": 3.738983326443316, "tokens_seen": 779091968 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.4765093922615051, "objective/train/docs_used": 448564, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2798614501953125, "objective/train/original_loss": 3.2798619270324707, "objective/train/theoretical_loss": 3.73860763621633, "objective/train/tokens_used": 800338400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23574241995811462, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048849105834961, "objective/train/weighted_lm_loss": 3.4406769275665283, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9636579751968384, "theoretical_loss": 3.73860763621633, "tokens_seen": 779878400 }, { "epoch": 0.28, "learning_rate": 0.0003643586833144155, "loss": 3.6718, "theoretical_loss": 3.7384825138871944, "tokens_seen": 780140544 }, { "epoch": 0.28, "learning_rate": 0.00036416950435111617, "loss": 3.5764, "theoretical_loss": 3.737982562203749, "tokens_seen": 781189120 }, { "epoch": 0.28, "learning_rate": 0.00036398032538781686, "loss": 3.5433, "theoretical_loss": 3.737483468760624, "tokens_seen": 782237696 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.48366644978523254, "objective/train/docs_used": 450552, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.120129108428955, "objective/train/original_loss": 3.120129108428955, "objective/train/theoretical_loss": 3.737047463962579, "objective/train/tokens_used": 803615200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23800888657569885, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495764017105103, "objective/train/weighted_lm_loss": 3.2741003036499023, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9536738991737366, "theoretical_loss": 3.737047463962579, "tokens_seen": 783155200 }, { "epoch": 0.28, "learning_rate": 0.0003637911464245176, "loss": 3.5818, "theoretical_loss": 3.7369852309370275, "tokens_seen": 783286272 }, { "epoch": 0.28, "learning_rate": 0.0003636019674612183, "loss": 3.5907, "theoretical_loss": 3.736487846123663, "tokens_seen": 784334848 }, { "epoch": 0.28, "learning_rate": 0.00036341278849791905, "loss": 3.5826, "theoretical_loss": 3.7359913117226684, "tokens_seen": 785383424 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.4790653884410858, "objective/train/docs_used": 452240, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.210500955581665, "objective/train/original_loss": 3.210501194000244, "objective/train/theoretical_loss": 3.735495625147548, "objective/train/tokens_used": 806892000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23576776683330536, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491048097610474, "objective/train/weighted_lm_loss": 3.3677072525024414, "objective/train/weights_max": 1.0512161254882812, "objective/train/weights_min": 0.9538201689720154, "theoretical_loss": 3.735495625147548, "tokens_seen": 786432000 }, { "epoch": 0.28, "learning_rate": 0.00036322360953461975, "loss": 3.6123, "theoretical_loss": 3.735495625147548, "tokens_seen": 786432000 }, { "epoch": 0.28, "learning_rate": 0.00036303443057132044, "loss": 3.5839, "theoretical_loss": 3.735000783823107, "tokens_seen": 787480576 }, { "epoch": 0.28, "learning_rate": 0.0003628452516080212, "loss": 3.5651, "theoretical_loss": 3.7345067851853897, "tokens_seen": 788529152 }, { "epoch": 0.28, "learning_rate": 0.0003626560726447219, "loss": 3.5895, "theoretical_loss": 3.7340136266816133, "tokens_seen": 789577728 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.4898260533809662, "objective/train/docs_used": 454142, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3126230239868164, "objective/train/original_loss": 3.3126230239868164, "objective/train/theoretical_loss": 3.7339520408138513, "objective/train/tokens_used": 810168800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2416979968547821, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502111911773682, "objective/train/weighted_lm_loss": 3.478757619857788, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9954109191894531, "theoretical_loss": 3.7339520408138513, "tokens_seen": 789708800 }, { "epoch": 0.28, "learning_rate": 0.0003624668936814227, "loss": 3.569, "theoretical_loss": 3.733521305770105, "tokens_seen": 790626304 }, { "epoch": 0.28, "learning_rate": 0.0003622777147181234, "loss": 3.5766, "theoretical_loss": 3.7330298199202394, "tokens_seen": 791674880 }, { "epoch": 0.28, "learning_rate": 0.00036208853575482407, "loss": 3.5995, "theoretical_loss": 3.7325391666123764, "tokens_seen": 792723456 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.4768815040588379, "objective/train/docs_used": 455856, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8263907432556152, "objective/train/original_loss": 2.8263907432556152, "objective/train/theoretical_loss": 3.732416633075869, "objective/train/tokens_used": 813445600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2372640073299408, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488940477371216, "objective/train/weighted_lm_loss": 2.9625203609466553, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9526212215423584, "theoretical_loss": 3.732416633075869, "tokens_seen": 792985600 }, { "epoch": 0.28, "learning_rate": 0.0003618993567915248, "loss": 3.558, "theoretical_loss": 3.7320493433377973, "tokens_seen": 793772032 }, { "epoch": 0.28, "learning_rate": 0.0003617101778282255, "loss": 3.6053, "theoretical_loss": 3.731560347598646, "tokens_seen": 794820608 }, { "epoch": 0.28, "learning_rate": 0.0003615209988649262, "loss": 3.5659, "theoretical_loss": 3.7310721769078636, "tokens_seen": 795869184 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.48475295305252075, "objective/train/docs_used": 457654, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2081458568573, "objective/train/original_loss": 3.2081456184387207, "objective/train/theoretical_loss": 3.7308893251008413, "objective/train/tokens_used": 816722400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24060019850730896, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496982336044312, "objective/train/weighted_lm_loss": 3.367356061935425, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9561263918876648, "theoretical_loss": 3.7308893251008413, "tokens_seen": 796262400 }, { "epoch": 0.28, "learning_rate": 0.00036133181990162695, "loss": 3.5876, "theoretical_loss": 3.730584828789132, "tokens_seen": 796917760 }, { "epoch": 0.29, "learning_rate": 0.00036114264093832765, "loss": 3.543, "theoretical_loss": 3.7300983007768105, "tokens_seen": 797966336 }, { "epoch": 0.29, "learning_rate": 0.0003609534619750284, "loss": 3.5454, "theoretical_loss": 3.729612590415876, "tokens_seen": 799014912 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.4876490533351898, "objective/train/docs_used": 459747, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4556379318237305, "objective/train/original_loss": 3.4556384086608887, "objective/train/theoretical_loss": 3.729370041090373, "objective/train/tokens_used": 819999200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24297496676445007, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500001907348633, "objective/train/weighted_lm_loss": 3.628729820251465, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9550496935844421, "theoretical_loss": 3.729370041090373, "tokens_seen": 799539200 }, { "epoch": 0.29, "learning_rate": 0.0003607642830117291, "loss": 3.525, "theoretical_loss": 3.7291276952618655, "tokens_seen": 800063488 }, { "epoch": 0.29, "learning_rate": 0.0003605751040484298, "loss": 3.5934, "theoretical_loss": 3.7286436128808145, "tokens_seen": 801112064 }, { "epoch": 0.29, "learning_rate": 0.00036038592508513053, "loss": 3.5227, "theoretical_loss": 3.7281603408491995, "tokens_seen": 802160640 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.4763462245464325, "objective/train/docs_used": 461200, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.19844388961792, "objective/train/original_loss": 3.19844388961792, "objective/train/theoretical_loss": 3.727858706262338, "objective/train/tokens_used": 823276000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2413131445646286, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048860788345337, "objective/train/weighted_lm_loss": 3.3524065017700195, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9517171382904053, "theoretical_loss": 3.727858706262338, "tokens_seen": 802816000 }, { "epoch": 0.29, "learning_rate": 0.0003601967461218313, "loss": 3.5443, "theoretical_loss": 3.72767787675388, "tokens_seen": 803209216 }, { "epoch": 0.29, "learning_rate": 0.000360007567158532, "loss": 3.5661, "theoretical_loss": 3.727196218192039, "tokens_seen": 804257792 }, { "epoch": 0.29, "learning_rate": 0.0003598183881952327, "loss": 3.4753, "theoretical_loss": 3.7267153627711256, "tokens_seen": 805306368 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.4879932701587677, "objective/train/docs_used": 463166, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.404329299926758, "objective/train/original_loss": 3.404329299926758, "objective/train/theoretical_loss": 3.7263552468331667, "objective/train/tokens_used": 826552800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24394917488098145, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050039291381836, "objective/train/weighted_lm_loss": 3.575188636779785, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9521428942680359, "theoretical_loss": 3.7263552468331667, "tokens_seen": 806092800 }, { "epoch": 0.29, "learning_rate": 0.0003596292092319334, "loss": 3.58, "theoretical_loss": 3.7262353081088015, "tokens_seen": 806354944 }, { "epoch": 0.29, "learning_rate": 0.00035944003026863416, "loss": 3.4629, "theoretical_loss": 3.725756051832878, "tokens_seen": 807403520 }, { "epoch": 0.29, "learning_rate": 0.00035925085130533485, "loss": 3.5236, "theoretical_loss": 3.7252775915812655, "tokens_seen": 808452096 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.47361043095588684, "objective/train/docs_used": 465044, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0562472343444824, "objective/train/original_loss": 3.0562477111816406, "objective/train/theoretical_loss": 3.724859590000527, "objective/train/tokens_used": 829829600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2366064488887787, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485633611679077, "objective/train/weighted_lm_loss": 3.204601287841797, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9514092803001404, "theoretical_loss": 3.724859590000527, "tokens_seen": 809369600 }, { "epoch": 0.29, "learning_rate": 0.0003590616723420356, "loss": 3.551, "theoretical_loss": 3.724799925001913, "tokens_seen": 809500672 }, { "epoch": 0.29, "learning_rate": 0.0003588724933787363, "loss": 3.5098, "theoretical_loss": 3.7243230497527553, "tokens_seen": 810549248 }, { "epoch": 0.29, "learning_rate": 0.000358683314415437, "loss": 3.4784, "theoretical_loss": 3.723846963501657, "tokens_seen": 811597824 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.4856931269168854, "objective/train/docs_used": 466971, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.61726450920105, "objective/train/original_loss": 3.617264986038208, "objective/train/theoretical_loss": 3.7233716639263568, "objective/train/tokens_used": 833106400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2407991737127304, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497934818267822, "objective/train/weighted_lm_loss": 3.7969565391540527, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9524025917053223, "theoretical_loss": 3.7233716639263568, "tokens_seen": 812646400 }, { "epoch": 0.29, "learning_rate": 0.00035849413545213773, "loss": 3.4899, "theoretical_loss": 3.7233716639263568, "tokens_seen": 812646400 }, { "epoch": 0.29, "learning_rate": 0.0003583049564888384, "loss": 3.5225, "theoretical_loss": 3.7228971487144147, "tokens_seen": 813694976 }, { "epoch": 0.29, "learning_rate": 0.0003581157775255391, "loss": 3.5211, "theoretical_loss": 3.722423415563156, "tokens_seen": 814743552 }, { "epoch": 0.29, "learning_rate": 0.0003579265985622399, "loss": 3.5231, "theoretical_loss": 3.7219504621796187, "tokens_seen": 815792128 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.47521209716796875, "objective/train/docs_used": 469033, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4922797679901123, "objective/train/original_loss": 3.4922800064086914, "objective/train/theoretical_loss": 3.7218913977202703, "objective/train/tokens_used": 836383200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23987308144569397, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048740267753601, "objective/train/weighted_lm_loss": 3.6617021560668945, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9514144659042358, "theoretical_loss": 3.7218913977202703, "tokens_seen": 815923200 }, { "epoch": 0.29, "learning_rate": 0.0003577374195989406, "loss": 3.4287, "theoretical_loss": 3.7214782862805, "tokens_seen": 816840704 }, { "epoch": 0.29, "learning_rate": 0.00035754824063564136, "loss": 3.4614, "theoretical_loss": 3.7210068855921024, "tokens_seen": 817889280 }, { "epoch": 0.29, "learning_rate": 0.00035735906167234206, "loss": 3.4941, "theoretical_loss": 3.7205362578502834, "tokens_seen": 818937856 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.49092575907707214, "objective/train/docs_used": 471128, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.248918056488037, "objective/train/original_loss": 3.248918056488037, "objective/train/theoretical_loss": 3.7204187214233073, "objective/train/tokens_used": 839660000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24439458549022675, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503349304199219, "objective/train/weighted_lm_loss": 3.412680149078369, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9527466297149658, "theoretical_loss": 3.7204187214233073, "tokens_seen": 819200000 }, { "epoch": 0.29, "learning_rate": 0.00035716988270904275, "loss": 3.4172, "theoretical_loss": 3.7200664008004, "tokens_seen": 819986432 }, { "epoch": 0.29, "learning_rate": 0.0003569807037457435, "loss": 3.4968, "theoretical_loss": 3.7195973121972585, "tokens_seen": 821035008 }, { "epoch": 0.29, "learning_rate": 0.0003567915247824442, "loss": 3.5291, "theoretical_loss": 3.7191289898050632, "tokens_seen": 822083584 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.4838717579841614, "objective/train/docs_used": 472950, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.253770589828491, "objective/train/original_loss": 3.2537708282470703, "objective/train/theoretical_loss": 3.718953565992031, "objective/train/tokens_used": 842936800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2399124652147293, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496065616607666, "objective/train/weighted_lm_loss": 3.415417194366455, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9515462517738342, "theoretical_loss": 3.718953565992031, "tokens_seen": 822476800 }, { "epoch": 0.29, "learning_rate": 0.00035660234581914494, "loss": 3.4414, "theoretical_loss": 3.7186614313973645, "tokens_seen": 823132160 }, { "epoch": 0.29, "learning_rate": 0.00035641316685584563, "loss": 3.4772, "theoretical_loss": 3.7181946347570074, "tokens_seen": 824180736 }, { "epoch": 0.29, "learning_rate": 0.0003562239878925463, "loss": 3.4118, "theoretical_loss": 3.7177285976760834, "tokens_seen": 825229312 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.4853784739971161, "objective/train/docs_used": 475156, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4621541500091553, "objective/train/original_loss": 3.4621541500091553, "objective/train/theoretical_loss": 3.7174958632829522, "objective/train/tokens_used": 846213600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24138674139976501, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04976487159729, "objective/train/weighted_lm_loss": 3.634368658065796, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9528544545173645, "theoretical_loss": 3.7174958632829522, "tokens_seen": 825753600 }, { "epoch": 0.3, "learning_rate": 0.0003560348089292471, "loss": 3.5352, "theoretical_loss": 3.7172633179558763, "tokens_seen": 826277888 }, { "epoch": 0.3, "learning_rate": 0.00035584562996594777, "loss": 3.4583, "theoretical_loss": 3.7167987934068156, "tokens_seen": 827326464 }, { "epoch": 0.3, "learning_rate": 0.0003556564510026485, "loss": 3.4423, "theoretical_loss": 3.7163350218484252, "tokens_seen": 828375040 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.48797664046287537, "objective/train/docs_used": 477026, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.330416202545166, "objective/train/original_loss": 3.330416679382324, "objective/train/theoretical_loss": 3.716045546037287, "objective/train/tokens_used": 849490400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24159426987171173, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500258207321167, "objective/train/weighted_lm_loss": 3.4970216751098633, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9596421718597412, "theoretical_loss": 3.716045546037287, "tokens_seen": 829030400 }, { "epoch": 0.3, "learning_rate": 0.00035546727203934926, "loss": 3.4401, "theoretical_loss": 3.7158720011092767, "tokens_seen": 829423616 }, { "epoch": 0.3, "learning_rate": 0.00035527809307604996, "loss": 3.3848, "theoretical_loss": 3.715409729026936, "tokens_seen": 830472192 }, { "epoch": 0.3, "learning_rate": 0.0003550889141127507, "loss": 3.4305, "theoretical_loss": 3.714948203447919, "tokens_seen": 831520768 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.48684290051460266, "objective/train/docs_used": 478220, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.354376792907715, "objective/train/original_loss": 3.3543763160705566, "objective/train/theoretical_loss": 3.7146025478660274, "objective/train/tokens_used": 852767200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24331338703632355, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499210357666016, "objective/train/weighted_lm_loss": 3.5209054946899414, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9515804052352905, "theoretical_loss": 3.7146025478660274, "tokens_seen": 832307200 }, { "epoch": 0.3, "learning_rate": 0.0003548997351494514, "loss": 3.5074, "theoretical_loss": 3.7144874222276405, "tokens_seen": 832569344 }, { "epoch": 0.3, "learning_rate": 0.0003547105561861521, "loss": 3.4283, "theoretical_loss": 3.714027383230369, "tokens_seen": 833617920 }, { "epoch": 0.3, "learning_rate": 0.00035452137722285284, "loss": 3.4665, "theoretical_loss": 3.713568084329175, "tokens_seen": 834666496 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.4813803732395172, "objective/train/docs_used": 480064, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.389320135116577, "objective/train/original_loss": 3.3893203735351562, "objective/train/theoretical_loss": 3.713166803235318, "objective/train/tokens_used": 856044000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24221307039260864, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049368977546692, "objective/train/weighted_lm_loss": 3.5553438663482666, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9520329833030701, "theoretical_loss": 3.713166803235318, "tokens_seen": 835584000 }, { "epoch": 0.3, "learning_rate": 0.00035433219825955353, "loss": 3.4964, "theoretical_loss": 3.7131095234058895, "tokens_seen": 835715072 }, { "epoch": 0.3, "learning_rate": 0.0003541430192962543, "loss": 3.4202, "theoretical_loss": 3.712651698351051, "tokens_seen": 836763648 }, { "epoch": 0.3, "learning_rate": 0.00035395384033295497, "loss": 3.4965, "theoretical_loss": 3.7121946070638625, "tokens_seen": 837812224 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.4783816933631897, "objective/train/docs_used": 482017, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.7325849533081055, "objective/train/original_loss": 3.7325844764709473, "objective/train/theoretical_loss": 3.7117382474521436, "objective/train/tokens_used": 859320800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23481260240077972, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490317344665527, "objective/train/weighted_lm_loss": 3.9163506031036377, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9517280459403992, "theoretical_loss": 3.7117382474521436, "tokens_seen": 838860800 }, { "epoch": 0.3, "learning_rate": 0.00035376466136965567, "loss": 3.4554, "theoretical_loss": 3.7117382474521436, "tokens_seen": 838860800 }, { "epoch": 0.3, "learning_rate": 0.0003535754824063564, "loss": 3.4283, "theoretical_loss": 3.7112826174322864, "tokens_seen": 839909376 }, { "epoch": 0.3, "learning_rate": 0.00035338630344305716, "loss": 3.4463, "theoretical_loss": 3.7108277149292066, "tokens_seen": 840957952 }, { "epoch": 0.3, "learning_rate": 0.00035319712447975785, "loss": 3.44, "theoretical_loss": 3.7103735378763014, "tokens_seen": 842006528 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.49178850650787354, "objective/train/docs_used": 484166, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.338574171066284, "objective/train/original_loss": 3.338573694229126, "objective/train/theoretical_loss": 3.7103168166503053, "objective/train/tokens_used": 862597600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24451500177383423, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504218339920044, "objective/train/weighted_lm_loss": 3.5066874027252197, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.951617419719696, "theoretical_loss": 3.7103168166503053, "tokens_seen": 842137600 }, { "epoch": 0.3, "learning_rate": 0.0003530079455164586, "loss": 3.4913, "theoretical_loss": 3.7099200842154003, "tokens_seen": 843055104 }, { "epoch": 0.3, "learning_rate": 0.0003528187665531593, "loss": 3.4763, "theoretical_loss": 3.709467351896726, "tokens_seen": 844103680 }, { "epoch": 0.3, "learning_rate": 0.00035262958758986004, "loss": 3.4748, "theoretical_loss": 3.709015338878843, "tokens_seen": 845152256 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.47579312324523926, "objective/train/docs_used": 485899, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.143003225326538, "objective/train/original_loss": 3.143002986907959, "objective/train/theoretical_loss": 3.7089024477766817, "objective/train/tokens_used": 865874400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2346218377351761, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487719774246216, "objective/train/weighted_lm_loss": 3.297304630279541, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9735310077667236, "theoretical_loss": 3.7089024477766817, "tokens_seen": 845414400 }, { "epoch": 0.3, "learning_rate": 0.00035244040862656074, "loss": 3.4219, "theoretical_loss": 3.7085640431286198, "tokens_seen": 846200832 }, { "epoch": 0.3, "learning_rate": 0.00035225122966326143, "loss": 3.4825, "theoretical_loss": 3.7081134626211796, "tokens_seen": 847249408 }, { "epoch": 0.3, "learning_rate": 0.0003520620506999622, "loss": 3.4676, "theoretical_loss": 3.7076635953398607, "tokens_seen": 848297984 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.4607677459716797, "objective/train/docs_used": 487462, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2809650897979736, "objective/train/original_loss": 3.2809653282165527, "objective/train/theoretical_loss": 3.7074950785777787, "objective/train/tokens_used": 869151200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22898733615875244, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0472406148910522, "objective/train/weighted_lm_loss": 3.438236713409424, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9514792561531067, "theoretical_loss": 3.7074950785777787, "tokens_seen": 848691200 }, { "epoch": 0.3, "learning_rate": 0.00035187287173666287, "loss": 3.4275, "theoretical_loss": 3.7072144392761697, "tokens_seen": 849346560 }, { "epoch": 0.3, "learning_rate": 0.0003516836927733636, "loss": 3.393, "theoretical_loss": 3.70676599242974, "tokens_seen": 850395136 }, { "epoch": 0.3, "learning_rate": 0.0003514945138100643, "loss": 3.4647, "theoretical_loss": 3.706318252808291, "tokens_seen": 851443712 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.47821953892707825, "objective/train/docs_used": 489283, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.037062644958496, "objective/train/original_loss": 3.037062644958496, "objective/train/theoretical_loss": 3.7060946475865455, "objective/train/tokens_used": 872428000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23509417474269867, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490169525146484, "objective/train/weighted_lm_loss": 3.1864781379699707, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9535816311836243, "theoretical_loss": 3.7060946475865455, "tokens_seen": 851968000 }, { "epoch": 0.3, "learning_rate": 0.000351305334846765, "loss": 3.5199, "theoretical_loss": 3.705871218427581, "tokens_seen": 852492288 }, { "epoch": 0.3, "learning_rate": 0.0003511161558834658, "loss": 3.4919, "theoretical_loss": 3.705424887311368, "tokens_seen": 853540864 }, { "epoch": 0.31, "learning_rate": 0.0003509269769201665, "loss": 3.5404, "theoretical_loss": 3.704979257491368, "tokens_seen": 854589440 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.4794943332672119, "objective/train/docs_used": 491311, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2601847648620605, "objective/train/original_loss": 3.2601850032806396, "objective/train/theoretical_loss": 3.70470109410946, "objective/train/tokens_used": 875704800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2394295483827591, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491665601730347, "objective/train/weighted_lm_loss": 3.4213621616363525, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9513980746269226, "theoretical_loss": 3.70470109410946, "tokens_seen": 855244800 }, { "epoch": 0.31, "learning_rate": 0.00035073779795686725, "loss": 3.4952, "theoretical_loss": 3.704534327007211, "tokens_seen": 855638016 }, { "epoch": 0.31, "learning_rate": 0.00035054861899356794, "loss": 3.485, "theoretical_loss": 3.7040900939064008, "tokens_seen": 856686592 }, { "epoch": 0.31, "learning_rate": 0.00035035944003026864, "loss": 3.459, "theoretical_loss": 3.7036465562442746, "tokens_seen": 857735168 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.4735029935836792, "objective/train/docs_used": 493471, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3861629962921143, "objective/train/original_loss": 3.3861632347106934, "objective/train/theoretical_loss": 3.7033143582138752, "objective/train/tokens_used": 878981600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23431852459907532, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485413074493408, "objective/train/weighted_lm_loss": 3.551286220550537, "objective/train/weights_max": 1.0512185096740723, "objective/train/weights_min": 0.9518417119979858, "theoretical_loss": 3.7033143582138752, "tokens_seen": 858521600 }, { "epoch": 0.31, "learning_rate": 0.0003501702610669694, "loss": 3.4133, "theoretical_loss": 3.703203712083961, "tokens_seen": 858783744 }, { "epoch": 0.31, "learning_rate": 0.0003499810821036701, "loss": 3.5271, "theoretical_loss": 3.702761559496338, "tokens_seen": 859832320 }, { "epoch": 0.31, "learning_rate": 0.00034979190314037077, "loss": 3.4913, "theoretical_loss": 3.7023200965599967, "tokens_seen": 860880896 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.48700904846191406, "objective/train/docs_used": 495231, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3630502223968506, "objective/train/original_loss": 3.3630504608154297, "objective/train/theoretical_loss": 3.701934380715622, "objective/train/tokens_used": 882258400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24430139362812042, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499428510665894, "objective/train/weighted_lm_loss": 3.531970262527466, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9546670317649841, "theoretical_loss": 3.701934380715622, "tokens_seen": 861798400 }, { "epoch": 0.31, "learning_rate": 0.0003496027241770715, "loss": 3.4939, "theoretical_loss": 3.7018793213611954, "tokens_seen": 861929472 }, { "epoch": 0.31, "learning_rate": 0.0003494135452137722, "loss": 3.4595, "theoretical_loss": 3.7014392319938265, "tokens_seen": 862978048 }, { "epoch": 0.31, "learning_rate": 0.00034922436625047296, "loss": 3.5073, "theoretical_loss": 3.700999826559369, "tokens_seen": 864026624 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.4687807261943817, "objective/train/docs_used": 497313, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8510427474975586, "objective/train/original_loss": 2.8510427474975586, "objective/train/theoretical_loss": 3.700561103166857, "objective/train/tokens_used": 885535200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2361258715391159, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0480778217315674, "objective/train/weighted_lm_loss": 2.9898033142089844, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9532948732376099, "theoretical_loss": 3.700561103166857, "tokens_seen": 865075200 }, { "epoch": 0.31, "learning_rate": 0.00034903518728717365, "loss": 3.435, "theoretical_loss": 3.700561103166857, "tokens_seen": 865075200 }, { "epoch": 0.31, "learning_rate": 0.0003488460083238744, "loss": 3.4834, "theoretical_loss": 3.7001230599328334, "tokens_seen": 866123776 }, { "epoch": 0.31, "learning_rate": 0.00034865682936057515, "loss": 3.4139, "theoretical_loss": 3.6996856949813184, "tokens_seen": 867172352 }, { "epoch": 0.31, "learning_rate": 0.00034846765039727584, "loss": 3.5175, "theoretical_loss": 3.6992490064437624, "tokens_seen": 868220928 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.4850696623325348, "objective/train/docs_used": 499341, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.129650831222534, "objective/train/original_loss": 3.1296510696411133, "objective/train/theoretical_loss": 3.6991944678441504, "objective/train/tokens_used": 888812000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24183686077594757, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497361421585083, "objective/train/weighted_lm_loss": 3.2843034267425537, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.952808678150177, "theoretical_loss": 3.6991944678441504, "tokens_seen": 868352000 }, { "epoch": 0.31, "learning_rate": 0.0003482784714339766, "loss": 3.402, "theoretical_loss": 3.6988129924590156, "tokens_seen": 869269504 }, { "epoch": 0.31, "learning_rate": 0.0003480892924706773, "loss": 3.4196, "theoretical_loss": 3.698377651173285, "tokens_seen": 870318080 }, { "epoch": 0.31, "learning_rate": 0.000347900113507378, "loss": 3.4628, "theoretical_loss": 3.6979429807400965, "tokens_seen": 871366656 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.48964646458625793, "objective/train/docs_used": 501132, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.557471513748169, "objective/train/original_loss": 3.557471752166748, "objective/train/theoretical_loss": 3.69783441773682, "objective/train/tokens_used": 892088800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24149833619594574, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501922369003296, "objective/train/weighted_lm_loss": 3.736229419708252, "objective/train/weights_max": 1.0512193441390991, "objective/train/weights_min": 0.9738736152648926, "theoretical_loss": 3.69783441773682, "tokens_seen": 871628800 }, { "epoch": 0.31, "learning_rate": 0.0003477109345440787, "loss": 3.5082, "theoretical_loss": 3.6975089793202613, "tokens_seen": 872415232 }, { "epoch": 0.31, "learning_rate": 0.0003475217555807794, "loss": 3.459, "theoretical_loss": 3.697075645081833, "tokens_seen": 873463808 }, { "epoch": 0.31, "learning_rate": 0.0003473325766174801, "loss": 3.4345, "theoretical_loss": 3.6966429762000756, "tokens_seen": 874512384 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.48227402567863464, "objective/train/docs_used": 502873, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.550771713256836, "objective/train/original_loss": 3.550771713256836, "objective/train/theoretical_loss": 3.696480896535487, "objective/train/tokens_used": 895365600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23732295632362366, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494335889816284, "objective/train/weighted_lm_loss": 3.7260501384735107, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9638433456420898, "theoretical_loss": 3.696480896535487, "tokens_seen": 874905600 }, { "epoch": 0.31, "learning_rate": 0.00034714339765418086, "loss": 3.4506, "theoretical_loss": 3.696210970857422, "tokens_seen": 875560960 }, { "epoch": 0.31, "learning_rate": 0.00034695421869088155, "loss": 3.3722, "theoretical_loss": 3.695779627243439, "tokens_seen": 876609536 }, { "epoch": 0.31, "learning_rate": 0.0003467650397275823, "loss": 3.3784, "theoretical_loss": 3.695348943554793, "tokens_seen": 877658112 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.48700252175331116, "objective/train/docs_used": 504313, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2833023071289062, "objective/train/original_loss": 3.2833027839660645, "objective/train/theoretical_loss": 3.695133848620862, "objective/train/tokens_used": 898642400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24063783884048462, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499235391616821, "objective/train/weighted_lm_loss": 3.4479916095733643, "objective/train/weights_max": 1.0512157678604126, "objective/train/weights_min": 0.9604228734970093, "theoretical_loss": 3.695133848620862, "tokens_seen": 878182400 }, { "epoch": 0.31, "learning_rate": 0.00034657586076428305, "loss": 3.3717, "theoretical_loss": 3.6949189179952113, "tokens_seen": 878706688 }, { "epoch": 0.31, "learning_rate": 0.00034638668180098374, "loss": 3.4457, "theoretical_loss": 3.6944895487754454, "tokens_seen": 879755264 }, { "epoch": 0.31, "learning_rate": 0.0003461975028376845, "loss": 3.3993, "theoretical_loss": 3.6940608341132375, "tokens_seen": 880803840 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.4854235053062439, "objective/train/docs_used": 506500, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4923324584960938, "objective/train/original_loss": 3.4923322200775146, "objective/train/theoretical_loss": 3.693793219052748, "objective/train/tokens_used": 901919200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24003635346889496, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049762487411499, "objective/train/weighted_lm_loss": 3.665571928024292, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9516666531562805, "theoretical_loss": 3.693793219052748, "tokens_seen": 881459200 }, { "epoch": 0.31, "learning_rate": 0.0003460083238743852, "loss": 3.3099, "theoretical_loss": 3.693632772233284, "tokens_seen": 881852416 }, { "epoch": 0.32, "learning_rate": 0.00034581914491108593, "loss": 3.4134, "theoretical_loss": 3.6932053613671982, "tokens_seen": 882900992 }, { "epoch": 0.32, "learning_rate": 0.0003456299659477866, "loss": 3.4172, "theoretical_loss": 3.6927785997534794, "tokens_seen": 883949568 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.4926452040672302, "objective/train/docs_used": 508372, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.025479793548584, "objective/train/original_loss": 3.025480031967163, "objective/train/theoretical_loss": 3.6924589535592656, "objective/train/tokens_used": 905196000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24352741241455078, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505025386810303, "objective/train/weighted_lm_loss": 3.178568124771118, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9983458518981934, "theoretical_loss": 3.6924589535592656, "tokens_seen": 884736000 }, { "epoch": 0.32, "learning_rate": 0.0003454407869844873, "loss": 3.4296, "theoretical_loss": 3.692352485637474, "tokens_seen": 884998144 }, { "epoch": 0.32, "learning_rate": 0.00034525160802118806, "loss": 3.4518, "theoretical_loss": 3.6919270172713414, "tokens_seen": 886046720 }, { "epoch": 0.32, "learning_rate": 0.00034506242905788876, "loss": 3.4126, "theoretical_loss": 3.6915021929140224, "tokens_seen": 887095296 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.47590452432632446, "objective/train/docs_used": 510118, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.510807991027832, "objective/train/original_loss": 3.510807991027832, "objective/train/theoretical_loss": 3.691130998526281, "objective/train/tokens_used": 908472800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24169528484344482, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048818826675415, "objective/train/weighted_lm_loss": 3.6790857315063477, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9513728022575378, "theoretical_loss": 3.691130998526281, "tokens_seen": 888012800 }, { "epoch": 0.32, "learning_rate": 0.00034487325009458945, "loss": 3.5185, "theoretical_loss": 3.691078010831202, "tokens_seen": 888143872 }, { "epoch": 0.32, "learning_rate": 0.0003446840711312902, "loss": 3.4558, "theoretical_loss": 3.690654469295275, "tokens_seen": 889192448 }, { "epoch": 0.32, "learning_rate": 0.0003444948921679909, "loss": 3.4879, "theoretical_loss": 3.6902315665853163, "tokens_seen": 890241024 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.4895252287387848, "objective/train/docs_used": 511934, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.69549822807312, "objective/train/original_loss": 3.695497989654541, "objective/train/theoretical_loss": 3.689809300987042, "objective/train/tokens_used": 911749600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24316494166851044, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050188660621643, "objective/train/weighted_lm_loss": 3.88081955909729, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9516122937202454, "theoretical_loss": 3.689809300987042, "tokens_seen": 891289600 }, { "epoch": 0.32, "learning_rate": 0.0003443057132046917, "loss": 3.4757, "theoretical_loss": 3.689809300987042, "tokens_seen": 891289600 }, { "epoch": 0.32, "learning_rate": 0.0003441165342413924, "loss": 3.3858, "theoretical_loss": 3.6893876707927777, "tokens_seen": 892338176 }, { "epoch": 0.32, "learning_rate": 0.0003439273552780931, "loss": 3.4611, "theoretical_loss": 3.6889666743014295, "tokens_seen": 893386752 }, { "epoch": 0.32, "learning_rate": 0.00034373817631479383, "loss": 3.3919, "theoretical_loss": 3.6885463098184434, "tokens_seen": 894435328 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.47695353627204895, "objective/train/docs_used": 513625, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3136649131774902, "objective/train/original_loss": 3.3136653900146484, "objective/train/theoretical_loss": 3.688493808612015, "objective/train/tokens_used": 915026400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2371138483285904, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489006042480469, "objective/train/weighted_lm_loss": 3.475831985473633, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9526435136795044, "theoretical_loss": 3.688493808612015, "tokens_seen": 894566400 }, { "epoch": 0.32, "learning_rate": 0.0003435489973514945, "loss": 3.4412, "theoretical_loss": 3.6881265756557795, "tokens_seen": 895483904 }, { "epoch": 0.32, "learning_rate": 0.00034335981838819527, "loss": 3.406, "theoretical_loss": 3.6877074701318735, "tokens_seen": 896532480 }, { "epoch": 0.32, "learning_rate": 0.00034317063942489596, "loss": 3.4235, "theoretical_loss": 3.6872889915716107, "tokens_seen": 897581056 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.48399674892425537, "objective/train/docs_used": 515235, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2255749702453613, "objective/train/original_loss": 3.2255749702453613, "objective/train/theoretical_loss": 3.6871844696989227, "objective/train/tokens_used": 918303200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23830629885196686, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04961097240448, "objective/train/weighted_lm_loss": 3.38556170463562, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9623337388038635, "theoretical_loss": 3.6871844696989227, "tokens_seen": 897843200 }, { "epoch": 0.32, "learning_rate": 0.00034298146046159666, "loss": 3.4544, "theoretical_loss": 3.6868711383062873, "tokens_seen": 898629632 }, { "epoch": 0.32, "learning_rate": 0.0003427922814982974, "loss": 3.4239, "theoretical_loss": 3.686453908673583, "tokens_seen": 899678208 }, { "epoch": 0.32, "learning_rate": 0.0003426031025349981, "loss": 3.4579, "theoretical_loss": 3.6860373010175262, "tokens_seen": 900726784 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.4840245246887207, "objective/train/docs_used": 516962, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.5026180744171143, "objective/train/original_loss": 3.5026183128356934, "objective/train/theoretical_loss": 3.685881233162962, "objective/train/tokens_used": 921580000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2395327091217041, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496200323104858, "objective/train/weighted_lm_loss": 3.6757030487060547, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9527707695960999, "theoretical_loss": 3.685881233162962, "tokens_seen": 901120000 }, { "epoch": 0.32, "learning_rate": 0.0003424139235716988, "loss": 3.4299, "theoretical_loss": 3.685621313688465, "tokens_seen": 901775360 }, { "epoch": 0.32, "learning_rate": 0.00034222474460839954, "loss": 3.4742, "theoretical_loss": 3.6852059450430343, "tokens_seen": 902823936 }, { "epoch": 0.32, "learning_rate": 0.0003420355656451003, "loss": 3.4374, "theoretical_loss": 3.6847911934441244, "tokens_seen": 903872512 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.4911254346370697, "objective/train/docs_used": 518997, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.090237617492676, "objective/train/original_loss": 3.0902373790740967, "objective/train/theoretical_loss": 3.6845840485272205, "objective/train/tokens_used": 924856800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24378250539302826, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503517389297485, "objective/train/weighted_lm_loss": 3.2452797889709473, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9669274687767029, "theoretical_loss": 3.6845840485272205, "tokens_seen": 904396800 }, { "epoch": 0.32, "learning_rate": 0.00034184638668180103, "loss": 3.4429, "theoretical_loss": 3.6843770572608507, "tokens_seen": 904921088 }, { "epoch": 0.32, "learning_rate": 0.0003416572077185017, "loss": 3.4243, "theoretical_loss": 3.6839635348685222, "tokens_seen": 905969664 }, { "epoch": 0.32, "learning_rate": 0.0003414680287552024, "loss": 3.4806, "theoretical_loss": 3.6835506246486105, "tokens_seen": 907018240 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.48305195569992065, "objective/train/docs_used": 520740, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1138951778411865, "objective/train/original_loss": 3.1138947010040283, "objective/train/theoretical_loss": 3.6832928659132724, "objective/train/tokens_used": 928133600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23734770715236664, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495115518569946, "objective/train/weighted_lm_loss": 3.2679831981658936, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9549471735954285, "theoretical_loss": 3.6832928659132724, "tokens_seen": 907673600 }, { "epoch": 0.32, "learning_rate": 0.00034127884979190317, "loss": 3.4425, "theoretical_loss": 3.6831383249887226, "tokens_seen": 908066816 }, { "epoch": 0.32, "learning_rate": 0.00034108967082860386, "loss": 3.4278, "theoretical_loss": 3.682726634282564, "tokens_seen": 909115392 }, { "epoch": 0.33, "learning_rate": 0.0003409004918653046, "loss": 3.4633, "theoretical_loss": 3.682315550929917, "tokens_seen": 910163968 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.4881671071052551, "objective/train/docs_used": 522796, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8828225135803223, "objective/train/original_loss": 2.8828227519989014, "objective/train/theoretical_loss": 3.6820076360319485, "objective/train/tokens_used": 931410400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2397347241640091, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500353574752808, "objective/train/weighted_lm_loss": 3.027292251586914, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9536739587783813, "theoretical_loss": 3.6820076360319485, "tokens_seen": 910950400 }, { "epoch": 0.33, "learning_rate": 0.0003407113129020053, "loss": 3.4252, "theoretical_loss": 3.6819050733366017, "tokens_seen": 911212544 }, { "epoch": 0.33, "learning_rate": 0.000340522133938706, "loss": 3.4594, "theoretical_loss": 3.6814951999144547, "tokens_seen": 912261120 }, { "epoch": 0.33, "learning_rate": 0.00034033295497540674, "loss": 3.4095, "theoretical_loss": 3.681085929081294, "tokens_seen": 913309696 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.4804491102695465, "objective/train/docs_used": 524442, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4967141151428223, "objective/train/original_loss": 3.4967143535614014, "objective/train/theoretical_loss": 3.6807283101742865, "objective/train/tokens_used": 934687200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23661978542804718, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04924738407135, "objective/train/weighted_lm_loss": 3.6696856021881104, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9522182941436768, "theoretical_loss": 3.6807283101742865, "tokens_seen": 914227200 }, { "epoch": 0.33, "learning_rate": 0.00034014377601210744, "loss": 3.4546, "theoretical_loss": 3.680677259260892, "tokens_seen": 914358272 }, { "epoch": 0.33, "learning_rate": 0.00033995459704880813, "loss": 3.4304, "theoretical_loss": 3.6802691888829453, "tokens_seen": 915406848 }, { "epoch": 0.33, "learning_rate": 0.00033976541808550893, "loss": 3.5283, "theoretical_loss": 3.679861716383046, "tokens_seen": 916455424 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.49545738101005554, "objective/train/docs_used": 526356, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.091348886489868, "objective/train/original_loss": 3.0913491249084473, "objective/train/theoretical_loss": 3.6794548402026535, "objective/train/tokens_used": 937964000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2462480366230011, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0507975816726685, "objective/train/weighted_lm_loss": 3.2484302520751953, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.9557216167449951, "theoretical_loss": 3.6794548402026535, "tokens_seen": 917504000 }, { "epoch": 0.33, "learning_rate": 0.0003395762391222096, "loss": 3.359, "theoretical_loss": 3.6794548402026535, "tokens_seen": 917504000 }, { "epoch": 0.33, "learning_rate": 0.0003393870601589104, "loss": 3.4658, "theoretical_loss": 3.6790485587890642, "tokens_seen": 918552576 }, { "epoch": 0.33, "learning_rate": 0.00033919788119561107, "loss": 3.4122, "theoretical_loss": 3.6786428705953855, "tokens_seen": 919601152 }, { "epoch": 0.33, "learning_rate": 0.00033900870223231176, "loss": 3.4613, "theoretical_loss": 3.6782377740805043, "tokens_seen": 920649728 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.48470258712768555, "objective/train/docs_used": 528450, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.134005308151245, "objective/train/original_loss": 3.1340060234069824, "objective/train/theoretical_loss": 3.678187178542029, "objective/train/tokens_used": 941240800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24029278755187988, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496915578842163, "objective/train/weighted_lm_loss": 3.289982795715332, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9536044597625732, "theoretical_loss": 3.678187178542029, "tokens_seen": 920780800 }, { "epoch": 0.33, "learning_rate": 0.0003388195232690125, "loss": 3.4477, "theoretical_loss": 3.6778332677090617, "tokens_seen": 921698304 }, { "epoch": 0.33, "learning_rate": 0.0003386303443057132, "loss": 3.4984, "theoretical_loss": 3.6774293499514243, "tokens_seen": 922746880 }, { "epoch": 0.33, "learning_rate": 0.00033844116534241395, "loss": 3.4305, "theoretical_loss": 3.6770260192836544, "tokens_seen": 923795456 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.48579150438308716, "objective/train/docs_used": 529792, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3090968132019043, "objective/train/original_loss": 3.3090968132019043, "objective/train/theoretical_loss": 3.6769252781714576, "objective/train/tokens_used": 944517600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24083971977233887, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049803376197815, "objective/train/weighted_lm_loss": 3.472637176513672, "objective/train/weights_max": 1.0512195825576782, "objective/train/weights_min": 0.9520125389099121, "theoretical_loss": 3.6769252781714576, "tokens_seen": 924057600 }, { "epoch": 0.33, "learning_rate": 0.00033825198637911464, "loss": 3.3764, "theoretical_loss": 3.6766232741874845, "tokens_seen": 924844032 }, { "epoch": 0.33, "learning_rate": 0.00033806280741581534, "loss": 3.4332, "theoretical_loss": 3.67622111315029, "tokens_seen": 925892608 }, { "epoch": 0.33, "learning_rate": 0.0003378736284525161, "loss": 3.3611, "theoretical_loss": 3.6758195346650595, "tokens_seen": 926941184 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.4928480386734009, "objective/train/docs_used": 531599, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.557663679122925, "objective/train/original_loss": 3.557663679122925, "objective/train/theoretical_loss": 3.6756690926156557, "objective/train/tokens_used": 947794400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2444053441286087, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505273342132568, "objective/train/weighted_lm_loss": 3.7376136779785156, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9553889632225037, "theoretical_loss": 3.6756690926156557, "tokens_seen": 927334400 }, { "epoch": 0.33, "learning_rate": 0.0003376844494892168, "loss": 3.275, "theoretical_loss": 3.6754185372303705, "tokens_seen": 927989760 }, { "epoch": 0.33, "learning_rate": 0.0003374952705259176, "loss": 3.4063, "theoretical_loss": 3.6750181193503604, "tokens_seen": 929038336 }, { "epoch": 0.33, "learning_rate": 0.00033730609156261827, "loss": 3.2966, "theoretical_loss": 3.6746182795347013, "tokens_seen": 930086912 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.48329272866249084, "objective/train/docs_used": 533595, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.309666156768799, "objective/train/original_loss": 3.309666156768799, "objective/train/theoretical_loss": 3.674418575936782, "objective/train/tokens_used": 951071200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23755574226379395, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495368242263794, "objective/train/weighted_lm_loss": 3.4753262996673584, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9564974308013916, "theoretical_loss": 3.674418575936782, "tokens_seen": 930611200 }, { "epoch": 0.33, "learning_rate": 0.00033711691259931897, "loss": 3.3508, "theoretical_loss": 3.674219016298571, "tokens_seen": 931135488 }, { "epoch": 0.33, "learning_rate": 0.0003369277336360197, "loss": 3.351, "theoretical_loss": 3.673820328162628, "tokens_seen": 932184064 }, { "epoch": 0.33, "learning_rate": 0.0003367385546727204, "loss": 3.367, "theoretical_loss": 3.673422213652986, "tokens_seen": 933232640 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.48411282896995544, "objective/train/docs_used": 535440, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0631942749023438, "objective/train/original_loss": 3.063194751739502, "objective/train/theoretical_loss": 3.6731736827263513, "objective/train/tokens_used": 954348000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23934400081634521, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496280193328857, "objective/train/weighted_lm_loss": 3.2147536277770996, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9552776217460632, "theoretical_loss": 3.6731736827263513, "tokens_seen": 933888000 }, { "epoch": 0.33, "learning_rate": 0.0003365493757094211, "loss": 3.3911, "theoretical_loss": 3.673024671301186, "tokens_seen": 934281216 }, { "epoch": 0.33, "learning_rate": 0.00033636019674612185, "loss": 3.4291, "theoretical_loss": 3.6726276996441705, "tokens_seen": 935329792 }, { "epoch": 0.33, "learning_rate": 0.00033617101778282254, "loss": 3.3124, "theoretical_loss": 3.6722312972242594, "tokens_seen": 936378368 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.4916326701641083, "objective/train/docs_used": 537222, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.992642879486084, "objective/train/original_loss": 2.992642879486084, "objective/train/theoretical_loss": 3.6719343680973067, "objective/train/tokens_used": 957624800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24388481676578522, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050403118133545, "objective/train/weighted_lm_loss": 3.1425609588623047, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9709485173225403, "theoretical_loss": 3.6719343680973067, "tokens_seen": 937164800 }, { "epoch": 0.33, "learning_rate": 0.0003359818388195233, "loss": 3.3708, "theoretical_loss": 3.6718354625891205, "tokens_seen": 937426944 }, { "epoch": 0.34, "learning_rate": 0.000335792659856224, "loss": 3.282, "theoretical_loss": 3.6714401942917485, "tokens_seen": 938475520 }, { "epoch": 0.34, "learning_rate": 0.0003356034808929247, "loss": 3.3235, "theoretical_loss": 3.6710454908904366, "tokens_seen": 939524096 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.48321402072906494, "objective/train/docs_used": 539286, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.189788818359375, "objective/train/original_loss": 3.189789295196533, "objective/train/theoretical_loss": 3.6707005876762313, "objective/train/tokens_used": 960901600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2379058301448822, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495306253433228, "objective/train/weighted_lm_loss": 3.346953868865967, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9523076415061951, "theoretical_loss": 3.6707005876762313, "tokens_seen": 940441600 }, { "epoch": 0.34, "learning_rate": 0.0003354143019296254, "loss": 3.3441, "theoretical_loss": 3.6706513509487513, "tokens_seen": 940572672 }, { "epoch": 0.34, "learning_rate": 0.00033522512296632617, "loss": 3.3747, "theoretical_loss": 3.6702577730355084, "tokens_seen": 941621248 }, { "epoch": 0.34, "learning_rate": 0.0003350359440030269, "loss": 3.4368, "theoretical_loss": 3.6698647557247472, "tokens_seen": 942669824 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.48219361901283264, "objective/train/docs_used": 541407, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.251582622528076, "objective/train/original_loss": 3.251582622528076, "objective/train/theoretical_loss": 3.6694722975957066, "objective/train/tokens_used": 964178400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23559176921844482, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494168996810913, "objective/train/weighted_lm_loss": 3.411501407623291, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9520928859710693, "theoretical_loss": 3.6694722975957066, "tokens_seen": 943718400 }, { "epoch": 0.34, "learning_rate": 0.0003348467650397276, "loss": 3.3476, "theoretical_loss": 3.6694722975957066, "tokens_seen": 943718400 }, { "epoch": 0.34, "learning_rate": 0.0003346575860764283, "loss": 3.4193, "theoretical_loss": 3.6690803972327988, "tokens_seen": 944766976 }, { "epoch": 0.34, "learning_rate": 0.00033446840711312905, "loss": 3.3699, "theoretical_loss": 3.6686890532255862, "tokens_seen": 945815552 }, { "epoch": 0.34, "learning_rate": 0.00033427922814982975, "loss": 3.3711, "theoretical_loss": 3.6682982641687563, "tokens_seen": 946864128 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.48947545886039734, "objective/train/docs_used": 543040, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.059685707092285, "objective/train/original_loss": 3.059685707092285, "objective/train/theoretical_loss": 3.668249454486809, "objective/train/tokens_used": 967455200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24430738389492035, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501893758773804, "objective/train/weighted_lm_loss": 3.213547945022583, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.951458215713501, "theoretical_loss": 3.668249454486809, "tokens_seen": 946995200 }, { "epoch": 0.34, "learning_rate": 0.00033409004918653044, "loss": 3.3339, "theoretical_loss": 3.6679080286620973, "tokens_seen": 947912704 }, { "epoch": 0.34, "learning_rate": 0.0003339008702232312, "loss": 3.3752, "theoretical_loss": 3.6675183453104747, "tokens_seen": 948961280 }, { "epoch": 0.34, "learning_rate": 0.0003337116912599319, "loss": 3.4667, "theoretical_loss": 3.6671292127238067, "tokens_seen": 950009856 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.4804360866546631, "objective/train/docs_used": 545196, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2226321697235107, "objective/train/original_loss": 3.2226319313049316, "objective/train/theoretical_loss": 3.667032015471743, "objective/train/tokens_used": 970732000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24167245626449585, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492719411849976, "objective/train/weighted_lm_loss": 3.3797309398651123, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9517346620559692, "theoretical_loss": 3.667032015471743, "tokens_seen": 950272000 }, { "epoch": 0.34, "learning_rate": 0.00033352251229663263, "loss": 3.4203, "theoretical_loss": 3.66674062951704, "tokens_seen": 951058432 }, { "epoch": 0.34, "learning_rate": 0.0003333333333333333, "loss": 3.335, "theoretical_loss": 3.666352594310127, "tokens_seen": 952107008 }, { "epoch": 0.34, "learning_rate": 0.000333144154370034, "loss": 3.3466, "theoretical_loss": 3.6659651057280023, "tokens_seen": 953155584 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.4831368923187256, "objective/train/docs_used": 546810, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0602049827575684, "objective/train/original_loss": 3.0602047443389893, "objective/train/theoretical_loss": 3.66581993815661, "objective/train/tokens_used": 974008800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2421431988477707, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495444536209106, "objective/train/weighted_lm_loss": 3.211397409439087, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9515434503555298, "theoretical_loss": 3.66581993815661, "tokens_seen": 953548800 }, { "epoch": 0.34, "learning_rate": 0.00033295497540673476, "loss": 3.3687, "theoretical_loss": 3.665578162400558, "tokens_seen": 954204160 }, { "epoch": 0.34, "learning_rate": 0.0003327657964434355, "loss": 3.3398, "theoretical_loss": 3.66519176296262, "tokens_seen": 955252736 }, { "epoch": 0.34, "learning_rate": 0.00033257661748013626, "loss": 3.3475, "theoretical_loss": 3.664805906053928, "tokens_seen": 956301312 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.4918384850025177, "objective/train/docs_used": 548880, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3802547454833984, "objective/train/original_loss": 3.3802547454833984, "objective/train/theoretical_loss": 3.664613180624306, "objective/train/tokens_used": 977285600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2429351955652237, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050418734550476, "objective/train/weighted_lm_loss": 3.550429582595825, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9534175992012024, "theoretical_loss": 3.664613180624306, "tokens_seen": 956825600 }, { "epoch": 0.34, "learning_rate": 0.00033238743851683695, "loss": 3.4054, "theoretical_loss": 3.6644205903191107, "tokens_seen": 957349888 }, { "epoch": 0.34, "learning_rate": 0.00033219825955353765, "loss": 3.362, "theoretical_loss": 3.664035814407661, "tokens_seen": 958398464 }, { "epoch": 0.34, "learning_rate": 0.0003320090805902384, "loss": 3.3747, "theoretical_loss": 3.663651576973915, "tokens_seen": 959447040 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.4942702054977417, "objective/train/docs_used": 550512, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2487757205963135, "objective/train/original_loss": 3.2487761974334717, "objective/train/theoretical_loss": 3.663411701427548, "objective/train/tokens_used": 980562400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24505145847797394, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506727695465088, "objective/train/weighted_lm_loss": 3.413585662841797, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9725967645645142, "theoretical_loss": 3.663411701427548, "tokens_seen": 960102400 }, { "epoch": 0.34, "learning_rate": 0.0003318199016269391, "loss": 3.3048, "theoretical_loss": 3.66326787667703, "tokens_seen": 960495616 }, { "epoch": 0.34, "learning_rate": 0.0003316307226636398, "loss": 3.401, "theoretical_loss": 3.6628847121809613, "tokens_seen": 961544192 }, { "epoch": 0.34, "learning_rate": 0.00033144154370034053, "loss": 3.3882, "theoretical_loss": 3.662502082154439, "tokens_seen": 962592768 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.48432281613349915, "objective/train/docs_used": 552388, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.152970314025879, "objective/train/original_loss": 3.152970314025879, "objective/train/theoretical_loss": 3.662215459582027, "objective/train/tokens_used": 983839200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24182836711406708, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496615171432495, "objective/train/weighted_lm_loss": 3.308091640472412, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9514780640602112, "theoretical_loss": 3.662215459582027, "tokens_seen": 963379200 }, { "epoch": 0.34, "learning_rate": 0.0003312523647370412, "loss": 3.3735, "theoretical_loss": 3.662119985270947, "tokens_seen": 963641344 }, { "epoch": 0.34, "learning_rate": 0.00033106318577374197, "loss": 3.4034, "theoretical_loss": 3.6617384202087004, "tokens_seen": 964689920 }, { "epoch": 0.34, "learning_rate": 0.00033087400681044266, "loss": 3.4614, "theoretical_loss": 3.6613573856506236, "tokens_seen": 965738496 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.49010223150253296, "objective/train/docs_used": 553451, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1867620944976807, "objective/train/original_loss": 3.1867618560791016, "objective/train/theoretical_loss": 3.661024414559681, "objective/train/tokens_used": 987116000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24266354739665985, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050243854522705, "objective/train/weighted_lm_loss": 3.3466124534606934, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9682541489601135, "theoretical_loss": 3.661024414559681, "tokens_seen": 966656000 }, { "epoch": 0.35, "learning_rate": 0.00033068482784714336, "loss": 3.3727, "theoretical_loss": 3.6609768802843274, "tokens_seen": 966787072 }, { "epoch": 0.35, "learning_rate": 0.00033049564888384416, "loss": 3.4051, "theoretical_loss": 3.660596902802089, "tokens_seen": 967835648 }, { "epoch": 0.35, "learning_rate": 0.00033030646992054485, "loss": 3.5094, "theoretical_loss": 3.66021745190083, "tokens_seen": 968884224 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.48655059933662415, "objective/train/docs_used": 554109, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4037506580352783, "objective/train/original_loss": 3.4037506580352783, "objective/train/theoretical_loss": 3.6598385262820923, "objective/train/tokens_used": 990392800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24164670705795288, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049883484840393, "objective/train/weighted_lm_loss": 3.573542356491089, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9580250978469849, "theoretical_loss": 3.6598385262820923, "tokens_seen": 969932800 }, { "epoch": 0.35, "learning_rate": 0.0003301172909572456, "loss": 3.4282, "theoretical_loss": 3.6598385262820923, "tokens_seen": 969932800 }, { "epoch": 0.35, "learning_rate": 0.0003299281119939463, "loss": 3.5318, "theoretical_loss": 3.659460124652022, "tokens_seen": 970981376 }, { "epoch": 0.35, "learning_rate": 0.000329738933030647, "loss": 3.528, "theoretical_loss": 3.6590822457213426, "tokens_seen": 972029952 }, { "epoch": 0.35, "learning_rate": 0.00032954975406734773, "loss": 3.6384, "theoretical_loss": 3.658704888205337, "tokens_seen": 973078528 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.48475736379623413, "objective/train/docs_used": 555679, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.5973386764526367, "objective/train/original_loss": 3.5973386764526367, "objective/train/theoretical_loss": 3.6586577551139974, "objective/train/tokens_used": 993669600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2393985539674759, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496925115585327, "objective/train/weighted_lm_loss": 3.776460647583008, "objective/train/weights_max": 1.0512161254882812, "objective/train/weights_min": 0.9569512009620667, "theoretical_loss": 3.6586577551139974, "tokens_seen": 973209600 }, { "epoch": 0.35, "learning_rate": 0.0003293605751040484, "loss": 3.5721, "theoretical_loss": 3.658328050823826, "tokens_seen": 974127104 }, { "epoch": 0.35, "learning_rate": 0.0003291713961407491, "loss": 3.5957, "theoretical_loss": 3.657951732301148, "tokens_seen": 975175680 }, { "epoch": 0.35, "learning_rate": 0.00032898221717744987, "loss": 3.5828, "theoretical_loss": 3.657575931366135, "tokens_seen": 976224256 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.4789718985557556, "objective/train/docs_used": 557385, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.122087001800537, "objective/train/original_loss": 3.122087001800537, "objective/train/theoretical_loss": 3.657482061856916, "objective/train/tokens_used": 996946400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23479175567626953, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490905046463013, "objective/train/weighted_lm_loss": 3.275631904602051, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9560718536376953, "theoretical_loss": 3.657482061856916, "tokens_seen": 976486400 }, { "epoch": 0.35, "learning_rate": 0.00032879303821415056, "loss": 3.6054, "theoretical_loss": 3.6572006467520968, "tokens_seen": 977272832 }, { "epoch": 0.35, "learning_rate": 0.0003286038592508513, "loss": 3.6388, "theoretical_loss": 3.6568258771967965, "tokens_seen": 978321408 }, { "epoch": 0.35, "learning_rate": 0.000328414680287552, "loss": 3.5389, "theoretical_loss": 3.6564516214424323, "tokens_seen": 979369984 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.4748334586620331, "objective/train/docs_used": 559012, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1960604190826416, "objective/train/original_loss": 3.1960604190826416, "objective/train/theoretical_loss": 3.656311407742891, "objective/train/tokens_used": 1000223200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2362358719110489, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048684000968933, "objective/train/weighted_lm_loss": 3.353394031524658, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.9522448778152466, "theoretical_loss": 3.656311407742891, "tokens_seen": 979763200 }, { "epoch": 0.35, "learning_rate": 0.00032822550132425275, "loss": 3.5887, "theoretical_loss": 3.656077878235617, "tokens_seen": 980418560 }, { "epoch": 0.35, "learning_rate": 0.0003280363223609535, "loss": 3.5621, "theoretical_loss": 3.6557046463273557, "tokens_seen": 981467136 }, { "epoch": 0.35, "learning_rate": 0.0003278471433976542, "loss": 3.5904, "theoretical_loss": 3.655331924473029, "tokens_seen": 982515712 }, { "debugging/Self-BLEU-5": 0.4286046663919377, "debugging/distinct-1-grams": 0.8147567798871364, "debugging/distinct-2-grams": 0.9823269374342457, "debugging/entropy-1-grams": 6.1671920556004824, "debugging/entropy-2-grams": 6.947028138756313, "debugging/length": 477.53333333333336, "debugging/num_segments": 15, "debugging/raw_token_scores_avg": 0.020611366257071495, "debugging/raw_token_scores_std": 0.08496682345867157, "epoch": 0.35, "objective/train/advantage_avg": 0.4793747663497925, "objective/train/docs_used": 560408, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.48490047454834, "objective/train/original_loss": 3.48490047454834, "objective/train/theoretical_loss": 3.6551457544283386, "objective/train/tokens_used": 1003500000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2370256930589676, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491422414779663, "objective/train/weighted_lm_loss": 3.6559360027313232, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9516208171844482, "theoretical_loss": 3.6551457544283386, "tokens_seen": 983040000 }, { "epoch": 0.35, "learning_rate": 0.00032765796443435494, "loss": 3.5123, "theoretical_loss": 3.6549597114323706, "tokens_seen": 983564288 }, { "epoch": 0.35, "learning_rate": 0.00032746878547105563, "loss": 3.567, "theoretical_loss": 3.6545880059694484, "tokens_seen": 984612864 }, { "epoch": 0.35, "learning_rate": 0.0003272796065077563, "loss": 3.5675, "theoretical_loss": 3.6542168068526433, "tokens_seen": 985661440 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.47530868649482727, "objective/train/docs_used": 562309, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.699045181274414, "objective/train/original_loss": 3.699045181274414, "objective/train/theoretical_loss": 3.6539850639880065, "objective/train/tokens_used": 1006776800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23356567323207855, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487180948257446, "objective/train/weighted_lm_loss": 3.881354808807373, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9573073387145996, "theoretical_loss": 3.6539850639880065, "tokens_seen": 986316800 }, { "epoch": 0.35, "learning_rate": 0.0003270904275444571, "loss": 3.5629, "theoretical_loss": 3.653846112854634, "tokens_seen": 986710016 }, { "epoch": 0.35, "learning_rate": 0.00032690124858115777, "loss": 3.4911, "theoretical_loss": 3.6534759227523708, "tokens_seen": 987758592 }, { "epoch": 0.35, "learning_rate": 0.0003267120696178585, "loss": 3.5047, "theoretical_loss": 3.653106235327061, "tokens_seen": 988807168 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.47373899817466736, "objective/train/docs_used": 564240, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.646477222442627, "objective/train/original_loss": 3.646477699279785, "objective/train/theoretical_loss": 3.65282929890904, "objective/train/tokens_used": 1010053600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23349297046661377, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485607385635376, "objective/train/weighted_lm_loss": 3.822693347930908, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9552621245384216, "theoretical_loss": 3.65282929890904, "tokens_seen": 989593600 }, { "epoch": 0.35, "learning_rate": 0.0003265228906545592, "loss": 3.5004, "theoretical_loss": 3.6527370493641493, "tokens_seen": 989855744 }, { "epoch": 0.35, "learning_rate": 0.0003263337116912599, "loss": 3.4973, "theoretical_loss": 3.652368363653297, "tokens_seen": 990904320 }, { "epoch": 0.35, "learning_rate": 0.00032614453272796065, "loss": 3.5477, "theoretical_loss": 3.6520001769883628, "tokens_seen": 991952896 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.47648999094963074, "objective/train/docs_used": 566160, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.7423951625823975, "objective/train/original_loss": 3.7423954010009766, "objective/train/theoretical_loss": 3.651678422085146, "objective/train/tokens_used": 1013330400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2367618978023529, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488523244857788, "objective/train/weighted_lm_loss": 3.9235517978668213, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9544411301612854, "theoretical_loss": 3.651678422085146, "tokens_seen": 992870400 }, { "epoch": 0.35, "learning_rate": 0.0003259553537646614, "loss": 3.4798, "theoretical_loss": 3.651632488167385, "tokens_seen": 993001472 }, { "epoch": 0.36, "learning_rate": 0.0003257661748013621, "loss": 3.5252, "theoretical_loss": 3.651265295992563, "tokens_seen": 994050048 }, { "epoch": 0.36, "learning_rate": 0.00032557699583806284, "loss": 3.4845, "theoretical_loss": 3.650898599270236, "tokens_seen": 995098624 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.46204280853271484, "objective/train/docs_used": 567938, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3266007900238037, "objective/train/original_loss": 3.3266003131866455, "objective/train/theoretical_loss": 3.6505323968108674, "objective/train/tokens_used": 1016607200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23566798865795135, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.047401785850525, "objective/train/weighted_lm_loss": 3.4848270416259766, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9548930525779724, "theoretical_loss": 3.6505323968108674, "tokens_seen": 996147200 }, { "epoch": 0.36, "learning_rate": 0.00032538781687476353, "loss": 3.4402, "theoretical_loss": 3.6505323968108674, "tokens_seen": 996147200 }, { "epoch": 0.36, "learning_rate": 0.0003251986379114643, "loss": 3.5624, "theoretical_loss": 3.6501666874290244, "tokens_seen": 997195776 }, { "epoch": 0.36, "learning_rate": 0.00032500945894816497, "loss": 3.4626, "theoretical_loss": 3.6498014699433603, "tokens_seen": 998244352 }, { "epoch": 0.36, "learning_rate": 0.00032482027998486567, "loss": 3.4443, "theoretical_loss": 3.6494367431765955, "tokens_seen": 999292928 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.49096834659576416, "objective/train/docs_used": 570122, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0309109687805176, "objective/train/original_loss": 3.0309109687805176, "objective/train/theoretical_loss": 3.6493911867759463, "objective/train/tokens_used": 1019884000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24276913702487946, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503308773040771, "objective/train/weighted_lm_loss": 3.183624029159546, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9735167622566223, "theoretical_loss": 3.6493911867759463, "tokens_seen": 999424000 }, { "epoch": 0.36, "learning_rate": 0.0003246311010215664, "loss": 3.3911, "theoretical_loss": 3.6490725059554996, "tokens_seen": 1000341504 }, { "epoch": 0.36, "learning_rate": 0.0003244419220582671, "loss": 3.5018, "theoretical_loss": 3.648708757110873, "tokens_seen": 1001390080 }, { "epoch": 0.36, "learning_rate": 0.00032425274309496785, "loss": 3.4298, "theoretical_loss": 3.6483454954775305, "tokens_seen": 1002438656 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.48988527059555054, "objective/train/docs_used": 571707, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.399118185043335, "objective/train/original_loss": 3.399117946624756, "objective/train/theoretical_loss": 3.648254756059793, "objective/train/tokens_used": 1023160800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24150972068309784, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502160787582397, "objective/train/weighted_lm_loss": 3.570770025253296, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9835735559463501, "theoretical_loss": 3.648254756059793, "tokens_seen": 1002700800 }, { "epoch": 0.36, "learning_rate": 0.00032406356413166855, "loss": 3.4353, "theoretical_loss": 3.64798271989428, "tokens_seen": 1003487232 }, { "epoch": 0.36, "learning_rate": 0.00032387438516836924, "loss": 3.3814, "theoretical_loss": 3.647620429203908, "tokens_seen": 1004535808 }, { "epoch": 0.36, "learning_rate": 0.00032368520620507004, "loss": 3.5187, "theoretical_loss": 3.6472586222531587, "tokens_seen": 1005584384 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.48494696617126465, "objective/train/docs_used": 573416, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2771799564361572, "objective/train/original_loss": 3.2771804332733154, "objective/train/theoretical_loss": 3.6471230691260477, "objective/train/tokens_used": 1026437600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23839180171489716, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497063398361206, "objective/train/weighted_lm_loss": 3.440140962600708, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9522702693939209, "theoretical_loss": 3.6471230691260477, "tokens_seen": 1005977600 }, { "epoch": 0.36, "learning_rate": 0.00032349602724177074, "loss": 3.5214, "theoretical_loss": 3.6468972978927208, "tokens_seen": 1006632960 }, { "epoch": 0.36, "learning_rate": 0.00032330684827847143, "loss": 3.4675, "theoretical_loss": 3.646536454977205, "tokens_seen": 1007681536 }, { "epoch": 0.36, "learning_rate": 0.0003231176693151722, "loss": 3.4601, "theoretical_loss": 3.6461760923651294, "tokens_seen": 1008730112 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.4919039309024811, "objective/train/docs_used": 575328, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1079111099243164, "objective/train/original_loss": 3.1079111099243164, "objective/train/theoretical_loss": 3.645996090817232, "objective/train/tokens_used": 1029714400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2433389574289322, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504273176193237, "objective/train/weighted_lm_loss": 3.2646045684814453, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.959859311580658, "theoretical_loss": 3.645996090817232, "tokens_seen": 1009254400 }, { "epoch": 0.36, "learning_rate": 0.00032292849035187287, "loss": 3.4448, "theoretical_loss": 3.645816208918901, "tokens_seen": 1009778688 }, { "epoch": 0.36, "learning_rate": 0.0003227393113885736, "loss": 3.3991, "theoretical_loss": 3.6454568035048003, "tokens_seen": 1010827264 }, { "epoch": 0.36, "learning_rate": 0.0003225501324252743, "loss": 3.4667, "theoretical_loss": 3.645097874992961, "tokens_seen": 1011875840 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.46897462010383606, "objective/train/docs_used": 577263, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.042729139328003, "objective/train/original_loss": 3.042728900909424, "objective/train/theoretical_loss": 3.644873786349497, "objective/train/tokens_used": 1032991200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2294284999370575, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0480632781982422, "objective/train/weighted_lm_loss": 3.192321300506592, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9518375396728516, "theoretical_loss": 3.644873786349497, "tokens_seen": 1012531200 }, { "epoch": 0.36, "learning_rate": 0.000322360953461975, "loss": 3.4738, "theoretical_loss": 3.6447394222573557, "tokens_seen": 1012924416 }, { "epoch": 0.36, "learning_rate": 0.00032217177449867575, "loss": 3.4734, "theoretical_loss": 3.644381444175778, "tokens_seen": 1013972992 }, { "epoch": 0.36, "learning_rate": 0.00032198259553537645, "loss": 3.475, "theoretical_loss": 3.6440239396298244, "tokens_seen": 1015021568 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.47905510663986206, "objective/train/docs_used": 579051, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3449113368988037, "objective/train/original_loss": 3.344911575317383, "objective/train/theoretical_loss": 3.643756121307459, "objective/train/tokens_used": 1036268000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23849982023239136, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491175651550293, "objective/train/weighted_lm_loss": 3.5071990489959717, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9516644477844238, "theoretical_loss": 3.643756121307459, "tokens_seen": 1015808000 }, { "epoch": 0.36, "learning_rate": 0.0003217934165720772, "loss": 3.4243, "theoretical_loss": 3.643666907504879, "tokens_seen": 1016070144 }, { "epoch": 0.36, "learning_rate": 0.0003216042376087779, "loss": 3.4923, "theoretical_loss": 3.6433103466900962, "tokens_seen": 1017118720 }, { "epoch": 0.36, "learning_rate": 0.00032141505864547864, "loss": 3.4227, "theoretical_loss": 3.6429542560783856, "tokens_seen": 1018167296 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.4651617705821991, "objective/train/docs_used": 580918, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.703390121459961, "objective/train/original_loss": 3.70339035987854, "objective/train/theoretical_loss": 3.642643061639121, "objective/train/tokens_used": 1039544800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23710434138774872, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.047721028327942, "objective/train/weighted_lm_loss": 3.879617691040039, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9520632028579712, "theoretical_loss": 3.642643061639121, "tokens_seen": 1019084800 }, { "epoch": 0.36, "learning_rate": 0.0003212258796821794, "loss": 3.4734, "theoretical_loss": 3.6425986345663914, "tokens_seen": 1019215872 }, { "epoch": 0.36, "learning_rate": 0.0003210367007188801, "loss": 3.4991, "theoretical_loss": 3.6422434810544813, "tokens_seen": 1020264448 }, { "epoch": 0.36, "learning_rate": 0.00032084752175558077, "loss": 3.4978, "theoretical_loss": 3.641888794446725, "tokens_seen": 1021313024 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.48555630445480347, "objective/train/docs_used": 582721, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.36793851852417, "objective/train/original_loss": 3.3679380416870117, "objective/train/theoretical_loss": 3.6415345736508824, "objective/train/tokens_used": 1042821600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24094036221504211, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497803688049316, "objective/train/weighted_lm_loss": 3.535130023956299, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9528319835662842, "theoretical_loss": 3.6415345736508824, "tokens_seen": 1022361600 }, { "epoch": 0.37, "learning_rate": 0.0003206583427922815, "loss": 3.5065, "theoretical_loss": 3.6415345736508824, "tokens_seen": 1022361600 }, { "epoch": 0.37, "learning_rate": 0.0003204691638289822, "loss": 3.4752, "theoretical_loss": 3.6411808175783844, "tokens_seen": 1023410176 }, { "epoch": 0.37, "learning_rate": 0.00032027998486568296, "loss": 3.3807, "theoretical_loss": 3.640827525144318, "tokens_seen": 1024458752 }, { "epoch": 0.37, "learning_rate": 0.00032009080590238365, "loss": 3.405, "theoretical_loss": 3.64047469526741, "tokens_seen": 1025507328 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.47355058789253235, "objective/train/docs_used": 584025, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.485971212387085, "objective/train/original_loss": 3.485970973968506, "objective/train/theoretical_loss": 3.6404306240026356, "objective/train/tokens_used": 1046098400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2372806966304779, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485612154006958, "objective/train/weighted_lm_loss": 3.65089750289917, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9561977386474609, "theoretical_loss": 3.6404306240026356, "tokens_seen": 1025638400 }, { "epoch": 0.37, "learning_rate": 0.00031990162693908435, "loss": 3.4315, "theoretical_loss": 3.640122326870012, "tokens_seen": 1026555904 }, { "epoch": 0.37, "learning_rate": 0.0003197124479757851, "loss": 3.4454, "theoretical_loss": 3.639770418878081, "tokens_seen": 1027604480 }, { "epoch": 0.37, "learning_rate": 0.0003195232690124858, "loss": 3.4686, "theoretical_loss": 3.6394189702211706, "tokens_seen": 1028653056 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.4855080544948578, "objective/train/docs_used": 586068, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.275355339050293, "objective/train/original_loss": 3.275355100631714, "objective/train/theoretical_loss": 3.6393311797029373, "objective/train/tokens_used": 1049375200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2405899465084076, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497738122940063, "objective/train/weighted_lm_loss": 3.437480926513672, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9573184847831726, "theoretical_loss": 3.6393311797029373, "tokens_seen": 1028915200 }, { "epoch": 0.37, "learning_rate": 0.00031933409004918653, "loss": 3.4559, "theoretical_loss": 3.639067979832408, "tokens_seen": 1029701632 }, { "epoch": 0.37, "learning_rate": 0.0003191449110858873, "loss": 3.3891, "theoretical_loss": 3.6387174466484824, "tokens_seen": 1030750208 }, { "epoch": 0.37, "learning_rate": 0.000318955732122588, "loss": 3.4252, "theoretical_loss": 3.6383673696096297, "tokens_seen": 1031798784 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.4844791293144226, "objective/train/docs_used": 588066, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.536590576171875, "objective/train/original_loss": 3.536591053009033, "objective/train/theoretical_loss": 3.63823620810427, "objective/train/tokens_used": 1052652000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.239736869931221, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496665239334106, "objective/train/weighted_lm_loss": 3.7123160362243652, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9534673094749451, "theoretical_loss": 3.63823620810427, "tokens_seen": 1032192000 }, { "epoch": 0.37, "learning_rate": 0.0003187665531592887, "loss": 3.4282, "theoretical_loss": 3.638017747659614, "tokens_seen": 1032847360 }, { "epoch": 0.37, "learning_rate": 0.0003185773741959894, "loss": 3.3684, "theoretical_loss": 3.637668579745716, "tokens_seen": 1033895936 }, { "epoch": 0.37, "learning_rate": 0.0003183881952326901, "loss": 3.3848, "theoretical_loss": 3.637319864818716, "tokens_seen": 1034944512 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.48198384046554565, "objective/train/docs_used": 589983, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.28519344329834, "objective/train/original_loss": 3.28519344329834, "objective/train/theoretical_loss": 3.637145676898374, "objective/train/tokens_used": 1055928800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2357451617717743, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493967533111572, "objective/train/weighted_lm_loss": 3.4479753971099854, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9625754952430725, "theoretical_loss": 3.637145676898374, "tokens_seen": 1035468800 }, { "epoch": 0.37, "learning_rate": 0.00031819901626939086, "loss": 3.3893, "theoretical_loss": 3.6369716018328777, "tokens_seen": 1035993088 }, { "epoch": 0.37, "learning_rate": 0.00031800983730609155, "loss": 3.3671, "theoretical_loss": 3.6366237897459355, "tokens_seen": 1037041664 }, { "epoch": 0.37, "learning_rate": 0.0003178206583427923, "loss": 3.3868, "theoretical_loss": 3.6362764275190766, "tokens_seen": 1038090240 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.48860275745391846, "objective/train/docs_used": 592008, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0765490531921387, "objective/train/original_loss": 3.0765490531921387, "objective/train/theoretical_loss": 3.636059554111668, "objective/train/tokens_used": 1059205600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2414240688085556, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500874519348145, "objective/train/weighted_lm_loss": 3.2311158180236816, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.9935080409049988, "theoretical_loss": 3.636059554111668, "tokens_seen": 1038745600 }, { "epoch": 0.37, "learning_rate": 0.000317631479379493, "loss": 3.3592, "theoretical_loss": 3.6359295141169303, "tokens_seen": 1039138816 }, { "epoch": 0.37, "learning_rate": 0.0003174423004161937, "loss": 3.385, "theoretical_loss": 3.6355830485075473, "tokens_seen": 1040187392 }, { "epoch": 0.37, "learning_rate": 0.00031725312145289443, "loss": 3.3209, "theoretical_loss": 3.635237029662391, "tokens_seen": 1041235968 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.47969964146614075, "objective/train/docs_used": 593661, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0731759071350098, "objective/train/original_loss": 3.0731759071350098, "objective/train/theoretical_loss": 3.6349778081007327, "objective/train/tokens_used": 1062482400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2339300960302353, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491589307785034, "objective/train/weighted_lm_loss": 3.2258410453796387, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9526910185813904, "theoretical_loss": 3.6349778081007327, "tokens_seen": 1042022400 }, { "epoch": 0.37, "learning_rate": 0.0003170639424895951, "loss": 3.337, "theoretical_loss": 3.6348914565563186, "tokens_seen": 1042284544 }, { "epoch": 0.37, "learning_rate": 0.00031687476352629593, "loss": 3.3503, "theoretical_loss": 3.6345463281675676, "tokens_seen": 1043333120 }, { "epoch": 0.37, "learning_rate": 0.0003166855845629966, "loss": 3.339, "theoretical_loss": 3.6342016434777427, "tokens_seen": 1044381696 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.48656022548675537, "objective/train/docs_used": 595739, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4706473350524902, "objective/train/original_loss": 3.470647096633911, "objective/train/theoretical_loss": 3.6339004075478796, "objective/train/tokens_used": 1065759200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2418631613254547, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498853921890259, "objective/train/weighted_lm_loss": 3.6436004638671875, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9542835354804993, "theoretical_loss": 3.6339004075478796, "tokens_seen": 1045299200 }, { "epoch": 0.37, "learning_rate": 0.0003164964055996973, "loss": 3.3997, "theoretical_loss": 3.6338574014717997, "tokens_seen": 1045430272 }, { "epoch": 0.37, "learning_rate": 0.00031630722663639806, "loss": 3.3249, "theoretical_loss": 3.6335136011380307, "tokens_seen": 1046478848 }, { "epoch": 0.37, "learning_rate": 0.00031611804767309876, "loss": 3.3354, "theoretical_loss": 3.6331702414680525, "tokens_seen": 1047527424 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.477405309677124, "objective/train/docs_used": 597756, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.253281831741333, "objective/train/original_loss": 3.253281593322754, "objective/train/theoretical_loss": 3.632827321456789, "objective/train/tokens_used": 1069036000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23487040400505066, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489342212677002, "objective/train/weighted_lm_loss": 3.4119961261749268, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9524772763252258, "theoretical_loss": 3.632827321456789, "tokens_seen": 1048576000 }, { "epoch": 0.37, "learning_rate": 0.0003159288687097995, "loss": 3.4091, "theoretical_loss": 3.632827321456789, "tokens_seen": 1048576000 }, { "epoch": 0.37, "learning_rate": 0.0003157396897465002, "loss": 3.3915, "theoretical_loss": 3.6324848401024594, "tokens_seen": 1049624576 }, { "epoch": 0.38, "learning_rate": 0.0003155505107832009, "loss": 3.3367, "theoretical_loss": 3.632142796406564, "tokens_seen": 1050673152 }, { "epoch": 0.38, "learning_rate": 0.00031536133181990164, "loss": 3.4277, "theoretical_loss": 3.631801189373867, "tokens_seen": 1051721728 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.4907068908214569, "objective/train/docs_used": 599577, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9004616737365723, "objective/train/original_loss": 2.9004616737365723, "objective/train/theoretical_loss": 3.631758519148221, "objective/train/tokens_used": 1072312800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.243531733751297, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503087043762207, "objective/train/weighted_lm_loss": 3.046717882156372, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9606583118438721, "theoretical_loss": 3.631758519148221, "tokens_seen": 1051852800 }, { "epoch": 0.38, "learning_rate": 0.00031517215285660233, "loss": 3.4022, "theoretical_loss": 3.631460018012389, "tokens_seen": 1052770304 }, { "epoch": 0.38, "learning_rate": 0.000314982973893303, "loss": 3.4569, "theoretical_loss": 3.631119281333386, "tokens_seen": 1053818880 }, { "epoch": 0.38, "learning_rate": 0.0003147937949300038, "loss": 3.3993, "theoretical_loss": 3.6307789783513402, "tokens_seen": 1054867456 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.4608767032623291, "objective/train/docs_used": 601442, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.057370901107788, "objective/train/original_loss": 3.057370662689209, "objective/train/theoretical_loss": 3.630693970255794, "objective/train/tokens_used": 1075589600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23330651223659515, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04727303981781, "objective/train/weighted_lm_loss": 3.204721212387085, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9519646167755127, "theoretical_loss": 3.630693970255794, "tokens_seen": 1055129600 }, { "epoch": 0.38, "learning_rate": 0.0003146046159667045, "loss": 3.3655, "theoretical_loss": 3.6304391080839453, "tokens_seen": 1055916032 }, { "epoch": 0.38, "learning_rate": 0.00031441543700340527, "loss": 3.3979, "theoretical_loss": 3.630099669552091, "tokens_seen": 1056964608 }, { "epoch": 0.38, "learning_rate": 0.00031422625804010596, "loss": 3.3503, "theoretical_loss": 3.6297606617798532, "tokens_seen": 1058013184 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.4877772033214569, "objective/train/docs_used": 602965, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0313591957092285, "objective/train/original_loss": 3.0313591957092285, "objective/train/theoretical_loss": 3.629633644721836, "objective/train/tokens_used": 1078866400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24103260040283203, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500030517578125, "objective/train/weighted_lm_loss": 3.182788133621216, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.95267653465271, "theoretical_loss": 3.629633644721836, "tokens_seen": 1058406400 }, { "epoch": 0.38, "learning_rate": 0.00031403707907680666, "loss": 3.3468, "theoretical_loss": 3.629422083794477, "tokens_seen": 1059061760 }, { "epoch": 0.38, "learning_rate": 0.0003138479001135074, "loss": 3.3813, "theoretical_loss": 3.6290839346263644, "tokens_seen": 1060110336 }, { "epoch": 0.38, "learning_rate": 0.0003136587211502081, "loss": 3.3796, "theoretical_loss": 3.6287462133090616, "tokens_seen": 1061158912 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.4929821789264679, "objective/train/docs_used": 604364, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3100390434265137, "objective/train/original_loss": 3.3100390434265137, "objective/train/theoretical_loss": 3.628577512793303, "objective/train/tokens_used": 1082143200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24545590579509735, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505460500717163, "objective/train/weighted_lm_loss": 3.4770169258117676, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.951737642288208, "theoretical_loss": 3.628577512793303, "tokens_seen": 1061683200 }, { "epoch": 0.38, "learning_rate": 0.00031346954218690884, "loss": 3.3702, "theoretical_loss": 3.6284089188792445, "tokens_seen": 1062207488 }, { "epoch": 0.38, "learning_rate": 0.00031328036322360954, "loss": 3.2997, "theoretical_loss": 3.6280720503767077, "tokens_seen": 1063256064 }, { "epoch": 0.38, "learning_rate": 0.00031309118426031023, "loss": 3.4449, "theoretical_loss": 3.627735606844347, "tokens_seen": 1064304640 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.46973249316215515, "objective/train/docs_used": 606079, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8989434242248535, "objective/train/original_loss": 2.8989436626434326, "objective/train/theoretical_loss": 3.62752554501776, "objective/train/tokens_used": 1085420000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23049893975257874, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048144817352295, "objective/train/weighted_lm_loss": 3.0359854698181152, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9730321764945984, "theoretical_loss": 3.62752554501776, "tokens_seen": 1064960000 }, { "epoch": 0.38, "learning_rate": 0.000312902005297011, "loss": 3.3205, "theoretical_loss": 3.627399587328153, "tokens_seen": 1065353216 }, { "epoch": 0.38, "learning_rate": 0.00031271282633371167, "loss": 3.3029, "theoretical_loss": 3.6270639908771907, "tokens_seen": 1066401792 }, { "epoch": 0.38, "learning_rate": 0.00031252364737041237, "loss": 3.3871, "theoretical_loss": 3.6267288165435922, "tokens_seen": 1067450368 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.48607712984085083, "objective/train/docs_used": 607761, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.5710361003875732, "objective/train/original_loss": 3.5710363388061523, "objective/train/theoretical_loss": 3.6264777122394327, "objective/train/tokens_used": 1088696800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2421998828649521, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498387813568115, "objective/train/weighted_lm_loss": 3.748819589614868, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9520419836044312, "theoretical_loss": 3.6264777122394327, "tokens_seen": 1068236800 }, { "epoch": 0.38, "learning_rate": 0.00031233446840711317, "loss": 3.4255, "theoretical_loss": 3.626394063382541, "tokens_seen": 1068498944 }, { "epoch": 0.38, "learning_rate": 0.00031214528944381386, "loss": 3.3785, "theoretical_loss": 3.62605973045226, "tokens_seen": 1069547520 }, { "epoch": 0.38, "learning_rate": 0.0003119561104805146, "loss": 3.3981, "theoretical_loss": 3.6257258168139987, "tokens_seen": 1070596096 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.483851820230484, "objective/train/docs_used": 609849, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.248103618621826, "objective/train/original_loss": 3.248103618621826, "objective/train/theoretical_loss": 3.6254339855953184, "objective/train/tokens_used": 1091973600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24208344519138336, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496156215667725, "objective/train/weighted_lm_loss": 3.4082441329956055, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9513829350471497, "theoretical_loss": 3.6254339855953184, "tokens_seen": 1071513600 }, { "epoch": 0.38, "learning_rate": 0.0003117669315172153, "loss": 3.3544, "theoretical_loss": 3.625392321532021, "tokens_seen": 1071644672 }, { "epoch": 0.38, "learning_rate": 0.000311577752553916, "loss": 3.375, "theoretical_loss": 3.6250592436735904, "tokens_seen": 1072693248 }, { "epoch": 0.38, "learning_rate": 0.00031138857359061674, "loss": 3.3512, "theoretical_loss": 3.624726582308961, "tokens_seen": 1073741824 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.4885290265083313, "objective/train/docs_used": 611858, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.38407826423645, "objective/train/original_loss": 3.384077548980713, "objective/train/theoretical_loss": 3.624394336511362, "objective/train/tokens_used": 1095250400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24165187776088715, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500812530517578, "objective/train/weighted_lm_loss": 3.5534675121307373, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9516956806182861, "theoretical_loss": 3.624394336511362, "tokens_seen": 1074790400 }, { "epoch": 0.38, "learning_rate": 0.00031119939462731744, "loss": 3.3759, "theoretical_loss": 3.624394336511362, "tokens_seen": 1074790400 }, { "epoch": 0.38, "learning_rate": 0.0003110102156640182, "loss": 3.3077, "theoretical_loss": 3.6240625053569873, "tokens_seen": 1075838976 }, { "epoch": 0.38, "learning_rate": 0.0003108210367007189, "loss": 3.3128, "theoretical_loss": 3.6237310879249813, "tokens_seen": 1076887552 }, { "epoch": 0.39, "learning_rate": 0.00031063185773741957, "loss": 3.3346, "theoretical_loss": 3.6234000832974282, "tokens_seen": 1077936128 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.4757619798183441, "objective/train/docs_used": 613931, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.840676784515381, "objective/train/original_loss": 2.84067702293396, "objective/train/theoretical_loss": 3.6233587366986946, "objective/train/tokens_used": 1098527200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23324042558670044, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487614870071411, "objective/train/weighted_lm_loss": 2.9804329872131348, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9518725275993347, "theoretical_loss": 3.6233587366986946, "tokens_seen": 1078067200 }, { "epoch": 0.39, "learning_rate": 0.0003104426787741203, "loss": 3.2935, "theoretical_loss": 3.623069490559339, "tokens_seen": 1078984704 }, { "epoch": 0.39, "learning_rate": 0.000310253499810821, "loss": 3.3639, "theoretical_loss": 3.6227393087986393, "tokens_seen": 1080033280 }, { "epoch": 0.39, "learning_rate": 0.00031006432084752176, "loss": 3.3186, "theoretical_loss": 3.622409537106158, "tokens_seen": 1081081856 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.4866965413093567, "objective/train/docs_used": 615851, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2716050148010254, "objective/train/original_loss": 3.2716054916381836, "objective/train/theoretical_loss": 3.622327158149928, "objective/train/tokens_used": 1101804000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24117985367774963, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498956441879272, "objective/train/weighted_lm_loss": 3.434610605239868, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.951644778251648, "theoretical_loss": 3.622327158149928, "tokens_seen": 1081344000 }, { "epoch": 0.39, "learning_rate": 0.0003098751418842225, "loss": 3.3499, "theoretical_loss": 3.622080174575613, "tokens_seen": 1082130432 }, { "epoch": 0.39, "learning_rate": 0.0003096859629209232, "loss": 3.3303, "theoretical_loss": 3.6217512203036026, "tokens_seen": 1083179008 }, { "epoch": 0.39, "learning_rate": 0.00030949678395762395, "loss": 3.4256, "theoretical_loss": 3.621422673389592, "tokens_seen": 1084227584 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.48583704233169556, "objective/train/docs_used": 617294, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.147465705871582, "objective/train/original_loss": 3.147465705871582, "objective/train/theoretical_loss": 3.621299573135513, "objective/train/tokens_used": 1105080800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2386694699525833, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497969388961792, "objective/train/weighted_lm_loss": 3.3032023906707764, "objective/train/weights_max": 1.051215410232544, "objective/train/weights_min": 0.9727230668067932, "theoretical_loss": 3.621299573135513, "tokens_seen": 1084620800 }, { "epoch": 0.39, "learning_rate": 0.00030930760499432464, "loss": 3.3714, "theoretical_loss": 3.6210945329358992, "tokens_seen": 1085276160 }, { "epoch": 0.39, "learning_rate": 0.00030911842603102534, "loss": 3.3833, "theoretical_loss": 3.6207667980476868, "tokens_seen": 1086324736 }, { "epoch": 0.39, "learning_rate": 0.0003089292470677261, "loss": 3.3904, "theoretical_loss": 3.620439467832949, "tokens_seen": 1087373312 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.4890348017215729, "objective/train/docs_used": 619111, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1311914920806885, "objective/train/original_loss": 3.1311917304992676, "objective/train/theoretical_loss": 3.620275954200152, "objective/train/tokens_used": 1108357600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24180911481380463, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501326322555542, "objective/train/weighted_lm_loss": 3.2881572246551514, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.955804169178009, "theoretical_loss": 3.620275954200152, "tokens_seen": 1087897600 }, { "epoch": 0.39, "learning_rate": 0.0003087400681044268, "loss": 3.4003, "theoretical_loss": 3.6201125414024986, "tokens_seen": 1088421888 }, { "epoch": 0.39, "learning_rate": 0.0003085508891411275, "loss": 3.3861, "theoretical_loss": 3.619786017869957, "tokens_seen": 1089470464 }, { "epoch": 0.39, "learning_rate": 0.0003083617101778282, "loss": 3.3314, "theoretical_loss": 3.619459896351742, "tokens_seen": 1090519040 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.4774239659309387, "objective/train/docs_used": 620966, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1619443893432617, "objective/train/original_loss": 3.16194486618042, "objective/train/theoretical_loss": 3.6192562741592726, "objective/train/tokens_used": 1111634400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23663067817687988, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489450693130493, "objective/train/weighted_lm_loss": 3.3209400177001953, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9530478715896606, "theoretical_loss": 3.6192562741592726, "tokens_seen": 1091174400 }, { "epoch": 0.39, "learning_rate": 0.0003081725312145289, "loss": 3.3275, "theoretical_loss": 3.6191341759670568, "tokens_seen": 1091567616 }, { "epoch": 0.39, "learning_rate": 0.00030798335225122966, "loss": 3.4539, "theoretical_loss": 3.618808855837877, "tokens_seen": 1092616192 }, { "epoch": 0.39, "learning_rate": 0.0003077941732879304, "loss": 3.303, "theoretical_loss": 3.6184839350889417, "tokens_seen": 1093664768 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.48794177174568176, "objective/train/docs_used": 623021, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.890498399734497, "objective/train/original_loss": 2.890498399734497, "objective/train/theoretical_loss": 3.6182405060955523, "objective/train/tokens_used": 1114911200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.239480122923851, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500115156173706, "objective/train/weighted_lm_loss": 3.0350229740142822, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9790964126586914, "theoretical_loss": 3.6182405060955523, "tokens_seen": 1094451200 }, { "epoch": 0.39, "learning_rate": 0.0003076049943246311, "loss": 3.3359, "theoretical_loss": 3.6181594128477395, "tokens_seen": 1094713344 }, { "epoch": 0.39, "learning_rate": 0.00030741581536133185, "loss": 3.3334, "theoretical_loss": 3.6178352882444997, "tokens_seen": 1095761920 }, { "epoch": 0.39, "learning_rate": 0.00030722663639803254, "loss": 3.3046, "theoretical_loss": 3.6175115604121793, "tokens_seen": 1096810496 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.4875214993953705, "objective/train/docs_used": 624881, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3388214111328125, "objective/train/original_loss": 3.3388214111328125, "objective/train/theoretical_loss": 3.617228623355502, "objective/train/tokens_used": 1118188000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24309813976287842, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04998779296875, "objective/train/weighted_lm_loss": 3.505209445953369, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9611717462539673, "theoretical_loss": 3.617228623355502, "tokens_seen": 1097728000 }, { "epoch": 0.39, "learning_rate": 0.0003070374574347333, "loss": 3.3572, "theoretical_loss": 3.6171882284864525, "tokens_seen": 1097859072 }, { "epoch": 0.39, "learning_rate": 0.000306848278471434, "loss": 3.32, "theoretical_loss": 3.6168652916056994, "tokens_seen": 1098907648 }, { "epoch": 0.39, "learning_rate": 0.0003066590995081347, "loss": 3.3374, "theoretical_loss": 3.6165427489109963, "tokens_seen": 1099956224 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.49022623896598816, "objective/train/docs_used": 626669, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2733004093170166, "objective/train/original_loss": 3.2733001708984375, "objective/train/theoretical_loss": 3.616220599546101, "objective/train/tokens_used": 1121464800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2436634600162506, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502612590789795, "objective/train/weighted_lm_loss": 3.4382221698760986, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9524181485176086, "theoretical_loss": 3.616220599546101, "tokens_seen": 1101004800 }, { "epoch": 0.39, "learning_rate": 0.0003064699205448354, "loss": 3.339, "theoretical_loss": 3.616220599546101, "tokens_seen": 1101004800 }, { "epoch": 0.39, "learning_rate": 0.0003062807415815361, "loss": 3.2574, "theoretical_loss": 3.615898842657448, "tokens_seen": 1102053376 }, { "epoch": 0.39, "learning_rate": 0.00030609156261823686, "loss": 3.3171, "theoretical_loss": 3.6155774773941305, "tokens_seen": 1103101952 }, { "epoch": 0.39, "learning_rate": 0.00030590238365493756, "loss": 3.3261, "theoretical_loss": 3.615256502907896, "tokens_seen": 1104150528 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.48506712913513184, "objective/train/docs_used": 628420, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.485072612762451, "objective/train/original_loss": 3.485072374343872, "objective/train/theoretical_loss": 3.6152164085314853, "objective/train/tokens_used": 1124741600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23975829780101776, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497254133224487, "objective/train/weighted_lm_loss": 3.658010244369507, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9524231553077698, "theoretical_loss": 3.6152164085314853, "tokens_seen": 1104281600 }, { "epoch": 0.39, "learning_rate": 0.00030571320469163825, "loss": 3.3752, "theoretical_loss": 3.6149359183531296, "tokens_seen": 1105199104 }, { "epoch": 0.4, "learning_rate": 0.00030552402572833905, "loss": 3.3202, "theoretical_loss": 3.614615722886849, "tokens_seen": 1106247680 }, { "epoch": 0.4, "learning_rate": 0.00030533484676503975, "loss": 3.3336, "theoretical_loss": 3.614295915668691, "tokens_seen": 1107296256 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.48640114068984985, "objective/train/docs_used": 629445, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4100451469421387, "objective/train/original_loss": 3.4100446701049805, "objective/train/theoretical_loss": 3.6142160244296884, "objective/train/tokens_used": 1128018400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23953872919082642, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498576164245605, "objective/train/weighted_lm_loss": 3.579258441925049, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9583953022956848, "theoretical_loss": 3.6142160244296884, "tokens_seen": 1107558400 }, { "epoch": 0.4, "learning_rate": 0.0003051456678017405, "loss": 3.3687, "theoretical_loss": 3.613976495860898, "tokens_seen": 1108344832 }, { "epoch": 0.4, "learning_rate": 0.0003049564888384412, "loss": 3.3225, "theoretical_loss": 3.613657462628315, "tokens_seen": 1109393408 }, { "epoch": 0.4, "learning_rate": 0.0003047673098751419, "loss": 3.3378, "theoretical_loss": 3.613338815138371, "tokens_seen": 1110441984 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.47437578439712524, "objective/train/docs_used": 631502, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.02118182182312, "objective/train/original_loss": 3.021181583404541, "objective/train/theoretical_loss": 3.6132194216094313, "objective/train/tokens_used": 1131295200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23702089488506317, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0486419200897217, "objective/train/weighted_lm_loss": 3.1670000553131104, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9514435529708862, "theoretical_loss": 3.6132194216094313, "tokens_seen": 1110835200 }, { "epoch": 0.4, "learning_rate": 0.00030457813091184263, "loss": 3.2814, "theoretical_loss": 3.613020552561074, "tokens_seen": 1111490560 }, { "epoch": 0.4, "learning_rate": 0.0003043889519485433, "loss": 3.3688, "theoretical_loss": 3.6127026740689967, "tokens_seen": 1112539136 }, { "epoch": 0.4, "learning_rate": 0.000304199772985244, "loss": 3.316, "theoretical_loss": 3.612385178837271, "tokens_seen": 1113587712 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.4620572030544281, "objective/train/docs_used": 633386, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.020350694656372, "objective/train/original_loss": 3.020350933074951, "objective/train/theoretical_loss": 3.6122265746869653, "objective/train/tokens_used": 1134572000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24398073554039001, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0474449396133423, "objective/train/weighted_lm_loss": 3.1671273708343506, "objective/train/weights_max": 1.0512152910232544, "objective/train/weights_min": 0.9529370665550232, "theoretical_loss": 3.6122265746869653, "tokens_seen": 1114112000 }, { "epoch": 0.4, "learning_rate": 0.00030401059402194476, "loss": 3.3191, "theoretical_loss": 3.6120680660435736, "tokens_seen": 1114636288 }, { "epoch": 0.4, "learning_rate": 0.00030382141505864546, "loss": 3.381, "theoretical_loss": 3.6117513348681163, "tokens_seen": 1115684864 }, { "epoch": 0.4, "learning_rate": 0.0003036322360953462, "loss": 3.3724, "theoretical_loss": 3.611434984493637, "tokens_seen": 1116733440 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.4832095205783844, "objective/train/docs_used": 634684, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4889237880706787, "objective/train/original_loss": 3.488924026489258, "objective/train/theoretical_loss": 3.6112374585229583, "objective/train/tokens_used": 1137848800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24187950789928436, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495502948760986, "objective/train/weighted_lm_loss": 3.661220073699951, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9514582753181458, "theoretical_loss": 3.6112374585229583, "tokens_seen": 1117388800 }, { "epoch": 0.4, "learning_rate": 0.0003034430571320469, "loss": 3.3915, "theoretical_loss": 3.6111190141053893, "tokens_seen": 1117782016 }, { "epoch": 0.4, "learning_rate": 0.00030325387816874765, "loss": 3.4511, "theoretical_loss": 3.6108034228911334, "tokens_seen": 1118830592 }, { "epoch": 0.4, "learning_rate": 0.0003030646992054484, "loss": 3.3911, "theoretical_loss": 3.6104882100411215, "tokens_seen": 1119879168 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.48330286145210266, "objective/train/docs_used": 636549, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.597794771194458, "objective/train/original_loss": 3.597794532775879, "objective/train/theoretical_loss": 3.6102520482194387, "objective/train/tokens_used": 1141125600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23953989148139954, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495479106903076, "objective/train/weighted_lm_loss": 3.7755210399627686, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9581436514854431, "theoretical_loss": 3.6102520482194387, "tokens_seen": 1120665600 }, { "epoch": 0.4, "learning_rate": 0.0003028755202421491, "loss": 3.468, "theoretical_loss": 3.6101733747480957, "tokens_seen": 1120927744 }, { "epoch": 0.4, "learning_rate": 0.00030268634127884983, "loss": 3.3887, "theoretical_loss": 3.609858916207269, "tokens_seen": 1121976320 }, { "epoch": 0.4, "learning_rate": 0.00030249716231555053, "loss": 3.3466, "theoretical_loss": 3.609544833616324, "tokens_seen": 1123024896 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.4877423942089081, "objective/train/docs_used": 638453, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0719454288482666, "objective/train/original_loss": 3.0719454288482666, "objective/train/theoretical_loss": 3.6092703191167743, "objective/train/tokens_used": 1144402400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24203670024871826, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500047206878662, "objective/train/weighted_lm_loss": 3.2259280681610107, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9517780542373657, "theoretical_loss": 3.6092703191167743, "tokens_seen": 1123942400 }, { "epoch": 0.4, "learning_rate": 0.0003023079833522512, "loss": 3.4285, "theoretical_loss": 3.6092311261753958, "tokens_seen": 1124073472 }, { "epoch": 0.4, "learning_rate": 0.00030211880438895197, "loss": 3.3911, "theoretical_loss": 3.608917793087066, "tokens_seen": 1125122048 }, { "epoch": 0.4, "learning_rate": 0.00030192962542565266, "loss": 3.3768, "theoretical_loss": 3.608604833556355, "tokens_seen": 1126170624 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.4918157458305359, "objective/train/docs_used": 640230, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3360931873321533, "objective/train/original_loss": 3.3360931873321533, "objective/train/theoretical_loss": 3.6082922467907066, "objective/train/tokens_used": 1147679200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24366138875484467, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504201650619507, "objective/train/weighted_lm_loss": 3.5035665035247803, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9529560208320618, "theoretical_loss": 3.6082922467907066, "tokens_seen": 1127219200 }, { "epoch": 0.4, "learning_rate": 0.00030174044646235336, "loss": 3.4143, "theoretical_loss": 3.6082922467907066, "tokens_seen": 1127219200 }, { "epoch": 0.4, "learning_rate": 0.0003015512674990541, "loss": 3.3773, "theoretical_loss": 3.6079800319999817, "tokens_seen": 1128267776 }, { "epoch": 0.4, "learning_rate": 0.0003013620885357548, "loss": 3.3424, "theoretical_loss": 3.60766818839645, "tokens_seen": 1129316352 }, { "epoch": 0.4, "learning_rate": 0.00030117290957245554, "loss": 3.3289, "theoretical_loss": 3.6073567151947774, "tokens_seen": 1130364928 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.49019014835357666, "objective/train/docs_used": 641368, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1225857734680176, "objective/train/original_loss": 3.1225852966308594, "objective/train/theoretical_loss": 3.6073178070494287, "objective/train/tokens_used": 1150956000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24214524030685425, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502499341964722, "objective/train/weighted_lm_loss": 3.279658079147339, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9604944586753845, "theoretical_loss": 3.6073178070494287, "tokens_seen": 1130496000 }, { "epoch": 0.4, "learning_rate": 0.0003009837306091563, "loss": 3.3178, "theoretical_loss": 3.607045611612018, "tokens_seen": 1131413504 }, { "epoch": 0.4, "learning_rate": 0.000300794551645857, "loss": 3.3106, "theoretical_loss": 3.6067348768676064, "tokens_seen": 1132462080 }, { "epoch": 0.4, "learning_rate": 0.00030060537268255773, "loss": 3.2938, "theoretical_loss": 3.606424510183343, "tokens_seen": 1133510656 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.48765861988067627, "objective/train/docs_used": 643613, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.259086847305298, "objective/train/original_loss": 3.2590866088867188, "objective/train/theoretical_loss": 3.6063469759307054, "objective/train/tokens_used": 1154232800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2418700009584427, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499954223632812, "objective/train/weighted_lm_loss": 3.421250104904175, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.957314133644104, "theoretical_loss": 3.6063469759307054, "tokens_seen": 1133772800 }, { "epoch": 0.41, "learning_rate": 0.0003004161937192584, "loss": 3.3392, "theoretical_loss": 3.606114510783391, "tokens_seen": 1134559232 }, { "epoch": 0.41, "learning_rate": 0.0003002270147559592, "loss": 3.3265, "theoretical_loss": 3.605804877894263, "tokens_seen": 1135607808 }, { "epoch": 0.41, "learning_rate": 0.00030003783579265987, "loss": 3.4029, "theoretical_loss": 3.6054956107448124, "tokens_seen": 1136656384 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.48545318841934204, "objective/train/docs_used": 645222, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.836273431777954, "objective/train/original_loss": 2.836273193359375, "objective/train/theoretical_loss": 3.605379729699039, "objective/train/tokens_used": 1157509600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2405441403388977, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049768090248108, "objective/train/weighted_lm_loss": 2.977830648422241, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9613069891929626, "theoretical_loss": 3.605379729699039, "tokens_seen": 1137049600 }, { "epoch": 0.41, "learning_rate": 0.00029984865682936056, "loss": 3.3281, "theoretical_loss": 3.605186708566225, "tokens_seen": 1137704960 }, { "epoch": 0.41, "learning_rate": 0.0002996594778660613, "loss": 3.33, "theoretical_loss": 3.6048781705920105, "tokens_seen": 1138753536 }, { "epoch": 0.41, "learning_rate": 0.000299470298902762, "loss": 3.3641, "theoretical_loss": 3.6045699960579896, "tokens_seen": 1139802112 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.4913421869277954, "objective/train/docs_used": 646474, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3484857082366943, "objective/train/original_loss": 3.3484854698181152, "objective/train/theoretical_loss": 3.6044160448428775, "objective/train/tokens_used": 1160786400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24510613083839417, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503802299499512, "objective/train/weighted_lm_loss": 3.5174074172973633, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9515740871429443, "theoretical_loss": 3.6044160448428775, "tokens_seen": 1140326400 }, { "epoch": 0.41, "learning_rate": 0.0002992811199394627, "loss": 3.3049, "theoretical_loss": 3.60426218420229, "tokens_seen": 1140850688 }, { "epoch": 0.41, "learning_rate": 0.00029909194097616344, "loss": 3.3031, "theoretical_loss": 3.603954734265334, "tokens_seen": 1141899264 }, { "epoch": 0.41, "learning_rate": 0.00029890276201286414, "loss": 3.3502, "theoretical_loss": 3.60364764548983, "tokens_seen": 1142947840 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.48611900210380554, "objective/train/docs_used": 648095, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.445695638656616, "objective/train/original_loss": 3.445695400238037, "objective/train/theoretical_loss": 3.603455898071866, "objective/train/tokens_used": 1164063200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2388918399810791, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498261451721191, "objective/train/weighted_lm_loss": 3.6170575618743896, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9571871757507324, "theoretical_loss": 3.603455898071866, "tokens_seen": 1143603200 }, { "epoch": 0.41, "learning_rate": 0.00029871358304956494, "loss": 3.3775, "theoretical_loss": 3.6033409171207644, "tokens_seen": 1143996416 }, { "epoch": 0.41, "learning_rate": 0.00029852440408626563, "loss": 3.2954, "theoretical_loss": 3.6030345484053923, "tokens_seen": 1145044992 }, { "epoch": 0.41, "learning_rate": 0.0002983352251229663, "loss": 3.3485, "theoretical_loss": 3.602728538593227, "tokens_seen": 1146093568 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.47712090611457825, "objective/train/docs_used": 649861, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.083465576171875, "objective/train/original_loss": 3.083465337753296, "objective/train/theoretical_loss": 3.6024992663141386, "objective/train/tokens_used": 1167340000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2367585003376007, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489155054092407, "objective/train/weighted_lm_loss": 3.231121301651001, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9521268010139465, "theoretical_loss": 3.6024992663141386, "tokens_seen": 1146880000 }, { "epoch": 0.41, "learning_rate": 0.0002981460461596671, "loss": 3.2815, "theoretical_loss": 3.6024228869360346, "tokens_seen": 1147142144 }, { "epoch": 0.41, "learning_rate": 0.00029795686719636777, "loss": 3.3857, "theoretical_loss": 3.602117592687822, "tokens_seen": 1148190720 }, { "epoch": 0.41, "learning_rate": 0.0002977676882330685, "loss": 3.3291, "theoretical_loss": 3.6018126551048306, "tokens_seen": 1149239296 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.4793562889099121, "objective/train/docs_used": 651918, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1603188514709473, "objective/train/original_loss": 3.1603193283081055, "objective/train/theoretical_loss": 3.601546126713652, "objective/train/tokens_used": 1170616800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2417171150445938, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491641759872437, "objective/train/weighted_lm_loss": 3.314483642578125, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.952265739440918, "theoretical_loss": 3.601546126713652, "tokens_seen": 1150156800 }, { "epoch": 0.41, "learning_rate": 0.0002975785092697692, "loss": 3.3884, "theoretical_loss": 3.6015080734455243, "tokens_seen": 1150287872 }, { "epoch": 0.41, "learning_rate": 0.0002973893303064699, "loss": 3.4104, "theoretical_loss": 3.601203846970585, "tokens_seen": 1151336448 }, { "epoch": 0.41, "learning_rate": 0.00029720015134317065, "loss": 3.3489, "theoretical_loss": 3.6008999749429007, "tokens_seen": 1152385024 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.48654869198799133, "objective/train/docs_used": 653958, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.908597946166992, "objective/train/original_loss": 2.908597469329834, "objective/train/theoretical_loss": 3.6005964566275575, "objective/train/tokens_used": 1173893600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24034003913402557, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498765707015991, "objective/train/weighted_lm_loss": 3.0537478923797607, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9514062404632568, "theoretical_loss": 3.6005964566275575, "tokens_seen": 1153433600 }, { "epoch": 0.41, "learning_rate": 0.00029701097237987134, "loss": 3.3159, "theoretical_loss": 3.6005964566275575, "tokens_seen": 1153433600 }, { "epoch": 0.41, "learning_rate": 0.00029682179341657204, "loss": 3.3387, "theoretical_loss": 3.600293291291833, "tokens_seen": 1154482176 }, { "epoch": 0.41, "learning_rate": 0.0002966326144532728, "loss": 3.3589, "theoretical_loss": 3.5999904782051866, "tokens_seen": 1155530752 }, { "epoch": 0.41, "learning_rate": 0.00029644343548997353, "loss": 3.4099, "theoretical_loss": 3.5996880166392486, "tokens_seen": 1156579328 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.4857807159423828, "objective/train/docs_used": 655833, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2272090911865234, "objective/train/original_loss": 3.2272090911865234, "objective/train/theoretical_loss": 3.5996502336236142, "objective/train/tokens_used": 1177170400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24008683860301971, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497984886169434, "objective/train/weighted_lm_loss": 3.3878610134124756, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9605950117111206, "theoretical_loss": 3.5996502336236142, "tokens_seen": 1156710400 }, { "epoch": 0.41, "learning_rate": 0.0002962542565266743, "loss": 3.3243, "theoretical_loss": 3.599385905867816, "tokens_seen": 1157627904 }, { "epoch": 0.41, "learning_rate": 0.00029606507756337497, "loss": 3.2962, "theoretical_loss": 3.5990841451668416, "tokens_seen": 1158676480 }, { "epoch": 0.41, "learning_rate": 0.00029587589860007567, "loss": 3.3185, "theoretical_loss": 3.598782733814426, "tokens_seen": 1159725056 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.4894789755344391, "objective/train/docs_used": 657914, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.035065174102783, "objective/train/original_loss": 3.035065174102783, "objective/train/theoretical_loss": 3.5987074354776407, "objective/train/tokens_used": 1180447200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24258311092853546, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501810312271118, "objective/train/weighted_lm_loss": 3.187278985977173, "objective/train/weights_max": 1.0512194633483887, "objective/train/weights_min": 0.9514700770378113, "theoretical_loss": 3.5987074354776407, "tokens_seen": 1159987200 }, { "epoch": 0.41, "learning_rate": 0.0002956867196367764, "loss": 3.2898, "theoretical_loss": 3.598481671090809, "tokens_seen": 1160773632 }, { "epoch": 0.41, "learning_rate": 0.0002954975406734771, "loss": 3.2237, "theoretical_loss": 3.5981809562783633, "tokens_seen": 1161822208 }, { "epoch": 0.42, "learning_rate": 0.00029530836171017785, "loss": 3.2926, "theoretical_loss": 3.5978805886615834, "tokens_seen": 1162870784 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.4899119734764099, "objective/train/docs_used": 659757, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9733612537384033, "objective/train/original_loss": 2.9733614921569824, "objective/train/theoretical_loss": 3.597768040171002, "objective/train/tokens_used": 1183724000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24554485082626343, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502394437789917, "objective/train/weighted_lm_loss": 3.123426914215088, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9529068470001221, "theoretical_loss": 3.597768040171002, "tokens_seen": 1163264000 }, { "epoch": 0.42, "learning_rate": 0.00029511918274687855, "loss": 3.2544, "theoretical_loss": 3.5975805675270784, "tokens_seen": 1163919360 }, { "epoch": 0.42, "learning_rate": 0.00029493000378357924, "loss": 3.3266, "theoretical_loss": 3.5972808921635666, "tokens_seen": 1164967936 }, { "epoch": 0.42, "learning_rate": 0.00029474082482028, "loss": 3.3658, "theoretical_loss": 3.5969815618618615, "tokens_seen": 1166016512 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.4765141010284424, "objective/train/docs_used": 661442, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.5711071491241455, "objective/train/original_loss": 3.571107864379883, "objective/train/theoretical_loss": 3.5968320258881388, "objective/train/tokens_used": 1187000800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2353520691394806, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488474369049072, "objective/train/weighted_lm_loss": 3.7464940547943115, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9515637755393982, "theoretical_loss": 3.5968320258881388, "tokens_seen": 1166540800 }, { "epoch": 0.42, "learning_rate": 0.0002945516458569807, "loss": 3.3019, "theoretical_loss": 3.5966825759148704, "tokens_seen": 1167065088 }, { "epoch": 0.42, "learning_rate": 0.0002943624668936814, "loss": 3.3644, "theoretical_loss": 3.5963839336175814, "tokens_seen": 1168113664 }, { "epoch": 0.42, "learning_rate": 0.0002941732879303822, "loss": 3.3195, "theoretical_loss": 3.596085634267058, "tokens_seen": 1169162240 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.48476114869117737, "objective/train/docs_used": 663194, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.447826385498047, "objective/train/original_loss": 3.447826862335205, "objective/train/theoretical_loss": 3.595899371014127, "objective/train/tokens_used": 1190277600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23816771805286407, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496866703033447, "objective/train/weighted_lm_loss": 3.619957208633423, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9524836540222168, "theoretical_loss": 3.595899371014127, "tokens_seen": 1169817600 }, { "epoch": 0.42, "learning_rate": 0.00029398410896708287, "loss": 3.4283, "theoretical_loss": 3.5957876771624298, "tokens_seen": 1170210816 }, { "epoch": 0.42, "learning_rate": 0.0002937949300037836, "loss": 3.3523, "theoretical_loss": 3.5954900616048855, "tokens_seen": 1171259392 }, { "epoch": 0.42, "learning_rate": 0.0002936057510404843, "loss": 3.3727, "theoretical_loss": 3.5951927868976643, "tokens_seen": 1172307968 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.4848986566066742, "objective/train/docs_used": 665258, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.420736312866211, "objective/train/original_loss": 3.4207358360290527, "objective/train/theoretical_loss": 3.594970054132281, "objective/train/tokens_used": 1193554400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23861315846443176, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049702763557434, "objective/train/weighted_lm_loss": 3.590325355529785, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9676954746246338, "theoretical_loss": 3.594970054132281, "tokens_seen": 1173094400 }, { "epoch": 0.42, "learning_rate": 0.000293416572077185, "loss": 3.3649, "theoretical_loss": 3.5948958523460495, "tokens_seen": 1173356544 }, { "epoch": 0.42, "learning_rate": 0.00029322739311388575, "loss": 3.281, "theoretical_loss": 3.5945992572573577, "tokens_seen": 1174405120 }, { "epoch": 0.42, "learning_rate": 0.00029303821415058645, "loss": 3.3138, "theoretical_loss": 3.5943030009409345, "tokens_seen": 1175453696 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.47306984663009644, "objective/train/docs_used": 667248, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9362661838531494, "objective/train/original_loss": 2.936265707015991, "objective/train/theoretical_loss": 3.594044054021782, "objective/train/tokens_used": 1196831200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23313362896442413, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484918355941772, "objective/train/weighted_lm_loss": 3.077251434326172, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9524040818214417, "theoretical_loss": 3.594044054021782, "tokens_seen": 1176371200 }, { "epoch": 0.42, "learning_rate": 0.0002928490351872872, "loss": 3.3222, "theoretical_loss": 3.5940070827081443, "tokens_seen": 1176502272 }, { "epoch": 0.42, "learning_rate": 0.0002926598562239879, "loss": 3.3223, "theoretical_loss": 3.593711501872364, "tokens_seen": 1177550848 }, { "epoch": 0.42, "learning_rate": 0.0002924706772606886, "loss": 3.3199, "theoretical_loss": 3.5934162577489746, "tokens_seen": 1178599424 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.48706483840942383, "objective/train/docs_used": 669029, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9470417499542236, "objective/train/original_loss": 2.9470419883728027, "objective/train/theoretical_loss": 3.5931213496553536, "objective/train/tokens_used": 1200108000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2409461885690689, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499311685562134, "objective/train/weighted_lm_loss": 3.094271421432495, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9517722725868225, "theoretical_loss": 3.5931213496553536, "tokens_seen": 1179648000 }, { "epoch": 0.42, "learning_rate": 0.00029228149829738933, "loss": 3.3198, "theoretical_loss": 3.5931213496553536, "tokens_seen": 1179648000 }, { "epoch": 0.42, "learning_rate": 0.00029209231933409, "loss": 3.397, "theoretical_loss": 3.5928267769108677, "tokens_seen": 1180696576 }, { "epoch": 0.42, "learning_rate": 0.0002919031403707908, "loss": 3.2854, "theoretical_loss": 3.5925325388368656, "tokens_seen": 1181745152 }, { "epoch": 0.42, "learning_rate": 0.0002917139614074915, "loss": 3.2747, "theoretical_loss": 3.5922386347566695, "tokens_seen": 1182793728 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.4930936098098755, "objective/train/docs_used": 670515, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0000312328338623, "objective/train/original_loss": 3.000030994415283, "objective/train/theoretical_loss": 3.592201920196959, "objective/train/tokens_used": 1203384800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24434438347816467, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050551414489746, "objective/train/weighted_lm_loss": 3.1516330242156982, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9934404492378235, "theoretical_loss": 3.592201920196959, "tokens_seen": 1182924800 }, { "epoch": 0.42, "learning_rate": 0.0002915247824441922, "loss": 3.2816, "theoretical_loss": 3.591945063995568, "tokens_seen": 1183842304 }, { "epoch": 0.42, "learning_rate": 0.00029133560348089296, "loss": 3.2813, "theoretical_loss": 3.591651825880809, "tokens_seen": 1184890880 }, { "epoch": 0.42, "learning_rate": 0.00029114642451759365, "loss": 3.2885, "theoretical_loss": 3.591358919741592, "tokens_seen": 1185939456 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.4882916212081909, "objective/train/docs_used": 672504, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.118218421936035, "objective/train/original_loss": 3.118218421936035, "objective/train/theoretical_loss": 3.591285744999542, "objective/train/tokens_used": 1206661600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.242011159658432, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500593185424805, "objective/train/weighted_lm_loss": 3.27449107170105, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9519306421279907, "theoretical_loss": 3.591285744999542, "tokens_seen": 1186201600 }, { "epoch": 0.42, "learning_rate": 0.00029095724555429435, "loss": 3.3389, "theoretical_loss": 3.591066344909062, "tokens_seen": 1186988032 }, { "epoch": 0.42, "learning_rate": 0.0002907680665909951, "loss": 3.4149, "theoretical_loss": 3.590774100716298, "tokens_seen": 1188036608 }, { "epoch": 0.42, "learning_rate": 0.0002905788876276958, "loss": 3.3775, "theoretical_loss": 3.5904821864983116, "tokens_seen": 1189085184 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.49263498187065125, "objective/train/docs_used": 674473, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1062722206115723, "objective/train/original_loss": 3.1062724590301514, "objective/train/theoretical_loss": 3.590372803602795, "objective/train/tokens_used": 1209938400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24459204077720642, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050506830215454, "objective/train/weighted_lm_loss": 3.2633543014526367, "objective/train/weights_max": 1.0512193441390991, "objective/train/weights_min": 0.9603816866874695, "theoretical_loss": 3.590372803602795, "tokens_seen": 1189478400 }, { "epoch": 0.43, "learning_rate": 0.00029038970866439653, "loss": 3.3122, "theoretical_loss": 3.5901906015920355, "tokens_seen": 1190133760 }, { "epoch": 0.43, "learning_rate": 0.00029020052970109723, "loss": 3.3855, "theoretical_loss": 3.5898993453363173, "tokens_seen": 1191182336 }, { "epoch": 0.43, "learning_rate": 0.0002900113507377979, "loss": 3.3267, "theoretical_loss": 3.5896084170719127, "tokens_seen": 1192230912 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.4631289541721344, "objective/train/docs_used": 676257, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0787904262542725, "objective/train/original_loss": 3.0787906646728516, "objective/train/theoretical_loss": 3.589463075730959, "objective/train/tokens_used": 1213215200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23281329870224, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0474958419799805, "objective/train/weighted_lm_loss": 3.2263834476470947, "objective/train/weights_max": 1.0512161254882812, "objective/train/weights_min": 0.9525740742683411, "theoretical_loss": 3.589463075730959, "tokens_seen": 1192755200 }, { "epoch": 0.43, "learning_rate": 0.00028982217177449867, "loss": 3.3403, "theoretical_loss": 3.5893178161414783, "tokens_seen": 1193279488 }, { "epoch": 0.43, "learning_rate": 0.0002896329928111994, "loss": 3.3054, "theoretical_loss": 3.589027541889564, "tokens_seen": 1194328064 }, { "epoch": 0.43, "learning_rate": 0.00028944381384790016, "loss": 3.3027, "theoretical_loss": 3.5887375936626067, "tokens_seen": 1195376640 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.47915610671043396, "objective/train/docs_used": 678049, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.249417304992676, "objective/train/original_loss": 3.249417304992676, "objective/train/theoretical_loss": 3.5885565412906617, "objective/train/tokens_used": 1216492000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2335328310728073, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049102544784546, "objective/train/weighted_lm_loss": 3.4090421199798584, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9726335406303406, "theoretical_loss": 3.5885565412906617, "tokens_seen": 1196032000 }, { "epoch": 0.43, "learning_rate": 0.00028925463488460086, "loss": 3.3897, "theoretical_loss": 3.5884479708089216, "tokens_seen": 1196425216 }, { "epoch": 0.43, "learning_rate": 0.00028906545592130155, "loss": 3.2624, "theoretical_loss": 3.5881586726786976, "tokens_seen": 1197473792 }, { "epoch": 0.43, "learning_rate": 0.0002888762769580023, "loss": 3.3199, "theoretical_loss": 3.587869698623987, "tokens_seen": 1198522368 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.48205363750457764, "objective/train/docs_used": 679914, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.01973557472229, "objective/train/original_loss": 3.019735813140869, "objective/train/theoretical_loss": 3.5876531803687786, "objective/train/tokens_used": 1219768800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2353292852640152, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494015216827393, "objective/train/weighted_lm_loss": 3.1688785552978516, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9905664920806885, "theoretical_loss": 3.5876531803687786, "tokens_seen": 1199308800 }, { "epoch": 0.43, "learning_rate": 0.000288687097994703, "loss": 3.2923, "theoretical_loss": 3.587581047998703, "tokens_seen": 1199570944 }, { "epoch": 0.43, "learning_rate": 0.0002884979190314037, "loss": 3.3503, "theoretical_loss": 3.587292720158608, "tokens_seen": 1200619520 }, { "epoch": 0.43, "learning_rate": 0.00028830874006810443, "loss": 3.3273, "theoretical_loss": 3.58700471446131, "tokens_seen": 1201668096 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.49034416675567627, "objective/train/docs_used": 681814, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.349426031112671, "objective/train/original_loss": 3.3494255542755127, "objective/train/theoretical_loss": 3.5867529732303307, "objective/train/tokens_used": 1223045600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24394987523555756, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502744913101196, "objective/train/weighted_lm_loss": 3.517376184463501, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9536815285682678, "theoretical_loss": 3.5867529732303307, "tokens_seen": 1202585600 }, { "epoch": 0.43, "learning_rate": 0.0002881195611048051, "loss": 3.3424, "theoretical_loss": 3.5867170302662537, "tokens_seen": 1202716672 }, { "epoch": 0.43, "learning_rate": 0.0002879303821415059, "loss": 3.3676, "theoretical_loss": 3.586429666934716, "tokens_seen": 1203765248 }, { "epoch": 0.43, "learning_rate": 0.00028774120317820657, "loss": 3.3343, "theoretical_loss": 3.5861426238297964, "tokens_seen": 1204813824 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.4898318946361542, "objective/train/docs_used": 683756, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4327945709228516, "objective/train/original_loss": 3.4327945709228516, "objective/train/theoretical_loss": 3.585855900316411, "objective/train/tokens_used": 1226322400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24356377124786377, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502212047576904, "objective/train/weighted_lm_loss": 3.606135368347168, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9553044438362122, "theoretical_loss": 3.585855900316411, "tokens_seen": 1205862400 }, { "epoch": 0.43, "learning_rate": 0.00028755202421490726, "loss": 3.354, "theoretical_loss": 3.585855900316411, "tokens_seen": 1205862400 }, { "epoch": 0.43, "learning_rate": 0.00028736284525160806, "loss": 3.3778, "theoretical_loss": 3.5855694957612894, "tokens_seen": 1206910976 }, { "epoch": 0.43, "learning_rate": 0.00028717366628830876, "loss": 3.3674, "theoretical_loss": 3.58528340953296, "tokens_seen": 1207959552 }, { "epoch": 0.43, "learning_rate": 0.0002869844873250095, "loss": 3.348, "theoretical_loss": 3.5849976410017526, "tokens_seen": 1209008128 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.4933568835258484, "objective/train/docs_used": 685102, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0827248096466064, "objective/train/original_loss": 3.0827245712280273, "objective/train/theoretical_loss": 3.5849619422421393, "objective/train/tokens_used": 1229599200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24562005698680878, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050584316253662, "objective/train/weighted_lm_loss": 3.2384722232818604, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9516139626502991, "theoretical_loss": 3.5849619422421393, "tokens_seen": 1209139200 }, { "epoch": 0.43, "learning_rate": 0.0002867953083617102, "loss": 3.3845, "theoretical_loss": 3.5847121895397844, "tokens_seen": 1210056704 }, { "epoch": 0.43, "learning_rate": 0.0002866061293984109, "loss": 3.3672, "theoretical_loss": 3.5844270545209582, "tokens_seen": 1211105280 }, { "epoch": 0.43, "learning_rate": 0.00028641695043511164, "loss": 3.4141, "theoretical_loss": 3.584142235320952, "tokens_seen": 1212153856 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.493893563747406, "objective/train/docs_used": 686738, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.313899517059326, "objective/train/original_loss": 3.313899517059326, "objective/train/theoretical_loss": 3.584071079794647, "objective/train/tokens_used": 1232876000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24534015357494354, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506365299224854, "objective/train/weighted_lm_loss": 3.4817614555358887, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9518520832061768, "theoretical_loss": 3.584071079794647, "tokens_seen": 1212416000 }, { "epoch": 0.43, "learning_rate": 0.00028622777147181233, "loss": 3.381, "theoretical_loss": 3.5838577313172157, "tokens_seen": 1213202432 }, { "epoch": 0.43, "learning_rate": 0.000286038592508513, "loss": 3.3996, "theoretical_loss": 3.5835735418889616, "tokens_seen": 1214251008 }, { "epoch": 0.43, "learning_rate": 0.0002858494135452138, "loss": 3.2881, "theoretical_loss": 3.583289666417161, "tokens_seen": 1215299584 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.4792419672012329, "objective/train/docs_used": 688520, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3605501651763916, "objective/train/original_loss": 3.3605504035949707, "objective/train/theoretical_loss": 3.583183293931091, "objective/train/tokens_used": 1236152800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.241739884018898, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491528511047363, "objective/train/weighted_lm_loss": 3.524282693862915, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9515173435211182, "theoretical_loss": 3.583183293931091, "tokens_seen": 1215692800 }, { "epoch": 0.43, "learning_rate": 0.00028566023458191447, "loss": 3.3747, "theoretical_loss": 3.5830061042845363, "tokens_seen": 1216348160 }, { "epoch": 0.43, "learning_rate": 0.0002854710556186152, "loss": 3.3662, "theoretical_loss": 3.582722854875552, "tokens_seen": 1217396736 }, { "epoch": 0.44, "learning_rate": 0.0002852818766553159, "loss": 3.381, "theoretical_loss": 3.5824399175764126, "tokens_seen": 1218445312 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.47432267665863037, "objective/train/docs_used": 690313, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.373595714569092, "objective/train/original_loss": 3.373595714569092, "objective/train/theoretical_loss": 3.5822985657766973, "objective/train/tokens_used": 1239429600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23132449388504028, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0486078262329102, "objective/train/weighted_lm_loss": 3.5375053882598877, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9576109647750854, "theoretical_loss": 3.5822985657766973, "tokens_seen": 1218969600 }, { "epoch": 0.44, "learning_rate": 0.00028509269769201666, "loss": 3.3976, "theoretical_loss": 3.5821572917750535, "tokens_seen": 1219493888 }, { "epoch": 0.44, "learning_rate": 0.0002849035187287174, "loss": 3.3473, "theoretical_loss": 3.5818749768611364, "tokens_seen": 1220542464 }, { "epoch": 0.44, "learning_rate": 0.0002847143397654181, "loss": 3.3782, "theoretical_loss": 3.5815929722260402, "tokens_seen": 1221591040 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.4889954626560211, "objective/train/docs_used": 692066, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.303687572479248, "objective/train/original_loss": 3.303687572479248, "objective/train/theoretical_loss": 3.5814168766228267, "objective/train/tokens_used": 1242706400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24146433174610138, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501270294189453, "objective/train/weighted_lm_loss": 3.468892812728882, "objective/train/weights_max": 1.0512158870697021, "objective/train/weights_min": 0.9796527028083801, "theoretical_loss": 3.5814168766228267, "tokens_seen": 1222246400 }, { "epoch": 0.44, "learning_rate": 0.00028452516080211884, "loss": 3.3363, "theoretical_loss": 3.5813112772628575, "tokens_seen": 1222639616 }, { "epoch": 0.44, "learning_rate": 0.00028433598183881954, "loss": 3.3338, "theoretical_loss": 3.581029891366387, "tokens_seen": 1223688192 }, { "epoch": 0.44, "learning_rate": 0.00028414680287552023, "loss": 3.2845, "theoretical_loss": 3.5807488139331274, "tokens_seen": 1224736768 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.4919726550579071, "objective/train/docs_used": 694202, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8666977882385254, "objective/train/original_loss": 2.8666977882385254, "objective/train/theoretical_loss": 3.580538207925077, "objective/train/tokens_used": 1245983200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2440943568944931, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504380464553833, "objective/train/weighted_lm_loss": 3.0116186141967773, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9593926668167114, "theoretical_loss": 3.580538207925077, "tokens_seen": 1225523200 }, { "epoch": 0.44, "learning_rate": 0.000283957623912221, "loss": 3.365, "theoretical_loss": 3.5804680443612718, "tokens_seen": 1225785344 }, { "epoch": 0.44, "learning_rate": 0.00028376844494892167, "loss": 3.3101, "theoretical_loss": 3.5801875820506988, "tokens_seen": 1226833920 }, { "epoch": 0.44, "learning_rate": 0.00028357926598562237, "loss": 3.2876, "theoretical_loss": 3.579907426402972, "tokens_seen": 1227882496 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.47936928272247314, "objective/train/docs_used": 696236, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1124532222747803, "objective/train/original_loss": 3.1124534606933594, "objective/train/theoretical_loss": 3.579662541301401, "objective/train/tokens_used": 1249260000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2382909506559372, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491482019424438, "objective/train/weighted_lm_loss": 3.266861915588379, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9547244310379028, "theoretical_loss": 3.579662541301401, "tokens_seen": 1228800000 }, { "epoch": 0.44, "learning_rate": 0.0002833900870223231, "loss": 3.3348, "theoretical_loss": 3.579627576821328, "tokens_seen": 1228931072 }, { "epoch": 0.44, "learning_rate": 0.0002832009080590238, "loss": 3.3281, "theoretical_loss": 3.579348032710672, "tokens_seen": 1229979648 }, { "epoch": 0.44, "learning_rate": 0.00028301172909572455, "loss": 3.349, "theoretical_loss": 3.5790687934775747, "tokens_seen": 1231028224 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.47580137848854065, "objective/train/docs_used": 697487, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3753488063812256, "objective/train/original_loss": 3.3753488063812256, "objective/train/theoretical_loss": 3.5787898585302615, "objective/train/tokens_used": 1252536800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2382103055715561, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487911701202393, "objective/train/weighted_lm_loss": 3.5415258407592773, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9519994854927063, "theoretical_loss": 3.5787898585302615, "tokens_seen": 1232076800 }, { "epoch": 0.44, "learning_rate": 0.0002828225501324253, "loss": 3.2565, "theoretical_loss": 3.5787898585302615, "tokens_seen": 1232076800 }, { "epoch": 0.44, "learning_rate": 0.000282633371169126, "loss": 3.3331, "theoretical_loss": 3.57851122727861, "tokens_seen": 1233125376 }, { "epoch": 0.44, "learning_rate": 0.00028244419220582674, "loss": 3.2767, "theoretical_loss": 3.578232899134143, "tokens_seen": 1234173952 }, { "epoch": 0.44, "learning_rate": 0.00028225501324252744, "loss": 3.286, "theoretical_loss": 3.5779548735100217, "tokens_seen": 1235222528 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.4829152226448059, "objective/train/docs_used": 699233, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.290647506713867, "objective/train/original_loss": 3.2906479835510254, "objective/train/theoretical_loss": 3.577920141548805, "objective/train/tokens_used": 1255813600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24048534035682678, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495140552520752, "objective/train/weighted_lm_loss": 3.452521562576294, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9529663920402527, "theoretical_loss": 3.577920141548805, "tokens_seen": 1235353600 }, { "epoch": 0.44, "learning_rate": 0.0002820658342792282, "loss": 3.2778, "theoretical_loss": 3.5776771498210413, "tokens_seen": 1236271104 }, { "epoch": 0.44, "learning_rate": 0.0002818766553159289, "loss": 3.252, "theoretical_loss": 3.5773997274836224, "tokens_seen": 1237319680 }, { "epoch": 0.44, "learning_rate": 0.00028168747635262957, "loss": 3.3259, "theoretical_loss": 3.577122605915809, "tokens_seen": 1238368256 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.479489803314209, "objective/train/docs_used": 700968, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0772054195404053, "objective/train/original_loss": 3.0772056579589844, "objective/train/theoretical_loss": 3.5770533724510627, "objective/train/tokens_used": 1259090400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23797892034053802, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491585731506348, "objective/train/weighted_lm_loss": 3.22639536857605, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9590162038803101, "theoretical_loss": 3.5770533724510627, "tokens_seen": 1238630400 }, { "epoch": 0.44, "learning_rate": 0.0002814982973893303, "loss": 3.2479, "theoretical_loss": 3.5768457845372597, "tokens_seen": 1239416832 }, { "epoch": 0.44, "learning_rate": 0.000281309118426031, "loss": 3.2973, "theoretical_loss": 3.576569262769242, "tokens_seen": 1240465408 }, { "epoch": 0.44, "learning_rate": 0.00028111993946273176, "loss": 3.2963, "theoretical_loss": 3.576293040034628, "tokens_seen": 1241513984 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.48357242345809937, "objective/train/docs_used": 702465, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.286665916442871, "objective/train/original_loss": 3.286665916442871, "objective/train/theoretical_loss": 3.576189533486179, "objective/train/tokens_used": 1262367200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23855482041835785, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049569845199585, "objective/train/weighted_lm_loss": 3.4492075443267822, "objective/train/weights_max": 1.0512161254882812, "objective/train/weights_min": 0.9514651298522949, "theoretical_loss": 3.576189533486179, "tokens_seen": 1241907200 }, { "epoch": 0.44, "learning_rate": 0.00028093076049943245, "loss": 3.3817, "theoretical_loss": 3.576017115757886, "tokens_seen": 1242562560 }, { "epoch": 0.44, "learning_rate": 0.00028074158153613315, "loss": 3.3114, "theoretical_loss": 3.57574148936508, "tokens_seen": 1243611136 }, { "epoch": 0.44, "learning_rate": 0.00028055240257283395, "loss": 3.319, "theoretical_loss": 3.575466160283857, "tokens_seen": 1244659712 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.484239399433136, "objective/train/docs_used": 704525, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2964892387390137, "objective/train/original_loss": 3.2964892387390137, "objective/train/theoretical_loss": 3.5753286070566617, "objective/train/tokens_used": 1265644000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24136582016944885, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496509075164795, "objective/train/weighted_lm_loss": 3.460113763809204, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9523569941520691, "theoretical_loss": 3.5753286070566617, "tokens_seen": 1245184000 }, { "epoch": 0.44, "learning_rate": 0.00028036322360953464, "loss": 3.3019, "theoretical_loss": 3.575191127943446, "tokens_seen": 1245708288 }, { "epoch": 0.45, "learning_rate": 0.00028017404464623534, "loss": 3.2625, "theoretical_loss": 3.574916391774651, "tokens_seen": 1246756864 }, { "epoch": 0.45, "learning_rate": 0.0002799848656829361, "loss": 3.2362, "theoretical_loss": 3.5746419512098457, "tokens_seen": 1247805440 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.4908085763454437, "objective/train/docs_used": 706749, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0369081497192383, "objective/train/original_loss": 3.0369081497192383, "objective/train/theoretical_loss": 3.5744705757166564, "objective/train/tokens_used": 1268920800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24289442598819733, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050315499305725, "objective/train/weighted_lm_loss": 3.190009593963623, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9634060263633728, "theoretical_loss": 3.5744705757166564, "tokens_seen": 1248460800 }, { "epoch": 0.45, "learning_rate": 0.0002797956867196368, "loss": 3.281, "theoretical_loss": 3.574367805682967, "tokens_seen": 1248854016 }, { "epoch": 0.45, "learning_rate": 0.0002796065077563375, "loss": 3.2693, "theoretical_loss": 3.57409395462951, "tokens_seen": 1249902592 }, { "epoch": 0.45, "learning_rate": 0.0002794173287930382, "loss": 3.1934, "theoretical_loss": 3.5738203974865224, "tokens_seen": 1250951168 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.4866064190864563, "objective/train/docs_used": 708687, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3080501556396484, "objective/train/original_loss": 3.3080499172210693, "objective/train/theoretical_loss": 3.5736154221702483, "objective/train/tokens_used": 1272197600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23975151777267456, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498794317245483, "objective/train/weighted_lm_loss": 3.472594976425171, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9527500867843628, "theoretical_loss": 3.5736154221702483, "tokens_seen": 1251737600 }, { "epoch": 0.45, "learning_rate": 0.0002792281498297389, "loss": 3.2981, "theoretical_loss": 3.5735471336925984, "tokens_seen": 1251999744 }, { "epoch": 0.45, "learning_rate": 0.00027903897086643966, "loss": 3.2379, "theoretical_loss": 3.5732741626878743, "tokens_seen": 1253048320 }, { "epoch": 0.45, "learning_rate": 0.00027884979190314035, "loss": 3.2213, "theoretical_loss": 3.5730014839140223, "tokens_seen": 1254096896 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.460138201713562, "objective/train/docs_used": 710568, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8919012546539307, "objective/train/original_loss": 2.8919010162353516, "objective/train/theoretical_loss": 3.5727631292697843, "objective/train/tokens_used": 1275474400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2265445441007614, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.047165036201477, "objective/train/weighted_lm_loss": 3.0274980068206787, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9611805081367493, "theoretical_loss": 3.5727631292697843, "tokens_seen": 1255014400 }, { "epoch": 0.45, "learning_rate": 0.0002786606129398411, "loss": 3.1932, "theoretical_loss": 3.5727290968142444, "tokens_seen": 1255145472 }, { "epoch": 0.45, "learning_rate": 0.0002784714339765418, "loss": 3.2294, "theoretical_loss": 3.572457000833267, "tokens_seen": 1256194048 }, { "epoch": 0.45, "learning_rate": 0.00027828225501324254, "loss": 3.2202, "theoretical_loss": 3.5721851954173376, "tokens_seen": 1257242624 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.4938296675682068, "objective/train/docs_used": 712328, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2921712398529053, "objective/train/original_loss": 3.292171001434326, "objective/train/theoretical_loss": 3.571913680014217, "objective/train/tokens_used": 1278751200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2448359876871109, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050627589225769, "objective/train/weighted_lm_loss": 3.4590396881103516, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9618917107582092, "theoretical_loss": 3.571913680014217, "tokens_seen": 1258291200 }, { "epoch": 0.45, "learning_rate": 0.0002780930760499433, "loss": 3.2465, "theoretical_loss": 3.571913680014217, "tokens_seen": 1258291200 }, { "epoch": 0.45, "learning_rate": 0.000277903897086644, "loss": 3.216, "theoretical_loss": 3.5716424540731735, "tokens_seen": 1259339776 }, { "epoch": 0.45, "learning_rate": 0.0002777147181233447, "loss": 3.2931, "theoretical_loss": 3.571371517044981, "tokens_seen": 1260388352 }, { "epoch": 0.45, "learning_rate": 0.0002775255391600454, "loss": 3.2458, "theoretical_loss": 3.571100868381909, "tokens_seen": 1261436928 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.4796658754348755, "objective/train/docs_used": 713944, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1429102420806885, "objective/train/original_loss": 3.1429104804992676, "objective/train/theoretical_loss": 3.5710670575474763, "objective/train/tokens_used": 1282028000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23792652785778046, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491758584976196, "objective/train/weighted_lm_loss": 3.297544240951538, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9516644477844238, "theoretical_loss": 3.5710670575474763, "tokens_seen": 1261568000 }, { "epoch": 0.45, "learning_rate": 0.0002773363601967461, "loss": 3.2961, "theoretical_loss": 3.5708305075377207, "tokens_seen": 1262485504 }, { "epoch": 0.45, "learning_rate": 0.00027714718123344686, "loss": 3.3117, "theoretical_loss": 3.5705604339676666, "tokens_seen": 1263534080 }, { "epoch": 0.45, "learning_rate": 0.00027695800227014756, "loss": 3.2667, "theoretical_loss": 3.57029064712848, "tokens_seen": 1264582656 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.49083083868026733, "objective/train/docs_used": 715923, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.367497444152832, "objective/train/original_loss": 3.367497444152832, "objective/train/theoretical_loss": 3.570223245156858, "objective/train/tokens_used": 1285304800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2441437840461731, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503242015838623, "objective/train/weighted_lm_loss": 3.536487102508545, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9521356821060181, "theoretical_loss": 3.570223245156858, "tokens_seen": 1264844800 }, { "epoch": 0.45, "learning_rate": 0.00027676882330684825, "loss": 3.3079, "theoretical_loss": 3.5700211464783687, "tokens_seen": 1265631232 }, { "epoch": 0.45, "learning_rate": 0.000276579644343549, "loss": 3.2347, "theoretical_loss": 3.5697519314770148, "tokens_seen": 1266679808 }, { "epoch": 0.45, "learning_rate": 0.0002763904653802497, "loss": 3.3075, "theoretical_loss": 3.5694830015855636, "tokens_seen": 1267728384 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.4871194660663605, "objective/train/docs_used": 717630, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2038087844848633, "objective/train/original_loss": 3.2038087844848633, "objective/train/theoretical_loss": 3.569382226271438, "objective/train/tokens_used": 1288581600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24120700359344482, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499380826950073, "objective/train/weighted_lm_loss": 3.3642735481262207, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9537880420684814, "theoretical_loss": 3.569382226271438, "tokens_seen": 1268121600 }, { "epoch": 0.45, "learning_rate": 0.00027620128641695044, "loss": 3.2755, "theoretical_loss": 3.569214356266625, "tokens_seen": 1268776960 }, { "epoch": 0.45, "learning_rate": 0.0002760121074536512, "loss": 3.2224, "theoretical_loss": 3.5689459949842623, "tokens_seen": 1269825536 }, { "epoch": 0.45, "learning_rate": 0.0002758229284903519, "loss": 3.2175, "theoretical_loss": 3.5686779172039906, "tokens_seen": 1270874112 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.49091073870658875, "objective/train/docs_used": 719315, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.267247438430786, "objective/train/original_loss": 3.267247200012207, "objective/train/theoretical_loss": 3.568543984460508, "objective/train/tokens_used": 1291858400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24294719099998474, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503261089324951, "objective/train/weighted_lm_loss": 3.4315378665924072, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9729084372520447, "theoretical_loss": 3.568543984460508, "tokens_seen": 1271398400 }, { "epoch": 0.45, "learning_rate": 0.00027563374952705263, "loss": 3.2281, "theoretical_loss": 3.5684101223927702, "tokens_seen": 1271922688 }, { "epoch": 0.45, "learning_rate": 0.0002754445705637533, "loss": 3.2489, "theoretical_loss": 3.568142610019003, "tokens_seen": 1272971264 }, { "epoch": 0.46, "learning_rate": 0.000275255391600454, "loss": 3.2501, "theoretical_loss": 3.567875379552525, "tokens_seen": 1274019840 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.4882226884365082, "objective/train/docs_used": 720761, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1280031204223633, "objective/train/original_loss": 3.1280031204223633, "objective/train/theoretical_loss": 3.5677085034320273, "objective/train/tokens_used": 1295135200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24101290106773376, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500473976135254, "objective/train/weighted_lm_loss": 3.284193992614746, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9808136820793152, "theoretical_loss": 3.5677085034320273, "tokens_seen": 1274675200 }, { "epoch": 0.46, "learning_rate": 0.00027506621263715476, "loss": 3.275, "theoretical_loss": 3.567608430464604, "tokens_seen": 1275068416 }, { "epoch": 0.46, "learning_rate": 0.00027487703367385546, "loss": 3.2107, "theoretical_loss": 3.567341762227932, "tokens_seen": 1276116992 }, { "epoch": 0.46, "learning_rate": 0.0002746878547105562, "loss": 3.2512, "theoretical_loss": 3.567075374316623, "tokens_seen": 1277165568 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.4728912115097046, "objective/train/docs_used": 722731, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8755643367767334, "objective/train/original_loss": 2.8755640983581543, "objective/train/theoretical_loss": 3.566875767031105, "objective/train/tokens_used": 1298412000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23524631559848785, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484849214553833, "objective/train/weighted_lm_loss": 3.0170345306396484, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9677127599716187, "theoretical_loss": 3.566875767031105, "tokens_seen": 1277952000 }, { "epoch": 0.46, "learning_rate": 0.0002744986757472569, "loss": 3.2255, "theoretical_loss": 3.5668092662062048, "tokens_seen": 1278214144 }, { "epoch": 0.46, "learning_rate": 0.0002743094967839576, "loss": 3.2771, "theoretical_loss": 3.566543437373617, "tokens_seen": 1279262720 }, { "epoch": 0.46, "learning_rate": 0.00027412031782065834, "loss": 3.2012, "theoretical_loss": 3.5662778872972036, "tokens_seen": 1280311296 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.4915541112422943, "objective/train/docs_used": 724789, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.916830062866211, "objective/train/original_loss": 2.916830062866211, "objective/train/theoretical_loss": 3.5660457592384924, "objective/train/tokens_used": 1301688800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24279290437698364, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503896474838257, "objective/train/weighted_lm_loss": 3.063872814178467, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9648842215538025, "theoretical_loss": 3.5660457592384924, "tokens_seen": 1281228800 }, { "epoch": 0.46, "learning_rate": 0.00027393113885735903, "loss": 3.2504, "theoretical_loss": 3.56601261545671, "tokens_seen": 1281359872 }, { "epoch": 0.46, "learning_rate": 0.00027374195989405983, "loss": 3.1818, "theoretical_loss": 3.565747621333277, "tokens_seen": 1282408448 }, { "epoch": 0.46, "learning_rate": 0.00027355278093076053, "loss": 3.1748, "theoretical_loss": 3.565482904409436, "tokens_seen": 1283457024 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.49075672030448914, "objective/train/docs_used": 727075, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.278653860092163, "objective/train/original_loss": 3.278654098510742, "objective/train/theoretical_loss": 3.5652184641691047, "objective/train/tokens_used": 1304965600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24346491694450378, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503132343292236, "objective/train/weighted_lm_loss": 3.4435787200927734, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9591943025588989, "theoretical_loss": 3.5652184641691047, "tokens_seen": 1284505600 }, { "epoch": 0.46, "learning_rate": 0.0002733636019674612, "loss": 3.2211, "theoretical_loss": 3.5652184641691047, "tokens_seen": 1284505600 }, { "epoch": 0.46, "learning_rate": 0.00027317442300416197, "loss": 3.2112, "theoretical_loss": 3.5649543000975825, "tokens_seen": 1285554176 }, { "epoch": 0.46, "learning_rate": 0.00027298524404086266, "loss": 3.2327, "theoretical_loss": 3.564690411681543, "tokens_seen": 1286602752 }, { "epoch": 0.46, "learning_rate": 0.00027279606507756336, "loss": 3.1368, "theoretical_loss": 3.564426798409034, "tokens_seen": 1287651328 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.49102783203125, "objective/train/docs_used": 728554, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.136857032775879, "objective/train/original_loss": 3.136857032775879, "objective/train/theoretical_loss": 3.5643938660705556, "objective/train/tokens_used": 1308242400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2425263375043869, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503356456756592, "objective/train/weighted_lm_loss": 3.294790029525757, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9526902437210083, "theoretical_loss": 3.5643938660705556, "tokens_seen": 1287782400 }, { "epoch": 0.46, "learning_rate": 0.0002726068861142641, "loss": 3.2618, "theoretical_loss": 3.5641634597694685, "tokens_seen": 1288699904 }, { "epoch": 0.46, "learning_rate": 0.0002724177071509648, "loss": 3.2774, "theoretical_loss": 3.5639003952536212, "tokens_seen": 1289748480 }, { "epoch": 0.46, "learning_rate": 0.00027222852818766554, "loss": 3.2149, "theoretical_loss": 3.563637604353625, "tokens_seen": 1290797056 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.4924575090408325, "objective/train/docs_used": 730680, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1504783630371094, "objective/train/original_loss": 3.1504788398742676, "objective/train/theoretical_loss": 3.5635719493217155, "objective/train/tokens_used": 1311519200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2439342588186264, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504858493804932, "objective/train/weighted_lm_loss": 3.309021234512329, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9914089441299438, "theoretical_loss": 3.5635719493217155, "tokens_seen": 1291059200 }, { "epoch": 0.46, "learning_rate": 0.00027203934922436624, "loss": 3.272, "theoretical_loss": 3.563375086562964, "tokens_seen": 1291845632 }, { "epoch": 0.46, "learning_rate": 0.00027185017026106693, "loss": 3.3122, "theoretical_loss": 3.563112841376472, "tokens_seen": 1292894208 }, { "epoch": 0.46, "learning_rate": 0.0002716609912977677, "loss": 3.254, "theoretical_loss": 3.562850868290324, "tokens_seen": 1293942784 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.4821608066558838, "objective/train/docs_used": 732486, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.109811305999756, "objective/train/original_loss": 3.109811305999756, "objective/train/theoretical_loss": 3.5627526984312885, "objective/train/tokens_used": 1314796000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2412458062171936, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049442172050476, "objective/train/weighted_lm_loss": 3.2632744312286377, "objective/train/weights_max": 1.0512189865112305, "objective/train/weights_min": 0.9536179304122925, "theoretical_loss": 3.5627526984312885, "tokens_seen": 1294336000 }, { "epoch": 0.46, "learning_rate": 0.0002714718123344684, "loss": 3.2944, "theoretical_loss": 3.5625891668020353, "tokens_seen": 1294991360 }, { "epoch": 0.46, "learning_rate": 0.0002712826333711692, "loss": 3.2955, "theoretical_loss": 3.5623277364104537, "tokens_seen": 1296039936 }, { "epoch": 0.46, "learning_rate": 0.00027109345440786987, "loss": 3.2216, "theoretical_loss": 3.562066576615756, "tokens_seen": 1297088512 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.4861340820789337, "objective/train/docs_used": 734540, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.098299741744995, "objective/train/original_loss": 3.098299503326416, "objective/train/theoretical_loss": 3.5619360980364068, "objective/train/tokens_used": 1318072800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23957262933254242, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498310327529907, "objective/train/weighted_lm_loss": 3.252875328063965, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9518746733665466, "theoretical_loss": 3.5619360980364068, "tokens_seen": 1297612800 }, { "epoch": 0.46, "learning_rate": 0.00027090427544457056, "loss": 3.1921, "theoretical_loss": 3.5618056869194454, "tokens_seen": 1298137088 }, { "epoch": 0.46, "learning_rate": 0.0002707150964812713, "loss": 3.2447, "theoretical_loss": 3.561545066824343, "tokens_seen": 1299185664 }, { "epoch": 0.46, "learning_rate": 0.000270525917517972, "loss": 3.2583, "theoretical_loss": 3.561284715834587, "tokens_seen": 1300234240 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.4824850559234619, "objective/train/docs_used": 736491, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3983359336853027, "objective/train/original_loss": 3.3983354568481445, "objective/train/theoretical_loss": 3.5611221329012466, "objective/train/tokens_used": 1321349600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24269729852676392, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049481987953186, "objective/train/weighted_lm_loss": 3.5662224292755127, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9514743089675903, "theoretical_loss": 3.5611221329012466, "tokens_seen": 1300889600 }, { "epoch": 0.46, "learning_rate": 0.00027033673855467275, "loss": 3.2885, "theoretical_loss": 3.5610246334556255, "tokens_seen": 1301282816 }, { "epoch": 0.47, "learning_rate": 0.00027014755959137344, "loss": 3.2482, "theoretical_loss": 3.5607648191942145, "tokens_seen": 1302331392 }, { "epoch": 0.47, "learning_rate": 0.00026995838062807414, "loss": 3.2405, "theoretical_loss": 3.56050527255841, "tokens_seen": 1303379968 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.49545571208000183, "objective/train/docs_used": 738318, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4414329528808594, "objective/train/original_loss": 3.4414329528808594, "objective/train/theoretical_loss": 3.5603107879156584, "objective/train/tokens_used": 1324626400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2457858920097351, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050795078277588, "objective/train/weighted_lm_loss": 3.6163370609283447, "objective/train/weights_max": 1.0512193441390991, "objective/train/weights_min": 1.01546049118042, "theoretical_loss": 3.5603107879156584, "tokens_seen": 1304166400 }, { "epoch": 0.47, "learning_rate": 0.0002697692016647749, "loss": 3.3299, "theoretical_loss": 3.560245993057567, "tokens_seen": 1304428544 }, { "epoch": 0.47, "learning_rate": 0.0002695800227014756, "loss": 3.2857, "theoretical_loss": 3.5599869802023325, "tokens_seen": 1305477120 }, { "epoch": 0.47, "learning_rate": 0.00026939084373817627, "loss": 3.2556, "theoretical_loss": 3.5597282335046425, "tokens_seen": 1306525696 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.4869755208492279, "objective/train/docs_used": 740377, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0287930965423584, "objective/train/original_loss": 3.0287928581237793, "objective/train/theoretical_loss": 3.5595020480938198, "objective/train/tokens_used": 1327903200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24304500222206116, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049932837486267, "objective/train/weighted_lm_loss": 3.180659294128418, "objective/train/weights_max": 1.0512193441390991, "objective/train/weights_min": 0.952156662940979, "theoretical_loss": 3.5595020480938198, "tokens_seen": 1307443200 }, { "epoch": 0.47, "learning_rate": 0.0002692016647748771, "loss": 3.2115, "theoretical_loss": 3.5594697524777175, "tokens_seen": 1307574272 }, { "epoch": 0.47, "learning_rate": 0.00026901248581157777, "loss": 3.2641, "theoretical_loss": 3.559211536636057, "tokens_seen": 1308622848 }, { "epoch": 0.47, "learning_rate": 0.0002688233068482785, "loss": 3.206, "theoretical_loss": 3.5589535854954364, "tokens_seen": 1309671424 }, { "debugging/Self-BLEU-5": 0.49020908264157476, "debugging/distinct-1-grams": 0.768901113497886, "debugging/distinct-2-grams": 0.9428782333551957, "debugging/entropy-1-grams": 6.085999550681761, "debugging/entropy-2-grams": 7.0033060167714964, "debugging/length": 490.2352941176471, "debugging/num_segments": 17, "debugging/raw_token_scores_avg": 0.02056093141436577, "debugging/raw_token_scores_std": 0.10981010645627975, "epoch": 0.47, "objective/train/advantage_avg": 0.47942253947257996, "objective/train/docs_used": 741674, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0563838481903076, "objective/train/original_loss": 3.0563840866088867, "objective/train/theoretical_loss": 3.5586958985729016, "objective/train/tokens_used": 1331180000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24191518127918243, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049171805381775, "objective/train/weighted_lm_loss": 3.2049503326416016, "objective/train/weights_max": 1.0512198209762573, "objective/train/weights_min": 0.9514583349227905, "theoretical_loss": 3.5586958985729016, "tokens_seen": 1310720000 }, { "epoch": 0.47, "learning_rate": 0.0002686341278849792, "loss": 3.2782, "theoretical_loss": 3.5586958985729016, "tokens_seen": 1310720000 }, { "epoch": 0.47, "learning_rate": 0.0002684449489216799, "loss": 3.2573, "theoretical_loss": 3.558438475386766, "tokens_seen": 1311768576 }, { "epoch": 0.47, "learning_rate": 0.00026825576995838065, "loss": 3.2587, "theoretical_loss": 3.5581813154566038, "tokens_seen": 1312817152 }, { "epoch": 0.47, "learning_rate": 0.00026806659099508134, "loss": 3.2928, "theoretical_loss": 3.5579244183032483, "tokens_seen": 1313865728 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.4831710159778595, "objective/train/docs_used": 743814, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.882990837097168, "objective/train/original_loss": 2.882990837097168, "objective/train/theoretical_loss": 3.5578923246117578, "objective/train/tokens_used": 1334456800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23744919896125793, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049523949623108, "objective/train/weighted_lm_loss": 3.0261266231536865, "objective/train/weights_max": 1.0512158870697021, "objective/train/weights_min": 0.9561281800270081, "theoretical_loss": 3.5578923246117578, "tokens_seen": 1313996800 }, { "epoch": 0.47, "learning_rate": 0.0002678774120317821, "loss": 3.1971, "theoretical_loss": 3.557667783448787, "tokens_seen": 1314914304 }, { "epoch": 0.47, "learning_rate": 0.0002676882330684828, "loss": 3.2104, "theoretical_loss": 3.5574114104165546, "tokens_seen": 1315962880 }, { "epoch": 0.47, "learning_rate": 0.0002674990541051835, "loss": 3.2483, "theoretical_loss": 3.557155298731134, "tokens_seen": 1317011456 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.49083414673805237, "objective/train/docs_used": 745506, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4596221446990967, "objective/train/original_loss": 3.459622383117676, "objective/train/theoretical_loss": 3.5570913115896228, "objective/train/tokens_used": 1337733600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24352119863033295, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050321340560913, "objective/train/weighted_lm_loss": 3.6329755783081055, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9624701142311096, "theoretical_loss": 3.5570913115896228, "tokens_seen": 1317273600 }, { "epoch": 0.47, "learning_rate": 0.0002673098751418842, "loss": 3.2454, "theoretical_loss": 3.5568994479183456, "tokens_seen": 1318060032 }, { "epoch": 0.47, "learning_rate": 0.0002671206961785849, "loss": 3.2333, "theoretical_loss": 3.55664385750525, "tokens_seen": 1319108608 }, { "epoch": 0.47, "learning_rate": 0.00026693151721528567, "loss": 3.3081, "theoretical_loss": 3.556388527020138, "tokens_seen": 1320157184 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.4799773693084717, "objective/train/docs_used": 747613, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.5142557621002197, "objective/train/original_loss": 3.514256000518799, "objective/train/theoretical_loss": 3.5562928450048386, "objective/train/tokens_used": 1341010400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2389097660779953, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049212098121643, "objective/train/weighted_lm_loss": 3.686645030975342, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9529973268508911, "theoretical_loss": 3.5562928450048386, "tokens_seen": 1320550400 }, { "epoch": 0.47, "learning_rate": 0.0002667423382519864, "loss": 3.2451, "theoretical_loss": 3.556133455992528, "tokens_seen": 1321205760 }, { "epoch": 0.47, "learning_rate": 0.0002665531592886871, "loss": 3.2432, "theoretical_loss": 3.5558786439531653, "tokens_seen": 1322254336 }, { "epoch": 0.47, "learning_rate": 0.00026636398032538785, "loss": 3.2586, "theoretical_loss": 3.555624090434014, "tokens_seen": 1323302912 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.4838820695877075, "objective/train/docs_used": 749700, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.848118305206299, "objective/train/original_loss": 2.848118305206299, "objective/train/theoretical_loss": 3.555496910473588, "objective/train/tokens_used": 1344287200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2406865805387497, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496114492416382, "objective/train/weighted_lm_loss": 2.9888694286346436, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9514018893241882, "theoretical_loss": 3.555496910473588, "tokens_seen": 1323827200 }, { "epoch": 0.47, "learning_rate": 0.00026617480136208855, "loss": 3.2677, "theoretical_loss": 3.555369794968252, "tokens_seen": 1324351488 }, { "epoch": 0.47, "learning_rate": 0.00026598562239878924, "loss": 3.2091, "theoretical_loss": 3.555115757090271, "tokens_seen": 1325400064 }, { "epoch": 0.47, "learning_rate": 0.00026579644343549, "loss": 3.2848, "theoretical_loss": 3.554861976335671, "tokens_seen": 1326448640 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.45839741826057434, "objective/train/docs_used": 752163, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.01212739944458, "objective/train/original_loss": 3.01212739944458, "objective/train/theoretical_loss": 3.5547034937286472, "objective/train/tokens_used": 1347564000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2407991886138916, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.047062635421753, "objective/train/weighted_lm_loss": 3.158634901046753, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.952105700969696, "theoretical_loss": 3.5547034937286472, "tokens_seen": 1327104000 }, { "epoch": 0.47, "learning_rate": 0.0002656072644721907, "loss": 3.1785, "theoretical_loss": 3.5546084522412533, "tokens_seen": 1327497216 }, { "epoch": 0.47, "learning_rate": 0.00026541808550889143, "loss": 3.3024, "theoretical_loss": 3.5543551843450203, "tokens_seen": 1328545792 }, { "epoch": 0.47, "learning_rate": 0.0002652289065455921, "loss": 3.1727, "theoretical_loss": 3.5541021721861696, "tokens_seen": 1329594368 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.4735065698623657, "objective/train/docs_used": 753856, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.942960739135742, "objective/train/original_loss": 2.942960739135742, "objective/train/theoretical_loss": 3.5539125806181584, "objective/train/tokens_used": 1350840800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23893173038959503, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485646724700928, "objective/train/weighted_lm_loss": 3.0864858627319336, "objective/train/weights_max": 1.0512158870697021, "objective/train/weights_min": 0.9517002701759338, "theoretical_loss": 3.5539125806181584, "tokens_seen": 1330380800 }, { "epoch": 0.48, "learning_rate": 0.0002650397275822928, "loss": 3.2689, "theoretical_loss": 3.5538494153050895, "tokens_seen": 1330642944 }, { "epoch": 0.48, "learning_rate": 0.00026485054861899356, "loss": 3.3248, "theoretical_loss": 3.5535969132433554, "tokens_seen": 1331691520 }, { "epoch": 0.48, "learning_rate": 0.0002646613696556943, "loss": 3.2776, "theoretical_loss": 3.5533446655437277, "tokens_seen": 1332740096 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.47561439871788025, "objective/train/docs_used": 755628, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0288760662078857, "objective/train/original_loss": 3.028876304626465, "objective/train/theoretical_loss": 3.5531241571044148, "objective/train/tokens_used": 1354117600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23352853953838348, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487481355667114, "objective/train/weighted_lm_loss": 3.1767513751983643, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9534009695053101, "theoretical_loss": 3.5531241571044148, "tokens_seen": 1333657600 }, { "epoch": 0.48, "learning_rate": 0.000264472190692395, "loss": 3.2214, "theoretical_loss": 3.5530926717501448, "tokens_seen": 1333788672 }, { "epoch": 0.48, "learning_rate": 0.00026428301172909575, "loss": 3.2087, "theoretical_loss": 3.5528409314077205, "tokens_seen": 1334837248 }, { "epoch": 0.48, "learning_rate": 0.00026409383276579645, "loss": 3.2751, "theoretical_loss": 3.5525894440627415, "tokens_seen": 1335885824 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.48273754119873047, "objective/train/docs_used": 757781, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.197702407836914, "objective/train/original_loss": 3.197701930999756, "objective/train/theoretical_loss": 3.5523382092626603, "objective/train/tokens_used": 1357394400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23674197494983673, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494771003723145, "objective/train/weighted_lm_loss": 3.355456590652466, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9578342437744141, "theoretical_loss": 3.5523382092626603, "tokens_seen": 1336934400 }, { "epoch": 0.48, "learning_rate": 0.0002639046538024972, "loss": 3.2785, "theoretical_loss": 3.5523382092626603, "tokens_seen": 1336934400 }, { "epoch": 0.48, "learning_rate": 0.0002637154748391979, "loss": 3.3046, "theoretical_loss": 3.552087226556094, "tokens_seen": 1337982976 }, { "epoch": 0.48, "learning_rate": 0.0002635262958758986, "loss": 3.2553, "theoretical_loss": 3.5518364954928185, "tokens_seen": 1339031552 }, { "epoch": 0.48, "learning_rate": 0.00026333711691259933, "loss": 3.2336, "theoretical_loss": 3.551586015623767, "tokens_seen": 1340080128 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.49417945742607117, "objective/train/docs_used": 759683, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.986185312271118, "objective/train/original_loss": 2.9861855506896973, "objective/train/theoretical_loss": 3.5515547232799087, "objective/train/tokens_used": 1360671200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2461775243282318, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506694316864014, "objective/train/weighted_lm_loss": 3.137205123901367, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9528197050094604, "theoretical_loss": 3.5515547232799087, "tokens_seen": 1340211200 }, { "epoch": 0.48, "learning_rate": 0.0002631479379493, "loss": 3.2575, "theoretical_loss": 3.5513357865010233, "tokens_seen": 1341128704 }, { "epoch": 0.48, "learning_rate": 0.00026295875898600077, "loss": 3.1575, "theoretical_loss": 3.5510858076778202, "tokens_seen": 1342177280 }, { "epoch": 0.48, "learning_rate": 0.00026276958002270146, "loss": 3.285, "theoretical_loss": 3.5508360787085342, "tokens_seen": 1343225856 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.4842744767665863, "objective/train/docs_used": 761574, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7354183197021484, "objective/train/original_loss": 2.7354183197021484, "objective/train/theoretical_loss": 3.550773685453774, "objective/train/tokens_used": 1363948000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2384546399116516, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496395826339722, "objective/train/weighted_lm_loss": 2.870645046234131, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9521103501319885, "theoretical_loss": 3.550773685453774, "tokens_seen": 1343488000 }, { "epoch": 0.48, "learning_rate": 0.00026258040105940216, "loss": 3.1794, "theoretical_loss": 3.5505865991486827, "tokens_seen": 1344274432 }, { "epoch": 0.48, "learning_rate": 0.00026239122209610296, "loss": 3.1656, "theoretical_loss": 3.5503373685549184, "tokens_seen": 1345323008 }, { "epoch": 0.48, "learning_rate": 0.00026220204313280365, "loss": 3.2142, "theoretical_loss": 3.5500883864850294, "tokens_seen": 1346371584 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.4764693081378937, "objective/train/docs_used": 763625, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9616000652313232, "objective/train/original_loss": 2.9616000652313232, "objective/train/theoretical_loss": 3.5499950821913204, "objective/train/tokens_used": 1367224800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23259218037128448, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488290786743164, "objective/train/weighted_lm_loss": 3.1081156730651855, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9584375023841858, "theoretical_loss": 3.5499950821913204, "tokens_seen": 1346764800 }, { "epoch": 0.48, "learning_rate": 0.0002620128641695044, "loss": 3.3291, "theoretical_loss": 3.5498396524979308, "tokens_seen": 1347420160 }, { "epoch": 0.48, "learning_rate": 0.0002618236852062051, "loss": 3.237, "theoretical_loss": 3.5495911661536637, "tokens_seen": 1348468736 }, { "epoch": 0.48, "learning_rate": 0.0002616345062429058, "loss": 3.293, "theoretical_loss": 3.5493429270133907, "tokens_seen": 1349517312 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.48155462741851807, "objective/train/docs_used": 764744, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.73677396774292, "objective/train/original_loss": 2.736774206161499, "objective/train/theoretical_loss": 3.549218900007921, "objective/train/tokens_used": 1370501600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.235448956489563, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493521690368652, "objective/train/weighted_lm_loss": 2.8729958534240723, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.953024685382843, "theoretical_loss": 3.549218900007921, "tokens_seen": 1350041600 }, { "epoch": 0.48, "learning_rate": 0.00026144532727960653, "loss": 3.2426, "theoretical_loss": 3.549094934639392, "tokens_seen": 1350565888 }, { "epoch": 0.48, "learning_rate": 0.00026125614831630723, "loss": 3.1944, "theoretical_loss": 3.5488471885950625, "tokens_seen": 1351614464 }, { "epoch": 0.48, "learning_rate": 0.0002610669693530079, "loss": 3.2782, "theoretical_loss": 3.5485996884449076, "tokens_seen": 1352663040 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.4894540011882782, "objective/train/docs_used": 766515, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1192467212677, "objective/train/original_loss": 3.1192469596862793, "objective/train/theoretical_loss": 3.5484451255261353, "objective/train/tokens_used": 1373778400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24316319823265076, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501813888549805, "objective/train/weighted_lm_loss": 3.2756810188293457, "objective/train/weights_max": 1.0512185096740723, "objective/train/weights_min": 0.9519091248512268, "theoretical_loss": 3.5484451255261353, "tokens_seen": 1353318400 }, { "epoch": 0.48, "learning_rate": 0.00026087779038970867, "loss": 3.2537, "theoretical_loss": 3.5483524337545385, "tokens_seen": 1353711616 }, { "epoch": 0.48, "learning_rate": 0.00026068861142640936, "loss": 3.1763, "theoretical_loss": 3.5481054240906698, "tokens_seen": 1354760192 }, { "epoch": 0.48, "learning_rate": 0.0002604994324631101, "loss": 3.2387, "theoretical_loss": 3.547858659021117, "tokens_seen": 1355808768 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.4911956787109375, "objective/train/docs_used": 768508, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8986051082611084, "objective/train/original_loss": 2.8986048698425293, "objective/train/theoretical_loss": 3.5476737454746035, "objective/train/tokens_used": 1377055200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24308650195598602, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050355315208435, "objective/train/weighted_lm_loss": 3.0448904037475586, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9534386992454529, "theoretical_loss": 3.5476737454746035, "tokens_seen": 1356595200 }, { "epoch": 0.48, "learning_rate": 0.0002603102534998108, "loss": 3.2887, "theoretical_loss": 3.5476121381147894, "tokens_seen": 1356857344 }, { "epoch": 0.49, "learning_rate": 0.00026012107453651155, "loss": 3.2406, "theoretical_loss": 3.5473658609416896, "tokens_seen": 1357905920 }, { "epoch": 0.49, "learning_rate": 0.0002599318955732123, "loss": 3.2552, "theoretical_loss": 3.5471198270729083, "tokens_seen": 1358954496 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.4947124123573303, "objective/train/docs_used": 770636, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0518202781677246, "objective/train/original_loss": 3.0518202781677246, "objective/train/theoretical_loss": 3.5469047466869448, "objective/train/tokens_used": 1380332000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24551640450954437, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0507193803787231, "objective/train/weighted_lm_loss": 3.206749200820923, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9574251174926758, "theoretical_loss": 3.5469047466869448, "tokens_seen": 1359872000 }, { "epoch": 0.49, "learning_rate": 0.000259742716609913, "loss": 3.287, "theoretical_loss": 3.546874036080621, "tokens_seen": 1360003072 }, { "epoch": 0.49, "learning_rate": 0.00025955353764661374, "loss": 3.2775, "theoretical_loss": 3.5466284875380856, "tokens_seen": 1361051648 }, { "epoch": 0.49, "learning_rate": 0.00025936435868331443, "loss": 3.337, "theoretical_loss": 3.546383181019637, "tokens_seen": 1362100224 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.47492796182632446, "objective/train/docs_used": 772517, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1139206886291504, "objective/train/original_loss": 3.1139206886291504, "objective/train/theoretical_loss": 3.5461381161006846, "objective/train/tokens_used": 1383608800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23219169676303864, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0486730337142944, "objective/train/weighted_lm_loss": 3.2662875652313232, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.9535848498344421, "theoretical_loss": 3.5461381161006846, "tokens_seen": 1363148800 }, { "epoch": 0.49, "learning_rate": 0.0002591751797200151, "loss": 3.2782, "theoretical_loss": 3.5461381161006846, "tokens_seen": 1363148800 }, { "epoch": 0.49, "learning_rate": 0.0002589860007567159, "loss": 3.2394, "theoretical_loss": 3.5458932923577082, "tokens_seen": 1364197376 }, { "epoch": 0.49, "learning_rate": 0.00025879682179341657, "loss": 3.2557, "theoretical_loss": 3.5456487093682547, "tokens_seen": 1365245952 }, { "epoch": 0.49, "learning_rate": 0.00025860764283011726, "loss": 3.2417, "theoretical_loss": 3.5454043667109367, "tokens_seen": 1366294528 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.4928061366081238, "objective/train/docs_used": 774661, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.42317533493042, "objective/train/original_loss": 3.423175096511841, "objective/train/theoretical_loss": 3.545373840756179, "objective/train/tokens_used": 1386885600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24523727595806122, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505273342132568, "objective/train/weighted_lm_loss": 3.59568452835083, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.968864381313324, "theoretical_loss": 3.545373840756179, "tokens_seen": 1366425600 }, { "epoch": 0.49, "learning_rate": 0.000258418463866818, "loss": 3.3035, "theoretical_loss": 3.545160263965424, "tokens_seen": 1367343104 }, { "epoch": 0.49, "learning_rate": 0.0002582292849035187, "loss": 3.27, "theoretical_loss": 3.544916400712445, "tokens_seen": 1368391680 }, { "epoch": 0.49, "learning_rate": 0.00025804010594021945, "loss": 3.2883, "theoretical_loss": 3.5446727765337815, "tokens_seen": 1369440256 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.48920339345932007, "objective/train/docs_used": 776810, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.031377077102661, "objective/train/original_loss": 3.0313773155212402, "objective/train/theoretical_loss": 3.5446119077955673, "objective/train/tokens_used": 1390162400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24134649336338043, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050147294998169, "objective/train/weighted_lm_loss": 3.183500051498413, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9817801117897034, "theoretical_loss": 3.5446119077955673, "tokens_seen": 1369702400 }, { "epoch": 0.49, "learning_rate": 0.0002578509269769202, "loss": 3.2427, "theoretical_loss": 3.5444293910122644, "tokens_seen": 1370488832 }, { "epoch": 0.49, "learning_rate": 0.0002576617480136209, "loss": 3.2541, "theoretical_loss": 3.544186243731771, "tokens_seen": 1371537408 }, { "epoch": 0.49, "learning_rate": 0.00025747256905032164, "loss": 3.2502, "theoretical_loss": 3.5439433342772224, "tokens_seen": 1372585984 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.47569069266319275, "objective/train/docs_used": 778576, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1365725994110107, "objective/train/original_loss": 3.1365721225738525, "objective/train/theoretical_loss": 3.543852304461728, "objective/train/tokens_used": 1393439200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.233524888753891, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487558841705322, "objective/train/weighted_lm_loss": 3.2887704372406006, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9515125155448914, "theoretical_loss": 3.543852304461728, "tokens_seen": 1372979200 }, { "epoch": 0.49, "learning_rate": 0.00025728339008702233, "loss": 3.2909, "theoretical_loss": 3.5437006622345777, "tokens_seen": 1373634560 }, { "epoch": 0.49, "learning_rate": 0.0002570942111237231, "loss": 3.3048, "theoretical_loss": 3.5434582271908344, "tokens_seen": 1374683136 }, { "epoch": 0.49, "learning_rate": 0.0002569050321604238, "loss": 3.2858, "theoretical_loss": 3.5432160287340206, "tokens_seen": 1375731712 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.47667446732521057, "objective/train/docs_used": 780651, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3615801334381104, "objective/train/original_loss": 3.3615803718566895, "objective/train/theoretical_loss": 3.5430950180972527, "objective/train/tokens_used": 1396716000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2355356216430664, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488646030426025, "objective/train/weighted_lm_loss": 3.5261738300323486, "objective/train/weights_max": 1.0512200593948364, "objective/train/weights_min": 0.9540208578109741, "theoretical_loss": 3.5430950180972527, "tokens_seen": 1376256000 }, { "epoch": 0.49, "learning_rate": 0.00025671585319712447, "loss": 3.2503, "theoretical_loss": 3.542974066453195, "tokens_seen": 1376780288 }, { "epoch": 0.49, "learning_rate": 0.0002565266742338252, "loss": 3.2741, "theoretical_loss": 3.5427323399384427, "tokens_seen": 1377828864 }, { "epoch": 0.49, "learning_rate": 0.0002563374952705259, "loss": 3.3355, "theoretical_loss": 3.5424908487808704, "tokens_seen": 1378877440 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.490723192691803, "objective/train/docs_used": 781840, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0954983234405518, "objective/train/original_loss": 3.0954983234405518, "objective/train/theoretical_loss": 3.5423400361434307, "objective/train/tokens_used": 1399992800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24198147654533386, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503023862838745, "objective/train/weighted_lm_loss": 3.2512621879577637, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9551610946655273, "theoretical_loss": 3.5423400361434307, "tokens_seen": 1379532800 }, { "epoch": 0.49, "learning_rate": 0.0002561483163072266, "loss": 3.2322, "theoretical_loss": 3.542249592572605, "tokens_seen": 1379926016 }, { "epoch": 0.49, "learning_rate": 0.00025595913734392735, "loss": 3.2141, "theoretical_loss": 3.5420085709067894, "tokens_seen": 1380974592 }, { "epoch": 0.49, "learning_rate": 0.00025576995838062804, "loss": 3.21, "theoretical_loss": 3.541767783377579, "tokens_seen": 1382023168 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.46955326199531555, "objective/train/docs_used": 784187, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0273990631103516, "objective/train/original_loss": 3.0273988246917725, "objective/train/theoretical_loss": 3.541587346139247, "objective/train/tokens_used": 1403269600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23108002543449402, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481295585632324, "objective/train/weighted_lm_loss": 3.174248218536377, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9517195820808411, "theoretical_loss": 3.541587346139247, "tokens_seen": 1382809600 }, { "epoch": 0.49, "learning_rate": 0.00025558077941732884, "loss": 3.1799, "theoretical_loss": 3.5415272295801388, "tokens_seen": 1383071744 }, { "epoch": 0.49, "learning_rate": 0.00025539160045402954, "loss": 3.2792, "theoretical_loss": 3.5412869091106405, "tokens_seen": 1384120320 }, { "epoch": 0.49, "learning_rate": 0.00025520242149073023, "loss": 3.2038, "theoretical_loss": 3.541046821566258, "tokens_seen": 1385168896 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.47911426424980164, "objective/train/docs_used": 786072, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8968513011932373, "objective/train/original_loss": 2.8968517780303955, "objective/train/theoretical_loss": 3.540836935720394, "objective/train/tokens_used": 1406546400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23572920262813568, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049109697341919, "objective/train/weighted_lm_loss": 3.039515972137451, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9524866938591003, "theoretical_loss": 3.540836935720394, "tokens_seen": 1386086400 }, { "epoch": 0.5, "learning_rate": 0.000255013242527431, "loss": 3.2616, "theoretical_loss": 3.5408069665451656, "tokens_seen": 1386217472 }, { "epoch": 0.5, "learning_rate": 0.00025482406356413167, "loss": 3.1624, "theoretical_loss": 3.5405673436465332, "tokens_seen": 1387266048 }, { "epoch": 0.5, "learning_rate": 0.0002546348846008324, "loss": 3.2074, "theoretical_loss": 3.540327952470525, "tokens_seen": 1388314624 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.48018690943717957, "objective/train/docs_used": 787611, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.299957036972046, "objective/train/original_loss": 3.299956798553467, "objective/train/theoretical_loss": 3.5400887926182953, "objective/train/tokens_used": 1409823200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2368466556072235, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049222469329834, "objective/train/weighted_lm_loss": 3.462350845336914, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9544512629508972, "theoretical_loss": 3.5400887926182953, "tokens_seen": 1389363200 }, { "epoch": 0.5, "learning_rate": 0.0002544457056375331, "loss": 3.2096, "theoretical_loss": 3.5400887926182953, "tokens_seen": 1389363200 }, { "epoch": 0.5, "learning_rate": 0.0002542565266742338, "loss": 3.2465, "theoretical_loss": 3.5398498636919835, "tokens_seen": 1390411776 }, { "epoch": 0.5, "learning_rate": 0.00025406734771093455, "loss": 3.2359, "theoretical_loss": 3.5396111652947146, "tokens_seen": 1391460352 }, { "epoch": 0.5, "learning_rate": 0.00025387816874763525, "loss": 3.1666, "theoretical_loss": 3.539372697030594, "tokens_seen": 1392508928 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.4910789430141449, "objective/train/docs_used": 789336, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.141433000564575, "objective/train/original_loss": 3.1414332389831543, "objective/train/theoretical_loss": 3.5393429046591365, "objective/train/tokens_used": 1413100000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2442692369222641, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503495931625366, "objective/train/weighted_lm_loss": 3.2997946739196777, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9526776075363159, "theoretical_loss": 3.5393429046591365, "tokens_seen": 1392640000 }, { "epoch": 0.5, "learning_rate": 0.00025368898978433594, "loss": 3.3043, "theoretical_loss": 3.5391344585047024, "tokens_seen": 1393557504 }, { "epoch": 0.5, "learning_rate": 0.0002534998108210367, "loss": 3.2916, "theoretical_loss": 3.538896449323098, "tokens_seen": 1394606080 }, { "epoch": 0.5, "learning_rate": 0.00025331063185773744, "loss": 3.2459, "theoretical_loss": 3.538658669092807, "tokens_seen": 1395654656 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.46647822856903076, "objective/train/docs_used": 791369, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.493539810180664, "objective/train/original_loss": 3.493539810180664, "objective/train/theoretical_loss": 3.5385992597629174, "objective/train/tokens_used": 1416376800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2277674525976181, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0478054285049438, "objective/train/weighted_lm_loss": 3.659994602203369, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9575470685958862, "theoretical_loss": 3.5385992597629174, "tokens_seen": 1395916800 }, { "epoch": 0.5, "learning_rate": 0.0002531214528944382, "loss": 3.2459, "theoretical_loss": 3.5384211174218247, "tokens_seen": 1396703232 }, { "epoch": 0.5, "learning_rate": 0.0002529322739311389, "loss": 3.2727, "theoretical_loss": 3.538183793919112, "tokens_seen": 1397751808 }, { "epoch": 0.5, "learning_rate": 0.00025274309496783957, "loss": 3.2219, "theoretical_loss": 3.53794669819459, "tokens_seen": 1398800384 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.4890250563621521, "objective/train/docs_used": 793176, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.05161190032959, "objective/train/original_loss": 3.05161190032959, "objective/train/theoretical_loss": 3.5378578459425087, "objective/train/tokens_used": 1419653600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24090653657913208, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501271486282349, "objective/train/weighted_lm_loss": 3.204907178878784, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9735680818557739, "theoretical_loss": 3.5378578459425087, "tokens_seen": 1399193600 }, { "epoch": 0.5, "learning_rate": 0.0002525539160045403, "loss": 3.2726, "theoretical_loss": 3.53770982985914, "tokens_seen": 1399848960 }, { "epoch": 0.5, "learning_rate": 0.000252364737041241, "loss": 3.2014, "theoretical_loss": 3.537473188524598, "tokens_seen": 1400897536 }, { "epoch": 0.5, "learning_rate": 0.00025217555807794176, "loss": 3.2634, "theoretical_loss": 3.5372367738037527, "tokens_seen": 1401946112 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.4814291298389435, "objective/train/docs_used": 795165, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7210984230041504, "objective/train/original_loss": 2.7210984230041504, "objective/train/theoretical_loss": 3.537118651302722, "objective/train/tokens_used": 1422930400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2377876341342926, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493515729904175, "objective/train/weighted_lm_loss": 2.8548381328582764, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9514068365097046, "theoretical_loss": 3.537118651302722, "tokens_seen": 1402470400 }, { "epoch": 0.5, "learning_rate": 0.00025198637911464245, "loss": 3.2325, "theoretical_loss": 3.5370005853103414, "tokens_seen": 1402994688 }, { "epoch": 0.5, "learning_rate": 0.00025179720015134315, "loss": 3.2092, "theoretical_loss": 3.5367646226590503, "tokens_seen": 1404043264 }, { "epoch": 0.5, "learning_rate": 0.0002516080211880439, "loss": 3.1997, "theoretical_loss": 3.536528885465506, "tokens_seen": 1405091840 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.48178187012672424, "objective/train/docs_used": 797297, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.338026285171509, "objective/train/original_loss": 3.3380260467529297, "objective/train/theoretical_loss": 3.536381664039392, "objective/train/tokens_used": 1426207200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24058951437473297, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049401044845581, "objective/train/weighted_lm_loss": 3.503382682800293, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9520537853240967, "theoretical_loss": 3.536381664039392, "tokens_seen": 1405747200 }, { "epoch": 0.5, "learning_rate": 0.0002514188422247446, "loss": 3.2556, "theoretical_loss": 3.536293373346278, "tokens_seen": 1406140416 }, { "epoch": 0.5, "learning_rate": 0.0002512296632614453, "loss": 3.1468, "theoretical_loss": 3.5360580859188713, "tokens_seen": 1407188992 }, { "epoch": 0.5, "learning_rate": 0.0002510404842981461, "loss": 3.1976, "theoretical_loss": 3.5358230228017264, "tokens_seen": 1408237568 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.4824129343032837, "objective/train/docs_used": 799421, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9747111797332764, "objective/train/original_loss": 2.9747114181518555, "objective/train/theoretical_loss": 3.5356468724384706, "objective/train/tokens_used": 1429484000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23822368681430817, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494521856307983, "objective/train/weighted_lm_loss": 3.1242523193359375, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9635123014450073, "theoretical_loss": 3.5356468724384706, "tokens_seen": 1409024000 }, { "epoch": 0.5, "learning_rate": 0.0002508513053348468, "loss": 3.1687, "theoretical_loss": 3.535588183614215, "tokens_seen": 1409286144 }, { "epoch": 0.5, "learning_rate": 0.0002506621263715475, "loss": 3.1706, "theoretical_loss": 3.535353567976637, "tokens_seen": 1410334720 }, { "epoch": 0.5, "learning_rate": 0.0002504729474082482, "loss": 3.1224, "theoretical_loss": 3.5351191755102187, "tokens_seen": 1411383296 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.48010411858558655, "objective/train/docs_used": 800610, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.90187406539917, "objective/train/original_loss": 2.901874542236328, "objective/train/theoretical_loss": 3.5349142648751304, "objective/train/tokens_used": 1432760800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23449444770812988, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492023229599, "objective/train/weighted_lm_loss": 3.0453240871429443, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9521341919898987, "theoretical_loss": 3.5349142648751304, "tokens_seen": 1412300800 }, { "epoch": 0.5, "learning_rate": 0.0002502837684449489, "loss": 3.2347, "theoretical_loss": 3.534885005837108, "tokens_seen": 1412431872 }, { "epoch": 0.5, "learning_rate": 0.00025009458948164966, "loss": 3.1638, "theoretical_loss": 3.5346510585803728, "tokens_seen": 1413480448 }, { "epoch": 0.51, "learning_rate": 0.00024990541051835035, "loss": 3.1473, "theoretical_loss": 3.534417333363997, "tokens_seen": 1414529024 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.48848357796669006, "objective/train/docs_used": 802603, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.191892623901367, "objective/train/original_loss": 3.191892623901367, "objective/train/theoretical_loss": 3.5341838298128803, "objective/train/tokens_used": 1436037600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24162043631076813, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500764846801758, "objective/train/weighted_lm_loss": 3.351858377456665, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9517599940299988, "theoretical_loss": 3.5341838298128803, "tokens_seen": 1415577600 }, { "epoch": 0.51, "learning_rate": 0.0002497162315550511, "loss": 3.2598, "theoretical_loss": 3.5341838298128803, "tokens_seen": 1415577600 }, { "epoch": 0.51, "learning_rate": 0.0002495270525917518, "loss": 3.2427, "theoretical_loss": 3.5339505475528314, "tokens_seen": 1416626176 }, { "epoch": 0.51, "learning_rate": 0.00024933787362845254, "loss": 3.1952, "theoretical_loss": 3.533717486210567, "tokens_seen": 1417674752 }, { "epoch": 0.51, "learning_rate": 0.00024914869466515323, "loss": 3.244, "theoretical_loss": 3.5334846454137114, "tokens_seen": 1418723328 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.4878292977809906, "objective/train/docs_used": 804391, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.043952465057373, "objective/train/original_loss": 3.043951988220215, "objective/train/theoretical_loss": 3.533455555802692, "objective/train/tokens_used": 1439314400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24175363779067993, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500118732452393, "objective/train/weighted_lm_loss": 3.195713996887207, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9565330743789673, "theoretical_loss": 3.533455555802692, "tokens_seen": 1418854400 }, { "epoch": 0.51, "learning_rate": 0.000248959515701854, "loss": 3.2038, "theoretical_loss": 3.533252024790788, "tokens_seen": 1419771904 }, { "epoch": 0.51, "learning_rate": 0.0002487703367385547, "loss": 3.2483, "theoretical_loss": 3.5330196239712217, "tokens_seen": 1420820480 }, { "epoch": 0.51, "learning_rate": 0.00024858115777525537, "loss": 3.1338, "theoretical_loss": 3.532787442585333, "tokens_seen": 1421869056 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.4747827351093292, "objective/train/docs_used": 806541, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.976189136505127, "objective/train/original_loss": 2.976189136505127, "objective/train/theoretical_loss": 3.5327294314821365, "objective/train/tokens_used": 1442591200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2333299070596695, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0486640930175781, "objective/train/weighted_lm_loss": 3.120753049850464, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9516189098358154, "theoretical_loss": 3.5327294314821365, "tokens_seen": 1422131200 }, { "epoch": 0.51, "learning_rate": 0.0002483919788119561, "loss": 3.2056, "theoretical_loss": 3.5325554802643375, "tokens_seen": 1422917632 }, { "epoch": 0.51, "learning_rate": 0.00024820279984865686, "loss": 3.2624, "theoretical_loss": 3.5323237366403397, "tokens_seen": 1423966208 }, { "epoch": 0.51, "learning_rate": 0.00024801362088535756, "loss": 3.2228, "theoretical_loss": 3.532092211346335, "tokens_seen": 1425014784 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.491454541683197, "objective/train/docs_used": 808636, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2102763652801514, "objective/train/original_loss": 3.2102766036987305, "objective/train/theoretical_loss": 3.5320054455745304, "objective/train/tokens_used": 1445868000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24370373785495758, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503844022750854, "objective/train/weighted_lm_loss": 3.3718581199645996, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9566341042518616, "theoretical_loss": 3.5320054455745304, "tokens_seen": 1425408000 }, { "epoch": 0.51, "learning_rate": 0.00024782444192205825, "loss": 3.1844, "theoretical_loss": 3.5318609040162015, "tokens_seen": 1426063360 }, { "epoch": 0.51, "learning_rate": 0.000247635262958759, "loss": 3.1546, "theoretical_loss": 3.5316298142847016, "tokens_seen": 1427111936 }, { "epoch": 0.51, "learning_rate": 0.0002474460839954597, "loss": 3.2048, "theoretical_loss": 3.5313989417874763, "tokens_seen": 1428160512 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.4886825978755951, "objective/train/docs_used": 810397, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9854471683502197, "objective/train/original_loss": 2.985447406768799, "objective/train/theoretical_loss": 3.5312835868880947, "objective/train/tokens_used": 1449144800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24093887209892273, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500929355621338, "objective/train/weighted_lm_loss": 3.1349117755889893, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9605134129524231, "theoretical_loss": 3.5312835868880947, "tokens_seen": 1428684800 }, { "epoch": 0.51, "learning_rate": 0.00024725690503216044, "loss": 3.2367, "theoretical_loss": 3.5311682861610447, "tokens_seen": 1429209088 }, { "epoch": 0.51, "learning_rate": 0.00024706772606886113, "loss": 3.2155, "theoretical_loss": 3.5309378470427997, "tokens_seen": 1430257664 }, { "epoch": 0.51, "learning_rate": 0.0002468785471055619, "loss": 3.1929, "theoretical_loss": 3.530707624071006, "tokens_seen": 1431306240 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.4831509590148926, "objective/train/docs_used": 812373, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3620550632476807, "objective/train/original_loss": 3.3620550632476807, "objective/train/theoretical_loss": 3.530563844315122, "objective/train/tokens_used": 1452421600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23847836256027222, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495271682739258, "objective/train/weighted_lm_loss": 3.5282113552093506, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9517561793327332, "theoretical_loss": 3.530563844315122, "tokens_seen": 1431961600 }, { "epoch": 0.51, "learning_rate": 0.0002466893681422626, "loss": 3.2089, "theoretical_loss": 3.5304776168847964, "tokens_seen": 1432354816 }, { "epoch": 0.51, "learning_rate": 0.0002465001891789633, "loss": 3.1689, "theoretical_loss": 3.530247825124171, "tokens_seen": 1433403392 }, { "epoch": 0.51, "learning_rate": 0.000246311010215664, "loss": 3.1184, "theoretical_loss": 3.530018248429992, "tokens_seen": 1434451968 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.49197354912757874, "objective/train/docs_used": 813976, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9489939212799072, "objective/train/original_loss": 2.9489941596984863, "objective/train/theoretical_loss": 3.5298462068311554, "objective/train/tokens_used": 1455698400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24280238151550293, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504316091537476, "objective/train/weighted_lm_loss": 3.0980122089385986, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.9728739857673645, "theoretical_loss": 3.5298462068311554, "tokens_seen": 1435238400 }, { "epoch": 0.51, "learning_rate": 0.0002461218312523647, "loss": 3.1976, "theoretical_loss": 3.529788886443983, "tokens_seen": 1435500544 }, { "epoch": 0.51, "learning_rate": 0.00024593265228906546, "loss": 3.0743, "theoretical_loss": 3.529559738808726, "tokens_seen": 1436549120 }, { "epoch": 0.51, "learning_rate": 0.0002457434733257662, "loss": 3.1564, "theoretical_loss": 3.5293308051676573, "tokens_seen": 1437597696 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.4857310950756073, "objective/train/docs_used": 815908, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9717392921447754, "objective/train/original_loss": 2.9717395305633545, "objective/train/theoretical_loss": 3.5291306634941737, "objective/train/tokens_used": 1458975200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24036578834056854, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497949123382568, "objective/train/weighted_lm_loss": 3.1189889907836914, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9525620937347412, "theoretical_loss": 3.5291306634941737, "tokens_seen": 1438515200 }, { "epoch": 0.51, "learning_rate": 0.0002455542943624669, "loss": 3.1752, "theoretical_loss": 3.529102085165067, "tokens_seen": 1438646272 }, { "epoch": 0.51, "learning_rate": 0.0002453651153991676, "loss": 3.1295, "theoretical_loss": 3.5288735784460936, "tokens_seen": 1439694848 }, { "epoch": 0.51, "learning_rate": 0.00024517593643586834, "loss": 3.1535, "theoretical_loss": 3.5286452846567244, "tokens_seen": 1440743424 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.4737866222858429, "objective/train/docs_used": 817781, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.049960136413574, "objective/train/original_loss": 3.049960136413574, "objective/train/theoretical_loss": 3.528417203443791, "objective/train/tokens_used": 1462252000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23372094333171844, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485665798187256, "objective/train/weighted_lm_loss": 3.1971216201782227, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9524149298667908, "theoretical_loss": 3.528417203443791, "tokens_seen": 1441792000 }, { "epoch": 0.51, "learning_rate": 0.00024498675747256903, "loss": 3.1712, "theoretical_loss": 3.528417203443791, "tokens_seen": 1441792000 }, { "epoch": 0.52, "learning_rate": 0.0002447975785092698, "loss": 3.1211, "theoretical_loss": 3.528189334454967, "tokens_seen": 1442840576 }, { "epoch": 0.52, "learning_rate": 0.00024460839954597053, "loss": 3.1512, "theoretical_loss": 3.527961677338765, "tokens_seen": 1443889152 }, { "epoch": 0.52, "learning_rate": 0.0002444192205826712, "loss": 3.1175, "theoretical_loss": 3.5277342317445353, "tokens_seen": 1444937728 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.4741077423095703, "objective/train/docs_used": 819121, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9588849544525146, "objective/train/original_loss": 2.9588847160339355, "objective/train/theoretical_loss": 3.5277058159004637, "objective/train/tokens_used": 1465528800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24014198780059814, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048631191253662, "objective/train/weighted_lm_loss": 3.10211443901062, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9528647065162659, "theoretical_loss": 3.5277058159004637, "tokens_seen": 1445068800 }, { "epoch": 0.52, "learning_rate": 0.0002442300416193719, "loss": 3.1457, "theoretical_loss": 3.5275069973224618, "tokens_seen": 1445986304 }, { "epoch": 0.52, "learning_rate": 0.00024404086265607263, "loss": 3.1661, "theoretical_loss": 3.527279973723561, "tokens_seen": 1447034880 }, { "epoch": 0.52, "learning_rate": 0.00024385168369277336, "loss": 3.2027, "theoretical_loss": 3.5270531605996767, "tokens_seen": 1448083456 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.47648268938064575, "objective/train/docs_used": 821111, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1611850261688232, "objective/train/original_loss": 3.1611852645874023, "objective/train/theoretical_loss": 3.5269964901647066, "objective/train/tokens_used": 1468805600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23558388650417328, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048845648765564, "objective/train/weighted_lm_loss": 3.3149309158325195, "objective/train/weights_max": 1.0512150526046753, "objective/train/weights_min": 0.9515277147293091, "theoretical_loss": 3.5269964901647066, "tokens_seen": 1448345600 }, { "epoch": 0.52, "learning_rate": 0.0002436625047294741, "loss": 3.1585, "theoretical_loss": 3.5268265576034805, "tokens_seen": 1449132032 }, { "epoch": 0.52, "learning_rate": 0.00024347332576617482, "loss": 3.1574, "theoretical_loss": 3.5266001643884684, "tokens_seen": 1450180608 }, { "epoch": 0.52, "learning_rate": 0.00024328414680287552, "loss": 3.1439, "theoretical_loss": 3.526373980608957, "tokens_seen": 1451229184 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.4845767915248871, "objective/train/docs_used": 823345, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.209475517272949, "objective/train/original_loss": 3.209475517272949, "objective/train/theoretical_loss": 3.526289215616317, "objective/train/tokens_used": 1472082400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24037227034568787, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496795177459717, "objective/train/weighted_lm_loss": 3.3680312633514404, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9529867768287659, "theoretical_loss": 3.526289215616317, "tokens_seen": 1451622400 }, { "epoch": 0.52, "learning_rate": 0.00024309496783957624, "loss": 3.1728, "theoretical_loss": 3.5261480059200814, "tokens_seen": 1452277760 }, { "epoch": 0.52, "learning_rate": 0.00024290578887627696, "loss": 3.156, "theoretical_loss": 3.5259222399777945, "tokens_seen": 1453326336 }, { "epoch": 0.52, "learning_rate": 0.00024271660991297768, "loss": 3.1253, "theoretical_loss": 3.525696682438861, "tokens_seen": 1454374912 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.47921082377433777, "objective/train/docs_used": 825270, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7915778160095215, "objective/train/original_loss": 2.7915778160095215, "objective/train/theoretical_loss": 3.525583981713613, "objective/train/tokens_used": 1475359200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23409366607666016, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491108894348145, "objective/train/weighted_lm_loss": 2.9294707775115967, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.9529442191123962, "theoretical_loss": 3.525583981713613, "tokens_seen": 1454899200 }, { "epoch": 0.52, "learning_rate": 0.0002425274309496784, "loss": 3.145, "theoretical_loss": 3.5254713329608585, "tokens_seen": 1455423488 }, { "epoch": 0.52, "learning_rate": 0.00024233825198637912, "loss": 3.1236, "theoretical_loss": 3.5252461912021733, "tokens_seen": 1456472064 }, { "epoch": 0.52, "learning_rate": 0.00024214907302307984, "loss": 3.12, "theoretical_loss": 3.525021256821997, "tokens_seen": 1457520640 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.4787052273750305, "objective/train/docs_used": 827227, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9800362586975098, "objective/train/original_loss": 2.9800362586975098, "objective/train/theoretical_loss": 3.524880777992677, "objective/train/tokens_used": 1478636000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23406291007995605, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490602254867554, "objective/train/weighted_lm_loss": 3.127027750015259, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9527052044868469, "theoretical_loss": 3.524880777992677, "tokens_seen": 1458176000 }, { "epoch": 0.52, "learning_rate": 0.00024195989405978056, "loss": 3.15, "theoretical_loss": 3.5247965294803265, "tokens_seen": 1458569216 }, { "epoch": 0.52, "learning_rate": 0.00024177071509648128, "loss": 3.1198, "theoretical_loss": 3.524572008837958, "tokens_seen": 1459617792 }, { "epoch": 0.52, "learning_rate": 0.00024158153613318197, "loss": 3.208, "theoretical_loss": 3.5243476945564893, "tokens_seen": 1460666368 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.4924372732639313, "objective/train/docs_used": 829193, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1463100910186768, "objective/train/original_loss": 3.1463098526000977, "objective/train/theoretical_loss": 3.524179594066606, "objective/train/tokens_used": 1481912800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24343392252922058, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504813194274902, "objective/train/weighted_lm_loss": 3.304593086242676, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9679165482521057, "theoretical_loss": 3.524179594066606, "tokens_seen": 1461452800 }, { "epoch": 0.52, "learning_rate": 0.00024139235716988272, "loss": 3.0859, "theoretical_loss": 3.524123586298312, "tokens_seen": 1461714944 }, { "epoch": 0.52, "learning_rate": 0.00024120317820658344, "loss": 3.1743, "theoretical_loss": 3.5238996837266137, "tokens_seen": 1462763520 }, { "epoch": 0.52, "learning_rate": 0.00024101399924328416, "loss": 3.1895, "theoretical_loss": 3.5236759865053724, "tokens_seen": 1463812096 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.45739272236824036, "objective/train/docs_used": 831131, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.861849546432495, "objective/train/original_loss": 2.861849784851074, "objective/train/theoretical_loss": 3.5234804196247764, "objective/train/tokens_used": 1485189600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23027275502681732, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.046909213066101, "objective/train/weighted_lm_loss": 3.000051736831665, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9622508883476257, "theoretical_loss": 3.5234804196247764, "tokens_seen": 1464729600 }, { "epoch": 0.52, "learning_rate": 0.00024082482027998486, "loss": 3.1312, "theoretical_loss": 3.523452494299356, "tokens_seen": 1464860672 }, { "epoch": 0.52, "learning_rate": 0.00024063564131668558, "loss": 3.2132, "theoretical_loss": 3.5232292067741176, "tokens_seen": 1465909248 }, { "epoch": 0.52, "learning_rate": 0.0002404464623533863, "loss": 3.165, "theoretical_loss": 3.523006123595997, "tokens_seen": 1466957824 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.4719890356063843, "objective/train/docs_used": 832511, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2662267684936523, "objective/train/original_loss": 3.2662272453308105, "objective/train/theoretical_loss": 3.522783244432115, "objective/train/tokens_used": 1488466400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2407318353652954, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04842209815979, "objective/train/weighted_lm_loss": 3.422483205795288, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9514791965484619, "theoretical_loss": 3.522783244432115, "tokens_seen": 1468006400 }, { "epoch": 0.52, "learning_rate": 0.00024025728339008705, "loss": 3.1954, "theoretical_loss": 3.522783244432115, "tokens_seen": 1468006400 }, { "epoch": 0.52, "learning_rate": 0.00024006810442678777, "loss": 3.2216, "theoretical_loss": 3.52256056895037, "tokens_seen": 1469054976 }, { "epoch": 0.53, "learning_rate": 0.00023987892546348846, "loss": 3.1763, "theoretical_loss": 3.5223380968194404, "tokens_seen": 1470103552 }, { "epoch": 0.53, "learning_rate": 0.00023968974650018918, "loss": 3.1598, "theoretical_loss": 3.522115827708778, "tokens_seen": 1471152128 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.492140531539917, "objective/train/docs_used": 834109, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.772519588470459, "objective/train/original_loss": 2.772519588470459, "objective/train/theoretical_loss": 3.522088058328375, "objective/train/tokens_used": 1491743200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2431326061487198, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050450086593628, "objective/train/weighted_lm_loss": 2.913102626800537, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9724238514900208, "theoretical_loss": 3.522088058328375, "tokens_seen": 1471283200 }, { "epoch": 0.53, "learning_rate": 0.0002395005675368899, "loss": 3.1937, "theoretical_loss": 3.5218937612886068, "tokens_seen": 1472200704 }, { "epoch": 0.53, "learning_rate": 0.00023931138857359062, "loss": 3.1715, "theoretical_loss": 3.521671897229922, "tokens_seen": 1473249280 }, { "epoch": 0.53, "learning_rate": 0.00023912220961029134, "loss": 3.1673, "theoretical_loss": 3.521450235204485, "tokens_seen": 1474297856 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.4695150852203369, "objective/train/docs_used": 835915, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.096074342727661, "objective/train/original_loss": 3.096074104309082, "objective/train/theoretical_loss": 3.521394851227428, "objective/train/tokens_used": 1495020000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23337149620056152, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481371879577637, "objective/train/weighted_lm_loss": 3.2463152408599854, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9514370560646057, "theoretical_loss": 3.521394851227428, "tokens_seen": 1474560000 }, { "epoch": 0.53, "learning_rate": 0.00023893303064699206, "loss": 3.2446, "theoretical_loss": 3.521228774884823, "tokens_seen": 1475346432 }, { "epoch": 0.53, "learning_rate": 0.00023874385168369278, "loss": 3.2234, "theoretical_loss": 3.521007515944228, "tokens_seen": 1476395008 }, { "epoch": 0.53, "learning_rate": 0.0002385546727203935, "loss": 3.1922, "theoretical_loss": 3.52078645805675, "tokens_seen": 1477443584 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.49413806200027466, "objective/train/docs_used": 837471, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.045189380645752, "objective/train/original_loss": 3.04518985748291, "objective/train/theoretical_loss": 3.5207036131165568, "objective/train/tokens_used": 1498296800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24563145637512207, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506623983383179, "objective/train/weighted_lm_loss": 3.1994035243988037, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9573822617530823, "theoretical_loss": 3.5207036131165568, "tokens_seen": 1477836800 }, { "epoch": 0.53, "learning_rate": 0.0002383654937570942, "loss": 3.1572, "theoretical_loss": 3.5205656008972, "tokens_seen": 1478492160 }, { "epoch": 0.53, "learning_rate": 0.00023817631479379492, "loss": 3.1917, "theoretical_loss": 3.5203449441411423, "tokens_seen": 1479540736 }, { "epoch": 0.53, "learning_rate": 0.00023798713583049567, "loss": 3.1977, "theoretical_loss": 3.5201244874648983, "tokens_seen": 1480589312 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.4763402044773102, "objective/train/docs_used": 839384, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.983619213104248, "objective/train/original_loss": 2.983618974685669, "objective/train/theoretical_loss": 3.5200143340557615, "objective/train/tokens_used": 1501573600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23397092521190643, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488231182098389, "objective/train/weighted_lm_loss": 3.1282737255096436, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9571855068206787, "theoretical_loss": 3.5200143340557615, "tokens_seen": 1481113600 }, { "epoch": 0.53, "learning_rate": 0.00023779795686719639, "loss": 3.2497, "theoretical_loss": 3.519904230545538, "tokens_seen": 1481637888 }, { "epoch": 0.53, "learning_rate": 0.0002376087779038971, "loss": 3.1995, "theoretical_loss": 3.5196841730608828, "tokens_seen": 1482686464 }, { "epoch": 0.53, "learning_rate": 0.0002374195989405978, "loss": 3.2798, "theoretical_loss": 3.5194643146895, "tokens_seen": 1483735040 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.48236167430877686, "objective/train/docs_used": 841281, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0101518630981445, "objective/train/original_loss": 3.0101518630981445, "objective/train/theoretical_loss": 3.5193270041770703, "objective/train/tokens_used": 1504850400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23790954053401947, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494455099105835, "objective/train/weighted_lm_loss": 3.15853214263916, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9519292712211609, "theoretical_loss": 3.5193270041770703, "tokens_seen": 1484390400 }, { "epoch": 0.53, "learning_rate": 0.00023723041997729852, "loss": 3.2623, "theoretical_loss": 3.5192446551107017, "tokens_seen": 1484783616 }, { "epoch": 0.53, "learning_rate": 0.00023704124101399924, "loss": 3.2571, "theoretical_loss": 3.519025194004543, "tokens_seen": 1485832192 }, { "epoch": 0.53, "learning_rate": 0.0002368520620507, "loss": 3.1932, "theoretical_loss": 3.518805931051819, "tokens_seen": 1486880768 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.47434303164482117, "objective/train/docs_used": 843256, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2128210067749023, "objective/train/original_loss": 3.2128210067749023, "objective/train/theoretical_loss": 3.518641613683862, "objective/train/tokens_used": 1508127200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2341526299715042, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0486242771148682, "objective/train/weighted_lm_loss": 3.368643045425415, "objective/train/weights_max": 1.0512194633483887, "objective/train/weights_min": 0.9513967037200928, "theoretical_loss": 3.518641613683862, "tokens_seen": 1487667200 }, { "epoch": 0.53, "learning_rate": 0.00023666288308740068, "loss": 3.2328, "theoretical_loss": 3.5185868659340627, "tokens_seen": 1487929344 }, { "epoch": 0.53, "learning_rate": 0.0002364737041241014, "loss": 3.2408, "theoretical_loss": 3.518367998333543, "tokens_seen": 1488977920 }, { "epoch": 0.53, "learning_rate": 0.00023628452516080212, "loss": 3.1762, "theoretical_loss": 3.518149327933262, "tokens_seen": 1490026496 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.4800264835357666, "objective/train/docs_used": 844947, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1356568336486816, "objective/train/original_loss": 3.1356565952301025, "objective/train/theoretical_loss": 3.517958152850192, "objective/train/tokens_used": 1511404000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2361105978488922, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049202799797058, "objective/train/weighted_lm_loss": 3.289992332458496, "objective/train/weights_max": 1.0512189865112305, "objective/train/weights_min": 0.9541196823120117, "theoretical_loss": 3.517958152850192, "tokens_seen": 1490944000 }, { "epoch": 0.53, "learning_rate": 0.00023609534619750284, "loss": 3.321, "theoretical_loss": 3.5179308544169543, "tokens_seen": 1491075072 }, { "epoch": 0.53, "learning_rate": 0.00023590616723420354, "loss": 3.2112, "theoretical_loss": 3.5177125774690827, "tokens_seen": 1492123648 }, { "epoch": 0.53, "learning_rate": 0.00023571698827090428, "loss": 3.2648, "theoretical_loss": 3.517494496774837, "tokens_seen": 1493172224 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.4808512330055237, "objective/train/docs_used": 846677, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1397337913513184, "objective/train/original_loss": 3.1397337913513184, "objective/train/theoretical_loss": 3.517276612020132, "objective/train/tokens_used": 1514680800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23949705064296722, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049302339553833, "objective/train/weighted_lm_loss": 3.2939443588256836, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9525419473648071, "theoretical_loss": 3.517276612020132, "tokens_seen": 1494220800 }, { "epoch": 0.53, "learning_rate": 0.000235527809307605, "loss": 3.257, "theoretical_loss": 3.517276612020132, "tokens_seen": 1494220800 }, { "epoch": 0.53, "learning_rate": 0.00023533863034430573, "loss": 3.2537, "theoretical_loss": 3.5170589228916054, "tokens_seen": 1495269376 }, { "epoch": 0.53, "learning_rate": 0.00023514945138100645, "loss": 3.2567, "theoretical_loss": 3.516841429076615, "tokens_seen": 1496317952 }, { "epoch": 0.53, "learning_rate": 0.00023496027241770714, "loss": 3.206, "theoretical_loss": 3.516624130263237, "tokens_seen": 1497366528 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.4818221628665924, "objective/train/docs_used": 848503, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1971991062164307, "objective/train/original_loss": 3.1971988677978516, "objective/train/theoretical_loss": 3.5165969816071083, "objective/train/tokens_used": 1517957600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24132023751735687, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494087934494019, "objective/train/weighted_lm_loss": 3.355829954147339, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9549484848976135, "theoretical_loss": 3.5165969816071083, "tokens_seen": 1497497600 }, { "epoch": 0.54, "learning_rate": 0.00023477109345440786, "loss": 3.2779, "theoretical_loss": 3.5164070261402633, "tokens_seen": 1498415104 }, { "epoch": 0.54, "learning_rate": 0.0002345819144911086, "loss": 3.2636, "theoretical_loss": 3.516190116397201, "tokens_seen": 1499463680 }, { "epoch": 0.54, "learning_rate": 0.00023439273552780933, "loss": 3.2725, "theoretical_loss": 3.5159734007242682, "tokens_seen": 1500512256 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.4783119261264801, "objective/train/docs_used": 850745, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.984816312789917, "objective/train/original_loss": 2.984816312789917, "objective/train/theoretical_loss": 3.5159192520932576, "objective/train/tokens_used": 1521234400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23693495988845825, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490354299545288, "objective/train/weighted_lm_loss": 3.1317691802978516, "objective/train/weights_max": 1.0512199401855469, "objective/train/weights_min": 0.95163893699646, "theoretical_loss": 3.5159192520932576, "tokens_seen": 1500774400 }, { "epoch": 0.54, "learning_rate": 0.00023420355656451002, "loss": 3.2283, "theoretical_loss": 3.5157568788123923, "tokens_seen": 1501560832 }, { "epoch": 0.54, "learning_rate": 0.00023401437760121074, "loss": 3.1501, "theoretical_loss": 3.515540550353209, "tokens_seen": 1502609408 }, { "epoch": 0.54, "learning_rate": 0.00023382519863791146, "loss": 3.2351, "theoretical_loss": 3.5153244150390597, "tokens_seen": 1503657984 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.48390939831733704, "objective/train/docs_used": 852130, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0634584426879883, "objective/train/original_loss": 3.0634589195251465, "objective/train/theoretical_loss": 3.515243414028785, "objective/train/tokens_used": 1524511200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24005842208862305, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496110916137695, "objective/train/weighted_lm_loss": 3.215369462966919, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.954659104347229, "theoretical_loss": 3.515243414028785, "tokens_seen": 1504051200 }, { "epoch": 0.54, "learning_rate": 0.00023363601967461218, "loss": 3.1928, "theoretical_loss": 3.5151084725629884, "tokens_seen": 1504706560 }, { "epoch": 0.54, "learning_rate": 0.00023344684071131293, "loss": 3.1693, "theoretical_loss": 3.5148927226187405, "tokens_seen": 1505755136 }, { "epoch": 0.54, "learning_rate": 0.00023325766174801362, "loss": 3.2186, "theoretical_loss": 3.514677164900762, "tokens_seen": 1506803712 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.47938430309295654, "objective/train/docs_used": 853500, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.098717451095581, "objective/train/original_loss": 3.0987181663513184, "objective/train/theoretical_loss": 3.5145694580313287, "objective/train/tokens_used": 1527788000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23806166648864746, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049148440361023, "objective/train/weighted_lm_loss": 3.24904203414917, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9516683220863342, "theoretical_loss": 3.5145694580313287, "tokens_seen": 1507328000 }, { "epoch": 0.54, "learning_rate": 0.00023306848278471435, "loss": 3.2394, "theoretical_loss": 3.514461799104195, "tokens_seen": 1507852288 }, { "epoch": 0.54, "learning_rate": 0.00023287930382141507, "loss": 3.2555, "theoretical_loss": 3.5142466249248754, "tokens_seen": 1508900864 }, { "epoch": 0.54, "learning_rate": 0.00023269012485811579, "loss": 3.2389, "theoretical_loss": 3.5140316420593347, "tokens_seen": 1509949440 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.48630884289741516, "objective/train/docs_used": 854981, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7976765632629395, "objective/train/original_loss": 2.7976765632629395, "objective/train/theoretical_loss": 3.5138973747853353, "objective/train/tokens_used": 1531064800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23877815902233124, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04984450340271, "objective/train/weighted_lm_loss": 2.9376676082611084, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9745818972587585, "theoretical_loss": 3.5138973747853353, "tokens_seen": 1510604800 }, { "epoch": 0.54, "learning_rate": 0.00023250094589481648, "loss": 3.2002, "theoretical_loss": 3.513816850204793, "tokens_seen": 1510998016 }, { "epoch": 0.54, "learning_rate": 0.00023231176693151723, "loss": 3.2134, "theoretical_loss": 3.5136022490591605, "tokens_seen": 1512046592 }, { "epoch": 0.54, "learning_rate": 0.00023212258796821795, "loss": 3.2219, "theoretical_loss": 3.5133878383210337, "tokens_seen": 1513095168 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.4808279871940613, "objective/train/docs_used": 857043, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.932635545730591, "objective/train/original_loss": 2.93263578414917, "objective/train/theoretical_loss": 3.513227155041438, "objective/train/tokens_used": 1534341600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23708273470401764, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492877960205078, "objective/train/weighted_lm_loss": 3.0757665634155273, "objective/train/weights_max": 1.0512207746505737, "objective/train/weights_min": 0.952289342880249, "theoretical_loss": 3.513227155041438, "tokens_seen": 1513881600 }, { "epoch": 0.54, "learning_rate": 0.00023193340900491867, "loss": 3.2437, "theoretical_loss": 3.513173617689695, "tokens_seen": 1514143744 }, { "epoch": 0.54, "learning_rate": 0.00023174423004161936, "loss": 3.2108, "theoretical_loss": 3.512959586865108, "tokens_seen": 1515192320 }, { "epoch": 0.54, "learning_rate": 0.00023155505107832008, "loss": 3.2642, "theoretical_loss": 3.512745745547918, "tokens_seen": 1516240896 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.4857465922832489, "objective/train/docs_used": 859161, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.04585599899292, "objective/train/original_loss": 3.04585599899292, "objective/train/theoretical_loss": 3.5125587896158477, "objective/train/tokens_used": 1537618400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2411489188671112, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498003959655762, "objective/train/weighted_lm_loss": 3.1971046924591064, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9541661739349365, "theoretical_loss": 3.5125587896158477, "tokens_seen": 1517158400 }, { "epoch": 0.54, "learning_rate": 0.0002313658721150208, "loss": 3.1957, "theoretical_loss": 3.5125320934394484, "tokens_seen": 1517289472 }, { "epoch": 0.54, "learning_rate": 0.00023117669315172155, "loss": 3.2254, "theoretical_loss": 3.5123186302417007, "tokens_seen": 1518338048 }, { "epoch": 0.54, "learning_rate": 0.00023098751418842227, "loss": 3.189, "theoretical_loss": 3.51210535565735, "tokens_seen": 1519386624 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.4926300644874573, "objective/train/docs_used": 861286, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.171475887298584, "objective/train/original_loss": 3.171476125717163, "objective/train/theoretical_loss": 3.511892269389743, "objective/train/tokens_used": 1540895200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2442355751991272, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505046844482422, "objective/train/weighted_lm_loss": 3.3321945667266846, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9602129459381104, "theoretical_loss": 3.511892269389743, "tokens_seen": 1520435200 }, { "epoch": 0.54, "learning_rate": 0.00023079833522512296, "loss": 3.1388, "theoretical_loss": 3.511892269389743, "tokens_seen": 1520435200 }, { "epoch": 0.54, "learning_rate": 0.00023060915626182369, "loss": 3.1666, "theoretical_loss": 3.5116793711429004, "tokens_seen": 1521483776 }, { "epoch": 0.54, "learning_rate": 0.0002304199772985244, "loss": 3.1639, "theoretical_loss": 3.511466660621508, "tokens_seen": 1522532352 }, { "epoch": 0.54, "learning_rate": 0.00023023079833522513, "loss": 3.2193, "theoretical_loss": 3.5112541375309214, "tokens_seen": 1523580928 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.48999133706092834, "objective/train/docs_used": 863379, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7391910552978516, "objective/train/original_loss": 2.7391910552978516, "objective/train/theoretical_loss": 3.511227585308678, "objective/train/tokens_used": 1544172000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24297131597995758, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050234317779541, "objective/train/weighted_lm_loss": 2.8769237995147705, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9612780213356018, "theoretical_loss": 3.511227585308678, "tokens_seen": 1523712000 }, { "epoch": 0.54, "learning_rate": 0.00023004161937192585, "loss": 3.2179, "theoretical_loss": 3.511041801577159, "tokens_seen": 1524629504 }, { "epoch": 0.54, "learning_rate": 0.00022985244040862657, "loss": 3.2151, "theoretical_loss": 3.510829652466904, "tokens_seen": 1525678080 }, { "epoch": 0.55, "learning_rate": 0.0002296632614453273, "loss": 3.2204, "theoretical_loss": 3.5106176899074972, "tokens_seen": 1526726656 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.4816513955593109, "objective/train/docs_used": 865312, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0186238288879395, "objective/train/original_loss": 3.0186238288879395, "objective/train/theoretical_loss": 3.510564728381983, "objective/train/tokens_used": 1547448800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23675787448883057, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493686199188232, "objective/train/weighted_lm_loss": 3.1666901111602783, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9688776731491089, "theoretical_loss": 3.510564728381983, "tokens_seen": 1526988800 }, { "epoch": 0.55, "learning_rate": 0.000229474082482028, "loss": 3.1873, "theoretical_loss": 3.510405913606943, "tokens_seen": 1527775232 }, { "epoch": 0.55, "learning_rate": 0.00022928490351872873, "loss": 3.2087, "theoretical_loss": 3.510194323273899, "tokens_seen": 1528823808 }, { "epoch": 0.55, "learning_rate": 0.00022909572455542942, "loss": 3.153, "theoretical_loss": 3.5099829186176796, "tokens_seen": 1529872384 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.494413286447525, "objective/train/docs_used": 867418, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9637973308563232, "objective/train/original_loss": 2.9637973308563232, "objective/train/theoretical_loss": 3.5099036896821874, "objective/train/tokens_used": 1550725600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24574802815914154, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506906509399414, "objective/train/weighted_lm_loss": 3.114089250564575, "objective/train/weights_max": 1.0512158870697021, "objective/train/weights_min": 0.9844887256622314, "theoretical_loss": 3.5099036896821874, "tokens_seen": 1530265600 }, { "epoch": 0.55, "learning_rate": 0.00022890654559213017, "loss": 3.1477, "theoretical_loss": 3.509771699348253, "tokens_seen": 1530920960 }, { "epoch": 0.55, "learning_rate": 0.0002287173666288309, "loss": 3.1358, "theoretical_loss": 3.5095606651762368, "tokens_seen": 1531969536 }, { "epoch": 0.55, "learning_rate": 0.0002285281876655316, "loss": 3.1609, "theoretical_loss": 3.5093498158128997, "tokens_seen": 1533018112 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.4862027168273926, "objective/train/docs_used": 869264, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.031954526901245, "objective/train/original_loss": 3.031954526901245, "objective/train/theoretical_loss": 3.5092444603444344, "objective/train/tokens_used": 1554002400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2403550148010254, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498418807983398, "objective/train/weighted_lm_loss": 3.1832399368286133, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9602664709091187, "theoretical_loss": 3.5092444603444344, "tokens_seen": 1533542400 }, { "epoch": 0.55, "learning_rate": 0.0002283390087022323, "loss": 3.1915, "theoretical_loss": 3.509139150970157, "tokens_seen": 1534066688 }, { "epoch": 0.55, "learning_rate": 0.00022814982973893303, "loss": 3.1134, "theoretical_loss": 3.5089286703605698, "tokens_seen": 1535115264 }, { "epoch": 0.55, "learning_rate": 0.00022796065077563375, "loss": 3.2115, "theoretical_loss": 3.5087183736973437, "tokens_seen": 1536163840 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.4853838384151459, "objective/train/docs_used": 871030, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.866541624069214, "objective/train/original_loss": 2.866541862487793, "objective/train/theoretical_loss": 3.5085870315659133, "objective/train/tokens_used": 1557279200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23986515402793884, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497575998306274, "objective/train/weighted_lm_loss": 3.0102427005767822, "objective/train/weights_max": 1.051215410232544, "objective/train/weights_min": 0.9542617797851562, "theoretical_loss": 3.5085870315659133, "tokens_seen": 1536819200 }, { "epoch": 0.55, "learning_rate": 0.0002277714718123345, "loss": 3.223, "theoretical_loss": 3.5085082606943243, "tokens_seen": 1537212416 }, { "epoch": 0.55, "learning_rate": 0.0002275822928490352, "loss": 3.1393, "theoretical_loss": 3.508298331065999, "tokens_seen": 1538260992 }, { "epoch": 0.55, "learning_rate": 0.0002273931138857359, "loss": 3.1253, "theoretical_loss": 3.508088584527492, "tokens_seen": 1539309568 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.4807642698287964, "objective/train/docs_used": 872615, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1632378101348877, "objective/train/original_loss": 3.1632375717163086, "objective/train/theoretical_loss": 3.507931394605294, "objective/train/tokens_used": 1560556000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2371273934841156, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492817163467407, "objective/train/weighted_lm_loss": 3.3185274600982666, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9520975351333618, "theoretical_loss": 3.507931394605294, "tokens_seen": 1540096000 }, { "epoch": 0.55, "learning_rate": 0.00022720393492243663, "loss": 3.1095, "theoretical_loss": 3.5078790207945647, "tokens_seen": 1540358144 }, { "epoch": 0.55, "learning_rate": 0.00022701475595913735, "loss": 3.1162, "theoretical_loss": 3.507669639583612, "tokens_seen": 1541406720 }, { "epoch": 0.55, "learning_rate": 0.00022682557699583807, "loss": 3.1987, "theoretical_loss": 3.5074604406116627, "tokens_seen": 1542455296 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.49242639541625977, "objective/train/docs_used": 874504, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1098110675811768, "objective/train/original_loss": 3.1098108291625977, "objective/train/theoretical_loss": 3.507277540782165, "objective/train/tokens_used": 1563832800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24409788846969604, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504834651947021, "objective/train/weighted_lm_loss": 3.2666797637939453, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9536768198013306, "theoretical_loss": 3.507277540782165, "tokens_seen": 1543372800 }, { "epoch": 0.55, "learning_rate": 0.0002266363980325388, "loss": 3.2216, "theoretical_loss": 3.507251423596374, "tokens_seen": 1543503872 }, { "epoch": 0.55, "learning_rate": 0.0002264472190692395, "loss": 3.1687, "theoretical_loss": 3.5070425882560343, "tokens_seen": 1544552448 }, { "epoch": 0.55, "learning_rate": 0.00022625804010594023, "loss": 3.2415, "theoretical_loss": 3.506833934309558, "tokens_seen": 1545601024 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.48686209321022034, "objective/train/docs_used": 876453, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9117236137390137, "objective/train/original_loss": 2.9117236137390137, "objective/train/theoretical_loss": 3.5066254614764842, "objective/train/tokens_used": 1567109600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24117165803909302, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499120950698853, "objective/train/weighted_lm_loss": 3.0574209690093994, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9522051215171814, "theoretical_loss": 3.5066254614764842, "tokens_seen": 1546649600 }, { "epoch": 0.55, "learning_rate": 0.00022606886114264095, "loss": 3.2028, "theoretical_loss": 3.5066254614764842, "tokens_seen": 1546649600 }, { "epoch": 0.55, "learning_rate": 0.00022587968217934164, "loss": 3.2477, "theoretical_loss": 3.5064171694769763, "tokens_seen": 1547698176 }, { "epoch": 0.55, "learning_rate": 0.00022569050321604237, "loss": 3.2099, "theoretical_loss": 3.5062090580318186, "tokens_seen": 1548746752 }, { "epoch": 0.55, "learning_rate": 0.0002255013242527431, "loss": 3.232, "theoretical_loss": 3.506001126862416, "tokens_seen": 1549795328 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.487368643283844, "objective/train/docs_used": 878255, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.103156566619873, "objective/train/original_loss": 3.103156566619873, "objective/train/theoretical_loss": 3.5059751481280284, "objective/train/tokens_used": 1570386400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24058666825294495, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049959659576416, "objective/train/weighted_lm_loss": 3.2585041522979736, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9520174860954285, "theoretical_loss": 3.5059751481280284, "tokens_seen": 1549926400 }, { "epoch": 0.55, "learning_rate": 0.00022531214528944383, "loss": 3.2565, "theoretical_loss": 3.505793375690791, "tokens_seen": 1550843904 }, { "epoch": 0.55, "learning_rate": 0.00022512296632614455, "loss": 3.1939, "theoretical_loss": 3.5055858042395815, "tokens_seen": 1551892480 }, { "epoch": 0.55, "learning_rate": 0.00022493378736284525, "loss": 3.155, "theoretical_loss": 3.5053784122320417, "tokens_seen": 1552941056 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.4909485876560211, "objective/train/docs_used": 880445, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8173828125, "objective/train/original_loss": 2.8173828125, "objective/train/theoretical_loss": 3.505326592235857, "objective/train/tokens_used": 1573663200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24372120201587677, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050333857536316, "objective/train/weighted_lm_loss": 2.9595131874084473, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9670907855033875, "theoretical_loss": 3.505326592235857, "tokens_seen": 1553203200 }, { "epoch": 0.56, "learning_rate": 0.00022474460839954597, "loss": 3.1608, "theoretical_loss": 3.505171199392036, "tokens_seen": 1553989632 }, { "epoch": 0.56, "learning_rate": 0.0002245554294362467, "loss": 3.1919, "theoretical_loss": 3.504964165444042, "tokens_seen": 1555038208 }, { "epoch": 0.56, "learning_rate": 0.00022436625047294744, "loss": 3.1549, "theoretical_loss": 3.504757310113145, "tokens_seen": 1556086784 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.4909573793411255, "objective/train/docs_used": 881444, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.154583215713501, "objective/train/original_loss": 3.15458345413208, "objective/train/theoretical_loss": 3.504679785357773, "objective/train/tokens_used": 1576940000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24360138177871704, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503339767456055, "objective/train/weighted_lm_loss": 3.3135523796081543, "objective/train/weights_max": 1.0512161254882812, "objective/train/weights_min": 0.9545013904571533, "theoretical_loss": 3.504679785357773, "tokens_seen": 1556480000 }, { "epoch": 0.56, "learning_rate": 0.00022417707150964813, "loss": 3.203, "theoretical_loss": 3.5045506331250382, "tokens_seen": 1557135360 }, { "epoch": 0.56, "learning_rate": 0.00022398789254634885, "loss": 3.1861, "theoretical_loss": 3.50434413420602, "tokens_seen": 1558183936 }, { "epoch": 0.56, "learning_rate": 0.00022379871358304957, "loss": 3.1586, "theoretical_loss": 3.504137813082994, "tokens_seen": 1559232512 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.47179633378982544, "objective/train/docs_used": 883210, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7676148414611816, "objective/train/original_loss": 2.7676148414611816, "objective/train/theoretical_loss": 3.504034719109799, "objective/train/tokens_used": 1580216800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23406490683555603, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04836905002594, "objective/train/weighted_lm_loss": 2.9026639461517334, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9523040652275085, "theoretical_loss": 3.504034719109799, "tokens_seen": 1559756800 }, { "epoch": 0.56, "learning_rate": 0.0002236095346197503, "loss": 3.1069, "theoretical_loss": 3.5039316694834635, "tokens_seen": 1560281088 }, { "epoch": 0.56, "learning_rate": 0.00022342035565645098, "loss": 3.1383, "theoretical_loss": 3.5037257031355344, "tokens_seen": 1561329664 }, { "epoch": 0.56, "learning_rate": 0.00022323117669315173, "loss": 3.1619, "theoretical_loss": 3.5035199137679105, "tokens_seen": 1562378240 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.4887843728065491, "objective/train/docs_used": 884815, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8651185035705566, "objective/train/original_loss": 2.8651180267333984, "objective/train/theoretical_loss": 3.50339138516565, "objective/train/tokens_used": 1583493600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24095383286476135, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501031875610352, "objective/train/weighted_lm_loss": 3.0087907314300537, "objective/train/weights_max": 1.0512157678604126, "objective/train/weights_min": 0.957360029220581, "theoretical_loss": 3.50339138516565, "tokens_seen": 1563033600 }, { "epoch": 0.56, "learning_rate": 0.00022304199772985245, "loss": 3.234, "theoretical_loss": 3.503314301109892, "tokens_seen": 1563426816 }, { "epoch": 0.56, "learning_rate": 0.00022285281876655317, "loss": 3.1352, "theoretical_loss": 3.5031088648913755, "tokens_seen": 1564475392 }, { "epoch": 0.56, "learning_rate": 0.0002226636398032539, "loss": 3.2193, "theoretical_loss": 3.5029036048428503, "tokens_seen": 1565523968 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.48724234104156494, "objective/train/docs_used": 886685, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.778803825378418, "objective/train/original_loss": 2.778803825378418, "objective/train/theoretical_loss": 3.5027497752562198, "objective/train/tokens_used": 1586770400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23980683088302612, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499430894851685, "objective/train/weighted_lm_loss": 2.9186336994171143, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9529886245727539, "theoretical_loss": 3.5027497752562198, "tokens_seen": 1566310400 }, { "epoch": 0.56, "learning_rate": 0.0002224744608399546, "loss": 3.1987, "theoretical_loss": 3.502698520695398, "tokens_seen": 1566572544 }, { "epoch": 0.56, "learning_rate": 0.0002222852818766553, "loss": 3.0677, "theoretical_loss": 3.5024936121806896, "tokens_seen": 1567621120 }, { "epoch": 0.56, "learning_rate": 0.00022209610291335606, "loss": 3.1452, "theoretical_loss": 3.502288879030986, "tokens_seen": 1568669696 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.4725601077079773, "objective/train/docs_used": 888616, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0799059867858887, "objective/train/original_loss": 3.0799059867858887, "objective/train/theoretical_loss": 3.5021098811690674, "objective/train/tokens_used": 1590047200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.233866885304451, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484446287155151, "objective/train/weighted_lm_loss": 3.228604316711426, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9518508315086365, "theoretical_loss": 3.5021098811690674, "tokens_seen": 1569587200 }, { "epoch": 0.56, "learning_rate": 0.00022190692395005678, "loss": 3.1714, "theoretical_loss": 3.5020843209791326, "tokens_seen": 1569718272 }, { "epoch": 0.56, "learning_rate": 0.00022171774498675747, "loss": 3.1314, "theoretical_loss": 3.501879937758562, "tokens_seen": 1570766848 }, { "epoch": 0.56, "learning_rate": 0.0002215285660234582, "loss": 3.1833, "theoretical_loss": 3.5016757291032903, "tokens_seen": 1571815424 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.48864632844924927, "objective/train/docs_used": 890514, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.864938259124756, "objective/train/original_loss": 2.864938259124756, "objective/train/theoretical_loss": 3.501471694747913, "objective/train/tokens_used": 1593324000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24329259991645813, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501012802124023, "objective/train/weighted_lm_loss": 3.0083770751953125, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9517747163772583, "theoretical_loss": 3.501471694747913, "tokens_seen": 1572864000 }, { "epoch": 0.56, "learning_rate": 0.0002213393870601589, "loss": 3.1617, "theoretical_loss": 3.501471694747913, "tokens_seen": 1572864000 }, { "epoch": 0.56, "learning_rate": 0.00022115020809685963, "loss": 3.2548, "theoretical_loss": 3.5012678344276082, "tokens_seen": 1573912576 }, { "epoch": 0.56, "learning_rate": 0.00022096102913356035, "loss": 3.1995, "theoretical_loss": 3.5010641478781306, "tokens_seen": 1574961152 }, { "epoch": 0.56, "learning_rate": 0.00022077185017026107, "loss": 3.2114, "theoretical_loss": 3.5008606348358136, "tokens_seen": 1576009728 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.4617477357387543, "objective/train/docs_used": 892478, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0483834743499756, "objective/train/original_loss": 3.0483834743499756, "objective/train/theoretical_loss": 3.5008352078921368, "objective/train/tokens_used": 1596600800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.227500781416893, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0473309755325317, "objective/train/weighted_lm_loss": 3.1940760612487793, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9527806043624878, "theoretical_loss": 3.5008352078921368, "tokens_seen": 1576140800 }, { "epoch": 0.56, "learning_rate": 0.0002205826712069618, "loss": 3.2382, "theoretical_loss": 3.5006572950375645, "tokens_seen": 1577058304 }, { "epoch": 0.56, "learning_rate": 0.0002203934922436625, "loss": 3.1722, "theoretical_loss": 3.5004541282208637, "tokens_seen": 1578106880 }, { "epoch": 0.56, "learning_rate": 0.00022020431328036323, "loss": 3.1883, "theoretical_loss": 3.500251134123765, "tokens_seen": 1579155456 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.48032376170158386, "objective/train/docs_used": 894462, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.296074390411377, "objective/train/original_loss": 3.296074390411377, "objective/train/theoretical_loss": 3.5002004125562856, "objective/train/tokens_used": 1599877600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23856282234191895, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492448806762695, "objective/train/weighted_lm_loss": 3.4574201107025146, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9516441226005554, "theoretical_loss": 3.5002004125562856, "tokens_seen": 1579417600 }, { "epoch": 0.56, "learning_rate": 0.00022001513431706393, "loss": 3.2023, "theoretical_loss": 3.500048312484891, "tokens_seen": 1580204032 }, { "epoch": 0.56, "learning_rate": 0.00021982595535376468, "loss": 3.2244, "theoretical_loss": 3.4998456630434336, "tokens_seen": 1581252608 }, { "epoch": 0.57, "learning_rate": 0.0002196367763904654, "loss": 3.1959, "theoretical_loss": 3.499643185539152, "tokens_seen": 1582301184 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.47416922450065613, "objective/train/docs_used": 896326, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.748246431350708, "objective/train/original_loss": 2.748246431350708, "objective/train/theoretical_loss": 3.4995673007495816, "objective/train/tokens_used": 1603154400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2362240105867386, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0486173629760742, "objective/train/weighted_lm_loss": 2.8795387744903564, "objective/train/weights_max": 1.0512193441390991, "objective/train/weights_min": 0.951950192451477, "theoretical_loss": 3.4995673007495816, "tokens_seen": 1582694400 }, { "epoch": 0.57, "learning_rate": 0.00021944759742716612, "loss": 3.173, "theoretical_loss": 3.4994408797123704, "tokens_seen": 1583349760 }, { "epoch": 0.57, "learning_rate": 0.0002192584184638668, "loss": 3.1532, "theoretical_loss": 3.499238745303977, "tokens_seen": 1584398336 }, { "epoch": 0.57, "learning_rate": 0.00021906923950056753, "loss": 3.163, "theoretical_loss": 3.4990367820554216, "tokens_seen": 1585446912 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.48116692900657654, "objective/train/docs_used": 897668, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.866941213607788, "objective/train/original_loss": 2.866940975189209, "objective/train/theoretical_loss": 3.49893586453544, "objective/train/tokens_used": 1606431200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23899151384830475, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493313074111938, "objective/train/weighted_lm_loss": 3.009093999862671, "objective/train/weights_max": 1.051215410232544, "objective/train/weights_min": 0.951438844203949, "theoretical_loss": 3.49893586453544, "tokens_seen": 1585971200 }, { "epoch": 0.57, "learning_rate": 0.00021888006053726825, "loss": 3.1054, "theoretical_loss": 3.498834989708716, "tokens_seen": 1586495488 }, { "epoch": 0.57, "learning_rate": 0.000218690881573969, "loss": 3.1044, "theoretical_loss": 3.4986333680064297, "tokens_seen": 1587544064 }, { "epoch": 0.57, "learning_rate": 0.00021850170261066972, "loss": 3.1573, "theoretical_loss": 3.4984319166916906, "tokens_seen": 1588592640 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.44459864497184753, "objective/train/docs_used": 899481, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.089205503463745, "objective/train/original_loss": 3.089205503463745, "objective/train/theoretical_loss": 3.4983060960309915, "objective/train/tokens_used": 1609708000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.233369842171669, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.045644998550415, "objective/train/weighted_lm_loss": 3.2272956371307373, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9539871215820312, "theoretical_loss": 3.4983060960309915, "tokens_seen": 1589248000 }, { "epoch": 0.57, "learning_rate": 0.0002183125236473704, "loss": 3.099, "theoretical_loss": 3.4982306355081825, "tokens_seen": 1589641216 }, { "epoch": 0.57, "learning_rate": 0.00021812334468407113, "loss": 3.1155, "theoretical_loss": 3.4980295242001422, "tokens_seen": 1590689792 }, { "epoch": 0.57, "learning_rate": 0.00021793416572077185, "loss": 3.1532, "theoretical_loss": 3.497828582512361, "tokens_seen": 1591738368 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.4910542070865631, "objective/train/docs_used": 902104, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7395198345184326, "objective/train/original_loss": 2.7395195960998535, "objective/train/theoretical_loss": 3.4976779874066066, "objective/train/tokens_used": 1612984800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24307867884635925, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503411293029785, "objective/train/weighted_lm_loss": 2.8776791095733643, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.95230633020401, "theoretical_loss": 3.4976779874066066, "tokens_seen": 1592524800 }, { "epoch": 0.57, "learning_rate": 0.00021774498675747257, "loss": 3.0806, "theoretical_loss": 3.4976278101901803, "tokens_seen": 1592786944 }, { "epoch": 0.57, "learning_rate": 0.0002175558077941733, "loss": 3.1768, "theoretical_loss": 3.4974272069794914, "tokens_seen": 1593835520 }, { "epoch": 0.57, "learning_rate": 0.00021736662883087402, "loss": 3.1855, "theoretical_loss": 3.4972267726267336, "tokens_seen": 1594884096 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.48677858710289, "objective/train/docs_used": 903830, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6319079399108887, "objective/train/original_loss": 2.6319079399108887, "objective/train/theoretical_loss": 3.497051530885427, "objective/train/tokens_used": 1616261600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23925882577896118, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498939752578735, "objective/train/weighted_lm_loss": 2.763871192932129, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9580140113830566, "theoretical_loss": 3.497051530885427, "tokens_seen": 1595801600 }, { "epoch": 0.57, "learning_rate": 0.00021717744986757474, "loss": 3.1649, "theoretical_loss": 3.4970265068788944, "tokens_seen": 1595932672 }, { "epoch": 0.57, "learning_rate": 0.00021698827090427546, "loss": 3.1523, "theoretical_loss": 3.4968264094835027, "tokens_seen": 1596981248 }, { "epoch": 0.57, "learning_rate": 0.00021679909194097615, "loss": 3.1599, "theoretical_loss": 3.4966264801886346, "tokens_seen": 1598029824 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.49028122425079346, "objective/train/docs_used": 906083, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0217175483703613, "objective/train/original_loss": 3.0217177867889404, "objective/train/theoretical_loss": 3.4964267187429066, "objective/train/tokens_used": 1619538400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24274671077728271, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502620935440063, "objective/train/weighted_lm_loss": 3.173563003540039, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9540677070617676, "theoretical_loss": 3.4964267187429066, "tokens_seen": 1599078400 }, { "epoch": 0.57, "learning_rate": 0.00021660991297767687, "loss": 3.1587, "theoretical_loss": 3.4964267187429066, "tokens_seen": 1599078400 }, { "epoch": 0.57, "learning_rate": 0.00021642073401437762, "loss": 3.134, "theoretical_loss": 3.4962271248954755, "tokens_seen": 1600126976 }, { "epoch": 0.57, "learning_rate": 0.00021623155505107834, "loss": 3.1665, "theoretical_loss": 3.4960276983960368, "tokens_seen": 1601175552 }, { "epoch": 0.57, "learning_rate": 0.00021604237608777906, "loss": 3.0887, "theoretical_loss": 3.495828438994824, "tokens_seen": 1602224128 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.4898524582386017, "objective/train/docs_used": 907881, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.784710645675659, "objective/train/original_loss": 2.784710168838501, "objective/train/theoretical_loss": 3.495803543306348, "objective/train/tokens_used": 1622815200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2425891011953354, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502182245254517, "objective/train/weighted_lm_loss": 2.924830436706543, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.95200514793396, "theoretical_loss": 3.495803543306348, "tokens_seen": 1602355200 }, { "epoch": 0.57, "learning_rate": 0.00021585319712447975, "loss": 3.1445, "theoretical_loss": 3.495629346442607, "tokens_seen": 1603272704 }, { "epoch": 0.57, "learning_rate": 0.00021566401816118047, "loss": 3.1624, "theoretical_loss": 3.4954304204906896, "tokens_seen": 1604321280 }, { "epoch": 0.57, "learning_rate": 0.0002154748391978812, "loss": 3.1413, "theoretical_loss": 3.4952316608909078, "tokens_seen": 1605369856 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.4899497926235199, "objective/train/docs_used": 909847, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2979307174682617, "objective/train/original_loss": 3.2979307174682617, "objective/train/theoretical_loss": 3.495181996954453, "objective/train/tokens_used": 1626092000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24249549210071564, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502275228500366, "objective/train/weighted_lm_loss": 3.4631145000457764, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9523872137069702, "theoretical_loss": 3.495181996954453, "tokens_seen": 1605632000 }, { "epoch": 0.57, "learning_rate": 0.00021528566023458194, "loss": 3.1345, "theoretical_loss": 3.49503306739563, "tokens_seen": 1606418432 }, { "epoch": 0.57, "learning_rate": 0.00021509648127128263, "loss": 3.078, "theoretical_loss": 3.4948346397577543, "tokens_seen": 1607467008 }, { "epoch": 0.57, "learning_rate": 0.00021490730230798336, "loss": 3.1708, "theoretical_loss": 3.4946363777307075, "tokens_seen": 1608515584 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.4887382686138153, "objective/train/docs_used": 911828, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.157907247543335, "objective/train/original_loss": 3.1579067707061768, "objective/train/theoretical_loss": 3.4945620721168713, "objective/train/tokens_used": 1629368800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24136415123939514, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501006841659546, "objective/train/weighted_lm_loss": 3.3168606758117676, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.951468825340271, "theoretical_loss": 3.4945620721168713, "tokens_seen": 1608908800 }, { "epoch": 0.57, "learning_rate": 0.00021471812334468408, "loss": 3.1846, "theoretical_loss": 3.494438281068443, "tokens_seen": 1609564160 }, { "epoch": 0.58, "learning_rate": 0.0002145289443813848, "loss": 3.1879, "theoretical_loss": 3.4942403495254393, "tokens_seen": 1610612736 }, { "epoch": 0.58, "learning_rate": 0.0002143397654180855, "loss": 3.1535, "theoretical_loss": 3.4940425828567, "tokens_seen": 1611661312 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.4839933216571808, "objective/train/docs_used": 913095, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9910757541656494, "objective/train/original_loss": 2.9910759925842285, "objective/train/theoretical_loss": 3.493943761273761, "objective/train/tokens_used": 1632645600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23838478326797485, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04961097240448, "objective/train/weighted_lm_loss": 3.1397407054901123, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9523307681083679, "theoretical_loss": 3.493943761273761, "tokens_seen": 1612185600 }, { "epoch": 0.58, "learning_rate": 0.00021415058645478624, "loss": 3.1622, "theoretical_loss": 3.4938449808177516, "tokens_seen": 1612709888 }, { "epoch": 0.58, "learning_rate": 0.00021396140749148696, "loss": 3.1853, "theoretical_loss": 3.4936475431646397, "tokens_seen": 1613758464 }, { "epoch": 0.58, "learning_rate": 0.00021377222852818768, "loss": 3.2193, "theoretical_loss": 3.4934502696539322, "tokens_seen": 1614807040 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.46676307916641235, "objective/train/docs_used": 915136, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3160529136657715, "objective/train/original_loss": 3.3160529136657715, "objective/train/theoretical_loss": 3.493327056955347, "objective/train/tokens_used": 1635922400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23430095613002777, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0478670597076416, "objective/train/weighted_lm_loss": 3.474302291870117, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9522300958633423, "theoretical_loss": 3.493327056955347, "tokens_seen": 1615462400 }, { "epoch": 0.58, "learning_rate": 0.0002135830495648884, "loss": 3.1691, "theoretical_loss": 3.493253160042713, "tokens_seen": 1615855616 }, { "epoch": 0.58, "learning_rate": 0.0002133938706015891, "loss": 3.1744, "theoretical_loss": 3.4930562140885844, "tokens_seen": 1616904192 }, { "epoch": 0.58, "learning_rate": 0.0002132046916382898, "loss": 3.1364, "theoretical_loss": 3.492859431549663, "tokens_seen": 1617952768 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.4899651110172272, "objective/train/docs_used": 916739, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3264360427856445, "objective/train/original_loss": 3.3264360427856445, "objective/train/theoretical_loss": 3.4927119517414846, "objective/train/tokens_used": 1639199200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2438695728778839, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502362251281738, "objective/train/weighted_lm_loss": 3.493744373321533, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.951538622379303, "theoretical_loss": 3.4927119517414846, "tokens_seen": 1618739200 }, { "epoch": 0.58, "learning_rate": 0.00021301551267499056, "loss": 3.1957, "theoretical_loss": 3.49266281218458, "tokens_seen": 1619001344 }, { "epoch": 0.58, "learning_rate": 0.00021282633371169128, "loss": 3.2048, "theoretical_loss": 3.49246635575248, "tokens_seen": 1620049920 }, { "epoch": 0.58, "learning_rate": 0.00021263715474839197, "loss": 3.1662, "theoretical_loss": 3.4922700620130174, "tokens_seen": 1621098496 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.4800923466682434, "objective/train/docs_used": 918332, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.748319625854492, "objective/train/original_loss": 2.748319625854492, "objective/train/theoretical_loss": 3.4920984382612357, "objective/train/tokens_used": 1642476000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23431384563446045, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049200177192688, "objective/train/weighted_lm_loss": 2.885237693786621, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9522053599357605, "theoretical_loss": 3.4920984382612357, "tokens_seen": 1622016000 }, { "epoch": 0.58, "learning_rate": 0.0002124479757850927, "loss": 3.1469, "theoretical_loss": 3.492073930726355, "tokens_seen": 1622147072 }, { "epoch": 0.58, "learning_rate": 0.00021225879682179342, "loss": 3.1629, "theoretical_loss": 3.491877961653168, "tokens_seen": 1623195648 }, { "epoch": 0.58, "learning_rate": 0.00021206961785849414, "loss": 3.1635, "theoretical_loss": 3.4916821545546344, "tokens_seen": 1624244224 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.4663797616958618, "objective/train/docs_used": 920327, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1265220642089844, "objective/train/original_loss": 3.1265220642089844, "objective/train/theoretical_loss": 3.4914865091924394, "objective/train/tokens_used": 1645752800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2259867787361145, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0477863550186157, "objective/train/weighted_lm_loss": 3.2764687538146973, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9532047510147095, "theoretical_loss": 3.4914865091924394, "tokens_seen": 1625292800 }, { "epoch": 0.58, "learning_rate": 0.00021188043889519488, "loss": 3.1512, "theoretical_loss": 3.4914865091924394, "tokens_seen": 1625292800 }, { "epoch": 0.58, "learning_rate": 0.00021169125993189558, "loss": 3.2119, "theoretical_loss": 3.4912910253287732, "tokens_seen": 1626341376 }, { "epoch": 0.58, "learning_rate": 0.0002115020809685963, "loss": 3.2025, "theoretical_loss": 3.4910957027263274, "tokens_seen": 1627389952 }, { "epoch": 0.58, "learning_rate": 0.00021131290200529702, "loss": 3.1581, "theoretical_loss": 3.490900541148295, "tokens_seen": 1628438528 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.4907952547073364, "objective/train/docs_used": 922281, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.313007354736328, "objective/train/original_loss": 3.313007354736328, "objective/train/theoretical_loss": 3.4908761572612947, "objective/train/tokens_used": 1649029600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24451680481433868, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050322413444519, "objective/train/weighted_lm_loss": 3.4795234203338623, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9518669247627258, "theoretical_loss": 3.4908761572612947, "tokens_seen": 1628569600 }, { "epoch": 0.58, "learning_rate": 0.00021112372304199774, "loss": 3.0487, "theoretical_loss": 3.490705540358369, "tokens_seen": 1629487104 }, { "epoch": 0.58, "learning_rate": 0.00021093454407869843, "loss": 3.1641, "theoretical_loss": 3.4905107001207414, "tokens_seen": 1630535680 }, { "epoch": 0.58, "learning_rate": 0.00021074536511539918, "loss": 3.0879, "theoretical_loss": 3.490316020200101, "tokens_seen": 1631584256 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.4708525240421295, "objective/train/docs_used": 923944, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9695937633514404, "objective/train/original_loss": 2.9695937633514404, "objective/train/theoretical_loss": 3.4902673752419417, "objective/train/tokens_used": 1652306400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23232224583625793, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048266053199768, "objective/train/weighted_lm_loss": 3.110866069793701, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9532876014709473, "theoretical_loss": 3.4902673752419417, "tokens_seen": 1631846400 }, { "epoch": 0.58, "learning_rate": 0.0002105561861520999, "loss": 3.1031, "theoretical_loss": 3.4901215003616333, "tokens_seen": 1632632832 }, { "epoch": 0.58, "learning_rate": 0.00021036700718880062, "loss": 3.0844, "theoretical_loss": 3.4899271403710164, "tokens_seen": 1633681408 }, { "epoch": 0.58, "learning_rate": 0.00021017782822550131, "loss": 3.1156, "theoretical_loss": 3.4897329399944237, "tokens_seen": 1634729984 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.48439714312553406, "objective/train/docs_used": 925798, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8560261726379395, "objective/train/original_loss": 2.8560264110565186, "objective/train/theoretical_loss": 3.4896601559560523, "objective/train/tokens_used": 1655583200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23803523182868958, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496495962142944, "objective/train/weighted_lm_loss": 2.9979634284973145, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9529846906661987, "theoretical_loss": 3.4896601559560523, "tokens_seen": 1635123200 }, { "epoch": 0.58, "learning_rate": 0.00020998864926220204, "loss": 3.0956, "theoretical_loss": 3.4895388989985188, "tokens_seen": 1635778560 }, { "epoch": 0.58, "learning_rate": 0.00020979947029890276, "loss": 3.111, "theoretical_loss": 3.4893450171504563, "tokens_seen": 1636827136 }, { "epoch": 0.59, "learning_rate": 0.0002096102913356035, "loss": 3.0926, "theoretical_loss": 3.4891512942178795, "tokens_seen": 1637875712 }, { "debugging/Self-BLEU-5": 0.4862537472725578, "debugging/distinct-1-grams": 0.7771784690107355, "debugging/distinct-2-grams": 0.9668669456652452, "debugging/entropy-1-grams": 6.202714981817353, "debugging/entropy-2-grams": 7.211883548596637, "debugging/length": 471.0, "debugging/num_segments": 21, "debugging/raw_token_scores_avg": 0.013972360640764236, "debugging/raw_token_scores_std": 0.06252207607030869, "epoch": 0.59, "objective/train/advantage_avg": 0.4860203266143799, "objective/train/docs_used": 927402, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2737643718719482, "objective/train/original_loss": 3.2737646102905273, "objective/train/theoretical_loss": 3.4890544922724205, "objective/train/tokens_used": 1658860000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2401282638311386, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498225688934326, "objective/train/weighted_lm_loss": 3.4373350143432617, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9551194906234741, "theoretical_loss": 3.4890544922724205, "tokens_seen": 1638400000 }, { "epoch": 0.59, "learning_rate": 0.00020942111237230422, "loss": 3.1852, "theoretical_loss": 3.48895772996892, "tokens_seen": 1638924288 }, { "epoch": 0.59, "learning_rate": 0.00020923193340900492, "loss": 3.1467, "theoretical_loss": 3.4887643241721955, "tokens_seen": 1639972864 }, { "epoch": 0.59, "learning_rate": 0.00020904275444570564, "loss": 3.09, "theoretical_loss": 3.4885710765968088, "tokens_seen": 1641021440 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.48904502391815186, "objective/train/docs_used": 928835, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1367592811584473, "objective/train/original_loss": 3.1367592811584473, "objective/train/theoretical_loss": 3.4884503771065636, "objective/train/tokens_used": 1662136800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24231131374835968, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501362085342407, "objective/train/weighted_lm_loss": 3.2936394214630127, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9644740223884583, "theoretical_loss": 3.4884503771065636, "tokens_seen": 1641676800 }, { "epoch": 0.59, "learning_rate": 0.00020885357548240636, "loss": 3.0997, "theoretical_loss": 3.4883779870123455, "tokens_seen": 1642070016 }, { "epoch": 0.59, "learning_rate": 0.00020866439651910708, "loss": 3.1374, "theoretical_loss": 3.488185055188876, "tokens_seen": 1643118592 }, { "epoch": 0.59, "learning_rate": 0.0002084752175558078, "loss": 3.1162, "theoretical_loss": 3.4879922808969486, "tokens_seen": 1644167168 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.4872593283653259, "objective/train/docs_used": 930288, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.094754695892334, "objective/train/original_loss": 3.094754457473755, "objective/train/theoretical_loss": 3.48784780342032, "objective/train/tokens_used": 1665413600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24150964617729187, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049953579902649, "objective/train/weighted_lm_loss": 3.2483839988708496, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9528509378433228, "theoretical_loss": 3.48784780342032, "tokens_seen": 1644953600 }, { "epoch": 0.59, "learning_rate": 0.00020828603859250852, "loss": 3.1739, "theoretical_loss": 3.4877996639075937, "tokens_seen": 1645215744 }, { "epoch": 0.59, "learning_rate": 0.00020809685962920924, "loss": 3.1529, "theoretical_loss": 3.4876072039923196, "tokens_seen": 1646264320 }, { "epoch": 0.59, "learning_rate": 0.00020790768066590996, "loss": 3.1419, "theoretical_loss": 3.4874149009231123, "tokens_seen": 1647312896 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.4832485616207123, "objective/train/docs_used": 932466, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.70462965965271, "objective/train/original_loss": 2.704629898071289, "objective/train/theoretical_loss": 3.4872467642214566, "objective/train/tokens_used": 1668690400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23652879893779755, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495271682739258, "objective/train/weighted_lm_loss": 2.8395895957946777, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9560015797615051, "theoretical_loss": 3.4872467642214566, "tokens_seen": 1648230400 }, { "epoch": 0.59, "learning_rate": 0.00020771850170261068, "loss": 3.1129, "theoretical_loss": 3.4872227544724312, "tokens_seen": 1648361472 }, { "epoch": 0.59, "learning_rate": 0.00020752932273931138, "loss": 3.1736, "theoretical_loss": 3.487030764413214, "tokens_seen": 1649410048 }, { "epoch": 0.59, "learning_rate": 0.00020734014377601212, "loss": 3.1707, "theoretical_loss": 3.4868389305188687, "tokens_seen": 1650458624 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.48778533935546875, "objective/train/docs_used": 934348, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9127755165100098, "objective/train/original_loss": 2.9127755165100098, "objective/train/theoretical_loss": 3.4866472525632766, "objective/train/tokens_used": 1671967200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24155890941619873, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.05000638961792, "objective/train/weighted_lm_loss": 3.0580127239227295, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9531963467597961, "theoretical_loss": 3.4866472525632766, "tokens_seen": 1651507200 }, { "epoch": 0.59, "learning_rate": 0.00020715096481271284, "loss": 3.096, "theoretical_loss": 3.4866472525632766, "tokens_seen": 1651507200 }, { "epoch": 0.59, "learning_rate": 0.00020696178584941356, "loss": 3.1461, "theoretical_loss": 3.486455730320789, "tokens_seen": 1652555776 }, { "epoch": 0.59, "learning_rate": 0.00020677260688611426, "loss": 3.0677, "theoretical_loss": 3.486264363566228, "tokens_seen": 1653604352 }, { "epoch": 0.59, "learning_rate": 0.00020658342792281498, "loss": 3.1543, "theoretical_loss": 3.4860731520748827, "tokens_seen": 1654652928 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.47505781054496765, "objective/train/docs_used": 936266, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9631354808807373, "objective/train/original_loss": 2.963135242462158, "objective/train/theoretical_loss": 3.4860492615442356, "objective/train/tokens_used": 1675244000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23247912526130676, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04868745803833, "objective/train/weighted_lm_loss": 3.1075222492218018, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9551524519920349, "theoretical_loss": 3.4860492615442356, "tokens_seen": 1654784000 }, { "epoch": 0.59, "learning_rate": 0.0002063942489595157, "loss": 3.0798, "theoretical_loss": 3.4858820956225083, "tokens_seen": 1655701504 }, { "epoch": 0.59, "learning_rate": 0.00020620506999621645, "loss": 3.0016, "theoretical_loss": 3.4856911939853283, "tokens_seen": 1656750080 }, { "epoch": 0.59, "learning_rate": 0.00020601589103291714, "loss": 3.1524, "theoretical_loss": 3.485500446940028, "tokens_seen": 1657798656 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.4916464686393738, "objective/train/docs_used": 938319, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8522729873657227, "objective/train/original_loss": 2.8522729873657227, "objective/train/theoretical_loss": 3.485452784307559, "objective/train/tokens_used": 1678520800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24374902248382568, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504037141799927, "objective/train/weighted_lm_loss": 2.996385335922241, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.994926393032074, "theoretical_loss": 3.485452784307559, "tokens_seen": 1658060800 }, { "epoch": 0.59, "learning_rate": 0.00020582671206961786, "loss": 3.0695, "theoretical_loss": 3.4853098542637566, "tokens_seen": 1658847232 }, { "epoch": 0.59, "learning_rate": 0.00020563753310631858, "loss": 3.0393, "theoretical_loss": 3.4851194157341263, "tokens_seen": 1659895808 }, { "epoch": 0.59, "learning_rate": 0.0002054483541430193, "loss": 3.1038, "theoretical_loss": 3.484929131129207, "tokens_seen": 1660944384 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.48630455136299133, "objective/train/docs_used": 940185, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.038881540298462, "objective/train/original_loss": 3.038881301879883, "objective/train/theoretical_loss": 3.4848578140408613, "objective/train/tokens_used": 1681797600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24110354483127594, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498559474945068, "objective/train/weighted_lm_loss": 3.1900389194488525, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9526039958000183, "theoretical_loss": 3.4848578140408613, "tokens_seen": 1661337600 }, { "epoch": 0.59, "learning_rate": 0.00020525917517972002, "loss": 3.1271, "theoretical_loss": 3.484739000227532, "tokens_seen": 1661992960 }, { "epoch": 0.59, "learning_rate": 0.00020506999621642074, "loss": 3.1116, "theoretical_loss": 3.48454902280809, "tokens_seen": 1663041536 }, { "epoch": 0.59, "learning_rate": 0.00020488081725312146, "loss": 3.106, "theoretical_loss": 3.484359198650326, "tokens_seen": 1664090112 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.46543005108833313, "objective/train/docs_used": 942299, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8884527683258057, "objective/train/original_loss": 2.8884522914886475, "objective/train/theoretical_loss": 3.4842643439757754, "objective/train/tokens_used": 1685074400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2315172702074051, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0477193593978882, "objective/train/weighted_lm_loss": 3.0250158309936523, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9514710903167725, "theoretical_loss": 3.4842643439757754, "tokens_seen": 1664614400 }, { "epoch": 0.59, "learning_rate": 0.00020469163828982218, "loss": 3.097, "theoretical_loss": 3.484169527534143, "tokens_seen": 1665138688 }, { "epoch": 0.6, "learning_rate": 0.0002045024593265229, "loss": 3.0913, "theoretical_loss": 3.4839800092398967, "tokens_seen": 1666187264 }, { "epoch": 0.6, "learning_rate": 0.0002043132803632236, "loss": 3.1178, "theoretical_loss": 3.4837906435483967, "tokens_seen": 1667235840 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.49083784222602844, "objective/train/docs_used": 944133, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.160414695739746, "objective/train/original_loss": 3.160414218902588, "objective/train/theoretical_loss": 3.4836723673875793, "objective/train/tokens_used": 1688351200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2420562356710434, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.05031418800354, "objective/train/weighted_lm_loss": 3.320441484451294, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9800860285758972, "theoretical_loss": 3.4836723673875793, "tokens_seen": 1667891200 }, { "epoch": 0.6, "learning_rate": 0.00020412410139992432, "loss": 3.1704, "theoretical_loss": 3.4836014302409053, "tokens_seen": 1668284416 }, { "epoch": 0.6, "learning_rate": 0.00020393492243662507, "loss": 3.1694, "theoretical_loss": 3.4834123690991334, "tokens_seen": 1669332992 }, { "epoch": 0.6, "learning_rate": 0.00020374574347332579, "loss": 3.1643, "theoretical_loss": 3.4832234599052434, "tokens_seen": 1670381568 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.48427098989486694, "objective/train/docs_used": 945841, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.058107852935791, "objective/train/original_loss": 3.05810809135437, "objective/train/theoretical_loss": 3.4830818775948327, "objective/train/tokens_used": 1691628000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2393985241651535, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049643874168396, "objective/train/weighted_lm_loss": 3.209540843963623, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9518405795097351, "theoretical_loss": 3.4830818775948327, "tokens_seen": 1671168000 }, { "epoch": 0.6, "learning_rate": 0.00020355656451002648, "loss": 3.1219, "theoretical_loss": 3.483034702441845, "tokens_seen": 1671430144 }, { "epoch": 0.6, "learning_rate": 0.0002033673855467272, "loss": 3.178, "theoretical_loss": 3.4828460964919965, "tokens_seen": 1672478720 }, { "epoch": 0.6, "learning_rate": 0.00020317820658342792, "loss": 3.1241, "theoretical_loss": 3.4826576418392, "tokens_seen": 1673527296 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.4750880002975464, "objective/train/docs_used": 947805, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0682384967803955, "objective/train/original_loss": 3.0682387351989746, "objective/train/theoretical_loss": 3.48249286795901, "objective/train/tokens_used": 1694904800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23441539704799652, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048700213432312, "objective/train/weighted_lm_loss": 3.2168827056884766, "objective/train/weights_max": 1.0512195825576782, "objective/train/weights_min": 0.9514750838279724, "theoretical_loss": 3.48249286795901, "tokens_seen": 1674444800 }, { "epoch": 0.6, "learning_rate": 0.00020298902762012864, "loss": 3.1192, "theoretical_loss": 3.4824693382674043, "tokens_seen": 1674575872 }, { "epoch": 0.6, "learning_rate": 0.0002027998486568294, "loss": 3.1398, "theoretical_loss": 3.4822811855610007, "tokens_seen": 1675624448 }, { "epoch": 0.6, "learning_rate": 0.00020261066969353008, "loss": 3.1768, "theoretical_loss": 3.4820931835048223, "tokens_seen": 1676673024 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.48598864674568176, "objective/train/docs_used": 948902, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.975940227508545, "objective/train/original_loss": 2.975940704345703, "objective/train/theoretical_loss": 3.4819053318841453, "objective/train/tokens_used": 1698181600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2399335503578186, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049818515777588, "objective/train/weighted_lm_loss": 3.1240224838256836, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9528480172157288, "theoretical_loss": 3.4819053318841453, "tokens_seen": 1677721600 }, { "epoch": 0.6, "learning_rate": 0.0002024214907302308, "loss": 3.2016, "theoretical_loss": 3.4819053318841453, "tokens_seen": 1677721600 }, { "epoch": 0.6, "learning_rate": 0.00020223231176693152, "loss": 3.1829, "theoretical_loss": 3.481717630484684, "tokens_seen": 1678770176 }, { "epoch": 0.6, "learning_rate": 0.00020204313280363224, "loss": 3.1091, "theoretical_loss": 3.481530079092593, "tokens_seen": 1679818752 }, { "epoch": 0.6, "learning_rate": 0.00020185395384033294, "loss": 3.1407, "theoretical_loss": 3.481342677494464, "tokens_seen": 1680867328 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.47322916984558105, "objective/train/docs_used": 950589, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0295519828796387, "objective/train/original_loss": 3.0295517444610596, "objective/train/theoretical_loss": 3.481319262816474, "objective/train/tokens_used": 1701458400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23179136216640472, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485007762908936, "objective/train/weighted_lm_loss": 3.177227258682251, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9534991383552551, "theoretical_loss": 3.481319262816474, "tokens_seen": 1680998400 }, { "epoch": 0.6, "learning_rate": 0.00020166477487703369, "loss": 3.159, "theoretical_loss": 3.4811554254773243, "tokens_seen": 1681915904 }, { "epoch": 0.6, "learning_rate": 0.0002014755959137344, "loss": 3.0974, "theoretical_loss": 3.4809683228286374, "tokens_seen": 1682964480 }, { "epoch": 0.6, "learning_rate": 0.00020128641695043513, "loss": 3.1729, "theoretical_loss": 3.4807813693363023, "tokens_seen": 1684013056 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.4855785071849823, "objective/train/docs_used": 952447, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7886948585510254, "objective/train/original_loss": 2.7886953353881836, "objective/train/theoretical_loss": 3.4807346542440833, "objective/train/tokens_used": 1704735200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23818425834178925, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497686862945557, "objective/train/weighted_lm_loss": 2.927842855453491, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9933276176452637, "theoretical_loss": 3.4807346542440833, "tokens_seen": 1684275200 }, { "epoch": 0.6, "learning_rate": 0.00020109723798713585, "loss": 3.1108, "theoretical_loss": 3.480594564788648, "tokens_seen": 1685061632 }, { "epoch": 0.6, "learning_rate": 0.00020090805902383654, "loss": 3.1693, "theoretical_loss": 3.4804079089744375, "tokens_seen": 1686110208 }, { "epoch": 0.6, "learning_rate": 0.00020071888006053726, "loss": 3.2593, "theoretical_loss": 3.4802214016828636, "tokens_seen": 1687158784 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.4896550476551056, "objective/train/docs_used": 954180, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.044297218322754, "objective/train/original_loss": 3.044297218322754, "objective/train/theoretical_loss": 3.4801514996965643, "objective/train/tokens_used": 1708012000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24202604591846466, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501958131790161, "objective/train/weighted_lm_loss": 3.196167230606079, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9804592728614807, "theoretical_loss": 3.4801514996965643, "tokens_seen": 1687552000 }, { "epoch": 0.6, "learning_rate": 0.000200529701097238, "loss": 3.1462, "theoretical_loss": 3.48003504270355, "tokens_seen": 1688207360 }, { "epoch": 0.6, "learning_rate": 0.00020034052213393873, "loss": 3.1513, "theoretical_loss": 3.4798488318265477, "tokens_seen": 1689255936 }, { "epoch": 0.6, "learning_rate": 0.00020015134317063942, "loss": 3.1753, "theoretical_loss": 3.479662768842334, "tokens_seen": 1690304512 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.4866858124732971, "objective/train/docs_used": 956253, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9044716358184814, "objective/train/original_loss": 2.9044711589813232, "objective/train/theoretical_loss": 3.4795697927446643, "objective/train/tokens_used": 1711288800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2399260550737381, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049888253211975, "objective/train/weighted_lm_loss": 3.0500967502593994, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9630663990974426, "theoretical_loss": 3.4795697927446643, "tokens_seen": 1690828800 }, { "epoch": 0.6, "learning_rate": 0.00019996216420734014, "loss": 3.1323, "theoretical_loss": 3.4794768535418146, "tokens_seen": 1691353088 }, { "epoch": 0.6, "learning_rate": 0.00019977298524404086, "loss": 3.1002, "theoretical_loss": 3.4792910857163193, "tokens_seen": 1692401664 }, { "epoch": 0.6, "learning_rate": 0.00019958380628074158, "loss": 3.0834, "theoretical_loss": 3.4791054651576006, "tokens_seen": 1693450240 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.48621219396591187, "objective/train/docs_used": 958497, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7586536407470703, "objective/train/original_loss": 2.7586538791656494, "objective/train/theoretical_loss": 3.4789895269999507, "objective/train/tokens_used": 1714565600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24361838400363922, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498595237731934, "objective/train/weighted_lm_loss": 2.896075963973999, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9519256353378296, "theoretical_loss": 3.4789895269999507, "tokens_seen": 1694105600 }, { "epoch": 0.61, "learning_rate": 0.0001993946273174423, "loss": 3.1498, "theoretical_loss": 3.4789199916578353, "tokens_seen": 1694498816 }, { "epoch": 0.61, "learning_rate": 0.00019920544835414303, "loss": 3.2399, "theoretical_loss": 3.478734665009622, "tokens_seen": 1695547392 }, { "epoch": 0.61, "learning_rate": 0.00019901626939084375, "loss": 3.2016, "theoretical_loss": 3.4785494850059786, "tokens_seen": 1696595968 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.4836694300174713, "objective/train/docs_used": 960356, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7970266342163086, "objective/train/original_loss": 2.7970266342163086, "objective/train/theoretical_loss": 3.478410696114469, "objective/train/tokens_used": 1717842400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2396220862865448, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495851039886475, "objective/train/weighted_lm_loss": 2.934868097305298, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9527864456176758, "theoretical_loss": 3.478410696114469, "tokens_seen": 1697382400 }, { "epoch": 0.61, "learning_rate": 0.00019882709042754447, "loss": 3.183, "theoretical_loss": 3.478364451440343, "tokens_seen": 1697644544 }, { "epoch": 0.61, "learning_rate": 0.0001986379114642452, "loss": 3.1736, "theoretical_loss": 3.478179564106571, "tokens_seen": 1698693120 }, { "epoch": 0.61, "learning_rate": 0.00019844873250094588, "loss": 3.1614, "theoretical_loss": 3.4779948227989372, "tokens_seen": 1699741696 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.4798791706562042, "objective/train/docs_used": 961887, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2315726280212402, "objective/train/original_loss": 3.231572151184082, "objective/train/theoretical_loss": 3.477833293780412, "objective/train/tokens_used": 1721119200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2355627715587616, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491852760314941, "objective/train/weighted_lm_loss": 3.3893630504608154, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9673263430595398, "theoretical_loss": 3.477833293780412, "tokens_seen": 1700659200 }, { "epoch": 0.61, "learning_rate": 0.00019825955353764663, "loss": 3.1208, "theoretical_loss": 3.47781022731213, "tokens_seen": 1700790272 }, { "epoch": 0.61, "learning_rate": 0.00019807037457434735, "loss": 3.1911, "theoretical_loss": 3.4776257774412547, "tokens_seen": 1701838848 }, { "epoch": 0.61, "learning_rate": 0.00019788119561104807, "loss": 3.1836, "theoretical_loss": 3.4774414729818295, "tokens_seen": 1702887424 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.4749166965484619, "objective/train/docs_used": 963663, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2296407222747803, "objective/train/original_loss": 3.2296409606933594, "objective/train/theoretical_loss": 3.477257313729786, "objective/train/tokens_used": 1724396000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23224613070487976, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048672080039978, "objective/train/weighted_lm_loss": 3.385560989379883, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9597051739692688, "theoretical_loss": 3.477257313729786, "tokens_seen": 1703936000 }, { "epoch": 0.61, "learning_rate": 0.00019769201664774876, "loss": 3.2243, "theoretical_loss": 3.477257313729786, "tokens_seen": 1703936000 }, { "epoch": 0.61, "learning_rate": 0.00019750283768444948, "loss": 3.1611, "theoretical_loss": 3.477073299481467, "tokens_seen": 1704984576 }, { "epoch": 0.61, "learning_rate": 0.0001973136587211502, "loss": 3.178, "theoretical_loss": 3.4768894300336264, "tokens_seen": 1706033152 }, { "epoch": 0.61, "learning_rate": 0.00019712447975785095, "loss": 3.0676, "theoretical_loss": 3.476705705183427, "tokens_seen": 1707081728 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.4770734906196594, "objective/train/docs_used": 965784, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9515833854675293, "objective/train/original_loss": 2.951582908630371, "objective/train/theoretical_loss": 3.4766827497340875, "objective/train/tokens_used": 1727672800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23381619155406952, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488957166671753, "objective/train/weighted_lm_loss": 3.0978763103485107, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9522743225097656, "theoretical_loss": 3.4766827497340875, "tokens_seen": 1707212800 }, { "epoch": 0.61, "learning_rate": 0.00019693530079455167, "loss": 3.1771, "theoretical_loss": 3.4765221247284415, "tokens_seen": 1708130304 }, { "epoch": 0.61, "learning_rate": 0.00019674612183125237, "loss": 3.0724, "theoretical_loss": 3.4763386884666483, "tokens_seen": 1709178880 }, { "epoch": 0.61, "learning_rate": 0.00019655694286795309, "loss": 3.1534, "theoretical_loss": 3.4761553961964338, "tokens_seen": 1710227456 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.49260079860687256, "objective/train/docs_used": 967686, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8040316104888916, "objective/train/original_loss": 2.8040313720703125, "objective/train/theoretical_loss": 3.476109595603976, "objective/train/tokens_used": 1730949600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2447163164615631, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505040884017944, "objective/train/weighted_lm_loss": 2.9455554485321045, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9516199231147766, "theoretical_loss": 3.476109595603976, "tokens_seen": 1710489600 }, { "epoch": 0.61, "learning_rate": 0.0001963677639046538, "loss": 3.1436, "theoretical_loss": 3.475972247716588, "tokens_seen": 1711276032 }, { "epoch": 0.61, "learning_rate": 0.00019617858494135453, "loss": 3.0968, "theoretical_loss": 3.475789242826307, "tokens_seen": 1712324608 }, { "epoch": 0.61, "learning_rate": 0.00019598940597805525, "loss": 3.1155, "theoretical_loss": 3.4756063813251883, "tokens_seen": 1713373184 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.48203980922698975, "objective/train/docs_used": 969481, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.023247241973877, "objective/train/original_loss": 3.023247241973877, "objective/train/theoretical_loss": 3.475537845188954, "objective/train/tokens_used": 1734226400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23657125234603882, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494064092636108, "objective/train/weighted_lm_loss": 3.1723108291625977, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9527132511138916, "theoretical_loss": 3.475537845188954, "tokens_seen": 1713766400 }, { "epoch": 0.61, "learning_rate": 0.00019580022701475597, "loss": 3.1438, "theoretical_loss": 3.4754236630132325, "tokens_seen": 1714421760 }, { "epoch": 0.61, "learning_rate": 0.0001956110480514567, "loss": 3.1437, "theoretical_loss": 3.4752410876908413, "tokens_seen": 1715470336 }, { "epoch": 0.61, "learning_rate": 0.0001954218690881574, "loss": 3.1662, "theoretical_loss": 3.475058655158816, "tokens_seen": 1716518912 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.46607887744903564, "objective/train/docs_used": 970822, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1122360229492188, "objective/train/original_loss": 3.1122357845306396, "objective/train/theoretical_loss": 3.47496749237705, "objective/train/tokens_used": 1737503200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2279558926820755, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0477663278579712, "objective/train/weighted_lm_loss": 3.261258602142334, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9518850445747375, "theoretical_loss": 3.47496749237705, "tokens_seen": 1717043200 }, { "epoch": 0.61, "learning_rate": 0.0001952326901248581, "loss": 3.1927, "theoretical_loss": 3.474876365218357, "tokens_seen": 1717567488 }, { "epoch": 0.61, "learning_rate": 0.00019504351116155882, "loss": 3.1399, "theoretical_loss": 3.4746942176710633, "tokens_seen": 1718616064 }, { "epoch": 0.61, "learning_rate": 0.00019485433219825957, "loss": 3.1422, "theoretical_loss": 3.4745122123189294, "tokens_seen": 1719664640 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.4768146872520447, "objective/train/docs_used": 972105, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9560952186584473, "objective/train/original_loss": 2.9560952186584473, "objective/train/theoretical_loss": 3.4743985310945047, "objective/train/tokens_used": 1740780000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2370544970035553, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488862991333008, "objective/train/weighted_lm_loss": 3.099886178970337, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9523165822029114, "theoretical_loss": 3.4743985310945047, "tokens_seen": 1720320000 }, { "epoch": 0.61, "learning_rate": 0.0001946651532349603, "loss": 3.1597, "theoretical_loss": 3.4743303489643473, "tokens_seen": 1720713216 }, { "epoch": 0.61, "learning_rate": 0.000194475974271661, "loss": 3.2761, "theoretical_loss": 3.474148627410102, "tokens_seen": 1721761792 }, { "epoch": 0.62, "learning_rate": 0.0001942867953083617, "loss": 3.1427, "theoretical_loss": 3.4739670474593742, "tokens_seen": 1722810368 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.49066078662872314, "objective/train/docs_used": 974104, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2695088386535645, "objective/train/original_loss": 3.2695088386535645, "objective/train/theoretical_loss": 3.473830955305458, "objective/train/tokens_used": 1744056800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24312639236450195, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503020286560059, "objective/train/weighted_lm_loss": 3.4335639476776123, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9519912004470825, "theoretical_loss": 3.473830955305458, "tokens_seen": 1723596800 }, { "epoch": 0.62, "learning_rate": 0.00019409761634506243, "loss": 3.1443, "theoretical_loss": 3.4737856089157355, "tokens_seen": 1723858944 }, { "epoch": 0.62, "learning_rate": 0.00019390843738176315, "loss": 3.1869, "theoretical_loss": 3.4736043115831507, "tokens_seen": 1724907520 }, { "epoch": 0.62, "learning_rate": 0.0001937192584184639, "loss": 3.1776, "theoretical_loss": 3.4734231552659747, "tokens_seen": 1725956096 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.4791083335876465, "objective/train/docs_used": 976014, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.98252534866333, "objective/train/original_loss": 2.98252534866333, "objective/train/theoretical_loss": 3.4732647590116423, "objective/train/tokens_used": 1747333600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23720155656337738, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491164922714233, "objective/train/weighted_lm_loss": 3.1297645568847656, "objective/train/weights_max": 1.0512185096740723, "objective/train/weights_min": 0.9517265558242798, "theoretical_loss": 3.4732647590116423, "tokens_seen": 1726873600 }, { "epoch": 0.62, "learning_rate": 0.0001935300794551646, "loss": 3.1976, "theoretical_loss": 3.473242139768953, "tokens_seen": 1727004672 }, { "epoch": 0.62, "learning_rate": 0.0001933409004918653, "loss": 3.199, "theoretical_loss": 3.4730612648972174, "tokens_seen": 1728053248 }, { "epoch": 0.62, "learning_rate": 0.00019315172152856603, "loss": 3.2134, "theoretical_loss": 3.4728805304562904, "tokens_seen": 1729101824 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.48143187165260315, "objective/train/docs_used": 978029, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.023312568664551, "objective/train/original_loss": 3.02331280708313, "objective/train/theoretical_loss": 3.472699936252079, "objective/train/tokens_used": 1750610400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23814232647418976, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493534803390503, "objective/train/weighted_lm_loss": 3.172309637069702, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9516406059265137, "theoretical_loss": 3.472699936252079, "tokens_seen": 1730150400 }, { "epoch": 0.62, "learning_rate": 0.00019296254256526675, "loss": 3.1737, "theoretical_loss": 3.472699936252079, "tokens_seen": 1730150400 }, { "epoch": 0.62, "learning_rate": 0.00019277336360196744, "loss": 3.1689, "theoretical_loss": 3.4725194820908776, "tokens_seen": 1731198976 }, { "epoch": 0.62, "learning_rate": 0.0001925841846386682, "loss": 3.1617, "theoretical_loss": 3.4723391677793627, "tokens_seen": 1732247552 }, { "epoch": 0.62, "learning_rate": 0.0001923950056753689, "loss": 3.2193, "theoretical_loss": 3.472158993124598, "tokens_seen": 1733296128 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.4837075471878052, "objective/train/docs_used": 979550, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9834237098693848, "objective/train/original_loss": 2.9834237098693848, "objective/train/theoretical_loss": 3.4721364811027735, "objective/train/tokens_used": 1753887200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23856787383556366, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495834350585938, "objective/train/weighted_lm_loss": 3.1295249462127686, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.956782877445221, "theoretical_loss": 3.4721364811027735, "tokens_seen": 1733427200 }, { "epoch": 0.62, "learning_rate": 0.00019220582671206963, "loss": 3.2026, "theoretical_loss": 3.471978957934027, "tokens_seen": 1734344704 }, { "epoch": 0.62, "learning_rate": 0.00019201664774877035, "loss": 3.223, "theoretical_loss": 3.471799062015476, "tokens_seen": 1735393280 }, { "epoch": 0.62, "learning_rate": 0.00019182746878547105, "loss": 3.2048, "theoretical_loss": 3.4716193051771524, "tokens_seen": 1736441856 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.4862057566642761, "objective/train/docs_used": 981478, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.999570608139038, "objective/train/original_loss": 2.999570608139038, "objective/train/theoretical_loss": 3.4715743876764176, "objective/train/tokens_used": 1757164000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23953025043010712, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498380661010742, "objective/train/weighted_lm_loss": 3.149038076400757, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9523438811302185, "theoretical_loss": 3.4715743876764176, "tokens_seen": 1736704000 }, { "epoch": 0.62, "learning_rate": 0.00019163828982217177, "loss": 3.2133, "theoretical_loss": 3.4714396872276425, "tokens_seen": 1737490432 }, { "epoch": 0.62, "learning_rate": 0.0001914491108588725, "loss": 3.1317, "theoretical_loss": 3.471260207975912, "tokens_seen": 1738539008 }, { "epoch": 0.62, "learning_rate": 0.00019125993189557323, "loss": 3.1341, "theoretical_loss": 3.471080867231304, "tokens_seen": 1739587584 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.47021281719207764, "objective/train/docs_used": 983277, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.848379373550415, "objective/train/original_loss": 2.848379135131836, "objective/train/theoretical_loss": 3.471013650122095, "objective/train/tokens_used": 1760440800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22874988615512848, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481839179992676, "objective/train/weighted_lm_loss": 2.9869155883789062, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9516876339912415, "theoretical_loss": 3.471013650122095, "tokens_seen": 1739980800 }, { "epoch": 0.62, "learning_rate": 0.00019107075293227393, "loss": 3.1987, "theoretical_loss": 3.470901664803538, "tokens_seen": 1740636160 }, { "epoch": 0.62, "learning_rate": 0.00019088157396897465, "loss": 3.1512, "theoretical_loss": 3.470722600502711, "tokens_seen": 1741684736 }, { "epoch": 0.62, "learning_rate": 0.00019069239500567537, "loss": 3.1581, "theoretical_loss": 3.470543674139293, "tokens_seen": 1742733312 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.4912092387676239, "objective/train/docs_used": 985168, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.71474027633667, "objective/train/original_loss": 2.714740753173828, "objective/train/theoretical_loss": 3.470454262624987, "objective/train/tokens_used": 1763717600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24285683035850525, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503555536270142, "objective/train/weighted_lm_loss": 2.8520116806030273, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9635713696479797, "theoretical_loss": 3.470454262624987, "tokens_seen": 1743257600 }, { "epoch": 0.62, "learning_rate": 0.0001905032160423761, "loss": 3.2016, "theoretical_loss": 3.4703648855241283, "tokens_seen": 1743781888 }, { "epoch": 0.62, "learning_rate": 0.00019031403707907684, "loss": 3.1749, "theoretical_loss": 3.470186234468435, "tokens_seen": 1744830464 }, { "epoch": 0.62, "learning_rate": 0.00019012485811577753, "loss": 3.1757, "theoretical_loss": 3.4700077207838023, "tokens_seen": 1745879040 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.4769324064254761, "objective/train/docs_used": 987031, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9645779132843018, "objective/train/original_loss": 2.9645776748657227, "objective/train/theoretical_loss": 3.469896219406081, "objective/train/tokens_used": 1766994400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2403060346841812, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489145517349243, "objective/train/weighted_lm_loss": 3.1101176738739014, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9535593390464783, "theoretical_loss": 3.469896219406081, "tokens_seen": 1746534400 }, { "epoch": 0.62, "learning_rate": 0.00018993567915247825, "loss": 3.2297, "theoretical_loss": 3.4698293442821915, "tokens_seen": 1746927616 }, { "epoch": 0.62, "learning_rate": 0.00018974650018917897, "loss": 3.1923, "theoretical_loss": 3.4696511047759317, "tokens_seen": 1747976192 }, { "epoch": 0.62, "learning_rate": 0.0001895573212258797, "loss": 3.2484, "theoretical_loss": 3.4694730020777245, "tokens_seen": 1749024768 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.48558348417282104, "objective/train/docs_used": 989075, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1397294998168945, "objective/train/original_loss": 3.1397294998168945, "objective/train/theoretical_loss": 3.4693395147218875, "objective/train/tokens_used": 1770271200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24000008404254913, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497783422470093, "objective/train/weighted_lm_loss": 3.2957050800323486, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9543235301971436, "theoretical_loss": 3.4693395147218875, "tokens_seen": 1749811200 }, { "epoch": 0.63, "learning_rate": 0.00018936814226258039, "loss": 3.1974, "theoretical_loss": 3.4692950360006365, "tokens_seen": 1750073344 }, { "epoch": 0.63, "learning_rate": 0.00018917896329928113, "loss": 3.2439, "theoretical_loss": 3.469117206358103, "tokens_seen": 1751121920 }, { "epoch": 0.63, "learning_rate": 0.00018898978433598185, "loss": 3.2455, "theoretical_loss": 3.4689395129639253, "tokens_seen": 1752170496 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.4787355363368988, "objective/train/docs_used": 991265, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.098680257797241, "objective/train/original_loss": 3.098680019378662, "objective/train/theoretical_loss": 3.4687841428641515, "objective/train/tokens_used": 1773548000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24327175319194794, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491098165512085, "objective/train/weighted_lm_loss": 3.2512500286102295, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.951521098613739, "theoretical_loss": 3.4687841428641515, "tokens_seen": 1753088000 }, { "epoch": 0.63, "learning_rate": 0.00018880060537268257, "loss": 3.1711, "theoretical_loss": 3.468761955632271, "tokens_seen": 1753219072 }, { "epoch": 0.63, "learning_rate": 0.00018861142640938327, "loss": 3.2173, "theoretical_loss": 3.4685845341776704, "tokens_seen": 1754267648 }, { "epoch": 0.63, "learning_rate": 0.000188422247446084, "loss": 3.1868, "theoretical_loss": 3.468407248415019, "tokens_seen": 1755316224 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.4866624176502228, "objective/train/docs_used": 993214, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2612428665161133, "objective/train/original_loss": 3.2612428665161133, "objective/train/theoretical_loss": 3.468230098159573, "objective/train/tokens_used": 1776824800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2405889928340912, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498892068862915, "objective/train/weighted_lm_loss": 3.423818826675415, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9539464116096497, "theoretical_loss": 3.468230098159573, "tokens_seen": 1756364800 }, { "epoch": 0.63, "learning_rate": 0.0001882330684827847, "loss": 3.1879, "theoretical_loss": 3.468230098159573, "tokens_seen": 1756364800 }, { "epoch": 0.63, "learning_rate": 0.00018804388951948546, "loss": 3.1916, "theoretical_loss": 3.468053083226952, "tokens_seen": 1757413376 }, { "epoch": 0.63, "learning_rate": 0.00018785471055618618, "loss": 3.169, "theoretical_loss": 3.4678762034331347, "tokens_seen": 1758461952 }, { "epoch": 0.63, "learning_rate": 0.00018766553159288687, "loss": 3.2222, "theoretical_loss": 3.4676994585944616, "tokens_seen": 1759510528 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.48814916610717773, "objective/train/docs_used": 995165, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.884378433227539, "objective/train/original_loss": 2.884378433227539, "objective/train/theoretical_loss": 3.4676773749695275, "objective/train/tokens_used": 1780101600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24147918820381165, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500423908233643, "objective/train/weighted_lm_loss": 3.029207706451416, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9532400369644165, "theoretical_loss": 3.4676773749695275, "tokens_seen": 1759641600 }, { "epoch": 0.63, "learning_rate": 0.0001874763526295876, "loss": 3.1404, "theoretical_loss": 3.4675228485276297, "tokens_seen": 1760559104 }, { "epoch": 0.63, "learning_rate": 0.0001872871736662883, "loss": 3.2087, "theoretical_loss": 3.467346373049696, "tokens_seen": 1761607680 }, { "epoch": 0.63, "learning_rate": 0.00018709799470298903, "loss": 3.1406, "theoretical_loss": 3.467170031978074, "tokens_seen": 1762656256 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.4851972758769989, "objective/train/docs_used": 997036, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.950261116027832, "objective/train/original_loss": 2.950261116027832, "objective/train/theoretical_loss": 3.4671259676897908, "objective/train/tokens_used": 1783378400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23831064999103546, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497310161590576, "objective/train/weighted_lm_loss": 3.0977723598480225, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9909802675247192, "theoretical_loss": 3.4671259676897908, "tokens_seen": 1762918400 }, { "epoch": 0.63, "learning_rate": 0.00018690881573968975, "loss": 3.1547, "theoretical_loss": 3.4669938251305314, "tokens_seen": 1763704832 }, { "epoch": 0.63, "learning_rate": 0.00018671963677639047, "loss": 3.1313, "theoretical_loss": 3.4668177523251944, "tokens_seen": 1764753408 }, { "epoch": 0.63, "learning_rate": 0.0001865304578130912, "loss": 3.1446, "theoretical_loss": 3.466641813380541, "tokens_seen": 1765801984 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.47915053367614746, "objective/train/docs_used": 998803, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.053201198577881, "objective/train/original_loss": 3.0532007217407227, "objective/train/theoretical_loss": 3.4665758707502654, "objective/train/tokens_used": 1786655200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23946774005889893, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491321086883545, "objective/train/weighted_lm_loss": 3.2024171352386475, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9516938328742981, "theoretical_loss": 3.4665758707502654, "tokens_seen": 1766195200 }, { "epoch": 0.63, "learning_rate": 0.00018634127884979191, "loss": 3.1561, "theoretical_loss": 3.466466008115404, "tokens_seen": 1766850560 }, { "epoch": 0.63, "learning_rate": 0.0001861520998864926, "loss": 3.1794, "theoretical_loss": 3.4662903363489677, "tokens_seen": 1767899136 }, { "epoch": 0.63, "learning_rate": 0.00018596292092319333, "loss": 3.1129, "theoretical_loss": 3.4661147979007687, "tokens_seen": 1768947712 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.4781850278377533, "objective/train/docs_used": 999986, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.99733304977417, "objective/train/original_loss": 2.99733304977417, "objective/train/theoretical_loss": 3.466027078614709, "objective/train/tokens_used": 1789932000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2370835840702057, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490233898162842, "objective/train/weighted_lm_loss": 3.1454968452453613, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9598371982574463, "theoretical_loss": 3.466027078614709, "tokens_seen": 1769472000 }, { "epoch": 0.63, "learning_rate": 0.00018577374195989408, "loss": 3.1839, "theoretical_loss": 3.4659393925906943, "tokens_seen": 1769996288 }, { "epoch": 0.63, "learning_rate": 0.0001855845629965948, "loss": 3.1999, "theoretical_loss": 3.4657641202389815, "tokens_seen": 1771044864 }, { "epoch": 0.63, "learning_rate": 0.00018539538403329552, "loss": 3.177, "theoretical_loss": 3.465588980666216, "tokens_seen": 1772093440 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.4717888832092285, "objective/train/docs_used": 1001522, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.009805917739868, "objective/train/original_loss": 3.0098061561584473, "objective/train/theoretical_loss": 3.465479585780467, "objective/train/tokens_used": 1793208800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2324257791042328, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0483598709106445, "objective/train/weighted_lm_loss": 3.155412435531616, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9513979554176331, "theoretical_loss": 3.465479585780467, "tokens_seen": 1772748800 }, { "epoch": 0.63, "learning_rate": 0.0001852062050699962, "loss": 3.1154, "theoretical_loss": 3.4654139736933325, "tokens_seen": 1773142016 }, { "epoch": 0.63, "learning_rate": 0.00018501702610669693, "loss": 3.1981, "theoretical_loss": 3.4652390991416118, "tokens_seen": 1774190592 }, { "epoch": 0.63, "learning_rate": 0.00018482784714339765, "loss": 3.1889, "theoretical_loss": 3.4650643568326815, "tokens_seen": 1775239168 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.4764856994152069, "objective/train/docs_used": 1003498, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7835757732391357, "objective/train/original_loss": 2.7835755348205566, "objective/train/theoretical_loss": 3.4649333867782075, "objective/train/tokens_used": 1796485600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2330160140991211, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048832893371582, "objective/train/weighted_lm_loss": 2.920579671859741, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9544862508773804, "theoretical_loss": 3.4649333867782075, "tokens_seen": 1776025600 }, { "epoch": 0.63, "learning_rate": 0.0001846386681800984, "loss": 3.1457, "theoretical_loss": 3.464889746588515, "tokens_seen": 1776287744 }, { "epoch": 0.63, "learning_rate": 0.0001844494892167991, "loss": 3.1293, "theoretical_loss": 3.464715268231429, "tokens_seen": 1777336320 }, { "epoch": 0.64, "learning_rate": 0.0001842603102534998, "loss": 3.1338, "theoretical_loss": 3.464540921584086, "tokens_seen": 1778384896 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.4912343919277191, "objective/train/docs_used": 1005457, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6683099269866943, "objective/train/original_loss": 2.6683099269866943, "objective/train/theoretical_loss": 3.464388476171658, "objective/train/tokens_used": 1799762400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24345652759075165, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503610372543335, "objective/train/weighted_lm_loss": 2.8025920391082764, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9594215154647827, "theoretical_loss": 3.464388476171658, "tokens_seen": 1779302400 }, { "epoch": 0.64, "learning_rate": 0.00018407113129020053, "loss": 3.1607, "theoretical_loss": 3.464366706469489, "tokens_seen": 1779433472 }, { "epoch": 0.64, "learning_rate": 0.00018388195232690125, "loss": 3.0636, "theoretical_loss": 3.4641926227109856, "tokens_seen": 1780482048 }, { "epoch": 0.64, "learning_rate": 0.00018369277336360197, "loss": 3.0864, "theoretical_loss": 3.464018670132262, "tokens_seen": 1781530624 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.47258418798446655, "objective/train/docs_used": 1007648, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7945456504821777, "objective/train/original_loss": 2.7945456504821777, "objective/train/theoretical_loss": 3.463844848557345, "objective/train/tokens_used": 1803039200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23542162775993347, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484546422958374, "objective/train/weighted_lm_loss": 2.9284746646881104, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9513460397720337, "theoretical_loss": 3.463844848557345, "tokens_seen": 1782579200 }, { "epoch": 0.64, "learning_rate": 0.0001835035944003027, "loss": 3.1216, "theoretical_loss": 3.463844848557345, "tokens_seen": 1782579200 }, { "epoch": 0.64, "learning_rate": 0.00018331441543700342, "loss": 3.1505, "theoretical_loss": 3.4636711578106034, "tokens_seen": 1783627776 }, { "epoch": 0.64, "learning_rate": 0.00018312523647370414, "loss": 3.0778, "theoretical_loss": 3.4634975977167413, "tokens_seen": 1784676352 }, { "epoch": 0.64, "learning_rate": 0.00018293605751040486, "loss": 3.1852, "theoretical_loss": 3.463324168100802, "tokens_seen": 1785724928 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.4785902500152588, "objective/train/docs_used": 1009512, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8947219848632812, "objective/train/original_loss": 2.8947219848632812, "objective/train/theoretical_loss": 3.463302498564338, "objective/train/tokens_used": 1806316000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23424488306045532, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490496158599854, "objective/train/weighted_lm_loss": 3.0360755920410156, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9536494016647339, "theoretical_loss": 3.463302498564338, "tokens_seen": 1785856000 }, { "epoch": 0.64, "learning_rate": 0.00018274687854710555, "loss": 3.1466, "theoretical_loss": 3.463150868788165, "tokens_seen": 1786773504 }, { "epoch": 0.64, "learning_rate": 0.00018255769958380627, "loss": 3.1598, "theoretical_loss": 3.4629776996045476, "tokens_seen": 1787822080 }, { "epoch": 0.64, "learning_rate": 0.00018236852062050702, "loss": 3.1194, "theoretical_loss": 3.462804660376, "tokens_seen": 1788870656 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.48804527521133423, "objective/train/docs_used": 1011320, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.961782217025757, "objective/train/original_loss": 2.961782932281494, "objective/train/theoretical_loss": 3.46276142085399, "objective/train/tokens_used": 1809592800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2429705113172531, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050039529800415, "objective/train/weighted_lm_loss": 3.109548807144165, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9592433571815491, "theoretical_loss": 3.46276142085399, "tokens_seen": 1789132800 }, { "epoch": 0.64, "learning_rate": 0.00018217934165720774, "loss": 3.1391, "theoretical_loss": 3.4626317509289075, "tokens_seen": 1789919232 }, { "epoch": 0.64, "learning_rate": 0.00018199016269390843, "loss": 3.1193, "theoretical_loss": 3.462458971089989, "tokens_seen": 1790967808 }, { "epoch": 0.64, "learning_rate": 0.00018180098373060915, "loss": 3.1471, "theoretical_loss": 3.462286320686297, "tokens_seen": 1792016384 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.4847014844417572, "objective/train/docs_used": 1013493, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9981818199157715, "objective/train/original_loss": 2.9981818199157715, "objective/train/theoretical_loss": 3.4622216101196894, "objective/train/tokens_used": 1812869600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2426302582025528, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497032403945923, "objective/train/weighted_lm_loss": 3.147491455078125, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.951403021812439, "theoretical_loss": 3.4622216101196894, "tokens_seen": 1792409600 }, { "epoch": 0.64, "learning_rate": 0.00018161180476730987, "loss": 3.1424, "theoretical_loss": 3.4621137995452136, "tokens_seen": 1793064960 }, { "epoch": 0.64, "learning_rate": 0.0001814226258040106, "loss": 3.0922, "theoretical_loss": 3.4619414074944537, "tokens_seen": 1794113536 }, { "epoch": 0.64, "learning_rate": 0.00018123344684071134, "loss": 3.1283, "theoretical_loss": 3.4617691443620617, "tokens_seen": 1795162112 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.4758584201335907, "objective/train/docs_used": 1015450, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7228169441223145, "objective/train/original_loss": 2.7228169441223145, "objective/train/theoretical_loss": 3.4616830610866076, "objective/train/tokens_used": 1816146400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23320035636425018, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487710237503052, "objective/train/weighted_lm_loss": 2.856950283050537, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9522925615310669, "theoretical_loss": 3.4616830610866076, "tokens_seen": 1795686400 }, { "epoch": 0.64, "learning_rate": 0.00018104426787741203, "loss": 3.1451, "theoretical_loss": 3.4615970099764115, "tokens_seen": 1796210688 }, { "epoch": 0.64, "learning_rate": 0.00018085508891411276, "loss": 3.1009, "theoretical_loss": 3.4614250041662054, "tokens_seen": 1797259264 }, { "epoch": 0.64, "learning_rate": 0.00018066590995081348, "loss": 3.0992, "theoretical_loss": 3.4612531267604734, "tokens_seen": 1798307840 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.480893075466156, "objective/train/docs_used": 1017085, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.92893385887146, "objective/train/original_loss": 2.928933620452881, "objective/train/theoretical_loss": 3.461145768511451, "objective/train/tokens_used": 1819423200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2371511161327362, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492945909500122, "objective/train/weighted_lm_loss": 3.072615623474121, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9612343907356262, "theoretical_loss": 3.461145768511451, "tokens_seen": 1798963200 }, { "epoch": 0.64, "learning_rate": 0.0001804767309875142, "loss": 3.131, "theoretical_loss": 3.461081377588572, "tokens_seen": 1799356416 }, { "epoch": 0.64, "learning_rate": 0.0001802875520242149, "loss": 3.0904, "theoretical_loss": 3.4609097564801843, "tokens_seen": 1800404992 }, { "epoch": 0.64, "learning_rate": 0.00018009837306091564, "loss": 3.0347, "theoretical_loss": 3.460738263265319, "tokens_seen": 1801453568 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.4862983524799347, "objective/train/docs_used": 1018281, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.062450885772705, "objective/train/original_loss": 3.062450647354126, "objective/train/theoretical_loss": 3.4606097271822156, "objective/train/tokens_used": 1822700000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24067695438861847, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498532056808472, "objective/train/weighted_lm_loss": 3.2150371074676514, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9518213272094727, "theoretical_loss": 3.4606097271822156, "tokens_seen": 1802240000 }, { "epoch": 0.64, "learning_rate": 0.00017990919409761636, "loss": 3.1488, "theoretical_loss": 3.4605668977743074, "tokens_seen": 1802502144 }, { "epoch": 0.64, "learning_rate": 0.00017972001513431708, "loss": 3.0774, "theoretical_loss": 3.4603956598378067, "tokens_seen": 1803550720 }, { "epoch": 0.64, "learning_rate": 0.0001795308361710178, "loss": 3.0499, "theoretical_loss": 3.4602245492867962, "tokens_seen": 1804599296 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.49369117617607117, "objective/train/docs_used": 1020317, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0229692459106445, "objective/train/original_loss": 3.0229697227478027, "objective/train/theoretical_loss": 3.4600749319179434, "objective/train/tokens_used": 1825976800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24504995346069336, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506149530410767, "objective/train/weighted_lm_loss": 3.1760122776031494, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9538986086845398, "theoretical_loss": 3.4600749319179434, "tokens_seen": 1805516800 }, { "epoch": 0.64, "learning_rate": 0.0001793416572077185, "loss": 3.1218, "theoretical_loss": 3.4600535659525757, "tokens_seen": 1805647872 }, { "epoch": 0.65, "learning_rate": 0.0001791524782444192, "loss": 3.1079, "theoretical_loss": 3.4598827096667684, "tokens_seen": 1806696448 }, { "epoch": 0.65, "learning_rate": 0.00017896329928111996, "loss": 3.0629, "theoretical_loss": 3.459711980261316, "tokens_seen": 1807745024 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.47898003458976746, "objective/train/docs_used": 1022235, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0741958618164062, "objective/train/original_loss": 3.074195384979248, "objective/train/theoretical_loss": 3.459541377568482, "objective/train/tokens_used": 1829253600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23444890975952148, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490895509719849, "objective/train/weighted_lm_loss": 3.2251052856445312, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9592746496200562, "theoretical_loss": 3.459541377568482, "tokens_seen": 1808793600 }, { "epoch": 0.65, "learning_rate": 0.00017877412031782068, "loss": 3.043, "theoretical_loss": 3.459541377568482, "tokens_seen": 1808793600 }, { "epoch": 0.65, "learning_rate": 0.00017858494135452138, "loss": 3.0361, "theoretical_loss": 3.4593709014208462, "tokens_seen": 1809842176 }, { "epoch": 0.65, "learning_rate": 0.0001783957623912221, "loss": 3.0718, "theoretical_loss": 3.4592005516513087, "tokens_seen": 1810890752 }, { "epoch": 0.65, "learning_rate": 0.00017820658342792282, "loss": 3.0793, "theoretical_loss": 3.4590303280930854, "tokens_seen": 1811939328 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.47567683458328247, "objective/train/docs_used": 1024230, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0362322330474854, "objective/train/original_loss": 3.0362324714660645, "objective/train/theoretical_loss": 3.4590090590142464, "objective/train/tokens_used": 1832530400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23685020208358765, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048771619796753, "objective/train/weighted_lm_loss": 3.1860275268554688, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9578182697296143, "theoretical_loss": 3.4590090590142464, "tokens_seen": 1812070400 }, { "epoch": 0.65, "learning_rate": 0.00017801740446462354, "loss": 3.0886, "theoretical_loss": 3.4588602305797096, "tokens_seen": 1812987904 }, { "epoch": 0.65, "learning_rate": 0.00017782822550132426, "loss": 3.0809, "theoretical_loss": 3.458690258945029, "tokens_seen": 1814036480 }, { "epoch": 0.65, "learning_rate": 0.00017763904653802498, "loss": 3.1236, "theoretical_loss": 3.458520413023207, "tokens_seen": 1815085056 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.4894339442253113, "objective/train/docs_used": 1026340, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.314352512359619, "objective/train/original_loss": 3.31435227394104, "objective/train/theoretical_loss": 3.4584779711659817, "objective/train/tokens_used": 1835807200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24342581629753113, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501807928085327, "objective/train/weighted_lm_loss": 3.4804110527038574, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9533761739730835, "theoretical_loss": 3.4584779711659817, "tokens_seen": 1815347200 }, { "epoch": 0.65, "learning_rate": 0.0001774498675747257, "loss": 3.1066, "theoretical_loss": 3.458350692648722, "tokens_seen": 1816133632 }, { "epoch": 0.65, "learning_rate": 0.00017726068861142642, "loss": 3.0877, "theoretical_loss": 3.4581810976563645, "tokens_seen": 1817182208 }, { "epoch": 0.65, "learning_rate": 0.00017707150964812714, "loss": 3.1435, "theoretical_loss": 3.4580116278812376, "tokens_seen": 1818230784 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.49165356159210205, "objective/train/docs_used": 1027487, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.3668980598449707, "objective/train/original_loss": 2.3668980598449707, "objective/train/theoretical_loss": 3.4579481089645308, "objective/train/tokens_used": 1839084000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24341939389705658, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504027605056763, "objective/train/weighted_lm_loss": 2.486278533935547, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.9554612040519714, "theoretical_loss": 3.4579481089645308, "tokens_seen": 1818624000 }, { "epoch": 0.65, "learning_rate": 0.00017688233068482783, "loss": 3.155, "theoretical_loss": 3.457842283158757, "tokens_seen": 1819279360 }, { "epoch": 0.65, "learning_rate": 0.00017669315172152858, "loss": 3.179, "theoretical_loss": 3.457673063324649, "tokens_seen": 1820327936 }, { "epoch": 0.65, "learning_rate": 0.0001765039727582293, "loss": 3.1094, "theoretical_loss": 3.4575039682149495, "tokens_seen": 1821376512 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.4697323441505432, "objective/train/docs_used": 1029511, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7002482414245605, "objective/train/original_loss": 2.7002482414245605, "objective/train/theoretical_loss": 3.457419467380599, "objective/train/tokens_used": 1842360800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23787598311901093, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481818914413452, "objective/train/weighted_lm_loss": 2.829710006713867, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9514086842536926, "theoretical_loss": 3.457419467380599, "tokens_seen": 1821900800 }, { "epoch": 0.65, "learning_rate": 0.00017631479379493002, "loss": 3.2005, "theoretical_loss": 3.4573349976660053, "tokens_seen": 1822425088 }, { "epoch": 0.65, "learning_rate": 0.00017612561483163072, "loss": 3.0605, "theoretical_loss": 3.4571661515144703, "tokens_seen": 1823473664 }, { "epoch": 0.65, "learning_rate": 0.00017593643586833144, "loss": 3.1137, "theoretical_loss": 3.4569974295973083, "tokens_seen": 1824522240 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.48513007164001465, "objective/train/docs_used": 1031319, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.138819456100464, "objective/train/original_loss": 3.1388192176818848, "objective/train/theoretical_loss": 3.4568920414145294, "objective/train/tokens_used": 1845637600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2404634803533554, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049735188484192, "objective/train/weighted_lm_loss": 3.294522285461426, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9532184600830078, "theoretical_loss": 3.4568920414145294, "tokens_seen": 1825177600 }, { "epoch": 0.65, "learning_rate": 0.00017574725690503216, "loss": 3.1527, "theoretical_loss": 3.456828831751788, "tokens_seen": 1825570816 }, { "epoch": 0.65, "learning_rate": 0.0001755580779417329, "loss": 3.0631, "theoretical_loss": 3.4566603578154877, "tokens_seen": 1826619392 }, { "epoch": 0.65, "learning_rate": 0.00017536889897843362, "loss": 3.1108, "theoretical_loss": 3.456492007626288, "tokens_seen": 1827667968 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.48329445719718933, "objective/train/docs_used": 1033212, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.762449026107788, "objective/train/original_loss": 2.762449264526367, "objective/train/theoretical_loss": 3.4563658260960706, "objective/train/tokens_used": 1848914400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23659563064575195, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495319366455078, "objective/train/weighted_lm_loss": 2.900442123413086, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9571607708930969, "theoretical_loss": 3.4563658260960706, "tokens_seen": 1828454400 }, { "epoch": 0.65, "learning_rate": 0.00017517972001513432, "loss": 3.174, "theoretical_loss": 3.456323781022376, "tokens_seen": 1828716544 }, { "epoch": 0.65, "learning_rate": 0.00017499054105183504, "loss": 3.0913, "theoretical_loss": 3.456155677842244, "tokens_seen": 1829765120 }, { "epoch": 0.65, "learning_rate": 0.00017480136208853576, "loss": 3.086, "theoretical_loss": 3.455987697924686, "tokens_seen": 1830813696 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.49024006724357605, "objective/train/docs_used": 1034888, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1704938411712646, "objective/train/original_loss": 3.1704936027526855, "objective/train/theoretical_loss": 3.455840816484155, "objective/train/tokens_used": 1852191200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24271473288536072, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050257921218872, "objective/train/weighted_lm_loss": 3.3301610946655273, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9584827423095703, "theoretical_loss": 3.455840816484155, "tokens_seen": 1831731200 }, { "epoch": 0.65, "learning_rate": 0.00017461218312523648, "loss": 3.114, "theoretical_loss": 3.4558198411088004, "tokens_seen": 1831862272 }, { "epoch": 0.65, "learning_rate": 0.0001744230041619372, "loss": 3.151, "theoretical_loss": 3.4556521072339854, "tokens_seen": 1832910848 }, { "epoch": 0.66, "learning_rate": 0.00017423382519863792, "loss": 3.1021, "theoretical_loss": 3.455484496139943, "tokens_seen": 1833959424 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.48320844769477844, "objective/train/docs_used": 1036847, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1283445358276367, "objective/train/original_loss": 3.1283445358276367, "objective/train/theoretical_loss": 3.4553170076666744, "objective/train/tokens_used": 1855468000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24146130681037903, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495481491088867, "objective/train/weighted_lm_loss": 3.282777786254883, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9517518877983093, "theoretical_loss": 3.4553170076666744, "tokens_seen": 1835008000 }, { "epoch": 0.66, "learning_rate": 0.00017404464623533864, "loss": 3.0562, "theoretical_loss": 3.4553170076666744, "tokens_seen": 1835008000 }, { "epoch": 0.66, "learning_rate": 0.00017385546727203936, "loss": 3.0563, "theoretical_loss": 3.4551496416544794, "tokens_seen": 1836056576 }, { "epoch": 0.66, "learning_rate": 0.00017366628830874006, "loss": 3.1021, "theoretical_loss": 3.4549823979439593, "tokens_seen": 1837105152 }, { "epoch": 0.66, "learning_rate": 0.00017347710934544078, "loss": 3.1223, "theoretical_loss": 3.454815276376012, "tokens_seen": 1838153728 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.4860526919364929, "objective/train/docs_used": 1038918, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.040283679962158, "objective/train/original_loss": 3.040283679962158, "objective/train/theoretical_loss": 3.454794394760256, "objective/train/tokens_used": 1858744800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2394542098045349, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049822449684143, "objective/train/weighted_lm_loss": 3.1913719177246094, "objective/train/weights_max": 1.0512157678604126, "objective/train/weights_min": 0.9641718864440918, "theoretical_loss": 3.454794394760256, "tokens_seen": 1838284800 }, { "epoch": 0.66, "learning_rate": 0.00017328793038214152, "loss": 3.1416, "theoretical_loss": 3.4546482767918336, "tokens_seen": 1839202304 }, { "epoch": 0.66, "learning_rate": 0.00017309875141884224, "loss": 3.1489, "theoretical_loss": 3.4544813990329173, "tokens_seen": 1840250880 }, { "epoch": 0.66, "learning_rate": 0.00017290957245554296, "loss": 3.0767, "theoretical_loss": 3.4543146429410516, "tokens_seen": 1841299456 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.4855334758758545, "objective/train/docs_used": 1041004, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6250319480895996, "objective/train/original_loss": 2.6250319480895996, "objective/train/theoretical_loss": 3.4542729729100494, "objective/train/tokens_used": 1862021600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2389741688966751, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497679710388184, "objective/train/weighted_lm_loss": 2.7558908462524414, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9570268988609314, "theoretical_loss": 3.4542729729100494, "tokens_seen": 1841561600 }, { "epoch": 0.66, "learning_rate": 0.00017272039349224366, "loss": 3.0565, "theoretical_loss": 3.4541480083583203, "tokens_seen": 1842348032 }, { "epoch": 0.66, "learning_rate": 0.00017253121452894438, "loss": 3.1441, "theoretical_loss": 3.453981495127104, "tokens_seen": 1843396608 }, { "epoch": 0.66, "learning_rate": 0.0001723420355656451, "loss": 3.1785, "theoretical_loss": 3.453815103090075, "tokens_seen": 1844445184 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.4911976158618927, "objective/train/docs_used": 1042471, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1483094692230225, "objective/train/original_loss": 3.1483097076416016, "objective/train/theoretical_loss": 3.4537527372895047, "objective/train/tokens_used": 1865298400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24456915259361267, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503631830215454, "objective/train/weighted_lm_loss": 3.3062546253204346, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9537386298179626, "theoretical_loss": 3.4537527372895047, "tokens_seen": 1844838400 }, { "epoch": 0.66, "learning_rate": 0.00017215285660234585, "loss": 3.1755, "theoretical_loss": 3.453648832090199, "tokens_seen": 1845493760 }, { "epoch": 0.66, "learning_rate": 0.00017196367763904654, "loss": 3.1402, "theoretical_loss": 3.453482681970735, "tokens_seen": 1846542336 }, { "epoch": 0.66, "learning_rate": 0.00017177449867574726, "loss": 3.1996, "theoretical_loss": 3.453316652575235, "tokens_seen": 1847590912 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.4777521789073944, "objective/train/docs_used": 1043966, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8465492725372314, "objective/train/original_loss": 2.8465495109558105, "objective/train/theoretical_loss": 3.45323368310016, "objective/train/tokens_used": 1868575200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2377437949180603, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489834547042847, "objective/train/weighted_lm_loss": 2.9855000972747803, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.95146644115448, "theoretical_loss": 3.45323368310016, "tokens_seen": 1848115200 }, { "epoch": 0.66, "learning_rate": 0.00017158531971244798, "loss": 3.1897, "theoretical_loss": 3.453150743747539, "tokens_seen": 1848639488 }, { "epoch": 0.66, "learning_rate": 0.0001713961407491487, "loss": 3.1487, "theoretical_loss": 3.4529849553317806, "tokens_seen": 1849688064 }, { "epoch": 0.66, "learning_rate": 0.0001712069617858494, "loss": 3.1324, "theoretical_loss": 3.4528192871723813, "tokens_seen": 1850736640 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.49022161960601807, "objective/train/docs_used": 1045702, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6801793575286865, "objective/train/original_loss": 2.6801795959472656, "objective/train/theoretical_loss": 3.452715805571427, "objective/train/tokens_used": 1871852000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24245530366897583, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502547025680542, "objective/train/weighted_lm_loss": 2.814542293548584, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9725965261459351, "theoretical_loss": 3.452715805571427, "tokens_seen": 1851392000 }, { "epoch": 0.66, "learning_rate": 0.00017101778282255014, "loss": 3.1696, "theoretical_loss": 3.4526537391140524, "tokens_seen": 1851785216 }, { "epoch": 0.66, "learning_rate": 0.00017082860385925086, "loss": 3.226, "theoretical_loss": 3.452488311001792, "tokens_seen": 1852833792 }, { "epoch": 0.66, "learning_rate": 0.00017063942489595158, "loss": 3.1755, "theoretical_loss": 3.452323002680888, "tokens_seen": 1853882368 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.4886515140533447, "objective/train/docs_used": 1047742, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.119795083999634, "objective/train/original_loss": 3.119795322418213, "objective/train/theoretical_loss": 3.4521990999603807, "objective/train/tokens_used": 1875128800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24155665934085846, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500929355621338, "objective/train/weighted_lm_loss": 3.276261806488037, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9518893957138062, "theoretical_loss": 3.4521990999603807, "tokens_seen": 1854668800 }, { "epoch": 0.66, "learning_rate": 0.0001704502459326523, "loss": 3.1549, "theoretical_loss": 3.452157813996915, "tokens_seen": 1854930944 }, { "epoch": 0.66, "learning_rate": 0.000170261066969353, "loss": 3.1942, "theoretical_loss": 3.4519927447957315, "tokens_seen": 1855979520 }, { "epoch": 0.66, "learning_rate": 0.00017007188800605372, "loss": 3.2162, "theoretical_loss": 3.4518277949234837, "tokens_seen": 1857028096 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.4771397113800049, "objective/train/docs_used": 1049474, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.327524185180664, "objective/train/original_loss": 3.327524185180664, "objective/train/theoretical_loss": 3.451683561551551, "objective/train/tokens_used": 1878405600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2344101220369339, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048905372619629, "objective/train/weighted_lm_loss": 3.490372896194458, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9525474309921265, "theoretical_loss": 3.451683561551551, "tokens_seen": 1857945600 }, { "epoch": 0.66, "learning_rate": 0.00016988270904275447, "loss": 3.1831, "theoretical_loss": 3.451662964226602, "tokens_seen": 1858076672 }, { "epoch": 0.66, "learning_rate": 0.0001696935300794552, "loss": 3.249, "theoretical_loss": 3.4514982525518008, "tokens_seen": 1859125248 }, { "epoch": 0.66, "learning_rate": 0.00016950435111615588, "loss": 3.2283, "theoretical_loss": 3.451333659746079, "tokens_seen": 1860173824 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.48431384563446045, "objective/train/docs_used": 1051410, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.301570177078247, "objective/train/original_loss": 3.301570177078247, "objective/train/theoretical_loss": 3.451169185656717, "objective/train/tokens_used": 1881682400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23943281173706055, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049648404121399, "objective/train/weighted_lm_loss": 3.4656476974487305, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.9544273614883423, "theoretical_loss": 3.451169185656717, "tokens_seen": 1861222400 }, { "epoch": 0.66, "learning_rate": 0.0001693151721528566, "loss": 3.2445, "theoretical_loss": 3.451169185656717, "tokens_seen": 1861222400 }, { "epoch": 0.67, "learning_rate": 0.00016912599318955732, "loss": 3.0725, "theoretical_loss": 3.451004830131277, "tokens_seen": 1862270976 }, { "epoch": 0.67, "learning_rate": 0.00016893681422625804, "loss": 3.2268, "theoretical_loss": 3.450840593017604, "tokens_seen": 1863319552 }, { "epoch": 0.67, "learning_rate": 0.0001687476352629588, "loss": 3.2105, "theoretical_loss": 3.4506764741638234, "tokens_seen": 1864368128 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.49058830738067627, "objective/train/docs_used": 1053575, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6096420288085938, "objective/train/original_loss": 2.6096420288085938, "objective/train/theoretical_loss": 3.450655967614696, "objective/train/tokens_used": 1884959200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24194616079330444, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502886772155762, "objective/train/weighted_lm_loss": 2.7413790225982666, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9557876586914062, "theoretical_loss": 3.450655967614696, "tokens_seen": 1864499200 }, { "epoch": 0.67, "learning_rate": 0.00016855845629965948, "loss": 3.1053, "theoretical_loss": 3.45051247341834, "tokens_seen": 1865416704 }, { "epoch": 0.67, "learning_rate": 0.0001683692773363602, "loss": 3.1242, "theoretical_loss": 3.4503485906298383, "tokens_seen": 1866465280 }, { "epoch": 0.67, "learning_rate": 0.00016818009837306092, "loss": 3.1477, "theoretical_loss": 3.450184825647282, "tokens_seen": 1867513856 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.4847777783870697, "objective/train/docs_used": 1055519, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1753129959106445, "objective/train/original_loss": 3.1753129959106445, "objective/train/theoretical_loss": 3.4501439027911487, "objective/train/tokens_used": 1888236000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24292968213558197, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497125387191772, "objective/train/weighted_lm_loss": 3.332350730895996, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9515226483345032, "theoretical_loss": 3.4501439027911487, "tokens_seen": 1867776000 }, { "epoch": 0.67, "learning_rate": 0.00016799091940976164, "loss": 3.1434, "theoretical_loss": 3.450021178319912, "tokens_seen": 1868562432 }, { "epoch": 0.67, "learning_rate": 0.00016780174044646234, "loss": 3.1115, "theoretical_loss": 3.4498576484972476, "tokens_seen": 1869611008 }, { "epoch": 0.67, "learning_rate": 0.00016761256148316309, "loss": 3.0889, "theoretical_loss": 3.4496942360290843, "tokens_seen": 1870659584 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.4783223867416382, "objective/train/docs_used": 1057572, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.028226375579834, "objective/train/original_loss": 3.028226137161255, "objective/train/theoretical_loss": 3.4496329865783713, "objective/train/tokens_used": 1891512800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23674528300762177, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490355491638184, "objective/train/weighted_lm_loss": 3.176835060119629, "objective/train/weights_max": 1.0512185096740723, "objective/train/weights_min": 0.951676070690155, "theoretical_loss": 3.4496329865783713, "tokens_seen": 1871052800 }, { "epoch": 0.67, "learning_rate": 0.0001674233825198638, "loss": 3.1617, "theoretical_loss": 3.4495309407654937, "tokens_seen": 1871708160 }, { "epoch": 0.67, "learning_rate": 0.00016723420355656453, "loss": 3.1119, "theoretical_loss": 3.449367762556822, "tokens_seen": 1872756736 }, { "epoch": 0.67, "learning_rate": 0.00016704502459326522, "loss": 3.0936, "theoretical_loss": 3.449204701253692, "tokens_seen": 1873805312 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.4878344237804413, "objective/train/docs_used": 1059653, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6503984928131104, "objective/train/original_loss": 2.6503987312316895, "objective/train/theoretical_loss": 3.449123214395099, "objective/train/tokens_used": 1894789600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24199466407299042, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500136613845825, "objective/train/weighted_lm_loss": 2.782848358154297, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9590559005737305, "theoretical_loss": 3.449123214395099, "tokens_seen": 1874329600 }, { "epoch": 0.67, "learning_rate": 0.00016685584562996594, "loss": 3.1029, "theoretical_loss": 3.4490417567069986, "tokens_seen": 1874853888 }, { "epoch": 0.67, "learning_rate": 0.00016666666666666666, "loss": 3.096, "theoretical_loss": 3.4488789287679116, "tokens_seen": 1875902464 }, { "epoch": 0.67, "learning_rate": 0.00016647748770336738, "loss": 3.0593, "theoretical_loss": 3.4487162172878723, "tokens_seen": 1876951040 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.48306140303611755, "objective/train/docs_used": 1061454, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.986534833908081, "objective/train/original_loss": 2.986534595489502, "objective/train/theoretical_loss": 3.4486145816863085, "objective/train/tokens_used": 1898066400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2385236769914627, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495185852050781, "objective/train/weighted_lm_loss": 3.1347832679748535, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9519363045692444, "theoretical_loss": 3.4486145816863085, "tokens_seen": 1877606400 }, { "epoch": 0.67, "learning_rate": 0.00016628830874006813, "loss": 3.0675, "theoretical_loss": 3.4485536221185957, "tokens_seen": 1877999616 }, { "epoch": 0.67, "learning_rate": 0.00016609912977676882, "loss": 3.0906, "theoretical_loss": 3.448391143112067, "tokens_seen": 1879048192 }, { "epoch": 0.67, "learning_rate": 0.00016590995081346954, "loss": 3.1067, "theoretical_loss": 3.4482287801205422, "tokens_seen": 1880096768 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.48760080337524414, "objective/train/docs_used": 1063292, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.3369407653808594, "objective/train/original_loss": 2.3369412422180176, "objective/train/theoretical_loss": 3.448107083923021, "objective/train/tokens_used": 1901343200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23860259354114532, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499727725982666, "objective/train/weighted_lm_loss": 2.45414400100708, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9833489656448364, "theoretical_loss": 3.448107083923021, "tokens_seen": 1880883200 }, { "epoch": 0.67, "learning_rate": 0.00016572077185017026, "loss": 3.0392, "theoretical_loss": 3.4480665329965485, "tokens_seen": 1881145344 }, { "epoch": 0.67, "learning_rate": 0.00016553159288687098, "loss": 3.0593, "theoretical_loss": 3.447904401592882, "tokens_seen": 1882193920 }, { "epoch": 0.67, "learning_rate": 0.00016534241392357168, "loss": 3.0361, "theoretical_loss": 3.4477423857626066, "tokens_seen": 1883242496 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.48698893189430237, "objective/train/docs_used": 1065366, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.880833148956299, "objective/train/original_loss": 2.880833148956299, "objective/train/theoretical_loss": 3.447600716602108, "objective/train/tokens_used": 1904620000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23971621692180634, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499173402786255, "objective/train/weighted_lm_loss": 3.0245883464813232, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9548348784446716, "theoretical_loss": 3.447600716602108, "tokens_seen": 1884160000 }, { "epoch": 0.67, "learning_rate": 0.00016515323496027243, "loss": 3.1034, "theoretical_loss": 3.4475804853590573, "tokens_seen": 1884291072 }, { "epoch": 0.67, "learning_rate": 0.00016496405599697315, "loss": 3.019, "theoretical_loss": 3.447418700235833, "tokens_seen": 1885339648 }, { "epoch": 0.67, "learning_rate": 0.00016477487703367387, "loss": 3.0309, "theoretical_loss": 3.4472570302468037, "tokens_seen": 1886388224 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.4793676733970642, "objective/train/docs_used": 1066806, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.5835084915161133, "objective/train/original_loss": 2.5835084915161133, "objective/train/theoretical_loss": 3.447095475246102, "objective/train/tokens_used": 1907896800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23561342060565948, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049134373664856, "objective/train/weighted_lm_loss": 2.7097527980804443, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.96308833360672, "theoretical_loss": 3.447095475246102, "tokens_seen": 1887436800 }, { "epoch": 0.67, "learning_rate": 0.00016458569807037456, "loss": 3.0728, "theoretical_loss": 3.447095475246102, "tokens_seen": 1887436800 }, { "epoch": 0.67, "learning_rate": 0.00016439651910707528, "loss": 3.0546, "theoretical_loss": 3.446934035088128, "tokens_seen": 1888485376 }, { "epoch": 0.67, "learning_rate": 0.000164207340143776, "loss": 3.0043, "theoretical_loss": 3.446772709627547, "tokens_seen": 1889533952 }, { "epoch": 0.68, "learning_rate": 0.00016401816118047675, "loss": 3.0649, "theoretical_loss": 3.4466114987192884, "tokens_seen": 1890582528 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.49177709221839905, "objective/train/docs_used": 1068619, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9133520126342773, "objective/train/original_loss": 2.9133520126342773, "objective/train/theoretical_loss": 3.446591355403001, "objective/train/tokens_used": 1911173600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2440955489873886, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050418496131897, "objective/train/weighted_lm_loss": 3.0605461597442627, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9515418410301208, "theoretical_loss": 3.446591355403001, "tokens_seen": 1890713600 }, { "epoch": 0.68, "learning_rate": 0.00016382898221717747, "loss": 3.0891, "theoretical_loss": 3.446450402218545, "tokens_seen": 1891631104 }, { "epoch": 0.68, "learning_rate": 0.00016363980325387816, "loss": 3.0613, "theoretical_loss": 3.446289419980774, "tokens_seen": 1892679680 }, { "epoch": 0.68, "learning_rate": 0.00016345062429057888, "loss": 3.1123, "theoretical_loss": 3.446128551861694, "tokens_seen": 1893728256 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.4786206781864166, "objective/train/docs_used": 1071183, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7194037437438965, "objective/train/original_loss": 2.7194037437438965, "objective/train/theoretical_loss": 3.446088352646086, "objective/train/tokens_used": 1914450400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23550763726234436, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490589141845703, "objective/train/weighted_lm_loss": 2.853081464767456, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9536006450653076, "theoretical_loss": 3.446088352646086, "tokens_seen": 1893990400 }, { "epoch": 0.68, "learning_rate": 0.0001632614453272796, "loss": 3.0588, "theoretical_loss": 3.4459677977172847, "tokens_seen": 1894776832 }, { "epoch": 0.68, "learning_rate": 0.00016307226636398032, "loss": 3.0971, "theoretical_loss": 3.4458071574037907, "tokens_seen": 1895825408 }, { "epoch": 0.68, "learning_rate": 0.00016288308740068104, "loss": 3.1363, "theoretical_loss": 3.4456466307777127, "tokens_seen": 1896873984 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.4824937582015991, "objective/train/docs_used": 1073215, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.948737621307373, "objective/train/original_loss": 2.948737621307373, "objective/train/theoretical_loss": 3.4455864625737282, "objective/train/tokens_used": 1917727200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2392544150352478, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049465537071228, "objective/train/weighted_lm_loss": 3.0943331718444824, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9522041082382202, "theoretical_loss": 3.4455864625737282, "tokens_seen": 1897267200 }, { "epoch": 0.68, "learning_rate": 0.00016269390843738177, "loss": 3.1575, "theoretical_loss": 3.4454862176958154, "tokens_seen": 1897922560 }, { "epoch": 0.68, "learning_rate": 0.00016250472947408249, "loss": 3.07, "theoretical_loss": 3.4453259180151203, "tokens_seen": 1898971136 }, { "epoch": 0.68, "learning_rate": 0.0001623155505107832, "loss": 3.0711, "theoretical_loss": 3.4451657315929096, "tokens_seen": 1900019712 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.4815155863761902, "objective/train/docs_used": 1074511, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.044644594192505, "objective/train/original_loss": 3.044644832611084, "objective/train/theoretical_loss": 3.445085680809207, "objective/train/tokens_used": 1921004000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23717336356639862, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493569374084473, "objective/train/weighted_lm_loss": 3.19500470161438, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9591754674911499, "theoretical_loss": 3.445085680809207, "tokens_seen": 1900544000 }, { "epoch": 0.68, "learning_rate": 0.00016212637154748393, "loss": 3.0576, "theoretical_loss": 3.445005658286722, "tokens_seen": 1901068288 }, { "epoch": 0.68, "learning_rate": 0.00016193719258418462, "loss": 3.0648, "theoretical_loss": 3.4448456979543556, "tokens_seen": 1902116864 }, { "epoch": 0.68, "learning_rate": 0.00016174801362088537, "loss": 3.0426, "theoretical_loss": 3.4446858504538644, "tokens_seen": 1903165440 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.49052512645721436, "objective/train/docs_used": 1076404, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9473235607147217, "objective/train/original_loss": 2.94732403755188, "objective/train/theoretical_loss": 3.4445860030005253, "objective/train/tokens_used": 1924280800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24399477243423462, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502928495407104, "objective/train/weighted_lm_loss": 3.0951266288757324, "objective/train/weights_max": 1.0512195825576782, "objective/train/weights_min": 0.9533544182777405, "theoretical_loss": 3.4445860030005253, "tokens_seen": 1903820800 }, { "epoch": 0.68, "learning_rate": 0.0001615588346575861, "loss": 3.0305, "theoretical_loss": 3.4445261156435585, "tokens_seen": 1904214016 }, { "epoch": 0.68, "learning_rate": 0.0001613696556942868, "loss": 3.016, "theoretical_loss": 3.4443664933820055, "tokens_seen": 1905262592 }, { "epoch": 0.68, "learning_rate": 0.0001611804767309875, "loss": 3.0832, "theoretical_loss": 3.4442069835280265, "tokens_seen": 1906311168 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.47672393918037415, "objective/train/docs_used": 1078284, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2508022785186768, "objective/train/original_loss": 3.2508018016815186, "objective/train/theoretical_loss": 3.4440874248202293, "objective/train/tokens_used": 1927557600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23937898874282837, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048888921737671, "objective/train/weighted_lm_loss": 3.4074931144714355, "objective/train/weights_max": 1.0512206554412842, "objective/train/weights_min": 0.9522076845169067, "theoretical_loss": 3.4440874248202293, "tokens_seen": 1907097600 }, { "epoch": 0.68, "learning_rate": 0.00016099129776768822, "loss": 3.0102, "theoretical_loss": 3.4440475859406985, "tokens_seen": 1907359744 }, { "epoch": 0.68, "learning_rate": 0.00016080211880438894, "loss": 3.0034, "theoretical_loss": 3.4438883004793515, "tokens_seen": 1908408320 }, { "epoch": 0.68, "learning_rate": 0.0001606129398410897, "loss": 3.0316, "theoretical_loss": 3.4437291270035697, "tokens_seen": 1909456896 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.48424607515335083, "objective/train/docs_used": 1079917, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7775046825408936, "objective/train/original_loss": 2.7775044441223145, "objective/train/theoretical_loss": 3.4435899419652256, "objective/train/tokens_used": 1930834400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23730778694152832, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496307611465454, "objective/train/weighted_lm_loss": 2.9170379638671875, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9515936970710754, "theoretical_loss": 3.4435899419652256, "tokens_seen": 1910374400 }, { "epoch": 0.68, "learning_rate": 0.00016042376087779038, "loss": 3.0128, "theoretical_loss": 3.443570065373189, "tokens_seen": 1910505472 }, { "epoch": 0.68, "learning_rate": 0.0001602345819144911, "loss": 3.0617, "theoretical_loss": 3.443411115448299, "tokens_seen": 1911554048 }, { "epoch": 0.68, "learning_rate": 0.00016004540295119183, "loss": 3.0827, "theoretical_loss": 3.4432522770892398, "tokens_seen": 1912602624 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.48178908228874207, "objective/train/docs_used": 1081633, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.645468235015869, "objective/train/original_loss": 2.6454684734344482, "objective/train/theoretical_loss": 3.443093550156604, "objective/train/tokens_used": 1934111200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24107873439788818, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494041442871094, "objective/train/weighted_lm_loss": 2.7751283645629883, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9514924883842468, "theoretical_loss": 3.443093550156604, "tokens_seen": 1913651200 }, { "epoch": 0.68, "learning_rate": 0.00015985622398789255, "loss": 3.0849, "theoretical_loss": 3.443093550156604, "tokens_seen": 1913651200 }, { "epoch": 0.68, "learning_rate": 0.00015966704502459327, "loss": 3.0829, "theoretical_loss": 3.4429349345112326, "tokens_seen": 1914699776 }, { "epoch": 0.68, "learning_rate": 0.000159477866061294, "loss": 3.0307, "theoretical_loss": 3.442776430014218, "tokens_seen": 1915748352 }, { "epoch": 0.68, "learning_rate": 0.0001592886870979947, "loss": 3.1064, "theoretical_loss": 3.442618036526901, "tokens_seen": 1916796928 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.4814731180667877, "objective/train/docs_used": 1083428, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.869269609451294, "objective/train/original_loss": 2.869269847869873, "objective/train/theoretical_loss": 3.44259824513946, "objective/train/tokens_used": 1937388000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24218010902404785, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493781566619873, "objective/train/weighted_lm_loss": 3.0097241401672363, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.9519504904747009, "theoretical_loss": 3.44259824513946, "tokens_seen": 1916928000 }, { "epoch": 0.69, "learning_rate": 0.00015909950813469543, "loss": 3.1398, "theoretical_loss": 3.4424597539108728, "tokens_seen": 1917845504 }, { "epoch": 0.69, "learning_rate": 0.00015891032917139615, "loss": 3.1185, "theoretical_loss": 3.4423015820279703, "tokens_seen": 1918894080 }, { "epoch": 0.69, "learning_rate": 0.00015872115020809684, "loss": 3.0341, "theoretical_loss": 3.4421435207402795, "tokens_seen": 1919942656 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.48610636591911316, "objective/train/docs_used": 1085055, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.043754816055298, "objective/train/original_loss": 3.043754816055298, "objective/train/theoretical_loss": 3.44210402268272, "objective/train/tokens_used": 1940664800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23820815980434418, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049821376800537, "objective/train/weighted_lm_loss": 3.1965363025665283, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9668196439743042, "theoretical_loss": 3.44210402268272, "tokens_seen": 1920204800 }, { "epoch": 0.69, "learning_rate": 0.00015853197124479756, "loss": 3.149, "theoretical_loss": 3.441985569910133, "tokens_seen": 1920991232 }, { "epoch": 0.69, "learning_rate": 0.0001583427922814983, "loss": 3.1465, "theoretical_loss": 3.44182772940011, "tokens_seen": 1922039808 }, { "epoch": 0.69, "learning_rate": 0.00015815361331819903, "loss": 3.1075, "theoretical_loss": 3.441669999073035, "tokens_seen": 1923088384 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.4901277422904968, "objective/train/docs_used": 1086767, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.144951343536377, "objective/train/original_loss": 3.144951343536377, "objective/train/theoretical_loss": 3.4416108785789676, "objective/train/tokens_used": 1943941600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24380195140838623, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502521991729736, "objective/train/weighted_lm_loss": 3.3028461933135986, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9523271918296814, "theoretical_loss": 3.4416108785789676, "tokens_seen": 1923481600 }, { "epoch": 0.69, "learning_rate": 0.00015796443435489975, "loss": 3.1408, "theoretical_loss": 3.441512378791978, "tokens_seen": 1924136960 }, { "epoch": 0.69, "learning_rate": 0.00015777525539160045, "loss": 3.1612, "theoretical_loss": 3.4413548684202544, "tokens_seen": 1925185536 }, { "epoch": 0.69, "learning_rate": 0.00015758607642830117, "loss": 3.1644, "theoretical_loss": 3.4411974678214223, "tokens_seen": 1926234112 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.48439183831214905, "objective/train/docs_used": 1088753, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6918206214904785, "objective/train/original_loss": 2.6918208599090576, "objective/train/theoretical_loss": 3.4411188086442692, "objective/train/tokens_used": 1947218400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23981790244579315, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049658179283142, "objective/train/weighted_lm_loss": 2.8249881267547607, "objective/train/weights_max": 1.0512161254882812, "objective/train/weights_min": 0.9570189118385315, "theoretical_loss": 3.4411188086442692, "tokens_seen": 1926758400 }, { "epoch": 0.69, "learning_rate": 0.0001573968974650019, "loss": 3.162, "theoretical_loss": 3.4410401768592846, "tokens_seen": 1927282688 }, { "epoch": 0.69, "learning_rate": 0.00015720771850170263, "loss": 3.1948, "theoretical_loss": 3.440882995397886, "tokens_seen": 1928331264 }, { "epoch": 0.69, "learning_rate": 0.00015701853953840333, "loss": 3.2031, "theoretical_loss": 3.440725923301515, "tokens_seen": 1929379840 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.47249332070350647, "objective/train/docs_used": 1090637, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.311936855316162, "objective/train/original_loss": 3.311936855316162, "objective/train/theoretical_loss": 3.4406278087180064, "objective/train/tokens_used": 1950495200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24297377467155457, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484838485717773, "objective/train/weighted_lm_loss": 3.4713997840881348, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9513853192329407, "theoretical_loss": 3.4406278087180064, "tokens_seen": 1930035200 }, { "epoch": 0.69, "learning_rate": 0.00015682936057510405, "loss": 3.1742, "theoretical_loss": 3.4405689604347005, "tokens_seen": 1930428416 }, { "epoch": 0.69, "learning_rate": 0.00015664018161180477, "loss": 3.154, "theoretical_loss": 3.4404121066622144, "tokens_seen": 1931476992 }, { "epoch": 0.69, "learning_rate": 0.0001564510026485055, "loss": 3.1313, "theoretical_loss": 3.4402553618490668, "tokens_seen": 1932525568 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.4777022898197174, "objective/train/docs_used": 1092665, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2258169651031494, "objective/train/original_loss": 3.2258167266845703, "objective/train/theoretical_loss": 3.440137874662704, "objective/train/tokens_used": 1953772000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23566052317619324, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048967957496643, "objective/train/weighted_lm_loss": 3.3833398818969727, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9548637866973877, "theoretical_loss": 3.440137874662704, "tokens_seen": 1933312000 }, { "epoch": 0.69, "learning_rate": 0.00015626182368520618, "loss": 3.1434, "theoretical_loss": 3.4400987258605102, "tokens_seen": 1933574144 }, { "epoch": 0.69, "learning_rate": 0.00015607264472190693, "loss": 3.1228, "theoretical_loss": 3.4399421985620364, "tokens_seen": 1934622720 }, { "epoch": 0.69, "learning_rate": 0.00015588346575860765, "loss": 3.109, "theoretical_loss": 3.439785779819375, "tokens_seen": 1935671296 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.48318758606910706, "objective/train/docs_used": 1094661, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.008556842803955, "objective/train/original_loss": 3.0085573196411133, "objective/train/theoretical_loss": 3.439649002363864, "objective/train/tokens_used": 1957048800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24022220075130463, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495398044586182, "objective/train/weighted_lm_loss": 3.1570565700531006, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9545727372169495, "theoretical_loss": 3.439649002363864, "tokens_seen": 1936588800 }, { "epoch": 0.69, "learning_rate": 0.00015569428679530837, "loss": 3.1473, "theoretical_loss": 3.4396294694984952, "tokens_seen": 1936719872 }, { "epoch": 0.69, "learning_rate": 0.0001555051078320091, "loss": 3.1707, "theoretical_loss": 3.439473267465604, "tokens_seen": 1937768448 }, { "epoch": 0.69, "learning_rate": 0.00015531592886870979, "loss": 3.0851, "theoretical_loss": 3.4393171735871446, "tokens_seen": 1938817024 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.4772590696811676, "objective/train/docs_used": 1096594, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8366477489471436, "objective/train/original_loss": 2.8366475105285645, "objective/train/theoretical_loss": 3.439161187729799, "objective/train/tokens_used": 1960325600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23779280483722687, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489344596862793, "objective/train/weighted_lm_loss": 2.9748737812042236, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9515256285667419, "theoretical_loss": 3.439161187729799, "tokens_seen": 1939865600 }, { "epoch": 0.69, "learning_rate": 0.0001551267499054105, "loss": 3.0804, "theoretical_loss": 3.439161187729799, "tokens_seen": 1939865600 }, { "epoch": 0.69, "learning_rate": 0.00015493757094211125, "loss": 3.1544, "theoretical_loss": 3.4390053097604847, "tokens_seen": 1940914176 }, { "epoch": 0.69, "learning_rate": 0.00015474839197881197, "loss": 3.1821, "theoretical_loss": 3.4388495395463536, "tokens_seen": 1941962752 }, { "epoch": 0.69, "learning_rate": 0.00015455921301551267, "loss": 3.1627, "theoretical_loss": 3.438693876954795, "tokens_seen": 1943011328 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.48600539565086365, "objective/train/docs_used": 1098399, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.878577709197998, "objective/train/original_loss": 2.8785781860351562, "objective/train/theoretical_loss": 3.438674426691467, "objective/train/tokens_used": 1963602400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24080105125904083, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498244762420654, "objective/train/weighted_lm_loss": 3.0219502449035645, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.952756404876709, "theoretical_loss": 3.438674426691467, "tokens_seen": 1943142400 }, { "epoch": 0.69, "learning_rate": 0.0001543700340522134, "loss": 3.0941, "theoretical_loss": 3.4385383218534313, "tokens_seen": 1944059904 }, { "epoch": 0.69, "learning_rate": 0.0001541808550889141, "loss": 3.0426, "theoretical_loss": 3.43838287411012, "tokens_seen": 1945108480 }, { "epoch": 0.7, "learning_rate": 0.00015399167612561483, "loss": 3.1071, "theoretical_loss": 3.4382275335929506, "tokens_seen": 1946157056 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.4673837423324585, "objective/train/docs_used": 1099983, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.590907573699951, "objective/train/original_loss": 2.590907573699951, "objective/train/theoretical_loss": 3.4381887152023074, "objective/train/tokens_used": 1966879200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23693355917930603, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0479426383972168, "objective/train/weighted_lm_loss": 2.719980478286743, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.969889223575592, "theoretical_loss": 3.4381887152023074, "tokens_seen": 1946419200 }, { "epoch": 0.7, "learning_rate": 0.00015380249716231555, "loss": 3.1018, "theoretical_loss": 3.4380723001702482, "tokens_seen": 1947205632 }, { "epoch": 0.7, "learning_rate": 0.00015361331819901627, "loss": 3.1348, "theoretical_loss": 3.4379171737105683, "tokens_seen": 1948254208 }, { "epoch": 0.7, "learning_rate": 0.000153424139235717, "loss": 3.0988, "theoretical_loss": 3.4377621540826984, "tokens_seen": 1949302784 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.4762834310531616, "objective/train/docs_used": 1101732, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0439748764038086, "objective/train/original_loss": 3.0439748764038086, "objective/train/theoretical_loss": 3.4377040492380826, "objective/train/tokens_used": 1970156000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2366829365491867, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488312244415283, "objective/train/weighted_lm_loss": 3.1925623416900635, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9599282741546631, "theoretical_loss": 3.4377040492380826, "tokens_seen": 1949696000 }, { "epoch": 0.7, "learning_rate": 0.0001532349602724177, "loss": 3.1246, "theoretical_loss": 3.437607241155659, "tokens_seen": 1950351360 }, { "epoch": 0.7, "learning_rate": 0.00015304578130911843, "loss": 3.1243, "theoretical_loss": 3.4374524347986997, "tokens_seen": 1951399936 }, { "epoch": 0.7, "learning_rate": 0.00015285660234581913, "loss": 3.0839, "theoretical_loss": 3.437297734881301, "tokens_seen": 1952448512 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.4811760485172272, "objective/train/docs_used": 1103876, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.098599910736084, "objective/train/original_loss": 3.098599433898926, "objective/train/theoretical_loss": 3.4372204247967133, "objective/train/tokens_used": 1973432800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24242191016674042, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493496656417847, "objective/train/weighted_lm_loss": 3.2506155967712402, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.951482892036438, "theoretical_loss": 3.4372204247967133, "tokens_seen": 1952972800 }, { "epoch": 0.7, "learning_rate": 0.00015266742338251987, "loss": 3.0771, "theoretical_loss": 3.437143141273175, "tokens_seen": 1953497088 }, { "epoch": 0.7, "learning_rate": 0.0001524782444192206, "loss": 3.1538, "theoretical_loss": 3.4369886538442604, "tokens_seen": 1954545664 }, { "epoch": 0.7, "learning_rate": 0.00015228906545592131, "loss": 3.1184, "theoretical_loss": 3.4368342724647265, "tokens_seen": 1955594240 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.4940529763698578, "objective/train/docs_used": 1105993, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3750038146972656, "objective/train/original_loss": 3.3750038146972656, "objective/train/theoretical_loss": 3.436737837898122, "objective/train/tokens_used": 1976709600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2451893538236618, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506517887115479, "objective/train/weighted_lm_loss": 3.5460386276245117, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9603523015975952, "theoretical_loss": 3.436737837898122, "tokens_seen": 1956249600 }, { "epoch": 0.7, "learning_rate": 0.000152099886492622, "loss": 3.1778, "theoretical_loss": 3.43667999700497, "tokens_seen": 1956642816 }, { "epoch": 0.7, "learning_rate": 0.00015191070752932273, "loss": 3.1804, "theoretical_loss": 3.4365258273356156, "tokens_seen": 1957691392 }, { "epoch": 0.7, "learning_rate": 0.00015172152856602345, "loss": 3.1596, "theoretical_loss": 3.4363717633275153, "tokens_seen": 1958739968 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.4944164454936981, "objective/train/docs_used": 1108290, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2766504287719727, "objective/train/original_loss": 3.2766504287719727, "objective/train/theoretical_loss": 3.4362562845840747, "objective/train/tokens_used": 1979986400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2452101707458496, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506881475448608, "objective/train/weighted_lm_loss": 3.4432315826416016, "objective/train/weights_max": 1.0512208938598633, "objective/train/weights_min": 0.9669325351715088, "theoretical_loss": 3.4362562845840747, "tokens_seen": 1959526400 }, { "epoch": 0.7, "learning_rate": 0.0001515323496027242, "loss": 3.1575, "theoretical_loss": 3.436217804851747, "tokens_seen": 1959788544 }, { "epoch": 0.7, "learning_rate": 0.00015134317063942492, "loss": 3.1834, "theoretical_loss": 3.4360639517796168, "tokens_seen": 1960837120 }, { "epoch": 0.7, "learning_rate": 0.0001511539916761256, "loss": 3.1164, "theoretical_loss": 3.4359102039826546, "tokens_seen": 1961885696 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.49615535140037537, "objective/train/docs_used": 1110185, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1375579833984375, "objective/train/original_loss": 3.1375579833984375, "objective/train/theoretical_loss": 3.435775760918026, "objective/train/tokens_used": 1983263200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24696744978427887, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0508711338043213, "objective/train/weighted_lm_loss": 3.2971224784851074, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.957501232624054, "theoretical_loss": 3.435775760918026, "tokens_seen": 1962803200 }, { "epoch": 0.7, "learning_rate": 0.00015096481271282633, "loss": 3.1439, "theoretical_loss": 3.4357565613326155, "tokens_seen": 1962934272 }, { "epoch": 0.7, "learning_rate": 0.00015077563374952705, "loss": 3.0854, "theoretical_loss": 3.4356030237014803, "tokens_seen": 1963982848 }, { "epoch": 0.7, "learning_rate": 0.00015058645478622777, "loss": 3.1707, "theoretical_loss": 3.435449590961453, "tokens_seen": 1965031424 }, { "debugging/Self-BLEU-5": 0.4796244782337672, "debugging/distinct-1-grams": 0.7874466763773558, "debugging/distinct-2-grams": 0.9537592626603371, "debugging/entropy-1-grams": 6.225892551400493, "debugging/entropy-2-grams": 7.143796200176747, "debugging/length": 450.05, "debugging/num_segments": 20, "debugging/raw_token_scores_avg": 0.028914527967572212, "debugging/raw_token_scores_std": 0.09425558149814606, "epoch": 0.7, "objective/train/advantage_avg": 0.47108298540115356, "objective/train/docs_used": 1112341, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.987183094024658, "objective/train/original_loss": 2.987183094024658, "objective/train/theoretical_loss": 3.435296262984961, "objective/train/tokens_used": 1986540000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23080673813819885, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0482814311981201, "objective/train/weighted_lm_loss": 3.131026029586792, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9537268877029419, "theoretical_loss": 3.435296262984961, "tokens_seen": 1966080000 }, { "epoch": 0.7, "learning_rate": 0.0001503972758229285, "loss": 3.1423, "theoretical_loss": 3.435296262984961, "tokens_seen": 1966080000 }, { "epoch": 0.7, "learning_rate": 0.0001502080968596292, "loss": 3.0294, "theoretical_loss": 3.4351430396446565, "tokens_seen": 1967128576 }, { "epoch": 0.7, "learning_rate": 0.00015001891789632993, "loss": 3.1178, "theoretical_loss": 3.4349899208134125, "tokens_seen": 1968177152 }, { "epoch": 0.7, "learning_rate": 0.00014982973893303065, "loss": 3.1248, "theoretical_loss": 3.4348369063643247, "tokens_seen": 1969225728 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.4847583472728729, "objective/train/docs_used": 1114494, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9856464862823486, "objective/train/original_loss": 2.985647201538086, "objective/train/theoretical_loss": 3.434817786891247, "objective/train/tokens_used": 1989816800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24014219641685486, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049696683883667, "objective/train/weighted_lm_loss": 3.1322898864746094, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.9546946883201599, "theoretical_loss": 3.434817786891247, "tokens_seen": 1969356800 }, { "epoch": 0.7, "learning_rate": 0.00014964055996973135, "loss": 3.0907, "theoretical_loss": 3.4346839961707096, "tokens_seen": 1970274304 }, { "epoch": 0.7, "learning_rate": 0.00014945138100643207, "loss": 3.1122, "theoretical_loss": 3.4345311901061066, "tokens_seen": 1971322880 }, { "epoch": 0.7, "learning_rate": 0.00014926220204313282, "loss": 3.0943, "theoretical_loss": 3.434378488044274, "tokens_seen": 1972371456 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.49263060092926025, "objective/train/docs_used": 1115816, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0610079765319824, "objective/train/original_loss": 3.0610079765319824, "objective/train/theoretical_loss": 3.434340328764476, "objective/train/tokens_used": 1993093600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24445389211177826, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505057573318481, "objective/train/weighted_lm_loss": 3.2152202129364014, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9796652793884277, "theoretical_loss": 3.434340328764476, "tokens_seen": 1972633600 }, { "epoch": 0.7, "learning_rate": 0.00014907302307983354, "loss": 3.1552, "theoretical_loss": 3.4342258898591904, "tokens_seen": 1973420032 }, { "epoch": 0.71, "learning_rate": 0.00014888384411653426, "loss": 3.1141, "theoretical_loss": 3.434073395425055, "tokens_seen": 1974468608 }, { "epoch": 0.71, "learning_rate": 0.00014869466515323495, "loss": 3.0634, "theoretical_loss": 3.433921004616285, "tokens_seen": 1975517184 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.47501012682914734, "objective/train/docs_used": 1117825, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.095318078994751, "objective/train/original_loss": 3.095317840576172, "objective/train/theoretical_loss": 3.433863884753317, "objective/train/tokens_used": 1996370400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23647457361221313, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487028360366821, "objective/train/weighted_lm_loss": 3.245806932449341, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9513912200927734, "theoretical_loss": 3.433863884753317, "tokens_seen": 1975910400 }, { "epoch": 0.71, "learning_rate": 0.00014850548618993567, "loss": 3.0918, "theoretical_loss": 3.433768717307516, "tokens_seen": 1976565760 }, { "epoch": 0.71, "learning_rate": 0.0001483163072266364, "loss": 3.1025, "theoretical_loss": 3.4336165333736037, "tokens_seen": 1977614336 }, { "epoch": 0.71, "learning_rate": 0.00014812712826333714, "loss": 3.0333, "theoretical_loss": 3.433464452689618, "tokens_seen": 1978662912 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.48081904649734497, "objective/train/docs_used": 1119491, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.902231454849243, "objective/train/original_loss": 2.902231216430664, "objective/train/theoretical_loss": 3.433388451027367, "objective/train/tokens_used": 1999647200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23978132009506226, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493004322052002, "objective/train/weighted_lm_loss": 3.0465242862701416, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9542652368545532, "theoretical_loss": 3.433388451027367, "tokens_seen": 1979187200 }, { "epoch": 0.71, "learning_rate": 0.00014793794930003783, "loss": 3.0627, "theoretical_loss": 3.4333124751308484, "tokens_seen": 1979711488 }, { "epoch": 0.71, "learning_rate": 0.00014774877033673855, "loss": 3.0599, "theoretical_loss": 3.4331606005728004, "tokens_seen": 1980760064 }, { "epoch": 0.71, "learning_rate": 0.00014755959137343927, "loss": 3.0972, "theoretical_loss": 3.4330088288911953, "tokens_seen": 1981808640 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.47215214371681213, "objective/train/docs_used": 1121511, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.187962055206299, "objective/train/original_loss": 3.1879618167877197, "objective/train/theoretical_loss": 3.432914023777001, "objective/train/tokens_used": 2002924000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23091299831867218, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0483888387680054, "objective/train/weighted_lm_loss": 3.343453884124756, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9539908170700073, "theoretical_loss": 3.432914023777001, "tokens_seen": 1982464000 }, { "epoch": 0.71, "learning_rate": 0.00014737041241014, "loss": 3.0673, "theoretical_loss": 3.43285715996197, "tokens_seen": 1982857216 }, { "epoch": 0.71, "learning_rate": 0.0001471812334468407, "loss": 3.0675, "theoretical_loss": 3.432705593661278, "tokens_seen": 1983905792 }, { "epoch": 0.71, "learning_rate": 0.00014699205448354144, "loss": 3.0601, "theoretical_loss": 3.4325541298654842, "tokens_seen": 1984954368 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.4908943474292755, "objective/train/docs_used": 1123355, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.127197504043579, "objective/train/original_loss": 3.127197265625, "objective/train/theoretical_loss": 3.4324405992132268, "objective/train/tokens_used": 2006200800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24222372472286224, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503206253051758, "objective/train/weighted_lm_loss": 3.284457206726074, "objective/train/weights_max": 1.0512199401855469, "objective/train/weights_min": 0.9885050654411316, "theoretical_loss": 3.4324405992132268, "tokens_seen": 1985740800 }, { "epoch": 0.71, "learning_rate": 0.00014680287552024216, "loss": 3.1086, "theoretical_loss": 3.432402768451171, "tokens_seen": 1986002944 }, { "epoch": 0.71, "learning_rate": 0.00014661369655694288, "loss": 3.1612, "theoretical_loss": 3.4322515092951322, "tokens_seen": 1987051520 }, { "epoch": 0.71, "learning_rate": 0.0001464245175936436, "loss": 3.044, "theoretical_loss": 3.432100352274376, "tokens_seen": 1988100096 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.4853982627391815, "objective/train/docs_used": 1125335, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.052032232284546, "objective/train/original_loss": 3.052031993865967, "objective/train/theoretical_loss": 3.4319681735675376, "objective/train/tokens_used": 2009477600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24046699702739716, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04976224899292, "objective/train/weighted_lm_loss": 3.203836679458618, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9524861574172974, "theoretical_loss": 3.4319681735675376, "tokens_seen": 1989017600 }, { "epoch": 0.71, "learning_rate": 0.0001462353386303443, "loss": 3.1721, "theoretical_loss": 3.431949297266123, "tokens_seen": 1989148672 }, { "epoch": 0.71, "learning_rate": 0.000146046159667045, "loss": 3.1841, "theoretical_loss": 3.4317983441478064, "tokens_seen": 1990197248 }, { "epoch": 0.71, "learning_rate": 0.00014585698070374576, "loss": 3.1459, "theoretical_loss": 3.4316474927970697, "tokens_seen": 1991245824 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.4899314045906067, "objective/train/docs_used": 1126843, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9000444412231445, "objective/train/original_loss": 2.9000439643859863, "objective/train/theoretical_loss": 3.431496743091769, "objective/train/tokens_used": 2012754400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24320422112941742, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502294301986694, "objective/train/weighted_lm_loss": 3.0460522174835205, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9620993733406067, "theoretical_loss": 3.431496743091769, "tokens_seen": 1992294400 }, { "epoch": 0.71, "learning_rate": 0.00014566780174044648, "loss": 3.1331, "theoretical_loss": 3.431496743091769, "tokens_seen": 1992294400 }, { "epoch": 0.71, "learning_rate": 0.00014547862277714717, "loss": 3.1277, "theoretical_loss": 3.431346094909971, "tokens_seen": 1993342976 }, { "epoch": 0.71, "learning_rate": 0.0001452894438138479, "loss": 3.1989, "theoretical_loss": 3.4311955481299528, "tokens_seen": 1994391552 }, { "epoch": 0.71, "learning_rate": 0.00014510026485054861, "loss": 3.205, "theoretical_loss": 3.4310451026302005, "tokens_seen": 1995440128 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.48517540097236633, "objective/train/docs_used": 1128158, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8031439781188965, "objective/train/original_loss": 2.8031439781188965, "objective/train/theoretical_loss": 3.4310263040579567, "objective/train/tokens_used": 2016031200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23921027779579163, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497334003448486, "objective/train/weighted_lm_loss": 2.9433789253234863, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.955235481262207, "theoretical_loss": 3.4310263040579567, "tokens_seen": 1995571200 }, { "epoch": 0.71, "learning_rate": 0.00014491108588724933, "loss": 3.1294, "theoretical_loss": 3.430894758289411, "tokens_seen": 1996488704 }, { "epoch": 0.71, "learning_rate": 0.00014472190692395008, "loss": 3.2061, "theoretical_loss": 3.4307445149864884, "tokens_seen": 1997537280 }, { "epoch": 0.71, "learning_rate": 0.00014453272796065078, "loss": 3.1406, "theoretical_loss": 3.430594372600546, "tokens_seen": 1998585856 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.49511581659317017, "objective/train/docs_used": 1130081, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1099729537963867, "objective/train/original_loss": 3.109973192214966, "objective/train/theoretical_loss": 3.430556852758192, "objective/train/tokens_used": 2019308000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2468186765909195, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0507664680480957, "objective/train/weighted_lm_loss": 3.267641544342041, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9615699052810669, "theoretical_loss": 3.430556852758192, "tokens_seen": 1998848000 }, { "epoch": 0.71, "learning_rate": 0.0001443435489973515, "loss": 3.1497, "theoretical_loss": 3.4304443310109063, "tokens_seen": 1999634432 }, { "epoch": 0.71, "learning_rate": 0.00014415437003405222, "loss": 3.092, "theoretical_loss": 3.430294390097097, "tokens_seen": 2000683008 }, { "epoch": 0.71, "learning_rate": 0.00014396519107075294, "loss": 3.1712, "theoretical_loss": 3.4301445497388547, "tokens_seen": 2001731584 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.4925604462623596, "objective/train/docs_used": 1132167, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9438576698303223, "objective/train/original_loss": 2.943857431411743, "objective/train/theoretical_loss": 3.4300883855044857, "objective/train/tokens_used": 2022584800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2444770783185959, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504989624023438, "objective/train/weighted_lm_loss": 3.092851161956787, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9527947306632996, "theoretical_loss": 3.4300883855044857, "tokens_seen": 2002124800 }, { "epoch": 0.72, "learning_rate": 0.00014377601210745363, "loss": 3.1484, "theoretical_loss": 3.4299948098161215, "tokens_seen": 2002780160 }, { "epoch": 0.72, "learning_rate": 0.00014358683314415438, "loss": 3.132, "theoretical_loss": 3.4298451702090462, "tokens_seen": 2003828736 }, { "epoch": 0.72, "learning_rate": 0.0001433976541808551, "loss": 3.1449, "theoretical_loss": 3.4296956307979825, "tokens_seen": 2004877312 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.47776997089385986, "objective/train/docs_used": 1133291, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9905569553375244, "objective/train/original_loss": 2.990556478500366, "objective/train/theoretical_loss": 3.429620898628622, "objective/train/tokens_used": 2025861600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2385081797838211, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048988938331604, "objective/train/weighted_lm_loss": 3.1350185871124268, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9514456391334534, "theoretical_loss": 3.429620898628622, "tokens_seen": 2005401600 }, { "epoch": 0.72, "learning_rate": 0.00014320847521755582, "loss": 3.1673, "theoretical_loss": 3.42954619146349, "tokens_seen": 2005925888 }, { "epoch": 0.72, "learning_rate": 0.0001430192962542565, "loss": 3.1407, "theoretical_loss": 3.4293968520863336, "tokens_seen": 2006974464 }, { "epoch": 0.72, "learning_rate": 0.00014283011729095723, "loss": 3.1269, "theoretical_loss": 3.4292476125474805, "tokens_seen": 2008023040 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.4895833432674408, "objective/train/docs_used": 1134873, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.12807035446167, "objective/train/original_loss": 3.128070831298828, "objective/train/theoretical_loss": 3.4291543884820275, "objective/train/tokens_used": 2029138400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2412732094526291, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501848459243774, "objective/train/weighted_lm_loss": 3.2846426963806152, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9555214643478394, "theoretical_loss": 3.4291543884820275, "tokens_seen": 2008678400 }, { "epoch": 0.72, "learning_rate": 0.00014264093832765795, "loss": 3.1118, "theoretical_loss": 3.429098472728103, "tokens_seen": 2009071616 }, { "epoch": 0.72, "learning_rate": 0.0001424517593643587, "loss": 3.0736, "theoretical_loss": 3.4289494325095773, "tokens_seen": 2010120192 }, { "epoch": 0.72, "learning_rate": 0.00014226258040105942, "loss": 3.1048, "theoretical_loss": 3.4288004917734813, "tokens_seen": 2011168768 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.45610880851745605, "objective/train/docs_used": 1136813, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.221322774887085, "objective/train/original_loss": 3.221322536468506, "objective/train/theoretical_loss": 3.42868885143563, "objective/train/tokens_used": 2032415200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23579107224941254, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0468087196350098, "objective/train/weighted_lm_loss": 3.370765447616577, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.953737735748291, "theoretical_loss": 3.42868885143563, "tokens_seen": 2011955200 }, { "epoch": 0.72, "learning_rate": 0.00014207340143776012, "loss": 3.1595, "theoretical_loss": 3.428651650401596, "tokens_seen": 2012217344 }, { "epoch": 0.72, "learning_rate": 0.00014188422247446084, "loss": 3.1331, "theoretical_loss": 3.4285029082759046, "tokens_seen": 2013265920 }, { "epoch": 0.72, "learning_rate": 0.00014169504351116156, "loss": 3.1415, "theoretical_loss": 3.4283542652785908, "tokens_seen": 2014314496 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.4859057068824768, "objective/train/docs_used": 1138919, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.946512460708618, "objective/train/original_loss": 2.946512222290039, "objective/train/theoretical_loss": 3.4282242838797234, "objective/train/tokens_used": 2035692000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23969919979572296, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498089790344238, "objective/train/weighted_lm_loss": 3.09318208694458, "objective/train/weights_max": 1.0512185096740723, "objective/train/weights_min": 0.9610596299171448, "theoretical_loss": 3.4282242838797234, "tokens_seen": 2015232000 }, { "epoch": 0.72, "learning_rate": 0.00014150586454786228, "loss": 3.1535, "theoretical_loss": 3.428205721292041, "tokens_seen": 2015363072 }, { "epoch": 0.72, "learning_rate": 0.000141316685584563, "loss": 3.1432, "theoretical_loss": 3.4280572761988406, "tokens_seen": 2016411648 }, { "epoch": 0.72, "learning_rate": 0.00014112750662126372, "loss": 3.1212, "theoretical_loss": 3.4279089298817764, "tokens_seen": 2017460224 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.4828082323074341, "objective/train/docs_used": 1140999, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7979843616485596, "objective/train/original_loss": 2.7979846000671387, "objective/train/theoretical_loss": 3.427760682223834, "objective/train/tokens_used": 2038968800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23849663138389587, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494930744171143, "objective/train/weighted_lm_loss": 2.936995029449463, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.954368531703949, "theoretical_loss": 3.427760682223834, "tokens_seen": 2018508800 }, { "epoch": 0.72, "learning_rate": 0.00014093832765796444, "loss": 3.119, "theoretical_loss": 3.427760682223834, "tokens_seen": 2018508800 }, { "epoch": 0.72, "learning_rate": 0.00014074914869466516, "loss": 3.1216, "theoretical_loss": 3.4276125331081997, "tokens_seen": 2019557376 }, { "epoch": 0.72, "learning_rate": 0.00014055996973136588, "loss": 3.0604, "theoretical_loss": 3.427464482418257, "tokens_seen": 2020605952 }, { "epoch": 0.72, "learning_rate": 0.00014037079076806657, "loss": 3.0767, "theoretical_loss": 3.4273165300375887, "tokens_seen": 2021654528 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.4603384733200073, "objective/train/docs_used": 1143312, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7195169925689697, "objective/train/original_loss": 2.719517230987549, "objective/train/theoretical_loss": 3.427298042896586, "objective/train/tokens_used": 2042245600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23422212898731232, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0472242832183838, "objective/train/weighted_lm_loss": 2.849896192550659, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9517197012901306, "theoretical_loss": 3.427298042896586, "tokens_seen": 2021785600 }, { "epoch": 0.72, "learning_rate": 0.00014018161180476732, "loss": 3.0877, "theoretical_loss": 3.427168675849976, "tokens_seen": 2022703104 }, { "epoch": 0.72, "learning_rate": 0.00013999243284146804, "loss": 3.1204, "theoretical_loss": 3.427020919739397, "tokens_seen": 2023751680 }, { "epoch": 0.72, "learning_rate": 0.00013980325387816876, "loss": 3.0585, "theoretical_loss": 3.4268732615900266, "tokens_seen": 2024800256 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.4914935827255249, "objective/train/docs_used": 1144755, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.170673131942749, "objective/train/original_loss": 3.170673370361328, "objective/train/theoretical_loss": 3.4268363623455684, "objective/train/tokens_used": 2045522400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24298258125782013, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503846406936646, "objective/train/weighted_lm_loss": 3.3304572105407715, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.981985330581665, "theoretical_loss": 3.4268363623455684, "tokens_seen": 2025062400 }, { "epoch": 0.72, "learning_rate": 0.00013961407491486946, "loss": 3.0733, "theoretical_loss": 3.4267257012862373, "tokens_seen": 2025848832 }, { "epoch": 0.72, "learning_rate": 0.00013942489595157018, "loss": 3.074, "theoretical_loss": 3.4265782387125974, "tokens_seen": 2026897408 }, { "epoch": 0.72, "learning_rate": 0.0001392357169882709, "loss": 3.1012, "theoretical_loss": 3.426430873753871, "tokens_seen": 2027945984 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.49117955565452576, "objective/train/docs_used": 1146855, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9218034744262695, "objective/train/original_loss": 2.9218032360076904, "objective/train/theoretical_loss": 3.426375637037207, "objective/train/tokens_used": 2048799200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2444514036178589, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503606796264648, "objective/train/weighted_lm_loss": 3.0689315795898438, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9535993337631226, "theoretical_loss": 3.426375637037207, "tokens_seen": 2028339200 }, { "epoch": 0.72, "learning_rate": 0.00013904653802497164, "loss": 3.104, "theoretical_loss": 3.4262836062950175, "tokens_seen": 2028994560 }, { "epoch": 0.73, "learning_rate": 0.00013885735906167234, "loss": 3.0803, "theoretical_loss": 3.4261364362211912, "tokens_seen": 2030043136 }, { "epoch": 0.73, "learning_rate": 0.00013866818009837306, "loss": 3.0555, "theoretical_loss": 3.425989363417741, "tokens_seen": 2031091712 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.482932448387146, "objective/train/docs_used": 1149039, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0094153881073, "objective/train/original_loss": 3.0094151496887207, "objective/train/theoretical_loss": 3.425915863456632, "objective/train/tokens_used": 2052076000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23867341876029968, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049506425857544, "objective/train/weighted_lm_loss": 3.15950608253479, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9558206796646118, "theoretical_loss": 3.425915863456632, "tokens_seen": 2031616000 }, { "epoch": 0.73, "learning_rate": 0.00013847900113507378, "loss": 3.0104, "theoretical_loss": 3.42584238777021, "tokens_seen": 2032140288 }, { "epoch": 0.73, "learning_rate": 0.0001382898221717745, "loss": 3.0758, "theoretical_loss": 3.4256955091643353, "tokens_seen": 2033188864 }, { "epoch": 0.73, "learning_rate": 0.00013810064320847522, "loss": 3.0, "theoretical_loss": 3.4255487274860457, "tokens_seen": 2034237440 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.48947277665138245, "objective/train/docs_used": 1151240, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.842642068862915, "objective/train/original_loss": 2.842642307281494, "objective/train/theoretical_loss": 3.425457038107547, "objective/train/tokens_used": 2055352800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24280238151550293, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.05018150806427, "objective/train/weighted_lm_loss": 2.9852519035339355, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9556968212127686, "theoretical_loss": 3.425457038107547, "tokens_seen": 2034892800 }, { "epoch": 0.73, "learning_rate": 0.00013791146424517594, "loss": 2.9684, "theoretical_loss": 3.425402042621465, "tokens_seen": 2035286016 }, { "epoch": 0.73, "learning_rate": 0.00013772228528187666, "loss": 3.0045, "theoretical_loss": 3.4252554544569076, "tokens_seen": 2036334592 }, { "epoch": 0.73, "learning_rate": 0.00013753310631857738, "loss": 2.9956, "theoretical_loss": 3.4251089628788804, "tokens_seen": 2037383168 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.4755612313747406, "objective/train/docs_used": 1153257, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9520466327667236, "objective/train/original_loss": 2.9520463943481445, "objective/train/theoretical_loss": 3.4249991575121053, "objective/train/tokens_used": 2058629600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23398137092590332, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0487455129623413, "objective/train/weighted_lm_loss": 3.0983822345733643, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9720361828804016, "theoretical_loss": 3.4249991575121053, "tokens_seen": 2038169600 }, { "epoch": 0.73, "learning_rate": 0.0001373439273552781, "loss": 3.0354, "theoretical_loss": 3.4249625677740823, "tokens_seen": 2038431744 }, { "epoch": 0.73, "learning_rate": 0.0001371547483919788, "loss": 3.0449, "theoretical_loss": 3.424816269029402, "tokens_seen": 2039480320 }, { "epoch": 0.73, "learning_rate": 0.00013696556942867952, "loss": 3.0413, "theoretical_loss": 3.424670066531922, "tokens_seen": 2040528896 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.48594948649406433, "objective/train/docs_used": 1155326, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8335914611816406, "objective/train/original_loss": 2.833591938018799, "objective/train/theoretical_loss": 3.4245422182107816, "objective/train/tokens_used": 2061906400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2407456785440445, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049818515777588, "objective/train/weighted_lm_loss": 2.974628448486328, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9524859189987183, "theoretical_loss": 3.4245422182107816, "tokens_seen": 2041446400 }, { "epoch": 0.73, "learning_rate": 0.00013677639046538026, "loss": 3.065, "theoretical_loss": 3.4245239601689104, "tokens_seen": 2041577472 }, { "epoch": 0.73, "learning_rate": 0.00013658721150208098, "loss": 3.0519, "theoretical_loss": 3.4243779498278286, "tokens_seen": 2042626048 }, { "epoch": 0.73, "learning_rate": 0.00013639803253878168, "loss": 3.1046, "theoretical_loss": 3.4242320353963267, "tokens_seen": 2043674624 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.489499568939209, "objective/train/docs_used": 1157326, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9112443923950195, "objective/train/original_loss": 2.9112446308135986, "objective/train/theoretical_loss": 3.4240862167622437, "objective/train/tokens_used": 2065183200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24297621846199036, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501850843429565, "objective/train/weighted_lm_loss": 3.057767391204834, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9573585987091064, "theoretical_loss": 3.4240862167622437, "tokens_seen": 2044723200 }, { "epoch": 0.73, "learning_rate": 0.0001362088535754824, "loss": 3.0655, "theoretical_loss": 3.4240862167622437, "tokens_seen": 2044723200 }, { "epoch": 0.73, "learning_rate": 0.00013601967461218312, "loss": 3.039, "theoretical_loss": 3.423940493813606, "tokens_seen": 2045771776 }, { "epoch": 0.73, "learning_rate": 0.00013583049564888384, "loss": 3.0649, "theoretical_loss": 3.42379486643863, "tokens_seen": 2046820352 }, { "epoch": 0.73, "learning_rate": 0.0001356413166855846, "loss": 3.0511, "theoretical_loss": 3.4236493345257193, "tokens_seen": 2047868928 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.4733245372772217, "objective/train/docs_used": 1159281, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9886927604675293, "objective/train/original_loss": 2.9886927604675293, "objective/train/theoretical_loss": 3.4236311497432315, "objective/train/tokens_used": 2068460000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23410466313362122, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485223531723022, "objective/train/weighted_lm_loss": 3.134115219116211, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9530761241912842, "theoretical_loss": 3.4236311497432315, "tokens_seen": 2048000000 }, { "epoch": 0.73, "learning_rate": 0.00013545213772228528, "loss": 3.0334, "theoretical_loss": 3.4235038979634647, "tokens_seen": 2048917504 }, { "epoch": 0.73, "learning_rate": 0.000135262958758986, "loss": 3.0765, "theoretical_loss": 3.4233585566406433, "tokens_seen": 2049966080 }, { "epoch": 0.73, "learning_rate": 0.00013507377979568672, "loss": 3.0136, "theoretical_loss": 3.4232133104462195, "tokens_seen": 2051014656 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.4777950942516327, "objective/train/docs_used": 1161569, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8775670528411865, "objective/train/original_loss": 2.8775668144226074, "objective/train/theoretical_loss": 3.4231770137484316, "objective/train/tokens_used": 2071736800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23451338708400726, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489715337753296, "objective/train/weighted_lm_loss": 3.0206103324890137, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9626436829566956, "theoretical_loss": 3.4231770137484316, "tokens_seen": 2051276800 }, { "epoch": 0.73, "learning_rate": 0.00013488460083238744, "loss": 3.0315, "theoretical_loss": 3.423068159269344, "tokens_seen": 2052063232 }, { "epoch": 0.73, "learning_rate": 0.00013469542186908814, "loss": 3.0256, "theoretical_loss": 3.422923102999353, "tokens_seen": 2053111808 }, { "epoch": 0.73, "learning_rate": 0.00013450624290578888, "loss": 3.0888, "theoretical_loss": 3.4227781415257676, "tokens_seen": 2054160384 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.48282214999198914, "objective/train/docs_used": 1163144, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7914364337921143, "objective/train/original_loss": 2.791436195373535, "objective/train/theoretical_loss": 3.422723805390355, "objective/train/tokens_used": 2075013600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2376357913017273, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049489974975586, "objective/train/weighted_lm_loss": 2.928440809249878, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9537105560302734, "theoretical_loss": 3.422723805390355, "tokens_seen": 2054553600 }, { "epoch": 0.73, "learning_rate": 0.0001343170639424896, "loss": 2.9756, "theoretical_loss": 3.4226332747382946, "tokens_seen": 2055208960 }, { "epoch": 0.73, "learning_rate": 0.00013412788497919032, "loss": 3.0469, "theoretical_loss": 3.422488502526824, "tokens_seen": 2056257536 }, { "epoch": 0.73, "learning_rate": 0.00013393870601589104, "loss": 3.0435, "theoretical_loss": 3.422343824781432, "tokens_seen": 2057306112 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.48526403307914734, "objective/train/docs_used": 1165022, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.139117956161499, "objective/train/original_loss": 3.139117956161499, "objective/train/theoretical_loss": 3.422271521299214, "objective/train/tokens_used": 2078290400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2393149435520172, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497426986694336, "objective/train/weighted_lm_loss": 3.2956371307373047, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9516664743423462, "theoretical_loss": 3.422271521299214, "tokens_seen": 2057830400 }, { "epoch": 0.74, "learning_rate": 0.00013374952705259174, "loss": 3.0356, "theoretical_loss": 3.4221992413923767, "tokens_seen": 2058354688 }, { "epoch": 0.74, "learning_rate": 0.00013356034808929246, "loss": 2.9939, "theoretical_loss": 3.4220547522500997, "tokens_seen": 2059403264 }, { "epoch": 0.74, "learning_rate": 0.0001333711691259932, "loss": 3.0836, "theoretical_loss": 3.4219103572452267, "tokens_seen": 2060451840 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.48648935556411743, "objective/train/docs_used": 1166735, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.215547800064087, "objective/train/original_loss": 3.215548038482666, "objective/train/theoretical_loss": 3.421820158122806, "objective/train/tokens_used": 2081567200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2412308305501938, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498751401901245, "objective/train/weighted_lm_loss": 3.376168966293335, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9565590023994446, "theoretical_loss": 3.421820158122806, "tokens_seen": 2061107200 }, { "epoch": 0.74, "learning_rate": 0.00013318199016269393, "loss": 3.0275, "theoretical_loss": 3.421766056268565, "tokens_seen": 2061500416 }, { "epoch": 0.74, "learning_rate": 0.00013299281119939462, "loss": 3.0679, "theoretical_loss": 3.4216218492111032, "tokens_seen": 2062548992 }, { "epoch": 0.74, "learning_rate": 0.00013280363223609534, "loss": 3.0523, "theoretical_loss": 3.4214777359640136, "tokens_seen": 2063597568 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.4906824827194214, "objective/train/docs_used": 1168682, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.187013864517212, "objective/train/original_loss": 3.187013626098633, "objective/train/theoretical_loss": 3.4213697125263884, "objective/train/tokens_used": 2084844000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2428896725177765, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503029823303223, "objective/train/weighted_lm_loss": 3.3470242023468018, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9735086560249329, "theoretical_loss": 3.4213697125263884, "tokens_seen": 2064384000 }, { "epoch": 0.74, "learning_rate": 0.00013261445327279606, "loss": 3.0907, "theoretical_loss": 3.4213337164186486, "tokens_seen": 2064646144 }, { "epoch": 0.74, "learning_rate": 0.00013242527430949678, "loss": 3.0811, "theoretical_loss": 3.4211897904665416, "tokens_seen": 2065694720 }, { "epoch": 0.74, "learning_rate": 0.0001322360953461975, "loss": 3.013, "theoretical_loss": 3.4210459579994064, "tokens_seen": 2066743296 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.48356500267982483, "objective/train/docs_used": 1170870, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.949878454208374, "objective/train/original_loss": 2.949878692626953, "objective/train/theoretical_loss": 3.4209201811925642, "objective/train/tokens_used": 2088120800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24099013209342957, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495814085006714, "objective/train/weighted_lm_loss": 3.0962560176849365, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9516450762748718, "theoretical_loss": 3.4209201811925642, "tokens_seen": 2067660800 }, { "epoch": 0.74, "learning_rate": 0.00013204691638289822, "loss": 3.0967, "theoretical_loss": 3.4209022189091374, "tokens_seen": 2067791872 }, { "epoch": 0.74, "learning_rate": 0.00013185773741959894, "loss": 2.9705, "theoretical_loss": 3.4207585730878085, "tokens_seen": 2068840448 }, { "epoch": 0.74, "learning_rate": 0.00013166855845629966, "loss": 3.0573, "theoretical_loss": 3.4206150204276726, "tokens_seen": 2069889024 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.46928921341896057, "objective/train/docs_used": 1172808, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.938577651977539, "objective/train/original_loss": 2.938577651977539, "objective/train/theoretical_loss": 3.420471560821163, "objective/train/tokens_used": 2091397600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23262350261211395, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481112003326416, "objective/train/weighted_lm_loss": 3.0799367427825928, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9526295065879822, "theoretical_loss": 3.420471560821163, "tokens_seen": 2070937600 }, { "epoch": 0.74, "learning_rate": 0.00013147937949300038, "loss": 3.0212, "theoretical_loss": 3.420471560821163, "tokens_seen": 2070937600 }, { "epoch": 0.74, "learning_rate": 0.00013129020052970108, "loss": 3.0938, "theoretical_loss": 3.4203281941608896, "tokens_seen": 2071986176 }, { "epoch": 0.74, "learning_rate": 0.00013110102156640183, "loss": 3.1445, "theoretical_loss": 3.4201849203396417, "tokens_seen": 2073034752 }, { "epoch": 0.74, "learning_rate": 0.00013091184260310255, "loss": 3.0502, "theoretical_loss": 3.4200417392503866, "tokens_seen": 2074083328 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.4865155816078186, "objective/train/docs_used": 1174600, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.854151964187622, "objective/train/original_loss": 2.854151725769043, "objective/train/theoretical_loss": 3.4200238481291243, "objective/train/tokens_used": 2094674400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24221700429916382, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498827695846558, "objective/train/weighted_lm_loss": 2.9975836277008057, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9545468688011169, "theoretical_loss": 3.4200238481291243, "tokens_seen": 2074214400 }, { "epoch": 0.74, "learning_rate": 0.00013072266363980327, "loss": 3.1057, "theoretical_loss": 3.4198986507862683, "tokens_seen": 2075131904 }, { "epoch": 0.74, "learning_rate": 0.00013053348467650396, "loss": 3.0969, "theoretical_loss": 3.419755654840608, "tokens_seen": 2076180480 }, { "epoch": 0.74, "learning_rate": 0.00013034430571320468, "loss": 3.1735, "theoretical_loss": 3.419612751306904, "tokens_seen": 2077229056 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.48453977704048157, "objective/train/docs_used": 1176526, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9523186683654785, "objective/train/original_loss": 2.9523184299468994, "objective/train/theoretical_loss": 3.419577039850382, "objective/train/tokens_used": 2097951200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23808008432388306, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496641397476196, "objective/train/weighted_lm_loss": 3.0992777347564697, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9584941267967224, "theoretical_loss": 3.419577039850382, "tokens_seen": 2077491200 }, { "epoch": 0.74, "learning_rate": 0.0001301551267499054, "loss": 3.1769, "theoretical_loss": 3.41946994007883, "tokens_seen": 2078277632 }, { "epoch": 0.74, "learning_rate": 0.00012996594778660615, "loss": 3.1278, "theoretical_loss": 3.4193272210502372, "tokens_seen": 2079326208 }, { "epoch": 0.74, "learning_rate": 0.00012977676882330687, "loss": 3.1304, "theoretical_loss": 3.4191845941151504, "tokens_seen": 2080374784 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.48189568519592285, "objective/train/docs_used": 1178616, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7977302074432373, "objective/train/original_loss": 2.7977304458618164, "objective/train/theoretical_loss": 3.4191311327357505, "objective/train/tokens_used": 2101228000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23650392889976501, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493916273117065, "objective/train/weighted_lm_loss": 2.936192512512207, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9538434743881226, "theoretical_loss": 3.4191311327357505, "tokens_seen": 2080768000 }, { "epoch": 0.74, "learning_rate": 0.00012958758986000756, "loss": 3.0905, "theoretical_loss": 3.4190420591677713, "tokens_seen": 2081423360 }, { "epoch": 0.74, "learning_rate": 0.00012939841089670828, "loss": 3.0506, "theoretical_loss": 3.4188996161024745, "tokens_seen": 2082471936 }, { "epoch": 0.74, "learning_rate": 0.000129209231933409, "loss": 3.047, "theoretical_loss": 3.4187572648138107, "tokens_seen": 2083520512 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.4861668646335602, "objective/train/docs_used": 1180033, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.336259603500366, "objective/train/original_loss": 3.3362598419189453, "objective/train/theoretical_loss": 3.4186861235528108, "objective/train/tokens_used": 2104504800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24133270978927612, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498433113098145, "objective/train/weighted_lm_loss": 3.501760244369507, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9516600370407104, "theoretical_loss": 3.4186861235528108, "tokens_seen": 2084044800 }, { "epoch": 0.74, "learning_rate": 0.00012902005297010972, "loss": 3.1077, "theoretical_loss": 3.4186150051965036, "tokens_seen": 2084569088 }, { "epoch": 0.74, "learning_rate": 0.00012883087400681045, "loss": 3.1232, "theoretical_loss": 3.418472837145451, "tokens_seen": 2085617664 }, { "epoch": 0.75, "learning_rate": 0.00012864169504351117, "loss": 3.0959, "theoretical_loss": 3.4183307605557247, "tokens_seen": 2086666240 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.4884311556816101, "objective/train/docs_used": 1182648, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9421818256378174, "objective/train/original_loss": 2.9421820640563965, "objective/train/theoretical_loss": 3.4182420090857955, "objective/train/tokens_used": 2107781600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24259597063064575, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500762462615967, "objective/train/weighted_lm_loss": 3.0886693000793457, "objective/train/weights_max": 1.0512189865112305, "objective/train/weights_min": 0.9524582028388977, "theoretical_loss": 3.4182420090857955, "tokens_seen": 2087321600 }, { "epoch": 0.75, "learning_rate": 0.0001284525160802119, "loss": 3.036, "theoretical_loss": 3.418188775322567, "tokens_seen": 2087714816 }, { "epoch": 0.75, "learning_rate": 0.0001282633371169126, "loss": 3.0956, "theoretical_loss": 3.418046881341395, "tokens_seen": 2088763392 }, { "epoch": 0.75, "learning_rate": 0.0001280741581536133, "loss": 3.1281, "theoretical_loss": 3.417905078507798, "tokens_seen": 2089811968 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.48270413279533386, "objective/train/docs_used": 1184346, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0532760620117188, "objective/train/original_loss": 3.0532760620117188, "objective/train/theoretical_loss": 3.4177987861354815, "objective/train/tokens_used": 2111058400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24031376838684082, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494917631149292, "objective/train/weighted_lm_loss": 3.203695297241211, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.952264666557312, "theoretical_loss": 3.4177987861354815, "tokens_seen": 2090598400 }, { "epoch": 0.75, "learning_rate": 0.00012788497919031402, "loss": 3.0645, "theoretical_loss": 3.4177633667175344, "tokens_seen": 2090860544 }, { "epoch": 0.75, "learning_rate": 0.00012769580022701477, "loss": 3.1287, "theoretical_loss": 3.417621745866537, "tokens_seen": 2091909120 }, { "epoch": 0.75, "learning_rate": 0.0001275066212637155, "loss": 3.1295, "theoretical_loss": 3.4174802158509086, "tokens_seen": 2092957696 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.487575501203537, "objective/train/docs_used": 1186411, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.890275001525879, "objective/train/original_loss": 2.890275001525879, "objective/train/theoretical_loss": 3.4173564515190753, "objective/train/tokens_used": 2114335200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2412995547056198, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049984097480774, "objective/train/weighted_lm_loss": 3.034238815307617, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9572398662567139, "theoretical_loss": 3.4173564515190753, "tokens_seen": 2093875200 }, { "epoch": 0.75, "learning_rate": 0.0001273174423004162, "loss": 3.1029, "theoretical_loss": 3.4173387765669228, "tokens_seen": 2094006272 }, { "epoch": 0.75, "learning_rate": 0.0001271282633371169, "loss": 3.0415, "theoretical_loss": 3.4171974279110224, "tokens_seen": 2095054848 }, { "epoch": 0.75, "learning_rate": 0.00012693908437381762, "loss": 3.1255, "theoretical_loss": 3.417056169779822, "tokens_seen": 2096103424 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.4885167181491852, "objective/train/docs_used": 1188334, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.939175844192505, "objective/train/original_loss": 2.939175605773926, "objective/train/theoretical_loss": 3.4169150020701045, "objective/train/tokens_used": 2117612000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24050042033195496, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500741004943848, "objective/train/weighted_lm_loss": 3.0871052742004395, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9530365467071533, "theoretical_loss": 3.4169150020701045, "tokens_seen": 2097152000 }, { "epoch": 0.75, "learning_rate": 0.00012674990541051834, "loss": 3.0906, "theoretical_loss": 3.4169150020701045, "tokens_seen": 2097152000 }, { "epoch": 0.75, "learning_rate": 0.0001265607264472191, "loss": 3.0661, "theoretical_loss": 3.4167739246788225, "tokens_seen": 2098200576 }, { "epoch": 0.75, "learning_rate": 0.00012637154748391979, "loss": 3.0931, "theoretical_loss": 3.4166329375030973, "tokens_seen": 2099249152 }, { "epoch": 0.75, "learning_rate": 0.0001261823685206205, "loss": 3.0467, "theoretical_loss": 3.416492040440219, "tokens_seen": 2100297728 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.4632844030857086, "objective/train/docs_used": 1190051, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.067115545272827, "objective/train/original_loss": 3.0671157836914062, "objective/train/theoretical_loss": 3.4164744346383094, "objective/train/tokens_used": 2120888800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23205097019672394, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.047507643699646, "objective/train/weighted_lm_loss": 3.2131385803222656, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.951475977897644, "theoretical_loss": 3.4164744346383094, "tokens_seen": 2100428800 }, { "epoch": 0.75, "learning_rate": 0.00012599318955732123, "loss": 3.0882, "theoretical_loss": 3.416351233387645, "tokens_seen": 2101346304 }, { "epoch": 0.75, "learning_rate": 0.00012580401059402195, "loss": 3.1133, "theoretical_loss": 3.4162105162430008, "tokens_seen": 2102394880 }, { "epoch": 0.75, "learning_rate": 0.00012561483163072264, "loss": 3.1099, "theoretical_loss": 3.4160698889040804, "tokens_seen": 2103443456 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.47364890575408936, "objective/train/docs_used": 1192176, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6278741359710693, "objective/train/original_loss": 2.6278746128082275, "objective/train/theoretical_loss": 3.416034746089533, "objective/train/tokens_used": 2124165600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23315325379371643, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485498905181885, "objective/train/weighted_lm_loss": 2.7560436725616455, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9518656730651855, "theoretical_loss": 3.416034746089533, "tokens_seen": 2103705600 }, { "epoch": 0.75, "learning_rate": 0.0001254256526674234, "loss": 3.1199, "theoretical_loss": 3.4159293512688436, "tokens_seen": 2104492032 }, { "epoch": 0.75, "learning_rate": 0.0001252364737041241, "loss": 3.1159, "theoretical_loss": 3.415788903235418, "tokens_seen": 2105540608 }, { "epoch": 0.75, "learning_rate": 0.00012504729474082483, "loss": 3.0587, "theoretical_loss": 3.415648544702096, "tokens_seen": 2106589184 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.47495296597480774, "objective/train/docs_used": 1193800, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.238267660140991, "objective/train/original_loss": 3.238267421722412, "objective/train/theoretical_loss": 3.4155959333056156, "objective/train/tokens_used": 2127442400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23427124321460724, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048686146736145, "objective/train/weighted_lm_loss": 3.392909288406372, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.952079713344574, "theoretical_loss": 3.4155959333056156, "tokens_seen": 2106982400 }, { "epoch": 0.75, "learning_rate": 0.00012485811577752555, "loss": 3.0348, "theoretical_loss": 3.4155082755673374, "tokens_seen": 2107637760 }, { "epoch": 0.75, "learning_rate": 0.00012466893681422627, "loss": 3.0607, "theoretical_loss": 3.415368095729767, "tokens_seen": 2108686336 }, { "epoch": 0.75, "learning_rate": 0.000124479757850927, "loss": 3.1031, "theoretical_loss": 3.415228005088175, "tokens_seen": 2109734912 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.46726658940315247, "objective/train/docs_used": 1195692, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0192675590515137, "objective/train/original_loss": 3.0192675590515137, "objective/train/theoretical_loss": 3.4151579931842884, "objective/train/tokens_used": 2130719200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2279476523399353, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0478851795196533, "objective/train/weighted_lm_loss": 3.1648991107940674, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9538955688476562, "theoretical_loss": 3.4151579931842884, "tokens_seen": 2110259200 }, { "epoch": 0.75, "learning_rate": 0.00012429057888762768, "loss": 3.1491, "theoretical_loss": 3.4150880035415168, "tokens_seen": 2110783488 }, { "epoch": 0.75, "learning_rate": 0.00012410139992432843, "loss": 3.0926, "theoretical_loss": 3.4149480909889123, "tokens_seen": 2111832064 }, { "epoch": 0.75, "learning_rate": 0.00012391222096102913, "loss": 3.1851, "theoretical_loss": 3.4148082673296445, "tokens_seen": 2112880640 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.4854118824005127, "objective/train/docs_used": 1197624, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.756822109222412, "objective/train/original_loss": 2.756822347640991, "objective/train/theoretical_loss": 3.4147209226390647, "objective/train/tokens_used": 2133996000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24401891231536865, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497814416885376, "objective/train/weighted_lm_loss": 2.893664836883545, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9514028429985046, "theoretical_loss": 3.4147209226390647, "tokens_seen": 2113536000 }, { "epoch": 0.76, "learning_rate": 0.00012372304199772985, "loss": 3.1322, "theoretical_loss": 3.4146685324631627, "tokens_seen": 2113929216 }, { "epoch": 0.76, "learning_rate": 0.00012353386303443057, "loss": 3.0965, "theoretical_loss": 3.4145288862890775, "tokens_seen": 2114977792 }, { "epoch": 0.76, "learning_rate": 0.0001233446840711313, "loss": 3.1856, "theoretical_loss": 3.4143893287071636, "tokens_seen": 2116026368 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.47289353609085083, "objective/train/docs_used": 1198832, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1778388023376465, "objective/train/original_loss": 3.1778388023376465, "objective/train/theoretical_loss": 3.4142847185991414, "objective/train/tokens_used": 2137272800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2338077276945114, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484776496887207, "objective/train/weighted_lm_loss": 3.3315629959106445, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.952247142791748, "theoretical_loss": 3.4142847185991414, "tokens_seen": 2116812800 }, { "epoch": 0.76, "learning_rate": 0.000123155505107832, "loss": 3.1357, "theoretical_loss": 3.4142498596173594, "tokens_seen": 2117074944 }, { "epoch": 0.76, "learning_rate": 0.00012296632614453273, "loss": 3.0476, "theoretical_loss": 3.4141104789197634, "tokens_seen": 2118123520 }, { "epoch": 0.76, "learning_rate": 0.00012277714718123345, "loss": 3.0772, "theoretical_loss": 3.413971186514639, "tokens_seen": 2119172096 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.48130467534065247, "objective/train/docs_used": 1201065, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8641321659088135, "objective/train/original_loss": 2.8641324043273926, "objective/train/theoretical_loss": 3.4138493780092887, "objective/train/tokens_used": 2140549600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2385849952697754, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493431091308594, "objective/train/weighted_lm_loss": 3.0060529708862305, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9514316320419312, "theoretical_loss": 3.4138493780092887, "tokens_seen": 2120089600 }, { "epoch": 0.76, "learning_rate": 0.00012258796821793417, "loss": 3.1735, "theoretical_loss": 3.4138319823024093, "tokens_seen": 2120220672 }, { "epoch": 0.76, "learning_rate": 0.0001223987892546349, "loss": 3.0906, "theoretical_loss": 3.4136928661836605, "tokens_seen": 2121269248 }, { "epoch": 0.76, "learning_rate": 0.0001222096102913356, "loss": 3.1384, "theoretical_loss": 3.413553838059139, "tokens_seen": 2122317824 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.4823879301548004, "objective/train/docs_used": 1202332, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.959242582321167, "objective/train/original_loss": 2.959242582321167, "objective/train/theoretical_loss": 3.4134148978297523, "objective/train/tokens_used": 2143826400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2378014326095581, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494475364685059, "objective/train/weighted_lm_loss": 3.1045055389404297, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9571212530136108, "theoretical_loss": 3.4134148978297523, "tokens_seen": 2123366400 }, { "epoch": 0.76, "learning_rate": 0.00012202043132803632, "loss": 3.0108, "theoretical_loss": 3.4134148978297523, "tokens_seen": 2123366400 }, { "epoch": 0.76, "learning_rate": 0.00012183125236473705, "loss": 3.0749, "theoretical_loss": 3.413276045396567, "tokens_seen": 2124414976 }, { "epoch": 0.76, "learning_rate": 0.00012164207340143776, "loss": 3.1091, "theoretical_loss": 3.413137280660813, "tokens_seen": 2125463552 }, { "epoch": 0.76, "learning_rate": 0.00012145289443813848, "loss": 3.0921, "theoretical_loss": 3.412998603523877, "tokens_seen": 2126512128 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.48719412088394165, "objective/train/docs_used": 1204214, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.252816915512085, "objective/train/original_loss": 3.252816915512085, "objective/train/theoretical_loss": 3.412981275036147, "objective/train/tokens_used": 2147103200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2436467409133911, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499577522277832, "objective/train/weighted_lm_loss": 3.4153482913970947, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9514138698577881, "theoretical_loss": 3.412981275036147, "tokens_seen": 2126643200 }, { "epoch": 0.76, "learning_rate": 0.0001212637154748392, "loss": 3.0569, "theoretical_loss": 3.4128600138873066, "tokens_seen": 2127560704 }, { "epoch": 0.76, "learning_rate": 0.00012107453651153992, "loss": 3.0342, "theoretical_loss": 3.4127215116528076, "tokens_seen": 2128609280 }, { "epoch": 0.76, "learning_rate": 0.00012088535754824064, "loss": 3.0179, "theoretical_loss": 3.412583096722245, "tokens_seen": 2129657856 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.48841291666030884, "objective/train/docs_used": 1206282, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7933783531188965, "objective/train/original_loss": 2.7933781147003174, "objective/train/theoretical_loss": 3.41254850661936, "objective/train/tokens_used": 2150380000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2410411387681961, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500664710998535, "objective/train/weighted_lm_loss": 2.9343910217285156, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9590831995010376, "theoretical_loss": 3.41254850661936, "tokens_seen": 2129920000 }, { "epoch": 0.76, "learning_rate": 0.00012069617858494136, "loss": 3.0067, "theoretical_loss": 3.412444768997643, "tokens_seen": 2130706432 }, { "epoch": 0.76, "learning_rate": 0.00012050699962164208, "loss": 3.0622, "theoretical_loss": 3.4123065283811833, "tokens_seen": 2131755008 }, { "epoch": 0.76, "learning_rate": 0.00012031782065834279, "loss": 3.0901, "theoretical_loss": 3.412168374775204, "tokens_seen": 2132803584 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.48633265495300293, "objective/train/docs_used": 1208010, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.088775873184204, "objective/train/original_loss": 3.088775396347046, "objective/train/theoretical_loss": 3.412116589585446, "objective/train/tokens_used": 2153656800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23890967667102814, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498476028442383, "objective/train/weighted_lm_loss": 3.2435507774353027, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9601168036460876, "theoretical_loss": 3.412116589585446, "tokens_seen": 2133196800 }, { "epoch": 0.76, "learning_rate": 0.00012012864169504352, "loss": 3.0086, "theoretical_loss": 3.412030308082203, "tokens_seen": 2133852160 }, { "epoch": 0.76, "learning_rate": 0.00011993946273174423, "loss": 3.1356, "theoretical_loss": 3.411892328204834, "tokens_seen": 2134900736 }, { "epoch": 0.76, "learning_rate": 0.00011975028376844495, "loss": 3.0847, "theoretical_loss": 3.411754435045907, "tokens_seen": 2135949312 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.48867544531822205, "objective/train/docs_used": 1209970, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9987714290618896, "objective/train/original_loss": 2.9987711906433105, "objective/train/theoretical_loss": 3.4116855209555306, "objective/train/tokens_used": 2156933600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24339966475963593, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501048564910889, "objective/train/weighted_lm_loss": 3.1480982303619385, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9592180848121643, "theoretical_loss": 3.4116855209555306, "tokens_seen": 2136473600 }, { "epoch": 0.76, "learning_rate": 0.00011956110480514567, "loss": 3.0679, "theoretical_loss": 3.4116166285083898, "tokens_seen": 2136997888 }, { "epoch": 0.76, "learning_rate": 0.00011937192584184639, "loss": 3.0748, "theoretical_loss": 3.411478908495406, "tokens_seen": 2138046464 }, { "epoch": 0.76, "learning_rate": 0.0001191827468785471, "loss": 3.0104, "theoretical_loss": 3.411341274910234, "tokens_seen": 2139095040 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.4596433937549591, "objective/train/docs_used": 1212088, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6375980377197266, "objective/train/original_loss": 2.6375980377197266, "objective/train/theoretical_loss": 3.4112552977657105, "objective/train/tokens_used": 2160210400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23229673504829407, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0471444129943848, "objective/train/weighted_lm_loss": 2.7628188133239746, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9519481658935547, "theoretical_loss": 3.4112552977657105, "tokens_seen": 2139750400 }, { "epoch": 0.76, "learning_rate": 0.00011899356791524783, "loss": 3.0396, "theoretical_loss": 3.411203727656308, "tokens_seen": 2140143616 }, { "epoch": 0.76, "learning_rate": 0.00011880438895194855, "loss": 3.0234, "theoretical_loss": 3.411066266637219, "tokens_seen": 2141192192 }, { "epoch": 0.77, "learning_rate": 0.00011861520998864926, "loss": 3.0856, "theoretical_loss": 3.41092889175671, "tokens_seen": 2142240768 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.4793272912502289, "objective/train/docs_used": 1213995, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8040668964385986, "objective/train/original_loss": 2.8040671348571777, "objective/train/theoretical_loss": 3.410825917066955, "objective/train/tokens_used": 2163487200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23690885305404663, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049136996269226, "objective/train/weighted_lm_loss": 2.9428248405456543, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9542986154556274, "theoretical_loss": 3.410825917066955, "tokens_seen": 2143027200 }, { "epoch": 0.77, "learning_rate": 0.00011842603102535, "loss": 3.0692, "theoretical_loss": 3.4107916029186804, "tokens_seen": 2143289344 }, { "epoch": 0.77, "learning_rate": 0.0001182368520620507, "loss": 3.067, "theoretical_loss": 3.410654400027184, "tokens_seen": 2144337920 }, { "epoch": 0.77, "learning_rate": 0.00011804767309875142, "loss": 3.0471, "theoretical_loss": 3.4105172829864268, "tokens_seen": 2145386496 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.4841189980506897, "objective/train/docs_used": 1215500, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.244422197341919, "objective/train/original_loss": 3.2444229125976562, "objective/train/theoretical_loss": 3.4103973759250095, "objective/train/tokens_used": 2166764000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2405548393726349, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496346950531006, "objective/train/weighted_lm_loss": 3.404898166656494, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9524633288383484, "theoretical_loss": 3.4103973759250095, "tokens_seen": 2146304000 }, { "epoch": 0.77, "learning_rate": 0.00011785849413545214, "loss": 3.0911, "theoretical_loss": 3.4103802517007695, "tokens_seen": 2146435072 }, { "epoch": 0.77, "learning_rate": 0.00011766931517215286, "loss": 3.0649, "theoretical_loss": 3.410243306074726, "tokens_seen": 2147483648 }, { "epoch": 0.77, "learning_rate": 0.00011748013620885357, "loss": 3.1124, "theoretical_loss": 3.4101064460129624, "tokens_seen": 2148532224 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.4838142693042755, "objective/train/docs_used": 1217649, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9245572090148926, "objective/train/original_loss": 2.9245574474334717, "objective/train/theoretical_loss": 3.4099696714202983, "objective/train/tokens_used": 2170040800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24016062915325165, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496021509170532, "objective/train/weighted_lm_loss": 3.0698323249816895, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9524503946304321, "theoretical_loss": 3.4099696714202983, "tokens_seen": 2149580800 }, { "epoch": 0.77, "learning_rate": 0.0001172909572455543, "loss": 3.1153, "theoretical_loss": 3.4099696714202983, "tokens_seen": 2149580800 }, { "epoch": 0.77, "learning_rate": 0.00011710177828225501, "loss": 3.0984, "theoretical_loss": 3.4098329822017055, "tokens_seen": 2150629376 }, { "epoch": 0.77, "learning_rate": 0.00011691259931895573, "loss": 3.1195, "theoretical_loss": 3.4096963782623058, "tokens_seen": 2151677952 }, { "epoch": 0.77, "learning_rate": 0.00011672342035565647, "loss": 3.108, "theoretical_loss": 3.4095598595073753, "tokens_seen": 2152726528 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.47976046800613403, "objective/train/docs_used": 1219475, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9882757663726807, "objective/train/original_loss": 2.9882755279541016, "objective/train/theoretical_loss": 3.40954280064783, "objective/train/tokens_used": 2173317600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23530583083629608, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049172043800354, "objective/train/weighted_lm_loss": 3.135436773300171, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9584304690361023, "theoretical_loss": 3.40954280064783, "tokens_seen": 2152857600 }, { "epoch": 0.77, "learning_rate": 0.00011653424139235717, "loss": 3.1106, "theoretical_loss": 3.4094234258423395, "tokens_seen": 2153775104 }, { "epoch": 0.77, "learning_rate": 0.00011634506242905789, "loss": 3.1039, "theoretical_loss": 3.4092870771727766, "tokens_seen": 2154823680 }, { "epoch": 0.77, "learning_rate": 0.00011615588346575861, "loss": 3.0136, "theoretical_loss": 3.409150813404413, "tokens_seen": 2155872256 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.4865594506263733, "objective/train/docs_used": 1221633, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0492663383483887, "objective/train/original_loss": 3.0492663383483887, "objective/train/theoretical_loss": 3.409116760717102, "objective/train/tokens_used": 2176594400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24001134932041168, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049876093864441, "objective/train/weighted_lm_loss": 3.2022552490234375, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9568141102790833, "theoretical_loss": 3.409116760717102, "tokens_seen": 2156134400 }, { "epoch": 0.77, "learning_rate": 0.00011596670450245933, "loss": 3.0194, "theoretical_loss": 3.409014634443128, "tokens_seen": 2156920832 }, { "epoch": 0.77, "learning_rate": 0.00011577752553916004, "loss": 3.0251, "theoretical_loss": 3.408878540194949, "tokens_seen": 2157969408 }, { "epoch": 0.77, "learning_rate": 0.00011558834657586078, "loss": 3.0482, "theoretical_loss": 3.4087425305660544, "tokens_seen": 2159017984 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.48565536737442017, "objective/train/docs_used": 1223129, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.323091745376587, "objective/train/original_loss": 2.323091983795166, "objective/train/theoretical_loss": 3.4086915487520044, "objective/train/tokens_used": 2179871200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23969988524913788, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497839450836182, "objective/train/weighted_lm_loss": 2.4400320053100586, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9515894055366516, "theoretical_loss": 3.4086915487520044, "tokens_seen": 2159411200 }, { "epoch": 0.77, "learning_rate": 0.00011539916761256148, "loss": 3.0768, "theoretical_loss": 3.4086066054627713, "tokens_seen": 2160066560 }, { "epoch": 0.77, "learning_rate": 0.0001152099886492622, "loss": 3.049, "theoretical_loss": 3.408470764791576, "tokens_seen": 2161115136 }, { "epoch": 0.77, "learning_rate": 0.00011502080968596292, "loss": 3.0416, "theoretical_loss": 3.408335008459094, "tokens_seen": 2162163712 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.488086462020874, "objective/train/docs_used": 1224854, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0033509731292725, "objective/train/original_loss": 3.0033512115478516, "objective/train/theoretical_loss": 3.4082671618907314, "objective/train/tokens_used": 2183148000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24069108068943024, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050032138824463, "objective/train/weighted_lm_loss": 3.1541175842285156, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9704567790031433, "theoretical_loss": 3.4082671618907314, "tokens_seen": 2162688000 }, { "epoch": 0.77, "learning_rate": 0.00011483163072266364, "loss": 3.0526, "theoretical_loss": 3.408199336372099, "tokens_seen": 2163212288 }, { "epoch": 0.77, "learning_rate": 0.00011464245175936436, "loss": 3.1291, "theoretical_loss": 3.4080637484375127, "tokens_seen": 2164260864 }, { "epoch": 0.77, "learning_rate": 0.00011445327279606509, "loss": 3.0743, "theoretical_loss": 3.407928244562405, "tokens_seen": 2165309440 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.4888122081756592, "objective/train/docs_used": 1226563, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.946284770965576, "objective/train/original_loss": 2.9462850093841553, "objective/train/theoretical_loss": 3.407843597285684, "objective/train/tokens_used": 2186424800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24123455584049225, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501073598861694, "objective/train/weighted_lm_loss": 3.094599485397339, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9530252814292908, "theoretical_loss": 3.407843597285684, "tokens_seen": 2165964800 }, { "epoch": 0.77, "learning_rate": 0.0001142640938327658, "loss": 3.1164, "theoretical_loss": 3.4077928246539937, "tokens_seen": 2166358016 }, { "epoch": 0.77, "learning_rate": 0.00011407491486946651, "loss": 3.1127, "theoretical_loss": 3.407657488619642, "tokens_seen": 2167406592 }, { "epoch": 0.77, "learning_rate": 0.00011388573590616725, "loss": 3.1488, "theoretical_loss": 3.407522236366863, "tokens_seen": 2168455168 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.48051032423973083, "objective/train/docs_used": 1228356, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.281541585922241, "objective/train/original_loss": 3.2815420627593994, "objective/train/theoretical_loss": 3.4074208521033804, "objective/train/tokens_used": 2189701600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24015885591506958, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0492717027664185, "objective/train/weighted_lm_loss": 3.4423675537109375, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9514672160148621, "theoretical_loss": 3.4074208521033804, "tokens_seen": 2169241600 }, { "epoch": 0.77, "learning_rate": 0.00011369655694286795, "loss": 3.0957, "theoretical_loss": 3.407387067803314, "tokens_seen": 2169503744 }, { "epoch": 0.78, "learning_rate": 0.00011350737797956867, "loss": 3.1708, "theoretical_loss": 3.4072519828367995, "tokens_seen": 2170552320 }, { "epoch": 0.78, "learning_rate": 0.0001133181990162694, "loss": 3.1192, "theoretical_loss": 3.4071169813752706, "tokens_seen": 2171600896 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.48870110511779785, "objective/train/docs_used": 1230300, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.97200083732605, "objective/train/original_loss": 2.972001075744629, "objective/train/theoretical_loss": 3.4069989235243634, "objective/train/tokens_used": 2192978400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24322699010372162, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050106406211853, "objective/train/weighted_lm_loss": 3.1200037002563477, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.955571711063385, "theoretical_loss": 3.4069989235243634, "tokens_seen": 2172518400 }, { "epoch": 0.78, "learning_rate": 0.00011312902005297012, "loss": 3.0742, "theoretical_loss": 3.406982063326823, "tokens_seen": 2172649472 }, { "epoch": 0.78, "learning_rate": 0.00011293984108967082, "loss": 3.102, "theoretical_loss": 3.4068472285996987, "tokens_seen": 2173698048 }, { "epoch": 0.78, "learning_rate": 0.00011275066212637156, "loss": 3.0766, "theoretical_loss": 3.4067124771022845, "tokens_seen": 2174746624 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.47267434000968933, "objective/train/docs_used": 1232394, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9665775299072266, "objective/train/original_loss": 2.9665770530700684, "objective/train/theoretical_loss": 3.4065778087431124, "objective/train/tokens_used": 2196255200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23377160727977753, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048455834388733, "objective/train/weighted_lm_loss": 3.112847328186035, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9565809965133667, "theoretical_loss": 3.4065778087431124, "tokens_seen": 2175795200 }, { "epoch": 0.78, "learning_rate": 0.00011256148316307228, "loss": 3.061, "theoretical_loss": 3.4065778087431124, "tokens_seen": 2175795200 }, { "epoch": 0.78, "learning_rate": 0.00011237230419977298, "loss": 3.0844, "theoretical_loss": 3.406443223430858, "tokens_seen": 2176843776 }, { "epoch": 0.78, "learning_rate": 0.00011218312523647372, "loss": 3.0302, "theoretical_loss": 3.4063087210743426, "tokens_seen": 2177892352 }, { "epoch": 0.78, "learning_rate": 0.00011199394627317443, "loss": 3.0247, "theoretical_loss": 3.4061743015825305, "tokens_seen": 2178940928 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.485377699136734, "objective/train/docs_used": 1233537, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1749722957611084, "objective/train/original_loss": 3.1749720573425293, "objective/train/theoretical_loss": 3.40615750496795, "objective/train/tokens_used": 2199532000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24226607382297516, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497691631317139, "objective/train/weighted_lm_loss": 3.332637310028076, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9522126317024231, "theoretical_loss": 3.40615750496795, "tokens_seen": 2179072000 }, { "epoch": 0.78, "learning_rate": 0.00011180476730987515, "loss": 3.1218, "theoretical_loss": 3.4060399648645294, "tokens_seen": 2179989504 }, { "epoch": 0.78, "learning_rate": 0.00011161558834657587, "loss": 3.0676, "theoretical_loss": 3.4059057108295914, "tokens_seen": 2181038080 }, { "epoch": 0.78, "learning_rate": 0.00011142640938327659, "loss": 3.0838, "theoretical_loss": 3.4057715393871097, "tokens_seen": 2182086656 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.4907640814781189, "objective/train/docs_used": 1235503, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.107961654663086, "objective/train/original_loss": 3.107961654663086, "objective/train/theoretical_loss": 3.405738009420957, "objective/train/tokens_used": 2202808800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24375276267528534, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503153800964355, "objective/train/weighted_lm_loss": 3.2649710178375244, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9541966319084167, "theoretical_loss": 3.405738009420957, "tokens_seen": 2182348800 }, { "epoch": 0.78, "learning_rate": 0.0001112372304199773, "loss": 3.0817, "theoretical_loss": 3.4056374504466236, "tokens_seen": 2183135232 }, { "epoch": 0.78, "learning_rate": 0.00011104805145667803, "loss": 3.1127, "theoretical_loss": 3.405503443917811, "tokens_seen": 2184183808 }, { "epoch": 0.78, "learning_rate": 0.00011085887249337873, "loss": 3.1144, "theoretical_loss": 3.4053695197104945, "tokens_seen": 2185232384 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.4896021783351898, "objective/train/docs_used": 1237335, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.074671506881714, "objective/train/original_loss": 3.0746712684631348, "objective/train/theoretical_loss": 3.4053193193378806, "objective/train/tokens_used": 2206085600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24403336644172668, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502007007598877, "objective/train/weighted_lm_loss": 3.2284257411956787, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9618966579437256, "theoretical_loss": 3.4053193193378806, "tokens_seen": 2185625600 }, { "epoch": 0.78, "learning_rate": 0.00011066969353007946, "loss": 3.1068, "theoretical_loss": 3.4052356777346384, "tokens_seen": 2186280960 }, { "epoch": 0.78, "learning_rate": 0.00011048051456678018, "loss": 3.0704, "theoretical_loss": 3.4051019179003474, "tokens_seen": 2187329536 }, { "epoch": 0.78, "learning_rate": 0.0001102913356034809, "loss": 3.0601, "theoretical_loss": 3.404968240117869, "tokens_seen": 2188378112 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.47622618079185486, "objective/train/docs_used": 1239239, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.253549575805664, "objective/train/original_loss": 3.253549575805664, "objective/train/theoretical_loss": 3.4049014319680495, "objective/train/tokens_used": 2209362400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2409483939409256, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488470792770386, "objective/train/weighted_lm_loss": 3.4102189540863037, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9515026807785034, "theoretical_loss": 3.4049014319680495, "tokens_seen": 2188902400 }, { "epoch": 0.78, "learning_rate": 0.00011010215664018162, "loss": 3.0343, "theoretical_loss": 3.40483464429759, "tokens_seen": 2189426688 }, { "epoch": 0.78, "learning_rate": 0.00010991297767688234, "loss": 3.0409, "theoretical_loss": 3.4047011303500394, "tokens_seen": 2190475264 }, { "epoch": 0.78, "learning_rate": 0.00010972379871358306, "loss": 3.0841, "theoretical_loss": 3.404567698185886, "tokens_seen": 2191523840 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.4836944341659546, "objective/train/docs_used": 1240983, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.245880365371704, "objective/train/original_loss": 3.245880603790283, "objective/train/theoretical_loss": 3.404484344574285, "objective/train/tokens_used": 2212639200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23999130725860596, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495892763137817, "objective/train/weighted_lm_loss": 3.4061005115509033, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.951469898223877, "theoretical_loss": 3.404484344574285, "tokens_seen": 2192179200 }, { "epoch": 0.78, "learning_rate": 0.00010953461975028377, "loss": 3.0935, "theoretical_loss": 3.4044343477159393, "tokens_seen": 2192572416 }, { "epoch": 0.78, "learning_rate": 0.0001093454407869845, "loss": 3.1178, "theoretical_loss": 3.4043010788511476, "tokens_seen": 2193620992 }, { "epoch": 0.78, "learning_rate": 0.0001091562618236852, "loss": 3.0871, "theoretical_loss": 3.4041678915025995, "tokens_seen": 2194669568 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.4901869297027588, "objective/train/docs_used": 1242635, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.035893201828003, "objective/train/original_loss": 3.035892963409424, "objective/train/theoretical_loss": 3.404068054432816, "objective/train/tokens_used": 2215916000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2428739219903946, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502533912658691, "objective/train/weighted_lm_loss": 3.1888182163238525, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9534474611282349, "theoretical_loss": 3.404068054432816, "tokens_seen": 2195456000 }, { "epoch": 0.78, "learning_rate": 0.00010896708286038593, "loss": 3.1038, "theoretical_loss": 3.404034785581523, "tokens_seen": 2195718144 }, { "epoch": 0.78, "learning_rate": 0.00010877790389708665, "loss": 3.1226, "theoretical_loss": 3.4039017609992848, "tokens_seen": 2196766720 }, { "epoch": 0.79, "learning_rate": 0.00010858872493378737, "loss": 3.1207, "theoretical_loss": 3.4037688176673906, "tokens_seen": 2197815296 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.48253607749938965, "objective/train/docs_used": 1244165, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.643556594848633, "objective/train/original_loss": 2.6435563564300537, "objective/train/theoretical_loss": 3.4036525588331927, "objective/train/tokens_used": 2219192800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24120163917541504, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049479365348816, "objective/train/weighted_lm_loss": 2.772040843963623, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9570202231407166, "theoretical_loss": 3.4036525588331927, "tokens_seen": 2198732800 }, { "epoch": 0.79, "learning_rate": 0.00010839954597048807, "loss": 3.0908, "theoretical_loss": 3.403635955497484, "tokens_seen": 2198863872 }, { "epoch": 0.79, "learning_rate": 0.00010821036700718881, "loss": 3.0805, "theoretical_loss": 3.4035031744013473, "tokens_seen": 2199912448 }, { "epoch": 0.79, "learning_rate": 0.00010802118804388953, "loss": 3.0845, "theoretical_loss": 3.4033704742909006, "tokens_seen": 2200961024 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.47237733006477356, "objective/train/docs_used": 1245946, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.21795916557312, "objective/train/original_loss": 3.21795916557312, "objective/train/theoretical_loss": 3.403237855078202, "objective/train/tokens_used": 2222469600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23783241212368011, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484466552734375, "objective/train/weighted_lm_loss": 3.377448797225952, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9808477163314819, "theoretical_loss": 3.403237855078202, "tokens_seen": 2202009600 }, { "epoch": 0.79, "learning_rate": 0.00010783200908059024, "loss": 3.0344, "theoretical_loss": 3.403237855078202, "tokens_seen": 2202009600 }, { "epoch": 0.79, "learning_rate": 0.00010764283011729097, "loss": 3.0654, "theoretical_loss": 3.403105316675445, "tokens_seen": 2203058176 }, { "epoch": 0.79, "learning_rate": 0.00010745365115399168, "loss": 3.1111, "theoretical_loss": 3.402972858994963, "tokens_seen": 2204106752 }, { "epoch": 0.79, "learning_rate": 0.0001072644721906924, "loss": 3.1596, "theoretical_loss": 3.402840481949224, "tokens_seen": 2205155328 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.4866069257259369, "objective/train/docs_used": 1246233, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1187005043029785, "objective/train/original_loss": 3.1187007427215576, "objective/train/theoretical_loss": 3.4028239404837826, "objective/train/tokens_used": 2225746400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2408287674188614, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498849153518677, "objective/train/weighted_lm_loss": 3.2743141651153564, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9550109505653381, "theoretical_loss": 3.4028239404837826, "tokens_seen": 2205286400 }, { "epoch": 0.79, "learning_rate": 0.00010707529322739312, "loss": 3.2249, "theoretical_loss": 3.402708185450833, "tokens_seen": 2206203904 }, { "epoch": 0.79, "learning_rate": 0.00010688611426409384, "loss": 3.3303, "theoretical_loss": 3.4025759694125317, "tokens_seen": 2207252480 }, { "epoch": 0.79, "learning_rate": 0.00010669693530079455, "loss": 3.2622, "theoretical_loss": 3.4024438337471974, "tokens_seen": 2208301056 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.4907922148704529, "objective/train/docs_used": 1246233, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8649120330810547, "objective/train/original_loss": 2.8649120330810547, "objective/train/theoretical_loss": 3.4024108123789434, "objective/train/tokens_used": 2229023200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24190065264701843, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503088235855103, "objective/train/weighted_lm_loss": 3.0094716548919678, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9557651281356812, "theoretical_loss": 3.4024108123789434, "tokens_seen": 2208563200 }, { "epoch": 0.79, "learning_rate": 0.00010650775633749528, "loss": 3.3453, "theoretical_loss": 3.4023117783678436, "tokens_seen": 2209349632 }, { "epoch": 0.79, "learning_rate": 0.00010631857737419599, "loss": 3.3543, "theoretical_loss": 3.4021798031876176, "tokens_seen": 2210398208 }, { "epoch": 0.79, "learning_rate": 0.00010612939841089671, "loss": 3.441, "theoretical_loss": 3.4020479081198034, "tokens_seen": 2211446784 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.45996737480163574, "objective/train/docs_used": 1246233, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2608132362365723, "objective/train/original_loss": 3.2608132362365723, "objective/train/theoretical_loss": 3.4019984681056785, "objective/train/tokens_used": 2232300000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2254868596792221, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0471423864364624, "objective/train/weighted_lm_loss": 3.418138265609741, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9537967443466187, "theoretical_loss": 3.4019984681056785, "tokens_seen": 2211840000 }, { "epoch": 0.79, "learning_rate": 0.00010594021944759744, "loss": 3.4698, "theoretical_loss": 3.4019160930778196, "tokens_seen": 2212495360 }, { "epoch": 0.79, "learning_rate": 0.00010575104048429815, "loss": 3.5284, "theoretical_loss": 3.401784357975218, "tokens_seen": 2213543936 }, { "epoch": 0.79, "learning_rate": 0.00010556186152099887, "loss": 3.4935, "theoretical_loss": 3.401652702725687, "tokens_seen": 2214592512 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.48440462350845337, "objective/train/docs_used": 1248119, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.49127459526062, "objective/train/original_loss": 3.49127459526062, "objective/train/theoretical_loss": 3.401586905018886, "objective/train/tokens_used": 2235576800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23930440843105316, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496569871902466, "objective/train/weighted_lm_loss": 3.665234327316284, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9591064453125, "theoretical_loss": 3.401586905018886, "tokens_seen": 2215116800 }, { "epoch": 0.79, "learning_rate": 0.00010537268255769959, "loss": 3.5465, "theoretical_loss": 3.401521127243046, "tokens_seen": 2215641088 }, { "epoch": 0.79, "learning_rate": 0.00010518350359440031, "loss": 3.4807, "theoretical_loss": 3.4013896314412517, "tokens_seen": 2216689664 }, { "epoch": 0.79, "learning_rate": 0.00010499432463110102, "loss": 3.387, "theoretical_loss": 3.401258215234391, "tokens_seen": 2217738240 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.4825243651866913, "objective/train/docs_used": 1249212, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.276275634765625, "objective/train/original_loss": 3.276275157928467, "objective/train/theoretical_loss": 3.401176120486286, "objective/train/tokens_used": 2238853600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2399078756570816, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494717359542847, "objective/train/weighted_lm_loss": 3.437455415725708, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9523665308952332, "theoretical_loss": 3.401176120486286, "tokens_seen": 2218393600 }, { "epoch": 0.79, "learning_rate": 0.00010480514566780175, "loss": 3.495, "theoretical_loss": 3.401126878536686, "tokens_seen": 2218786816 }, { "epoch": 0.79, "learning_rate": 0.00010461596670450246, "loss": 3.3819, "theoretical_loss": 3.400995621262491, "tokens_seen": 2219835392 }, { "epoch": 0.79, "learning_rate": 0.00010442678774120318, "loss": 3.3977, "theoretical_loss": 3.4008644433262933, "tokens_seen": 2220883968 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.4869391620159149, "objective/train/docs_used": 1251383, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7835676670074463, "objective/train/original_loss": 2.7835679054260254, "objective/train/theoretical_loss": 3.400766111888339, "objective/train/tokens_used": 2242130400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24035805463790894, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499156713485718, "objective/train/weighted_lm_loss": 2.92244815826416, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9542126655578613, "theoretical_loss": 3.400766111888339, "tokens_seen": 2221670400 }, { "epoch": 0.79, "learning_rate": 0.0001042376087779039, "loss": 3.3702, "theoretical_loss": 3.400733344642712, "tokens_seen": 2221932544 }, { "epoch": 0.79, "learning_rate": 0.00010404842981460462, "loss": 3.4368, "theoretical_loss": 3.4006023251264987, "tokens_seen": 2222981120 }, { "epoch": 0.79, "learning_rate": 0.00010385925085130534, "loss": 3.3566, "theoretical_loss": 3.400471384692537, "tokens_seen": 2224029696 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.4827156364917755, "objective/train/docs_used": 1252992, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.140798807144165, "objective/train/original_loss": 3.140798807144165, "objective/train/theoretical_loss": 3.400356876618167, "objective/train/tokens_used": 2245407200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23796583712100983, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494810342788696, "objective/train/weighted_lm_loss": 3.2959017753601074, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9555644392967224, "theoretical_loss": 3.400356876618167, "tokens_seen": 2224947200 }, { "epoch": 0.79, "learning_rate": 0.00010367007188800606, "loss": 3.2769, "theoretical_loss": 3.4003405232558417, "tokens_seen": 2225078272 }, { "epoch": 0.8, "learning_rate": 0.00010348089292470678, "loss": 3.2955, "theoretical_loss": 3.4002097407315595, "tokens_seen": 2226126848 }, { "epoch": 0.8, "learning_rate": 0.00010329171396140749, "loss": 3.3102, "theoretical_loss": 3.4000790370349674, "tokens_seen": 2227175424 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.4734431505203247, "objective/train/docs_used": 1254892, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.233670473098755, "objective/train/original_loss": 3.233670473098755, "objective/train/theoretical_loss": 3.3999484120814736, "objective/train/tokens_used": 2248684000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23569948971271515, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485421419143677, "objective/train/weighted_lm_loss": 3.390467405319214, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9522518515586853, "theoretical_loss": 3.3999484120814736, "tokens_seen": 2228224000 }, { "epoch": 0.8, "learning_rate": 0.00010310253499810822, "loss": 3.3266, "theoretical_loss": 3.3999484120814736, "tokens_seen": 2228224000 }, { "epoch": 0.8, "learning_rate": 0.00010291335603480893, "loss": 3.2585, "theoretical_loss": 3.399817865786617, "tokens_seen": 2229272576 }, { "epoch": 0.8, "learning_rate": 0.00010272417707150965, "loss": 3.2397, "theoretical_loss": 3.399687398066067, "tokens_seen": 2230321152 }, { "epoch": 0.8, "learning_rate": 0.00010253499810821037, "loss": 3.1565, "theoretical_loss": 3.3995570088356217, "tokens_seen": 2231369728 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.49023377895355225, "objective/train/docs_used": 1257149, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8337886333465576, "objective/train/original_loss": 2.8337888717651367, "objective/train/theoretical_loss": 3.399540715696463, "objective/train/tokens_used": 2251960800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2429656982421875, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502585172653198, "objective/train/weighted_lm_loss": 2.9760193824768066, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9533647894859314, "theoretical_loss": 3.399540715696463, "tokens_seen": 2231500800 }, { "epoch": 0.8, "learning_rate": 0.00010234581914491109, "loss": 3.2025, "theoretical_loss": 3.3994266980112107, "tokens_seen": 2232418304 }, { "epoch": 0.8, "learning_rate": 0.0001021566401816118, "loss": 3.2688, "theoretical_loss": 3.3992964655088915, "tokens_seen": 2233466880 }, { "epoch": 0.8, "learning_rate": 0.00010196746121831253, "loss": 3.2317, "theoretical_loss": 3.3991663112448522, "tokens_seen": 2234515456 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.48362892866134644, "objective/train/docs_used": 1259222, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9153990745544434, "objective/train/original_loss": 2.9153990745544434, "objective/train/theoretical_loss": 3.3991337848937637, "objective/train/tokens_used": 2255237600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23721912503242493, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495686531066895, "objective/train/weighted_lm_loss": 3.060173273086548, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.954319179058075, "theoretical_loss": 3.3991337848937637, "tokens_seen": 2234777600 }, { "epoch": 0.8, "learning_rate": 0.00010177828225501324, "loss": 3.1987, "theoretical_loss": 3.3990362351354086, "tokens_seen": 2235564032 }, { "epoch": 0.8, "learning_rate": 0.00010158910329171396, "loss": 3.154, "theoretical_loss": 3.3989062370970062, "tokens_seen": 2236612608 }, { "epoch": 0.8, "learning_rate": 0.0001013999243284147, "loss": 3.2095, "theoretical_loss": 3.3987763170462184, "tokens_seen": 2237661184 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.48375609517097473, "objective/train/docs_used": 1261230, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0727992057800293, "objective/train/original_loss": 3.0727992057800293, "objective/train/theoretical_loss": 3.398727617116349, "objective/train/tokens_used": 2258514400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23994581401348114, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495952367782593, "objective/train/weighted_lm_loss": 3.225282907485962, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9582397937774658, "theoretical_loss": 3.398727617116349, "tokens_seen": 2238054400 }, { "epoch": 0.8, "learning_rate": 0.0001012107453651154, "loss": 3.152, "theoretical_loss": 3.398646474899747, "tokens_seen": 2238709760 }, { "epoch": 0.8, "learning_rate": 0.00010102156640181612, "loss": 3.1287, "theoretical_loss": 3.398516710574422, "tokens_seen": 2239758336 }, { "epoch": 0.8, "learning_rate": 0.00010083238743851684, "loss": 3.1316, "theoretical_loss": 3.3983870239872003, "tokens_seen": 2240806912 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.48984038829803467, "objective/train/docs_used": 1263119, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9827420711517334, "objective/train/original_loss": 2.9827423095703125, "objective/train/theoretical_loss": 3.398322209819462, "objective/train/tokens_used": 2261791200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24176624417304993, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502128601074219, "objective/train/weighted_lm_loss": 3.1325385570526123, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9521621465682983, "theoretical_loss": 3.398322209819462, "tokens_seen": 2241331200 }, { "epoch": 0.8, "learning_rate": 0.00010064320847521756, "loss": 3.14, "theoretical_loss": 3.3982574150551663, "tokens_seen": 2241855488 }, { "epoch": 0.8, "learning_rate": 0.00010045402951191827, "loss": 3.125, "theoretical_loss": 3.3981278836955333, "tokens_seen": 2242904064 }, { "epoch": 0.8, "learning_rate": 0.000100264850548619, "loss": 3.1404, "theoretical_loss": 3.397998429825639, "tokens_seen": 2243952640 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.4860360622406006, "objective/train/docs_used": 1265240, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.894569158554077, "objective/train/original_loss": 2.8945693969726562, "objective/train/theoretical_loss": 3.397917560470535, "objective/train/tokens_used": 2265068000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2396802008152008, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498219728469849, "objective/train/weighted_lm_loss": 3.038137435913086, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9669562578201294, "theoretical_loss": 3.397917560470535, "tokens_seen": 2244608000 }, { "epoch": 0.8, "learning_rate": 0.00010007567158531971, "loss": 3.1545, "theoretical_loss": 3.397869053362949, "tokens_seen": 2245001216 }, { "epoch": 0.8, "learning_rate": 9.988649262202043e-05, "loss": 3.0972, "theoretical_loss": 3.3977397542250563, "tokens_seen": 2246049792 }, { "epoch": 0.8, "learning_rate": 9.969731365872115e-05, "loss": 3.0479, "theoretical_loss": 3.3976105323296775, "tokens_seen": 2247098368 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.4735538065433502, "objective/train/docs_used": 1267114, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.910813093185425, "objective/train/original_loss": 2.9108128547668457, "objective/train/theoretical_loss": 3.3975136665491172, "objective/train/tokens_used": 2268344800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24066385626792908, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485782623291016, "objective/train/weighted_lm_loss": 3.0512495040893555, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9516804814338684, "theoretical_loss": 3.3975136665491172, "tokens_seen": 2247884800 }, { "epoch": 0.8, "learning_rate": 9.950813469542187e-05, "loss": 3.0724, "theoretical_loss": 3.3974813875946577, "tokens_seen": 2248146944 }, { "epoch": 0.8, "learning_rate": 9.93189557321226e-05, "loss": 3.0347, "theoretical_loss": 3.3973523199379656, "tokens_seen": 2249195520 }, { "epoch": 0.8, "learning_rate": 9.912977676882331e-05, "loss": 3.0484, "theoretical_loss": 3.397223329277697, "tokens_seen": 2250244096 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.48258739709854126, "objective/train/docs_used": 1269266, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.058069944381714, "objective/train/original_loss": 3.058070182800293, "objective/train/theoretical_loss": 3.3971105255467977, "objective/train/tokens_used": 2271621600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24109937250614166, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049484133720398, "objective/train/weighted_lm_loss": 3.208803415298462, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9516813158988953, "theoretical_loss": 3.3971105255467977, "tokens_seen": 2251161600 }, { "epoch": 0.8, "learning_rate": 9.894059780552403e-05, "loss": 3.101, "theoretical_loss": 3.397094415532072, "tokens_seen": 2251292672 }, { "epoch": 0.8, "learning_rate": 9.875141884222474e-05, "loss": 3.0379, "theoretical_loss": 3.396965578619435, "tokens_seen": 2252341248 }, { "epoch": 0.8, "learning_rate": 9.856223987892548e-05, "loss": 3.1686, "theoretical_loss": 3.3968368184582562, "tokens_seen": 2253389824 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.48647889494895935, "objective/train/docs_used": 1270683, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8880631923675537, "objective/train/original_loss": 2.8880629539489746, "objective/train/theoretical_loss": 3.39670813496713, "objective/train/tokens_used": 2274898400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2403435856103897, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498695373535156, "objective/train/weighted_lm_loss": 3.032536268234253, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9517598152160645, "theoretical_loss": 3.39670813496713, "tokens_seen": 2254438400 }, { "epoch": 0.81, "learning_rate": 9.837306091562618e-05, "loss": 3.0102, "theoretical_loss": 3.39670813496713, "tokens_seen": 2254438400 }, { "epoch": 0.81, "learning_rate": 9.81838819523269e-05, "loss": 3.0983, "theoretical_loss": 3.396579528064774, "tokens_seen": 2255486976 }, { "epoch": 0.81, "learning_rate": 9.799470298902762e-05, "loss": 3.0334, "theoretical_loss": 3.396450997670031, "tokens_seen": 2256535552 }, { "epoch": 0.81, "learning_rate": 9.780552402572834e-05, "loss": 3.1238, "theoretical_loss": 3.3963225437018663, "tokens_seen": 2257584128 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.4851101040840149, "objective/train/docs_used": 1272388, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.497123956680298, "objective/train/original_loss": 2.497124195098877, "objective/train/theoretical_loss": 3.3963064923255586, "objective/train/tokens_used": 2278175200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2395034283399582, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049728274345398, "objective/train/weighted_lm_loss": 2.621992588043213, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9556369185447693, "theoretical_loss": 3.3963064923255586, "tokens_seen": 2257715200 }, { "epoch": 0.81, "learning_rate": 9.761634506242905e-05, "loss": 3.0345, "theoretical_loss": 3.3961941660793697, "tokens_seen": 2258632704 }, { "epoch": 0.81, "learning_rate": 9.742716609912979e-05, "loss": 3.095, "theoretical_loss": 3.3960658647217534, "tokens_seen": 2259681280 }, { "epoch": 0.81, "learning_rate": 9.72379871358305e-05, "loss": 3.0746, "theoretical_loss": 3.3959376395483525, "tokens_seen": 2260729856 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.443075954914093, "objective/train/docs_used": 1274638, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.932318925857544, "objective/train/original_loss": 2.932318687438965, "objective/train/theoretical_loss": 3.395905595149345, "objective/train/tokens_used": 2281452000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23310764133930206, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.045491099357605, "objective/train/weighted_lm_loss": 3.069843292236328, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9517802000045776, "theoretical_loss": 3.395905595149345, "tokens_seen": 2260992000 }, { "epoch": 0.81, "learning_rate": 9.704880817253121e-05, "loss": 3.0252, "theoretical_loss": 3.3958094904786256, "tokens_seen": 2261778432 }, { "epoch": 0.81, "learning_rate": 9.685962920923195e-05, "loss": 3.0028, "theoretical_loss": 3.3956814174321526, "tokens_seen": 2262827008 }, { "epoch": 0.81, "learning_rate": 9.667045024593265e-05, "loss": 3.0742, "theoretical_loss": 3.3955534203286364, "tokens_seen": 2263875584 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.4872230589389801, "objective/train/docs_used": 1276376, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.892059326171875, "objective/train/original_loss": 2.892059326171875, "objective/train/theoretical_loss": 3.3955054409774936, "objective/train/tokens_used": 2284728800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2388237714767456, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499361753463745, "objective/train/weighted_lm_loss": 3.0363094806671143, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.979783833026886, "theoretical_loss": 3.3955054409774936, "tokens_seen": 2264268800 }, { "epoch": 0.81, "learning_rate": 9.648127128263337e-05, "loss": 3.0924, "theoretical_loss": 3.395425499087902, "tokens_seen": 2264924160 }, { "epoch": 0.81, "learning_rate": 9.62920923193341e-05, "loss": 2.9934, "theoretical_loss": 3.395297653629895, "tokens_seen": 2265972736 }, { "epoch": 0.81, "learning_rate": 9.610291335603482e-05, "loss": 3.1318, "theoretical_loss": 3.3951698838746838, "tokens_seen": 2267021312 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.4725620746612549, "objective/train/docs_used": 1278407, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.076742649078369, "objective/train/original_loss": 3.076742649078369, "objective/train/theoretical_loss": 3.3951060273606806, "objective/train/tokens_used": 2288005600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23542000353336334, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0484524965286255, "objective/train/weighted_lm_loss": 3.224698066711426, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9519801139831543, "theoretical_loss": 3.3951060273606806, "tokens_seen": 2267545600 }, { "epoch": 0.81, "learning_rate": 9.591373439273552e-05, "loss": 3.0248, "theoretical_loss": 3.395042189742457, "tokens_seen": 2268069888 }, { "epoch": 0.81, "learning_rate": 9.572455542943626e-05, "loss": 3.0607, "theoretical_loss": 3.394914571153525, "tokens_seen": 2269118464 }, { "epoch": 0.81, "learning_rate": 9.553537646613696e-05, "loss": 3.0339, "theoretical_loss": 3.3947870280283183, "tokens_seen": 2270167040 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.4836905300617218, "objective/train/docs_used": 1280391, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7354981899261475, "objective/train/original_loss": 2.7354984283447266, "objective/train/theoretical_loss": 3.39470735186118, "objective/train/tokens_used": 2291282400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23765282332897186, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049576997756958, "objective/train/weighted_lm_loss": 2.871483325958252, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9534169435501099, "theoretical_loss": 3.39470735186118, "tokens_seen": 2270822400 }, { "epoch": 0.81, "learning_rate": 9.534619750283768e-05, "loss": 3.0532, "theoretical_loss": 3.3946595602873884, "tokens_seen": 2271215616 }, { "epoch": 0.81, "learning_rate": 9.515701853953842e-05, "loss": 3.1007, "theoretical_loss": 3.3945321678514064, "tokens_seen": 2272264192 }, { "epoch": 0.81, "learning_rate": 9.496783957623913e-05, "loss": 3.0615, "theoretical_loss": 3.394404850641165, "tokens_seen": 2273312768 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.48565566539764404, "objective/train/docs_used": 1282312, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.136152744293213, "objective/train/original_loss": 3.1361522674560547, "objective/train/theoretical_loss": 3.3943094120527944, "objective/train/tokens_used": 2294559200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23977386951446533, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497844219207764, "objective/train/weighted_lm_loss": 3.292649984359741, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9598386883735657, "theoretical_loss": 3.3943094120527944, "tokens_seen": 2274099200 }, { "epoch": 0.81, "learning_rate": 9.477866061293985e-05, "loss": 3.1308, "theoretical_loss": 3.3942776085775743, "tokens_seen": 2274361344 }, { "epoch": 0.81, "learning_rate": 9.458948164964057e-05, "loss": 3.0992, "theoretical_loss": 3.394150441581666, "tokens_seen": 2275409920 }, { "epoch": 0.81, "learning_rate": 9.440030268634129e-05, "loss": 3.0643, "theoretical_loss": 3.3940233495745904, "tokens_seen": 2276458496 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.48642486333847046, "objective/train/docs_used": 1284150, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.593719959259033, "objective/train/original_loss": 2.593719959259033, "objective/train/theoretical_loss": 3.3939122055207807, "objective/train/tokens_used": 2297836000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24082650244235992, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498665571212769, "objective/train/weighted_lm_loss": 2.723041534423828, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9529795050621033, "theoretical_loss": 3.3939122055207807, "tokens_seen": 2277376000 }, { "epoch": 0.81, "learning_rate": 9.4211123723042e-05, "loss": 3.0231, "theoretical_loss": 3.393896332477617, "tokens_seen": 2277507072 }, { "epoch": 0.81, "learning_rate": 9.402194475974273e-05, "loss": 3.0623, "theoretical_loss": 3.3937693902121335, "tokens_seen": 2278555648 }, { "epoch": 0.81, "learning_rate": 9.383276579644344e-05, "loss": 3.0839, "theoretical_loss": 3.393642522699647, "tokens_seen": 2279604224 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.48222169280052185, "objective/train/docs_used": 1285842, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.761013984680176, "objective/train/original_loss": 2.761013984680176, "objective/train/theoretical_loss": 3.393515729861783, "objective/train/tokens_used": 2301112800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2392144352197647, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494379997253418, "objective/train/weighted_lm_loss": 2.8989064693450928, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9535624980926514, "theoretical_loss": 3.393515729861783, "tokens_seen": 2280652800 }, { "epoch": 0.81, "learning_rate": 9.364358683314416e-05, "loss": 3.0902, "theoretical_loss": 3.393515729861783, "tokens_seen": 2280652800 }, { "epoch": 0.81, "learning_rate": 9.345440786984488e-05, "loss": 3.111, "theoretical_loss": 3.3933890116202843, "tokens_seen": 2281701376 }, { "epoch": 0.82, "learning_rate": 9.32652289065456e-05, "loss": 3.1021, "theoretical_loss": 3.3932623678970133, "tokens_seen": 2282749952 }, { "epoch": 0.82, "learning_rate": 9.30760499432463e-05, "loss": 3.0894, "theoretical_loss": 3.393135798613948, "tokens_seen": 2283798528 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.48043379187583923, "objective/train/docs_used": 1287535, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0948588848114014, "objective/train/original_loss": 3.0948591232299805, "objective/train/theoretical_loss": 3.3931199826837606, "objective/train/tokens_used": 2304389600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23888222873210907, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049257516860962, "objective/train/weighted_lm_loss": 3.246586799621582, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9514316320419312, "theoretical_loss": 3.3931199826837606, "tokens_seen": 2283929600 }, { "epoch": 0.82, "learning_rate": 9.288687097994704e-05, "loss": 3.1324, "theoretical_loss": 3.3930093036931854, "tokens_seen": 2284847104 }, { "epoch": 0.82, "learning_rate": 9.269769201664776e-05, "loss": 3.0722, "theoretical_loss": 3.392882883056939, "tokens_seen": 2285895680 }, { "epoch": 0.82, "learning_rate": 9.250851305334847e-05, "loss": 3.0668, "theoretical_loss": 3.39275653662754, "tokens_seen": 2286944256 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.48825782537460327, "objective/train/docs_used": 1288943, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.927969217300415, "objective/train/original_loss": 2.927968978881836, "objective/train/theoretical_loss": 3.392724961605919, "objective/train/tokens_used": 2307666400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24235625565052032, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500577688217163, "objective/train/weighted_lm_loss": 3.073913335800171, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.958259642124176, "theoretical_loss": 3.392724961605919, "tokens_seen": 2287206400 }, { "epoch": 0.82, "learning_rate": 9.23193340900492e-05, "loss": 3.1182, "theoretical_loss": 3.3926302643274355, "tokens_seen": 2287992832 }, { "epoch": 0.82, "learning_rate": 9.21301551267499e-05, "loss": 3.0441, "theoretical_loss": 3.39250406607919, "tokens_seen": 2289041408 }, { "epoch": 0.82, "learning_rate": 9.194097616345063e-05, "loss": 3.1382, "theoretical_loss": 3.3923779418054827, "tokens_seen": 2290089984 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.48361656069755554, "objective/train/docs_used": 1290666, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.5616543292999268, "objective/train/original_loss": 2.561654567718506, "objective/train/theoretical_loss": 3.392330664258642, "objective/train/tokens_used": 2310943200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23792539536952972, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495710372924805, "objective/train/weighted_lm_loss": 2.6900863647460938, "objective/train/weights_max": 1.0512185096740723, "objective/train/weights_min": 0.9807561635971069, "theoretical_loss": 3.392330664258642, "tokens_seen": 2290483200 }, { "epoch": 0.82, "learning_rate": 9.175179720015135e-05, "loss": 3.1281, "theoretical_loss": 3.3922518914291113, "tokens_seen": 2291138560 }, { "epoch": 0.82, "learning_rate": 9.156261823685207e-05, "loss": 3.1114, "theoretical_loss": 3.3921259148729876, "tokens_seen": 2292187136 }, { "epoch": 0.82, "learning_rate": 9.137343927355278e-05, "loss": 3.0929, "theoretical_loss": 3.39200001206014, "tokens_seen": 2293235712 }, { "debugging/Self-BLEU-5": 0.515096219338814, "debugging/distinct-1-grams": 0.7923118197529954, "debugging/distinct-2-grams": 0.9542743672238616, "debugging/entropy-1-grams": 6.147062465574793, "debugging/entropy-2-grams": 7.107820582543805, "debugging/length": 504.1666666666667, "debugging/num_segments": 18, "debugging/raw_token_scores_avg": 0.012544393539428711, "debugging/raw_token_scores_std": 0.04549340158700943, "epoch": 0.82, "objective/train/advantage_avg": 0.48745694756507874, "objective/train/docs_used": 1292636, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8265254497528076, "objective/train/original_loss": 2.8265256881713867, "objective/train/theoretical_loss": 3.3919370882834223, "objective/train/tokens_used": 2314220000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23968477547168732, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499640703201294, "objective/train/weighted_lm_loss": 2.9688053131103516, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9514824151992798, "theoretical_loss": 3.3919370882834223, "tokens_seen": 2293760000 }, { "epoch": 0.82, "learning_rate": 9.118426031025351e-05, "loss": 3.0551, "theoretical_loss": 3.3918741829137113, "tokens_seen": 2294284288 }, { "epoch": 0.82, "learning_rate": 9.099508134695422e-05, "loss": 3.1223, "theoretical_loss": 3.3917484273569602, "tokens_seen": 2295332864 }, { "epoch": 0.82, "learning_rate": 9.080590238365494e-05, "loss": 3.1222, "theoretical_loss": 3.39162274531326, "tokens_seen": 2296381440 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.48345085978507996, "objective/train/docs_used": 1294107, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.764035940170288, "objective/train/original_loss": 2.764035701751709, "objective/train/theoretical_loss": 3.391544231332792, "objective/train/tokens_used": 2317496800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23902611434459686, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495600700378418, "objective/train/weighted_lm_loss": 2.902003049850464, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9564229846000671, "theoretical_loss": 3.391544231332792, "tokens_seen": 2297036800 }, { "epoch": 0.82, "learning_rate": 9.061672342035567e-05, "loss": 3.1565, "theoretical_loss": 3.391497136706099, "tokens_seen": 2297430016 }, { "epoch": 0.82, "learning_rate": 9.042754445705638e-05, "loss": 3.16, "theoretical_loss": 3.3913716014590807, "tokens_seen": 2298478592 }, { "epoch": 0.82, "learning_rate": 9.02383654937571e-05, "loss": 3.2324, "theoretical_loss": 3.3912461394959212, "tokens_seen": 2299527168 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.4791503846645355, "objective/train/docs_used": 1294624, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1768290996551514, "objective/train/original_loss": 3.1768290996551514, "objective/train/theoretical_loss": 3.3911520910702593, "objective/train/tokens_used": 2320773600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23488926887512207, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049108862876892, "objective/train/weighted_lm_loss": 3.3320627212524414, "objective/train/weights_max": 1.0512152910232544, "objective/train/weights_min": 0.962348997592926, "theoretical_loss": 3.3911520910702593, "tokens_seen": 2300313600 }, { "epoch": 0.82, "learning_rate": 9.004918653045782e-05, "loss": 3.2351, "theoretical_loss": 3.391120750740452, "tokens_seen": 2300575744 }, { "epoch": 0.82, "learning_rate": 8.986000756715854e-05, "loss": 3.2923, "theoretical_loss": 3.3909954351166176, "tokens_seen": 2301624320 }, { "epoch": 0.82, "learning_rate": 8.967082860385925e-05, "loss": 3.2683, "theoretical_loss": 3.3908701925484768, "tokens_seen": 2302672896 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.45928680896759033, "objective/train/docs_used": 1296468, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9212799072265625, "objective/train/original_loss": 2.9212799072265625, "objective/train/theoretical_loss": 3.390760665170238, "objective/train/tokens_used": 2324050400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2240046262741089, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0470670461654663, "objective/train/weighted_lm_loss": 3.061581611633301, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9757957458496094, "theoretical_loss": 3.390760665170238, "tokens_seen": 2303590400 }, { "epoch": 0.82, "learning_rate": 8.948164964055998e-05, "loss": 3.2527, "theoretical_loss": 3.3907450229602016, "tokens_seen": 2303721472 }, { "epoch": 0.82, "learning_rate": 8.929247067726069e-05, "loss": 3.2998, "theoretical_loss": 3.390619926276077, "tokens_seen": 2304770048 }, { "epoch": 0.82, "learning_rate": 8.910329171396141e-05, "loss": 3.2132, "theoretical_loss": 3.390494902420501, "tokens_seen": 2305818624 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.48398545384407043, "objective/train/docs_used": 1298252, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6797232627868652, "objective/train/original_loss": 2.6797232627868652, "objective/train/theoretical_loss": 3.390369951317984, "objective/train/tokens_used": 2327327200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24018828570842743, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049619436264038, "objective/train/weighted_lm_loss": 2.812175989151001, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.9517936706542969, "theoretical_loss": 3.390369951317984, "tokens_seen": 2306867200 }, { "epoch": 0.82, "learning_rate": 8.891411275066213e-05, "loss": 3.2114, "theoretical_loss": 3.390369951317984, "tokens_seen": 2306867200 }, { "epoch": 0.82, "learning_rate": 8.872493378736285e-05, "loss": 3.2249, "theoretical_loss": 3.3902450728931504, "tokens_seen": 2307915776 }, { "epoch": 0.82, "learning_rate": 8.853575482406357e-05, "loss": 3.179, "theoretical_loss": 3.390120267070735, "tokens_seen": 2308964352 }, { "epoch": 0.83, "learning_rate": 8.834657586076429e-05, "loss": 3.167, "theoretical_loss": 3.3899955337755854, "tokens_seen": 2310012928 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.46928122639656067, "objective/train/docs_used": 1299979, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0463294982910156, "objective/train/original_loss": 3.0463294982910156, "objective/train/theoretical_loss": 3.3899799472095267, "objective/train/tokens_used": 2330604000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23485510051250458, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481215715408325, "objective/train/weighted_lm_loss": 3.1912734508514404, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9515340924263, "theoretical_loss": 3.3899799472095267, "tokens_seen": 2310144000 }, { "epoch": 0.83, "learning_rate": 8.815739689746501e-05, "loss": 3.2139, "theoretical_loss": 3.3898708729326614, "tokens_seen": 2311061504 }, { "epoch": 0.83, "learning_rate": 8.796821793416572e-05, "loss": 3.2259, "theoretical_loss": 3.3897462844670345, "tokens_seen": 2312110080 }, { "epoch": 0.83, "learning_rate": 8.777903897086645e-05, "loss": 3.2116, "theoretical_loss": 3.3896217683038863, "tokens_seen": 2313158656 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.4831146001815796, "objective/train/docs_used": 1302096, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2344326972961426, "objective/train/original_loss": 3.2344326972961426, "objective/train/theoretical_loss": 3.3895906505516047, "objective/train/tokens_used": 2333880800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2407372146844864, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495350360870361, "objective/train/weighted_lm_loss": 3.3936874866485596, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9515005946159363, "theoretical_loss": 3.3895906505516047, "tokens_seen": 2313420800 }, { "epoch": 0.83, "learning_rate": 8.758986000756716e-05, "loss": 3.1729, "theoretical_loss": 3.3894973243685116, "tokens_seen": 2314207232 }, { "epoch": 0.83, "learning_rate": 8.740068104426788e-05, "loss": 3.1754, "theoretical_loss": 3.389372952586315, "tokens_seen": 2315255808 }, { "epoch": 0.83, "learning_rate": 8.72115020809686e-05, "loss": 3.1716, "theoretical_loss": 3.3892486528828116, "tokens_seen": 2316304384 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.47961023449897766, "objective/train/docs_used": 1304213, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.84677791595459, "objective/train/original_loss": 2.846778392791748, "objective/train/theoretical_loss": 3.3892020590616028, "objective/train/tokens_used": 2337157600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24148398637771606, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049188494682312, "objective/train/weighted_lm_loss": 2.9851181507110596, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.951810896396637, "theoretical_loss": 3.3892020590616028, "tokens_seen": 2316697600 }, { "epoch": 0.83, "learning_rate": 8.702232311766932e-05, "loss": 3.0908, "theoretical_loss": 3.389124425183628, "tokens_seen": 2317352960 }, { "epoch": 0.83, "learning_rate": 8.683314415437003e-05, "loss": 3.0297, "theoretical_loss": 3.3890002694145007, "tokens_seen": 2318401536 }, { "epoch": 0.83, "learning_rate": 8.664396519107076e-05, "loss": 3.1335, "theoretical_loss": 3.388876185501276, "tokens_seen": 2319450112 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.48336800932884216, "objective/train/docs_used": 1306171, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2488811016082764, "objective/train/original_loss": 3.2488808631896973, "objective/train/theoretical_loss": 3.388814170467484, "objective/train/tokens_used": 2340434400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23837092518806458, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495483875274658, "objective/train/weighted_lm_loss": 3.409419298171997, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9514601826667786, "theoretical_loss": 3.388814170467484, "tokens_seen": 2319974400 }, { "epoch": 0.83, "learning_rate": 8.645478622777148e-05, "loss": 3.1275, "theoretical_loss": 3.3887521733699106, "tokens_seen": 2320498688 }, { "epoch": 0.83, "learning_rate": 8.626560726447219e-05, "loss": 3.1834, "theoretical_loss": 3.388628232946471, "tokens_seen": 2321547264 }, { "epoch": 0.83, "learning_rate": 8.607642830117292e-05, "loss": 3.1321, "theoretical_loss": 3.388504364157133, "tokens_seen": 2322595840 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.472857803106308, "objective/train/docs_used": 1308132, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8800430297851562, "objective/train/original_loss": 2.8800430297851562, "objective/train/theoretical_loss": 3.3884269825077302, "objective/train/tokens_used": 2343711200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23990888893604279, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485048294067383, "objective/train/weighted_lm_loss": 3.0166618824005127, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9515467882156372, "theoretical_loss": 3.3884269825077302, "tokens_seen": 2323251200 }, { "epoch": 0.83, "learning_rate": 8.588724933787363e-05, "loss": 3.1415, "theoretical_loss": 3.3883805669281815, "tokens_seen": 2323644416 }, { "epoch": 0.83, "learning_rate": 8.569807037457435e-05, "loss": 3.1349, "theoretical_loss": 3.388256841186011, "tokens_seen": 2324692992 }, { "epoch": 0.83, "learning_rate": 8.550889141127507e-05, "loss": 3.0799, "theoretical_loss": 3.3881331868571234, "tokens_seen": 2325741568 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.4859318435192108, "objective/train/docs_used": 1310381, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.184096097946167, "objective/train/original_loss": 3.184096336364746, "objective/train/theoretical_loss": 3.3880404929312737, "objective/train/tokens_used": 2346988000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23933574557304382, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498096942901611, "objective/train/weighted_lm_loss": 3.3434245586395264, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.96250981092453, "theoretical_loss": 3.3880404929312737, "tokens_seen": 2326528000 }, { "epoch": 0.83, "learning_rate": 8.531971244797579e-05, "loss": 3.1258, "theoretical_loss": 3.3880096038681313, "tokens_seen": 2326790144 }, { "epoch": 0.83, "learning_rate": 8.51305334846765e-05, "loss": 3.1036, "theoretical_loss": 3.387886092145755, "tokens_seen": 2327838720 }, { "epoch": 0.83, "learning_rate": 8.494135452137723e-05, "loss": 3.0237, "theoretical_loss": 3.387762651616822, "tokens_seen": 2328887296 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.48883962631225586, "objective/train/docs_used": 1312090, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.894986152648926, "objective/train/original_loss": 2.894986152648926, "objective/train/theoretical_loss": 3.3876546994974377, "objective/train/tokens_used": 2350264800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24219678342342377, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501152276992798, "objective/train/weighted_lm_loss": 3.03971266746521, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9689180850982666, "theoretical_loss": 3.3876546994974377, "tokens_seen": 2329804800 }, { "epoch": 0.83, "learning_rate": 8.475217555807794e-05, "loss": 3.1501, "theoretical_loss": 3.3876392822082697, "tokens_seen": 2329935872 }, { "epoch": 0.83, "learning_rate": 8.456299659477866e-05, "loss": 3.1006, "theoretical_loss": 3.3875159838471416, "tokens_seen": 2330984448 }, { "epoch": 0.83, "learning_rate": 8.43738176314794e-05, "loss": 3.0854, "theoretical_loss": 3.3873927564605895, "tokens_seen": 2332033024 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.49127140641212463, "objective/train/docs_used": 1313833, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1920430660247803, "objective/train/original_loss": 3.1920433044433594, "objective/train/theoretical_loss": 3.3872695999758733, "objective/train/tokens_used": 2353541600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24283026158809662, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503616333007812, "objective/train/weighted_lm_loss": 3.3526406288146973, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9671012163162231, "theoretical_loss": 3.3872695999758733, "tokens_seen": 2333081600 }, { "epoch": 0.83, "learning_rate": 8.41846386681801e-05, "loss": 3.0824, "theoretical_loss": 3.3872695999758733, "tokens_seen": 2333081600 }, { "epoch": 0.83, "learning_rate": 8.399545970488082e-05, "loss": 3.1084, "theoretical_loss": 3.3871465143203583, "tokens_seen": 2334130176 }, { "epoch": 0.83, "learning_rate": 8.380628074158154e-05, "loss": 3.0542, "theoretical_loss": 3.387023499421519, "tokens_seen": 2335178752 }, { "epoch": 0.83, "learning_rate": 8.361710177828226e-05, "loss": 3.1007, "theoretical_loss": 3.386900555206935, "tokens_seen": 2336227328 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.47514820098876953, "objective/train/docs_used": 1316188, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9037365913391113, "objective/train/original_loss": 2.9037368297576904, "objective/train/theoretical_loss": 3.3868851921464964, "objective/train/tokens_used": 2356818400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23783263564109802, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048723578453064, "objective/train/weighted_lm_loss": 3.045696496963501, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9521853923797607, "theoretical_loss": 3.3868851921464964, "tokens_seen": 2336358400 }, { "epoch": 0.83, "learning_rate": 8.342792281498297e-05, "loss": 3.1275, "theoretical_loss": 3.3867776816042934, "tokens_seen": 2337275904 }, { "epoch": 0.84, "learning_rate": 8.323874385168369e-05, "loss": 3.081, "theoretical_loss": 3.3866548785413872, "tokens_seen": 2338324480 }, { "epoch": 0.84, "learning_rate": 8.304956488838441e-05, "loss": 3.0834, "theoretical_loss": 3.3865321459461155, "tokens_seen": 2339373056 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.48149341344833374, "objective/train/docs_used": 1317929, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8814351558685303, "objective/train/original_loss": 2.8814353942871094, "objective/train/theoretical_loss": 3.3865014737994263, "objective/train/tokens_used": 2360095200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23636046051979065, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493507385253906, "objective/train/weighted_lm_loss": 3.022583484649658, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9719198942184448, "theoretical_loss": 3.3865014737994263, "tokens_seen": 2339635200 }, { "epoch": 0.84, "learning_rate": 8.286038592508513e-05, "loss": 3.0261, "theoretical_loss": 3.386409483746484, "tokens_seen": 2340421632 }, { "epoch": 0.84, "learning_rate": 8.267120696178584e-05, "loss": 2.9999, "theoretical_loss": 3.386286891870604, "tokens_seen": 2341470208 }, { "epoch": 0.84, "learning_rate": 8.248202799848657e-05, "loss": 3.0883, "theoretical_loss": 3.386164370246692, "tokens_seen": 2342518784 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.492958128452301, "objective/train/docs_used": 1319369, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9171993732452393, "objective/train/original_loss": 2.9171996116638184, "objective/train/theoretical_loss": 3.386118442734927, "objective/train/tokens_used": 2363372000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2451806366443634, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505422353744507, "objective/train/weighted_lm_loss": 3.063903570175171, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9514467716217041, "theoretical_loss": 3.386118442734927, "tokens_seen": 2342912000 }, { "epoch": 0.84, "learning_rate": 8.229284903518728e-05, "loss": 3.2241, "theoretical_loss": 3.3860419188030693, "tokens_seen": 2343567360 }, { "epoch": 0.84, "learning_rate": 8.2103670071888e-05, "loss": 3.1287, "theoretical_loss": 3.3859195374681637, "tokens_seen": 2344615936 }, { "epoch": 0.84, "learning_rate": 8.191449110858873e-05, "loss": 3.0803, "theoretical_loss": 3.3857972261705074, "tokens_seen": 2345664512 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.483732134103775, "objective/train/docs_used": 1321381, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9508039951324463, "objective/train/original_loss": 2.9508039951324463, "objective/train/theoretical_loss": 3.3857360967633428, "objective/train/tokens_used": 2366648800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24197065830230713, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496031045913696, "objective/train/weighted_lm_loss": 3.095860719680786, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9532303214073181, "theoretical_loss": 3.3857360967633428, "tokens_seen": 2346188800 }, { "epoch": 0.84, "learning_rate": 8.172531214528944e-05, "loss": 3.0983, "theoretical_loss": 3.385674984838737, "tokens_seen": 2346713088 }, { "epoch": 0.84, "learning_rate": 8.153613318199016e-05, "loss": 3.0987, "theoretical_loss": 3.3855528134015946, "tokens_seen": 2347761664 }, { "epoch": 0.84, "learning_rate": 8.134695421869088e-05, "loss": 3.1127, "theoretical_loss": 3.385430711787925, "tokens_seen": 2348810240 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.4847314953804016, "objective/train/docs_used": 1323513, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6906464099884033, "objective/train/original_loss": 2.690646171569824, "objective/train/theoretical_loss": 3.38535443370504, "objective/train/tokens_used": 2369925600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24226725101470947, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497045516967773, "objective/train/weighted_lm_loss": 2.8229305744171143, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9514601826667786, "theoretical_loss": 3.38535443370504, "tokens_seen": 2349465600 }, { "epoch": 0.84, "learning_rate": 8.11577752553916e-05, "loss": 3.0166, "theoretical_loss": 3.3853086799266787, "tokens_seen": 2349858816 }, { "epoch": 0.84, "learning_rate": 8.096859629209231e-05, "loss": 3.0753, "theoretical_loss": 3.38518671774691, "tokens_seen": 2350907392 }, { "epoch": 0.84, "learning_rate": 8.077941732879304e-05, "loss": 3.0972, "theoretical_loss": 3.385064825177776, "tokens_seen": 2351955968 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.4856957793235779, "objective/train/docs_used": 1325482, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.927109479904175, "objective/train/original_loss": 2.927109718322754, "objective/train/theoretical_loss": 3.3849734513903473, "objective/train/tokens_used": 2373202400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24306103587150574, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498050451278687, "objective/train/weighted_lm_loss": 3.0720131397247314, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9516735672950745, "theoretical_loss": 3.3849734513903473, "tokens_seen": 2352742400 }, { "epoch": 0.84, "learning_rate": 8.059023836549375e-05, "loss": 3.0963, "theoretical_loss": 3.384943002148538, "tokens_seen": 2353004544 }, { "epoch": 0.84, "learning_rate": 8.040105940219447e-05, "loss": 3.0522, "theoretical_loss": 3.384821248588562, "tokens_seen": 2354053120 }, { "epoch": 0.84, "learning_rate": 8.021188043889519e-05, "loss": 3.0707, "theoretical_loss": 3.3846995644273132, "tokens_seen": 2355101696 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.48670029640197754, "objective/train/docs_used": 1326516, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.063535690307617, "objective/train/original_loss": 3.063535690307617, "objective/train/theoretical_loss": 3.3845931476594964, "objective/train/tokens_used": 2376479200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24172568321228027, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498987436294556, "objective/train/weighted_lm_loss": 3.215937614440918, "objective/train/weights_max": 1.0512151718139648, "objective/train/weights_min": 0.952146589756012, "theoretical_loss": 3.3845931476594964, "tokens_seen": 2356019200 }, { "epoch": 0.84, "learning_rate": 8.002270147559591e-05, "loss": 3.091, "theoretical_loss": 3.384577949594364, "tokens_seen": 2356150272 }, { "epoch": 0.84, "learning_rate": 7.983352251229663e-05, "loss": 3.1252, "theoretical_loss": 3.3844564040193887, "tokens_seen": 2357198848 }, { "epoch": 0.84, "learning_rate": 7.964434354899735e-05, "loss": 3.2091, "theoretical_loss": 3.384334927632162, "tokens_seen": 2358247424 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.47246748208999634, "objective/train/docs_used": 1327582, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9666707515716553, "objective/train/original_loss": 2.9666709899902344, "objective/train/theoretical_loss": 3.3842135203625627, "objective/train/tokens_used": 2379756000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23522746562957764, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048442006111145, "objective/train/weighted_lm_loss": 3.1088528633117676, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9515091180801392, "theoretical_loss": 3.3842135203625627, "tokens_seen": 2359296000 }, { "epoch": 0.84, "learning_rate": 7.945516458569807e-05, "loss": 3.1832, "theoretical_loss": 3.3842135203625627, "tokens_seen": 2359296000 }, { "epoch": 0.84, "learning_rate": 7.926598562239878e-05, "loss": 3.2233, "theoretical_loss": 3.3840921821405723, "tokens_seen": 2360344576 }, { "epoch": 0.84, "learning_rate": 7.907680665909952e-05, "loss": 3.2095, "theoretical_loss": 3.3839709128962725, "tokens_seen": 2361393152 }, { "epoch": 0.84, "learning_rate": 7.888762769580022e-05, "loss": 3.1732, "theoretical_loss": 3.3838497125598486, "tokens_seen": 2362441728 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.4920428693294525, "objective/train/docs_used": 1329627, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9259815216064453, "objective/train/original_loss": 2.9259815216064453, "objective/train/theoretical_loss": 3.3838345673594072, "objective/train/tokens_used": 2383032800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24389143288135529, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504441261291504, "objective/train/weighted_lm_loss": 3.0741143226623535, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9753554463386536, "theoretical_loss": 3.3838345673594072, "tokens_seen": 2362572800 }, { "epoch": 0.84, "learning_rate": 7.869844873250094e-05, "loss": 3.1582, "theoretical_loss": 3.383728581061586, "tokens_seen": 2363490304 }, { "epoch": 0.84, "learning_rate": 7.850926976920166e-05, "loss": 3.1248, "theoretical_loss": 3.383607518331873, "tokens_seen": 2364538880 }, { "epoch": 0.84, "learning_rate": 7.832009080590238e-05, "loss": 3.1062, "theoretical_loss": 3.3834865243011985, "tokens_seen": 2365587456 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.47797757387161255, "objective/train/docs_used": 1331397, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0442779064178467, "objective/train/original_loss": 3.044278144836426, "objective/train/theoretical_loss": 3.383456286519618, "objective/train/tokens_used": 2386309600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24260950088500977, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490306615829468, "objective/train/weighted_lm_loss": 3.191368818283081, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9514516592025757, "theoretical_loss": 3.383456286519618, "tokens_seen": 2365849600 }, { "epoch": 0.85, "learning_rate": 7.813091184260309e-05, "loss": 3.1024, "theoretical_loss": 3.383365598900151, "tokens_seen": 2366636032 }, { "epoch": 0.85, "learning_rate": 7.794173287930383e-05, "loss": 3.1292, "theoretical_loss": 3.3832447420594227, "tokens_seen": 2367684608 }, { "epoch": 0.85, "learning_rate": 7.775255391600455e-05, "loss": 3.1855, "theoretical_loss": 3.383123953709804, "tokens_seen": 2368733184 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.49193400144577026, "objective/train/docs_used": 1333210, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.104257106781006, "objective/train/original_loss": 3.1042566299438477, "objective/train/theoretical_loss": 3.383078675722453, "objective/train/tokens_used": 2389586400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24367199838161469, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504320859909058, "objective/train/weighted_lm_loss": 3.261072874069214, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9764650464057922, "theoretical_loss": 3.383078675722453, "tokens_seen": 2369126400 }, { "epoch": 0.85, "learning_rate": 7.756337495270525e-05, "loss": 3.1193, "theoretical_loss": 3.383003233782187, "tokens_seen": 2369781760 }, { "epoch": 0.85, "learning_rate": 7.737419598940599e-05, "loss": 3.173, "theoretical_loss": 3.382882582207563, "tokens_seen": 2370830336 }, { "epoch": 0.85, "learning_rate": 7.71850170261067e-05, "loss": 3.1487, "theoretical_loss": 3.3827619989170254, "tokens_seen": 2371878912 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.47686776518821716, "objective/train/docs_used": 1334412, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9060118198394775, "objective/train/original_loss": 2.9060120582580566, "objective/train/theoretical_loss": 3.3827017328567823, "objective/train/tokens_used": 2392863200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.235874742269516, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048885703086853, "objective/train/weighted_lm_loss": 3.0491559505462646, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9522488117218018, "theoretical_loss": 3.3827017328567823, "tokens_seen": 2372403200 }, { "epoch": 0.85, "learning_rate": 7.699583806280741e-05, "loss": 3.1017, "theoretical_loss": 3.3826414838417653, "tokens_seen": 2372927488 }, { "epoch": 0.85, "learning_rate": 7.680665909950814e-05, "loss": 3.1413, "theoretical_loss": 3.382521036913075, "tokens_seen": 2373976064 }, { "epoch": 0.85, "learning_rate": 7.661748013620886e-05, "loss": 3.1409, "theoretical_loss": 3.3824006580623447, "tokens_seen": 2375024640 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.4799953103065491, "objective/train/docs_used": 1336383, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9855668544769287, "objective/train/original_loss": 2.9855666160583496, "objective/train/theoretical_loss": 3.3823254558210323, "objective/train/tokens_used": 2396140000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23651105165481567, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049201488494873, "objective/train/weighted_lm_loss": 3.132995128631592, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9516184329986572, "theoretical_loss": 3.3823254558210323, "tokens_seen": 2375680000 }, { "epoch": 0.85, "learning_rate": 7.642830117290956e-05, "loss": 3.0911, "theoretical_loss": 3.382280347221066, "tokens_seen": 2376073216 }, { "epoch": 0.85, "learning_rate": 7.62391222096103e-05, "loss": 3.1107, "theoretical_loss": 3.3821601043208283, "tokens_seen": 2377121792 }, { "epoch": 0.85, "learning_rate": 7.6049943246311e-05, "loss": 3.1237, "theoretical_loss": 3.3820399292933194, "tokens_seen": 2378170368 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.47441014647483826, "objective/train/docs_used": 1338132, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.338620901107788, "objective/train/original_loss": 3.338620662689209, "objective/train/theoretical_loss": 3.381949842523129, "objective/train/tokens_used": 2399416800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2352495640516281, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0486366748809814, "objective/train/weighted_lm_loss": 3.5036096572875977, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9518207907676697, "theoretical_loss": 3.381949842523129, "tokens_seen": 2378956800 }, { "epoch": 0.85, "learning_rate": 7.586076428301172e-05, "loss": 3.0793, "theoretical_loss": 3.381919822070328, "tokens_seen": 2379218944 }, { "epoch": 0.85, "learning_rate": 7.567158531971246e-05, "loss": 3.1023, "theoretical_loss": 3.3817997825837396, "tokens_seen": 2380267520 }, { "epoch": 0.85, "learning_rate": 7.548240635641317e-05, "loss": 3.1106, "theoretical_loss": 3.3816798107655384, "tokens_seen": 2381316096 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.48399755358695984, "objective/train/docs_used": 1340136, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9534103870391846, "objective/train/original_loss": 2.9534106254577637, "objective/train/theoretical_loss": 3.381574890880442, "objective/train/tokens_used": 2402693600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24139027297496796, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496268272399902, "objective/train/weighted_lm_loss": 3.0994439125061035, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9525738954544067, "theoretical_loss": 3.381574890880442, "tokens_seen": 2382233600 }, { "epoch": 0.85, "learning_rate": 7.529322739311389e-05, "loss": 3.1093, "theoretical_loss": 3.3815599065478072, "tokens_seen": 2382364672 }, { "epoch": 0.85, "learning_rate": 7.51040484298146e-05, "loss": 3.173, "theoretical_loss": 3.3814400698627263, "tokens_seen": 2383413248 }, { "epoch": 0.85, "learning_rate": 7.491486946651533e-05, "loss": 3.1798, "theoretical_loss": 3.3813203006425745, "tokens_seen": 2384461824 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.4814996123313904, "objective/train/docs_used": 1341857, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8379721641540527, "objective/train/original_loss": 2.837972640991211, "objective/train/theoretical_loss": 3.3812005988197273, "objective/train/tokens_used": 2405970400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2411598116159439, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493756532669067, "objective/train/weighted_lm_loss": 2.9767820835113525, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9520983099937439, "theoretical_loss": 3.3812005988197273, "tokens_seen": 2385510400 }, { "epoch": 0.85, "learning_rate": 7.472569050321603e-05, "loss": 3.1044, "theoretical_loss": 3.3812005988197273, "tokens_seen": 2385510400 }, { "epoch": 0.85, "learning_rate": 7.453651153991677e-05, "loss": 3.1587, "theoretical_loss": 3.3810809643266593, "tokens_seen": 2386558976 }, { "epoch": 0.85, "learning_rate": 7.434733257661748e-05, "loss": 3.1508, "theoretical_loss": 3.3809613970959402, "tokens_seen": 2387607552 }, { "epoch": 0.85, "learning_rate": 7.41581536133182e-05, "loss": 3.1208, "theoretical_loss": 3.3808418970602387, "tokens_seen": 2388656128 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.46042418479919434, "objective/train/docs_used": 1343487, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1994292736053467, "objective/train/original_loss": 3.1994290351867676, "objective/train/theoretical_loss": 3.380826964277076, "objective/train/tokens_used": 2409247200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23168209195137024, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.04721999168396, "objective/train/weighted_lm_loss": 3.3554635047912598, "objective/train/weights_max": 1.0512161254882812, "objective/train/weights_min": 0.9517953991889954, "theoretical_loss": 3.380826964277076, "tokens_seen": 2388787200 }, { "epoch": 0.85, "learning_rate": 7.396897465001892e-05, "loss": 3.1114, "theoretical_loss": 3.3807224641523193, "tokens_seen": 2389704704 }, { "epoch": 0.85, "learning_rate": 7.377979568671964e-05, "loss": 3.1946, "theoretical_loss": 3.380603098305044, "tokens_seen": 2390753280 }, { "epoch": 0.85, "learning_rate": 7.359061672342034e-05, "loss": 3.1657, "theoretical_loss": 3.38048379945137, "tokens_seen": 2391801856 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.4793241322040558, "objective/train/docs_used": 1344994, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0667428970336914, "objective/train/original_loss": 3.066742420196533, "objective/train/theoretical_loss": 3.380453985197855, "objective/train/tokens_used": 2412524000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23756831884384155, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491399765014648, "objective/train/weighted_lm_loss": 3.2188243865966797, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.95333331823349, "theoretical_loss": 3.380453985197855, "tokens_seen": 2392064000 }, { "epoch": 0.85, "learning_rate": 7.340143776012108e-05, "loss": 3.1791, "theoretical_loss": 3.3803645675243534, "tokens_seen": 2392850432 }, { "epoch": 0.86, "learning_rate": 7.32122587968218e-05, "loss": 3.1188, "theoretical_loss": 3.3802454024571436, "tokens_seen": 2393899008 }, { "epoch": 0.86, "learning_rate": 7.30230798335225e-05, "loss": 3.1065, "theoretical_loss": 3.3801263041829883, "tokens_seen": 2394947584 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.4877893328666687, "objective/train/docs_used": 1347049, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.3766496181488037, "objective/train/original_loss": 3.3766493797302246, "objective/train/theoretical_loss": 3.380081659536656, "objective/train/tokens_used": 2415800800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24295225739479065, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500140190124512, "objective/train/weighted_lm_loss": 3.5450875759124756, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.951555073261261, "theoretical_loss": 3.380081659536656, "tokens_seen": 2395340800 }, { "epoch": 0.86, "learning_rate": 7.283390087022324e-05, "loss": 3.1217, "theoretical_loss": 3.3800072726352295, "tokens_seen": 2395996160 }, { "epoch": 0.86, "learning_rate": 7.264472190692395e-05, "loss": 3.106, "theoretical_loss": 3.3798883077473056, "tokens_seen": 2397044736 }, { "epoch": 0.86, "learning_rate": 7.245554294362467e-05, "loss": 3.1241, "theoretical_loss": 3.3797694094527504, "tokens_seen": 2398093312 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.49422043561935425, "objective/train/docs_used": 1348115, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.205146551132202, "objective/train/original_loss": 3.205146551132202, "objective/train/theoretical_loss": 3.379709985257241, "objective/train/tokens_used": 2419077600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24489258229732513, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506669282913208, "objective/train/weighted_lm_loss": 3.3676390647888184, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9589699506759644, "theoretical_loss": 3.379709985257241, "tokens_seen": 2398617600 }, { "epoch": 0.86, "learning_rate": 7.226636398032539e-05, "loss": 3.0947, "theoretical_loss": 3.379650577685193, "tokens_seen": 2399141888 }, { "epoch": 0.86, "learning_rate": 7.207718501702611e-05, "loss": 3.1216, "theoretical_loss": 3.379531812378357, "tokens_seen": 2400190464 }, { "epoch": 0.86, "learning_rate": 7.188800605372682e-05, "loss": 3.1411, "theoretical_loss": 3.3794131134660623, "tokens_seen": 2401239040 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.49548736214637756, "objective/train/docs_used": 1349979, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.4230825901031494, "objective/train/original_loss": 3.4230828285217285, "objective/train/theoretical_loss": 3.379338960332488, "objective/train/tokens_used": 2422354400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24636338651180267, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0508012771606445, "objective/train/weighted_lm_loss": 3.5968568325042725, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.974648118019104, "theoretical_loss": 3.379338960332488, "tokens_seen": 2401894400 }, { "epoch": 0.86, "learning_rate": 7.169882709042755e-05, "loss": 3.1393, "theoretical_loss": 3.3792944808822227, "tokens_seen": 2402287616 }, { "epoch": 0.86, "learning_rate": 7.150964812712826e-05, "loss": 3.1068, "theoretical_loss": 3.3791759145608458, "tokens_seen": 2403336192 }, { "epoch": 0.86, "learning_rate": 7.132046916382898e-05, "loss": 3.0804, "theoretical_loss": 3.3790574144360352, "tokens_seen": 2404384768 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.4680725932121277, "objective/train/docs_used": 1352287, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8557844161987305, "objective/train/original_loss": 2.8557848930358887, "objective/train/theoretical_loss": 3.378968582744336, "objective/train/tokens_used": 2425631200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23578692972660065, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0480057001113892, "objective/train/weighted_lm_loss": 2.99615478515625, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9522780179977417, "theoretical_loss": 3.378968582744336, "tokens_seen": 2405171200 }, { "epoch": 0.86, "learning_rate": 7.113129020052971e-05, "loss": 3.0916, "theoretical_loss": 3.378938980441988, "tokens_seen": 2405433344 }, { "epoch": 0.86, "learning_rate": 7.094211123723042e-05, "loss": 3.0778, "theoretical_loss": 3.3788206125129947, "tokens_seen": 2406481920 }, { "epoch": 0.86, "learning_rate": 7.075293227393114e-05, "loss": 3.0851, "theoretical_loss": 3.3787023105834413, "tokens_seen": 2407530496 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.47238680720329285, "objective/train/docs_used": 1354179, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.906632423400879, "objective/train/original_loss": 2.9066319465637207, "objective/train/theoretical_loss": 3.378598850483736, "objective/train/tokens_used": 2428908000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2311716079711914, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048413634300232, "objective/train/weighted_lm_loss": 3.047044515609741, "objective/train/weights_max": 1.051215648651123, "objective/train/weights_min": 0.9545229077339172, "theoretical_loss": 3.378598850483736, "tokens_seen": 2408448000 }, { "epoch": 0.86, "learning_rate": 7.056375331063186e-05, "loss": 3.0996, "theoretical_loss": 3.3785840745878057, "tokens_seen": 2408579072 }, { "epoch": 0.86, "learning_rate": 7.037457434733258e-05, "loss": 3.0983, "theoretical_loss": 3.3784659044606604, "tokens_seen": 2409627648 }, { "epoch": 0.86, "learning_rate": 7.018539538403329e-05, "loss": 3.0816, "theoretical_loss": 3.378347800136672, "tokens_seen": 2410676224 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.4815124273300171, "objective/train/docs_used": 1355984, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7397754192352295, "objective/train/original_loss": 2.7397756576538086, "objective/train/theoretical_loss": 3.378229761550598, "objective/train/tokens_used": 2432184800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23886069655418396, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493652820587158, "objective/train/weighted_lm_loss": 2.875002861022949, "objective/train/weights_max": 1.0512160062789917, "objective/train/weights_min": 0.9514920115470886, "theoretical_loss": 3.378229761550598, "tokens_seen": 2411724800 }, { "epoch": 0.86, "learning_rate": 6.999621642073402e-05, "loss": 2.9866, "theoretical_loss": 3.378229761550598, "tokens_seen": 2411724800 }, { "epoch": 0.86, "learning_rate": 6.980703745743473e-05, "loss": 3.071, "theoretical_loss": 3.3781117886372902, "tokens_seen": 2412773376 }, { "epoch": 0.86, "learning_rate": 6.961785849413545e-05, "loss": 3.0636, "theoretical_loss": 3.3779938813316943, "tokens_seen": 2413821952 }, { "epoch": 0.86, "learning_rate": 6.942867953083617e-05, "loss": 2.9877, "theoretical_loss": 3.377876039568847, "tokens_seen": 2414870528 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.4852672517299652, "objective/train/docs_used": 1358162, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6078548431396484, "objective/train/original_loss": 2.6078543663024902, "objective/train/theoretical_loss": 3.377861313953734, "objective/train/tokens_used": 2435461600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.240381121635437, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497486591339111, "objective/train/weighted_lm_loss": 2.7380173206329346, "objective/train/weights_max": 1.0512151718139648, "objective/train/weights_min": 0.9583688378334045, "theoretical_loss": 3.377861313953734, "tokens_seen": 2415001600 }, { "epoch": 0.86, "learning_rate": 6.923950056753689e-05, "loss": 3.0179, "theoretical_loss": 3.3777582632838783, "tokens_seen": 2415919104 }, { "epoch": 0.86, "learning_rate": 6.905032160423761e-05, "loss": 3.0409, "theoretical_loss": 3.3776405524120108, "tokens_seen": 2416967680 }, { "epoch": 0.86, "learning_rate": 6.886114264093833e-05, "loss": 3.0834, "theoretical_loss": 3.3775229068885584, "tokens_seen": 2418016256 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.49422746896743774, "objective/train/docs_used": 1359820, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.702634811401367, "objective/train/original_loss": 2.702634811401367, "objective/train/theoretical_loss": 3.3774935057108135, "objective/train/tokens_used": 2438738400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24477465450763702, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506671667099, "objective/train/weighted_lm_loss": 2.839879274368286, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9933627843856812, "theoretical_loss": 3.3774935057108135, "tokens_seen": 2418278400 }, { "epoch": 0.86, "learning_rate": 6.867196367763905e-05, "loss": 3.0768, "theoretical_loss": 3.377405326648927, "tokens_seen": 2419064832 }, { "epoch": 0.86, "learning_rate": 6.848278471433976e-05, "loss": 3.0517, "theoretical_loss": 3.377287811628616, "tokens_seen": 2420113408 }, { "epoch": 0.86, "learning_rate": 6.829360575104049e-05, "loss": 3.0728, "theoretical_loss": 3.3771703617632136, "tokens_seen": 2421161984 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.49273377656936646, "objective/train/docs_used": 1361438, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8423805236816406, "objective/train/original_loss": 2.8423805236816406, "objective/train/theoretical_loss": 3.377126334848307, "objective/train/tokens_used": 2442015200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24471589922904968, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505174398422241, "objective/train/weighted_lm_loss": 2.9853973388671875, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9665617346763611, "theoretical_loss": 3.377126334848307, "tokens_seen": 2421555200 }, { "epoch": 0.87, "learning_rate": 6.81044267877412e-05, "loss": 3.0376, "theoretical_loss": 3.3770529769884017, "tokens_seen": 2422210560 }, { "epoch": 0.87, "learning_rate": 6.791524782444192e-05, "loss": 3.1151, "theoretical_loss": 3.376935657239953, "tokens_seen": 2423259136 }, { "epoch": 0.87, "learning_rate": 6.772606886114264e-05, "loss": 3.06, "theoretical_loss": 3.3768184024537313, "tokens_seen": 2424307712 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.4682343304157257, "objective/train/docs_used": 1363257, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8115761280059814, "objective/train/original_loss": 2.8115761280059814, "objective/train/theoretical_loss": 3.3767597994014373, "objective/train/tokens_used": 2445292000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23106399178504944, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0479977130889893, "objective/train/weighted_lm_loss": 2.944631814956665, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9522070288658142, "theoretical_loss": 3.3767597994014373, "tokens_seen": 2424832000 }, { "epoch": 0.87, "learning_rate": 6.753688989784336e-05, "loss": 3.1218, "theoretical_loss": 3.376701212565691, "tokens_seen": 2425356288 }, { "epoch": 0.87, "learning_rate": 6.734771093454407e-05, "loss": 3.0973, "theoretical_loss": 3.376584087511877, "tokens_seen": 2426404864 }, { "epoch": 0.87, "learning_rate": 6.71585319712448e-05, "loss": 3.0884, "theoretical_loss": 3.3764670272284265, "tokens_seen": 2427453440 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.4915653467178345, "objective/train/docs_used": 1365224, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8232626914978027, "objective/train/original_loss": 2.8232626914978027, "objective/train/theoretical_loss": 3.376393897414129, "objective/train/tokens_used": 2448568800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24267259240150452, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503901243209839, "objective/train/weighted_lm_loss": 2.9657554626464844, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9560016989707947, "theoretical_loss": 3.376393897414129, "tokens_seen": 2428108800 }, { "epoch": 0.87, "learning_rate": 6.696935300794552e-05, "loss": 3.0388, "theoretical_loss": 3.376350031651565, "tokens_seen": 2428502016 }, { "epoch": 0.87, "learning_rate": 6.678017404464623e-05, "loss": 3.0579, "theoretical_loss": 3.37623310071761, "tokens_seen": 2429550592 }, { "epoch": 0.87, "learning_rate": 6.659099508134696e-05, "loss": 3.0543, "theoretical_loss": 3.376116234362968, "tokens_seen": 2430599168 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.4831455647945404, "objective/train/docs_used": 1366375, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2068450450897217, "objective/train/original_loss": 3.2068448066711426, "objective/train/theoretical_loss": 3.376028626938956, "objective/train/tokens_used": 2451845600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2415134757757187, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495421886444092, "objective/train/weighted_lm_loss": 3.365105152130127, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9523228406906128, "theoretical_loss": 3.376028626938956, "tokens_seen": 2431385600 }, { "epoch": 0.87, "learning_rate": 6.640181611804767e-05, "loss": 3.0921, "theoretical_loss": 3.375999432524136, "tokens_seen": 2431647744 }, { "epoch": 0.87, "learning_rate": 6.621263715474839e-05, "loss": 3.0504, "theoretical_loss": 3.3758826951377006, "tokens_seen": 2432696320 }, { "epoch": 0.87, "learning_rate": 6.602345819144911e-05, "loss": 3.0778, "theoretical_loss": 3.375766022140338, "tokens_seen": 2433744896 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.48540857434272766, "objective/train/docs_used": 1367967, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.798003673553467, "objective/train/original_loss": 2.798003673553467, "objective/train/theoretical_loss": 3.375663986037095, "objective/train/tokens_used": 2455122400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24191917479038239, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497705936431885, "objective/train/weighted_lm_loss": 2.936539649963379, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.951850414276123, "theoretical_loss": 3.375663986037095, "tokens_seen": 2434662400 }, { "epoch": 0.87, "learning_rate": 6.583427922814983e-05, "loss": 3.1003, "theoretical_loss": 3.3756494134688144, "tokens_seen": 2434793472 }, { "epoch": 0.87, "learning_rate": 6.564510026485054e-05, "loss": 3.0541, "theoretical_loss": 3.3755328690599846, "tokens_seen": 2435842048 }, { "epoch": 0.87, "learning_rate": 6.545592130155127e-05, "loss": 3.0621, "theoretical_loss": 3.3754163888507933, "tokens_seen": 2436890624 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.47676169872283936, "objective/train/docs_used": 1369908, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7874505519866943, "objective/train/original_loss": 2.7874507904052734, "objective/train/theoretical_loss": 3.375299972778273, "objective/train/tokens_used": 2458399200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23362308740615845, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048863410949707, "objective/train/weighted_lm_loss": 2.9224209785461426, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.951828122138977, "theoretical_loss": 3.375299972778273, "tokens_seen": 2437939200 }, { "epoch": 0.87, "learning_rate": 6.526674233825198e-05, "loss": 2.9885, "theoretical_loss": 3.375299972778273, "tokens_seen": 2437939200 }, { "epoch": 0.87, "learning_rate": 6.50775633749527e-05, "loss": 2.9628, "theoretical_loss": 3.3751836207795463, "tokens_seen": 2438987776 }, { "epoch": 0.87, "learning_rate": 6.488838441165343e-05, "loss": 3.0596, "theoretical_loss": 3.375067332791823, "tokens_seen": 2440036352 }, { "epoch": 0.87, "learning_rate": 6.469920544835414e-05, "loss": 3.0705, "theoretical_loss": 3.3749511087524033, "tokens_seen": 2441084928 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.4786227345466614, "objective/train/docs_used": 1371503, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.74287486076355, "objective/train/original_loss": 2.7428746223449707, "objective/train/theoretical_loss": 3.3749365852407216, "objective/train/tokens_used": 2461676000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23829062283039093, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049073338508606, "objective/train/weighted_lm_loss": 2.8782496452331543, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9517157673835754, "theoretical_loss": 3.3749365852407216, "tokens_seen": 2441216000 }, { "epoch": 0.87, "learning_rate": 6.451002648505486e-05, "loss": 3.0573, "theoretical_loss": 3.3748349485986737, "tokens_seen": 2442133504 }, { "epoch": 0.87, "learning_rate": 6.432084752175558e-05, "loss": 3.1075, "theoretical_loss": 3.37471885226811, "tokens_seen": 2443182080 }, { "epoch": 0.87, "learning_rate": 6.41316685584563e-05, "loss": 3.033, "theoretical_loss": 3.3746028196982762, "tokens_seen": 2444230656 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.4795815050601959, "objective/train/docs_used": 1373215, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7482497692108154, "objective/train/original_loss": 2.7482497692108154, "objective/train/theoretical_loss": 3.3745738215111234, "objective/train/tokens_used": 2464952800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23792575299739838, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049167513847351, "objective/train/weighted_lm_loss": 2.8822453022003174, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9520523548126221, "theoretical_loss": 3.3745738215111234, "tokens_seen": 2444492800 }, { "epoch": 0.87, "learning_rate": 6.394248959515701e-05, "loss": 3.0633, "theoretical_loss": 3.3744868508268233, "tokens_seen": 2445279232 }, { "epoch": 0.87, "learning_rate": 6.375331063185774e-05, "loss": 3.0684, "theoretical_loss": 3.3743709455914903, "tokens_seen": 2446327808 }, { "epoch": 0.87, "learning_rate": 6.356413166855845e-05, "loss": 3.0596, "theoretical_loss": 3.3742551039301043, "tokens_seen": 2447376384 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.4921453595161438, "objective/train/docs_used": 1375012, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.813209295272827, "objective/train/original_loss": 2.8132095336914062, "objective/train/theoretical_loss": 3.374211679684568, "objective/train/tokens_used": 2468229600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2444104254245758, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504570007324219, "objective/train/weighted_lm_loss": 2.9547219276428223, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9768006205558777, "theoretical_loss": 3.374211679684568, "tokens_seen": 2447769600 }, { "epoch": 0.87, "learning_rate": 6.337495270525917e-05, "loss": 2.9949, "theoretical_loss": 3.374139325780579, "tokens_seen": 2448424960 }, { "epoch": 0.87, "learning_rate": 6.318577374195989e-05, "loss": 3.0415, "theoretical_loss": 3.374023611080915, "tokens_seen": 2449473536 }, { "epoch": 0.88, "learning_rate": 6.299659477866061e-05, "loss": 3.1023, "theoretical_loss": 3.3739079597692014, "tokens_seen": 2450522112 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.48276713490486145, "objective/train/docs_used": 1376896, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6670351028442383, "objective/train/original_loss": 2.6670355796813965, "objective/train/theoretical_loss": 3.373850157864502, "objective/train/tokens_used": 2471506400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23791970312595367, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494859218597412, "objective/train/weighted_lm_loss": 2.799875259399414, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9536236524581909, "theoretical_loss": 3.373850157864502, "tokens_seen": 2451046400 }, { "epoch": 0.88, "learning_rate": 6.280741581536132e-05, "loss": 3.1117, "theoretical_loss": 3.3737923717836127, "tokens_seen": 2451570688 }, { "epoch": 0.88, "learning_rate": 6.261823685206205e-05, "loss": 3.1263, "theoretical_loss": 3.3736768470624106, "tokens_seen": 2452619264 }, { "epoch": 0.88, "learning_rate": 6.242905788876277e-05, "loss": 2.9923, "theoretical_loss": 3.373561385543943, "tokens_seen": 2453667840 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.4910533130168915, "objective/train/docs_used": 1379050, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7924742698669434, "objective/train/original_loss": 2.7924742698669434, "objective/train/theoretical_loss": 3.373489254162681, "objective/train/tokens_used": 2474783200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24239566922187805, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503374338150024, "objective/train/weighted_lm_loss": 2.933199405670166, "objective/train/weights_max": 1.0512187480926514, "objective/train/weights_min": 0.9563108682632446, "theoretical_loss": 3.373489254162681, "tokens_seen": 2454323200 }, { "epoch": 0.88, "learning_rate": 6.22398789254635e-05, "loss": 3.0166, "theoretical_loss": 3.3734459871666456, "tokens_seen": 2454716416 }, { "epoch": 0.88, "learning_rate": 6.205069996216422e-05, "loss": 3.1157, "theoretical_loss": 3.373330651869039, "tokens_seen": 2455764992 }, { "epoch": 0.88, "learning_rate": 6.186152099886492e-05, "loss": 3.1398, "theoretical_loss": 3.373215379589729, "tokens_seen": 2456813568 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.49097520112991333, "objective/train/docs_used": 1380428, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9203779697418213, "objective/train/original_loss": 2.9203779697418213, "objective/train/theoretical_loss": 3.3731289666991215, "objective/train/tokens_used": 2478060000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2459252029657364, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503475666046143, "objective/train/weighted_lm_loss": 3.0663514137268066, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9521451592445374, "theoretical_loss": 3.3731289666991215, "tokens_seen": 2457600000 }, { "epoch": 0.88, "learning_rate": 6.167234203556564e-05, "loss": 3.0337, "theoretical_loss": 3.3731001702674104, "tokens_seen": 2457862144 }, { "epoch": 0.88, "learning_rate": 6.148316307226636e-05, "loss": 3.0836, "theoretical_loss": 3.3729850238408607, "tokens_seen": 2458910720 }, { "epoch": 0.88, "learning_rate": 6.129398410896708e-05, "loss": 3.0798, "theoretical_loss": 3.372869940248944, "tokens_seen": 2459959296 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.4859597980976105, "objective/train/docs_used": 1382125, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.264944314956665, "objective/train/original_loss": 3.264944076538086, "objective/train/theoretical_loss": 3.3727692936020572, "objective/train/tokens_used": 2481336800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24115294218063354, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498218536376953, "objective/train/weighted_lm_loss": 3.4269485473632812, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9694421291351318, "theoretical_loss": 3.3727692936020572, "tokens_seen": 2460876800 }, { "epoch": 0.88, "learning_rate": 6.11048051456678e-05, "loss": 3.0595, "theoretical_loss": 3.3727549194306112, "tokens_seen": 2461007872 }, { "epoch": 0.88, "learning_rate": 6.0915626182368526e-05, "loss": 3.1255, "theoretical_loss": 3.372639961324896, "tokens_seen": 2462056448 }, { "epoch": 0.88, "learning_rate": 6.072644721906924e-05, "loss": 3.0553, "theoretical_loss": 3.37252506587092, "tokens_seen": 2463105024 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.4852433204650879, "objective/train/docs_used": 1384145, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.900221586227417, "objective/train/original_loss": 2.900221347808838, "objective/train/theoretical_loss": 3.372410233007887, "objective/train/tokens_used": 2484613600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23983712494373322, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497432947158813, "objective/train/weighted_lm_loss": 3.0446298122406006, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9515975117683411, "theoretical_loss": 3.372410233007887, "tokens_seen": 2464153600 }, { "epoch": 0.88, "learning_rate": 6.053726825576996e-05, "loss": 3.05, "theoretical_loss": 3.372410233007887, "tokens_seen": 2464153600 }, { "epoch": 0.88, "learning_rate": 6.034808929247068e-05, "loss": 3.1139, "theoretical_loss": 3.372295462675088, "tokens_seen": 2465202176 }, { "epoch": 0.88, "learning_rate": 6.0158910329171394e-05, "loss": 3.0191, "theoretical_loss": 3.372180754811897, "tokens_seen": 2466250752 }, { "epoch": 0.88, "learning_rate": 5.9969731365872115e-05, "loss": 3.0621, "theoretical_loss": 3.3720661093577737, "tokens_seen": 2467299328 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.4838484823703766, "objective/train/docs_used": 1386568, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.136174201965332, "objective/train/original_loss": 3.136174440383911, "objective/train/theoretical_loss": 3.372051783061134, "objective/train/tokens_used": 2487890400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23954464495182037, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496023893356323, "objective/train/weighted_lm_loss": 3.292409896850586, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9591426253318787, "theoretical_loss": 3.372051783061134, "tokens_seen": 2467430400 }, { "epoch": 0.88, "learning_rate": 5.9780552402572835e-05, "loss": 3.142, "theoretical_loss": 3.3719515262522615, "tokens_seen": 2468347904 }, { "epoch": 0.88, "learning_rate": 5.959137343927355e-05, "loss": 3.0806, "theoretical_loss": 3.3718370054349878, "tokens_seen": 2469396480 }, { "epoch": 0.88, "learning_rate": 5.9402194475974277e-05, "loss": 3.0506, "theoretical_loss": 3.371722546845665, "tokens_seen": 2470445056 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.47412246465682983, "objective/train/docs_used": 1387925, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0952136516571045, "objective/train/original_loss": 3.0952138900756836, "objective/train/theoretical_loss": 3.3716939419143945, "objective/train/tokens_used": 2491167200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2385615110397339, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0486245155334473, "objective/train/weighted_lm_loss": 3.2452621459960938, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9514861106872559, "theoretical_loss": 3.3716939419143945, "tokens_seen": 2470707200 }, { "epoch": 0.88, "learning_rate": 5.9213015512675e-05, "loss": 3.0562, "theoretical_loss": 3.3716081504240885, "tokens_seen": 2471493632 }, { "epoch": 0.88, "learning_rate": 5.902383654937571e-05, "loss": 3.063, "theoretical_loss": 3.3714938161101378, "tokens_seen": 2472542208 }, { "epoch": 0.88, "learning_rate": 5.883465758607643e-05, "loss": 3.1033, "theoretical_loss": 3.3713795438437764, "tokens_seen": 2473590784 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.49264079332351685, "objective/train/docs_used": 1390126, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.122264862060547, "objective/train/original_loss": 3.122264862060547, "objective/train/theoretical_loss": 3.371336707728296, "objective/train/tokens_used": 2494444000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24525536596775055, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505108833312988, "objective/train/weighted_lm_loss": 3.279707193374634, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9543718099594116, "theoretical_loss": 3.371336707728296, "tokens_seen": 2473984000 }, { "epoch": 0.88, "learning_rate": 5.864547862277715e-05, "loss": 3.0441, "theoretical_loss": 3.3712653335650504, "tokens_seen": 2474639360 }, { "epoch": 0.88, "learning_rate": 5.8456299659477866e-05, "loss": 3.0489, "theoretical_loss": 3.3711511852140905, "tokens_seen": 2475687936 }, { "epoch": 0.88, "learning_rate": 5.8267120696178586e-05, "loss": 3.0745, "theoretical_loss": 3.3710370987311085, "tokens_seen": 2476736512 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.4860904812812805, "objective/train/docs_used": 1392376, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2137277126312256, "objective/train/original_loss": 3.2137279510498047, "objective/train/theoretical_loss": 3.3709800786714488, "objective/train/tokens_used": 2497720800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24208112061023712, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498394966125488, "objective/train/weighted_lm_loss": 3.373750925064087, "objective/train/weights_max": 1.0512208938598633, "objective/train/weights_min": 0.9516987800598145, "theoretical_loss": 3.3709800786714488, "tokens_seen": 2477260800 }, { "epoch": 0.89, "learning_rate": 5.807794173287931e-05, "loss": 3.0616, "theoretical_loss": 3.3709230740564013, "tokens_seen": 2477785088 }, { "epoch": 0.89, "learning_rate": 5.788876276958002e-05, "loss": 3.0668, "theoretical_loss": 3.3708091111303475, "tokens_seen": 2478833664 }, { "epoch": 0.89, "learning_rate": 5.769958380628074e-05, "loss": 3.0424, "theoretical_loss": 3.370695209893409, "tokens_seen": 2479882240 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.486337274312973, "objective/train/docs_used": 1394461, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0017459392547607, "objective/train/original_loss": 3.0017457008361816, "objective/train/theoretical_loss": 3.370624052920404, "objective/train/tokens_used": 2500997600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24043717980384827, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498559474945068, "objective/train/weighted_lm_loss": 3.151277542114258, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9519734382629395, "theoretical_loss": 3.370624052920404, "tokens_seen": 2480537600 }, { "epoch": 0.89, "learning_rate": 5.751040484298146e-05, "loss": 2.9919, "theoretical_loss": 3.3705813702861294, "tokens_seen": 2480930816 }, { "epoch": 0.89, "learning_rate": 5.732122587968218e-05, "loss": 2.9639, "theoretical_loss": 3.370467592249135, "tokens_seen": 2481979392 }, { "epoch": 0.89, "learning_rate": 5.71320469163829e-05, "loss": 3.0105, "theoretical_loss": 3.3703538757231355, "tokens_seen": 2483027968 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.4779159724712372, "objective/train/docs_used": 1396627, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1431424617767334, "objective/train/original_loss": 3.1431422233581543, "objective/train/theoretical_loss": 3.370268628659605, "objective/train/tokens_used": 2504274400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23592840135097504, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489908456802368, "objective/train/weighted_lm_loss": 3.2960641384124756, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9546563625335693, "theoretical_loss": 3.370268628659605, "tokens_seen": 2483814400 }, { "epoch": 0.89, "learning_rate": 5.694286795308362e-05, "loss": 3.0689, "theoretical_loss": 3.3702402206489213, "tokens_seen": 2484076544 }, { "epoch": 0.89, "learning_rate": 5.675368898978434e-05, "loss": 2.9845, "theoretical_loss": 3.3701266269673655, "tokens_seen": 2485125120 }, { "epoch": 0.89, "learning_rate": 5.656451002648506e-05, "loss": 3.0257, "theoretical_loss": 3.3700130946194222, "tokens_seen": 2486173696 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.4774656891822815, "objective/train/docs_used": 1398483, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0128564834594727, "objective/train/original_loss": 3.0128560066223145, "objective/train/theoretical_loss": 3.369913804081346, "objective/train/tokens_used": 2507551200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23861047625541687, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489592552185059, "objective/train/weighted_lm_loss": 3.159787654876709, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9514344334602356, "theoretical_loss": 3.369913804081346, "tokens_seen": 2487091200 }, { "epoch": 0.89, "learning_rate": 5.637533106318578e-05, "loss": 2.9467, "theoretical_loss": 3.3698996235461283, "tokens_seen": 2487222272 }, { "epoch": 0.89, "learning_rate": 5.618615209988649e-05, "loss": 3.0314, "theoretical_loss": 3.369786213688601, "tokens_seen": 2488270848 }, { "epoch": 0.89, "learning_rate": 5.599697313658721e-05, "loss": 3.0424, "theoretical_loss": 3.3696728649880403, "tokens_seen": 2489319424 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.4827979803085327, "objective/train/docs_used": 1400481, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7925925254821777, "objective/train/original_loss": 2.7925925254821777, "objective/train/theoretical_loss": 3.369559577385726, "objective/train/tokens_used": 2510828000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23709921538829803, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494848489761353, "objective/train/weighted_lm_loss": 2.9326014518737793, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9522310495376587, "theoretical_loss": 3.369559577385726, "tokens_seen": 2490368000 }, { "epoch": 0.89, "learning_rate": 5.580779417328793e-05, "loss": 2.9912, "theoretical_loss": 3.369559577385726, "tokens_seen": 2490368000 }, { "epoch": 0.89, "learning_rate": 5.561861520998865e-05, "loss": 2.9337, "theoretical_loss": 3.36944635082302, "tokens_seen": 2491416576 }, { "epoch": 0.89, "learning_rate": 5.542943624668937e-05, "loss": 2.9607, "theoretical_loss": 3.3693331852413637, "tokens_seen": 2492465152 }, { "epoch": 0.89, "learning_rate": 5.524025728339009e-05, "loss": 3.0256, "theoretical_loss": 3.3692200805822816, "tokens_seen": 2493513728 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.47895485162734985, "objective/train/docs_used": 1401835, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9839255809783936, "objective/train/original_loss": 2.9839253425598145, "objective/train/theoretical_loss": 3.369205946780606, "objective/train/tokens_used": 2514104800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23692239820957184, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490996837615967, "objective/train/weighted_lm_loss": 3.1297733783721924, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9525783061981201, "theoretical_loss": 3.369205946780606, "tokens_seen": 2493644800 }, { "epoch": 0.89, "learning_rate": 5.505107832009081e-05, "loss": 3.0499, "theoretical_loss": 3.369107036787377, "tokens_seen": 2494562304 }, { "epoch": 0.89, "learning_rate": 5.486189935679153e-05, "loss": 2.9932, "theoretical_loss": 3.3689940537983345, "tokens_seen": 2495610880 }, { "epoch": 0.89, "learning_rate": 5.467272039349225e-05, "loss": 3.0425, "theoretical_loss": 3.368881131556918, "tokens_seen": 2496659456 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.48761147260665894, "objective/train/docs_used": 1403825, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8729476928710938, "objective/train/original_loss": 2.8729474544525146, "objective/train/theoretical_loss": 3.3688529104815634, "objective/train/tokens_used": 2517381600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23985113203525543, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499802827835083, "objective/train/weighted_lm_loss": 3.0175294876098633, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9651005268096924, "theoretical_loss": 3.3688529104815634, "tokens_seen": 2496921600 }, { "epoch": 0.89, "learning_rate": 5.448354143019296e-05, "loss": 3.0147, "theoretical_loss": 3.368768270004973, "tokens_seen": 2497708032 }, { "epoch": 0.89, "learning_rate": 5.4294362466893684e-05, "loss": 2.9441, "theoretical_loss": 3.368655469084424, "tokens_seen": 2498756608 }, { "epoch": 0.89, "learning_rate": 5.4105183503594404e-05, "loss": 2.9964, "theoretical_loss": 3.3685427287372764, "tokens_seen": 2499805184 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.4772385358810425, "objective/train/docs_used": 1405631, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6067678928375244, "objective/train/original_loss": 2.6067678928375244, "objective/train/theoretical_loss": 3.3685004667118528, "objective/train/tokens_used": 2520658400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2354532927274704, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489205121994019, "objective/train/weighted_lm_loss": 2.7343697547912598, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9528263807296753, "theoretical_loss": 3.3685004667118528, "tokens_seen": 2500198400 }, { "epoch": 0.89, "learning_rate": 5.391600454029512e-05, "loss": 3.0273, "theoretical_loss": 3.3684300489056143, "tokens_seen": 2500853760 }, { "epoch": 0.89, "learning_rate": 5.372682557699584e-05, "loss": 3.0264, "theoretical_loss": 3.3683174295316025, "tokens_seen": 2501902336 }, { "epoch": 0.89, "learning_rate": 5.353764661369656e-05, "loss": 2.9834, "theoretical_loss": 3.368204870557484, "tokens_seen": 2502950912 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.4770272374153137, "objective/train/docs_used": 1407612, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.106952667236328, "objective/train/original_loss": 3.106952667236328, "objective/train/theoretical_loss": 3.3681486137023575, "objective/train/tokens_used": 2523935200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2343224287033081, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488938093185425, "objective/train/weighted_lm_loss": 3.259089946746826, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9528036713600159, "theoretical_loss": 3.3681486137023575, "tokens_seen": 2503475200 }, { "epoch": 0.89, "learning_rate": 5.334846765039727e-05, "loss": 3.0631, "theoretical_loss": 3.368092371925582, "tokens_seen": 2503999488 }, { "epoch": 0.89, "learning_rate": 5.3159288687097994e-05, "loss": 3.0072, "theoretical_loss": 3.3679799335782996, "tokens_seen": 2505048064 }, { "epoch": 0.9, "learning_rate": 5.297010972379872e-05, "loss": 2.9526, "theoretical_loss": 3.3678675554581172, "tokens_seen": 2506096640 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.48289960622787476, "objective/train/docs_used": 1409433, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1777186393737793, "objective/train/original_loss": 3.1777186393737793, "objective/train/theoretical_loss": 3.3677973496915516, "objective/train/tokens_used": 2527212000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23917822539806366, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495057106018066, "objective/train/weighted_lm_loss": 3.33475399017334, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.952046275138855, "theoretical_loss": 3.3677973496915516, "tokens_seen": 2506752000 }, { "epoch": 0.9, "learning_rate": 5.2780930760499435e-05, "loss": 3.0682, "theoretical_loss": 3.367755237507595, "tokens_seen": 2507145216 }, { "epoch": 0.9, "learning_rate": 5.2591751797200155e-05, "loss": 3.0621, "theoretical_loss": 3.367642979669373, "tokens_seen": 2508193792 }, { "epoch": 0.9, "learning_rate": 5.2402572833900876e-05, "loss": 3.0179, "theoretical_loss": 3.3675307818861677, "tokens_seen": 2509242368 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.4875558018684387, "objective/train/docs_used": 1411323, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0832877159118652, "objective/train/original_loss": 3.083287239074707, "objective/train/theoretical_loss": 3.367446672925454, "objective/train/tokens_used": 2530488800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24067936837673187, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499789714813232, "objective/train/weighted_lm_loss": 3.2369744777679443, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9582372307777405, "theoretical_loss": 3.367446672925454, "tokens_seen": 2510028800 }, { "epoch": 0.9, "learning_rate": 5.221339387060159e-05, "loss": 3.0508, "theoretical_loss": 3.367418644100776, "tokens_seen": 2510290944 }, { "epoch": 0.9, "learning_rate": 5.202421490730231e-05, "loss": 3.0319, "theoretical_loss": 3.367306566256072, "tokens_seen": 2511339520 }, { "epoch": 0.9, "learning_rate": 5.183503594400303e-05, "loss": 3.0629, "theoretical_loss": 3.3671945482950085, "tokens_seen": 2512388096 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.4792327284812927, "objective/train/docs_used": 1413432, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.026296854019165, "objective/train/original_loss": 3.0262961387634277, "objective/train/theoretical_loss": 3.3670965816575897, "objective/train/tokens_used": 2533765600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23847414553165436, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491353273391724, "objective/train/weighted_lm_loss": 3.174064874649048, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9517701268196106, "theoretical_loss": 3.3670965816575897, "tokens_seen": 2513305600 }, { "epoch": 0.9, "learning_rate": 5.1645856980703744e-05, "loss": 3.1064, "theoretical_loss": 3.3670825901606167, "tokens_seen": 2513436672 }, { "epoch": 0.9, "learning_rate": 5.1456678017404465e-05, "loss": 3.0377, "theoretical_loss": 3.3669706917960047, "tokens_seen": 2514485248 }, { "epoch": 0.9, "learning_rate": 5.1267499054105186e-05, "loss": 3.0616, "theoretical_loss": 3.3668588531443593, "tokens_seen": 2515533824 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.48915207386016846, "objective/train/docs_used": 1415580, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.92588210105896, "objective/train/original_loss": 2.92588210105896, "objective/train/theoretical_loss": 3.3667470741489445, "objective/train/tokens_used": 2537042400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24214540421962738, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501459836959839, "objective/train/weighted_lm_loss": 3.072300434112549, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9553421139717102, "theoretical_loss": 3.3667470741489445, "tokens_seen": 2516582400 }, { "epoch": 0.9, "learning_rate": 5.10783200908059e-05, "loss": 3.0839, "theoretical_loss": 3.3667470741489445, "tokens_seen": 2516582400 }, { "epoch": 0.9, "learning_rate": 5.088914112750662e-05, "loss": 3.0554, "theoretical_loss": 3.366635354753102, "tokens_seen": 2517630976 }, { "epoch": 0.9, "learning_rate": 5.069996216420735e-05, "loss": 2.977, "theoretical_loss": 3.3665236949002515, "tokens_seen": 2518679552 }, { "epoch": 0.9, "learning_rate": 5.051078320090806e-05, "loss": 3.059, "theoretical_loss": 3.3664120945338882, "tokens_seen": 2519728128 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.4885351061820984, "objective/train/docs_used": 1417591, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9278507232666016, "objective/train/original_loss": 2.9278504848480225, "objective/train/theoretical_loss": 3.3663981486679257, "objective/train/tokens_used": 2540319200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24352295696735382, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.05009126663208, "objective/train/weighted_lm_loss": 3.0739758014678955, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9517725110054016, "theoretical_loss": 3.3663981486679257, "tokens_seen": 2519859200 }, { "epoch": 0.9, "learning_rate": 5.032160423760878e-05, "loss": 2.9885, "theoretical_loss": 3.3663005535975867, "tokens_seen": 2520776704 }, { "epoch": 0.9, "learning_rate": 5.01324252743095e-05, "loss": 3.0065, "theoretical_loss": 3.3661890720349965, "tokens_seen": 2521825280 }, { "epoch": 0.9, "learning_rate": 4.9943246311010216e-05, "loss": 3.0255, "theoretical_loss": 3.366077649789845, "tokens_seen": 2522873856 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.48337897658348083, "objective/train/docs_used": 1418957, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.5253660678863525, "objective/train/original_loss": 2.5253658294677734, "objective/train/theoretical_loss": 3.36604980349032, "objective/train/tokens_used": 2543596000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2389601469039917, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495524406433105, "objective/train/weighted_lm_loss": 2.650407314300537, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9553355574607849, "theoretical_loss": 3.36604980349032, "tokens_seen": 2523136000 }, { "epoch": 0.9, "learning_rate": 4.9754067347710936e-05, "loss": 2.9571, "theoretical_loss": 3.365966286805936, "tokens_seen": 2523922432 }, { "epoch": 0.9, "learning_rate": 4.956488838441166e-05, "loss": 2.9194, "theoretical_loss": 3.365854983027151, "tokens_seen": 2524971008 }, { "epoch": 0.9, "learning_rate": 4.937570942111237e-05, "loss": 2.9091, "theoretical_loss": 3.3657437383974456, "tokens_seen": 2526019584 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.4876956641674042, "objective/train/docs_used": 1420931, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1945478916168213, "objective/train/original_loss": 3.1945481300354004, "objective/train/theoretical_loss": 3.3657020368992527, "objective/train/tokens_used": 2546872800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24205021560192108, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499999523162842, "objective/train/weighted_lm_loss": 3.353463649749756, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9520066976547241, "theoretical_loss": 3.3657020368992527, "tokens_seen": 2526412800 }, { "epoch": 0.9, "learning_rate": 4.918653045781309e-05, "loss": 2.9435, "theoretical_loss": 3.3656325528608533, "tokens_seen": 2527068160 }, { "epoch": 0.9, "learning_rate": 4.899735149451381e-05, "loss": 2.9498, "theoretical_loss": 3.365521426361483, "tokens_seen": 2528116736 }, { "epoch": 0.9, "learning_rate": 4.8808172531214526e-05, "loss": 2.9501, "theoretical_loss": 3.365410358843522, "tokens_seen": 2529165312 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.4870266616344452, "objective/train/docs_used": 1422953, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.5946903228759766, "objective/train/original_loss": 2.5946898460388184, "objective/train/theoretical_loss": 3.3653548471851478, "objective/train/tokens_used": 2550149600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24236759543418884, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499347448349, "objective/train/weighted_lm_loss": 2.725821018218994, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9607920050621033, "theoretical_loss": 3.3653548471851478, "tokens_seen": 2529689600 }, { "epoch": 0.9, "learning_rate": 4.861899356791525e-05, "loss": 2.9142, "theoretical_loss": 3.365299350251229, "tokens_seen": 2530213888 }, { "epoch": 0.9, "learning_rate": 4.8429814604615973e-05, "loss": 2.9477, "theoretical_loss": 3.3651884005289423, "tokens_seen": 2531262464 }, { "epoch": 0.9, "learning_rate": 4.824063564131669e-05, "loss": 2.9652, "theoretical_loss": 3.3650775096210745, "tokens_seen": 2532311040 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.47654762864112854, "objective/train/docs_used": 1425467, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.940070629119873, "objective/train/original_loss": 2.940070152282715, "objective/train/theoretical_loss": 3.365008232645685, "objective/train/tokens_used": 2553426400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24106010794639587, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048879623413086, "objective/train/weighted_lm_loss": 3.083993434906006, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.9513778686523438, "theoretical_loss": 3.365008232645685, "tokens_seen": 2532966400 }, { "epoch": 0.9, "learning_rate": 4.805145667801741e-05, "loss": 2.9732, "theoretical_loss": 3.3649666774721134, "tokens_seen": 2533359616 }, { "epoch": 0.91, "learning_rate": 4.786227771471813e-05, "loss": 3.0567, "theoretical_loss": 3.3648559040266224, "tokens_seen": 2534408192 }, { "epoch": 0.91, "learning_rate": 4.767309875141884e-05, "loss": 2.9618, "theoretical_loss": 3.36474518922924, "tokens_seen": 2535456768 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.45745569467544556, "objective/train/docs_used": 1426787, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7048633098602295, "objective/train/original_loss": 2.7048633098602295, "objective/train/theoretical_loss": 3.3646621915857633, "objective/train/tokens_used": 2556703200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22381484508514404, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0468831062316895, "objective/train/weighted_lm_loss": 2.8340065479278564, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9606444239616394, "theoretical_loss": 3.3646621915857633, "tokens_seen": 2536243200 }, { "epoch": 0.91, "learning_rate": 4.748391978811956e-05, "loss": 3.0131, "theoretical_loss": 3.36463453302468, "tokens_seen": 2536505344 }, { "epoch": 0.91, "learning_rate": 4.729474082482028e-05, "loss": 2.9159, "theoretical_loss": 3.364523935357731, "tokens_seen": 2537553920 }, { "epoch": 0.91, "learning_rate": 4.7105561861521e-05, "loss": 3.0107, "theoretical_loss": 3.3644133961732567, "tokens_seen": 2538602496 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.4876222312450409, "objective/train/docs_used": 1428565, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.81592059135437, "objective/train/original_loss": 2.815920352935791, "objective/train/theoretical_loss": 3.3643167223174584, "objective/train/tokens_used": 2559980000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24069343507289886, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499857664108276, "objective/train/weighted_lm_loss": 2.9560835361480713, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9616897106170654, "theoretical_loss": 3.3643167223174584, "tokens_seen": 2539520000 }, { "epoch": 0.91, "learning_rate": 4.691638289822172e-05, "loss": 2.977, "theoretical_loss": 3.3643029154161948, "tokens_seen": 2539651072 }, { "epoch": 0.91, "learning_rate": 4.672720393492244e-05, "loss": 2.9811, "theoretical_loss": 3.364192493031558, "tokens_seen": 2540699648 }, { "epoch": 0.91, "learning_rate": 4.653802497162315e-05, "loss": 2.9772, "theoretical_loss": 3.3640821289644336, "tokens_seen": 2541748224 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.48290324211120605, "objective/train/docs_used": 1430679, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6714377403259277, "objective/train/original_loss": 2.6714377403259277, "objective/train/theoretical_loss": 3.363971823159983, "objective/train/tokens_used": 2563256800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2380242496728897, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495001077651978, "objective/train/weighted_lm_loss": 2.803065299987793, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.951354444026947, "theoretical_loss": 3.363971823159983, "tokens_seen": 2542796800 }, { "epoch": 0.91, "learning_rate": 4.634884600832388e-05, "loss": 2.9188, "theoretical_loss": 3.363971823159983, "tokens_seen": 2542796800 }, { "epoch": 0.91, "learning_rate": 4.61596670450246e-05, "loss": 2.9692, "theoretical_loss": 3.363861575563442, "tokens_seen": 2543845376 }, { "epoch": 0.91, "learning_rate": 4.5970488081725313e-05, "loss": 3.0068, "theoretical_loss": 3.363751386120119, "tokens_seen": 2544893952 }, { "epoch": 0.91, "learning_rate": 4.5781309118426034e-05, "loss": 2.921, "theoretical_loss": 3.363641254775399, "tokens_seen": 2545942528 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.4920687675476074, "objective/train/docs_used": 1432418, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8956644535064697, "objective/train/original_loss": 2.8956642150878906, "objective/train/theoretical_loss": 3.3636274924396496, "objective/train/tokens_used": 2566533600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2425273358821869, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050439715385437, "objective/train/weighted_lm_loss": 3.042595863342285, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 1.0194487571716309, "theoretical_loss": 3.3636274924396496, "tokens_seen": 2546073600 }, { "epoch": 0.91, "learning_rate": 4.5592130155126755e-05, "loss": 3.0193, "theoretical_loss": 3.3635311814747384, "tokens_seen": 2546991104 }, { "epoch": 0.91, "learning_rate": 4.540295119182747e-05, "loss": 3.0138, "theoretical_loss": 3.3634211661636675, "tokens_seen": 2548039680 }, { "epoch": 0.91, "learning_rate": 4.521377222852819e-05, "loss": 2.9616, "theoretical_loss": 3.363311208787792, "tokens_seen": 2549088256 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.4532369375228882, "objective/train/docs_used": 1434243, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.017941474914551, "objective/train/original_loss": 3.01794171333313, "objective/train/theoretical_loss": 3.3632837284898294, "objective/train/tokens_used": 2569810400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23174332082271576, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0465010404586792, "objective/train/weighted_lm_loss": 3.1610023975372314, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.951921284198761, "theoretical_loss": 3.3632837284898294, "tokens_seen": 2549350400 }, { "epoch": 0.91, "learning_rate": 4.502459326522891e-05, "loss": 3.0003, "theoretical_loss": 3.363201309292788, "tokens_seen": 2550136832 }, { "epoch": 0.91, "learning_rate": 4.483541430192962e-05, "loss": 3.0131, "theoretical_loss": 3.3630914676244075, "tokens_seen": 2551185408 }, { "epoch": 0.91, "learning_rate": 4.4646235338630344e-05, "loss": 3.0015, "theoretical_loss": 3.3629816837284747, "tokens_seen": 2552233984 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.4917280673980713, "objective/train/docs_used": 1436345, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.5419411659240723, "objective/train/original_loss": 2.5419414043426514, "objective/train/theoretical_loss": 3.362940529650914, "objective/train/tokens_used": 2573087200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24304305016994476, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0504082441329956, "objective/train/weighted_lm_loss": 2.669778823852539, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9585726261138916, "theoretical_loss": 3.362940529650914, "tokens_seen": 2552627200 }, { "epoch": 0.91, "learning_rate": 4.4457056375331064e-05, "loss": 2.951, "theoretical_loss": 3.362871957550886, "tokens_seen": 2553282560 }, { "epoch": 0.91, "learning_rate": 4.4267877412031785e-05, "loss": 2.9481, "theoretical_loss": 3.3627622890376117, "tokens_seen": 2554331136 }, { "epoch": 0.91, "learning_rate": 4.4078698448732505e-05, "loss": 2.8988, "theoretical_loss": 3.3626526781346944, "tokens_seen": 2555379712 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.48716670274734497, "objective/train/docs_used": 1438290, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6250998973846436, "objective/train/original_loss": 2.6251001358032227, "objective/train/theoretical_loss": 3.362597894270278, "objective/train/tokens_used": 2576364000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23991335928440094, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499361753463745, "objective/train/weighted_lm_loss": 2.756412982940674, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.956545352935791, "theoretical_loss": 3.362597894270278, "tokens_seen": 2555904000 }, { "epoch": 0.91, "learning_rate": 4.3889519485433226e-05, "loss": 2.96, "theoretical_loss": 3.3625431247882496, "tokens_seen": 2556428288 }, { "epoch": 0.91, "learning_rate": 4.370034052213394e-05, "loss": 2.9932, "theoretical_loss": 3.3624336289444643, "tokens_seen": 2557476864 }, { "epoch": 0.91, "learning_rate": 4.351116155883466e-05, "loss": 3.0289, "theoretical_loss": 3.3623241905495993, "tokens_seen": 2558525440 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.4897761940956116, "objective/train/docs_used": 1439355, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9495246410369873, "objective/train/original_loss": 2.9495248794555664, "objective/train/theoretical_loss": 3.362255820702239, "objective/train/tokens_used": 2579640800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24264518916606903, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502110719680786, "objective/train/weighted_lm_loss": 3.0971879959106445, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.957098126411438, "theoretical_loss": 3.362255820702239, "tokens_seen": 2559180800 }, { "epoch": 0.91, "learning_rate": 4.332198259553538e-05, "loss": 2.9996, "theoretical_loss": 3.3622148095499864, "tokens_seen": 2559574016 }, { "epoch": 0.91, "learning_rate": 4.3132803632236095e-05, "loss": 3.0646, "theoretical_loss": 3.3621054858920303, "tokens_seen": 2560622592 }, { "epoch": 0.91, "learning_rate": 4.2943624668936815e-05, "loss": 3.001, "theoretical_loss": 3.3619962195222075, "tokens_seen": 2561671168 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.48951423168182373, "objective/train/docs_used": 1441205, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.010582447052002, "objective/train/original_loss": 3.010582685470581, "objective/train/theoretical_loss": 3.3619143073080204, "objective/train/tokens_used": 2582917600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24158918857574463, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050179362297058, "objective/train/weighted_lm_loss": 3.161499261856079, "objective/train/weights_max": 1.0512157678604126, "objective/train/weights_min": 0.9615952372550964, "theoretical_loss": 3.3619143073080204, "tokens_seen": 2562457600 }, { "epoch": 0.92, "learning_rate": 4.2754445705637536e-05, "loss": 2.9713, "theoretical_loss": 3.3618870103870657, "tokens_seen": 2562719744 }, { "epoch": 0.92, "learning_rate": 4.256526674233825e-05, "loss": 2.9824, "theoretical_loss": 3.3617778584332254, "tokens_seen": 2563768320 }, { "epoch": 0.92, "learning_rate": 4.237608777903897e-05, "loss": 3.0308, "theoretical_loss": 3.3616687636073777, "tokens_seen": 2564816896 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.4768475890159607, "objective/train/docs_used": 1443158, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.762218713760376, "objective/train/original_loss": 2.762218475341797, "objective/train/theoretical_loss": 3.3615733524557143, "objective/train/tokens_used": 2586194400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2334812730550766, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488715171813965, "objective/train/weighted_lm_loss": 2.8970489501953125, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9876766204833984, "theoretical_loss": 3.3615733524557143, "tokens_seen": 2565734400 }, { "epoch": 0.92, "learning_rate": 4.21869088157397e-05, "loss": 2.9873, "theoretical_loss": 3.3615597258562855, "tokens_seen": 2565865472 }, { "epoch": 0.92, "learning_rate": 4.199772985244041e-05, "loss": 2.9843, "theoretical_loss": 3.3614507451267834, "tokens_seen": 2566914048 }, { "epoch": 0.92, "learning_rate": 4.180855088914113e-05, "loss": 2.954, "theoretical_loss": 3.361341821365777, "tokens_seen": 2567962624 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.4460161030292511, "objective/train/docs_used": 1445048, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.046217441558838, "objective/train/original_loss": 3.046217441558838, "objective/train/theoretical_loss": 3.3612329545202426, "objective/train/tokens_used": 2589471200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23430149257183075, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0457913875579834, "objective/train/weighted_lm_loss": 3.190871238708496, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.9551336169242859, "theoretical_loss": 3.3612329545202426, "tokens_seen": 2569011200 }, { "epoch": 0.92, "learning_rate": 4.1619371925841845e-05, "loss": 2.9734, "theoretical_loss": 3.3612329545202426, "tokens_seen": 2569011200 }, { "epoch": 0.92, "learning_rate": 4.1430192962542566e-05, "loss": 2.9417, "theoretical_loss": 3.361124144537228, "tokens_seen": 2570059776 }, { "epoch": 0.92, "learning_rate": 4.1241013999243287e-05, "loss": 2.9385, "theoretical_loss": 3.361015391363852, "tokens_seen": 2571108352 }, { "epoch": 0.92, "learning_rate": 4.1051835035944e-05, "loss": 3.0063, "theoretical_loss": 3.360906694947303, "tokens_seen": 2572156928 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.48974141478538513, "objective/train/docs_used": 1447122, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.636260986328125, "objective/train/original_loss": 2.636261224746704, "objective/train/theoretical_loss": 3.360893111883321, "objective/train/tokens_used": 2592748000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2414124757051468, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050201177597046, "objective/train/weighted_lm_loss": 2.769242763519287, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9594744443893433, "theoretical_loss": 3.360893111883321, "tokens_seen": 2572288000 }, { "epoch": 0.92, "learning_rate": 4.086265607264472e-05, "loss": 2.9561, "theoretical_loss": 3.360798055234841, "tokens_seen": 2573205504 }, { "epoch": 0.92, "learning_rate": 4.067347710934544e-05, "loss": 2.9818, "theoretical_loss": 3.3606894721737968, "tokens_seen": 2574254080 }, { "epoch": 0.92, "learning_rate": 4.0484298146046155e-05, "loss": 2.9962, "theoretical_loss": 3.36058094571157, "tokens_seen": 2575302656 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.49044662714004517, "objective/train/docs_used": 1449033, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7619566917419434, "objective/train/original_loss": 2.7619566917419434, "objective/train/theoretical_loss": 3.3605538229334218, "objective/train/tokens_used": 2596024800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24158285558223724, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502727031707764, "objective/train/weighted_lm_loss": 2.901848793029785, "objective/train/weights_max": 1.0512210130691528, "objective/train/weights_min": 0.9779994487762451, "theoretical_loss": 3.3605538229334218, "tokens_seen": 2575564800 }, { "epoch": 0.92, "learning_rate": 4.0295119182746876e-05, "loss": 3.0442, "theoretical_loss": 3.360472475795633, "tokens_seen": 2576351232 }, { "epoch": 0.92, "learning_rate": 4.0105940219447596e-05, "loss": 3.0068, "theoretical_loss": 3.3603640623735247, "tokens_seen": 2577399808 }, { "epoch": 0.92, "learning_rate": 3.991676125614832e-05, "loss": 3.0617, "theoretical_loss": 3.360255705392857, "tokens_seen": 2578448384 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.4713362753391266, "objective/train/docs_used": 1450816, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0295400619506836, "objective/train/original_loss": 3.0295395851135254, "objective/train/theoretical_loss": 3.360215086065735, "objective/train/tokens_used": 2599301600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23452427983283997, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0483254194259644, "objective/train/weighted_lm_loss": 3.175776958465576, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9520061016082764, "theoretical_loss": 3.360215086065735, "tokens_seen": 2578841600 }, { "epoch": 0.92, "learning_rate": 3.972758229284904e-05, "loss": 2.9335, "theoretical_loss": 3.3601474048013107, "tokens_seen": 2579496960 }, { "epoch": 0.92, "learning_rate": 3.953840332954976e-05, "loss": 2.9984, "theoretical_loss": 3.3600391605466364, "tokens_seen": 2580545536 }, { "epoch": 0.92, "learning_rate": 3.934922436625047e-05, "loss": 2.9435, "theoretical_loss": 3.359930972576654, "tokens_seen": 2581594112 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.4855765104293823, "objective/train/docs_used": 1453056, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.05781626701355, "objective/train/original_loss": 3.0578160285949707, "objective/train/theoretical_loss": 3.359876899682135, "objective/train/tokens_used": 2602578400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24145200848579407, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497848987579346, "objective/train/weighted_lm_loss": 3.208839178085327, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9515539407730103, "theoretical_loss": 3.359876899682135, "tokens_seen": 2582118400 }, { "epoch": 0.92, "learning_rate": 3.916004540295119e-05, "loss": 2.9645, "theoretical_loss": 3.359822840839253, "tokens_seen": 2582642688 }, { "epoch": 0.92, "learning_rate": 3.897086643965191e-05, "loss": 2.9902, "theoretical_loss": 3.359714765282393, "tokens_seen": 2583691264 }, { "epoch": 0.92, "learning_rate": 3.8781687476352627e-05, "loss": 3.037, "theoretical_loss": 3.3596067458541015, "tokens_seen": 2584739840 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.4761214852333069, "objective/train/docs_used": 1455160, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8679680824279785, "objective/train/original_loss": 2.8679680824279785, "objective/train/theoretical_loss": 3.3595392621911433, "objective/train/tokens_used": 2605855200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24079108238220215, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0488356351852417, "objective/train/weighted_lm_loss": 3.006171226501465, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9513256549835205, "theoretical_loss": 3.3595392621911433, "tokens_seen": 2585395200 }, { "epoch": 0.92, "learning_rate": 3.859250851305335e-05, "loss": 3.0005, "theoretical_loss": 3.3594987825024765, "tokens_seen": 2585788416 }, { "epoch": 0.92, "learning_rate": 3.840332954975407e-05, "loss": 3.0012, "theoretical_loss": 3.359390875175684, "tokens_seen": 2586836992 }, { "epoch": 0.92, "learning_rate": 3.821415058645478e-05, "loss": 3.018, "theoretical_loss": 3.3592830238219595, "tokens_seen": 2587885568 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.49262744188308716, "objective/train/docs_used": 1457167, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8069980144500732, "objective/train/original_loss": 2.8069982528686523, "objective/train/theoretical_loss": 3.359202172007891, "objective/train/tokens_used": 2609132000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24417226016521454, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0505039691925049, "objective/train/weighted_lm_loss": 2.9489877223968506, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9552015066146851, "theoretical_loss": 3.359202172007891, "tokens_seen": 2588672000 }, { "epoch": 0.92, "learning_rate": 3.80249716231555e-05, "loss": 3.0026, "theoretical_loss": 3.359175228389607, "tokens_seen": 2588934144 }, { "epoch": 0.93, "learning_rate": 3.783579265985623e-05, "loss": 3.021, "theoretical_loss": 3.359067488826999, "tokens_seen": 2589982720 }, { "epoch": 0.93, "learning_rate": 3.764661369655694e-05, "loss": 2.9914, "theoretical_loss": 3.3589598050825775, "tokens_seen": 2591031296 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.4884391129016876, "objective/train/docs_used": 1459162, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.828636884689331, "objective/train/original_loss": 2.82863712310791, "objective/train/theoretical_loss": 3.3588656275540845, "objective/train/tokens_used": 2612408800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24153073132038116, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500717163085938, "objective/train/weighted_lm_loss": 2.9701390266418457, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9631984829902649, "theoretical_loss": 3.3588656275540845, "tokens_seen": 2591948800 }, { "epoch": 0.93, "learning_rate": 3.7457434733257664e-05, "loss": 3.0185, "theoretical_loss": 3.3588521771048514, "tokens_seen": 2592079872 }, { "epoch": 0.93, "learning_rate": 3.7268255769958384e-05, "loss": 3.0217, "theoretical_loss": 3.3587446048423995, "tokens_seen": 2593128448 }, { "epoch": 0.93, "learning_rate": 3.70790768066591e-05, "loss": 2.9578, "theoretical_loss": 3.358637088243867, "tokens_seen": 2594177024 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.48765861988067627, "objective/train/docs_used": 1460817, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.20979380607605, "objective/train/original_loss": 3.20979380607605, "objective/train/theoretical_loss": 3.3585296272579694, "objective/train/tokens_used": 2615685600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2429957538843155, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500011444091797, "objective/train/weighted_lm_loss": 3.370309352874756, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9553640484809875, "theoretical_loss": 3.3585296272579694, "tokens_seen": 2595225600 }, { "epoch": 0.93, "learning_rate": 3.688989784335982e-05, "loss": 3.0662, "theoretical_loss": 3.3585296272579694, "tokens_seen": 2595225600 }, { "epoch": 0.93, "learning_rate": 3.670071888006054e-05, "loss": 3.0268, "theoretical_loss": 3.358422221833488, "tokens_seen": 2596274176 }, { "epoch": 0.93, "learning_rate": 3.651153991676125e-05, "loss": 3.0209, "theoretical_loss": 3.358314871919273, "tokens_seen": 2597322752 }, { "epoch": 0.93, "learning_rate": 3.632236095346197e-05, "loss": 3.0275, "theoretical_loss": 3.3582075774642424, "tokens_seen": 2598371328 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.4868325889110565, "objective/train/docs_used": 1462730, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.88214373588562, "objective/train/original_loss": 2.88214373588562, "objective/train/theoretical_loss": 3.358194169554296, "objective/train/tokens_used": 2618962400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24065445363521576, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049906611442566, "objective/train/weighted_lm_loss": 3.026564598083496, "objective/train/weights_max": 1.0512189865112305, "objective/train/weights_min": 0.9531733989715576, "theoretical_loss": 3.358194169554296, "tokens_seen": 2598502400 }, { "epoch": 0.93, "learning_rate": 3.6133181990162694e-05, "loss": 3.0284, "theoretical_loss": 3.358100338417381, "tokens_seen": 2599419904 }, { "epoch": 0.93, "learning_rate": 3.594400302686341e-05, "loss": 3.1054, "theoretical_loss": 3.3579931547277426, "tokens_seen": 2600468480 }, { "epoch": 0.93, "learning_rate": 3.575482406356413e-05, "loss": 3.092, "theoretical_loss": 3.3578860263444463, "tokens_seen": 2601517056 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.4892682433128357, "objective/train/docs_used": 1464443, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9992620944976807, "objective/train/original_loss": 2.9992618560791016, "objective/train/theoretical_loss": 3.3578592528842823, "objective/train/tokens_used": 2622239200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24054360389709473, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0501495599746704, "objective/train/weighted_lm_loss": 3.1498708724975586, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9776185154914856, "theoretical_loss": 3.3578592528842823, "tokens_seen": 2601779200 }, { "epoch": 0.93, "learning_rate": 3.5565645100264856e-05, "loss": 3.0732, "theoretical_loss": 3.3577789532166804, "tokens_seen": 2602565632 }, { "epoch": 0.93, "learning_rate": 3.537646613696557e-05, "loss": 3.0749, "theoretical_loss": 3.3576719352936992, "tokens_seen": 2603614208 }, { "epoch": 0.93, "learning_rate": 3.518728717366629e-05, "loss": 3.0803, "theoretical_loss": 3.357564972524824, "tokens_seen": 2604662784 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.49427875876426697, "objective/train/docs_used": 1466332, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.944730043411255, "objective/train/original_loss": 2.944730043411255, "objective/train/theoretical_loss": 3.357524875695582, "objective/train/tokens_used": 2625516000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24559368193149567, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506764650344849, "objective/train/weighted_lm_loss": 3.093897819519043, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9565245509147644, "theoretical_loss": 3.357524875695582, "tokens_seen": 2605056000 }, { "epoch": 0.93, "learning_rate": 3.499810821036701e-05, "loss": 3.0559, "theoretical_loss": 3.357458064859444, "tokens_seen": 2605711360 }, { "epoch": 0.93, "learning_rate": 3.4808929247067724e-05, "loss": 3.0196, "theoretical_loss": 3.3573512122470137, "tokens_seen": 2606759936 }, { "epoch": 0.93, "learning_rate": 3.4619750283768445e-05, "loss": 3.1101, "theoretical_loss": 3.3572444146370555, "tokens_seen": 2607808512 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.48803937435150146, "objective/train/docs_used": 1468043, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.059133529663086, "objective/train/original_loss": 3.0591330528259277, "objective/train/theoretical_loss": 3.357191036442247, "objective/train/tokens_used": 2628792800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2422197312116623, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500352382659912, "objective/train/weighted_lm_loss": 3.2130544185638428, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9526868462562561, "theoretical_loss": 3.357191036442247, "tokens_seen": 2608332800 }, { "epoch": 0.93, "learning_rate": 3.4430571320469165e-05, "loss": 3.0649, "theoretical_loss": 3.3571376719791575, "tokens_seen": 2608857088 }, { "epoch": 0.93, "learning_rate": 3.424139235716988e-05, "loss": 3.0104, "theoretical_loss": 3.357030984222975, "tokens_seen": 2609905664 }, { "epoch": 0.93, "learning_rate": 3.40522133938706e-05, "loss": 3.0704, "theoretical_loss": 3.3569243513182294, "tokens_seen": 2610954240 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.49350228905677795, "objective/train/docs_used": 1469063, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.5382068157196045, "objective/train/original_loss": 2.5382070541381836, "objective/train/theoretical_loss": 3.356857733584695, "objective/train/tokens_used": 2632069600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24541838467121124, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050597906112671, "objective/train/weighted_lm_loss": 2.6664652824401855, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.952812910079956, "theoretical_loss": 3.356857733584695, "tokens_seen": 2611609600 }, { "epoch": 0.93, "learning_rate": 3.386303443057132e-05, "loss": 3.0581, "theoretical_loss": 3.356817773214708, "tokens_seen": 2612002816 }, { "epoch": 0.93, "learning_rate": 3.3673855467272034e-05, "loss": 3.0308, "theoretical_loss": 3.3567112498622644, "tokens_seen": 2613051392 }, { "epoch": 0.93, "learning_rate": 3.348467650397276e-05, "loss": 3.058, "theoretical_loss": 3.3566047812108186, "tokens_seen": 2614099968 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.49121448397636414, "objective/train/docs_used": 1470949, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.566678285598755, "objective/train/original_loss": 2.566678047180176, "objective/train/theoretical_loss": 3.356524965589674, "objective/train/tokens_used": 2635346400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24299617111682892, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503567457199097, "objective/train/weighted_lm_loss": 2.6957244873046875, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9728997945785522, "theoretical_loss": 3.356524965589674, "tokens_seen": 2614886400 }, { "epoch": 0.93, "learning_rate": 3.329549754067348e-05, "loss": 3.0031, "theoretical_loss": 3.3564983672103548, "tokens_seen": 2615148544 }, { "epoch": 0.93, "learning_rate": 3.3106318577374196e-05, "loss": 3.0581, "theoretical_loss": 3.3563920078109257, "tokens_seen": 2616197120 }, { "epoch": 0.93, "learning_rate": 3.2917139614074916e-05, "loss": 3.0728, "theoretical_loss": 3.3562857029626474, "tokens_seen": 2617245696 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.4815143346786499, "objective/train/docs_used": 1472903, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8542768955230713, "objective/train/original_loss": 2.854276657104492, "objective/train/theoretical_loss": 3.35619273093023, "objective/train/tokens_used": 2638623200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23790426552295685, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493605136871338, "objective/train/weighted_lm_loss": 2.994652509689331, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.952725350856781, "theoretical_loss": 3.35619273093023, "tokens_seen": 2618163200 }, { "epoch": 0.94, "learning_rate": 3.272796065077564e-05, "loss": 3.1399, "theoretical_loss": 3.3561794526157023, "tokens_seen": 2618294272 }, { "epoch": 0.94, "learning_rate": 3.253878168747635e-05, "loss": 3.0911, "theoretical_loss": 3.356073256720338, "tokens_seen": 2619342848 }, { "epoch": 0.94, "learning_rate": 3.234960272417707e-05, "loss": 3.15, "theoretical_loss": 3.3559671152268686, "tokens_seen": 2620391424 }, { "debugging/Self-BLEU-5": 0.449855913696806, "debugging/distinct-1-grams": 0.769778305351557, "debugging/distinct-2-grams": 0.9467712904639874, "debugging/entropy-1-grams": 5.998633443012235, "debugging/entropy-2-grams": 6.951908950814323, "debugging/length": 469.11764705882354, "debugging/num_segments": 17, "debugging/raw_token_scores_avg": 0.039095163345336914, "debugging/raw_token_scores_std": 0.11093362420797348, "epoch": 0.94, "objective/train/advantage_avg": 0.46091702580451965, "objective/train/docs_used": 1474904, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9136695861816406, "objective/train/original_loss": 2.9136695861816406, "objective/train/theoretical_loss": 3.3558610280856715, "objective/train/tokens_used": 2641900000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22473308444023132, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0472337007522583, "objective/train/weighted_lm_loss": 3.0515451431274414, "objective/train/weights_max": 1.0512163639068604, "objective/train/weights_min": 0.9528725147247314, "theoretical_loss": 3.3558610280856715, "tokens_seen": 2621440000 }, { "epoch": 0.94, "learning_rate": 3.216042376087779e-05, "loss": 3.0863, "theoretical_loss": 3.3558610280856715, "tokens_seen": 2621440000 }, { "epoch": 0.94, "learning_rate": 3.1971244797578505e-05, "loss": 3.0977, "theoretical_loss": 3.3557549952471906, "tokens_seen": 2622488576 }, { "epoch": 0.94, "learning_rate": 3.1782065834279226e-05, "loss": 3.0837, "theoretical_loss": 3.3556490166619337, "tokens_seen": 2623537152 }, { "epoch": 0.94, "learning_rate": 3.1592886870979946e-05, "loss": 3.1327, "theoretical_loss": 3.3555430922804743, "tokens_seen": 2624585728 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.48707959055900574, "objective/train/docs_used": 1477123, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9778811931610107, "objective/train/original_loss": 2.97788143157959, "objective/train/theoretical_loss": 3.3555298555415374, "objective/train/tokens_used": 2645176800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23922058939933777, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499240159988403, "objective/train/weighted_lm_loss": 3.126800298690796, "objective/train/weights_max": 1.0512198209762573, "objective/train/weights_min": 1.0077565908432007, "theoretical_loss": 3.3555298555415374, "tokens_seen": 2624716800 }, { "epoch": 0.94, "learning_rate": 3.140370790768066e-05, "loss": 3.0617, "theoretical_loss": 3.3554372220534505, "tokens_seen": 2625634304 }, { "epoch": 0.94, "learning_rate": 3.121452894438139e-05, "loss": 3.0449, "theoretical_loss": 3.3553314059315653, "tokens_seen": 2626682880 }, { "epoch": 0.94, "learning_rate": 3.102534998108211e-05, "loss": 3.0209, "theoretical_loss": 3.3552256438655856, "tokens_seen": 2627731456 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.4856823980808258, "objective/train/docs_used": 1478969, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.793982982635498, "objective/train/original_loss": 2.793982982635498, "objective/train/theoretical_loss": 3.3551992117895626, "objective/train/tokens_used": 2648453600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2396468073129654, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497863292694092, "objective/train/weighted_lm_loss": 2.9343104362487793, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9753270745277405, "theoretical_loss": 3.3551992117895626, "tokens_seen": 2627993600 }, { "epoch": 0.94, "learning_rate": 3.083617101778282e-05, "loss": 3.081, "theoretical_loss": 3.355119935806343, "tokens_seen": 2628780032 }, { "epoch": 0.94, "learning_rate": 3.064699205448354e-05, "loss": 3.0635, "theoretical_loss": 3.3550142817047335, "tokens_seen": 2629828608 }, { "epoch": 0.94, "learning_rate": 3.0457813091184263e-05, "loss": 3.03, "theoretical_loss": 3.3549086815117164, "tokens_seen": 2630877184 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.48200660943984985, "objective/train/docs_used": 1481036, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2332446575164795, "objective/train/original_loss": 3.2332448959350586, "objective/train/theoretical_loss": 3.3548690953276465, "objective/train/tokens_used": 2651730400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2395787388086319, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049418330192566, "objective/train/weighted_lm_loss": 3.3921494483947754, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9518641233444214, "theoretical_loss": 3.3548690953276465, "tokens_seen": 2631270400 }, { "epoch": 0.94, "learning_rate": 3.026863412788498e-05, "loss": 3.0466, "theoretical_loss": 3.3548031351783174, "tokens_seen": 2631925760 }, { "epoch": 0.94, "learning_rate": 3.0079455164585697e-05, "loss": 3.0208, "theoretical_loss": 3.3546976426556236, "tokens_seen": 2632974336 }, { "epoch": 0.94, "learning_rate": 2.9890276201286418e-05, "loss": 2.9977, "theoretical_loss": 3.3545922038947875, "tokens_seen": 2634022912 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.4820900857448578, "objective/train/docs_used": 1482195, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2531821727752686, "objective/train/original_loss": 3.2531819343566895, "objective/train/theoretical_loss": 3.3545395046598183, "objective/train/tokens_used": 2655007200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23769210278987885, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494171380996704, "objective/train/weighted_lm_loss": 3.413717031478882, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9524181485176086, "theoretical_loss": 3.3545395046598183, "tokens_seen": 2634547200 }, { "epoch": 0.94, "learning_rate": 2.9701097237987138e-05, "loss": 3.0562, "theoretical_loss": 3.3544868188470245, "tokens_seen": 2635071488 }, { "epoch": 0.94, "learning_rate": 2.9511918274687855e-05, "loss": 3.0351, "theoretical_loss": 3.354381487463615, "tokens_seen": 2636120064 }, { "epoch": 0.94, "learning_rate": 2.9322739311388576e-05, "loss": 3.0414, "theoretical_loss": 3.3542762096959007, "tokens_seen": 2637168640 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.48411837220191956, "objective/train/docs_used": 1484156, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8459889888763428, "objective/train/original_loss": 2.8459887504577637, "objective/train/theoretical_loss": 3.3542104382962057, "objective/train/tokens_used": 2658284000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23963847756385803, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049630045890808, "objective/train/weighted_lm_loss": 2.9878950119018555, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9651797413825989, "theoretical_loss": 3.3542104382962057, "tokens_seen": 2637824000 }, { "epoch": 0.94, "learning_rate": 2.9133560348089293e-05, "loss": 3.0507, "theoretical_loss": 3.3541709854952892, "tokens_seen": 2638217216 }, { "epoch": 0.94, "learning_rate": 2.894438138479001e-05, "loss": 3.0462, "theoretical_loss": 3.35406581481325, "tokens_seen": 2639265792 }, { "epoch": 0.94, "learning_rate": 2.875520242149073e-05, "loss": 3.0154, "theoretical_loss": 3.353960697601316, "tokens_seen": 2640314368 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.48868516087532043, "objective/train/docs_used": 1485777, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8094425201416016, "objective/train/original_loss": 2.8094422817230225, "objective/train/theoretical_loss": 3.353881894753002, "objective/train/tokens_used": 2661560800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24112290143966675, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500941276550293, "objective/train/weighted_lm_loss": 2.9495749473571777, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9579086303710938, "theoretical_loss": 3.353881894753002, "tokens_seen": 2641100800 }, { "epoch": 0.94, "learning_rate": 2.856602345819145e-05, "loss": 3.0786, "theoretical_loss": 3.353855633811084, "tokens_seen": 2641362944 }, { "epoch": 0.94, "learning_rate": 2.837684449489217e-05, "loss": 3.0674, "theoretical_loss": 3.3537506233942116, "tokens_seen": 2642411520 }, { "epoch": 0.94, "learning_rate": 2.818766553159289e-05, "loss": 3.0164, "theoretical_loss": 3.353645666302423, "tokens_seen": 2643460096 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.48162642121315, "objective/train/docs_used": 1488108, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0382301807403564, "objective/train/original_loss": 3.0382299423217773, "objective/train/theoretical_loss": 3.353553872552434, "objective/train/tokens_used": 2664837600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23907937109470367, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0493777990341187, "objective/train/weighted_lm_loss": 3.186624050140381, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9515630006790161, "theoretical_loss": 3.353553872552434, "tokens_seen": 2644377600 }, { "epoch": 0.94, "learning_rate": 2.7998486568293606e-05, "loss": 3.0749, "theoretical_loss": 3.3535407624875013, "tokens_seen": 2644508672 }, { "epoch": 0.94, "learning_rate": 2.7809307604994323e-05, "loss": 3.1002, "theoretical_loss": 3.3534359119012946, "tokens_seen": 2645557248 }, { "epoch": 0.95, "learning_rate": 2.7620128641695044e-05, "loss": 3.073, "theoretical_loss": 3.3533311144957136, "tokens_seen": 2646605824 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.4883487820625305, "objective/train/docs_used": 1490143, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.5410468578338623, "objective/train/original_loss": 2.541046619415283, "objective/train/theoretical_loss": 3.3532263702227305, "objective/train/tokens_used": 2668114400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24362722039222717, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500731468200684, "objective/train/weighted_lm_loss": 2.6677608489990234, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9517417550086975, "theoretical_loss": 3.3532263702227305, "tokens_seen": 2647654400 }, { "epoch": 0.95, "learning_rate": 2.7430949678395765e-05, "loss": 3.0142, "theoretical_loss": 3.3532263702227305, "tokens_seen": 2647654400 }, { "epoch": 0.95, "learning_rate": 2.724177071509648e-05, "loss": 3.0453, "theoretical_loss": 3.3531216790343805, "tokens_seen": 2648702976 }, { "epoch": 0.95, "learning_rate": 2.7052591751797202e-05, "loss": 3.0265, "theoretical_loss": 3.35301704088276, "tokens_seen": 2649751552 }, { "epoch": 0.95, "learning_rate": 2.686341278849792e-05, "loss": 3.0057, "theoretical_loss": 3.3529124557200296, "tokens_seen": 2650800128 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.47749945521354675, "objective/train/docs_used": 1492100, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0875096321105957, "objective/train/original_loss": 3.0875096321105957, "objective/train/theoretical_loss": 3.35289938629809, "objective/train/tokens_used": 2671391200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24302785098552704, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489848852157593, "objective/train/weighted_lm_loss": 3.23630428314209, "objective/train/weights_max": 1.0512157678604126, "objective/train/weights_min": 0.9513412117958069, "theoretical_loss": 3.35289938629809, "tokens_seen": 2650931200 }, { "epoch": 0.95, "learning_rate": 2.6674233825198637e-05, "loss": 3.0612, "theoretical_loss": 3.3528079234984105, "tokens_seen": 2651848704 }, { "epoch": 0.95, "learning_rate": 2.648505486189936e-05, "loss": 3.0719, "theoretical_loss": 3.352703444170186, "tokens_seen": 2652897280 }, { "epoch": 0.95, "learning_rate": 2.6295875898600078e-05, "loss": 2.9922, "theoretical_loss": 3.3525990176877007, "tokens_seen": 2653945856 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.4898962080478668, "objective/train/docs_used": 1494002, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.18452787399292, "objective/train/original_loss": 3.184528350830078, "objective/train/theoretical_loss": 3.3525729193186478, "objective/train/tokens_used": 2674668000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24191275238990784, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502192974090576, "objective/train/weighted_lm_loss": 3.344353675842285, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.9598506689071655, "theoretical_loss": 3.3525729193186478, "tokens_seen": 2654208000 }, { "epoch": 0.95, "learning_rate": 2.6106696935300795e-05, "loss": 3.0289, "theoretical_loss": 3.3524946440033627, "tokens_seen": 2654994432 }, { "epoch": 0.95, "learning_rate": 2.5917517972001515e-05, "loss": 3.0558, "theoretical_loss": 3.35239032306964, "tokens_seen": 2656043008 }, { "epoch": 0.95, "learning_rate": 2.5728339008702233e-05, "loss": 3.0116, "theoretical_loss": 3.352286054839063, "tokens_seen": 2657091584 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.4882798194885254, "objective/train/docs_used": 1495727, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.735853433609009, "objective/train/original_loss": 2.7358531951904297, "objective/train/theoretical_loss": 3.3522469678304483, "objective/train/tokens_used": 2677944800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24260863661766052, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0500612258911133, "objective/train/weighted_lm_loss": 2.8720898628234863, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9521510004997253, "theoretical_loss": 3.3522469678304483, "tokens_seen": 2657484800 }, { "epoch": 0.95, "learning_rate": 2.553916004540295e-05, "loss": 3.0711, "theoretical_loss": 3.3521818392642233, "tokens_seen": 2658140160 }, { "epoch": 0.95, "learning_rate": 2.5349981082103674e-05, "loss": 2.9868, "theoretical_loss": 3.3520776762977738, "tokens_seen": 2659188736 }, { "epoch": 0.95, "learning_rate": 2.516080211880439e-05, "loss": 3.0218, "theoretical_loss": 3.3519735658924286, "tokens_seen": 2660237312 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.4827539622783661, "objective/train/docs_used": 1497686, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.5873148441314697, "objective/train/original_loss": 2.5873146057128906, "objective/train/theoretical_loss": 3.35192153038541, "objective/train/tokens_used": 2681221600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23605278134346008, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049475073814392, "objective/train/weighted_lm_loss": 2.7146008014678955, "objective/train/weights_max": 1.051217794418335, "objective/train/weights_min": 0.9633801579475403, "theoretical_loss": 3.35192153038541, "tokens_seen": 2660761600 }, { "epoch": 0.95, "learning_rate": 2.4971623155505108e-05, "loss": 3.0077, "theoretical_loss": 3.3518695080009633, "tokens_seen": 2661285888 }, { "epoch": 0.95, "learning_rate": 2.478244419220583e-05, "loss": 3.0271, "theoretical_loss": 3.351765502576214, "tokens_seen": 2662334464 }, { "epoch": 0.95, "learning_rate": 2.4593265228906546e-05, "loss": 3.0663, "theoretical_loss": 3.3516615495710775, "tokens_seen": 2663383040 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.493733286857605, "objective/train/docs_used": 1499388, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9322574138641357, "objective/train/original_loss": 2.932257652282715, "objective/train/theoretical_loss": 3.351596605541298, "objective/train/tokens_used": 2684498400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2454751431941986, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0506212711334229, "objective/train/weighted_lm_loss": 3.0805580615997314, "objective/train/weights_max": 1.051218867301941, "objective/train/weights_min": 0.9514515995979309, "theoretical_loss": 3.351596605541298, "tokens_seen": 2664038400 }, { "epoch": 0.95, "learning_rate": 2.4404086265607263e-05, "loss": 3.0406, "theoretical_loss": 3.351557648938513, "tokens_seen": 2664431616 }, { "epoch": 0.95, "learning_rate": 2.4214907302307987e-05, "loss": 3.0289, "theoretical_loss": 3.351453800631538, "tokens_seen": 2665480192 }, { "epoch": 0.95, "learning_rate": 2.4025728339008704e-05, "loss": 3.0233, "theoretical_loss": 3.3513500046032325, "tokens_seen": 2666528768 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.4782050549983978, "objective/train/docs_used": 1500922, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9047739505767822, "objective/train/original_loss": 2.904773712158203, "objective/train/theoretical_loss": 3.351272191861688, "objective/train/tokens_used": 2687775200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2350698709487915, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490152835845947, "objective/train/weighted_lm_loss": 3.0477681159973145, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9560204148292542, "theoretical_loss": 3.351272191861688, "tokens_seen": 2667315200 }, { "epoch": 0.95, "learning_rate": 2.383654937570942e-05, "loss": 3.0389, "theoretical_loss": 3.351246260806736, "tokens_seen": 2667577344 }, { "epoch": 0.95, "learning_rate": 2.364737041241014e-05, "loss": 3.0276, "theoretical_loss": 3.3511425691952486, "tokens_seen": 2668625920 }, { "epoch": 0.95, "learning_rate": 2.345819144911086e-05, "loss": 3.0211, "theoretical_loss": 3.3510389297220318, "tokens_seen": 2669674496 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.47957465052604675, "objective/train/docs_used": 1502841, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7510986328125, "objective/train/original_loss": 2.7510986328125, "objective/train/theoretical_loss": 3.350948287915944, "objective/train/tokens_used": 2691052000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23619519174098969, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049157977104187, "objective/train/weighted_lm_loss": 2.886131525039673, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9544672966003418, "theoretical_loss": 3.350948287915944, "tokens_seen": 2670592000 }, { "epoch": 0.95, "learning_rate": 2.3269012485811576e-05, "loss": 3.0576, "theoretical_loss": 3.350935342340405, "tokens_seen": 2670723072 }, { "epoch": 0.95, "learning_rate": 2.30798335225123e-05, "loss": 3.0318, "theoretical_loss": 3.3508318070037504, "tokens_seen": 2671771648 }, { "epoch": 0.95, "learning_rate": 2.2890654559213017e-05, "loss": 3.0846, "theoretical_loss": 3.350728323665508, "tokens_seen": 2672820224 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.48705071210861206, "objective/train/docs_used": 1504654, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8059308528900146, "objective/train/original_loss": 2.8059306144714355, "objective/train/theoretical_loss": 3.3506248922791784, "objective/train/tokens_used": 2694328800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24300509691238403, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499402284622192, "objective/train/weighted_lm_loss": 2.945868730545044, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9610604047775269, "theoretical_loss": 3.3506248922791784, "tokens_seen": 2673868800 }, { "epoch": 0.96, "learning_rate": 2.2701475595913734e-05, "loss": 3.0519, "theoretical_loss": 3.3506248922791784, "tokens_seen": 2673868800 }, { "epoch": 0.96, "learning_rate": 2.2512296632614455e-05, "loss": 3.0418, "theoretical_loss": 3.3505215127983226, "tokens_seen": 2674917376 }, { "epoch": 0.96, "learning_rate": 2.2323117669315172e-05, "loss": 3.1014, "theoretical_loss": 3.3504181851765606, "tokens_seen": 2675965952 }, { "epoch": 0.96, "learning_rate": 2.2133938706015892e-05, "loss": 3.041, "theoretical_loss": 3.3503149093675724, "tokens_seen": 2677014528 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.4849916100502014, "objective/train/docs_used": 1506560, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9666738510131836, "objective/train/original_loss": 2.9666740894317627, "objective/train/theoretical_loss": 3.35030200353223, "objective/train/tokens_used": 2697605600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24110575020313263, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0497246980667114, "objective/train/weighted_lm_loss": 3.1141273975372314, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9574628472328186, "theoretical_loss": 3.35030200353223, "tokens_seen": 2677145600 }, { "epoch": 0.96, "learning_rate": 2.1944759742716613e-05, "loss": 3.0359, "theoretical_loss": 3.3502116853250974, "tokens_seen": 2678063104 }, { "epoch": 0.96, "learning_rate": 2.175558077941733e-05, "loss": 3.0588, "theoretical_loss": 3.350108513002934, "tokens_seen": 2679111680 }, { "epoch": 0.96, "learning_rate": 2.1566401816118047e-05, "loss": 3.0685, "theoretical_loss": 3.35000539235494, "tokens_seen": 2680160256 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.4583278298377991, "objective/train/docs_used": 1507767, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.6217093467712402, "objective/train/original_loss": 2.6217093467712402, "objective/train/theoretical_loss": 3.349979620261629, "objective/train/tokens_used": 2700882400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22251355648040771, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0469634532928467, "objective/train/weighted_lm_loss": 2.7458016872406006, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9549773335456848, "theoretical_loss": 3.349979620261629, "tokens_seen": 2680422400 }, { "epoch": 0.96, "learning_rate": 2.1377222852818768e-05, "loss": 3.0494, "theoretical_loss": 3.3499023233350336, "tokens_seen": 2681208832 }, { "epoch": 0.96, "learning_rate": 2.1188043889519485e-05, "loss": 3.0639, "theoretical_loss": 3.3497993058971898, "tokens_seen": 2682257408 }, { "epoch": 0.96, "learning_rate": 2.0998864926220206e-05, "loss": 3.0573, "theoretical_loss": 3.349696339995445, "tokens_seen": 2683305984 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.485384076833725, "objective/train/docs_used": 1509839, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9253435134887695, "objective/train/original_loss": 2.9253437519073486, "objective/train/theoretical_loss": 3.3496577410595694, "objective/train/tokens_used": 2704159200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23941144347190857, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049755334854126, "objective/train/weighted_lm_loss": 3.0702219009399414, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9517756104469299, "theoretical_loss": 3.3496577410595694, "tokens_seen": 2683699200 }, { "epoch": 0.96, "learning_rate": 2.0809685962920923e-05, "loss": 3.0022, "theoretical_loss": 3.3495934255838926, "tokens_seen": 2684354560 }, { "epoch": 0.96, "learning_rate": 2.0620506999621643e-05, "loss": 3.0025, "theoretical_loss": 3.349490562616686, "tokens_seen": 2685403136 }, { "epoch": 0.96, "learning_rate": 2.043132803632236e-05, "loss": 3.0222, "theoretical_loss": 3.349387751048037, "tokens_seen": 2686451712 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.49125921726226807, "objective/train/docs_used": 1511903, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1318821907043457, "objective/train/original_loss": 3.1318821907043457, "objective/train/theoretical_loss": 3.3493363645238787, "objective/train/tokens_used": 2707436000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24440424144268036, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503684282302856, "objective/train/weighted_lm_loss": 3.2890326976776123, "objective/train/weights_max": 1.0512181520462036, "objective/train/weights_min": 0.9706076979637146, "theoretical_loss": 3.3493363645238787, "tokens_seen": 2686976000 }, { "epoch": 0.96, "learning_rate": 2.0242149073023078e-05, "loss": 3.0417, "theoretical_loss": 3.3492849908322158, "tokens_seen": 2687500288 }, { "epoch": 0.96, "learning_rate": 2.0052970109723798e-05, "loss": 3.052, "theoretical_loss": 3.349182281923551, "tokens_seen": 2688548864 }, { "epoch": 0.96, "learning_rate": 1.986379114642452e-05, "loss": 3.0494, "theoretical_loss": 3.3490796242764302, "tokens_seen": 2689597440 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.4908863604068756, "objective/train/docs_used": 1514263, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.605135917663574, "objective/train/original_loss": 2.605135440826416, "objective/train/theoretical_loss": 3.3490154892579884, "objective/train/tokens_used": 2710712800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2420625537633896, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503190755844116, "objective/train/weighted_lm_loss": 2.7363693714141846, "objective/train/weights_max": 1.0512186288833618, "objective/train/weights_min": 0.9990963935852051, "theoretical_loss": 3.3490154892579884, "tokens_seen": 2690252800 }, { "epoch": 0.96, "learning_rate": 1.9674612183125236e-05, "loss": 2.9764, "theoretical_loss": 3.348977017845299, "tokens_seen": 2690646016 }, { "epoch": 0.96, "learning_rate": 1.9485433219825956e-05, "loss": 2.9394, "theoretical_loss": 3.3488744625846607, "tokens_seen": 2691694592 }, { "epoch": 0.96, "learning_rate": 1.9296254256526674e-05, "loss": 3.0521, "theoretical_loss": 3.3487719584490776, "tokens_seen": 2692743168 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.487415611743927, "objective/train/docs_used": 1516259, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.661348581314087, "objective/train/original_loss": 2.661348819732666, "objective/train/theoretical_loss": 3.3486951138709067, "objective/train/tokens_used": 2713989600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24382545053958893, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0499809980392456, "objective/train/weighted_lm_loss": 2.79396915435791, "objective/train/weights_max": 1.0512179136276245, "objective/train/weights_min": 0.956028163433075, "theoretical_loss": 3.3486951138709067, "tokens_seen": 2693529600 }, { "epoch": 0.96, "learning_rate": 1.910707529322739e-05, "loss": 2.9722, "theoretical_loss": 3.348669505393169, "tokens_seen": 2693791744 }, { "epoch": 0.96, "learning_rate": 1.8917896329928115e-05, "loss": 3.0601, "theoretical_loss": 3.348567103371614, "tokens_seen": 2694840320 }, { "epoch": 0.96, "learning_rate": 1.8728717366628832e-05, "loss": 3.0398, "theoretical_loss": 3.3484647523391473, "tokens_seen": 2695888896 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.4843372106552124, "objective/train/docs_used": 1518290, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.877500534057617, "objective/train/original_loss": 2.877500534057617, "objective/train/theoretical_loss": 3.3483752369771853, "objective/train/tokens_used": 2717266400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24128593504428864, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049660086631775, "objective/train/weighted_lm_loss": 3.0197174549102783, "objective/train/weights_max": 1.0512170791625977, "objective/train/weights_min": 0.9525272250175476, "theoretical_loss": 3.3483752369771853, "tokens_seen": 2696806400 }, { "epoch": 0.96, "learning_rate": 1.853953840332955e-05, "loss": 3.0041, "theoretical_loss": 3.3483624522505617, "tokens_seen": 2696937472 }, { "epoch": 0.96, "learning_rate": 1.835035944003027e-05, "loss": 3.0267, "theoretical_loss": 3.34826020306071, "tokens_seen": 2697986048 }, { "epoch": 0.96, "learning_rate": 1.8161180476730987e-05, "loss": 2.9848, "theoretical_loss": 3.3481580047244988, "tokens_seen": 2699034624 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.4914160370826721, "objective/train/docs_used": 1520209, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1949706077575684, "objective/train/original_loss": 3.1949706077575684, "objective/train/theoretical_loss": 3.3480558571968952, "objective/train/tokens_used": 2720543200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24515222012996674, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503878593444824, "objective/train/weighted_lm_loss": 3.3562815189361572, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.9568387866020203, "theoretical_loss": 3.3480558571968952, "tokens_seen": 2700083200 }, { "epoch": 0.96, "learning_rate": 1.7972001513431704e-05, "loss": 3.0669, "theoretical_loss": 3.3480558571968952, "tokens_seen": 2700083200 }, { "epoch": 0.96, "learning_rate": 1.7782822550132428e-05, "loss": 3.0338, "theoretical_loss": 3.347953760432923, "tokens_seen": 2701131776 }, { "epoch": 0.97, "learning_rate": 1.7593643586833145e-05, "loss": 2.9885, "theoretical_loss": 3.3478517143876614, "tokens_seen": 2702180352 }, { "epoch": 0.97, "learning_rate": 1.7404464623533862e-05, "loss": 2.9584, "theoretical_loss": 3.3477497190162495, "tokens_seen": 2703228928 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.4775691032409668, "objective/train/docs_used": 1521508, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8530027866363525, "objective/train/original_loss": 2.8530025482177734, "objective/train/theoretical_loss": 3.347736973155596, "objective/train/tokens_used": 2723820000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23687875270843506, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489609241485596, "objective/train/weighted_lm_loss": 2.991417407989502, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9562798738479614, "theoretical_loss": 3.347736973155596, "tokens_seen": 2703360000 }, { "epoch": 0.97, "learning_rate": 1.7215285660234583e-05, "loss": 2.9654, "theoretical_loss": 3.3476477742738817, "tokens_seen": 2704277504 }, { "epoch": 0.97, "learning_rate": 1.70261066969353e-05, "loss": 2.9792, "theoretical_loss": 3.3475458801158093, "tokens_seen": 2705326080 }, { "epoch": 0.97, "learning_rate": 1.6836927733636017e-05, "loss": 3.0474, "theoretical_loss": 3.3474440364973415, "tokens_seen": 2706374656 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.48271217942237854, "objective/train/docs_used": 1523597, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.2933883666992188, "objective/train/original_loss": 2.293388843536377, "objective/train/theoretical_loss": 3.347418583484306, "objective/train/tokens_used": 2727096800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23795966804027557, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049480676651001, "objective/train/weighted_lm_loss": 2.4072484970092773, "objective/train/weights_max": 1.051218032836914, "objective/train/weights_min": 0.955068051815033, "theoretical_loss": 3.347418583484306, "tokens_seen": 2706636800 }, { "epoch": 0.97, "learning_rate": 1.664774877033674e-05, "loss": 2.9809, "theoretical_loss": 3.347342243373844, "tokens_seen": 2707423232 }, { "epoch": 0.97, "learning_rate": 1.6458569807037458e-05, "loss": 2.9979, "theoretical_loss": 3.3472405007007384, "tokens_seen": 2708471808 }, { "epoch": 0.97, "learning_rate": 1.6269390843738175e-05, "loss": 3.0455, "theoretical_loss": 3.347138808433504, "tokens_seen": 2709520384 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.48901796340942383, "objective/train/docs_used": 1525609, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7998077869415283, "objective/train/original_loss": 2.7998077869415283, "objective/train/theoretical_loss": 3.3471006868194775, "objective/train/tokens_used": 2730373600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2417827993631363, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050130844116211, "objective/train/weighted_lm_loss": 2.9398789405822754, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.960767388343811, "theoretical_loss": 3.3471006868194775, "tokens_seen": 2709913600 }, { "epoch": 0.97, "learning_rate": 1.6080211880438896e-05, "loss": 2.9932, "theoretical_loss": 3.3470371665276755, "tokens_seen": 2710568960 }, { "epoch": 0.97, "learning_rate": 1.5891032917139613e-05, "loss": 3.0507, "theoretical_loss": 3.3469355749388447, "tokens_seen": 2711617536 }, { "epoch": 0.97, "learning_rate": 1.570185395384033e-05, "loss": 3.083, "theoretical_loss": 3.3468340336226596, "tokens_seen": 2712666112 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.46617740392684937, "objective/train/docs_used": 1527506, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0006046295166016, "objective/train/original_loss": 3.0006043910980225, "objective/train/theoretical_loss": 3.3467832818029644, "objective/train/tokens_used": 2733650400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22822509706020355, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.047777771949768, "objective/train/weighted_lm_loss": 3.147068500518799, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.9607921838760376, "theoretical_loss": 3.3467832818029644, "tokens_seen": 2713190400 }, { "epoch": 0.97, "learning_rate": 1.5512674990541054e-05, "loss": 3.0005, "theoretical_loss": 3.3467325425348244, "tokens_seen": 2713714688 }, { "epoch": 0.97, "learning_rate": 1.532349602724177e-05, "loss": 3.0609, "theoretical_loss": 3.3466311016310994, "tokens_seen": 2714763264 }, { "epoch": 0.97, "learning_rate": 1.513431706394249e-05, "loss": 3.018, "theoretical_loss": 3.3465297108673013, "tokens_seen": 2715811840 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.4781047999858856, "objective/train/docs_used": 1529013, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.908228874206543, "objective/train/original_loss": 2.908228874206543, "objective/train/theoretical_loss": 3.346466367081999, "objective/train/tokens_used": 2736927200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23485645651817322, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0490041971206665, "objective/train/weighted_lm_loss": 3.0513052940368652, "objective/train/weights_max": 1.0512176752090454, "objective/train/weights_min": 0.959873616695404, "theoretical_loss": 3.346466367081999, "tokens_seen": 2716467200 }, { "epoch": 0.97, "learning_rate": 1.4945138100643209e-05, "loss": 2.99, "theoretical_loss": 3.346428370199302, "tokens_seen": 2716860416 }, { "epoch": 0.97, "learning_rate": 1.4755959137343928e-05, "loss": 3.1079, "theoretical_loss": 3.3463270795830296, "tokens_seen": 2717908992 }, { "epoch": 0.97, "learning_rate": 1.4566780174044647e-05, "loss": 3.1435, "theoretical_loss": 3.3462258389744677, "tokens_seen": 2718957568 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.4586866497993469, "objective/train/docs_used": 1529588, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8062684535980225, "objective/train/original_loss": 2.8062686920166016, "objective/train/theoretical_loss": 3.34614994130916, "objective/train/tokens_used": 2740204000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22812047600746155, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0470279455184937, "objective/train/weighted_lm_loss": 2.941352128982544, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.9811699390411377, "theoretical_loss": 3.34614994130916, "tokens_seen": 2719744000 }, { "epoch": 0.97, "learning_rate": 1.4377601210745365e-05, "loss": 3.1722, "theoretical_loss": 3.3461246483296563, "tokens_seen": 2720006144 }, { "epoch": 0.97, "learning_rate": 1.4188422247446084e-05, "loss": 3.183, "theoretical_loss": 3.346023507604691, "tokens_seen": 2721054720 }, { "epoch": 0.97, "learning_rate": 1.3999243284146803e-05, "loss": 3.0404, "theoretical_loss": 3.3459224167557213, "tokens_seen": 2722103296 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.4828481078147888, "objective/train/docs_used": 1531474, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.01423716545105, "objective/train/original_loss": 3.014237403869629, "objective/train/theoretical_loss": 3.345834003142347, "objective/train/tokens_used": 2743480800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2370041161775589, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0494893789291382, "objective/train/weighted_lm_loss": 3.1627633571624756, "objective/train/weights_max": 1.05121648311615, "objective/train/weights_min": 0.9518007040023804, "theoretical_loss": 3.345834003142347, "tokens_seen": 2723020800 }, { "epoch": 0.97, "learning_rate": 1.3810064320847522e-05, "loss": 3.0889, "theoretical_loss": 3.3458213757389537, "tokens_seen": 2723151872 }, { "epoch": 0.97, "learning_rate": 1.362088535754824e-05, "loss": 3.071, "theoretical_loss": 3.34572038451065, "tokens_seen": 2724200448 }, { "epoch": 0.97, "learning_rate": 1.343170639424896e-05, "loss": 3.1018, "theoretical_loss": 3.3456194430271258, "tokens_seen": 2725249024 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.48170891404151917, "objective/train/docs_used": 1533341, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8722846508026123, "objective/train/original_loss": 2.872284412384033, "objective/train/theoretical_loss": 3.3455185512447527, "objective/train/tokens_used": 2746757600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23601827025413513, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049370527267456, "objective/train/weighted_lm_loss": 3.012676477432251, "objective/train/weights_max": 1.0512162446975708, "objective/train/weights_min": 0.9820363521575928, "theoretical_loss": 3.3455185512447527, "tokens_seen": 2726297600 }, { "epoch": 0.97, "learning_rate": 1.324252743094968e-05, "loss": 3.0983, "theoretical_loss": 3.3455185512447527, "tokens_seen": 2726297600 }, { "epoch": 0.97, "learning_rate": 1.3053348467650397e-05, "loss": 3.1182, "theoretical_loss": 3.3454177091199586, "tokens_seen": 2727346176 }, { "epoch": 0.97, "learning_rate": 1.2864169504351116e-05, "loss": 3.0496, "theoretical_loss": 3.3453169166092236, "tokens_seen": 2728394752 }, { "epoch": 0.97, "learning_rate": 1.2674990541051837e-05, "loss": 3.0598, "theoretical_loss": 3.345216173669085, "tokens_seen": 2729443328 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.48440757393836975, "objective/train/docs_used": 1535248, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1303958892822266, "objective/train/original_loss": 3.1303963661193848, "objective/train/theoretical_loss": 3.3452035842848376, "objective/train/tokens_used": 2750034400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2397458553314209, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0496593713760376, "objective/train/weighted_lm_loss": 3.2855958938598633, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.954217791557312, "theoretical_loss": 3.3452035842848376, "tokens_seen": 2729574400 }, { "epoch": 0.98, "learning_rate": 1.2485811577752554e-05, "loss": 3.0877, "theoretical_loss": 3.345115480256134, "tokens_seen": 2730491904 }, { "epoch": 0.98, "learning_rate": 1.2296632614453273e-05, "loss": 3.1178, "theoretical_loss": 3.3450148363270156, "tokens_seen": 2731540480 }, { "epoch": 0.98, "learning_rate": 1.2107453651153993e-05, "loss": 3.1079, "theoretical_loss": 3.3449142418384312, "tokens_seen": 2732589056 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.4586898982524872, "objective/train/docs_used": 1536975, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1073994636535645, "objective/train/original_loss": 3.1073994636535645, "objective/train/theoretical_loss": 3.3448891009362995, "objective/train/tokens_used": 2753311200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.22674831748008728, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0470212697982788, "objective/train/weighted_lm_loss": 3.251424789428711, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9534921050071716, "theoretical_loss": 3.3448891009362995, "tokens_seen": 2732851200 }, { "epoch": 0.98, "learning_rate": 1.191827468785471e-05, "loss": 3.0302, "theoretical_loss": 3.344813696747135, "tokens_seen": 2733637632 }, { "epoch": 0.98, "learning_rate": 1.172909572455543e-05, "loss": 3.1274, "theoretical_loss": 3.3447132010099363, "tokens_seen": 2734686208 }, { "epoch": 0.98, "learning_rate": 1.153991676125615e-05, "loss": 3.1093, "theoretical_loss": 3.344612754583699, "tokens_seen": 2735734784 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.4917672872543335, "objective/train/docs_used": 1538829, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1836228370666504, "objective/train/original_loss": 3.1836228370666504, "objective/train/theoretical_loss": 3.344575099878048, "objective/train/tokens_used": 2756588000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24380403757095337, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050416111946106, "objective/train/weighted_lm_loss": 3.343857765197754, "objective/train/weights_max": 1.0512166023254395, "objective/train/weights_min": 0.969031035900116, "theoretical_loss": 3.344575099878048, "tokens_seen": 2736128000 }, { "epoch": 0.98, "learning_rate": 1.1350737797956867e-05, "loss": 3.1061, "theoretical_loss": 3.3445123574253417, "tokens_seen": 2736783360 }, { "epoch": 0.98, "learning_rate": 1.1161558834657586e-05, "loss": 3.1514, "theoretical_loss": 3.3444120094918346, "tokens_seen": 2737831936 }, { "epoch": 0.98, "learning_rate": 1.0972379871358306e-05, "loss": 3.0688, "theoretical_loss": 3.344311710740205, "tokens_seen": 2738880512 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.490246057510376, "objective/train/docs_used": 1540914, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9021008014678955, "objective/train/original_loss": 2.9021010398864746, "objective/train/theoretical_loss": 3.3442615797941793, "objective/train/tokens_used": 2759864800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24154502153396606, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502524375915527, "objective/train/weighted_lm_loss": 3.0474276542663574, "objective/train/weights_max": 1.051216959953308, "objective/train/weights_min": 0.9797579646110535, "theoretical_loss": 3.3442615797941793, "tokens_seen": 2739404800 }, { "epoch": 0.98, "learning_rate": 1.0783200908059024e-05, "loss": 3.0935, "theoretical_loss": 3.344211461127532, "tokens_seen": 2739929088 }, { "epoch": 0.98, "learning_rate": 1.0594021944759742e-05, "loss": 3.1297, "theoretical_loss": 3.3441112606109504, "tokens_seen": 2740977664 }, { "epoch": 0.98, "learning_rate": 1.0404842981460461e-05, "loss": 3.1092, "theoretical_loss": 3.344011109147647, "tokens_seen": 2742026240 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.47691646218299866, "objective/train/docs_used": 1542635, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9913744926452637, "objective/train/original_loss": 2.9913742542266846, "objective/train/theoretical_loss": 3.3439485393739488, "objective/train/tokens_used": 2763141600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2393837571144104, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0489083528518677, "objective/train/weighted_lm_loss": 3.13615083694458, "objective/train/weights_max": 1.0512189865112305, "objective/train/weights_min": 0.9514455199241638, "theoretical_loss": 3.3439485393739488, "tokens_seen": 2742681600 }, { "epoch": 0.98, "learning_rate": 1.021566401816118e-05, "loss": 3.1106, "theoretical_loss": 3.343911006694863, "tokens_seen": 2743074816 }, { "epoch": 0.98, "learning_rate": 1.0026485054861899e-05, "loss": 3.0931, "theoretical_loss": 3.3438109532098936, "tokens_seen": 2744123392 }, { "epoch": 0.98, "learning_rate": 9.837306091562618e-06, "loss": 3.0949, "theoretical_loss": 3.343710948650087, "tokens_seen": 2745171968 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.49094340205192566, "objective/train/docs_used": 1544447, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.071685791015625, "objective/train/original_loss": 3.071685791015625, "objective/train/theoretical_loss": 3.343635977311743, "objective/train/tokens_used": 2766418400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24382232129573822, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0503337383270264, "objective/train/weighted_lm_loss": 3.2261123657226562, "objective/train/weights_max": 1.0512171983718872, "objective/train/weights_min": 0.9554123282432556, "theoretical_loss": 3.343635977311743, "tokens_seen": 2745958400 }, { "epoch": 0.98, "learning_rate": 9.648127128263337e-06, "loss": 3.1831, "theoretical_loss": 3.3436109929728453, "tokens_seen": 2746220544 }, { "epoch": 0.98, "learning_rate": 9.458948164964057e-06, "loss": 3.0834, "theoretical_loss": 3.3435110861356234, "tokens_seen": 2747269120 }, { "epoch": 0.98, "learning_rate": 9.269769201664774e-06, "loss": 3.1124, "theoretical_loss": 3.3434112280959294, "tokens_seen": 2748317696 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.47922295331954956, "objective/train/docs_used": 1546277, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.0440077781677246, "objective/train/original_loss": 3.0440077781677246, "objective/train/theoretical_loss": 3.343323892307056, "objective/train/tokens_used": 2769695200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23427808284759521, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0491130352020264, "objective/train/weighted_lm_loss": 3.1924755573272705, "objective/train/weights_max": 1.0512192249298096, "objective/train/weights_min": 0.9526734948158264, "theoretical_loss": 3.343323892307056, "tokens_seen": 2749235200 }, { "epoch": 0.98, "learning_rate": 9.080590238365493e-06, "loss": 3.091, "theoretical_loss": 3.343311418811325, "tokens_seen": 2749366272 }, { "epoch": 0.98, "learning_rate": 8.891411275066214e-06, "loss": 3.1046, "theoretical_loss": 3.3432116582394253, "tokens_seen": 2750414848 }, { "epoch": 0.98, "learning_rate": 8.702232311766931e-06, "loss": 3.1246, "theoretical_loss": 3.3431119463378973, "tokens_seen": 2751463424 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.4734799861907959, "objective/train/docs_used": 1547942, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.2146964073181152, "objective/train/original_loss": 3.2146964073181152, "objective/train/theoretical_loss": 3.343012283064462, "objective/train/tokens_used": 2772972000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23234401643276215, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0485291481018066, "objective/train/weighted_lm_loss": 3.3720321655273438, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.963914155960083, "theoretical_loss": 3.343012283064462, "tokens_seen": 2752512000 }, { "epoch": 0.98, "learning_rate": 8.51305334846765e-06, "loss": 3.0917, "theoretical_loss": 3.343012283064462, "tokens_seen": 2752512000 }, { "epoch": 0.98, "learning_rate": 8.32387438516837e-06, "loss": 3.0486, "theoretical_loss": 3.342912668376892, "tokens_seen": 2753560576 }, { "epoch": 0.98, "learning_rate": 8.134695421869088e-06, "loss": 3.0753, "theoretical_loss": 3.342813102233014, "tokens_seen": 2754609152 }, { "epoch": 0.98, "learning_rate": 7.945516458569806e-06, "loss": 3.0843, "theoretical_loss": 3.3427135845907063, "tokens_seen": 2755657728 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.4692471921443939, "objective/train/docs_used": 1550559, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.7862179279327393, "objective/train/original_loss": 2.78621768951416, "objective/train/theoretical_loss": 3.342701148293589, "objective/train/tokens_used": 2776248800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23171769082546234, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0481022596359253, "objective/train/weighted_lm_loss": 2.9225549697875977, "objective/train/weights_max": 1.0512168407440186, "objective/train/weights_min": 0.9525762796401978, "theoretical_loss": 3.342701148293589, "tokens_seen": 2755788800 }, { "epoch": 0.98, "learning_rate": 7.756337495270527e-06, "loss": 3.1165, "theoretical_loss": 3.3426141154079008, "tokens_seen": 2756706304 }, { "epoch": 0.99, "learning_rate": 7.567158531971245e-06, "loss": 2.9891, "theoretical_loss": 3.3425146946425803, "tokens_seen": 2757754880 }, { "epoch": 0.99, "learning_rate": 7.377979568671964e-06, "loss": 3.0308, "theoretical_loss": 3.3424153222527817, "tokens_seen": 2758803456 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.48670539259910583, "objective/train/docs_used": 1552546, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9341201782226562, "objective/train/original_loss": 2.934119701385498, "objective/train/theoretical_loss": 3.3423904867090948, "objective/train/tokens_used": 2779525600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24417608976364136, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049911618232727, "objective/train/weighted_lm_loss": 3.079969882965088, "objective/train/weights_max": 1.051216721534729, "objective/train/weights_min": 0.9527941942214966, "theoretical_loss": 3.3423904867090948, "tokens_seen": 2759065600 }, { "epoch": 0.99, "learning_rate": 7.188800605372683e-06, "loss": 3.0546, "theoretical_loss": 3.342315998196593, "tokens_seen": 2759852032 }, { "epoch": 0.99, "learning_rate": 6.9996216420734016e-06, "loss": 3.0525, "theoretical_loss": 3.342216722432155, "tokens_seen": 2760900608 }, { "epoch": 0.99, "learning_rate": 6.81044267877412e-06, "loss": 3.0923, "theoretical_loss": 3.3421174949176606, "tokens_seen": 2761949184 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.48612168431282043, "objective/train/docs_used": 1554602, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1402394771575928, "objective/train/original_loss": 3.140239715576172, "objective/train/theoretical_loss": 3.3420802970306394, "objective/train/tokens_used": 2782802400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23997044563293457, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0498319864273071, "objective/train/weighted_lm_loss": 3.2957353591918945, "objective/train/weights_max": 1.0512193441390991, "objective/train/weights_min": 0.9562333226203918, "theoretical_loss": 3.3420802970306394, "tokens_seen": 2762342400 }, { "epoch": 0.99, "learning_rate": 6.62126371547484e-06, "loss": 3.0371, "theoretical_loss": 3.3420183156113543, "tokens_seen": 2762997760 }, { "epoch": 0.99, "learning_rate": 6.432084752175558e-06, "loss": 3.0072, "theoretical_loss": 3.3419191844715326, "tokens_seen": 2764046336 }, { "epoch": 0.99, "learning_rate": 6.242905788876277e-06, "loss": 2.9878, "theoretical_loss": 3.341820101456545, "tokens_seen": 2765094912 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.48010215163230896, "objective/train/docs_used": 1555428, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.860677480697632, "objective/train/original_loss": 2.860677480697632, "objective/train/theoretical_loss": 3.341770577982862, "objective/train/tokens_used": 2786079200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23556600511074066, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049207329750061, "objective/train/weighted_lm_loss": 3.00154709815979, "objective/train/weights_max": 1.0512183904647827, "objective/train/weights_min": 0.9520919322967529, "theoretical_loss": 3.341770577982862, "tokens_seen": 2765619200 }, { "epoch": 0.99, "learning_rate": 6.053726825576997e-06, "loss": 3.1445, "theoretical_loss": 3.3417210665247916, "tokens_seen": 2766143488 }, { "epoch": 0.99, "learning_rate": 5.864547862277715e-06, "loss": 3.1284, "theoretical_loss": 3.3416220796347242, "tokens_seen": 2767192064 }, { "epoch": 0.99, "learning_rate": 5.6753688989784335e-06, "loss": 3.041, "theoretical_loss": 3.341523140744847, "tokens_seen": 2768240640 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.48793041706085205, "objective/train/docs_used": 1557592, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1938629150390625, "objective/train/original_loss": 3.1938624382019043, "objective/train/theoretical_loss": 3.341461328295353, "objective/train/tokens_used": 2789356000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24034656584262848, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050014853477478, "objective/train/weighted_lm_loss": 3.354142427444458, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9659320116043091, "theoretical_loss": 3.341461328295353, "tokens_seen": 2768896000 }, { "epoch": 0.99, "learning_rate": 5.486189935679153e-06, "loss": 3.1212, "theoretical_loss": 3.3414242498137154, "tokens_seen": 2769289216 }, { "epoch": 0.99, "learning_rate": 5.297010972379871e-06, "loss": 3.0925, "theoretical_loss": 3.341325406799936, "tokens_seen": 2770337792 }, { "epoch": 0.99, "learning_rate": 5.10783200908059e-06, "loss": 3.1018, "theoretical_loss": 3.341226611662167, "tokens_seen": 2771386368 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.4908839166164398, "objective/train/docs_used": 1559642, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9026548862457275, "objective/train/original_loss": 2.9026551246643066, "objective/train/theoretical_loss": 3.341152546702631, "objective/train/tokens_used": 2792632800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24205824732780457, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050318717956543, "objective/train/weighted_lm_loss": 3.049380302429199, "objective/train/weights_max": 1.0512174367904663, "objective/train/weights_min": 0.9522111415863037, "theoretical_loss": 3.341152546702631, "tokens_seen": 2772172800 }, { "epoch": 0.99, "learning_rate": 4.918653045781309e-06, "loss": 3.0673, "theoretical_loss": 3.3411278643591173, "tokens_seen": 2772434944 }, { "epoch": 0.99, "learning_rate": 4.729474082482029e-06, "loss": 3.1004, "theoretical_loss": 3.341029164849549, "tokens_seen": 2773483520 }, { "epoch": 0.99, "learning_rate": 4.540295119182747e-06, "loss": 3.2102, "theoretical_loss": 3.3409305130922724, "tokens_seen": 2774532096 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.4898316562175751, "objective/train/docs_used": 1561371, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.8627278804779053, "objective/train/original_loss": 2.862727642059326, "objective/train/theoretical_loss": 3.3408442319441174, "objective/train/tokens_used": 2795909600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24388957023620605, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0502229928970337, "objective/train/weighted_lm_loss": 3.005845069885254, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.9519106149673462, "theoretical_loss": 3.3408442319441174, "tokens_seen": 2775449600 }, { "epoch": 0.99, "learning_rate": 4.3511161558834655e-06, "loss": 3.1312, "theoretical_loss": 3.3408319090461513, "tokens_seen": 2775580672 }, { "epoch": 0.99, "learning_rate": 4.161937192584185e-06, "loss": 3.0719, "theoretical_loss": 3.340733352670099, "tokens_seen": 2776629248 }, { "epoch": 0.99, "learning_rate": 3.972758229284903e-06, "loss": 3.1596, "theoretical_loss": 3.3406348439230804, "tokens_seen": 2777677824 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.48310354351997375, "objective/train/docs_used": 1563208, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1156625747680664, "objective/train/original_loss": 3.1156623363494873, "objective/train/theoretical_loss": 3.3405363827641112, "objective/train/tokens_used": 2799186400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.2416696399450302, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495388507843018, "objective/train/weighted_lm_loss": 3.269275188446045, "objective/train/weights_max": 1.0512175559997559, "objective/train/weights_min": 0.951533854007721, "theoretical_loss": 3.3405363827641112, "tokens_seen": 2778726400 }, { "epoch": 0.99, "learning_rate": 3.7835792659856225e-06, "loss": 3.1249, "theoretical_loss": 3.3405363827641112, "tokens_seen": 2778726400 }, { "epoch": 0.99, "learning_rate": 3.5944003026863414e-06, "loss": 3.1159, "theoretical_loss": 3.340437969152257, "tokens_seen": 2779774976 }, { "epoch": 0.99, "learning_rate": 3.40522133938706e-06, "loss": 3.1107, "theoretical_loss": 3.340339603046636, "tokens_seen": 2780823552 }, { "epoch": 0.99, "learning_rate": 3.216042376087779e-06, "loss": 3.1666, "theoretical_loss": 3.3402412844064138, "tokens_seen": 2781872128 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.4891952872276306, "objective/train/docs_used": 1564970, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.351323127746582, "objective/train/original_loss": 3.351323127746582, "objective/train/theoretical_loss": 3.3402289979117654, "objective/train/tokens_used": 2802463200, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24344053864479065, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.050157070159912, "objective/train/weighted_lm_loss": 3.5188140869140625, "objective/train/weights_max": 1.05121910572052, "objective/train/weights_min": 0.9516726732254028, "theoretical_loss": 3.3402289979117654, "tokens_seen": 2782003200 }, { "epoch": 0.99, "learning_rate": 3.0268634127884983e-06, "loss": 3.1592, "theoretical_loss": 3.340143013190809, "tokens_seen": 2782920704 }, { "epoch": 0.99, "learning_rate": 2.8376844494892168e-06, "loss": 3.1745, "theoretical_loss": 3.3400447893590903, "tokens_seen": 2783969280 }, { "epoch": 0.99, "learning_rate": 2.6485054861899356e-06, "loss": 3.1288, "theoretical_loss": 3.339946612870576, "tokens_seen": 2785017856 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.4746108055114746, "objective/train/docs_used": 1566749, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.9263644218444824, "objective/train/original_loss": 2.9263644218444824, "objective/train/theoretical_loss": 3.33992207614106, "objective/train/tokens_used": 2805740000, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23878102004528046, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.048674464225769, "objective/train/weighted_lm_loss": 3.06941556930542, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9526550769805908, "theoretical_loss": 3.33992207614106, "tokens_seen": 2785280000 }, { "epoch": 1.0, "learning_rate": 2.4593265228906545e-06, "loss": 3.12, "theoretical_loss": 3.3398484836846345, "tokens_seen": 2786066432 }, { "epoch": 1.0, "learning_rate": 2.2701475595913733e-06, "loss": 3.0822, "theoretical_loss": 3.3397504017606847, "tokens_seen": 2787115008 }, { "epoch": 1.0, "learning_rate": 2.0809685962920926e-06, "loss": 3.1112, "theoretical_loss": 3.3396523670581963, "tokens_seen": 2788163584 }, { "epoch": 1.0, "objective/train/advantage_avg": 0.4833011329174042, "objective/train/docs_used": 1568853, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.1042532920837402, "objective/train/original_loss": 3.104253053665161, "objective/train/theoretical_loss": 3.339615616210782, "objective/train/tokens_used": 2809016800, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.24029603600502014, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.0495517253875732, "objective/train/weighted_lm_loss": 3.2570862770080566, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9518541097640991, "theoretical_loss": 3.339615616210782, "tokens_seen": 2788556800 }, { "epoch": 1.0, "learning_rate": 1.8917896329928113e-06, "loss": 3.08, "theoretical_loss": 3.339554379536688, "tokens_seen": 2789212160 }, { "epoch": 1.0, "learning_rate": 1.70261066969353e-06, "loss": 3.0355, "theoretical_loss": 3.339456439155728, "tokens_seen": 2790260736 }, { "epoch": 1.0, "learning_rate": 1.5134317063942492e-06, "loss": 3.0258, "theoretical_loss": 3.339358545874936, "tokens_seen": 2791309312 }, { "epoch": 1.0, "objective/train/advantage_avg": 0.48446664214134216, "objective/train/docs_used": 1571029, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 3.068040132522583, "objective/train/original_loss": 3.068040132522583, "objective/train/theoretical_loss": 3.3393096168844973, "objective/train/tokens_used": 2812293600, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23969173431396484, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049665093421936, "objective/train/weighted_lm_loss": 3.2199342250823975, "objective/train/weights_max": 1.0512182712554932, "objective/train/weights_min": 0.9749250411987305, "theoretical_loss": 3.3393096168844973, "tokens_seen": 2791833600 }, { "epoch": 1.0, "learning_rate": 1.3242527430949678e-06, "loss": 3.0623, "theoretical_loss": 3.3392606996539804, "tokens_seen": 2792357888 }, { "epoch": 1.0, "learning_rate": 1.1350737797956867e-06, "loss": 3.0497, "theoretical_loss": 3.3391629004525782, "tokens_seen": 2793406464 }, { "epoch": 1.0, "learning_rate": 9.458948164964056e-07, "loss": 3.0735, "theoretical_loss": 3.3390651482304983, "tokens_seen": 2794455040 }, { "epoch": 1.0, "objective/train/advantage_avg": 0.4827611446380615, "objective/train/docs_used": 1572728, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 2.77453351020813, "objective/train/original_loss": 2.774533748626709, "objective/train/theoretical_loss": 3.3390040769305287, "objective/train/tokens_used": 2815570400, "objective/train/value_avg": -0.5, "objective/train/value_loss": 0.23958729207515717, "objective/train/value_max": -0.5, "objective/train/value_min": -0.5, "objective/train/value_reward_corr": NaN, "objective/train/value_std": 0.0, "objective/train/weight_avg": 1.049493670463562, "objective/train/weighted_lm_loss": 2.9126195907592773, "objective/train/weights_max": 1.0512173175811768, "objective/train/weights_min": 0.95488440990448, "theoretical_loss": 3.3390040769305287, "tokens_seen": 2795110400 }, { "epoch": 1.0, "learning_rate": 7.567158531971246e-07, "loss": 3.0685, "theoretical_loss": 3.3389674429475575, "tokens_seen": 2795503616 }, { "epoch": 1.0, "learning_rate": 5.675368898978433e-07, "loss": 3.0212, "theoretical_loss": 3.3388697845636224, "tokens_seen": 2796552192 }, { "epoch": 1.0, "learning_rate": 3.783579265985623e-07, "loss": 3.0448, "theoretical_loss": 3.338772173038609, "tokens_seen": 2797600768 } ], "max_steps": 2670, "num_train_epochs": 9223372036854775807, "total_flos": 1.427729276882387e+18, "trial_name": null, "trial_params": null }